In [1]:
# How to setup a Multi-Layer Perceptron model for review classification in Keras

def Snippet_385(): 
    """Build, train and evaluate a Multi-Layer Perceptron for binary
    sentiment classification of review sentences in Keras.

    Reads a tab-separated file (``label\\tsentence`` per line), builds a
    frequency-capped vocabulary, converts sentences to padded id sequences,
    then trains Embedding -> Flatten -> Dense(relu) -> Dense(softmax).

    Takes no arguments and returns nothing; all results are printed.
    Side effects: reads INPUT_FILE from the working directory twice and
    prints shapes, the model summary, training progress and final accuracy.
    """

    print()
    print(format('How to setup a Multi-Layer Perceptron model for review classification in Keras','*^92'))

    import time
    start_time = time.time()

    # load library
    from keras.layers.core import Dense, Dropout, Flatten
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential
    from keras.preprocessing.sequence import pad_sequences
    from keras.utils import np_utils
    from sklearn.model_selection import train_test_split
    import collections, nltk

    INPUT_FILE = "UniMishigan-Sentiment-trainingdata.txt"
    VOCAB_SIZE = 5000; EMBED_SIZE = 100; 

    # ---- Pass 1: count word frequencies and find the longest sentence ----
    # `with` guarantees the file handle is closed even if tokenization or
    # a malformed line raises (the original leaked the handle on error).
    counter = collections.Counter()
    maxlen = 0
    with open(INPUT_FILE, "r", encoding="utf8") as fin:
        for line in fin:
            _, sent = line.strip().split("\t")
            words = [x.lower() for x in nltk.word_tokenize(sent)]
            maxlen = max(maxlen, len(words))
            counter.update(words)  # Counter.update replaces the manual per-word loop

    # Map the VOCAB_SIZE most frequent words to ids 1..VOCAB_SIZE.
    # Id 0 is reserved: defaultdict(int) yields 0 for out-of-vocabulary
    # words, and 0 is also the value pad_sequences pads with.
    word2index = collections.defaultdict(int)
    for wid, (word, _count) in enumerate(counter.most_common(VOCAB_SIZE)):
        word2index[word] = wid + 1
    vocab_sz = len(word2index) + 1  # +1 for the reserved 0 id
    #index2word = {v:k for k, v in word2index.items()}

    # ---- Pass 2: re-read the file, converting each sentence to word ids ----
    xs, ys = [], []
    with open(INPUT_FILE, "r", encoding="utf8") as fin:
        for line in fin:
            label, sent = line.strip().split("\t")
            ys.append(int(label))
            words = [x.lower() for x in nltk.word_tokenize(sent)]
            xs.append([word2index[word] for word in words])

    X = pad_sequences(xs, maxlen=maxlen)  # uniform-length id matrix, padded with 0
    Y = np_utils.to_categorical(ys)       # one-hot labels for the softmax output

    # 80/20 train/test split (unseeded, so the split differs between runs)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    # setup a MLP network: embedding -> flatten -> dense hidden -> 2-way softmax
    model = Sequential()
    model.add(Embedding(vocab_sz, EMBED_SIZE, input_length=maxlen))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(500, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()

    # Fit the model, validating on the held-out 20% each epoch
    model.fit(X_train, y_train, validation_data=(X_test, y_test), 
              epochs=20, batch_size=128, verbose=1)

    # Final evaluation of the model (scores = [loss, accuracy])
    scores = model.evaluate(X_test, y_test, verbose=1)

    print("Accuracy: %.2f%%" % (scores[1]*100))
    print(); print("Execution Time %s seconds: " % (time.time() - start_time))

Snippet_385()
*******How to setup a Multi-Layer Perceptron model for review classification in Keras*******
Using TensorFlow backend.
(5668, 42) (1418, 42) (5668, 2) (1418, 2)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_1 (Embedding)      (None, 42, 100)           232700    
_________________________________________________________________
dropout_1 (Dropout)          (None, 42, 100)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4200)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 500)               2100500   
_________________________________________________________________
dropout_2 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1002      
=================================================================
Total params: 2,334,202
Trainable params: 2,334,202
Non-trainable params: 0
_________________________________________________________________
/Users/nilimesh/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
Train on 5668 samples, validate on 1418 samples
Epoch 1/20
5668/5668 [==============================] - 4s 648us/step - loss: 0.2787 - accuracy: 0.8784 - val_loss: 0.0935 - val_accuracy: 0.9584
Epoch 2/20
5668/5668 [==============================] - 2s 334us/step - loss: 0.0393 - accuracy: 0.9869 - val_loss: 0.0550 - val_accuracy: 0.9767
Epoch 3/20
5668/5668 [==============================] - 2s 372us/step - loss: 0.0078 - accuracy: 0.9991 - val_loss: 0.0466 - val_accuracy: 0.9810
Epoch 4/20
5668/5668 [==============================] - 2s 328us/step - loss: 0.0031 - accuracy: 0.9995 - val_loss: 0.0466 - val_accuracy: 0.9810
Epoch 5/20
5668/5668 [==============================] - 2s 363us/step - loss: 0.0014 - accuracy: 1.0000 - val_loss: 0.0449 - val_accuracy: 0.9831
Epoch 6/20
5668/5668 [==============================] - 2s 415us/step - loss: 9.6772e-04 - accuracy: 1.0000 - val_loss: 0.0455 - val_accuracy: 0.9838
Epoch 7/20
5668/5668 [==============================] - 2s 398us/step - loss: 5.6289e-04 - accuracy: 1.0000 - val_loss: 0.0474 - val_accuracy: 0.9845
Epoch 8/20
5668/5668 [==============================] - 2s 374us/step - loss: 4.0103e-04 - accuracy: 1.0000 - val_loss: 0.0463 - val_accuracy: 0.9845
Epoch 9/20
5668/5668 [==============================] - 2s 406us/step - loss: 2.9191e-04 - accuracy: 1.0000 - val_loss: 0.0469 - val_accuracy: 0.9838
Epoch 10/20
5668/5668 [==============================] - 2s 411us/step - loss: 2.2276e-04 - accuracy: 1.0000 - val_loss: 0.0463 - val_accuracy: 0.9852
Epoch 11/20
5668/5668 [==============================] - 2s 437us/step - loss: 1.9926e-04 - accuracy: 1.0000 - val_loss: 0.0471 - val_accuracy: 0.9838
Epoch 12/20
5668/5668 [==============================] - 3s 445us/step - loss: 1.4722e-04 - accuracy: 1.0000 - val_loss: 0.0478 - val_accuracy: 0.9845
Epoch 13/20
5668/5668 [==============================] - 2s 430us/step - loss: 1.3512e-04 - accuracy: 1.0000 - val_loss: 0.0483 - val_accuracy: 0.9866
Epoch 14/20
5668/5668 [==============================] - 2s 381us/step - loss: 1.2025e-04 - accuracy: 1.0000 - val_loss: 0.0484 - val_accuracy: 0.9866
Epoch 15/20
5668/5668 [==============================] - 2s 339us/step - loss: 9.0563e-05 - accuracy: 1.0000 - val_loss: 0.0483 - val_accuracy: 0.9852
Epoch 16/20
5668/5668 [==============================] - 2s 345us/step - loss: 8.9983e-05 - accuracy: 1.0000 - val_loss: 0.0484 - val_accuracy: 0.9845
Epoch 17/20
5668/5668 [==============================] - 2s 386us/step - loss: 8.2625e-05 - accuracy: 1.0000 - val_loss: 0.0484 - val_accuracy: 0.9845
Epoch 18/20
5668/5668 [==============================] - 2s 394us/step - loss: 7.0212e-05 - accuracy: 1.0000 - val_loss: 0.0480 - val_accuracy: 0.9852
Epoch 19/20
5668/5668 [==============================] - 2s 409us/step - loss: 5.7268e-05 - accuracy: 1.0000 - val_loss: 0.0484 - val_accuracy: 0.9845
Epoch 20/20
5668/5668 [==============================] - 2s 370us/step - loss: 4.7264e-05 - accuracy: 1.0000 - val_loss: 0.0485 - val_accuracy: 0.9845
1418/1418 [==============================] - 0s 87us/step
Accuracy: 98.45%

Execution Time 96.25430822372437 seconds: