In [1]:
# How to set up a CNN model for review classification in Keras

def Snippet_386(input_file="UniMishigan-Sentiment-trainingdata.txt",
                vocab_size=5000, embed_size=100,
                epochs=20, batch_size=128, random_state=None):
    """Train and evaluate a small 1-D CNN sentiment classifier with Keras.

    The corpus is a UTF-8 text file with one example per line in the form
    ``<integer label><TAB><sentence>``. Sentences are tokenised with NLTK,
    lower-cased, mapped to integer word ids, left-padded to the longest
    sentence, and fed to an Embedding -> Conv1D -> MaxPooling1D -> Dense
    network. The function prints the split shapes, the model summary, the
    test accuracy and the elapsed wall-clock time; it returns nothing.

    Parameters
    ----------
    input_file : str
        Path of the tab-separated training corpus.
    vocab_size : int
        Keep at most this many of the most frequent words; every other word
        is mapped to id 0.
    embed_size : int
        Dimensionality of the learned word embeddings.
    epochs, batch_size : int
        Passed straight to ``model.fit``.
    random_state : int or None
        Seed forwarded to ``train_test_split``. ``None`` (the default) keeps
        the original behaviour of a different random split on every run.

    Raises
    ------
    ValueError
        If a non-blank line has no tab separator, if a label is not an
        integer, or if the corpus contains no examples.
    """
    print()
    print(format('How to setup a CNN model for review classification in Keras','*^92'))

    import time
    start_time = time.time()

    # load library
    # Public import paths: these work on the Keras 2.x release this notebook
    # was run with and on later releases that removed the private submodules
    # (keras.layers.core, keras.layers.convolutional, keras.utils.np_utils).
    from keras.layers import Conv1D, Dense, Embedding, Flatten, MaxPooling1D
    from keras.models import Sequential
    from keras.preprocessing.sequence import pad_sequences
    from keras.utils import to_categorical
    from sklearn.model_selection import train_test_split
    import collections, nltk

    # Read and tokenise the corpus once; keep the tokens so the word-id
    # pass below does not have to re-open and re-tokenise the file.
    counter = collections.Counter()
    labels, tokenized = [], []
    with open(input_file, "r", encoding="utf8") as fin:
        for lineno, line in enumerate(fin, start=1):
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            # Split on the first tab only so a tab inside the sentence does
            # not break the two-field unpacking.
            fields = line.split("\t", 1)
            if len(fields) != 2:
                raise ValueError(
                    "%s:%d: expected '<label>\\t<sentence>'" % (input_file, lineno))
            label, sent = fields
            words = [x.lower() for x in nltk.word_tokenize(sent)]
            labels.append(int(label))
            tokenized.append(words)
            counter.update(words)

    if not tokenized:
        raise ValueError("no training examples found in %r" % (input_file,))

    maxlen = max(len(words) for words in tokenized)

    # Word ids start at 1; id 0 is shared by padding and out-of-vocabulary words.
    word2index = {
        word: wid
        for wid, (word, _) in enumerate(counter.most_common(vocab_size), start=1)
    }
    vocab_sz = len(word2index) + 1

    # A plain dict with .get() avoids the silent key insertion that a
    # defaultdict performs on every unknown-word lookup.
    xs = [[word2index.get(word, 0) for word in words] for words in tokenized]

    X = pad_sequences(xs, maxlen=maxlen)
    Y = to_categorical(labels)

    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.2, random_state=random_state)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    # setup a Convolutional Neural Network (CNN)
    model = Sequential()
    model.add(Embedding(vocab_sz, embed_size, input_length=maxlen))

    model.add(Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(250, activation='relu'))

    # One output unit per one-hot label column (2 for a 0/1 labelled corpus).
    model.add(Dense(Y.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()

    # Fit the model
    # NOTE: the held-out test split doubles as validation data. No callback
    # consumes it, so it only drives the per-epoch val_* report.
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=epochs, batch_size=batch_size, verbose=1)

    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy: %.2f%%" % (scores[1]*100))
    print(); print("Execution Time %s seconds: " % (time.time() - start_time))

# Run the demo end to end: load the corpus, train the CNN, and print the
# test accuracy and execution time.
Snippet_386()
****************How to setup a CNN model for review classification in Keras*****************
Using TensorFlow backend.
(5668, 42) (1418, 42) (5668, 2) (1418, 2)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_1 (Embedding)      (None, 42, 100)           232700    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 42, 128)           38528     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 21, 128)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2688)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 250)               672250    
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 502       
=================================================================
Total params: 943,980
Trainable params: 943,980
Non-trainable params: 0
_________________________________________________________________
/Users/nilimesh/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
Train on 5668 samples, validate on 1418 samples
Epoch 1/20
5668/5668 [==============================] - 3s 488us/step - loss: 0.3064 - accuracy: 0.8594 - val_loss: 0.0666 - val_accuracy: 0.9725
Epoch 2/20
5668/5668 [==============================] - 1s 256us/step - loss: 0.0221 - accuracy: 0.9936 - val_loss: 0.0412 - val_accuracy: 0.9817
Epoch 3/20
5668/5668 [==============================] - 1s 237us/step - loss: 0.0055 - accuracy: 0.9989 - val_loss: 0.0304 - val_accuracy: 0.9859
Epoch 4/20
5668/5668 [==============================] - 2s 265us/step - loss: 0.0018 - accuracy: 0.9996 - val_loss: 0.0297 - val_accuracy: 0.9859
Epoch 5/20
5668/5668 [==============================] - 1s 239us/step - loss: 0.0014 - accuracy: 0.9998 - val_loss: 0.0310 - val_accuracy: 0.9887
Epoch 6/20
5668/5668 [==============================] - 1s 240us/step - loss: 0.0031 - accuracy: 0.9996 - val_loss: 0.0301 - val_accuracy: 0.9866
Epoch 7/20
5668/5668 [==============================] - 2s 282us/step - loss: 0.0014 - accuracy: 0.9998 - val_loss: 0.0312 - val_accuracy: 0.9859
Epoch 8/20
5668/5668 [==============================] - 1s 264us/step - loss: 0.0011 - accuracy: 0.9998 - val_loss: 0.0371 - val_accuracy: 0.9852
Epoch 9/20
5668/5668 [==============================] - 2s 308us/step - loss: 0.0022 - accuracy: 0.9996 - val_loss: 0.0285 - val_accuracy: 0.9901
Epoch 10/20
5668/5668 [==============================] - 2s 268us/step - loss: 0.0011 - accuracy: 0.9998 - val_loss: 0.0381 - val_accuracy: 0.9838
Epoch 11/20
5668/5668 [==============================] - 1s 243us/step - loss: 0.0013 - accuracy: 0.9998 - val_loss: 0.0432 - val_accuracy: 0.9845
Epoch 12/20
5668/5668 [==============================] - 2s 287us/step - loss: 0.0021 - accuracy: 0.9996 - val_loss: 0.0419 - val_accuracy: 0.9838
Epoch 13/20
5668/5668 [==============================] - 1s 247us/step - loss: 0.0015 - accuracy: 0.9996 - val_loss: 0.0353 - val_accuracy: 0.9866
Epoch 14/20
5668/5668 [==============================] - 1s 245us/step - loss: 8.7543e-04 - accuracy: 0.9998 - val_loss: 0.0382 - val_accuracy: 0.9852
Epoch 15/20
5668/5668 [==============================] - 1s 236us/step - loss: 0.0011 - accuracy: 0.9998 - val_loss: 0.0491 - val_accuracy: 0.9831
Epoch 16/20
5668/5668 [==============================] - 1s 242us/step - loss: 0.0016 - accuracy: 0.9996 - val_loss: 0.0329 - val_accuracy: 0.9873
Epoch 17/20
5668/5668 [==============================] - 1s 240us/step - loss: 7.1747e-04 - accuracy: 0.9998 - val_loss: 0.0296 - val_accuracy: 0.9880
Epoch 18/20
5668/5668 [==============================] - 1s 237us/step - loss: 0.0012 - accuracy: 0.9996 - val_loss: 0.0318 - val_accuracy: 0.9873
Epoch 19/20
5668/5668 [==============================] - 2s 284us/step - loss: 0.0014 - accuracy: 0.9996 - val_loss: 0.0290 - val_accuracy: 0.9873
Epoch 20/20
5668/5668 [==============================] - 2s 316us/step - loss: 7.0881e-04 - accuracy: 0.9998 - val_loss: 0.0323 - val_accuracy: 0.9887
1418/1418 [==============================] - 0s 101us/step
Accuracy: 98.87%

Execution Time 83.54084610939026 seconds: