# How to set up a CNN model for review classification in Keras
def Snippet_386():
    """Train and evaluate a 1D-CNN text classifier with Keras.

    The function reads ``UniMishigan-Sentiment-trainingdata.txt`` from the
    current working directory. Each non-blank line must have the form
    ``<label><TAB><sentence>`` where ``<label>`` parses as an ``int``.
    Sentences are tokenised with ``nltk.word_tokenize`` and lower-cased, the
    ``VOCAB_SIZE`` most frequent tokens get ids starting at 1 (id 0 is shared
    by padding and out-of-vocabulary tokens), and every sentence is padded to
    the length of the longest one.  The data is split 80/20, a small
    Embedding -> Conv1D -> MaxPooling1D -> Dense network is trained for 20
    epochs, and the accuracy on the held-out split plus the total wall-clock
    time are printed.

    Returns:
        None.  All results are written to stdout.

    Raises:
        ValueError: if a non-blank line has no tab separator, a label is not
            an integer, or the file contains no records.
        OSError: if the input file cannot be opened.
    """
    print()
    print(format('How to setup a CNN model for review classification in Keras','*^92'))

    import time

    # The clock starts before the heavy imports, so they count toward the
    # reported execution time (same as the original ordering).
    start_time = time.time()

    # load library
    import collections

    import nltk
    # Public import locations; the per-module paths (keras.layers.core,
    # keras.layers.convolutional, keras.layers.embeddings) are not available
    # in newer Keras releases.
    from keras.layers import Conv1D, Dense, Embedding, Flatten, MaxPooling1D
    from keras.models import Sequential
    from keras.preprocessing.sequence import pad_sequences
    from sklearn.model_selection import train_test_split

    try:
        from keras.utils import to_categorical
    except ImportError:
        # Older layout where the helper is only reachable through np_utils.
        from keras.utils.np_utils import to_categorical

    INPUT_FILE = "UniMishigan-Sentiment-trainingdata.txt"
    VOCAB_SIZE = 5000
    EMBED_SIZE = 100

    # Single pass over the corpus: tokenise each sentence once and keep the
    # tokens so they do not have to be re-read and re-tokenised later.
    labels, sentences = [], []
    counter = collections.Counter()
    with open(INPUT_FILE, "r", encoding="utf8") as fin:
        for lineno, raw_line in enumerate(fin, start=1):
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Split on the first tab only so a tab inside the sentence does
            # not break the record.
            label, sep, sent = line.partition("\t")
            if not sep:
                raise ValueError(
                    "%s:%d: expected '<label><TAB><sentence>', got %r"
                    % (INPUT_FILE, lineno, line)
                )
            words = [token.lower() for token in nltk.word_tokenize(sent)]
            labels.append(int(label))
            sentences.append(words)
            counter.update(words)

    if not sentences:
        raise ValueError("no labelled sentences found in %s" % INPUT_FILE)

    maxlen = max(len(words) for words in sentences)

    # Ids start at 1; 0 is reserved for padding and unknown words.
    word2index = {
        word: wid
        for wid, (word, _count) in enumerate(counter.most_common(VOCAB_SIZE), start=1)
    }
    vocab_sz = len(word2index) + 1

    xs = [[word2index.get(word, 0) for word in words] for words in sentences]

    X = pad_sequences(xs, maxlen=maxlen)
    Y = to_categorical(labels)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    # setup a Convolutional Neural Network (CNN)
    model = Sequential()
    model.add(Embedding(vocab_sz, EMBED_SIZE, input_length=maxlen))
    model.add(Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(250, activation='relu'))
    # Two output units: the one-hot labels are expected to have two classes.
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()

    # Fit the model
    # NOTE: the held-out split is used both for per-epoch validation here and
    # for the final evaluation below.
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=20, batch_size=128, verbose=1)

    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy: %.2f%%" % (scores[1]*100))
    print(); print("Execution Time %s seconds: " % (time.time() - start_time))
# Run the example immediately; there is no __main__ guard, so this also
# executes when the file is imported.
Snippet_386()