Learn by Coding Examples in Applied Machine Learning

How to set up an MLP and a CNN for the MNIST dataset in Keras

In [5]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

MLP for the MNIST dataset using Keras

The baseline is a one-hidden-layer perceptron: each 28x28 image is flattened to a 784-dimensional vector, the hidden layer has one ReLU unit per input pixel, and a 10-way softmax produces the class probabilities.

In [6]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils  # in newer Keras, to_categorical lives in keras.utils directly

# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# flatten 28*28 images to a 784 vector for each image
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32')

# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

# define baseline model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
    
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# build the model
model = baseline_model()

# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)

# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Baseline Error: %.2f%%" % (100-scores[1]*100))
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 2s - loss: 0.2819 - acc: 0.9203 - val_loss: 0.1340 - val_acc: 0.9614
Epoch 2/10
 - 2s - loss: 0.1098 - acc: 0.9682 - val_loss: 0.0953 - val_acc: 0.9709
Epoch 3/10
 - 2s - loss: 0.0718 - acc: 0.9788 - val_loss: 0.0744 - val_acc: 0.9778
Epoch 4/10
 - 2s - loss: 0.0500 - acc: 0.9854 - val_loss: 0.0724 - val_acc: 0.9783
Epoch 5/10
 - 2s - loss: 0.0371 - acc: 0.9892 - val_loss: 0.0725 - val_acc: 0.9768
Epoch 6/10
 - 2s - loss: 0.0270 - acc: 0.9930 - val_loss: 0.0650 - val_acc: 0.9796
Epoch 7/10
 - 2s - loss: 0.0207 - acc: 0.9946 - val_loss: 0.0621 - val_acc: 0.9814
Epoch 8/10
 - 2s - loss: 0.0143 - acc: 0.9968 - val_loss: 0.0572 - val_acc: 0.9822
Epoch 9/10
 - 2s - loss: 0.0106 - acc: 0.9979 - val_loss: 0.0613 - val_acc: 0.9807
Epoch 10/10
 - 2s - loss: 0.0085 - acc: 0.9982 - val_loss: 0.0658 - val_acc: 0.9811
Baseline Error: 1.89%
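
A quick way to sanity-check the trained MLP is to print its layer shapes and run a single prediction. A minimal sketch, assuming model, X_test, and y_test from the cell above are still in scope:

In [ ]:
# inspect layer shapes and parameter counts; the 784-unit hidden
# layer accounts for the bulk of the weights
model.summary()

import numpy as np

# predict() returns one softmax vector per sample; argmax picks the digit
probs = model.predict(X_test[:1])
print("predicted digit:", np.argmax(probs, axis=1)[0])
print("true digit:", np.argmax(y_test[0]))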

CNN for the MNIST dataset

Instead of flattening the images up front, the CNN keeps the 2D structure: it learns 32 5x5 filters, downsamples with max pooling, applies dropout for regularization, and only then flattens into dense layers.

In [7]:
# Simple CNN for the MNIST Dataset
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.utils import np_utils

# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# reshape to be [samples][width][height][channels]
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1)).astype('float32')

# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

# define a simple CNN model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPooling2D())  # default 2x2 pooling
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# build the model
model = baseline_model()

# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200)

# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("CNN Error: %.2f%%" % (100-scores[1]*100))
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 9s 156us/step - loss: 0.2535 - acc: 0.9272 - val_loss: 0.0941 - val_acc: 0.9739
Epoch 2/10
60000/60000 [==============================] - 9s 148us/step - loss: 0.0758 - acc: 0.9777 - val_loss: 0.0507 - val_acc: 0.9833
Epoch 3/10
60000/60000 [==============================] - 9s 148us/step - loss: 0.0542 - acc: 0.9838 - val_loss: 0.0476 - val_acc: 0.9849
Epoch 4/10
60000/60000 [==============================] - 9s 147us/step - loss: 0.0427 - acc: 0.9870 - val_loss: 0.0380 - val_acc: 0.9878
Epoch 5/10
60000/60000 [==============================] - 9s 148us/step - loss: 0.0338 - acc: 0.9894 - val_loss: 0.0339 - val_acc: 0.9881
Epoch 6/10
60000/60000 [==============================] - 9s 148us/step - loss: 0.0287 - acc: 0.9911 - val_loss: 0.0317 - val_acc: 0.9892
Epoch 7/10
60000/60000 [==============================] - 9s 148us/step - loss: 0.0230 - acc: 0.9928 - val_loss: 0.0324 - val_acc: 0.9895
Epoch 8/10
60000/60000 [==============================] - 9s 148us/step - loss: 0.0191 - acc: 0.9940 - val_loss: 0.0318 - val_acc: 0.9900
Epoch 9/10
60000/60000 [==============================] - 9s 151us/step - loss: 0.0170 - acc: 0.9946 - val_loss: 0.0343 - val_acc: 0.9889
Epoch 10/10
60000/60000 [==============================] - 9s 149us/step - loss: 0.0146 - acc: 0.9952 - val_loss: 0.0409 - val_acc: 0.9878
CNN Error: 1.22%
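
With the test error down to about 1.2%, it is instructive to look at which digits the CNN still gets wrong. A minimal sketch, assuming model, X_test, and y_test from the cell above:

In [ ]:
import numpy as np

# predicted and true class labels for the whole test set
pred = np.argmax(model.predict(X_test), axis=1)
true = np.argmax(y_test, axis=1)

# indices of the misclassified test images
wrong = np.where(pred != true)[0]
print("misclassified: %d of %d" % (len(wrong), len(X_test)))
for i in wrong[:5]:
    print("index %d: predicted %d, true %d" % (i, pred[i], true[i]))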

Larger CNN for the MNIST dataset

The larger model stacks two convolution/pooling blocks (30 5x5 filters, then 15 3x3 filters) and adds a second fully connected layer before the softmax output.

In [8]:
# Larger CNN for the MNIST Dataset
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.utils import np_utils

# load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# reshape to be [samples][width][height][channels]
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1)).astype('float32')

# normalize inputs from 0-255 to 0-1
X_train = X_train / 255
X_test = X_test / 255

# one hot encode outputs
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]

# define the larger model
def larger_model():
    # create model
    model = Sequential()
    model.add(Conv2D(30, (5, 5), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPooling2D())
    model.add(Conv2D(15, (3, 3), activation='relu'))
    model.add(MaxPooling2D())
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# build the model
model = larger_model()

# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200)

# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Large CNN Error: %.2f%%" % (100-scores[1]*100))
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 10s 167us/step - loss: 0.3636 - acc: 0.8864 - val_loss: 0.0763 - val_acc: 0.9749
Epoch 2/10
60000/60000 [==============================] - 9s 158us/step - loss: 0.0943 - acc: 0.9716 - val_loss: 0.0457 - val_acc: 0.9858
Epoch 3/10
60000/60000 [==============================] - 9s 157us/step - loss: 0.0677 - acc: 0.9788 - val_loss: 0.0367 - val_acc: 0.9888
Epoch 4/10
60000/60000 [==============================] - 9s 158us/step - loss: 0.0572 - acc: 0.9820 - val_loss: 0.0336 - val_acc: 0.9890
Epoch 5/10
60000/60000 [==============================] - 9s 157us/step - loss: 0.0471 - acc: 0.9852 - val_loss: 0.0298 - val_acc: 0.9890
Epoch 6/10
60000/60000 [==============================] - 9s 158us/step - loss: 0.0415 - acc: 0.9868 - val_loss: 0.0261 - val_acc: 0.9902
Epoch 7/10
60000/60000 [==============================] - 9s 157us/step - loss: 0.0386 - acc: 0.9880 - val_loss: 0.0262 - val_acc: 0.9914
Epoch 8/10
60000/60000 [==============================] - 9s 158us/step - loss: 0.0334 - acc: 0.9893 - val_loss: 0.0309 - val_acc: 0.9895
Epoch 9/10
60000/60000 [==============================] - 9s 157us/step - loss: 0.0324 - acc: 0.9900 - val_loss: 0.0242 - val_acc: 0.9917
Epoch 10/10
60000/60000 [==============================] - 10s 158us/step - loss: 0.0281 - acc: 0.9907 - val_loss: 0.0270 - val_acc: 0.9908
Large CNN Error: 0.92%
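
Across the three models the test error drops from 1.89% (MLP) to 1.22% (simple CNN) to 0.92% (larger CNN). Once trained, the model can be saved and reloaded without retraining; a minimal sketch using Keras' standard HDF5 save format (the filename is arbitrary):

In [ ]:
from keras.models import load_model

# save architecture, weights, and optimizer state in one file
model.save('mnist_large_cnn.h5')

# reload and confirm the restored model scores the same on the test set
restored = load_model('mnist_large_cnn.h5')
scores = restored.evaluate(X_test, y_test, verbose=0)
print("Restored Large CNN Error: %.2f%%" % (100 - scores[1] * 100))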