For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
# Use the FiveThirtyEight matplotlib style for all plots in this notebook.
plt.style.use('fivethirtyeight')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_datasets as tfds
import autokeras as ak
# Show the AutoKeras version the notebook was run with (1.0.16 below).
print(ak.__version__)
import logging
# Silence TensorFlow's INFO/WARNING log spam; only errors get through.
tf.get_logger().setLevel(logging.ERROR)
1.0.16
In this notebook, we will learn how to build a wine quality classification model in Python using the AutoKeras package.
# Load the wine-quality dataset from TensorFlow Datasets and convert the
# single 'train' split into a pandas DataFrame for easy inspection.
wine_ds, info = tfds.load('wine_quality', split = 'train', shuffle_files=True, with_info=True)
df = tfds.as_dataframe(wine_ds)
# Quick look at the loaded table: dimensions, first rows, column names.
for summary in (df.shape, df.head(), df.columns.values):
    print()
    print(summary)
(4898, 12) features/alcohol features/chlorides features/citric acid \ 0 9.0 0.054 0.34 1 12.2 0.063 0.49 2 11.2 0.029 0.11 3 10.3 0.055 0.39 4 10.7 0.054 0.35 features/density features/fixed acidity features/free sulfur dioxide \ 0 1.00080 7.6 44.0 1 0.99110 6.3 35.0 2 0.99076 5.3 6.0 3 0.99652 7.0 42.0 4 0.99178 7.3 31.0 features/pH features/residual sugar features/sulphates \ 0 3.22 18.35 0.55 1 3.38 1.20 0.42 2 3.51 1.10 0.48 3 3.37 7.50 0.54 4 3.18 1.60 0.47 features/total sulfur dioxide features/volatile acidity quality 0 197.0 0.32 5 1 92.0 0.27 6 2 51.0 0.43 4 3 218.0 0.31 5 4 148.0 0.28 5 ['features/alcohol' 'features/chlorides' 'features/citric acid' 'features/density' 'features/fixed acidity' 'features/free sulfur dioxide' 'features/pH' 'features/residual sugar' 'features/sulphates' 'features/total sulfur dioxide' 'features/volatile acidity' 'quality']
#import pandas_profiling
#df.profile_report()
#import sweetviz as sv
#sweet_report = sv.analyze(df)
#sweet_report.show_notebook(layout='vertical', w=880, h=1000,scale=0.8)
# Pull the label column out of the DataFrame: `y` holds the quality
# scores and `X` keeps the remaining eleven feature columns.
y = df.pop('quality')
X = df
for part in (y, X):
    print(part.shape)
(4898,) (4898, 11)
def _show_split(title, features, labels):
    """Print the shape and first rows of one side of the split."""
    print(); print(title)
    print(); print(features.shape)
    print(); print(features.head())
    print(); print(labels.shape)
    print(); print(labels.head())

# Hold out a third of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
_show_split("Training Dataset:", X_train, y_train)
_show_split("\n\nTesting Dataset:", X_test, y_test)
Training Dataset: (3281, 11) features/alcohol features/chlorides features/citric acid \ 3167 12.50 0.030 0.43 324 9.75 0.069 0.47 4731 9.10 0.050 0.30 2305 9.50 0.039 0.47 1777 12.10 0.036 0.28 features/density features/fixed acidity features/free sulfur dioxide \ 3167 0.99164 6.8 30.0 324 0.99391 5.7 35.0 4731 0.99652 7.2 40.0 2305 0.99590 9.5 21.0 1777 0.99206 6.6 10.0 features/pH features/residual sugar features/sulphates \ 3167 3.08 7.6 0.59 324 3.11 6.3 0.46 4731 3.15 8.1 0.49 2305 2.90 1.3 0.64 1777 3.07 9.2 0.35 features/total sulfur dioxide features/volatile acidity 3167 110.0 0.28 324 182.0 0.24 4731 188.0 0.30 2305 123.0 0.21 1777 92.0 0.39 (3281,) 3167 8 324 5 4731 6 2305 5 1777 6 Name: quality, dtype: int64 Testing Dataset: (1617, 11) features/alcohol features/chlorides features/citric acid \ 4656 10.7 0.048 0.28 3659 8.7 0.122 0.99 907 9.4 0.042 0.72 4352 9.5 0.054 0.26 3271 13.0 0.021 0.29 features/density features/fixed acidity features/free sulfur dioxide \ 4656 0.99556 6.8 54.0 3659 0.99360 6.6 45.0 907 0.99990 8.0 62.0 4352 0.99538 6.4 47.0 3271 0.99026 7.5 38.0 features/pH features/residual sugar features/sulphates \ 4656 3.19 12.600000 0.37 3659 3.09 1.200000 0.31 907 2.92 17.549999 0.68 4352 3.12 8.200000 0.50 3271 3.08 4.900000 0.48 features/total sulfur dioxide features/volatile acidity 4656 136.0 0.20 3659 129.0 0.19 907 233.0 0.66 4352 182.0 0.24 3271 113.0 0.38 (1617,) 4656 6 3659 6 907 4 4352 5 3271 7 Name: quality, dtype: int64
# The raw labels are integers (int64) — verify before casting.
for labels in (y_train, y_test):
    print(labels.dtype)
int64 int64
# Cast the integer quality scores to strings so AutoKeras treats the
# task as multi-class classification rather than regression.
y_train, y_test = y_train.astype('str'), y_test.astype('str')
for labels in (y_train, y_test):
    print(labels.dtype)
object object
# Create the AutoKeras structured-data classifier. The search tries up
# to 10 candidate architectures (max_trials=10) and discards any state
# left over from a previous run (overwrite=True).
clf = ak.StructuredDataClassifier(overwrite=True, max_trials=10)
# Run the architecture search on the training data, holding out 20% of
# it for validation; each candidate trains for up to 50 epochs.
clf.fit(X_train, y_train, validation_split=0.20, epochs=50, batch_size=32, verbose=1)
Trial 10 Complete [00h 00m 13s] val_accuracy: 0.553600013256073 Best val_accuracy So Far: 0.553600013256073 Total elapsed time: 00h 01m 37s Epoch 1/50 103/103 [==============================] - 1s 1ms/step - loss: 1.8509 - accuracy: 0.2509 Epoch 2/50 103/103 [==============================] - 0s 1ms/step - loss: 1.2746 - accuracy: 0.4719 Epoch 3/50 103/103 [==============================] - 0s 1ms/step - loss: 1.2128 - accuracy: 0.4871 Epoch 4/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1861 - accuracy: 0.4700 Epoch 5/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1632 - accuracy: 0.4973 Epoch 6/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1536 - accuracy: 0.5011 Epoch 7/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1356 - accuracy: 0.4951 Epoch 8/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1318 - accuracy: 0.4950 Epoch 9/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1215 - accuracy: 0.5002 Epoch 10/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1160 - accuracy: 0.5070 Epoch 11/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1119 - accuracy: 0.5140 Epoch 12/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0918 - accuracy: 0.5084 Epoch 13/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0958 - accuracy: 0.5184 Epoch 14/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0868 - accuracy: 0.5143 Epoch 15/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0791 - accuracy: 0.5249 Epoch 16/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0847 - accuracy: 0.5323 Epoch 17/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0681 - accuracy: 0.5290 Epoch 18/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0656 - accuracy: 0.5385 Epoch 19/50 103/103 
[==============================] - 0s 1ms/step - loss: 1.0605 - accuracy: 0.5324 Epoch 20/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0601 - accuracy: 0.5245 Epoch 21/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0580 - accuracy: 0.5306 Epoch 22/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0575 - accuracy: 0.5351 Epoch 23/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0496 - accuracy: 0.5429 Epoch 24/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0412 - accuracy: 0.5297 Epoch 25/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0386 - accuracy: 0.5517 Epoch 26/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0361 - accuracy: 0.5411 Epoch 27/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0350 - accuracy: 0.5617 Epoch 28/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0331 - accuracy: 0.5599 Epoch 29/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0289 - accuracy: 0.5561 Epoch 30/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0296 - accuracy: 0.5480 Epoch 31/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0254 - accuracy: 0.5473 Epoch 32/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0232 - accuracy: 0.5487 Epoch 33/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0121 - accuracy: 0.5571 Epoch 34/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0188 - accuracy: 0.5425 Epoch 35/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0132 - accuracy: 0.5652 Epoch 36/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0169 - accuracy: 0.5537 Epoch 37/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0123 - accuracy: 0.5548 Epoch 38/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0189 - accuracy: 0.5583 
Epoch 39/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0151 - accuracy: 0.5540 Epoch 40/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0102 - accuracy: 0.5642 Epoch 41/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9941 - accuracy: 0.5637 Epoch 42/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9983 - accuracy: 0.5735 Epoch 43/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0053 - accuracy: 0.5624 Epoch 44/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0060 - accuracy: 0.5546 Epoch 45/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9978 - accuracy: 0.5671 Epoch 46/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9907 - accuracy: 0.5810 Epoch 47/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9902 - accuracy: 0.5669 Epoch 48/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9789 - accuracy: 0.5705 Epoch 49/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9830 - accuracy: 0.5704 Epoch 50/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9828 - accuracy: 0.5825
<tensorflow.python.keras.callbacks.History at 0x7f007ff510d0>
# Evaluate the best model found by the search on the held-out test set.
# NOTE(review): the original code called `clf.evaluate` twice on the same
# data and printed the identical [loss, accuracy] pair both times; one
# call is sufficient (batch_size only affects throughput, not metrics).
print()
print("Evaluate on test data")
results = clf.evaluate(X_test, y_test, batch_size=32, verbose=0)
print("test loss, test acc: \n", results)
[1.1242220401763916, 0.5275201201438904] Evaluate on test data test loss, test acc: [1.1242220401763916, 0.5275201201438904]
import scikitplot as skplt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import cohen_kappa_score, confusion_matrix

# Predict class labels on the test set with the best model.
predicted_y = clf.predict(X_test, verbose=0)

# Report how well the trained model does on the held-out data.
print(); print('Testing Results of the trained model: ')
print(); print('Accuracy : ', accuracy_score(y_test, predicted_y))
print(); print('Confusion Matrix :\n', confusion_matrix(y_test, predicted_y))
print(); print('Classification Report :\n', classification_report(y_test, predicted_y))

# Visualise the confusion matrix.
skplt.metrics.plot_confusion_matrix(y_test, predicted_y, figsize=(7, 7))
plt.show()
Testing Results of the trained model: Accuracy : 0.5275200989486704 Confusion Matrix : [[ 0 2 2 3 0 0 0] [ 0 1 36 12 1 0 0] [ 0 4 282 200 4 0 0] [ 0 2 179 487 38 0 0] [ 0 0 23 180 83 0 0] [ 0 0 7 59 9 0 0] [ 0 0 0 1 2 0 0]] Classification Report : precision recall f1-score support 3 0.00 0.00 0.00 7 4 0.11 0.02 0.03 50 5 0.53 0.58 0.55 490 6 0.52 0.69 0.59 706 7 0.61 0.29 0.39 286 8 0.00 0.00 0.00 75 9 0.00 0.00 0.00 3 accuracy 0.53 1617 macro avg 0.25 0.23 0.22 1617 weighted avg 0.50 0.53 0.50 1617
# Export the best model found by the search as a plain Keras model.
model = clf.export_model()
# Print the layer-by-layer architecture of the exported model.
model.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 11)] 0 _________________________________________________________________ multi_category_encoding (Mul (None, 11) 0 _________________________________________________________________ normalization (Normalization (None, 11) 23 _________________________________________________________________ dense (Dense) (None, 32) 384 _________________________________________________________________ re_lu (ReLU) (None, 32) 0 _________________________________________________________________ dense_1 (Dense) (None, 32) 1056 _________________________________________________________________ re_lu_1 (ReLU) (None, 32) 0 _________________________________________________________________ dropout (Dropout) (None, 32) 0 _________________________________________________________________ dense_2 (Dense) (None, 7) 231 _________________________________________________________________ classification_head_1 (Softm (None, 7) 0 ================================================================= Total params: 1,694 Trainable params: 1,671 Non-trainable params: 23 _________________________________________________________________
from tensorflow.keras.utils import plot_model
# Render the model graph (displayed inline in the notebook).
plot_model(model)

print(type(model))

# Save in the TensorFlow SavedModel format; fall back to HDF5 when the
# exported model contains something the SavedModel writer cannot handle.
# NOTE(review): the try/except body lines were unindented in the source,
# which is a SyntaxError in Python — indentation restored here.
try:
    model.save("best_keras_model", save_format="tf")
except Exception:
    model.save("best_keras_model.h5")
<class 'tensorflow.python.keras.engine.functional.Functional'>
# Reload the persisted model; AutoKeras custom layers (e.g. the
# multi-category encoding) must be supplied via ak.CUSTOM_OBJECTS.
model = tf.keras.models.load_model('best_keras_model', custom_objects=ak.CUSTOM_OBJECTS)

# Per-class probability predictions for every test row.
predicted_probability = model.predict(X_test, verbose=1)
print()
print(predicted_probability)
51/51 [==============================] - 0s 689us/step [[3.1682735e-04 5.8419011e-03 1.6993020e-01 ... 8.6702302e-02 3.6141742e-02 3.1056977e-04] [1.1442063e-03 2.5611144e-02 2.4990357e-01 ... 2.0381109e-01 2.6276793e-02 8.9784770e-04] [2.7582890e-04 4.5949418e-02 9.1155684e-01 ... 6.2132633e-04 2.9407036e-06 1.0756439e-09] ... [4.9888232e-04 7.0865517e-03 1.3117082e-01 ... 3.2787517e-01 1.4668092e-02 1.7885838e-04] [5.1948999e-04 4.7347620e-02 5.7962453e-01 ... 4.4063956e-02 1.6590484e-03 7.5194264e-05] [5.1107685e-07 1.2151637e-03 4.4980764e-01 ... 1.3518638e-02 1.0267833e-04 5.0248076e-08]]
In this coding recipe, we discussed how to build a Classification Model in Python using AutoKeras.
Specifically, we have learned the following: