For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
# Use the FiveThirtyEight matplotlib style for all plots in this notebook.
plt.style.use('fivethirtyeight')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_datasets as tfds
import autokeras as ak
# Show the AutoKeras version the notebook was run with (1.0.16 below).
print(ak.__version__)
import logging
# Silence TensorFlow's INFO/WARNING log spam; only errors get through.
tf.get_logger().setLevel(logging.ERROR)
1.0.16
In this notebook, we will learn how to build a wine quality classification model in Python using the AutoKeras package.
# Load the wine-quality dataset from TensorFlow Datasets and convert the
# single 'train' split into a pandas DataFrame for easy inspection.
wine_ds, info = tfds.load('wine_quality', split = 'train', shuffle_files=True, with_info=True)
df = tfds.as_dataframe(wine_ds)
# Quick look at the loaded table: dimensions, first rows, column names.
for summary in (df.shape, df.head(), df.columns.values):
    print()
    print(summary)
(4898, 12) features/alcohol features/chlorides features/citric acid \ 0 9.0 0.054 0.34 1 12.2 0.063 0.49 2 11.2 0.029 0.11 3 10.3 0.055 0.39 4 10.7 0.054 0.35 features/density features/fixed acidity features/free sulfur dioxide \ 0 1.00080 7.6 44.0 1 0.99110 6.3 35.0 2 0.99076 5.3 6.0 3 0.99652 7.0 42.0 4 0.99178 7.3 31.0 features/pH features/residual sugar features/sulphates \ 0 3.22 18.35 0.55 1 3.38 1.20 0.42 2 3.51 1.10 0.48 3 3.37 7.50 0.54 4 3.18 1.60 0.47 features/total sulfur dioxide features/volatile acidity quality 0 197.0 0.32 5 1 92.0 0.27 6 2 51.0 0.43 4 3 218.0 0.31 5 4 148.0 0.28 5 ['features/alcohol' 'features/chlorides' 'features/citric acid' 'features/density' 'features/fixed acidity' 'features/free sulfur dioxide' 'features/pH' 'features/residual sugar' 'features/sulphates' 'features/total sulfur dioxide' 'features/volatile acidity' 'quality']
#import pandas_profiling
#df.profile_report()
#import sweetviz as sv
#sweet_report = sv.analyze(df)
#sweet_report.show_notebook(layout='vertical', w=880, h=1000,scale=0.8)
# Pull the label column out of the DataFrame: `y` holds the quality
# scores and `X` keeps the remaining eleven feature columns.
y = df.pop('quality')
X = df
for part in (y, X):
    print(part.shape)
(4898,) (4898, 11)
def _show_split(title, features, labels):
    """Print the shape and first rows of one side of the split."""
    print(); print(title)
    print(); print(features.shape)
    print(); print(features.head())
    print(); print(labels.shape)
    print(); print(labels.head())

# Hold out a third of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
_show_split("Training Dataset:", X_train, y_train)
_show_split("\n\nTesting Dataset:", X_test, y_test)
Training Dataset: (3281, 11) features/alcohol features/chlorides features/citric acid \ 3167 12.50 0.030 0.43 324 9.75 0.069 0.47 4731 9.10 0.050 0.30 2305 9.50 0.039 0.47 1777 12.10 0.036 0.28 features/density features/fixed acidity features/free sulfur dioxide \ 3167 0.99164 6.8 30.0 324 0.99391 5.7 35.0 4731 0.99652 7.2 40.0 2305 0.99590 9.5 21.0 1777 0.99206 6.6 10.0 features/pH features/residual sugar features/sulphates \ 3167 3.08 7.6 0.59 324 3.11 6.3 0.46 4731 3.15 8.1 0.49 2305 2.90 1.3 0.64 1777 3.07 9.2 0.35 features/total sulfur dioxide features/volatile acidity 3167 110.0 0.28 324 182.0 0.24 4731 188.0 0.30 2305 123.0 0.21 1777 92.0 0.39 (3281,) 3167 8 324 5 4731 6 2305 5 1777 6 Name: quality, dtype: int64 Testing Dataset: (1617, 11) features/alcohol features/chlorides features/citric acid \ 4656 10.7 0.048 0.28 3659 8.7 0.122 0.99 907 9.4 0.042 0.72 4352 9.5 0.054 0.26 3271 13.0 0.021 0.29 features/density features/fixed acidity features/free sulfur dioxide \ 4656 0.99556 6.8 54.0 3659 0.99360 6.6 45.0 907 0.99990 8.0 62.0 4352 0.99538 6.4 47.0 3271 0.99026 7.5 38.0 features/pH features/residual sugar features/sulphates \ 4656 3.19 12.600000 0.37 3659 3.09 1.200000 0.31 907 2.92 17.549999 0.68 4352 3.12 8.200000 0.50 3271 3.08 4.900000 0.48 features/total sulfur dioxide features/volatile acidity 4656 136.0 0.20 3659 129.0 0.19 907 233.0 0.66 4352 182.0 0.24 3271 113.0 0.38 (1617,) 4656 6 3659 6 907 4 4352 5 3271 7 Name: quality, dtype: int64
# The raw labels are integers (int64) — verify before casting.
for labels in (y_train, y_test):
    print(labels.dtype)
int64 int64
# Cast the integer quality scores to strings so AutoKeras treats the
# task as multi-class classification rather than regression.
y_train, y_test = y_train.astype('str'), y_test.astype('str')
for labels in (y_train, y_test):
    print(labels.dtype)
object object
# Create the AutoKeras structured-data classifier. The search tries up
# to 10 candidate architectures (max_trials=10) and discards any state
# left over from a previous run (overwrite=True).
clf = ak.StructuredDataClassifier(overwrite=True, max_trials=10)
# Run the architecture search on the training data, holding out 20% of
# it for validation; each candidate trains for up to 50 epochs.
clf.fit(X_train, y_train, validation_split=0.20, epochs=50, batch_size=32, verbose=1)
Trial 10 Complete [00h 00m 13s] val_accuracy: 0.553600013256073 Best val_accuracy So Far: 0.553600013256073 Total elapsed time: 00h 01m 37s Epoch 1/50 103/103 [==============================] - 1s 1ms/step - loss: 1.8509 - accuracy: 0.2509 Epoch 2/50 103/103 [==============================] - 0s 1ms/step - loss: 1.2746 - accuracy: 0.4719 Epoch 3/50 103/103 [==============================] - 0s 1ms/step - loss: 1.2128 - accuracy: 0.4871 Epoch 4/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1861 - accuracy: 0.4700 Epoch 5/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1632 - accuracy: 0.4973 Epoch 6/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1536 - accuracy: 0.5011 Epoch 7/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1356 - accuracy: 0.4951 Epoch 8/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1318 - accuracy: 0.4950 Epoch 9/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1215 - accuracy: 0.5002 Epoch 10/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1160 - accuracy: 0.5070 Epoch 11/50 103/103 [==============================] - 0s 1ms/step - loss: 1.1119 - accuracy: 0.5140 Epoch 12/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0918 - accuracy: 0.5084 Epoch 13/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0958 - accuracy: 0.5184 Epoch 14/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0868 - accuracy: 0.5143 Epoch 15/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0791 - accuracy: 0.5249 Epoch 16/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0847 - accuracy: 0.5323 Epoch 17/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0681 - accuracy: 0.5290 Epoch 18/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0656 - accuracy: 0.5385 Epoch 19/50 103/103 
[==============================] - 0s 1ms/step - loss: 1.0605 - accuracy: 0.5324 Epoch 20/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0601 - accuracy: 0.5245 Epoch 21/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0580 - accuracy: 0.5306 Epoch 22/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0575 - accuracy: 0.5351 Epoch 23/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0496 - accuracy: 0.5429 Epoch 24/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0412 - accuracy: 0.5297 Epoch 25/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0386 - accuracy: 0.5517 Epoch 26/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0361 - accuracy: 0.5411 Epoch 27/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0350 - accuracy: 0.5617 Epoch 28/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0331 - accuracy: 0.5599 Epoch 29/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0289 - accuracy: 0.5561 Epoch 30/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0296 - accuracy: 0.5480 Epoch 31/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0254 - accuracy: 0.5473 Epoch 32/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0232 - accuracy: 0.5487 Epoch 33/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0121 - accuracy: 0.5571 Epoch 34/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0188 - accuracy: 0.5425 Epoch 35/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0132 - accuracy: 0.5652 Epoch 36/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0169 - accuracy: 0.5537 Epoch 37/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0123 - accuracy: 0.5548 Epoch 38/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0189 - accuracy: 0.5583 
Epoch 39/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0151 - accuracy: 0.5540 Epoch 40/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0102 - accuracy: 0.5642 Epoch 41/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9941 - accuracy: 0.5637 Epoch 42/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9983 - accuracy: 0.5735 Epoch 43/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0053 - accuracy: 0.5624 Epoch 44/50 103/103 [==============================] - 0s 1ms/step - loss: 1.0060 - accuracy: 0.5546 Epoch 45/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9978 - accuracy: 0.5671 Epoch 46/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9907 - accuracy: 0.5810 Epoch 47/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9902 - accuracy: 0.5669 Epoch 48/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9789 - accuracy: 0.5705 Epoch 49/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9830 - accuracy: 0.5704 Epoch 50/50 103/103 [==============================] - 0s 1ms/step - loss: 0.9828 - accuracy: 0.5825
<tensorflow.python.keras.callbacks.History at 0x7f007ff510d0>
# Evaluate the best model found by the search on the held-out test set.
# NOTE(review): the original code called `clf.evaluate` twice on the same
# data and printed the identical [loss, accuracy] pair both times; one
# call is sufficient (batch_size only affects throughput, not metrics).
print()
print("Evaluate on test data")
results = clf.evaluate(X_test, y_test, batch_size=32, verbose=0)
print("test loss, test acc: \n", results)
[1.1242220401763916, 0.5275201201438904] Evaluate on test data test loss, test acc: [1.1242220401763916, 0.5275201201438904]
import scikitplot as skplt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import cohen_kappa_score, confusion_matrix

# Predict class labels on the test set with the best model.
predicted_y = clf.predict(X_test, verbose=0)

# Report how well the trained model does on the held-out data.
print(); print('Testing Results of the trained model: ')
print(); print('Accuracy : ', accuracy_score(y_test, predicted_y))
print(); print('Confusion Matrix :\n', confusion_matrix(y_test, predicted_y))
print(); print('Classification Report :\n', classification_report(y_test, predicted_y))

# Visualise the confusion matrix.
skplt.metrics.plot_confusion_matrix(y_test, predicted_y, figsize=(7, 7))
plt.show()
Testing Results of the trained model: Accuracy : 0.5275200989486704 Confusion Matrix : [[ 0 2 2 3 0 0 0] [ 0 1 36 12 1 0 0] [ 0 4 282 200 4 0 0] [ 0 2 179 487 38 0 0] [ 0 0 23 180 83 0 0] [ 0 0 7 59 9 0 0] [ 0 0 0 1 2 0 0]] Classification Report : precision recall f1-score support 3 0.00 0.00 0.00 7 4 0.11 0.02 0.03 50 5 0.53 0.58 0.55 490 6 0.52 0.69 0.59 706 7 0.61 0.29 0.39 286 8 0.00 0.00 0.00 75 9 0.00 0.00 0.00 3 accuracy 0.53 1617 macro avg 0.25 0.23 0.22 1617 weighted avg 0.50 0.53 0.50 1617
# Export the best model found by the search as a plain Keras model.
model = clf.export_model()
# Print the layer-by-layer architecture of the exported model.
model.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 11)] 0 _________________________________________________________________ multi_category_encoding (Mul (None, 11) 0 _________________________________________________________________ normalization (Normalization (None, 11) 23 _________________________________________________________________ dense (Dense) (None, 32) 384 _________________________________________________________________ re_lu (ReLU) (None, 32) 0 _________________________________________________________________ dense_1 (Dense) (None, 32) 1056 _________________________________________________________________ re_lu_1 (ReLU) (None, 32) 0 _________________________________________________________________ dropout (Dropout) (None, 32) 0 _________________________________________________________________ dense_2 (Dense) (None, 7) 231 _________________________________________________________________ classification_head_1 (Softm (None, 7) 0 ================================================================= Total params: 1,694 Trainable params: 1,671 Non-trainable params: 23 _________________________________________________________________
from tensorflow.keras.utils import plot_model
# Render the model graph (displayed inline in the notebook).
plot_model(model)

print(type(model))

# Save in the TensorFlow SavedModel format; fall back to HDF5 when the
# exported model contains something the SavedModel writer cannot handle.
# NOTE(review): the try/except body lines were unindented in the source,
# which is a SyntaxError in Python — indentation restored here.
try:
    model.save("best_keras_model", save_format="tf")
except Exception:
    model.save("best_keras_model.h5")
<class 'tensorflow.python.keras.engine.functional.Functional'>
# Reload the persisted model; AutoKeras custom layers (e.g. the
# multi-category encoding) must be supplied via ak.CUSTOM_OBJECTS.
model = tf.keras.models.load_model('best_keras_model', custom_objects=ak.CUSTOM_OBJECTS)

# Per-class probability predictions for every test row.
predicted_probability = model.predict(X_test, verbose=1)
print()
print(predicted_probability)
51/51 [==============================] - 0s 689us/step [[3.1682735e-04 5.8419011e-03 1.6993020e-01 ... 8.6702302e-02 3.6141742e-02 3.1056977e-04] [1.1442063e-03 2.5611144e-02 2.4990357e-01 ... 2.0381109e-01 2.6276793e-02 8.9784770e-04] [2.7582890e-04 4.5949418e-02 9.1155684e-01 ... 6.2132633e-04 2.9407036e-06 1.0756439e-09] ... [4.9888232e-04 7.0865517e-03 1.3117082e-01 ... 3.2787517e-01 1.4668092e-02 1.7885838e-04] [5.1948999e-04 4.7347620e-02 5.7962453e-01 ... 4.4063956e-02 1.6590484e-03 7.5194264e-05] [5.1107685e-07 1.2151637e-03 4.4980764e-01 ... 1.3518638e-02 1.0267833e-04 5.0248076e-08]]
In this coding recipe, we discussed how to build a Classification Model in Python using AutoKeras.
Specifically, we have learned the following: