For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
#plt.style.use('fivethirtyeight')
#plt.style.use('ggplot')
import numpy as np
import pandas as pd
# OpenML Dataset ID
whichDataset = 6 # provide dataset id
import openml
from openml.datasets import get_dataset
dataset = openml.datasets.get_dataset(whichDataset)
# Print a summary
print(
f"This is dataset '{dataset.name}', the target feature is "
f"'{dataset.default_target_attribute}'"
)
print(f"URL: {dataset.url}")
print(dataset.description)
This is dataset 'letter', the target feature is 'class'
URL: https://www.openml.org/data/v1/download/6/letter.arff

**Author**: David J. Slate
**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Letter+Recognition) - 01-01-1991
**Please cite**: P. W. Frey and D. J. Slate. "Letter Recognition Using Holland-style Adaptive Classifiers". Machine Learning 6(2), 1991

1. TITLE: Letter Image Recognition Data

The objective is to identify each of a large number of black-and-white rectangular pixel displays as one of the 26 capital letters in the English alphabet. The character images were based on 20 different fonts and each letter within these 20 fonts was randomly distorted to produce a file of 20,000 unique stimuli. Each stimulus was converted into 16 primitive numerical attributes (statistical moments and edge counts) which were then scaled to fit into a range of integer values from 0 through 15. We typically train on the first 16000 items and then use the resulting model to predict the letter category for the remaining 4000. See the article cited above for more details.
X, y, categorical_indicator, attribute_names = dataset.get_data(
dataset_format="array", target=dataset.default_target_attribute)
dataset = pd.DataFrame(X, columns=attribute_names)
dataset["target"] = y
print(); print(dataset.shape)
print(); print(dataset.head())
print(); print(dataset.columns.values)
(20000, 17)

   x-box  y-box  width  high  onpix  x-bar  y-bar  x2bar  y2bar  xybar  x2ybr  \
0    2.0    4.0    4.0   3.0    2.0    7.0    8.0    2.0    9.0   11.0    7.0
1    4.0    7.0    5.0   5.0    5.0    5.0    9.0    6.0    4.0    8.0    7.0
2    7.0   10.0    8.0   7.0    4.0    8.0    8.0    5.0   10.0   11.0    2.0
3    4.0    9.0    5.0   7.0    4.0    7.0    7.0   13.0    1.0    7.0    6.0
4    6.0    7.0    8.0   5.0    4.0    7.0    6.0    3.0    7.0   10.0    7.0

   xy2br  x-ege  xegvy  y-ege  yegvx  target
0    7.0    1.0    8.0    5.0    6.0      25
1    9.0    2.0    9.0    7.0   10.0      15
2    8.0    2.0    5.0    5.0   10.0      18
3    8.0    3.0    8.0    0.0    8.0       7
4    9.0    3.0    8.0    3.0    7.0       7

['x-box' 'y-box' 'width' 'high' 'onpix' 'x-bar' 'y-bar' 'x2bar' 'y2bar'
 'xybar' 'x2ybr' 'xy2br' 'x-ege' 'xegvy' 'y-ege' 'yegvx' 'target']
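The `head()` output shows the target as integers rather than letters: with `dataset_format="array"`, OpenML encodes the class labels as indices. Assuming the labels are the 26 capital letters in alphabetical order (as the dataset description suggests), a small mapping recovers the letters for display:

```python
import string

# Assumption: class indices 0-25 correspond to letters A-Z in alphabetical
# order, matching the ARFF class labels described above.
index_to_letter = dict(enumerate(string.ascii_uppercase))
print([index_to_letter[i] for i in (25, 15, 18, 7, 7)])  # ['Z', 'P', 'S', 'H', 'H']
```

Under that assumption, the first five rows above would be the letters Z, P, S, H, H.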
# find missing values in data frame
print()
print(dataset.isnull().sum().sum())
# group by 'target'
#print()
#print(dataset.groupby('target').count())
0
# training and test data split
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (15000, 17)
Unseen Data For Predictions: (5000, 17)
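The `sample(frac=0.75)` split above draws rows uniformly, so the 26 letter classes are balanced only on average. If an exactly stratified hold-out is wanted, pandas' `groupby(...).sample(...)` draws the same fraction from every class; a minimal sketch on a toy frame standing in for `dataset`:

```python
import pandas as pd

# Toy frame standing in for `dataset`; sampling within each target group
# keeps the class proportions identical in both splits.
df = pd.DataFrame({"x": range(8), "target": [0, 0, 0, 0, 1, 1, 1, 1]})
train = df.groupby("target", group_keys=False).sample(frac=0.75, random_state=1234)
holdout = df.drop(train.index)
print(sorted(train["target"].value_counts().to_dict().items()))  # [(0, 3), (1, 3)]
```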
import pandas_profiling
#dataset.profile_report()
#import sweetviz as sv
#sweet_report = sv.analyze(df)
#sweet_report.show_notebook(layout='vertical', w=880, h=1000,scale=0.8)
import h2o
h2o.init()
Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
Java Version: openjdk version "11.0.14" 2022-01-18; OpenJDK Runtime Environment (build 11.0.14+9-post-Debian-1deb10u1); OpenJDK 64-Bit Server VM (build 11.0.14+9-post-Debian-1deb10u1, mixed mode, sharing)
Starting server from /opt/conda/lib/python3.7/site-packages/h2o/backend/bin/h2o.jar
Ice root: /tmp/tmptgsgs760
JVM stdout: /tmp/tmptgsgs760/h2o_jupyter_started_from_python.out
JVM stderr: /tmp/tmptgsgs760/h2o_jupyter_started_from_python.err
Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.
Warning: Your H2O cluster version is too old (6 months and 9 days)! Please download and install the latest version from http://h2o.ai/download/
H2O_cluster_uptime: | 02 secs |
H2O_cluster_timezone: | Etc/UTC |
H2O_data_parsing_timezone: | UTC |
H2O_cluster_version: | 3.34.0.1 |
H2O_cluster_version_age: | 6 months and 9 days !!! |
H2O_cluster_name: | H2O_from_python_jupyter_6zxd37 |
H2O_cluster_total_nodes: | 1 |
H2O_cluster_free_memory: | 3.840 Gb |
H2O_cluster_total_cores: | 4 |
H2O_cluster_allowed_cores: | 4 |
H2O_cluster_status: | locked, healthy |
H2O_connection_url: | http://127.0.0.1:54321 |
H2O_connection_proxy: | {"http": null, "https": null} |
H2O_internal_security: | False |
H2O_API_Extensions: | Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 |
Python_version: | 3.7.10 final |
# Convert to h2o dataframe
hf = h2o.H2OFrame(data)
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
hf.head()
x-box | y-box | width | high | onpix | x-bar | y-bar | x2bar | y2bar | xybar | x2ybr | xy2br | x-ege | xegvy | y-ege | yegvx | target |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9 | 13 | 8 | 8 | 4 | 8 | 3 | 4 | 3 | 8 | 4 | 5 | 4 | 7 | 5 | 9 | 6 |
5 | 9 | 6 | 7 | 5 | 7 | 9 | 4 | 6 | 6 | 4 | 8 | 4 | 5 | 6 | 9 | 17 |
4 | 10 | 6 | 8 | 5 | 12 | 3 | 2 | 2 | 9 | 2 | 8 | 2 | 6 | 2 | 8 | 0 |
3 | 5 | 4 | 3 | 3 | 7 | 7 | 7 | 5 | 9 | 6 | 8 | 2 | 8 | 3 | 8 | 14 |
2 | 4 | 3 | 3 | 2 | 9 | 6 | 3 | 5 | 10 | 4 | 6 | 2 | 8 | 2 | 8 | 3 |
3 | 7 | 5 | 4 | 2 | 7 | 5 | 3 | 0 | 6 | 1 | 8 | 2 | 7 | 2 | 7 | 0 |
3 | 9 | 4 | 7 | 2 | 3 | 8 | 6 | 10 | 7 | 6 | 15 | 0 | 8 | 7 | 7 | 4 |
7 | 9 | 6 | 4 | 3 | 8 | 7 | 5 | 5 | 9 | 4 | 9 | 6 | 5 | 6 | 11 | 17 |
2 | 1 | 3 | 2 | 2 | 7 | 7 | 5 | 5 | 6 | 6 | 10 | 2 | 9 | 3 | 9 | 6 |
3 | 1 | 5 | 3 | 3 | 6 | 7 | 4 | 8 | 7 | 6 | 10 | 6 | 8 | 4 | 9 | 10 |
# Change the column type to a factor:
hf['target'] = hf['target'].asfactor()
# Data Transform - Split train : test datasets
# Note: split_frame assigns rows probabilistically, so the 90/10 split is approximate.
train, valid = hf.split_frame(ratios = [.90], seed = 1234)
print("Training Dataset", train.shape)
print("Validation Dataset", valid.shape)
Training Dataset (13498, 17)
Validation Dataset (1502, 17)
train.head(5)
x-box | y-box | width | high | onpix | x-bar | y-bar | x2bar | y2bar | xybar | x2ybr | xy2br | x-ege | xegvy | y-ege | yegvx | target |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9 | 13 | 8 | 8 | 4 | 8 | 3 | 4 | 3 | 8 | 4 | 5 | 4 | 7 | 5 | 9 | 6 |
5 | 9 | 6 | 7 | 5 | 7 | 9 | 4 | 6 | 6 | 4 | 8 | 4 | 5 | 6 | 9 | 17 |
4 | 10 | 6 | 8 | 5 | 12 | 3 | 2 | 2 | 9 | 2 | 8 | 2 | 6 | 2 | 8 | 0 |
2 | 4 | 3 | 3 | 2 | 9 | 6 | 3 | 5 | 10 | 4 | 6 | 2 | 8 | 2 | 8 | 3 |
3 | 7 | 5 | 4 | 2 | 7 | 5 | 3 | 0 | 6 | 1 | 8 | 2 | 7 | 2 | 7 | 0 |
valid.head(5)
x-box | y-box | width | high | onpix | x-bar | y-bar | x2bar | y2bar | xybar | x2ybr | xy2br | x-ege | xegvy | y-ege | yegvx | target |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | 5 | 4 | 3 | 3 | 7 | 7 | 7 | 5 | 9 | 6 | 8 | 2 | 8 | 3 | 8 | 14 |
3 | 1 | 5 | 3 | 3 | 6 | 7 | 4 | 8 | 7 | 6 | 10 | 6 | 8 | 4 | 9 | 10 |
2 | 5 | 3 | 6 | 4 | 8 | 7 | 8 | 3 | 6 | 6 | 10 | 2 | 9 | 4 | 9 | 16 |
2 | 1 | 3 | 1 | 0 | 7 | 15 | 2 | 4 | 7 | 10 | 8 | 0 | 8 | 0 | 8 | 19 |
3 | 7 | 5 | 6 | 4 | 6 | 8 | 2 | 4 | 7 | 7 | 9 | 5 | 9 | 3 | 7 | 0 |
# Identify predictors and response
featureColumns = train.columns
targetColumn = "target"
featureColumns.remove(targetColumn)
print("Feature Columns : "); print(featureColumns)
print("\n\nTarget Column : "); print(targetColumn)
Feature Columns :
['x-box', 'y-box', 'width', 'high', 'onpix', 'x-bar', 'y-bar', 'x2bar', 'y2bar', 'xybar', 'x2ybr', 'xy2br', 'x-ege', 'xegvy', 'y-ege', 'yegvx']

Target Column :
target
import time
from h2o.automl import H2OAutoML
localtime = time.asctime( time.localtime(time.time()) )
print("Local current time :", localtime)
print()
# Run AutoML for 10 base models (StackedEnsemble excluded)
aml = H2OAutoML(max_models=10, seed=1234, exclude_algos = ["StackedEnsemble"],
                #sort_metric = 'rmse'
                #balance_classes = True,
                # Note: AUC is undefined for multiclass problems, so the
                # leaderboard's auc column will be NaN; 'logloss' or
                # 'mean_per_class_error' are better sort metrics here.
                sort_metric = 'AUC'
                )
aml.train(x=featureColumns, y=targetColumn, training_frame = train, validation_frame = valid)
localtime = time.asctime( time.localtime(time.time()) )
print()
print("Local current time :", localtime)
Local current time : Thu Mar 24 01:24:08 2022

AutoML progress: |
01:24:08.180: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.
███████████████████████████████████████████████████████████████| (done) 100%

Local current time : Thu Mar 24 02:09:32 2022
lb = aml.leaderboard
print(lb.head(rows = lb.nrows))
# Explain an AutoML object i.e. explain all models
#exa = aml.explain(valid)
# *****************************
# save all models +++++++++++++
# -----------------------------
#model_ids = list(lb['model_id'].as_data_frame().iloc[:,0])
#for m_id in model_ids:
# mdl = h2o.get_model(m_id)
# h2o.save_model(model=mdl, path=os.getcwd(), force=True)
#h2o.export_file(lb, os.path.join(os.getcwd(), 'aml_leaderboard.h2o'), force=True)
model_id | auc | mean_per_class_error | logloss | rmse | mse |
---|---|---|---|---|---|
XGBoost_1_AutoML_1_20220324_12408 | nan | 0.0674354 | 0.227839 | 0.258868 | 0.0670126 |
XGBoost_2_AutoML_1_20220324_12408 | nan | 0.0582295 | 0.196607 | 0.239715 | 0.0574632 |
GBM_1_AutoML_1_20220324_12408 | nan | 0.0475039 | 0.157054 | 0.204885 | 0.0419777 |
GBM_2_AutoML_1_20220324_12408 | nan | 0.0451039 | 0.16025 | 0.203156 | 0.0412725 |
XGBoost_3_AutoML_1_20220324_12408 | nan | 0.0525349 | 0.168448 | 0.219329 | 0.0481053 |
GBM_4_AutoML_1_20220324_12408 | nan | 0.0477506 | 0.162967 | 0.205025 | 0.0420354 |
GLM_1_AutoML_1_20220324_12408 | nan | 0.225652 | 0.860485 | 0.4854 | 0.235613 |
DRF_1_AutoML_1_20220324_12408 | nan | 0.0540272 | 0.382077 | 0.326603 | 0.106669 |
GBM_3_AutoML_1_20220324_12408 | nan | 0.0466329 | 0.158466 | 0.203734 | 0.0415076 |
XRT_1_AutoML_1_20220324_12408 | nan | 0.0566795 | 0.432375 | 0.369382 | 0.136443 |
# Evaluate the best model with testing data.
model = aml.leader
# For Classification
import scikitplot as skplt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import cohen_kappa_score, confusion_matrix
# Predict with the best model.
predicted_y = model.predict(valid[featureColumns])
predicted_data = predicted_y.as_data_frame()
valid_dataset = valid.as_data_frame()
# Evaluate the skill of the Trained model
acc = accuracy_score(valid_dataset[targetColumn], predicted_data['predict'])
classReport = classification_report(valid_dataset[targetColumn], predicted_data['predict'])
confMatrix = confusion_matrix(valid_dataset[targetColumn], predicted_data['predict'])
print(); print('Testing Results of the trained model: ')
print(); print('Accuracy : ', acc)
#print(); print('Confusion Matrix :\n', confMatrix)
print(); print('Classification Report :\n',classReport)
# Confusion matrix
skplt.metrics.plot_confusion_matrix(valid_dataset[targetColumn], predicted_data['predict'], figsize=(12,12)); plt.show()
xgboost prediction progress: |███████████████████████████████████████████████████| (done) 100%

Testing Results of the trained model:

Accuracy :  0.9500665778961385

Classification Report :
               precision    recall  f1-score   support

           0       0.97      0.98      0.97        58
           1       0.89      0.94      0.92        52
           2       0.96      0.94      0.95        69
           3       0.95      0.97      0.96        65
           4       0.89      0.98      0.93        55
           5       0.96      0.93      0.95        57
           6       0.96      0.88      0.92        52
           7       0.89      0.89      0.89        54
           8       0.99      0.99      0.99        71
           9       0.98      0.91      0.94        56
          10       0.89      0.93      0.91        67
          11       1.00      0.98      0.99        43
          12       0.96      0.96      0.96        74
          13       0.96      0.98      0.97        51
          14       0.90      0.93      0.92        60
          15       0.94      0.96      0.95        50
          16       0.95      0.93      0.94        58
          17       0.92      0.91      0.92        67
          18       0.95      0.92      0.94        65
          19       0.96      1.00      0.98        46
          20       0.98      0.95      0.96        56
          21       1.00      0.98      0.99        46
          22       0.97      0.95      0.96        61
          23       0.95      1.00      0.97        58
          24       0.98      0.96      0.97        55
          25       1.00      0.96      0.98        56

    accuracy                           0.95      1502
   macro avg       0.95      0.95      0.95      1502
weighted avg       0.95      0.95      0.95      1502
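`cohen_kappa_score` is imported above but never called. It measures agreement between the true and predicted labels corrected for chance agreement, which is a useful complement to accuracy on multiclass problems. A pure-Python sketch of the quantity it computes:

```python
from collections import Counter

def cohen_kappa(y_true, y_pred):
    # Observed agreement minus the agreement expected by chance,
    # scaled so 1.0 is perfect agreement and 0.0 is chance level.
    n = len(y_true)
    observed = sum(t == p for t, p in zip(y_true, y_pred)) / n
    true_counts, pred_counts = Counter(y_true), Counter(y_pred)
    expected = sum(true_counts[c] * pred_counts.get(c, 0) for c in true_counts) / (n * n)
    return (observed - expected) / (1 - expected)

print(cohen_kappa([0, 1, 1, 0], [0, 1, 0, 0]))  # 0.5
```

In the notebook itself, `cohen_kappa_score(valid_dataset[targetColumn], predicted_data['predict'])` would give the same statistic directly.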
print(); print();
#pd.set_option("display.max_rows", None, "display.max_columns", None)
VI = model.varimp(use_pandas=True)
print(VI[['variable', 'percentage']].head(25))
print()
#pd.set_option("display.max_rows", None, "display.max_columns", None)
VI = model.varimp(use_pandas=True)
VI = VI[['variable', 'percentage']]
VI.head(25).plot(x = 'variable', y = 'percentage', kind = 'barh', figsize = (14,16), grid = True,
title = "Variable Importance: H2O model")
plt.gca().invert_yaxis(); plt.show()
    variable  percentage
0      x-ege    0.131625
1      xy2br    0.106082
2      y-ege    0.100406
3      y2bar    0.099933
4      x2ybr    0.089981
5      x2bar    0.086003
6      xegvy    0.081352
7      xybar    0.064461
8      y-bar    0.062993
9      yegvx    0.052287
10     x-bar    0.044058
11     onpix    0.019278
12     y-box    0.016757
13      high    0.016441
14     x-box    0.015953
15     width    0.012389
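The `percentage` column sums to 1, so a cumulative sum shows how much of the total importance the top features capture. A small sketch using the top three values from the importance table above:

```python
import pandas as pd

# Top three importance values from the model's varimp output above;
# the cumulative share shows how quickly a few features dominate.
vi = pd.DataFrame({"variable": ["x-ege", "xy2br", "y-ege"],
                   "percentage": [0.131625, 0.106082, 0.100406]})
vi["cumulative"] = vi["percentage"].cumsum()
print(vi["cumulative"].round(6).tolist())  # [0.131625, 0.237707, 0.338113]
```

So the three strongest edge-count features alone account for roughly a third of the model's total importance.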
# Explain a model
#exm = model.explain(valid)
# Model hyperparameters used in the leader model
model.params.keys()
dict_keys(['model_id', 'training_frame', 'validation_frame', 'nfolds', 'keep_cross_validation_models', 'keep_cross_validation_predictions', 'keep_cross_validation_fold_assignment', 'score_each_iteration', 'fold_assignment', 'fold_column', 'response_column', 'ignored_columns', 'ignore_const_cols', 'offset_column', 'weights_column', 'stopping_rounds', 'stopping_metric', 'stopping_tolerance', 'max_runtime_secs', 'seed', 'distribution', 'tweedie_power', 'categorical_encoding', 'quiet_mode', 'checkpoint', 'export_checkpoints_dir', 'ntrees', 'max_depth', 'min_rows', 'min_child_weight', 'learn_rate', 'eta', 'sample_rate', 'subsample', 'col_sample_rate', 'colsample_bylevel', 'col_sample_rate_per_tree', 'colsample_bytree', 'colsample_bynode', 'max_abs_leafnode_pred', 'max_delta_step', 'monotone_constraints', 'interaction_constraints', 'score_tree_interval', 'min_split_improvement', 'gamma', 'nthread', 'save_matrix_directory', 'build_tree_one_node', 'calibrate_model', 'calibration_frame', 'max_bins', 'max_leaves', 'sample_type', 'normalize_type', 'rate_drop', 'one_drop', 'skip_drop', 'tree_method', 'grow_policy', 'booster', 'reg_lambda', 'reg_alpha', 'dmatrix_type', 'backend', 'gpu_id', 'gainslift_bins', 'auc_type', 'scale_pos_weight'])
# Model Hyperparameter value
model.params
{'model_id': {'default': None, 'actual': {'__meta': {'schema_version': 3, 'schema_name': 'ModelKeyV3', 'schema_type': 'Key<Model>'}, 'name': 'XGBoost_1_AutoML_1_20220324_12408', 'type': 'Key<Model>', 'URL': '/3/Models/XGBoost_1_AutoML_1_20220324_12408'}, 'input': None}, 'training_frame': {'default': None, 'actual': {'__meta': {'schema_version': 3, 'schema_name': 'FrameKeyV3', 'schema_type': 'Key<Frame>'}, 'name': 'AutoML_1_20220324_12408_training_py_4_sid_86bc', 'type': 'Key<Frame>', 'URL': '/3/Frames/AutoML_1_20220324_12408_training_py_4_sid_86bc'}, 'input': {'__meta': {'schema_version': 3, 'schema_name': 'FrameKeyV3', 'schema_type': 'Key<Frame>'}, 'name': 'AutoML_1_20220324_12408_training_py_4_sid_86bc', 'type': 'Key<Frame>', 'URL': '/3/Frames/AutoML_1_20220324_12408_training_py_4_sid_86bc'}}, 'validation_frame': {'default': None, 'actual': {'__meta': {'schema_version': 3, 'schema_name': 'FrameKeyV3', 'schema_type': 'Key<Frame>'}, 'name': 'py_5_sid_86bc', 'type': 'Key<Frame>', 'URL': '/3/Frames/py_5_sid_86bc'}, 'input': {'__meta': {'schema_version': 3, 'schema_name': 'FrameKeyV3', 'schema_type': 'Key<Frame>'}, 'name': 'py_5_sid_86bc', 'type': 'Key<Frame>', 'URL': '/3/Frames/py_5_sid_86bc'}}, 'nfolds': {'default': 0, 'actual': 5, 'input': 5}, 'keep_cross_validation_models': {'default': True, 'actual': False, 'input': False}, 'keep_cross_validation_predictions': {'default': False, 'actual': True, 'input': True}, 'keep_cross_validation_fold_assignment': {'default': False, 'actual': False, 'input': False}, 'score_each_iteration': {'default': False, 'actual': False, 'input': False}, 'fold_assignment': {'default': 'AUTO', 'actual': 'Modulo', 'input': 'Modulo'}, 'fold_column': {'default': None, 'actual': None, 'input': None}, 'response_column': {'default': None, 'actual': {'__meta': {'schema_version': 3, 'schema_name': 'ColSpecifierV3', 'schema_type': 'VecSpecifier'}, 'column_name': 'target', 'is_member_of_frames': None}, 'input': {'__meta': {'schema_version': 3, 
'schema_name': 'ColSpecifierV3', 'schema_type': 'VecSpecifier'}, 'column_name': 'target', 'is_member_of_frames': None}}, 'ignored_columns': {'default': None, 'actual': [], 'input': []}, 'ignore_const_cols': {'default': True, 'actual': True, 'input': True}, 'offset_column': {'default': None, 'actual': None, 'input': None}, 'weights_column': {'default': None, 'actual': None, 'input': None}, 'stopping_rounds': {'default': 0, 'actual': 0, 'input': 3}, 'stopping_metric': {'default': 'AUTO', 'actual': 'logloss', 'input': 'logloss'}, 'stopping_tolerance': {'default': 0.001, 'actual': 0.008607267257206742, 'input': 0.008607267257206742}, 'max_runtime_secs': {'default': 0.0, 'actual': 0.0, 'input': 3074457437244227.5}, 'seed': {'default': -1, 'actual': 1234, 'input': 1234}, 'distribution': {'default': 'AUTO', 'actual': 'multinomial', 'input': 'multinomial'}, 'tweedie_power': {'default': 1.5, 'actual': 1.5, 'input': 1.5}, 'categorical_encoding': {'default': 'AUTO', 'actual': 'OneHotInternal', 'input': 'AUTO'}, 'quiet_mode': {'default': True, 'actual': True, 'input': True}, 'checkpoint': {'default': None, 'actual': None, 'input': None}, 'export_checkpoints_dir': {'default': None, 'actual': None, 'input': None}, 'ntrees': {'default': 50, 'actual': 194, 'input': 10000}, 'max_depth': {'default': 6, 'actual': 15, 'input': 15}, 'min_rows': {'default': 1.0, 'actual': 10.0, 'input': 10.0}, 'min_child_weight': {'default': 1.0, 'actual': 10.0, 'input': 1.0}, 'learn_rate': {'default': 0.3, 'actual': 0.3, 'input': 0.3}, 'eta': {'default': 0.3, 'actual': 0.3, 'input': 0.3}, 'sample_rate': {'default': 1.0, 'actual': 0.6, 'input': 0.6}, 'subsample': {'default': 1.0, 'actual': 0.6, 'input': 1.0}, 'col_sample_rate': {'default': 1.0, 'actual': 0.8, 'input': 0.8}, 'colsample_bylevel': {'default': 1.0, 'actual': 0.8, 'input': 1.0}, 'col_sample_rate_per_tree': {'default': 1.0, 'actual': 0.8, 'input': 0.8}, 'colsample_bytree': {'default': 1.0, 'actual': 0.8, 'input': 1.0}, 'colsample_bynode': 
{'default': 1.0, 'actual': 1.0, 'input': 1.0}, 'max_abs_leafnode_pred': {'default': 0.0, 'actual': 0.0, 'input': 0.0}, 'max_delta_step': {'default': 0.0, 'actual': 0.0, 'input': 0.0}, 'monotone_constraints': {'default': None, 'actual': None, 'input': None}, 'interaction_constraints': {'default': None, 'actual': None, 'input': None}, 'score_tree_interval': {'default': 0, 'actual': 5, 'input': 5}, 'min_split_improvement': {'default': 0.0, 'actual': 0.0, 'input': 0.0}, 'gamma': {'default': 0.0, 'actual': 0.0, 'input': 0.0}, 'nthread': {'default': -1, 'actual': -1, 'input': -1}, 'save_matrix_directory': {'default': None, 'actual': None, 'input': None}, 'build_tree_one_node': {'default': False, 'actual': False, 'input': False}, 'calibrate_model': {'default': False, 'actual': False, 'input': False}, 'calibration_frame': {'default': None, 'actual': None, 'input': None}, 'max_bins': {'default': 256, 'actual': 256, 'input': 256}, 'max_leaves': {'default': 0, 'actual': 0, 'input': 0}, 'sample_type': {'default': 'uniform', 'actual': 'uniform', 'input': 'uniform'}, 'normalize_type': {'default': 'tree', 'actual': 'tree', 'input': 'tree'}, 'rate_drop': {'default': 0.0, 'actual': 0.0, 'input': 0.0}, 'one_drop': {'default': False, 'actual': False, 'input': False}, 'skip_drop': {'default': 0.0, 'actual': 0.0, 'input': 0.0}, 'tree_method': {'default': 'auto', 'actual': 'exact', 'input': 'auto'}, 'grow_policy': {'default': 'depthwise', 'actual': 'depthwise', 'input': 'depthwise'}, 'booster': {'default': 'gbtree', 'actual': 'gbtree', 'input': 'gbtree'}, 'reg_lambda': {'default': 1.0, 'actual': 1.0, 'input': 1.0}, 'reg_alpha': {'default': 0.0, 'actual': 0.0, 'input': 0.0}, 'dmatrix_type': {'default': 'auto', 'actual': 'dense', 'input': 'auto'}, 'backend': {'default': 'auto', 'actual': 'cpu', 'input': 'auto'}, 'gpu_id': {'default': None, 'actual': None, 'input': None}, 'gainslift_bins': {'default': -1, 'actual': -1, 'input': -1}, 'auc_type': {'default': 'AUTO', 'actual': 'AUTO', 
'input': 'AUTO'}, 'scale_pos_weight': {'default': 1.0, 'actual': 1.0, 'input': 1.0}}
model.params['nfolds']
{'default': 0, 'actual': 5, 'input': 5}
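Each entry of `model.params` is a triple of `default`, `actual`, and `input` values, so a dict comprehension reduces the whole mapping to just the values the model actually trained with. A standalone sketch using an excerpt of the params shown above:

```python
# Excerpt of model.params so the snippet runs standalone; in the notebook
# you would pass model.params itself.
params = {"nfolds": {"default": 0, "actual": 5, "input": 5},
          "seed":   {"default": -1, "actual": 1234, "input": 1234}}
actual = {name: spec["actual"] for name, spec in params.items()}
print(actual)  # {'nfolds': 5, 'seed': 1234}
```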
# save the model
model_path = h2o.save_model(model=model, force=True)
print(model_path)
/home/jupyter/DSP/XGBoost_1_AutoML_1_20220324_12408
saved_model = h2o.load_model(model_path)
# For Classification
import scikitplot as skplt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import cohen_kappa_score, confusion_matrix
hf = h2o.H2OFrame(data_unseen)
# Predict with the reloaded model.
predicted_y = saved_model.predict(hf[featureColumns])
predicted_data = predicted_y.as_data_frame()
# Evaluate the skill of the Trained model
acc = accuracy_score(data_unseen[targetColumn], predicted_data['predict'])
classReport = classification_report(data_unseen[targetColumn], predicted_data['predict'])
confMatrix = confusion_matrix(data_unseen[targetColumn], predicted_data['predict'])
print(); print('Testing Results of the trained model: ')
print(); print('Accuracy : ', acc)
#print(); print('Confusion Matrix :\n', confMatrix)
print(); print('Classification Report :\n',classReport)
# Confusion matrix
skplt.metrics.plot_confusion_matrix(data_unseen[targetColumn], predicted_data['predict'], figsize=(12,12)); plt.show()
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
xgboost prediction progress: |███████████████████████████████████████████████████| (done) 100%

Testing Results of the trained model:

Accuracy :  0.9418

Classification Report :
               precision    recall  f1-score   support

           0       0.98      0.99      0.98       193
           1       0.89      0.92      0.91       199
           2       0.97      0.92      0.95       201
           3       0.87      0.94      0.90       188
           4       0.92      0.96      0.94       195
           5       0.94      0.90      0.92       189
           6       0.92      0.93      0.93       196
           7       0.90      0.89      0.89       166
           8       0.90      0.94      0.92       171
           9       0.98      0.89      0.93       187
          10       0.94      0.93      0.94       176
          11       0.97      0.95      0.96       192
          12       0.97      0.94      0.96       195
          13       0.93      0.90      0.92       197
          14       0.92      0.92      0.92       189
          15       0.95      0.95      0.95       209
          16       0.94      0.94      0.94       206
          17       0.93      0.91      0.92       183
          18       0.93      0.96      0.95       173
          19       0.97      0.97      0.97       200
          20       0.96      0.97      0.96       207
          21       0.95      0.95      0.95       214
          22       0.94      0.96      0.95       196
          23       0.94      0.98      0.96       186
          24       0.99      0.97      0.98       194
          25       0.97      0.97      0.97       198

    accuracy                           0.94      5000
   macro avg       0.94      0.94      0.94      5000
weighted avg       0.94      0.94      0.94      5000
In this coding recipe, we discussed how to build a classification model in Python with H2O AutoML.
Specifically, we have learned the following:
- how to load a dataset from OpenML and convert it to a pandas DataFrame;
- how to split the data into modeling and unseen (hold-out) sets;
- how to convert a pandas DataFrame to an H2OFrame and run H2O AutoML;
- how to evaluate the leader model with accuracy, a classification report, and a confusion matrix;
- how to inspect variable importance and the model's hyperparameters; and
- how to save the trained model and reload it for predictions on unseen data.