# For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

from pycaret.classification import *

# OpenML Dataset ID
whichDataset = 6  # provide dataset id (6 = the 'letter' recognition dataset)

import openml
from openml.datasets import get_dataset

# Fetch the dataset handle from OpenML (network call; result is cached by openml).
# Fix: use the directly imported `get_dataset` helper -- it was imported above
# but the original called the fully-qualified `openml.datasets.get_dataset`.
dataset = get_dataset(whichDataset)

# Print a summary
print(
    f"This is dataset '{dataset.name}', the target feature is "
    f"'{dataset.default_target_attribute}'"
)
print(f"URL: {dataset.url}")
print(dataset.description)
This is dataset 'letter', the target feature is 'class' URL: https://www.openml.org/data/v1/download/6/letter.arff **Author**: David J. Slate **Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Letter+Recognition) - 01-01-1991 **Please cite**: P. W. Frey and D. J. Slate. "Letter Recognition Using Holland-style Adaptive Classifiers". Machine Learning 6(2), 1991 1. TITLE: Letter Image Recognition Data The objective is to identify each of a large number of black-and-white rectangular pixel displays as one of the 26 capital letters in the English alphabet. The character images were based on 20 different fonts and each letter within these 20 fonts was randomly distorted to produce a file of 20,000 unique stimuli. Each stimulus was converted into 16 primitive numerical attributes (statistical moments and edge counts) which were then scaled to fit into a range of integer values from 0 through 15. We typically train on the first 16000 items and then use the resulting model to predict the letter category for the remaining 4000. See the article cited above for more details.
# Re-suppress warnings (harmless to repeat per notebook cell).
import warnings
warnings.filterwarnings("ignore")
# Pull features/target out of the OpenML dataset handle created above.
# NOTE(review): `dataset_format="array"` returns numpy arrays; newer openml
# versions deprecate this in favour of "dataframe" -- confirm installed version.
X, y, categorical_indicator, attribute_names = dataset.get_data(
dataset_format="array", target=dataset.default_target_attribute)
# Rebind `dataset` from the OpenML handle to a pandas DataFrame of features,
# then attach the encoded class labels as a 'target' column.
dataset = pd.DataFrame(X, columns=attribute_names)
dataset["target"] = y
# Expected: (20000, 17) -- 16 features + target.
dataset.shape
(20000, 17)
# Preview the first five rows of the assembled DataFrame.
dataset.head()
x-box | y-box | width | high | onpix | x-bar | y-bar | x2bar | y2bar | xybar | x2ybr | xy2br | x-ege | xegvy | y-ege | yegvx | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2.0 | 4.0 | 4.0 | 3.0 | 2.0 | 7.0 | 8.0 | 2.0 | 9.0 | 11.0 | 7.0 | 7.0 | 1.0 | 8.0 | 5.0 | 6.0 | 25 |
1 | 4.0 | 7.0 | 5.0 | 5.0 | 5.0 | 5.0 | 9.0 | 6.0 | 4.0 | 8.0 | 7.0 | 9.0 | 2.0 | 9.0 | 7.0 | 10.0 | 15 |
2 | 7.0 | 10.0 | 8.0 | 7.0 | 4.0 | 8.0 | 8.0 | 5.0 | 10.0 | 11.0 | 2.0 | 8.0 | 2.0 | 5.0 | 5.0 | 10.0 | 18 |
3 | 4.0 | 9.0 | 5.0 | 7.0 | 4.0 | 7.0 | 7.0 | 13.0 | 1.0 | 7.0 | 6.0 | 8.0 | 3.0 | 8.0 | 0.0 | 8.0 | 7 |
4 | 6.0 | 7.0 | 8.0 | 5.0 | 4.0 | 7.0 | 6.0 | 3.0 | 7.0 | 10.0 | 7.0 | 9.0 | 3.0 | 8.0 | 3.0 | 7.0 | 7 |
#dataset.columns.to_list()
# find missing values in data frame
# Total count of NaN cells across the whole DataFrame (0 = no missing data).
dataset.isnull().sum().sum()
0
# Defensive no-op here: the check above reported zero missing values,
# so filling NaNs with 0 leaves the data unchanged; kept as a safeguard.
dataset = dataset.fillna(0)
# Re-verify: still expected to be 0.
dataset.isnull().sum().sum()
0
# group by 'target'
# Row count per class label -- shows the 26 letter classes are roughly balanced.
dataset.groupby('target').count()
x-box | y-box | width | high | onpix | x-bar | y-bar | x2bar | y2bar | xybar | x2ybr | xy2br | x-ege | xegvy | y-ege | yegvx | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
target | ||||||||||||||||
0 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 | 789 |
1 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 | 766 |
2 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 | 736 |
3 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 | 805 |
4 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 | 768 |
5 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 | 775 |
6 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 | 773 |
7 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 |
8 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 |
9 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 | 747 |
10 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 | 739 |
11 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 | 761 |
12 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 | 792 |
13 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 |
14 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 | 753 |
15 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 | 803 |
16 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 | 783 |
17 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 | 758 |
18 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 |
19 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 | 796 |
20 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 | 813 |
21 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 | 764 |
22 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 | 752 |
23 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 | 787 |
24 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 | 786 |
25 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 | 734 |
# training and test data split
# Hold out 25% of rows as "unseen" data for a final out-of-pipeline check;
# random_state pins the shuffle so the split is reproducible.
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
# Fix: f-strings (already used earlier in this file) instead of
# dated '...' + str(...) concatenation; printed text is unchanged.
print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')
Data for Modeling: (15000, 17) Unseen Data For Predictions: (5000, 17)
# Initialise the PyCaret classification experiment: infers feature types and
# builds the preprocessing pipeline + internal train/test split.
# session_id pins all downstream randomness for reproducibility.
env_setup = setup(data = data, target = 'target', session_id=1234)
Description | Value | |
---|---|---|
0 | session_id | 1234 |
1 | Target | target |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (15000, 17) |
5 | Missing Values | False |
6 | Numeric Features | 16 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (10499, 16) |
12 | Transformed Test Set | (4501, 16) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 1114 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# --------------------------------------
# Cross-validate every available classifier (CatBoost excluded) and keep
# the single best estimator ranked by mean CV Accuracy.
best_model = compare_models(exclude = ['catboost'], sort = 'Accuracy')
# --------------------------------------
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) | |
---|---|---|---|---|---|---|---|---|---|
et | Extra Trees Classifier | 0.9566 | 0.9992 | 0.9564 | 0.9578 | 0.9566 | 0.9548 | 0.9549 | 0.6230 |
lightgbm | Light Gradient Boosting Machine | 0.9524 | 0.9992 | 0.9522 | 0.9536 | 0.9525 | 0.9505 | 0.9505 | 1.6180 |
rf | Random Forest Classifier | 0.9478 | 0.9991 | 0.9476 | 0.9493 | 0.9478 | 0.9457 | 0.9458 | 0.8200 |
xgboost | Extreme Gradient Boosting | 0.9463 | 0.9992 | 0.9461 | 0.9475 | 0.9463 | 0.9441 | 0.9442 | 9.2070 |
knn | K Neighbors Classifier | 0.9343 | 0.9950 | 0.9339 | 0.9369 | 0.9345 | 0.9316 | 0.9317 | 0.1540 |
gbc | Gradient Boosting Classifier | 0.9082 | 0.9975 | 0.9078 | 0.9111 | 0.9084 | 0.9045 | 0.9046 | 14.4530 |
qda | Quadratic Discriminant Analysis | 0.8826 | 0.9957 | 0.8823 | 0.8865 | 0.8825 | 0.8779 | 0.8780 | 0.0230 |
dt | Decision Tree Classifier | 0.8380 | 0.9158 | 0.8378 | 0.8410 | 0.8381 | 0.8315 | 0.8316 | 0.0340 |
lr | Logistic Regression | 0.7770 | 0.9800 | 0.7756 | 0.7786 | 0.7757 | 0.7681 | 0.7683 | 4.2490 |
lda | Linear Discriminant Analysis | 0.7000 | 0.9671 | 0.6993 | 0.7139 | 0.7004 | 0.6880 | 0.6885 | 0.0240 |
nb | Naive Bayes | 0.6377 | 0.9560 | 0.6373 | 0.6537 | 0.6336 | 0.6232 | 0.6241 | 0.0200 |
ridge | Ridge Classifier | 0.5603 | 0.0000 | 0.5597 | 0.5841 | 0.5231 | 0.5427 | 0.5465 | 0.0120 |
svm | SVM - Linear Kernel | 0.5567 | 0.0000 | 0.5547 | 0.6837 | 0.5433 | 0.5389 | 0.5475 | 0.5110 |
ada | Ada Boost Classifier | 0.2197 | 0.7295 | 0.2199 | 0.2224 | 0.1743 | 0.1884 | 0.1948 | 0.3320 |
dummy | Dummy Classifier | 0.0413 | 0.5000 | 0.0385 | 0.0017 | 0.0033 | 0.0000 | 0.0000 | 0.0120 |
# Train an Extra Trees classifier with PyCaret's default 10-fold CV.
model_1 = create_model('et')
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9610 | 0.9997 | 0.9615 | 0.9623 | 0.9610 | 0.9594 | 0.9594 |
1 | 0.9543 | 0.9993 | 0.9535 | 0.9552 | 0.9541 | 0.9525 | 0.9525 | |
2 | 0.9629 | 0.9987 | 0.9625 | 0.9638 | 0.9629 | 0.9614 | 0.9614 | |
3 | 0.9533 | 0.9994 | 0.9535 | 0.9544 | 0.9535 | 0.9515 | 0.9515 | |
4 | 0.9457 | 0.9990 | 0.9449 | 0.9472 | 0.9455 | 0.9435 | 0.9436 | |
5 | 0.9600 | 0.9995 | 0.9598 | 0.9614 | 0.9602 | 0.9584 | 0.9584 | |
6 | 0.9581 | 0.9994 | 0.9576 | 0.9594 | 0.9582 | 0.9564 | 0.9565 | |
7 | 0.9514 | 0.9985 | 0.9509 | 0.9529 | 0.9514 | 0.9495 | 0.9495 | |
8 | 0.9562 | 0.9993 | 0.9563 | 0.9573 | 0.9561 | 0.9544 | 0.9545 | |
9 | 0.9628 | 0.9990 | 0.9632 | 0.9637 | 0.9629 | 0.9613 | 0.9614 | |
Mean | 0.9566 | 0.9992 | 0.9564 | 0.9578 | 0.9566 | 0.9548 | 0.9549 | |
Std | 0.0052 | 0.0004 | 0.0055 | 0.0051 | 0.0053 | 0.0054 | 0.0054 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
# Hyperparameter tuning via random search (PyCaret default budget).
tuned_model_1 = tune_model(model_1)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.8962 | 0.9968 | 0.8963 | 0.9060 | 0.8964 | 0.8920 | 0.8924 |
1 | 0.8810 | 0.9958 | 0.8793 | 0.8956 | 0.8824 | 0.8762 | 0.8767 | |
2 | 0.8743 | 0.9951 | 0.8735 | 0.8876 | 0.8771 | 0.8692 | 0.8697 | |
3 | 0.8790 | 0.9960 | 0.8785 | 0.8893 | 0.8813 | 0.8742 | 0.8745 | |
4 | 0.8629 | 0.9950 | 0.8619 | 0.8773 | 0.8649 | 0.8574 | 0.8578 | |
5 | 0.8771 | 0.9957 | 0.8761 | 0.8865 | 0.8784 | 0.8722 | 0.8725 | |
6 | 0.8771 | 0.9953 | 0.8768 | 0.8889 | 0.8784 | 0.8722 | 0.8726 | |
7 | 0.8752 | 0.9947 | 0.8745 | 0.8848 | 0.8760 | 0.8702 | 0.8706 | |
8 | 0.8886 | 0.9956 | 0.8886 | 0.8974 | 0.8898 | 0.8841 | 0.8844 | |
9 | 0.8875 | 0.9960 | 0.8882 | 0.9006 | 0.8901 | 0.8830 | 0.8834 | |
Mean | 0.8799 | 0.9956 | 0.8794 | 0.8914 | 0.8815 | 0.8751 | 0.8754 | |
Std | 0.0087 | 0.0006 | 0.0091 | 0.0080 | 0.0084 | 0.0091 | 0.0090 | |
Train | nan | 0.9296 | 0.9981 | 0.9293 | 0.9345 | 0.9305 | 0.9268 | 0.9269 |
# Show the tuned estimator's hyperparameters.
print(tuned_model_1)
ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight={}, criterion='entropy', max_depth=9, max_features=1.0, max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0005, min_impurity_split=None, min_samples_leaf=5, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=130, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
# Re-tune with a larger search budget (100 random-search iterations);
# rebinds tuned_model_1 to the new best candidate.
tuned_model_1 = tune_model(model_1, n_iter=100)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9067 | 0.9971 | 0.9069 | 0.9166 | 0.9075 | 0.9029 | 0.9033 |
1 | 0.8952 | 0.9964 | 0.8942 | 0.9055 | 0.8959 | 0.8910 | 0.8914 | |
2 | 0.8886 | 0.9963 | 0.8882 | 0.8954 | 0.8895 | 0.8841 | 0.8843 | |
3 | 0.8943 | 0.9968 | 0.8944 | 0.9023 | 0.8961 | 0.8901 | 0.8903 | |
4 | 0.8657 | 0.9959 | 0.8649 | 0.8772 | 0.8674 | 0.8603 | 0.8607 | |
5 | 0.8829 | 0.9966 | 0.8826 | 0.8901 | 0.8837 | 0.8782 | 0.8784 | |
6 | 0.8876 | 0.9961 | 0.8875 | 0.8959 | 0.8885 | 0.8831 | 0.8834 | |
7 | 0.8838 | 0.9956 | 0.8830 | 0.8910 | 0.8846 | 0.8791 | 0.8794 | |
8 | 0.8990 | 0.9965 | 0.8991 | 0.9054 | 0.8994 | 0.8950 | 0.8952 | |
9 | 0.8951 | 0.9964 | 0.8961 | 0.9053 | 0.8972 | 0.8909 | 0.8912 | |
Mean | 0.8899 | 0.9964 | 0.8897 | 0.8985 | 0.8910 | 0.8855 | 0.8858 | |
Std | 0.0106 | 0.0004 | 0.0109 | 0.0104 | 0.0105 | 0.0110 | 0.0110 | |
Train | nan | 0.9402 | 0.9987 | 0.9400 | 0.9429 | 0.9407 | 0.9378 | 0.9379 |
# Show the hyperparameters selected by the larger search.
print(tuned_model_1)
ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='entropy', max_depth=10, max_features=1.0, max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=6, min_samples_split=9, min_weight_fraction_leaf=0.0, n_estimators=110, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
# Diagnostic plots for the tuned Extra Trees model.
plot_model(tuned_model_1, plot = 'auc')
plot_model(tuned_model_1, plot = 'pr')
#plot_model(tuned_model_1, plot='feature')
plot_model(tuned_model_1, plot = 'confusion_matrix')
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans. findfont: Generic family 'sans-serif' not found because none of the following families were found: Arial, Liberation Sans, Bitstream Vera Sans, sans-serif
# More diagnostics for model 1 (learning curve, decision boundary, errors).
plot_model(tuned_model_1, plot = 'learning')
#plot_model(tuned_model_1, plot = 'threshold')
plot_model(tuned_model_1, plot = 'boundary')
plot_model(tuned_model_1, plot = 'error')
# Second candidate: XGBoost classifier, 10-fold CV.
model_2 = create_model('xgboost')
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9552 | 0.9995 | 0.9555 | 0.9564 | 0.9553 | 0.9534 | 0.9535 |
1 | 0.9505 | 0.9992 | 0.9496 | 0.9516 | 0.9504 | 0.9485 | 0.9485 | |
2 | 0.9467 | 0.9993 | 0.9468 | 0.9480 | 0.9466 | 0.9445 | 0.9446 | |
3 | 0.9476 | 0.9994 | 0.9476 | 0.9485 | 0.9477 | 0.9455 | 0.9455 | |
4 | 0.9324 | 0.9989 | 0.9314 | 0.9344 | 0.9323 | 0.9297 | 0.9298 | |
5 | 0.9486 | 0.9988 | 0.9488 | 0.9501 | 0.9486 | 0.9465 | 0.9466 | |
6 | 0.9552 | 0.9995 | 0.9547 | 0.9560 | 0.9551 | 0.9534 | 0.9535 | |
7 | 0.9410 | 0.9989 | 0.9408 | 0.9420 | 0.9409 | 0.9386 | 0.9386 | |
8 | 0.9400 | 0.9990 | 0.9402 | 0.9409 | 0.9398 | 0.9376 | 0.9376 | |
9 | 0.9457 | 0.9991 | 0.9460 | 0.9467 | 0.9458 | 0.9435 | 0.9435 | |
Mean | 0.9463 | 0.9992 | 0.9461 | 0.9475 | 0.9463 | 0.9441 | 0.9442 | |
Std | 0.0067 | 0.0002 | 0.0068 | 0.0065 | 0.0067 | 0.0070 | 0.0069 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
# Random-search tuning for the XGBoost candidate (default budget).
tuned_model_2 = tune_model(model_2)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9505 | 0.9995 | 0.9506 | 0.9521 | 0.9505 | 0.9485 | 0.9486 |
1 | 0.9543 | 0.9991 | 0.9534 | 0.9554 | 0.9543 | 0.9525 | 0.9525 | |
2 | 0.9514 | 0.9992 | 0.9512 | 0.9530 | 0.9514 | 0.9495 | 0.9495 | |
3 | 0.9448 | 0.9993 | 0.9446 | 0.9464 | 0.9450 | 0.9425 | 0.9426 | |
4 | 0.9371 | 0.9991 | 0.9365 | 0.9387 | 0.9372 | 0.9346 | 0.9347 | |
5 | 0.9429 | 0.9989 | 0.9429 | 0.9445 | 0.9429 | 0.9406 | 0.9406 | |
6 | 0.9552 | 0.9995 | 0.9545 | 0.9564 | 0.9551 | 0.9534 | 0.9535 | |
7 | 0.9467 | 0.9986 | 0.9462 | 0.9476 | 0.9467 | 0.9445 | 0.9446 | |
8 | 0.9448 | 0.9993 | 0.9448 | 0.9457 | 0.9444 | 0.9425 | 0.9426 | |
9 | 0.9438 | 0.9990 | 0.9442 | 0.9453 | 0.9440 | 0.9415 | 0.9415 | |
Mean | 0.9471 | 0.9991 | 0.9469 | 0.9485 | 0.9472 | 0.9450 | 0.9451 | |
Std | 0.0054 | 0.0003 | 0.0052 | 0.0053 | 0.0053 | 0.0056 | 0.0056 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
# Show the tuned XGBoost hyperparameters.
print(tuned_model_2)
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)
# Diagnostic plots for the tuned XGBoost model.
plot_model(tuned_model_2, plot = 'auc')
#plot_model(tuned_model_2, plot = 'pr')
plot_model(tuned_model_2, plot = 'feature')
plot_model(tuned_model_2, plot = 'confusion_matrix')
#plot_model(tuned_model_2, plot = 'learning')
#plot_model(tuned_model_2, plot = 'threshold')
plot_model(tuned_model_2, plot = 'boundary')
plot_model(tuned_model_2, plot = 'error')
# Evaluate model 1 on PyCaret's internal hold-out set.
predict_model(tuned_model_1);
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extra Trees Classifier | 0.9011 | 0.9970 | 0.9009 | 0.9073 | 0.9022 | 0.8972 | 0.8973 |
# Evaluate model 2 on PyCaret's internal hold-out set.
predict_model(tuned_model_2);
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 0.9509 | 0.9994 | 0.9505 | 0.9517 | 0.9510 | 0.9489 | 0.9490 |
# Refit the better candidate (XGBoost) on the full modelling data,
# i.e. PyCaret's train split plus its internal hold-out.
final_model = finalize_model(tuned_model_2);
# Final model parameters for deployment
print(final_model)
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)
# NOTE(review): after finalize_model the internal hold-out rows were part of
# training, so the perfect score below is NOT a generalisation estimate --
# use the unseen split evaluated next for that.
predict_model(final_model);
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
# Score the truly unseen 25% split; adds 'Label' (predicted class) and
# 'Score' columns to a copy of the data.
unseen_predictions = predict_model(final_model, data=data_unseen)
unseen_predictions.head()
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 0.9572 | 0.9996 | 0.9566 | 0.9575 | 0.9572 | 0.9555 | 0.9555 |
x-box | y-box | width | high | onpix | x-bar | y-bar | x2bar | y2bar | xybar | x2ybr | xy2br | x-ege | xegvy | y-ege | yegvx | target | Label | Score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4.0 | 9.0 | 5.0 | 7.0 | 4.0 | 7.0 | 7.0 | 13.0 | 1.0 | 7.0 | 6.0 | 8.0 | 3.0 | 8.0 | 0.0 | 8.0 | 7 | 7 | 0.9989 |
1 | 2.0 | 7.0 | 4.0 | 5.0 | 1.0 | 9.0 | 8.0 | 4.0 | 2.0 | 5.0 | 13.0 | 8.0 | 3.0 | 10.0 | 0.0 | 8.0 | 21 | 21 | 0.9997 |
2 | 7.0 | 11.0 | 10.0 | 8.0 | 9.0 | 9.0 | 7.0 | 3.0 | 6.0 | 10.0 | 4.0 | 7.0 | 5.0 | 6.0 | 4.0 | 9.0 | 7 | 7 | 0.9762 |
3 | 2.0 | 4.0 | 5.0 | 3.0 | 2.0 | 7.0 | 8.0 | 2.0 | 9.0 | 11.0 | 7.0 | 7.0 | 1.0 | 8.0 | 5.0 | 6.0 | 25 | 25 | 0.9981 |
4 | 5.0 | 10.0 | 7.0 | 9.0 | 3.0 | 8.0 | 5.0 | 9.0 | 8.0 | 6.0 | 4.0 | 8.0 | 3.0 | 8.0 | 4.0 | 8.0 | 16 | 16 | 0.8965 |
from pycaret.utils import check_metric
# Accuracy on the unseen split (true target vs predicted Label).
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'Accuracy')
0.9572
# Recall on the unseen split.
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'Recall')
0.9566
# Precision on the unseen split.
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'Precision')
0.9575
#check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'AUC')
# F1 on the unseen split.
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'F1')
0.9572
# Persist the full preprocessing pipeline + model to Final_Model.pkl.
save_model(final_model,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='target', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strat... interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)]], verbose=False), 'Final_Model.pkl')
# Round-trip check: reload the saved pipeline + model from disk.
load_saved_model = load_model('Final_Model')
Transformation Pipeline and Model Successfully Loaded
# Re-score the unseen split with the reloaded model; metrics should match
# the pre-save evaluation exactly.
new_prediction = predict_model(load_saved_model, data=data_unseen)
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 0.9572 | 0.9996 | 0.9566 | 0.9575 | 0.9572 | 0.9555 | 0.9555 |
# Predicted class ('Label') and its score for the first rows.
# NOTE(review): 'Score' is presumably the predicted-class probability -- confirm
# against the PyCaret predict_model documentation for the installed version.
new_prediction[["Label", "Score"]].head()
Label | Score | |
---|---|---|
0 | 7 | 0.9989 |
1 | 21 | 0.9997 |
2 | 7 | 0.9762 |
3 | 25 | 0.9981 |
4 | 16 | 0.8965 |
# ---- Second, independent run: fresh imports and a new 70/30 split ----
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

from pycaret.classification import *

# OpenML Dataset ID
whichDataset = 6  # provide dataset id (6 = the 'letter' recognition dataset)

import openml
from openml.datasets import get_dataset

# Fix: use the directly imported `get_dataset` helper (previously imported
# but bypassed in favour of the fully-qualified call).
dataset = get_dataset(whichDataset)

X, y, categorical_indicator, attribute_names = dataset.get_data(
    dataset_format="array", target=dataset.default_target_attribute)
dataset = pd.DataFrame(X, columns=attribute_names)
dataset["target"] = y

# 70/30 split this time, with a different seed than the first run.
data = dataset.sample(frac=0.70, random_state=421)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
# Fix: f-strings instead of '...' + str(...) concatenation; output unchanged.
print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')
Data for Modeling: (14000, 17) Unseen Data For Predictions: (6000, 17)
# New PyCaret classification experiment on the 70% modelling split;
# same session_id keeps the pipeline randomness reproducible.
clf = setup(data = data, target = 'target', session_id=1234)
Description | Value | |
---|---|---|
0 | session_id | 1234 |
1 | Target | target |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (14000, 17) |
5 | Missing Values | False |
6 | Numeric Features | 16 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (9799, 16) |
12 | Transformed Test Set | (4201, 16) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 59ba |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
# Suppress the heavy sklearn deprecation/metric-warning spam that
# compare_models triggers during cross-validation.
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# compare all baseline models and keep the top 3 ranked by Accuracy
# (catboost and lightgbm are excluded from the comparison)
top_models = compare_models(n_select = 3, exclude = ['catboost', 'lightgbm'], sort = 'Accuracy')
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) | |
---|---|---|---|---|---|---|---|---|---|
et | Extra Trees Classifier | 0.9578 | 0.9992 | 0.9576 | 0.9591 | 0.9578 | 0.9561 | 0.9561 | 0.5870 |
rf | Random Forest Classifier | 0.9483 | 0.9988 | 0.9480 | 0.9503 | 0.9483 | 0.9462 | 0.9463 | 0.6600 |
xgboost | Extreme Gradient Boosting | 0.9403 | 0.9990 | 0.9401 | 0.9421 | 0.9404 | 0.9379 | 0.9380 | 8.5670 |
knn | K Neighbors Classifier | 0.9290 | 0.9933 | 0.9286 | 0.9316 | 0.9291 | 0.9261 | 0.9262 | 0.1440 |
gbc | Gradient Boosting Classifier | 0.9041 | 0.9971 | 0.9037 | 0.9070 | 0.9043 | 0.9002 | 0.9003 | 12.7930 |
qda | Quadratic Discriminant Analysis | 0.8788 | 0.9956 | 0.8784 | 0.8829 | 0.8789 | 0.8739 | 0.8741 | 0.0210 |
dt | Decision Tree Classifier | 0.8413 | 0.9175 | 0.8408 | 0.8453 | 0.8414 | 0.8350 | 0.8351 | 0.0320 |
lr | Logistic Regression | 0.7695 | 0.9790 | 0.7685 | 0.7720 | 0.7682 | 0.7602 | 0.7604 | 3.5600 |
lda | Linear Discriminant Analysis | 0.6976 | 0.9662 | 0.6968 | 0.7143 | 0.6972 | 0.6855 | 0.6862 | 0.0230 |
nb | Naive Bayes | 0.6407 | 0.9559 | 0.6399 | 0.6572 | 0.6367 | 0.6263 | 0.6272 | 0.0200 |
ridge | Ridge Classifier | 0.5473 | 0.0000 | 0.5466 | 0.5818 | 0.5085 | 0.5292 | 0.5332 | 0.0110 |
svm | SVM - Linear Kernel | 0.5361 | 0.0000 | 0.5349 | 0.6464 | 0.5163 | 0.5174 | 0.5279 | 0.4470 |
ada | Ada Boost Classifier | 0.2418 | 0.8290 | 0.2423 | 0.2611 | 0.2106 | 0.2115 | 0.2157 | 0.2800 |
dummy | Dummy Classifier | 0.0404 | 0.5000 | 0.0385 | 0.0016 | 0.0031 | 0.0000 | 0.0000 | 0.0120 |
/opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 
_warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 0.25. 
warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. 
_warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 0.25. 
warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. 
It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. 
warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_coef_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 0.25. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. 
It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) /opt/conda/lib/python3.7/site-packages/sklearn/utils/deprecation.py:101: FutureWarning: The outputs_2d_ attribute is deprecated in version 0.22 and will be removed in version 0.24. It is equivalent to n_outputs_ > 1. warnings.warn(msg, category=FutureWarning) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result)) /opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:846: RuntimeWarning: invalid value encountered in double_scalars mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
top_models
[ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.300000012, max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1, tree_method='auto', validate_parameters=1, verbosity=0)]
# Hyperparameter-tune each shortlisted base model with PyCaret's
# default random-grid search, preserving the original ordering.
tuned_top_models = []
for base_model in top_models:
    tuned_top_models.append(tune_model(base_model))
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9500 | 0.9995 | 0.9494 | 0.9517 | 0.9503 | 0.9480 | 0.9480 |
1 | 0.9582 | 0.9993 | 0.9585 | 0.9593 | 0.9582 | 0.9565 | 0.9565 | |
2 | 0.9388 | 0.9992 | 0.9380 | 0.9400 | 0.9387 | 0.9363 | 0.9364 | |
3 | 0.9378 | 0.9991 | 0.9370 | 0.9389 | 0.9376 | 0.9353 | 0.9353 | |
4 | 0.9408 | 0.9989 | 0.9403 | 0.9422 | 0.9410 | 0.9384 | 0.9385 | |
5 | 0.9459 | 0.9988 | 0.9452 | 0.9471 | 0.9460 | 0.9438 | 0.9438 | |
6 | 0.9418 | 0.9989 | 0.9417 | 0.9428 | 0.9418 | 0.9395 | 0.9396 | |
7 | 0.9469 | 0.9989 | 0.9466 | 0.9495 | 0.9470 | 0.9448 | 0.9449 | |
8 | 0.9306 | 0.9989 | 0.9310 | 0.9319 | 0.9300 | 0.9278 | 0.9279 | |
9 | 0.9551 | 0.9992 | 0.9549 | 0.9562 | 0.9548 | 0.9533 | 0.9533 | |
Mean | 0.9446 | 0.9991 | 0.9443 | 0.9460 | 0.9445 | 0.9424 | 0.9424 | |
Std | 0.0079 | 0.0002 | 0.0080 | 0.0080 | 0.0080 | 0.0082 | 0.0082 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
tuned_top_models
[ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight={}, criterion='entropy', max_depth=9, max_features=1.0, max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0005, min_impurity_split=None, min_samples_leaf=5, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=130, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight='balanced', criterion='entropy', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=6, min_samples_split=10, min_weight_fraction_leaf=0.0, n_estimators=140, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)]
# Wrap each tuned model in a bagging ensemble (PyCaret's default
# ensemble_model method), keeping the same order as tuned_top_models.
bagged_top_models = []
for tuned_estimator in tuned_top_models:
    bagged_top_models.append(ensemble_model(tuned_estimator))
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9459 | 0.9993 | 0.9455 | 0.9488 | 0.9464 | 0.9438 | 0.9438 |
1 | 0.9520 | 0.9991 | 0.9524 | 0.9535 | 0.9520 | 0.9501 | 0.9502 | |
2 | 0.9347 | 0.9993 | 0.9341 | 0.9364 | 0.9348 | 0.9321 | 0.9321 | |
3 | 0.9276 | 0.9988 | 0.9271 | 0.9295 | 0.9277 | 0.9246 | 0.9247 | |
4 | 0.9388 | 0.9988 | 0.9382 | 0.9408 | 0.9392 | 0.9363 | 0.9364 | |
5 | 0.9398 | 0.9988 | 0.9393 | 0.9416 | 0.9400 | 0.9374 | 0.9374 | |
6 | 0.9459 | 0.9992 | 0.9461 | 0.9477 | 0.9458 | 0.9438 | 0.9438 | |
7 | 0.9439 | 0.9988 | 0.9435 | 0.9463 | 0.9440 | 0.9416 | 0.9417 | |
8 | 0.9265 | 0.9986 | 0.9271 | 0.9288 | 0.9263 | 0.9236 | 0.9237 | |
9 | 0.9510 | 0.9992 | 0.9511 | 0.9526 | 0.9508 | 0.9490 | 0.9491 | |
Mean | 0.9406 | 0.9990 | 0.9404 | 0.9426 | 0.9407 | 0.9382 | 0.9383 | |
Std | 0.0084 | 0.0002 | 0.0085 | 0.0084 | 0.0084 | 0.0088 | 0.0088 | |
Train | nan | 0.9964 | 1.0000 | 0.9964 | 0.9964 | 0.9964 | 0.9963 | 0.9963 |
bagged_top_models
[BaggingClassifier(base_estimator=ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight={}, criterion='entropy', max_depth=9, max_features=1.0, max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0005, min_impurity_split=None, min_samples_leaf=5, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=130, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight='balanced', criterion='entropy', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=6, min_samples_split=10, min_weight_fraction_leaf=0.0, n_estimators=140, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False)]
# Ask PyCaret's automl() for the best pipeline seen in this experiment,
# once for each optimization metric of interest.
best1, best2, best3, best4, best5 = (
    automl(optimize = metric)
    for metric in ('AUC', 'Accuracy', 'Recall', 'Precision', 'F1')
)
# Report each winner; labels and spacing match the metric names above.
for label, winner in (('AUC', best1), ('Accuracy', best2), ('Recall', best3),
                      ('Precision', best4), ('F1', best5)):
    print()
    print(f"Best model based on {label}: ")
    print(winner)
Best model based on AUC: <catboost.core.CatBoostClassifier object at 0x7f2ae0fbb590> Best model based on Accuracy: ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False) Best model based on Recall: ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False) Best model based on Precision: ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False) Best model based on F1: ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
# Visual diagnostics for the best-by-Accuracy model: ROC curves,
# confusion matrix, and learning curve (rendered in this order).
for diagnostic in ('auc', 'confusion_matrix', 'learning'):
    plot_model(best2, plot = diagnostic)
# Persist the full preprocessing pipeline plus fitted model to
# 'Final_Model.pkl' for later reuse.
save_model(best2, 'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='target', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strat... ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)]], verbose=False), 'Final_Model.pkl')
# Reload the persisted pipeline + model saved above as 'Final_Model.pkl'.
load_saved_model = load_model('Final_Model')
# Score unseen rows. NOTE(review): `data_unseen` is not defined in the
# visible portion of this notebook — presumably a hold-out DataFrame
# created in an earlier cell; confirm it exists before running.
new_prediction = predict_model(load_saved_model, data=data_unseen)
# Show the predicted class label and its probability score for the
# first five rows (cell output is displayed by the notebook).
new_prediction[["Label", "Score"]].head()
Transformation Pipeline and Model Successfully Loaded
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extra Trees Classifier | 0.9627 | 0.9995 | 0.9627 | 0.9630 | 0.9627 | 0.9612 | 0.9612 |
Label | Score | |
---|---|---|
0 | 23 | 0.98 |
1 | 3 | 1.00 |
2 | 25 | 0.70 |
3 | 13 | 0.98 |
4 | 5 | 0.57 |
In this coding recipe, we demonstrated how to build a multiclass classification model in Python using PyCaret: comparing baseline models, tuning and bagging the top performers, selecting the best pipeline with automl(), and saving and reloading the final model for prediction on unseen data.