For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as shown in pycaret
whichDataset = 'glass'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
| | RI | Na | Mg | Al | Si | K | Ca | Ba | Fe | Type |
|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.52101 | 13.64 | 4.49 | 1.10 | 71.78 | 0.06 | 8.75 | 0.0 | 0.0 | 1 |
1 | 1.51761 | 13.89 | 3.60 | 1.36 | 72.73 | 0.48 | 7.83 | 0.0 | 0.0 | 1 |
2 | 1.51618 | 13.53 | 3.55 | 1.54 | 72.99 | 0.39 | 7.78 | 0.0 | 0.0 | 1 |
3 | 1.51766 | 13.21 | 3.69 | 1.29 | 72.61 | 0.57 | 8.22 | 0.0 | 0.0 | 1 |
4 | 1.51742 | 13.27 | 3.62 | 1.24 | 73.08 | 0.55 | 8.07 | 0.0 | 0.0 | 1 |
dataset.shape
(214, 10)
dataset.columns.to_list()
['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe', 'Type']
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (160, 10)
Unseen Data For Predictions: (54, 10)
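One caveat on this split: `sample(frac=0.75)` draws rows uniformly at random, so the rarer glass types can end up under-represented in either partition. If that is a concern, a stratified hold-out is a simple alternative (a sketch using scikit-learn, not part of the original recipe):

from sklearn.model_selection import train_test_split

# Keep each glass Type at the same proportion in both partitions
data, data_unseen = train_test_split(
    dataset, train_size=0.75, stratify=dataset['Type'], random_state=1234
)
data = data.reset_index(drop=True)
data_unseen = data_unseen.reset_index(drop=True)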
env_setup = setup(data = data, target = 'Type', session_id=1234)
| | Description | Value |
|---|---|---|
0 | session_id | 1234 |
1 | Target | Type |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (160, 10) |
5 | Missing Values | False |
6 | Numeric Features | 9 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (111, 9) |
12 | Transformed Test Set | (49, 9) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 55bc |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# --------------------------------------
best_model = compare_models()
# --------------------------------------
| | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
|---|---|---|---|---|---|---|---|---|---|
et | Extra Trees Classifier | 0.7848 | 0.1819 | 0.7472 | 0.7585 | 0.7616 | 0.7080 | 0.7204 | 0.2570 |
catboost | CatBoost Classifier | 0.7826 | 0.1966 | 0.6767 | 0.7520 | 0.7542 | 0.7028 | 0.7187 | 1.7660 |
rf | Random Forest Classifier | 0.7568 | 0.1886 | 0.6750 | 0.7342 | 0.7332 | 0.6677 | 0.6819 | 0.2660 |
xgboost | Extreme Gradient Boosting | 0.7553 | 0.1927 | 0.6872 | 0.7523 | 0.7354 | 0.6702 | 0.6895 | 0.0860 |
lightgbm | Light Gradient Boosting Machine | 0.7303 | 0.1873 | 0.6033 | 0.6778 | 0.6889 | 0.6312 | 0.6490 | 0.0250 |
gbc | Gradient Boosting Classifier | 0.7129 | 0.1781 | 0.6600 | 0.6861 | 0.6837 | 0.6117 | 0.6290 | 0.1890 |
dt | Decision Tree Classifier | 0.6492 | 0.1589 | 0.5803 | 0.6670 | 0.6266 | 0.5324 | 0.5504 | 0.0060 |
knn | K Neighbors Classifier | 0.6318 | 0.1579 | 0.4714 | 0.5902 | 0.5845 | 0.4873 | 0.5144 | 0.0670 |
lda | Linear Discriminant Analysis | 0.6220 | 0.1758 | 0.5614 | 0.5776 | 0.5778 | 0.4771 | 0.4959 | 0.0070 |
lr | Logistic Regression | 0.5955 | 0.1601 | 0.5064 | 0.5348 | 0.5517 | 0.4383 | 0.4525 | 0.3210 |
ridge | Ridge Classifier | 0.5955 | 0.0000 | 0.5031 | 0.5118 | 0.5263 | 0.4351 | 0.4659 | 0.0140 |
nb | Naive Bayes | 0.4962 | 0.1670 | 0.5297 | 0.5860 | 0.4902 | 0.3673 | 0.3970 | 0.0060 |
svm | SVM - Linear Kernel | 0.3788 | 0.0000 | 0.2833 | 0.1902 | 0.2418 | 0.1382 | 0.1928 | 0.0360 |
ada | Ada Boost Classifier | 0.3697 | 0.1271 | 0.3150 | 0.1987 | 0.2335 | 0.1556 | 0.2334 | 0.0380 |
dummy | Dummy Classifier | 0.2879 | 0.1000 | 0.1933 | 0.0838 | 0.1296 | 0.0000 | 0.0000 | 0.0050 |
qda | Quadratic Discriminant Analysis | 0.0720 | 0.0000 | 0.1533 | 0.0065 | 0.0119 | 0.0000 | 0.0000 | 0.0070 |
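By default `compare_models()` ranks by Accuracy and returns only the single best fitted estimator. Both the ranking metric and the candidate pool are configurable (a sketch, assuming the PyCaret 2.x API):

# rank by F1 and restrict the comparison to tree ensembles
best_f1 = compare_models(sort = 'F1', include = ['et', 'rf', 'lightgbm', 'xgboost'])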
et = create_model('et')
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.6667 | 0.8778 | 0.7083 | 0.6306 | 0.6349 | 0.5556 | 0.5661 |
1 | 0.7273 | 0.0000 | 0.5500 | 0.6667 | 0.6818 | 0.6163 | 0.6390 |
2 | 0.9091 | 0.0000 | 0.9333 | 0.9273 | 0.9051 | 0.8764 | 0.8866 |
3 | 0.7273 | 0.0000 | 0.7000 | 0.6788 | 0.6851 | 0.6292 | 0.6519 |
4 | 0.7273 | 0.0000 | 0.7000 | 0.6788 | 0.6851 | 0.6292 | 0.6519 |
5 | 0.9091 | 0.0000 | 0.8000 | 0.8485 | 0.8727 | 0.8764 | 0.8866 |
6 | 0.8182 | 0.0000 | 0.8833 | 0.8182 | 0.8182 | 0.7556 | 0.7556 |
7 | 0.7273 | 0.0000 | 0.6833 | 0.6818 | 0.7013 | 0.6250 | 0.6326 |
8 | 0.8182 | 0.0000 | 0.6111 | 0.8364 | 0.8141 | 0.7556 | 0.7729 |
9 | 0.8182 | 0.9416 | 0.9028 | 0.8182 | 0.8182 | 0.7609 | 0.7609 |
Mean | 0.7848 | 0.1819 | 0.7472 | 0.7585 | 0.7616 | 0.7080 | 0.7204 |
SD | 0.0784 | 0.3642 | 0.1215 | 0.0965 | 0.0894 | 0.1072 | 0.1041 |
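`create_model` trains the chosen estimator with 10-fold cross-validation by default; the fold count and metric rounding can be adjusted (a sketch using the same API):

# 5-fold CV with metrics rounded to 3 decimals
et_cv5 = create_model('et', fold = 5, round = 3)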
tuned_et = tune_model(et)
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.7500 | 0.8570 | 0.6250 | 0.6333 | 0.6852 | 0.6471 | 0.6621 |
1 | 0.7273 | 0.0000 | 0.5500 | 0.6848 | 0.6804 | 0.6163 | 0.6471 |
2 | 0.7273 | 0.0000 | 0.5694 | 0.7273 | 0.7273 | 0.6374 | 0.6444 |
3 | 0.7273 | 0.0000 | 0.7000 | 0.6788 | 0.6851 | 0.6292 | 0.6519 |
4 | 0.6364 | 0.0000 | 0.5000 | 0.5273 | 0.5682 | 0.4884 | 0.5078 |
5 | 0.9091 | 0.0000 | 0.8000 | 0.8485 | 0.8727 | 0.8764 | 0.8866 |
6 | 0.8182 | 0.0000 | 0.8833 | 0.8182 | 0.8182 | 0.7556 | 0.7556 |
7 | 0.7273 | 0.0000 | 0.6667 | 0.6515 | 0.6727 | 0.6163 | 0.6408 |
8 | 0.7273 | 0.0000 | 0.4444 | 0.6545 | 0.6869 | 0.6250 | 0.6402 |
9 | 0.6364 | 0.9682 | 0.4444 | 0.4727 | 0.5397 | 0.4943 | 0.5142 |
Mean | 0.7386 | 0.1825 | 0.6183 | 0.6697 | 0.6936 | 0.6386 | 0.6551 |
SD | 0.0755 | 0.3659 | 0.1392 | 0.1089 | 0.0943 | 0.1072 | 0.1029 |
print(tuned_et)
ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight={}, criterion='entropy', max_depth=9, max_features=1.0, max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0005, min_impurity_split=None, min_samples_leaf=5, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=130, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
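`tune_model` runs a randomized search over a predefined grid (10 iterations by default) and optimizes Accuracy. A larger budget, a different objective, or a hand-picked grid can be supplied (a sketch, assuming the PyCaret 2.x API; the grid values below are illustrative only):

# more search iterations, optimizing F1 instead of Accuracy
tuned_et_f1 = tune_model(et, optimize = 'F1', n_iter = 50)
# or search an explicit (illustrative) grid
tuned_et_grid = tune_model(et, custom_grid = {'n_estimators': [100, 200, 300], 'max_depth': [5, 10, None]})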
#plot_model(tuned_et, plot = 'auc')
plot_model(tuned_et, plot = 'pr')
plot_model(tuned_et, plot='feature')
plot_model(tuned_et, plot = 'confusion_matrix')
plot_model(tuned_et, plot = 'learning')
#plot_model(tuned_et, plot = 'threshold')
plot_model(tuned_et, plot = 'boundary')
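Rather than calling `plot_model` once per chart, `evaluate_model` wraps the same plots in an interactive selector inside the notebook:

# interactive plot selector for the tuned model
evaluate_model(tuned_et)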
rf = create_model('rf')
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.7500 | 0.9091 | 0.7500 | 0.7000 | 0.7222 | 0.6571 | 0.6640 |
1 | 0.7273 | 0.0000 | 0.4583 | 0.7000 | 0.6955 | 0.6207 | 0.6430 |
2 | 0.8182 | 0.0000 | 0.7361 | 0.9091 | 0.8545 | 0.7634 | 0.7719 |
3 | 0.7273 | 0.0000 | 0.7000 | 0.6788 | 0.6851 | 0.6292 | 0.6519 |
4 | 0.5455 | 0.0000 | 0.5333 | 0.5636 | 0.5303 | 0.3678 | 0.3819 |
5 | 0.9091 | 0.0000 | 0.8000 | 0.8485 | 0.8727 | 0.8764 | 0.8866 |
6 | 0.8182 | 0.0000 | 0.8833 | 0.8182 | 0.8182 | 0.7556 | 0.7556 |
7 | 0.7273 | 0.0000 | 0.6667 | 0.6515 | 0.6727 | 0.6163 | 0.6408 |
8 | 0.7273 | 0.0000 | 0.4444 | 0.6545 | 0.6869 | 0.6250 | 0.6402 |
9 | 0.8182 | 0.9773 | 0.7778 | 0.8182 | 0.7939 | 0.7660 | 0.7826 |
Mean | 0.7568 | 0.1886 | 0.6750 | 0.7342 | 0.7332 | 0.6677 | 0.6819 |
SD | 0.0909 | 0.3776 | 0.1414 | 0.1027 | 0.0979 | 0.1297 | 0.1272 |
tuned_rf = tune_model(rf)
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.7500 | 0.8494 | 0.7500 | 0.6944 | 0.7071 | 0.6571 | 0.6776 |
1 | 0.6364 | 0.0000 | 0.4167 | 0.6515 | 0.5939 | 0.4943 | 0.5319 |
2 | 0.7273 | 0.0000 | 0.6944 | 0.8788 | 0.7896 | 0.6598 | 0.6815 |
3 | 0.6364 | 0.0000 | 0.6500 | 0.7532 | 0.5818 | 0.5165 | 0.6098 |
4 | 0.8182 | 0.0000 | 0.9000 | 0.8909 | 0.8106 | 0.7609 | 0.7957 |
5 | 0.9091 | 0.0000 | 0.8000 | 0.8485 | 0.8727 | 0.8764 | 0.8866 |
6 | 0.7273 | 0.0000 | 0.8167 | 0.7273 | 0.7152 | 0.6413 | 0.6484 |
7 | 0.6364 | 0.0000 | 0.6167 | 0.6273 | 0.6242 | 0.5056 | 0.5115 |
8 | 0.8182 | 0.0000 | 0.6111 | 0.8364 | 0.8141 | 0.7556 | 0.7729 |
9 | 0.8182 | 0.9659 | 0.7778 | 0.8182 | 0.7939 | 0.7660 | 0.7826 |
Mean | 0.7477 | 0.1815 | 0.7033 | 0.7726 | 0.7303 | 0.6633 | 0.6898 |
SD | 0.0889 | 0.3640 | 0.1303 | 0.0903 | 0.0969 | 0.1223 | 0.1142 |
print(tuned_rf)
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='entropy', max_depth=4, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.01, min_impurity_split=None, min_samples_leaf=3, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=120, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
#plot_model(tuned_rf, plot = 'auc')
plot_model(tuned_rf, plot = 'pr')
plot_model(tuned_rf, plot = 'feature')
plot_model(tuned_rf, plot = 'confusion_matrix')
plot_model(tuned_rf, plot = 'learning')
#plot_model(tuned_rf, plot = 'threshold')
plot_model(tuned_rf, plot = 'boundary')
dataset = get_data(whichDataset)
| | RI | Na | Mg | Al | Si | K | Ca | Ba | Fe | Type |
|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.52101 | 13.64 | 4.49 | 1.10 | 71.78 | 0.06 | 8.75 | 0.0 | 0.0 | 1 |
1 | 1.51761 | 13.89 | 3.60 | 1.36 | 72.73 | 0.48 | 7.83 | 0.0 | 0.0 | 1 |
2 | 1.51618 | 13.53 | 3.55 | 1.54 | 72.99 | 0.39 | 7.78 | 0.0 | 0.0 | 1 |
3 | 1.51766 | 13.21 | 3.69 | 1.29 | 72.61 | 0.57 | 8.22 | 0.0 | 0.0 | 1 |
4 | 1.51742 | 13.27 | 3.62 | 1.24 | 73.08 | 0.55 | 8.07 | 0.0 | 0.0 | 1 |
# sanity-check predictions: tuned_et on the full dataset, tuned_rf on the hold-out set
predict_model(tuned_et, data=dataset);
predict_model(tuned_rf);
final_rf = finalize_model(tuned_rf);
# Note: finalize_model() refits the pipeline on the complete dataset
# (training + hold-out), so the hold-out metrics printed by
# predict_model(final_rf) below are optimistic.
# Final model parameters for deployment
print(final_rf)
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='entropy', max_depth=4, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.01, min_impurity_split=None, min_samples_leaf=3, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=120, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
predict_model(final_rf);
/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
/opt/conda/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
(the two warnings above repeat for each fold in which a minority glass Type receives no predicted or true samples)
unseen_predictions = predict_model(final_rf, data=data_unseen)
unseen_predictions.head()
from pycaret.utils import check_metric
# compare the true 'Type' column against the predicted 'Label' column
check_metric(unseen_predictions['Type'], unseen_predictions['Label'], metric = 'Accuracy')
check_metric(unseen_predictions['Type'], unseen_predictions['Label'], metric = 'Recall')
check_metric(unseen_predictions['Type'], unseen_predictions['Label'], metric = 'Precision')
check_metric(unseen_predictions['Type'], unseen_predictions['Label'], metric = 'AUC')
check_metric(unseen_predictions['Type'], unseen_predictions['Label'], metric = 'F1')
save_model(final_rf, 'Final_Model')
load_saved_model = load_model('Final_Model')
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction[["Label", "Score"]].head(10)
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as shown in pycaret
whichDataset = 'glass'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
data = dataset.sample(frac=0.75, random_state=421)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
| | RI | Na | Mg | Al | Si | K | Ca | Ba | Fe | Type |
|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.52101 | 13.64 | 4.49 | 1.10 | 71.78 | 0.06 | 8.75 | 0.0 | 0.0 | 1 |
1 | 1.51761 | 13.89 | 3.60 | 1.36 | 72.73 | 0.48 | 7.83 | 0.0 | 0.0 | 1 |
2 | 1.51618 | 13.53 | 3.55 | 1.54 | 72.99 | 0.39 | 7.78 | 0.0 | 0.0 | 1 |
3 | 1.51766 | 13.21 | 3.69 | 1.29 | 72.61 | 0.57 | 8.22 | 0.0 | 0.0 | 1 |
4 | 1.51742 | 13.27 | 3.62 | 1.24 | 73.08 | 0.55 | 8.07 | 0.0 | 0.0 | 1 |
Data for Modeling: (160, 10)
Unseen Data For Predictions: (54, 10)
clf = setup(data = data, target = 'Type', session_id=1234)
| | Description | Value |
|---|---|---|
0 | session_id | 1234 |
1 | Target | Type |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (160, 10) |
5 | Missing Values | False |
6 | Numeric Features | 9 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (111, 9) |
12 | Transformed Test Set | (49, 9) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 5bcd |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# compare all baseline models and select top 5
top_models = compare_models(n_select = 5)
| | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
|---|---|---|---|---|---|---|---|---|---|
rf | Random Forest Classifier | 0.7288 | 0.0000 | 0.6286 | 0.6754 | 0.6920 | 0.6257 | 0.6388 | 0.2660 |
lightgbm | Light Gradient Boosting Machine | 0.7205 | 0.0000 | 0.5975 | 0.6728 | 0.6780 | 0.6138 | 0.6330 | 0.0240 |
catboost | CatBoost Classifier | 0.7098 | 0.0000 | 0.6311 | 0.6717 | 0.6797 | 0.6021 | 0.6140 | 1.6920 |
xgboost | Extreme Gradient Boosting | 0.6841 | 0.0000 | 0.6044 | 0.6526 | 0.6541 | 0.5694 | 0.5856 | 0.0860 |
gbc | Gradient Boosting Classifier | 0.6833 | 0.0000 | 0.5878 | 0.6476 | 0.6503 | 0.5654 | 0.5852 | 0.1940 |
et | Extra Trees Classifier | 0.6833 | 0.0000 | 0.6178 | 0.6610 | 0.6589 | 0.5675 | 0.5799 | 0.2480 |
knn | K Neighbors Classifier | 0.6379 | 0.0000 | 0.5597 | 0.5850 | 0.5991 | 0.4999 | 0.5142 | 0.0670 |
lda | Linear Discriminant Analysis | 0.6045 | 0.0000 | 0.5350 | 0.5835 | 0.5814 | 0.4606 | 0.4730 | 0.0060 |
ada | Ada Boost Classifier | 0.5765 | 0.0000 | 0.4992 | 0.3865 | 0.4482 | 0.3775 | 0.4913 | 0.0380 |
lr | Logistic Regression | 0.5598 | 0.0000 | 0.4689 | 0.4991 | 0.5102 | 0.3775 | 0.4037 | 0.2910 |
ridge | Ridge Classifier | 0.5598 | 0.0000 | 0.4306 | 0.4945 | 0.5030 | 0.3676 | 0.3996 | 0.0050 |
dt | Decision Tree Classifier | 0.5591 | 0.0000 | 0.4856 | 0.6020 | 0.5576 | 0.4161 | 0.4351 | 0.0060 |
nb | Naive Bayes | 0.4318 | 0.0000 | 0.5331 | 0.5087 | 0.4179 | 0.3030 | 0.3563 | 0.0060 |
dummy | Dummy Classifier | 0.3697 | 0.0000 | 0.2050 | 0.1376 | 0.2002 | 0.0000 | 0.0000 | 0.0050 |
svm | SVM - Linear Kernel | 0.3242 | 0.0000 | 0.3150 | 0.1983 | 0.2269 | 0.1635 | 0.2437 | 0.0360 |
qda | Quadratic Discriminant Analysis | 0.2492 | 0.0000 | 0.2817 | 0.1757 | 0.1704 | 0.1230 | 0.1444 | 0.0080 |
top_models
[RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, importance_type='split', learning_rate=0.1, max_depth=-1, min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31, objective=None, random_state=1234, reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), <catboost.core.CatBoostClassifier at 0x7f1b73218c10>, XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.300000012, max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1, tree_method='auto', validate_parameters=1, verbosity=0), GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None, learning_rate=0.1, loss='deviance', max_depth=3, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_iter_no_change=None, presort='deprecated', random_state=1234, subsample=1.0, tol=0.0001, validation_fraction=0.1, verbose=0, warm_start=False)]
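With `n_select=5`, `compare_models` returns a list of the five best fitted estimators, ordered by the sort metric, rather than a single model; this is what the list comprehensions below iterate over:

# top_models is an ordered list; index 0 is the leaderboard winner
print(len(top_models), type(top_models))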
# tune top base models
tuned_top_models = [tune_model(i) for i in top_models]
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.6667 | 0.0000 | 0.5500 | 0.6167 | 0.6389 | 0.5514 | 0.5570 |
1 | 0.9091 | 0.0000 | 0.9167 | 0.9545 | 0.9152 | 0.8690 | 0.8796 |
2 | 0.6364 | 0.0000 | 0.5000 | 0.5682 | 0.5974 | 0.5000 | 0.5060 |
3 | 0.6364 | 0.0000 | 0.4667 | 0.5260 | 0.5554 | 0.4699 | 0.5139 |
4 | 0.5455 | 0.0000 | 0.4500 | 0.5273 | 0.5076 | 0.4022 | 0.4206 |
5 | 0.7273 | 0.0000 | 0.5500 | 0.6045 | 0.6580 | 0.6118 | 0.6287 |
6 | 0.6364 | 0.0000 | 0.4833 | 0.5818 | 0.6061 | 0.4943 | 0.5005 |
7 | 0.7273 | 0.0000 | 0.6667 | 0.7532 | 0.6736 | 0.6118 | 0.6747 |
8 | 0.5455 | 0.0000 | 0.4333 | 0.4636 | 0.4993 | 0.3529 | 0.3627 |
9 | 0.9091 | 0.0000 | 0.9000 | 0.9318 | 0.9004 | 0.8764 | 0.8866 |
Mean | 0.6939 | 0.0000 | 0.5917 | 0.6528 | 0.6552 | 0.5740 | 0.5930 |
SD | 0.1223 | 0.0000 | 0.1705 | 0.1620 | 0.1379 | 0.1682 | 0.1681 |
tuned_top_models
[RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), LGBMClassifier(bagging_fraction=0.9, bagging_freq=4, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=0.4, importance_type='split', learning_rate=0.05, max_depth=-1, min_child_samples=21, min_child_weight=0.001, min_split_gain=0.3, n_estimators=170, n_jobs=-1, num_leaves=2, objective=None, random_state=1234, reg_alpha=0.01, reg_lambda=3, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), <catboost.core.CatBoostClassifier at 0x7f1b7214fe50>, XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=1, min_child_weight=3, missing=nan, monotone_constraints='()', n_estimators=200, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0.5, reg_lambda=1e-06, scale_pos_weight=23.700000000000003, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0), GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None, learning_rate=0.15, loss='deviance', max_depth=7, max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.05, min_impurity_split=None, min_samples_leaf=4, min_samples_split=4, min_weight_fraction_leaf=0.0, n_estimators=290, n_iter_no_change=None, presort='deprecated', random_state=1234, subsample=0.55, tol=0.0001, validation_fraction=0.1, verbose=0, warm_start=False)]
# ensemble top tuned models
bagged_top_models = [ensemble_model(i) for i in tuned_top_models]
| Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
|---|---|---|---|---|---|---|---|
0 | 0.7500 | 0.0000 | 0.6000 | 0.6389 | 0.6833 | 0.6571 | 0.6786 |
1 | 1.0000 | 0.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
2 | 0.7273 | 0.0000 | 0.5500 | 0.6045 | 0.6580 | 0.6118 | 0.6287 |
3 | 0.6364 | 0.0000 | 0.4667 | 0.5606 | 0.5818 | 0.4884 | 0.5078 |
4 | 0.5455 | 0.0000 | 0.3750 | 0.5000 | 0.4848 | 0.3956 | 0.4353 |
5 | 0.7273 | 0.0000 | 0.5500 | 0.6045 | 0.6580 | 0.6118 | 0.6287 |
6 | 0.6364 | 0.0000 | 0.4833 | 0.5364 | 0.5801 | 0.4824 | 0.4957 |
7 | 0.6364 | 0.0000 | 0.6167 | 0.5909 | 0.6000 | 0.4884 | 0.5078 |
8 | 0.6364 | 0.0000 | 0.6333 | 0.5909 | 0.6104 | 0.5000 | 0.5060 |
9 | 0.7273 | 0.0000 | 0.6000 | 0.5455 | 0.6187 | 0.6118 | 0.6551 |
Mean | 0.7023 | 0.0000 | 0.5875 | 0.6172 | 0.6475 | 0.5847 | 0.6044 |
SD | 0.1161 | 0.0000 | 0.1571 | 0.1332 | 0.1287 | 0.1587 | 0.1531 |
bagged_top_models
[BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=LGBMClassifier(bagging_fraction=0.9, bagging_freq=4, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=0.4, importance_type='split', learning_rate=0.05, max_depth=-1, min_child_samples=21, min_child_weight=0.001, min_split_gain=0.3, n_estimators=170, n_jobs=-1, num_leaves=2, objective=None, random_state=1234, reg_alpha=0.01, reg_lambda=3, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=<catboost.core.CatBoostClassifier object at 0x7f1b71f7a990>, bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=1, min_child_weight=3, missing=nan, monotone_constraints='()', n_estimators=200, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0.5, reg_lambda=1e-06, scale_pos_weight=23.700000000000003, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None, learning_rate=0.15, loss='deviance', max_depth=7, max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.05, min_impurity_split=None, min_samples_leaf=4, min_samples_split=4, min_weight_fraction_leaf=0.0, n_estimators=290, n_iter_no_change=None, presort='deprecated', random_state=1234, subsample=0.55, tol=0.0001, validation_fraction=0.1, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False)]
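Bagging each tuned model is only one ensembling option; the same list can also be combined by voting or stacking (a sketch, assuming the PyCaret 2.x API):

# soft-voting ensemble over the tuned models
blender = blend_models(estimator_list = tuned_top_models, method = 'soft')
# stacking ensemble with PyCaret's default meta-model
stacker = stack_models(estimator_list = tuned_top_models)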
# select the best model trained in this session, by each metric in turn
best1 = automl(optimize = 'AUC')
best2 = automl(optimize = 'Accuracy')
best3 = automl(optimize = 'Recall')
best4 = automl(optimize = 'Precision')
best5 = automl(optimize = 'F1')
print(); print("Best model based on AUC: "); print(best1)
print(); print("Best model based on Accuracy: "); print(best2)
print(); print("Best model based on Recall: "); print(best3)
print(); print("Best model based on Precision: "); print(best4)
print(); print("Best model based on F1: "); print(best5)
Best model based on AUC:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=1234, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)

Best model based on Accuracy:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.7, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=1, min_child_weight=3, missing=nan, monotone_constraints='()', n_estimators=200, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0.5, reg_lambda=1e-06, scale_pos_weight=23.700000000000003, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)

Best model based on Recall:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)

Best model based on Precision:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)

Best model based on F1:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
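`automl()` scans every model trained in the current session (base, tuned, and bagged) and returns the one with the best hold-out score for the requested metric, which is why the winner can differ from metric to metric. The returned estimator is already fitted and can be scored directly:

# hold-out predictions from the Accuracy winner
holdout_preds = predict_model(best2)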
plot_model(best2, plot = 'auc')
plot_model(best2, plot = 'confusion_matrix')
plot_model(best2, plot = 'learning')
save_model(best2,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='Type', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strateg... interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=1, min_child_weight=3, missing=nan, monotone_constraints='()', n_estimators=200, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0.5, reg_lambda=1e-06, scale_pos_weight=23.700000000000003, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction[["Label", "Score"]].head()
In this coding recipe, we demonstrated how to build a machine learning model in Python using PyCaret: set up a classification experiment, compare and tune candidate models, ensemble the best performers, and save a finalized pipeline for predictions on unseen data.