For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as shown in pycaret
whichDataset = 'satellite'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
Attribute1 | Attribute2 | Attribute3 | Attribute4 | Attribute5 | Attribute6 | Attribute7 | Attribute8 | Attribute9 | Attribute10 | Attribute11 | Attribute12 | Attribute13 | Attribute14 | Attribute15 | Attribute16 | Attribute17 | Attribute18 | Attribute19 | Attribute20 | Attribute21 | Attribute22 | Attribute23 | Attribute24 | Attribute25 | Attribute26 | Attribute27 | Attribute28 | Attribute29 | Attribute30 | Attribute31 | Attribute32 | Attribute33 | Attribute34 | Attribute35 | Attribute36 | Class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 80 | 102 | 102 | 79 | 76 | 102 | 102 | 79 | 76 | 102 | 106 | 83 | 76 | 99 | 108 | 85 | 76 | 103 | 118 | 88 | 80 | 107 | 118 | 88 | 79 | 107 | 109 | 87 | 79 | 107 | 109 | 87 | 79 | 107 | 113 | 87 | 3 |
1 | 76 | 102 | 102 | 79 | 76 | 102 | 106 | 83 | 76 | 102 | 106 | 87 | 76 | 103 | 118 | 88 | 80 | 107 | 118 | 88 | 80 | 112 | 118 | 88 | 79 | 107 | 109 | 87 | 79 | 107 | 113 | 87 | 79 | 103 | 104 | 83 | 3 |
2 | 80 | 98 | 106 | 79 | 76 | 94 | 102 | 76 | 76 | 94 | 102 | 76 | 80 | 107 | 113 | 85 | 80 | 95 | 100 | 78 | 80 | 95 | 100 | 78 | 79 | 103 | 104 | 79 | 79 | 95 | 100 | 79 | 79 | 95 | 96 | 75 | 4 |
3 | 76 | 94 | 102 | 76 | 76 | 94 | 102 | 76 | 76 | 94 | 102 | 76 | 80 | 95 | 100 | 78 | 80 | 95 | 100 | 78 | 80 | 91 | 100 | 78 | 79 | 95 | 100 | 79 | 79 | 95 | 96 | 75 | 79 | 95 | 100 | 75 | 4 |
4 | 76 | 94 | 102 | 76 | 76 | 94 | 102 | 76 | 76 | 89 | 94 | 76 | 80 | 95 | 100 | 78 | 80 | 91 | 100 | 78 | 80 | 91 | 100 | 74 | 79 | 95 | 96 | 75 | 79 | 95 | 100 | 75 | 75 | 95 | 100 | 79 | 4 |
dataset.shape
(6435, 37)
dataset.columns.to_list()
['Attribute1', 'Attribute2', 'Attribute3', 'Attribute4', 'Attribute5', 'Attribute6', 'Attribute7', 'Attribute8', 'Attribute9', 'Attribute10', 'Attribute11', 'Attribute12', 'Attribute13', 'Attribute14', 'Attribute15', 'Attribute16', 'Attribute17', 'Attribute18', 'Attribute19', 'Attribute20', 'Attribute21', 'Attribute22', 'Attribute23', 'Attribute24', 'Attribute25', 'Attribute26', 'Attribute27', 'Attribute28', 'Attribute29', 'Attribute30', 'Attribute31', 'Attribute32', 'Attribute33', 'Attribute34', 'Attribute35', 'Attribute36', 'Class']
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (4826, 37)
Unseen Data For Predictions: (1609, 37)
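The `sample()`/`drop()` pattern above partitions the dataset into a modeling set and a disjoint "unseen" hold-out. A minimal, self-contained sketch of the same idea (with a small illustrative frame standing in for the satellite data, which has 6435 rows):

```python
import numpy as np
import pandas as pd

# Illustrative stand-in for the satellite dataset; column names are hypothetical.
rng = np.random.default_rng(1234)
dataset = pd.DataFrame({"Attribute1": rng.integers(40, 160, size=100),
                        "Class": rng.integers(1, 8, size=100)})

# sample() draws 75% of rows for modeling; drop() keeps the complement as
# unseen data -- exactly the pattern used in the recipe.
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)

# The two pieces partition the original frame with no overlap.
assert len(data) + len(data_unseen) == len(dataset)
assert data.index.intersection(data_unseen.index).empty
```

Fixing `random_state` makes the split reproducible across notebook runs.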
env_setup = setup(data = data, target = 'Class', session_id=1234)
Description | Value | |
---|---|---|
0 | session_id | 1234 |
1 | Target | Class |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (4826, 37) |
5 | Missing Values | False |
6 | Numeric Features | 36 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (3378, 36) |
12 | Transformed Test Set | (1448, 36) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 3d98 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# --------------------------------------
best_model = compare_models()
# --------------------------------------
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) | |
---|---|---|---|---|---|---|---|---|---|
lightgbm | Light Gradient Boosting Machine | 0.9195 | 0.9911 | 0.8912 | 0.9196 | 0.9173 | 0.9000 | 0.9006 | 0.4120 |
catboost | CatBoost Classifier | 0.9138 | 0.9919 | 0.8835 | 0.9123 | 0.9110 | 0.8929 | 0.8935 | 10.1880 |
xgboost | Extreme Gradient Boosting | 0.9100 | 0.9908 | 0.8796 | 0.9091 | 0.9076 | 0.8882 | 0.8888 | 1.3480 |
et | Extra Trees Classifier | 0.9079 | 0.9915 | 0.8725 | 0.9064 | 0.9045 | 0.8854 | 0.8862 | 0.3200 |
rf | Random Forest Classifier | 0.9032 | 0.9898 | 0.8674 | 0.9018 | 0.8994 | 0.8795 | 0.8804 | 0.4360 |
knn | K Neighbors Classifier | 0.8928 | 0.9823 | 0.8637 | 0.8933 | 0.8920 | 0.8671 | 0.8675 | 0.0780 |
gbc | Gradient Boosting Classifier | 0.8911 | 0.9884 | 0.8578 | 0.8897 | 0.8886 | 0.8647 | 0.8653 | 2.8260 |
qda | Quadratic Discriminant Analysis | 0.8520 | 0.9709 | 0.7970 | 0.8400 | 0.8381 | 0.8155 | 0.8176 | 0.0320 |
lda | Linear Discriminant Analysis | 0.8440 | 0.9785 | 0.7798 | 0.8398 | 0.8366 | 0.8052 | 0.8072 | 0.0170 |
dt | Decision Tree Classifier | 0.8390 | 0.9027 | 0.8040 | 0.8418 | 0.8392 | 0.8007 | 0.8011 | 0.0230 |
lr | Logistic Regression | 0.8067 | 0.9594 | 0.7397 | 0.7885 | 0.7879 | 0.7581 | 0.7617 | 0.8460 |
nb | Naive Bayes | 0.7948 | 0.9624 | 0.7813 | 0.8241 | 0.8041 | 0.7487 | 0.7510 | 0.0090 |
ridge | Ridge Classifier | 0.7679 | 0.0000 | 0.6428 | 0.7691 | 0.7015 | 0.7040 | 0.7207 | 0.0340 |
svm | SVM - Linear Kernel | 0.6631 | 0.0000 | 0.6262 | 0.6986 | 0.6256 | 0.5853 | 0.6174 | 0.0780 |
ada | Ada Boost Classifier | 0.6294 | 0.8620 | 0.5727 | 0.6246 | 0.6062 | 0.5406 | 0.5530 | 0.1360 |
dummy | Dummy Classifier | 0.2386 | 0.5000 | 0.1667 | 0.0569 | 0.0919 | 0.0000 | 0.0000 | 0.0070 |
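The Kappa and MCC columns in the grid above are chance-corrected agreement measures, which is why the Dummy Classifier scores 0.0000 on both despite a nonzero accuracy. A small sketch of how they behave, computed with scikit-learn on illustrative labels (the values here are toy data, not from the satellite experiment):

```python
from sklearn.metrics import cohen_kappa_score, matthews_corrcoef

# Toy multiclass labels: 8 of 10 predictions agree with the truth
y_true = [3, 3, 4, 4, 5, 7, 3, 4, 5, 7]
y_pred = [3, 3, 4, 5, 5, 7, 3, 4, 4, 7]

# Both metrics are 1.0 for perfect agreement and ~0.0 for chance-level
# predictions, so they penalize models that only exploit class frequencies.
kappa = cohen_kappa_score(y_true, y_pred)
mcc = matthews_corrcoef(y_true, y_pred)
```

This is why Kappa/MCC are often more informative than raw accuracy on imbalanced multiclass problems like this one.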
lightgbm = create_model('lightgbm')
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|
0 | 0.9201 | 0.9921 | 0.8919 | 0.9185 | 0.9184 | 0.9008 | 0.9012 |
1 | 0.9172 | 0.9891 | 0.8938 | 0.9206 | 0.9170 | 0.8972 | 0.8978 |
2 | 0.9320 | 0.9912 | 0.9028 | 0.9323 | 0.9289 | 0.9153 | 0.9161 |
3 | 0.9260 | 0.9945 | 0.9132 | 0.9271 | 0.9265 | 0.9085 | 0.9086 |
4 | 0.9172 | 0.9887 | 0.8885 | 0.9146 | 0.9141 | 0.8971 | 0.8976 |
5 | 0.9112 | 0.9909 | 0.8774 | 0.9103 | 0.9081 | 0.8897 | 0.8905 |
6 | 0.9201 | 0.9937 | 0.8822 | 0.9274 | 0.9162 | 0.9004 | 0.9023 |
7 | 0.9083 | 0.9910 | 0.8846 | 0.9059 | 0.9063 | 0.8864 | 0.8867 |
8 | 0.9407 | 0.9915 | 0.9215 | 0.9411 | 0.9403 | 0.9264 | 0.9266 |
9 | 0.9021 | 0.9887 | 0.8562 | 0.8986 | 0.8978 | 0.8779 | 0.8788 |
Mean | 0.9195 | 0.9911 | 0.8912 | 0.9196 | 0.9173 | 0.9000 | 0.9006 |
SD | 0.0107 | 0.0019 | 0.0176 | 0.0122 | 0.0116 | 0.0134 | 0.0133 |
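The Mean and SD rows summarize the 10 fold-wise scores above. A quick check with NumPy, using the Accuracy column as printed (small rounding differences are possible because the fold scores are shown to only four decimals):

```python
import numpy as np

# Fold-wise Accuracy scores from the create_model grid above
fold_acc = np.array([0.9201, 0.9172, 0.9320, 0.9260, 0.9172,
                     0.9112, 0.9201, 0.9083, 0.9407, 0.9021])

mean_acc = fold_acc.mean()   # ~0.9195, matching the Mean row
sd_acc = fold_acc.std()      # population SD (ddof=0), ~0.0107 as in the SD row
```

A small SD across folds, as seen here, indicates the model's performance is stable across different subsets of the training data.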
tuned_lightgbm = tune_model(lightgbm)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|
0 | 0.9142 | 0.9915 | 0.8865 | 0.9126 | 0.9126 | 0.8934 | 0.8937 |
1 | 0.9142 | 0.9908 | 0.8917 | 0.9178 | 0.9138 | 0.8934 | 0.8940 |
2 | 0.9172 | 0.9897 | 0.8789 | 0.9135 | 0.9119 | 0.8968 | 0.8977 |
3 | 0.9112 | 0.9942 | 0.8896 | 0.9122 | 0.9115 | 0.8900 | 0.8902 |
4 | 0.8964 | 0.9862 | 0.8707 | 0.8933 | 0.8942 | 0.8716 | 0.8718 |
5 | 0.8935 | 0.9891 | 0.8540 | 0.8883 | 0.8891 | 0.8677 | 0.8684 |
6 | 0.9172 | 0.9943 | 0.8828 | 0.9184 | 0.9131 | 0.8970 | 0.8981 |
7 | 0.9024 | 0.9891 | 0.8816 | 0.9017 | 0.9012 | 0.8792 | 0.8797 |
8 | 0.9080 | 0.9902 | 0.8827 | 0.9059 | 0.9060 | 0.8857 | 0.8861 |
9 | 0.8843 | 0.9857 | 0.8327 | 0.8780 | 0.8769 | 0.8556 | 0.8570 |
Mean | 0.9059 | 0.9901 | 0.8751 | 0.9042 | 0.9030 | 0.8830 | 0.8837 |
SD | 0.0107 | 0.0027 | 0.0175 | 0.0129 | 0.0120 | 0.0134 | 0.0133 |
print(tuned_lightgbm)
LGBMClassifier(bagging_fraction=0.9, bagging_freq=0, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=1.0, importance_type='split', learning_rate=0.3, max_depth=-1, min_child_samples=61, min_child_weight=0.001, min_split_gain=0.3, n_estimators=190, n_jobs=-1, num_leaves=20, objective=None, random_state=1234, reg_alpha=0.15, reg_lambda=0.0001, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0)
plot_model(tuned_lightgbm, plot = 'auc')
plot_model(tuned_lightgbm, plot = 'pr')
plot_model(tuned_lightgbm, plot='feature')
plot_model(tuned_lightgbm, plot = 'confusion_matrix')
plot_model(tuned_lightgbm, plot = 'learning')
#plot_model(tuned_lightgbm, plot = 'threshold')
plot_model(tuned_lightgbm, plot = 'boundary')
plot_model(tuned_lightgbm, plot = 'error')
catboost = create_model('catboost')
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|
0 | 0.9142 | 0.9942 | 0.8804 | 0.9126 | 0.9117 | 0.8932 | 0.8937 |
1 | 0.9290 | 0.9920 | 0.9131 | 0.9311 | 0.9291 | 0.9120 | 0.9124 |
2 | 0.9024 | 0.9916 | 0.8644 | 0.8964 | 0.8975 | 0.8785 | 0.8791 |
3 | 0.9290 | 0.9944 | 0.9140 | 0.9298 | 0.9293 | 0.9121 | 0.9122 |
4 | 0.9083 | 0.9894 | 0.8771 | 0.9080 | 0.9051 | 0.8859 | 0.8867 |
5 | 0.9142 | 0.9911 | 0.8818 | 0.9131 | 0.9111 | 0.8934 | 0.8944 |
6 | 0.9172 | 0.9935 | 0.8783 | 0.9143 | 0.9115 | 0.8969 | 0.8980 |
7 | 0.9142 | 0.9924 | 0.8912 | 0.9125 | 0.9127 | 0.8935 | 0.8937 |
8 | 0.9080 | 0.9910 | 0.8768 | 0.9066 | 0.9051 | 0.8855 | 0.8862 |
9 | 0.9021 | 0.9899 | 0.8585 | 0.8989 | 0.8972 | 0.8778 | 0.8789 |
Mean | 0.9138 | 0.9919 | 0.8835 | 0.9123 | 0.9110 | 0.8929 | 0.8935 |
SD | 0.0090 | 0.0016 | 0.0173 | 0.0107 | 0.0105 | 0.0114 | 0.0112 |
tuned_catboost = tune_model(catboost)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|
0 | 0.9142 | 0.9919 | 0.8755 | 0.9131 | 0.9098 | 0.8930 | 0.8940 |
1 | 0.9112 | 0.9893 | 0.8871 | 0.9123 | 0.9104 | 0.8897 | 0.8903 |
2 | 0.9112 | 0.9875 | 0.8781 | 0.9089 | 0.9072 | 0.8895 | 0.8903 |
3 | 0.9053 | 0.9924 | 0.8867 | 0.9060 | 0.9053 | 0.8828 | 0.8830 |
4 | 0.8994 | 0.9876 | 0.8658 | 0.8977 | 0.8959 | 0.8749 | 0.8757 |
5 | 0.9112 | 0.9883 | 0.8772 | 0.9091 | 0.9080 | 0.8897 | 0.8904 |
6 | 0.9112 | 0.9926 | 0.8701 | 0.9085 | 0.9065 | 0.8896 | 0.8905 |
7 | 0.8935 | 0.9890 | 0.8657 | 0.8907 | 0.8907 | 0.8679 | 0.8683 |
8 | 0.8902 | 0.9851 | 0.8542 | 0.8886 | 0.8867 | 0.8632 | 0.8640 |
9 | 0.8902 | 0.9879 | 0.8415 | 0.8869 | 0.8846 | 0.8629 | 0.8642 |
Mean | 0.9038 | 0.9892 | 0.8702 | 0.9022 | 0.9005 | 0.8803 | 0.8811 |
SD | 0.0091 | 0.0023 | 0.0135 | 0.0097 | 0.0095 | 0.0114 | 0.0113 |
print(tuned_catboost)
<catboost.core.CatBoostClassifier object at 0x7f8350631490>
plot_model(tuned_catboost, plot = 'auc')
plot_model(tuned_catboost, plot = 'pr')
plot_model(tuned_catboost, plot = 'feature')
plot_model(tuned_catboost, plot = 'confusion_matrix')
plot_model(tuned_catboost, plot = 'learning')
#plot_model(tuned_catboost, plot = 'threshold')
plot_model(tuned_catboost, plot = 'boundary')
plot_model(tuned_catboost, plot = 'error')
# Evaluate both tuned models on the hold-out test set created by setup()
predict_model(tuned_lightgbm);
predict_model(tuned_catboost);
final_catboost = finalize_model(tuned_catboost);
# Final model parameters for deployment
print(final_catboost)
<catboost.core.CatBoostClassifier object at 0x7f8350136310>
predict_model(final_catboost);
unseen_predictions = predict_model(final_catboost, data=data_unseen)
unseen_predictions.head()
from pycaret.utils import check_metric
check_metric(unseen_predictions['Class'], unseen_predictions['Label'], metric = 'Accuracy')
check_metric(unseen_predictions['Class'], unseen_predictions['Label'], metric = 'Recall')
check_metric(unseen_predictions['Class'], unseen_predictions['Label'], metric = 'Precision')
#check_metric(unseen_predictions['Class'], unseen_predictions['Label'], metric = 'AUC')
check_metric(unseen_predictions['Class'], unseen_predictions['Label'], metric = 'F1')
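`check_metric` compares the true `Class` column against the predicted `Label` column. The same scores can be computed directly with scikit-learn; note that multiclass recall/precision/F1 require an explicit averaging scheme (`'macro'` is used below for illustration -- PyCaret's internal averaging choice may differ). The labels here are toy stand-ins for the two columns:

```python
from sklearn.metrics import (accuracy_score, f1_score,
                             precision_score, recall_score)

# Toy stand-ins for unseen_predictions['Class'] and ['Label']
y_true = [3, 3, 4, 4, 5, 7, 3, 4, 5, 7]
y_pred = [3, 3, 4, 5, 5, 7, 3, 4, 4, 7]

acc = accuracy_score(y_true, y_pred)
# Macro averaging: compute the metric per class, then take the unweighted mean
rec = recall_score(y_true, y_pred, average='macro')
prec = precision_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='macro')
```

Computing the metrics directly is useful when you need an averaging scheme or a metric that `check_metric` does not expose.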
save_model(final_catboost,'Final_Model')
load_saved_model = load_model('Final_Model')
new_prediction = predict_model(load_saved_model, data=data_unseen)
#new_prediction[["Label", "Score"]].head(10)
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as shown in pycaret
whichDataset = 'satellite'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
data = dataset.sample(frac=0.70, random_state=421)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Attribute1 | Attribute2 | Attribute3 | Attribute4 | Attribute5 | Attribute6 | Attribute7 | Attribute8 | Attribute9 | Attribute10 | ... | Attribute28 | Attribute29 | Attribute30 | Attribute31 | Attribute32 | Attribute33 | Attribute34 | Attribute35 | Attribute36 | Class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 80 | 102 | 102 | 79 | 76 | 102 | 102 | 79 | 76 | 102 | ... | 87 | 79 | 107 | 109 | 87 | 79 | 107 | 113 | 87 | 3 |
1 | 76 | 102 | 102 | 79 | 76 | 102 | 106 | 83 | 76 | 102 | ... | 87 | 79 | 107 | 113 | 87 | 79 | 103 | 104 | 83 | 3 |
2 | 80 | 98 | 106 | 79 | 76 | 94 | 102 | 76 | 76 | 94 | ... | 79 | 79 | 95 | 100 | 79 | 79 | 95 | 96 | 75 | 4 |
3 | 76 | 94 | 102 | 76 | 76 | 94 | 102 | 76 | 76 | 94 | ... | 79 | 79 | 95 | 96 | 75 | 79 | 95 | 100 | 75 | 4 |
4 | 76 | 94 | 102 | 76 | 76 | 94 | 102 | 76 | 76 | 89 | ... | 75 | 79 | 95 | 100 | 75 | 75 | 95 | 100 | 79 | 4 |
5 rows × 37 columns
Data for Modeling: (4504, 37)
Unseen Data For Predictions: (1931, 37)
clf = setup(data = data, target = 'Class', session_id=1234)
Description | Value | |
---|---|---|
0 | session_id | 1234 |
1 | Target | Class |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (4504, 37) |
5 | Missing Values | False |
6 | Numeric Features | 36 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (3152, 36) |
12 | Transformed Test Set | (1352, 36) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 6de0 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# compare all baseline models and select top 5
top_models = compare_models(n_select = 5)
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) | |
---|---|---|---|---|---|---|---|---|---|
catboost | CatBoost Classifier | 0.9150 | 0.9917 | 0.8948 | 0.9150 | 0.9136 | 0.8948 | 0.8953 | 10.0180 |
lightgbm | Light Gradient Boosting Machine | 0.9140 | 0.9908 | 0.8955 | 0.9137 | 0.9128 | 0.8937 | 0.8941 | 0.3760 |
et | Extra Trees Classifier | 0.9118 | 0.9907 | 0.8884 | 0.9105 | 0.9098 | 0.8908 | 0.8913 | 0.3190 |
rf | Random Forest Classifier | 0.9077 | 0.9896 | 0.8843 | 0.9066 | 0.9059 | 0.8857 | 0.8862 | 0.4180 |
xgboost | Extreme Gradient Boosting | 0.9074 | 0.9903 | 0.8868 | 0.9068 | 0.9060 | 0.8854 | 0.8858 | 1.2500 |
knn | K Neighbors Classifier | 0.9013 | 0.9805 | 0.8803 | 0.9019 | 0.9005 | 0.8780 | 0.8784 | 0.0760 |
gbc | Gradient Boosting Classifier | 0.8950 | 0.9881 | 0.8703 | 0.8939 | 0.8932 | 0.8701 | 0.8705 | 2.6670 |
qda | Quadratic Discriminant Analysis | 0.8480 | 0.9712 | 0.7948 | 0.8338 | 0.8277 | 0.8107 | 0.8144 | 0.0110 |
lda | Linear Discriminant Analysis | 0.8360 | 0.9766 | 0.7790 | 0.8329 | 0.8285 | 0.7957 | 0.7982 | 0.0150 |
dt | Decision Tree Classifier | 0.8344 | 0.9002 | 0.8107 | 0.8388 | 0.8354 | 0.7959 | 0.7964 | 0.0230 |
lr | Logistic Regression | 0.7989 | 0.9575 | 0.7402 | 0.7853 | 0.7796 | 0.7491 | 0.7533 | 0.5890 |
nb | Naive Bayes | 0.7931 | 0.9608 | 0.7852 | 0.8184 | 0.8005 | 0.7471 | 0.7494 | 0.0090 |
ridge | Ridge Classifier | 0.7608 | 0.0000 | 0.6563 | 0.7598 | 0.7027 | 0.6967 | 0.7129 | 0.0080 |
svm | SVM - Linear Kernel | 0.6618 | 0.0000 | 0.6256 | 0.7520 | 0.6235 | 0.5828 | 0.6194 | 0.0640 |
ada | Ada Boost Classifier | 0.5536 | 0.8416 | 0.5485 | 0.5519 | 0.5073 | 0.4552 | 0.4762 | 0.1290 |
dummy | Dummy Classifier | 0.2360 | 0.5000 | 0.1667 | 0.0557 | 0.0902 | 0.0000 | 0.0000 | 0.0070 |
top_models
[<catboost.core.CatBoostClassifier at 0x7f8350072250>, LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, importance_type='split', learning_rate=0.1, max_depth=-1, min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31, objective=None, random_state=1234, reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.300000012, max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0, reg_lambda=1, scale_pos_weight=None, subsample=1, tree_method='auto', validate_parameters=1, verbosity=0)]
# tune top base models
tuned_top_models = [tune_model(i) for i in top_models]
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|
0 | 0.9146 | 0.9873 | 0.8957 | 0.9135 | 0.9131 | 0.8943 | 0.8947 |
1 | 0.9399 | 0.9931 | 0.9237 | 0.9427 | 0.9389 | 0.9256 | 0.9266 |
2 | 0.9143 | 0.9902 | 0.8990 | 0.9142 | 0.9131 | 0.8942 | 0.8946 |
3 | 0.9111 | 0.9920 | 0.8883 | 0.9108 | 0.9102 | 0.8899 | 0.8903 |
4 | 0.9302 | 0.9926 | 0.9067 | 0.9302 | 0.9288 | 0.9135 | 0.9139 |
5 | 0.9048 | 0.9895 | 0.8779 | 0.9043 | 0.9034 | 0.8821 | 0.8826 |
6 | 0.9079 | 0.9890 | 0.8875 | 0.9077 | 0.9073 | 0.8861 | 0.8864 |
7 | 0.9397 | 0.9943 | 0.9205 | 0.9387 | 0.9380 | 0.9254 | 0.9258 |
8 | 0.9016 | 0.9899 | 0.8852 | 0.9012 | 0.9003 | 0.8787 | 0.8792 |
9 | 0.8889 | 0.9884 | 0.8622 | 0.8845 | 0.8854 | 0.8625 | 0.8629 |
Mean | 0.9153 | 0.9906 | 0.8947 | 0.9148 | 0.9138 | 0.8952 | 0.8957 |
SD | 0.0158 | 0.0021 | 0.0179 | 0.0170 | 0.0161 | 0.0195 | 0.0196 |
tuned_top_models
[<catboost.core.CatBoostClassifier at 0x7f833a7ce150>, LGBMClassifier(bagging_fraction=0.6, bagging_freq=6, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=1.0, importance_type='split', learning_rate=0.4, max_depth=-1, min_child_samples=61, min_child_weight=0.001, min_split_gain=0.3, n_estimators=110, n_jobs=-1, num_leaves=70, objective=None, random_state=1234, reg_alpha=0.2, reg_lambda=0.15, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight={}, criterion='entropy', max_depth=9, max_features=1.0, max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0005, min_impurity_split=None, min_samples_leaf=5, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=130, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight='balanced', criterion='entropy', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=6, min_samples_split=10, min_weight_fraction_leaf=0.0, n_estimators=140, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)]
# ensemble top tuned models
bagged_top_models = [ensemble_model(i) for i in tuned_top_models]
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|
0 | 0.9114 | 0.9872 | 0.8879 | 0.9111 | 0.9093 | 0.8902 | 0.8909 |
1 | 0.9335 | 0.9941 | 0.9192 | 0.9366 | 0.9325 | 0.9178 | 0.9188 |
2 | 0.9016 | 0.9898 | 0.8832 | 0.9010 | 0.9006 | 0.8785 | 0.8788 |
3 | 0.8857 | 0.9906 | 0.8522 | 0.8842 | 0.8837 | 0.8583 | 0.8588 |
4 | 0.9238 | 0.9913 | 0.9015 | 0.9234 | 0.9219 | 0.9056 | 0.9061 |
5 | 0.8984 | 0.9891 | 0.8707 | 0.8976 | 0.8968 | 0.8742 | 0.8747 |
6 | 0.8889 | 0.9884 | 0.8713 | 0.8884 | 0.8885 | 0.8629 | 0.8629 |
7 | 0.9175 | 0.9947 | 0.8909 | 0.9162 | 0.9148 | 0.8977 | 0.8984 |
8 | 0.8921 | 0.9884 | 0.8675 | 0.8906 | 0.8897 | 0.8666 | 0.8673 |
9 | 0.8952 | 0.9883 | 0.8641 | 0.8925 | 0.8913 | 0.8700 | 0.8709 |
Mean | 0.9048 | 0.9902 | 0.8809 | 0.9042 | 0.9029 | 0.8822 | 0.8828 |
SD | 0.0152 | 0.0024 | 0.0188 | 0.0162 | 0.0153 | 0.0188 | 0.0189 |
bagged_top_models
[BaggingClassifier(base_estimator=<catboost.core.CatBoostClassifier object at 0x7f833a79be90>, bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=LGBMClassifier(bagging_fraction=0.6, bagging_freq=6, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=1.0, importance_type='split', learning_rate=0.4, max_depth=-1, min_child_samples=61, min_child_weight=0.001, min_split_gain=0.3, n_estimators=110, n_jobs=-1, num_leaves=70, objective=None, random_state=1234, reg_alpha=0.2, reg_lambda=0.15, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight={}, criterion='entropy', max_depth=9, max_features=1.0, max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0005, min_impurity_split=None, min_samples_leaf=5, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=130, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=False, ccp_alpha=0.0, class_weight='balanced', criterion='entropy', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=6, min_samples_split=10, min_weight_fraction_leaf=0.0, n_estimators=140, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, 
max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False)]
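As the reprs above show, `ensemble_model` with its default method wraps each tuned estimator in a `BaggingClassifier` with 10 bootstrap-trained copies. The underlying mechanism can be sketched directly with scikit-learn on a small synthetic multiclass problem (illustrative data, not the satellite set):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Small synthetic 3-class problem standing in for the satellite data
X, y = make_classification(n_samples=300, n_features=10, n_informative=6,
                           n_classes=3, random_state=1234)

# Bagging trains 10 copies of the base estimator on bootstrap resamples
# and averages their votes, reducing the variance of a single model.
bagged = BaggingClassifier(DecisionTreeClassifier(random_state=1234),
                           n_estimators=10, random_state=1234)
scores = cross_val_score(bagged, X, y, cv=5)
```

Bagging helps most with high-variance base learners; for already-regularized ensembles like CatBoost the gain is usually smaller, consistent with the modest change in the CV grid above.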
# select the best model trained in this session, once per optimization metric
best1 = automl(optimize = 'AUC')
best2 = automl(optimize = 'Accuracy')
best3 = automl(optimize = 'Recall')
best4 = automl(optimize = 'Precision')
best5 = automl(optimize = 'F1')
print(); print("Best model based on AUC: "); print(best1)
print(); print("Best model based on Accuracy: "); print(best2)
print(); print("Best model based on Recall: "); print(best3)
print(); print("Best model based on Precision: "); print(best4)
print(); print("Best model based on F1: "); print(best5)
Best model based on AUC: 
<catboost.core.CatBoostClassifier object at 0x7f833a596fd0>

Best model based on Accuracy: 
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)

Best model based on Recall: 
LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, importance_type='split', learning_rate=0.1, max_depth=-1, min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31, objective=None, random_state=1234, reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

Best model based on Precision: 
<catboost.core.CatBoostClassifier object at 0x7f833a4bca50>

Best model based on F1: 
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)

[LightGBM] [Warning] bagging_fraction is set=0.6, subsample=1.0 will be ignored. Current value: bagging_fraction=0.6
[LightGBM] [Warning] feature_fraction is set=1.0, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=1.0
[LightGBM] [Warning] bagging_freq is set=6, subsample_freq=0 will be ignored. Current value: bagging_freq=6
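Conceptually, `automl()` scans every model trained in the current session and returns the one with the best cross-validated score on the chosen metric. A toy sketch of that selection step, using the mean CV scores for the top two models from the comparison grid above (this illustrates the idea only, not PyCaret's internals):

```python
# Mean CV scores for two of the session's models (from the compare_models grid)
cv_results = {
    "catboost": {"AUC": 0.9917, "Accuracy": 0.9150},
    "lightgbm": {"AUC": 0.9908, "Accuracy": 0.9140},
}

# Pick the model name with the highest value of the requested metric
best_by_auc = max(cv_results, key=lambda name: cv_results[name]["AUC"])
best_by_acc = max(cv_results, key=lambda name: cv_results[name]["Accuracy"])
```

Because the candidate pool includes the tuned and bagged variants as well, `automl()` can return a different estimator for each metric, as the printouts above show.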
plot_model(best2, plot = 'auc')
plot_model(best2, plot = 'confusion_matrix')
plot_model(best2, plot = 'learning')
save_model(best2,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='Class', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strate... interaction_constraints='', learning_rate=0.2, max_delta_step=0, max_depth=11, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=280, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=1e-06, reg_lambda=0.05, scale_pos_weight=46.6, subsample=0.7, tree_method='auto', validate_parameters=1, verbosity=0)]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction[["Label", "Score"]].head()
In this coding recipe, we demonstrated how to build, compare, tune, ensemble, and deploy multiclass classification models in Python using PyCaret.