For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# OpenML dataset ID (12 = 'mfeat-factors')
whichDataset = 12
import openml
dataset = openml.datasets.get_dataset(whichDataset)
# Print a summary
print(
f"This is dataset '{dataset.name}', the target feature is "
f"'{dataset.default_target_attribute}'"
)
print(f"URL: {dataset.url}")
print(dataset.description)
This is dataset 'mfeat-factors', the target feature is 'class'
URL: https://www.openml.org/data/v1/download/12/mfeat-factors.arff

**Author**: Robert P.W. Duin, Department of Applied Physics, Delft University of Technology
**Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Multiple+Features) - 1998
**Please cite**: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html)

**Multiple Features Dataset: Factors**

One of a set of 6 datasets describing features of handwritten numerals (0 - 9) extracted from a collection of Dutch utility maps. Corresponding patterns in different datasets correspond to the same original character. 200 instances per class (for a total of 2,000 instances) have been digitized in binary images.

### Attribute Information

The attributes represent 216 profile correlations. No more information is known.

### Relevant Papers

A slightly different version of the database is used in: M. van Breukelen, R.P.W. Duin, D.M.J. Tax, and J.E. den Hartog, Handwritten digit recognition by combined classifiers, Kybernetika, vol. 34, no. 4, 1998, 381-386.

The database as-is is used in: A.K. Jain, R.P.W. Duin, J. Mao, Statistical Pattern Recognition: A Review, IEEE Transactions on Pattern Analysis and Machine Intelligence, Volume 22, Issue 1, January 2000.
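As an optional sanity check, the headline numbers above (2,000 instances, 10 classes) can also be read programmatically; this is a sketch assuming the `dataset` object returned by `openml.datasets.get_dataset`, whose `qualities` attribute is a dict of dataset-level statistics.
# optional sanity check via OpenML dataset qualities
print(dataset.qualities.get("NumberOfInstances"))  # expected: 2000.0
print(dataset.qualities.get("NumberOfClasses"))    # expected: 10.0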
import warnings
warnings.filterwarnings("ignore")
X, y, categorical_indicator, attribute_names = dataset.get_data(
    dataset_format="array", target=dataset.default_target_attribute)
# build a modeling DataFrame (note: this rebinds the name `dataset`)
dataset = pd.DataFrame(X, columns=attribute_names)
dataset["target"] = y
dataset.shape
(2000, 217)
dataset.head()
 | att1 | att2 | att3 | att4 | att5 | ... | att214 | att215 | att216 | target
---|---|---|---|---|---|---|---|---|---|---
0 | 98.0 | 236.0 | 531.0 | 673.0 | 607.0 | ... | 15.0 | 12.0 | 13.0 | 0
1 | 121.0 | 193.0 | 607.0 | 611.0 | 585.0 | ... | 13.0 | 15.0 | 11.0 | 0
2 | 115.0 | 141.0 | 590.0 | 605.0 | 557.0 | ... | 14.0 | 13.0 | 6.0 | 0
3 | 90.0 | 122.0 | 627.0 | 692.0 | 607.0 | ... | 12.0 | 15.0 | 9.0 | 0
4 | 157.0 | 167.0 | 681.0 | 666.0 | 587.0 | ... | 15.0 | 13.0 | 13.0 | 0
5 rows × 217 columns (truncated here for readability)
#dataset.columns.to_list()
# count missing values in the data frame
dataset.isnull().sum().sum()
0
# defensive step: fill any missing values with 0 (a no-op here, since none were found)
dataset = dataset.fillna(0)
dataset.isnull().sum().sum()
0
# group by 'target'
dataset.groupby('target').count()
Every attribute column has 200 non-null values in each class, i.e. the 10 digit classes are perfectly balanced (condensed from the full 10 × 216 count table):

target | count per attribute column
---|---
0 | 200
1 | 200
2 | 200
3 | 200
4 | 200
5 | 200
6 | 200
7 | 200
8 | 200
9 | 200
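A more compact way to confirm this class balance is a `value_counts` on the target column (plain pandas, equivalent to the condensed table above):
# compact class-balance check: expect 200 instances per class
dataset['target'].value_counts().sort_index()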
# split into modeling data and an unseen hold-out set
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (1500, 217)
Unseen Data For Predictions: (500, 217)
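The `sample(frac=0.75)` split above is random but not stratified. If exact class proportions in both partitions matter, a stratified hold-out split is a reasonable alternative; this is a sketch using scikit-learn's `train_test_split` and is not used in the rest of the recipe.
# stratified 75/25 hold-out split preserving the 10-class balance
from sklearn.model_selection import train_test_split
data_strat, unseen_strat = train_test_split(
    dataset, train_size=0.75, stratify=dataset['target'], random_state=1234)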
env_setup = setup(data = data, target = 'target', session_id=1234)
Description | Value | |
---|---|---|
0 | session_id | 1234 |
1 | Target | target |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (1500, 217) |
5 | Missing Values | False |
6 | Numeric Features | 216 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (1049, 206) |
12 | Transformed Test Set | (451, 206) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | 2c95 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
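The summary above shows that `setup()` ran with defaults: no normalization, no PCA, no multicollinearity removal. A variant call enabling some of these preprocessing steps might look as follows; this is a sketch using PyCaret 2.x keyword names and is not executed in this recipe.
# variant setup with extra preprocessing (sketch, not run here)
# env_setup = setup(data=data, target='target', session_id=1234,
#                   normalize=True,
#                   remove_multicollinearity=True,
#                   multicollinearity_threshold=0.95)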
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# --------------------------------------
best_model = compare_models(exclude = ['catboost', 'lda'], sort = 'Accuracy')
# --------------------------------------
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) | |
---|---|---|---|---|---|---|---|---|---|
ridge | Ridge Classifier | 0.9752 | 0.0000 | 0.9748 | 0.9768 | 0.9751 | 0.9724 | 0.9727 | 0.0090 |
lightgbm | Light Gradient Boosting Machine | 0.9733 | 0.9986 | 0.9728 | 0.9761 | 0.9732 | 0.9703 | 0.9707 | 0.9020 |
lr | Logistic Regression | 0.9695 | 0.9970 | 0.9691 | 0.9732 | 0.9697 | 0.9661 | 0.9665 | 0.3640 |
rf | Random Forest Classifier | 0.9667 | 0.9978 | 0.9658 | 0.9697 | 0.9664 | 0.9629 | 0.9633 | 0.1470 |
et | Extra Trees Classifier | 0.9638 | 0.9984 | 0.9629 | 0.9670 | 0.9636 | 0.9597 | 0.9601 | 0.1160 |
xgboost | Extreme Gradient Boosting | 0.9600 | 0.9979 | 0.9593 | 0.9626 | 0.9597 | 0.9555 | 0.9559 | 0.5380 |
gbc | Gradient Boosting Classifier | 0.9581 | 0.9978 | 0.9578 | 0.9615 | 0.9581 | 0.9534 | 0.9538 | 3.6300 |
knn | K Neighbors Classifier | 0.9428 | 0.9911 | 0.9414 | 0.9473 | 0.9426 | 0.9364 | 0.9370 | 0.1420 |
nb | Naive Bayes | 0.9314 | 0.9941 | 0.9303 | 0.9378 | 0.9311 | 0.9237 | 0.9245 | 0.0080 |
svm | SVM - Linear Kernel | 0.8665 | 0.0000 | 0.8658 | 0.9020 | 0.8639 | 0.8517 | 0.8567 | 0.0230 |
dt | Decision Tree Classifier | 0.8437 | 0.9131 | 0.8428 | 0.8568 | 0.8439 | 0.8262 | 0.8275 | 0.0180 |
qda | Quadratic Discriminant Analysis | 0.3518 | 0.6400 | 0.3505 | 0.3050 | 0.3166 | 0.2798 | 0.2854 | 0.0210 |
ada | Ada Boost Classifier | 0.2506 | 0.6248 | 0.2425 | 0.2335 | 0.1899 | 0.1640 | 0.2283 | 0.0910 |
dummy | Dummy Classifier | 0.1077 | 0.5000 | 0.1000 | 0.0116 | 0.0210 | 0.0000 | 0.0000 | 0.0040 |
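Note that Ridge Classifier and the linear-kernel SVM show an AUC of 0.0000 only because they expose no `predict_proba`, so PyCaret cannot compute AUC for them; their other metrics are unaffected. To rank only probability-producing models by AUC, one could exclude them as well (a sketch, not run here):
# best_by_auc = compare_models(exclude=['catboost', 'lda', 'ridge', 'svm'], sort='AUC')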
model_1 = create_model('lightgbm')
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9905 | 0.9972 | 0.9909 | 0.9913 | 0.9905 | 0.9894 | 0.9895 |
1 | 0.9714 | 0.9992 | 0.9700 | 0.9757 | 0.9713 | 0.9682 | 0.9687 | |
2 | 0.9810 | 0.9999 | 0.9800 | 0.9839 | 0.9812 | 0.9788 | 0.9791 | |
3 | 0.9810 | 0.9994 | 0.9800 | 0.9839 | 0.9807 | 0.9788 | 0.9792 | |
4 | 0.9524 | 0.9982 | 0.9526 | 0.9570 | 0.9531 | 0.9470 | 0.9474 | |
5 | 0.9905 | 1.0000 | 0.9909 | 0.9912 | 0.9904 | 0.9894 | 0.9895 | |
6 | 0.9619 | 0.9994 | 0.9617 | 0.9651 | 0.9623 | 0.9577 | 0.9580 | |
7 | 0.9810 | 0.9969 | 0.9800 | 0.9826 | 0.9803 | 0.9788 | 0.9791 | |
8 | 0.9429 | 0.9961 | 0.9409 | 0.9484 | 0.9413 | 0.9365 | 0.9373 | |
9 | 0.9808 | 0.9997 | 0.9809 | 0.9824 | 0.9807 | 0.9786 | 0.9788 | |
Mean | 0.9733 | 0.9986 | 0.9728 | 0.9761 | 0.9732 | 0.9703 | 0.9707 | |
Std | 0.0152 | 0.0013 | 0.0156 | 0.0138 | 0.0154 | 0.0169 | 0.0168 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
tuned_model_1 = tune_model(model_1)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9810 | 0.9982 | 0.9809 | 0.9825 | 0.9809 | 0.9788 | 0.9790 |
1 | 0.9810 | 0.9989 | 0.9800 | 0.9844 | 0.9808 | 0.9788 | 0.9792 | |
2 | 0.9810 | 1.0000 | 0.9800 | 0.9839 | 0.9812 | 0.9788 | 0.9791 | |
3 | 0.9619 | 0.9994 | 0.9600 | 0.9678 | 0.9607 | 0.9576 | 0.9585 | |
4 | 0.9429 | 0.9976 | 0.9442 | 0.9477 | 0.9436 | 0.9365 | 0.9368 | |
5 | 0.9714 | 0.9999 | 0.9718 | 0.9738 | 0.9709 | 0.9682 | 0.9686 | |
6 | 0.9429 | 0.9987 | 0.9417 | 0.9453 | 0.9433 | 0.9365 | 0.9367 | |
7 | 0.9810 | 0.9970 | 0.9800 | 0.9826 | 0.9803 | 0.9788 | 0.9791 | |
8 | 0.9143 | 0.9958 | 0.9136 | 0.9144 | 0.9126 | 0.9047 | 0.9051 | |
9 | 0.9615 | 0.9995 | 0.9609 | 0.9639 | 0.9619 | 0.9572 | 0.9574 | |
Mean | 0.9619 | 0.9985 | 0.9613 | 0.9646 | 0.9616 | 0.9576 | 0.9580 | |
Std | 0.0213 | 0.0013 | 0.0212 | 0.0217 | 0.0215 | 0.0237 | 0.0237 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
print(tuned_model_1)
LGBMClassifier(bagging_fraction=0.9, bagging_freq=0, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=0.8, importance_type='split', learning_rate=0.1, max_depth=-1, min_child_samples=36, min_child_weight=0.001, min_split_gain=0.1, n_estimators=30, n_jobs=-1, num_leaves=100, objective=None, random_state=1234, reg_alpha=0.005, reg_lambda=0.05, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0)
tuned_model_1 = tune_model(model_1, n_iter=100)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9905 | 0.9988 | 0.9909 | 0.9913 | 0.9905 | 0.9894 | 0.9895 |
1 | 0.9810 | 0.9990 | 0.9800 | 0.9844 | 0.9808 | 0.9788 | 0.9792 | |
2 | 0.9714 | 0.9998 | 0.9717 | 0.9752 | 0.9717 | 0.9682 | 0.9686 | |
3 | 0.9714 | 0.9996 | 0.9709 | 0.9753 | 0.9712 | 0.9682 | 0.9687 | |
4 | 0.9524 | 0.9967 | 0.9526 | 0.9540 | 0.9523 | 0.9470 | 0.9472 | |
5 | 0.9714 | 0.9997 | 0.9709 | 0.9740 | 0.9714 | 0.9682 | 0.9685 | |
6 | 0.9524 | 0.9986 | 0.9517 | 0.9548 | 0.9528 | 0.9471 | 0.9473 | |
7 | 0.9714 | 0.9945 | 0.9700 | 0.9731 | 0.9708 | 0.9682 | 0.9685 | |
8 | 0.9429 | 0.9975 | 0.9409 | 0.9482 | 0.9413 | 0.9365 | 0.9373 | |
9 | 0.9712 | 0.9997 | 0.9700 | 0.9749 | 0.9708 | 0.9679 | 0.9684 | |
Mean | 0.9676 | 0.9984 | 0.9670 | 0.9705 | 0.9674 | 0.9640 | 0.9643 | |
Std | 0.0136 | 0.0016 | 0.0139 | 0.0131 | 0.0138 | 0.0151 | 0.0150 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
print(tuned_model_1)
LGBMClassifier(bagging_fraction=0.6, bagging_freq=2, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=0.7, importance_type='split', learning_rate=0.05, max_depth=-1, min_child_samples=100, min_child_weight=0.001, min_split_gain=0.6, n_estimators=300, n_jobs=-1, num_leaves=50, objective=None, random_state=1234, reg_alpha=0.05, reg_lambda=0.2, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0)
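Besides `n_iter`, `tune_model` in PyCaret 2.x also accepts a user-defined search space and an optimization target. The grid below is a hypothetical illustration and is not used in the rest of the recipe.
# tune over a hand-picked LightGBM grid, optimizing F1 instead of Accuracy
# custom_grid = {'num_leaves': [31, 50, 100],
#                'learning_rate': [0.05, 0.1],
#                'n_estimators': [100, 200, 300]}
# tuned_custom = tune_model(model_1, custom_grid=custom_grid, optimize='F1')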
plot_model(tuned_model_1, plot = 'auc')
plot_model(tuned_model_1, plot = 'pr')
plot_model(tuned_model_1, plot='feature')
plot_model(tuned_model_1, plot = 'confusion_matrix')
plot_model(tuned_model_1, plot = 'learning')
#plot_model(tuned_model_1, plot = 'threshold')
plot_model(tuned_model_1, plot = 'boundary')
plot_model(tuned_model_1, plot = 'error')
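Instead of calling `plot_model` once per chart, PyCaret's `evaluate_model` wraps the same plots in a single interactive widget inside the notebook:
# evaluate_model(tuned_model_1)  # interactive selector for all available plots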
model_2 = create_model('xgboost')
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9619 | 0.9968 | 0.9627 | 0.9641 | 0.9619 | 0.9577 | 0.9580 |
1 | 0.9619 | 0.9983 | 0.9609 | 0.9670 | 0.9618 | 0.9577 | 0.9582 | |
2 | 0.9714 | 0.9991 | 0.9700 | 0.9776 | 0.9718 | 0.9682 | 0.9689 | |
3 | 0.9524 | 0.9995 | 0.9498 | 0.9570 | 0.9520 | 0.9470 | 0.9476 | |
4 | 0.9524 | 0.9968 | 0.9526 | 0.9548 | 0.9523 | 0.9470 | 0.9473 | |
5 | 0.9714 | 1.0000 | 0.9709 | 0.9738 | 0.9713 | 0.9682 | 0.9685 | |
6 | 0.9524 | 0.9988 | 0.9526 | 0.9540 | 0.9524 | 0.9471 | 0.9473 | |
7 | 0.9714 | 0.9971 | 0.9700 | 0.9712 | 0.9709 | 0.9682 | 0.9683 | |
8 | 0.9238 | 0.9935 | 0.9227 | 0.9247 | 0.9221 | 0.9153 | 0.9158 | |
9 | 0.9808 | 0.9996 | 0.9809 | 0.9816 | 0.9807 | 0.9786 | 0.9787 | |
Mean | 0.9600 | 0.9979 | 0.9593 | 0.9626 | 0.9597 | 0.9555 | 0.9559 | |
Std | 0.0152 | 0.0019 | 0.0154 | 0.0155 | 0.0156 | 0.0169 | 0.0169 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
tuned_model_2 = tune_model(model_2, n_iter=100)
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9810 | 0.9978 | 0.9818 | 0.9825 | 0.9804 | 0.9788 | 0.9791 |
1 | 0.9810 | 0.9976 | 0.9800 | 0.9844 | 0.9808 | 0.9788 | 0.9792 | |
2 | 0.9714 | 0.9997 | 0.9717 | 0.9752 | 0.9717 | 0.9682 | 0.9686 | |
3 | 0.9714 | 0.9997 | 0.9700 | 0.9776 | 0.9718 | 0.9682 | 0.9689 | |
4 | 0.9429 | 0.9973 | 0.9426 | 0.9445 | 0.9428 | 0.9365 | 0.9366 | |
5 | 0.9905 | 0.9999 | 0.9900 | 0.9913 | 0.9905 | 0.9894 | 0.9895 | |
6 | 0.9524 | 0.9992 | 0.9526 | 0.9549 | 0.9528 | 0.9471 | 0.9473 | |
7 | 0.9810 | 0.9961 | 0.9800 | 0.9826 | 0.9803 | 0.9788 | 0.9791 | |
8 | 0.9619 | 0.9968 | 0.9600 | 0.9665 | 0.9616 | 0.9576 | 0.9582 | |
9 | 0.9904 | 0.9997 | 0.9900 | 0.9912 | 0.9903 | 0.9893 | 0.9894 | |
Mean | 0.9724 | 0.9984 | 0.9719 | 0.9751 | 0.9723 | 0.9693 | 0.9696 | |
Std | 0.0150 | 0.0013 | 0.0150 | 0.0146 | 0.0149 | 0.0167 | 0.0167 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
print(tuned_model_2)
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.5, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.05, max_delta_step=0, max_depth=8, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=210, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0.01, reg_lambda=0.5, scale_pos_weight=19.900000000000006, subsample=0.5, tree_method='auto', validate_parameters=1, verbosity=0)
plot_model(tuned_model_2, plot = 'auc')
#plot_model(tuned_model_2, plot = 'pr')
plot_model(tuned_model_2, plot = 'feature')
plot_model(tuned_model_2, plot = 'confusion_matrix')
#plot_model(tuned_model_2, plot = 'learning')
#plot_model(tuned_model_2, plot = 'threshold')
plot_model(tuned_model_2, plot = 'boundary')
plot_model(tuned_model_2, plot = 'error')
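For tree-based models such as this tuned XGBoost, PyCaret also offers SHAP-based explanations through `interpret_model` (optional; it requires the `shap` package to be installed):
# interpret_model(tuned_model_2)  # SHAP summary plot of feature contributions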
predict_model(tuned_model_1);
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Light Gradient Boosting Machine | 0.9712 | 0.9986 | 0.9714 | 0.9723 | 0.9712 | 0.9679 | 0.9681 |
predict_model(tuned_model_2);
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 0.9645 | 0.9987 | 0.9652 | 0.9655 | 0.9645 | 0.9605 | 0.9607 |
final_model = finalize_model(tuned_model_2);
# Final model parameters for deployment
print(final_model)
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.5, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.05, max_delta_step=0, max_depth=8, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=210, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0.01, reg_lambda=0.5, scale_pos_weight=19.900000000000006, subsample=0.5, tree_method='auto', validate_parameters=1, verbosity=0)
predict_model(final_model);
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
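The perfect 1.0000 scores above are expected rather than impressive: `finalize_model` refits the pipeline on the entire modeling set, including the internal test fold, so `predict_model(final_model)` is now scoring data the model has already seen. The honest generalization estimate is the one on `data_unseen` below.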
unseen_predictions = predict_model(final_model, data=data_unseen)
unseen_predictions.head()
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 0.9500 | 0.9982 | 0.9506 | 0.9522 | 0.9502 | 0.9444 | 0.9446 |
att1 | att2 | att3 | att4 | att5 | att6 | att7 | att8 | att9 | att10 | ... | att210 | att211 | att212 | att213 | att214 | att215 | att216 | target | Label | Score | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 90.0 | 122.0 | 627.0 | 692.0 | 607.0 | 642.0 | 0.0 | 6.0 | 4.0 | 5.0 | ... | 621.0 | 16.0 | 35.0 | 7.0 | 12.0 | 15.0 | 9.0 | 0 | 0 | 0.9988 |
1 | 128.0 | 224.0 | 799.0 | 690.0 | 653.0 | 620.0 | 16.0 | 22.0 | 8.0 | 9.0 | ... | 573.0 | 24.0 | 17.0 | 7.0 | 10.0 | 14.0 | 6.0 | 0 | 8 | 0.4515 |
2 | 135.0 | 149.0 | 693.0 | 626.0 | 596.0 | 633.0 | 7.0 | 7.0 | 3.0 | 5.0 | ... | 638.0 | 15.0 | 36.0 | 14.0 | 16.0 | 15.0 | 13.0 | 0 | 0 | 0.9979 |
3 | 145.0 | 121.0 | 583.0 | 661.0 | 655.0 | 741.0 | 2.0 | 5.0 | 9.0 | 3.0 | ... | 664.0 | 18.0 | 36.0 | 8.0 | 14.0 | 15.0 | 10.0 | 0 | 0 | 0.9966 |
4 | 88.0 | 112.0 | 641.0 | 636.0 | 598.0 | 634.0 | 3.0 | 6.0 | 1.0 | 3.0 | ... | 643.0 | 13.0 | 35.0 | 10.0 | 14.0 | 15.0 | 12.0 | 0 | 0 | 0.9984 |
5 rows × 219 columns
from pycaret.utils import check_metric
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'Accuracy')
0.95
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'Recall')
0.9506
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'Precision')
0.9522
#check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'AUC')
check_metric(unseen_predictions['target'], unseen_predictions['Label'], metric = 'F1')
0.9502
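The same hold-out metrics can be cross-checked directly with scikit-learn. This sketch uses macro averaging for the multiclass case; on a nearly balanced hold-out like this one, macro and weighted averaging give almost identical values.
# cross-check of the metrics above with scikit-learn
from sklearn.metrics import accuracy_score, f1_score
print(accuracy_score(unseen_predictions['target'], unseen_predictions['Label']))
print(f1_score(unseen_predictions['target'], unseen_predictions['Label'], average='macro'))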
save_model(final_model,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='target', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strat... interaction_constraints='', learning_rate=0.05, max_delta_step=0, max_depth=8, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=210, n_jobs=-1, num_parallel_tree=1, objective='multi:softprob', random_state=1234, reg_alpha=0.01, reg_lambda=0.5, scale_pos_weight=19.900000000000006, subsample=0.5, tree_method='auto', validate_parameters=1, verbosity=0)]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
Transformation Pipeline and Model Successfully Loaded
new_prediction = predict_model(load_saved_model, data=data_unseen)
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | |
---|---|---|---|---|---|---|---|---|
0 | Extreme Gradient Boosting | 0.9500 | 0.9982 | 0.9506 | 0.9522 | 0.9502 | 0.9444 | 0.9446 |
#new_prediction[["Label", "Score"]].head()
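For deployment-style scoring, the loaded pipeline can also be applied to a single record; this sketch reuses the first unseen row as a stand-in for a fresh observation:
# score a single new observation with the loaded pipeline
one_row = data_unseen.drop(columns=['target']).iloc[[0]]
predict_model(load_saved_model, data=one_row)[['Label', 'Score']]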
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# OpenML dataset ID (12 = 'mfeat-factors')
whichDataset = 12
import openml
dataset = openml.datasets.get_dataset(whichDataset)
X, y, categorical_indicator, attribute_names = dataset.get_data(
dataset_format="array", target=dataset.default_target_attribute)
dataset = pd.DataFrame(X, columns=attribute_names)
dataset["target"] = y
data = dataset.sample(frac=0.70, random_state=421)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (1400, 217)
Unseen Data For Predictions: (600, 217)
clf = setup(data = data, target = 'target', session_id=1234)
Description | Value | |
---|---|---|
0 | session_id | 1234 |
1 | Target | target |
2 | Target Type | Multiclass |
3 | Label Encoded | None |
4 | Original Data | (1400, 217) |
5 | Missing Values | False |
6 | Numeric Features | 216 |
7 | Categorical Features | 0 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (979, 206) |
12 | Transformed Test Set | (421, 206) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | f681 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# compare all baseline models and select the top 3
top_models = compare_models(n_select = 3, exclude = ['catboost'], sort = 'Accuracy')
Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) | |
---|---|---|---|---|---|---|---|---|---|
ridge | Ridge Classifier | 0.9755 | 0.0000 | 0.9751 | 0.9778 | 0.9754 | 0.9728 | 0.9730 | 0.0070 |
lda | Linear Discriminant Analysis | 0.9755 | 0.9992 | 0.9754 | 0.9785 | 0.9755 | 0.9728 | 0.9731 | 0.0130 |
lr | Logistic Regression | 0.9663 | 0.9987 | 0.9659 | 0.9687 | 0.9659 | 0.9625 | 0.9629 | 0.3490 |
rf | Random Forest Classifier | 0.9632 | 0.9981 | 0.9632 | 0.9662 | 0.9631 | 0.9591 | 0.9595 | 0.1470 |
et | Extra Trees Classifier | 0.9632 | 0.9977 | 0.9629 | 0.9659 | 0.9631 | 0.9591 | 0.9594 | 0.1160 |
lightgbm | Light Gradient Boosting Machine | 0.9530 | 0.9980 | 0.9530 | 0.9570 | 0.9531 | 0.9478 | 0.9482 | 0.8620 |
xgboost | Extreme Gradient Boosting | 0.9510 | 0.9977 | 0.9511 | 0.9552 | 0.9508 | 0.9455 | 0.9460 | 0.5130 |
gbc | Gradient Boosting Classifier | 0.9479 | 0.9951 | 0.9480 | 0.9532 | 0.9485 | 0.9421 | 0.9426 | 3.3940 |
knn | K Neighbors Classifier | 0.9377 | 0.9923 | 0.9370 | 0.9432 | 0.9379 | 0.9307 | 0.9313 | 0.1430 |
nb | Naive Bayes | 0.9265 | 0.9928 | 0.9255 | 0.9323 | 0.9261 | 0.9182 | 0.9190 | 0.0070 |
dt | Decision Tree Classifier | 0.8539 | 0.9189 | 0.8527 | 0.8671 | 0.8543 | 0.8376 | 0.8391 | 0.0170 |
svm | SVM - Linear Kernel | 0.8408 | 0.0000 | 0.8351 | 0.8696 | 0.8252 | 0.8228 | 0.8300 | 0.0220 |
qda | Quadratic Discriminant Analysis | 0.3402 | 0.6332 | 0.3380 | 0.3049 | 0.3084 | 0.2663 | 0.2711 | 0.0110 |
ada | Ada Boost Classifier | 0.2983 | 0.6760 | 0.2856 | 0.2629 | 0.2259 | 0.2141 | 0.2704 | 0.0850 |
dummy | Dummy Classifier | 0.1103 | 0.5000 | 0.1000 | 0.0122 | 0.0219 | 0.0000 | 0.0000 | 0.0050 |
top_models
[RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=1234, solver='auto', tol=0.001), LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001), LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=1234, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)]
# tune top base models
tuned_top_models = [tune_model(i) for i in top_models]
Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | ||
---|---|---|---|---|---|---|---|---|
Split | Fold | |||||||
CV-Val | 0 | 0.9898 | 1.0000 | 0.9900 | 0.9908 | 0.9898 | 0.9887 | 0.9888 |
1 | 0.9796 | 0.9997 | 0.9800 | 0.9830 | 0.9794 | 0.9773 | 0.9778 | |
2 | 0.9796 | 0.9963 | 0.9789 | 0.9814 | 0.9795 | 0.9773 | 0.9775 | |
3 | 0.9796 | 0.9989 | 0.9818 | 0.9830 | 0.9795 | 0.9773 | 0.9778 | |
4 | 0.9286 | 0.9984 | 0.9287 | 0.9295 | 0.9280 | 0.9206 | 0.9208 | |
5 | 0.9490 | 0.9994 | 0.9476 | 0.9533 | 0.9488 | 0.9433 | 0.9438 | |
6 | 0.9694 | 0.9993 | 0.9689 | 0.9713 | 0.9688 | 0.9660 | 0.9663 | |
7 | 0.9796 | 0.9976 | 0.9789 | 0.9814 | 0.9795 | 0.9773 | 0.9775 | |
8 | 0.9694 | 0.9993 | 0.9700 | 0.9723 | 0.9693 | 0.9660 | 0.9663 | |
9 | 0.9691 | 0.9994 | 0.9689 | 0.9710 | 0.9690 | 0.9656 | 0.9658 | |
Mean | 0.9694 | 0.9988 | 0.9694 | 0.9717 | 0.9692 | 0.9659 | 0.9662 | |
Std | 0.0171 | 0.0011 | 0.0173 | 0.0171 | 0.0172 | 0.0190 | 0.0190 | |
Train | nan | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
tuned_top_models
[RidgeClassifier(alpha=6.85, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=1234, solver='auto', tol=0.001), LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=0.005, solver='lsqr', store_covariance=False, tol=0.0001), LogisticRegression(C=0.66, class_weight='balanced', dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=1234, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)]
# ensemble top tuned models
#bagged_top_models = [ensemble_model(i) for i in tuned_top_models]
#bagged_top_models
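Similarly, the tuned models could be combined into a single voting ensemble with `blend_models` (a sketch, left commented like the bagging step above; note that Ridge Classifier exposes no `predict_proba`, so soft voting is not available for this particular trio):
# blender = blend_models(estimator_list=tuned_top_models)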
# select the best model from this session, optimized by different metrics
best1 = automl(optimize = 'AUC')
best2 = automl(optimize = 'Accuracy')
best3 = automl(optimize = 'Recall')
best4 = automl(optimize = 'Precision')
best5 = automl(optimize = 'F1')
print(); print("Best model based on AUC: "); print(best1)
print(); print("Best model based on Accuracy: "); print(best2)
print(); print("Best model based on Recall: "); print(best3)
print(); print("Best model based on Precision: "); print(best4)
print(); print("Best model based on F1: "); print(best5)
Best model based on AUC:
LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001)

Best model based on Accuracy:
RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=1234, solver='auto', tol=0.001)

Best model based on Recall:
LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001)

Best model based on Precision:
LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001)

Best model based on F1:
LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001)
#plot_model(best2, plot = 'auc')
plot_model(best2, plot = 'confusion_matrix')
plot_model(best2, plot = 'learning')
save_model(best2,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='target', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strat... ('fix_perfect', Remove_100(target='target')), ('clean_names', Clean_Colum_Names()), ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'), ('dfs', 'passthrough'), ('pca', 'passthrough'), ['trained_model', RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=1234, solver='auto', tol=0.001)]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction.head()
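As in the first part of the recipe, the unseen-data performance of the reloaded pipeline can be verified explicitly with the same `check_metric` helper:
# verify hold-out accuracy of the reloaded Ridge Classifier pipeline
from pycaret.utils import check_metric
check_metric(new_prediction['target'], new_prediction['Label'], metric='Accuracy')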
In this coding recipe, we showed how to build an end-to-end multiclass classification workflow in Python with PyCaret: loading an OpenML dataset, setting up the experiment, comparing baseline models, tuning and evaluating the best candidates, and finalizing, saving, and reloading a model to score unseen data.