For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as listed in PyCaret's dataset repository
whichDataset = 'wine'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
 | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | type |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | red |
1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 | red |
2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 | red |
3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 | red |
4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | red |
dataset.shape
(6497, 13)
dataset.columns.to_list()
['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality', 'type']
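Before splitting, it helps to check the class balance of the target. A quick look with pandas (the majority-class share here foreshadows the Dummy Classifier baseline of roughly 0.75 seen later):
# distribution of the target classes
print(dataset['type'].value_counts())
print(dataset['type'].value_counts(normalize=True).round(3))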
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (4873, 13)
Unseen Data For Predictions: (1624, 13)
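Note that sample(frac=0.75) does not preserve the red/white ratio exactly. If an exactly proportional holdout matters, a stratified split is an alternative; a minimal sketch with scikit-learn (not part of the original recipe):
from sklearn.model_selection import train_test_split
# stratify on the target so both partitions keep the full dataset's class proportions
data_strat, unseen_strat = train_test_split(
    dataset, test_size=0.25, stratify=dataset['type'], random_state=1234)
print(data_strat.shape, unseen_strat.shape)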
env_setup = setup(data = data, target = 'type', session_id=1234)
 | Description | Value |
---|---|---|
0 | session_id | 1234 |
1 | Target | type |
2 | Target Type | Binary |
3 | Label Encoded | red: 0, white: 1 |
4 | Original Data | (4873, 13) |
5 | Missing Values | False |
6 | Numeric Features | 11 |
7 | Categorical Features | 1 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (3411, 18) |
12 | Transformed Test Set | (1462, 18) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | ee21 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
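Most of the rows above marked False/None are preprocessing switches that setup() leaves off by default. A hedged sketch of a more customized call (parameter names from the PyCaret 2.x API; shown for illustration, not executed in this recipe):
env_setup_custom = setup(
    data = data,
    target = 'type',
    session_id = 1234,
    train_size = 0.7,                  # fraction used for the train split (default 0.7)
    normalize = True,                  # z-score scale the numeric features
    normalize_method = 'zscore',
    remove_multicollinearity = True,   # drop one of each highly correlated feature pair
    multicollinearity_threshold = 0.9,
    fix_imbalance = True,              # oversample the minority class (SMOTE by default)
)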
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# --------------------------------------
best_model = compare_models()
# --------------------------------------
 | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
---|---|---|---|---|---|---|---|---|---|
lda | Linear Discriminant Analysis | 0.9947 | 0.9945 | 0.9980 | 0.9949 | 0.9965 | 0.9859 | 0.9859 | 0.0100 |
et | Extra Trees Classifier | 0.9947 | 0.9966 | 0.9992 | 0.9938 | 0.9965 | 0.9858 | 0.9859 | 0.2880 |
catboost | CatBoost Classifier | 0.9944 | 0.9966 | 0.9980 | 0.9946 | 0.9963 | 0.9851 | 0.9851 | 1.9960 |
xgboost | Extreme Gradient Boosting | 0.9941 | 0.9964 | 0.9980 | 0.9942 | 0.9961 | 0.9842 | 0.9843 | 0.1360 |
ada | Ada Boost Classifier | 0.9924 | 0.9946 | 0.9961 | 0.9938 | 0.9949 | 0.9796 | 0.9796 | 0.0850 |
lightgbm | Light Gradient Boosting Machine | 0.9924 | 0.9961 | 0.9977 | 0.9923 | 0.9949 | 0.9795 | 0.9796 | 0.0480 |
rf | Random Forest Classifier | 0.9912 | 0.9970 | 0.9965 | 0.9919 | 0.9942 | 0.9764 | 0.9765 | 0.3390 |
gbc | Gradient Boosting Classifier | 0.9903 | 0.9955 | 0.9953 | 0.9919 | 0.9936 | 0.9740 | 0.9741 | 0.2200 |
ridge | Ridge Classifier | 0.9853 | 0.0000 | 0.9945 | 0.9861 | 0.9903 | 0.9605 | 0.9607 | 0.0070 |
lr | Logistic Regression | 0.9845 | 0.9929 | 0.9941 | 0.9854 | 0.9897 | 0.9581 | 0.9586 | 0.4450 |
dt | Decision Tree Classifier | 0.9774 | 0.9708 | 0.9844 | 0.9856 | 0.9850 | 0.9398 | 0.9400 | 0.0120 |
knn | K Neighbors Classifier | 0.9352 | 0.9476 | 0.9699 | 0.9453 | 0.9574 | 0.8222 | 0.8235 | 0.0720 |
svm | SVM - Linear Kernel | 0.9261 | 0.0000 | 0.9559 | 0.9481 | 0.9499 | 0.8058 | 0.8158 | 0.0130 |
nb | Naive Bayes | 0.9229 | 0.9759 | 0.9082 | 0.9882 | 0.9464 | 0.8100 | 0.8196 | 0.0070 |
dummy | Dummy Classifier | 0.7505 | 0.5000 | 1.0000 | 0.7505 | 0.8575 | 0.0000 | 0.0000 | 0.0050 |
qda | Quadratic Discriminant Analysis | 0.2495 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0120 |
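compare_models() trains every available estimator with 10-fold cross-validation and ranks by Accuracy by default. The AUC of 0.0000 for the Ridge Classifier and linear SVM is expected: neither exposes predicted probabilities, so PyCaret cannot compute AUC for them. A sketch of common variations (all PyCaret 2.x parameters):
# rank by AUC, use 5 folds, and skip the slower boosting libraries
best_by_auc = compare_models(sort = 'AUC', fold = 5, exclude = ['catboost', 'xgboost'])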
lda = create_model('lda')
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9971 | 0.9965 | 1.0000 | 0.9961 | 0.9981 | 0.9922 | 0.9922 |
1 | 0.9971 | 0.9917 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9922 |
2 | 0.9971 | 0.9998 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9922 |
3 | 0.9941 | 0.9995 | 1.0000 | 0.9922 | 0.9961 | 0.9842 | 0.9843 |
4 | 0.9941 | 0.9982 | 0.9961 | 0.9961 | 0.9961 | 0.9843 | 0.9843 |
5 | 0.9971 | 0.9913 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9922 |
6 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
7 | 0.9912 | 0.9886 | 0.9961 | 0.9922 | 0.9942 | 0.9764 | 0.9764 |
8 | 0.9883 | 0.9812 | 0.9922 | 0.9922 | 0.9922 | 0.9687 | 0.9687 |
9 | 0.9912 | 0.9981 | 0.9961 | 0.9922 | 0.9942 | 0.9764 | 0.9764 |
Mean | 0.9947 | 0.9945 | 0.9980 | 0.9949 | 0.9965 | 0.9859 | 0.9859 |
SD | 0.0034 | 0.0059 | 0.0026 | 0.0025 | 0.0023 | 0.0092 | 0.0092 |
tuned_lda = tune_model(lda)
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9912 | 0.9955 | 0.9961 | 0.9922 | 0.9942 | 0.9766 | 0.9766 |
1 | 0.9707 | 0.9862 | 0.9961 | 0.9659 | 0.9808 | 0.9191 | 0.9210 |
2 | 0.9795 | 0.9986 | 0.9883 | 0.9844 | 0.9864 | 0.9449 | 0.9450 |
3 | 0.9853 | 0.9979 | 1.0000 | 0.9808 | 0.9903 | 0.9600 | 0.9608 |
4 | 0.9853 | 0.9911 | 1.0000 | 0.9808 | 0.9903 | 0.9600 | 0.9608 |
5 | 0.9912 | 0.9879 | 1.0000 | 0.9884 | 0.9942 | 0.9762 | 0.9765 |
6 | 0.9941 | 0.9996 | 1.0000 | 0.9922 | 0.9961 | 0.9842 | 0.9843 |
7 | 0.9765 | 0.9858 | 1.0000 | 0.9697 | 0.9846 | 0.9353 | 0.9372 |
8 | 0.9795 | 0.9923 | 0.9922 | 0.9807 | 0.9864 | 0.9445 | 0.9448 |
9 | 0.9736 | 0.9919 | 0.9922 | 0.9732 | 0.9826 | 0.9281 | 0.9288 |
Mean | 0.9827 | 0.9927 | 0.9965 | 0.9808 | 0.9886 | 0.9529 | 0.9536 |
SD | 0.0076 | 0.0048 | 0.0041 | 0.0086 | 0.0050 | 0.0210 | 0.0205 |
print(tuned_lda)
LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=0.0001, solver='eigen', store_covariance=False, tol=0.0001)
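tune_model() runs a randomized search over a predefined hyperparameter grid, optimizing Accuracy by default; here the tuned LDA (mean accuracy 0.9827) actually scores below the default model (0.9947), which can happen with a small random search. A sketch of steering the search (PyCaret 2.x parameters; illustrative, not executed here):
# optimize AUC, try more candidates, and keep the base model if the search does not beat it
tuned_lda_auc = tune_model(lda, optimize = 'AUC', n_iter = 50, choose_better = True)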
plot_model(tuned_lda, plot = 'auc')
plot_model(tuned_lda, plot = 'pr')
plot_model(tuned_lda, plot='feature')
plot_model(tuned_lda, plot = 'confusion_matrix')
plot_model(tuned_lda, plot = 'learning')
plot_model(tuned_lda, plot = 'threshold')
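Instead of calling plot_model() once per chart, evaluate_model() wraps the same plots in a single interactive notebook widget:
# interactive widget exposing all available diagnostic plots
evaluate_model(tuned_lda)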
lr = create_model('lr')
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9912 | 0.9949 | 0.9961 | 0.9922 | 0.9942 | 0.9766 | 0.9766 |
1 | 0.9765 | 0.9868 | 0.9961 | 0.9733 | 0.9846 | 0.9358 | 0.9369 |
2 | 0.9795 | 0.9993 | 0.9727 | 1.0000 | 0.9861 | 0.9466 | 0.9480 |
3 | 0.9912 | 0.9992 | 1.0000 | 0.9884 | 0.9942 | 0.9762 | 0.9765 |
4 | 0.9853 | 0.9914 | 1.0000 | 0.9808 | 0.9903 | 0.9600 | 0.9608 |
5 | 0.9883 | 0.9878 | 0.9961 | 0.9884 | 0.9922 | 0.9684 | 0.9685 |
6 | 0.9941 | 0.9998 | 1.0000 | 0.9922 | 0.9961 | 0.9842 | 0.9843 |
7 | 0.9883 | 0.9858 | 1.0000 | 0.9846 | 0.9922 | 0.9682 | 0.9686 |
8 | 0.9736 | 0.9927 | 0.9844 | 0.9805 | 0.9825 | 0.9292 | 0.9292 |
9 | 0.9765 | 0.9912 | 0.9961 | 0.9733 | 0.9846 | 0.9358 | 0.9369 |
Mean | 0.9845 | 0.9929 | 0.9941 | 0.9854 | 0.9897 | 0.9581 | 0.9586 |
SD | 0.0069 | 0.0050 | 0.0084 | 0.0081 | 0.0046 | 0.0188 | 0.0185 |
tuned_lr = tune_model(lr)
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9912 | 0.9961 | 0.9961 | 0.9922 | 0.9942 | 0.9766 | 0.9766 |
1 | 0.9824 | 0.9874 | 0.9961 | 0.9808 | 0.9884 | 0.9522 | 0.9527 |
2 | 0.9853 | 0.9994 | 0.9805 | 1.0000 | 0.9901 | 0.9616 | 0.9623 |
3 | 0.9941 | 0.9993 | 1.0000 | 0.9922 | 0.9961 | 0.9842 | 0.9843 |
4 | 0.9883 | 0.9933 | 1.0000 | 0.9846 | 0.9922 | 0.9682 | 0.9686 |
5 | 0.9853 | 0.9880 | 0.9922 | 0.9883 | 0.9903 | 0.9607 | 0.9607 |
6 | 0.9971 | 0.9999 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9922 |
7 | 0.9912 | 0.9851 | 1.0000 | 0.9884 | 0.9942 | 0.9762 | 0.9765 |
8 | 0.9824 | 0.9911 | 0.9883 | 0.9883 | 0.9883 | 0.9530 | 0.9530 |
9 | 0.9824 | 0.9929 | 0.9961 | 0.9808 | 0.9884 | 0.9522 | 0.9527 |
Mean | 0.9880 | 0.9933 | 0.9949 | 0.9892 | 0.9920 | 0.9677 | 0.9680 |
SD | 0.0050 | 0.0051 | 0.0061 | 0.0059 | 0.0033 | 0.0134 | 0.0133 |
print(tuned_lr)
LogisticRegression(C=4.725000000000001, class_weight={}, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=1234, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)
plot_model(tuned_lr, plot = 'auc')
plot_model(tuned_lr, plot = 'pr')
plot_model(tuned_lr, plot = 'feature')
plot_model(tuned_lr, plot = 'confusion_matrix')
plot_model(tuned_lr, plot = 'learning')
plot_model(tuned_lr, plot = 'threshold')
# score the tuned models on PyCaret's internal held-out test set
predict_model(tuned_lda);
 | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|---|
0 | Linear Discriminant Analysis | 0.9850 | 0.9992 | 0.9946 | 0.9857 | 0.9902 | 0.9583 | 0.9584 |
predict_model(tuned_lr);
 | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|---|
0 | Logistic Regression | 0.9918 | 0.9996 | 0.9955 | 0.9937 | 0.9946 | 0.9774 | 0.9774 |
# finalize: retrain the selected model on the full modeling data (train + holdout)
final_lr = finalize_model(tuned_lr);
# Final model parameters for deployment
print(final_lr)
LogisticRegression(C=4.725000000000001, class_weight={}, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=1234, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)
predict_model(final_lr);
 | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|---|
0 | Logistic Regression | 0.9891 | 0.9997 | 0.9937 | 0.9919 | 0.9928 | 0.9699 | 0.9699 |
unseen_predictions = predict_model(final_lr, data=data_unseen)
unseen_predictions.head()
 | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | type | Label | Score |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 | red | red | 0.9787 |
1 | 7.3 | 0.65 | 0.00 | 1.2 | 0.065 | 15.0 | 21.0 | 0.9946 | 3.39 | 0.47 | 10.0 | 7 | red | red | 0.9979 |
2 | 6.7 | 0.58 | 0.08 | 1.8 | 0.097 | 15.0 | 65.0 | 0.9959 | 3.28 | 0.54 | 9.2 | 5 | red | red | 0.9599 |
3 | 8.9 | 0.62 | 0.19 | 3.9 | 0.170 | 51.0 | 148.0 | 0.9986 | 3.17 | 0.93 | 9.2 | 5 | red | red | 0.9969 |
4 | 8.5 | 0.28 | 0.56 | 1.8 | 0.092 | 35.0 | 103.0 | 0.9969 | 3.30 | 0.75 | 10.5 | 7 | red | red | 0.6801 |
from pycaret.utils import check_metric
check_metric(unseen_predictions['type'], unseen_predictions['Label'], metric = 'Accuracy')
0.9914
check_metric(unseen_predictions['type'], unseen_predictions['Label'], metric = 'Recall')
0.9967
check_metric(unseen_predictions['type'], unseen_predictions['Label'], metric = 'Precision')
0.9919
check_metric(unseen_predictions['type'], unseen_predictions['Label'], metric = 'AUC')
0.9858
check_metric(unseen_predictions['type'], unseen_predictions['Label'], metric = 'F1')
0.9943
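check_metric() compares the true labels with the predicted Label column one metric at a time; the same numbers can be cross-checked with scikit-learn, for example:
from sklearn.metrics import classification_report
# per-class precision/recall/F1 plus overall accuracy in one report
print(classification_report(unseen_predictions['type'], unseen_predictions['Label']))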
save_model(final_lr,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='type', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strateg... ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'), ('dfs', 'passthrough'), ('pca', 'passthrough'), ['trained_model', LogisticRegression(C=4.725000000000001, class_weight={}, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=1234, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
Transformation Pipeline and Model Successfully Loaded
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction[["Label", "Score"]].head(10)
 | Label | Score |
---|---|---|
0 | red | 0.9787 |
1 | red | 0.9979 |
2 | red | 0.9599 |
3 | red | 0.9969 |
4 | red | 0.6801 |
5 | red | 0.9987 |
6 | red | 0.9617 |
7 | red | 0.5282 |
8 | red | 0.9993 |
9 | red | 1.0000 |
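The loaded pipeline accepts any DataFrame with the original input columns, so a single new observation can be scored the same way. A sketch using the feature values from the first row of the dataset shown earlier (any values with matching column names would work):
single = pd.DataFrame([{
    'fixed acidity': 7.4, 'volatile acidity': 0.70, 'citric acid': 0.00,
    'residual sugar': 1.9, 'chlorides': 0.076, 'free sulfur dioxide': 11.0,
    'total sulfur dioxide': 34.0, 'density': 0.9978, 'pH': 3.51,
    'sulphates': 0.56, 'alcohol': 9.4, 'quality': 5}])
print(predict_model(load_saved_model, data=single)[['Label', 'Score']])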
# Part 2: repeat the workflow with a fresh split, then select among the top five models
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as listed in PyCaret's dataset repository
whichDataset = 'wine'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
 | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | type |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | red |
1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 | red |
2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 | red |
3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 | red |
4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | red |
data = dataset.sample(frac=0.75, random_state=421)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (4873, 13)
Unseen Data For Predictions: (1624, 13)
clf = setup(data = data, target = 'type', session_id=1234)
 | Description | Value |
---|---|---|
0 | session_id | 1234 |
1 | Target | type |
2 | Target Type | Binary |
3 | Label Encoded | red: 0, white: 1 |
4 | Original Data | (4873, 13) |
5 | Missing Values | False |
6 | Numeric Features | 11 |
7 | Categorical Features | 1 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (3411, 18) |
12 | Transformed Test Set | (1462, 18) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | ab65 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# compare all baseline models and select top 5
top_models = compare_models(n_select = 5)
 | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
---|---|---|---|---|---|---|---|---|---|
et | Extra Trees Classifier | 0.9971 | 0.9980 | 0.9992 | 0.9969 | 0.9981 | 0.9921 | 0.9921 | 0.2870 |
catboost | CatBoost Classifier | 0.9962 | 0.9980 | 0.9988 | 0.9961 | 0.9975 | 0.9897 | 0.9897 | 2.0290 |
lightgbm | Light Gradient Boosting Machine | 0.9959 | 0.9977 | 0.9981 | 0.9965 | 0.9973 | 0.9889 | 0.9890 | 0.0480 |
rf | Random Forest Classifier | 0.9950 | 0.9981 | 0.9988 | 0.9946 | 0.9967 | 0.9865 | 0.9866 | 0.3300 |
ada | Ada Boost Classifier | 0.9950 | 0.9976 | 0.9965 | 0.9969 | 0.9967 | 0.9866 | 0.9866 | 0.0830 |
lda | Linear Discriminant Analysis | 0.9947 | 0.9962 | 0.9973 | 0.9958 | 0.9965 | 0.9857 | 0.9858 | 0.0100 |
xgboost | Extreme Gradient Boosting | 0.9944 | 0.9987 | 0.9984 | 0.9942 | 0.9963 | 0.9849 | 0.9850 | 0.1280 |
gbc | Gradient Boosting Classifier | 0.9936 | 0.9978 | 0.9977 | 0.9938 | 0.9957 | 0.9825 | 0.9826 | 0.2200 |
ridge | Ridge Classifier | 0.9892 | 0.0000 | 0.9969 | 0.9888 | 0.9928 | 0.9705 | 0.9707 | 0.0070 |
lr | Logistic Regression | 0.9856 | 0.9952 | 0.9949 | 0.9862 | 0.9905 | 0.9609 | 0.9611 | 0.4330 |
dt | Decision Tree Classifier | 0.9809 | 0.9751 | 0.9872 | 0.9876 | 0.9874 | 0.9487 | 0.9488 | 0.0120 |
nb | Naive Bayes | 0.9463 | 0.9777 | 0.9393 | 0.9891 | 0.9634 | 0.8629 | 0.8677 | 0.0080 |
knn | K Neighbors Classifier | 0.9428 | 0.9555 | 0.9736 | 0.9521 | 0.9626 | 0.8414 | 0.8432 | 0.0730 |
svm | SVM - Linear Kernel | 0.9167 | 0.0000 | 0.9374 | 0.9543 | 0.9421 | 0.7887 | 0.8036 | 0.0130 |
dummy | Dummy Classifier | 0.7540 | 0.5000 | 1.0000 | 0.7540 | 0.8598 | 0.0000 | 0.0000 | 0.0050 |
qda | Quadratic Discriminant Analysis | 0.2460 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0080 |
top_models
[ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), <catboost.core.CatBoostClassifier at 0x7fe300582d90>, LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, importance_type='split', learning_rate=0.1, max_depth=-1, min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0, n_estimators=100, n_jobs=-1, num_leaves=31, objective=None, random_state=1234, reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0, n_estimators=50, random_state=1234)]
# tune top base models
tuned_top_models = [tune_model(i) for i in top_models]
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9971 | 0.9964 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9921 |
1 | 0.9941 | 1.0000 | 0.9922 | 1.0000 | 0.9961 | 0.9842 | 0.9843 |
2 | 0.9912 | 0.9990 | 1.0000 | 0.9885 | 0.9942 | 0.9760 | 0.9763 |
3 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
4 | 0.9912 | 0.9892 | 0.9961 | 0.9922 | 0.9942 | 0.9762 | 0.9762 |
5 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
6 | 0.9971 | 1.0000 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9921 |
7 | 0.9883 | 0.9997 | 0.9922 | 0.9922 | 0.9922 | 0.9684 | 0.9684 |
8 | 0.9971 | 0.9998 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9921 |
9 | 0.9912 | 0.9909 | 0.9961 | 0.9922 | 0.9942 | 0.9762 | 0.9762 |
Mean | 0.9947 | 0.9975 | 0.9977 | 0.9954 | 0.9965 | 0.9857 | 0.9858 |
SD | 0.0039 | 0.0039 | 0.0031 | 0.0038 | 0.0026 | 0.0105 | 0.0105 |
tuned_top_models
[ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), <catboost.core.CatBoostClassifier at 0x7fe327934a50>, LGBMClassifier(bagging_fraction=0.9, bagging_freq=0, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=1.0, importance_type='split', learning_rate=0.3, max_depth=-1, min_child_samples=61, min_child_weight=0.001, min_split_gain=0.3, n_estimators=190, n_jobs=-1, num_leaves=20, objective=None, random_state=1234, reg_alpha=0.15, reg_lambda=0.0001, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.1, n_estimators=300, random_state=1234)]
# ensemble top tuned models
bagged_top_models = [ensemble_model(i) for i in tuned_top_models]
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.9971 | 0.9974 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9921 |
1 | 0.9971 | 1.0000 | 0.9961 | 1.0000 | 0.9981 | 0.9921 | 0.9921 |
2 | 0.9912 | 0.9990 | 1.0000 | 0.9885 | 0.9942 | 0.9760 | 0.9763 |
3 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
4 | 0.9971 | 0.9895 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9921 |
5 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 | 1.0000 |
6 | 0.9971 | 1.0000 | 1.0000 | 0.9961 | 0.9981 | 0.9921 | 0.9921 |
7 | 0.9941 | 0.9993 | 0.9961 | 0.9961 | 0.9961 | 0.9842 | 0.9842 |
8 | 0.9941 | 0.9998 | 1.0000 | 0.9923 | 0.9961 | 0.9841 | 0.9842 |
9 | 0.9941 | 0.9924 | 0.9961 | 0.9961 | 0.9961 | 0.9842 | 0.9842 |
Mean | 0.9962 | 0.9977 | 0.9988 | 0.9961 | 0.9975 | 0.9897 | 0.9897 |
SD | 0.0026 | 0.0035 | 0.0018 | 0.0034 | 0.0017 | 0.0072 | 0.0071 |
bagged_top_models
[BaggingClassifier(base_estimator=ExtraTreesClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=<catboost.core.CatBoostClassifier object at 0x7fe300310350>, bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=LGBMClassifier(bagging_fraction=0.9, bagging_freq=0, boosting_type='gbdt', class_weight=None, colsample_bytree=1.0, feature_fraction=1.0, importance_type='split', learning_rate=0.3, max_depth=-1, min_child_samples=61, min_child_weight=0.001, min_split_gain=0.3, n_estimators=190, n_jobs=-1, num_leaves=20, objective=None, random_state=1234, reg_alpha=0.15, reg_lambda=0.0001, silent=True, subsample=1.0, subsample_for_bin=200000, subsample_freq=0), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced_subsample', criterion='gini', max_depth=10, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0, min_impurity_split=None, min_samples_leaf=5, min_samples_split=7, min_weight_fraction_leaf=0.0, n_estimators=160, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False), BaggingClassifier(base_estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.1, n_estimators=300, random_state=1234), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=1234, verbose=0, warm_start=False)]
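ensemble_model() bags each tuned model separately; PyCaret can also combine the five tuned models into a single ensemble by voting or stacking. A brief sketch (both functions exist in the PyCaret 2.x classification API; illustrative only):
# soft-voting ensemble over the tuned models (all five support predict_proba)
blended = blend_models(estimator_list = tuned_top_models, method = 'soft')
# stacked ensemble with a logistic-regression meta-learner by default
stacked = stack_models(estimator_list = tuned_top_models)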
# select the session's best model for each optimization metric
best1 = automl(optimize = 'AUC')
best2 = automl(optimize = 'Accuracy')
best3 = automl(optimize = 'Recall')
best4 = automl(optimize = 'Precision')
best5 = automl(optimize = 'F1')
print(); print("Best model based on AUC: "); print(best1)
print(); print("Best model based on Accuracy: "); print(best2)
print(); print("Best model based on Recall: "); print(best3)
print(); print("Best model based on Precision: "); print(best4)
print(); print("Best model based on F1: "); print(best5)
Best model based on AUC:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1, importance_type='gain', interaction_constraints='', learning_rate=0.300000012, max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=-1, num_parallel_tree=1, objective='binary:logistic', random_state=1234, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='auto', validate_parameters=1, verbosity=0)

Best model based on Accuracy:
ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)

Best model based on Recall:
DummyClassifier(constant=None, random_state=1234, strategy='prior')

Best model based on Precision:
AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0, n_estimators=50, random_state=1234)

Best model based on F1:
ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)
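automl() scans every model trained in the current session and returns the best one by cross-validated score for the chosen metric. Note the Recall winner is the Dummy Classifier: always predicting the majority class trivially yields a recall of 1.0, a reminder not to optimize recall in isolation. To rank on the held-out test set instead (a PyCaret 2.x option):
# pick the session's best model by holdout AUC rather than cross-validated AUC
best_holdout = automl(optimize = 'AUC', use_holdout = True)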
plot_model(best2, plot = 'auc')
plot_model(best2, plot = 'confusion_matrix')
plot_model(best2, plot = 'learning')
save_model(best2,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='type', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeric_strateg... ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=1234, verbose=0, warm_start=False)]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
Transformation Pipeline and Model Successfully Loaded
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction[["Label", "Score"]].head()
 | Label | Score |
---|---|---|
0 | red | 1.00 |
1 | red | 1.00 |
2 | red | 1.00 |
3 | red | 0.96 |
4 | red | 0.93 |
In this coding recipe, we used PyCaret to build a binary classifier that distinguishes red from white wine: setting up the environment, comparing baseline models, tuning and ensembling the best performers, evaluating on held-out and unseen data, and saving the final pipeline for reuse.