For more projects visit: https://setscholars.net
# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as listed in PyCaret's dataset repository
whichDataset = 'diabetes'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
Row | Number of times pregnant | Plasma glucose concentration a 2 hours in an oral glucose tolerance test | Diastolic blood pressure (mm Hg) | Triceps skin fold thickness (mm) | 2-Hour serum insulin (mu U/ml) | Body mass index (weight in kg/(height in m)^2) | Diabetes pedigree function | Age (years) | Class variable |
---|---|---|---|---|---|---|---|---|---|
0 | 6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1 |
1 | 1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0 |
2 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 |
3 | 1 | 89 | 66 | 23 | 94 | 28.1 | 0.167 | 21 | 0 |
4 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 |
dataset.shape
(768, 9)
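get_data() returns one of PyCaret's bundled demo datasets as a pandas DataFrame; 'diabetes' is the classic Pima Indians Diabetes data (768 rows, 9 columns). Nothing else in the recipe depends on the demo loader, so a plain pandas read works just as well (the file name below is a hypothetical placeholder):

# Hypothetical alternative: run the same recipe on your own data
# dataset = pd.read_csv('my_data.csv')  # any DataFrame with a target column will do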
data = dataset.sample(frac=0.75, random_state=421)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Data for Modeling: (576, 9)
Unseen Data For Predictions: (192, 9)
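The 25% held back here never enters setup(), so it serves as genuinely unseen data for the final sanity check. The sample()/drop() pattern draws rows at random; if you would rather preserve the class ratio in both parts, a minimal sklearn-based alternative (same 75/25 split, same seed) looks like this:

from sklearn.model_selection import train_test_split
# Stratified variant of the same 75/25 split
data, data_unseen = train_test_split(
    dataset, test_size=0.25, stratify=dataset['Class variable'], random_state=421
)
data = data.reset_index(drop=True)
data_unseen = data_unseen.reset_index(drop=True)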
env_setup = setup(data = data, target = 'Class variable', session_id=412)
Row | Description | Value |
---|---|---|
0 | session_id | 412 |
1 | Target | Class variable |
2 | Target Type | Binary |
3 | Label Encoded | None |
4 | Original Data | (576, 9) |
5 | Missing Values | False |
6 | Numeric Features | 7 |
7 | Categorical Features | 1 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (403, 23) |
12 | Transformed Test Set | (173, 23) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | f6ff |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
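setup() infers column types, builds the preprocessing pipeline, and carves an internal train/test split (70/30 by default, hence the 403 and 173 rows above). Note that the 9 original columns become 23 transformed features: one predictor was inferred as categorical (most likely the integer pregnancy count) and one-hot encoded. If that inference is unwanted, the types can be forced explicitly; a minimal sketch:

# Force every predictor to be treated as numeric instead of relying on type inference
predictors = [c for c in data.columns if c != 'Class variable']
env_setup = setup(data=data, target='Class variable',
                  numeric_features=predictors, session_id=412)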
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
best_model = compare_models()
ID | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
---|---|---|---|---|---|---|---|---|---|
lr | Logistic Regression | 0.7768 | 0.8229 | 0.5901 | 0.7170 | 0.6383 | 0.4813 | 0.4918 | 0.3080 |
catboost | CatBoost Classifier | 0.7743 | 0.8624 | 0.6253 | 0.6887 | 0.6524 | 0.4867 | 0.4900 | 0.8420 |
ridge | Ridge Classifier | 0.7742 | 0.0000 | 0.5824 | 0.7175 | 0.6344 | 0.4751 | 0.4864 | 0.0060 |
lda | Linear Discriminant Analysis | 0.7717 | 0.8130 | 0.5753 | 0.7142 | 0.6282 | 0.4681 | 0.4798 | 0.0060 |
rf | Random Forest Classifier | 0.7668 | 0.8399 | 0.5390 | 0.7154 | 0.6104 | 0.4500 | 0.4617 | 0.2650 |
et | Extra Trees Classifier | 0.7641 | 0.8169 | 0.5308 | 0.7039 | 0.6019 | 0.4408 | 0.4510 | 0.2630 |
xgboost | Extreme Gradient Boosting | 0.7568 | 0.8148 | 0.6033 | 0.6616 | 0.6256 | 0.4474 | 0.4521 | 0.0860 |
knn | K Neighbors Classifier | 0.7544 | 0.8033 | 0.5962 | 0.6602 | 0.6201 | 0.4406 | 0.4466 | 0.0670 |
gbc | Gradient Boosting Classifier | 0.7520 | 0.8277 | 0.5890 | 0.6576 | 0.6194 | 0.4367 | 0.4396 | 0.0460 |
lightgbm | Light Gradient Boosting Machine | 0.7516 | 0.8279 | 0.5967 | 0.6551 | 0.6191 | 0.4368 | 0.4415 | 0.0160 |
ada | Ada Boost Classifier | 0.7396 | 0.7920 | 0.5604 | 0.6367 | 0.5912 | 0.4028 | 0.4080 | 0.0420 |
dt | Decision Tree Classifier | 0.7171 | 0.6906 | 0.6044 | 0.5984 | 0.5944 | 0.3797 | 0.3846 | 0.0060 |
nb | Naive Bayes | 0.6698 | 0.7454 | 0.2225 | 0.5583 | 0.3126 | 0.1484 | 0.1768 | 0.0060 |
dummy | Dummy Classifier | 0.6551 | 0.5000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0040 |
svm | SVM - Linear Kernel | 0.6374 | 0.0000 | 0.3824 | 0.4822 | 0.3683 | 0.1532 | 0.1773 | 0.0060 |
qda | Quadratic Discriminant Analysis | 0.5701 | 0.6264 | 0.5857 | 0.3600 | 0.4278 | 0.1410 | 0.1375 | 0.0090 |
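compare_models() cross-validates every estimator in PyCaret's model library with the fold settings from setup() and ranks them, by Accuracy by default. Ridge Classifier and the linear SVM report an AUC of 0.0000 simply because they expose no predict_proba, not because they discriminate poorly. To rank by a different metric and keep several candidates:

# Rank by AUC instead of Accuracy and return the three best fitted models
best_by_auc = compare_models(sort='AUC', n_select=3)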
catboost = create_model('catboost')
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.8049 | 0.9101 | 0.6429 | 0.7500 | 0.6923 | 0.5507 | 0.5542 |
1 | 0.7073 | 0.7513 | 0.5000 | 0.5833 | 0.5385 | 0.3260 | 0.3281 |
2 | 0.7561 | 0.8545 | 0.6429 | 0.6429 | 0.6429 | 0.4577 | 0.4577 |
3 | 0.7500 | 0.8034 | 0.5385 | 0.6364 | 0.5833 | 0.4065 | 0.4094 |
4 | 0.7750 | 0.8846 | 0.7143 | 0.6667 | 0.6897 | 0.5135 | 0.5143 |
5 | 0.8500 | 0.9148 | 0.7143 | 0.8333 | 0.7692 | 0.6591 | 0.6634 |
6 | 0.7250 | 0.8929 | 0.5714 | 0.6154 | 0.5926 | 0.3855 | 0.3861 |
7 | 0.8750 | 0.9038 | 0.8571 | 0.8000 | 0.8276 | 0.7297 | 0.7308 |
8 | 0.7250 | 0.8626 | 0.4286 | 0.6667 | 0.5217 | 0.3413 | 0.3577 |
9 | 0.7750 | 0.8462 | 0.6429 | 0.6923 | 0.6667 | 0.4972 | 0.4980 |
Mean | 0.7743 | 0.8624 | 0.6253 | 0.6887 | 0.6524 | 0.4867 | 0.4900 |
SD | 0.0520 | 0.0493 | 0.1163 | 0.0770 | 0.0927 | 0.1258 | 0.1244 |
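create_model() trains a single estimator with stratified 10-fold cross-validation (the fold count chosen in setup()); the Mean row reproduces the CatBoost line of the comparison table above. The fold count can be overridden per call:

# Same model, 5-fold cross-validation instead of 10
catboost_5fold = create_model('catboost', fold=5)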
tuned_catboost = tune_model(catboost)
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.8293 | 0.9048 | 0.6429 | 0.8182 | 0.7200 | 0.5997 | 0.6087 |
1 | 0.6341 | 0.6984 | 0.3571 | 0.4545 | 0.4000 | 0.1423 | 0.1444 |
2 | 0.7317 | 0.8280 | 0.6429 | 0.6000 | 0.6207 | 0.4135 | 0.4141 |
3 | 0.7500 | 0.7721 | 0.6154 | 0.6154 | 0.6154 | 0.4302 | 0.4302 |
4 | 0.8000 | 0.8984 | 0.7143 | 0.7143 | 0.7143 | 0.5604 | 0.5604 |
5 | 0.8500 | 0.8681 | 0.8571 | 0.7500 | 0.8000 | 0.6809 | 0.6847 |
6 | 0.7500 | 0.8874 | 0.5714 | 0.6667 | 0.6154 | 0.4318 | 0.4346 |
7 | 0.8000 | 0.8736 | 0.6429 | 0.7500 | 0.6923 | 0.5455 | 0.5490 |
8 | 0.7750 | 0.8242 | 0.5000 | 0.7778 | 0.6087 | 0.4611 | 0.4832 |
9 | 0.7750 | 0.8242 | 0.7143 | 0.6667 | 0.6897 | 0.5135 | 0.5143 |
Mean | 0.7695 | 0.8379 | 0.6258 | 0.6814 | 0.6476 | 0.4779 | 0.4824 |
SD | 0.0570 | 0.0608 | 0.1269 | 0.1007 | 0.1011 | 0.1379 | 0.1387 |
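tune_model() runs a randomized search (10 candidate settings by default) over a predefined hyperparameter grid and optimizes Accuracy unless told otherwise; with so few iterations the tuned mean accuracy (0.7695) can land slightly below the default model's (0.7743), as it does here. Searching longer and targeting a different metric is a one-liner:

# Broader search (25 candidates) optimized for AUC rather than Accuracy
tuned_catboost_auc = tune_model(catboost, optimize='AUC', n_iter=25)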
print(tuned_catboost)
<catboost.core.CatBoostClassifier object at 0x7fd8c613fa90>
plot_model(tuned_catboost, plot = 'auc')
plot_model(tuned_catboost, plot = 'pr')
plot_model(tuned_catboost, plot='feature')
plot_model(tuned_catboost, plot = 'confusion_matrix')
plot_model(tuned_catboost, plot = 'learning')
plot_model(tuned_catboost, plot = 'threshold')
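Each plot_model() call above renders one diagnostic, computed on the internal test set: ROC curve, precision-recall curve, feature importance, confusion matrix, learning curve, and discrimination threshold. In a notebook, evaluate_model() offers the same plots behind a single interactive widget:

# Interactive alternative: browse all available diagnostics from one widget
evaluate_model(tuned_catboost)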
lr = create_model('lr')
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.8049 | 0.8598 | 0.5000 | 0.8750 | 0.6364 | 0.5162 | 0.5540 |
1 | 0.6829 | 0.6640 | 0.3571 | 0.5556 | 0.4348 | 0.2287 | 0.2394 |
2 | 0.8049 | 0.8757 | 0.5714 | 0.8000 | 0.6667 | 0.5341 | 0.5492 |
3 | 0.8000 | 0.8291 | 0.6154 | 0.7273 | 0.6667 | 0.5252 | 0.5290 |
4 | 0.7750 | 0.8462 | 0.7143 | 0.6667 | 0.6897 | 0.5135 | 0.5143 |
5 | 0.8250 | 0.8187 | 0.7857 | 0.7333 | 0.7586 | 0.6216 | 0.6225 |
6 | 0.7250 | 0.8324 | 0.5000 | 0.6364 | 0.5600 | 0.3642 | 0.3698 |
7 | 0.8250 | 0.8544 | 0.7857 | 0.7333 | 0.7586 | 0.6216 | 0.6225 |
8 | 0.7500 | 0.8242 | 0.4286 | 0.7500 | 0.5455 | 0.3902 | 0.4193 |
9 | 0.7750 | 0.8242 | 0.6429 | 0.6923 | 0.6667 | 0.4972 | 0.4980 |
Mean | 0.7768 | 0.8229 | 0.5901 | 0.7170 | 0.6383 | 0.4813 | 0.4918 |
SD | 0.0436 | 0.0557 | 0.1387 | 0.0832 | 0.0950 | 0.1151 | 0.1127 |
tuned_lr = tune_model(lr)
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.8049 | 0.8492 | 0.5000 | 0.8750 | 0.6364 | 0.5162 | 0.5540 |
1 | 0.6829 | 0.6772 | 0.3571 | 0.5556 | 0.4348 | 0.2287 | 0.2394 |
2 | 0.8049 | 0.8757 | 0.5714 | 0.8000 | 0.6667 | 0.5341 | 0.5492 |
3 | 0.8000 | 0.8319 | 0.6154 | 0.7273 | 0.6667 | 0.5252 | 0.5290 |
4 | 0.7750 | 0.8489 | 0.7143 | 0.6667 | 0.6897 | 0.5135 | 0.5143 |
5 | 0.8000 | 0.8132 | 0.7857 | 0.6875 | 0.7333 | 0.5745 | 0.5777 |
6 | 0.7500 | 0.8352 | 0.5000 | 0.7000 | 0.5833 | 0.4118 | 0.4237 |
7 | 0.8250 | 0.8516 | 0.7857 | 0.7333 | 0.7586 | 0.6216 | 0.6225 |
8 | 0.7500 | 0.8159 | 0.4286 | 0.7500 | 0.5455 | 0.3902 | 0.4193 |
9 | 0.7750 | 0.8214 | 0.6429 | 0.6923 | 0.6667 | 0.4972 | 0.4980 |
Mean | 0.7768 | 0.8220 | 0.5901 | 0.7188 | 0.6382 | 0.4813 | 0.4927 |
SD | 0.0390 | 0.0515 | 0.1387 | 0.0798 | 0.0903 | 0.1061 | 0.1035 |
print(tuned_lr)
LogisticRegression(C=1.908, class_weight={}, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=412, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
plot_model(tuned_lr, plot = 'auc')
plot_model(tuned_lr, plot = 'pr')
plot_model(tuned_lr, plot = 'feature')
plot_model(tuned_lr, plot = 'confusion_matrix')
plot_model(tuned_lr, plot = 'learning')
plot_model(tuned_lr, plot = 'threshold')
predict_model(tuned_catboost);
Row | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|---|
0 | CatBoost Classifier | 0.7688 | 0.7692 | 0.5455 | 0.6667 | 0.6000 | 0.4397 | 0.4441 |
predict_model(tuned_lr);
Row | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|---|
0 | Logistic Regression | 0.7688 | 0.8294 | 0.4545 | 0.7143 | 0.5556 | 0.4096 | 0.4287 |
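Called without a data argument, predict_model() scores the internal hold-out split created by setup() (173 rows here) and appends Label and Score columns; the one-row grid above is that hold-out performance. The hold-out features and labels are also reachable directly if needed:

# Access the internal hold-out split that predict_model() just scored
X_test = get_config('X_test')
y_test = get_config('y_test')
print(X_test.shape, y_test.shape)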
final_lr = finalize_model(tuned_lr)
# Final model parameters for deployment
print(final_lr)
LogisticRegression(C=1.908, class_weight={}, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=412, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
predict_model(final_lr);
Row | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|---|
0 | Logistic Regression | 0.8208 | 0.8565 | 0.5818 | 0.8000 | 0.6737 | 0.5544 | 0.5677 |
unseen_predictions = predict_model(final_lr, data=data_unseen)
unseen_predictions.head()
Row | Number of times pregnant | Plasma glucose concentration a 2 hours in an oral glucose tolerance test | Diastolic blood pressure (mm Hg) | Triceps skin fold thickness (mm) | 2-Hour serum insulin (mu U/ml) | Body mass index (weight in kg/(height in m)^2) | Diabetes pedigree function | Age (years) | Class variable | Label | Score |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 | 1 | 0.8475 |
1 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 | 1 | 0.8887 |
2 | 5 | 116 | 74 | 0 | 0 | 25.6 | 0.201 | 30 | 0 | 0 | 0.8673 |
3 | 10 | 168 | 74 | 0 | 0 | 38.0 | 0.537 | 34 | 1 | 1 | 0.9040 |
4 | 10 | 139 | 80 | 0 | 0 | 27.1 | 1.441 | 57 | 0 | 1 | 0.7729 |
from pycaret.utils import check_metric
check_metric(unseen_predictions['Class variable'], unseen_predictions['Label'], metric = 'Accuracy')
0.7083
check_metric(unseen_predictions['Class variable'], unseen_predictions['Label'], metric = 'Recall')
0.5
check_metric(unseen_predictions['Class variable'], unseen_predictions['Label'], metric = 'Precision')
0.6607
check_metric(unseen_predictions['Class variable'], unseen_predictions['Label'], metric = 'AUC')
0.6695
check_metric(unseen_predictions['Class variable'], unseen_predictions['Label'], metric = 'F1')
0.5692
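The unseen-data accuracy (0.7083) sits noticeably below the 0.8208 reported after finalization, and that is expected: finalize_model() refits the pipeline on all 576 modelling rows, including the former hold-out, so the post-finalization hold-out grid is optimistic, while these 192 reserved rows give the honest estimate. For a fuller per-class breakdown, sklearn's report can be applied to the same columns:

from sklearn.metrics import classification_report
# Per-class precision/recall/F1 on the truly unseen rows
print(classification_report(unseen_predictions['Class variable'],
                            unseen_predictions['Label']))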
save_model(final_lr,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='Class variable', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeri... ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'), ('dfs', 'passthrough'), ('pca', 'passthrough'), ['trained_model', LogisticRegression(C=1.908, class_weight={}, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=412, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
Transformation Pipeline and Model Successfully Loaded
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction[["Label", "Score"]].head(10)
Row | Label | Score |
---|---|---|
0 | 1 | 0.8475 |
1 | 1 | 0.8887 |
2 | 0 | 0.8673 |
3 | 1 | 0.9040 |
4 | 1 | 0.7729 |
5 | 0 | 0.7984 |
6 | 0 | 0.7777 |
7 | 1 | 0.5320 |
8 | 1 | 0.5829 |
9 | 0 | 0.5471 |
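Because save_model() pickled the preprocessing pipeline together with the estimator, a fresh session can score raw, untransformed rows directly. A minimal sketch, assuming Final_Model.pkl is on disk and the input columns match the training data:

from pycaret.classification import load_model, predict_model
# Score a single raw record; the saved pipeline applies all transformations internally
pipeline = load_model('Final_Model')
one_row = data_unseen.drop(columns=['Class variable']).iloc[[0]]
print(predict_model(pipeline, data=one_row)[['Label', 'Score']])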
The second half of this recipe repeats the same preparation, but instead of working with a single algorithm it selects, tunes, and ensembles the top five models before letting automl() pick the winner.
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
from pycaret.classification import *
# provide the dataset name as listed in PyCaret's dataset repository
whichDataset = 'diabetes'
from pycaret.datasets import get_data
dataset = get_data(whichDataset)
data = dataset.sample(frac=0.75, random_state=421)
data_unseen = dataset.drop(data.index)
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)
print('Data for Modeling: ' + str(data.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))
Row | Number of times pregnant | Plasma glucose concentration a 2 hours in an oral glucose tolerance test | Diastolic blood pressure (mm Hg) | Triceps skin fold thickness (mm) | 2-Hour serum insulin (mu U/ml) | Body mass index (weight in kg/(height in m)^2) | Diabetes pedigree function | Age (years) | Class variable |
---|---|---|---|---|---|---|---|---|---|
0 | 6 | 148 | 72 | 35 | 0 | 33.6 | 0.627 | 50 | 1 |
1 | 1 | 85 | 66 | 29 | 0 | 26.6 | 0.351 | 31 | 0 |
2 | 8 | 183 | 64 | 0 | 0 | 23.3 | 0.672 | 32 | 1 |
3 | 1 | 89 | 66 | 23 | 94 | 28.1 | 0.167 | 21 | 0 |
4 | 0 | 137 | 40 | 35 | 168 | 43.1 | 2.288 | 33 | 1 |
Data for Modeling: (576, 9)
Unseen Data For Predictions: (192, 9)
clf = setup(data = data, target = 'Class variable', session_id=412)
Row | Description | Value |
---|---|---|
0 | session_id | 412 |
1 | Target | Class variable |
2 | Target Type | Binary |
3 | Label Encoded | None |
4 | Original Data | (576, 9) |
5 | Missing Values | False |
6 | Numeric Features | 7 |
7 | Categorical Features | 1 |
8 | Ordinal Features | False |
9 | High Cardinality Features | False |
10 | High Cardinality Method | None |
11 | Transformed Train Set | (403, 23) |
12 | Transformed Test Set | (173, 23) |
13 | Shuffle Train-Test | True |
14 | Stratify Train-Test | False |
15 | Fold Generator | StratifiedKFold |
16 | Fold Number | 10 |
17 | CPU Jobs | -1 |
18 | Use GPU | False |
19 | Log Experiment | False |
20 | Experiment Name | clf-default-name |
21 | USI | b219 |
22 | Imputation Type | simple |
23 | Iterative Imputation Iteration | None |
24 | Numeric Imputer | mean |
25 | Iterative Imputation Numeric Model | None |
26 | Categorical Imputer | constant |
27 | Iterative Imputation Categorical Model | None |
28 | Unknown Categoricals Handling | least_frequent |
29 | Normalize | False |
30 | Normalize Method | None |
31 | Transformation | False |
32 | Transformation Method | None |
33 | PCA | False |
34 | PCA Method | None |
35 | PCA Components | None |
36 | Ignore Low Variance | False |
37 | Combine Rare Levels | False |
38 | Rare Level Threshold | None |
39 | Numeric Binning | False |
40 | Remove Outliers | False |
41 | Outliers Threshold | None |
42 | Remove Multicollinearity | False |
43 | Multicollinearity Threshold | None |
44 | Remove Perfect Collinearity | True |
45 | Clustering | False |
46 | Clustering Iteration | None |
47 | Polynomial Features | False |
48 | Polynomial Degree | None |
49 | Trignometry Features | False |
50 | Polynomial Threshold | None |
51 | Group Features | False |
52 | Feature Selection | False |
53 | Feature Selection Method | classic |
54 | Features Selection Threshold | None |
55 | Feature Interaction | False |
56 | Feature Ratio | False |
57 | Interaction Threshold | None |
58 | Fix Imbalance | False |
59 | Fix Imbalance Method | SMOTE |
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
# compare all baseline models and select top 5
top_models = compare_models(n_select = 5)
ID | Model | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC | TT (Sec) |
---|---|---|---|---|---|---|---|---|---|
lr | Logistic Regression | 0.7768 | 0.8229 | 0.5901 | 0.7170 | 0.6383 | 0.4813 | 0.4918 | 0.3100 |
catboost | CatBoost Classifier | 0.7743 | 0.8624 | 0.6253 | 0.6887 | 0.6524 | 0.4867 | 0.4900 | 0.8770 |
ridge | Ridge Classifier | 0.7742 | 0.0000 | 0.5824 | 0.7175 | 0.6344 | 0.4751 | 0.4864 | 0.0060 |
lda | Linear Discriminant Analysis | 0.7717 | 0.8130 | 0.5753 | 0.7142 | 0.6282 | 0.4681 | 0.4798 | 0.0060 |
rf | Random Forest Classifier | 0.7668 | 0.8399 | 0.5390 | 0.7154 | 0.6104 | 0.4500 | 0.4617 | 0.2640 |
et | Extra Trees Classifier | 0.7641 | 0.8169 | 0.5308 | 0.7039 | 0.6019 | 0.4408 | 0.4510 | 0.2660 |
xgboost | Extreme Gradient Boosting | 0.7568 | 0.8148 | 0.6033 | 0.6616 | 0.6256 | 0.4474 | 0.4521 | 0.0870 |
knn | K Neighbors Classifier | 0.7544 | 0.8033 | 0.5962 | 0.6602 | 0.6201 | 0.4406 | 0.4466 | 0.0670 |
gbc | Gradient Boosting Classifier | 0.7520 | 0.8277 | 0.5890 | 0.6576 | 0.6194 | 0.4367 | 0.4396 | 0.0450 |
lightgbm | Light Gradient Boosting Machine | 0.7516 | 0.8279 | 0.5967 | 0.6551 | 0.6191 | 0.4368 | 0.4415 | 0.0160 |
ada | Ada Boost Classifier | 0.7396 | 0.7920 | 0.5604 | 0.6367 | 0.5912 | 0.4028 | 0.4080 | 0.0420 |
dt | Decision Tree Classifier | 0.7171 | 0.6906 | 0.6044 | 0.5984 | 0.5944 | 0.3797 | 0.3846 | 0.0060 |
nb | Naive Bayes | 0.6698 | 0.7454 | 0.2225 | 0.5583 | 0.3126 | 0.1484 | 0.1768 | 0.0060 |
dummy | Dummy Classifier | 0.6551 | 0.5000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0040 |
svm | SVM - Linear Kernel | 0.6374 | 0.0000 | 0.3824 | 0.4822 | 0.3683 | 0.1532 | 0.1773 | 0.0060 |
qda | Quadratic Discriminant Analysis | 0.5701 | 0.6264 | 0.5857 | 0.3600 | 0.4278 | 0.1410 | 0.1375 | 0.0070 |
top_models
[LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=412, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False),
 <catboost.core.CatBoostClassifier at 0x7f5c6e10de10>,
 RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=412, solver='auto', tol=0.001),
 LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001),
 RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1, oob_score=False, random_state=412, verbose=0, warm_start=False)]
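With n_select=5, compare_models() returns the five top-ranked fitted estimators as a plain Python list (here lr, catboost, ridge, lda, and rf), so they can be iterated over like any other sequence:

# Inspect the returned candidates
for rank, model in enumerate(top_models, start=1):
    print(rank, type(model).__name__)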
# tune top base models
tuned_top_models = [tune_model(i) for i in top_models]
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.8293 | 0.8915 | 0.7857 | 0.7333 | 0.7586 | 0.6268 | 0.6277 |
1 | 0.7317 | 0.7619 | 0.5714 | 0.6154 | 0.5926 | 0.3930 | 0.3936 |
2 | 0.8049 | 0.8571 | 0.8571 | 0.6667 | 0.7500 | 0.5941 | 0.6067 |
3 | 0.6750 | 0.7350 | 0.6154 | 0.5000 | 0.5517 | 0.3011 | 0.3051 |
4 | 0.8000 | 0.8709 | 0.7857 | 0.6875 | 0.7333 | 0.5745 | 0.5777 |
5 | 0.8500 | 0.9066 | 0.8571 | 0.7500 | 0.8000 | 0.6809 | 0.6847 |
6 | 0.8250 | 0.9038 | 0.7857 | 0.7333 | 0.7586 | 0.6216 | 0.6225 |
7 | 0.8000 | 0.8709 | 0.9286 | 0.6500 | 0.7647 | 0.6000 | 0.6290 |
8 | 0.7500 | 0.8434 | 0.6429 | 0.6429 | 0.6429 | 0.4505 | 0.4505 |
9 | 0.8000 | 0.8681 | 0.7143 | 0.7143 | 0.7143 | 0.5604 | 0.5604 |
Mean | 0.7866 | 0.8509 | 0.7544 | 0.6693 | 0.7067 | 0.5403 | 0.5458 |
SD | 0.0500 | 0.0549 | 0.1101 | 0.0705 | 0.0782 | 0.1135 | 0.1157 |
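When tune_model() runs inside a list comprehension, only the fold grid of the last model tuned (here the random forest) stays on screen. pull() retrieves the most recently displayed scoring grid, so an explicit loop can keep all five:

# Keep every model's CV grid while tuning in a loop
tuned_top_models, cv_grids = [], []
for m in top_models:
    tuned_top_models.append(tune_model(m))
    cv_grids.append(pull())  # pull() returns the last displayed scoring grid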
tuned_top_models
[LogisticRegression(C=1.908, class_weight={}, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=412, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False),
 <catboost.core.CatBoostClassifier at 0x7f5c6c0fd350>,
 RidgeClassifier(alpha=6.49, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=412, solver='auto', tol=0.001),
 LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=0.0005, solver='lsqr', store_covariance=False, tol=0.0001),
 RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='gini', max_depth=8, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=5, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=270, n_jobs=-1, oob_score=False, random_state=412, verbose=0, warm_start=False)]
# ensemble top tuned models
bagged_top_models = [ensemble_model(i) for i in tuned_top_models]
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.8537 | 0.9153 | 0.7857 | 0.7857 | 0.7857 | 0.6746 | 0.6746 |
1 | 0.6585 | 0.7407 | 0.5000 | 0.5000 | 0.5000 | 0.2407 | 0.2407 |
2 | 0.8049 | 0.8571 | 0.8571 | 0.6667 | 0.7500 | 0.5941 | 0.6067 |
3 | 0.7000 | 0.7493 | 0.6923 | 0.5294 | 0.6000 | 0.3668 | 0.3752 |
4 | 0.7750 | 0.8599 | 0.7143 | 0.6667 | 0.6897 | 0.5135 | 0.5143 |
5 | 0.8000 | 0.9038 | 0.7857 | 0.6875 | 0.7333 | 0.5745 | 0.5777 |
6 | 0.8250 | 0.8901 | 0.7857 | 0.7333 | 0.7586 | 0.6216 | 0.6225 |
7 | 0.7500 | 0.8626 | 0.8571 | 0.6000 | 0.7059 | 0.5000 | 0.5241 |
8 | 0.7250 | 0.8544 | 0.5714 | 0.6154 | 0.5926 | 0.3855 | 0.3861 |
9 | 0.8250 | 0.8846 | 0.7857 | 0.7333 | 0.7586 | 0.6216 | 0.6225 |
Mean | 0.7717 | 0.8518 | 0.7335 | 0.6518 | 0.6874 | 0.5093 | 0.5145 |
SD | 0.0591 | 0.0569 | 0.1115 | 0.0866 | 0.0883 | 0.1311 | 0.1313 |
bagged_top_models
[BaggingClassifier(base_estimator=LogisticRegression(C=1.908, class_weight={}, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=1000, multi_class='auto', n_jobs=None, penalty='l2', random_state=412, solver='lbfgs', tol=0.0001, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=412, verbose=0, warm_start=False),
 BaggingClassifier(base_estimator=<catboost.core.CatBoostClassifier object at 0x7f5c6c0d6310>, bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=412, verbose=0, warm_start=False),
 BaggingClassifier(base_estimator=RidgeClassifier(alpha=6.49, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=412, solver='auto', tol=0.001), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=412, verbose=0, warm_start=False),
 BaggingClassifier(base_estimator=LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=0.0005, solver='lsqr', store_covariance=False, tol=0.0001), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=412, verbose=0, warm_start=False),
 BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='gini', max_depth=8, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=5, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=270, n_jobs=-1, oob_score=False, random_state=412, verbose=0, warm_start=False), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=412, verbose=0, warm_start=False)]
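ensemble_model() defaults to bagging: each tuned estimator is wrapped in a scikit-learn BaggingClassifier with 10 estimators. Boosting, voting, and stacking are the other ensembling routes PyCaret offers; hedged sketches:

# Boost instead of bag (AdaBoost-style reweighting around one base estimator)
boosted_rf = ensemble_model(tuned_top_models[-1], method='Boosting')
# Combine all five tuned models by voting ...
blender = blend_models(estimator_list=tuned_top_models)
# ... or by training a meta-model on their predictions
stacker = stack_models(estimator_list=tuned_top_models)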
# select the best model in the session for each metric of interest
best1 = automl(optimize = 'AUC')
best2 = automl(optimize = 'Accuracy')
best3 = automl(optimize = 'Recall')
best4 = automl(optimize = 'Precision')
best5 = automl(optimize = 'F1')
print(); print("Best model based on AUC: "); print(best1)
print(); print("Best model based on Accuracy: "); print(best2)
print(); print("Best model based on Recall: "); print(best3)
print(); print("Best model based on Precision: "); print(best4)
print(); print("Best model based on F1: "); print(best5)
Best model based on AUC:
<catboost.core.CatBoostClassifier object at 0x7f5c6ea0a250>

Best model based on Accuracy:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='gini', max_depth=8, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=5, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=270, n_jobs=-1, oob_score=False, random_state=412, verbose=0, warm_start=False)

Best model based on Recall:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='gini', max_depth=8, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=5, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=270, n_jobs=-1, oob_score=False, random_state=412, verbose=0, warm_start=False)

Best model based on Precision:
BaggingClassifier(base_estimator=RidgeClassifier(alpha=6.49, class_weight=None, copy_X=True, fit_intercept=True, max_iter=None, normalize=False, random_state=412, solver='auto', tol=0.001), bootstrap=True, bootstrap_features=False, max_features=1.0, max_samples=1.0, n_estimators=10, n_jobs=None, oob_score=False, random_state=412, verbose=0, warm_start=False)

Best model based on F1:
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight='balanced', criterion='gini', max_depth=8, max_features='sqrt', max_leaf_nodes=None, max_samples=None, min_impurity_decrease=0.001, min_impurity_split=None, min_samples_leaf=5, min_samples_split=5, min_weight_fraction_leaf=0.0, n_estimators=270, n_jobs=-1, oob_score=False, random_state=412, verbose=0, warm_start=False)
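automl() scans every model trained in the current session, base, tuned, and bagged alike, and returns the one with the best cross-validated score for the requested metric; here CatBoost wins on AUC while the tuned random forest takes Accuracy, Recall, and F1. Before committing to one, each winner can be sanity-checked on the truly unseen rows:

from pycaret.utils import check_metric
# Compare the metric winners on the 192 reserved rows
for name, m in [('AUC', best1), ('Accuracy', best2), ('F1', best5)]:
    preds = predict_model(m, data=data_unseen)
    acc = check_metric(preds['Class variable'], preds['Label'], metric='Accuracy')
    print(name, 'winner -> unseen accuracy:', acc)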
plot_model(best1, plot = 'auc')
plot_model(best1, plot = 'confusion_matrix')
plot_model(best1, plot = 'learning')
save_model(best1,'Final_Model')
Transformation Pipeline and Model Successfully Saved
(Pipeline(memory=None, steps=[('dtypes', DataTypes_Auto_infer(categorical_features=[], display_types=True, features_todrop=[], id_columns=[], ml_usecase='classification', numerical_features=[], target='Class variable', time_features=[])), ('imputer', Simple_Imputer(categorical_strategy='not_available', fill_value_categorical=None, fill_value_numerical=None, numeri... ('cluster_all', 'passthrough'), ('dummy', Dummify(target='Class variable')), ('fix_perfect', Remove_100(target='Class variable')), ('clean_names', Clean_Colum_Names()), ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'), ('dfs', 'passthrough'), ('pca', 'passthrough'), ['trained_model', <catboost.core.CatBoostClassifier object at 0x7f5c6ea0a250>]], verbose=False), 'Final_Model.pkl')
load_saved_model = load_model('Final_Model')
new_prediction = predict_model(load_saved_model, data=data_unseen)
new_prediction[["Label", "Score"]].head()
Transformation Pipeline and Model Successfully Loaded
Row | Label | Score |
---|---|---|
0 | 1 | 0.7370 |
1 | 1 | 0.6251 |
2 | 0 | 0.8190 |
3 | 1 | 0.9534 |
4 | 0 | 0.7222 |
In this coding recipe, we showed how to use PyCaret in Python to set up a classification experiment, compare and tune candidate models, ensemble the strongest performers, evaluate them on truly unseen data, and save the final pipeline for deployment.