## How to use sklearn Naive Bayes Classifier in Binary Classification
def KickStarter_Example_402(test_size=0.33, random_state=None):
    """Demonstrate sklearn's GaussianNB on the PMLB 'adult' binary dataset.

    The function loads the dataset, prints descriptive statistics, splits
    it into train and test parts, reports 10-fold cross-validation accuracy
    on the training part, fits the model, and prints evaluation metrics
    for the held-out test part. Everything is written to stdout.

    Args:
        test_size: Value forwarded to ``train_test_split`` as ``test_size``.
            Defaults to 0.33.
        random_state: Seed forwarded to ``train_test_split``. The default
            ``None`` leaves the split unseeded; pass an int to make the
            split (and therefore the printed scores) repeatable.

    Returns:
        None.
    """
    print()
    print(format('How to use sklearn Naive Bayes Classifier in Binary Classification','*^92'))

    # -------------------------------------------------------------------------------------------
    # install Penn Machine Learning Benchmarks - Datasets using pip command --> pip install pmlb
    # -------------------------------------------------------------------------------------------
    # load libraries (kept function-local so the file itself has no import-time dependencies)
    from pmlb import fetch_data
    from sklearn.naive_bayes import GaussianNB #, MultinomialNB, BernoulliNB
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import cross_val_score
    from sklearn.metrics import accuracy_score, classification_report
    from sklearn.metrics import confusion_matrix, cohen_kappa_score

    # load dataset and descriptive statistics
    dataset_Name = 'adult'
    dataset = fetch_data(dataset_Name)
    print()
    print(dataset.head())
    print()
    print(dataset.columns)
    cols = ['age', 'workclass', 'fnlwgt', 'education', 'education-num',
            'marital-status', 'occupation', 'relationship', 'race', 'sex',
            'capital-gain', 'capital-loss', 'hours-per-week', 'native-country']
    print()
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    dataset[cols].info()
    print()
    print(dataset[cols].describe())
    print()
    print(dataset[cols].corr())

    # load features and target from the frame fetched above instead of calling
    # fetch_data a second time.
    # NOTE(review): relies on the PMLB convention that the label column is
    # named 'target', which is what fetch_data(..., return_X_y=True) splits on
    # - confirm against the installed pmlb version.
    X = dataset.drop('target', axis=1).values
    y = dataset['target'].values

    # Split Train and Test Datasets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # train model using naive bayes classifier
    model = GaussianNB()

    # Cross Validation (on the training part only, so the test part stays unseen)
    cv_results = cross_val_score(model, X_train, y_train, cv=10, scoring='accuracy',
                                 n_jobs=-1, verbose=0)

    # Cross Validation Results
    print('# ----------------------------------------------------------------')
    print(' GaussianNB Algorithm: PS-1: ')
    print('# ----------------------------------------------------------------')
    print('\nCross Validation results: \n', cv_results)
    prt_string = "\nCV Mean Accuracy: %f (Std: %f)" % (cv_results.mean(), cv_results.std())
    print(prt_string)

    # Train the Model
    trained_Model = model.fit(X_train, y_train)

    # Evaluate the skill of the Trained model
    pred_Class = trained_Model.predict(X_test)
    print('\n Accuracy score: \n', accuracy_score(y_test, pred_Class))
    print('\n Classification report: \n', classification_report(y_test, pred_Class))
    print('\n Confusion matrix: \n', confusion_matrix(y_test, pred_Class))
    print('\n Cohen kappa_score: \n', cohen_kappa_score(y_test, pred_Class))
# Run the example. This is an unguarded module-level call, so it executes
# both when the file is run as a script and whenever the file is imported.
KickStarter_Example_402()