In [2]:
## How to compare sklearn classification algorithms in Python
## DataSet: skleran.datasets.load_breast_cancer()
def Snippet_183(): 
    print()
    print(format('How to compare sklearn classification algorithms in Python','*^82'))    
    
    import warnings
    warnings.filterwarnings("ignore")
    
    # load libraries
    import matplotlib.pyplot as plt
    from sklearn import model_selection
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.naive_bayes import GaussianNB
    from sklearn.svm import SVC
    from sklearn.model_selection import train_test_split
    from sklearn import datasets    
    import matplotlib.pyplot as plt    
    
    plt.style.use('ggplot')
    
    # load datasets
    seed = 42
    dataset = datasets.load_breast_cancer()
    X = dataset.data; y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    kfold = model_selection.KFold(n_splits=10, random_state=seed)    
    
    # prepare models
    models = []
    models.append(('LR', LogisticRegression()))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('KNN', KNeighborsClassifier()))
    models.append(('CART', DecisionTreeClassifier()))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC()))
    
    # evaluate each model
    results = []
    names = []
    scoring = 'accuracy'
    
    for name, model in models:
	     kfold = model_selection.KFold(n_splits=10, random_state=seed)
	     cv_results = model_selection.cross_val_score(model, X_train, y_train, cv=kfold, scoring=scoring)
	     results.append(cv_results)
	     names.append(name)
	     msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	     print(msg)
    
    # boxplot algorithm comparison
    fig = plt.figure(figsize=(12,12))
    fig.suptitle('How to compare sklearn classification algorithms')
    ax = fig.add_subplot(111)
    plt.boxplot(results)
    ax.set_xticklabels(names)
    plt.show()
Snippet_183()
************How to compare sklearn classification algorithms in Python************
LR: 0.953212 (0.025429)
LDA: 0.952990 (0.029905)
KNN: 0.927298 (0.054290)
CART: 0.929568 (0.036540)
NB: 0.936656 (0.027765)
SVM: 0.640421 (0.090904)
In [ ]: