How to apply scikit-learn's BaggingClassifier to the Adult income dataset

In [2]:
## How to apply sklearn Bagging Classifier to adult income data

def KickStarter_Example_405(n_repeats=5, test_size=0.33, random_state=42):
    """Compare three bagged decision-tree ensembles on the PMLB 'adult' dataset.

    Prints descriptive statistics of the feature columns, then trains three
    ``BaggingClassifier`` configurations (increasing tree depth and ensemble
    size) on several seeded train/test splits and draws a boxplot of the
    resulting test accuracies.

    Parameters
    ----------
    n_repeats : int, default 5
        Number of independent train/test splits evaluated per configuration.
        More than one split is needed for the boxplot to show any spread.
    test_size : float, default 0.33
        Fraction of the rows held out for testing in every split.
    random_state : int or None, default 42
        Base seed. Split ``i`` uses ``random_state + i`` for both the split
        and the bagging ensemble; ``None`` leaves everything unseeded.

    Raises
    ------
    ValueError
        If ``n_repeats`` is smaller than 1.
    """
    if n_repeats < 1:
        raise ValueError('n_repeats must be at least 1')

    print()
    print(format('How to apply sklearn Bagging Classifier to adult income data', '*^92'))
    # -------------------------------------------------------------------------------------------
    # install Penn Machine Learning Benchmarks - Datasets using pip command --> pip install pmlb
    # -------------------------------------------------------------------------------------------

    # load libraries
    from pmlb import fetch_data
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import BaggingClassifier
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    import seaborn as sns

    # load dataset and descriptive statistics
    dataset_name = 'adult'
    dataset = fetch_data(dataset_name)

    print()
    print(dataset.head())
    print()
    print(dataset.columns)

    # every column except the label is a feature
    feature_cols = [col for col in dataset.columns if col != 'target']
    features = dataset[feature_cols]

    print()
    features.info()  # info() prints its own report and returns None; do not wrap it in print()
    print()
    print(features.describe())
    print()
    print(features.corr())

    # reuse the frame that is already loaded instead of fetching the dataset a second time
    X = features.values
    y = dataset['target'].values

    # (label, base-tree seed, max tree depth, number of bagged trees)
    configs = [
        ('BaggingClf1', 7, 4, 50),
        ('BaggingClf2', 23, 8, 100),
        ('BaggingClf3', 42, 12, 200),
    ]
    scores = {label: [] for label, _, _, _ in configs}

    for repeat in range(n_repeats):
        # a distinct but reproducible seed per repeat
        seed = None if random_state is None else random_state + repeat

        # Split Train and Test Datasets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=seed)

        for label, tree_seed, max_depth, n_estimators in configs:
            base_tree = DecisionTreeClassifier(random_state=tree_seed, max_depth=max_depth)
            model = BaggingClassifier(base_tree, n_estimators=n_estimators, random_state=seed)
            model.fit(X_train, y_train)
            scores[label].append(model.score(X_test, y_test))

    labels = [label for label, _, _, _ in configs]

    # notches are omitted: with only a handful of scores per box they are not meaningful
    fig, ax = plt.subplots()
    sns.boxplot(data=[scores[label] for label in labels], ax=ax)
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels)
    ax.set_ylabel('Test Accuracy')
    ax.set_title('Bagging Classifier accuracy over {} train/test splits'.format(n_repeats))
    plt.show()

# Run the example: prints the dataset summaries and draws the test-accuracy boxplot.
KickStarter_Example_405()
****************How to apply sklearn Bagging Classifier to adult income data****************

    age  workclass    fnlwgt  education  education-num  marital-status  \
0  39.0          7   77516.0          9           13.0               4   
1  50.0          6   83311.0          9           13.0               2   
2  38.0          4  215646.0         11            9.0               0   
3  53.0          4  234721.0          1            7.0               2   
4  28.0          4  338409.0          9           13.0               2   

   occupation  relationship  race  sex  capital-gain  capital-loss  \
0           1             1     4    1        2174.0           0.0   
1           4             0     4    1           0.0           0.0   
2           6             1     4    1           0.0           0.0   
3           6             0     2    1           0.0           0.0   
4          10             5     2    0           0.0           0.0   

   hours-per-week  native-country  target  
0            40.0              39       1  
1            13.0              39       1  
2            40.0              39       1  
3            40.0              39       1  
4            40.0               5       1  

Index(['age', 'workclass', 'fnlwgt', 'education', 'education-num',
       'marital-status', 'occupation', 'relationship', 'race', 'sex',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'target'],
      dtype='object')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 14 columns):
age               48842 non-null float64
workclass         48842 non-null int64
fnlwgt            48842 non-null float64
education         48842 non-null int64
education-num     48842 non-null float64
marital-status    48842 non-null int64
occupation        48842 non-null int64
relationship      48842 non-null int64
race              48842 non-null int64
sex               48842 non-null int64
capital-gain      48842 non-null float64
capital-loss      48842 non-null float64
hours-per-week    48842 non-null float64
native-country    48842 non-null int64
dtypes: float64(6), int64(8)
memory usage: 5.2 MB
None

                age     workclass        fnlwgt     education  education-num  \
count  48842.000000  48842.000000  4.884200e+04  48842.000000   48842.000000   
mean      38.643585      3.870439  1.896641e+05     10.288420      10.078089   
std       13.710510      1.464234  1.056040e+05      3.874492       2.570973   
min       17.000000      0.000000  1.228500e+04      0.000000       1.000000   
25%       28.000000      4.000000  1.175505e+05      9.000000       9.000000   
50%       37.000000      4.000000  1.781445e+05     11.000000      10.000000   
75%       48.000000      4.000000  2.376420e+05     12.000000      12.000000   
max       90.000000      8.000000  1.490400e+06     15.000000      16.000000   

       marital-status    occupation  relationship          race           sex  \
count    48842.000000  48842.000000  48842.000000  48842.000000  48842.000000   
mean         2.618750      6.577700      1.443287      3.668052      0.668482   
std          1.507703      4.230509      1.602151      0.845986      0.470764   
min          0.000000      0.000000      0.000000      0.000000      0.000000   
25%          2.000000      3.000000      0.000000      4.000000      0.000000   
50%          2.000000      7.000000      1.000000      4.000000      1.000000   
75%          4.000000     10.000000      3.000000      4.000000      1.000000   
max          6.000000     14.000000      5.000000      4.000000      1.000000   

       capital-gain  capital-loss  hours-per-week  native-country  
count  48842.000000  48842.000000    48842.000000    48842.000000  
mean    1079.067626     87.502314       40.422382       36.749355  
std     7452.019058    403.004552       12.391444        7.775343  
min        0.000000      0.000000        1.000000        0.000000  
25%        0.000000      0.000000       40.000000       39.000000  
50%        0.000000      0.000000       40.000000       39.000000  
75%        0.000000      0.000000       45.000000       39.000000  
max    99999.000000   4356.000000       99.000000       41.000000  

                     age  workclass    fnlwgt  education  education-num  \
age             1.000000   0.017526 -0.076628  -0.015058       0.030940   
workclass       0.017526   1.000000 -0.016546   0.017187       0.055918   
fnlwgt         -0.076628  -0.016546  1.000000  -0.022570      -0.038761   
education      -0.015058   0.017187 -0.022570   1.000000       0.359668   
education-num   0.030940   0.055918 -0.038761   0.359668       1.000000   
marital-status -0.263978  -0.068441  0.029851  -0.037417      -0.069992   
occupation     -0.014259   0.260005  0.000860  -0.020972       0.112265   
relationship   -0.263383  -0.092365  0.009092  -0.010758      -0.090534   
race            0.028421   0.052932 -0.027062   0.013250       0.029239   
sex             0.088120   0.091223  0.027739  -0.027041       0.009328   
capital-gain    0.077229   0.036044 -0.003706   0.028928       0.125146   
capital-loss    0.056944   0.010880 -0.004366   0.017638       0.080972   
hours-per-week  0.071558   0.141283 -0.013519   0.057659       0.143689   
native-country -0.002861  -0.008631 -0.048680   0.061469       0.049107   

                marital-status  occupation  relationship      race       sex  \
age                  -0.263978   -0.014259     -0.263383  0.028421  0.088120   
workclass            -0.068441    0.260005     -0.092365  0.052932  0.091223   
fnlwgt                0.029851    0.000860      0.009092 -0.027062  0.027739   
education            -0.037417   -0.020972     -0.010758  0.013250 -0.027041   
education-num        -0.069992    0.112265     -0.090534  0.029239  0.009328   
marital-status        1.000000   -0.017179      0.187800 -0.070104 -0.127479   
occupation           -0.017179    1.000000     -0.076356  0.005671  0.075081   
relationship          0.187800   -0.076356      1.000000 -0.117041 -0.579797   
race                 -0.070104    0.005671     -0.117041  1.000000  0.086734   
sex                  -0.127479    0.075081     -0.579797  0.086734  1.000000   
capital-gain         -0.043969    0.024163     -0.056510  0.011581  0.047094   
capital-loss         -0.033872    0.017180     -0.057201  0.018595  0.045480   
hours-per-week       -0.185567    0.079986     -0.250400  0.039694  0.228560   
native-country       -0.021375   -0.013424     -0.003962  0.138231 -0.009780   

                capital-gain  capital-loss  hours-per-week  native-country  
age                 0.077229      0.056944        0.071558       -0.002861  
workclass           0.036044      0.010880        0.141283       -0.008631  
fnlwgt             -0.003706     -0.004366       -0.013519       -0.048680  
education           0.028928      0.017638        0.057659        0.061469  
education-num       0.125146      0.080972        0.143689        0.049107  
marital-status     -0.043969     -0.033872       -0.185567       -0.021375  
occupation          0.024163      0.017180        0.079986       -0.013424  
relationship       -0.056510     -0.057201       -0.250400       -0.003962  
race                0.011581      0.018595        0.039694        0.138231  
sex                 0.047094      0.045480        0.228560       -0.009780  
capital-gain        1.000000     -0.031441        0.082157       -0.001816  
capital-loss       -0.031441      1.000000        0.054467        0.003449  
hours-per-week      0.082157      0.054467        1.000000        0.000705  
native-country     -0.001816      0.003449        0.000705        1.000000