# For more projects visit: https://setscholars.net
# (The line above was bare prose in the original and would raise a
# SyntaxError when executed as a .py script; it must be a comment.)

# Suppress warnings in Jupyter Notebooks
import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
# plt.style.use('fivethirtyeight')
# plt.style.use('ggplot')
import numpy as np
import pandas as pd

# OpenML Dataset ID
whichDataset = 12  # provide dataset id

import openml
from openml.datasets import get_dataset

# Download the dataset object (data, metadata and description) from OpenML.
dataset = openml.datasets.get_dataset(whichDataset)

# Print a summary: dataset name, target attribute, download URL, description.
print(
    f"This is dataset '{dataset.name}', the target feature is "
    f"'{dataset.default_target_attribute}'"
)
print(f"URL: {dataset.url}")
print(dataset.description)
This is dataset 'mfeat-factors', the target feature is 'class' URL: https://www.openml.org/data/v1/download/12/mfeat-factors.arff **Author**: Robert P.W. Duin, Department of Applied Physics, Delft University of Technology **Source**: [UCI](https://archive.ics.uci.edu/ml/datasets/Multiple+Features) - 1998 **Please cite**: [UCI](https://archive.ics.uci.edu/ml/citation_policy.html) **Multiple Features Dataset: Factors** One of a set of 6 datasets describing features of handwritten numerals (0 - 9) extracted from a collection of Dutch utility maps. Corresponding patterns in different datasets correspond to the same original character. 200 instances per class (for a total of 2,000 instances) have been digitized in binary images. ### Attribute Information The attributes represent 216 profile correlations. No more information is known. ### Relevant Papers A slightly different version of the database is used in M. van Breukelen, R.P.W. Duin, D.M.J. Tax, and J.E. den Hartog, Handwritten digit recognition by combined classifiers, Kybernetika, vol. 34, no. 4, 1998, 381-386. The database as is is used in: A.K. Jain, R.P.W. Duin, J. Mao, Statistical Pattern Recognition: A Review, IEEE Transactions on Pattern Analysis and Machine Intelligence archive, Volume 22 Issue 1, January 2000
# Extract the feature matrix and class labels from the OpenML dataset
# object, then rebuild them as a single pandas DataFrame whose last
# column is named 'target'. NOTE: this rebinds `dataset` from the
# OpenML object to the DataFrame — later cells rely on that.
features, labels, cat_mask, feature_names = dataset.get_data(
    dataset_format="array",
    target=dataset.default_target_attribute,
)
dataset = pd.DataFrame(data=features, columns=feature_names)
dataset["target"] = labels

# Quick structural sanity checks: shape, first rows, column names.
print()
print(dataset.shape)
print()
print(dataset.head())
print()
print(dataset.columns.values)
(2000, 217) att1 att2 att3 att4 att5 att6 att7 att8 att9 att10 ... \ 0 98.0 236.0 531.0 673.0 607.0 647.0 2.0 9.0 3.0 6.0 ... 1 121.0 193.0 607.0 611.0 585.0 665.0 7.0 9.0 2.0 4.0 ... 2 115.0 141.0 590.0 605.0 557.0 627.0 12.0 6.0 3.0 3.0 ... 3 90.0 122.0 627.0 692.0 607.0 642.0 0.0 6.0 4.0 5.0 ... 4 157.0 167.0 681.0 666.0 587.0 666.0 8.0 6.0 1.0 4.0 ... att208 att209 att210 att211 att212 att213 att214 att215 att216 \ 0 536.0 628.0 632.0 18.0 36.0 8.0 15.0 12.0 13.0 1 458.0 570.0 634.0 15.0 32.0 11.0 13.0 15.0 11.0 2 498.0 572.0 656.0 20.0 35.0 16.0 14.0 13.0 6.0 3 549.0 628.0 621.0 16.0 35.0 7.0 12.0 15.0 9.0 4 525.0 568.0 653.0 16.0 35.0 10.0 15.0 13.0 13.0 target 0 0 1 0 2 0 3 0 4 0 [5 rows x 217 columns] ['att1' 'att2' 'att3' 'att4' 'att5' 'att6' 'att7' 'att8' 'att9' 'att10' 'att11' 'att12' 'att13' 'att14' 'att15' 'att16' 'att17' 'att18' 'att19' 'att20' 'att21' 'att22' 'att23' 'att24' 'att25' 'att26' 'att27' 'att28' 'att29' 'att30' 'att31' 'att32' 'att33' 'att34' 'att35' 'att36' 'att37' 'att38' 'att39' 'att40' 'att41' 'att42' 'att43' 'att44' 'att45' 'att46' 'att47' 'att48' 'att49' 'att50' 'att51' 'att52' 'att53' 'att54' 'att55' 'att56' 'att57' 'att58' 'att59' 'att60' 'att61' 'att62' 'att63' 'att64' 'att65' 'att66' 'att67' 'att68' 'att69' 'att70' 'att71' 'att72' 'att73' 'att74' 'att75' 'att76' 'att77' 'att78' 'att79' 'att80' 'att81' 'att82' 'att83' 'att84' 'att85' 'att86' 'att87' 'att88' 'att89' 'att90' 'att91' 'att92' 'att93' 'att94' 'att95' 'att96' 'att97' 'att98' 'att99' 'att100' 'att101' 'att102' 'att103' 'att104' 'att105' 'att106' 'att107' 'att108' 'att109' 'att110' 'att111' 'att112' 'att113' 'att114' 'att115' 'att116' 'att117' 'att118' 'att119' 'att120' 'att121' 'att122' 'att123' 'att124' 'att125' 'att126' 'att127' 'att128' 'att129' 'att130' 'att131' 'att132' 'att133' 'att134' 'att135' 'att136' 'att137' 'att138' 'att139' 'att140' 'att141' 'att142' 'att143' 'att144' 'att145' 'att146' 'att147' 'att148' 'att149' 'att150' 'att151' 'att152' 'att153' 'att154' 
'att155' 'att156' 'att157' 'att158' 'att159' 'att160' 'att161' 'att162' 'att163' 'att164' 'att165' 'att166' 'att167' 'att168' 'att169' 'att170' 'att171' 'att172' 'att173' 'att174' 'att175' 'att176' 'att177' 'att178' 'att179' 'att180' 'att181' 'att182' 'att183' 'att184' 'att185' 'att186' 'att187' 'att188' 'att189' 'att190' 'att191' 'att192' 'att193' 'att194' 'att195' 'att196' 'att197' 'att198' 'att199' 'att200' 'att201' 'att202' 'att203' 'att204' 'att205' 'att206' 'att207' 'att208' 'att209' 'att210' 'att211' 'att212' 'att213' 'att214' 'att215' 'att216' 'target']
# Total count of missing (NaN) cells across the entire DataFrame.
total_missing = dataset.isnull().sum().sum()
print()
print(total_missing)

# Per-class row counts (disabled):
# print()
# print(dataset.groupby('target').count())
0
# Hold-out split: 75% of rows become the modelling set, the remaining
# 25% are kept back as "unseen" data for final predictions.
# random_state pins the sample so the split is reproducible.
data = dataset.sample(frac=0.75, random_state=1234)
data_unseen = dataset.drop(data.index)

# Re-index both partitions from zero so row labels are contiguous.
data.reset_index(inplace=True, drop=True)
data_unseen.reset_index(inplace=True, drop=True)

print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')
Data for Modeling: (1500, 217) Unseen Data For Predictions: (500, 217)
# Optional EDA tooling: pandas_profiling is imported for the (currently
# disabled) profile report below; sweetviz alternative is also commented out.
import pandas_profiling
#dataset.profile_report()
#import sweetviz as sv
#sweet_report = sv.analyze(df)
#sweet_report.show_notebook(layout='vertical', w=880, h=1000,scale=0.8)
# Start (or attach to) a local H2O cluster; all subsequent H2OFrame and
# model operations run against this server.
import h2o
h2o.init()
Checking whether there is an H2O instance running at http://localhost:54321 ..... not found. Attempting to start a local H2O server... Java Version: openjdk version "11.0.14" 2022-01-18; OpenJDK Runtime Environment (build 11.0.14+9-post-Debian-1deb10u1); OpenJDK 64-Bit Server VM (build 11.0.14+9-post-Debian-1deb10u1, mixed mode, sharing) Starting server from /opt/conda/lib/python3.7/site-packages/h2o/backend/bin/h2o.jar Ice root: /tmp/tmpo06mozzr JVM stdout: /tmp/tmpo06mozzr/h2o_jupyter_started_from_python.out JVM stderr: /tmp/tmpo06mozzr/h2o_jupyter_started_from_python.err Server is running at http://127.0.0.1:54321 Connecting to H2O server at http://127.0.0.1:54321 ... successful. Warning: Your H2O cluster version is too old (6 months and 10 days)!Please download and install the latest version from http://h2o.ai/download/
H2O_cluster_uptime: | 02 secs |
H2O_cluster_timezone: | Etc/UTC |
H2O_data_parsing_timezone: | UTC |
H2O_cluster_version: | 3.34.0.1 |
H2O_cluster_version_age: | 6 months and 10 days !!! |
H2O_cluster_name: | H2O_from_python_jupyter_nydu2c |
H2O_cluster_total_nodes: | 1 |
H2O_cluster_free_memory: | 15.65 Gb |
H2O_cluster_total_cores: | 16 |
H2O_cluster_allowed_cores: | 16 |
H2O_cluster_status: | locked, healthy |
H2O_connection_url: | http://127.0.0.1:54321 |
H2O_connection_proxy: | {"http": null, "https": null} |
H2O_internal_security: | False |
H2O_API_Extensions: | Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 |
Python_version: | 3.7.10 final |
# Convert to h2o dataframe: upload the pandas modelling partition `data`
# into the running H2O cluster as an H2OFrame.
hf = h2o.H2OFrame(data)
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
# Preview the first rows of the uploaded H2OFrame (default row count).
hf.head()
att1 | att2 | att3 | att4 | att5 | att6 | att7 | att8 | att9 | att10 | att11 | att12 | att13 | att14 | att15 | att16 | att17 | att18 | att19 | att20 | att21 | att22 | att23 | att24 | att25 | att26 | att27 | att28 | att29 | att30 | att31 | att32 | att33 | att34 | att35 | att36 | att37 | att38 | att39 | att40 | att41 | att42 | att43 | att44 | att45 | att46 | att47 | att48 | att49 | att50 | att51 | att52 | att53 | att54 | att55 | att56 | att57 | att58 | att59 | att60 | att61 | att62 | att63 | att64 | att65 | att66 | att67 | att68 | att69 | att70 | att71 | att72 | att73 | att74 | att75 | att76 | att77 | att78 | att79 | att80 | att81 | att82 | att83 | att84 | att85 | att86 | att87 | att88 | att89 | att90 | att91 | att92 | att93 | att94 | att95 | att96 | att97 | att98 | att99 | att100 | att101 | att102 | att103 | att104 | att105 | att106 | att107 | att108 | att109 | att110 | att111 | att112 | att113 | att114 | att115 | att116 | att117 | att118 | att119 | att120 | att121 | att122 | att123 | att124 | att125 | att126 | att127 | att128 | att129 | att130 | att131 | att132 | att133 | att134 | att135 | att136 | att137 | att138 | att139 | att140 | att141 | att142 | att143 | att144 | att145 | att146 | att147 | att148 | att149 | att150 | att151 | att152 | att153 | att154 | att155 | att156 | att157 | att158 | att159 | att160 | att161 | att162 | att163 | att164 | att165 | att166 | att167 | att168 | att169 | att170 | att171 | att172 | att173 | att174 | att175 | att176 | att177 | att178 | att179 | att180 | att181 | att182 | att183 | att184 | att185 | att186 | att187 | att188 | att189 | att190 | att191 | att192 | att193 | att194 | att195 | att196 | att197 | att198 | att199 | att200 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
129 | 235 | 599 | 752 | 607 | 632 | 2 | 15 | 22 | 7 | 12 | 7 | 260 | 332 | 644 | 681 | 612 | 977 | 3 | 38 | 9 | 10 | 10 | 17 | 292 | 336 | 665 | 618 | 893 | 712 | 34 | 10 | 1 | 14 | 2 | 8 | 384 | 336 | 709 | 797 | 788 | 909 | 26 | 10 | 13 | 13 | 14 | 13 | 351 | 365 | 691 | 821 | 874 | 735 | 35 | 16 | 1 | 14 | 15 | 14 | 311 | 387 | 548 | 781 | 933 | 705 | 34 | 8 | 14 | 12 | 8 | 8 | 301 | 323 | 757 | 594 | 619 | 1043 | 9 | 30 | 9 | 13 | 8 | 18 | 339 | 273 | 682 | 551 | 589 | 889 | 6 | 24 | 6 | 10 | 9 | 16 | 354 | 304 | 827 | 703 | 755 | 977 | 20 | 27 | 7 | 13 | 14 | 9 | 410 | 280 | 549 | 865 | 882 | 734 | 26 | 28 | 27 | 6 | 15 | 8 | 291 | 383 | 441 | 722 | 784 | 702 | 25 | 25 | 19 | 4 | 9 | 12 | 249 | 213 | 634 | 773 | 559 | 720 | 11 | 16 | 18 | 3 | 11 | 7 | 313 | 211 | 642 | 791 | 627 | 670 | 9 | 28 | 16 | 12 | 10 | 18 | 281 | 335 | 644 | 653 | 671 | 953 | 14 | 31 | 6 | 14 | 2 | 13 | 285 | 353 | 607 | 614 | 841 | 745 | 17 | 9 | 9 | 7 | 3 | 10 | 473 | 485 | 671 | 903 | 1068 | 596 | 25 | 27 | 33 | 7 | 17 | 9 | 379 | 383 | 910 | 810 | 611 | 1179 | 8 | 26 |
425 | 293 | 798 | 862 | 652 | 523 | 20 | 28 | 12 | 10 | 7 | 11 | 198 | 282 | 661 | 739 | 609 | 836 | 15 | 13 | 19 | 3 | 5 | 11 | 166 | 348 | 734 | 656 | 772 | 547 | 16 | 19 | 9 | 5 | 11 | 2 | 484 | 310 | 778 | 859 | 687 | 746 | 28 | 35 | 19 | 12 | 15 | 9 | 397 | 325 | 750 | 811 | 719 | 566 | 23 | 35 | 9 | 9 | 16 | 10 | 301 | 219 | 635 | 687 | 788 | 570 | 18 | 23 | 4 | 7 | 13 | 0 | 227 | 251 | 662 | 684 | 618 | 906 | 9 | 21 | 19 | 0 | 11 | 4 | 239 | 263 | 635 | 699 | 678 | 748 | 12 | 13 | 12 | 7 | 10 | 16 | 286 | 184 | 694 | 851 | 696 | 830 | 16 | 8 | 11 | 0 | 15 | 17 | 472 | 302 | 802 | 659 | 751 | 607 | 18 | 15 | 31 | 13 | 16 | 0 | 297 | 261 | 696 | 656 | 671 | 653 | 15 | 10 | 9 | 17 | 10 | 6 | 423 | 237 | 887 | 907 | 592 | 615 | 11 | 27 | 8 | 16 | 6 | 1 | 291 | 179 | 843 | 939 | 622 | 559 | 13 | 15 | 8 | 1 | 5 | 14 | 185 | 265 | 621 | 689 | 624 | 814 | 4 | 14 | 4 | 1 | 11 | 9 | 265 | 261 | 630 | 568 | 758 | 590 | 9 | 16 | 1 | 8 | 12 | 2 | 493 | 339 | 870 | 751 | 951 | 505 | 15 | 16 | 23 | 8 | 18 | 1 | 411 | 323 | 859 | 928 | 628 | 1042 | 26 | 17 |
334 | 210 | 701 | 690 | 647 | 652 | 14 | 19 | 15 | 12 | 8 | 11 | 381 | 339 | 460 | 605 | 670 | 993 | 19 | 32 | 22 | 9 | 6 | 11 | 263 | 339 | 671 | 578 | 1049 | 726 | 24 | 18 | 12 | 9 | 10 | 2 | 369 | 383 | 661 | 663 | 864 | 927 | 18 | 22 | 22 | 12 | 16 | 15 | 376 | 384 | 633 | 619 | 966 | 745 | 25 | 22 | 12 | 11 | 17 | 10 | 274 | 316 | 530 | 589 | 1091 | 705 | 24 | 16 | 7 | 7 | 10 | 0 | 320 | 292 | 533 | 516 | 733 | 1063 | 13 | 26 | 22 | 8 | 10 | 10 | 434 | 230 | 482 | 543 | 747 | 905 | 16 | 20 | 15 | 5 | 11 | 16 | 495 | 317 | 577 | 689 | 787 | 987 | 32 | 17 | 16 | 8 | 16 | 17 | 423 | 283 | 705 | 669 | 1014 | 762 | 16 | 20 | 24 | 11 | 17 | 0 | 330 | 246 | 557 | 528 | 934 | 718 | 15 | 27 | 12 | 9 | 11 | 6 | 204 | 136 | 790 | 759 | 705 | 744 | 5 | 10 | 5 | 8 | 7 | 1 | 348 | 170 | 776 | 777 | 773 | 676 | 7 | 18 | 5 | 7 | 6 | 14 | 346 | 234 | 486 | 515 | 777 | 971 | 8 | 25 | 9 | 9 | 10 | 9 | 288 | 228 | 523 | 518 | 999 | 747 | 7 | 15 | 4 | 6 | 11 | 2 | 442 | 378 | 769 | 739 | 1226 | 618 | 15 | 19 | 20 | 12 | 19 | 1 | 484 | 382 | 638 | 750 | 643 | 1199 | 18 | 18 |
235 | 377 | 717 | 563 | 647 | 588 | 21 | 31 | 6 | 14 | 9 | 9 | 330 | 396 | 674 | 542 | 692 | 957 | 16 | 22 | 37 | 13 | 7 | 5 | 412 | 394 | 695 | 537 | 1031 | 668 | 15 | 16 | 27 | 7 | 5 | 14 | 536 | 336 | 759 | 638 | 808 | 867 | 27 | 10 | 37 | 10 | 17 | 15 | 453 | 389 | 743 | 676 | 924 | 687 | 24 | 6 | 27 | 11 | 18 | 8 | 439 | 453 | 564 | 708 | 1079 | 667 | 25 | 12 | 14 | 5 | 11 | 16 | 441 | 521 | 749 | 469 | 771 | 1027 | 12 | 14 | 37 | 18 | 11 | 12 | 383 | 587 | 708 | 386 | 795 | 869 | 13 | 24 | 30 | 9 | 12 | 0 | 464 | 430 | 773 | 494 | 755 | 951 | 11 | 37 | 29 | 10 | 17 | 7 | 526 | 362 | 703 | 782 | 1026 | 704 | 29 | 20 | 23 | 13 | 18 | 16 | 441 | 403 | 577 | 635 | 950 | 682 | 26 | 25 | 9 | 7 | 12 | 10 | 377 | 355 | 788 | 600 | 751 | 708 | 22 | 32 | 10 | 6 | 8 | 15 | 365 | 421 | 726 | 582 | 795 | 640 | 20 | 34 | 12 | 9 | 7 | 2 | 405 | 521 | 642 | 530 | 819 | 935 | 15 | 21 | 22 | 17 | 5 | 11 | 401 | 501 | 597 | 599 | 1043 | 711 | 20 | 19 | 19 | 10 | 6 | 18 | 529 | 477 | 787 | 866 | 1234 | 558 | 26 | 37 | 5 | 14 | 20 | 17 | 577 | 659 | 940 | 629 | 639 | 1163 | 27 | 36 |
408 | 446 | 890 | 902 | 726 | 574 | 26 | 16 | 31 | 14 | 10 | 11 | 251 | 235 | 921 | 815 | 703 | 943 | 21 | 29 | 10 | 5 | 10 | 9 | 335 | 127 | 856 | 680 | 766 | 654 | 10 | 21 | 10 | 3 | 12 | 18 | 345 | 95 | 938 | 931 | 735 | 853 | 20 | 21 | 2 | 0 | 0 | 19 | 266 | 190 | 934 | 891 | 745 | 673 | 25 | 17 | 10 | 1 | 1 | 12 | 376 | 232 | 785 | 781 | 766 | 653 | 26 | 19 | 23 | 5 | 8 | 20 | 236 | 196 | 1012 | 740 | 716 | 1013 | 19 | 29 | 0 | 10 | 10 | 16 | 262 | 348 | 967 | 723 | 616 | 855 | 18 | 29 | 7 | 7 | 7 | 4 | 73 | 287 | 996 | 875 | 776 | 937 | 8 | 26 | 8 | 0 | 0 | 3 | 483 | 191 | 886 | 813 | 839 | 666 | 34 | 17 | 30 | 13 | 13 | 20 | 298 | 284 | 756 | 732 | 727 | 668 | 35 | 30 | 28 | 17 | 11 | 14 | 486 | 330 | 971 | 931 | 638 | 694 | 31 | 17 | 27 | 16 | 11 | 19 | 376 | 402 | 911 | 963 | 642 | 626 | 21 | 21 | 25 | 1 | 10 | 6 | 246 | 306 | 843 | 759 | 740 | 921 | 24 | 30 | 15 | 9 | 12 | 15 | 436 | 304 | 836 | 594 | 792 | 697 | 29 | 18 | 18 | 10 | 11 | 22 | 482 | 384 | 970 | 817 | 901 | 526 | 35 | 26 | 42 | 12 | 15 | 21 | 174 | 466 | 1125 | 974 | 724 | 1149 | 32 | 25 |
350 | 360 | 738 | 859 | 641 | 734 | 24 | 25 | 23 | 3 | 8 | 11 | 401 | 305 | 523 | 742 | 682 | 839 | 19 | 10 | 24 | 14 | 14 | 11 | 311 | 215 | 764 | 717 | 1079 | 728 | 20 | 24 | 36 | 10 | 12 | 6 | 305 | 213 | 778 | 948 | 874 | 765 | 18 | 24 | 26 | 9 | 0 | 1 | 316 | 226 | 742 | 960 | 990 | 693 | 13 | 16 | 36 | 10 | 1 | 8 | 214 | 266 | 597 | 910 | 1121 | 743 | 12 | 26 | 33 | 12 | 8 | 14 | 382 | 220 | 672 | 749 | 761 | 859 | 13 | 14 | 26 | 11 | 8 | 4 | 310 | 268 | 615 | 656 | 777 | 753 | 16 | 8 | 33 | 18 | 11 | 16 | 529 | 289 | 712 | 800 | 773 | 839 | 38 | 21 | 34 | 11 | 8 | 17 | 173 | 151 | 728 | 816 | 1044 | 768 | 4 | 8 | 4 | 6 | 1 | 0 | 350 | 232 | 574 | 849 | 964 | 720 | 3 | 13 | 30 | 10 | 11 | 6 | 298 | 254 | 813 | 834 | 735 | 678 | 15 | 34 | 33 | 11 | 9 | 3 | 450 | 308 | 807 | 888 | 803 | 692 | 17 | 22 | 33 | 12 | 14 | 14 | 460 | 196 | 495 | 808 | 805 | 887 | 8 | 13 | 41 | 10 | 12 | 9 | 220 | 184 | 616 | 675 | 1029 | 753 | 5 | 21 | 40 | 9 | 11 | 14 | 122 | 352 | 830 | 962 | 1256 | 714 | 3 | 23 | 32 | 3 | 3 | 11 | 526 | 404 | 721 | 937 | 633 | 925 | 30 | 24 |
255 | 269 | 635 | 909 | 560 | 658 | 10 | 25 | 17 | 6 | 11 | 12 | 286 | 212 | 564 | 816 | 609 | 855 | 15 | 16 | 14 | 11 | 9 | 14 | 180 | 362 | 655 | 839 | 996 | 658 | 26 | 20 | 36 | 15 | 3 | 11 | 352 | 322 | 693 | 998 | 791 | 703 | 14 | 26 | 30 | 10 | 15 | 4 | 389 | 339 | 663 | 1050 | 907 | 625 | 27 | 26 | 36 | 11 | 16 | 11 | 193 | 257 | 510 | 1032 | 1038 | 739 | 26 | 22 | 23 | 15 | 9 | 15 | 319 | 287 | 677 | 835 | 682 | 897 | 21 | 24 | 30 | 10 | 9 | 7 | 277 | 187 | 610 | 722 | 706 | 783 | 18 | 18 | 37 | 15 | 10 | 19 | 508 | 180 | 751 | 856 | 690 | 863 | 28 | 15 | 38 | 10 | 15 | 20 | 236 | 328 | 623 | 902 | 961 | 638 | 18 | 8 | 0 | 7 | 16 | 5 | 281 | 293 | 479 | 965 | 881 | 782 | 17 | 13 | 20 | 13 | 10 | 11 | 283 | 229 | 708 | 884 | 658 | 694 | 7 | 26 | 25 | 14 | 10 | 8 | 367 | 157 | 688 | 944 | 720 | 746 | 11 | 20 | 29 | 9 | 9 | 17 | 327 | 203 | 554 | 904 | 734 | 913 | 16 | 19 | 35 | 11 | 3 | 8 | 143 | 213 | 539 | 805 | 958 | 729 | 13 | 19 | 30 | 12 | 4 | 13 | 261 | 423 | 729 | 1074 | 1177 | 604 | 17 | 13 | 28 | 6 | 18 | 14 | 557 | 289 | 824 | 983 | 554 | 1007 | 16 | 26 |
202 | 202 | 684 | 626 | 670 | 648 | 19 | 14 | 1 | 6 | 13 | 9 | 183 | 303 | 687 | 585 | 689 | 1017 | 14 | 19 | 32 | 11 | 11 | 19 | 223 | 367 | 688 | 590 | 930 | 728 | 17 | 25 | 22 | 11 | 1 | 12 | 301 | 357 | 798 | 697 | 753 | 927 | 27 | 23 | 32 | 10 | 13 | 11 | 328 | 382 | 778 | 749 | 835 | 747 | 32 | 23 | 22 | 9 | 14 | 16 | 286 | 290 | 625 | 759 | 980 | 727 | 33 | 21 | 9 | 13 | 7 | 10 | 234 | 352 | 828 | 532 | 770 | 1087 | 22 | 21 | 32 | 8 | 7 | 16 | 306 | 338 | 753 | 435 | 748 | 929 | 19 | 21 | 25 | 15 | 8 | 18 | 333 | 265 | 818 | 565 | 752 | 1011 | 5 | 14 | 24 | 8 | 13 | 11 | 453 | 285 | 634 | 821 | 979 | 740 | 41 | 23 | 18 | 9 | 14 | 10 | 324 | 240 | 506 | 690 | 883 | 742 | 38 | 26 | 4 | 13 | 8 | 16 | 314 | 178 | 719 | 659 | 718 | 768 | 28 | 15 | 11 | 12 | 12 | 11 | 232 | 190 | 705 | 653 | 722 | 700 | 24 | 15 | 11 | 9 | 11 | 20 | 216 | 296 | 645 | 597 | 800 | 995 | 27 | 14 | 17 | 7 | 1 | 15 | 288 | 296 | 694 | 624 | 986 | 771 | 32 | 28 | 14 | 4 | 2 | 8 | 482 | 314 | 762 | 907 | 1127 | 600 | 38 | 16 | 10 | 4 | 16 | 9 | 302 | 454 | 919 | 688 | 662 | 1223 | 25 | 13 |
466 | 482 | 921 | 952 | 625 | 940 | 31 | 30 | 30 | 11 | 12 | 11 | 449 | 253 | 678 | 823 | 666 | 933 | 26 | 23 | 17 | 16 | 12 | 13 | 383 | 157 | 753 | 740 | 1063 | 906 | 21 | 25 | 27 | 2 | 14 | 6 | 269 | 157 | 967 | 1025 | 858 | 907 | 11 | 9 | 17 | 1 | 2 | 3 | 276 | 174 | 937 | 1021 | 974 | 865 | 6 | 3 | 27 | 2 | 1 | 10 | 286 | 298 | 778 | 935 | 1105 | 883 | 5 | 21 | 40 | 4 | 8 | 14 | 394 | 286 | 849 | 822 | 745 | 841 | 20 | 15 | 17 | 19 | 8 | 6 | 390 | 404 | 802 | 753 | 761 | 829 | 23 | 25 | 24 | 12 | 9 | 18 | 495 | 305 | 765 | 905 | 757 | 893 | 33 | 38 | 25 | 7 | 4 | 19 | 177 | 205 | 909 | 879 | 1028 | 988 | 3 | 21 | 13 | 14 | 1 | 2 | 428 | 364 | 755 | 886 | 948 | 764 | 4 | 26 | 37 | 16 | 9 | 8 | 418 | 384 | 994 | 943 | 719 | 852 | 22 | 31 | 30 | 15 | 11 | 3 | 436 | 450 | 910 | 993 | 787 | 826 | 24 | 29 | 28 | 10 | 12 | 16 | 494 | 322 | 590 | 869 | 789 | 873 | 15 | 22 | 32 | 18 | 14 | 7 | 302 | 340 | 803 | 684 | 1013 | 903 | 12 | 28 | 35 | 11 | 13 | 14 | 118 | 366 | 1009 | 963 | 1240 | 948 | 4 | 38 | 41 | 11 | 5 | 11 | 464 | 530 | 810 | 1030 | 617 | 871 | 37 | 37 |
135 | 137 | 621 | 681 | 566 | 658 | 1 | 7 | 1 | 8 | 2 | 3 | 238 | 426 | 726 | 636 | 611 | 999 | 4 | 20 | 32 | 11 | 4 | 17 | 286 | 486 | 715 | 671 | 974 | 738 | 35 | 30 | 22 | 17 | 16 | 10 | 344 | 574 | 739 | 764 | 769 | 881 | 25 | 30 | 34 | 10 | 16 | 13 | 279 | 589 | 717 | 816 | 885 | 759 | 36 | 30 | 22 | 13 | 15 | 10 | 243 | 465 | 592 | 852 | 1016 | 739 | 35 | 28 | 9 | 15 | 16 | 8 | 251 | 461 | 831 | 621 | 684 | 1065 | 10 | 28 | 32 | 8 | 16 | 12 | 295 | 317 | 762 | 508 | 708 | 927 | 7 | 26 | 27 | 15 | 9 | 12 | 344 | 344 | 877 | 610 | 684 | 1005 | 19 | 7 | 28 | 8 | 8 | 11 | 356 | 388 | 571 | 862 | 959 | 734 | 27 | 22 | 18 | 9 | 9 | 8 | 293 | 307 | 499 | 779 | 877 | 760 | 26 | 25 | 6 | 15 | 11 | 14 | 283 | 209 | 656 | 704 | 660 | 768 | 10 | 16 | 11 | 16 | 3 | 9 | 307 | 189 | 674 | 698 | 710 | 716 | 8 | 12 | 11 | 9 | 4 | 14 | 271 | 389 | 718 | 676 | 736 | 1011 | 15 | 21 | 21 | 7 | 16 | 9 | 275 | 389 | 647 | 715 | 960 | 787 | 18 | 27 | 16 | 14 | 17 | 8 | 371 | 311 | 701 | 984 | 1177 | 610 | 26 | 9 | 10 | 8 | 11 | 7 | 383 | 305 | 986 | 743 | 562 | 1227 | 7 | 6 |
# Encode the label column as a factor (categorical) so H2O models treat
# the task as classification rather than regression.
hf['target'] = hf['target'].asfactor()

# Reproducible ~90/10 split of the H2O frame into training and
# validation partitions (H2O splits probabilistically, so the realised
# row counts are only approximately 90/10).
train, valid = hf.split_frame(ratios=[0.90], seed=1234)
print("Training Dataset", train.shape)
print("Validation Dataset", valid.shape)
Training Dataset (1353, 217) Validation Dataset (147, 217)
# Preview the first 5 rows of the training partition.
train.head(5)
att1 | att2 | att3 | att4 | att5 | att6 | att7 | att8 | att9 | att10 | att11 | att12 | att13 | att14 | att15 | att16 | att17 | att18 | att19 | att20 | att21 | att22 | att23 | att24 | att25 | att26 | att27 | att28 | att29 | att30 | att31 | att32 | att33 | att34 | att35 | att36 | att37 | att38 | att39 | att40 | att41 | att42 | att43 | att44 | att45 | att46 | att47 | att48 | att49 | att50 | att51 | att52 | att53 | att54 | att55 | att56 | att57 | att58 | att59 | att60 | att61 | att62 | att63 | att64 | att65 | att66 | att67 | att68 | att69 | att70 | att71 | att72 | att73 | att74 | att75 | att76 | att77 | att78 | att79 | att80 | att81 | att82 | att83 | att84 | att85 | att86 | att87 | att88 | att89 | att90 | att91 | att92 | att93 | att94 | att95 | att96 | att97 | att98 | att99 | att100 | att101 | att102 | att103 | att104 | att105 | att106 | att107 | att108 | att109 | att110 | att111 | att112 | att113 | att114 | att115 | att116 | att117 | att118 | att119 | att120 | att121 | att122 | att123 | att124 | att125 | att126 | att127 | att128 | att129 | att130 | att131 | att132 | att133 | att134 | att135 | att136 | att137 | att138 | att139 | att140 | att141 | att142 | att143 | att144 | att145 | att146 | att147 | att148 | att149 | att150 | att151 | att152 | att153 | att154 | att155 | att156 | att157 | att158 | att159 | att160 | att161 | att162 | att163 | att164 | att165 | att166 | att167 | att168 | att169 | att170 | att171 | att172 | att173 | att174 | att175 | att176 | att177 | att178 | att179 | att180 | att181 | att182 | att183 | att184 | att185 | att186 | att187 | att188 | att189 | att190 | att191 | att192 | att193 | att194 | att195 | att196 | att197 | att198 | att199 | att200 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
129 | 235 | 599 | 752 | 607 | 632 | 2 | 15 | 22 | 7 | 12 | 7 | 260 | 332 | 644 | 681 | 612 | 977 | 3 | 38 | 9 | 10 | 10 | 17 | 292 | 336 | 665 | 618 | 893 | 712 | 34 | 10 | 1 | 14 | 2 | 8 | 384 | 336 | 709 | 797 | 788 | 909 | 26 | 10 | 13 | 13 | 14 | 13 | 351 | 365 | 691 | 821 | 874 | 735 | 35 | 16 | 1 | 14 | 15 | 14 | 311 | 387 | 548 | 781 | 933 | 705 | 34 | 8 | 14 | 12 | 8 | 8 | 301 | 323 | 757 | 594 | 619 | 1043 | 9 | 30 | 9 | 13 | 8 | 18 | 339 | 273 | 682 | 551 | 589 | 889 | 6 | 24 | 6 | 10 | 9 | 16 | 354 | 304 | 827 | 703 | 755 | 977 | 20 | 27 | 7 | 13 | 14 | 9 | 410 | 280 | 549 | 865 | 882 | 734 | 26 | 28 | 27 | 6 | 15 | 8 | 291 | 383 | 441 | 722 | 784 | 702 | 25 | 25 | 19 | 4 | 9 | 12 | 249 | 213 | 634 | 773 | 559 | 720 | 11 | 16 | 18 | 3 | 11 | 7 | 313 | 211 | 642 | 791 | 627 | 670 | 9 | 28 | 16 | 12 | 10 | 18 | 281 | 335 | 644 | 653 | 671 | 953 | 14 | 31 | 6 | 14 | 2 | 13 | 285 | 353 | 607 | 614 | 841 | 745 | 17 | 9 | 9 | 7 | 3 | 10 | 473 | 485 | 671 | 903 | 1068 | 596 | 25 | 27 | 33 | 7 | 17 | 9 | 379 | 383 | 910 | 810 | 611 | 1179 | 8 | 26 |
425 | 293 | 798 | 862 | 652 | 523 | 20 | 28 | 12 | 10 | 7 | 11 | 198 | 282 | 661 | 739 | 609 | 836 | 15 | 13 | 19 | 3 | 5 | 11 | 166 | 348 | 734 | 656 | 772 | 547 | 16 | 19 | 9 | 5 | 11 | 2 | 484 | 310 | 778 | 859 | 687 | 746 | 28 | 35 | 19 | 12 | 15 | 9 | 397 | 325 | 750 | 811 | 719 | 566 | 23 | 35 | 9 | 9 | 16 | 10 | 301 | 219 | 635 | 687 | 788 | 570 | 18 | 23 | 4 | 7 | 13 | 0 | 227 | 251 | 662 | 684 | 618 | 906 | 9 | 21 | 19 | 0 | 11 | 4 | 239 | 263 | 635 | 699 | 678 | 748 | 12 | 13 | 12 | 7 | 10 | 16 | 286 | 184 | 694 | 851 | 696 | 830 | 16 | 8 | 11 | 0 | 15 | 17 | 472 | 302 | 802 | 659 | 751 | 607 | 18 | 15 | 31 | 13 | 16 | 0 | 297 | 261 | 696 | 656 | 671 | 653 | 15 | 10 | 9 | 17 | 10 | 6 | 423 | 237 | 887 | 907 | 592 | 615 | 11 | 27 | 8 | 16 | 6 | 1 | 291 | 179 | 843 | 939 | 622 | 559 | 13 | 15 | 8 | 1 | 5 | 14 | 185 | 265 | 621 | 689 | 624 | 814 | 4 | 14 | 4 | 1 | 11 | 9 | 265 | 261 | 630 | 568 | 758 | 590 | 9 | 16 | 1 | 8 | 12 | 2 | 493 | 339 | 870 | 751 | 951 | 505 | 15 | 16 | 23 | 8 | 18 | 1 | 411 | 323 | 859 | 928 | 628 | 1042 | 26 | 17 |
334 | 210 | 701 | 690 | 647 | 652 | 14 | 19 | 15 | 12 | 8 | 11 | 381 | 339 | 460 | 605 | 670 | 993 | 19 | 32 | 22 | 9 | 6 | 11 | 263 | 339 | 671 | 578 | 1049 | 726 | 24 | 18 | 12 | 9 | 10 | 2 | 369 | 383 | 661 | 663 | 864 | 927 | 18 | 22 | 22 | 12 | 16 | 15 | 376 | 384 | 633 | 619 | 966 | 745 | 25 | 22 | 12 | 11 | 17 | 10 | 274 | 316 | 530 | 589 | 1091 | 705 | 24 | 16 | 7 | 7 | 10 | 0 | 320 | 292 | 533 | 516 | 733 | 1063 | 13 | 26 | 22 | 8 | 10 | 10 | 434 | 230 | 482 | 543 | 747 | 905 | 16 | 20 | 15 | 5 | 11 | 16 | 495 | 317 | 577 | 689 | 787 | 987 | 32 | 17 | 16 | 8 | 16 | 17 | 423 | 283 | 705 | 669 | 1014 | 762 | 16 | 20 | 24 | 11 | 17 | 0 | 330 | 246 | 557 | 528 | 934 | 718 | 15 | 27 | 12 | 9 | 11 | 6 | 204 | 136 | 790 | 759 | 705 | 744 | 5 | 10 | 5 | 8 | 7 | 1 | 348 | 170 | 776 | 777 | 773 | 676 | 7 | 18 | 5 | 7 | 6 | 14 | 346 | 234 | 486 | 515 | 777 | 971 | 8 | 25 | 9 | 9 | 10 | 9 | 288 | 228 | 523 | 518 | 999 | 747 | 7 | 15 | 4 | 6 | 11 | 2 | 442 | 378 | 769 | 739 | 1226 | 618 | 15 | 19 | 20 | 12 | 19 | 1 | 484 | 382 | 638 | 750 | 643 | 1199 | 18 | 18 |
408 | 446 | 890 | 902 | 726 | 574 | 26 | 16 | 31 | 14 | 10 | 11 | 251 | 235 | 921 | 815 | 703 | 943 | 21 | 29 | 10 | 5 | 10 | 9 | 335 | 127 | 856 | 680 | 766 | 654 | 10 | 21 | 10 | 3 | 12 | 18 | 345 | 95 | 938 | 931 | 735 | 853 | 20 | 21 | 2 | 0 | 0 | 19 | 266 | 190 | 934 | 891 | 745 | 673 | 25 | 17 | 10 | 1 | 1 | 12 | 376 | 232 | 785 | 781 | 766 | 653 | 26 | 19 | 23 | 5 | 8 | 20 | 236 | 196 | 1012 | 740 | 716 | 1013 | 19 | 29 | 0 | 10 | 10 | 16 | 262 | 348 | 967 | 723 | 616 | 855 | 18 | 29 | 7 | 7 | 7 | 4 | 73 | 287 | 996 | 875 | 776 | 937 | 8 | 26 | 8 | 0 | 0 | 3 | 483 | 191 | 886 | 813 | 839 | 666 | 34 | 17 | 30 | 13 | 13 | 20 | 298 | 284 | 756 | 732 | 727 | 668 | 35 | 30 | 28 | 17 | 11 | 14 | 486 | 330 | 971 | 931 | 638 | 694 | 31 | 17 | 27 | 16 | 11 | 19 | 376 | 402 | 911 | 963 | 642 | 626 | 21 | 21 | 25 | 1 | 10 | 6 | 246 | 306 | 843 | 759 | 740 | 921 | 24 | 30 | 15 | 9 | 12 | 15 | 436 | 304 | 836 | 594 | 792 | 697 | 29 | 18 | 18 | 10 | 11 | 22 | 482 | 384 | 970 | 817 | 901 | 526 | 35 | 26 | 42 | 12 | 15 | 21 | 174 | 466 | 1125 | 974 | 724 | 1149 | 32 | 25 |
350 | 360 | 738 | 859 | 641 | 734 | 24 | 25 | 23 | 3 | 8 | 11 | 401 | 305 | 523 | 742 | 682 | 839 | 19 | 10 | 24 | 14 | 14 | 11 | 311 | 215 | 764 | 717 | 1079 | 728 | 20 | 24 | 36 | 10 | 12 | 6 | 305 | 213 | 778 | 948 | 874 | 765 | 18 | 24 | 26 | 9 | 0 | 1 | 316 | 226 | 742 | 960 | 990 | 693 | 13 | 16 | 36 | 10 | 1 | 8 | 214 | 266 | 597 | 910 | 1121 | 743 | 12 | 26 | 33 | 12 | 8 | 14 | 382 | 220 | 672 | 749 | 761 | 859 | 13 | 14 | 26 | 11 | 8 | 4 | 310 | 268 | 615 | 656 | 777 | 753 | 16 | 8 | 33 | 18 | 11 | 16 | 529 | 289 | 712 | 800 | 773 | 839 | 38 | 21 | 34 | 11 | 8 | 17 | 173 | 151 | 728 | 816 | 1044 | 768 | 4 | 8 | 4 | 6 | 1 | 0 | 350 | 232 | 574 | 849 | 964 | 720 | 3 | 13 | 30 | 10 | 11 | 6 | 298 | 254 | 813 | 834 | 735 | 678 | 15 | 34 | 33 | 11 | 9 | 3 | 450 | 308 | 807 | 888 | 803 | 692 | 17 | 22 | 33 | 12 | 14 | 14 | 460 | 196 | 495 | 808 | 805 | 887 | 8 | 13 | 41 | 10 | 12 | 9 | 220 | 184 | 616 | 675 | 1029 | 753 | 5 | 21 | 40 | 9 | 11 | 14 | 122 | 352 | 830 | 962 | 1256 | 714 | 3 | 23 | 32 | 3 | 3 | 11 | 526 | 404 | 721 | 937 | 633 | 925 | 30 | 24 |
# Preview the first 5 rows of the validation partition.
valid.head(5)
att1 | att2 | att3 | att4 | att5 | att6 | att7 | att8 | att9 | att10 | att11 | att12 | att13 | att14 | att15 | att16 | att17 | att18 | att19 | att20 | att21 | att22 | att23 | att24 | att25 | att26 | att27 | att28 | att29 | att30 | att31 | att32 | att33 | att34 | att35 | att36 | att37 | att38 | att39 | att40 | att41 | att42 | att43 | att44 | att45 | att46 | att47 | att48 | att49 | att50 | att51 | att52 | att53 | att54 | att55 | att56 | att57 | att58 | att59 | att60 | att61 | att62 | att63 | att64 | att65 | att66 | att67 | att68 | att69 | att70 | att71 | att72 | att73 | att74 | att75 | att76 | att77 | att78 | att79 | att80 | att81 | att82 | att83 | att84 | att85 | att86 | att87 | att88 | att89 | att90 | att91 | att92 | att93 | att94 | att95 | att96 | att97 | att98 | att99 | att100 | att101 | att102 | att103 | att104 | att105 | att106 | att107 | att108 | att109 | att110 | att111 | att112 | att113 | att114 | att115 | att116 | att117 | att118 | att119 | att120 | att121 | att122 | att123 | att124 | att125 | att126 | att127 | att128 | att129 | att130 | att131 | att132 | att133 | att134 | att135 | att136 | att137 | att138 | att139 | att140 | att141 | att142 | att143 | att144 | att145 | att146 | att147 | att148 | att149 | att150 | att151 | att152 | att153 | att154 | att155 | att156 | att157 | att158 | att159 | att160 | att161 | att162 | att163 | att164 | att165 | att166 | att167 | att168 | att169 | att170 | att171 | att172 | att173 | att174 | att175 | att176 | att177 | att178 | att179 | att180 | att181 | att182 | att183 | att184 | att185 | att186 | att187 | att188 | att189 | att190 | att191 | att192 | att193 | att194 | att195 | att196 | att197 | att198 | att199 | att200 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
235 | 377 | 717 | 563 | 647 | 588 | 21 | 31 | 6 | 14 | 9 | 9 | 330 | 396 | 674 | 542 | 692 | 957 | 16 | 22 | 37 | 13 | 7 | 5 | 412 | 394 | 695 | 537 | 1031 | 668 | 15 | 16 | 27 | 7 | 5 | 14 | 536 | 336 | 759 | 638 | 808 | 867 | 27 | 10 | 37 | 10 | 17 | 15 | 453 | 389 | 743 | 676 | 924 | 687 | 24 | 6 | 27 | 11 | 18 | 8 | 439 | 453 | 564 | 708 | 1079 | 667 | 25 | 12 | 14 | 5 | 11 | 16 | 441 | 521 | 749 | 469 | 771 | 1027 | 12 | 14 | 37 | 18 | 11 | 12 | 383 | 587 | 708 | 386 | 795 | 869 | 13 | 24 | 30 | 9 | 12 | 0 | 464 | 430 | 773 | 494 | 755 | 951 | 11 | 37 | 29 | 10 | 17 | 7 | 526 | 362 | 703 | 782 | 1026 | 704 | 29 | 20 | 23 | 13 | 18 | 16 | 441 | 403 | 577 | 635 | 950 | 682 | 26 | 25 | 9 | 7 | 12 | 10 | 377 | 355 | 788 | 600 | 751 | 708 | 22 | 32 | 10 | 6 | 8 | 15 | 365 | 421 | 726 | 582 | 795 | 640 | 20 | 34 | 12 | 9 | 7 | 2 | 405 | 521 | 642 | 530 | 819 | 935 | 15 | 21 | 22 | 17 | 5 | 11 | 401 | 501 | 597 | 599 | 1043 | 711 | 20 | 19 | 19 | 10 | 6 | 18 | 529 | 477 | 787 | 866 | 1234 | 558 | 26 | 37 | 5 | 14 | 20 | 17 | 577 | 659 | 940 | 629 | 639 | 1163 | 27 | 36 |
135 | 137 | 621 | 681 | 566 | 658 | 1 | 7 | 1 | 8 | 2 | 3 | 238 | 426 | 726 | 636 | 611 | 999 | 4 | 20 | 32 | 11 | 4 | 17 | 286 | 486 | 715 | 671 | 974 | 738 | 35 | 30 | 22 | 17 | 16 | 10 | 344 | 574 | 739 | 764 | 769 | 881 | 25 | 30 | 34 | 10 | 16 | 13 | 279 | 589 | 717 | 816 | 885 | 759 | 36 | 30 | 22 | 13 | 15 | 10 | 243 | 465 | 592 | 852 | 1016 | 739 | 35 | 28 | 9 | 15 | 16 | 8 | 251 | 461 | 831 | 621 | 684 | 1065 | 10 | 28 | 32 | 8 | 16 | 12 | 295 | 317 | 762 | 508 | 708 | 927 | 7 | 26 | 27 | 15 | 9 | 12 | 344 | 344 | 877 | 610 | 684 | 1005 | 19 | 7 | 28 | 8 | 8 | 11 | 356 | 388 | 571 | 862 | 959 | 734 | 27 | 22 | 18 | 9 | 9 | 8 | 293 | 307 | 499 | 779 | 877 | 760 | 26 | 25 | 6 | 15 | 11 | 14 | 283 | 209 | 656 | 704 | 660 | 768 | 10 | 16 | 11 | 16 | 3 | 9 | 307 | 189 | 674 | 698 | 710 | 716 | 8 | 12 | 11 | 9 | 4 | 14 | 271 | 389 | 718 | 676 | 736 | 1011 | 15 | 21 | 21 | 7 | 16 | 9 | 275 | 389 | 647 | 715 | 960 | 787 | 18 | 27 | 16 | 14 | 17 | 8 | 371 | 311 | 701 | 984 | 1177 | 610 | 26 | 9 | 10 | 8 | 11 | 7 | 383 | 305 | 986 | 743 | 562 | 1227 | 7 | 6 |
154 | 538 | 896 | 545 | 571 | 737 | 37 | 33 | 7 | 7 | 16 | 17 | 251 | 431 | 671 | 538 | 610 | 810 | 32 | 20 | 38 | 14 | 16 | 3 | 311 | 259 | 788 | 553 | 979 | 741 | 13 | 14 | 28 | 14 | 4 | 6 | 437 | 301 | 918 | 630 | 774 | 914 | 13 | 12 | 38 | 17 | 8 | 7 | 368 | 310 | 898 | 684 | 890 | 716 | 8 | 8 | 28 | 18 | 9 | 10 | 286 | 384 | 733 | 724 | 1021 | 664 | 3 | 14 | 15 | 12 | 4 | 8 | 304 | 536 | 796 | 485 | 699 | 828 | 26 | 12 | 38 | 17 | 10 | 4 | 312 | 668 | 765 | 394 | 723 | 762 | 29 | 22 | 31 | 10 | 15 | 8 | 421 | 459 | 756 | 462 | 675 | 854 | 31 | 35 | 30 | 17 | 12 | 11 | 283 | 379 | 892 | 798 | 946 | 853 | 9 | 18 | 24 | 6 | 9 | 8 | 224 | 398 | 738 | 651 | 870 | 595 | 10 | 23 | 10 | 0 | 9 | 2 | 292 | 464 | 977 | 578 | 679 | 697 | 28 | 34 | 11 | 1 | 15 | 7 | 366 | 548 | 903 | 550 | 713 | 563 | 30 | 36 | 13 | 16 | 16 | 6 | 314 | 574 | 539 | 544 | 739 | 728 | 21 | 19 | 23 | 18 | 4 | 11 | 300 | 558 | 754 | 615 | 961 | 716 | 18 | 17 | 20 | 11 | 3 | 10 | 412 | 446 | 972 | 882 | 1168 | 723 | 10 | 35 | 4 | 9 | 11 | 9 | 470 | 744 | 831 | 603 | 561 | 956 | 43 | 34 |
88 | 104 | 598 | 666 | 629 | 666 | 1 | 7 | 3 | 3 | 2 | 2 | 265 | 437 | 671 | 611 | 678 | 995 | 6 | 20 | 28 | 14 | 4 | 16 | 329 | 459 | 684 | 676 | 1033 | 740 | 37 | 30 | 24 | 14 | 16 | 11 | 309 | 535 | 712 | 745 | 812 | 883 | 25 | 30 | 36 | 9 | 16 | 12 | 244 | 574 | 692 | 807 | 924 | 761 | 38 | 30 | 24 | 10 | 15 | 9 | 282 | 480 | 553 | 849 | 1079 | 753 | 37 | 28 | 11 | 12 | 16 | 9 | 286 | 450 | 784 | 610 | 751 | 1061 | 12 | 28 | 30 | 11 | 16 | 15 | 320 | 306 | 709 | 513 | 775 | 923 | 9 | 30 | 29 | 18 | 9 | 11 | 367 | 349 | 846 | 593 | 747 | 1001 | 19 | 7 | 30 | 11 | 10 | 8 | 381 | 349 | 544 | 869 | 1026 | 730 | 29 | 22 | 14 | 6 | 9 | 9 | 308 | 332 | 442 | 780 | 944 | 776 | 28 | 29 | 8 | 10 | 11 | 15 | 232 | 210 | 629 | 683 | 727 | 768 | 10 | 8 | 15 | 11 | 3 | 10 | 270 | 186 | 643 | 681 | 777 | 732 | 8 | 8 | 15 | 12 | 4 | 13 | 316 | 386 | 657 | 665 | 803 | 1021 | 17 | 21 | 23 | 10 | 16 | 8 | 320 | 390 | 618 | 722 | 1027 | 799 | 20 | 27 | 18 | 9 | 17 | 11 | 360 | 354 | 670 | 993 | 1244 | 620 | 28 | 9 | 14 | 3 | 11 | 10 | 380 | 322 | 925 | 724 | 625 | 1223 | 5 | 6 |
238 | 492 | 840 | 703 | 676 | 573 | 33 | 28 | 4 | 16 | 13 | 9 | 175 | 309 | 805 | 624 | 691 | 914 | 28 | 25 | 27 | 13 | 11 | 7 | 247 | 239 | 748 | 667 | 916 | 651 | 3 | 19 | 23 | 5 | 1 | 16 | 503 | 237 | 892 | 784 | 715 | 810 | 19 | 7 | 35 | 8 | 13 | 17 | 434 | 276 | 864 | 828 | 785 | 670 | 18 | 3 | 23 | 9 | 14 | 10 | 290 | 360 | 723 | 830 | 972 | 658 | 19 | 15 | 10 | 3 | 7 | 18 | 330 | 406 | 902 | 603 | 774 | 980 | 22 | 17 | 29 | 18 | 7 | 14 | 218 | 572 | 847 | 500 | 794 | 840 | 25 | 27 | 28 | 9 | 10 | 2 | 349 | 375 | 898 | 644 | 728 | 920 | 15 | 40 | 29 | 8 | 13 | 5 | 401 | 327 | 828 | 860 | 955 | 651 | 27 | 23 | 13 | 15 | 14 | 18 | 296 | 362 | 686 | 767 | 871 | 677 | 28 | 28 | 7 | 9 | 8 | 12 | 388 | 402 | 913 | 716 | 746 | 685 | 30 | 29 | 14 | 8 | 12 | 17 | 296 | 486 | 843 | 732 | 748 | 635 | 26 | 31 | 16 | 7 | 11 | 4 | 290 | 530 | 745 | 674 | 800 | 924 | 23 | 24 | 22 | 17 | 1 | 13 | 258 | 506 | 748 | 711 | 1006 | 700 | 28 | 22 | 17 | 10 | 2 | 20 | 472 | 450 | 928 | 988 | 1093 | 527 | 28 | 40 | 15 | 16 | 16 | 19 | 498 | 606 | 1037 | 769 | 660 | 1124 | 39 | 39 |
# Identify predictors and response.
# Build the feature list by filtering rather than list.remove():
# remove() mutates the list object returned by train.columns and raises
# ValueError if the target column is missing.
targetColumn = "target"
featureColumns = [col for col in train.columns if col != targetColumn]
print("Feature Columns : "); print(featureColumns)
print("\n\nTarget Column : "); print(targetColumn)
Feature Columns : ['att1', 'att2', 'att3', 'att4', 'att5', 'att6', 'att7', 'att8', 'att9', 'att10', 'att11', 'att12', 'att13', 'att14', 'att15', 'att16', 'att17', 'att18', 'att19', 'att20', 'att21', 'att22', 'att23', 'att24', 'att25', 'att26', 'att27', 'att28', 'att29', 'att30', 'att31', 'att32', 'att33', 'att34', 'att35', 'att36', 'att37', 'att38', 'att39', 'att40', 'att41', 'att42', 'att43', 'att44', 'att45', 'att46', 'att47', 'att48', 'att49', 'att50', 'att51', 'att52', 'att53', 'att54', 'att55', 'att56', 'att57', 'att58', 'att59', 'att60', 'att61', 'att62', 'att63', 'att64', 'att65', 'att66', 'att67', 'att68', 'att69', 'att70', 'att71', 'att72', 'att73', 'att74', 'att75', 'att76', 'att77', 'att78', 'att79', 'att80', 'att81', 'att82', 'att83', 'att84', 'att85', 'att86', 'att87', 'att88', 'att89', 'att90', 'att91', 'att92', 'att93', 'att94', 'att95', 'att96', 'att97', 'att98', 'att99', 'att100', 'att101', 'att102', 'att103', 'att104', 'att105', 'att106', 'att107', 'att108', 'att109', 'att110', 'att111', 'att112', 'att113', 'att114', 'att115', 'att116', 'att117', 'att118', 'att119', 'att120', 'att121', 'att122', 'att123', 'att124', 'att125', 'att126', 'att127', 'att128', 'att129', 'att130', 'att131', 'att132', 'att133', 'att134', 'att135', 'att136', 'att137', 'att138', 'att139', 'att140', 'att141', 'att142', 'att143', 'att144', 'att145', 'att146', 'att147', 'att148', 'att149', 'att150', 'att151', 'att152', 'att153', 'att154', 'att155', 'att156', 'att157', 'att158', 'att159', 'att160', 'att161', 'att162', 'att163', 'att164', 'att165', 'att166', 'att167', 'att168', 'att169', 'att170', 'att171', 'att172', 'att173', 'att174', 'att175', 'att176', 'att177', 'att178', 'att179', 'att180', 'att181', 'att182', 'att183', 'att184', 'att185', 'att186', 'att187', 'att188', 'att189', 'att190', 'att191', 'att192', 'att193', 'att194', 'att195', 'att196', 'att197', 'att198', 'att199', 'att200', 'att201', 'att202', 'att203', 'att204', 'att205', 'att206', 'att207', 'att208', 
'att209', 'att210', 'att211', 'att212', 'att213', 'att214', 'att215', 'att216'] Target Column : target
import time
from h2o.automl import H2OAutoML

# Timestamp before training starts.
print("Local current time :", time.asctime(time.localtime(time.time())))
print()

# Configure AutoML: at most 10 base models, fixed seed for reproducibility,
# stacked ensembles excluded so the leaderboard contains only base learners.
aml = H2OAutoML(
    max_models=10,
    seed=1234,
    exclude_algos=["StackedEnsemble"],
    #sort_metric = 'rmse'
    #balance_classes = True,
    #sort_metric = 'AUC'
)

# Train on the training frame; the validation frame only supplies
# informative metrics (cross-validation is still used for model selection).
aml.train(x=featureColumns, y=targetColumn, training_frame=train,
          validation_frame=valid)

# Timestamp after training completes.
localtime = time.asctime(time.localtime(time.time()))
print()
print("Local current time :", localtime)
Local current time : Fri Mar 25 04:29:57 2022 AutoML progress: | 04:29:57.912: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models. ███████████████████████████████████████████████████████████████| (done) 100% Local current time : Fri Mar 25 04:33:34 2022
# Leaderboard of all trained models, ranked by the default sort metric
# (mean_per_class_error for multiclass classification).
lb = aml.leaderboard
# rows=lb.nrows prints every leaderboard row instead of the 10-row preview.
print(lb.head(rows = lb.nrows))
# Explain an AutoML object i.e. explain all models
#exa = aml.explain(valid)
# *****************************
# save all models +++++++++++++
# -----------------------------
#model_ids = list(lb['model_id'].as_data_frame().iloc[:,0])
#for m_id in model_ids:
#    mdl = h2o.get_model(m_id)
#    h2o.save_model(model=mdl, path=os.getcwd(), force=True)
#h2o.export_file(lb, os.path.join(os.getcwd(), 'aml_leaderboard.h2o'), force=True)
model_id | mean_per_class_error | logloss | rmse | mse |
---|---|---|---|---|
GLM_1_AutoML_2_20220325_42957 | 0.025988 | 0.112016 | 0.154593 | 0.023899 |
GBM_1_AutoML_2_20220325_42957 | 0.0320282 | 0.118526 | 0.173597 | 0.030136 |
XRT_1_AutoML_2_20220325_42957 | 0.0325853 | 0.28171 | 0.264908 | 0.0701761 |
XGBoost_3_AutoML_2_20220325_42957 | 0.0362208 | 0.137474 | 0.193865 | 0.0375835 |
GBM_3_AutoML_2_20220325_42957 | 0.0368302 | 0.140383 | 0.1793 | 0.0321485 |
GBM_2_AutoML_2_20220325_42957 | 0.0368813 | 0.141657 | 0.188488 | 0.0355278 |
DRF_1_AutoML_2_20220325_42957 | 0.0383542 | 0.287884 | 0.269802 | 0.0727933 |
GBM_4_AutoML_2_20220325_42957 | 0.0397385 | 0.155426 | 0.19891 | 0.0395651 |
XGBoost_2_AutoML_2_20220325_42957 | 0.0418623 | 0.156293 | 0.207078 | 0.0428813 |
XGBoost_1_AutoML_2_20220325_42957 | 0.0444101 | 0.200067 | 0.233968 | 0.0547411 |
# Evaluate the best model with the validation data.
model = aml.leader

# For Classification
import scikitplot as skplt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import cohen_kappa_score, confusion_matrix

# Score the validation frame with the leader model, then pull both the
# predictions and the ground truth back into pandas for sklearn metrics.
predicted_y = model.predict(valid[featureColumns])
predicted_data = predicted_y.as_data_frame()
valid_dataset = valid.as_data_frame()

y_true = valid_dataset[targetColumn]
y_pred = predicted_data['predict']

# Evaluate the skill of the trained model.
acc = accuracy_score(y_true, y_pred)
classReport = classification_report(y_true, y_pred)
confMatrix = confusion_matrix(y_true, y_pred)

print(); print('Testing Results of the trained model: ')
print(); print('Accuracy : ', acc)
#print(); print('Confusion Matrix :\n', confMatrix)
print(); print('Classification Report :\n', classReport)

# Confusion matrix plot.
skplt.metrics.plot_confusion_matrix(y_true, y_pred, figsize=(12, 12))
plt.show()
glm prediction progress: |███████████████████████████████████████████████████████| (done) 100% Testing Results of the trained model: Accuracy : 0.9523809523809523 Classification Report : precision recall f1-score support 0 1.00 0.94 0.97 16 1 0.94 0.89 0.91 18 2 1.00 1.00 1.00 10 3 0.85 1.00 0.92 11 4 1.00 0.89 0.94 18 5 1.00 1.00 1.00 12 6 0.94 1.00 0.97 15 7 0.94 0.89 0.92 19 8 0.92 1.00 0.96 11 9 0.94 1.00 0.97 17 accuracy 0.95 147 macro avg 0.95 0.96 0.96 147 weighted avg 0.96 0.95 0.95 147
print(); print()
#pd.set_option("display.max_rows", None, "display.max_columns", None)

# Variable importance of the leader model as a pandas DataFrame.
# Computed once and reused for both the table and the plot
# (the original called model.varimp() twice for the same result).
VI = model.varimp(use_pandas=True)[['variable', 'percentage']]

# Top-25 importance table.
print(VI.head(25))
print()

# Top-25 horizontal bar chart; invert the y-axis so the most important
# variable appears at the top.
VI.head(25).plot(x='variable', y='percentage', kind='barh',
                 figsize=(14, 16), grid=True,
                 title="Variable Importance: H2O model")
plt.gca().invert_yaxis(); plt.show()
variable percentage 0 att213 0.009736 1 att1 0.009460 2 att94 0.008408 3 att80 0.008064 4 att85 0.007921 5 att176 0.007491 6 att204 0.007469 7 att42 0.007372 8 att145 0.007311 9 att8 0.007154 10 att210 0.007152 11 att99 0.007107 12 att205 0.007093 13 att49 0.007069 14 att169 0.006965 15 att121 0.006896 16 att190 0.006818 17 att142 0.006711 18 att46 0.006700 19 att133 0.006697 20 att84 0.006664 21 att112 0.006564 22 att12 0.006561 23 att101 0.006550 24 att108 0.006485
# Explain a model
#exm = model.explain(valid)

import h2o  # required for h2o.save_model / h2o.load_model below; not
            # imported anywhere in the visible code.

# Model hyperparameters: as bare expressions these only display inside a
# notebook and are no-ops in a script, so print them explicitly.
print(model.params.keys())          # hyperparameter names used by the model
print(model.params)                 # full hyperparameter name -> value map
print(model.params['nfolds'])       # cross-validation fold count

# Persist the leader model to disk, then reload it to verify the round trip.
model_path = h2o.save_model(model=model, force=True)
print(model_path)
saved_model = h2o.load_model(model_path)
# For Classification
import scikitplot as skplt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import cohen_kappa_score, confusion_matrix

# Push the held-out (unseen) pandas data into H2O and score it with the
# trained leader model.
hf = h2o.H2OFrame(data_unseen)
predicted_y = model.predict(hf[featureColumns])
predicted_data = predicted_y.as_data_frame()

unseen_true = data_unseen[targetColumn]
unseen_pred = predicted_data['predict']

# Evaluate the skill of the trained model on the unseen data.
acc = accuracy_score(unseen_true, unseen_pred)
classReport = classification_report(unseen_true, unseen_pred)
confMatrix = confusion_matrix(unseen_true, unseen_pred)

print(); print('Testing Results of the trained model: ')
print(); print('Accuracy : ', acc)
#print(); print('Confusion Matrix :\n', confMatrix)
print(); print('Classification Report :\n', classReport)

# Confusion matrix plot.
skplt.metrics.plot_confusion_matrix(unseen_true, unseen_pred, figsize=(12, 12))
plt.show()
In this coding recipe, we discussed how to build a classification model in Python with H2O.
Specifically, we have learned the following: