# Suppress library warnings so the demo output stays readable.
import warnings
warnings.filterwarnings("ignore")
import pandas
from pandas import read_csv
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

# --- Evaluate accuracy with a single train/test split ---
# Load the Pima Indians diabetes dataset: 8 numeric features + binary 'class'.
filename = 'pima.indians.diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
print(); print(dataframe.head())
array = dataframe.values
X = array[:, 0:8]   # feature columns
Y = array[:, 8]     # target column ('class')
test_size = 0.33    # hold out a third of the rows for testing
seed = 7            # fixed seed so the split is reproducible
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)
# max_iter raised from the default 100: the lbfgs solver does not converge
# on this unscaled data in 100 iterations, and filterwarnings above would
# silently hide the ConvergenceWarning.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, Y_train)
result = model.score(X_test, Y_test)
print(); print("Accuracy: %.3f%%" % (result*100.0))
import pandas
from pandas import read_csv
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

# --- Evaluate accuracy with 10-fold cross-validation ---
filename = 'pima.indians.diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
print(); print(dataframe.head())
array = dataframe.values
X = array[:, 0:8]   # feature columns
Y = array[:, 8]     # target column ('class')
seed = 7            # fixed seed so the fold assignment is reproducible
# BUG FIX: KFold only honours random_state when shuffle=True; since
# scikit-learn 0.24 passing random_state with shuffle=False raises ValueError.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
# max_iter raised from the default 100 so lbfgs converges on this unscaled data.
model = LogisticRegression(max_iter=1000)
results = model_selection.cross_val_score(model, X, Y, cv=kfold)
print(); print("Accuracy: %.3f%% (%.3f%%)" % (results.mean()*100.0, results.std()*100.0))
import pandas
from pandas import read_csv
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

# --- Evaluate accuracy with leave-one-out cross-validation ---
filename = 'pima.indians.diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
print(); print(dataframe.head())
array = dataframe.values
X = array[:, 0:8]   # feature columns
Y = array[:, 8]     # target column ('class')
# One fold per row: each model trains on n-1 rows and is scored on the
# single held-out row, so every fold score is exactly 0.0 or 1.0.
loocv = model_selection.LeaveOneOut()
# max_iter raised from the default 100 so lbfgs converges on this unscaled data.
model = LogisticRegression(max_iter=1000)
results = model_selection.cross_val_score(model, X, Y, cv=loocv)
# NOTE: with 0/1 fold scores the std below reflects the mean, not the
# spread of model quality — interpret it with care.
print(); print("Accuracy: %.3f%% (%.3f%%)" % (results.mean()*100.0, results.std()*100.0))
import pandas
from pandas import read_csv
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

# --- Evaluate accuracy with repeated random train/test splits (ShuffleSplit) ---
filename = 'pima.indians.diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
print(); print(dataframe.head())
array = dataframe.values
X = array[:, 0:8]   # feature columns
Y = array[:, 8]     # target column ('class')
num_samples = 10    # number of independent random splits to evaluate
test_size = 0.33    # fraction of rows held out in each split
seed = 7            # fixed seed so the sequence of splits is reproducible
# CONSISTENCY FIX: wire num_samples into n_splits instead of repeating the
# literal 10 (the original defined num_samples but never used it).
kfold = model_selection.ShuffleSplit(
    n_splits=num_samples, test_size=test_size, random_state=seed)
# max_iter raised from the default 100 so lbfgs converges on this unscaled data.
model = LogisticRegression(max_iter=1000)
results = model_selection.cross_val_score(model, X, Y, cv=kfold)
print(); print("Accuracy: %.3f%% (%.3f%%)" % (results.mean()*100.0, results.std()*100.0))