# How to predict and visualise a timeseries using GradientBoost in Python
def _sliding_windows(series, step_back):
    """Turn a 1-D series into supervised-learning (X, y) pairs.

    Row ``i`` of X holds ``series[i:i + step_back]`` and ``y[i]`` is the value
    that immediately follows that window, ``series[i + step_back]``.

    Every window that has a following value is used, so the result contains
    ``max(len(series) - step_back, 0)`` samples.

    Args:
        series: 1-D array-like of observations in time order.
        step_back: number of past observations used as features.

    Returns:
        Tuple ``(X, y)`` of numpy arrays shaped ``(n, step_back)`` and ``(n,)``.
    """
    import numpy as np

    series = np.asarray(series)
    n_samples = max(len(series) - step_back, 0)
    windows = [series[i:i + step_back] for i in range(n_samples)]
    # reshape keeps X two-dimensional even when there are no samples at all
    X = np.array(windows, dtype=series.dtype).reshape(n_samples, step_back)
    y = np.array(series[step_back:step_back + n_samples])
    return X, y


def Snippet_397():
    """Fit a GradientBoostingRegressor to a univariate series and plot it.

    Reads column 1 of 'international-airline-passengers.csv' (skipping the
    last 3 footer lines), splits it 67% / 33% into train and test segments in
    time order, builds lagged windows of ``step_back`` values as features,
    prints a 4-fold cross-validated R2 score on the training windows, fits the
    model, and plots the original series together with the train and test
    predictions. Finally prints the elapsed wall-clock time.

    Takes no arguments and returns None.
    """
    print()
    print(format('How to predict and visualise a timeseries using GradientBoost in Python', '*^92'))

    # load libraries
    import pandas, time
    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.model_selection import cross_val_score
    import matplotlib.pyplot as plt

    start_time = time.time()

    # load the dataset
    dataframe = pandas.read_csv('international-airline-passengers.csv', usecols=[1],
                                engine='python', skipfooter=3)
    dataset = dataframe.values.astype('float32')

    # split into train and test sets (no shuffling: order is time)
    train_size = int(len(dataset) * 0.67)
    train_dataset = dataset[:train_size, :]
    test_dataset = dataset[train_size:, :]

    # Window -> X timestep back
    step_back = 2
    X_train, Y_train = _sliding_windows(train_dataset[:, 0], step_back)
    X_test, Y_test = _sliding_windows(test_dataset[:, 0], step_back)

    # -------------------------------------
    # setup a GradientBoost model in Python
    # -------------------------------------
    model = GradientBoostingRegressor(n_estimators=1000)

    # Cross Validation
    # NOTE(review): an integer cv gives plain contiguous folds, so some folds
    # are validated on observations older than their training data; consider
    # sklearn.model_selection.TimeSeriesSplit if that matters here.
    cv_results = cross_val_score(model, X_train, Y_train, cv=4, scoring='r2',
                                 n_jobs=-1, verbose=1)
    prt_string = "CV Mean R2 score: %f (Std: %f)" % (cv_results.mean(), cv_results.std())
    print(prt_string)

    # Train the Model
    trained_Model = model.fit(X_train, Y_train)

    # Evaluate the skill of the Trained model (column vectors to match dataset)
    trainPredict = np.asarray(trained_Model.predict(X_train)).reshape(-1, 1)
    testPredict = np.asarray(trained_Model.predict(X_test)).reshape(-1, 1)

    # shift train predictions for plotting: the first target sits step_back
    # positions into the series
    trainPredictPlot = np.full_like(dataset, np.nan)
    train_start = step_back
    trainPredictPlot[train_start:train_start + len(trainPredict), :] = trainPredict

    # shift test predictions for plotting: the first test target sits
    # step_back positions into the test segment, which starts at train_size
    testPredictPlot = np.full_like(dataset, np.nan)
    test_start = train_size + step_back
    testPredictPlot[test_start:test_start + len(testPredict), :] = testPredict

    # plot baseline and predictions
    plt.plot(dataset)
    plt.plot(trainPredictPlot)
    plt.plot(testPredictPlot)
    plt.show()

    print()
    print("Execution Time %s seconds: " % (time.time() - start_time))
# Run the demo immediately; there is no __main__ guard, so this also fires on import.
Snippet_397()