# How to predict a timeseries using XGBoost in Python
def _make_windows(series, step_back):
    """Build lagged (features, target) pairs from a single-column 2-D array.

    Row ``i`` of the returned feature matrix holds
    ``series[i:i + step_back, 0]`` and the matching target is
    ``series[i + step_back, 0]``.

    Args:
        series: 2-D array-like of shape ``(n, k)``; only column 0 is read.
        step_back: number of consecutive past values used as features.

    Returns:
        Tuple ``(features, targets)`` of NumPy arrays with shapes
        ``(n - step_back, step_back)`` and ``(n - step_back,)``. When the
        series has ``step_back`` rows or fewer, both arrays are empty but
        keep those ranks.
    """
    import numpy as np

    values = np.asarray(series)[:, 0]
    # Every index i with i + step_back <= len(values) - 1 yields a valid
    # pair, so there are len(values) - step_back samples.  (Iterating one
    # fewer time would leave the final observation unused as a target.)
    n_samples = len(values) - step_back
    if n_samples <= 0:
        return (np.empty((0, step_back), dtype=values.dtype),
                np.empty((0,), dtype=values.dtype))
    features = np.array([values[i:i + step_back] for i in range(n_samples)])
    targets = values[step_back:].copy()
    return features, targets


def Snippet_394():
    """Fit an XGBoost regressor on lagged values of a CSV series and print scores.

    Reads ``international-airline-passengers.csv`` from the current working
    directory, splits it chronologically (first 67% of rows for training,
    the rest for testing), turns each split into sliding windows of two
    past values predicting the next one, cross-validates and fits an
    ``XGBRegressor``, then prints several error metrics, the raw
    predictions and the elapsed wall-clock time.

    Returns:
        None. All results are written to standard output.
    """
    print()
    print(format('How to predict a timeseries using XGBoost in Python', '*^92'))

    # load libraries
    import time

    import numpy as np
    import pandas
    import xgboost as xgb
    from sklearn.model_selection import cross_val_score
    from sklearn.metrics import median_absolute_error, mean_absolute_error
    from sklearn.metrics import r2_score, mean_squared_error

    start_time = time.time()

    # load the dataset: only the second CSV column is read, and the last
    # three lines of the file are skipped (skipfooter needs the python engine)
    dataframe = pandas.read_csv('international-airline-passengers.csv', usecols=[1],
                                engine='python', skipfooter=3)
    dataset = dataframe.values.astype('float32')

    # split into train and test sets, preserving row order
    train_size = int(len(dataset) * 0.67)
    train_dataset = dataset[:train_size, :]
    test_dataset = dataset[train_size:, :]

    # Window -> X timestep back
    step_back = 2
    X_train, Y_train = _make_windows(train_dataset, step_back)
    X_test, Y_test = _make_windows(test_dataset, step_back)
    print(X_train)
    print(Y_train)
    print(X_test)
    print(Y_test)

    # -------------------------------------
    # setup a XGBoost model in Python
    # -------------------------------------
    model = xgb.XGBRegressor(n_estimators=20000)

    # Cross Validation (4 folds, R2 scoring, all available cores)
    cv_results = cross_val_score(model, X_train, Y_train, cv=4, scoring='r2',
                                 n_jobs=-1, verbose=1)
    prt_string = f"CV Mean R2 score: {cv_results.mean():f} (Std: {cv_results.std():f})"
    print(prt_string)

    # Train the Model
    trained_Model = model.fit(X_train, Y_train, verbose=1)

    # Evaluate the skill of the Trained model
    pred_Value = trained_Model.predict(X_test)
    r2_val = r2_score(Y_test, pred_Value)
    m_err_val = median_absolute_error(Y_test, pred_Value)
    # NOTE(review): the mean absolute and squared errors below are weighted
    # by the target values themselves (sample_weight=Y_test), so larger
    # targets count more. Kept as in the original; confirm it is intended.
    mean_err_val = mean_absolute_error(Y_test, pred_Value,
                                       sample_weight=Y_test,
                                       multioutput='uniform_average')
    mean_sqr_err_Value = mean_squared_error(Y_test, pred_Value,
                                            sample_weight=Y_test,
                                            multioutput='uniform_average')
    rmse_Value = np.sqrt(mean_sqr_err_Value)
    # Normalise the RMSE by the observed range of the test targets.
    yMax_Value = np.max(Y_test)
    yMin_Value = np.min(Y_test)
    nrmse_Value = rmse_Value / (yMax_Value - yMin_Value)

    print("\tR2 (r-squared) Value: ", round(r2_val, 2))
    print("\tMedian Absolute Error Value: ", round(m_err_val, 2))
    print("\tMean Absolute Value: ", round(mean_err_val, 2))
    print("\tRMSE : ", round(rmse_Value, 2))
    print("\tNormalised RMSE : ", round(nrmse_Value, 2))
    print('Y_test', Y_test)
    print('pred_Value', pred_Value)
    print()
    print(f"Execution Time {time.time() - start_time} seconds: ")
# Run the demo. This call sits at module top level with no __main__ guard,
# so it executes whenever the file is run or imported.
Snippet_394()