In [1]:
# How to predict a timeseries using XGBoost in Python

def Snippet_394(): 

    print()
    print(format('How to predict a timeseries using XGBoost in Python','*^92'))

    # load libraries
    import pandas, time
    import numpy as np
    import xgboost as xgb
    from sklearn.model_selection import cross_val_score  
    from sklearn.metrics import median_absolute_error, mean_absolute_error 
    from sklearn.metrics import r2_score, mean_squared_error

    start_time = time.time()    

    # load the dataset
    dataframe = pandas.read_csv('international-airline-passengers.csv', usecols=[1], 
                                engine='python', skipfooter=3)
    dataset = dataframe.values; dataset = dataset.astype('float32')

    # split into train and test sets
    train_size = int(len(dataset) * 0.67) 
    train_dataset, test_dataset = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

    # Window -> X timestep back
    step_back = 2
    X_train, Y_train = [], []
    for i in range(len(train_dataset)-step_back - 1):
        a = train_dataset[i:(i+step_back), 0]
        X_train.append(a)
        Y_train.append(train_dataset[i + step_back, 0])
    X_train = np.array(X_train); Y_train = np.array(Y_train);
    
    X_test, Y_test = [], []
    for i in range(len(test_dataset)-step_back - 1):
        a = test_dataset[i:(i+step_back), 0]
        X_test.append(a)
        Y_test.append(test_dataset[i + step_back, 0])
    X_test = np.array(X_test); Y_test = np.array(Y_test);

    print(X_train); print(Y_train);             print(X_test); print(Y_test);

    # -------------------------------------
    # setup a XGBoost model in Python
    # -------------------------------------
    model = xgb.XGBRegressor(n_estimators=20000)
    # Cross Validation
    cv_results = cross_val_score(model, X_train, Y_train, cv = 4, scoring='r2', n_jobs = -1, verbose = 1)
    prt_string = "CV Mean R2 score: %f (Std: %f)"% (cv_results.mean(), cv_results.std())
    print(prt_string)
    
    # Train the Model
    trained_Model = model.fit(X_train, Y_train, verbose = 1)

    # Evaluate the skill of the Trained model
    pred_Value         = trained_Model.predict(X_test)
    r2_val             = r2_score(Y_test, pred_Value)
    m_err_val          = median_absolute_error(Y_test, pred_Value)
    mean_err_val       = mean_absolute_error(Y_test, pred_Value, 
                                               sample_weight = Y_test, multioutput='uniform_average')
    mean_sqr_err_Value   = mean_squared_error(Y_test, pred_Value, sample_weight = Y_test, 
                            multioutput='uniform_average')
    rmse_Value           = np.sqrt(mean_sqr_err_Value)
    yMax_Value           = np.max(Y_test)
    yMin_Value           = np.min(Y_test)
    nrmse_Value          = rmse_Value / (yMax_Value - yMin_Value)
    
    print("\tR2 (r-squared) Value: ", round(r2_val,2))
    print("\tMedian Absolute Error Value: ", round(m_err_val,2))        
    print("\tMean Absolute Value: ", round(mean_err_val,2))        
    print("\tRMSE : ",              round(rmse_Value,2))        
    print("\tNormalised RMSE : ",  round(nrmse_Value,2))        
    print('Y_test', Y_test); print('pred_Value', pred_Value);
    print(); print("Execution Time %s seconds: " % (time.time() - start_time))

Snippet_394()
********************How to predict a timeseries using XGBoost in Python*********************
[[112. 118.]
 [118. 132.]
 [132. 129.]
 [129. 121.]
 [121. 135.]
 [135. 148.]
 [148. 148.]
 [148. 136.]
 [136. 119.]
 [119. 104.]
 [104. 118.]
 [118. 115.]
 [115. 126.]
 [126. 141.]
 [141. 135.]
 [135. 125.]
 [125. 149.]
 [149. 170.]
 [170. 170.]
 [170. 158.]
 [158. 133.]
 [133. 114.]
 [114. 140.]
 [140. 145.]
 [145. 150.]
 [150. 178.]
 [178. 163.]
 [163. 172.]
 [172. 178.]
 [178. 199.]
 [199. 199.]
 [199. 184.]
 [184. 162.]
 [162. 146.]
 [146. 166.]
 [166. 171.]
 [171. 180.]
 [180. 193.]
 [193. 181.]
 [181. 183.]
 [183. 218.]
 [218. 230.]
 [230. 242.]
 [242. 209.]
 [209. 191.]
 [191. 172.]
 [172. 194.]
 [194. 196.]
 [196. 196.]
 [196. 236.]
 [236. 235.]
 [235. 229.]
 [229. 243.]
 [243. 264.]
 [264. 272.]
 [272. 237.]
 [237. 211.]
 [211. 180.]
 [180. 201.]
 [201. 204.]
 [204. 188.]
 [188. 235.]
 [235. 227.]
 [227. 234.]
 [234. 264.]
 [264. 302.]
 [302. 293.]
 [293. 259.]
 [259. 229.]
 [229. 203.]
 [203. 229.]
 [229. 242.]
 [242. 233.]
 [233. 267.]
 [267. 269.]
 [269. 270.]
 [270. 315.]
 [315. 364.]
 [364. 347.]
 [347. 312.]
 [312. 274.]
 [274. 237.]
 [237. 278.]
 [278. 284.]
 [284. 277.]
 [277. 317.]
 [317. 313.]
 [313. 318.]
 [318. 374.]
 [374. 413.]
 [413. 405.]]
[132. 129. 121. 135. 148. 148. 136. 119. 104. 118. 115. 126. 141. 135.
 125. 149. 170. 170. 158. 133. 114. 140. 145. 150. 178. 163. 172. 178.
 199. 199. 184. 162. 146. 166. 171. 180. 193. 181. 183. 218. 230. 242.
 209. 191. 172. 194. 196. 196. 236. 235. 229. 243. 264. 272. 237. 211.
 180. 201. 204. 188. 235. 227. 234. 264. 302. 293. 259. 229. 203. 229.
 242. 233. 267. 269. 270. 315. 364. 347. 312. 274. 237. 278. 284. 277.
 317. 313. 318. 374. 413. 405. 355.]
[[271. 306.]
 [306. 315.]
 [315. 301.]
 [301. 356.]
 [356. 348.]
 [348. 355.]
 [355. 422.]
 [422. 465.]
 [465. 467.]
 [467. 404.]
 [404. 347.]
 [347. 305.]
 [305. 336.]
 [336. 340.]
 [340. 318.]
 [318. 362.]
 [362. 348.]
 [348. 363.]
 [363. 435.]
 [435. 491.]
 [491. 505.]
 [505. 404.]
 [404. 359.]
 [359. 310.]
 [310. 337.]
 [337. 360.]
 [360. 342.]
 [342. 406.]
 [406. 396.]
 [396. 420.]
 [420. 472.]
 [472. 548.]
 [548. 559.]
 [559. 463.]
 [463. 407.]
 [407. 362.]
 [362. 405.]
 [405. 417.]
 [417. 391.]
 [391. 419.]
 [419. 461.]
 [461. 472.]
 [472. 535.]
 [535. 622.]]
[315. 301. 356. 348. 355. 422. 465. 467. 404. 347. 305. 336. 340. 318.
 362. 348. 363. 435. 491. 505. 404. 359. 310. 337. 360. 342. 406. 396.
 420. 472. 548. 559. 463. 407. 362. 405. 417. 391. 419. 461. 472. 535.
 622. 606.]
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   4 | elapsed:    2.8s remaining:    2.8s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    2.8s finished
CV Mean R2 score: -0.482996 (Std: 0.592262)
[16:22:41] WARNING: src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
	R2 (r-squared) Value:  -0.48
	Median Absolute Error Value:  53.5
	Mean Absolute Value:  81.93
	RMSE :  109.16
	Normalised RMSE :  0.34
Y_test [315. 301. 356. 348. 355. 422. 465. 467. 404. 347. 305. 336. 340. 318.
 362. 348. 363. 435. 491. 505. 404. 359. 310. 337. 360. 342. 406. 396.
 420. 472. 548. 559. 463. 407. 362. 405. 417. 391. 419. 461. 472. 535.
 622. 606.]
pred_Value [270.04395 324.29797 277.95496 347.56223 312.00043 328.41724 399.06433
 356.27673 356.27673 355.00037 318.6101  274.01132 347.6104  344.99
 351.6232  347.92178 312.00043 328.37164 398.211   356.27673 356.27673
 355.00037 318.5645  257.61276 347.6119  344.94446 312.00043 402.27637
 355.00037 356.27673 356.27673 356.27673 356.27673 356.27673 355.00037
 318.5645  396.93457 356.27673 355.00037 404.99963 356.27673 356.27673
 356.27673 356.27673]

Execution Time 7.409864902496338 seconds: