Time Series Forecasting - Box-Jekins Methon (ARIMA) in Python and

Time Series Forecasting - FBProphet in Python

USA Inflation - Consumer Price Annual Percentage Forecasting

In [3]:
# ignore warnings
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

Load dataset

In [4]:
# load dataset
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot

filename = 'USA_Inflation_ConsumerPriceData_Annual_percentages.csv'

df = read_csv(filename)
df = df.set_index('Year')

df.plot(figsize = (8,6))

fig = pyplot.figure(figsize = (8,6))
autocorrelation_plot(df)
pyplot.show()

print(df.head(5))
      Inflation_ConsumerPrice_Annual_Percentage
Year                                           
1960                                   1.457976
1961                                   1.070724
1962                                   1.198773
1963                                   1.239669
1964                                   1.278912

Autocorrelation and Partial Autocorrelation in Python

In [5]:
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize':(6,4), 'figure.dpi':120})

plot_acf(df)
pyplot.show()
In [6]:
from statsmodels.graphics.tsaplots import plot_pacf

plot_pacf(df)
pyplot.show()

ADF test

In [7]:
from statsmodels.tsa.stattools import adfuller

# ADF Test
def adf_test(series):
    result = adfuller(series, autolag='AIC')
    print(); print(f'ADF Statistic: {result[0]}')
    print();  print(f'n_lags: {result[1]}')
    print();  print(f'p-value: {result[1]}')

    print(); print('Critial Values:')
    for key, value in result[4].items():
        print(f'   {key}, {value}')   

adf_test(df["Inflation_ConsumerPrice_Annual_Percentage"])
ADF Statistic: -2.00789437929894

n_lags: 0.2831224525158605

p-value: 0.2831224525158605

Critial Values:
   1%, -3.548493559596539
   5%, -2.912836594776334
   10%, -2.594129155766944

KPSS Test

In [8]:
from statsmodels.tsa.stattools import kpss

def kpss_test(series, **kw):    
    
    statistic, p_value, n_lags, critical_values = kpss(series, **kw)
    
    # Format Output
    print(); print(f'KPSS Statistic: {statistic}')
    print(); print(f'p-value: {p_value}')
    print(); print(f'num lags: {n_lags}')
    print(); print('Critial Values:')
    for key, value in critical_values.items():
        print(f'   {key} : {value}')
    
kpss_test(df["Inflation_ConsumerPrice_Annual_Percentage"])
KPSS Statistic: 0.2424327669196838

p-value: 0.1

num lags: 11

Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
/home/crown/miniconda/envs/nilimeshdss/lib/python3.6/site-packages/statsmodels/tsa/stattools.py:1278: InterpolationWarning: p-value is greater than the indicated p-value
  warn("p-value is greater than the indicated p-value", InterpolationWarning)

How to find the order of differencing (d) in ARIMA model

In [9]:
import numpy as np, pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.figsize':(12,14), 'figure.dpi':120})

# Import data
#df = pd.read_csv('shampoo.csv', header=0, names = ['Sales'])
df.reset_index(drop=True, inplace=True)

# Original Series
fig, axes = plt.subplots(5, 2, sharex=True)
axes[0, 0].plot(df.values); axes[0, 0].set_title('Original Series')
plot_acf(df.values, ax=axes[0, 1])

# 1st Differencing
df1 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff()
axes[1, 0].plot(df1); axes[1, 0].set_title('1st Order Differencing')
plot_acf(df1.dropna(), ax=axes[1, 1])

# 2nd Differencing
df2 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff()
axes[2, 0].plot(df2); axes[2, 0].set_title('2nd Order Differencing')
plot_acf(df2.dropna(), ax=axes[2, 1])

# 3rd Differencing
df3 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff().diff()
axes[3, 0].plot(df3); axes[3, 0].set_title('3rd Order Differencing')
plot_acf(df3.dropna(), ax=axes[3, 1])

# 3rd Differencing
df4 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff().diff().diff()
axes[4, 0].plot(df4); axes[4, 0].set_title('4th Order Differencing')
plot_acf(df4.dropna(), ax=axes[4, 1])

plt.show()

ADF and KPSS statistics

In [10]:
warnings.filterwarnings("ignore")

print("---------------------------------------------")
print("First Diffencing: ")
print("---------------------------------------------")
df1 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff()
print(); print("---------------------------------------------")
adf_test(df1.dropna())
print(); print("---------------------------------------------")
kpss_test(df1.dropna())
print(); print("---------------------------------------------")


print(); print("---------------------------------------------")
print("2nd Diffencing: ")
print("---------------------------------------------")
df2 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff()
print(); print("---------------------------------------------")
adf_test(df2.dropna())
print(); print("---------------------------------------------")
kpss_test(df2.dropna())
print(); print("---------------------------------------------")

print(); print("---------------------------------------------")
print("3rd Diffencing: ")
print("---------------------------------------------")
df3 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff().diff()
print(); print("---------------------------------------------")
adf_test(df3.dropna())
print(); print("---------------------------------------------")
kpss_test(df3.dropna())
print(); print("---------------------------------------------")

print(); print("---------------------------------------------")
print("4th Diffencing: ")
print("---------------------------------------------")
df4 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff().diff().diff()
print(); print("---------------------------------------------")
adf_test(df4.dropna())
print(); print("---------------------------------------------")
kpss_test(df4.dropna())
print(); print("---------------------------------------------")
---------------------------------------------
First Diffencing: 
---------------------------------------------

---------------------------------------------

ADF Statistic: -7.369435723520059

n_lags: 9.05338669806213e-11

p-value: 9.05338669806213e-11

Critial Values:
   1%, -3.548493559596539
   5%, -2.912836594776334
   10%, -2.594129155766944

---------------------------------------------

KPSS Statistic: 0.17056940350310107

p-value: 0.1

num lags: 11

Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739

---------------------------------------------

---------------------------------------------
2nd Diffencing: 
---------------------------------------------

---------------------------------------------

ADF Statistic: -7.014316432007098

n_lags: 6.794504033658939e-10

p-value: 6.794504033658939e-10

Critial Values:
   1%, -3.55770911573439
   5%, -2.9167703434435808
   10%, -2.59622219478738

---------------------------------------------

KPSS Statistic: 0.09865895441617496

p-value: 0.1

num lags: 11

Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739

---------------------------------------------

---------------------------------------------
3rd Diffencing: 
---------------------------------------------

---------------------------------------------

ADF Statistic: -5.20090225747649

n_lags: 8.754869426913789e-06

p-value: 8.754869426913789e-06

Critial Values:
   1%, -3.5745892596209488
   5%, -2.9239543084490744
   10%, -2.6000391840277777

---------------------------------------------

KPSS Statistic: 0.09155447361710153

p-value: 0.1

num lags: 11

Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739

---------------------------------------------

---------------------------------------------
4th Diffencing: 
---------------------------------------------

---------------------------------------------

ADF Statistic: -4.625797957982303

n_lags: 0.0001157421652520446

p-value: 0.0001157421652520446

Critial Values:
   1%, -3.584828853223594
   5%, -2.9282991495198907
   10%, -2.6023438271604937

---------------------------------------------

KPSS Statistic: 0.09017989281798572

p-value: 0.1

num lags: 11

Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739

---------------------------------------------
In [ ]:
 

How to find the order of the AR term (p)

In [11]:
from statsmodels.graphics.tsaplots import plot_pacf

# PACF plot 
plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})
fig, axes = plt.subplots(1, 2, sharex=True)

df2 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff() #.diff() #.diff()

axes[0].plot(df2); axes[0].set_title('2nd Differencing')
axes[1].set(ylim=(-3,3))
plot_pacf(df2.dropna(), ax=axes[1]) #PACF

plt.show()

How to find the order of the MA term (q)

In [12]:
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})
fig, axes = plt.subplots(1, 2, sharex=True)

df2 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff() #.diff() #.diff()
axes[0].plot(df2); axes[0].set_title('2nd Differencing')
#axes[1].set(ylim=(0,1.2))
plot_acf(df2.dropna(), ax=axes[1]) # ACF

plt.show()
In [13]:
## ADF test
adf_test(df2.dropna())
ADF Statistic: -7.014316432007098

n_lags: 6.794504033658939e-10

p-value: 6.794504033658939e-10

Critial Values:
   1%, -3.55770911573439
   5%, -2.9167703434435808
   10%, -2.59622219478738

Build the ARIMA(p,d,q) Model

In [14]:
from statsmodels.tsa.arima_model import ARIMA

plt.rcParams.update({'figure.figsize':(16,6), 'figure.dpi':220})

df = read_csv(filename)
df = df.set_index('Year')

# ARIMA Model
model = ARIMA(df["Inflation_ConsumerPrice_Annual_Percentage"], order=(2,2,0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# Plot residual errors
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(1,2)
residuals.plot(title="Residuals", ax=ax[0])
residuals.plot(kind='kde', title='Density', ax=ax[1])
plt.show()

# Actual vs Fitted
model_fit.plot_predict(dynamic=False)
plt.show()
                                          ARIMA Model Results                                           
========================================================================================================
Dep. Variable:     D2.Inflation_ConsumerPrice_Annual_Percentage   No. Observations:                   59
Model:                                           ARIMA(2, 2, 0)   Log Likelihood                -122.191
Method:                                                 css-mle   S.D. of innovations              1.913
Date:                                          Wed, 04 Aug 2021   AIC                            252.382
Time:                                                  16:51:55   BIC                            260.692
Sample:                                                       2   HQIC                           255.626
                                                                                                        
======================================================================================================================
                                                         coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------------------------------------
const                                                 -0.0029      0.150     -0.019      0.985      -0.297       0.292
ar.L1.D2.Inflation_ConsumerPrice_Annual_Percentage    -0.2622      0.117     -2.236      0.029      -0.492      -0.032
ar.L2.D2.Inflation_ConsumerPrice_Annual_Percentage    -0.4144      0.115     -3.595      0.001      -0.640      -0.188
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1           -0.3164           -1.5208j            1.5534           -0.2826
AR.2           -0.3164           +1.5208j            1.5534            0.2826
-----------------------------------------------------------------------------