import pandas as pd
import warnings
# Silence every warning for the remainder of the script.
warnings.filterwarnings("ignore")
# Helpers used to load the dataset and draw the first plots.
from pandas import read_csv
# NOTE(review): `datetime` is not referenced anywhere in the visible code, and
# recent pandas releases no longer export it — confirm and drop this import.
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot
# CSV holding the series; it is re-read from this path further down the script.
filename = 'UK_Inflation_ConsumerPriceData_Annual_percentages.csv'
df = read_csv(filename)
# Use the 'Year' column as the index so plots are drawn against the year.
df = df.set_index('Year')
# Line plot of the raw data.
df.plot(figsize = (8,6))
# Open a fresh figure so the autocorrelation plot is not drawn onto the line plot.
fig = pyplot.figure(figsize = (8,6))
autocorrelation_plot(df)
pyplot.show()
# Quick look at the first rows.
print(df.head(5))
# Autocorrelation (ACF) and partial autocorrelation (PACF) plots of the raw,
# undifferenced data, each shown in its own figure.
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt
# Default figure size / resolution for the figures created from here on.
plt.rcParams.update({'figure.figsize':(6,4), 'figure.dpi':120})
plot_acf(df)
pyplot.show()
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(df)
pyplot.show()
from statsmodels.tsa.stattools import adfuller
# ADF Test
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on *series* and print the outcome.

    Prints, each preceded by a blank line: the test statistic, the number of
    lags used, the p-value, and then one line per critical value.

    Args:
        series: 1-D sequence of observations without missing values.

    Returns:
        None. The results are only printed.
    """
    result = adfuller(series, autolag='AIC')
    # adfuller returns (statistic, p-value, lags used, n_obs, critical values, ...).
    adf_statistic = result[0]
    p_value = result[1]
    used_lags = result[2]
    critical_values = result[4]
    print(); print(f'ADF Statistic: {adf_statistic}')
    # The lag count is element 2; element 1 is the p-value printed below.
    print(); print(f'n_lags: {used_lags}')
    print(); print(f'p-value: {p_value}')
    print(); print('Critial Values:')
    for key, value in critical_values.items():
        print(f' {key}, {value}')
# Run the ADF test on the raw (undifferenced) inflation column.
adf_test(df["Inflation_ConsumerPrice_Annual_Percentage"])
from statsmodels.tsa.stattools import kpss
def kpss_test(series, **kw):
    """Run the KPSS test on *series* and print the outcome.

    Prints, each preceded by a blank line: the test statistic, the p-value and
    the number of lags, followed by one line per critical value.

    Args:
        series: 1-D sequence of observations to test.
        **kw: Extra keyword arguments forwarded unchanged to ``kpss``.

    Returns:
        None. The results are only printed.
    """
    stat, pval, lag_count, thresholds = kpss(series, **kw)
    report_lines = (
        f'KPSS Statistic: {stat}',
        f'p-value: {pval}',
        f'num lags: {lag_count}',
        'Critial Values:',
    )
    # Every headline is separated from the previous one by an empty line.
    for line in report_lines:
        print()
        print(line)
    for level, threshold in thresholds.items():
        print(f' {level} : {threshold}')
# Run the KPSS test on the raw (undifferenced) inflation column.
kpss_test(df["Inflation_ConsumerPrice_Annual_Percentage"])
import numpy as np
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize':(12,14), 'figure.dpi':120})

# Replace the 'Year' index with a plain 0..n-1 index (the years are discarded).
df.reset_index(drop=True, inplace=True)

# Differences of order 1 to 4; each one is the difference of the previous one.
df1 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff()
df2 = df1.diff()
df3 = df2.diff()
df4 = df3.diff()

# Five rows (original + four differencing orders); left column is the series,
# right column is its autocorrelation plot.
fig, axes = plt.subplots(5, 2, sharex=True)

# Row 0: the undifferenced data.
axes[0, 0].plot(df.values)
axes[0, 0].set_title('Original Series')
plot_acf(df.values, ax=axes[0, 1])

# Rows 1-4: one row per differencing order.
for row, (title, diffed) in enumerate(
    [
        ('1st Order Differencing', df1),
        ('2nd Order Differencing', df2),
        ('3rd Order Differencing', df3),
        ('4th Order Differencing', df4),
    ],
    start=1,
):
    axes[row, 0].plot(diffed)
    axes[row, 0].set_title(title)
    # The leading values of a differenced series are NaN; drop them for the ACF.
    plot_acf(diffed.dropna(), ax=axes[row, 1])

plt.show()
warnings.filterwarnings("ignore")

# Differences of order 1 to 4 of the inflation column.
df1 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff()
df2 = df1.diff()
df3 = df2.diff()
df4 = df3.diff()

rule = "---------------------------------------------"

# For every differencing order: print a banner, then the ADF and KPSS results,
# each framed by the separator rule.
for position, (heading, differenced) in enumerate(
    [
        ("First Diffencing: ", df1),
        ("2nd Diffencing: ", df2),
        ("3rd Diffencing: ", df3),
        ("4th Diffencing: ", df4),
    ]
):
    # Every banner except the very first is preceded by an empty line.
    if position:
        print()
    print(rule)
    print(heading)
    print(rule)
    print()
    print(rule)
    adf_test(differenced.dropna())
    print()
    print(rule)
    kpss_test(differenced.dropna())
    print()
    print(rule)
from statsmodels.graphics.tsaplots import plot_pacf
# Side-by-side figure: the twice-differenced series (left) and its partial
# autocorrelation plot (right).
plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})
fig, axes = plt.subplots(1, 2, sharex=True)
df2 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff() #.diff() #.diff()
axes[0].plot(df2); axes[0].set_title('2nd Differencing')
# Fix the y-range of the right-hand axes before the PACF is drawn on it.
axes[1].set(ylim=(-3,3))
# The first two values of df2 are NaN after double differencing; drop them.
plot_pacf(df2.dropna(), ax=axes[1]) #PACF
plt.show()
from statsmodels.graphics.tsaplots import plot_acf
import matplotlib.pyplot as plt
# Side-by-side figure: the twice-differenced series (left) and its
# autocorrelation plot (right).
plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':120})
fig, axes = plt.subplots(1, 2, sharex=True)
df2 = df["Inflation_ConsumerPrice_Annual_Percentage"].diff().diff() #.diff() #.diff()
axes[0].plot(df2); axes[0].set_title('2nd Differencing')
#axes[1].set(ylim=(0,1.2))
# The first two values of df2 are NaN after double differencing; drop them.
plot_acf(df2.dropna(), ax=axes[1]) # ACF
plt.show()
## ADF test
# Re-run the ADF test on the twice-differenced series plotted above.
adf_test(df2.dropna())
# NOTE(review): `statsmodels.tsa.arima_model`, `fit(disp=...)` and the
# `plot_predict` method belong to the legacy ARIMA API, which newer statsmodels
# releases no longer ship — confirm the pinned statsmodels version.
from statsmodels.tsa.arima_model import ARIMA
plt.rcParams.update({'figure.figsize':(16,6), 'figure.dpi':220})
# Re-read the CSV and index it by 'Year' again (an earlier step in this script
# reset the index in place and dropped the years).
df = read_csv(filename)
df = df.set_index('Year')
# ARIMA Model fitted on the inflation column with order=(2,2,0), i.e. (p, d, q).
model = ARIMA(df["Inflation_ConsumerPrice_Annual_Percentage"], order=(2,2,0))
# disp=0 is handed to fit() — presumably to silence optimizer output.
model_fit = model.fit(disp=0)
print(model_fit.summary())
# Plot residual errors: as a line (left) and as a kernel density estimate (right).
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(1,2)
residuals.plot(title="Residuals", ax=ax[0])
residuals.plot(kind='kde', title='Density', ax=ax[1])
plt.show()
# Actual vs Fitted
model_fit.plot_predict(dynamic=False)
plt.show()
import pmdarima as pm
# Automatic order search with the differencing order left open (d=None).
# The search progress is printed (trace=True) and the chosen model's summary
# is printed afterwards.
model_with_auto_d = pm.auto_arima(df, start_p=1, start_q=1,
test='adf', # ADF test is used to pick 'd'
max_p=4, max_q=4, # maximum p, q
m=1, # frequency of series
d=None, # let model determine 'd'
seasonal=False, # No Seasonality
start_P=0, # seasonal setting; seasonal=False is passed above
D=0, # seasonal setting; seasonal=False is passed above
trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True)
print(model_with_auto_d.summary())
# Same search as for the auto-d model, but with the differencing order pinned to 1.
model_with_d_equals_1 = pm.auto_arima(df, start_p=1, start_q=1,
test='adf', # NOTE(review): d is fixed below, so this test presumably has no effect here
max_p=4, max_q=4, max_d=4, # maximum p, q and d (d is fixed below)
m=1, # frequency of series
d=1, # differencing order fixed at 1
seasonal=False, # No Seasonality
start_P=0, # seasonal setting; seasonal=False is passed above
D=0, # seasonal setting; seasonal=False is passed above
trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True)
print(model_with_d_equals_1.summary())
# Same search as for the auto-d model, but with the differencing order pinned to 2.
model_with_d_equals_2 = pm.auto_arima(df, start_p=1, start_q=1,
test='adf', # NOTE(review): d is fixed below, so this test presumably has no effect here
max_p=4, max_q=4, # maximum p, q
m=1, # frequency of series
d=2, # differencing order fixed at 2
seasonal=False, # No Seasonality
start_P=0, # seasonal setting; seasonal=False is passed above
D=0, # seasonal setting; seasonal=False is passed above
trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True)
print(model_with_d_equals_2.summary())
# Diagnostic plots for each of the three fitted models, one figure per model
# (auto-selected d, d=1, d=2), shown one after another.
model_with_auto_d.plot_diagnostics(figsize=(12,10))
plt.show()
model_with_d_equals_1.plot_diagnostics(figsize=(12,10))
plt.show()
model_with_d_equals_2.plot_diagnostics(figsize=(12,10))
plt.show()
# Forecast with the model whose differencing order was selected automatically.
model = model_with_auto_d

# Forecast
n_periods = 10
fc, confint = model.predict(n_periods=n_periods, return_conf_int=True)

# Label the forecast with the years that follow the last observed year, taken
# from the 'Year' index of df, instead of a hard-coded start year that goes
# stale as soon as the CSV is extended.
first_forecast_year = int(df.index.max()) + 1
index_of_fc = np.arange(first_forecast_year, first_forecast_year + n_periods)

# make series for plotting purpose
# np.asarray strips any index predict() may attach to its output: passing an
# already-indexed Series to pd.Series(..., index=...) would re-align on labels
# (yielding NaN for non-matching labels) instead of relabelling the values.
fc_values = np.asarray(fc)
bounds = np.asarray(confint)
fc_series = pd.Series(fc_values, index=index_of_fc)
lower_series = pd.Series(bounds[:, 0], index=index_of_fc)
upper_series = pd.Series(bounds[:, 1], index=index_of_fc)

# Plot the observed data, the point forecast and the shaded confidence band.
plt.plot(df)
plt.plot(fc_series, color='darkgreen')
plt.fill_between(
    lower_series.index,
    lower_series,
    upper_series,
    color='k',
    alpha=.15,
)
plt.title("Final Forecast")
plt.show()

print(); print(fc_series)
print(); print(lower_series)
print(); print(upper_series)
# NOTE(review): newer releases of this library are published under the package
# name `prophet` — confirm which one is installed in the target environment.
from fbprophet import Prophet
import pandas as pd

# Re-read the CSV so that 'Year' is an ordinary column again.
df = read_csv(filename)
print(df.head())

# Stamp every observation with 31 December of its year. The column is built in
# one vectorized step and addressed by name, so it does not depend on the
# column order of the CSV and never writes strings into an integer column.
df["End_Year"] = df["Year"].astype(str) + "-12-31"
print(); print(df.head())

# Frame in the layout Prophet expects: 'ds' (timestamp) and 'y' (value).
df_pop = pd.DataFrame({
    'ds': pd.to_datetime(df['End_Year']),
    'y': pd.to_numeric(df['Inflation_ConsumerPrice_Annual_Percentage']),
})
print(df_pop.tail())

# Create FBProphet Model with Dataset
# NOTE(review): there appears to be one observation per year, so weekly and
# yearly seasonal components can hardly be identified from this data — confirm
# that these flags are intended.
m = Prophet(daily_seasonality=False, weekly_seasonality=True, yearly_seasonality=True)
m.fit(df_pop)

# Extend the timeline by 10 further year-end dates and predict over all of it.
future = m.make_future_dataframe(periods=10, freq = 'Y')
print()
print(future.tail(26))
forecast = m.predict(future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(25))
fig = m.plot(forecast)
plt.show()

# Collect the prediction, its bounds and the observed values in one table.
# The observed column is aligned on the row index, so the forecast-only rows
# at the end hold NaN there.
df_final = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].rename(columns={'ds': 'Year'})
df_final['Inflation_ConsumerPrice_Annual_Percentage'] = df_pop['y']
print(df_final.head(len(df_final)))

# Save Data in a CSV file (left disabled, as in the original script).
#df_final.to_csv('Forecast_final.csv',index = False)