In [1]:
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt

import pandas as pd
from pandas import read_csv

from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.arima_model import ARMA

from statsmodels.stats.diagnostic import acorr_ljungbox 

import statsmodels.api as sm
# from statsmodels.tsa.seasonal import seasonal_decompose as decompose
# from statsmodels.tsa.stattools import acf 

Exercice 1

Questions (a) et (b)

In [15]:
phi_1 = 0.5

ar_parms = [1,-phi_1]     # Attention, les paramètres AR sont [1,-phi_1,...,-phi_p] et [1,theta_1,...,theta_q] pour MA 
ma_parms = [1]

AR_t = ArmaProcess(ar=ar_parms,ma=ma_parms).generate_sample(300)
plt.plot(AR_t)
plt.show()
In [16]:
# ACF theorique 
plot_acf(ArmaProcess(ar=ar_parms,ma=ma_parms).acf())
plt.show()
In [17]:
# ACF empirique 
plot_acf(AR_t,lags=20)
plt.show()

Question (d)

In [33]:
ar_parms = [1]    # C'est un MA(3) 
ma_parms = [1,0.9,0.6,0.9]

MA_t = ArmaProcess(ar=ar_parms,ma=ma_parms).generate_sample(100)
plt.plot(MA_t)
plt.show()
In [34]:
# ACF theorique 
plot_acf(ArmaProcess(ar=ar_parms,ma=ma_parms).acf())
plt.show()
In [35]:
# ACF empirique 
plot_acf(MA_t,lags=20)
plt.show()

Question (e)

In [24]:
ar_parms = [1,-0.8]     # C'est un ARMA(1,2)
ma_parms = [1,-0.3,0.6]
X_t = ArmaProcess(ar=ar_parms,ma=ma_parms).generate_sample(200)
plt.plot(X_t)
plt.show()
In [72]:
model = ARMA(X_t,order=(1,2)).fit(trend='nc')
model.summary()
Out[72]:
ARMA Model Results
Dep. Variable: y No. Observations: 200
Model: ARMA(1, 2) Log Likelihood -287.716
Method: css-mle S.D. of innovations 1.014
Date: Mon, 10 Feb 2020 AIC 583.431
Time: 16:42:47 BIC 596.624
Sample: 0 HQIC 588.770
coef std err z P>|z| [0.025 0.975]
ar.L1.y 0.8169 0.044 18.402 0.000 0.730 0.904
ma.L1.y -0.2102 0.065 -3.241 0.001 -0.337 -0.083
ma.L2.y 0.5629 0.062 9.115 0.000 0.442 0.684
Roots
Real Imaginary Modulus Frequency
AR.1 1.2241 +0.0000j 1.2241 0.0000
MA.1 0.1867 -1.3198j 1.3329 -0.2276
MA.2 0.1867 +1.3198j 1.3329 0.2276
In [75]:
# Les coefficients estimés ne sont pas loin des valeurs théoriques : AR = 0.8, MA = -0.3, 0.6
model.params
Out[75]:
array([ 0.81691632, -0.21015125,  0.56287537])
In [73]:
residus = model.resid
plt.plot(residus)
plot_acf(residus)
plt.show()
In [44]:
acorr_ljungbox(residus,lags=range(60),boxpierce=True)[3] 
# p-valeurs des 60 premiers lags pour le test de Box-Pierce
Out[44]:
array([           nan, 1.97785436e-03, 1.73827821e-09, 8.79871765e-09,
       3.41549495e-08, 7.88092800e-08, 2.43639651e-07, 1.90651241e-07,
       2.86895068e-07, 5.27248575e-08, 1.22231968e-07, 2.31250262e-07,
       3.88304776e-07, 7.73800116e-07, 1.63385253e-06, 3.31073324e-06,
       5.89980633e-06, 1.05463947e-05, 1.94902211e-05, 3.12380773e-05,
       5.43637317e-05, 8.96419626e-05, 1.48951152e-04, 2.29448187e-04,
       2.77903489e-04, 2.57658554e-04, 3.71532863e-04, 5.41714302e-04,
       7.25254541e-04, 1.08219655e-03, 8.19167944e-04, 9.35640344e-04,
       1.29200479e-04, 4.53924080e-05, 5.46599825e-05, 7.38877268e-05,
       6.40194473e-05, 9.61474093e-05, 1.42012829e-04, 1.32872658e-04,
       9.02768586e-05, 6.00563762e-05, 7.55304086e-05, 9.14528405e-05,
       1.20515593e-04, 1.50302092e-04, 2.14022109e-04, 2.02293390e-04,
       2.63573562e-04, 3.47686234e-04, 3.95561301e-04, 3.42015676e-04,
       4.00956380e-04, 3.57642670e-04, 3.54382080e-04, 4.55663240e-04,
       6.18388262e-04, 7.80319585e-04, 9.10999893e-04, 1.20270762e-03])
In [45]:
acorr_ljungbox(residus,lags=range(60),boxpierce=True)[1] 
# p-valeurs des 60 premiers lags pour Ljung-Box test
Out[45]:
array([           nan, 1.82845045e-03, 1.18523821e-09, 6.04952230e-09,
       2.36570417e-08, 5.42453621e-08, 1.69216196e-07, 1.25496291e-07,
       1.84795270e-07, 2.92167128e-08, 6.81952922e-08, 1.28403554e-07,
       2.13014909e-07, 4.26364028e-07, 9.11963187e-07, 1.87132287e-06,
       3.34313610e-06, 6.01095047e-06, 1.12540967e-05, 1.80085081e-05,
       3.17268569e-05, 5.27415147e-05, 8.87471300e-05, 1.37279035e-04,
       1.60886164e-04, 1.37792587e-04, 1.98787356e-04, 2.91430619e-04,
       3.86776986e-04, 5.86799304e-04, 3.89759155e-04, 4.27851219e-04,
       3.36392177e-05, 8.29779130e-06, 9.65019080e-06, 1.30263277e-05,
       9.91276599e-06, 1.54043352e-05, 2.35148062e-05, 1.96545390e-05,
       1.06653063e-05, 5.57809013e-06, 6.90178627e-06, 8.11809948e-06,
       1.07548990e-05, 1.32235671e-05, 1.96576601e-05, 1.63257597e-05,
       2.14983406e-05, 2.89133002e-05, 3.13693244e-05, 2.26190734e-05,
       2.56659176e-05, 1.92333387e-05, 1.68641834e-05, 2.21220014e-05,
       3.17017077e-05, 4.06687619e-05, 4.62007648e-05, 6.42093119e-05])
In [47]:
acorr_ljungbox(residus,lags=range(60),boxpierce=True)[3] < 0.01
/Users/adrien/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in less
  """Entry point for launching an IPython kernel.
Out[47]:
array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

L'étude des résidus laisse penser que la modélisation est bonne : d'après le test de Box-Pierce (et Ljung-Box) l'hypothèse $H_0$ que les résidus sont gaussien n'est pas rejetée.

Exercice 2

In [76]:
GNP_US = read_csv('gnpus.csv') 
In [77]:
type(GNP_US)
Out[77]:
pandas.core.frame.DataFrame
In [78]:
plt.plot(GNP_US)
plt.show()
In [79]:
# Plot de l'ACF (avec statsmodels.graphics.tsaplots.plot_acf) 
plot_acf(GNP_US,lags=12)
plt.show()
In [80]:
#Après tatonnements, on choisit p=4 pour que l'hypothèse {résidus=BB} ne soit pas rejetée 
AR_GNPUS = ARMA(X_t,order=(4,0)).fit(trend='nc')
AR_GNPUS.summary()
Out[80]:
ARMA Model Results
Dep. Variable: y No. Observations: 200
Model: ARMA(4, 0) Log Likelihood -291.278
Method: css-mle S.D. of innovations 1.033
Date: Mon, 10 Feb 2020 AIC 592.555
Time: 16:46:08 BIC 609.047
Sample: 0 HQIC 599.229
coef std err z P>|z| [0.025 0.975]
ar.L1.y 0.6410 0.069 9.340 0.000 0.506 0.775
ar.L2.y 0.5805 0.081 7.128 0.000 0.421 0.740
ar.L3.y -0.1872 0.081 -2.305 0.022 -0.346 -0.028
ar.L4.y -0.2210 0.069 -3.200 0.002 -0.356 -0.086
Roots
Real Imaginary Modulus Frequency
AR.1 1.1501 -0.3041j 1.1897 -0.0411
AR.2 1.1501 +0.3041j 1.1897 0.0411
AR.3 -1.5738 -0.8490j 1.7882 -0.4213
AR.4 -1.5738 +0.8490j 1.7882 0.4213
In [81]:
plot_acf(AR_GNPUS.resid)
plt.plot()
Out[81]:
[]
In [84]:
acorr_ljungbox(AR_GNPUS.resid,lags=range(60),boxpierce=True)[3] < 0.01
/Users/adrien/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in less
  """Entry point for launching an IPython kernel.
Out[84]:
array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False])