import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
t = np.linspace(1,500,500)
eps = npr.randn(500)
X1 = eps
X1_det = np.zeros(500)
plt.plot(X1)
plt.plot(t,np.zeros(500),linewidth=3,color='cyan') # tendance et saisonnalité
plt.show()
X2_det = 2*np.sin(np.pi*t/100)
X2 = X2_det + eps
plt.plot(X2)
plt.plot(t,X2_det,linewidth=3,color='cyan')
plt.show()
X3_det = np.sqrt(t/10)-4+2*np.sin(np.pi*t/100)
X3 = X3_det + eps
plt.plot(X3)
plt.plot(t,X3_det,linewidth=3,color='cyan')
plt.show()
X4_det = t*np.sin(np.pi*t/100)/200
X4 = X4_det * eps
plt.plot(X4)
plt.plot(t,X4_det,linewidth=3,color='cyan') # tendance et saisonnalité
plt.show()
1) $X_t=\epsilon_t$. On a $X_t=m_t+s_t+\epsilon_t$ avec : $$m_t=s_t=0.$$
2) $X_t=2\sin(\frac{\pi t}{100})+\epsilon_t$. On a $X_t=m_t+s_t+\epsilon_t$ avec : $$m_t=m,\qquad s_t=2\sin(\frac{\pi t}{100})-m, \qquad m:=\frac1T\sum_{k=1}^T2\sin(\frac{\pi t}{100}),\qquad T:=200.$$
3) $X_t= \sqrt{\frac t{10}}-4+2\sin(\frac{\pi t}{100})+\epsilon_t$. On a $X_t=m_t+s_t+\epsilon_t$ avec : $$m_t= \sqrt{\frac t{10}}-4+m,\qquad s_t=2\sin(\frac{\pi t}{100})-m, \qquad m:=\frac1T\sum_{k=1}^T2\sin(\frac{\pi t}{100}),\qquad T:=200.$$
4) $X_t= \frac t{200}\sin(\frac{\pi t}{100})\epsilon_t$. On a $X_t=m_ts_t\epsilon_t$ avec : $$m_t= \frac t{200}m,\qquad s_t=\frac1m\sin(\frac{\pi t}{100}), \qquad m:=\left(\prod_{k=1}^T\sin(\frac{\pi t}{100})\right)^{1/T},\qquad T:=200.$$
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose as decompose
from statsmodels.tsa.stattools import acf
import pandas as pd
EuStockMarkets data set: Daily Closing Prices Of Major European Stock Indices, 1991--1998
Contains the daily closing prices of major European stock indices: Germany DAX (Ibis), Switzerland SMI, France CAC, and UK FTSE. The data are sampled in business time, i.e., weekends and holidays are omitted.
EuStockMarkets = sm.datasets.get_rdataset('EuStockMarkets').data
type(EuStockMarkets) # data.frame - utilise Pandas
EuStockMarkets.head()
EuStockMarkets.columns
EuStockMarkets.plot()
plt.show()
CAC40 = EuStockMarkets.CAC
#CAC40.index
Dec = decompose(CAC40, model='additive', freq = 253)
Dec.plot()
plt.show()
CAC40.plot()
Dec.trend.plot(linewidth=3,color='red')
plt.show()
sum(Dec.seasonal[0:253]) # preque zéro
# moyenne et espérance du bruit
np.mean(Dec.resid), np.var(Dec.resid)
# histogramme du bruit
plt.hist(Dec.resid,bins=40)
plt.show()
# ACF (AutoCorrelation Function) du bruit
ACF_noise = acf(Dec.resid, missing='drop',nlags=50, fft=True)
N = len(ACF_noise)
plt.scatter(range(N), ACF_noise, marker='+',s=2)
plt.show()
L'ACF empirique ne ressemble pas du tout à l'ACF d'un bruit blanc...
Comme l'inégalité de Tchebychev donne pour $\delta>0$ et $t\geq 1$, $$ \mathbb P(|X_t-\tilde X_t|\geq \delta)\leq \frac{\sigma^2\theta^{2t}}{(1-\theta^2)\delta^2}, $$ on obtient qu'avec probabilité supérieure à $1-\alpha$ on a $|X_t-\tilde X_t|<\leq \delta$ pour tout $t\geq t_0$, avec $$ t_0=\frac{\log(\alpha(1-\theta)\delta^2/\sigma^2)}{\log(\theta^2)} $$ Par exemple, on peut prendre $\alpha=1\%$ et $\delta=10^{-5}$.
def AR(N,sigma,theta):
alpha = 0.01
delta = 10**-5
t0 = int(np.log((1-theta)*alpha*delta**2/sigma**2)/np.log(theta**2))+1
Xt = 0
AR = [Xt]
eps = sigma*npr.randn(N+t0) # on prend un bruit blanc gaussien N(0,sigma^2)
for t in range(N+t0):
Xt = theta*Xt + eps[t]
AR.append(Xt)
return(AR[t0+1:N+t0])
AR_sim = AR(1000,1,0.9)
plt.plot(AR_sim)
plt.show()
ACF = acf(AR_sim,fft=True,alpha=0.05)
plt.plot(ACF[0])
plt.plot(ACF[1])
plt.show()