# Load packages
library(itsmr)
library(tidyverse)
library(tsibble)
library(feasts)
Time Series Decomposition
The first step in analyzing any time series is plotting the data. Inspection of the graph may suggest the possibility of representing the data as follows (the classical decomposition):
\[ X_t = m_t + s_t + Y_t \] where
\(m_t\) is the trend component
\(s_t\) is the seasonal component
\(Y_t\) random noise component / Residuals
if seasonality and noise fluctuations (variability) appear to increase with the level of the process (trend), then use a preliminary transformation of the data (natural logarithm,…).
We are going to learn how to estimate and eliminate the trend and the seasonal components in the hope that the residual \(Y_t\) will be a stationary time series.
Trend Estimation
Strikes in the USA from 1951-1980
= read.table("http://users.stat.umn.edu/~kb/classes/5932/data/strikes.txt", header = FALSE)
strikes
# get data to tsibble format
= strikes %>%
strikes mutate(Year=seq(1951,1980,1)) %>%
as_tsibble(index = Year)
# plot the time series
%>%
strikes autoplot(V1) +
labs(y="Number of Strikes (thousands)",
title="Strikes in USA in 1951-1980")
# plot the ACF
%>%
strikes ACF(V1, lag_max = 20) %>%
autoplot()
Trend Estimation: Smoothing Moving Average
%>%
strikes mutate(ma.trend3 = smooth.ma(V1, 3),
ma.trend5 = smooth.ma(V1, 5),
ma.trend7 = smooth.ma(V1, 7),
ma.trend9 = smooth.ma(V1, 9)) %>% # q=3 means averaging 2q+1 values
pivot_longer(cols =-Year,
names_to = "series",
values_to = "values") %>%
group_by(series) %>%
autoplot(values)
Trend Estimation: Exponential Smoothing
%>%
strikes mutate(exp.trend1 = smooth.exp(V1, 0.1),# alpha=0.1 mis the smootheness parameter
exp.trend2 = smooth.exp(V1, 0.5),
exp.trend3 = smooth.exp(V1, 0.7),
exp.trend4 = smooth.exp(V1, 0.9)) %>%
pivot_longer(cols =-Year,
names_to = "series",
values_to = "values") %>%
group_by(series) %>%
autoplot(values)
Trend Estimation: FFT
%>%
strikes mutate(fft.trend1 = smooth.fft(V1, 0.1),# 0.1 is cutoff frequency - 10% of the lowest spectrum passes
fft.trend2 = smooth.fft(V1, 0.25)) %>%
pivot_longer(cols =-Year,
names_to = "series",
values_to = "values") %>%
group_by(series) %>%
autoplot(values)
Trend Estimation: Regression
%>%
strikes mutate(reg.trend1 = trend(V1, 1),# 1 means polynmial of degree 1
reg.trend2 = trend(V1, 2),
reg.trend3 = trend(V1, 3)) %>%
pivot_longer(cols =-Year,
names_to = "series",
values_to = "values") %>%
group_by(series) %>%
autoplot(values)
Seasonality Estimation
plot.ts(deaths)
acf(deaths,lag.max=40)
= season(deaths, d=12)
estimate.season plotc(deaths,estimate.season)
= deaths-estimate.season
r test(r)
Null hypothesis: Residuals are iid noise.
Test Distribution Statistic p-value
Ljung-Box Q Q ~ chisq(20) 202.4 0 *
McLeod-Li Q Q ~ chisq(20) 205.08 0 *
Turning points T (T-46.7)/3.5 ~ N(0,1) 43 0.2993
Diff signs S (S-35.5)/2.5 ~ N(0,1) 36 0.8394
Rank P (P-1278)/102.9 ~ N(0,1) 971 0.0028 *
Test IID hypothesis for the residuals
Use theResid()
function to find the residuals.
# deaths
= c("season",12,"trend",1) # from examining the series
data.model = Resid(deaths,data.model)
R# Test IDD
test(R) # need to tests to fail to reject the null hypothesis
Null hypothesis: Residuals are iid noise.
Test Distribution Statistic p-value
Ljung-Box Q Q ~ chisq(20) 133.05 0 *
McLeod-Li Q Q ~ chisq(20) 51.85 1e-04 *
Turning points T (T-46.7)/3.5 ~ N(0,1) 41 0.1087
Diff signs S (S-35.5)/2.5 ~ N(0,1) 37 0.5431
Rank P (P-1278)/102.9 ~ N(0,1) 1368 0.3816
# strikes
= c("trend",3)
data.model.s = Resid(strikes$V1,data.model.s)
R.s# Test IDD
test(R.s)
Null hypothesis: Residuals are iid noise.
Test Distribution Statistic p-value
Ljung-Box Q Q ~ chisq(20) 9.55 0.9757
McLeod-Li Q Q ~ chisq(20) 8.87 0.9843
Turning points T (T-18.7)/2.2 ~ N(0,1) 17 0.4566
Diff signs S (S-14.5)/1.6 ~ N(0,1) 16 0.3507
Rank P (P-217.5)/28 ~ N(0,1) 217 0.9858
Trend and Seasonality Elimination using the Differencing operator
JohnsonJohnson quarterly earnings data:
plot.ts(JohnsonJohnson)
acf(JohnsonJohnson,lag.max = 50)
# Transformation
library(car)
= powerTransform(JohnsonJohnson)
L = log(JohnsonJohnson)
Tjj
plot.ts(Tjj)
acf(Tjj,lag.max = 50)
We have a trend is assumed to be quadratic. There is variance change. Our data model is:
\[ X_t = m_t + Y_t \]
# Estimations
= c("log","trend",1)
jj.data.model = Resid(JohnsonJohnson,jj.data.model)
r.jj test(r.jj)
Null hypothesis: Residuals are iid noise.
Test Distribution Statistic p-value
Ljung-Box Q Q ~ chisq(20) 103.25 0 *
McLeod-Li Q Q ~ chisq(20) 42.54 0.0023 *
Turning points T (T-54.7)/3.8 ~ N(0,1) 48 0.0811
Diff signs S (S-41.5)/2.7 ~ N(0,1) 44 0.3476
Rank P (P-1743)/129.4 ~ N(0,1) 1829 0.5064
It looks like we have a season of period 4 Then,
\[ X_t = m_t + S_t + Y_t \]
and
# Estimation
= c("log","trend",2,"season",4)
jj.data.model = Resid(JohnsonJohnson,jj.data.model)
r.jj test(r.jj)
Null hypothesis: Residuals are iid noise.
Test Distribution Statistic p-value
Ljung-Box Q Q ~ chisq(20) 74.51 0 *
McLeod-Li Q Q ~ chisq(20) 14.92 0.7808
Turning points T (T-54.7)/3.8 ~ N(0,1) 54 0.8615
Diff signs S (S-41.5)/2.7 ~ N(0,1) 42 0.851
Rank P (P-1743)/129.4 ~ N(0,1) 1812 0.594
By Elimination we have the following:
\[ y_t = \nabla^2 \nabla_4 \log(X_t) \]
rewrite this as:
\[ y_t= (1-B)^2(1-B^4)X_t \]
Eliminate Season
= diff(log(JohnsonJohnson),lag = 4)
deseason.data plot(deseason.data)
Eliminate Trend
= diff(deseason.data,lag = 1,differences = 2) residual.data
Test IID for the residuals
test(residual.data)
Null hypothesis: Residuals are iid noise.
Test Distribution Statistic p-value
Ljung-Box Q Q ~ chisq(20) 103.69 0 *
McLeod-Li Q Q ~ chisq(20) 30.03 0.0694
Turning points T (T-50.7)/3.7 ~ N(0,1) 62 0.0021 *
Diff signs S (S-38.5)/2.6 ~ N(0,1) 41 0.3299
Rank P (P-1501.5)/115.9 ~ N(0,1) 1529 0.8124
Since at least one test is reject the hypothesis of IID, therefore further analysis to be done.