-
Notifications
You must be signed in to change notification settings - Fork 260
Description
I'm running into problems with the relative effect when making the forecast. It always returns much bigger than expected, sometimes goes negative although should be positive. Please find below my code and advise how to fix it.
#install.packages("dplyr")
#install.packages("magrittr")
#install.packages("ggplot2")
#install.packages("scales")
#install.packages("tscount")
#install.packages("mgcv")
#install.packages("CausalImpact")
#install.packages("changepoint")
#install.packages("bcp")
#install.packages("timetk")
#install.packages("xts")
#install.packages("zoo")
#install.packages("tidyverse")
library(dplyr) # For data manipulation
library(magrittr) # For the '%>%' pipe operator
library(ggplot2) # For producing plots
library(scales) # For making plot axes pretty
library(tscount) # For simulating time series count data
library(mgcv) # For fitting generalised additive models
library(CausalImpact) # For fitting Byesian structural time series models for causal inference
library(changepoint) # For fitting changepoint models
library(bcp) # For fitting Bayesian changepoint models
library(timetk)
library(tidyr)
library(xts)
library(zoo)
library(tidyverse)
please input your absolute directory path of "treatment_vs_control.csv" by follow 20230730.csv
data <- read.csv("C:\Users\quynh\Downloads\Task1\DASH\20230730.csv")
data$date <- as.Date(data$date, format="%Y/%m/%d")
#format(data$date, "%d/%m/%Y")
data <- data %>%
mutate(date = as.Date(date))
Visual plot to see data trend
ggplot(data, aes(x = date, y = dash))+
geom_point(alpha= 0.3) +
labs(title = "dash price data from 2019 to 2020",
x = "date",
y = "dash Price")
Convert data to aggregation by week
data_dash_wk <- data %>%
filter(date > as.Date("2019-01-01")) %>%
group_by(week = cut(date, "week")) %>%
summarise(dash = mean(dash, na.rm = TRUE)) %>%
mutate(week = as.Date(as.character(week)))
Create row index
data_dash_wk <- data_dash_wk %>%
mutate(index = row_number())
Visual plot to see data trend
ggplot(data_dash_wk, aes(x = week, y = dash))+
geom_vline(xintercept = as.Date("2020-03-11"), color = "blue", lty = "dashed") +
geom_point(color = "red") +
geom_line(alpha = 0.4) +
labs(title = "dash data from Jan 2019 to end June 2020",
subtitle = "dash data declared on 11th March 2020 (dotted blue line)",
x = "date",
y = "dash")
Post Intervention Period is filled with NA
data_dash_wk_causal <- data_dash_wk %>%
mutate(dash = replace(dash, week >= as.Date("2020-03-11"), NA))
Create ts zoo data
ts_dash_wk <- zoo(data_dash_wk_causal$dash, data_dash_wk_causal$week)
plot(ts_dash_wk)
Model 3
ss3 <- list()
Semi Local trend, weekly-seasonal
ss3 <- AddSemilocalLinearTrend(ss3, ts_dash_wk)
Add weekly seasonal
ss3 <- AddSeasonal(ss3, ts_dash_wk, nseasons = 52)
model3 <- bsts(ts_dash_wk,
state.specification = ss3,
niter = 1500,
burn = 500)
plot(model3, main = "Model 3")
plot(model3, "components")
Model 3
plot(model3$state.specification[[2]], model3,ylim = c(-6000,6000),
ylab = "Distribution", xlab = "Date")
par(new=TRUE)
plot(components3$Date, components3$Seasonality, col = "magenta", type = "l", ylim = c(-6000,6000),
ylab = "Distribution", xlab = "Date")
abline(h = 2000, col = "red")
abline(h = -2000, col = "red")
components3 = cbind.data.frame(
colMeans(model3$state.contributions[-(1:500),"trend",]),
colMeans(model3$state.contributions[-(1:500),"seasonal.52.1",]),
as.Date(time(ts_dash_wk)))
names(components3) = c("Trend", "Seasonality", "Date")
components3 = cbind.data.frame(
colMeans(model3$state.contributions[-(1:500),"trend",]),
colMeans(model3$state.contributions[-(1:500),"seasonal.52.1",]),
as.Date(time(ts_dash_wk)))
names(components3) = c("Trend", "Seasonality", "date")
components3 = pivot_longer(components3, cols =c("Trend","Seasonality"))
names(components3) = c("date", "Component", "Value")
Causal impact of dash and Covid-19
pre.period <- as.Date(c("2019-01-01", "2020-03-11"))
post.period <- as.Date(c("2020-03-11", "2020-06-30"))
Obtain post period data
dat_dash_causal_post <- data_dash_wk %>%
filter(week >= as.Date("2020-03-11"))
Use model 3 for causal impact
impact <- CausalImpact(bsts.model = model3,
post.period.response = dat_dash_causal_post$dash, alpha = 0.05)
plot(impact)
summary(impact)
summary(impact, "report")