Skip to content

Error with Relative effect  #70

@quynhchau4

Description

@quynhchau4

I'm running into problems with the relative effect when making the forecast. It always returns much bigger than expected, sometimes goes negative although should be positive. Please find below my code and advise how to fix it.

#install.packages("dplyr")
#install.packages("magrittr")
#install.packages("ggplot2")
#install.packages("scales")
#install.packages("tscount")
#install.packages("mgcv")
#install.packages("CausalImpact")
#install.packages("changepoint")
#install.packages("bcp")
#install.packages("timetk")
#install.packages("xts")
#install.packages("zoo")
#install.packages("tidyverse")

library(dplyr) # For data manipulation
library(magrittr) # For the '%>%' pipe operator
library(ggplot2) # For producing plots
library(scales) # For making plot axes pretty
library(tscount) # For simulating time series count data
library(mgcv) # For fitting generalised additive models
library(CausalImpact) # For fitting Byesian structural time series models for causal inference
library(changepoint) # For fitting changepoint models
library(bcp) # For fitting Bayesian changepoint models
library(timetk)
library(tidyr)
library(xts)
library(zoo)
library(tidyverse)

please input your absolute directory path of "treatment_vs_control.csv" by follow 20230730.csv

data <- read.csv("C:\Users\quynh\Downloads\Task1\DASH\20230730.csv")
data$date <- as.Date(data$date, format="%Y/%m/%d")
#format(data$date, "%d/%m/%Y")
data <- data %>%
mutate(date = as.Date(date))

Visual plot to see data trend

ggplot(data, aes(x = date, y = dash))+
geom_point(alpha= 0.3) +
labs(title = "dash price data from 2019 to 2020",
x = "date",
y = "dash Price")

Convert data to aggregation by week

data_dash_wk <- data %>%
filter(date > as.Date("2019-01-01")) %>%
group_by(week = cut(date, "week")) %>%
summarise(dash = mean(dash, na.rm = TRUE)) %>%
mutate(week = as.Date(as.character(week)))

Create row index

data_dash_wk <- data_dash_wk %>%
mutate(index = row_number())

Visual plot to see data trend

ggplot(data_dash_wk, aes(x = week, y = dash))+
geom_vline(xintercept = as.Date("2020-03-11"), color = "blue", lty = "dashed") +
geom_point(color = "red") +
geom_line(alpha = 0.4) +
labs(title = "dash data from Jan 2019 to end June 2020",
subtitle = "dash data declared on 11th March 2020 (dotted blue line)",
x = "date",
y = "dash")

Post Intervention Period is filled with NA

data_dash_wk_causal <- data_dash_wk %>%
mutate(dash = replace(dash, week >= as.Date("2020-03-11"), NA))

Create ts zoo data

ts_dash_wk <- zoo(data_dash_wk_causal$dash, data_dash_wk_causal$week)

plot(ts_dash_wk)

Model 3

ss3 <- list()

Semi Local trend, weekly-seasonal

ss3 <- AddSemilocalLinearTrend(ss3, ts_dash_wk)

Add weekly seasonal

ss3 <- AddSeasonal(ss3, ts_dash_wk, nseasons = 52)
model3 <- bsts(ts_dash_wk,
state.specification = ss3,
niter = 1500,
burn = 500)
plot(model3, main = "Model 3")
plot(model3, "components")

Model 3

plot(model3$state.specification[[2]], model3,ylim = c(-6000,6000),
ylab = "Distribution", xlab = "Date")
par(new=TRUE)
plot(components3$Date, components3$Seasonality, col = "magenta", type = "l", ylim = c(-6000,6000),
ylab = "Distribution", xlab = "Date")
abline(h = 2000, col = "red")
abline(h = -2000, col = "red")

components3 = cbind.data.frame(
colMeans(model3$state.contributions[-(1:500),"trend",]),
colMeans(model3$state.contributions[-(1:500),"seasonal.52.1",]),
as.Date(time(ts_dash_wk)))
names(components3) = c("Trend", "Seasonality", "Date")

components3 = cbind.data.frame(
colMeans(model3$state.contributions[-(1:500),"trend",]),
colMeans(model3$state.contributions[-(1:500),"seasonal.52.1",]),
as.Date(time(ts_dash_wk)))
names(components3) = c("Trend", "Seasonality", "date")

components3 = pivot_longer(components3, cols =c("Trend","Seasonality"))

names(components3) = c("date", "Component", "Value")

Causal impact of dash and Covid-19

pre.period <- as.Date(c("2019-01-01", "2020-03-11"))
post.period <- as.Date(c("2020-03-11", "2020-06-30"))

Obtain post period data

dat_dash_causal_post <- data_dash_wk %>%
filter(week >= as.Date("2020-03-11"))

Use model 3 for causal impact

impact <- CausalImpact(bsts.model = model3,
post.period.response = dat_dash_causal_post$dash, alpha = 0.05)
plot(impact)

summary(impact)

summary(impact, "report")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions