#install.packages("vars")
library(dplyr)
library(vars)
library(tidyverse)
library(readxl)
library(urca)
library(car)
#install.packages(foreca)
#install.packages("forecast")
library(forecast)
dev.off()
# A function for black theme:
my_theme <- function(...) {
theme(
axis.line = element_blank(),
axis.text.x = element_text(color = "white", lineheight = 0.9),
axis.text.y = element_text(color = "white", lineheight = 0.9),
axis.ticks = element_line(color = "white", size = 0.2),
axis.title.x = element_text(color = "white", margin = margin(0, 10, 0, 0)),
axis.title.y = element_text(color = "white", angle = 90, margin = margin(0, 10, 0, 0)),
axis.ticks.length = unit(0.3, "lines"),
legend.background = element_rect(color = NA, fill = "gray10"),
legend.key = element_rect(color = "white", fill = "gray10"),
legend.key.size = unit(1.2, "lines"),
legend.key.height = NULL,
legend.key.width = NULL,
legend.text = element_text(color = "white"),
legend.title = element_text(face = "bold", hjust = 0, color = "white"),
legend.text.align = NULL,
legend.title.align = NULL,
legend.direction = "vertical",
legend.box = NULL,
panel.background = element_rect(fill = "gray10", color = NA),
panel.border = element_blank(),
panel.grid.major = element_line(color = "grey35"),
panel.grid.minor = element_line(color = "grey20"),
panel.spacing = unit(0.5, "lines"),
strip.background = element_rect(fill = "grey30", color = "grey10"),
strip.text.x = element_text(color = "white"),
strip.text.y = element_text(color = "white", angle = -90),
plot.background = element_rect(color = "gray10", fill = "gray10"),
plot.title = element_text(color = "white", hjust = 0, lineheight = 1.25,
margin = margin(2, 2, 2, 2)),
plot.subtitle = element_text(color = "white", hjust = 0, margin = margin(2, 2, 2, 2)),
plot.caption = element_text(color = "white", hjust = 0),
plot.margin = unit(rep(1, 4), "lines"))
}
#Loading data
data <- read.csv("D:/Techcombank/data1.csv", sep = ";", header = TRUE)
data.frame(data)
data %>% head()
data <- na.omit(data)
#plot mutivariate data plot
scatterplotMatrix(data[2:4])
dev.off()
plot(data$avg_bal, data$SLGD)
#calculating Correlations for Multivariate DAta
cor.test(data$avg_bal, data$SLGD)
cor.test(data$avg_bal, data$SLKH)
cor.test(data$SLKH, data$SLGD)
#Building a VAR Model
#Vector Autoregression (VAR) is a multivariate forecasting
#algorithm that is used when two or more time series influence each other
#1.Analyze the time series characteristics
#2.Test for causation amongst the time series
#3.Test for stationarity
#4.Transform the series to make it stationary, if needed
#5.Find optimal order (p)
#6.Prepare training and test datasets
#7.Train the model
#8.Roll back the transformations, if any.
#9.Evaluate the model using test set
#10.Forecast to future
#1.Analyze the time series characteristics
data %>% str()
data$MONTHID <- as.character(data$MONTHID)
data$time <- as.Date(data$Monthid,format = "%m/%d/%Y")
data$log.avgbal <- log(data$avg_bal)
data$log.SLGD <- log(data$SLGD)
data$log.SLHD <- log(data$SLHD)
data$log.SLKH <- log(data$SLKH)
data %>% head()
# Visualize time series:
data %>% ggplot(aes(time, avg_bal, series = "red")) +
geom_line() +
my_theme() +
scale_color_manual(values = c("red", "red")) +
labs(x = NULL, y = NULL,
title = "visualization")
#2.Test for causation amongst the time series
# AUTOMATICALLY SEARCH FOR THE MOST SIGNIFICANT RESULT
for (i in 1:4)
{
cat("LAG =", i)
print(causality(VAR(data, p = i, type = "const"), cause = "AVG_BAL")$Granger)
}
#3.Test for stationarity
##Perform Augmented Dicket Fuller
ur.df(data$avg_bal, type = "none") %>%
summary()
ur.df(data$SLGD, type = "none") %>%
summary()
ur.df(data$SLKH, type = "none") %>%
summary()
#test-statistics more than critical values => non-stationary
###difference
ur.df(diff(data$avg_bal, differences = 2), type = "none") %>%
summary()
ur.df(diff(data$SLGD, differences = 2), type = "none") %>%
summary()
ur.df(diff(data$SLHD, differences = 2), type = "none") %>%
summary()
ur.df(diff(data$SLKH, differences = 2), type = "none") %>%
summary()
dif.avgbal <- diff(data$avg_bal, differences = 2)
dif.SLGD <- diff(data$SLGD, differences = 2)
dif.SLHD <- diff(data$SLHD, differences = 2)
dif.SLKH <- diff(data$SLKH, differences = 2)
df2 <- data.frame(dif.avgbal = dif.avgbal, dif.SLGD = dif.SLGD, dif.SLKH = dif.SLKH)
df2 %>% head()
df2 <- ts(df2, frequency = 12, start = c(2017,3))
df2$Monthid <- as.Date(df2$Month, "%d-%m-%y")
str(df2)
df2 %>% str()
df2$Monthid <- as.Date(df2$Month, format = "%m/%d/%Y")
df2 %>% head()
dev.off()
df2 %>% ggplot(aes(Monthid, dif.SLKH)) +
geom_line()
VARselect(df2$dif.avgbal, lag.max = 5, type = "const")
VARselect(df2$dif.SLGD, lag.max = 5, type = "const")
VARselect(df2$dif.SLHD, lag.max = 5, type = "const")
VARselect(df2$dif.SLKH, lag.max = 5, type = "const")
##Plot ACF
acf(df2$dif.avgbal)
acf(df2$dif.SLGD)
acf(df2$dif.SLHD)
acf(df2$dif.SLKH)
#Var estimate
str(df2)
df <- cbind(df2$Monthid, df2$dif.avgbal, df2$dif.SLGD, df2$dif.SLKH)
df$month <- ts(start = c(2017,3), end = c(2019, 11), frequency = 1)
varp <- VAR(df, p = 2, type = "const", season = 4)
varp
lmp <- varp$varresult
summary(lmp)
#install.packages("stargazer")
library(stargazer)
stargazer(lmp$y1, lmp$y2, lmp$y3, type = "text", dep.var.labels.include = FALSE)
#plot residuals and their ACF and PACF
plot(varp$varresult)
#Granger causility
causality(varp, cause = "y1")
causality(varp, cause = "y2")
causality(varp, cause = "y3")
causality(varp, cause = "y4")
varp.f <- predict(varp, n.ahead = 6, ci = 0.95)
data.frame(varp.f)
par(mar=c(1,1,1,1))
plot(varp.f, names = "y1")
fanchart(varp.f, names = "y4")
varp.f %>% head()
#Generate impulse funtion
irf.avgbal <- irf(varp, impulse = "y1", response = "y4",
n.ahead = 12, boot = TRUE)
plot(irf.avgbal, ylab = "avgbal", main = "Shock from SLGD")
head(varp.f)
#undifference a time series
df3 <- diffinv(df2$dif.avgbal, xi = 0.5)
df3 %>% head()
#Backtest result
Result Accuracy: MAPE 0.0025

No comments:
Post a Comment