-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtslm.R
56 lines (44 loc) · 1.98 KB
/
tslm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Rossmann drugstores competition in Kaggle, 14 October 2015
# This script fits a linear model to each store's Sales time series, including trend and seasonality components
# We use tslm() and forecast() in the "forecast" R package
library(data.table)
library(forecast)
library(plyr)
# Load the competition CSVs; string columns are read as factors.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
train <- fread("./train.csv", stringsAsFactors = TRUE)
test <- fread("./test.csv", stringsAsFactors = TRUE)
store <- fread("./store.csv", stringsAsFactors = TRUE)

# Attach the per-store columns from store.csv to every train and test row.
train <- merge(train, store, by = "Store")
test <- merge(test, store, by = "Store")

# Convert Date to class Date so that ordering by it is chronological.
train[, Date := as.Date(Date)]
test[, Date := as.Date(Date)]

# Order rows by store, then by date within each store. The per-store model
# fitting and the later row-wise pairing of forecasts with test rows rely on
# this ordering.
train <- train[order(Store, Date)]
test <- test[order(Store, Date)]
# Fit a linear time-series model to the rows of a single store.
#
# x: data for one store; the columns Sales, DayOfWeek, Open, Promo,
#    StateHoliday and SchoolHoliday are read from it.
#
# Returns the object produced by tslm().
ts_fit <- function(x) {
  # The regressors are copied into local variables because the formula below
  # has no data argument and refers to them by these exact names.
  SchoolHoliday <- x$SchoolHoliday
  StateHoliday <- x$StateHoliday
  Promo <- x$Promo
  Open <- x$Open
  DayOfWeek <- x$DayOfWeek

  # Response as a ts object with 365 observations per seasonal period.
  Sales <- ts(x$Sales, frequency = 365)

  # trend and season are the trend and seasonality components of the model.
  fit <- tslm(
    Sales ~ trend + season + DayOfWeek + Open + Promo +
      StateHoliday + SchoolHoliday
  )
  fit
}
# Split the training data by Store and fit one model per store with ts_fit;
# the fitted models are collected in a list.
out <- dlply(.data = train, .variables = .(Store), .fun = ts_fit)
# Forecast the test rows of a single store with the model fitted for it.
#
# x: test rows of one store. The store id is taken from the first element of
#    x$Store; DayOfWeek, Open, Promo, StateHoliday and SchoolHoliday are
#    passed to forecast() as the new regressor values.
# y: list of fitted per-store models. When the list is named (dlply() names
#    its result by the value of the split variable) the model is looked up by
#    store id, so gaps or a different ordering of store ids cannot select the
#    wrong model. An unnamed list is indexed by position, as before.
#
# Returns the result of forecast() converted to a data.frame.
# Stops with an error when y is named but holds no model for the store.
ts_forecast <- function(x, y) {
  store_id <- x$Store[1]
  if (is.null(names(y))) {
    # No names to match against: fall back to positional indexing.
    fitt <- y[[store_id]]
  } else {
    key <- as.character(store_id)
    if (!key %in% names(y)) {
      stop("no fitted model available for store ", key, call. = FALSE)
    }
    fitt <- y[[key]]
  }
  data.frame(forecast(fitt, newdata = data.frame(DayOfWeek = x$DayOfWeek,
                                                 Open = x$Open,
                                                 Promo = x$Promo,
                                                 StateHoliday = x$StateHoliday,
                                                 SchoolHoliday = x$SchoolHoliday)))
}
# Forecast every store's test rows with that store's model. ddply() stacks
# the per-store results in Store order, keeping each store's rows in the
# order they have in `test` (already sorted by Store, then Date).
predictions <- ddply(test, .(Store), ts_forecast, out)

# Clip negative point forecasts to zero; NA forecasts stay NA.
predictions$Point.Forecast <- pmax(predictions$Point.Forecast, 0)

# Fallback value: mean training Sales per store and day of week.
Avg_Sales <- train[, .(AS = mean(Sales, na.rm = TRUE)), .(Store, DayOfWeek)]

# all.x = TRUE keeps every test row even when no average exists for its
# Store/DayOfWeek pair; dropping rows here would shift the row-wise pairing
# with `predictions` below.
test <- merge(test, Avg_Sales, by = c("Store", "DayOfWeek"), all.x = TRUE)
test <- test[order(Store, Date)]

# The forecasts are attached by row position, so fail loudly if the two
# tables do not line up.
stopifnot(
  nrow(test) == nrow(predictions),
  all(test$Store == predictions$Store)
)

# Multiply by Open so rows with Open == 0 get a forecast of exactly 0.
test[, FPPredictions := Open * predictions$Point.Forecast]

# Use the Open-masked forecast; where it is NA (missing forecast or missing
# Open) fall back to the store/day-of-week average. The masked column was
# previously computed but never read.
test[, FPredictions := ifelse(is.na(FPPredictions), AS, FPPredictions)]

# Submission file: one row per test Id, sorted by Id.
results <- data.frame(Id = test$Id, Sales = test$FPredictions)
results <- results[order(results$Id), ]
write.csv(results, "./Rossmann_TSLM.csv", row.names = FALSE)