Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 187 additions & 34 deletions R/final_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,7 @@ final_models <- function(run_info,
run_local_models <- prev_log_df$run_local_models
run_ensemble_models <- prev_log_df$run_ensemble_models

if ((length(current_combo_list_final) == 0 & length(prev_combo_list) > 0) | sum(colnames(prev_log_df) %in% "weighted_mape")) {

if (sum(colnames(prev_log_df) %in% "weighted_mape")) {
# check if input values have changed
current_log_df <- tibble::tibble(
average_models = average_models,
Expand Down Expand Up @@ -315,8 +314,7 @@ final_models <- function(run_info,
final_model_list <- c(local_model_list, global_model_list)

# simple model averaging
if (average_models & length(final_model_list) > 1 & !best_model_check) {

if (average_models & length(final_model_list) > 1) {
# create model combinations list
model_combinations <- tibble::tibble()

Expand Down Expand Up @@ -360,7 +358,6 @@ final_models <- function(run_info,
.noexport = NULL
) %op%
{

# get list of models to average
model_list <- strsplit(x, "_")[[1]]

Expand Down Expand Up @@ -622,39 +619,195 @@ final_models <- function(run_info,
}

#' Create prediction intervals
#'
#' This function calculates prediction intervals using both conformal prediction
#' and z-score methods, adjusting for different time horizons in forecasting data.
#' @param fcst_tbl forecast table to use to create prediction intervals
#' @param train_test_split train test split
#' @param conf_levels confidence levels for prediction intervals (default: c(0.80, 0.95))
#' @param split_ratio fraction of back-test rows used for calibration; the remainder forms the coverage test set (default: 0.9)
#'
#' @return data frame with prediction intervals
#' @noRd
create_prediction_intervals <- function(fcst_tbl,
train_test_split) {
back_test_id <- train_test_split %>%
dplyr::filter(Run_Type == "Back_Test") %>%
dplyr::select(Train_Test_ID) %>%
dplyr::pull(Train_Test_ID)

prediction_interval_tbl <- fcst_tbl %>%
dplyr::filter(Train_Test_ID %in% back_test_id) %>%
dplyr::mutate(Residual = Target - Forecast) %>%
dplyr::group_by(Combo, Model_ID) %>%
dplyr::summarise(Residual_Std_Dev = sd(Residual, na.rm = TRUE)) %>%
dplyr::ungroup()

final_tbl <- fcst_tbl %>%
dplyr::left_join(prediction_interval_tbl,
by = c("Model_ID", "Combo")
) %>%
dplyr::mutate(
lo_80 = ifelse(Train_Test_ID == 1, Forecast - (1.28 * Residual_Std_Dev), NA),
lo_95 = ifelse(Train_Test_ID == 1, Forecast - (1.96 * Residual_Std_Dev), NA),
hi_80 = ifelse(Train_Test_ID == 1, Forecast + (1.28 * Residual_Std_Dev), NA),
hi_95 = ifelse(Train_Test_ID == 1, Forecast + (1.96 * Residual_Std_Dev), NA)
) %>%
dplyr::select(-Residual_Std_Dev)
create_prediction_intervals <- function(fcst_tbl,
                                        train_test_split,
                                        conf_levels = c(0.80, 0.95),
                                        split_ratio = 0.9) {
  # Build intervals per Combo/Model_ID from back-test residuals using two
  # methods -- rolling-window conformal quantiles per horizon and z-scores
  # from a normal residual model -- then keep the wider of the two per level.
  # NOTE: the output columns are hard-wired to the *_80 / *_95 names, so
  # exactly two conf_levels are supported (first -> *_80, second -> *_95).
  #
  # The tryCatch result MUST be assigned: the error handler returns the
  # NA-column fallback table, and the original code discarded it, returning
  # a table without lo_80/hi_95/coverage_* and breaking downstream selects.
  fcst_tbl <- tryCatch(
    {
      # Step 1: restrict to back-test rows, which carry both Target and Forecast
      back_test_ids <- train_test_split %>%
        dplyr::filter(Run_Type == "Back_Test") %>%
        dplyr::pull(Train_Test_ID)

      fcst_tbl_filtered <- fcst_tbl %>%
        dplyr::filter(Train_Test_ID %in% back_test_ids) %>%
        dplyr::arrange(Date, Horizon)

      results <- list()

      # Step 2: process each Combo x Model_ID pair independently
      for (combo in unique(fcst_tbl_filtered$Combo)) {
        model_ids <- unique(
          fcst_tbl_filtered$Model_ID[fcst_tbl_filtered$Combo == combo]
        )

        for (model_id in model_ids) {
          combo_model_data <- fcst_tbl_filtered %>%
            dplyr::filter(Combo == combo, Model_ID == model_id)

          # Step 3: split into calibration and coverage-test sets.
          # Cap the split index at n - 1 so the test set is never empty:
          # the original (split_index + 1):n indexing yielded the
          # out-of-range sequence c(n + 1, n) when split_index == n.
          n <- nrow(combo_model_data)
          if (n < 2) {
            next # cannot form both a calibration and a test set
          }
          split_index <- min(ceiling(split_ratio * n), n - 1)
          calibration_set <- combo_model_data[seq_len(split_index), ]
          test_set <- combo_model_data[(split_index + 1):n, ]

          # Step 4: z-score interval half-widths from calibration residuals
          residuals_cal <- calibration_set$Target - calibration_set$Forecast
          residual_std_dev <- sd(residuals_cal, na.rm = TRUE)
          z_vals <- qnorm((1 + conf_levels) / 2) * residual_std_dev

          # Step 5: adaptive rolling-window conformal quantiles per horizon.
          # Neighboring horizons (h .. h + 2) are pooled to stabilize the
          # estimate; interior points fall in several windows and so carry
          # more weight in the quantile, emphasizing recent local error.
          horizon_results <- purrr::map_dfr(
            unique(calibration_set$Horizon),
            function(h) {
              horizon_data <- calibration_set %>%
                dplyr::filter(Horizon >= h & Horizon <= h + 2) %>%
                dplyr::arrange(Date)

              n_horizon <- nrow(horizon_data)

              # Window size scales with the data but never exceeds the
              # available rows (the original n_horizon - 1 bound produced a
              # zero-width window and broken 1:(..) loops for tiny groups).
              window_size <- min(max(3, round(n_horizon * 0.1)), n_horizon)

              # Collect residuals from every rolling window; preallocated
              # via lapply/unlist instead of growing with c() in a loop.
              window_starts <- seq_len(max(n_horizon - window_size + 1, 1))
              pooled_residuals <- unlist(lapply(window_starts, function(i) {
                window_data <- horizon_data[i:(i + window_size - 1), ]
                window_data$Target - window_data$Forecast
              }))

              q_vals <- vapply(
                conf_levels,
                function(cl) {
                  alpha <- 1 - cl
                  unname(quantile(abs(pooled_residuals),
                    probs = 1 - alpha / 2,
                    na.rm = TRUE
                  ))
                },
                numeric(1)
              )

              dplyr::tibble(
                Combo = combo,
                Model_ID = model_id,
                Horizon = h,
                q_val_80 = q_vals[1],
                q_val_95 = q_vals[2]
              )
            }
          )

          # attach the (horizon-independent) z-score half-widths
          horizon_results <- horizon_results %>%
            dplyr::mutate(
              z_val_80 = z_vals[1],
              z_val_95 = z_vals[2]
            )

          # Step 6: fall back to the widest calibrated horizon for any
          # horizon present in the full data but absent from calibration
          max_horizon <- max(horizon_results$Horizon)
          max_horizon_values <- horizon_results %>%
            dplyr::filter(Horizon == max_horizon) %>%
            dplyr::select(q_val_80, q_val_95, z_val_80, z_val_95)

          all_horizons <- unique(combo_model_data$Horizon)
          missing_horizons <- setdiff(all_horizons, horizon_results$Horizon)

          if (length(missing_horizons) > 0) {
            fallback_results <- purrr::map_dfr(missing_horizons, function(h) {
              dplyr::tibble(
                Combo = combo,
                Model_ID = model_id,
                Horizon = h,
                q_val_80 = max_horizon_values$q_val_80,
                q_val_95 = max_horizon_values$q_val_95,
                z_val_80 = max_horizon_values$z_val_80,
                z_val_95 = max_horizon_values$z_val_95
              )
            })

            horizon_results <- dplyr::bind_rows(horizon_results, fallback_results)
          }

          # Step 7: measure empirical coverage of each method on the test set
          test_set_with_intervals <- test_set %>%
            dplyr::left_join(horizon_results, by = c("Combo", "Model_ID", "Horizon")) %>%
            dplyr::mutate(
              covered_80_conf = Target >= (Forecast - q_val_80) & Target <= (Forecast + q_val_80),
              covered_95_conf = Target >= (Forecast - q_val_95) & Target <= (Forecast + q_val_95),
              covered_80_z = Target >= (Forecast - z_val_80) & Target <= (Forecast + z_val_80),
              covered_95_z = Target >= (Forecast - z_val_95) & Target <= (Forecast + z_val_95)
            )

          coverage <- test_set_with_intervals %>%
            dplyr::summarise(
              coverage_conf_80 = mean(covered_80_conf, na.rm = TRUE),
              coverage_conf_95 = mean(covered_95_conf, na.rm = TRUE),
              coverage_z_80 = mean(covered_80_z, na.rm = TRUE),
              coverage_z_95 = mean(covered_95_z, na.rm = TRUE)
            )

          # Step 8: store the per-horizon widths tagged with coverage stats
          results[[length(results) + 1]] <- horizon_results %>%
            dplyr::mutate(
              coverage_conf_80 = coverage$coverage_conf_80,
              coverage_conf_95 = coverage$coverage_conf_95,
              coverage_z_80 = coverage$coverage_z_80,
              coverage_z_95 = coverage$coverage_z_95
            )
        }
      }

      # Step 9: combine and join back onto the full forecast table;
      # bind_rows preserves tibble column types (do.call(rbind, ...) does not)
      results <- dplyr::bind_rows(results)

      fcst_tbl <- fcst_tbl %>%
        dplyr::left_join(results, by = c("Combo", "Model_ID", "Horizon")) %>%
        dplyr::mutate(
          # take the wider of the two half-widths per confidence level
          lo_80 = Forecast - pmax(z_val_80, q_val_80, na.rm = TRUE),
          hi_80 = Forecast + pmax(z_val_80, q_val_80, na.rm = TRUE),
          lo_95 = Forecast - pmax(z_val_95, q_val_95, na.rm = TRUE),
          hi_95 = Forecast + pmax(z_val_95, q_val_95, na.rm = TRUE),
          method_80 = ifelse(z_val_80 > q_val_80, "Z-Score", "Conformal"),
          method_95 = ifelse(z_val_95 > q_val_95, "Z-Score", "Conformal"),
          coverage_80 = pmax(coverage_z_80, coverage_conf_80, na.rm = TRUE),
          coverage_95 = pmax(coverage_z_95, coverage_conf_95, na.rm = TRUE)
        )

      # Step 10: persist a timestamped snapshot; create the output folder
      # first so write.csv does not fail on a fresh run
      dir.create("final_forecast_table", showWarnings = FALSE, recursive = TRUE)
      filename <- paste0(
        "final_forecast_table/forecast_table_",
        format(Sys.time(), "%Y%m%d_%H%M%S"), ".csv"
      )
      write.csv(fcst_tbl, file = filename, row.names = FALSE)

      fcst_tbl
    },
    error = function(e) {
      # on any failure fall back to NA interval columns so downstream code
      # selecting lo_80/hi_95/coverage_* still finds the columns
      dplyr::mutate(fcst_tbl,
        lo_80 = NA,
        hi_80 = NA,
        lo_95 = NA,
        hi_95 = NA,
        method_80 = NA,
        method_95 = NA,
        coverage_80 = NA,
        coverage_95 = NA
      )
    }
  )

  return(fcst_tbl)
}

#' Convert weekly forecast down to daily
Expand Down Expand Up @@ -691,7 +844,7 @@ convert_weekly_to_daily <- function(fcst_tbl,
) %>%
dplyr::select(
Combo_ID, Model_ID, Model_Name, Model_Type, Recipe_ID, Train_Test_ID, Hyperparameter_ID,
Best_Model, Combo, Horizon, Date, Date_Day, Target, Forecast, lo_95, lo_80, hi_80, hi_95
Best_Model, Combo, Horizon, Date, Date_Day, Target, Forecast, lo_95, lo_80, hi_80, hi_95, coverage_80, coverage_95
)

return(daily_tbl)
Expand All @@ -701,7 +854,7 @@ convert_weekly_to_daily <- function(fcst_tbl,
final_tbl <- fcst_tbl %>%
dplyr::select(
Combo_ID, Model_ID, Model_Name, Model_Type, Recipe_ID, Train_Test_ID, Hyperparameter_ID,
Best_Model, Combo, Horizon, Date, Target, Forecast, lo_95, lo_80, hi_80, hi_95
Best_Model, Combo, Horizon, Date, Target, Forecast, lo_95, lo_80, hi_80, hi_95, coverage_80, coverage_95
)
}

Expand Down