diff --git a/DESCRIPTION b/DESCRIPTION index 2930c75..9c62a60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: finnts Title: Microsoft Finance Time Series Forecasting Framework -Version: 0.6.0.9016 +Version: 0.6.0.9017 Authors@R: c(person(given = "Mike", family = "Tokic", diff --git a/NEWS.md b/NEWS.md index 87504ff..d2beb0a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# finnts 0.6.0.9016 (development version) +# finnts 0.6.0.9017 (development version) ## Improvements - TimeGPT Integration diff --git a/R/agent_summarize_models.R b/R/agent_summarize_models.R index 678f887..7abc185 100644 --- a/R/agent_summarize_models.R +++ b/R/agent_summarize_models.R @@ -119,7 +119,8 @@ summarize_models <- function(agent_info, "theta" = summarize_model_theta, "xgboost" = summarize_model_xgboost, "nnetar-xregs" = summarize_model_nnetar, - "prophet-xregs" = summarize_model_prophet + "prophet-xregs" = summarize_model_prophet, + "timegpt" = summarize_model_timegpt ) # Setup parallel processing @@ -4807,7 +4808,6 @@ summarize_model_snaive <- function(wf) { } - #' Summarize an STLM-ARIMA Workflow #' #' Extracts and summarizes key information from a fitted STLM-ARIMA workflow. @@ -7761,6 +7761,125 @@ summarize_model_xgboost <- function(wf) { ) } +#' Summarize a TimeGPT Workflow +#' +#' Extracts and summarizes key information from a fitted TimeGPT workflow, +#' including model arguments, engine parameters, and api configuration details. +#' +#' @param wf A fitted tidymodels workflow containing a timegpt_model() +#' model with engine 'timegpt_model'. +#' +#' @return A tibble with columns: section, name, value containing model details. +#' +#' #' @noRd +summarize_model_timegpt <- function(wf) { + digits <- 6 + scan_depth <- 6 + + if (!inherits(wf, "workflow")) stop("summarize_model_timegpt() expects a tidymodels workflow.") + fit <- try(workflows::extract_fit_parsnip(wf), silent = TRUE) + if (inherits(fit, "try-error") || is.null(fit$fit)) stop("Workflow appears untrained. Fit it first.") + + spec <- fit$spec + engine <- if (is.null(spec$engine)) "" else spec$engine + if (!identical(engine, "timegpt_model")) { + stop("summarize_model_timegpt() only supports timegpt_model() with engine 'timegpt_model'.") + } + + .format_numeric <- function(x) { + if (is.character(x) && (x == "auto" || x == "")) { + return(x) + } + x_num <- suppressWarnings(as.numeric(x)) + if (!is.na(x_num) && is.finite(x_num)) { + rounded <- round(x_num, digits) + if (rounded == floor(rounded)) { + return(as.character(as.integer(rounded))) + } + return(format(rounded, scientific = FALSE, trim = TRUE)) + } + as.character(x) + } + + # Predictors / outcomes + mold <- try(workflows::extract_mold(wf), silent = TRUE) + preds_tbl <- .extract_predictors(mold) + outs_tbl <- .extract_outcomes(mold) + + # Flag external regressors (non-date predictors) + xreg_names <- character() + if (!inherits(mold, "try-error") && !is.null(mold$predictors) && ncol(mold$predictors) > 0) { + is_date <- vapply(mold$predictors, function(col) inherits(col, c("Date", "POSIXct", "POSIXt")), logical(1)) + xreg_names <- names(mold$predictors)[!is_date] + if (length(xreg_names) > 0 && nrow(preds_tbl) > 0) { + preds_tbl$value[preds_tbl$name %in% xreg_names] <- + paste0(preds_tbl$value[preds_tbl$name %in% xreg_names], " [regressor]") + } + } + + # Recipe steps + # no significant step + steps_tbl <- tibble::tibble(section = character(), name = character(), value = character()) + + arg_names <- c("finetune_steps", "finetune_depth") + arg_vals <- vapply(arg_names, function(nm) .format_numeric(.chr1(spec$args[[nm]])), FUN.VALUE = character(1)) + args_tbl <- tibble::tibble(section = "model_arg", name = arg_names, value = arg_vals) + + # Engine params + eng_tbl <- tibble::tibble(section = character(), name = character(), value = character()) + + # Locate the timegpt fit object + is_timegpt_fit <- function(o) { + inherits(o, "timegpt_model_fit") || (is.list(o) && !is.null(o$train_data) && !is.null(o$forecast_horizon)) + } + timegpt_obj <- .find_obj(fit$fit, is_timegpt_fit, scan_depth) + if (is.null(timegpt_obj) && is_timegpt_fit(fit$fit)) timegpt_obj <- fit$fit + + if (!is.null(timegpt_obj)) { + # Training data stats - external regressors + if (!is.null(timegpt_obj$train_data)) { + train_data <- timegpt_obj$train_data + original_cols <- colnames(train_data)[grepl("_original", colnames(train_data))] + if (length(original_cols) > 0) { + clean_names <- gsub("_original$", "", original_cols) # Remove _original suffix + eng_tbl <- dplyr::bind_rows(eng_tbl, .kv("engine_param", "n_external_regressors", as.character(length(clean_names)))) + eng_tbl <- dplyr::bind_rows(eng_tbl, .kv("engine_param", "external_regressors", paste(clean_names, collapse = ", "))) + } else { + eng_tbl <- dplyr::bind_rows(eng_tbl, .kv("engine_param", "n_external_regressors", "0")) + } + } + + # API Details + api_key_set <- !is.null(getOption("NIXTLA_API_KEY")) || nzchar(Sys.getenv("NIXTLA_API_KEY")) + base_url <- getOption("NIXTLA_BASE_URL") %||% Sys.getenv("NIXTLA_BASE_URL") + if (api_key_set) { + if (!is.null(base_url) && nzchar(base_url)) { + is_azure <- grepl("^https?://[^/]*azure", base_url, ignore.case = TRUE) + eng_tbl <- dplyr::bind_rows(eng_tbl, .kv("engine_param", "api_endpoint_type", ifelse(is_azure, "azure", "nixtla_default"))) + } else { + eng_tbl <- dplyr::bind_rows(eng_tbl, .kv("engine_param", "api_endpoint_type", "nixtla_default")) + } + } else { + eng_tbl <- dplyr::bind_rows(eng_tbl, .kv("engine_param", "api_configuration", "failed")) + } + + # Model variant (long-horizon flag) + if (!is.null(timegpt_obj$forecast_horizon) && !is.null(timegpt_obj$frequency)) { + date_type <- get_date_type(timegpt_obj$frequency) + if (!is.null(date_type) && is_long_horizon_forecast(timegpt_obj$forecast_horizon, date_type)) { + eng_tbl <- dplyr::bind_rows(eng_tbl, .kv("engine_param", "model_variant", "timegpt-1-long-horizon")) + } + } + } + + .assemble_output( + preds_tbl, outs_tbl, steps_tbl, args_tbl, eng_tbl, + class(fit$fit)[1], engine, + unquote_values = FALSE, digits = digits + ) +} + + #' Convert various R objects to a single character string #' @noRd .chr1 <- function(x) { diff --git a/R/train_models.R b/R/train_models.R index 7ff4854..ba1c477 100644 --- a/R/train_models.R +++ b/R/train_models.R @@ -141,6 +141,8 @@ train_models <- function(run_info, global_model_list <- list_global_models() fs_model_list <- list_multivariate_models() + # Remove timegpt from feature selection list as it doesn't use traditional features, prevents Combo getting dropped from feature selection + fs_model_list <- setdiff(fs_model_list, "timegpt") if (sum(model_workflow_list %in% global_model_list) == 0 & run_global_models) { run_global_models <- FALSE