From e797cd4cda03b1ebf5fec049151925114890a6d7 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 7 Jul 2025 15:13:09 -0700 Subject: [PATCH 01/19] implementation complete --- NAMESPACE | 11 +++++++ R/quantile-pred.R | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index fb1156e5..5d0f0e2f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,13 +40,19 @@ S3method(standardize,double) S3method(standardize,factor) S3method(standardize,integer) S3method(standardize,matrix) +S3method(vec_arith,quantile_pred) +S3method(vec_arith.numeric,quantile_pred) +S3method(vec_arith.quantile_pred,numeric) S3method(vec_cast,double.hardhat_frequency_weights) S3method(vec_cast,double.hardhat_importance_weights) S3method(vec_cast,hardhat_frequency_weights.hardhat_frequency_weights) S3method(vec_cast,hardhat_importance_weights.hardhat_importance_weights) S3method(vec_cast,integer.hardhat_frequency_weights) +S3method(vec_cast,quantile_pred.quantile_pred) +S3method(vec_math,quantile_pred) S3method(vec_ptype2,hardhat_frequency_weights.hardhat_frequency_weights) S3method(vec_ptype2,hardhat_importance_weights.hardhat_importance_weights) +S3method(vec_ptype2,quantile_pred.quantile_pred) S3method(vec_ptype_abbr,hardhat_frequency_weights) S3method(vec_ptype_abbr,hardhat_importance_weights) S3method(vec_ptype_abbr,quantile_pred) @@ -145,3 +151,8 @@ importFrom(stats,model.matrix) importFrom(stats,terms) importFrom(tibble,as_tibble) importFrom(tibble,tibble) +importFrom(vctrs,vec_arith) +importFrom(vctrs,vec_arith.numeric) +importFrom(vctrs,vec_cast) +importFrom(vctrs,vec_math) +importFrom(vctrs,vec_ptype2) diff --git a/R/quantile-pred.R b/R/quantile-pred.R index 4fc4589b..458f6fb2 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -208,3 +208,79 @@ check_quantile_level_values <- function(levels, arg, call) { } invisible(TRUE) } + + +# vctrs behaviours -------------------------------------------------------- + +#' @importFrom vctrs vec_ptype2 vec_cast +#' @importFrom hardhat extract_quantile_levels +#' @export +#' @keywords internal +vec_ptype2.quantile_pred.quantile_pred <- function( + x, y, ..., x_arg = "", y_arg = "", call = caller_env() +) { + if (all(extract_quantile_levels(y) %in% extract_quantile_levels(x))) { + return(x) + } + if (all(extract_quantile_levels(x) %in% extract_quantile_levels(y))) { + return(y) + } + stop_incompatible_type( + x, y, x_arg = x_arg, y_arg = y_arg, + details = "`quantile_levels` must be compatible (a superset/subset relation)." + ) +} + +#' @export +vec_cast.quantile_pred.quantile_pred <- function(x, to, ..., x_arg = "", to_arg = "") { + x_lvls <- extract_quantile_levels(x) + to_lvls <- extract_quantile_levels(to) + x_in_to <- x_lvls %in% to_lvls + to_in_x <- to_lvls %in% x_lvls + + old_qdata <- as.matrix(x)[, x_in_to] + new_qdata <- matrix(NA, nrow = vec_size(x), ncol = length(to_lvls)) + new_qdata[, to_in_x] <- old_qdata + quantile_pred(new_qdata, quantile_levels = to_lvls) +} + + + +#' @importFrom vctrs vec_math +#' @export +#' @method vec_math quantile_pred +vec_math.quantile_pred <- function(.fn, .x, ...) { + fn <- .fn + .fn <- getExportedValue("base", .fn) + if (fn %in% c("any", "all", "prod", "sum", "cumsum", "cummax", "cummin", "cumprod")) { + cli_abort("{.fn {fn}} is not a supported operation for {.cls quantile_pred}.") + } + quantile_levels <- .x %@% "quantile_levels" + .x <- as.matrix(.x) + quantile_pred(.fn(.x), quantile_levels) +} + +#' @importFrom vctrs vec_arith vec_arith.numeric +#' @export +#' @method vec_arith quantile_pred +vec_arith.quantile_pred <- function(op, x, y, ...) { + UseMethod("vec_arith.quantile_pred", y) +} + +#' @export +#' @method vec_arith.quantile_pred numeric +vec_arith.quantile_pred.numeric <- function(op, x, y, ...) { + op_fn <- getExportedValue("base", op) + l <- vctrs::vec_recycle_common(x = x, y = y) + out <- op_fn(as.matrix(l$x), l$y) + quantile_pred(out, x %@% "quantile_levels") +} + +#' @export +#' @method vec_arith.numeric quantile_pred +vec_arith.numeric.quantile_pred <- function(op, x, y, ...) { + op_fn <- getExportedValue("base", op) + l <- vctrs::vec_recycle_common(x = x, y = y) + out <- op_fn(l$x, as.matrix(l$y)) + quantile_pred(out, y %@% "quantile_levels") +} From 0dee1f82adf8ff36fbf58694b50084605f103f63 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 7 Jul 2025 15:14:20 -0700 Subject: [PATCH 02/19] vctrs is already imported --- NAMESPACE | 5 ----- R/quantile-pred.R | 5 ----- 2 files changed, 10 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5d0f0e2f..ca83a9bb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -151,8 +151,3 @@ importFrom(stats,model.matrix) importFrom(stats,terms) importFrom(tibble,as_tibble) importFrom(tibble,tibble) -importFrom(vctrs,vec_arith) -importFrom(vctrs,vec_arith.numeric) -importFrom(vctrs,vec_cast) -importFrom(vctrs,vec_math) -importFrom(vctrs,vec_ptype2) diff --git a/R/quantile-pred.R b/R/quantile-pred.R index 458f6fb2..efd468d0 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -212,8 +212,6 @@ check_quantile_level_values <- function(levels, arg, call) { # vctrs behaviours -------------------------------------------------------- -#' @importFrom vctrs vec_ptype2 vec_cast -#' @importFrom hardhat extract_quantile_levels #' @export #' @keywords internal vec_ptype2.quantile_pred.quantile_pred <- function( @@ -245,8 +243,6 @@ vec_cast.quantile_pred.quantile_pred <- function(x, to, ..., x_arg = "", to_arg } - -#' @importFrom vctrs vec_math #' @export #' @method vec_math quantile_pred vec_math.quantile_pred <- function(.fn, .x, ...) { @@ -260,7 +256,6 @@ vec_math.quantile_pred <- function(.fn, .x, ...) { quantile_pred(.fn(.x), quantile_levels) } -#' @importFrom vctrs vec_arith vec_arith.numeric #' @export #' @method vec_arith quantile_pred vec_arith.quantile_pred <- function(op, x, y, ...) { From c96d0121ebd10b3885d5745a827a8870033c54ec Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 7 Jul 2025 15:29:19 -0700 Subject: [PATCH 03/19] switch back to map() --- R/quantile-pred.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/quantile-pred.R b/R/quantile-pred.R index efd468d0..302888d3 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -42,7 +42,7 @@ quantile_pred <- function(values, quantile_levels = double()) { rownames(values) <- NULL colnames(values) <- NULL - values <- lapply(vctrs::vec_chop(values), drop) + values <- map(vctrs::vec_chop(values), drop) new_quantile_pred(values, quantile_levels) } @@ -249,7 +249,7 @@ vec_math.quantile_pred <- function(.fn, .x, ...) { fn <- .fn .fn <- getExportedValue("base", .fn) if (fn %in% c("any", "all", "prod", "sum", "cumsum", "cummax", "cummin", "cumprod")) { - cli_abort("{.fn {fn}} is not a supported operation for {.cls quantile_pred}.") + cli::cli_abort("{.fn {fn}} is not a supported operation for {.cls quantile_pred}.") } quantile_levels <- .x %@% "quantile_levels" .x <- as.matrix(.x) From 1350550f34ba8f415c4e5ab3486976c9517de978 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 7 Jul 2025 15:29:33 -0700 Subject: [PATCH 04/19] cover various cases for median --- tests/testthat/test-quantile-pred.R | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/testthat/test-quantile-pred.R b/tests/testthat/test-quantile-pred.R index b649fca1..20b7bb97 100644 --- a/tests/testthat/test-quantile-pred.R +++ b/tests/testthat/test-quantile-pred.R @@ -55,6 +55,23 @@ test_that("extract_quantile_levels", { ) }) +test_that("median for quantile_pred", { + v <- quantile_pred(matrix(1:25, 5), 3:7 / 10) + expect_identical(median(v), as.double(11:15)) # has explicit median, but dbl + + v_above_med <- quantile_pred(matrix(1:10, 2), 11:15 / 20) + expect_equal(median(v_above_med), rep(NA, 2)) + + v_below_med <- quantile_pred(matrix(1:10, 2), 5:9 / 20) + expect_equal(median(v_above_med), rep(NA, 2)) + + v4 <- quantile_pred(matrix(1:10, ncol = 1), 0.4) + expect_equal(median(v4), rep(NA, 10)) + + v5 <- quantile_pred(matrix(1:10, ncol = 1), 0.5) + expect_equal(median(v5), as.double(1:10)) +}) + test_that("quantile_pred formatting", { # multiple quantiles v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) @@ -98,3 +115,39 @@ test_that("as.matrix() for quantile_pred", { expect_true(is.matrix(m)) expect_identical(m, x) }) + + + +test_that("unary math works on quantiles", { + dstn <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE), + 1:4 / 5 + ) + dstn2 <- quantile_pred( + log(matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE)), + 1:4 / 5 + ) + expect_identical(log(dstn), dstn2) +}) + +test_that("arithmetic works on quantiles", { + dstn <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE), + 1:4 / 5 + ) + dstn2 <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE) + 1, + 1:4 / 5 + ) + expect_identical(dstn + 1, dstn2) + expect_identical(1 + dstn, dstn2) + + dstn2 <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE) / 4, + 1:4 / 5 + ) + expect_identical(dstn / 4, dstn2) + expect_identical((1 / 4) * dstn, dstn2) + + expect_snapshot(error = TRUE, sum(dstn)) +}) From 1f9cedecaee0d7127cf297eda3559be87574bc78 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 7 Jul 2025 15:43:55 -0700 Subject: [PATCH 05/19] add tests for quantile_pred vec methods --- tests/testthat/_snaps/quantile-pred.md | 17 +++++++++++++++++ tests/testthat/test-quantile-pred.R | 23 +++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tests/testthat/_snaps/quantile-pred.md b/tests/testthat/_snaps/quantile-pred.md index cb2b45e7..a02d6957 100644 --- a/tests/testthat/_snaps/quantile-pred.md +++ b/tests/testthat/_snaps/quantile-pred.md @@ -168,3 +168,20 @@ Output [1] "[1.7154]" "[0.56784]" "[1.2393]" "[2.2062]" "[0.76714]" +# arithmetic works on quantiles + + Code + sum(dstn) + Condition + Error in `vec_math()`: + ! `sum()` is not a supported operation for . + +# vec_ptype works + + Code + vec_ptype2(v1, ugly_levels) + Condition + Error in `vec_ptype2.quantile_pred.quantile_pred()`: + ! Can't combine `v1` and `ugly_levels` . + `quantile_levels` must be compatible (a superset/subset relation). + diff --git a/tests/testthat/test-quantile-pred.R b/tests/testthat/test-quantile-pred.R index 20b7bb97..2f94d123 100644 --- a/tests/testthat/test-quantile-pred.R +++ b/tests/testthat/test-quantile-pred.R @@ -151,3 +151,26 @@ test_that("arithmetic works on quantiles", { expect_snapshot(error = TRUE, sum(dstn)) }) + +test_that("vec_ptype works", { + v1 <- quantile_pred(matrix(1:20, 5), 1:4 / 5) + v2 <- quantile_pred(matrix(1:15, 5), 2:4 / 5) + expect_identical(vec_ptype2(v1, v1), vec_ptype(v1)) + expect_identical(vec_ptype2(v1, v2), vec_ptype(v1)) + expect_identical(vec_ptype2(v2, v1), vec_ptype(v1)) + + ugly_levels <- quantile_pred(matrix(1:20, 5), 1:4 / 5 + .1) + expect_snapshot(error = TRUE, vec_ptype2(v1, ugly_levels)) +}) + +test_that("vec_cast self-self works", { + qp <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) + qp2 <- quantile_pred(matrix(rnorm(7), nrow = 1), 2:8 / 10) + expect_identical(vec_cast(qp, qp), qp) + expect_identical(vec_cast(qp2, qp2), qp2) + + qp_dat <- as.matrix(qp) + qp_big <- matrix(NA, nrow(qp_dat), length(2:8)) + qp_big[, c(1, 3, 5, 7)] <- qp_dat + expect_identical(vec_cast(qp, qp2), quantile_pred(qp_big, 2:8 / 10)) +}) From 09983dbf5c7ff62269b6e917e4cfdd99ac1dd12a Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 7 Jul 2025 16:44:19 -0700 Subject: [PATCH 06/19] test for `quantile_pred` type --- R/quantile-pred.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/R/quantile-pred.R b/R/quantile-pred.R index 302888d3..990d11f2 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -55,10 +55,26 @@ new_quantile_pred <- function(values = list(), quantile_levels = double()) { ) } + +#' Test if an object is a `quantile_pred` +#' +#' @param x an object +#' +#' @returns A boolean. +#' @export +#' +#' @examples +#' qp <- quantile_pred(matrix(rnorm(10), nrow = 2), 1:5 / 6) +#' is_quantile_pred(qp) +#' is_quantile_pred(1:5) +is_quantile_pred <- function(x) { + inherits(x, "quantile_pred") +} + #' @export #' @rdname quantile_pred extract_quantile_levels <- function(x) { - if (!inherits(x, "quantile_pred")) { + if (!is_quantile_pred(x)) { cli::cli_abort( "{.arg x} must be a {.cls quantile_pred} object, not {.obj_type_friendly {x}}." From 69cbe896c409ae601c6b5460f60b73b6622353d7 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Mon, 7 Jul 2025 17:00:44 -0700 Subject: [PATCH 07/19] add quantile imputation, document --- NAMESPACE | 5 + R/interpolate-quantile_pred.R | 211 ++++++++++++++++++++++++++++++++++ man/impute_quantiles.Rd | 76 ++++++++++++ man/is_quantile_pred.Rd | 22 ++++ man/snap.Rd | 24 ++++ 5 files changed, 338 insertions(+) create mode 100644 R/interpolate-quantile_pred.R create mode 100644 man/impute_quantiles.Rd create mode 100644 man/is_quantile_pred.Rd create mode 100644 man/snap.Rd diff --git a/NAMESPACE b/NAMESPACE index ca83a9bb..a09fffcb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,8 @@ S3method(run_mold,default) S3method(run_mold,default_formula_blueprint) S3method(run_mold,default_recipe_blueprint) S3method(run_mold,default_xy_blueprint) +S3method(snap,numeric) +S3method(snap,quantile_pred) S3method(standardize,array) S3method(standardize,data.frame) S3method(standardize,default) @@ -94,10 +96,12 @@ export(get_data_classes) export(get_levels) export(get_outcome_levels) export(importance_weights) +export(impute_quantiles) export(is_blueprint) export(is_case_weights) export(is_frequency_weights) export(is_importance_weights) +export(is_quantile_pred) export(model_frame) export(model_matrix) export(model_offset) @@ -120,6 +124,7 @@ export(run_forge) export(run_mold) export(scream) export(shrink) +export(snap) export(spruce_class) export(spruce_class_multiple) export(spruce_numeric) diff --git a/R/interpolate-quantile_pred.R b/R/interpolate-quantile_pred.R new file mode 100644 index 00000000..1fbc1bbb --- /dev/null +++ b/R/interpolate-quantile_pred.R @@ -0,0 +1,211 @@ +#' Restrict numeric data to the interval \[lower, upper\] +#' +#' @param x a numeric vector +#' @param lower number, the lower bound +#' @param upper number, the upper bound +#' @param ... unused +#' @export +#' +#' @return An object ot the same type as `x` +#' +#' @keywords internal +snap <- function(x, lower, upper, ...) { + UseMethod("snap") +} + +#' @export +snap.numeric <- function(x, lower, upper, ...) { + rlang::check_dots_empty() + check_number_decimal(lower) + check_number_decimal(upper) + + pmin(pmax(x, lower), upper) +} + +#' @export +snap.quantile_pred <- function(x, lower, upper, ...) { + values <- as.matrix(x) + quantile_levels <- extract_quantile_levels(x) + values <- map(vctrs::vec_chop(values), ~ snap(.x, lower, upper)) + quantile_pred(do.call(rbind, values), quantile_levels = quantile_levels) +} + + + + +#' Compute quantiles from a `quantile_pred` +#' +#' While a `hardhat::quantile_pred` describes evaluations for the inverse +#' cummulative distribution function (CDF, sometimes called the "quantile +#' function") at particular quantiles, this is not enough +#' to fully describe the distribution. For example, +#' ```r +#' p <- c(.1, .5, .9) +#' quantile_pred(matrix(qnorm(p), nrow = 1), p) +#' ``` +#' encapsulates the 10%, 50%, and 90% quantiles of the standard normal distribution. +#' But, what if we need, say, the 25% and 75% quantiles? This function imputes +#' them if possible. +#' +#' @details +#' If `probs` is simply a subset of `quantile_levels` that already exist in `x`, +#' then these will be returned (up to numeric error). Small errors are possible +#' due to difficulties matching double vectors. +#' +#' For `probs` that do not exist in `x`, these will be interpolated or +#' extrapolated as needed. The process has 3 steps. +#' +#' First, by default (`middle = "cubic"`), missing _internal_ quantiles are +#' interpolated using a cubic spline fit to the observed quantiles with +#' `stats::splinefun()`. Second, if cubic interpolation fails (or if +#' `middle = "linear"`), linear interpolation is used via `stats::approx()`. +#' Finally, missing _external_ quantiles (those outside the range of +#' `quantile_levels`) are extrapolated. This is done using a linear fit on the +#' logistic scale to the two closest tail probabilities. +#' +#' This procedure results in sorted quantiles that interpolate/extrapolate +#' smoothly, while also enforcing heavy tails if none are provided. +#' +#' Optionally, the set of quantiles can be constrained to a compact interval +#' using `lower` and/or `upper`. +#' +#' +#' @param x an object of class `quantile_pred` +#' @param probs vector. probabilities at which to evaluate the inverse CDF +#' @param lower number. lower bound for the resulting values +#' @param upper number. upper bound for the resulting values +#' @param middle character. +#' +#' @returns A matrix with `length(probs)` columns and `length(x)` rows. Each +#' row contains the inverse CDF (quantile function) given by `x`, +#' extrapolated/interpolated to `probs`. +#' @export +#' +#' @examples +#' p <- c(.1, .5, .9) +#' qp <- quantile_pred(matrix(c(qnorm(p), qexp(p)), nrow = 2, byrow = TRUE), p) +#' impute_quantiles(qp, p) +#' as.matrix(qp) # same as the imputation +#' +#' p1 <- c(.05, .25, .75, .95) +#' impute_quantiles(qp, p1) +#' rbind(qnorm(p1), qexp(p1)) # exact values, for comparison +impute_quantiles <- function( + x, + probs = seq(0, 1, 0.25), + lower = -Inf, + upper = Inf, + middle = c("cubic", "linear") +) { + if (!is_quantile_pred(x)) { + cli::cli_abort( + "{.arg x} must be a {.cls quantile_pred} object, not + {.obj_type_friendly {x}}." + ) + } + if (length(extract_quantile_levels(x)) < 2) { + cli::cli_abort( + "Quantile interpolation is not possible when fewer than 2 quantiles + are avaliable." + ) + } + if (is.unsorted(probs)) probs <- sort(probs) + check_quantile_levels(probs) + check_number_decimal(lower) + check_number_decimal(upper) + if (lower > upper) { + cli::cli_abort("`lower` must be less than `upper`.") + } + middle <- rlang::arg_match(middle) + snap(impute_quantile_internal(x, probs, middle), lower, upper) +} + +impute_quantile_internal <- function(x, tau_out, middle) { + tau <- extract_quantile_levels(x) + qvals <- as.matrix(x) + if (all(tau_out %in% tau) && !anyNA(qvals)) { + return(qvals[, match(tau_out, tau), drop = FALSE]) + } + qvals_out <- map( + vctrs::vec_chop(qvals), + ~ impute_quantiles_single(.x, tau, tau_out, middle) + ) + qvals_out <- do.call(rbind, qvals_out) + qvals_out +} + +impute_quantiles_single <- function(qvals, tau, tau_out, middle) { + qvals_out <- rep(NA, length(tau_out)) + good <- !is.na(qvals) + if (!any(good)) { + return(qvals_out) + } + qvals <- qvals[good] + tau <- tau[good] + + # in case we only have one point, and it matches something we wanted + if (length(good) < 2) { + matched_one <- tau_out %in% tau + qvals_out[matched_one] <- qvals[matched_one] + return(qvals_out) + } + + indl <- tau_out < min(tau) + indr <- tau_out > max(tau) + indm <- !indl & !indr + + if (middle == "cubic") { + method <- "cubic" + result <- tryCatch( + { + Q <- stats::splinefun(tau, qvals, method = "hyman") + quartiles <- Q(c(.25, .5, .75)) + }, + error = function(e) { + return(NA) + } + ) + } + if (middle == "linear" || any(is.na(result))) { + method <- "linear" + quartiles <- stats::approx(tau, qvals, c(.25, .5, .75))$y + } + if (any(indm)) { + qvals_out[indm] <- switch( + method, + linear = stats::approx(tau, qvals, tau_out[indm])$y, + cubic = Q(tau_out[indm]) + ) + } + if (any(indl) || any(indr)) { + qv <- data.frame( + q = c(tau, tau_out[indm]), + v = c(qvals, qvals_out[indm]) + ) %>% + dplyr::distinct(q, .keep_all = TRUE) %>% + dplyr::arrange(q) + } + if (any(indl)) { + qvals_out[indl] <- tail_extrapolate(tau_out[indl], utils::head(qv, 2)) + } + if (any(indr)) { + qvals_out[indr] <- tail_extrapolate(tau_out[indr], utils::tail(qv, 2)) + } + qvals_out +} + +logit <- function(p) { + p <- pmax(pmin(p, 1), 0) + log(p) - log(1 - p) +} + +# extrapolates linearly on the logistic scale using +# the two points nearest the tail +tail_extrapolate <- function(tau_out, qv) { + if (nrow(qv) == 1L) return(rep(qv$v[1], length(tau_out))) + x <- logit(qv$q) + x0 <- logit(tau_out) + y <- qv$v + m <- diff(y) / diff(x) + m * (x0 - x[1]) + y[1] +} diff --git a/man/impute_quantiles.Rd b/man/impute_quantiles.Rd new file mode 100644 index 00000000..c3737c0f --- /dev/null +++ b/man/impute_quantiles.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interpolate-quantile_pred.R +\name{impute_quantiles} +\alias{impute_quantiles} +\title{Compute quantiles from a \code{quantile_pred}} +\usage{ +impute_quantiles( + x, + probs = seq(0, 1, 0.25), + lower = -Inf, + upper = Inf, + middle = c("cubic", "linear") +) +} +\arguments{ +\item{x}{an object of class \code{quantile_pred}} + +\item{probs}{vector. probabilities at which to evaluate the inverse CDF} + +\item{lower}{number. lower bound for the resulting values} + +\item{upper}{number. upper bound for the resulting values} + +\item{middle}{character.} +} +\value{ +A matrix with \code{length(probs)} columns and \code{length(x)} rows. Each +row contains the inverse CDF (quantile function) given by \code{x}, +extrapolated/interpolated to \code{probs}. +} +\description{ +While a \code{hardhat::quantile_pred} describes evaluations for the inverse +cummulative distribution function (CDF, sometimes called the "quantile +function") at particular quantiles, this is not enough +to fully describe the distribution. For example, + +\if{html}{\out{
}}\preformatted{p <- c(.1, .5, .9) +quantile_pred(matrix(qnorm(p), nrow = 1), p) +}\if{html}{\out{
}} + +encapsulates the 10\%, 50\%, and 90\% quantiles of the standard normal distribution. +But, what if we need, say, the 25\% and 75\% quantiles? This function imputes +them if possible. +} +\details{ +If \code{probs} is simply a subset of \code{quantile_levels} that already exist in \code{x}, +then these will be returned (up to numeric error). Small errors are possible +due to difficulties matching double vectors. + +For \code{probs} that do not exist in \code{x}, these will be interpolated or +extrapolated as needed. The process has 3 steps. + +First, by default (\code{middle = "cubic"}), missing \emph{internal} quantiles are +interpolated using a cubic spline fit to the observed quantiles with +\code{stats::splinefun()}. Second, if cubic interpolation fails (or if +\code{middle = "linear"}), linear interpolation is used via \code{stats::approx()}. +Finally, missing \emph{external} quantiles (those outside the range of +\code{quantile_levels}) are extrapolated. This is done using a linear fit on the +logistic scale to the two closest tail probabilities. + +This procedure results in sorted quantiles that interpolate/extrapolate +smoothly, while also enforcing heavy tails if none are provided. + +Optionally, the set of quantiles can be constrained to a compact interval +using \code{lower} and/or \code{upper}. +} +\examples{ +p <- c(.1, .5, .9) +qp <- quantile_pred(matrix(c(qnorm(p), qexp(p)), nrow = 2, byrow = TRUE), p) +impute_quantiles(qp, p) +as.matrix(qp) # same as the imputation + +p1 <- c(.05, .25, .75, .95) +impute_quantiles(qp, p1) +rbind(qnorm(p1), qexp(p1)) # exact values, for comparison +} diff --git a/man/is_quantile_pred.Rd b/man/is_quantile_pred.Rd new file mode 100644 index 00000000..6d751ba0 --- /dev/null +++ b/man/is_quantile_pred.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quantile-pred.R +\name{is_quantile_pred} +\alias{is_quantile_pred} +\title{Test if an object is a \code{quantile_pred}} +\usage{ +is_quantile_pred(x) +} +\arguments{ +\item{x}{an object} +} +\value{ +A boolean. +} +\description{ +Test if an object is a \code{quantile_pred} +} +\examples{ +qp <- quantile_pred(matrix(rnorm(10), nrow = 2), 1:5 / 6) +is_quantile_pred(qp) +is_quantile_pred(1:5) +} diff --git a/man/snap.Rd b/man/snap.Rd new file mode 100644 index 00000000..b16e3b48 --- /dev/null +++ b/man/snap.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interpolate-quantile_pred.R +\name{snap} +\alias{snap} +\title{Restrict numeric data to the interval [lower, upper]} +\usage{ +snap(x, lower, upper, ...) +} +\arguments{ +\item{x}{a numeric vector} + +\item{lower}{number, the lower bound} + +\item{upper}{number, the upper bound} + +\item{...}{unused} +} +\value{ +An object ot the same type as \code{x} +} +\description{ +Restrict numeric data to the interval [lower, upper] +} +\keyword{internal} From 254fec271fbac319604e725719405f9ebbf5200a Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 8 Jul 2025 09:09:23 -0700 Subject: [PATCH 08/19] minor doc edits --- ...quantile_pred.R => impute-quantile_pred.R} | 25 ++++++++++--------- man/impute_quantiles.Rd | 23 +++++++++-------- man/snap.Rd | 2 +- 3 files changed, 26 insertions(+), 24 deletions(-) rename R/{interpolate-quantile_pred.R => impute-quantile_pred.R} (86%) diff --git a/R/interpolate-quantile_pred.R b/R/impute-quantile_pred.R similarity index 86% rename from R/interpolate-quantile_pred.R rename to R/impute-quantile_pred.R index 1fbc1bbb..9f01122c 100644 --- a/R/interpolate-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -6,7 +6,7 @@ #' @param ... unused #' @export #' -#' @return An object ot the same type as `x` +#' @return An object of the same type as `x` #' #' @keywords internal snap <- function(x, lower, upper, ...) { @@ -33,18 +33,18 @@ snap.quantile_pred <- function(x, lower, upper, ...) { -#' Compute quantiles from a `quantile_pred` +#' Impute additional quantiles from a `quantile_pred` #' #' While a `hardhat::quantile_pred` describes evaluations for the inverse #' cummulative distribution function (CDF, sometimes called the "quantile -#' function") at particular quantiles, this is not enough +#' function") at particular quantile levels, this is not enough #' to fully describe the distribution. For example, #' ```r #' p <- c(.1, .5, .9) #' quantile_pred(matrix(qnorm(p), nrow = 1), p) #' ``` -#' encapsulates the 10%, 50%, and 90% quantiles of the standard normal distribution. -#' But, what if we need, say, the 25% and 75% quantiles? This function imputes +#' encapsulates the 10%, 50%, and 90% quantile levels of the standard normal distribution. +#' But, what if we need, say, the 25% and 75% levels? This function imputes #' them if possible. #' #' @details @@ -55,19 +55,20 @@ snap.quantile_pred <- function(x, lower, upper, ...) { #' For `probs` that do not exist in `x`, these will be interpolated or #' extrapolated as needed. The process has 3 steps. #' -#' First, by default (`middle = "cubic"`), missing _internal_ quantiles are -#' interpolated using a cubic spline fit to the observed quantiles with +#' First, by default (`middle = "cubic"`), missing _internal_ quantile levels are +#' interpolated using a cubic spline fit to the observed values + quantile levels with #' `stats::splinefun()`. Second, if cubic interpolation fails (or if #' `middle = "linear"`), linear interpolation is used via `stats::approx()`. -#' Finally, missing _external_ quantiles (those outside the range of +#' Finally, missing _external_ quantile levels (those outside the range of #' `quantile_levels`) are extrapolated. This is done using a linear fit on the -#' logistic scale to the two closest tail probabilities. +#' logistic scale to the two closest tail values. #' #' This procedure results in sorted quantiles that interpolate/extrapolate -#' smoothly, while also enforcing heavy tails if none are provided. +#' smoothly, while also enforcing heavy tails beyond the range. #' -#' Optionally, the set of quantiles can be constrained to a compact interval -#' using `lower` and/or `upper`. +#' Optionally, the resulting quantiles can be constrained to a compact interval +#' using `lower` and/or `upper`. This is done after extrapolation, so it may +#' result in multiple quantile levels having the same value (a CDF with a spike). #' #' #' @param x an object of class `quantile_pred` diff --git a/man/impute_quantiles.Rd b/man/impute_quantiles.Rd index c3737c0f..412b7f73 100644 --- a/man/impute_quantiles.Rd +++ b/man/impute_quantiles.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/interpolate-quantile_pred.R \name{impute_quantiles} \alias{impute_quantiles} -\title{Compute quantiles from a \code{quantile_pred}} +\title{Impute additional quantiles from a \code{quantile_pred}} \usage{ impute_quantiles( x, @@ -31,15 +31,15 @@ extrapolated/interpolated to \code{probs}. \description{ While a \code{hardhat::quantile_pred} describes evaluations for the inverse cummulative distribution function (CDF, sometimes called the "quantile -function") at particular quantiles, this is not enough +function") at particular quantile levels, this is not enough to fully describe the distribution. For example, \if{html}{\out{
}}\preformatted{p <- c(.1, .5, .9) quantile_pred(matrix(qnorm(p), nrow = 1), p) }\if{html}{\out{
}} -encapsulates the 10\%, 50\%, and 90\% quantiles of the standard normal distribution. -But, what if we need, say, the 25\% and 75\% quantiles? This function imputes +encapsulates the 10\%, 50\%, and 90\% quantile levels of the standard normal distribution. +But, what if we need, say, the 25\% and 75\% levels? This function imputes them if possible. } \details{ @@ -50,19 +50,20 @@ due to difficulties matching double vectors. For \code{probs} that do not exist in \code{x}, these will be interpolated or extrapolated as needed. The process has 3 steps. -First, by default (\code{middle = "cubic"}), missing \emph{internal} quantiles are -interpolated using a cubic spline fit to the observed quantiles with +First, by default (\code{middle = "cubic"}), missing \emph{internal} quantile levels are +interpolated using a cubic spline fit to the observed values + quantile levels with \code{stats::splinefun()}. Second, if cubic interpolation fails (or if \code{middle = "linear"}), linear interpolation is used via \code{stats::approx()}. -Finally, missing \emph{external} quantiles (those outside the range of +Finally, missing \emph{external} quantile levels (those outside the range of \code{quantile_levels}) are extrapolated. This is done using a linear fit on the -logistic scale to the two closest tail probabilities. +logistic scale to the two closest tail values. This procedure results in sorted quantiles that interpolate/extrapolate -smoothly, while also enforcing heavy tails if none are provided. +smoothly, while also enforcing heavy tails beyond the range. -Optionally, the set of quantiles can be constrained to a compact interval -using \code{lower} and/or \code{upper}. +Optionally, the resulting quantiles can be constrained to a compact interval +using \code{lower} and/or \code{upper}. This is done after extrapolation, so it may +result in multiple quantile levels having the same value (a CDF with a spike). } \examples{ p <- c(.1, .5, .9) diff --git a/man/snap.Rd b/man/snap.Rd index b16e3b48..50ea96b8 100644 --- a/man/snap.Rd +++ b/man/snap.Rd @@ -16,7 +16,7 @@ snap(x, lower, upper, ...) \item{...}{unused} } \value{ -An object ot the same type as \code{x} +An object of the same type as \code{x} } \description{ Restrict numeric data to the interval [lower, upper] From 92a51b45f4cb1c5d5a57fe5f633fa2ae11156a38 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 8 Jul 2025 10:04:14 -0700 Subject: [PATCH 09/19] add tests for quantile imputation --- R/impute-quantile_pred.R | 3 +- tests/testthat/_snaps/impute_quantile_pred.md | 64 ++++++++++++++ tests/testthat/test-impute_quantile_pred.R | 86 +++++++++++++++++++ 3 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/_snaps/impute_quantile_pred.md create mode 100644 tests/testthat/test-impute_quantile_pred.R diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R index 9f01122c..50fa4f9e 100644 --- a/R/impute-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -24,6 +24,7 @@ snap.numeric <- function(x, lower, upper, ...) { #' @export snap.quantile_pred <- function(x, lower, upper, ...) { + if (vec_size(x) == 0) return(x) values <- as.matrix(x) quantile_levels <- extract_quantile_levels(x) values <- map(vctrs::vec_chop(values), ~ snap(.x, lower, upper)) @@ -111,7 +112,7 @@ impute_quantiles <- function( ) } if (is.unsorted(probs)) probs <- sort(probs) - check_quantile_levels(probs) + check_quantile_level_values(probs, "probs", call = caller_env()) check_number_decimal(lower) check_number_decimal(upper) if (lower > upper) { diff --git a/tests/testthat/_snaps/impute_quantile_pred.md b/tests/testthat/_snaps/impute_quantile_pred.md new file mode 100644 index 00000000..8471b52e --- /dev/null +++ b/tests/testthat/_snaps/impute_quantile_pred.md @@ -0,0 +1,64 @@ +# impute_quantiles failure modes + + Code + impute_quantiles(x) + Condition + Error in `impute_quantiles()`: + ! `x` must be a object, not a double vector. + +--- + + Code + impute_quantiles(x, c(0.1, 0.5, 0.9)) + Condition + Error in `impute_quantiles()`: + ! Quantile interpolation is not possible when fewer than 2 quantiles are avaliable. + +--- + + Code + impute_quantiles(x, probs = c(-1, 0.2, 2)) + Condition + Error: + ! `probs` must be a number between 0 and 1, not the number -1. + +--- + + Code + impute_quantiles(x, lower = "a") + Condition + Error in `impute_quantiles()`: + ! `lower` must be a number, not the string "a". + +--- + + Code + impute_quantiles(x, upper = "b") + Condition + Error in `impute_quantiles()`: + ! `upper` must be a number, not the string "b". + +--- + + Code + impute_quantiles(x, lower = NULL) + Condition + Error in `impute_quantiles()`: + ! `lower` must be a number, not `NULL`. + +--- + + Code + impute_quantiles(x, lower = 2, upper = -1) + Condition + Error in `impute_quantiles()`: + ! `lower` must be less than `upper`. + +--- + + Code + impute_quantiles(x, middle = "middle") + Condition + Error in `impute_quantiles()`: + ! `middle` must be one of "cubic" or "linear", not "middle". + diff --git a/tests/testthat/test-impute_quantile_pred.R b/tests/testthat/test-impute_quantile_pred.R new file mode 100644 index 00000000..9e932fc0 --- /dev/null +++ b/tests/testthat/test-impute_quantile_pred.R @@ -0,0 +1,86 @@ +test_that("snap.numeric clamps values correctly", { + x <- c(-5, 0, 5, 10, 15) + expect_equal(snap(x, lower = 0, upper = 10), c(0, 0, 5, 10, 10)) + + x <- c(2, 4, 6) + expect_equal(snap(x, lower = 0, upper = 10), x) + + x <- numeric(0) + expect_equal(snap(x, lower = 0, upper = 10), numeric(0)) +}) + +test_that("snap.quantile_pred clamps values correctly", { + x <- quantile_pred(matrix(c(-5, 0, 5, 10, 15), nrow = 1), 1:5 / 6) + s <- snap(x, lower = 0, upper = 10) + expect_s3_class(s, "quantile_pred") + expect_equal(extract_quantile_levels(s), 1:5 / 6) + expect_equal( + drop(as.matrix(snap(x, lower = 0, upper = 10))), + c(0, 0, 5, 10, 10) + ) + + x <- quantile_pred(matrix(c(2, 4, 6), nrow = 1), 1:3 / 4) + s <- snap(x, lower = 0, upper = 10) + expect_s3_class(s, "quantile_pred") + expect_equal(extract_quantile_levels(s), 1:3 / 4) + expect_equal( + drop(as.matrix(snap(x, lower = 0, upper = 10))), + drop(as.matrix(x)) + ) + + x <- new_quantile_pred() + expect_equal(snap(x, lower = 0, upper = 10), x) +}) + +test_that("impute_quantiles failure modes", { + x <- double(7) + expect_snapshot(error = TRUE, impute_quantiles(x)) + + probs <- c(0.5) + x <- quantile_pred(matrix(0, nrow = 1), probs) + expect_snapshot(error = TRUE, impute_quantiles(x, c(0.1, 0.5, 0.9))) + + probs <- c(0.1, 0.5, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + expect_snapshot(error = TRUE, impute_quantiles(x, probs = c(-1, .2, 2))) + expect_snapshot(error = TRUE, impute_quantiles(x, lower = "a")) + expect_snapshot(error = TRUE, impute_quantiles(x, upper = "b")) + expect_snapshot(error = TRUE, impute_quantiles(x, lower = NULL)) + expect_snapshot(error = TRUE, impute_quantiles(x, lower = 2, upper = -1)) + expect_snapshot(error = TRUE, impute_quantiles(x, middle = "middle")) +}) + +test_that("impute_quantiles returns existing quantiles when matched", { + probs <- c(0.1, 0.5, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, probs) + expect_equal(out, as.matrix(x)) +}) + +test_that("impute_quantiles interpolates correctly", { + probs <- c(0.1, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, c(0.1, 0.5, 0.9), middle = "linear") + expect_equal(out[1], qnorm(0.1)) + expect_equal(out[3], qnorm(0.9)) + expect_equal(out[2], 0) +}) + +test_that("impute_quantiles extrapolates correctly", { + probs <- c(0.2, 0.8) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, c(0.01, 0.2, 0.8, 0.99)) + expect_equal(out[2], qnorm(0.2)) + expect_equal(out[3], qnorm(0.8)) + tail1 <- tail_extrapolate(.01, tibble(q = c(.2, .8), v = qnorm(q))) + tail2 <- tail_extrapolate(.99, tibble(q = c(.8, .2), v = qnorm(q))) + expect_equal(out[1], tail1) + expect_equal(out[4], tail2) +}) + +test_that("impute_quantiles applies lower/upper bounds", { + probs <- c(0.1, 0.5, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, c(0.01, 0.5, 0.99), lower = -1, upper = 1) + expect_true(all(out >= -1 & out <= 1)) +}) From 2fffc7dd31aaa5648a2a713b02fd635f5382c6ed Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 8 Jul 2025 11:10:52 -0700 Subject: [PATCH 10/19] need dplyr, replace %>% with |> --- DESCRIPTION | 1 + R/impute-quantile_pred.R | 4 ++-- man/impute_quantiles.Rd | 2 +- man/snap.Rd | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b17cb567..32336e52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,6 +24,7 @@ Depends: R (>= 4.1) Imports: cli (>= 3.6.0), + dplyr, glue (>= 1.6.2), rlang (>= 1.1.0), sparsevctrs (>= 0.2.0), diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R index 50fa4f9e..152778de 100644 --- a/R/impute-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -183,8 +183,8 @@ impute_quantiles_single <- function(qvals, tau, tau_out, middle) { qv <- data.frame( q = c(tau, tau_out[indm]), v = c(qvals, qvals_out[indm]) - ) %>% - dplyr::distinct(q, .keep_all = TRUE) %>% + ) |> + dplyr::distinct(q, .keep_all = TRUE) |> dplyr::arrange(q) } if (any(indl)) { diff --git a/man/impute_quantiles.Rd b/man/impute_quantiles.Rd index 412b7f73..9df0b7f3 100644 --- a/man/impute_quantiles.Rd +++ b/man/impute_quantiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/interpolate-quantile_pred.R +% Please edit documentation in R/impute-quantile_pred.R \name{impute_quantiles} \alias{impute_quantiles} \title{Impute additional quantiles from a \code{quantile_pred}} diff --git a/man/snap.Rd b/man/snap.Rd index 50ea96b8..f884a2ea 100644 --- a/man/snap.Rd +++ b/man/snap.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/interpolate-quantile_pred.R +% Please edit documentation in R/impute-quantile_pred.R \name{snap} \alias{snap} \title{Restrict numeric data to the interval [lower, upper]} From f7bfb23d17eeb4d79e52edbfcf97b3bdf5343c51 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Tue, 22 Jul 2025 10:42:34 -0700 Subject: [PATCH 11/19] redocument for links --- R/impute-quantile_pred.R | 6 +++--- man/impute_quantiles.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R index 152778de..5b1227c1 100644 --- a/R/impute-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -36,7 +36,7 @@ snap.quantile_pred <- function(x, lower, upper, ...) { #' Impute additional quantiles from a `quantile_pred` #' -#' While a `hardhat::quantile_pred` describes evaluations for the inverse +#' While a [quantile_pred] describes evaluations for the inverse #' cummulative distribution function (CDF, sometimes called the "quantile #' function") at particular quantile levels, this is not enough #' to fully describe the distribution. For example, @@ -58,8 +58,8 @@ snap.quantile_pred <- function(x, lower, upper, ...) { #' #' First, by default (`middle = "cubic"`), missing _internal_ quantile levels are #' interpolated using a cubic spline fit to the observed values + quantile levels with -#' `stats::splinefun()`. Second, if cubic interpolation fails (or if -#' `middle = "linear"`), linear interpolation is used via `stats::approx()`. +#' [stats::splinefun]. Second, if cubic interpolation fails (or if +#' `middle = "linear"`), linear interpolation is used via [stats::approx]. #' Finally, missing _external_ quantile levels (those outside the range of #' `quantile_levels`) are extrapolated. This is done using a linear fit on the #' logistic scale to the two closest tail values. diff --git a/man/impute_quantiles.Rd b/man/impute_quantiles.Rd index 9df0b7f3..35096b67 100644 --- a/man/impute_quantiles.Rd +++ b/man/impute_quantiles.Rd @@ -29,7 +29,7 @@ row contains the inverse CDF (quantile function) given by \code{x}, extrapolated/interpolated to \code{probs}. } \description{ -While a \code{hardhat::quantile_pred} describes evaluations for the inverse +While a \link{quantile_pred} describes evaluations for the inverse cummulative distribution function (CDF, sometimes called the "quantile function") at particular quantile levels, this is not enough to fully describe the distribution. For example, @@ -52,8 +52,8 @@ extrapolated as needed. The process has 3 steps. First, by default (\code{middle = "cubic"}), missing \emph{internal} quantile levels are interpolated using a cubic spline fit to the observed values + quantile levels with -\code{stats::splinefun()}. Second, if cubic interpolation fails (or if -\code{middle = "linear"}), linear interpolation is used via \code{stats::approx()}. +\link[stats:splinefun]{stats::splinefun}. Second, if cubic interpolation fails (or if +\code{middle = "linear"}), linear interpolation is used via \link[stats:approxfun]{stats::approx}. Finally, missing \emph{external} quantile levels (those outside the range of \code{quantile_levels}) are extrapolated. This is done using a linear fit on the logistic scale to the two closest tail values. From 8ebb6b482c2bea88ce8f18a4401d2dd5b38c4fd8 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 6 Aug 2025 16:21:33 -0700 Subject: [PATCH 12/19] muffle pkgdown build error --- _pkgdown.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 6894b4c5..e92498e6 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -26,7 +26,7 @@ reference: - title: Prediction contents: - contains("spruce") - - quantile_pred + - contains("quantile_pred") - title: Utility contents: @@ -37,6 +37,7 @@ reference: - add_intercept_column - weighted_table - fct_encode_one_hot + - impute_quantiles - title: Validation contents: From e89000094d607dd0f17752bcfa22f52ae3f77ac7 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 6 Aug 2025 16:28:29 -0700 Subject: [PATCH 13/19] document is_quantile_pred within quantile_pred --- R/quantile-pred.R | 13 +++---------- _pkgdown.yml | 2 +- man/is_quantile_pred.Rd | 22 ---------------------- man/quantile_pred.Rd | 4 ++++ 4 files changed, 8 insertions(+), 33 deletions(-) delete mode 100644 man/is_quantile_pred.Rd diff --git a/R/quantile-pred.R b/R/quantile-pred.R index 990d11f2..e6c0c6a8 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -21,6 +21,7 @@ #' `".quantile_levels"`, and `".row"`. #' * `as.matrix()` returns an unnamed matrix with rows as samples, columns as #' quantile levels, and entries are predictions. +#' * `is_quantile_pred()` tests for the "quantile_pred" class #' @examples #' .pred_quantile <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) #' @@ -56,17 +57,9 @@ new_quantile_pred <- function(values = list(), quantile_levels = double()) { } -#' Test if an object is a `quantile_pred` -#' -#' @param x an object -#' -#' @returns A boolean. + #' @export -#' -#' @examples -#' qp <- quantile_pred(matrix(rnorm(10), nrow = 2), 1:5 / 6) -#' is_quantile_pred(qp) -#' is_quantile_pred(1:5) +#' @rdname quantile_pred is_quantile_pred <- function(x) { inherits(x, "quantile_pred") } diff --git a/_pkgdown.yml b/_pkgdown.yml index e92498e6..289a0d9e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -26,7 +26,7 @@ reference: - title: Prediction contents: - contains("spruce") - - contains("quantile_pred") + - quantile_pred - title: Utility contents: diff --git a/man/is_quantile_pred.Rd b/man/is_quantile_pred.Rd deleted file mode 100644 index 6d751ba0..00000000 --- a/man/is_quantile_pred.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quantile-pred.R -\name{is_quantile_pred} -\alias{is_quantile_pred} -\title{Test if an object is a \code{quantile_pred}} -\usage{ -is_quantile_pred(x) -} -\arguments{ -\item{x}{an object} -} -\value{ -A boolean. -} -\description{ -Test if an object is a \code{quantile_pred} -} -\examples{ -qp <- quantile_pred(matrix(rnorm(10), nrow = 2), 1:5 / 6) -is_quantile_pred(qp) -is_quantile_pred(1:5) -} diff --git a/man/quantile_pred.Rd b/man/quantile_pred.Rd index f9ac7cf3..28fa8139 100644 --- a/man/quantile_pred.Rd +++ b/man/quantile_pred.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/quantile-pred.R \name{quantile_pred} \alias{quantile_pred} +\alias{is_quantile_pred} \alias{extract_quantile_levels} \alias{as_tibble.quantile_pred} \alias{as.matrix.quantile_pred} @@ -9,6 +10,8 @@ \usage{ quantile_pred(values, quantile_levels = double()) +is_quantile_pred(x) + extract_quantile_levels(x) \method{as_tibble}{quantile_pred}(x, ..., .rows = NULL, .name_repair = "minimal", rownames = NULL) @@ -37,6 +40,7 @@ quantile levels. \code{".quantile_levels"}, and \code{".row"}. \item \code{as.matrix()} returns an unnamed matrix with rows as samples, columns as quantile levels, and entries are predictions. +\item \code{is_quantile_pred()} tests for the "quantile_pred" class } } \description{ From dce689463149dc174e3023ee8552dac964475287 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Wed, 6 Aug 2025 16:38:29 -0700 Subject: [PATCH 14/19] remove dplyr import --- DESCRIPTION | 1 - R/impute-quantile_pred.R | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 32336e52..b17cb567 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,6 @@ Depends: R (>= 4.1) Imports: cli (>= 3.6.0), - dplyr, glue (>= 1.6.2), rlang (>= 1.1.0), sparsevctrs (>= 0.2.0), diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R index 5b1227c1..da03fbda 100644 --- a/R/impute-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -183,9 +183,9 @@ impute_quantiles_single <- function(qvals, tau, tau_out, middle) { qv <- data.frame( q = c(tau, tau_out[indm]), v = c(qvals, qvals_out[indm]) - ) |> - dplyr::distinct(q, .keep_all = TRUE) |> - dplyr::arrange(q) + ) + qv <- qv[vctrs::vec_unique_loc(qv$q), ] + qv <- qv[vctrs::vec_order(qv$q), ] } if (any(indl)) { qvals_out[indl] <- tail_extrapolate(tau_out[indl], utils::head(qv, 2)) From 0714987729a39a92698e9872c02c6fcb64c9516a Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Fri, 31 Oct 2025 08:41:30 -0700 Subject: [PATCH 15/19] Apply suggestions from code review Co-authored-by: Emil Hvitfeldt --- R/impute-quantile_pred.R | 5 +++-- R/quantile-pred.R | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R index da03fbda..f2bce340 100644 --- a/R/impute-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -24,6 +24,7 @@ snap.numeric <- function(x, lower, upper, ...) { #' @export snap.quantile_pred <- function(x, lower, upper, ...) { + rlang::check_dots_empty() if (vec_size(x) == 0) return(x) values <- as.matrix(x) quantile_levels <- extract_quantile_levels(x) @@ -72,7 +73,7 @@ snap.quantile_pred <- function(x, lower, upper, ...) { #' result in multiple quantile levels having the same value (a CDF with a spike). #' #' -#' @param x an object of class `quantile_pred` +#' @param x an object of class [quantile_pred] #' @param probs vector. probabilities at which to evaluate the inverse CDF #' @param lower number. lower bound for the resulting values #' @param upper number. upper bound for the resulting values @@ -116,7 +117,7 @@ impute_quantiles <- function( check_number_decimal(lower) check_number_decimal(upper) if (lower > upper) { - cli::cli_abort("`lower` must be less than `upper`.") + cli::cli_abort("{.arg lower} ({lower}) must be less than {.arg upper} ({upper}).") } middle <- rlang::arg_match(middle) snap(impute_quantile_internal(x, probs, middle), lower, upper) diff --git a/R/quantile-pred.R b/R/quantile-pred.R index e6c0c6a8..b3b47d60 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -277,7 +277,7 @@ vec_arith.quantile_pred.numeric <- function(op, x, y, ...) { op_fn <- getExportedValue("base", op) l <- vctrs::vec_recycle_common(x = x, y = y) out <- op_fn(as.matrix(l$x), l$y) - quantile_pred(out, x %@% "quantile_levels") + quantile_pred(out, attr(x, "quantile_levels")) } #' @export From c7c7896ab320279e44375f8402f9165ab3f91618 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sat, 1 Nov 2025 14:26:12 -0700 Subject: [PATCH 16/19] resolve most PR comments --- NAMESPACE | 1 + R/impute-quantile_pred.R | 132 ++++++++++-------- R/quantile-pred.R | 42 ++++-- man/hardhat-package.Rd | 2 +- man/impute_quantiles.Rd | 11 +- tests/testthat/_snaps/impute_quantile_pred.md | 20 ++- tests/testthat/test-impute_quantile_pred.R | 16 ++- 7 files changed, 142 insertions(+), 82 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a09fffcb..fd2205e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ S3method(run_mold,default) S3method(run_mold,default_formula_blueprint) S3method(run_mold,default_recipe_blueprint) S3method(run_mold,default_xy_blueprint) +S3method(snap,default) S3method(snap,numeric) S3method(snap,quantile_pred) S3method(standardize,array) diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R index f2bce340..3edd7763 100644 --- a/R/impute-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -13,6 +13,13 @@ snap <- function(x, lower, upper, ...) { UseMethod("snap") } +#' @export +snap.default <- function(x, lower, upper, ...) { + cli::cli_abort( + "No {.fn snap} method provided for {.obj_type_friendly {x}}." + ) +} + #' @export snap.numeric <- function(x, lower, upper, ...) { rlang::check_dots_empty() @@ -25,7 +32,9 @@ snap.numeric <- function(x, lower, upper, ...) { #' @export snap.quantile_pred <- function(x, lower, upper, ...) { rlang::check_dots_empty() - if (vec_size(x) == 0) return(x) + if (vec_size(x) == 0) { + return(x) + } values <- as.matrix(x) quantile_levels <- extract_quantile_levels(x) values <- map(vctrs::vec_chop(values), ~ snap(.x, lower, upper)) @@ -33,8 +42,6 @@ snap.quantile_pred <- function(x, lower, upper, ...) { } - - #' Impute additional quantiles from a `quantile_pred` #' #' While a [quantile_pred] describes evaluations for the inverse @@ -58,7 +65,8 @@ snap.quantile_pred <- function(x, lower, upper, ...) { #' extrapolated as needed. The process has 3 steps. #' #' First, by default (`middle = "cubic"`), missing _internal_ quantile levels are -#' interpolated using a cubic spline fit to the observed values + quantile levels with +#' interpolated using a cubic spline fit to the observed values + quantile +#' levels with #' [stats::splinefun]. Second, if cubic interpolation fails (or if #' `middle = "linear"`), linear interpolation is used via [stats::approx]. #' Finally, missing _external_ quantile levels (those outside the range of @@ -77,7 +85,9 @@ snap.quantile_pred <- function(x, lower, upper, ...) { #' @param probs vector. probabilities at which to evaluate the inverse CDF #' @param lower number. lower bound for the resulting values #' @param upper number. upper bound for the resulting values -#' @param middle character. +#' @param middle character. if `middle = 'cubic'` (the default), a cubic +#' spline is used for interpolation where possible; `middle='linear'` +#' interpolates linearly; see Details below. #' #' @returns A matrix with `length(probs)` columns and `length(x)` rows. Each #' row contains the inverse CDF (quantile function) given by `x`, @@ -94,11 +104,11 @@ snap.quantile_pred <- function(x, lower, upper, ...) { #' impute_quantiles(qp, p1) #' rbind(qnorm(p1), qexp(p1)) # exact values, for comparison impute_quantiles <- function( - x, - probs = seq(0, 1, 0.25), - lower = -Inf, - upper = Inf, - middle = c("cubic", "linear") + x, + probs, + lower = -Inf, + upper = Inf, + middle = c("cubic", "linear") ) { if (!is_quantile_pred(x)) { cli::cli_abort( @@ -112,56 +122,60 @@ impute_quantiles <- function( are avaliable." ) } - if (is.unsorted(probs)) probs <- sort(probs) + if (is.unsorted(probs)) { + probs <- sort(probs) + } check_quantile_level_values(probs, "probs", call = caller_env()) check_number_decimal(lower) check_number_decimal(upper) - if (lower > upper) { - cli::cli_abort("{.arg lower} ({lower}) must be less than {.arg upper} ({upper}).") + if (lower >= upper) { + cli::cli_abort( + "{.arg lower} ({lower}) must be less than {.arg upper} ({upper})." + ) } middle <- rlang::arg_match(middle) snap(impute_quantile_internal(x, probs, middle), lower, upper) } -impute_quantile_internal <- function(x, tau_out, middle) { - tau <- extract_quantile_levels(x) - qvals <- as.matrix(x) - if (all(tau_out %in% tau) && !anyNA(qvals)) { - return(qvals[, match(tau_out, tau), drop = FALSE]) +impute_quantile_internal <- function(x, probs_out, middle) { + probs_in <- extract_quantile_levels(x) + vals_in <- as.matrix(x) + if (all(probs_out %in% probs_in) && !anyNA(vals_in)) { + return(vals_in[, match(probs_out, probs_in), drop = FALSE]) } - qvals_out <- map( - vctrs::vec_chop(qvals), - ~ impute_quantiles_single(.x, tau, tau_out, middle) + vals_out <- map( + vctrs::vec_chop(vals_in), + ~ impute_quantiles_single(.x, probs_in, probs_out, middle) ) - qvals_out <- do.call(rbind, qvals_out) - qvals_out + vals_out <- do.call(rbind, vals_out) + vals_out } -impute_quantiles_single <- function(qvals, tau, tau_out, middle) { - qvals_out <- rep(NA, length(tau_out)) - good <- !is.na(qvals) +impute_quantiles_single <- function(vals_in, probs_in, probs_out, middle) { + vals_out <- rep(NA, length(probs_out)) + good <- !is.na(vals_in) if (!any(good)) { - return(qvals_out) + return(vals_out) } - qvals <- qvals[good] - tau <- tau[good] + vals_in <- vals_in[good] + probs_in <- probs_in[good] # in case we only have one point, and it matches something we wanted if (length(good) < 2) { - matched_one <- tau_out %in% tau - qvals_out[matched_one] <- qvals[matched_one] - return(qvals_out) + matched_one <- probs_out %in% probs_in + vals_out[matched_one] <- vals_in[matched_one] + return(vals_out) } - indl <- tau_out < min(tau) - indr <- tau_out > max(tau) - indm <- !indl & !indr + below <- probs_out < min(probs_in) + above <- probs_out > max(probs_in) + interior <- !below & !above if (middle == "cubic") { method <- "cubic" result <- tryCatch( { - Q <- stats::splinefun(tau, qvals, method = "hyman") + Q <- stats::splinefun(probs_in, vals_in, method = "hyman") quartiles <- Q(c(.25, .5, .75)) }, error = function(e) { @@ -171,30 +185,32 @@ impute_quantiles_single <- function(qvals, tau, tau_out, middle) { } if (middle == "linear" || any(is.na(result))) { method <- "linear" - quartiles <- stats::approx(tau, qvals, c(.25, .5, .75))$y + quartiles <- stats::approx(probs_in, vals_in, c(.25, .5, .75))$y } - if (any(indm)) { - qvals_out[indm] <- switch( + if (any(interior)) { + vals_out[interior] <- switch( method, - linear = stats::approx(tau, qvals, tau_out[indm])$y, - cubic = Q(tau_out[indm]) + linear = stats::approx(probs_in, vals_in, probs_out[interior])$y, + cubic = Q(probs_out[interior]) ) } - if (any(indl) || any(indr)) { - qv <- data.frame( - q = c(tau, tau_out[indm]), - v = c(qvals, qvals_out[indm]) + if (any(below) || any(above)) { + interior_data <- data.frame( + probs = c(probs_in, probs_out[interior]), + vals = c(vals_in, vals_out[interior]) ) - qv <- qv[vctrs::vec_unique_loc(qv$q), ] - qv <- qv[vctrs::vec_order(qv$q), ] + interior_data <- interior_data[vctrs::vec_unique_loc(interior_data$probs), ] + interior_data <- interior_data[vctrs::vec_order(interior_data$probs), ] } - if (any(indl)) { - qvals_out[indl] <- tail_extrapolate(tau_out[indl], utils::head(qv, 2)) + if (any(below)) { + left_tail_data <- utils::head(interior_data, 2) + vals_out[below] <- tail_extrapolate(probs_out[below], left_tail_data) } - if (any(indr)) { - qvals_out[indr] <- tail_extrapolate(tau_out[indr], utils::tail(qv, 2)) + if (any(above)) { + right_tail_data <- utils::tail(interior_data, 2) + vals_out[above] <- tail_extrapolate(probs_out[above], right_tail_data) } - qvals_out + vals_out } logit <- function(p) { @@ -204,11 +220,13 @@ logit <- function(p) { # extrapolates linearly on the logistic scale using # the two points nearest the tail -tail_extrapolate <- function(tau_out, qv) { - if (nrow(qv) == 1L) return(rep(qv$v[1], length(tau_out))) - x <- logit(qv$q) - x0 <- logit(tau_out) - y <- qv$v +tail_extrapolate <- function(tail_probs, interior) { + if (nrow(interior) == 1L) { + return(rep(interior$vals[1], length(tail_probs))) + } + x <- logit(interior$probs) + x0 <- logit(tail_probs) + y <- interior$vals m <- diff(y) / diff(x) m * (x0 - x[1]) + y[1] } diff --git a/R/quantile-pred.R b/R/quantile-pred.R index b3b47d60..f33ea869 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -57,7 +57,6 @@ new_quantile_pred <- function(values = list(), quantile_levels = double()) { } - #' @export #' @rdname quantile_pred is_quantile_pred <- function(x) { @@ -224,22 +223,38 @@ check_quantile_level_values <- function(levels, arg, call) { #' @export #' @keywords internal vec_ptype2.quantile_pred.quantile_pred <- function( - x, y, ..., x_arg = "", y_arg = "", call = caller_env() + x, + y, + ..., + x_arg = "", + y_arg = "", + call = caller_env() ) { - if (all(extract_quantile_levels(y) %in% extract_quantile_levels(x))) { + x_levels <- extract_quantile_levels(x) + y_levels <- extract_quantile_levels(y) + if (all(y_levels %in% x_levels)) { return(x) } - if (all(extract_quantile_levels(x) %in% extract_quantile_levels(y))) { + if (all(x_levels %in% y_levels)) { return(y) } stop_incompatible_type( - x, y, x_arg = x_arg, y_arg = y_arg, + x, + y, + x_arg = x_arg, + y_arg = y_arg, details = "`quantile_levels` must be compatible (a superset/subset relation)." ) } #' @export -vec_cast.quantile_pred.quantile_pred <- function(x, to, ..., x_arg = "", to_arg = "") { +vec_cast.quantile_pred.quantile_pred <- function( + x, + to, + ..., + x_arg = "", + to_arg = "" +) { x_lvls <- extract_quantile_levels(x) to_lvls <- extract_quantile_levels(to) x_in_to <- x_lvls %in% to_lvls @@ -257,8 +272,13 @@ vec_cast.quantile_pred.quantile_pred <- function(x, to, ..., x_arg = "", to_arg vec_math.quantile_pred <- function(.fn, .x, ...) { fn <- .fn .fn <- getExportedValue("base", .fn) - if (fn %in% c("any", "all", "prod", "sum", "cumsum", "cummax", "cummin", "cumprod")) { - cli::cli_abort("{.fn {fn}} is not a supported operation for {.cls quantile_pred}.") + if ( + fn %in% + c("any", "all", "prod", "sum", "cumsum", "cummax", "cummin", "cumprod") + ) { + cli::cli_abort( + "{.fn {fn}} is not a supported operation for {.cls quantile_pred}." + ) } quantile_levels <- .x %@% "quantile_levels" .x <- as.matrix(.x) @@ -271,6 +291,12 @@ vec_arith.quantile_pred <- function(op, x, y, ...) { UseMethod("vec_arith.quantile_pred", y) } +#' @export +#' @method vec_arith.quantile_pred default +vec_arith.quantile_pred.default <- function(op, x, y, ...) { + stop_incompatible_op(op, x, y) +} + #' @export #' @method vec_arith.quantile_pred numeric vec_arith.quantile_pred.numeric <- function(op, x, y, ...) { diff --git a/man/hardhat-package.Rd b/man/hardhat-package.Rd index eaab5524..efb007ac 100644 --- a/man/hardhat-package.Rd +++ b/man/hardhat-package.Rd @@ -30,7 +30,7 @@ Authors: Other contributors: \itemize{ - \item Posit Software, PBC [copyright holder, funder] + \item Posit Software, PBC (03wc8by49) [copyright holder, funder] } } diff --git a/man/impute_quantiles.Rd b/man/impute_quantiles.Rd index 35096b67..ce7772b0 100644 --- a/man/impute_quantiles.Rd +++ b/man/impute_quantiles.Rd @@ -6,14 +6,14 @@ \usage{ impute_quantiles( x, - probs = seq(0, 1, 0.25), + probs, lower = -Inf, upper = Inf, middle = c("cubic", "linear") ) } \arguments{ -\item{x}{an object of class \code{quantile_pred}} +\item{x}{an object of class \link{quantile_pred}} \item{probs}{vector. probabilities at which to evaluate the inverse CDF} @@ -21,7 +21,9 @@ impute_quantiles( \item{upper}{number. upper bound for the resulting values} -\item{middle}{character.} +\item{middle}{character. if \code{middle = 'cubic'} (the default), a cubic +spline is used for interpolation where possible; \code{middle='linear'} +interpolates linearly; see Details below.} } \value{ A matrix with \code{length(probs)} columns and \code{length(x)} rows. Each @@ -51,7 +53,8 @@ For \code{probs} that do not exist in \code{x}, these will be interpolated or extrapolated as needed. The process has 3 steps. First, by default (\code{middle = "cubic"}), missing \emph{internal} quantile levels are -interpolated using a cubic spline fit to the observed values + quantile levels with +interpolated using a cubic spline fit to the observed values + quantile +levels with \link[stats:splinefun]{stats::splinefun}. Second, if cubic interpolation fails (or if \code{middle = "linear"}), linear interpolation is used via \link[stats:approxfun]{stats::approx}. Finally, missing \emph{external} quantile levels (those outside the range of diff --git a/tests/testthat/_snaps/impute_quantile_pred.md b/tests/testthat/_snaps/impute_quantile_pred.md index 8471b52e..516a2464 100644 --- a/tests/testthat/_snaps/impute_quantile_pred.md +++ b/tests/testthat/_snaps/impute_quantile_pred.md @@ -25,7 +25,15 @@ --- Code - impute_quantiles(x, lower = "a") + impute_quantiles(x) + Condition + Error in `impute_quantiles()`: + ! argument "probs" is missing, with no default + +--- + + Code + impute_quantiles(x, probs, lower = "a") Condition Error in `impute_quantiles()`: ! `lower` must be a number, not the string "a". @@ -33,7 +41,7 @@ --- Code - impute_quantiles(x, upper = "b") + impute_quantiles(x, probs, upper = "b") Condition Error in `impute_quantiles()`: ! `upper` must be a number, not the string "b". @@ -41,7 +49,7 @@ --- Code - impute_quantiles(x, lower = NULL) + impute_quantiles(x, probs, lower = NULL) Condition Error in `impute_quantiles()`: ! `lower` must be a number, not `NULL`. @@ -49,10 +57,10 @@ --- Code - impute_quantiles(x, lower = 2, upper = -1) + impute_quantiles(x, probs, lower = 2, upper = -1) Condition Error in `impute_quantiles()`: - ! `lower` must be less than `upper`. + ! `lower` (2) must be less than `upper` (-1). --- @@ -60,5 +68,5 @@ impute_quantiles(x, middle = "middle") Condition Error in `impute_quantiles()`: - ! `middle` must be one of "cubic" or "linear", not "middle". + ! argument "probs" is missing, with no default diff --git a/tests/testthat/test-impute_quantile_pred.R b/tests/testthat/test-impute_quantile_pred.R index 9e932fc0..ac6a21ad 100644 --- a/tests/testthat/test-impute_quantile_pred.R +++ b/tests/testthat/test-impute_quantile_pred.R @@ -43,10 +43,14 @@ test_that("impute_quantiles failure modes", { probs <- c(0.1, 0.5, 0.9) x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) expect_snapshot(error = TRUE, impute_quantiles(x, probs = c(-1, .2, 2))) - expect_snapshot(error = TRUE, impute_quantiles(x, lower = "a")) - expect_snapshot(error = TRUE, impute_quantiles(x, upper = "b")) - expect_snapshot(error = TRUE, impute_quantiles(x, lower = NULL)) - expect_snapshot(error = TRUE, impute_quantiles(x, lower = 2, upper = -1)) + expect_snapshot(error = TRUE, impute_quantiles(x)) + expect_snapshot(error = TRUE, impute_quantiles(x, probs, lower = "a")) + expect_snapshot(error = TRUE, impute_quantiles(x, probs, upper = "b")) + expect_snapshot(error = TRUE, impute_quantiles(x, probs, lower = NULL)) + expect_snapshot( + error = TRUE, + impute_quantiles(x, probs, lower = 2, upper = -1) + ) expect_snapshot(error = TRUE, impute_quantiles(x, middle = "middle")) }) @@ -72,8 +76,8 @@ test_that("impute_quantiles extrapolates correctly", { out <- impute_quantiles(x, c(0.01, 0.2, 0.8, 0.99)) expect_equal(out[2], qnorm(0.2)) expect_equal(out[3], qnorm(0.8)) - tail1 <- tail_extrapolate(.01, tibble(q = c(.2, .8), v = qnorm(q))) - tail2 <- tail_extrapolate(.99, tibble(q = c(.8, .2), v = qnorm(q))) + tail1 <- tail_extrapolate(.01, tibble(probs = c(.2, .8), vals = qnorm(probs))) + tail2 <- tail_extrapolate(.99, tibble(probs = c(.8, .2), vals = qnorm(probs))) expect_equal(out[1], tail1) expect_equal(out[4], tail2) }) From 892884a277826caf894970a1249e542781073d36 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sat, 1 Nov 2025 14:27:05 -0700 Subject: [PATCH 17/19] redocument --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index fd2205e3..263c1604 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,7 @@ S3method(standardize,integer) S3method(standardize,matrix) S3method(vec_arith,quantile_pred) S3method(vec_arith.numeric,quantile_pred) +S3method(vec_arith.quantile_pred,default) S3method(vec_arith.quantile_pred,numeric) S3method(vec_cast,double.hardhat_frequency_weights) S3method(vec_cast,double.hardhat_importance_weights) From 4dc100f993feeee8d5600bbe08770317c967b722 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 2 Nov 2025 11:54:53 -0800 Subject: [PATCH 18/19] fix: incorrect snap with probs as mandatory --- tests/testthat/_snaps/impute_quantile_pred.md | 4 ++-- tests/testthat/test-impute_quantile_pred.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/_snaps/impute_quantile_pred.md b/tests/testthat/_snaps/impute_quantile_pred.md index 516a2464..acd1789e 100644 --- a/tests/testthat/_snaps/impute_quantile_pred.md +++ b/tests/testthat/_snaps/impute_quantile_pred.md @@ -65,8 +65,8 @@ --- Code - impute_quantiles(x, middle = "middle") + impute_quantiles(x, probs, middle = "middle") Condition Error in `impute_quantiles()`: - ! argument "probs" is missing, with no default + ! `middle` must be one of "cubic" or "linear", not "middle". diff --git a/tests/testthat/test-impute_quantile_pred.R b/tests/testthat/test-impute_quantile_pred.R index ac6a21ad..90a3c708 100644 --- a/tests/testthat/test-impute_quantile_pred.R +++ b/tests/testthat/test-impute_quantile_pred.R @@ -51,7 +51,7 @@ test_that("impute_quantiles failure modes", { error = TRUE, impute_quantiles(x, probs, lower = 2, upper = -1) ) - expect_snapshot(error = TRUE, impute_quantiles(x, middle = "middle")) + expect_snapshot(error = TRUE, impute_quantiles(x, probs, middle = "middle")) }) test_that("impute_quantiles returns existing quantiles when matched", { From bd2121e6756ab30d6ffe843b3fd099753c58bc89 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Sun, 2 Nov 2025 12:17:22 -0800 Subject: [PATCH 19/19] fix: refactor interior imputation to avoid switch --- R/impute-quantile_pred.R | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R index 3edd7763..ef80e501 100644 --- a/R/impute-quantile_pred.R +++ b/R/impute-quantile_pred.R @@ -171,28 +171,24 @@ impute_quantiles_single <- function(vals_in, probs_in, probs_out, middle) { above <- probs_out > max(probs_in) interior <- !below & !above - if (middle == "cubic") { - method <- "cubic" - result <- tryCatch( - { - Q <- stats::splinefun(probs_in, vals_in, method = "hyman") - quartiles <- Q(c(.25, .5, .75)) - }, - error = function(e) { - return(NA) - } - ) - } - if (middle == "linear" || any(is.na(result))) { - method <- "linear" - quartiles <- stats::approx(probs_in, vals_in, c(.25, .5, .75))$y - } if (any(interior)) { - vals_out[interior] <- switch( - method, - linear = stats::approx(probs_in, vals_in, probs_out[interior])$y, - cubic = Q(probs_out[interior]) - ) + if (middle == "cubic") { + result <- tryCatch( + { + interp_fun <- stats::splinefun(probs_in, vals_in, method = "hyman") + quartiles <- interp_fun(c(.25, .5, .75)) + }, + error = function(e) { + return(NA) + } + ) + if (any(is.na(result))) middle <- "linear" + } + if (middle == "linear") { + interp_fun <- function(probs) stats::approx(probs_in, vals_in, probs)$y + quartiles <- interp_fun(c(.25, .5, .75)) + } + vals_out[interior] <- interp_fun(probs_out[interior]) } if (any(below) || any(above)) { interior_data <- data.frame(