diff --git a/.Rbuildignore b/.Rbuildignore index fafba974..7f222f31 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -29,4 +29,5 @@ develop ^dev$ ^\.pre-commit-config\.yaml$ ^vignettes/original$ -^vignettes/Makefile$ \ No newline at end of file +^vignettes/Makefile$ +^node_modules$ diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 5a7dee6d..92a4f5e8 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -13,7 +13,7 @@ name: pkgdown jobs: pkgdown: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 # Only restrict concurrency for non-PR jobs concurrency: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} diff --git a/DESCRIPTION b/DESCRIPTION index 9d8fb07a..5aba697e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -71,7 +71,7 @@ Encoding: UTF-8 Depends: R (>= 4.1.0) Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Config/testthat/edition: 3 Imports: checkmate, @@ -122,6 +122,7 @@ Collate: 'outcome_class.R' 'analysis_class.R' 'borrowing_details.R' + 'borrowing_fixed_power_prior.R' 'borrowing_full.R' 'borrowing_hierarchical_commensurate.R' 'borrowing_none.R' diff --git a/NAMESPACE b/NAMESPACE index 92e1526c..c4f98c60 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(beta_prior) export(bin_var) export(binary_cutoff) export(borrowing_details) +export(borrowing_fixed_power_prior) export(borrowing_full) export(borrowing_hierarchical_commensurate) export(borrowing_none) diff --git a/R/borrowing_fixed_power_prior.R b/R/borrowing_fixed_power_prior.R new file mode 100644 index 00000000..4fb99bfe --- /dev/null +++ b/R/borrowing_fixed_power_prior.R @@ -0,0 +1,66 @@ +#' `BorrowingFixedPowerPrior` class +#' +#' @slot method_name string. The name of the method. +#' @slot ext_flag_col character. Name of the external flag column in the matrix. +#' @slot powers numeric. The fixed values to be used as the powers in the power prior. +#' +#' @include borrowing_class.R +#' @family borrowing classes +.borrowing_fixed_power_prior <- setClass( + "BorrowingFixedPowerPrior", + slots = c( + power_par = "numeric" + ), + prototype = list( + method_name = "Borrowing with fixed power prior" + ), + contains = "Borrowing" +) + +#' Fixed Power Prior Borrowing +#' +#' @param ext_flag_col character. Name of the external flag column in the matrix. +#' @param power_par numeric. Fixed power parameter for all external data. Must be between 0 and 1. +#' +#' @return Object of class [`BorrowingFixedPowerPrior`][BorrowingFixedPowerPrior-class]. +#' @export +#' @examples +#' borrowing_fixed_power_prior( +#' ext_flag_col = "ext", +#' power_par = 0.5 +#' ) +borrowing_fixed_power_prior <- function(ext_flag_col, power_par) { + assert_string(ext_flag_col) + assert_number(power_par, lower = 0, upper = 1) + .borrowing_fixed_power_prior(ext_flag_col = ext_flag_col, power_par = power_par) +} + +# show ---- +setMethod( + f = "show", + signature = "BorrowingFixedPowerPrior", + definition = function(object) { + callNextMethod() + } +) + +# trim cols ---- +#' @rdname trim_cols +#' @include generics.R +setMethod( + f = "trim_cols", + signature = "BorrowingFixedPowerPrior", + definition = function(borrowing_object, analysis_object) { + get_vars(analysis_object) + } +) +# get vars ----- +#' @rdname get_vars +#' @include generics.R +setMethod( + f = "get_vars", + signature = "BorrowingFixedPowerPrior", + definition = function(object) { + c(ext_flag_col = object@ext_flag_col) + } +) diff --git a/R/check_data_matrix_has_columns.R b/R/check_data_matrix_has_columns.R index 37ce78a1..a33b6e34 100644 --- a/R/check_data_matrix_has_columns.R +++ b/R/check_data_matrix_has_columns.R @@ -39,8 +39,8 @@ check_data_matrix_has_columns <- function(object) { error_col <- c(error_col, get_vars(object@treatment)) } - if (!get_vars(object@borrowing) %in% data_cols) { - error_col <- c(error_col, get_vars(object@borrowing)) + if (any(missing_outcomes <- !get_vars(object@borrowing) %in% data_cols)) { + error_col <- c(error_col, get_vars(object@borrowing)[missing_outcomes]) } missing_covs <- setdiff(get_vars(object@covariates), data_cols) diff --git a/R/load_and_interpolate_stan_model.R b/R/load_and_interpolate_stan_model.R index d07a4680..40020c8b 100644 --- a/R/load_and_interpolate_stan_model.R +++ b/R/load_and_interpolate_stan_model.R @@ -8,7 +8,7 @@ #' @return String containing the interpolated Stan model #' @include outcome_surv_pem.R build_model_string <- function(template_domain, template_filename, outcome, borrowing, analysis_obj, ...) { - + # Load the Stan template template <- load_stan_file(template_domain, template_filename) @@ -72,6 +72,23 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "load_and_interpolate_stan_model", + signature = c("OutcomeSurvExponential", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + model_string <- build_model_string( + template_domain = "surv", + template_filename = "exp_fpp.stan", + outcome = outcome, + borrowing = borrowing, + analysis_obj = analysis_obj + ) + + return(model_string) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "load_and_interpolate_stan_model", @@ -110,6 +127,24 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "load_and_interpolate_stan_model", + signature = c("OutcomeSurvWeibullPH", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + model_string <- build_model_string( + template_domain = "surv", + template_filename = "weib_ph_fpp.stan", + outcome = outcome, + borrowing = borrowing, + analysis_obj = analysis_obj, + shape.prior = h_glue("shape_weibull ~ {{get_prior_string(outcome@param_priors$shape_weibull)}} ;") + ) + + return(model_string) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "load_and_interpolate_stan_model", @@ -149,6 +184,24 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "load_and_interpolate_stan_model", + signature = c("OutcomeSurvPEM", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + model_string <- build_model_string( + template_domain = "surv", + template_filename = "pem_fpp.stan", + outcome = outcome, + borrowing = borrowing, + analysis_obj = analysis_obj, + baseline.prior = get_prior_string(outcome@baseline_prior) + ) + + return(model_string) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "load_and_interpolate_stan_model", @@ -188,6 +241,23 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "load_and_interpolate_stan_model", + signature = c("OutcomeBinaryLogistic", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + model_string <- build_model_string( + template_domain = "bin", + template_filename = "logit_fpp.stan", + outcome = outcome, + borrowing = borrowing, + analysis_obj = analysis_obj + ) + + return(model_string) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "load_and_interpolate_stan_model", @@ -228,6 +298,24 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "load_and_interpolate_stan_model", + signature = c("OutcomeContinuousNormal", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + model_string <- build_model_string( + template_domain = "cont", + template_filename = "gauss_fpp.stan", + outcome = outcome, + borrowing = borrowing, + analysis_obj = analysis_obj, + stdev.prior = h_glue("std_dev_outcome ~ {{get_prior_string(outcome@param_priors$std_dev_outcome)}} ;") + ) + + return(model_string) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "load_and_interpolate_stan_model", diff --git a/R/outcome_surv_pem.R b/R/outcome_surv_pem.R index 43d5e0db..60712182 100644 --- a/R/outcome_surv_pem.R +++ b/R/outcome_surv_pem.R @@ -117,7 +117,7 @@ outcome_surv_pem <- function(time_var, cens_var, baseline_prior, weight_var = "" # show ---- setMethod( f = "show", - signature = "Outcome", + signature = "OutcomeSurvPEM", definition = function(object) { cat("Outcome object with class", class(object)[1], "\n\n") cat("Outcome variables:\n") diff --git a/R/prepare_stan_data_inputs.R b/R/prepare_stan_data_inputs.R index 0eab972a..778351ad 100644 --- a/R/prepare_stan_data_inputs.R +++ b/R/prepare_stan_data_inputs.R @@ -41,7 +41,6 @@ setGeneric( #' @return Named list of data inputs with covariates and weights added. #' @noRd add_covariates_and_weights <- function(data_in, analysis_obj, data_matrix) { - ## Covariate additions if (!is.null(analysis_obj@covariates)) { data_in[["K"]] <- NROW(analysis_obj@covariates@covariates) @@ -84,6 +83,31 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "prepare_stan_data_inputs", + signature = c("OutcomeSurvExponentialWeibull", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + data_matrix <- trim_data_matrix(analysis_obj) + + assert_number(borrowing@power_par, lower = 0, upper = 1) + power <- ifelse(data_matrix[, borrowing@ext_flag_col] == 1, borrowing@power_par, 1) + + data_in <- list( + N = nrow(data_matrix), + trt = data_matrix[, analysis_obj@treatment@trt_flag_col], + time = data_matrix[, outcome@time_var], + cens = data_matrix[, outcome@cens_var], + power = power + ) + + # Add covariates and weights + data_in <- add_covariates_and_weights(data_in, analysis_obj, data_matrix) + + return(data_in) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "prepare_stan_data_inputs", @@ -115,7 +139,6 @@ setMethod( f = "prepare_stan_data_inputs", signature = c("OutcomeSurvPEM", "BorrowingNoneFull", "ANY"), definition = function(outcome, borrowing, analysis_obj) { - analysis_obj@data_matrix <- trim_data_matrix(analysis_obj) analysis_obj <- cast_mat_to_long_pem(analysis_obj) data_matrix <- analysis_obj@data_matrix @@ -142,12 +165,46 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "prepare_stan_data_inputs", + signature = c("OutcomeSurvPEM", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + analysis_obj@data_matrix <- trim_data_matrix(analysis_obj) + analysis_obj <- cast_mat_to_long_pem(analysis_obj) + data_matrix <- analysis_obj@data_matrix + + assert_number(borrowing@power_par, lower = 0, upper = 1) + power <- ifelse(data_matrix[, borrowing@ext_flag_col] == 1, borrowing@power_par, 1) + + n_periods <- analysis_obj@outcome@n_periods + Z <- matrix(0, nrow = nrow(data_matrix), ncol = n_periods) + for (i in seq_len(nrow(data_matrix))) { + period <- data_matrix[i, "__period__"] + Z[i, period] <- 1 + } + data_in <- list( + N = nrow(data_matrix), + trt = data_matrix[, analysis_obj@treatment@trt_flag_col], + time = data_matrix[, outcome@time_var], + cens = data_matrix[, outcome@cens_var], + N_periods = max(data_matrix[, "__period__"]), + Z = Z, + power = power + ) + + # Add covariates and weights + data_in <- add_covariates_and_weights(data_in, analysis_obj, data_matrix) + + return(data_in) + } +) + ### Hierarchical Commensurate Borrowing ---- setMethod( f = "prepare_stan_data_inputs", signature = c("OutcomeSurvPEM", "BorrowingHierarchicalCommensurate", "ANY"), definition = function(outcome, borrowing, analysis_obj) { - analysis_obj <- cast_mat_to_long_pem(analysis_obj) data_matrix <- analysis_obj@data_matrix @@ -197,6 +254,30 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "prepare_stan_data_inputs", + signature = c("OutcomeBinaryLogistic", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + data_matrix <- trim_data_matrix(analysis_obj) + + assert_number(borrowing@power_par, lower = 0, upper = 1) + power <- ifelse(data_matrix[, borrowing@ext_flag_col] == 1, borrowing@power_par, 1) + + data_in <- list( + N = nrow(data_matrix), + trt = data_matrix[, analysis_obj@treatment@trt_flag_col], + y = data_matrix[, outcome@binary_var], + power = power + ) + + # Add covariates and weights + data_in <- add_covariates_and_weights(data_in, analysis_obj, data_matrix) + + return(data_in) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "prepare_stan_data_inputs", @@ -241,6 +322,30 @@ setMethod( } ) +### Fixed power prior borrowing ---- +setMethod( + f = "prepare_stan_data_inputs", + signature = c("OutcomeContinuousNormal", "BorrowingFixedPowerPrior", "ANY"), + definition = function(outcome, borrowing, analysis_obj) { + data_matrix <- trim_data_matrix(analysis_obj) + + assert_number(borrowing@power_par, lower = 0, upper = 1) + power <- ifelse(data_matrix[, borrowing@ext_flag_col] == 1, borrowing@power_par, 1) + + data_in <- list( + N = nrow(data_matrix), + trt = data_matrix[, analysis_obj@treatment@trt_flag_col], + y = data_matrix[, outcome@continuous_var], + power = power + ) + + # Add covariates and weights + data_in <- add_covariates_and_weights(data_in, analysis_obj, data_matrix) + + return(data_in) + } +) + ### Hierarchical commensurate prior borrowing ---- setMethod( f = "prepare_stan_data_inputs", diff --git a/inst/WORDLIST b/inst/WORDLIST index 3dde3d00..c05ff956 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -15,6 +15,7 @@ biopharmaceutical bootswatch Bove cauchy +cens ci CJ CMD @@ -30,8 +31,10 @@ costructor cov dataLayer desc +dev doi eg +elp Enas estimands et @@ -70,8 +73,16 @@ Kexin Khanal Kinnersley knitr +lccdf +lcdf Lifecycle Lindborg +linpred +lp +lpdf +lprob +lupdf +lupmf Mandrekar Manoj MatchIt @@ -87,6 +98,7 @@ operatorname ORCID parametrized patsubst +ph Pharmaceut poisson propto @@ -113,6 +125,7 @@ sourcenotes src stan Statist +stdev survial tabset tbody @@ -126,6 +139,7 @@ UA vgwzvpznzo Viele Walkthrough +weibull wpfbyktkdu www Yichen diff --git a/inst/stan/bin/logit_fpp.stan b/inst/stan/bin/logit_fpp.stan new file mode 100644 index 00000000..70dce24c --- /dev/null +++ b/inst/stan/bin/logit_fpp.stan @@ -0,0 +1,45 @@ +// Logistic regression +// Fixed power prior + +data { + + int N; // number of observations + vector[N] trt; // treatment indicator + array[N] int y; // outcome + vector[N] power; // power parameter + + {{ weights.data }} + {{ cov.data }} + +} + +parameters { + + real beta_trt; // treatment effect + real alpha; // baseline log-odds + + {{ cov.parameters }} + +} + +transformed parameters { + + real OR_trt = exp(beta_trt); + +} + +model { + + vector[N] lp; + + {{ trt.prior }} + {{ cov.priors }} + {{ baseline.prior }} + + lp = alpha + trt * beta_trt {{ cov.linpred }} ; + + for (i in 1:N) { + target += bernoulli_logit_lupmf(y[i] | lp[i]) * power[i] {{ weights.likelihood }}; + } + +} diff --git a/inst/stan/cont/gauss_fpp.stan b/inst/stan/cont/gauss_fpp.stan new file mode 100644 index 00000000..648e7d84 --- /dev/null +++ b/inst/stan/cont/gauss_fpp.stan @@ -0,0 +1,40 @@ +// Continuous normal +// Fixed power prior + +data { + + int N; // number of observations + vector[N] trt; // treatment indicator + array[N] real y; // outcome + vector[N] power; // power parameter + + {{ weights.data }} + {{ cov.data }} + +} + +parameters { + + real beta_trt; // treatment effect + real alpha; // baseline mean + real std_dev_outcome; // standard deviation of outcome + + {{ cov.parameters }} + +} + +model { + vector[N] lp; + + {{ trt.prior }} + {{ cov.priors }} + {{ baseline.prior }} + {{ stdev.prior }} + + lp = alpha + trt * beta_trt {{ cov.linpred }} ; + + for (i in 1:N) { + target += normal_lupdf(y[i] | lp[i], std_dev_outcome) * power[i] {{ weights.likelihood }}; + } + +} diff --git a/inst/stan/surv/exp_fpp.stan b/inst/stan/surv/exp_fpp.stan new file mode 100644 index 00000000..320ec6ef --- /dev/null +++ b/inst/stan/surv/exp_fpp.stan @@ -0,0 +1,52 @@ +// Exponential survival model +// Fixed power prior + +data { + + int N; // number of observations + vector[N] trt; // treatment indicator + vector[N] time; // survival time + vector[N] cens; // censoring indicator + vector[N] power; // power parameter + + {{ weights.data }} + {{ cov.data }} + +} + +parameters { + + real beta_trt; // treatment effect + real alpha; // baseline hazard + + {{ cov.parameters }} + +} + +transformed parameters { + + real HR_trt = exp(beta_trt); + +} + +model { + + vector[N] lp; + vector[N] elp; + + {{ trt.prior }} + {{ cov.priors }} + {{ baseline.prior }} + + lp = alpha + trt * beta_trt {{ cov.linpred }} ; + elp = exp(lp); + + for (i in 1:N) { + if (cens[i] == 1) { + target += exponential_lccdf(time[i] | elp[i]) * power[i] {{ weights.likelihood }}; + } else { + target += exponential_lpdf(time[i] | elp[i]) * power[i] {{ weights.likelihood }}; + } + } + +} diff --git a/inst/stan/surv/pem_fpp.stan b/inst/stan/surv/pem_fpp.stan new file mode 100644 index 00000000..9fd41129 --- /dev/null +++ b/inst/stan/surv/pem_fpp.stan @@ -0,0 +1,58 @@ +// Piecewise exponential survival model +// Fixed power prior + +data { + + int N; // number of observation-periods + vector[N] trt; // treatment indicator + vector[N] time; // survival time + vector[N] cens; // censoring indicator + int N_periods; // number of periods + matrix[N, N_periods] Z; // period indicators + vector[N] power; // power parameter + + {{ weights.data }} + {{ cov.data }} + +} + +parameters { + + real beta_trt; // treatment effect + vector[N_periods] alpha; // baseline hazard + + {{ cov.parameters }} + +} + +transformed parameters { + + real HR_trt = exp(beta_trt); + +} + +model { + + vector[N] lp; + vector[N] elp; + real sigma; + + {{ trt.prior }} + {{ cov.priors }} + + for (i in 1:N_periods) { + alpha[i] ~ {{ baseline.prior }}; + } + + lp = Z * alpha + trt * beta_trt {{ cov.linpred }} ; + elp = exp(lp); + + for (i in 1:N) { + if (cens[i] == 1) { + target += exponential_lccdf(time[i] | elp[i]) * power[i] {{ weights.likelihood }}; + } else { + target += exponential_lpdf(time[i] | elp[i]) * power[i] {{ weights.likelihood }}; + } + } + +} diff --git a/inst/stan/surv/weib_ph_fpp.stan b/inst/stan/surv/weib_ph_fpp.stan new file mode 100644 index 00000000..0e96cb40 --- /dev/null +++ b/inst/stan/surv/weib_ph_fpp.stan @@ -0,0 +1,73 @@ +// Weibull proportional hazards survival model +// Fixed power prior + +functions { + + real weibull_ph_lpdf(real y, real alpha, real lambda) { + real lprob = log(alpha) + log(lambda) + (alpha - 1) * log(y) - lambda * (y^alpha); + return lprob; + } + + real weibull_ph_lcdf(real y, real alpha, real lambda) { + real lprob = log(1 - exp(-lambda * y^alpha)); + return lprob; + } + + real weibull_ph_lccdf(real y, real alpha, real lambda) { + real lprob = -lambda * y^alpha; + return lprob; + } + +} + +data { + + int N; // number of observations + vector[N] trt; // treatment indicator + vector[N] time; // survival time + vector[N] cens; // censoring indicator + vector[N] power; // power parameter + + {{ weights.data }} + {{ cov.data }} + +} + +parameters { + + real beta_trt; // treatment effect + real alpha; // baseline hazard + real shape_weibull; // weibull shape parameter + + {{ cov.parameters }} + +} + +transformed parameters { + + real HR_trt = exp(beta_trt); + +} + +model { + + vector[N] lp; + vector[N] elp; + + {{ trt.prior }} + {{ cov.priors }} + {{ baseline.prior }} + {{ shape.prior }} + + lp = alpha + trt * beta_trt {{ cov.linpred }} ; + elp = exp(lp); + + for (i in 1:N) { + if (cens[i] == 1) { + target += weibull_ph_lccdf(time[i] | shape_weibull, elp[i]) * power[i] {{ weights.likelihood }}; + } else { + target += weibull_ph_lpdf(time[i] | shape_weibull, elp[i]) * power[i] {{ weights.likelihood }}; + } + } + +} diff --git a/man/Borrowing-class.Rd b/man/Borrowing-class.Rd index 3b1ec53c..a850ac95 100644 --- a/man/Borrowing-class.Rd +++ b/man/Borrowing-class.Rd @@ -21,6 +21,7 @@ A class for defining borrowing details. Objects of class Prior constructor functions: \code{\link[=borrowing_full]{borrowing_full()}}, \code{\link[=borrowing_hierarchical_commensurate]{borrowing_hierarchical_commensurate()}}, \code{\link[=borrowing_none]{borrowing_none()}} Other borrowing classes: +\code{\link{BorrowingFixedPowerPrior-class}}, \code{\link{BorrowingFull-class}}, \code{\link{BorrowingHierarchicalCommensurate-class}}, \code{\link{BorrowingNone-class}} diff --git a/man/BorrowingFixedPowerPrior-class.Rd b/man/BorrowingFixedPowerPrior-class.Rd new file mode 100644 index 00000000..5f7eb327 --- /dev/null +++ b/man/BorrowingFixedPowerPrior-class.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/borrowing_fixed_power_prior.R +\docType{class} +\name{BorrowingFixedPowerPrior-class} +\alias{BorrowingFixedPowerPrior-class} +\alias{.borrowing_fixed_power_prior} +\title{\code{BorrowingFixedPowerPrior} class} +\description{ +\code{BorrowingFixedPowerPrior} class +} +\section{Slots}{ + +\describe{ +\item{\code{method_name}}{string. The name of the method.} + +\item{\code{ext_flag_col}}{character. Name of the external flag column in the matrix.} + +\item{\code{powers}}{numeric. The fixed values to be used as the powers in the power prior.} +}} + +\seealso{ +Other borrowing classes: +\code{\link{Borrowing-class}}, +\code{\link{BorrowingFull-class}}, +\code{\link{BorrowingHierarchicalCommensurate-class}}, +\code{\link{BorrowingNone-class}} +} +\concept{borrowing classes} diff --git a/man/BorrowingFull-class.Rd b/man/BorrowingFull-class.Rd index 309ab99c..aa68fc70 100644 --- a/man/BorrowingFull-class.Rd +++ b/man/BorrowingFull-class.Rd @@ -24,6 +24,7 @@ should not be created directly but by the constructor \seealso{ Other borrowing classes: \code{\link{Borrowing-class}}, +\code{\link{BorrowingFixedPowerPrior-class}}, \code{\link{BorrowingHierarchicalCommensurate-class}}, \code{\link{BorrowingNone-class}} } diff --git a/man/BorrowingHierarchicalCommensurate-class.Rd b/man/BorrowingHierarchicalCommensurate-class.Rd index 5187dba1..e9e0ab6b 100644 --- a/man/BorrowingHierarchicalCommensurate-class.Rd +++ b/man/BorrowingHierarchicalCommensurate-class.Rd @@ -25,6 +25,7 @@ should not be created directly but by the constructor \seealso{ Other borrowing classes: \code{\link{Borrowing-class}}, +\code{\link{BorrowingFixedPowerPrior-class}}, \code{\link{BorrowingFull-class}}, \code{\link{BorrowingNone-class}} } diff --git a/man/BorrowingNone-class.Rd b/man/BorrowingNone-class.Rd index 6ca73db4..9c11cb76 100644 --- a/man/BorrowingNone-class.Rd +++ b/man/BorrowingNone-class.Rd @@ -22,6 +22,7 @@ should not be created directly but by the constructor \seealso{ Other borrowing classes: \code{\link{Borrowing-class}}, +\code{\link{BorrowingFixedPowerPrior-class}}, \code{\link{BorrowingFull-class}}, \code{\link{BorrowingHierarchicalCommensurate-class}} } diff --git a/man/borrowing_fixed_power_prior.Rd b/man/borrowing_fixed_power_prior.Rd new file mode 100644 index 00000000..3155da5b --- /dev/null +++ b/man/borrowing_fixed_power_prior.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/borrowing_fixed_power_prior.R +\name{borrowing_fixed_power_prior} +\alias{borrowing_fixed_power_prior} +\title{Fixed Power Prior Borrowing} +\usage{ +borrowing_fixed_power_prior(ext_flag_col, power_par) +} +\arguments{ +\item{ext_flag_col}{character. Name of the external flag column in the matrix.} + +\item{power_par}{numeric. Fixed power parameter for all external data. Must be between 0 and 1.} +} +\value{ +Object of class \code{\link[=BorrowingFixedPowerPrior-class]{BorrowingFixedPowerPrior}}. +} +\description{ +Fixed Power Prior Borrowing +} +\examples{ +borrowing_fixed_power_prior( + ext_flag_col = "ext", + power_par = 0.5 +) +} diff --git a/man/get_vars.Rd b/man/get_vars.Rd index aeaf4373..bc1aa9fc 100644 --- a/man/get_vars.Rd +++ b/man/get_vars.Rd @@ -1,9 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/generics.R, R/covariate_class.R, % R/treatment_class.R, R/borrowing_class.R, R/outcome_class.R, -% R/analysis_class.R, R/sim_treatment_list.R, R/sim_outcome_list.R, -% R/sim_borrowing_list.R, R/sim_covariate_list.R, R/simulation_class.R, -% R/simulate_data_baseline.R +% R/analysis_class.R, R/borrowing_fixed_power_prior.R, R/sim_treatment_list.R, +% R/sim_outcome_list.R, R/sim_borrowing_list.R, R/sim_covariate_list.R, +% R/simulation_class.R, R/simulate_data_baseline.R \name{get_vars} \alias{get_vars} \alias{get_vars,Covariates-method} @@ -14,6 +14,7 @@ \alias{get_vars,ContinuousOutcome-method} \alias{get_vars,Analysis-method} \alias{get_vars,NULL-method} +\alias{get_vars,BorrowingFixedPowerPrior-method} \alias{get_vars,SimTreatmentList-method} \alias{get_vars,SimOutcomeList-method} \alias{get_vars,SimBorrowingList-method} @@ -40,6 +41,8 @@ get_vars(object) \S4method{get_vars}{NULL}(object) +\S4method{get_vars}{BorrowingFixedPowerPrior}(object) + \S4method{get_vars}{SimTreatmentList}(object) \S4method{get_vars}{SimOutcomeList}(object) diff --git a/man/trim_cols.Rd b/man/trim_cols.Rd index 5345369c..660bcf3b 100644 --- a/man/trim_cols.Rd +++ b/man/trim_cols.Rd @@ -1,9 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/generics.R, R/borrowing_class.R, -% R/borrowing_hierarchical_commensurate.R +% R/borrowing_fixed_power_prior.R, R/borrowing_hierarchical_commensurate.R \name{trim_cols} \alias{trim_cols} \alias{trim_cols,Borrowing-method} +\alias{trim_cols,BorrowingFixedPowerPrior-method} \alias{trim_cols,BorrowingHierarchicalCommensurate-method} \title{Trim columns from Data Matrix Based on Borrowing object type} \usage{ @@ -11,6 +12,8 @@ trim_cols(borrowing_object, analysis_object) \S4method{trim_cols}{Borrowing}(borrowing_object, analysis_object) +\S4method{trim_cols}{BorrowingFixedPowerPrior}(borrowing_object, analysis_object) + \S4method{trim_cols}{BorrowingHierarchicalCommensurate}(borrowing_object, analysis_object) } \arguments{ diff --git a/tests/testthat/_snaps/outcome_surv_exponential.md b/tests/testthat/_snaps/outcome_surv_exponential.md new file mode 100644 index 00000000..540010a6 --- /dev/null +++ b/tests/testthat/_snaps/outcome_surv_exponential.md @@ -0,0 +1,19 @@ +# show works as expected for OutcomeSurvExponential + + Code + show(result) + Output + Outcome object with class OutcomeSurvExponential + + Outcome variables: + time_var cens_var weight_var + "time" "cens" "w" + + Baseline prior: + Normal Distribution + Parameters: + Stan R Value + mu mean 0 + sigma sd 1000 + + diff --git a/tests/testthat/_snaps/outcome_surv_pem.md b/tests/testthat/_snaps/outcome_surv_pem.md new file mode 100644 index 00000000..0b10e8e6 --- /dev/null +++ b/tests/testthat/_snaps/outcome_surv_pem.md @@ -0,0 +1,20 @@ +# show works as expected for OutcomeSurvPem + + Code + show(result) + Output + Outcome object with class OutcomeSurvPEM + + Outcome variables: + time_var cens_var + "time" "cens" + + Baseline prior: + Normal Distribution + Parameters: + Stan R Value + mu mean 0 + sigma sd 1000 + + Cut points: 10, 15, 30 + diff --git a/tests/testthat/test-borrowing_fixed_power_prior.R b/tests/testthat/test-borrowing_fixed_power_prior.R new file mode 100644 index 00000000..4b41060c --- /dev/null +++ b/tests/testthat/test-borrowing_fixed_power_prior.R @@ -0,0 +1,14 @@ +test_that("borrowing_fixed_power_prior works as expected", { + fpp <- borrowing_fixed_power_prior("ext", 0.33) + expect_class(fpp, "Borrowing") + expect_class(fpp, "BorrowingFixedPowerPrior") + expect_equal(fpp@ext_flag_col, "ext") + expect_equal(fpp@power_par, 0.33) +}) + +test_that("get_vars works for BorrowingFixedPowerPrior", { + expect_identical( + get_vars(borrowing_fixed_power_prior("ext_fl", 0.2)), + c(ext_flag_col = "ext_fl") + ) +}) diff --git a/tests/testthat/test-create_analysis_obj.R b/tests/testthat/test-create_analysis_obj.R index 19a003c8..d1b25ab6 100644 --- a/tests/testthat/test-create_analysis_obj.R +++ b/tests/testthat/test-create_analysis_obj.R @@ -196,6 +196,19 @@ test_that("Columns in analysis_obj should be in matrix", { ), "The following specified variables were not found in `data_matrix`:\n ext_flag_col: tira" ) + + expect_no_error( + create_analysis_obj( + data_matrix = example_matrix, + covariates = ac, + outcome = esd, + treatment = td, + borrowing = borrowing_fixed_power_prior( + ext_flag_col = "ext", + power_par = 0.3 + ) + ) + ) }) diff --git a/tests/testthat/test-mcmc_sample-analysis.R b/tests/testthat/test-mcmc_sample-analysis.R index ee33a29c..b034d0fc 100644 --- a/tests/testthat/test-mcmc_sample-analysis.R +++ b/tests/testthat/test-mcmc_sample-analysis.R @@ -468,7 +468,7 @@ test_that("mcmc_sample for Analysis works for normal with full borrowing", { outcome <- outcome_cont_normal( continuous_var = "outcome", baseline_prior = prior_normal(0, 100), - std_dev_prior=prior_half_cauchy(1, 5) + std_dev_prior = prior_half_cauchy(1, 5) ) borrowing <- borrowing_full( ext_flag_col = "ext" @@ -479,7 +479,7 @@ test_that("mcmc_sample for Analysis works for normal with full borrowing", { ) anls_obj <- create_analysis_obj( data_matrix = cbind(example_matrix, outcome = outcome_col), - outcome = outcome, + outcome = outcome, borrowing = borrowing, treatment = treatment, quiet = FALSE @@ -509,7 +509,7 @@ test_that("mcmc_sample for Analysis works for normal with BDB", { outcome <- outcome_cont_normal( continuous_var = "outcome", baseline_prior = prior_normal(0, 100), - std_dev_prior=prior_half_cauchy(1, 5) + std_dev_prior = prior_half_cauchy(1, 5) ) borrowing <- borrowing_hierarchical_commensurate( ext_flag_col = "ext", @@ -521,7 +521,7 @@ test_that("mcmc_sample for Analysis works for normal with BDB", { ) anls_obj <- create_analysis_obj( data_matrix = cbind(example_matrix, outcome = outcome_col), - outcome = outcome, + outcome = outcome, borrowing = borrowing, treatment = treatment, quiet = FALSE @@ -602,7 +602,7 @@ test_that("mcmc_sample for Analysis works for full borrowing, piecewise exponent skip_on_cran() skip_on_ci() library(eha) - cuts = c(1, 5, 10) + cuts <- c(1, 5, 10) pem_eha <- eha::pchreg(survival::Surv(time, status) ~ trt + cov1 + cov2, data = as.data.frame(psborrow2::example_matrix), cuts = c(0, cuts, 1000)) full_pem_bayes_ao <- create_analysis_obj( @@ -619,30 +619,29 @@ test_that("mcmc_sample for Analysis works for full borrowing, piecewise exponent chains = 2 ) expect_r6(full_pem_bayes, "CmdStanMCMC") - expect_equal(full_pem_bayes$summary("beta_trt")[[2]], pem_eha$coefficients[['trt']], tolerance = 0.05) - expect_equal(full_pem_bayes$summary("beta[1]")[[2]], pem_eha$coefficients[['cov1']], tolerance = 0.05) - expect_equal(full_pem_bayes$summary("beta[2]")[[2]], pem_eha$coefficients[['cov2']], tolerance = 0.05) + expect_equal(full_pem_bayes$summary("beta_trt")[[2]], pem_eha$coefficients[["trt"]], tolerance = 0.05) + expect_equal(full_pem_bayes$summary("beta[1]")[[2]], pem_eha$coefficients[["cov1"]], tolerance = 0.05) + expect_equal(full_pem_bayes$summary("beta[2]")[[2]], pem_eha$coefficients[["cov2"]], tolerance = 0.05) # Check that the cut points are the same expect_equal(full_pem_bayes$summary("alpha[1]")[[2]], log(pem_eha$hazards[1]), tolerance = 0.05) expect_equal(full_pem_bayes$summary("alpha[2]")[[2]], log(pem_eha$hazards[2]), tolerance = 0.05) expect_equal(full_pem_bayes$summary("alpha[3]")[[2]], log(pem_eha$hazards[3]), tolerance = 0.05) expect_equal(full_pem_bayes$summary("alpha[4]")[[2]], log(pem_eha$hazards[4]), tolerance = 0.05) - }) # Piecewise exponential, BDB ---- test_that("mcmc_sample for Analysis works for BDB, piecewise exponential dist", { skip_on_cran() skip_on_ci() - cuts = c(1, 5, 10) + cuts <- c(1, 5, 10) # Make commensurate matrix - internal_as_external <- example_matrix[example_matrix[, 'ext'] == 0 & example_matrix[,'trt'] == 0,] - internal_as_external[, 'ext'] <- 1 - internal_as_external[, 'id'] <- seq(10000, 10000 + nrow(internal_as_external) - 1) + internal_as_external <- example_matrix[example_matrix[, "ext"] == 0 & example_matrix[, "trt"] == 0, ] + internal_as_external[, "ext"] <- 1 + internal_as_external[, "id"] <- seq(10000, 10000 + nrow(internal_as_external) - 1) commensurate_matrix <- rbind( - example_matrix[example_matrix[,'ext'] == 0,], + example_matrix[example_matrix[, "ext"] == 0, ], internal_as_external ) @@ -711,5 +710,215 @@ test_that("mcmc_sample for Analysis works for BDB, piecewise exponential dist", expect_true(tau_commens_aggr > tau_incommens_aggr) expect_true(tau_commens_aggr > tau_commens_conserv) expect_true(tau_incommens_aggr > tau_incommens_conserv) +}) + +# Fixed Power Prior ------ +test_that("mcmc_sample for Analysis works for fixed power prior borrowing, exponential dist", { + skip_on_cran() + skip_on_ci() + power <- 1 - 0.3 * example_matrix[, "ext"] + + exp_c1 <- exp(coef(flexsurv::flexsurvreg( + survival::Surv(time, 1 - cnsr) ~ trt + cov1, + data = as.data.frame(example_matrix), + dist = "exponential", + weights = power + ))[["trt"]]) + + + fpp_exp_bayes_ao <- create_analysis_obj( + data_matrix = example_matrix, + outcome = outcome_surv_exponential("time", "cnsr", prior_normal(0, 100000)), + borrowing = borrowing_fixed_power_prior( + ext_flag_col = "ext", + power_par = 0.7 + ), + treatment = treatment_details("trt", prior_normal(0, 100000)) + ) + + fpp_exp_bayes <- mcmc_sample( + fpp_exp_bayes_ao, + iter_warmup = 2000, + iter_sampling = 2000, + chains = 1 + ) + expect_r6(fpp_exp_bayes, "CmdStanMCMC") + expect_equal( + fpp_exp_bayes$summary("HR_trt", "median")[[2]], + exp_c1, + tolerance = .05 + ) +}) + +test_that("mcmc_sample for Analysis works for fixed power prior, exponential dist, one covariate", { + skip_on_cran() + skip_on_ci() + power <- 1 - 0.3 * example_matrix[, "ext"] + fpp_exp_c1 <- exp(coef(flexsurv::flexsurvreg( + survival::Surv(time, 1 - cnsr) ~ trt + cov1, + data = as.data.frame(example_matrix), + dist = "exponential", + weights = power + ))[["trt"]]) + + fpp_exp_bayes_c1_ao <- create_analysis_obj( + data_matrix = example_matrix, + covariates = add_covariates("cov1", prior_normal(0, 100000)), + outcome = outcome_surv_exponential("time", "cnsr", prior_normal(0, 100000)), + borrowing = borrowing_fixed_power_prior( + ext_flag_col = "ext", + power_par = 0.7 + ), + treatment = treatment_details("trt", prior_normal(0, 100000)) + ) + + fpp_exp_bayes_c1 <- mcmc_sample( + fpp_exp_bayes_c1_ao, + iter_warmup = 2000, + iter_sampling = 2000, + chains = 1 + ) + expect_r6(fpp_exp_bayes_c1, "CmdStanMCMC") + expect_equal( + fpp_exp_bayes_c1$summary("HR_trt", "median")[[2]], + fpp_exp_c1, + tolerance = .05 + ) +}) + +test_that("mcmc_sample for Analysis works for fixed power prior borrowing, normal dist", { + set.seed(123) + power <- 1 - 0.5 * example_matrix[, "ext"] + outcome_col <- 5 + example_matrix[, "trt"] + example_matrix[, "cov1"] + 2 * example_matrix[, "cov2"] + + 0.5 * example_matrix[, "cov3"] - 1.5 * example_matrix[, "cov4"] + rnorm(500, 0, 1) + + outcome <- outcome_cont_normal( + continuous_var = "outcome", + baseline_prior = prior_normal(0, 100), + std_dev_prior = prior_half_cauchy(1, 5) + ) + borrowing <- borrowing_fixed_power_prior( + ext_flag_col = "ext", + power_par = 0.5 + ) + treatment <- treatment_details( + trt_flag_col = "trt", + trt_prior = prior_normal(0, 1000) + ) + anls_obj <- create_analysis_obj( + data_matrix = cbind(example_matrix, outcome = outcome_col), + outcome = outcome, + borrowing = borrowing, + treatment = treatment, + quiet = FALSE + ) + result <- mcmc_sample( + anls_obj, + iter_warmup = 2000, + iter_sampling = 2000, + chains = 1 + ) + + result_summary <- result$summary(c("alpha", "beta_trt")) + expect_equal(result_summary[["median"]], c(6.44, 0.72), tolerance = .05) + expect_equal(result_summary[["q5"]], c(6.25, 0.38), tolerance = .05) + expect_equal(result_summary[["q95"]], c(6.62, 1.06), tolerance = .05) +}) + + +test_that("mcmc_sample for Analysis works for fixed power prior borrowing, logistic dist", { + skip_on_cran() + skip_on_ci() + set.seed(123) + power <- 1 - 0.5 * example_matrix[, "ext"] + fpp_bin_bayes_ao <- create_analysis_obj( + data_matrix = example_matrix, + outcome = outcome_bin_logistic("resp", prior_normal(0, 100000)), + borrowing = borrowing_fixed_power_prior("ext", power_par = 0.5), + treatment = treatment_details("trt", prior_normal(0, 100000)) + ) + + fpp_bin_bayes <- mcmc_sample( + fpp_bin_bayes_ao, + iter_warmup = 2000, + iter_sampling = 2000, + chains = 1 + ) + expect_r6(fpp_bin_bayes, "CmdStanMCMC") + expect_equal( + fpp_bin_bayes$summary("OR_trt", "median")[[2]], + 1.59, + tolerance = .05 + ) +}) + +test_that("mcmc_sample for Analysis works for fixed power prior borrowing, PEM dist", { + skip_on_cran() + skip_on_ci() + cuts <- c(1, 5, 10) + set.seed(1234) + internal_as_external <- example_matrix[example_matrix[, "ext"] == 0 & example_matrix[, "trt"] == 0, ] + internal_as_external[, "ext"] <- 1 + internal_as_external[, "id"] <- seq(10000, 10000 + nrow(internal_as_external) - 1) + data_matrix <- rbind( + example_matrix[example_matrix[, "ext"] == 0, ], + internal_as_external + ) + power <- 1 - 0.5 * data_matrix[, "ext"] + ## Conservative commensurate + fpp_pem_ao <- create_analysis_obj( + data_matrix = data_matrix, + outcome = outcome_surv_pem("time", "cnsr", prior_normal(0, 100000), cut_points = cuts), + borrowing = borrowing_fixed_power_prior("ext", 0.5), + treatment = treatment_details("trt", prior_normal(0, 100000)) + ) + + fpp_pem <- mcmc_sample( + fpp_pem_ao, + iter_warmup = 2000, + iter_sampling = 5000, + chains = 2 + ) + beta_trt <- fpp_pem$summary("beta_trt")[["median"]] + expect_equal(beta_trt, -0.11, tolerance = 0.05) +}) + +test_that("mcmc_sample for Analysis works for fixed power prior borrowing, Weibull dist", { + skip_on_cran() + skip_on_ci() + + data_matrix <- cbind(example_matrix, power = 1 - 0.5 * example_matrix[, "ext"]) + + fpp_weib <- exp(coef(flexsurv::flexsurvreg( + survival::Surv(time, 1 - cnsr) ~ trt, + data = as.data.frame(data_matrix), + dist = "weibullPH", + weights = power + ))[["trt"]]) + + fpp_weib_bayes_ao <- create_analysis_obj( + data_matrix = data_matrix, + outcome = outcome_surv_weibull_ph( + "time", + "cnsr", + prior_normal(0, 100000), + prior_normal(0, 100000) + ), + borrowing = borrowing_fixed_power_prior("ext", 0.5), + treatment = treatment_details("trt", prior_normal(0, 100000)) + ) + + fpp_weib_bayes <- mcmc_sample( + fpp_weib_bayes_ao, + iter_warmup = 2000, + iter_sampling = 2000, + chains = 1 + ) + expect_r6(fpp_weib_bayes, "CmdStanMCMC") + expect_equal( + fpp_weib_bayes$summary("HR_trt", "median")[[2]], + fpp_weib, + tolerance = .05 + ) }) diff --git a/tests/testthat/test-outcome_surv_exponential.R b/tests/testthat/test-outcome_surv_exponential.R index e791e986..d85ac277 100644 --- a/tests/testthat/test-outcome_surv_exponential.R +++ b/tests/testthat/test-outcome_surv_exponential.R @@ -58,3 +58,13 @@ test_that("exp_surv_dist() throws error", { regexp = "deprecated" ) }) + +test_that("show works as expected for OutcomeSurvExponential", { + result <- outcome_surv_exponential( + time_var = "time", + cens_var = "cens", + prior_normal(0, 1000), + weight_var = "w" + ) + expect_snapshot(show(result)) +}) diff --git a/tests/testthat/test-outcome_surv_pem.R b/tests/testthat/test-outcome_surv_pem.R index 0f90c5d9..0af88a36 100644 --- a/tests/testthat/test-outcome_surv_pem.R +++ b/tests/testthat/test-outcome_surv_pem.R @@ -40,3 +40,13 @@ test_that("get_vars works for OutcomeSurvPEM", { c(time_var = "TIME", cens_var = "CENS", weight_var = "W") ) }) + +test_that("show works as expected for OutcomeSurvPem", { + result <- outcome_surv_pem( + time_var = "time", + cens_var = "cens", + baseline_prior = prior_normal(0, 1000), + cut_points = c(10, 15, 30) + ) + expect_snapshot(show(result)) +}) diff --git a/tests/testthat/test-prepare_stan_data_inputs.R b/tests/testthat/test-prepare_stan_data_inputs.R index 6b21c4c9..a44d60c3 100644 --- a/tests/testthat/test-prepare_stan_data_inputs.R +++ b/tests/testthat/test-prepare_stan_data_inputs.R @@ -150,3 +150,28 @@ test_that("prepare_stan_data_inputs works with PEM", { expect_equal(dim(result[["Z1"]])[2], result[["N_periods"]]) }) + + +test_that("prepare_stan_data_inputs works with a fixed power prior", { + + object <- psborrow2:::.analysis_obj( + data_matrix = example_matrix, + outcome = outcome_surv_exponential( + "time", + "cnsr", + prior_normal(0, 1000) + ), + borrowing = borrowing_fixed_power_prior( + "ext", + 0.8 + ), + treatment = treatment_details("trt", prior_normal(0, 1000)) + ) + + result <- psborrow2:::prepare_stan_data_inputs(object@outcome, object@borrowing, object) + power <- result$power + + expect_atomic_vector(power) + expect_equal(sum(power==1), sum(1-example_matrix[,'ext'])) + +}) diff --git a/tests/testthat/test-trim_data_matrix.R b/tests/testthat/test-trim_data_matrix.R index 7770dadc..0930ba10 100644 --- a/tests/testthat/test-trim_data_matrix.R +++ b/tests/testthat/test-trim_data_matrix.R @@ -49,3 +49,23 @@ test_that("data matrix trimming works with No Borrowing", { colnames(result), c("time", "cnsr", "trt", "cov1") ) }) + + +test_that("data matrix trimming works with Fixed Power Prior Borrowing", { + object <- psborrow2:::.analysis_obj( + data_matrix = example_matrix, + outcome = outcome_surv_exponential("time", "cnsr", prior_normal(0, 1000)), + treatment = treatment_details("trt", trt_prior = prior_normal(0, 1000)), + covariates = add_covariates("cov1", prior_normal(0, 1000)), + borrowing = borrowing_fixed_power_prior( + ext_flag_col = "ext", + power_par = 0.75 + ) + ) + + result <- psborrow2:::trim_data_matrix(object) + expect_matrix(result, mode = "numeric", nrows = 500, ncols = 5) + expect_set_equal( + colnames(result), c("time", "cnsr", "ext", "trt", "cov1") + ) +}) diff --git a/vignettes/data_simulation.Rmd b/vignettes/data_simulation.Rmd index 64f434e8..b35ec3a0 100644 --- a/vignettes/data_simulation.Rmd +++ b/vignettes/data_simulation.Rmd @@ -12,7 +12,7 @@ vignette: > This vignette will show the internal functions for generating data that can be used in simulation studies. -```r +``` r library(psborrow2) library(dplyr) ``` @@ -23,7 +23,7 @@ First we define how to generate baseline data for our study. In its simplest for patients are in the arms: treated internal, control internal, control external. -```r +``` r simple_baseline <- create_baseline_object( n_trt_int = 100, n_ctrl_int = 50, @@ -35,7 +35,7 @@ This object defines how the data will be created. To actually generate a sample, list containing `data.frame`s for each arm. We can convert this to a single `data.frame` if needed. -```r +``` r baseline_data <- generate(simple_baseline) baseline_data #> Baseline Data List @@ -81,7 +81,7 @@ for the internal and external arms. The internal arms are assumed to be randomiz distribution. If no external parameters are specified, the internal ones are used for both. -```r +``` r with_age <- create_baseline_object( n_trt_int = 100, n_ctrl_int = 50, @@ -126,7 +126,7 @@ In a more complex setting, we can generate correlated baseline covariates by spe the main diagonal (variance components) and the upper triangle (covariance components). -```r +``` r covariance_matrix(diag = c(5, 1.2), upper_tri = c(0.1)) #> [,1] [,2] #> [1,] 5.0 0.1 @@ -148,7 +148,7 @@ data_age_score <- generate(with_age_score) ``` -```r +``` r df_age_score <- as.data.frame(data_age_score) plot( x = df_age_score$age, @@ -171,7 +171,7 @@ legend( Finally, we have the option to transform non-normal covariates, such as with binary cut-offs. These can be added to existing objects and can use built-in functions or used defined. -```r +``` r with_age_score <- with_age_score %>% set_transformations( score_high = binary_cutoff("score", int_cutoff = 0.7, ext_cutoff = 0.7), @@ -209,13 +209,13 @@ The `create_event_dist()` function take the parameters needed to define the dist exponential distribution by specifying a single `lambda` parameter. We could also use Weibull or Gompertz distributions, mixtures of distributions, or even specify an arbitrary (log/cumulative) hazard function. -```r +``` r exponential_dist <- create_event_dist(dist = "exponential", lambda = 1 / 24) ``` The minimum required parameters to create a data simulation object are a baseline object and an event distribution. -```r +``` r create_data_simulation( baseline = create_baseline_object(n_trt_int = 100, n_ctrl_int = 50, n_ctrl_ext = 100), event_dist = exponential_dist @@ -229,7 +229,7 @@ We can expand by including a baseline object with covariates and so we need coef hazard calculation. Further we can include coefficient for the treatment parameter (`treatment_hr`) and for the difference between internal and external arms (`drift_hr`). -```r +``` r data_sim <- create_data_simulation( baseline = with_age_score, coefficients = c(age = 0.001, score_high = 1.5), @@ -249,19 +249,19 @@ data_list We can peek at the data with `get_data()`: -```r +``` r get_data(data_list, index = 1, dataset = 1) %>% head() #> patid age score_high trt ext eventtime status enrollment cens #> 1 1 53.25887 0 1 0 163.587989 1 1 0 #> 2 2 55.64217 0 1 0 6.884799 1 1 0 -#> [ reached getOption("max.print") -- omitted 4 rows ] +#> [ reached 'max' / getOption("max.print") -- omitted 4 rows ] ``` We can control how datasets are generated with the `n`, `treatment_hr` and `drift_hr` parameters of `generate()`. These will override the defaults specified in `create_data_simulation()`. -```r +``` r generate(data_sim, n = 10, treatment_hr = c(1, 1.3, 2), drift_hr = c(1, 1.2)) #> SimDataList object with 6 different scenarios #> sim_id treatment_hr drift_hr n_datasets_per_param @@ -285,7 +285,7 @@ in periods 1, 2, 3, 4, and then 2 patients each in 5, 6, 7, 8, 9. New methods can be specified by defining a function that takes a parameter `n`, the number of patients to generate enrollment times for. -```r +``` r poisson_enrollment <- custom_enrollment( fun = function(n) rpois(n, lambda = 5), label = "Poisson enrollment distribution" @@ -344,7 +344,7 @@ Drop out is defined using the same `create_event_dist()` function as for the sur specified for these distributions, but we can set different distributions for the three arms. -```r +``` r set_dropout( data_sim, internal_treated = create_event_dist(dist = "exponential", lambdas = 1 / 50), @@ -409,7 +409,7 @@ Different rules are available such as: If a patient has survival time longer than these rules their status will be set to `0` and `eventtime` set to the cut off time. -```r +``` r set_cut_off( data_sim, internal = cut_off_after_first(time = 36), @@ -468,7 +468,7 @@ set_cut_off( Putting all that together: -```r +``` r my_baseline <- create_baseline_object( n_trt_int = 100, n_ctrl_int = 50, @@ -508,7 +508,7 @@ my_data_list <- generate(my_data_sim_setup, n = 3, treatment_hr = c(1, 1.3), dri Now we can define the simulation setup for the models: -```r +``` r borrowing <- sim_borrowing_list(list(full = borrowing_full("ext"))) outcome <- sim_outcome_list( list(standard_outcome = outcome_surv_exponential( @@ -533,13 +533,12 @@ sim_obj <- create_simulation_obj( treatment = treatment, covariate = covariate ) -#> Error in create_simulation_obj(data_matrix_list = my_data_list, outcome = outcome, : Missing data detected in >1 matrix in `data_matrix_list`. Could be one of the following columns: 'age', 'score_high', 'ext', 'trt', 'eventtime', 'cens'. Error found in simulation_obj@data_matrix_list@data_list[[1]][[1]] ``` And finally sample from these models. See the vignette "4. Conduct a simulation study" for more details. -```r +``` r sim_results <- mcmc_sample(sim_obj, chains = 1) get_results(sim_results) ``` @@ -556,7 +555,7 @@ It is possible to include a fixed external dataset and additionally simulate ext to a non-zero value. This will simulate external patients in addition to the fixed data. -```r +``` r historical_trial_data <- data.frame( age = rnorm(40, 60, 5), score_high = rbinom(40, 1, 0.7), @@ -668,7 +667,7 @@ external_data_list <- generate(my_data_sim_setup_with_fixed, n = 3, treatment_hr Simulation data objects can be combined so that a simulation can be run once. -```r +``` r combined_data_list <- c(my_data_list, external_data_list) combined_data_list #> SimDataList object with 6 different scenarios diff --git a/vignettes/dataset.Rmd b/vignettes/dataset.Rmd index 29440b52..e870d509 100644 --- a/vignettes/dataset.Rmd +++ b/vignettes/dataset.Rmd @@ -30,7 +30,7 @@ and the external program. More information can be found in the The short version is: -```r +``` r # Install the cmdstanr package install.packages("cmdstanr", repos = c("https://stan-dev.r-universe.dev", getOption("repos"))) library(cmdstanr) @@ -42,10 +42,9 @@ install_cmdstan(cores = 2) Now you're ready to start with `psborrow2`. -```r +``` r library(psborrow2) library(cmdstanr) -# Warning: package 'cmdstanr' was built under R version 4.3.3 ``` # Creating an analysis object {.tabset} @@ -110,7 +109,7 @@ you can easily create a suitable matrix with the `psborrow2` helper function #### Creating a data matrix with `create_data_matrix()` -```r +``` r # Start with data.frame diabetic_df <- survival::diabetic @@ -143,7 +142,7 @@ head(diabetes_matrix) Let's look at the first few rows of the example matrix: -```r +``` r head(example_matrix) # id ext trt cov4 cov3 cov2 cov1 time status cnsr resp # [1,] 1 0 0 1 1 1 0 2.4226411 1 0 1 @@ -186,7 +185,7 @@ For our example, let's conduct a time-to-event analysis using the exponential distribution. -```r +``` r outcome <- outcome_surv_exponential( time_var = "time", cens_var = "cnsr", @@ -238,7 +237,7 @@ in this example. vignette, see `vignette('prior_distributions', package = 'psborrow2')`. -```r +``` r borrowing <- borrowing_hierarchical_commensurate( ext_flag_col = "ext", tau_prior = prior_gamma(0.001, 0.001) @@ -265,7 +264,7 @@ need to specify the prior on the effect estimate, `trt_prior`. We'll use another uninformative normal distribution for the prior on the treatment effect: -```r +``` r treatment <- treatment_details( trt_flag_col = "trt", trt_prior = prior_normal(0, 1000) @@ -291,7 +290,7 @@ let's create an analysis object: -```r +``` r anls_obj <- create_analysis_obj( data_matrix = example_matrix, outcome = outcome, @@ -311,55 +310,65 @@ you can use the `get_stan_code()` function to see the full Stan code that will be compiled. -```r +``` r get_stan_code(anls_obj) +# // Exponential survival model +# // Hierarchical commensurate prior # -# functions { -# -# } +# data { # -# data { -# int N; -# vector[N] trt; -# vector[N] time; -# vector[N] cens; +# int N; // number of observations +# vector[N] trt; // treatment indicator +# vector[N] time; // survival time +# vector[N] cens; // censoring indicator +# matrix[N,2] Z; // external flag indicators # -# matrix[N,2] Z; # +# +# # } # -# parameters { -# real beta_trt; +# parameters { # -# real tau; -# vector[2] alpha; +# real beta_trt; // treatment effect +# vector[2] alpha; // baseline hazard +# real tau; // precision on dynamic borrowing +# +# # # } # -# transformed parameters { +# transformed parameters { +# # real HR_trt = exp(beta_trt); +# # } # -# model { +# model { +# # vector[N] lp; # vector[N] elp; -# beta_trt ~ normal(0, 1000); -# lp = Z * alpha + trt * beta_trt; -# elp = exp(lp) ; +# real sigma; # +# beta_trt ~ normal(0, 1000) ; # +# alpha[2] ~ normal(0, 1000) ; # tau ~ gamma(0.001, 0.001) ; -# real sigma; -# sigma = 1 / tau; -# alpha[2] ~ normal(0, 1000) ; -# alpha[1] ~ normal(alpha[2], sqrt(sigma)) ; +# +# sigma = 1 / tau; +# alpha[1] ~ normal(alpha[2], sqrt(sigma)); +# +# lp = Z * alpha + trt * beta_trt ; +# elp = exp(lp); +# # for (i in 1:N) { -# if (cens[i] == 1) { -# target += exponential_lccdf(time[i] | elp[i] ); -# } else { -# target += exponential_lpdf(time[i] | elp[i] ); -# } -# } +# if (cens[i] == 1) { +# target += exponential_lccdf(time[i] | elp[i]) ; +# } else { +# target += exponential_lpdf(time[i] | elp[i]) ; +# } +# } +# # } ``` @@ -375,7 +384,7 @@ Note that running this may take a few minutes. -```r +``` r results <- mcmc_sample(anls_obj, iter_warmup = 2000, iter_sampling = 50000, @@ -395,17 +404,17 @@ For instance, we can see a see a summary of the posterior distribution samples with `results$summary()`: -```r +``` r results$summary() # # A tibble: 6 × 10 # variable mean median sd mad q5 q95 rhat ess_bulk # -# 1 lp__ -1618. -1618. 1.49 1.29 -1621. -1.62e+3 1.00 73570. -# 2 beta_trt -0.156 -0.158 0.198 0.198 -0.479 1.74e-1 1.00 87479. -# 3 tau 1.21 0.507 1.93 0.695 0.00448 4.76e+0 1.00 87300. -# 4 alpha[1] -3.36 -3.35 0.161 0.160 -3.63 -3.10e+0 1.00 87253. -# 5 alpha[2] -2.40 -2.40 0.0557 0.0556 -2.49 -2.31e+0 1.00 127633. -# 6 HR_trt 0.873 0.854 0.176 0.168 0.620 1.19e+0 1.00 87479. +# 1 lp__ -1618. -1618. 1.49 1.29 -1621. -1.62e+3 1.00 73444. +# 2 beta_trt -0.158 -0.159 0.198 0.197 -0.478 1.71e-1 1.00 86730. +# 3 alpha[1] -3.36 -3.35 0.161 0.160 -3.63 -3.10e+0 1.00 85882. +# 4 alpha[2] -2.40 -2.40 0.0557 0.0557 -2.49 -2.31e+0 1.00 129722. +# 5 tau 1.22 0.502 1.95 0.688 0.00433 4.79e+0 1.00 83471. +# 6 HR_trt 0.871 0.853 0.175 0.167 0.620 1.19e+0 1.00 86730. # # ℹ 1 more variable: ess_tail ``` @@ -415,7 +424,7 @@ from the Stan model refer to, `psborrow2` has a function which returns a variable dictionary from the analysis object to help interpret these parameters: -```r +``` r variable_dictionary(anls_obj) # Stan_variable Description # 1 tau commensurability parameter @@ -431,7 +440,7 @@ common in many MCMC sampling software packages and allow us to leverage packages such as `posterior` and `bayesplot`. -```r +``` r draws <- results$draws() print(draws) # # A draws_array: 50000 iterations, 4 chains, and 6 variables @@ -439,41 +448,41 @@ print(draws) # # chain # iteration 1 2 3 4 -# 1 -1620 -1617 -1617 -1618 -# 2 -1616 -1617 -1619 -1618 -# 3 -1619 -1618 -1618 -1617 -# 4 -1619 -1617 -1619 -1617 -# 5 -1620 -1617 -1618 -1617 +# 1 -1618 -1619 -1616 -1621 +# 2 -1620 -1620 -1623 -1621 +# 3 -1620 -1620 -1619 -1621 +# 4 -1618 -1620 -1618 -1619 +# 5 -1617 -1620 -1616 -1621 # # , , variable = beta_trt # # chain -# iteration 1 2 3 4 -# 1 0.297 0.037 -0.227 -0.164 -# 2 -0.200 -0.148 -0.312 -0.290 -# 3 -0.506 -0.472 0.033 -0.207 -# 4 0.043 -0.454 0.042 -0.126 -# 5 0.114 0.014 -0.151 -0.033 +# iteration 1 2 3 4 +# 1 -0.22 -0.41 -0.21 -0.64 +# 2 -0.18 -0.46 -0.67 -0.68 +# 3 -0.13 -0.44 -0.56 0.52 +# 4 -0.27 -0.36 -0.37 0.23 +# 5 -0.44 -0.38 -0.18 0.04 # -# , , variable = tau +# , , variable = alpha[1] # # chain -# iteration 1 2 3 4 -# 1 0.0136 0.45 0.083 0.607 -# 2 1.8391 0.41 0.012 0.041 -# 3 0.1793 2.85 0.310 0.079 -# 4 0.0056 1.55 0.088 0.043 -# 5 0.0017 0.90 0.020 0.113 +# iteration 1 2 3 4 +# 1 -3.4 -3.0 -3.3 -3.0 +# 2 -3.3 -3.1 -2.9 -3.0 +# 3 -3.4 -3.0 -3.0 -3.9 +# 4 -3.3 -3.2 -3.3 -3.6 +# 5 -3.2 -3.2 -3.4 -3.4 # -# , , variable = alpha[1] +# , , variable = alpha[2] # # chain # iteration 1 2 3 4 -# 1 -3.7 -3.6 -3.3 -3.2 -# 2 -3.4 -3.3 -3.2 -3.4 -# 3 -3.2 -3.2 -3.5 -3.4 -# 4 -3.4 -3.1 -3.5 -3.3 -# 5 -3.5 -3.5 -3.4 -3.5 +# 1 -2.5 -2.3 -2.4 -2.4 +# 2 -2.5 -2.3 -2.5 -2.4 +# 3 -2.5 -2.3 -2.4 -2.4 +# 4 -2.4 -2.3 -2.3 -2.3 +# 5 -2.4 -2.3 -2.4 -2.4 # # # ... with 49995 more iterations, and 2 more variables ``` @@ -484,18 +493,18 @@ uses the `variable_dictionary` labels. Let's use it here to make the results easier to interpret: -```r +``` r draws <- rename_draws_covariates(draws, anls_obj) summary(draws) # # A tibble: 6 × 10 # variable mean median sd mad q5 q95 rhat ess_bulk # -# 1 lp__ -1.62e+3 -1.62e+3 1.49 1.29 -1.62e+3 -1.62e+3 1.00 73570. -# 2 treatment lo… -1.56e-1 -1.58e-1 0.198 0.198 -4.79e-1 1.74e-1 1.00 87479. -# 3 commensurabi… 1.21e+0 5.07e-1 1.93 0.695 4.48e-3 4.76e+0 1.00 87300. -# 4 baseline log… -3.36e+0 -3.35e+0 0.161 0.160 -3.63e+0 -3.10e+0 1.00 87253. -# 5 baseline log… -2.40e+0 -2.40e+0 0.0557 0.0556 -2.49e+0 -2.31e+0 1.00 127633. -# 6 treatment HR 8.73e-1 8.54e-1 0.176 0.168 6.20e-1 1.19e+0 1.00 87479. +# 1 lp__ -1.62e+3 -1.62e+3 1.49 1.29 -1.62e+3 -1.62e+3 1.00 73444. +# 2 treatment lo… -1.58e-1 -1.59e-1 0.198 0.197 -4.78e-1 1.71e-1 1.00 86730. +# 3 baseline log… -3.36e+0 -3.35e+0 0.161 0.160 -3.63e+0 -3.10e+0 1.00 85882. +# 4 baseline log… -2.40e+0 -2.40e+0 0.0557 0.0557 -2.49e+0 -2.31e+0 1.00 129722. +# 5 commensurabi… 1.22e+0 5.02e-1 1.95 0.688 4.33e-3 4.79e+0 1.00 83471. +# 6 treatment HR 8.71e-1 8.53e-1 0.175 0.167 6.20e-1 1.19e+0 1.00 86730. # # ℹ 1 more variable: ess_tail ``` @@ -508,7 +517,7 @@ function `mcmc_hist()`. Let's do that for the Hazard ratio for the treatment effect and for our commensurability parameter, tau. -```r +``` r bayesplot::mcmc_hist(draws, c("treatment HR", "commensurability parameter")) ``` @@ -521,7 +530,7 @@ We can see other plots that help us understand and diagnose problems with the MCMC sampler, such as trace and rank plots: -```r +``` r bayesplot::color_scheme_set("mix-blue-pink") bayesplot::mcmc_trace( @@ -547,29 +556,29 @@ can use the `summarize_draws()` function to derive 80% credible intervals for all parameters: -```r +``` r library(posterior) summarize_draws(draws, ~ quantile(.x, probs = c(0.1, 0.9))) # # A tibble: 6 × 3 # variable `10%` `90%` # # 1 lp__ -1620. -1616. -# 2 treatment log HR -0.408 0.0998 -# 3 commensurability parameter 0.0178 3.22 -# 4 baseline log hazard rate, internal -3.57 -3.16 -# 5 baseline log hazard rate, external -2.47 -2.33 -# 6 treatment HR 0.665 1.10 +# 2 treatment log HR -0.409 0.0966 +# 3 baseline log hazard rate, internal -3.56 -3.15 +# 4 baseline log hazard rate, external -2.47 -2.33 +# 5 commensurability parameter 0.0177 3.25 +# 6 treatment HR 0.664 1.10 ``` Another useful application of the `posterior` package is the evaluation of the Monte Carlo standard error for quantiles of a variable of interest: -```r +``` r vm <- extract_variable_matrix(draws, "treatment HR") mcse_quantile(x = vm, probs = c(0.1, 0.5, 0.9)) # mcse_q10 mcse_q50 mcse_q90 -# 0.0006160 0.0006435 0.0012350 +# 0.0006515 0.0006575 0.0012550 ``` `posterior` contains many other helpful functions, as outlined in their diff --git a/vignettes/figure-dataset-mcmc-hist-1.png b/vignettes/figure-dataset-mcmc-hist-1.png index 4fd445dc..ff7dd688 100644 Binary files a/vignettes/figure-dataset-mcmc-hist-1.png and b/vignettes/figure-dataset-mcmc-hist-1.png differ diff --git a/vignettes/figure-dataset-mcmc-trace-1.png b/vignettes/figure-dataset-mcmc-trace-1.png index bab1b68d..adae99c3 100644 Binary files a/vignettes/figure-dataset-mcmc-trace-1.png and b/vignettes/figure-dataset-mcmc-trace-1.png differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png index 29337a94..464be35d 100644 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png and b/vignettes/figure-match_weight_01_methods-unnamed-chunk-28-1.png differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-61-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-61-1.png index a231ba7f..89cad4e6 100644 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-61-1.png and b/vignettes/figure-match_weight_01_methods-unnamed-chunk-61-1.png differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png index 06dea64e..5df9cc8b 100644 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png and b/vignettes/figure-match_weight_01_methods-unnamed-chunk-62-1.png differ diff --git a/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png b/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png index 5f6b7bcf..3179a28c 100644 Binary files a/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png and b/vignettes/figure-match_weight_01_methods-unnamed-chunk-67-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png index 55799d1b..855e1349 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png index c5425528..d4df7b7b 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-10-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png index cd94e52d..6a8a315b 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-12-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png index 7c385d81..05a4ed09 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png index f23a5d96..977dae80 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-19-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png index 648f21d9..8a244ff5 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-21-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png index a5281225..70988854 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-31-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png index 6952374a..f57d1079 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png index b866266b..bbeae47b 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-32-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png index f968128d..6fcfc670 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-42-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png index 8b31ab59..38b0ee75 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-43-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png index 8d9f6c14..2d12cfa8 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png index adc7261c..c789aa34 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-2.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png index cbee82cb..bb5297a0 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-3.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png index 46eadc3d..47ac790b 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-44-4.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png index b338e6b2..c7a0a386 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-51-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png index bff4c539..b7ba9a0a 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-53-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png index 28791073..1a60823b 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-60-1.png differ diff --git a/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png b/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png index 19536f06..ecf8694d 100644 Binary files a/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png and b/vignettes/figure-match_weight_02_application-unnamed-chunk-62-1.png differ diff --git a/vignettes/figure-propensity_scores-balplot-1.png b/vignettes/figure-propensity_scores-balplot-1.png index d4908c3d..2e495398 100644 Binary files a/vignettes/figure-propensity_scores-balplot-1.png and b/vignettes/figure-propensity_scores-balplot-1.png differ diff --git a/vignettes/figure-propensity_scores-loveplot-1.png b/vignettes/figure-propensity_scores-loveplot-1.png index 65bb2e1d..114bd95d 100644 Binary files a/vignettes/figure-propensity_scores-loveplot-1.png and b/vignettes/figure-propensity_scores-loveplot-1.png differ diff --git a/vignettes/figure-propensity_scores-matchit_jitter-1.png b/vignettes/figure-propensity_scores-matchit_jitter-1.png index 1fc19123..a97d49ed 100644 Binary files a/vignettes/figure-propensity_scores-matchit_jitter-1.png and b/vignettes/figure-propensity_scores-matchit_jitter-1.png differ diff --git a/vignettes/figure-propensity_scores-plots-1.png b/vignettes/figure-propensity_scores-plots-1.png index 5e236a03..1c176de3 100644 Binary files a/vignettes/figure-propensity_scores-plots-1.png and b/vignettes/figure-propensity_scores-plots-1.png differ diff --git a/vignettes/figure-simple_overview-unnamed-chunk-18-1.png b/vignettes/figure-simple_overview-unnamed-chunk-18-1.png index 55c4db6b..e902d84a 100644 Binary files a/vignettes/figure-simple_overview-unnamed-chunk-18-1.png and b/vignettes/figure-simple_overview-unnamed-chunk-18-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-23-1.png b/vignettes/figure-simulation_study-unnamed-chunk-23-1.png index de9a950d..3e55d9ff 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-23-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-23-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-24-1.png b/vignettes/figure-simulation_study-unnamed-chunk-24-1.png index e3c8ce31..1382ff45 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-24-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-24-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-25-1.png b/vignettes/figure-simulation_study-unnamed-chunk-25-1.png index 57c7dd5e..bcb9327b 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-25-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-25-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-26-1.png b/vignettes/figure-simulation_study-unnamed-chunk-26-1.png index cbd37026..bac60e8e 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-26-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-26-1.png differ diff --git a/vignettes/figure-simulation_study-unnamed-chunk-27-1.png b/vignettes/figure-simulation_study-unnamed-chunk-27-1.png index 378c2e1b..e4251ceb 100644 Binary files a/vignettes/figure-simulation_study-unnamed-chunk-27-1.png and b/vignettes/figure-simulation_study-unnamed-chunk-27-1.png differ diff --git a/vignettes/figure-weighting-exp_plot-1.png b/vignettes/figure-weighting-exp_plot-1.png index f957181d..dd43f5a4 100644 Binary files a/vignettes/figure-weighting-exp_plot-1.png and b/vignettes/figure-weighting-exp_plot-1.png differ diff --git a/vignettes/figure-weighting-logistic_plot-1.png b/vignettes/figure-weighting-logistic_plot-1.png index 53544092..71456b7e 100644 Binary files a/vignettes/figure-weighting-logistic_plot-1.png and b/vignettes/figure-weighting-logistic_plot-1.png differ diff --git a/vignettes/match_weight_01_methods.Rmd b/vignettes/match_weight_01_methods.Rmd index 6ae83128..a393f241 100644 --- a/vignettes/match_weight_01_methods.Rmd +++ b/vignettes/match_weight_01_methods.Rmd @@ -4,7 +4,7 @@ bibliography: references.bib author: - Manoj Khanal - Eli Lilly & Company -date: "2025-02-11" +date: "2025-05-07" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{1. Matching and Weighting Methods} @@ -19,7 +19,7 @@ Using external control in the analysis of clinical trial data can be challenging We first load all of the libraries used in this tutorial. -```r +``` r #Loading the libraries library(SimMultiCorrData) library(ebal) @@ -69,7 +69,7 @@ Other variables in the simulated data include To generate the covariates, we first specify the sample sizes, number of continuous and categorical variables, the marginal moments of the covariates, and the correlation matrix. Note that rcorrvar2 requires the correlation matrix to be ordered as ordinal, continuous, Poisson, and Negative Binomial. -```r +``` r #Simulate correlated covariates n_t <- 600 #Sample size in trial data n_ec <- 500 #Sample size in external control data @@ -87,7 +87,7 @@ skurts_tc <- rep(0.1,5) # Simulating trial data After specifying the moments of the covariate distribution, we simulate the covariates using `rcorrvar2` function from `SimMultiCorrData` package. -```r +``` r #Simulating covariates trial.data <- rcorrvar2(n = n_t, k_cont = k_cont, k_cat = k_cat, k_nb = 0, method = "Fleishman", seed=1, @@ -106,13 +106,13 @@ trial.data <- rcorrvar2(n = n_t, k_cont = k_cont, k_cat = k_cat, k_nb = 0, #> Constants: Distribution 5 #> #> Constants calculation time: 0.003 minutes -#> Intercorrelation calculation time: 0.002 minutes +#> Intercorrelation calculation time: 0.001 minutes #> Error loop calculation time: 0 minutes -#> Total Simulation time: 0.005 minutes +#> Total Simulation time: 0.003 minutes ``` -```r +``` r trial.data <- data.frame(cbind(id=paste("TRIAL",1:n_t,sep=""), trial.data$continuous_variables, ifelse(trial.data$ordinal_variables==1,1,0))) colnames(trial.data) <- c("ID", "Age", "Weight","Height","Biomarker1","Biomarker2","Smoker", @@ -137,7 +137,7 @@ where $\lambda_0(t)=2$ is the baseline hazard function and assumed to be constan The survival function is given by $$S(t|\boldsymbol X, Z)=exp\{-\Lambda(t|\boldsymbol X, Z)\},$$ where $\Lambda(t|\boldsymbol X, Z)=\int_0^t\lambda(u|\boldsymbol X, Z)du$. The time to event is generated using a inverse CDF method. The censoring time is generated independently from an exponential distribution with `rate=1/4`. -```r +``` r #Simulate survival outcome using Cox proportional hazards regression model set.seed(1) u <- runif(1) @@ -152,7 +152,7 @@ time <- pmin(times,cens.time) ``` -```r +``` r #Combine trial data trial.data <- data.frame(trial.data,time,event,data="TRIAL") ``` @@ -161,39 +161,39 @@ The first 10 observations in the RCT data is shown below. |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:-------|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:-----| -|TRIAL1 | 55.51419| 148.5092| 65.63899| 33.71898| 55.26178| 0| 1| 0| 1| 0.4089043| 0|TRIAL | -|TRIAL2 | 49.93326| 146.9441| 63.44883| 35.65828| 45.50928| 1| 1| 1| 1| 0.9721405| 0|TRIAL | -|TRIAL3 | 58.51588| 153.5435| 70.53193| 35.53710| 55.89283| 0| 1| 0| 0| 0.2956919| 0|TRIAL | -|TRIAL4 | 48.61226| 147.2873| 64.10615| 31.17663| 55.80523| 0| 1| 0| 1| 4.3111823| 0|TRIAL | -|TRIAL5 | 51.84943| 151.0312| 62.16361| 33.99677| 48.01981| 0| 1| 0| 0| 0.2644848| 0|TRIAL | -|TRIAL6 | 51.81743| 148.1115| 68.69766| 33.82398| 47.32652| 1| 1| 0| 0| 0.3103544| 0|TRIAL | -|TRIAL7 | 58.04648| 150.2572| 71.35831| 38.71276| 50.68694| 0| 1| 0| 1| 1.7177436| 0|TRIAL | -|TRIAL8 | 50.72777| 147.1815| 64.55223| 33.86884| 49.31299| 0| 1| 1| 1| 5.2652367| 0|TRIAL | -|TRIAL9 | 55.16073| 145.0733| 63.84238| 36.28519| 50.27354| 0| 1| 1| 1| 3.9595461| 1|TRIAL | -|TRIAL10 | 54.85184| 146.4757| 65.59890| 36.21950| 47.12495| 1| 1| 0| 1| 1.1788494| 1|TRIAL | +|TRIAL1 | 54.59830| 145.8227| 67.60183| 36.69103| 52.73474| 0| 1| 0| 1| 0.4089043| 0|TRIAL | +|TRIAL2 | 50.71835| 151.0235| 63.55137| 32.19172| 45.61235| 1| 1| 1| 1| 0.9721405| 0|TRIAL | +|TRIAL3 | 59.66273| 150.3079| 68.91083| 40.98128| 53.25314| 0| 1| 0| 0| 0.2956919| 0|TRIAL | +|TRIAL4 | 48.05746| 142.5048| 65.83782| 37.38280| 51.73119| 0| 1| 1| 1| 4.3111823| 0|TRIAL | +|TRIAL5 | 51.36553| 150.3589| 61.96020| 33.71680| 49.69150| 0| 1| 0| 0| 0.2644848| 0|TRIAL | +|TRIAL6 | 55.05345| 148.9918| 64.62284| 36.49923| 45.23599| 0| 1| 0| 0| 0.3103544| 0|TRIAL | +|TRIAL7 | 60.73487| 154.1394| 70.10825| 37.82151| 48.60835| 0| 0| 0| 1| 1.7177436| 0|TRIAL | +|TRIAL8 | 51.16550| 147.4829| 64.71295| 34.43417| 47.80836| 0| 1| 1| 1| 5.2652367| 0|TRIAL | +|TRIAL9 | 54.18983| 148.4076| 67.79324| 32.34567| 49.26449| 1| 1| 1| 1| 3.7070752| 1|TRIAL | +|TRIAL10 | 55.79565| 150.5489| 65.86911| 32.92278| 46.61912| 0| 1| 0| 1| 0.4210952| 1|TRIAL | The censoring and event rate in the RCT data is -```r +``` r table(trial.data$event)/nrow(trial.data) #> -#> 0 1 -#> 0.3883333 0.6116667 +#> 0 1 +#> 0.385 0.615 ``` The distribution of the outcome time is -```r +``` r summary(trial.data$time) #> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> 0.00771 0.35048 0.89904 1.61350 2.04652 21.27313 +#> 0.00771 0.31937 0.91603 1.61943 2.00923 15.21981 ``` The summary of each of the baseline covariates and their standardized mean difference between treatment arms is shown below. -```r +``` r myVars <- c("Age", "Weight", "Height", "Biomarker1", "Biomarker2", "Smoker", "Sex", "ECOG1") @@ -204,26 +204,26 @@ tab1 <- CreateTableOne(vars = myVars, strata = "group" , data = trial.data, fact ``` -```r +``` r print(tab1,smd=TRUE) #> Stratified by group #> 0 1 p test SMD #> n 178 422 -#> Age (mean (SD)) 54.82 (4.00) 55.08 (3.83) 0.451 0.067 -#> Weight (mean (SD)) 150.18 (3.02) 149.92 (3.24) 0.357 0.084 -#> Height (mean (SD)) 65.30 (2.39) 64.88 (2.49) 0.056 0.173 -#> Biomarker1 (mean (SD)) 35.26 (2.38) 34.89 (2.16) 0.066 0.161 -#> Biomarker2 (mean (SD)) 50.21 (2.52) 49.91 (2.43) 0.181 0.119 -#> Smoker = 1 (%) 32 (18.0) 87 (20.6) 0.530 0.067 -#> Sex = 1 (%) 132 (74.2) 344 (81.5) 0.054 0.178 -#> ECOG1 = 1 (%) 44 (24.7) 139 (32.9) 0.057 0.182 +#> Age (mean (SD)) 54.89 (3.84) 55.05 (3.91) 0.651 0.041 +#> Weight (mean (SD)) 150.29 (3.12) 149.88 (3.18) 0.151 0.129 +#> Height (mean (SD)) 65.29 (2.56) 64.88 (2.40) 0.060 0.166 +#> Biomarker1 (mean (SD)) 35.28 (2.24) 34.88 (2.24) 0.048 0.177 +#> Biomarker2 (mean (SD)) 50.02 (2.52) 49.99 (2.41) 0.902 0.011 +#> Smoker = 1 (%) 34 (19.1) 87 (20.6) 0.756 0.038 +#> Sex = 1 (%) 148 (83.1) 332 (78.7) 0.254 0.114 +#> ECOG1 = 1 (%) 45 (25.3) 137 (32.5) 0.099 0.159 ``` # Simulating external control data The same set of covariates $X$ were simulated for the external control data as the RCT. The means, variances for continuous variables and proportion for categorical variables are modified for the external controls compared to the RCT according to the code below. -```r +``` r means_cont_ec <- c(55+2,150-2,65-2,35+2,50-2) #Vector of means of continuous variables vars_cont_ec <- c(14,10,5,5,5) marginal_ec = list(0.3,0.7,0.4) @@ -243,14 +243,14 @@ ext.cont.data <- rcorrvar2(n = n_ec, k_cont = k_cont, k_cat = k_cat, k_nb = 0, #> #> Constants: Distribution 5 #> -#> Constants calculation time: 0.003 minutes -#> Intercorrelation calculation time: 0.001 minutes +#> Constants calculation time: 0.001 minutes +#> Intercorrelation calculation time: 0 minutes #> Error loop calculation time: 0 minutes -#> Total Simulation time: 0.004 minutes +#> Total Simulation time: 0.002 minutes ``` -```r +``` r ext.cont.data <- data.frame(cbind(id=paste("EC",1:n_ec,sep=""), ext.cont.data$continuous_variables, ifelse(ext.cont.data$ordinal_variables==1,1,0))) colnames(ext.cont.data) <- c("ID", "Age", "Weight","Height","Biomarker1","Biomarker2","Smoker", "Sex", "ECOG1") @@ -258,7 +258,7 @@ colnames(ext.cont.data) <- c("ID", "Age", "Weight","Height","Biomarker1","Biomar The same generating mechanism used for the RCT data was used to simulate survival time for the external controls. -```r +``` r #Simulate survival outcome using Cox proportional hazards regression model set.seed(1111) u <- runif(1) @@ -276,7 +276,7 @@ ext.cont.data <- data.frame(ext.cont.data,group,time,event,data="EC") ``` The first 10 observations in the external control data is shown below. -```r +``` r knitr::kable(head(ext.cont.data, 10)) ``` @@ -284,41 +284,41 @@ knitr::kable(head(ext.cont.data, 10)) |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:----|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:----| -|EC1 | 58.84399| 149.2772| 60.14479| 41.13023| 47.84936| 1| 0| 0| 0| 0.1368416| 1|EC | -|EC2 | 54.94800| 142.9350| 59.48098| 33.54165| 48.14581| 1| 1| 1| 0| 0.0316781| 0|EC | -|EC3 | 56.60197| 144.2795| 65.27178| 37.10918| 49.17472| 0| 1| 0| 0| 0.0269829| 0|EC | -|EC4 | 58.26515| 144.3181| 58.38123| 35.80290| 48.79969| 0| 1| 0| 0| 0.5024074| 0|EC | -|EC5 | 55.35845| 151.2542| 61.23105| 34.81402| 47.81858| 1| 1| 1| 0| 0.1666448| 1|EC | -|EC6 | 57.25913| 147.4564| 62.22442| 34.45358| 47.49526| 1| 1| 1| 0| 0.0889376| 0|EC | -|EC7 | 60.09491| 143.2529| 64.61370| 38.69222| 48.54340| 0| 1| 0| 0| 0.0368207| 0|EC | -|EC8 | 55.55786| 149.6638| 63.85947| 37.91138| 46.49153| 0| 1| 1| 0| 0.0307342| 1|EC | -|EC9 | 59.11727| 143.3353| 61.02850| 35.18954| 47.95672| 0| 1| 0| 0| 0.0379876| 0|EC | -|EC10 | 50.35400| 141.1806| 57.91327| 34.52789| 42.01203| 1| 1| 0| 0| 0.2193177| 1|EC | +|EC1 | 56.22299| 147.2548| 68.01663| 37.43258| 46.26655| 0| 1| 0| 0| 0.0254106| 1|EC | +|EC2 | 51.53777| 142.9052| 59.87302| 37.63385| 45.57254| 1| 1| 1| 0| 0.0275893| 1|EC | +|EC3 | 61.84501| 146.4668| 61.10423| 37.99792| 47.80536| 0| 0| 0| 0| 0.0269829| 0|EC | +|EC4 | 51.65163| 142.8451| 63.08088| 38.97152| 45.82025| 0| 0| 1| 0| 0.0021760| 1|EC | +|EC5 | 51.60718| 150.1815| 62.41377| 36.57902| 47.97870| 1| 1| 1| 0| 0.0263622| 1|EC | +|EC6 | 53.71580| 146.6556| 61.32083| 36.67672| 48.62139| 1| 1| 1| 0| 0.0417934| 1|EC | +|EC7 | 62.89995| 144.0576| 63.05230| 38.20200| 48.28190| 0| 1| 0| 0| 0.0368207| 0|EC | +|EC8 | 58.22797| 149.8744| 63.76035| 35.20858| 48.10369| 0| 1| 0| 0| 0.0380045| 0|EC | +|EC9 | 54.67203| 142.4845| 61.59342| 37.91245| 47.51907| 0| 1| 0| 0| 0.0379876| 0|EC | +|EC10 | 50.52381| 140.2529| 60.53011| 31.34492| 42.98157| 0| 1| 1| 0| 0.0441761| 1|EC | The censoring and event rate in the trial data is -```r +``` r table(ext.cont.data$event)/nrow(ext.cont.data) #> #> 0 1 -#> 0.328 0.672 +#> 0.306 0.694 ``` The distribution of the outcome time is -```r +``` r summary(ext.cont.data$time) #> Min. 1st Qu. Median Mean 3rd Qu. Max. -#> 0.0000166 0.0211811 0.0542664 0.0995427 0.1207454 1.2824912 +#> 1.655e-05 2.286e-02 5.027e-02 9.448e-02 1.115e-01 8.698e-01 ``` # Merging trial and external control data We can use `bind_rows` function to merge two datasets. Before using this function, we make sure that the column names for the same variables are consistent in the two datasets. -```r +``` r names(ext.cont.data) #> [1] "ID" "Age" "Weight" "Height" "Biomarker1" #> [6] "Biomarker2" "Smoker" "Sex" "ECOG1" "group" @@ -326,7 +326,7 @@ names(ext.cont.data) ``` -```r +``` r names(trial.data) #> [1] "ID" "Age" "Weight" "Height" "Biomarker1" #> [6] "Biomarker2" "Smoker" "Sex" "ECOG1" "group" @@ -337,13 +337,13 @@ names(trial.data) Now, we merge the two datasets. -```r +``` r final.data <- data.frame(bind_rows(trial.data,ext.cont.data)) ``` -```r +``` r knitr::kable(head(final.data, 10)) ``` @@ -351,21 +351,21 @@ knitr::kable(head(final.data, 10)) |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:-------|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:-----| -|TRIAL1 | 55.51419| 148.5092| 65.63899| 33.71898| 55.26178| 0| 1| 0| 1| 0.4089043| 0|TRIAL | -|TRIAL2 | 49.93326| 146.9441| 63.44883| 35.65828| 45.50928| 1| 1| 1| 1| 0.9721405| 0|TRIAL | -|TRIAL3 | 58.51588| 153.5435| 70.53193| 35.53710| 55.89283| 0| 1| 0| 0| 0.2956919| 0|TRIAL | -|TRIAL4 | 48.61226| 147.2873| 64.10615| 31.17663| 55.80523| 0| 1| 0| 1| 4.3111823| 0|TRIAL | -|TRIAL5 | 51.84943| 151.0312| 62.16361| 33.99677| 48.01981| 0| 1| 0| 0| 0.2644848| 0|TRIAL | -|TRIAL6 | 51.81743| 148.1115| 68.69766| 33.82398| 47.32652| 1| 1| 0| 0| 0.3103544| 0|TRIAL | -|TRIAL7 | 58.04648| 150.2572| 71.35831| 38.71276| 50.68694| 0| 1| 0| 1| 1.7177436| 0|TRIAL | -|TRIAL8 | 50.72777| 147.1815| 64.55223| 33.86884| 49.31299| 0| 1| 1| 1| 5.2652367| 0|TRIAL | -|TRIAL9 | 55.16073| 145.0733| 63.84238| 36.28519| 50.27354| 0| 1| 1| 1| 3.9595461| 1|TRIAL | -|TRIAL10 | 54.85184| 146.4757| 65.59890| 36.21950| 47.12495| 1| 1| 0| 1| 1.1788494| 1|TRIAL | +|TRIAL1 | 54.59830| 145.8227| 67.60183| 36.69103| 52.73474| 0| 1| 0| 1| 0.4089043| 0|TRIAL | +|TRIAL2 | 50.71835| 151.0235| 63.55137| 32.19172| 45.61235| 1| 1| 1| 1| 0.9721405| 0|TRIAL | +|TRIAL3 | 59.66273| 150.3079| 68.91083| 40.98128| 53.25314| 0| 1| 0| 0| 0.2956919| 0|TRIAL | +|TRIAL4 | 48.05746| 142.5048| 65.83782| 37.38280| 51.73119| 0| 1| 1| 1| 4.3111823| 0|TRIAL | +|TRIAL5 | 51.36553| 150.3589| 61.96020| 33.71680| 49.69150| 0| 1| 0| 0| 0.2644848| 0|TRIAL | +|TRIAL6 | 55.05345| 148.9918| 64.62284| 36.49923| 45.23599| 0| 1| 0| 0| 0.3103544| 0|TRIAL | +|TRIAL7 | 60.73487| 154.1394| 70.10825| 37.82151| 48.60835| 0| 0| 0| 1| 1.7177436| 0|TRIAL | +|TRIAL8 | 51.16550| 147.4829| 64.71295| 34.43417| 47.80836| 0| 1| 1| 1| 5.2652367| 0|TRIAL | +|TRIAL9 | 54.18983| 148.4076| 67.79324| 32.34567| 49.26449| 1| 1| 1| 1| 3.7070752| 1|TRIAL | +|TRIAL10 | 55.79565| 150.5489| 65.86911| 32.92278| 46.61912| 0| 1| 0| 1| 0.4210952| 1|TRIAL | -```r +``` r knitr::kable(tail(final.data, 10)) ``` @@ -373,40 +373,40 @@ knitr::kable(tail(final.data, 10)) | |ID | Age| Weight| Height| Biomarker1| Biomarker2| Smoker| Sex| ECOG1| group| time| event|data | |:----|:-----|--------:|--------:|--------:|----------:|----------:|------:|---:|-----:|-----:|---------:|-----:|:----| -|1091 |EC491 | 54.63798| 142.7690| 62.33772| 31.74800| 46.58420| 1| 1| 1| 0| 0.0880112| 1|EC | -|1092 |EC492 | 58.95356| 155.2625| 62.49057| 39.68135| 53.22807| 0| 0| 0| 0| 0.0578393| 0|EC | -|1093 |EC493 | 56.71679| 142.0092| 65.47906| 37.46290| 47.76221| 1| 0| 0| 0| 0.0157931| 1|EC | -|1094 |EC494 | 54.01146| 142.2547| 61.43211| 36.80426| 46.80182| 0| 1| 1| 0| 0.0241804| 1|EC | -|1095 |EC495 | 54.97775| 147.9758| 59.52286| 38.58231| 46.74939| 1| 1| 0| 0| 0.0937779| 0|EC | -|1096 |EC496 | 51.57175| 150.7496| 61.26278| 33.03108| 45.15747| 1| 1| 0| 0| 0.1023785| 0|EC | -|1097 |EC497 | 55.43787| 145.5014| 60.75110| 37.98437| 53.90082| 0| 1| 0| 0| 0.0636704| 1|EC | -|1098 |EC498 | 58.63598| 148.5516| 62.83803| 41.17086| 48.23983| 1| 0| 0| 0| 0.0511597| 1|EC | -|1099 |EC499 | 59.18113| 150.7839| 64.16530| 32.80313| 47.07868| 1| 1| 0| 0| 0.0295196| 0|EC | -|1100 |EC500 | 57.65147| 149.4489| 63.26176| 35.64399| 47.01619| 0| 1| 0| 0| 0.0514581| 0|EC | +|1091 |EC491 | 53.18940| 143.3551| 57.69727| 35.72714| 47.69671| 1| 1| 1| 0| 0.1025154| 1|EC | +|1092 |EC492 | 56.51178| 155.0675| 66.71788| 41.77618| 48.43950| 0| 0| 0| 0| 0.0110014| 1|EC | +|1093 |EC493 | 63.14821| 143.9180| 60.76872| 36.69923| 47.32070| 0| 1| 1| 0| 0.3981900| 1|EC | +|1094 |EC494 | 57.53298| 143.4756| 61.48325| 36.09593| 45.08466| 1| 1| 1| 0| 0.1298631| 1|EC | +|1095 |EC495 | 53.64071| 146.6340| 65.04088| 35.80434| 44.99437| 0| 1| 1| 0| 0.0168014| 1|EC | +|1096 |EC496 | 49.79612| 149.9081| 60.27894| 33.37305| 47.14132| 1| 1| 1| 0| 0.0616193| 1|EC | +|1097 |EC497 | 56.79361| 147.6758| 62.65351| 42.68411| 44.67903| 0| 1| 0| 0| 0.0633529| 1|EC | +|1098 |EC498 | 59.93031| 147.8859| 66.40048| 37.26859| 47.07702| 0| 1| 1| 0| 0.0419240| 1|EC | +|1099 |EC499 | 52.88813| 148.9676| 60.61804| 36.08088| 51.92952| 0| 1| 1| 0| 0.0295196| 0|EC | +|1100 |EC500 | 55.23686| 148.4539| 62.51881| 36.02603| 49.34816| 0| 1| 1| 0| 0.0409076| 1|EC | We examine the standardized mean difference for covariates between the RCT and external control data before conducting matching/weighting. Note that `strata="data"` in the following code. -```r +``` r tab2 <- CreateTableOne(vars = myVars, strata = "data" , data = final.data, factorVars = catVars) ``` -```r +``` r print(tab2,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD #> n 500 600 -#> Age (mean (SD)) 57.00 (3.72) 55.00 (3.88) <0.001 0.526 -#> Weight (mean (SD)) 148.00 (3.15) 150.00 (3.17) <0.001 0.633 -#> Height (mean (SD)) 63.00 (2.23) 65.00 (2.47) <0.001 0.850 -#> Biomarker1 (mean (SD)) 37.00 (2.24) 35.00 (2.24) <0.001 0.894 -#> Biomarker2 (mean (SD)) 48.00 (2.23) 50.00 (2.46) <0.001 0.853 -#> Smoker = 1 (%) 145 (29.0) 119 (19.8) 0.001 0.215 -#> Sex = 1 (%) 355 (71.0) 476 (79.3) 0.002 0.194 -#> ECOG1 = 1 (%) 195 (39.0) 183 (30.5) 0.004 0.179 +#> Age (mean (SD)) 57.00 (3.75) 55.00 (3.89) <0.001 0.524 +#> Weight (mean (SD)) 148.00 (3.16) 150.00 (3.17) <0.001 0.633 +#> Height (mean (SD)) 63.00 (2.23) 65.00 (2.45) <0.001 0.854 +#> Biomarker1 (mean (SD)) 37.00 (2.23) 35.00 (2.25) <0.001 0.894 +#> Biomarker2 (mean (SD)) 48.00 (2.23) 50.00 (2.44) <0.001 0.856 +#> Smoker = 1 (%) 160 (32.0) 121 (20.2) <0.001 0.272 +#> Sex = 1 (%) 347 (69.4) 480 (80.0) <0.001 0.246 +#> ECOG1 = 1 (%) 197 (39.4) 182 (30.3) 0.002 0.191 ``` Note that the standardized mean difference for all covariates is large. Next, we will conduct matching/weighting approach to reduce difference in baseline characteristics. @@ -414,24 +414,24 @@ Note that the standardized mean difference for all covariates is large. Next, we We also examine the standardized mean difference for covariates between the treated and control patients before conducting matching/weighting. Note that `strata="group"` in the following code. -```r +``` r tab2 <- CreateTableOne(vars = myVars, strata = "group" , data = final.data, factorVars = catVars) ``` -```r +``` r print(tab2,smd=TRUE) #> Stratified by group #> 0 1 p test SMD #> n 678 422 -#> Age (mean (SD)) 56.43 (3.91) 55.08 (3.83) <0.001 0.348 -#> Weight (mean (SD)) 148.57 (3.26) 149.92 (3.24) <0.001 0.416 -#> Height (mean (SD)) 63.60 (2.49) 64.88 (2.49) <0.001 0.511 -#> Biomarker1 (mean (SD)) 36.54 (2.40) 34.89 (2.16) <0.001 0.723 -#> Biomarker2 (mean (SD)) 48.58 (2.50) 49.91 (2.43) <0.001 0.541 -#> Smoker = 1 (%) 177 (26.1) 87 (20.6) 0.045 0.130 -#> Sex = 1 (%) 487 (71.8) 344 (81.5) <0.001 0.231 -#> ECOG1 = 1 (%) 239 (35.3) 139 (32.9) 0.472 0.049 +#> Age (mean (SD)) 56.45 (3.88) 55.05 (3.91) <0.001 0.359 +#> Weight (mean (SD)) 148.60 (3.30) 149.88 (3.18) <0.001 0.395 +#> Height (mean (SD)) 63.60 (2.53) 64.88 (2.40) <0.001 0.518 +#> Biomarker1 (mean (SD)) 36.55 (2.36) 34.88 (2.24) <0.001 0.725 +#> Biomarker2 (mean (SD)) 48.53 (2.47) 49.99 (2.41) <0.001 0.599 +#> Smoker = 1 (%) 194 (28.6) 87 (20.6) 0.004 0.186 +#> Sex = 1 (%) 495 (73.0) 332 (78.7) 0.041 0.133 +#> ECOG1 = 1 (%) 242 (35.7) 137 (32.5) 0.303 0.068 ``` # Propensity scores overlap @@ -439,7 +439,7 @@ print(tab2,smd=TRUE) Before applying the matching/weighting methods, we investigate the overlapping of propensity scores. The overlapping coefficient is only $0.19$ indicating a very small overlap. -```r +``` r final.data$indicator <- ifelse(final.data$data=="TRIAL",1,0) ps.logit <- glm(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + Sex +ECOG1, data = final.data, @@ -451,7 +451,7 @@ ps_extcont <- psfit[final.data$indicator==0] ``` -```r +``` r overlap(list(ps_trial=ps_trial, ps_extcont=ps_extcont),plot=TRUE) ``` @@ -462,7 +462,7 @@ overlap(list(ps_trial=ps_trial, ps_extcont=ps_extcont),plot=TRUE) ``` #> $OV -#> [1] 0.3875297 +#> [1] 0.3808273 ``` # Matching Methods @@ -479,13 +479,13 @@ We will explore several matching methods to balance the balance the baseline cha All of the matching methods can be conducted using the `MatchIt` package. The matching is conducted between the RCT subjects and external control subjects. Hence, we introduce a variable named `indicator` in `final.data` to represent the data source indicator. -```r +``` r final.data$indicator <- ifelse(final.data$data=="TRIAL",1,0) ``` ## PSML This matching method is a variation of nearest neighbour or greedy matching that selects matches based on the difference in the logit of the propensity score, up to a certain distance (caliper) [@austin2011optimal]. We selected a caliper width of 0.2 of the standard deviation of the logit of the propensity score, where the propensity score is estimated using a logistic regression. -```r +``` r m.out.nearest.ratio1.caliper.lps <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + Sex +ECOG1, estimand="ATT",data = final.data, method="nearest",ratio=1,caliper=0.20, @@ -503,83 +503,83 @@ summary(m.out.nearest.ratio1.caliper.lps) #> #> Summary of Balance for All Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 2.0386 -1.6416 1.8769 1.0922 0.4123 -#> Age 55.0001 56.9985 -0.5151 1.0883 0.1497 -#> Weight 150.0006 147.9990 0.6306 1.0170 0.1673 -#> Height 65.0010 62.9998 0.8104 1.2222 0.2232 -#> Biomarker1 34.9998 36.9999 -0.8948 0.9984 0.2397 -#> Biomarker2 50.0003 47.9994 0.8138 1.2180 0.2254 -#> Smoker 0.1983 0.2900 -0.2299 . 0.0917 -#> Sex 0.7933 0.7100 0.2058 . 0.0833 -#> ECOG1 0.3050 0.3900 -0.1846 . 0.0850 +#> distance 2.0938 -1.7165 1.8941 1.0635 0.4152 +#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 +#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 +#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 +#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 +#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 +#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 +#> Sex 0.8000 0.6940 0.2650 . 0.1060 +#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 #> eCDF Max -#> distance 0.6657 -#> Age 0.2277 -#> Weight 0.2493 -#> Height 0.3247 -#> Biomarker1 0.3717 -#> Biomarker2 0.3463 -#> Smoker 0.0917 -#> Sex 0.0833 -#> ECOG1 0.0850 +#> distance 0.6703 +#> Age 0.2350 +#> Weight 0.2717 +#> Height 0.3237 +#> Biomarker1 0.3640 +#> Biomarker2 0.3387 +#> Smoker 0.1183 +#> Sex 0.1060 +#> ECOG1 0.0907 #> #> Summary of Balance for Matched Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.3084 -0.0768 0.1964 1.2651 0.0495 -#> Age 55.9994 56.1273 -0.0330 1.2535 0.0200 -#> Weight 149.0622 148.8328 0.0723 0.9531 0.0202 -#> Height 63.9366 63.8937 0.0174 0.9519 0.0195 -#> Biomarker1 35.8765 36.2704 -0.1762 1.1409 0.0574 -#> Biomarker2 49.0364 48.8589 0.0722 1.0945 0.0225 -#> Smoker 0.2455 0.2277 0.0448 . 0.0179 -#> Sex 0.7857 0.7723 0.0331 . 0.0134 -#> ECOG1 0.3304 0.3348 -0.0097 . 0.0045 +#> distance 0.2951 -0.0827 0.1878 1.2711 0.0485 +#> Age 56.0086 56.0781 -0.0179 1.4496 0.0281 +#> Weight 148.8976 148.9481 -0.0159 1.0788 0.0129 +#> Height 63.9709 63.8057 0.0673 1.2080 0.0235 +#> Biomarker1 35.7594 36.0287 -0.1200 1.3080 0.0436 +#> Biomarker2 48.9661 48.6500 0.1293 1.0174 0.0335 +#> Smoker 0.2374 0.2831 -0.1138 . 0.0457 +#> Sex 0.7717 0.7671 0.0114 . 0.0046 +#> ECOG1 0.3836 0.3927 -0.0199 . 0.0091 #> eCDF Max Std. Pair Dist. -#> distance 0.1786 0.1973 -#> Age 0.0491 1.1372 -#> Weight 0.0536 1.1005 -#> Height 0.0625 0.9848 -#> Biomarker1 0.1473 1.0799 -#> Biomarker2 0.0536 0.9051 -#> Smoker 0.0179 0.9628 -#> Sex 0.0134 0.8489 -#> ECOG1 0.0045 0.9987 +#> distance 0.1872 0.1881 +#> Age 0.0776 1.0502 +#> Weight 0.0411 1.1119 +#> Height 0.0685 0.8779 +#> Biomarker1 0.0959 0.9835 +#> Biomarker2 0.0822 0.9921 +#> Smoker 0.0457 0.9332 +#> Sex 0.0046 0.9018 +#> ECOG1 0.0091 1.0728 #> #> Sample Sizes: #> Control Treated #> All 500 600 -#> Matched 224 224 -#> Unmatched 276 376 +#> Matched 219 219 +#> Unmatched 281 381 #> Discarded 0 0 ``` -```r +``` r final.data$ratio1_caliper_weights_lps = m.out.nearest.ratio1.caliper.lps$weights ``` We now compare the SMD between the two datasets. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~ratio1_caliper_weights_lps) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 224.00 224.00 -#> Age (mean (SD)) 56.13 (3.63) 56.00 (4.06) 0.725 0.033 -#> Weight (mean (SD)) 148.83 (3.14) 149.06 (3.07) 0.434 0.074 -#> Height (mean (SD)) 63.89 (2.14) 63.94 (2.08) 0.829 0.020 -#> Biomarker1 (mean (SD)) 36.27 (2.10) 35.88 (2.25) 0.056 0.181 -#> Biomarker2 (mean (SD)) 48.86 (2.12) 49.04 (2.21) 0.385 0.082 -#> Smoker = 1 (%) 51.0 (22.8) 55.0 (24.6) 0.657 0.042 -#> Sex = 1 (%) 173.0 (77.2) 176.0 (78.6) 0.733 0.032 -#> ECOG1 = 1 (%) 75.0 (33.5) 74.0 (33.0) 0.920 0.009 +#> n 219.00 219.00 +#> Age (mean (SD)) 56.08 (3.37) 56.01 (4.06) 0.845 0.019 +#> Weight (mean (SD)) 148.95 (2.99) 148.90 (3.11) 0.863 0.017 +#> Height (mean (SD)) 63.81 (2.07) 63.97 (2.27) 0.426 0.076 +#> Biomarker1 (mean (SD)) 36.03 (1.97) 35.76 (2.26) 0.183 0.127 +#> Biomarker2 (mean (SD)) 48.65 (2.33) 48.97 (2.35) 0.157 0.135 +#> Smoker = 1 (%) 62.0 (28.3) 52.0 (23.7) 0.277 0.104 +#> Sex = 1 (%) 168.0 (76.7) 169.0 (77.2) 0.910 0.011 +#> ECOG1 = 1 (%) 86.0 (39.3) 84.0 (38.4) 0.845 0.019 ``` @@ -602,7 +602,7 @@ print(t1,smd=TRUE) This matching method is similar to PSML except the caliper width of 0.2 is based on the standard deviation of the propensity score scale [@stuart2011nonparametric]. -```r +``` r m.out.nearest.ratio1.caliper <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + Sex +ECOG1, estimand="ATT",data = final.data, method = "nearest", ratio = 1, caliper=0.2, replace=FALSE) @@ -617,84 +617,84 @@ summary(m.out.nearest.ratio1.caliper) #> #> Summary of Balance for All Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7827 0.2608 2.1517 0.8827 0.4123 -#> Age 55.0001 56.9985 -0.5151 1.0883 0.1497 -#> Weight 150.0006 147.9990 0.6306 1.0170 0.1673 -#> Height 65.0010 62.9998 0.8104 1.2222 0.2232 -#> Biomarker1 34.9998 36.9999 -0.8948 0.9984 0.2397 -#> Biomarker2 50.0003 47.9994 0.8138 1.2180 0.2254 -#> Smoker 0.1983 0.2900 -0.2299 . 0.0917 -#> Sex 0.7933 0.7100 0.2058 . 0.0833 -#> ECOG1 0.3050 0.3900 -0.1846 . 0.0850 +#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 +#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 +#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 +#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 +#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 +#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 +#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 +#> Sex 0.8000 0.6940 0.2650 . 0.1060 +#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 #> eCDF Max -#> distance 0.6657 -#> Age 0.2277 -#> Weight 0.2493 -#> Height 0.3247 -#> Biomarker1 0.3717 -#> Biomarker2 0.3463 -#> Smoker 0.0917 -#> Sex 0.0833 -#> ECOG1 0.0850 +#> distance 0.6703 +#> Age 0.2350 +#> Weight 0.2717 +#> Height 0.3237 +#> Biomarker1 0.3640 +#> Biomarker2 0.3387 +#> Smoker 0.1183 +#> Sex 0.1060 +#> ECOG1 0.0907 #> #> Summary of Balance for Matched Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.5431 0.5011 0.1731 1.2229 0.0367 -#> Age 56.0260 56.1310 -0.0271 1.1311 0.0181 -#> Weight 148.9502 148.8880 0.0196 0.9491 0.0181 -#> Height 64.1172 63.9245 0.0781 1.1794 0.0170 -#> Biomarker1 35.9585 36.1764 -0.0975 1.1896 0.0426 -#> Biomarker2 49.2512 48.9165 0.1361 1.4438 0.0344 -#> Smoker 0.2266 0.2365 -0.0247 . 0.0099 -#> Sex 0.7734 0.7734 0.0000 . 0.0000 -#> ECOG1 0.3251 0.3498 -0.0535 . 0.0246 +#> distance 0.5419 0.4959 0.1904 1.2062 0.0390 +#> Age 55.9874 56.0864 -0.0254 1.4798 0.0323 +#> Weight 149.1182 148.8877 0.0728 1.0497 0.0204 +#> Height 64.1411 63.8307 0.1265 1.4769 0.0302 +#> Biomarker1 35.8381 35.9810 -0.0636 1.2859 0.0301 +#> Biomarker2 49.0981 48.7663 0.1357 1.0359 0.0368 +#> Smoker 0.2350 0.2950 -0.1495 . 0.0600 +#> Sex 0.7400 0.7700 -0.0750 . 0.0300 +#> ECOG1 0.3400 0.3900 -0.1088 . 0.0500 #> eCDF Max Std. Pair Dist. -#> distance 0.1133 0.1743 -#> Age 0.0443 1.1398 -#> Weight 0.0640 1.0586 -#> Height 0.0443 1.0077 -#> Biomarker1 0.1182 0.9965 -#> Biomarker2 0.0739 0.9127 -#> Smoker 0.0099 0.9883 -#> Sex 0.0000 0.3448 -#> ECOG1 0.0246 0.9950 +#> distance 0.110 0.1909 +#> Age 0.080 1.0475 +#> Weight 0.060 1.0672 +#> Height 0.085 0.9194 +#> Biomarker1 0.075 0.9429 +#> Biomarker2 0.085 0.9834 +#> Smoker 0.060 0.8723 +#> Sex 0.030 0.9250 +#> ECOG1 0.050 1.0442 #> #> Sample Sizes: #> Control Treated #> All 500 600 -#> Matched 203 203 -#> Unmatched 297 397 +#> Matched 200 200 +#> Unmatched 300 400 #> Discarded 0 0 ``` -```r +``` r final.data$ratio1_caliper_weights = m.out.nearest.ratio1.caliper$weights ``` We now compare the SMD between the two datasets. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~ratio1_caliper_weights) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 203.00 203.00 -#> Age (mean (SD)) 56.13 (3.63) 56.03 (3.86) 0.777 0.028 -#> Weight (mean (SD)) 148.89 (3.15) 148.95 (3.07) 0.840 0.020 -#> Height (mean (SD)) 63.92 (2.15) 64.12 (2.34) 0.387 0.086 -#> Biomarker1 (mean (SD)) 36.18 (2.07) 35.96 (2.26) 0.312 0.100 -#> Biomarker2 (mean (SD)) 48.92 (2.07) 49.25 (2.49) 0.141 0.146 -#> Smoker = 1 (%) 48.0 (23.6) 46.0 (22.7) 0.814 0.023 -#> Sex = 1 (%) 157.0 (77.3) 157.0 (77.3) 1.000 <0.001 -#> ECOG1 = 1 (%) 71.0 (35.0) 66.0 (32.5) 0.600 0.052 +#> n 200.00 200.00 +#> Age (mean (SD)) 56.09 (3.33) 55.99 (4.05) 0.789 0.027 +#> Weight (mean (SD)) 148.89 (2.95) 149.12 (3.02) 0.440 0.077 +#> Height (mean (SD)) 63.83 (2.08) 64.14 (2.52) 0.179 0.134 +#> Biomarker1 (mean (SD)) 35.98 (2.00) 35.84 (2.26) 0.503 0.067 +#> Biomarker2 (mean (SD)) 48.77 (2.34) 49.10 (2.38) 0.159 0.141 +#> Smoker = 1 (%) 59.0 (29.5) 47.0 (23.5) 0.174 0.136 +#> Sex = 1 (%) 154.0 (77.0) 148.0 (74.0) 0.486 0.070 +#> ECOG1 = 1 (%) 78.0 (39.0) 68.0 (34.0) 0.299 0.104 ``` @@ -734,7 +734,7 @@ For a treated unit $i$ and a control unit $j$, genetic matching uses the general If $w_k=1$ for all $k$ then the distance is the standard Mahalanobis distance. However, genetic matching estimates the optimal $w_k$s. The default is to maximize the smallest p-value among balance tests for the covariates in the matched sample (both Kolmogorov-Smirnov tests and t-tests for each covariate) [@greifer2020update]. -```r +``` r m.out.genetic.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + Sex +ECOG1 ,replace=TRUE, estimand="ATT", data = final.data, method = "genetic", @@ -749,84 +749,84 @@ summary(m.out.genetic.ratio1) #> #> Summary of Balance for All Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7827 0.2608 2.1517 0.8827 0.4123 -#> Age 55.0001 56.9985 -0.5151 1.0883 0.1497 -#> Weight 150.0006 147.9990 0.6306 1.0170 0.1673 -#> Height 65.0010 62.9998 0.8104 1.2222 0.2232 -#> Biomarker1 34.9998 36.9999 -0.8948 0.9984 0.2397 -#> Biomarker2 50.0003 47.9994 0.8138 1.2180 0.2254 -#> Smoker 0.1983 0.2900 -0.2299 . 0.0917 -#> Sex 0.7933 0.7100 0.2058 . 0.0833 -#> ECOG1 0.3050 0.3900 -0.1846 . 0.0850 +#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 +#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 +#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 +#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 +#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 +#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 +#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 +#> Sex 0.8000 0.6940 0.2650 . 0.1060 +#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 #> eCDF Max -#> distance 0.6657 -#> Age 0.2277 -#> Weight 0.2493 -#> Height 0.3247 -#> Biomarker1 0.3717 -#> Biomarker2 0.3463 -#> Smoker 0.0917 -#> Sex 0.0833 -#> ECOG1 0.0850 +#> distance 0.6703 +#> Age 0.2350 +#> Weight 0.2717 +#> Height 0.3237 +#> Biomarker1 0.3640 +#> Biomarker2 0.3387 +#> Smoker 0.1183 +#> Sex 0.1060 +#> ECOG1 0.0907 #> #> Summary of Balance for Matched Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7827 0.7618 0.0860 1.0260 0.0379 -#> Age 55.0001 55.1101 -0.0283 1.1784 0.0342 -#> Weight 150.0006 149.3077 0.2183 1.2776 0.0561 -#> Height 65.0010 64.9209 0.0324 1.1642 0.0145 -#> Biomarker1 34.9998 35.3957 -0.1771 1.2578 0.0414 -#> Biomarker2 50.0003 49.7966 0.0829 1.4752 0.0245 -#> Smoker 0.1983 0.1933 0.0125 . 0.0050 -#> Sex 0.7933 0.7933 0.0000 . 0.0000 -#> ECOG1 0.3050 0.2483 0.1231 . 0.0567 +#> distance 0.7861 0.7314 0.2266 1.0791 0.0735 +#> Age 55.0006 55.6050 -0.1554 1.6735 0.0569 +#> Weight 150.0000 149.3017 0.2206 1.3229 0.0536 +#> Height 65.0002 64.8737 0.0515 1.4409 0.0301 +#> Biomarker1 35.0004 35.4694 -0.2089 1.0202 0.0594 +#> Biomarker2 49.9996 49.5484 0.1846 1.5596 0.0482 +#> Smoker 0.2017 0.1933 0.0208 . 0.0083 +#> Sex 0.8000 0.8000 -0.0000 . 0.0000 +#> ECOG1 0.3033 0.3033 -0.0000 . 0.0000 #> eCDF Max Std. Pair Dist. -#> distance 0.1950 0.1920 -#> Age 0.0917 0.9201 -#> Weight 0.1367 1.0483 -#> Height 0.0683 0.1557 -#> Biomarker1 0.1400 0.8230 -#> Biomarker2 0.1150 0.6582 -#> Smoker 0.0050 0.0125 +#> distance 0.2583 0.3565 +#> Age 0.1667 0.5881 +#> Weight 0.1683 0.7942 +#> Height 0.0983 0.2757 +#> Biomarker1 0.1417 0.7685 +#> Biomarker2 0.1633 0.7310 +#> Smoker 0.0083 0.0208 #> Sex 0.0000 0.0000 -#> ECOG1 0.0567 0.7747 +#> ECOG1 0.0000 0.0000 #> #> Sample Sizes: #> Control Treated #> All 500. 600 -#> Matched (ESS) 43.22 600 -#> Matched 151. 600 -#> Unmatched 349. 0 +#> Matched (ESS) 48.69 600 +#> Matched 153. 600 +#> Unmatched 347. 0 #> Discarded 0. 0 ``` -```r +``` r final.data$genetic_ratio1_weights = m.out.genetic.ratio1$weights ``` We now compare the SMD between the two datasets. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~genetic_ratio1_weights) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 151.00 600.00 -#> Age (mean (SD)) 55.11 (3.54) 55.00 (3.88) 0.830 0.030 -#> Weight (mean (SD)) 149.31 (2.78) 150.00 (3.17) 0.111 0.232 -#> Height (mean (SD)) 64.92 (2.27) 65.00 (2.47) 0.815 0.034 -#> Biomarker1 (mean (SD)) 35.40 (1.98) 35.00 (2.24) 0.122 0.188 -#> Biomarker2 (mean (SD)) 49.80 (2.01) 50.00 (2.46) 0.506 0.091 -#> Smoker = 1 (%) 29.2 (19.3) 119.0 (19.8) 0.924 0.013 -#> Sex = 1 (%) 119.8 (79.3) 476.0 (79.3) 1.000 <0.001 -#> ECOG1 = 1 (%) 37.5 (24.8) 183.0 (30.5) 0.346 0.127 +#> n 153.00 600.00 +#> Age (mean (SD)) 55.61 (2.99) 55.00 (3.89) 0.126 0.174 +#> Weight (mean (SD)) 149.30 (2.73) 150.00 (3.17) 0.056 0.236 +#> Height (mean (SD)) 64.87 (2.03) 65.00 (2.45) 0.671 0.056 +#> Biomarker1 (mean (SD)) 35.47 (2.21) 35.00 (2.25) 0.165 0.211 +#> Biomarker2 (mean (SD)) 49.55 (1.94) 50.00 (2.44) 0.128 0.204 +#> Smoker = 1 (%) 29.6 (19.3) 121.0 (20.2) 0.872 0.021 +#> Sex = 1 (%) 122.4 (80.0) 480.0 (80.0) 1.000 <0.001 +#> ECOG1 = 1 (%) 46.4 (30.3) 182.0 (30.3) 1.000 <0.001 ``` ## GMW @@ -834,7 +834,7 @@ print(t1,smd=TRUE) We now consider genetic matching without replacement. -```r +``` r m.out.genetic.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + Sex +ECOG1 ,replace=FALSE, estimand="ATT", data = final.data, method = "genetic", @@ -851,47 +851,47 @@ summary(m.out.genetic.ratio1) #> #> Summary of Balance for All Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7827 0.2608 2.1517 0.8827 0.4123 -#> Age 55.0001 56.9985 -0.5151 1.0883 0.1497 -#> Weight 150.0006 147.9990 0.6306 1.0170 0.1673 -#> Height 65.0010 62.9998 0.8104 1.2222 0.2232 -#> Biomarker1 34.9998 36.9999 -0.8948 0.9984 0.2397 -#> Biomarker2 50.0003 47.9994 0.8138 1.2180 0.2254 -#> Smoker 0.1983 0.2900 -0.2299 . 0.0917 -#> Sex 0.7933 0.7100 0.2058 . 0.0833 -#> ECOG1 0.3050 0.3900 -0.1846 . 0.0850 +#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 +#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 +#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 +#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 +#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 +#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 +#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 +#> Sex 0.8000 0.6940 0.2650 . 0.1060 +#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 #> eCDF Max -#> distance 0.6657 -#> Age 0.2277 -#> Weight 0.2493 -#> Height 0.3247 -#> Biomarker1 0.3717 -#> Biomarker2 0.3463 -#> Smoker 0.0917 -#> Sex 0.0833 -#> ECOG1 0.0850 +#> distance 0.6703 +#> Age 0.2350 +#> Weight 0.2717 +#> Height 0.3237 +#> Biomarker1 0.3640 +#> Biomarker2 0.3387 +#> Smoker 0.1183 +#> Sex 0.1060 +#> ECOG1 0.0907 #> #> Summary of Balance for Matched Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.8753 0.2608 2.5336 0.2211 0.4823 -#> Age 54.7088 56.9985 -0.5902 1.0304 0.1701 -#> Weight 150.3086 147.9990 0.7276 0.9875 0.1960 -#> Height 65.3230 62.9998 0.9408 1.2096 0.2597 -#> Biomarker1 34.6396 36.9999 -1.0560 0.8539 0.2828 -#> Biomarker2 50.2868 47.9994 0.9304 1.1739 0.2579 -#> Smoker 0.1880 0.2900 -0.2558 . 0.1020 -#> Sex 0.8060 0.7100 0.2371 . 0.0960 -#> ECOG1 0.2920 0.3900 -0.2129 . 0.0980 +#> distance 0.8782 0.2566 2.5745 0.2286 0.4845 +#> Age 54.6141 57.0000 -0.6134 1.0025 0.1778 +#> Weight 150.3142 147.9994 0.7314 0.9790 0.2000 +#> Height 65.3268 62.9993 0.9483 1.1390 0.2592 +#> Biomarker1 34.7270 36.9995 -1.0122 0.9747 0.2732 +#> Biomarker2 50.3290 47.9993 0.9530 1.1270 0.2626 +#> Smoker 0.1900 0.3200 -0.3240 . 0.1300 +#> Sex 0.8100 0.6940 0.2900 . 0.1160 +#> ECOG1 0.2800 0.3940 -0.2480 . 0.1140 #> eCDF Max Std. Pair Dist. -#> distance 0.832 2.5336 -#> Age 0.260 0.7405 -#> Weight 0.292 0.7796 -#> Height 0.382 0.9666 -#> Biomarker1 0.434 1.0763 -#> Biomarker2 0.398 0.9554 -#> Smoker 0.102 0.6169 -#> Sex 0.096 0.5433 -#> ECOG1 0.098 0.4822 +#> distance 0.834 2.5745 +#> Age 0.272 0.6838 +#> Weight 0.310 0.7728 +#> Height 0.376 0.9634 +#> Biomarker1 0.426 1.0313 +#> Biomarker2 0.388 1.1486 +#> Smoker 0.130 0.8823 +#> Sex 0.116 0.6700 +#> ECOG1 0.114 0.6483 #> #> Sample Sizes: #> Control Treated @@ -902,32 +902,32 @@ summary(m.out.genetic.ratio1) ``` -```r +``` r final.data$genetic_ratio1_weights_no_replace = m.out.genetic.ratio1$weights ``` We now compare the SMD between the two datasets. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~genetic_ratio1_weights_no_replace) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD #> n 500.00 500.00 -#> Age (mean (SD)) 57.00 (3.72) 54.71 (3.78) <0.001 0.611 -#> Weight (mean (SD)) 148.00 (3.15) 150.31 (3.13) <0.001 0.736 -#> Height (mean (SD)) 63.00 (2.23) 65.32 (2.46) <0.001 0.989 -#> Biomarker1 (mean (SD)) 37.00 (2.24) 34.64 (2.07) <0.001 1.096 -#> Biomarker2 (mean (SD)) 48.00 (2.23) 50.29 (2.41) <0.001 0.985 -#> Smoker = 1 (%) 145.0 (29.0) 94.0 (18.8) <0.001 0.241 -#> Sex = 1 (%) 355.0 (71.0) 403.0 (80.6) <0.001 0.226 -#> ECOG1 = 1 (%) 195.0 (39.0) 146.0 (29.2) 0.001 0.208 +#> Age (mean (SD)) 57.00 (3.75) 54.61 (3.75) <0.001 0.637 +#> Weight (mean (SD)) 148.00 (3.16) 150.31 (3.12) <0.001 0.738 +#> Height (mean (SD)) 63.00 (2.23) 65.33 (2.38) <0.001 1.011 +#> Biomarker1 (mean (SD)) 37.00 (2.23) 34.73 (2.20) <0.001 1.026 +#> Biomarker2 (mean (SD)) 48.00 (2.23) 50.33 (2.36) <0.001 1.015 +#> Smoker = 1 (%) 160.0 (32.0) 95.0 (19.0) <0.001 0.302 +#> Sex = 1 (%) 347.0 (69.4) 405.0 (81.0) <0.001 0.271 +#> ECOG1 = 1 (%) 197.0 (39.4) 140.0 (28.0) <0.001 0.243 ``` @@ -950,7 +950,7 @@ print(t1,smd=TRUE) The optimal matching algorithm performs a global minimization of propensity score distance between the RCT subjects and matched external controls [@harris2016brief]. The criterion used is the sum of the absolute pair distances in the matched sample. Optimal pair matching and nearest neighbor matching often yield the same or very similar matched samples and some research has indicated that optimal pair matching is not much better than nearest neighbor matching at yielding balanced matched samples [@greifer2020update]. -```r +``` r m.out.optimal.ratio1 <- matchit(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + Sex +ECOG1 , estimand="ATT", data = final.data, method = "optimal", @@ -967,47 +967,47 @@ summary(m.out.optimal.ratio1) #> #> Summary of Balance for All Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7827 0.2608 2.1517 0.8827 0.4123 -#> Age 55.0001 56.9985 -0.5151 1.0883 0.1497 -#> Weight 150.0006 147.9990 0.6306 1.0170 0.1673 -#> Height 65.0010 62.9998 0.8104 1.2222 0.2232 -#> Biomarker1 34.9998 36.9999 -0.8948 0.9984 0.2397 -#> Biomarker2 50.0003 47.9994 0.8138 1.2180 0.2254 -#> Smoker 0.1983 0.2900 -0.2299 . 0.0917 -#> Sex 0.7933 0.7100 0.2058 . 0.0833 -#> ECOG1 0.3050 0.3900 -0.1846 . 0.0850 +#> distance 0.7861 0.2566 2.1932 0.8897 0.4152 +#> Age 55.0006 57.0000 -0.5140 1.0784 0.1494 +#> Weight 150.0000 147.9994 0.6321 1.0064 0.1722 +#> Height 65.0002 62.9993 0.8153 1.2157 0.2229 +#> Biomarker1 35.0004 36.9995 -0.8904 1.0152 0.2397 +#> Biomarker2 49.9996 47.9993 0.8183 1.2068 0.2258 +#> Smoker 0.2017 0.3200 -0.2949 . 0.1183 +#> Sex 0.8000 0.6940 0.2650 . 0.1060 +#> ECOG1 0.3033 0.3940 -0.1972 . 0.0907 #> eCDF Max -#> distance 0.6657 -#> Age 0.2277 -#> Weight 0.2493 -#> Height 0.3247 -#> Biomarker1 0.3717 -#> Biomarker2 0.3463 -#> Smoker 0.0917 -#> Sex 0.0833 -#> ECOG1 0.0850 +#> distance 0.6703 +#> Age 0.2350 +#> Weight 0.2717 +#> Height 0.3237 +#> Biomarker1 0.3640 +#> Biomarker2 0.3387 +#> Smoker 0.1183 +#> Sex 0.1060 +#> ECOG1 0.0907 #> #> Summary of Balance for Matched Data: #> Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean -#> distance 0.7410 0.2608 1.9801 0.9032 0.3589 -#> Age 55.3043 56.9985 -0.4367 1.1079 0.1263 -#> Weight 149.7031 147.9990 0.5369 0.9493 0.1424 -#> Height 64.6469 62.9998 0.6670 1.0524 0.1886 -#> Biomarker1 35.2375 36.9999 -0.7885 0.9722 0.2114 -#> Biomarker2 49.5934 47.9994 0.6483 1.0178 0.1862 -#> Smoker 0.2020 0.2900 -0.2207 . 0.0880 -#> Sex 0.7760 0.7100 0.1630 . 0.0660 -#> ECOG1 0.3360 0.3900 -0.1173 . 0.0540 +#> distance 0.7450 0.2566 2.0229 0.9127 0.3621 +#> Age 55.2223 57.0000 -0.4571 1.0612 0.1347 +#> Weight 149.6287 147.9994 0.5148 0.9235 0.1424 +#> Height 64.6082 62.9993 0.6555 1.0563 0.1834 +#> Biomarker1 35.2911 36.9995 -0.7609 0.9710 0.2065 +#> Biomarker2 49.6665 47.9993 0.6820 1.1171 0.1910 +#> Smoker 0.2200 0.3200 -0.2492 . 0.1000 +#> Sex 0.7960 0.6940 0.2550 . 0.1020 +#> ECOG1 0.3180 0.3940 -0.1653 . 0.0760 #> eCDF Max Std. Pair Dist. -#> distance 0.632 1.9801 -#> Age 0.192 1.0969 -#> Weight 0.220 1.1664 -#> Height 0.286 1.1544 -#> Biomarker1 0.342 1.2647 -#> Biomarker2 0.296 1.1523 -#> Smoker 0.088 0.9129 -#> Sex 0.066 0.8940 -#> ECOG1 0.054 1.1077 +#> distance 0.644 2.0229 +#> Age 0.210 1.1380 +#> Weight 0.236 1.1969 +#> Height 0.290 1.1318 +#> Biomarker1 0.310 1.2242 +#> Biomarker2 0.298 1.1685 +#> Smoker 0.100 0.9471 +#> Sex 0.102 1.0250 +#> ECOG1 0.076 1.0442 #> #> Sample Sizes: #> Control Treated @@ -1018,32 +1018,32 @@ summary(m.out.optimal.ratio1) ``` -```r +``` r final.data$optimal_ratio1_weights = m.out.optimal.ratio1$weights ``` We now compare the SMD between the two datasets. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~optimal_ratio1_weights) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD #> n 500.00 500.00 -#> Age (mean (SD)) 57.00 (3.72) 55.30 (3.91) <0.001 0.444 -#> Weight (mean (SD)) 148.00 (3.15) 149.70 (3.07) <0.001 0.548 -#> Height (mean (SD)) 63.00 (2.23) 64.65 (2.29) <0.001 0.728 -#> Biomarker1 (mean (SD)) 37.00 (2.24) 35.24 (2.21) <0.001 0.793 -#> Biomarker2 (mean (SD)) 48.00 (2.23) 49.59 (2.25) <0.001 0.712 -#> Smoker = 1 (%) 145.0 (29.0) 101.0 (20.2) 0.001 0.205 -#> Sex = 1 (%) 355.0 (71.0) 388.0 (77.6) 0.017 0.151 -#> ECOG1 = 1 (%) 195.0 (39.0) 168.0 (33.6) 0.076 0.112 +#> Age (mean (SD)) 57.00 (3.75) 55.22 (3.86) <0.001 0.468 +#> Weight (mean (SD)) 148.00 (3.16) 149.63 (3.03) <0.001 0.527 +#> Height (mean (SD)) 63.00 (2.23) 64.61 (2.29) <0.001 0.713 +#> Biomarker1 (mean (SD)) 37.00 (2.23) 35.29 (2.20) <0.001 0.772 +#> Biomarker2 (mean (SD)) 48.00 (2.23) 49.67 (2.35) <0.001 0.728 +#> Smoker = 1 (%) 160.0 (32.0) 110.0 (22.0) <0.001 0.227 +#> Sex = 1 (%) 347.0 (69.4) 398.0 (79.6) <0.001 0.236 +#> ECOG1 = 1 (%) 197.0 (39.4) 159.0 (31.8) 0.012 0.159 ``` @@ -1070,7 +1070,7 @@ We also explore several weighting approaches. The GBM (Gradient Boosting Machine) is a machine learning method which generates predicted values from a flexible regression model. It can adjust for a large number of covariates. The estimation involves an iterative process with multiple regression trees to capture complex and non-linear relationships. One of the most useful features of GBM for estimating the propensity score is that its iterative estimation procedure can be tuned to find the propensity score model leading to the best balance between treated and control groups, where balance refers to the similarity between different groups on their propensity score weighted distributions of pretreatment covariates [@mccaffrey2013tutorial]. -```r +``` r set.seed(1) @@ -1113,32 +1113,32 @@ final.data$weights_gbm <- ps.AOD.ATT$w[,1] We now compare the SMD between the two datasets. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~weights_gbm) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 284.19 600.00 -#> Age (mean (SD)) 54.93 (4.03) 55.00 (3.88) 0.902 0.018 -#> Weight (mean (SD)) 148.84 (2.69) 150.00 (3.17) <0.001 0.394 -#> Height (mean (SD)) 64.27 (2.11) 65.00 (2.47) 0.002 0.317 -#> Biomarker1 (mean (SD)) 35.61 (2.01) 35.00 (2.24) 0.002 0.285 -#> Biomarker2 (mean (SD)) 49.57 (2.11) 50.00 (2.46) 0.081 0.188 -#> Smoker = 1 (%) 70.0 (24.6) 119.0 (19.8) 0.338 0.116 -#> Sex = 1 (%) 222.6 (78.3) 476.0 (79.3) 0.816 0.025 -#> ECOG1 = 1 (%) 85.7 (30.2) 183.0 (30.5) 0.950 0.007 +#> n 278.08 600.00 +#> Age (mean (SD)) 55.73 (3.32) 55.00 (3.89) 0.040 0.201 +#> Weight (mean (SD)) 149.24 (2.67) 150.00 (3.17) 0.006 0.259 +#> Height (mean (SD)) 64.25 (2.05) 65.00 (2.45) 0.001 0.334 +#> Biomarker1 (mean (SD)) 35.36 (2.07) 35.00 (2.25) 0.154 0.165 +#> Biomarker2 (mean (SD)) 49.28 (2.20) 50.00 (2.44) 0.011 0.308 +#> Smoker = 1 (%) 54.9 (19.7) 121.0 (20.2) 0.909 0.011 +#> Sex = 1 (%) 227.7 (81.9) 480.0 (80.0) 0.597 0.048 +#> ECOG1 = 1 (%) 111.1 (40.0) 182.0 (30.3) 0.094 0.203 ``` ## EB Entropy balancing is a weighting method to balance the covariates by assigning a scalar weight to each external control observations such that the reweighted groups satisfy a set of balance constraints that are imposed on the sample moments of the covariate distributions [@hainmueller2012entropy]. -```r +``` r eb.out <- ebalance(final.data$indicator,final.data[,c(2:9)],max.iterations = 300) #> Converged within tolerance final.data$eb_weights <- rep(1,nrow(final.data)) @@ -1148,7 +1148,7 @@ final.data$eb_weights[final.data$indicator==0] <- eb.out$w Note that the entropy balancing method failed to converge. -```r +``` r eb.out$converged #> [1] TRUE ``` @@ -1156,25 +1156,25 @@ eb.out$converged We now compare the SMD between the two datasets. By definition, the SMD after EB should be zero. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~eb_weights) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD #> n 600.00 600.00 -#> Age (mean (SD)) 55.00 (3.68) 55.00 (3.88) 1.000 <0.001 -#> Weight (mean (SD)) 150.00 (2.82) 150.00 (3.17) 1.000 <0.001 -#> Height (mean (SD)) 65.00 (2.15) 65.00 (2.47) 1.000 <0.001 -#> Biomarker1 (mean (SD)) 35.00 (1.49) 35.00 (2.24) 1.000 <0.001 -#> Biomarker2 (mean (SD)) 50.00 (1.91) 50.00 (2.46) 1.000 <0.001 -#> Smoker = 1 (%) 119.0 (19.8) 119.0 (19.8) 1.000 <0.001 -#> Sex = 1 (%) 476.0 (79.3) 476.0 (79.3) 1.000 <0.001 -#> ECOG1 = 1 (%) 183.0 (30.5) 183.0 (30.5) 1.000 <0.001 +#> Age (mean (SD)) 55.00 (3.29) 55.00 (3.89) 1.000 <0.001 +#> Weight (mean (SD)) 150.00 (2.43) 150.00 (3.17) 1.000 <0.001 +#> Height (mean (SD)) 65.00 (2.36) 65.00 (2.45) 1.000 <0.001 +#> Biomarker1 (mean (SD)) 35.00 (2.37) 35.00 (2.25) 1.000 <0.001 +#> Biomarker2 (mean (SD)) 50.00 (2.00) 50.00 (2.44) 1.000 <0.001 +#> Smoker = 1 (%) 121.0 (20.2) 121.0 (20.2) 1.000 <0.001 +#> Sex = 1 (%) 480.0 (80.0) 480.0 (80.0) 1.000 <0.001 +#> ECOG1 = 1 (%) 182.0 (30.3) 182.0 (30.3) 1.000 <0.001 ``` @@ -1190,7 +1190,7 @@ print(t1,smd=TRUE) ## IPW The propensity score is defined as the probability of a patient being in a trial given the observed baseline covariates. We utilized the ATT weights, which are defined for the IPW as fixing the trial patients weight at unity, and external control patients as $\hat{e}(x)/(1-\hat{e}(x))$ where $\hat{e}(x)$ is estimated using a logistic regression model [@amusa2019examination]. -```r +``` r ps.logit <- glm(indicator ~ Age + Weight + Height + Biomarker1 + Biomarker2 + Smoker + Sex +ECOG1, data = final.data, family=binomial) @@ -1207,25 +1207,25 @@ final.data$invprob_weights[final.data$indicator==1] <- ps_trial/ps_trial We now compare the SMD between the two datasets. -```r +``` r svy <- svydesign(id = ~0, data=final.data,weights = ~invprob_weights) t1 <- svyCreateTableOne(vars = myVars, strata = "data" , data = svy, factorVars = catVars) ``` -```r +``` r print(t1,smd=TRUE) #> Stratified by data #> EC TRIAL p test SMD -#> n 522.38 600.00 -#> Age (mean (SD)) 54.44 (4.03) 55.00 (3.88) 0.458 0.141 -#> Weight (mean (SD)) 148.92 (2.76) 150.00 (3.17) 0.002 0.364 -#> Height (mean (SD)) 64.65 (2.26) 65.00 (2.47) 0.311 0.147 -#> Biomarker1 (mean (SD)) 35.38 (1.80) 35.00 (2.24) 0.077 0.185 -#> Biomarker2 (mean (SD)) 49.83 (2.11) 50.00 (2.46) 0.589 0.076 -#> Smoker = 1 (%) 90.5 (17.3) 119.0 (19.8) 0.568 0.064 -#> Sex = 1 (%) 419.5 (80.3) 476.0 (79.3) 0.849 0.024 -#> ECOG1 = 1 (%) 121.3 (23.2) 183.0 (30.5) 0.158 0.165 +#> n 519.03 600.00 +#> Age (mean (SD)) 55.59 (3.19) 55.00 (3.89) 0.270 0.166 +#> Weight (mean (SD)) 149.27 (2.58) 150.00 (3.17) 0.014 0.254 +#> Height (mean (SD)) 64.80 (2.23) 65.00 (2.45) 0.646 0.084 +#> Biomarker1 (mean (SD)) 34.92 (2.27) 35.00 (2.25) 0.853 0.036 +#> Biomarker2 (mean (SD)) 49.39 (2.09) 50.00 (2.44) 0.041 0.270 +#> Smoker = 1 (%) 87.6 (16.9) 121.0 (20.2) 0.467 0.085 +#> Sex = 1 (%) 456.8 (88.0) 480.0 (80.0) 0.019 0.220 +#> ECOG1 = 1 (%) 187.0 (36.0) 182.0 (30.3) 0.404 0.121 ``` @@ -1241,7 +1241,7 @@ print(t1,smd=TRUE) Next we will investigate balance plots. -```r +``` r covs <- data.frame(final.data[,c("Age","Weight","Height","Biomarker1","Biomarker2","Smoker","Sex","ECOG1")]) data_with_weights <- final.data ``` @@ -1251,7 +1251,7 @@ data_with_weights <- final.data We now conduct a balance diagnostic by considering SMD plot. The x-axis of the plot represent the absolute value of the SMD and y-axis represent the list of all covariates. SMD greater than $0.1$ can be considered a sign of imbalance [@zhang2019balance]. Hence, we put a threshold of $0.1$ in the plot with a vertical dashed line. -```r +``` r love.plot(covs, treat=data_with_weights$data, weights = list(NNMPS=data_with_weights$ratio1_caliper_weights, @@ -1279,7 +1279,7 @@ love.plot(covs, # Balance Plots for Weighting Methods -```r +``` r love.plot(covs, treat=data_with_weights$data, weights = list(EB=data_with_weights$eb_weights, @@ -1303,7 +1303,7 @@ We now investigate the effective sample size (ESS) in the trial and external con -```r +``` r #External control ess.PSML.extcont <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==0]))^2/ sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==0])^2) @@ -1334,7 +1334,7 @@ ess.genetic.no.replace.extcont <- (sum(data_with_weights$genetic_ratio1_weights_ ``` -```r +``` r #Trial ess.PSML.trial <- (sum(data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==1]))^2/ sum((data_with_weights$ratio1_caliper_weights_lps[data_with_weights$indicator==1])^2) @@ -1364,7 +1364,7 @@ ess.genetic.no.replace.trial <- (sum(data_with_weights$genetic_ratio1_weights_no ``` -```r +``` r out.ess <- data.frame(Unadjusted=c(length(which(final.data$data=="TRIAL")),length(which(final.data$data=="EC"))), PSML=c(ess.PSML.trial,ess.PSML.extcont), PSMR=c(ess.PSMR.trial,ess.PSMR.extcont), @@ -1382,20 +1382,20 @@ Note: After applying the matching methods, some patients in RCT were excluded. F The ESS for each cohort using different methods are shown below. -```r +``` r out.ess #> Unadjusted PSML PSMR OM GM GMW EB IPW -#> Trial 600 224 203 500 600.00000 500 600.00000 600.00000 -#> External Control 500 224 203 500 43.21729 500 27.69881 43.69827 +#> Trial 600 219 200 500 600.00000 500 600.00000 600.00000 +#> External Control 500 219 200 500 48.68813 500 23.87066 42.41241 #> GBM #> Trial 600.00000 -#> External Control 73.21273 +#> External Control 74.15286 ``` We also investigate the histogram of the weights for external control patients and the effective sample size (ESS). -```r +``` r par(mfrow=c(2,2)) hist(data_with_weights$eb_weights[data_with_weights$indicator==0],main=paste("EB \n ESS=",round(ess.eb.extcont),sep=""),xlab="Weight") hist(data_with_weights$invprob_weights[data_with_weights$indicator==0],main=paste("IPW\n ESS=",round(ess.ipw.extcont),sep=""),xlab="Weight") diff --git a/vignettes/match_weight_02_application.Rmd b/vignettes/match_weight_02_application.Rmd index 53b12fb8..0b15c421 100644 --- a/vignettes/match_weight_02_application.Rmd +++ b/vignettes/match_weight_02_application.Rmd @@ -4,7 +4,7 @@ bibliography: references.bib author: - Manoj Khanal - Eli Lilly & Company -date: "2025-02-11" +date: "2025-05-07" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{2. Matching and Weighting Application with Dynamic Borrowing} @@ -123,21 +123,16 @@ where $\tau_\ell$ is the precision parameter that determines the degree of borro We first load the following libraries. -```r +``` r #Loading the libraries library(R2jags) -#> Warning: package 'R2jags' was built under R version 4.3.2 -#> Warning: package 'coda' was built under R version 4.3.2 library(psborrow2) library(cmdstanr) -#> Warning: package 'cmdstanr' was built under R version 4.3.3 library(posterior) library(bayesplot) -#> Warning: package 'bayesplot' was built under R version 4.3.2 library(kableExtra) -#> Warning: package 'kableExtra' was built under R version 4.3.2 +#> Error in library(kableExtra): there is no package called 'kableExtra' library(survival) -#> Warning: package 'survival' was built under R version 4.3.3 ``` ## Example data @@ -184,7 +179,7 @@ We also load the data set `data_with_weights`. -```r +``` r data_with_weights <- read.csv("data_with_weights.csv") data_with_weights$cnsr <- 1 - data_with_weights$event data_with_weights <- data_with_weights[, sapply(data_with_weights, class) %in% c("numeric", "integer")] @@ -192,11 +187,12 @@ data_with_weights$ext <- 1 - data_with_weights$indicator #External control flag data_with_weights$norm.weights=data_with_weights$invprob_weights data_with_weights$norm.weights[data_with_weights$ext==1]=data_with_weights$norm.weights[data_with_weights$ext==1]/max(data_with_weights$norm.weights[data_with_weights$ext==1]) data_with_weights <- as.matrix(data_with_weights) + ``` The first six observations of the data set is shown below. -```r +``` r head(data_with_weights) #> X Age Weight Height Biomarker1 Biomarker2 Smoker Sex ECOG1 group #> [1,] 1 56.63723 150.8678 65.79587 39.69306 47.71659 0 1 0 1 @@ -242,7 +238,7 @@ The `psborrow2` package will be used to conduct the analysis. We use exponential distribution for the outcome model as follows. Non-informative normal prior is used for the baseline parameter. The data `data_with_weights` contain the time and censoring variable. For demonstration purpose, we choose the subject level weight `invprob_weights` calculated using inverse probability of treatment weighting (IPTW). In this step we extract the time, censoring indicator and the normalized weight variables denoted by `time`, `cnsr` and `norm.weights` respectively. -```r +``` r #Outcome outcome <- outcome_surv_exponential( time_var = "time", @@ -269,7 +265,7 @@ outcome Next, the borrowing method is implemented as shown below. We consider Bayesian Dynamic Borrowing (BDB) in which gamma prior is assigned for the commensurability parameter. The `tau_prior` shown below is the hyperparameter of the commensurate prior which determines the degree of borrowing. We assign a gamma prior for this hyperparameter. Furhtermore, we also need to specify a flag for external data which is denoted by `ext`. -```r +``` r #Borrowing borrowing <- borrowing_hierarchical_commensurate( ext_flag_col = "ext", @@ -293,7 +289,7 @@ borrowing Similarly, details regarding the treatment variable is specified below. A Non-informative prior is used for the treatment effect parameter $\gamma$. -```r +``` r #Treatment treatment <- treatment_details( trt_flag_col = "group", @@ -315,7 +311,7 @@ treatment Now, all the pieces are brought together to create the analysis object as shown below. -```r +``` r #Application anls_obj <- create_analysis_obj( data_matrix = data_with_weights, @@ -331,7 +327,7 @@ anls_obj <- create_analysis_obj( Finally, we run the MCMC sample for $10,000$ iterations with $3$ chains. -```r +``` r niter = 10000 results.exp.psborrow<- mcmc_sample(anls_obj, iter_warmup = round(niter/3), @@ -341,13 +337,13 @@ results.exp.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 13.3 seconds. -#> Chain 2 finished in 13.2 seconds. -#> Chain 3 finished in 12.4 seconds. +#> Chain 1 finished in 5.9 seconds. +#> Chain 2 finished in 5.2 seconds. +#> Chain 3 finished in 6.3 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 13.0 seconds. -#> Total execution time: 39.2 seconds. +#> Mean chain execution time: 5.8 seconds. +#> Total execution time: 17.6 seconds. draws1 <- results.exp.psborrow$draws() draws1 <- rename_draws_covariates(draws1, anls_obj) @@ -355,24 +351,24 @@ draws1 <- rename_draws_covariates(draws1, anls_obj) The summary is shown below. -```r +``` r results.exp.psborrow$summary() #> # A tibble: 6 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -718. -718. 1.51 1.29 -721. -716. 1.00 10716. -#> 2 beta_trt -0.357 -0.359 0.110 0.111 -0.536 -0.175 1.00 11632. -#> 3 tau 0.119 0.0505 0.183 0.0694 0.000443 0.470 1.00 12583. -#> 4 alpha[1] -0.621 -0.620 0.0910 0.0924 -0.772 -0.474 1.00 11977. -#> 5 alpha[2] 2.36 2.38 0.374 0.368 1.70 2.93 1.00 18690. -#> 6 HR_trt 0.704 0.699 0.0778 0.0773 0.585 0.839 1.00 11632. +#> 1 lp__ -718. -718. 1.48 1.32 -721. -716. 1.00 11600. +#> 2 beta_trt -0.358 -0.360 0.110 0.109 -0.537 -0.173 1.00 12088. +#> 3 alpha[1] -0.620 -0.618 0.0916 0.0915 -0.775 -0.474 1.00 11788. +#> 4 alpha[2] 2.36 2.38 0.371 0.367 1.71 2.93 1.00 17931. +#> 5 tau 0.119 0.0511 0.182 0.0701 0.000400 0.468 1.00 12852. +#> 6 HR_trt 0.704 0.698 0.0777 0.0758 0.584 0.841 1.00 12088. #> # ℹ 1 more variable: ess_tail ``` The histogram of the posterior samples for hazard ratio and commensurability parameter as shown below. -```r +``` r bayesplot::mcmc_hist(draws1, c("treatment HR")) ``` @@ -381,7 +377,7 @@ bayesplot::mcmc_hist(draws1, c("treatment HR"))

plot of chunk unnamed-chunk-10

-```r +``` r bayesplot::mcmc_hist(draws1, c("commensurability parameter")) ``` @@ -390,28 +386,28 @@ bayesplot::mcmc_hist(draws1, c("commensurability parameter"))

plot of chunk unnamed-chunk-10

-```r +``` r bayesplot::color_scheme_set("mix-blue-pink") ``` The $95\%$ credible interval can be calculated as follows. -```r +``` r summarize_draws(draws1, ~ quantile(.x, probs = c(0.025, 0.975))) #> # A tibble: 6 × 3 -#> variable `2.5%` `97.5%` -#> -#> 1 lp__ -722. -716. -#> 2 treatment log HR -0.569 -0.141 -#> 3 commensurability parameter 0.000109 0.628 -#> 4 baseline log hazard rate, internal -0.801 -0.448 -#> 5 baseline log hazard rate, external 1.56 3.02 -#> 6 treatment HR 0.566 0.869 +#> variable `2.5%` `97.5%` +#> +#> 1 lp__ -722. -716. +#> 2 treatment log HR -0.571 -0.138 +#> 3 baseline log hazard rate, internal -0.806 -0.445 +#> 4 baseline log hazard rate, external 1.56 3.02 +#> 5 commensurability parameter 0.0000996 0.621 +#> 6 treatment HR 0.565 0.871 ``` We can graph other plots that help us evaluate convergence and diagnose problems with the MCMC sampler, such as trace plot. -```r +``` r bayesplot::color_scheme_set("mix-blue-pink") bayesplot::mcmc_trace( @@ -433,7 +429,7 @@ We conduct analysis using a Weibull distribution for the outcome model using the Non-informative normal prior is used for the baseline parameter $\mu$. An exponential prior is used for the Weibull shape parameter $\zeta$. -```r +``` r #Outcome outcome <- outcome_surv_weibull_ph( time_var = "time", @@ -468,7 +464,7 @@ outcome Next, the borrowing method is implemented as shown below. We consider Bayesian Dynamic Borrowing (BDB) in which a non-informative gamma prior is assigned for the commensurability parameter. -```r +``` r #Borrowing borrowing <- borrowing_hierarchical_commensurate( ext_flag_col = "ext", @@ -493,7 +489,7 @@ borrowing Similarly, details regarding the treatment variable are specified below. Non-informative prior is used for treatment effect $\gamma$. -```r +``` r #Treatment treatment <- treatment_details( trt_flag_col = "group", @@ -515,7 +511,7 @@ treatment Now, all the pieces are brought together to create the analysis object as shown below. -```r +``` r #Application data_with_weights <- as.matrix(data_with_weights) anls_obj <- create_analysis_obj( @@ -532,7 +528,7 @@ anls_obj <- create_analysis_obj( Finally, we run the MCMC sample for $10,000$ iterations with $3$ chains. -```r +``` r results.weib.psborrow<- mcmc_sample(anls_obj, iter_warmup = round(niter/3), iter_sampling = niter, @@ -541,13 +537,13 @@ results.weib.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 29.3 seconds. -#> Chain 2 finished in 29.0 seconds. -#> Chain 3 finished in 31.5 seconds. +#> Chain 1 finished in 14.7 seconds. +#> Chain 2 finished in 14.8 seconds. +#> Chain 3 finished in 11.5 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 30.0 seconds. -#> Total execution time: 90.2 seconds. +#> Mean chain execution time: 13.6 seconds. +#> Total execution time: 41.1 seconds. draws2 <- results.weib.psborrow$draws() draws2 <- rename_draws_covariates(draws2, anls_obj) @@ -555,25 +551,25 @@ draws2 <- rename_draws_covariates(draws2, anls_obj) The summary is shown below. -```r +``` r results.weib.psborrow$summary() #> # A tibble: 7 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -708. -7.07e+2 1.68 1.49 -7.11e+2 -706. 1.00 11530. -#> 2 beta_trt -0.302 -3.03e-1 0.110 0.109 -4.82e-1 -0.120 1.00 16857. -#> 3 shape_weibull 0.836 8.35e-1 0.0322 0.0322 7.83e-1 0.890 1.00 21009. -#> 4 tau 0.180 7.77e-2 0.288 0.106 6.77e-4 0.693 1.00 14413. -#> 5 alpha[1] -0.509 -5.08e-1 0.0931 0.0934 -6.62e-1 -0.358 1.00 16847. -#> 6 alpha[2] 1.97 2.00e+0 0.386 0.380 1.29e+0 2.57 1.00 20959. -#> 7 HR_trt 0.744 7.39e-1 0.0823 0.0804 6.17e-1 0.887 1.00 16857. +#> 1 lp__ -708. -7.07e+2 1.66 1.48 -7.11e+2 -705. 1.00 11420. +#> 2 beta_trt -0.301 -3.01e-1 0.111 0.111 -4.82e-1 -0.118 1.00 16859. +#> 3 alpha[1] -0.510 -5.09e-1 0.0944 0.0940 -6.68e-1 -0.357 1.00 16239. +#> 4 alpha[2] 1.97 1.99e+0 0.382 0.377 1.30e+0 2.56 1.00 22836. +#> 5 tau 0.177 7.53e-2 0.281 0.103 6.87e-4 0.691 1.00 14556. +#> 6 shape_weibull 0.836 8.36e-1 0.0323 0.0327 7.83e-1 0.889 1.00 22322. +#> 7 HR_trt 0.744 7.40e-1 0.0828 0.0815 6.18e-1 0.888 1.00 16859. #> # ℹ 1 more variable: ess_tail ``` Histogram of the posterior samples for hazard ratio and commensurability parameter are produced. -```r +``` r bayesplot::mcmc_hist(draws2, c("treatment HR")) ``` @@ -582,7 +578,7 @@ bayesplot::mcmc_hist(draws2, c("treatment HR"))

plot of chunk unnamed-chunk-19

-```r +``` r bayesplot::mcmc_hist(draws2, c("commensurability parameter")) ``` @@ -591,29 +587,29 @@ bayesplot::mcmc_hist(draws2, c("commensurability parameter"))

plot of chunk unnamed-chunk-19

-```r +``` r bayesplot::color_scheme_set("mix-blue-pink") ``` The $95\%$ credible interval can be calculated as follows. -```r +``` r summarize_draws(draws2, ~ quantile(.x, probs = c(0.025, 0.975))) #> # A tibble: 7 × 3 #> variable `2.5%` `97.5%` #> #> 1 lp__ -712. -705. -#> 2 treatment log HR -0.515 -0.0855 -#> 3 Weibull shape parameter 0.774 0.900 -#> 4 commensurability parameter 0.000170 0.945 -#> 5 baseline log hazard rate, internal -0.692 -0.331 -#> 6 baseline log hazard rate, external 1.15 2.66 -#> 7 treatment HR 0.598 0.918 +#> 2 treatment log HR -0.516 -0.0847 +#> 3 baseline log hazard rate, internal -0.699 -0.328 +#> 4 baseline log hazard rate, external 1.15 2.66 +#> 5 commensurability parameter 0.000179 0.933 +#> 6 Weibull shape parameter 0.773 0.900 +#> 7 treatment HR 0.597 0.919 ``` We can see other plots that help us evaluate convergence and diagnose problems with the MCMC sampler, such as trace plot. -```r +``` r bayesplot::color_scheme_set("mix-blue-pink") bayesplot::mcmc_trace( @@ -633,7 +629,7 @@ bayesplot::mcmc_trace( We will consider the Bayesian power prior with subject specific power parameters. The weights from various matching and weighting methods can be used as subject specific power parameters for external control subjects. The baseline hazard function will be based on piecewise exponential distribution. The following JAGS code, adapted from [@alvares2021bayesian], implements the power prior with piecewise exponential baseline hazard. We will consider a subject specific power parameter as the weights from inverse probability weighting method. The subject specific weights for external control patients are normalized by dividing each weight by the maximum weight. -```r +``` r model{ #POWER PRIOR JAGS CODE #Trial Data @@ -698,7 +694,7 @@ model{ We now define all the variables as in input to JAGS. -```r +``` r data_with_weights=data.frame(data_with_weights) #Trial Data @@ -731,7 +727,7 @@ zerosR = rep(0, nR) We also need to pre-specify the number of intervals. For simplicity, we will consider $K=1$ interval. However, this method also works $K>1$. -```r +``` r # Time axis partition K <- 1 # number of intervals @@ -769,7 +765,7 @@ int.obsR <- rowSums(int.obsR) We now put all variables into a list as data inputs for JAGs. -```r +``` r ### JAGS #### d.jags <- list("n", "nR", "time", "timeR", "a", "X", "XR","int.obs", @@ -781,14 +777,15 @@ d.jags <- list("n", "nR", "time", "timeR", We specify the parameter of interest as follows. -```r +``` r #Parameter of interest p.jags <- c("beta","alpha") + ``` The standard normal distribution is used as an initial values for the parameter of interest. -```r +``` r #Initial values for each parameter i.jags <- function(){ list(beta = rnorm(ncol(X)), alpha = rnorm(K)) @@ -799,7 +796,7 @@ i.jags <- function(){ Now, we call the `jags` function to conduct MCMC sampling with $3$ chains as shown below. We set number of iterations as $niter=10,000$ with a burn in of $5000$ and thinning of $10$. -```r +``` r set.seed(1) model1 <- jags(data = d.jags, model.file = "powerprior.txt", inits = i.jags, n.chains = 3, parameters=p.jags,n.iter=niter,n.burnin = round(niter/2),n.thin = 10) @@ -816,29 +813,29 @@ model1 <- jags(data = d.jags, model.file = "powerprior.txt", inits = i.jags, n.c The summary can be produced using the following command. -```r +``` r model1$BUGSoutput$summary #> mean sd 2.5% 25% 50% -#> alpha -5.566809e-01 0.08954522 -7.394070e-01 -6.170039e-01 -5.543440e-01 -#> beta -4.200419e-01 0.10776354 -6.340181e-01 -4.921314e-01 -4.202084e-01 -#> deviance 2.200015e+08 1.99156129 2.200015e+08 2.200015e+08 2.200015e+08 +#> alpha -5.593230e-01 0.08814086 -7.325480e-01 -6.187778e-01 -5.577829e-01 +#> beta -4.220429e-01 0.10645216 -6.269033e-01 -4.931953e-01 -4.208824e-01 +#> deviance 2.200015e+08 1.98002031 2.200015e+08 2.200015e+08 2.200015e+08 #> 75% 97.5% Rhat n.eff -#> alpha -4.952955e-01 -3.884554e-01 1.000476 1500 -#> beta -3.479730e-01 -2.076608e-01 1.000497 1500 +#> alpha -4.975311e-01 -3.800964e-01 1.003074 650 +#> beta -3.470770e-01 -2.163331e-01 1.003200 630 #> deviance 2.200015e+08 2.200015e+08 1.000000 1 ``` The summary of the hazard ratio for treatment variable is shown below. -```r +``` r c(summary(exp(model1$BUGSoutput$sims.list$beta[,1])), quantile(exp(model1$BUGSoutput$sims.list$beta[,1]),probs=c(0.025,0.975))) #> Min. 1st Qu. Median Mean 3rd Qu. Max. 2.5% 97.5% -#> 0.4931593 0.6113220 0.6569099 0.6608469 0.7061179 0.9601307 0.5304561 0.8124826 +#> 0.4786944 0.6106720 0.6564673 0.6594231 0.7067509 0.9316665 0.5342440 0.8054669 ``` The histogram of the treatment hazard ratio can also be produced. -```r +``` r hist(exp(model1$BUGSoutput$sims.list$beta[,1]),xlab="treatmentHR",main="",ylab="") ``` @@ -849,7 +846,7 @@ hist(exp(model1$BUGSoutput$sims.list$beta[,1]),xlab="treatmentHR",main="",ylab=" The traceplot can be generated as follows. -```r +``` r traceplot(model1,varname=c("alpha","beta")) ``` @@ -869,7 +866,7 @@ traceplot(model1,varname=c("alpha","beta")) We will consider the Bayesian commensurate prior model. -```r +``` r model{ #Trial Data for (i in 1:n){ @@ -937,7 +934,7 @@ model{ We now define all the variables as in input to JAGS. -```r +``` r data_with_weights=data.frame(data_with_weights) #Trial Data @@ -970,7 +967,7 @@ zerosR = rep(0, nR) We also need to pre-specify the number of intervals. For simplicity, we will consider $K=1$ interval. However, this method also works when $K>1$. -```r +``` r # Time axis partition K <- 1 # number of intervals @@ -1008,7 +1005,7 @@ int.obsR <- rowSums(int.obsR) We now put all variables into a list as data inputs for JAGs. Note that the letter $R$ in the following corresponds to RWD. For example, $XR$ is the covariate matrix in RWD. -```r +``` r ### JAGS #### d.jags <- list("n", "nR", "time", "timeR", "a", "X", "XR","int.obs", @@ -1020,14 +1017,15 @@ d.jags <- list("n", "nR", "time", "timeR", We specify the parameter of interest as follows. Note that `alphaR` is the baseline hazard parameter for external control. -```r +``` r #Parameter of interest p.jags <- c("beta","alpha","alphaR","tau") + ``` We generate initial values for $\beta$ and $\alpha$ from standard normal distributions, and $\tau$ from a non-informative Gamma distribution. -```r +``` r #Initial values for each parameter i.jags <- function(){ list(beta = rnorm(ncol(X)), beta0 = rnorm(ncol(X)), @@ -1039,7 +1037,7 @@ i.jags <- function(){ Now, we call the `jags` function to conduct MCMC sampling with $3$ chains as shown below. We set number of iterations as $niter=10,000$ with a burn in of $5000$ and thinning of $10$. -```r +``` r set.seed(1) model2 <- jags(data = d.jags, model.file = "commensurateprior.txt", inits = i.jags, n.chains = 3, parameters=p.jags,n.iter=niter,n.burnin = round(niter/2),n.thin = 10) @@ -1056,33 +1054,33 @@ model2 <- jags(data = d.jags, model.file = "commensurateprior.txt", inits = i.ja The summary can be produced using the following command. -```r +``` r model2$BUGSoutput$summary #> mean sd 2.5% 25% 50% -#> alpha -6.191206e-01 0.08842641 -7.909870e-01 -6.787401e-01 -6.193462e-01 -#> alphaR 2.366979e+00 0.36366809 1.593958e+00 2.144550e+00 2.379864e+00 -#> beta -3.596931e-01 0.10541071 -5.626927e-01 -4.319852e-01 -3.571647e-01 -#> deviance 2.200014e+08 2.44607662 2.200014e+08 2.200014e+08 2.200014e+08 -#> tau 1.254783e-01 0.18316338 4.693678e-04 1.536327e-02 5.811439e-02 +#> alpha -6.207876e-01 0.08997989 -8.027429e-01 -6.804281e-01 -6.187626e-01 +#> alphaR 2.332726e+00 0.38544878 1.517221e+00 2.080838e+00 2.353291e+00 +#> beta -3.570740e-01 0.10944967 -5.737298e-01 -4.287050e-01 -3.588437e-01 +#> deviance 2.200014e+08 2.57443363 2.200014e+08 2.200014e+08 2.200014e+08 +#> tau 1.297205e-01 0.18216287 2.880458e-04 1.381331e-02 6.095153e-02 #> 75% 97.5% Rhat n.eff -#> alpha -5.575406e-01 -4.477827e-01 1.001772 1100 -#> alphaR 2.627147e+00 3.007402e+00 1.004616 440 -#> beta -2.884953e-01 -1.600480e-01 1.000624 1500 +#> alpha -5.638885e-01 -4.457021e-01 1.001797 1100 +#> alphaR 2.615898e+00 3.014684e+00 1.004889 820 +#> beta -2.823616e-01 -1.377196e-01 1.001395 1400 #> deviance 2.200014e+08 2.200014e+08 1.000000 1 -#> tau 1.585465e-01 6.215560e-01 1.001367 1500 +#> tau 1.637742e-01 6.863053e-01 1.004178 1100 ``` The summary of the hazard ratio for treatment variable is shown below. -```r +``` r c(summary(exp(model2$BUGSoutput$sims.list$beta[,1])), quantile(exp(model2$BUGSoutput$sims.list$beta[,1]),probs=c(0.025,0.975))) #> Min. 1st Qu. Median Mean 3rd Qu. Max. 2.5% 97.5% -#> 0.4932769 0.6492190 0.6996573 0.7017649 0.7493903 0.9788045 0.5696732 0.8521031 +#> 0.4690294 0.6513520 0.6984835 0.7039318 0.7540010 1.0061228 0.5634205 0.8713433 ``` The histogram of the treatment hazard ratio can also be produced. -```r +``` r hist(exp(model2$BUGSoutput$sims.list$beta[,1]),xlab="treatmentHR",main="",ylab="") ``` @@ -1093,7 +1091,7 @@ hist(exp(model2$BUGSoutput$sims.list$beta[,1]),xlab="treatmentHR",main="",ylab=" The histogram of the commensurability parameter can also be produced. -```r +``` r hist(exp(model2$BUGSoutput$sims.list$tau[,1]),xlab="Commensurability Parameter",main="",ylab="") ``` @@ -1104,7 +1102,7 @@ hist(exp(model2$BUGSoutput$sims.list$tau[,1]),xlab="Commensurability Parameter", The traceplot can be created as follows. -```r +``` r traceplot(model2,varname=c("alpha","alphaR","beta","tau")) ``` @@ -1129,7 +1127,7 @@ The `psborrow2` package will be used to conduct the analysis. Following are the We use exponential distribution for the outcome model as follows. Non-informative normal prior is used for the baseline parameter. -```r +``` r #Outcome outcome <- outcome_surv_exponential( time_var = "time", @@ -1154,7 +1152,7 @@ outcome Next, the borrowing method is implemented as shown below. We consider no borrowing as shown below. -```r +``` r #Borrowing borrowing <- borrowing_none( ext_flag_col = "ext" @@ -1168,7 +1166,7 @@ borrowing Similarly, details regarding the treatment variable is specified below. Non-informative prior is used a prior for treatment effect. -```r +``` r #Treatment treatment <- treatment_details( trt_flag_col = "group", @@ -1190,7 +1188,7 @@ treatment Now, all the pieces are brought together to create the analysis object as shown below. -```r +``` r data_with_weights <- as.matrix(data_with_weights) #Application anls_obj <- create_analysis_obj( @@ -1208,7 +1206,7 @@ anls_obj <- create_analysis_obj( Finally, we run the MCMC sample with $3$ chains. -```r +``` r results.no.psborrow<- mcmc_sample(anls_obj, iter_warmup = round(niter/3), iter_sampling = niter, @@ -1217,13 +1215,13 @@ results.no.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 4.0 seconds. -#> Chain 2 finished in 4.1 seconds. -#> Chain 3 finished in 4.2 seconds. +#> Chain 1 finished in 2.0 seconds. +#> Chain 2 finished in 1.7 seconds. +#> Chain 3 finished in 1.8 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 4.1 seconds. -#> Total execution time: 12.7 seconds. +#> Mean chain execution time: 1.8 seconds. +#> Total execution time: 5.7 seconds. draws3 <- results.no.psborrow$draws() draws3 <- rename_draws_covariates(draws3, anls_obj) @@ -1231,22 +1229,22 @@ draws3 <- rename_draws_covariates(draws3, anls_obj) The summary is shown below. -```r +``` r results.no.psborrow$summary() #> # A tibble: 4 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -727. -727. 0.983 0.706 -729. -726. 1.00 10417. -#> 2 beta_trt -0.355 -0.356 0.109 0.109 -0.534 -0.174 1.00 8852. -#> 3 alpha -0.623 -0.622 0.0910 0.0901 -0.776 -0.477 1.00 8695. -#> 4 HR_trt 0.705 0.700 0.0773 0.0757 0.586 0.840 1.00 8852. +#> 1 lp__ -727. -727. 1.02 0.728 -729. -726. 1.00 9041. +#> 2 beta_trt -0.357 -0.357 0.111 0.111 -0.537 -0.172 1.00 8473. +#> 3 alpha -0.622 -0.621 0.0929 0.0926 -0.778 -0.472 1.00 8588. +#> 4 HR_trt 0.704 0.699 0.0788 0.0773 0.584 0.842 1.00 8473. #> # ℹ 1 more variable: ess_tail ``` The histogram of the posterior samples for hazard ratio and commensurability parameter as shown below. -```r +``` r bayesplot::mcmc_hist(draws3, c("treatment HR")) ``` @@ -1255,26 +1253,26 @@ bayesplot::mcmc_hist(draws3, c("treatment HR"))

plot of chunk unnamed-chunk-51

-```r +``` r bayesplot::color_scheme_set("mix-blue-pink") ``` The $95\%$ credible interval can be calculated as follows. -```r +``` r summarize_draws(draws3, ~ quantile(.x, probs = c(0.025, 0.975))) #> # A tibble: 4 × 3 #> variable `2.5%` `97.5%` #> #> 1 lp__ -730. -726. -#> 2 treatment log HR -0.568 -0.139 -#> 3 baseline log hazard rate -0.806 -0.451 -#> 4 treatment HR 0.567 0.870 +#> 2 treatment log HR -0.576 -0.138 +#> 3 baseline log hazard rate -0.807 -0.444 +#> 4 treatment HR 0.562 0.871 ``` We can see other plots that help us understand and diagnose problems with the MCMC sampler, such as trace plot. -```r +``` r bayesplot::color_scheme_set("mix-blue-pink") bayesplot::mcmc_trace( @@ -1295,7 +1293,7 @@ The `psborrow2` package will be used to conduct the analysis. Following are the We use exponential distribution for the outcome model as follows. Non-informative normal prior is used for the baseline parameter. -```r +``` r #Outcome outcome <- outcome_surv_exponential( time_var = "time", @@ -1320,7 +1318,7 @@ outcome Next, the borrowing method is implemented as shown below. We consider full borrowing as shown below. -```r +``` r #Borrowing borrowing <- borrowing_full(ext_flag_col = "ext") @@ -1332,7 +1330,7 @@ borrowing Similarly, details regarding the treatment variable is specified below. Non-informative prior is used a prior for treatment effect. -```r +``` r #Treatment treatment <- treatment_details( trt_flag_col = "group", @@ -1354,7 +1352,7 @@ treatment Now, all the pieces are brought together to create the analysis object as shown below. -```r +``` r #Application anls_obj <- create_analysis_obj( data_matrix = data_with_weights, @@ -1371,7 +1369,7 @@ anls_obj <- create_analysis_obj( Finally, we run the MCMC sample with $3$ chains. -```r +``` r results.full.psborrow<- mcmc_sample(anls_obj, iter_warmup = round(niter/3), iter_sampling = niter, @@ -1380,13 +1378,13 @@ results.full.psborrow<- mcmc_sample(anls_obj, ) #> Running MCMC with 3 sequential chains... #> -#> Chain 1 finished in 5.8 seconds. -#> Chain 2 finished in 5.7 seconds. -#> Chain 3 finished in 5.9 seconds. +#> Chain 1 finished in 2.8 seconds. +#> Chain 2 finished in 2.5 seconds. +#> Chain 3 finished in 2.8 seconds. #> #> All 3 chains finished successfully. -#> Mean chain execution time: 5.8 seconds. -#> Total execution time: 17.9 seconds. +#> Mean chain execution time: 2.7 seconds. +#> Total execution time: 8.4 seconds. draws4 <- results.full.psborrow$draws() draws4 <- rename_draws_covariates(draws4, anls_obj) @@ -1394,22 +1392,22 @@ draws4 <- rename_draws_covariates(draws4, anls_obj) The summary is shown below. -```r +``` r results.full.psborrow$summary() #> # A tibble: 4 × 10 #> variable mean median sd mad q5 q95 rhat ess_bulk #> -#> 1 lp__ -747. -747. 1.01 0.710 -749. -746. 1.00 13060. -#> 2 beta_trt -1.51 -1.51 0.0765 0.0762 -1.64 -1.39 1.00 10894. -#> 3 alpha 0.534 0.535 0.0467 0.0463 0.458 0.610 1.00 10829. -#> 4 HR_trt 0.221 0.220 0.0169 0.0168 0.194 0.250 1.00 10894. +#> 1 lp__ -747. -747. 0.992 0.698 -749. -746. 1.00 13207. +#> 2 beta_trt -1.51 -1.51 0.0761 0.0761 -1.64 -1.39 1.00 11791. +#> 3 alpha 0.534 0.534 0.0464 0.0459 0.456 0.609 1.00 11410. +#> 4 HR_trt 0.221 0.221 0.0169 0.0168 0.194 0.250 1.00 11791. #> # ℹ 1 more variable: ess_tail ``` The histogram of the posterior samples for hazard ratio and commensurability parameter as shown below. -```r +``` r bayesplot::mcmc_hist(draws4, c("treatment HR")) ``` @@ -1418,26 +1416,26 @@ bayesplot::mcmc_hist(draws4, c("treatment HR"))

plot of chunk unnamed-chunk-60

-```r +``` r bayesplot::color_scheme_set("mix-blue-pink") ``` The $95\%$ credible interval can be calculated as follows. -```r +``` r summarize_draws(draws4, ~ quantile(.x, probs = c(0.025, 0.975))) #> # A tibble: 4 × 3 #> variable `2.5%` `97.5%` #> #> 1 lp__ -750. -746. #> 2 treatment log HR -1.66 -1.36 -#> 3 baseline log hazard rate 0.442 0.625 +#> 3 baseline log hazard rate 0.441 0.624 #> 4 treatment HR 0.190 0.256 ``` We can see other plots that help us understand and diagnose problems with the MCMC sampler, such as trace plot. -```r +``` r bayesplot::color_scheme_set("mix-blue-pink") bayesplot::mcmc_trace( @@ -1457,7 +1455,7 @@ bayesplot::mcmc_trace( Now, we also demonstrate how researchers would fit a frequentist Cox model. For the first model, we will not borrow any information from external controls and use the trial data only. -```r +``` r data_with_weights = data.frame(data_with_weights) f1 <- coxph(Surv(time,event)~group, data=data_with_weights[data_with_weights$ext==0,]) summary(f1) @@ -1485,7 +1483,7 @@ summary(f1) A frequentist Cox model can also be fitted using all of the external control data via full borrowing. -```r +``` r f2 <- coxph(Surv(time,event)~group, data=data_with_weights) summary(f2) #> Call: @@ -1511,7 +1509,7 @@ summary(f2) # Summarizing all the results together Now, we concatenate the results from all of the Bayesian and frequentist analysis approaches together. -```r +``` r hr1=results.exp.psborrow$summary()[6,c(2)] hr1conf = summarize_draws(draws1, ~ quantile(.x, probs = c(0.025, 0.975)))[6,c(2,3)] hr1.conf=c(hr1$mean,hr1conf$`2.5%`,hr1conf$`97.5%`) @@ -1556,22 +1554,9 @@ colnames(out)[2:4] = c("Hazard Ratio","Lower 95% CI","Upper 95% CI") The hazard ratio estimates and $95\%$ credible intervals for all the methods are shown below. - -|Method | Hazard Ratio| Lower 95% CI| Upper 95% CI| -|:-------------------------------------------------------------------------------|------------:|------------:|------------:| -|Exponential distribution (constant hazard) and gamma prior | 0.7040| 0.5660| 0.8688| -|Piecewise exponential distribution (proportional hazard) and power prior | 0.6608| 0.5305| 0.8125| -|Weibull distribution (proportional hazard) and gamma prior | 0.7437| 0.5978| 0.9180| -|Piecewise exponential distribution (proportional hazard) and commensurate prior | 0.7018| 0.5697| 0.8521| -|Exponential distribution (constant hazard): No borrowing | 0.7054| 0.5666| 0.8699| -|Exponential distribution (constant hazard): Full borrowing | 0.2209| 0.1896| 0.2560| -|Cox model (Frequentist approach): No borrowing | 0.7757| 0.6246| 0.9633| -|Cox model (Frequentist approach): Full borrowing | 0.3327| 0.2845| 0.3890| - -__Note:__ -CI: Credible Interval for Bayesian methods and Confidence interval for Frequentist method - - +``` +#> Error in add_footnote(x, c("CI: Credible Interval for Bayesian methods and Confidence interval for Frequentist method"), : could not find function "add_footnote" +``` # References diff --git a/vignettes/original/_weighting.Rmd b/vignettes/original/_weighting.Rmd index 19f581c5..a81e6f59 100644 --- a/vignettes/original/_weighting.Rmd +++ b/vignettes/original/_weighting.Rmd @@ -44,7 +44,7 @@ Note that we'll need `cmdstanr` to run this analysis. Please install ```{r packages, results = "hide", message = FALSE} library(psborrow2) -library(cmdstsanr) +library(cmdstanr) library(BayesPPD) library(ggplot2) ``` diff --git a/vignettes/prior_distributions.Rmd b/vignettes/prior_distributions.Rmd index 2f2926db..119f8036 100644 --- a/vignettes/prior_distributions.Rmd +++ b/vignettes/prior_distributions.Rmd @@ -11,7 +11,7 @@ vignette: > -```r +``` r library(psborrow2) ``` @@ -56,7 +56,7 @@ For example, we can create an uninformative normal distribution by specifying a normal prior centered around 0 with a very large standard deviation: -```r +``` r uninformative_normal <- prior_normal(0, 10000) uninformative_normal # Normal Distribution @@ -76,7 +76,7 @@ scenarios, you can call `plot()` on the prior object to visualize the distribution: -```r +``` r plot(uninformative_normal) ``` @@ -93,7 +93,7 @@ distribution with greater density at higher values of `tau` (which will lead to more borrowing in a BDB analysis): -```r +``` r conservative_tau <- prior_gamma(0.001, 0.001) aggressive_tau <- prior_gamma(1, 0.001) plot(aggressive_tau, xlim = c(0, 2000), col = "blue", ylim = c(0, 1e-03)) diff --git a/vignettes/propensity_scores.Rmd b/vignettes/propensity_scores.Rmd index 929a6489..5d5c99e7 100644 --- a/vignettes/propensity_scores.Rmd +++ b/vignettes/propensity_scores.Rmd @@ -13,10 +13,9 @@ link-citations: yes -```r +``` r library(psborrow2) library(cmdstanr) -#> Warning: package 'cmdstanr' was built under R version 4.3.3 ``` Propensity scores (PS) methods offer various ways to adjust analyses for differences in groups of patients. @@ -33,9 +32,8 @@ In addition, weights can be calculated differently for different estimands. Here `estimand = "ATT"`, to calculate weights for estimating the average treatment effect among the treated (ATT). -```r +``` r library(WeightIt) -#> Warning: package 'WeightIt' was built under R version 4.3.2 example_dataframe <- as.data.frame(example_matrix) example_dataframe$int <- 1 - example_dataframe$ext @@ -48,32 +46,32 @@ weightit_model <- weightit( ) #> Warning: No `criterion` was provided. Using "smd.mean". summary(weightit_model) -#> Summary of weights +#> Summary of weights #> #> - Weight ranges: #> #> Min Max #> treated 1.0000 || 1.0000 -#> control 0.0826 |---------------------------| 5.8966 +#> control 0.0742 |---------------------------| 5.7137 #> #> - Units with the 5 most extreme weights by group: #> #> 5 4 3 2 1 #> treated 1 1 1 1 1 #> 195 158 465 438 371 -#> control 2.7924 2.7924 5.8966 5.8966 5.8966 +#> control 2.7859 2.7859 5.7137 5.7137 5.7137 #> #> - Weight statistics: #> -#> Coef of Var MAD Entropy # Zeros -#> treated 0.000 0.00 0.0 0 -#> control 1.582 0.82 0.6 0 +#> Coef of Var MAD Entropy # Zeros +#> treated 0.000 0.000 0.000 0 +#> control 1.564 0.831 0.603 0 #> #> - Effective Sample Sizes: #> #> Control Treated #> Unweighted 350. 150 -#> Weighted 100.09 150 +#> Weighted 101.76 150 ``` Another useful package is [cobalt](https://cran.r-project.org/package=cobalt), which provides tools for @@ -82,9 +80,8 @@ See the [vignette](https://CRAN.R-project.org/package=cobalt/vignettes/cobalt.ht We can use the `cobalt` package to assess balance with `bal.plot()`. -```r +``` r library(cobalt) -#> Warning: package 'cobalt' was built under R version 4.3.2 bal.plot(weightit_model) ``` @@ -96,7 +93,7 @@ bal.plot(weightit_model) "Love plots" can also be generated that compare the populations before and after weighting: -```r +``` r love.plot(weightit_model, stars = "std") ``` @@ -110,7 +107,7 @@ The PS values can be copied into the data set and the analysis object can be con -```r +``` r example_dataframe$att <- weightit_model$weights example_matrix_att <- create_data_matrix( @@ -143,7 +140,7 @@ As described in the [Getting Started vignette](https://CRAN.R-project.org/packag it can be useful to check the imbalance before matching. -```r +``` r library(MatchIt) #> #> Attaching package: 'MatchIt' @@ -187,7 +184,7 @@ Here we are matching treated to untreated to select the most comparable control internal or external. For simplicity let's try a 1:1 nearest matching approach. -```r +``` r match_11 <- matchit(trt ~ cov1 + cov2 + cov3 + cov4, data = example_dataframe, method = "nearest", distance = "glm" @@ -235,7 +232,7 @@ summary(match_11) ``` -```r +``` r set.seed(123) plot(match_11, type = "jitter", interactive = FALSE) ``` @@ -252,7 +249,7 @@ vignette. Again the `cobalt` package can be useful here. However, if you are happy with the results of the matching procedure, you can extract the data for use in `psborrow2`. -```r +``` r example_matrix_match <- create_data_matrix( data = example_dataframe[match_11$weights == 1, ], ext_flag_col = "ext", @@ -264,7 +261,7 @@ example_matrix_match <- create_data_matrix( -```r +``` r analysis_match <- create_analysis_obj( data_matrix = example_matrix_match, outcome = outcome_surv_exponential("time", "cnsr", prior_normal(0, 10000)), @@ -283,7 +280,7 @@ is equivalent to fixed power prior weights. This allows for the combination of m IPTW + commensurate prior approach. -```r +``` r analysis_iptw_bdb <- create_analysis_obj( data_matrix = example_matrix_att, outcome = outcome_surv_exponential("time", "cnsr", prior_normal(0, 10000), weight_var = "att"), @@ -301,7 +298,7 @@ We can also use weights to specify a fixed power prior model. Here we set the po for the external controls. -```r +``` r example_matrix_pp01 <- cbind(example_matrix, pp_alpha = ifelse(example_matrix[, "ext"] == 1, 0.1, 1)) analysis_pp01 <- create_analysis_obj( @@ -320,7 +317,7 @@ result_pp01 <- mcmc_sample(analysis_pp01, seed = 123) For comparison, we also fit a full borrowing, a no borrowing, and a BDB model without weights. -```r +``` r result_full <- mcmc_sample( create_analysis_obj( data_matrix = example_matrix, @@ -361,7 +358,7 @@ result_bdb <- mcmc_sample( # Comparison of Results -```r +``` r models <- list( "No borrowing" = result_none, "Full borrowing" = result_full, @@ -375,7 +372,7 @@ models <- list( We can use `summary()` to extract the variable of interest and specify summary statistics. -```r +``` r results_table <- do.call(rbind, lapply( models, function(i) i$summary("HR_trt", c("mean", "median", "sd", "quantile2")) @@ -387,20 +384,20 @@ knitr::kable(cbind(models = names(models), results_table), digits = 3) |models |variable | mean| median| sd| q5| q95| |:--------------|:--------|-----:|------:|-----:|-----:|-----:| -|No borrowing |HR_trt | 0.894| 0.877| 0.178| 0.636| 1.210| -|Full borrowing |HR_trt | 0.388| 0.386| 0.049| 0.311| 0.473| -|Power Prior 01 |HR_trt | 0.634| 0.626| 0.105| 0.476| 0.821| -|ATT Weights |HR_trt | 0.895| 0.886| 0.126| 0.701| 1.116| -|IPTW + BDB |HR_trt | 0.897| 0.882| 0.165| 0.656| 1.188| -|Matching 1:1 |HR_trt | 0.793| 0.784| 0.128| 0.600| 1.018| -|BDB |HR_trt | 0.871| 0.852| 0.175| 0.619| 1.187| +|No borrowing |HR_trt | 0.898| 0.878| 0.180| 0.638| 1.223| +|Full borrowing |HR_trt | 0.388| 0.386| 0.049| 0.310| 0.471| +|Power Prior 01 |HR_trt | 0.634| 0.625| 0.106| 0.478| 0.824| +|ATT Weights |HR_trt | 0.891| 0.882| 0.126| 0.697| 1.110| +|IPTW + BDB |HR_trt | 0.893| 0.877| 0.165| 0.653| 1.185| +|Matching 1:1 |HR_trt | 0.819| 0.808| 0.133| 0.620| 1.054| +|BDB |HR_trt | 0.875| 0.857| 0.178| 0.619| 1.194| We can extract a `draws` object from each model and plot the posterior distribution of the treatment hazard ratio. -```r +``` r plot(density(models[[1]]$draws("HR_trt")), col = 1, xlim = c(0, 2), ylim = c(0, 9), lwd = 2, xlab = "HR_trt", diff --git a/vignettes/simple_overview.Rmd b/vignettes/simple_overview.Rmd index d2b3c731..ecf9f5aa 100644 --- a/vignettes/simple_overview.Rmd +++ b/vignettes/simple_overview.Rmd @@ -35,7 +35,7 @@ library(table1) ## {psborrow2} contains an example matrix -```r +``` r head(example_matrix) ``` @@ -56,14 +56,14 @@ head(example_matrix) ## Load as data.frame for some functions -```r +``` r example_dataframe <- as.data.frame(example_matrix) ``` ## Look at distribution of arms -```r +``` r table(ext = example_matrix[, "ext"], trt = example_matrix[, "trt"]) ``` @@ -78,7 +78,7 @@ table(ext = example_matrix[, "ext"], trt = example_matrix[, "trt"]) ### Kaplan-Meier curves -```r +``` r km_fit <- survfit(Surv(time = time, event = 1 - cnsr) ~ trt + ext, data = example_dataframe ) @@ -92,7 +92,7 @@ The internal and external control arms look quite different... ### Cox model -```r +``` r cox_fit <- coxph(Surv(time = time, event = 1 - cnsr) ~ trt, data = example_dataframe, subset = ext == 0 @@ -113,7 +113,7 @@ cox_fit ## n= 150, number of events= 114 ``` -```r +``` r exp(confint(cox_fit)) # The internal HR is 0.90 (95% CI 0.61 - 1.32) ``` @@ -150,19 +150,19 @@ functions: Prior distributions can be plotted with the plot() method -```r +``` r plot(prior_normal(0, 1), xlim = c(-100, 100), ylim = c(0, 1)) ``` ![plot of chunk unnamed-chunk-7](figure-simple_overview-unnamed-chunk-7-1.png) -```r +``` r plot(prior_normal(0, 10), xlim = c(-100, 100), ylim = c(0, 1)) ``` ![plot of chunk unnamed-chunk-7](figure-simple_overview-unnamed-chunk-7-2.png) -```r +``` r plot(prior_normal(0, 10000), xlim = c(-100, 100), ylim = c(0, 1)) ``` @@ -181,7 +181,7 @@ psborrow2 currently supports 4 outcomes: # Create an exponential survival distribution Outcome object -```r +``` r exp_outcome <- outcome_surv_exponential( time_var = "time", cens_var = "cnsr", @@ -199,7 +199,7 @@ Borrowing objects are created with: ``` -```r +``` r bdb_borrowing <- borrowing_hierarchical_commensurate( ext_flag_col = "ext", tau_prior = prior_gamma(0.001, 0.001) @@ -214,7 +214,7 @@ Treatment objects are created with: ``` -```r +``` r trt_details <- treatment_details( trt_flag_col = "trt", trt_prior = prior_normal(0, 10000) @@ -225,7 +225,7 @@ trt_details <- treatment_details( Combine everything and create object of class Analysis: -```r +``` r analysis_object <- create_analysis_obj( data_matrix = example_matrix, outcome = exp_outcome, @@ -246,7 +246,7 @@ analysis_object <- create_analysis_obj( ## Ready to go! Now call `mcmc_sample()`. ``` -```r +``` r analysis_object ``` @@ -289,7 +289,7 @@ results <- mcmc_sample( ``` -```r +``` r class(results) ``` @@ -297,18 +297,18 @@ class(results) ## [1] "CmdStanMCMC" "CmdStanFit" "R6" ``` -```r +``` r results ``` ``` ## variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -## lp__ -1617.95 -1617.57 1.52 1.28 -1620.95 -1616.17 1.00 1745 2439 -## beta_trt -0.16 -0.16 0.20 0.20 -0.49 0.17 1.00 2413 2532 -## tau 1.19 0.49 1.91 0.67 0.00 4.69 1.00 2043 1716 -## alpha[1] -3.36 -3.35 0.16 0.17 -3.63 -3.10 1.00 2355 2672 -## alpha[2] -2.40 -2.40 0.06 0.05 -2.49 -2.31 1.00 3569 3165 -## HR_trt 0.87 0.85 0.18 0.17 0.61 1.19 1.00 2413 2532 +## lp__ -1618.00 -1617.68 1.51 1.38 -1620.85 -1616.19 1.00 1747 2189 +## beta_trt -0.16 -0.17 0.20 0.21 -0.48 0.18 1.00 2400 2911 +## alpha[1] -3.35 -3.34 0.16 0.17 -3.64 -3.10 1.00 2292 2763 +## alpha[2] -2.40 -2.40 0.06 0.06 -2.49 -2.30 1.00 2848 2880 +## tau 1.21 0.46 2.02 0.64 0.00 4.88 1.00 1917 1265 +## HR_trt 0.87 0.85 0.18 0.17 0.62 1.20 1.00 2400 2911 ``` # Interpret results @@ -316,7 +316,7 @@ results Dictionary to interpret parameters: -```r +``` r variable_dictionary(analysis_object) ``` @@ -331,38 +331,38 @@ variable_dictionary(analysis_object) Create a draws object -```r +``` r draws <- results$draws() ``` Rename draws object parameters -```r +``` r draws <- rename_draws_covariates(draws, analysis_object) ``` Get 95% credible intervals with posterior package -```r +``` r posterior::summarize_draws(draws, ~ quantile(.x, probs = c(0.025, 0.50, 0.975))) ``` ``` ## # A tibble: 6 × 4 -## variable `2.5%` `50%` `97.5%` -## -## 1 lp__ -1622. -1618. -1616. -## 2 treatment log HR -0.557 -0.158 0.244 -## 3 commensurability parameter 0.00108 0.486 6.39 -## 4 baseline log hazard rate, internal -3.68 -3.35 -3.04 -## 5 baseline log hazard rate, external -2.51 -2.40 -2.29 -## 6 treatment HR 0.573 0.854 1.28 +## variable `2.5%` `50%` `97.5%` +## +## 1 lp__ -1622. -1618. -1616. +## 2 treatment log HR -0.548 -0.165 0.256 +## 3 baseline log hazard rate, internal -3.68 -3.34 -3.06 +## 4 baseline log hazard rate, external -2.51 -2.40 -2.29 +## 5 commensurability parameter 0.000673 0.459 6.90 +## 6 treatment HR 0.578 0.848 1.29 ``` Look at histogram of draws with bayesplot package -```r +``` r bayesplot::mcmc_hist(draws, c("treatment HR")) ``` @@ -379,7 +379,7 @@ This is the ***desired outcome*** given how different the control arms were. Check balance between arms -```r +``` r table1( ~ cov1 + cov2 + cov3 + cov4 | factor(ext, labels = c("Internal RCT", "External data")) + @@ -511,7 +511,7 @@ let's adjust for propensity scores in our analysis Create a propensity score model -```r +``` r ps_model <- glm(ext ~ cov1 + cov2 + cov3 + cov4, data = example_dataframe, family = binomial @@ -532,7 +532,7 @@ levels(example_dataframe$ps_cat_) <- c( Convert the data back to a matrix with dummy variables for `ps_cat_` levels -```r +``` r example_matrix_ps <- create_data_matrix( example_dataframe, outcome = c("time", "cnsr"), @@ -570,7 +570,7 @@ res_ps_no_borrow <- mcmc_sample( ``` -```r +``` r draws_ps_no_borrow <- rename_draws_covariates( res_ps_no_borrow$draws(), anls_ps_no_borrow @@ -583,14 +583,14 @@ summarize_draws(draws_ps_no_borrow, ~ quantile(.x, probs = c(0.025, 0.50, 0.975) ## # A tibble: 8 × 4 ## variable `2.5%` `50%` `97.5%` ## -## 1 lp__ -466. -462. -460. -## 2 treatment log HR -0.715 -0.346 0.0647 -## 3 baseline log hazard rate -4.72 -4.18 -3.71 -## 4 ps_cat_low -0.417 0.381 1.11 -## 5 ps_cat_low_med 0.437 0.995 1.58 -## 6 ps_cat_high_med 1.43 2.08 2.76 -## 7 ps_cat_high 2.37 3.00 3.63 -## 8 treatment HR 0.489 0.707 1.07 +## 1 lp__ -466. -462. -459. +## 2 treatment log HR -0.738 -0.347 0.0689 +## 3 baseline log hazard rate -4.76 -4.20 -3.72 +## 4 ps_cat_low -0.344 0.408 1.13 +## 5 ps_cat_low_med 0.460 0.998 1.61 +## 6 ps_cat_high_med 1.44 2.10 2.80 +## 7 ps_cat_high 2.42 3.01 3.66 +## 8 treatment HR 0.478 0.707 1.07 ``` @@ -619,7 +619,7 @@ res_ps_bdb <- mcmc_sample( ``` -```r +``` r draws_ps_bdb <- rename_draws_covariates( res_ps_bdb$draws(), anls_ps_bdb @@ -630,18 +630,18 @@ summarize_draws(draws_ps_bdb, ~ quantile(.x, probs = c(0.025, 0.50, 0.975))) ``` ## # A tibble: 10 × 4 -## variable `2.5%` `50%` `97.5%` -## -## 1 lp__ -1426. -1421. -1418. -## 2 treatment log HR -0.684 -0.352 -0.0260 -## 3 commensurability parameter 0.0816 52.0 1372. -## 4 baseline log hazard rate, internal -4.64 -4.18 -3.76 -## 5 baseline log hazard rate, external -4.65 -4.20 -3.78 -## 6 ps_cat_low -0.355 0.263 0.840 -## 7 ps_cat_low_med 0.629 1.06 1.54 -## 8 ps_cat_high_med 1.62 2.09 2.60 -## 9 ps_cat_high 2.51 2.94 3.41 -## 10 treatment HR 0.504 0.704 0.974 +## variable `2.5%` `50%` `97.5%` +## +## 1 lp__ -1426. -1420. -1418. +## 2 treatment log HR -0.671 -0.350 -0.0350 +## 3 baseline log hazard rate, internal -4.65 -4.19 -3.77 +## 4 baseline log hazard rate, external -4.64 -4.21 -3.80 +## 5 commensurability parameter 0.107 58.0 1659. +## 6 ps_cat_low -0.300 0.274 0.851 +## 7 ps_cat_low_med 0.639 1.07 1.52 +## 8 ps_cat_high_med 1.65 2.10 2.58 +## 9 ps_cat_high 2.54 2.95 3.40 +## 10 treatment HR 0.511 0.705 0.966 ``` It looks like PS + BDB allowed us to most accurately recover the diff --git a/vignettes/simulation_study.Rmd b/vignettes/simulation_study.Rmd index 532d9f13..f3b7ab74 100644 --- a/vignettes/simulation_study.Rmd +++ b/vignettes/simulation_study.Rmd @@ -25,10 +25,9 @@ Note that we'll need `cmdstanr` to run the simulation study. Please install Let's load `psborrow2` to start: -```r +``` r library(psborrow2) library(cmdstanr) -# Warning: package 'cmdstanr' was built under R version 4.3.3 ``` # Bringing your own simulated data @@ -139,7 +138,7 @@ sim_single_matrix <- function(n = 500, # n simulated pts -```r +``` r set.seed(123) head(sim_single_matrix(n = 500, hr = 0.5, drift_hr = 1.2)) # id ext trt time status cnsr @@ -160,7 +159,7 @@ scenarios: - True HR = 1.0, drift HR = 1.5 -```r +``` r # Seed for reproducibility set.seed(123) @@ -191,7 +190,7 @@ my_data_list <- list( There are 4 scenarios. -```r +``` r NROW(my_data_list) # [1] 4 ``` @@ -199,7 +198,7 @@ NROW(my_data_list) Each scenario has 100 matrices. -```r +``` r NROW(my_data_list[[1]]) # [1] 100 ``` @@ -207,7 +206,7 @@ NROW(my_data_list[[1]]) The lowest level of the list of lists is a data matrix. -```r +``` r head(my_data_list[[1]][[1]]) # id ext trt time status cnsr # [1,] 1 0 0 8.179722 1 0 @@ -236,7 +235,7 @@ In this example, the 4 scenarios are summarized with the below `guide`: -```r +``` r my_sim_data_guide <- expand.grid( true_hr = c(0.6, 1.0), drift_hr = c("No drift HR", "Moderate drift HR") @@ -265,7 +264,7 @@ For our study, these are `"true_hr"`, `"drift_hr"`, and `"id"`. Putting it all together, we can create an object of class `SimDataList`: -```r +``` r my_sim_data_list <- sim_data_list( data_list = my_data_list, guide = my_sim_data_guide, @@ -291,7 +290,7 @@ or a list of `Outcome` class objects passed to `sim_outcome_list()`. For our example, let's just use a single exponential distribution. -```r +``` r my_sim_out <- outcome_surv_exponential( time_var = "time", cens_var = "cnsr", @@ -330,7 +329,7 @@ We'll use a special list of `Borrowing` objects, which we'll create through the function `sim_borrowing_list()`. -```r +``` r my_borrowing_list <- sim_borrowing_list( list( "No borrowing" = borrowing_none("ext"), @@ -365,7 +364,7 @@ or a list of these classes, created with `sim_treatment_list()`. Let's just use a single instance: -```r +``` r my_sim_treat <- treatment_details("trt", prior_normal(0, 1000)) my_sim_treat @@ -401,7 +400,7 @@ combinations of parameters. Let's create a simulation object of class -```r +``` r simulation_obj <- create_simulation_obj( my_sim_data_list, outcome = my_sim_out, @@ -421,7 +420,7 @@ We can access the guide to see the specific scenarios that will be simulated with `show_guide()`: -```r +``` r show_guide(simulation_obj) # true_hr drift_hr id n_datasets_per_param outcome_scenario # 1 0.6 No drift HR 1 100 default @@ -473,7 +472,7 @@ specifies the quantiles for null coverage and true coverage. For instance, -```r +``` r simulation_res <- mcmc_sample( simulation_obj, posterior_quantiles = c(0.025, 0.975), @@ -492,7 +491,7 @@ Instead, it returns a class unique to simulation study results: `MCMCSimulationResult`. -```r +``` r class(simulation_res) # [1] "MCMCSimulationResult" # attr(,"package") @@ -503,7 +502,7 @@ Let's look at the performance of our simulation study by extracting the data.fra that summarizes results, `get_results()`: -```r +``` r simulation_res_df <- get_results(simulation_res) head(simulation_res_df) # true_hr drift_hr id n_datasets_per_param outcome_scenario @@ -514,29 +513,28 @@ head(simulation_res_df) # 5 0.6 No drift HR 1 100 default # 6 1.0 No drift HR 2 100 default # borrowing_scenario covariate_scenario treatment_scenario trt_var -# 1 No borrowing No adjustment default 0.06385613 -# 2 No borrowing No adjustment default 0.06300573 -# 3 No borrowing No adjustment default 0.06358375 -# 4 No borrowing No adjustment default 0.06146742 -# 5 Full borrowing No adjustment default 0.02640545 -# 6 Full borrowing No adjustment default 0.02538354 +# 1 No borrowing No adjustment default 0.06359053 +# 2 No borrowing No adjustment default 0.06276921 +# 3 No borrowing No adjustment default 0.06305379 +# 4 No borrowing No adjustment default 0.06286691 +# 5 Full borrowing No adjustment default 0.02635029 +# 6 Full borrowing No adjustment default 0.02537450 # mse_mean bias_mean null_coverage true_coverage -# 1 0.05884766 0.05935161 0.55 0.97 -# 2 0.13329996 0.03483396 0.99 0.99 -# 3 0.04709898 0.01135305 0.46 0.96 -# 4 0.12446717 0.02322880 0.96 0.96 -# 5 0.02005176 0.02065375 0.11 0.98 -# 6 0.04977218 0.00931398 0.96 0.96 +# 1 0.05917938 0.06011393 0.59 0.96 +# 2 0.13170714 0.03556530 0.95 0.95 +# 3 0.04730839 0.01311444 0.41 0.96 +# 4 0.12775243 0.02715064 0.96 0.96 +# 5 0.02024202 0.02088567 0.10 0.96 +# 6 0.05035481 0.01011514 0.96 0.96 ``` Let's quickly visualize the results using `ggplot2`. We will first load `ggplot2` and factorize our borrowing scenarios: -```r +``` r # Load ggplot2 library(ggplot2) -# Warning: package 'ggplot2' was built under R version 4.3.2 # Factorize simulation_res_df$borrowing_scenario <- factor(simulation_res_df$borrowing_scenario, diff --git a/vignettes/weighting.Rmd b/vignettes/weighting.Rmd index bd6fb5fd..bc8ca155 100644 --- a/vignettes/weighting.Rmd +++ b/vignettes/weighting.Rmd @@ -29,13 +29,11 @@ Note that we'll need `cmdstanr` to run this analysis. Please install [this guide](https://mc-stan.org/cmdstanr/articles/cmdstanr.html). -```r +``` r library(psborrow2) -library(cmdstsanr) -# Error in library(cmdstsanr): there is no package called 'cmdstsanr' +library(cmdstanr) library(BayesPPD) library(ggplot2) -# Warning: package 'ggplot2' was built under R version 4.3.2 ``` # Logistic regression @@ -49,7 +47,7 @@ The model has a treatment indicator and two covariates, `resp ~ trt + cov1 + cov -```r +``` r logistic_glm_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { logistic_glm <- glm( resp ~ trt + cov1 + cov2, @@ -73,7 +71,7 @@ logistic_glm_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { ### BayesPPD -```r +``` r set.seed(123) logistic_ppd_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { logistic_ppd <- glm.fixed.a0( @@ -108,7 +106,7 @@ logistic_ppd_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { -```r +``` r logistic_psb_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { logistic_psb2 <- create_analysis_obj( data_matrix = as.matrix(cbind( @@ -150,7 +148,7 @@ logistic_psb_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { ### Results -```r +``` r logistic_res_df <- do.call( rbind, c(logistic_glm_reslist, logistic_ppd_reslist, logistic_psb_reslist) @@ -175,31 +173,31 @@ knitr::kable(wide[new_order, ], digits = 3, row.names = FALSE) | borrowing|variable |est_ci.glm |est_ci.BayesPPD |est_ci.psborrow2 | |---------:|:-----------|:-----------------------|:-----------------------|:-----------------------| -| 0.00|(Intercept) |0.646 (-0.038, 1.357) |0.691 (-0.014, 1.399) |0.657 (-0.039, 1.381) | -| 0.25|(Intercept) |0.394 (-0.131, 0.931) |0.396 (-0.131, 0.941) |0.404 (-0.130, 0.939) | -| 0.50|(Intercept) |0.293 (-0.158, 0.751) |0.297 (-0.184, 0.767) |0.304 (-0.169, 0.784) | -| 0.75|(Intercept) |0.235 (-0.168, 0.642) |0.240 (-0.175, 0.665) |0.237 (-0.164, 0.654) | -| 1.00|(Intercept) |0.196 (-0.172, 0.567) |0.202 (-0.172, 0.578) |0.203 (-0.167, 0.568) | -| 0.00|cov1 |-0.771 (-1.465, -0.095) |-0.809 (-1.515, -0.126) |-0.789 (-1.494, -0.099) | -| 0.25|cov1 |-0.781 (-1.340, -0.231) |-0.793 (-1.350, -0.236) |-0.794 (-1.351, -0.250) | -| 0.50|cov1 |-0.769 (-1.252, -0.291) |-0.776 (-1.271, -0.270) |-0.786 (-1.295, -0.300) | -| 0.75|cov1 |-0.758 (-1.191, -0.329) |-0.769 (-1.212, -0.310) |-0.763 (-1.198, -0.324) | -| 1.00|cov1 |-0.749 (-1.145, -0.357) |-0.761 (-1.152, -0.369) |-0.758 (-1.150, -0.369) | -| 0.00|cov2 |-0.730 (-1.472, -0.008) |-0.745 (-1.496, -0.004) |-0.752 (-1.496, -0.006) | -| 0.25|cov2 |-0.559 (-1.114, -0.014) |-0.568 (-1.124, -0.023) |-0.571 (-1.132, -0.016) | -| 0.50|cov2 |-0.459 (-0.926, 0.003) |-0.471 (-0.953, -0.003) |-0.464 (-0.928, 0.005) | -| 0.75|cov2 |-0.398 (-0.811, 0.011) |-0.402 (-0.814, 0.006) |-0.407 (-0.824, 0.002) | -| 1.00|cov2 |-0.358 (-0.731, 0.013) |-0.358 (-0.736, 0.022) |-0.363 (-0.741, 0.016) | -| 0.00|trt |0.154 (-0.558, 0.871) |0.137 (-0.572, 0.864) |0.165 (-0.567, 0.894) | -| 0.25|trt |0.349 (-0.183, 0.885) |0.361 (-0.170, 0.899) |0.349 (-0.202, 0.875) | -| 0.50|trt |0.405 (-0.082, 0.894) |0.415 (-0.068, 0.916) |0.409 (-0.078, 0.892) | -| 0.75|trt |0.434 (-0.031, 0.900) |0.436 (-0.031, 0.913) |0.439 (-0.030, 0.919) | -| 1.00|trt |0.452 (-0.000, 0.905) |0.456 (-0.010, 0.909) |0.453 (-0.009, 0.917) | - - - - -```r +| 0.00|(Intercept) |0.646 (-0.038, 1.357) |0.691 (-0.014, 1.399) |0.662 (-0.015, 1.383) | +| 0.25|(Intercept) |0.394 (-0.131, 0.931) |0.396 (-0.131, 0.941) |0.404 (-0.128, 0.948) | +| 0.50|(Intercept) |0.293 (-0.158, 0.751) |0.297 (-0.184, 0.767) |0.300 (-0.149, 0.767) | +| 0.75|(Intercept) |0.235 (-0.168, 0.642) |0.240 (-0.175, 0.665) |0.242 (-0.153, 0.643) | +| 1.00|(Intercept) |0.196 (-0.172, 0.567) |0.202 (-0.172, 0.578) |0.200 (-0.176, 0.580) | +| 0.00|cov1 |-0.771 (-1.465, -0.095) |-0.809 (-1.515, -0.126) |-0.790 (-1.488, -0.098) | +| 0.25|cov1 |-0.781 (-1.340, -0.231) |-0.793 (-1.350, -0.236) |-0.797 (-1.363, -0.237) | +| 0.50|cov1 |-0.769 (-1.252, -0.291) |-0.776 (-1.271, -0.270) |-0.783 (-1.275, -0.306) | +| 0.75|cov1 |-0.758 (-1.191, -0.329) |-0.769 (-1.212, -0.310) |-0.770 (-1.195, -0.352) | +| 1.00|cov1 |-0.749 (-1.145, -0.357) |-0.761 (-1.152, -0.369) |-0.756 (-1.154, -0.357) | +| 0.00|cov2 |-0.730 (-1.472, -0.008) |-0.745 (-1.496, -0.004) |-0.751 (-1.505, 0.008) | +| 0.25|cov2 |-0.559 (-1.114, -0.014) |-0.568 (-1.124, -0.023) |-0.574 (-1.127, -0.027) | +| 0.50|cov2 |-0.459 (-0.926, 0.003) |-0.471 (-0.953, -0.003) |-0.461 (-0.931, -0.000) | +| 0.75|cov2 |-0.398 (-0.811, 0.011) |-0.402 (-0.814, 0.006) |-0.403 (-0.813, 0.008) | +| 1.00|cov2 |-0.358 (-0.731, 0.013) |-0.358 (-0.736, 0.022) |-0.361 (-0.738, 0.014) | +| 0.00|trt |0.154 (-0.558, 0.871) |0.137 (-0.572, 0.864) |0.156 (-0.555, 0.876) | +| 0.25|trt |0.349 (-0.183, 0.885) |0.361 (-0.170, 0.899) |0.357 (-0.180, 0.912) | +| 0.50|trt |0.405 (-0.082, 0.894) |0.415 (-0.068, 0.916) |0.411 (-0.067, 0.907) | +| 0.75|trt |0.434 (-0.031, 0.900) |0.436 (-0.031, 0.913) |0.438 (-0.024, 0.910) | +| 1.00|trt |0.452 (-0.000, 0.905) |0.456 (-0.010, 0.909) |0.456 (-0.002, 0.930) | + + + + +``` r logistic_res_df$borrowing_x <- logistic_res_df$borrowing + (as.numeric(factor(logistic_res_df$fitter)) - 3) / 100 @@ -223,7 +221,7 @@ equal to 0, 0.25, 0.5, 0.75, 1. The internal treated and control patients have w The model has a treatment indicator and two covariates, `eventtime ~ trt + cov1 + cov2`. -```r +``` r set.seed(123) sim_data_exp <- cbind( simsurv::simsurv( @@ -237,7 +235,7 @@ sim_data_exp <- cbind( ) ``` -```r +``` r head(sim_data_exp) # id eventtime status trt cov1 cov2 ext censor # 1 1 0.14802638 1 0 0 1 0 0 @@ -250,7 +248,7 @@ head(sim_data_exp) -```r +``` r ## glm glm_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { exp_glm <- glm( @@ -276,7 +274,7 @@ glm_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { ``` -```r +``` r ## BayesPPD set.seed(123) ppd_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { @@ -311,7 +309,7 @@ ppd_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { -```r +``` r ## psborrow2 psb_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { exp_psb2 <- create_analysis_obj( @@ -347,7 +345,7 @@ psb_reslist <- lapply(c(0, 0.25, 0.5, 0.75, 1), function(w) { ``` -```r +``` r knitr::knit_hooks$set(output = output_hook) ``` @@ -356,7 +354,7 @@ knitr::knit_hooks$set(output = output_hook) Note: Wald confidence intervals are displayed here for `glm` for the exponential models. -```r +``` r res_df <- do.call(rbind, c(glm_reslist, ppd_reslist, psb_reslist)) res_df$est_ci <- sprintf( @@ -378,31 +376,31 @@ knitr::kable(wide[new_order, ], digits = 3, row.names = FALSE) | borrowing|variable |est_ci.glm |est_ci.BayesPPD |est_ci.psborrow2 | |---------:|:-----------|:----------------------|:----------------------|:----------------------| -| 0.00|(Intercept) |1.930 (1.597, 2.263) |1.915 (1.572, 2.236) |1.914 (1.576, 2.239) | -| 0.25|(Intercept) |1.581 (1.323, 1.838) |1.567 (1.308, 1.814) |1.573 (1.311, 1.819) | -| 0.50|(Intercept) |1.473 (1.251, 1.695) |1.466 (1.247, 1.685) |1.467 (1.244, 1.683) | -| 0.75|(Intercept) |1.414 (1.215, 1.613) |1.407 (1.208, 1.601) |1.409 (1.204, 1.605) | -| 1.00|(Intercept) |1.376 (1.194, 1.558) |1.369 (1.183, 1.551) |1.374 (1.198, 1.550) | -| 0.00|cov1 |0.630 (0.300, 0.959) |0.630 (0.304, 0.960) |0.634 (0.298, 0.973) | -| 0.25|cov1 |0.722 (0.453, 0.991) |0.729 (0.459, 1.013) |0.724 (0.455, 1.006) | -| 0.50|cov1 |0.786 (0.552, 1.020) |0.789 (0.559, 1.026) |0.788 (0.555, 1.027) | -| 0.75|cov1 |0.827 (0.616, 1.037) |0.829 (0.622, 1.044) |0.829 (0.625, 1.040) | -| 1.00|cov1 |0.854 (0.662, 1.046) |0.859 (0.668, 1.057) |0.852 (0.666, 1.038) | -| 0.00|cov2 |0.043 (-0.309, 0.395) |0.039 (-0.318, 0.382) |0.037 (-0.324, 0.386) | -| 0.25|cov2 |-0.009 (-0.273, 0.255) |-0.008 (-0.283, 0.252) |-0.012 (-0.279, 0.251) | -| 0.50|cov2 |0.009 (-0.213, 0.232) |0.007 (-0.218, 0.226) |0.008 (-0.217, 0.225) | -| 0.75|cov2 |0.023 (-0.173, 0.220) |0.024 (-0.172, 0.216) |0.022 (-0.171, 0.219) | -| 1.00|cov2 |0.033 (-0.144, 0.211) |0.033 (-0.145, 0.206) |0.034 (-0.148, 0.212) | -| 0.00|trt |1.256 (0.911, 1.601) |1.260 (0.911, 1.629) |1.260 (0.909, 1.620) | -| 0.25|trt |1.564 (1.306, 1.822) |1.563 (1.302, 1.816) |1.564 (1.306, 1.816) | -| 0.50|trt |1.622 (1.386, 1.859) |1.620 (1.383, 1.851) |1.620 (1.387, 1.850) | -| 0.75|trt |1.649 (1.422, 1.875) |1.646 (1.414, 1.871) |1.645 (1.414, 1.865) | -| 1.00|trt |1.664 (1.443, 1.884) |1.660 (1.438, 1.874) |1.661 (1.436, 1.875) | - - - - -```r +| 0.00|(Intercept) |1.930 (1.597, 2.263) |1.915 (1.572, 2.236) |1.913 (1.573, 2.230) | +| 0.25|(Intercept) |1.581 (1.323, 1.838) |1.567 (1.308, 1.814) |1.570 (1.313, 1.821) | +| 0.50|(Intercept) |1.473 (1.251, 1.695) |1.466 (1.247, 1.685) |1.466 (1.238, 1.681) | +| 0.75|(Intercept) |1.414 (1.215, 1.613) |1.407 (1.208, 1.601) |1.409 (1.203, 1.597) | +| 1.00|(Intercept) |1.376 (1.194, 1.558) |1.369 (1.183, 1.551) |1.372 (1.187, 1.550) | +| 0.00|cov1 |0.630 (0.300, 0.959) |0.630 (0.304, 0.960) |0.632 (0.306, 0.964) | +| 0.25|cov1 |0.722 (0.453, 0.991) |0.729 (0.459, 1.013) |0.726 (0.457, 0.996) | +| 0.50|cov1 |0.786 (0.552, 1.020) |0.789 (0.559, 1.026) |0.790 (0.558, 1.026) | +| 0.75|cov1 |0.827 (0.616, 1.037) |0.829 (0.622, 1.044) |0.828 (0.617, 1.042) | +| 1.00|cov1 |0.854 (0.662, 1.046) |0.859 (0.668, 1.057) |0.856 (0.661, 1.051) | +| 0.00|cov2 |0.043 (-0.309, 0.395) |0.039 (-0.318, 0.382) |0.038 (-0.318, 0.383) | +| 0.25|cov2 |-0.009 (-0.273, 0.255) |-0.008 (-0.283, 0.252) |-0.010 (-0.267, 0.247) | +| 0.50|cov2 |0.009 (-0.213, 0.232) |0.007 (-0.218, 0.226) |0.006 (-0.221, 0.232) | +| 0.75|cov2 |0.023 (-0.173, 0.220) |0.024 (-0.172, 0.216) |0.023 (-0.176, 0.217) | +| 1.00|cov2 |0.033 (-0.144, 0.211) |0.033 (-0.145, 0.206) |0.034 (-0.149, 0.214) | +| 0.00|trt |1.256 (0.911, 1.601) |1.260 (0.911, 1.629) |1.262 (0.913, 1.612) | +| 0.25|trt |1.564 (1.306, 1.822) |1.563 (1.302, 1.816) |1.566 (1.301, 1.821) | +| 0.50|trt |1.622 (1.386, 1.859) |1.620 (1.383, 1.851) |1.621 (1.381, 1.851) | +| 0.75|trt |1.649 (1.422, 1.875) |1.646 (1.414, 1.871) |1.646 (1.411, 1.876) | +| 1.00|trt |1.664 (1.443, 1.884) |1.660 (1.438, 1.874) |1.659 (1.435, 1.879) | + + + + +``` r res_df$borrowing_x <- res_df$borrowing + (as.numeric(factor(res_df$fitter)) - 3) / 100