From 4af86bb16f2e5a7a0143fddaafcec9bd9937b2d7 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Mon, 30 Jan 2023 21:24:13 +0100 Subject: [PATCH 1/2] added function to consider a follow-up composition for prospective or change analyses --- DESCRIPTION | 2 +- R/check_input_args.R | 14 ++++-- R/plot_delta_comp.R | 70 ++++++++++++++++++++--------- R/predict_delta_comps.R | 91 ++++++++++++++++++++++++++++++++------ man/check_input_args.Rd | 4 +- man/plot_delta_comp.Rd | 9 +++- man/predict_delta_comps.Rd | 8 ++-- 7 files changed, 152 insertions(+), 46 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6acb9be..fbff30f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,4 +13,4 @@ LazyData: true Imports: compositions, ggplot2 Suggests: testthat -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.3 diff --git a/R/check_input_args.R b/R/check_input_args.R index 59180a8..d5b45de 100644 --- a/R/check_input_args.R +++ b/R/check_input_args.R @@ -6,6 +6,7 @@ #' @param dataf A \code{data.frame} containing data #' @param y Name (as string/character vector of length 1) of outcome variable in \code{dataf} #' @param comps Character vector of names of compositions in \code{dataf}. See details for more information. +#' @param comps_fup Character vector of names of compositions at follow up in \code{dataf}. See details for more information. #' @param covars Character vector of covariates names (non-comp variables) in \code{dataf} or NULL for none (default). #' @param deltas A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional. #' @export @@ -22,10 +23,7 @@ # # - - - -check_input_args <- function(dataf, y, comps, covars, deltas) { +check_input_args <- function(dataf, y, comps, comps_fup, covars, deltas) { if (!is.data.frame(dataf)) { @@ -46,6 +44,14 @@ check_input_args <- function(dataf, y, comps, covars, deltas) { stop("At least two compositional components are required to create an ilr linear regression.") } + if (!is_null_or_na(comps_fup)) { + if (!is.character(comps_fup)) { + stop("Please supply a character string of the compositional component column names in dataf.") + } else if (length(comps_fup) < 2) { + stop("At least two compositional components are required to create an ilr linear regression.") + } + } + if (!is_null_or_na(covars) & !is.character(covars)) { stop("Please supply a character string of the covariate column names in dataf (optionally NULL or NA for no covariates).") } diff --git a/R/plot_delta_comp.R b/R/plot_delta_comp.R index a70356f..388c532 100644 --- a/R/plot_delta_comp.R +++ b/R/plot_delta_comp.R @@ -8,9 +8,12 @@ globalVariables(c( #' #' @author Ty Stanford #' @description Provided the data (containing outcome, composiitional compoents and covariates), fit a ilr multiple linear regression model and provide predictions from reallocating compositional values pairwise amunsnst the components model. +#' #' @param dc_obj A \code{deltacomp_obj} object returned from the function \code{predict_delta_comps} #' @param comp_total A numeric scalar that is the original units of the composition to make the x-axis the original scale instead of in the range \code{[min(delta), max(delta)]} in (-1, 1). +#' @param graph.sys Graphing system within R. This defaults to "ggplot2" but can be one out of c("base","ggplot2"). #' @param units_lab Character string of the units of the compositions relating to \code{comp_total} to add to the x-axis label +#' #' @export #' @examples #' data(fairclough) @@ -52,7 +55,8 @@ globalVariables(c( -plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) { +plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL, + graph.sys = c("base", "ggplot2")[2]) { if (!is_deltacomp_obj(dc_obj)) { stop("Input needs to be a deltacomp object. i.e., data.frame returned by predict_delta_comps().") @@ -90,27 +94,49 @@ plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) { levels(dc_obj$`comp-`) <- paste0(comps, "-Delta") } - ggp <- - ggplot(dc_obj) + - geom_vline(xintercept = 0, col = "grey60") + - geom_hline(yintercept = 0, col = "grey60") + - geom_line(aes(x = delta, y = delta_pred, col = `comp+`)) + - geom_point(aes(x = delta, y = delta_pred, col = `comp+`), size = 1) + - geom_ribbon(aes(x = delta, ymin = ci_lo, ymax = ci_up, fill = `comp+`), alpha = 0.3) + - theme_bw() + - labs( - x = paste0("Change/delta in composition", x_lab_add), - y = paste0("Predicted change in outcome (", outc, ") with ", ci_lev, "% CI") - ) + - theme(legend.position = "none") - - - if (comparisons == "one-v-one") { - ggp <- ggp + - facet_grid(`comp-` ~ `comp+`, labeller = label_parsed) - } else { - ggp <- ggp + - facet_wrap(~ `comp+`, labeller = label_parsed) + if (graph.sys == "ggplot2") { + ggp <- + ggplot(dc_obj) + + geom_vline(xintercept = 0, col = "grey60") + + geom_hline(yintercept = 0, col = "grey60") + + geom_line(aes(x = delta, y = delta_pred, col = `comp+`)) + + geom_point(aes(x = delta, y = delta_pred, col = `comp+`), size = 1) + + geom_ribbon(aes(x = delta, ymin = ci_lo, ymax = ci_up, fill = `comp+`), alpha = 0.3) + + theme_bw() + + labs( + x = paste0("Change/delta in composition", x_lab_add), + y = paste0("Predicted change in outcome (", outc, ") with ", ci_lev, "% CI") + ) + + theme(legend.position = "none") + + + if (comparisons == "one-v-one") { + ggp <- ggp + + facet_grid(`comp-` ~ `comp+`, labeller = label_parsed) + } else { + ggp <- ggp + + facet_wrap(~ `comp+`, labeller = label_parsed) + } + } else if (graph.sys == "base") { + # to do.... + # PLOT + par(las = 1, mfrow = c(2,2)) + for (part in unique(dc_obj$`comp+`)) { + thisPlot = which(dc_obj$`comp+` == part) + plot(x = dc_obj$delta[thisPlot], y = dc_obj$delta_pred[thisPlot], + type = "o", pch = 19, + main = bquote(.(gsub("+Delta", "", part, fixed = T)) + Delta), + font.main=1, + col = colors[4], lwd = 3, + xlim = range(dc_obj$delta), + ylim = range(c(dc_obj$ci_lo, dc_obj$ci_up)), + ylab = paste0("Predicted change in outcome (", outc, ") with ", ci_lev, "% CI"), + xlab = paste0("Change/delta in composition", x_lab_add)) + polygon(x = c(dc_obj$delta[thisPlot], rev(dc_obj$delta[thisPlot])), + y = c(dc_obj$ci_lo[thisPlot], rev(dc_obj$ci_up[thisPlot])), + border = NA, col = myfunctions::add_alpha("red", alpha), fillOddEven = FALSE) + abline(h = 0) + } } return(ggp) diff --git a/R/predict_delta_comps.R b/R/predict_delta_comps.R index 52c8aca..15e385d 100644 --- a/R/predict_delta_comps.R +++ b/R/predict_delta_comps.R @@ -46,20 +46,25 @@ predict_delta_comps <- function( dataf, # data.frame of data y, # character name of outcome in dataf comps, # character vector of names of compositions in dataf + comps_fup = NULL, # character vector of names of compositions at follow up in dataf covars = NULL, # character vector of names of covariates (non-comp variables) in dataf - deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise + analysis_type = c("cross-sectional", "long_prospective", "long_change")[1], comparisons = c("prop-realloc", "one-v-one")[1], + deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise alpha = 0.05 ){ # perform some basic input checks - throws errors where input incorrect - check_input_args(dataf, y, comps, covars, deltas) + check_input_args(dataf, y, comps, comps_fup, covars, deltas) if (is_null_or_na(covars)) { # convert 0 length vecs and NAs to NULL covars <- NULL } - dataf <- rm_na_data_rows(dataf, c(y, comps, covars)) + if (is_null_or_na(comps_fup)) { # convert 0 length vecs and NAs to NULL + comps_fup <- NULL + } + dataf <- rm_na_data_rows(dataf, c(y, comps, comps_fup, covars)) # in case the data are much smaller after removing NAs - check_input_args(dataf, y, comps, covars, deltas) + check_input_args(dataf, y, comps, comps_fup, covars, deltas) check_strictly_positive_vals(dataf, comps) comparisons <- get_comp_type(comparisons) @@ -71,6 +76,7 @@ predict_delta_comps <- function( # standardise comps dataf <- standardise_comps(dataf, comps) + if (!is.null(comps_fup) & analysis_type != "cross-sectional") dataf <- standardise_comps(dataf, comps_fup) # get the mean of the compositions on the geometric scale mean_comps <- @@ -81,6 +87,16 @@ predict_delta_comps <- function( robust = FALSE ) + if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + mean_comps_fup <- + compositions::mean.acomp( + compositions::acomp( + dataf[, comps_fup] + ), + robust = FALSE + ) + } + if(!all.equal(1, sum(mean_comps), tolerance = 1e-5)) stop("Calculated mean composition does not sum to 1") @@ -91,6 +107,9 @@ predict_delta_comps <- function( ### and covariates m_cov <- NULL mean_X <- as.data.frame(t(mean_comps)) + if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + mean_X = cbind(mean_X, as.data.frame(t(mean_comps_fup))) + } if (n_covar > 0) { m_cov <- get_avg_covs(dataf, covars) # testing: @@ -107,32 +126,68 @@ predict_delta_comps <- function( # add ilr coords to dataset dataf <- append_ilr_coords(dataf, comps, psi) mean_X <- append_ilr_coords(mean_X, comps, psi) + if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + dataf <- append_ilr_coords(dataf, comps_fup, psi) + mean_X <- append_ilr_coords(mean_X, comps_fup, psi) + + } # print(tibble::as_tibble(mean_X)) print_ilr_trans(comps) # print to console the ilr transformation for the user # create dataset X only consisting of outcome, ilr coords and covars ilr_names <- paste0("ilr", 1:(n_comp - 1)) + if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + ilr_names_fup = paste0(ilr_names, "_fup") + colnames(dataf)[n_comp:(n_comp + (n_comp - 2))] = ilr_names_fup + colnames(mean_X)[n_comp:(n_comp + (n_comp - 2))] = ilr_names_fup + } # X <- dataf[, colnames(dataf) %in% c(y, ilr_names, covars)] X <- dataf[, c(y, ilr_names, covars)] # force order + if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + if (analysis_type == "long_prospective") { + X <- dataf[, c(y, ilr_names, ilr_names_fup, covars)] # force order + covars = c(covars, ilr_names_fup) + } else if (analysis_type == "long_change") { + dataf[, ilr_names_fup] = dataf[, ilr_names_fup] - dataf[, ilr_names] + ilr_names_fup = gsub("_fup", "_ch", ilr_names_fup) + colnames(dataf) = gsub("_fup", "_ch", colnames(dataf)) + colnames(mean_X) = gsub("_fup", "_ch", colnames(mean_X)) + X <- dataf[, c(y, ilr_names_fup, ilr_names, covars)] # force order + covars = c(covars, ilr_names) + } + } # fit model lm_X <- fit_lm(y, X) # ANOVA of whether compositional variables are statistically significant collectively # note drop = FALSE in case no covariates and # stop one column from data.frame becoming vector - compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names)]) - + if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { + compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names)]) + } else if (analysis_type == "long_change") { + compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names_fup)]) + } mean_pred <- get_mean_pred(lm_X, mean_X, alpha = alpha) # extract linear model quantities required for further calculations lm_quants <- extract_lm_quantities(lm_X, alpha = alpha) - delta_mat <- get_delta_mat(deltas, comparisons, comps, mean_comps) - n_preds <- nrow(delta_mat) - poss_comps <- get_all_comparison_mat(deltas, comparisons, comps, mean_comps) - - m_comps <- matrix(rep(mean_comps, n_preds), nrow = n_preds, byrow = TRUE) - m_delta <- m_comps + delta_mat + if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { + delta_mat <- get_delta_mat(deltas, comparisons, comps, mean_comps) + n_preds <- nrow(delta_mat) + poss_comps <- get_all_comparison_mat(deltas, comparisons, comps, mean_comps) + + m_comps <- matrix(rep(mean_comps, n_preds), nrow = n_preds, byrow = TRUE) + m_delta <- m_comps + delta_mat + + } else if (analysis_type == "long_change") { + delta_mat <- get_delta_mat(deltas, comparisons, comps_fup, mean_comps_fup) + n_preds <- nrow(delta_mat) + poss_comps <- get_all_comparison_mat(deltas, comparisons, comps_fup, mean_comps_fup) + + m_comps <- matrix(rep(mean_comps_fup, n_preds), nrow = n_preds, byrow = TRUE) + m_delta <- m_comps + delta_mat + } m_delta_less_0 <- rowSums(m_delta < 0) if(any(m_delta_less_0 > 0)) { @@ -156,13 +211,21 @@ predict_delta_comps <- function( attr(ilr_delta, "class") <- NULL attr(ilr_means, "class") <- NULL - x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names, ilr_delta, ilr_means) + if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { + x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names, ilr_delta, ilr_means) + } else if (analysis_type == "long_change") { + x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names_fup, ilr_delta, ilr_means) + } y0_star <- x0_star %*% lm_quants$beta_hat se_y0_star <- get_se_y0_star(x0_star, lm_quants$s_e, lm_quants$XtX_inv) # get labels and deltas for reallocations - realloc_nms <- get_realloc_nms(comps, comparisons, poss_comps) + if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { + realloc_nms <- get_realloc_nms(comps, comparisons, poss_comps) + } else if (analysis_type == "long_change") { + realloc_nms <- get_realloc_nms(comps_fup, comparisons, poss_comps) + } delta_list <- get_pred_deltas(delta_mat, realloc_nms) diff --git a/man/check_input_args.Rd b/man/check_input_args.Rd index 339592d..a27bc6b 100644 --- a/man/check_input_args.Rd +++ b/man/check_input_args.Rd @@ -4,7 +4,7 @@ \alias{check_input_args} \title{Sanity checks for arguments passed to predict_delta_comps()} \usage{ -check_input_args(dataf, y, comps, covars, deltas) +check_input_args(dataf, y, comps, comps_fup, covars, deltas) } \arguments{ \item{dataf}{A \code{data.frame} containing data} @@ -13,6 +13,8 @@ check_input_args(dataf, y, comps, covars, deltas) \item{comps}{Character vector of names of compositions in \code{dataf}. See details for more information.} +\item{comps_fup}{Character vector of names of compositions at follow up in \code{dataf}. See details for more information.} + \item{covars}{Character vector of covariates names (non-comp variables) in \code{dataf} or NULL for none (default).} \item{deltas}{A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional.} diff --git a/man/plot_delta_comp.Rd b/man/plot_delta_comp.Rd index 58c976b..ed4769b 100644 --- a/man/plot_delta_comp.Rd +++ b/man/plot_delta_comp.Rd @@ -4,7 +4,12 @@ \alias{plot_delta_comp} \title{Get predictions from compositional ilr multiple linear regression model} \usage{ -plot_delta_comp(dc_obj, comp_total = NULL, units_lab = NULL) +plot_delta_comp( + dc_obj, + comp_total = NULL, + units_lab = NULL, + graph.sys = c("base", "ggplot2")[2] +) } \arguments{ \item{dc_obj}{A \code{deltacomp_obj} object returned from the function \code{predict_delta_comps}} @@ -12,6 +17,8 @@ plot_delta_comp(dc_obj, comp_total = NULL, units_lab = NULL) \item{comp_total}{A numeric scalar that is the original units of the composition to make the x-axis the original scale instead of in the range \code{[min(delta), max(delta)]} in (-1, 1).} \item{units_lab}{Character string of the units of the compositions relating to \code{comp_total} to add to the x-axis label} + +\item{graph.sys}{Graphing system within R. This defaults to "ggplot2" but can be one out of c("base","ggplot2").} } \description{ Provided the data (containing outcome, composiitional compoents and covariates), fit a ilr multiple linear regression model and provide predictions from reallocating compositional values pairwise amunsnst the components model. diff --git a/man/predict_delta_comps.Rd b/man/predict_delta_comps.Rd index dffbc85..0417e4e 100644 --- a/man/predict_delta_comps.Rd +++ b/man/predict_delta_comps.Rd @@ -8,9 +8,11 @@ predict_delta_comps( dataf, y, comps, + comps_fup = NULL, covars = NULL, - deltas = c(0, 10, 20)/(24 * 60), + analysis_type = c("cross-sectional", "long_prospective", "long_change")[1], comparisons = c("prop-realloc", "one-v-one")[1], + deltas = c(0, 10, 20)/(24 * 60), alpha = 0.05 ) } @@ -23,12 +25,12 @@ predict_delta_comps( \item{covars}{Optional. Character vector of covariates names (non-comp variables) in \code{dataf}. Defaults to NULL.} +\item{comparisons}{Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods.} + \item{deltas}{A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional. Changes in compositions to be computed pairwise. Defaults to 0, 10 and 20 minutes as a proportion of the 1440 minutes in a day (i.e., approximately \code{0.000}, \code{0.007} and \code{0.014}).} -\item{comparisons}{Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods.} - \item{alpha}{Optional. Level of significance. Defaults to 0.05.} } \description{ From 5777df2a9576c1bef3cb4ffe211ac82ecf06f218 Mon Sep 17 00:00:00 2001 From: Jairo H Migueles Date: Tue, 31 Jan 2023 18:25:04 +0100 Subject: [PATCH 2/2] added functionality to predict_delta_comps to build models with 2 compositions (baseline and follow-up) --- R/plot_delta_comp.R | 80 ++++-------- R/predict_delta_comps.R | 114 ++++++++---------- man/plot_delta_comp.Rd | 9 +- man/predict_delta_comps.Rd | 11 +- tests/testthat/test_check_input_args.R | 18 ++- .../test_check_strictly_positive_vals.R | 8 +- tests/testthat/test_rm_na_data_rows.R | 6 +- 7 files changed, 103 insertions(+), 143 deletions(-) diff --git a/R/plot_delta_comp.R b/R/plot_delta_comp.R index 388c532..6c1408e 100644 --- a/R/plot_delta_comp.R +++ b/R/plot_delta_comp.R @@ -8,12 +8,9 @@ globalVariables(c( #' #' @author Ty Stanford #' @description Provided the data (containing outcome, composiitional compoents and covariates), fit a ilr multiple linear regression model and provide predictions from reallocating compositional values pairwise amunsnst the components model. -#' #' @param dc_obj A \code{deltacomp_obj} object returned from the function \code{predict_delta_comps} #' @param comp_total A numeric scalar that is the original units of the composition to make the x-axis the original scale instead of in the range \code{[min(delta), max(delta)]} in (-1, 1). -#' @param graph.sys Graphing system within R. This defaults to "ggplot2" but can be one out of c("base","ggplot2"). #' @param units_lab Character string of the units of the compositions relating to \code{comp_total} to add to the x-axis label -#' #' @export #' @examples #' data(fairclough) @@ -55,9 +52,8 @@ globalVariables(c( -plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL, - graph.sys = c("base", "ggplot2")[2]) { - +plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) { + if (!is_deltacomp_obj(dc_obj)) { stop("Input needs to be a deltacomp object. i.e., data.frame returned by predict_delta_comps().") } @@ -94,56 +90,30 @@ plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL, levels(dc_obj$`comp-`) <- paste0(comps, "-Delta") } - if (graph.sys == "ggplot2") { - ggp <- - ggplot(dc_obj) + - geom_vline(xintercept = 0, col = "grey60") + - geom_hline(yintercept = 0, col = "grey60") + - geom_line(aes(x = delta, y = delta_pred, col = `comp+`)) + - geom_point(aes(x = delta, y = delta_pred, col = `comp+`), size = 1) + - geom_ribbon(aes(x = delta, ymin = ci_lo, ymax = ci_up, fill = `comp+`), alpha = 0.3) + - theme_bw() + - labs( - x = paste0("Change/delta in composition", x_lab_add), - y = paste0("Predicted change in outcome (", outc, ") with ", ci_lev, "% CI") - ) + - theme(legend.position = "none") - - - if (comparisons == "one-v-one") { - ggp <- ggp + - facet_grid(`comp-` ~ `comp+`, labeller = label_parsed) - } else { - ggp <- ggp + - facet_wrap(~ `comp+`, labeller = label_parsed) - } - } else if (graph.sys == "base") { - # to do.... - # PLOT - par(las = 1, mfrow = c(2,2)) - for (part in unique(dc_obj$`comp+`)) { - thisPlot = which(dc_obj$`comp+` == part) - plot(x = dc_obj$delta[thisPlot], y = dc_obj$delta_pred[thisPlot], - type = "o", pch = 19, - main = bquote(.(gsub("+Delta", "", part, fixed = T)) + Delta), - font.main=1, - col = colors[4], lwd = 3, - xlim = range(dc_obj$delta), - ylim = range(c(dc_obj$ci_lo, dc_obj$ci_up)), - ylab = paste0("Predicted change in outcome (", outc, ") with ", ci_lev, "% CI"), - xlab = paste0("Change/delta in composition", x_lab_add)) - polygon(x = c(dc_obj$delta[thisPlot], rev(dc_obj$delta[thisPlot])), - y = c(dc_obj$ci_lo[thisPlot], rev(dc_obj$ci_up[thisPlot])), - border = NA, col = myfunctions::add_alpha("red", alpha), fillOddEven = FALSE) - abline(h = 0) - } + ggp <- + ggplot(dc_obj) + + geom_vline(xintercept = 0, col = "grey60") + + geom_hline(yintercept = 0, col = "grey60") + + geom_line(aes(x = delta, y = delta_pred, col = `comp+`)) + + geom_point(aes(x = delta, y = delta_pred, col = `comp+`), size = 1) + + geom_ribbon(aes(x = delta, ymin = ci_lo, ymax = ci_up, fill = `comp+`), alpha = 0.3) + + theme_bw() + + labs( + x = paste0("Change/delta in composition", x_lab_add), + y = paste0("Predicted change in outcome (", outc, ") with ", ci_lev, "% CI") + ) + + theme(legend.position = "none") + + + if (comparisons == "one-v-one") { + ggp <- ggp + + facet_grid(`comp-` ~ `comp+`, labeller = label_parsed) + } else { + ggp <- ggp + + facet_wrap(~ `comp+`, labeller = label_parsed) } return(ggp) - - + + } - - - - diff --git a/R/predict_delta_comps.R b/R/predict_delta_comps.R index 15e385d..8d069d0 100644 --- a/R/predict_delta_comps.R +++ b/R/predict_delta_comps.R @@ -4,11 +4,13 @@ #' @description Provided the data (containing outcome, compositional components and covariates), fit a ilr multiple linear regression model and provide predictions from reallocating compositional values pairwise amunsnst the components model. #' @param dataf A \code{data.frame} containing data #' @param y Name (as string/character vector of length 1) of outcome variable in \code{dataf} -#' @param comps Character vector of names of compositions in \code{dataf}. See details for more information. +#' @param comps Optional character vector of names of compositions in \code{dataf}. See details for more information. +#' @param comps_fup Character vector of names of compositions at follow-up in \code{dataf}. See details for more information. #' @param covars Optional. Character vector of covariates names (non-comp variables) in \code{dataf}. Defaults to NULL. #' @param deltas A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional. #' Changes in compositions to be computed pairwise. Defaults to 0, 10 and 20 minutes as a proportion of the 1440 minutes #' in a day (i.e., approximately \code{0.000}, \code{0.007} and \code{0.014}). +#' @param analysis_type Currently two choices: \code{"cross-sectional"} (default) or \code{"longitudinal"}. Please see details for explanation of these methods. #' @param comparisons Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods. #' @param alpha Optional. Level of significance. Defaults to 0.05. #' @export @@ -19,6 +21,9 @@ #' Please see the \code{deltacomp} package \code{README.md} file for examples and explanation of the \code{comparisons = "prop-realloc"} and \code{comparisons = "one-v-one"} options. #' #' Note from version 0.1.0 to current version, \code{comparisons == "one-v-all"} is depreciated, \code{comparisons == "prop-realloc"} is probably the alternative you are after. +#' +#' \code{analysis_type == "longitudinal"} expects a second measurement of the composition (\code{comps_fup}). If longitudinal, the ilr coordinates of the follow-up composition will be calculated and used as covariates to adjust the regression model. +#' #' @examples #' predict_delta_comps( #' dataf = fat_data, @@ -40,20 +45,19 @@ #' comparisons = "one-v-one", #' alpha = 0.05 #' ) -#' predict_delta_comps <- function( - dataf, # data.frame of data - y, # character name of outcome in dataf - comps, # character vector of names of compositions in dataf - comps_fup = NULL, # character vector of names of compositions at follow up in dataf - covars = NULL, # character vector of names of covariates (non-comp variables) in dataf - analysis_type = c("cross-sectional", "long_prospective", "long_change")[1], - comparisons = c("prop-realloc", "one-v-one")[1], - deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise - alpha = 0.05 + dataf, # data.frame of data + y, # character name of outcome in dataf + comps, # character vector of names of compositions in dataf + comps_fup = NULL, # character vector of names of compositions at follow up in dataf + covars = NULL, # character vector of names of covariates (non-comp variables) in dataf + analysis_type = c("cross-sectional", "longitudinal")[1], + comparisons = c("prop-realloc", "one-v-one")[1], + deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise + alpha = 0.05 ){ - + # perform some basic input checks - throws errors where input incorrect check_input_args(dataf, y, comps, comps_fup, covars, deltas) if (is_null_or_na(covars)) { # convert 0 length vecs and NAs to NULL @@ -67,7 +71,7 @@ predict_delta_comps <- function( check_input_args(dataf, y, comps, comps_fup, covars, deltas) check_strictly_positive_vals(dataf, comps) comparisons <- get_comp_type(comparisons) - + # set up function constants n <- nrow(dataf) n_comp <- length(comps) @@ -76,7 +80,7 @@ predict_delta_comps <- function( # standardise comps dataf <- standardise_comps(dataf, comps) - if (!is.null(comps_fup) & analysis_type != "cross-sectional") dataf <- standardise_comps(dataf, comps_fup) + if (!is.null(comps_fup) & analysis_type == "longitudinal") dataf <- standardise_comps(dataf, comps_fup) # get the mean of the compositions on the geometric scale mean_comps <- @@ -117,7 +121,7 @@ predict_delta_comps <- function( mean_X <- cbind(mean_X, m_cov) # print(tibble::as_tibble(mean_X)) } - + # The ilr basis matrix psi <- create_v_mat(n_comp) ### previously: @@ -126,35 +130,32 @@ predict_delta_comps <- function( # add ilr coords to dataset dataf <- append_ilr_coords(dataf, comps, psi) mean_X <- append_ilr_coords(mean_X, comps, psi) - if (!is.null(comps_fup) & analysis_type != "cross-sectional") { + + if (!is.null(comps_fup) & analysis_type == "longitudinal") { + # change baseline ilr names to avoid confusion + colnames(dataf)[1:(n_comp - 1)] = paste0(colnames(dataf)[1:(n_comp - 1)], "_bl") + colnames(mean_X)[1:(n_comp - 1)] = paste0(colnames(mean_X)[1:(n_comp - 1)], "_bl") dataf <- append_ilr_coords(dataf, comps_fup, psi) mean_X <- append_ilr_coords(mean_X, comps_fup, psi) - + # change baseline ilr names to avoid confusion + colnames(dataf)[1:(n_comp - 1)] = paste0(colnames(dataf)[1:(n_comp - 1)], "_fup") + colnames(mean_X)[1:(n_comp - 1)] = paste0(colnames(mean_X)[1:(n_comp - 1)], "_fup") + # reset baseline ilr names + colnames(dataf)[(n_comp):(n_comp + (n_comp - 2))] = gsub("_bl", "", colnames(dataf)[(n_comp):(n_comp + (n_comp - 2))]) + colnames(mean_X)[(n_comp):(n_comp + (n_comp - 2))] = gsub("_bl", "", colnames(mean_X)[(n_comp):(n_comp + (n_comp - 2))]) } # print(tibble::as_tibble(mean_X)) print_ilr_trans(comps) # print to console the ilr transformation for the user # create dataset X only consisting of outcome, ilr coords and covars ilr_names <- paste0("ilr", 1:(n_comp - 1)) - if (!is.null(comps_fup) & analysis_type != "cross-sectional") { - ilr_names_fup = paste0(ilr_names, "_fup") - colnames(dataf)[n_comp:(n_comp + (n_comp - 2))] = ilr_names_fup - colnames(mean_X)[n_comp:(n_comp + (n_comp - 2))] = ilr_names_fup - } + # X <- dataf[, colnames(dataf) %in% c(y, ilr_names, covars)] X <- dataf[, c(y, ilr_names, covars)] # force order - if (!is.null(comps_fup) & analysis_type != "cross-sectional") { - if (analysis_type == "long_prospective") { - X <- dataf[, c(y, ilr_names, ilr_names_fup, covars)] # force order - covars = c(covars, ilr_names_fup) - } else if (analysis_type == "long_change") { - dataf[, ilr_names_fup] = dataf[, ilr_names_fup] - dataf[, ilr_names] - ilr_names_fup = gsub("_fup", "_ch", ilr_names_fup) - colnames(dataf) = gsub("_fup", "_ch", colnames(dataf)) - colnames(mean_X) = gsub("_fup", "_ch", colnames(mean_X)) - X <- dataf[, c(y, ilr_names_fup, ilr_names, covars)] # force order - covars = c(covars, ilr_names) - } + if (!is.null(comps_fup) & analysis_type == "longitudinal") { + ilr_names_fup = paste0(ilr_names, "_fup") + X <- dataf[, c(y, ilr_names, ilr_names_fup, covars)] # force order + covars = c(covars, ilr_names_fup) } # fit model lm_X <- fit_lm(y, X) @@ -162,32 +163,19 @@ predict_delta_comps <- function( # ANOVA of whether compositional variables are statistically significant collectively # note drop = FALSE in case no covariates and # stop one column from data.frame becoming vector - if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { - compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names)]) - } else if (analysis_type == "long_change") { - compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names_fup)]) - } + compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names)]) mean_pred <- get_mean_pred(lm_X, mean_X, alpha = alpha) # extract linear model quantities required for further calculations lm_quants <- extract_lm_quantities(lm_X, alpha = alpha) - if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { - delta_mat <- get_delta_mat(deltas, comparisons, comps, mean_comps) - n_preds <- nrow(delta_mat) - poss_comps <- get_all_comparison_mat(deltas, comparisons, comps, mean_comps) - - m_comps <- matrix(rep(mean_comps, n_preds), nrow = n_preds, byrow = TRUE) - m_delta <- m_comps + delta_mat - - } else if (analysis_type == "long_change") { - delta_mat <- get_delta_mat(deltas, comparisons, comps_fup, mean_comps_fup) - n_preds <- nrow(delta_mat) - poss_comps <- get_all_comparison_mat(deltas, comparisons, comps_fup, mean_comps_fup) - - m_comps <- matrix(rep(mean_comps_fup, n_preds), nrow = n_preds, byrow = TRUE) - m_delta <- m_comps + delta_mat - } + delta_mat <- get_delta_mat(deltas, comparisons, comps, mean_comps) + n_preds <- nrow(delta_mat) + poss_comps <- get_all_comparison_mat(deltas, comparisons, comps, mean_comps) + + m_comps <- matrix(rep(mean_comps, n_preds), nrow = n_preds, byrow = TRUE) + m_delta <- m_comps + delta_mat + m_delta_less_0 <- rowSums(m_delta < 0) if(any(m_delta_less_0 > 0)) { @@ -211,21 +199,15 @@ predict_delta_comps <- function( attr(ilr_delta, "class") <- NULL attr(ilr_means, "class") <- NULL - if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { - x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names, ilr_delta, ilr_means) - } else if (analysis_type == "long_change") { - x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names_fup, ilr_delta, ilr_means) - } + x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names, ilr_delta, ilr_means) + y0_star <- x0_star %*% lm_quants$beta_hat se_y0_star <- get_se_y0_star(x0_star, lm_quants$s_e, lm_quants$XtX_inv) # get labels and deltas for reallocations - if (analysis_type == "cross-sectional" | analysis_type == "long_prospective") { - realloc_nms <- get_realloc_nms(comps, comparisons, poss_comps) - } else if (analysis_type == "long_change") { - realloc_nms <- get_realloc_nms(comps_fup, comparisons, poss_comps) - } + realloc_nms <- get_realloc_nms(comps, comparisons, poss_comps) + delta_list <- get_pred_deltas(delta_mat, realloc_nms) @@ -260,5 +242,5 @@ predict_delta_comps <- function( attr(ret_obj, "mean_pred") <- mean_pred return(ret_obj) - + } diff --git a/man/plot_delta_comp.Rd b/man/plot_delta_comp.Rd index ed4769b..58c976b 100644 --- a/man/plot_delta_comp.Rd +++ b/man/plot_delta_comp.Rd @@ -4,12 +4,7 @@ \alias{plot_delta_comp} \title{Get predictions from compositional ilr multiple linear regression model} \usage{ -plot_delta_comp( - dc_obj, - comp_total = NULL, - units_lab = NULL, - graph.sys = c("base", "ggplot2")[2] -) +plot_delta_comp(dc_obj, comp_total = NULL, units_lab = NULL) } \arguments{ \item{dc_obj}{A \code{deltacomp_obj} object returned from the function \code{predict_delta_comps}} @@ -17,8 +12,6 @@ plot_delta_comp( \item{comp_total}{A numeric scalar that is the original units of the composition to make the x-axis the original scale instead of in the range \code{[min(delta), max(delta)]} in (-1, 1).} \item{units_lab}{Character string of the units of the compositions relating to \code{comp_total} to add to the x-axis label} - -\item{graph.sys}{Graphing system within R. This defaults to "ggplot2" but can be one out of c("base","ggplot2").} } \description{ Provided the data (containing outcome, composiitional compoents and covariates), fit a ilr multiple linear regression model and provide predictions from reallocating compositional values pairwise amunsnst the components model. diff --git a/man/predict_delta_comps.Rd b/man/predict_delta_comps.Rd index 0417e4e..e20f0ca 100644 --- a/man/predict_delta_comps.Rd +++ b/man/predict_delta_comps.Rd @@ -10,7 +10,7 @@ predict_delta_comps( comps, comps_fup = NULL, covars = NULL, - analysis_type = c("cross-sectional", "long_prospective", "long_change")[1], + analysis_type = c("cross-sectional", "longitudinal")[1], comparisons = c("prop-realloc", "one-v-one")[1], deltas = c(0, 10, 20)/(24 * 60), alpha = 0.05 @@ -21,10 +21,14 @@ predict_delta_comps( \item{y}{Name (as string/character vector of length 1) of outcome variable in \code{dataf}} -\item{comps}{Character vector of names of compositions in \code{dataf}. See details for more information.} +\item{comps}{Optional character vector of names of compositions in \code{dataf}. See details for more information.} + +\item{comps_fup}{Character vector of names of compositions at follow-up in \code{dataf}. See details for more information.} \item{covars}{Optional. Character vector of covariates names (non-comp variables) in \code{dataf}. Defaults to NULL.} +\item{analysis_type}{Currently two choices: \code{"cross-sectional"} (default) or \code{"longitudinal"}. Please see details for explanation of these methods.} + \item{comparisons}{Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods.} \item{deltas}{A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional. @@ -43,6 +47,8 @@ values as the function normalises the compositions row-wise to sum to 1 in part Please see the \code{deltacomp} package \code{README.md} file for examples and explanation of the \code{comparisons = "prop-realloc"} and \code{comparisons = "one-v-one"} options. Note from version 0.1.0 to current version, \code{comparisons == "one-v-all"} is depreciated, \code{comparisons == "prop-realloc"} is probably the alternative you are after. + +\code{analysis_type == "longitudinal"} expects a second measurement of the composition (\code{comps_fup}). If longitudinal, the ilr coordinates of the follow-up composition will be calculated and used as covariates to adjust the regression model. } \examples{ predict_delta_comps( @@ -64,7 +70,6 @@ predict_delta_comps( comparisons = "one-v-one", alpha = 0.05 ) - } \author{ Ty Stanford diff --git a/tests/testthat/test_check_input_args.R b/tests/testthat/test_check_input_args.R index 5a30779..ee97fdd 100644 --- a/tests/testthat/test_check_input_args.R +++ b/tests/testthat/test_check_input_args.R @@ -57,30 +57,36 @@ test_that("predict_delta_comps() correctly throws errors via check_input_args() ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, "sed", fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = "sed", covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "At least two compositional components" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, 1, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = 1, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Please supply a character string of the covariate" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, TRUE, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = TRUE, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Please supply a character string of the covariate" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, fa_covars, -1.1, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = -1.1, comparisons = fa_comparisons, alpha = fa_alpha), "deltas must be specified as positive and negative proportions" ) expect_error( - predict_delta_comps(dataf = fairclough, fa_y, fa_comps, fa_covars, +1.1, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = +1.1, comparisons = fa_comparisons, alpha = fa_alpha), "deltas must be specified as positive and negative proportions" ) expect_error( - predict_delta_comps(dataf = fairclough[1:6, ], fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = fairclough[1:6,], y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "The number of rows.*" ) diff --git a/tests/testthat/test_check_strictly_positive_vals.R b/tests/testthat/test_check_strictly_positive_vals.R index ae4f463..ab944f5 100644 --- a/tests/testthat/test_check_strictly_positive_vals.R +++ b/tests/testthat/test_check_strictly_positive_vals.R @@ -38,17 +38,19 @@ test_that("check_strictly_positive_vals() correctly throws errors for bad input" "Values less than" ) expect_error( - predict_delta_comps(dataf = f2, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f2, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Values less than" ) expect_error( - check_strictly_positive_vals(dataf = f3, fa_comps), + check_strictly_positive_vals(dataf = f3, comps = fa_comps), "Values less than" ) expect_error( - predict_delta_comps(dataf = f3, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f3, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "Values less than" ) diff --git a/tests/testthat/test_rm_na_data_rows.R b/tests/testthat/test_rm_na_data_rows.R index 8f6141a..05f4002 100644 --- a/tests/testthat/test_rm_na_data_rows.R +++ b/tests/testthat/test_rm_na_data_rows.R @@ -36,7 +36,8 @@ test_that("rm_na_data_rows() correctly removes NAs", { f1[-c(5, 10, 6, 2), c(fa_y, fa_comps, fa_covars)] ) expect_error( - predict_delta_comps(dataf = f1, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f1, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "The number of rows in dataf" ) @@ -45,7 +46,8 @@ test_that("rm_na_data_rows() correctly removes NAs", { f2[0, c(fa_y, fa_comps, fa_covars)] ) expect_error( - predict_delta_comps(dataf = f2, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha), + predict_delta_comps(dataf = f2, y = fa_y, comps = fa_comps, covars = fa_covars, + deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha), "dataf supplied must" )