Skip to content

Added functionality to build models with two compositions (baseline and follow-up) #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ LazyData: true
Imports: compositions,
ggplot2
Suggests: testthat
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
14 changes: 10 additions & 4 deletions R/check_input_args.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#' @param dataf A \code{data.frame} containing data
#' @param y Name (as string/character vector of length 1) of outcome variable in \code{dataf}
#' @param comps Character vector of names of compositions in \code{dataf}. See details for more information.
#' @param comps_fup Character vector of names of compositions at follow up in \code{dataf}. See details for more information.
#' @param covars Character vector of covariates names (non-comp variables) in \code{dataf} or NULL for none (default).
#' @param deltas A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional.
#' @export
Expand All @@ -22,10 +23,7 @@
#
#




check_input_args <- function(dataf, y, comps, covars, deltas) {
check_input_args <- function(dataf, y, comps, comps_fup, covars, deltas) {


if (!is.data.frame(dataf)) {
Expand All @@ -46,6 +44,14 @@ check_input_args <- function(dataf, y, comps, covars, deltas) {
stop("At least two compositional components are required to create an ilr linear regression.")
}

if (!is_null_or_na(comps_fup)) {
if (!is.character(comps_fup)) {
stop("Please supply a character string of the compositional component column names in dataf.")
} else if (length(comps_fup) < 2) {
stop("At least two compositional components are required to create an ilr linear regression.")
}
}

if (!is_null_or_na(covars) & !is.character(covars)) {
stop("Please supply a character string of the covariate column names in dataf (optionally NULL or NA for no covariates).")
}
Expand Down
12 changes: 4 additions & 8 deletions R/plot_delta_comp.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ globalVariables(c(


plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) {

if (!is_deltacomp_obj(dc_obj)) {
stop("Input needs to be a deltacomp object. i.e., data.frame returned by predict_delta_comps().")
}
Expand Down Expand Up @@ -104,7 +104,7 @@ plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) {
) +
theme(legend.position = "none")


if (comparisons == "one-v-one") {
ggp <- ggp +
facet_grid(`comp-` ~ `comp+`, labeller = label_parsed)
Expand All @@ -114,10 +114,6 @@ plot_delta_comp <- function(dc_obj, comp_total = NULL, units_lab = NULL) {
}

return(ggp)


}




79 changes: 62 additions & 17 deletions R/predict_delta_comps.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
#' @description Provided the data (containing outcome, compositional components and covariates), fit a ilr multiple linear regression model and provide predictions from reallocating compositional values pairwise amunsnst the components model.
#' @param dataf A \code{data.frame} containing data
#' @param y Name (as string/character vector of length 1) of outcome variable in \code{dataf}
#' @param comps Character vector of names of compositions in \code{dataf}. See details for more information.
#' @param comps Optional character vector of names of compositions in \code{dataf}. See details for more information.
#' @param comps_fup Character vector of names of compositions at follow-up in \code{dataf}. See details for more information.
#' @param covars Optional. Character vector of covariates names (non-comp variables) in \code{dataf}. Defaults to NULL.
#' @param deltas A vector of time-component changes (as proportions of compositions , i.e., values between -1 and 1). Optional.
#' Changes in compositions to be computed pairwise. Defaults to 0, 10 and 20 minutes as a proportion of the 1440 minutes
#' in a day (i.e., approximately \code{0.000}, \code{0.007} and \code{0.014}).
#' @param analysis_type Currently two choices: \code{"cross-sectional"} (default) or \code{"longitudinal"}. Please see details for explanation of these methods.
#' @param comparisons Currently two choices: \code{"one-v-one"} or \code{"prop-realloc"} (default). Please see details for explanation of these methods.
#' @param alpha Optional. Level of significance. Defaults to 0.05.
#' @export
Expand All @@ -19,6 +21,9 @@
#' Please see the \code{deltacomp} package \code{README.md} file for examples and explanation of the \code{comparisons = "prop-realloc"} and \code{comparisons = "one-v-one"} options.
#'
#' Note from version 0.1.0 to current version, \code{comparisons == "one-v-all"} is depreciated, \code{comparisons == "prop-realloc"} is probably the alternative you are after.
#'
#' \code{analysis_type == "longitudinal"} expects a second measurement of the composition (\code{comps_fup}). If longitudinal, the ilr coordinates of the follow-up composition will be calculated and used as covariates to adjust the regression model.
#'
#' @examples
#' predict_delta_comps(
#' dataf = fat_data,
Expand All @@ -40,29 +45,33 @@
#' comparisons = "one-v-one",
#' alpha = 0.05
#' )
#'

predict_delta_comps <- function(
dataf, # data.frame of data
y, # character name of outcome in dataf
comps, # character vector of names of compositions in dataf
covars = NULL, # character vector of names of covariates (non-comp variables) in dataf
deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise
comparisons = c("prop-realloc", "one-v-one")[1],
alpha = 0.05
dataf, # data.frame of data
y, # character name of outcome in dataf
comps, # character vector of names of compositions in dataf
comps_fup = NULL, # character vector of names of compositions at follow up in dataf
covars = NULL, # character vector of names of covariates (non-comp variables) in dataf
analysis_type = c("cross-sectional", "longitudinal")[1],
comparisons = c("prop-realloc", "one-v-one")[1],
deltas = c(0, 10, 20) / (24 * 60), # changes in compositions to be computed pairwise
alpha = 0.05
){

# perform some basic input checks - throws errors where input incorrect
check_input_args(dataf, y, comps, covars, deltas)
check_input_args(dataf, y, comps, comps_fup, covars, deltas)
if (is_null_or_na(covars)) { # convert 0 length vecs and NAs to NULL
covars <- NULL
}
dataf <- rm_na_data_rows(dataf, c(y, comps, covars))
if (is_null_or_na(comps_fup)) { # convert 0 length vecs and NAs to NULL
comps_fup <- NULL
}
dataf <- rm_na_data_rows(dataf, c(y, comps, comps_fup, covars))
# in case the data are much smaller after removing NAs
check_input_args(dataf, y, comps, covars, deltas)
check_input_args(dataf, y, comps, comps_fup, covars, deltas)
check_strictly_positive_vals(dataf, comps)
comparisons <- get_comp_type(comparisons)

# set up function constants
n <- nrow(dataf)
n_comp <- length(comps)
Expand All @@ -71,6 +80,7 @@ predict_delta_comps <- function(

# standardise comps
dataf <- standardise_comps(dataf, comps)
if (!is.null(comps_fup) & analysis_type == "longitudinal") dataf <- standardise_comps(dataf, comps_fup)

# get the mean of the compositions on the geometric scale
mean_comps <-
Expand All @@ -81,6 +91,16 @@ predict_delta_comps <- function(
robust = FALSE
)

if (!is.null(comps_fup) & analysis_type != "cross-sectional") {
mean_comps_fup <-
compositions::mean.acomp(
compositions::acomp(
dataf[, comps_fup]
),
robust = FALSE
)
}

if(!all.equal(1, sum(mean_comps), tolerance = 1e-5))
stop("Calculated mean composition does not sum to 1")

Expand All @@ -91,14 +111,17 @@ predict_delta_comps <- function(
### and covariates
m_cov <- NULL
mean_X <- as.data.frame(t(mean_comps))
if (!is.null(comps_fup) & analysis_type != "cross-sectional") {
mean_X = cbind(mean_X, as.data.frame(t(mean_comps_fup)))
}
if (n_covar > 0) {
m_cov <- get_avg_covs(dataf, covars)
# testing:
# print(tibble::as_tibble(m_cov))
mean_X <- cbind(mean_X, m_cov)
# print(tibble::as_tibble(mean_X))
}

# The ilr basis matrix
psi <- create_v_mat(n_comp)
### previously:
Expand All @@ -107,13 +130,33 @@ predict_delta_comps <- function(
# add ilr coords to dataset
dataf <- append_ilr_coords(dataf, comps, psi)
mean_X <- append_ilr_coords(mean_X, comps, psi)

if (!is.null(comps_fup) & analysis_type == "longitudinal") {
# change baseline ilr names to avoid confusion
colnames(dataf)[1:(n_comp - 1)] = paste0(colnames(dataf)[1:(n_comp - 1)], "_bl")
colnames(mean_X)[1:(n_comp - 1)] = paste0(colnames(mean_X)[1:(n_comp - 1)], "_bl")
dataf <- append_ilr_coords(dataf, comps_fup, psi)
mean_X <- append_ilr_coords(mean_X, comps_fup, psi)
# change baseline ilr names to avoid confusion
colnames(dataf)[1:(n_comp - 1)] = paste0(colnames(dataf)[1:(n_comp - 1)], "_fup")
colnames(mean_X)[1:(n_comp - 1)] = paste0(colnames(mean_X)[1:(n_comp - 1)], "_fup")
# reset baseline ilr names
colnames(dataf)[(n_comp):(n_comp + (n_comp - 2))] = gsub("_bl", "", colnames(dataf)[(n_comp):(n_comp + (n_comp - 2))])
colnames(mean_X)[(n_comp):(n_comp + (n_comp - 2))] = gsub("_bl", "", colnames(mean_X)[(n_comp):(n_comp + (n_comp - 2))])
}
# print(tibble::as_tibble(mean_X))
print_ilr_trans(comps) # print to console the ilr transformation for the user

# create dataset X only consisting of outcome, ilr coords and covars
ilr_names <- paste0("ilr", 1:(n_comp - 1))

# X <- dataf[, colnames(dataf) %in% c(y, ilr_names, covars)]
X <- dataf[, c(y, ilr_names, covars)] # force order
if (!is.null(comps_fup) & analysis_type == "longitudinal") {
ilr_names_fup = paste0(ilr_names, "_fup")
X <- dataf[, c(y, ilr_names, ilr_names_fup, covars)] # force order
covars = c(covars, ilr_names_fup)
}
# fit model
lm_X <- fit_lm(y, X)

Expand All @@ -122,7 +165,6 @@ predict_delta_comps <- function(
# stop one column from data.frame becoming vector
compare_two_lm(y, X[, c(y, covars), drop = FALSE], X[, c(y, covars, ilr_names)])


mean_pred <- get_mean_pred(lm_X, mean_X, alpha = alpha)
# extract linear model quantities required for further calculations
lm_quants <- extract_lm_quantities(lm_X, alpha = alpha)
Expand All @@ -134,6 +176,7 @@ predict_delta_comps <- function(
m_comps <- matrix(rep(mean_comps, n_preds), nrow = n_preds, byrow = TRUE)
m_delta <- m_comps + delta_mat


m_delta_less_0 <- rowSums(m_delta < 0)
if(any(m_delta_less_0 > 0)) {
warning(
Expand All @@ -157,12 +200,14 @@ predict_delta_comps <- function(
attr(ilr_means, "class") <- NULL

x0_star <- get_x0_star(lm_quants$dmX, n_preds, ilr_names, ilr_delta, ilr_means)

y0_star <- x0_star %*% lm_quants$beta_hat
se_y0_star <- get_se_y0_star(x0_star, lm_quants$s_e, lm_quants$XtX_inv)


# get labels and deltas for reallocations
realloc_nms <- get_realloc_nms(comps, comparisons, poss_comps)

delta_list <- get_pred_deltas(delta_mat, realloc_nms)


Expand Down Expand Up @@ -197,5 +242,5 @@ predict_delta_comps <- function(
attr(ret_obj, "mean_pred") <- mean_pred

return(ret_obj)

}
4 changes: 3 additions & 1 deletion man/check_input_args.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 12 additions & 5 deletions man/predict_delta_comps.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 12 additions & 6 deletions tests/testthat/test_check_input_args.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,30 +57,36 @@ test_that("predict_delta_comps() correctly throws errors via check_input_args()
)

expect_error(
predict_delta_comps(dataf = fairclough, fa_y, "sed", fa_covars, fa_deltas, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = fairclough, y = fa_y, comps = "sed", covars = fa_covars,
deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha),
"At least two compositional components"
)

expect_error(
predict_delta_comps(dataf = fairclough, fa_y, fa_comps, 1, fa_deltas, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = 1,
deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha),
"Please supply a character string of the covariate"
)
expect_error(
predict_delta_comps(dataf = fairclough, fa_y, fa_comps, TRUE, fa_deltas, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = TRUE,
deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha),
"Please supply a character string of the covariate"
)

expect_error(
predict_delta_comps(dataf = fairclough, fa_y, fa_comps, fa_covars, -1.1, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = fa_covars,
deltas = -1.1, comparisons = fa_comparisons, alpha = fa_alpha),
"deltas must be specified as positive and negative proportions"
)
expect_error(
predict_delta_comps(dataf = fairclough, fa_y, fa_comps, fa_covars, +1.1, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = fairclough, y = fa_y, comps = fa_comps, covars = fa_covars,
deltas = +1.1, comparisons = fa_comparisons, alpha = fa_alpha),
"deltas must be specified as positive and negative proportions"
)

expect_error(
predict_delta_comps(dataf = fairclough[1:6, ], fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = fairclough[1:6,], y = fa_y, comps = fa_comps, covars = fa_covars,
deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha),
"The number of rows.*"
)

Expand Down
8 changes: 5 additions & 3 deletions tests/testthat/test_check_strictly_positive_vals.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,19 @@ test_that("check_strictly_positive_vals() correctly throws errors for bad input"
"Values less than"
)
expect_error(
predict_delta_comps(dataf = f2, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = f2, y = fa_y, comps = fa_comps, covars = fa_covars,
deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha),
"Values less than"
)


expect_error(
check_strictly_positive_vals(dataf = f3, fa_comps),
check_strictly_positive_vals(dataf = f3, comps = fa_comps),
"Values less than"
)
expect_error(
predict_delta_comps(dataf = f3, fa_y, fa_comps, fa_covars, fa_deltas, fa_comparisons, fa_alpha),
predict_delta_comps(dataf = f3, y = fa_y, comps = fa_comps, covars = fa_covars,
deltas = fa_deltas, comparisons = fa_comparisons, alpha = fa_alpha),
"Values less than"
)

Expand Down
Loading