Skip to content

Commit

Permalink
Add heatmap and lmm modeling function
Browse files Browse the repository at this point in the history
  • Loading branch information
aubreyodom committed Jan 30, 2024
1 parent 7752e89 commit c8adc25
Show file tree
Hide file tree
Showing 30 changed files with 1,451 additions and 233 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
^LMTK\.Rproj$
^LegATo\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^vignettes\docs
Expand Down
18 changes: 13 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,31 @@ Authors@R: c(
person("Jordan", "Pincus", , "[email protected]", role = "art",
comment = "Artist of LegATo icon")
)
Description: LegATo is a suite of open-source software tools for streamlining longitudinal microbiome analysis,
integrating visualization, modeling and testing procedures.
Description: LegATo is a suite of open-source software tools for
streamlining longitudinal microbiome analysis, integrating
visualization, modeling and testing procedures.
License: MIT + file LICENSE
Depends:
R (>= 4.3.0)
Imports:
animalcules,
data.table,
dplyr,
geepack,
ggplot2,
magrittr,
MultiAssayExperiment,
rlang,
S4Vectors,
stringr,
SummarizedExperiment,
TBSignatureProfiler,
tibble,
tidyr,
tidyselect
Suggests:
BiocStyle,
biomformat,
broom,
ComplexHeatmap,
emmeans,
ggalluvial,
ggeffects,
Expand All @@ -50,9 +50,17 @@ Suggests:
Rsubread,
spelling,
sys,
TBSignatureProfiler,
testthat,
usethis,
vegan
vegan,
RColorBrewer,
circlize,
geepack,
paletteer,
broom.mixed,
lmerTest,
lme4
VignetteBuilder:
knitr
BiocType: Software
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,20 @@
export(NMIT)
export(clean_animalcules_MAE)
export(create_formatted_MAE)
export(distinctColors)
export(filter_animalcules_MAE)
export(get_long_data)
export(get_stacked_data)
export(get_summary_table)
export(get_top_taxa)
export(parse_MAE_SE)
export(plot_alluvial)
export(plot_heatmap)
export(plot_spaghetti)
export(plot_stacked_area)
export(plot_stacked_bar)
export(run_gee_model)
export(run_lmm_model)
export(test_hotelling_t2)
export(tscor)
import(MultiAssayExperiment)
Expand Down
88 changes: 61 additions & 27 deletions R/NMIT.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,44 +63,58 @@ tscor <- function(dat, unit_var, method = "kendall", fill_na = 0) {
}

#' Nonparametric Microbial Interdependence Test (NMIT)
#'
#' The package performs a multivariate distance-based test for group comparisons of
#' microbial temporal interdependence. The NMIT test provides a comprehensive way to evaluate
#' the association between key phenotypic variables and microbial interdependence.
#' This function is recommended for use after a filtering step using \code{filter_animalcules_MAE}.
#'
#' An R-based implementation of the NMIT, a multivariate distance-based test for
#' group comparisons of microbial temporal interdependence. The NMIT test
#' provides a comprehensive way to evaluate the association between key
#' phenotypic variables and microbial interdependence. This function is
#' recommended for use after a filtering step using
#' \code{filter_animalcules_MAE}.
#'
#' @author Yilong Zhang, Huilin Li, Aubrey Odom
#'
#' @inheritParams tscor
#' @inheritParams plot_stacked_bar
#' @param fixed_cov A character vector of covariates of interest found in dat.
#' @param dist_type A character strin specifying the type of matrix norm to be computed. The default is \code{"F"}.
#' @param fixed_cov A character vector of covariates of interest found in
#' \code{dat}.
#' @param dist_type A character string specifying the type of matrix norm to be
#' computed. The default is \code{"F"}.
#' \itemize{
#' \item{\code{"M"} or \code{"m"}}{specifies the maximum modulus of all the elements in \code{x}.}
#' \item{\code{"O"}, \code{"o"} or \code{"1"}}{specifies the one norm, (maximum absolute column sum);}
#' \item{\code{"I"} or \code{"i"}}{specifies the infinity norm (maximum absolute row sum);}
#' \item{\code{"F"} or \code{"f"}}{specifies the Frobenius norm (the Euclidean norm of \code{x} treated as if it were a vector); and}
#' \item{\code{"M"} or \code{"m"}}{specifies the maximum modulus of all the
#' elements in \code{x}.}
#' \item{\code{"O"}, \code{"o"} or \code{"1"}}{specifies the one norm,
#' (maximum absolute column sum);}
#' \item{\code{"I"} or \code{"i"}}{specifies the infinity norm (maximum
#' absolute row sum);}
#' \item{\code{"F"} or \code{"f"}}{specifies the Frobenius norm (the
#' Euclidean norm of \code{x} treated as if it were a vector)}
#' }
#' @param heatmap A logical value indicating whether to draw heatmap. The default is \code{TRUE}.
#' @param classify A logical value indicating whether to draw a classifier tree. The default is \code{FALSE}.
#' @param fill_na A number between 0 and 1 to fill \code{NA} values. The default value is 0.
#' @param heatmap A logical value indicating whether to draw heatmap. The
#' default is \code{TRUE}.
#' @param classify A logical value indicating whether to draw a classifier tree.
#' The default is \code{FALSE}.
#' @param fill_na A number between 0 and 1 to fill \code{NA} values. The default
#' value is 0.
#' @param ... Additional arguments to be passed to \code{ComplexHeatmap::Heatmap()}.
#'
#' @return This function returns an analysis of variance (ANOVA) table showing
#' sources of variation, degrees of freedom, sequential sums of squares, mean
#' squares, F statistics, partial R-squared and P values, based on 999
#' permutations.
#'
#' @return This function returns an analysis of variance (ANOVA) table showing sources of variation,
#' degrees of freedom, sequential sums of squares, mean squares, F statistics, partial R-squared and P values,
#' based on 999 permutations.
#'
#' @export
#'
#' @examples
#' dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
#' NMIT(dat, unit_var = "Subject", fixed_cov = "Group", covariate_time = "Month")
#'
#'

NMIT <- function(dat, unit_var, fixed_cov,
covariate_time,
method = "kendall", dist_type = "F",
heatmap = TRUE, classify = FALSE,
fill_na = 0) {
fill_na = 0,
...) {
dat <- .rearrange_sub_time(dat, unit_var, covariate_time)
all_dat <- parse_MAE_SE(dat)
map <- all_dat$sam
Expand All @@ -119,17 +133,37 @@ NMIT <- function(dat, unit_var, fixed_cov,
})
rownames(dist) <- dimnames(otu.cor)[[3]]
colnames(dist) <- dimnames(otu.cor)[[3]]
grp <- map.unique[rownames(dist) , fixed_cov]
grp <- map.unique[rownames(dist), fixed_cov]
test <- vegan::adonis2(dist ~ grp)
if(heatmap){
#pvalue <- paste(round(c(test$F[1], test$`Pr(>F)`[1]), 3),
# collapse = ";")
pvalue <- round(c(test$`Pr(>F)`[1]), 3)
n.taxa <- nrow(otu)

diag(dist) <- stats::median(dist)
plot_title <- paste0(" #OTU:", n.taxa, " pvalue:", pvalue)
stats::heatmap(sqrt(dist), Colv = classify, Rowv = classify, scale = "column",
xlab = unit_var, ylab = unit_var,
main = paste("NMIT Correlation Heatmap across", n.taxa, "taxa with p-value", pvalue))
mat <- as.matrix(sqrt(dist))

plot_title <- paste0(" # OTU: ", n.taxa, " pvalue: ", pvalue)
cov_mat <- grp %>% as.matrix %>%
magrittr::set_rownames(rownames(dist)) %>%
magrittr::set_colnames(fixed_cov) %>%
as.data.frame
plot_heatmap(
inputData = mat,
annotationData = cov_mat,
name = "NMIT Correlation",
plot_title = plot_title,
signatureColNames = NULL,
annotationColNames = NULL,
colList = list(),
scale = FALSE,
showColumnNames = TRUE,
showRowNames = TRUE,
colorSets = c("Set1", "Set2", "Set3", "Pastel1", "Pastel2", "Accent", "Dark2",
"Paired"),
choose_color = c("blue", "gray95", "red"),
split_heatmap = "none",
column_order = NULL,
...
)
}
}
51 changes: 25 additions & 26 deletions R/clean_animalcules_MAE.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,43 +19,42 @@
#'

clean_animalcules_MAE <- function(dat) {
# Extract data
parsed <- parse_MAE_SE(dat, which_assay = "MicrobeGenetics", type = "MAE")
tax_table <- parsed$tax
sam_table <- parsed$sam
counts_table <- parsed$counts

# Preliminary fixing of species names
if(!all(c("genus", "species") %in% colnames(tax_table))) {
stop("Columns 'genus' and 'species' must be present in taxonomy table.")
}
if(!all.equal(rownames(tax_table), rownames(counts_table))) {
stop("Error in parsing MAE")
}
# Only need to alter species names in tax_table initially
se_rowData <- tax_table %>%
dplyr::mutate("species" = stringr::str_remove_all(.data$species, "\\[|\\]"))

# Replace species that are others with "sp."
dplyr::mutate("species" = stringr::str_remove_all(.data$species, "\\[|\\]"),
"species" = stringr::str_replace_all(.data$species, "_", " "))
## Replace species that are others with "sp."
ind <- se_rowData$species == "others"
se_rowData$species[ind] <- paste(se_rowData$genus[ind], "sp.", sep = " ")
# Replace genus that are others with species information
## Replace genus that are others with species information
ind <- se_rowData$genus == "others" & !is.na(se_rowData$genus)
all_split <- plyr::aaply(strsplit(se_rowData$species, " "), 1,
function(x) x[[1]])
se_rowData$genus[ind] <- all_split[ind]
# Consolidate tax and counts to species level
up_counts <- animalcules::upsample_counts(counts_table, se_rowData,
higher_level = "species")
up_rowData <- se_rowData %>%
tibble::remove_rownames() %>%
dplyr::distinct(.data$species, .keep_all = TRUE)
ind <- match(rownames(up_counts), up_rowData$species)
up_rowData <- up_rowData[ind, ]
rownames(up_rowData) <- up_rowData$species

#ISSUE: DUplicates because we need to consolidate down to species level

# Fix rownames
if (all.equal(rownames(tax_table), rownames(counts_table))) {
to_rownames <- stringr::str_replace_all(se_rowData$species, "_", " ")
rownames(se_rowData) <- rownames(counts_table) <- to_rownames
# Alphabetize
se_rowData_final <- se_rowData[order(to_rownames),]
counts_table_final <- counts_table[order(to_rownames),]
} else (stop("Mismatch of rownames in datasets"))

se_mgx <- counts_table %>% base::data.matrix() %>% S4Vectors::SimpleList() %>%
magrittr::set_names("MGX")

microbe_se <- SummarizedExperiment::SummarizedExperiment(
assays = se_mgx, colData = sam_table, rowData = se_rowData)
MAE_out <- MultiAssayExperiment::MultiAssayExperiment(
experiments = S4Vectors::SimpleList(MicrobeGenetics = microbe_se),
colData = sam_table)

MAE_out <- create_formatted_MAE(counts_dat = up_counts,
tax_dat = up_rowData,
metadata_dat = parsed$sam)
return(MAE_out)
}

30 changes: 11 additions & 19 deletions R/filter_animalcules_MAE.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ utils::globalVariables(".")
#'
#' This function takes an animalcules-formatted \code{MultiAssayExperiment}
#' (MAE) object and identifies all taxa at the "genus" level that represent
#' <\code{filter_pct} average relative abundance across all samples in the MAE.
#' <\code{filter_prop} average relative abundance across all samples in the MAE.
#' After identification at the genus level, taxa across the genus and species
#' levels are then consolidated into the category "Other".
#'
#' @inheritParams plot_stacked_bar
#' @param filter_pct A double strictly between 0 and 1.
#' @param filter_prop A double strictly between 0 and 1, representing
#' the proportion of relative abundance at which to filter. Default is 0.001.
#'
#' @returns An animalcules-formatted \code{MultiAssayExperiment} object with
#' appropriate filtration.
Expand All @@ -23,8 +24,8 @@ utils::globalVariables(".")
#' filter_animalcules_MAE(in_dat, 0.01)
#'

filter_animalcules_MAE <- function(dat, filter_pct = 0.05) {
if(filter_pct <= 0 | filter_pct >= 1) stop("filter_pct must be between 0 and 1.")
filter_animalcules_MAE <- function(dat, filter_prop = 0.001) {
if(filter_prop <= 0 | filter_prop >= 1) stop("filter_prop must be between 0 and 1.")
# Extract metadata, taxonomic info, and counts
parsed <- parse_MAE_SE(dat, which_assay = "MicrobeGenetics", type = "MAE")
tax_table <- parsed$tax
Expand All @@ -33,11 +34,11 @@ filter_animalcules_MAE <- function(dat, filter_pct = 0.05) {

all_relabu_genus <- counts_table %>%
as.matrix() %>%
# Get rel abu within samokes
# Get rel abu within samples
prop.table(margin = 2) %>%
tibble::as_tibble() %>%
dplyr::bind_cols("genus" = tax_table$genus) %>%
dplyr::relocate(.data$genus) %>%
dplyr::relocate("genus") %>%
dplyr::group_by(.data$genus) %>%
# Sum rel abu within samples/columns for genera
dplyr::summarise(dplyr::across(.fns = sum, .cols = dplyr::everything())) %>%
Expand All @@ -46,11 +47,11 @@ filter_animalcules_MAE <- function(dat, filter_pct = 0.05) {
dplyr::select("genus", "allmeans") %>%
dplyr::mutate("genus" = replace(.data$genus, is.na(.data$genus), "Unknown")) %>%
dplyr::arrange(dplyr::desc(.data$allmeans)) %>%
dplyr::mutate("lessthan1pct" = .data$allmeans < filter_pct/10)
dplyr::mutate("lessthanXpct" = .data$allmeans < filter_prop)

# Identify species in other
othergenera <- all_relabu_genus %>%
dplyr::filter(.data$lessthan1pct == TRUE) %>%
dplyr::filter(.data$lessthanXpct == TRUE) %>%
dplyr::select("genus") %>% unlist() %>% unname()

## Use the identified species to update the tax, species tables
Expand All @@ -67,10 +68,7 @@ filter_animalcules_MAE <- function(dat, filter_pct = 0.05) {
dplyr::group_by(.data$species) %>%
dplyr::summarise(dplyr::across(.cols = dplyr::all_of(colnames(counts_table)),
.fns = sum)) %>%
tibble::column_to_rownames("species") %>%
base::data.matrix() %>%
S4Vectors::SimpleList() %>%
magrittr::set_names("MGX")
tibble::column_to_rownames("species")

# Adjust tax table accordingly
tax_table_other2 <- tax_table_other %>%
Expand All @@ -80,13 +78,7 @@ filter_animalcules_MAE <- function(dat, filter_pct = 0.05) {
rownames(tax_table_other2) <- tax_table_other2$species

## Create SE object
microbe_other <- SummarizedExperiment::SummarizedExperiment(
assays = counts_other,
rowData = S4Vectors::DataFrame(tax_table_other2),
colData = S4Vectors::DataFrame(sam_table))
MAE_out <- MultiAssayExperiment::MultiAssayExperiment(
experiments = S4Vectors::SimpleList(MicrobeGenetics = microbe_other),
colData = S4Vectors::DataFrame(sam_table))
MAE_out <- create_formatted_MAE(counts_other, tax_table_other2, sam_table)
return(MAE_out)
}

Loading

0 comments on commit c8adc25

Please sign in to comment.