-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
72a460c
commit 8f929ff
Showing
40 changed files
with
1,714 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
^LMTK\.Rproj$ | ||
^\.Rproj\.user$ | ||
^LICENSE\.md$ | ||
^vignettes\docs | ||
^Legato-docs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,3 +47,7 @@ po/*~ | |
|
||
# RStudio Connect folder | ||
rsconnect/ | ||
.Rproj.user | ||
|
||
# pkgdown | ||
/Legato-doc/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
Package: LegATo | ||
Title: LegATo: Longitudinal mEtaGenomic Analysis Toolkit | ||
Version: 0.0.0.9000 | ||
Authors@R: c( | ||
person("Aubrey", "Odom", , "[email protected]", role = c("aut", "cre"), | ||
comment = c(ORCID = "0000-0001-7113-7598")), | ||
person("Jared", "Pincus", , "[email protected]", role = "csl"), | ||
person("Jordan", "Pincus", , "[email protected]", role = "art") | ||
) | ||
Description: Streamlining longitudinal microbiome profiling in | ||
Bioconductor. | ||
License: MIT + file LICENSE | ||
Depends: | ||
R (>= 4.3.0) | ||
Imports: | ||
alluvial, | ||
animalcules, | ||
emmeans, | ||
geepack, | ||
ggeffects, | ||
lme4, | ||
magrittr, | ||
MultiAssayExperiment, | ||
rlang, | ||
SummarizedExperiment, | ||
S4Vectors, | ||
TBSignatureProfiler, | ||
dplyr, | ||
ggalluvial, | ||
ggplot2, | ||
stringr, | ||
tibble, | ||
tidyr, | ||
tidyselect | ||
Suggests: | ||
BiocStyle, | ||
biomformat, | ||
knitr, | ||
lintr, | ||
plyr, | ||
R.utils, | ||
rmarkdown, | ||
Rsubread, | ||
spelling, | ||
sys, | ||
testthat, | ||
usethis | ||
Encoding: UTF-8 | ||
Language: en-US | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.2.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2024 | ||
COPYRIGHT HOLDER: LegATo authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# MIT License | ||
|
||
Copyright (c) 2024 LegATo authors | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX | ||
|
||
BuildType: Package | ||
PackageUseDevtools: Yes | ||
PackageInstallArgs: --no-multiarch --with-keep.source |
Submodule Legato-docs
added at
b09b8d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(clean_animalcules_MAE) | ||
export(filter_animalcules_MAE) | ||
export(get_long_data) | ||
export(get_summary_table) | ||
export(parse_MAE_SE) | ||
export(plot_alluvial) | ||
export(plot_stacked_area) | ||
export(plot_stacked_bar) | ||
import(MultiAssayExperiment) | ||
importFrom(magrittr,"%>%") | ||
importFrom(rlang,.data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#' @keywords internal | ||
"_PACKAGE" | ||
|
||
## usethis namespace: start | ||
#' @importFrom magrittr %>% | ||
## usethis namespace: end | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#' Clean up taxon names in a MultiAssayExperiment | ||
#' | ||
#' This functional is an optional method for fixing up taxon names in a | ||
#' \code{MultiAssayExperiment} to be run before \code{filter_animalcules_MAE}. | ||
#' Specifically it removes brackets from species names, replaces species labeled | ||
#' as "others" with "sp." and finally replaces underscores with spaces. | ||
#' | ||
#' @inheritParams plot_stacked_bar | ||
#' | ||
#' @returns An animalcules-formatted \code{MultiAssayExperiment} object with | ||
#' cleaned-up taxonomy nomenclature. | ||
#' | ||
#' @export | ||
#' @importFrom rlang .data | ||
#' | ||
#' @examples | ||
#' # example code | ||
#' | ||
|
||
clean_animalcules_MAE <- function(dat) { | ||
parsed <- parse_MAE_SE(dat, which_assay = "MicrobeGenetics", type = "MAE") | ||
tax_table <- parsed$tax | ||
sam_table <- parsed$sam | ||
counts_table <- parsed$counts | ||
|
||
se_rowData <- tax_table %>% | ||
dplyr::mutate("species" = stringr::str_remove_all(species, "\\[|\\]")) | ||
|
||
# Replace species that are others with "sp." | ||
ind <- se_rowData$species == "others" | ||
se_rowData$species[ind] <- paste(se_rowData$genus[ind], "sp.", sep = " ") | ||
# Replace genus that are others with species information | ||
ind <- se_rowData$genus == "others" & !is.na(se_rowData$genus) | ||
all_split <- sapply(strsplit(se_rowData$species, " "), function(x) x[[1]]) | ||
se_rowData$genus[ind] <- all_split[ind] | ||
|
||
#ISSUE: DUplicates because we need to consolidate down to species level | ||
|
||
# Fix rownames | ||
if (all.equal(rownames(tax_table), rownames(counts_table))) { | ||
to_rownames <- stringr::str_replace_all(se_rowData$species, "_", " ") | ||
rownames(se_rowData) <- rownames(counts_table) <- to_rownames | ||
# Alphabetize | ||
se_rowData_final <- se_rowData[order(to_rownames),] | ||
counts_table_final <- counts_table[order(to_rownames),] | ||
} else (stop("Mismatch of rownames in datasets")) | ||
|
||
se_mgx <- counts_table %>% base::data.matrix() %>% S4Vectors::SimpleList() %>% | ||
magrittr::set_names("MGX") | ||
|
||
microbe_se <- SummarizedExperiment::SummarizedExperiment( | ||
assays = se_mgx, colData = sam_table, rowData = se_rowData) | ||
MAE_out <- MultiAssayExperiment::MultiAssayExperiment( | ||
experiments = S4Vectors::SimpleList(MicrobeGenetics = microbe_se), | ||
colData = sam_table) | ||
|
||
return(MAE_out) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
utils::globalVariables(".") | ||
|
||
#' Filter a MultiAssayExperiment to a top percentage of taxa and label the rest | ||
#' as "Other" | ||
#' | ||
#' This function takes an animalcules-formatted \code{MultiAssayExperiment} | ||
#' (MAE) object and identifies all taxa at the "genus" level that represent | ||
#' <\code{filter_pct} average relative abundance across all samples in the MAE. | ||
#' After identification at the genus level, taxa across the genus and species | ||
#' levels are then consolidated into the category "Other". | ||
#' | ||
#' @inheritParams plot_stacked_bar | ||
#' @param filter_pct A double strictly between 0 and 1. | ||
#' | ||
#' @returns An animalcules-formatted \code{MultiAssayExperiment} object with | ||
#' appropriate filtration. | ||
#' | ||
#' @export | ||
#' @importFrom rlang .data | ||
#' | ||
#' @examples | ||
#' # example code | ||
#' | ||
|
||
filter_animalcules_MAE <- function(dat, filter_pct = 0.05) { | ||
if(filter_pct <= 0 | filter_pct >= 1) stop("filter_pct must be between 0 and 1.") | ||
# Extract metadata, taxonomic info, and counts | ||
parsed <- parse_MAE_SE(dat, which_assay = "MicrobeGenetics", type = "MAE") | ||
tax_table <- parsed$tax | ||
sam_table <- parsed$sam | ||
counts_table <- parsed$counts | ||
|
||
all_relabu_genus <- counts_table %>% | ||
as.matrix() %>% | ||
# Get rel abu within samokes | ||
prop.table(margin = 2) %>% | ||
tibble::as_tibble() %>% | ||
dplyr::bind_cols("genus" = tax_table$genus) %>% | ||
dplyr::relocate(.data$genus) %>% | ||
dplyr::group_by(.data$genus) %>% | ||
# Sum rel abu within samples/columns for genera | ||
dplyr::summarise(dplyr::across(.fns = sum, .cols = dplyr::everything())) %>% | ||
# Sum everything but the first columm ("genus") | ||
dplyr::mutate("allmeans" = rowMeans(select(., -1))) %>% | ||
dplyr::select("genus", "allmeans") %>% | ||
dplyr::mutate("genus" = replace(.data$genus, is.na(.data$genus), "Unknown")) %>% | ||
dplyr::arrange(dplyr::desc(.data$allmeans)) %>% | ||
dplyr::mutate("lessthan1pct" = .data$allmeans < filter_pct/10) | ||
|
||
# Identify species in other | ||
othergenera <- all_relabu_genus %>% | ||
dplyr::filter(.data$lessthan1pct == TRUE) %>% | ||
dplyr::select("genus") %>% unlist() %>% unname() | ||
|
||
## Use the identified species to update the tax, species tables | ||
|
||
# Replace tax table | ||
tax_table_other <- tax_table %>% | ||
dplyr::mutate("genus" = replace(.data$genus, is.na(.data$genus), "Unknown")) %>% | ||
dplyr::mutate(dplyr::across(c(.data$genus, .data$species), | ||
function(x) replace(x, .data$genus %in% othergenera, "Other"))) | ||
|
||
# Resum the counts table | ||
counts_other <- counts_table %>% | ||
dplyr::bind_cols("species" = tax_table_other$species) %>% | ||
dplyr::group_by(.data$species) %>% | ||
dplyr::summarise(dplyr::across(.cols = dplyr::all_of(colnames(counts_table)), | ||
.fns = sum)) %>% | ||
tibble::column_to_rownames("species") %>% | ||
base::data.matrix() %>% | ||
S4Vectors::SimpleList() %>% | ||
magrittr::set_names("MGX") | ||
|
||
# Adjust tax table accordingly | ||
tax_table_other2 <- tax_table_other %>% | ||
dplyr::distinct(.data$species, .keep_all = TRUE) %>% | ||
dplyr::arrange(.data$species) | ||
|
||
rownames(tax_table_other2) <- tax_table_other2$species | ||
|
||
## Create SE object | ||
microbe_other <- SummarizedExperiment::SummarizedExperiment( | ||
assays = counts_other, | ||
rowData = S4Vectors::DataFrame(tax_table_other2), | ||
colData = S4Vectors::DataFrame(sam_table)) | ||
MAE_out <- MultiAssayExperiment::MultiAssayExperiment( | ||
experiments = S4Vectors::SimpleList(MicrobeGenetics = microbe_other), | ||
colData = S4Vectors::DataFrame(sam_table)) | ||
return(MAE_out) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
## Generalized Estimating Equations | ||
#- Run an independent GEE model for each taxa with relative abundance | ||
#- Works well with small data - multiple subpoints/subjects across clusters | ||
|
||
#https://data.library.virginia.edu/getting-started-with-generalized-estimating-equations/ | ||
|
||
### Functions to implement GEEs | ||
|
||
mk_gee_plot <- function(this_coef, complex, tn) { | ||
p <- plot(ggeffects::ggemmeans(complex, | ||
terms = c(this_coef, "VisitNumber", "Group"))) + | ||
ggplot2::labs(subtitle = "Estimated marginal means", | ||
title = str_replace(tn, "_", " ")) + | ||
ylab("Taxon abundance (log CPM)") + | ||
ggplot2::theme_classic() | ||
ggplot2::ggsave(filename = | ||
paste("PaperFigs/gee/", tn, "_", this_coef, ".png", sep = ""), | ||
p, width = 6, height = 4, units = "in", scale = 0.7) | ||
} | ||
|
||
test_models_gee <- function(tn, input_df, to_plot = FALSE) { | ||
filt_df <- input_df %>% filter(taxon == tn) %>% | ||
mutate(PatientID = as.factor(PatientID)) %>% | ||
arrange(PatientID) | ||
complex <- geeglm(Abundance ~ 1 + Group + | ||
Q23.17.Neutraliz..*Group + LAI.Neutraliz..*Group + | ||
BaL.Neutraliz..*Group + VisitNumber, | ||
id = PatientID, data = filt_df, | ||
na.action = na.omit, family = "gaussian", | ||
corstr = "ar1") | ||
sum_comp <- summary(complex) | ||
all_ci <- broom:::confint.geeglm(complex) | ||
if (to_plot) { | ||
sapply(c("Q23.17.Neutraliz..", "LAI.Neutraliz..", "BaL.Neutraliz.."), | ||
mk_gee_plot, complex, tn) | ||
} | ||
res_out <- sum_comp$coefficients %>% select(Estimate, `Pr(>|W|)`) %>% | ||
mutate("Taxon" = tn) %>% | ||
bind_cols(all_ci) %>% relocate(Taxon, lwr, Estimate, upr) %>% | ||
tibble::rownames_to_column(var = "Coefficient") | ||
return(res_out) | ||
} | ||
|
||
save_table <- function(input_df = results_species, | ||
fileout = "PaperFigs/species_us_gee.csv") { | ||
input_df %>% | ||
dplyr::rename("Coefficient Estimate" = Estimate, | ||
"Lower 95% CI" = lwr, "Upper 95% CI" = upr, | ||
"Unadj p-value" = `Pr(>|W|)`, | ||
"Adj p-value" = `adj_p`) %>% | ||
write.csv(., fileout) | ||
} | ||
|
||
# logcpm_tab <- long_lcpm_genus | ||
run_model <- function(logcpm_tab, to_plot = TRUE) { | ||
input_df <- logcpm_tab %>% filter(VisitNumber != 2) | ||
all_tn <- unique(input_df$taxon) | ||
n <- length(all_tn) | ||
|
||
# Plot model coefficients | ||
# If emmeans doesn't work, may need to step through function | ||
# and make one plot | ||
if (to_plot) lapply(all_tn, test_models_gee, input_df = input_df, to_plot = TRUE) | ||
|
||
storage <- lapply(all_tn, test_models_gee, input_df = input_df) %>% | ||
data.table::rbindlist() %>% | ||
arrange(Coefficient) %>% | ||
group_by(Coefficient) %>% | ||
mutate(adj_p = p.adjust(`Pr(>|W|)`, method = "bonferroni")) %>% | ||
as.data.frame() | ||
kable(storage, caption = "P-values from GEEs", | ||
row.names = TRUE) | ||
return(storage) | ||
} | ||
|
||
|
||
### Modeling genera ----- | ||
```{R} | ||
# Input: create long version of CPM (both moms and infants) | ||
long_lcpm_genus <- create_long(logCPM_genus) | ||
|
||
# Get genus-level results | ||
results_genus <- run_model(long_lcpm_genus, to_plot = TRUE) | ||
save_table(results_genus, "PaperFigs/genus_ar1_gee.csv") | ||
|
||
# Bonferroni cutoff - genus | ||
0.05/length(unique(results_genus$Taxon)) | ||
``` | ||
|
||
|
||
### Modeling species ----- | ||
```{R} | ||
# Input: create long version of CPM (both moms and infants) | ||
long_lcpm_species <- SummarizedExperiment::assay(dat, "log_counts_cpm") %>% | ||
as.data.frame() %>% create_long() | ||
|
||
# Top species (more than 1%) | ||
filt <- animalcules::counts_to_relabu(counts_table) |> | ||
apply(1, function(x) round(mean(x), 3)) %>% | ||
.[. > 0.01] | ||
## Remove any species that are general "sp." or "Other" | ||
ind <- stringr::str_detect(names(filt), "_sp.|Other") | ||
species_to_test <- names(filt[!ind]) | ||
|
||
# Get species-level results | ||
results_species <- long_lcpm_species %>% | ||
filter(taxon %in% species_to_test) %>% | ||
run_model(to_plot = TRUE) | ||
save_table(results_species, "PaperFigs/species_ar1_gee.csv") | ||
|
||
# Bonferroni cutoff - genus | ||
0.05/length(species_to_test) | ||
``` |
Oops, something went wrong.