Fix examples

wejlab · Jan 25, 2024 · 5e4e044 · 5e4e044
1 parent 9c61eb4
commit 5e4e044
Show file tree

Hide file tree

Showing 31 changed files with 226 additions and 69 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -3,3 +3,5 @@
 ^LICENSE\.md$
 ^vignettes\docs
 ^Legato-docs
+^pkgdown
+
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,15 @@
 Package: LegATo
 Title: LegATo: Longitudinal mEtaGenomic Analysis Toolkit
-Version: 0.0.0.9000
+Version: 0.99.0
 Authors@R: c(
     person("Aubrey", "Odom", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-7113-7598")),
     person("Yilong", "Zhang", , "[email protected]", role = "ctb"),
-    person("Jared", "Pincus", , "[email protected]", role = "csl"),
+    person("Jared", "Pincus", , "[email protected]", role = "csl",
+           comment = c(ORCID = "0000-0001-6708-5262")),
     person("Jordan", "Pincus", , "[email protected]", role = "art")
   )
-Description: Streamlining longitudinal microbiome profiling in
-    Bioconductor.
+Description: Streamlining longitudinal microbiome profiling in Bioconductor.
 License: MIT + file LICENSE
 Depends:
     R (>= 4.3.0)

diff --git a/NEWS.md b/NEWS.md
@@ -0,0 +1,13 @@
+# LegATo 0.99.0 (Spring 2024)
+
+* Pre-Release version of LegATo
+
+## Bug Fixes
+* None to report
+
+## Major Changes
+* Readying for the big leagues
+* The Pincuses contributed the etymology and icon
+
+## Minor Changes
+* Nearly as many as the major changes
diff --git a/R/filter_animalcules_MAE.R b/R/filter_animalcules_MAE.R
@@ -19,7 +19,7 @@ utils::globalVariables(".")
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' filter_animalcules_MAE(in_dat, 0.01)
 #' 
 

diff --git a/R/get_long_data.R b/R/get_long_data.R
@@ -2,7 +2,7 @@
 #'
 #' This function takes a \code{MultiAssayExperiment} object and a specified
 #' taxon level of interest and creates a long \code{data.frame} that can be used
-#' more easily for plotting counts.
+#' more easily for plotting counts data.
 #'
 #' @inheritParams plot_stacked_bar
 #' @param log logical. Indicate whether an assay returned should be the log of
@@ -21,7 +21,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' out <- get_long_data(in_dat, "genus", log = TRUE, counts_to_CPM = TRUE)
 #' head(out)
 #' 

diff --git a/R/get_stacked_data.R b/R/get_stacked_data.R
@@ -1,9 +1,19 @@
-#' Documentation
+#' Create a long data.frame with grouped abundances from a MultiAssayExperiment counts object
 #' 
+#' This function takes a \code{MultiAssayExperiment} object and a specified
+#' taxon level of interest and creates a long \code{data.frame} that can be used
+#' more easily for plotting counts data in a stacked bar plot or a stacked area
+#' chart. The function groups taxa and computes relative abundance within taxa strata.
+#' 
+#' @inheritParams plot_spaghetti
+#' 
+#' @return A \code{data.frame} consisting of the counts data, taxa, and metadata.
+#'
 #' @export
+#' @importFrom rlang .data
 #' 
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' get_stacked_data(in_dat, "genus", covariate_1 = "Sex", covariate_time = "Month")
 #'
 

diff --git a/R/get_summary_table.R b/R/get_summary_table.R
@@ -15,7 +15,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' out <- get_summary_table(in_dat, c("Group", "Subject"))
 #' head(out)
 #'

diff --git a/R/get_top_taxa.R b/R/get_top_taxa.R
@@ -12,7 +12,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' out <- get_top_taxa(in_dat, "genus")
 #' out
 #'

diff --git a/R/parse_MAE_SE.R b/R/parse_MAE_SE.R
@@ -21,7 +21,7 @@
 #' @import MultiAssayExperiment
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' out <- parse_MAE_SE(in_dat)
 #' head(out$tax)
 #' head(out$sam)

diff --git a/R/plot_alluvial.R b/R/plot_alluvial.R
@@ -16,7 +16,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' plot_alluvial(in_dat, taxon_level = "family", covariate_1 = "Group", covariate_time = "Month",
 #'               palette_input = rainbow(25))
 #'

diff --git a/R/plot_spaghetti.R b/R/plot_spaghetti.R
@@ -22,7 +22,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' all_taxa <- get_top_taxa(in_dat, "phylum")
 #' plot_spaghetti(in_dat, taxon_level = "phylum", covariate_1 = "Group", covariate_time = "Month",
 #'               unit_var = "Subject", which_taxon = all_taxa$taxon[1],

diff --git a/R/plot_stacked_area.R b/R/plot_stacked_area.R
@@ -16,7 +16,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' plot_stacked_area(in_dat, taxon_level = "phylum", covariate_1 = "Group",
 #'                   covariate_time = "Month",
 #'                   palette_input = rainbow(25))

diff --git a/R/plot_stacked_bar.R b/R/plot_stacked_bar.R
@@ -30,7 +30,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
 #' plot_stacked_bar(in_dat, taxon_level = "family", covariate_1 = "Group",
 #'                   covariate_time = "Month",
 #'                   palette_input = rainbow(25))

diff --git a/R/run_gee_model.R b/R/run_gee_model.R
@@ -42,23 +42,44 @@ test_models_gee <- function(tn, input_df, unit_var, fixed_cov,
   return(res_out)
 }
 
-#' Compute Generalized Estimating Equations (GEEs)
-#' 
-#' Run an independent GEE model for each taxa with relative abundance
-#' Works well with small data - multiple subpoints/subjects across clusters
-#' 
-#' Source
-#' https://data.library.virginia.edu/getting-started-with-generalized-estimating-equations/
-#' 
-#' fixed_cov is a vector
-#' 
+#' Compute Generalized Estimating Equations (GEEs) on longitudinal microbiome
+#' data
+#'
+#' This function takes an animalcules-formatted \code{MultiAssayExperiment} and
+#' runs an independent GEE model for each taxon. The model predicts taxon log
+#' CPM abundance as a product of fixed-effects covariates conditional on a
+#' grouping ID variable, usually the unit on which repeated measurements were
+#' taken. This modeling approach works best with small datasets that have
+#' multiple samples across many (>40) clusters/units.
+#'
+#' P-values are adjusted across taxa within each model coefficient. The
+#' following methods are permitted: \code{c("holm", "hochberg", "hommel",
+#' "bonferroni", "BH", "BY", "fdr", "none")}.
+#'
+#' @inheritParams test_hotelling_t2
+#' @param fixed_cov A character vector naming covariates to be tested.
+#' @param corstr A character string specifying the correlation structure. The
+#'   following are permitted: '"independence"', '"exchangeable"', '"ar1"',
+#'   '"unstructured"'.
+#' @param p_adj_method A character string specifying the correction method. Can
+#'   be abbreviated. See details. Default is \code{"fdr"}.
+#' @param plot_out Logical indicating whether plots should be output alongside
+#'   the model results. Default is \code{FALSE}.
+#' @param plotsave_loc A character string giving the folder path to save plot
+#'   outputs. This defaults to the current working directory.
+#' @param plot_terms Character vector. Which terms should be examined in the
+#'   plot output? Can overlap with the \code{fixed_cov} inputs.
+#' @param ... Further arguments passed to \code{ggsave} for plot creation.
+#'
 #' @export
 #' @importFrom rlang .data
-#' 
+#'
 #' @examples
-#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") %>% readRDS()
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |>
+#'               readRDS()
 #' out <- run_gee_model(in_dat, taxon_level = "genus", unit_var = "Subject",
-#'                      fixed_cov = c("HairLength", "Age", "Group", "Sex"), corstr = "ar1")
+#'                      fixed_cov = c("HairLength", "Age", "Group", "Sex"),
+#'                      corstr = "ar1")
 #' head(out)
 #' 
 
@@ -67,6 +88,7 @@ run_gee_model <- function(dat,
                           unit_var,
                           fixed_cov,
                           corstr = "ar1",
+                          p_adj_method = "fdr",
                           plot_out = FALSE,
                           plotsave_loc = ".",
                           plot_terms = NULL,
@@ -84,7 +106,7 @@ run_gee_model <- function(dat,
     data.table::rbindlist() %>%
     dplyr::arrange(.data$Coefficient) %>%
     dplyr::group_by(.data$Coefficient) %>%
-    dplyr::mutate("Adj p-value" = stats::p.adjust(.data$`Pr(>|W|)`, method = "bonferroni")) %>%
+    dplyr::mutate("Adj p-value" = stats::p.adjust(.data$`Pr(>|W|)`, method = p_adj_method)) %>%
     dplyr::rename("Unadj p-value" = .data$`Pr(>|W|)`) %>%
     as.data.frame()
   return(storage)

diff --git a/R/test_hotelling_t2.R b/R/test_hotelling_t2.R
@@ -20,9 +20,9 @@
   n <- input_data %>% dplyr::group_by(Populations) %>%
     dplyr::distinct(Subjects) %>% dplyr::summarize("n_col" = dplyr::n())
   n1 <- n %>% dplyr::filter(Populations == Group1) %>%
-    dplyr::select(`n_col`) %>% as.numeric()
+    dplyr::select("n_col") %>% as.numeric()
   n2 <- n %>% dplyr::filter(Populations == Group2) %>%
-    dplyr::select(`n_col`) %>% as.numeric()
+    dplyr::select("n_col") %>% as.numeric()
   p <- length(unique(input_data$Taxon))
   # Sample mean vector
   X_i <- input_data %>%
@@ -219,7 +219,7 @@
 #' @importFrom rlang .data
 #'
 #' @examples
-#' dat <- system.file("extdata", "MAE.RDS", package = "LegATo") %>%
+#' dat <- system.file("extdata", "MAE.RDS", package = "LegATo") |>
 #' readRDS()
 #' dat_0.05 <- filter_animalcules_MAE(dat, 0.05)
 #' out1 <- test_hotelling_t2(dat = dat_0.05,

diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
-# LegATo: Longitudinal mEtaGenomic Analysis Toolkit <img src="https://github.com/aubreyodom/Legato-docs/blob/main/legato-logo.jpg?raw=true" align="right" width="170" />
 
-## What is LegATo?
+# LegATo
+### A Longitudinal mEtaGenomic Analysis Toolkit <img src="https://github.com/aubreyodom/Legato-docs/blob/main/legato-logo.jpg?raw=true" align="right" width="140">
 
 LegATo is a suite of open-source software tools for longitudinal microbiome analysis. It is extendable to
 several different study forms with optimal ease-of-use for researchers. Microbiome time-series data
@@ -9,13 +9,19 @@ designs. This toolkit will allow researchers to determine which microbial taxa a
 perturbations such as onset of disease or lifestyle choices, and to predict the effects of these perturbations
 over time, including changes in composition or stability of commensal bacteria. 
 
-LegATo integrates visualization, modeling and testing procedures. It is currently in development, but it will soon be supplemented by hierarchical clustering tools and multivariate generalized estimating equations (JGEEs) to adjust for the compositional nature of microbiome data. Other tools will be implemented as needed.
+LegATo integrates visualization, modeling and testing procedures. It is currently in development, but it will soon be supplemented by hierarchical clustering tools and multivariate generalized estimating equations (JGEEs) to adjust for the compositional nature of microbiome data.
 
-# Documentation
+### The Story Behind the Name
+In music, legato indicates that notes are played or sung smoothly and connected, without a noticeable break between them. The LegATo package facilitates a cohesive and interconnected understanding of the microbial communities represented by the samples, much like the smooth connection of musical notes in a legato passage. 
+
+Therefore, LegATo metaphorically represents the smooth and connected analysis of longitudinal metagenomic data, drawing inspiration from the musical term to convey a sense of continuity and harmony in the modeling process.
+
+## Documentation
 Documentation and tutorials for LegATo are available at our [website](https://aubreyodom.github.io/LegATo-docs/).
 
+Check out a thorough tutorial on proper usage of our package [here](https://aubreyodom.github.io/LegATo-docs/articles/LegATo_vignette.html).
 
-# Installation
+## Installation
 LegATo requires R Version 4.3.
 
 Install the development version of the package from GitHub:

diff --git a/man/LegATo-package.Rd b/man/LegATo-package.Rd
diff --git a/man/filter_animalcules_MAE.Rd b/man/filter_animalcules_MAE.Rd
diff --git a/man/get_long_data.Rd b/man/get_long_data.Rd
diff --git a/man/get_stacked_data.Rd b/man/get_stacked_data.Rd
diff --git a/man/get_summary_table.Rd b/man/get_summary_table.Rd
diff --git a/man/get_top_taxa.Rd b/man/get_top_taxa.Rd
diff --git a/man/parse_MAE_SE.Rd b/man/parse_MAE_SE.Rd
diff --git a/man/plot_alluvial.Rd b/man/plot_alluvial.Rd
diff --git a/man/plot_spaghetti.Rd b/man/plot_spaghetti.Rd