Add unit tests

wejlab · Oct 3, 2024 · 01931ee · 01931ee
1 parent 54cb82d
commit 01931ee
Show file tree

Hide file tree

Showing 12 changed files with 510 additions and 28 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -60,12 +60,14 @@ Suggests:
     rmarkdown,
     spelling,
     TBSignatureProfiler,
+    testthat (>= 3.0.0),
     usethis,
     vegan
 VignetteBuilder: 
     knitr
 BiocType: Software
 biocViews: MicrobiomeData, ReproducibleResearch, SequencingData
+Config/testthat/edition: 3
 Encoding: UTF-8
 Language: en-US
 LazyData: FALSE

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,6 +5,7 @@ export(clean_MAE)
 export(create_formatted_MAE)
 export(distinctColors)
 export(filter_MAE)
+export(filter_animalcules_MAE)
 export(get_long_data)
 export(get_stacked_data)
 export(get_summary_table)

diff --git a/R/NMIT.R b/R/NMIT.R
@@ -12,6 +12,24 @@
   return(MAE)
 }
 
+# Helper function for if standard deviation is 0 during stats::cor
+safe_cor <- function(x, y = NULL, method = "pearson") {
+    df <- x[, colSums(x) != 0]
+    tryCatch({
+        # Try to compute the correlation
+        result <- cor(x = df, y, method = method)
+        return(result)
+    }, error = function(e) {
+        # If an error occurs (like zero standard deviation), return NA or a custom message
+        if (grepl("the standard deviation is zero", e$message)) {
+            return(NA)
+        } else {
+            # Return the error message if it's another issue
+            stop(e)
+        }
+    })
+}
+
 #' Calculate within-subject OTU correlations
 #'
 #' This function takes a \code{MultiAssayExperiment} and outputs an array of
@@ -49,7 +67,8 @@ tscor <- function(dat, unit_var, method = "kendall", fill_na = 0) {
   otu_list <- split(otus, sub_split)
 
   # Pairwise correlations for each OTU sub-table
-  cor_list <- lapply(otu_list, stats::cor, method = method)
+  cor_list <- lapply(otu_list, safe_cor, method = method)
+
   names.cor <- names(cor_list)
   names.otu <- colnames(otus)
 

diff --git a/R/create_formatted_MAE.R b/R/create_formatted_MAE.R
@@ -25,28 +25,42 @@
 #' @returns A \code{MultiAssayExperiment} object.
 #' @examples
 #' nsample <- ntaxa <- 3
-#' counts_dat <- data.frame("X123" = runif(ntaxa, 0, 500),
-#'                          "X456" = runif(ntaxa, 0, 500),
-#'                          "X789" = runif(ntaxa, 0, 500))
-#' tax_dat <- data.frame("class" = c("rand1", "rand2", "rand3"),
-#'                       "species" = c("rand4", "rand5", "rand6")) |>
-#'   as.data.frame()
+#' counts_dat <- data.frame(
+#'     "X123" = runif(ntaxa, 0, 500),
+#'     "X456" = runif(ntaxa, 0, 500),
+#'     "X789" = runif(ntaxa, 0, 500)
+#' )
+#' tax_dat <- data.frame(
+#'     "class" = c("rand1", "rand2", "rand3"),
+#'     "species" = c("rand4", "rand5", "rand6")
+#' ) |>
+#'     as.data.frame()
 #' # Set rownames as lowest unique taxonomic level
 #' rownames(tax_dat) <- tax_dat$species
-#' metadata <- data.frame(Sample = c("X123", "X456", "X789"),
-#'                        Group = c("A", "B", "A"),
-#'                        Var = rnorm(nsample))
+#' rownames(counts_dat) <- tax_dat$species
+#' metadata <- data.frame(
+#'     Sample = c("X123", "X456", "X789"),
+#'     Group = c("A", "B", "A"),
+#'     Var = rnorm(nsample)
+#' )
 #' rownames(metadata) <- metadata$Sample
 #' out_MAE <- create_formatted_MAE(counts_dat, tax_dat, metadata)
-#' out_MAE
-#'
+#' 
+#' # TreeSummarizedExperiment
+#' tse <- TreeSummarizedExperiment::TreeSummarizedExperiment(
+#'     assays = list(counts = counts_dat),
+#'     colData = metadata,            
+#'     rowData = tax_dat             
+#' )
+#' out_MAE_2 <- create_formatted_MAE(tree_SE = tse)
+#' out_MAE_2
 
 create_formatted_MAE <- function(counts_dat = NULL, tax_dat = NULL, metadata_dat = NULL,
                                  tree_SE = NULL) {
     if (class(tree_SE) == "TreeSummarizedExperiment") {
-        counts_dat <- assays(tree_SE)[[1]]
-        tax_dat <- rowData(tree_SE)
-        metadata_dat <- colData(tree_SE) |> as.data.frame() 
+        counts_dat <- SummarizedExperiment::assays(tree_SE)[[1]]
+        tax_dat <-SummarizedExperiment::rowData(tree_SE)
+        metadata_dat <- SummarizedExperiment::colData(tree_SE) |> as.data.frame() 
     } else {
         if (is.null(counts_dat) | is.null(tax_dat) | is.null(metadata_dat)) {
             stop("Please supply counts, taxonomy, and metadata tables.")

diff --git a/R/filter_animalcules_MAE.R b/R/filter_animalcules_MAE.R
@@ -0,0 +1,83 @@
+utils::globalVariables(".")
+
+#' Filter a MultiAssayExperiment to a top percentage of taxa and label the rest
+#' as "Other"
+#'
+#' This function takes an animalcules-formatted \code{MultiAssayExperiment}
+#' (MAE) object and identifies all taxa at the "genus" level that represent
+#' <\code{filter_prop} average relative abundance across all samples in the MAE.
+#' After identification at the genus level, taxa across the genus and species
+#' levels are then consolidated into the category "Other".
+#'
+#' @inheritParams plot_stacked_bar
+#' @param filter_prop A double strictly between 0 and 1, representing
+#' the proportion of relative abundance at which to filter. Default is 0.001.
+#'
+#' @returns An animalcules-formatted \code{MultiAssayExperiment} object with
+#'   appropriate filtration.
+#'
+#' @export
+#' @importFrom rlang .data
+#'
+#' @examples
+#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
+#' filter_animalcules_MAE(in_dat, 0.01)
+#' 
+
+filter_animalcules_MAE <- function(dat, filter_prop = 0.001) {
+    if(filter_prop <= 0 | filter_prop >= 1) stop("filter_prop must be between 0 and 1.")
+    # Extract metadata, taxonomic info, and counts
+    parsed <- parse_MAE_SE(dat, which_assay = "MicrobeGenetics", type = "MAE")
+    tax_table <- parsed$tax
+    sam_table <- parsed$sam
+    counts_table <- parsed$counts
+
+    all_relabu_genus <- counts_table %>%
+        as.matrix() %>%
+        # Get rel abu within samples
+        prop.table(margin = 2) %>%
+        tibble::as_tibble() %>%
+        dplyr::bind_cols("genus" = tax_table$genus) %>%
+        dplyr::relocate("genus") %>%
+        dplyr::group_by(.data$genus) %>%
+        # Sum rel abu within samples/columns for genera
+        dplyr::summarise(dplyr::across(.fns = sum, .cols = dplyr::everything())) %>%
+        # Sum everything but the first columm ("genus")
+        dplyr::mutate("allmeans" = rowMeans(dplyr::select(., -1))) %>%
+        dplyr::select("genus", "allmeans") %>%
+        dplyr::mutate("genus" = replace(.data$genus, is.na(.data$genus), "Unknown")) %>%
+        dplyr::arrange(dplyr::desc(.data$allmeans)) %>%
+        dplyr::mutate("lessthanXpct" = .data$allmeans < filter_prop)
+
+    # Identify species in other
+    othergenera <- all_relabu_genus %>%
+        dplyr::filter(.data$lessthanXpct == TRUE) %>%
+        dplyr::select("genus") %>% unlist() %>% unname()
+
+    ## Use the identified species to update the tax, species tables
+
+    # Replace tax table
+    tax_table_other <- tax_table %>%
+        dplyr::mutate("genus" = replace(.data$genus, is.na(.data$genus), "Unknown")) %>%
+        dplyr::mutate(dplyr::across(c(.data$genus, .data$species),
+                                    function(x) replace(x, .data$genus %in% othergenera, "Other")))
+
+    # Resum the counts table
+    counts_other <- counts_table %>%
+        dplyr::bind_cols("species" = tax_table_other$species) %>%
+        dplyr::group_by(.data$species) %>%
+        dplyr::summarise(dplyr::across(.cols = dplyr::all_of(colnames(counts_table)),
+                                       .fns = sum)) %>%
+        tibble::column_to_rownames("species")
+
+    # Adjust tax table accordingly
+    tax_table_other2 <- tax_table_other %>%
+        dplyr::distinct(.data$species, .keep_all = TRUE) %>%
+        dplyr::arrange(.data$species)
+
+    rownames(tax_table_other2) <- tax_table_other2$species
+
+    ## Create SE object
+    MAE_out <- create_formatted_MAE(counts_other, tax_table_other2, sam_table)
+    return(MAE_out)
+}
diff --git a/R/plot_heatmap.R b/R/plot_heatmap.R
@@ -333,5 +333,5 @@ plot_heatmap <- function(inputData, annotationData = NULL, plot_title = NULL,
                             column_order = column_order,
                             cluster_columns = TRUE, ...),
     annotation_legend_side = "bottom")
-  return()
+  return(NULL)
 }
diff --git a/R/run_lm_model.R b/R/run_lm_model.R
@@ -30,7 +30,7 @@ test_models_lm <- function(tn, input_df, fixed_cov,
   return(res_out)
 }
 
-#' Compute linear models (LMM) on microbiome data
+#' Compute linear models (LM) on microbiome data
 #'
 #' This function takes an animalcules-formatted \code{MultiAssayExperiment} and
 #' runs an independent linear model for each taxon. The model predicts taxon log

diff --git a/man/create_formatted_MAE.Rd b/man/create_formatted_MAE.Rd
diff --git a/man/filter_animalcules_MAE.Rd b/man/filter_animalcules_MAE.Rd
diff --git a/man/run_lm_model.Rd b/man/run_lm_model.Rd
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(LegATo)
+
+test_check("LegATo")