Skip to content

Commit

Permalink
Add unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
aubreyodom committed Oct 3, 2024
1 parent 54cb82d commit 01931ee
Show file tree
Hide file tree
Showing 12 changed files with 510 additions and 28 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,14 @@ Suggests:
rmarkdown,
spelling,
TBSignatureProfiler,
testthat (>= 3.0.0),
usethis,
vegan
VignetteBuilder:
knitr
BiocType: Software
biocViews: MicrobiomeData, ReproducibleResearch, SequencingData
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
LazyData: FALSE
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(clean_MAE)
export(create_formatted_MAE)
export(distinctColors)
export(filter_MAE)
export(filter_animalcules_MAE)
export(get_long_data)
export(get_stacked_data)
export(get_summary_table)
Expand Down
21 changes: 20 additions & 1 deletion R/NMIT.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,24 @@
return(MAE)
}

# Helper function for if standard deviation is 0 during stats::cor
safe_cor <- function(x, y = NULL, method = "pearson") {
df <- x[, colSums(x) != 0]
tryCatch({
# Try to compute the correlation
result <- cor(x = df, y, method = method)
return(result)
}, error = function(e) {
# If an error occurs (like zero standard deviation), return NA or a custom message
if (grepl("the standard deviation is zero", e$message)) {
return(NA)
} else {
# Return the error message if it's another issue
stop(e)
}
})
}

#' Calculate within-subject OTU correlations
#'
#' This function takes a \code{MultiAssayExperiment} and outputs an array of
Expand Down Expand Up @@ -49,7 +67,8 @@ tscor <- function(dat, unit_var, method = "kendall", fill_na = 0) {
otu_list <- split(otus, sub_split)

# Pairwise correlations for each OTU sub-table
cor_list <- lapply(otu_list, stats::cor, method = method)
cor_list <- lapply(otu_list, safe_cor, method = method)

names.cor <- names(cor_list)
names.otu <- colnames(otus)

Expand Down
42 changes: 28 additions & 14 deletions R/create_formatted_MAE.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,42 @@
#' @returns A \code{MultiAssayExperiment} object.
#' @examples
#' nsample <- ntaxa <- 3
#' counts_dat <- data.frame("X123" = runif(ntaxa, 0, 500),
#' "X456" = runif(ntaxa, 0, 500),
#' "X789" = runif(ntaxa, 0, 500))
#' tax_dat <- data.frame("class" = c("rand1", "rand2", "rand3"),
#' "species" = c("rand4", "rand5", "rand6")) |>
#' as.data.frame()
#' counts_dat <- data.frame(
#' "X123" = runif(ntaxa, 0, 500),
#' "X456" = runif(ntaxa, 0, 500),
#' "X789" = runif(ntaxa, 0, 500)
#' )
#' tax_dat <- data.frame(
#' "class" = c("rand1", "rand2", "rand3"),
#' "species" = c("rand4", "rand5", "rand6")
#' ) |>
#' as.data.frame()
#' # Set rownames as lowest unique taxonomic level
#' rownames(tax_dat) <- tax_dat$species
#' metadata <- data.frame(Sample = c("X123", "X456", "X789"),
#' Group = c("A", "B", "A"),
#' Var = rnorm(nsample))
#' rownames(counts_dat) <- tax_dat$species
#' metadata <- data.frame(
#' Sample = c("X123", "X456", "X789"),
#' Group = c("A", "B", "A"),
#' Var = rnorm(nsample)
#' )
#' rownames(metadata) <- metadata$Sample
#' out_MAE <- create_formatted_MAE(counts_dat, tax_dat, metadata)
#' out_MAE
#'
#'
#' # TreeSummarizedExperiment
#' tse <- TreeSummarizedExperiment::TreeSummarizedExperiment(
#' assays = list(counts = counts_dat),
#' colData = metadata,
#' rowData = tax_dat
#' )
#' out_MAE_2 <- create_formatted_MAE(tree_SE = tse)
#' out_MAE_2

create_formatted_MAE <- function(counts_dat = NULL, tax_dat = NULL, metadata_dat = NULL,
tree_SE = NULL) {
if (class(tree_SE) == "TreeSummarizedExperiment") {
counts_dat <- assays(tree_SE)[[1]]
tax_dat <- rowData(tree_SE)
metadata_dat <- colData(tree_SE) |> as.data.frame()
counts_dat <- SummarizedExperiment::assays(tree_SE)[[1]]
tax_dat <-SummarizedExperiment::rowData(tree_SE)
metadata_dat <- SummarizedExperiment::colData(tree_SE) |> as.data.frame()
} else {
if (is.null(counts_dat) | is.null(tax_dat) | is.null(metadata_dat)) {
stop("Please supply counts, taxonomy, and metadata tables.")
Expand Down
83 changes: 83 additions & 0 deletions R/filter_animalcules_MAE.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
utils::globalVariables(".")

#' Filter a MultiAssayExperiment to a top percentage of taxa and label the rest
#' as "Other"
#'
#' This function takes an animalcules-formatted \code{MultiAssayExperiment}
#' (MAE) object and identifies all taxa at the "genus" level that represent
#' <\code{filter_prop} average relative abundance across all samples in the MAE.
#' After identification at the genus level, taxa across the genus and species
#' levels are then consolidated into the category "Other".
#'
#' @inheritParams plot_stacked_bar
#' @param filter_prop A double strictly between 0 and 1, representing
#' the proportion of relative abundance at which to filter. Default is 0.001.
#'
#' @returns An animalcules-formatted \code{MultiAssayExperiment} object with
#' appropriate filtration.
#'
#' @export
#' @importFrom rlang .data
#'
#' @examples
#' in_dat <- system.file("extdata/MAE_small.RDS", package = "LegATo") |> readRDS()
#' filter_animalcules_MAE(in_dat, 0.01)
#'

filter_animalcules_MAE <- function(dat, filter_prop = 0.001) {
if(filter_prop <= 0 | filter_prop >= 1) stop("filter_prop must be between 0 and 1.")
# Extract metadata, taxonomic info, and counts
parsed <- parse_MAE_SE(dat, which_assay = "MicrobeGenetics", type = "MAE")
tax_table <- parsed$tax
sam_table <- parsed$sam
counts_table <- parsed$counts

all_relabu_genus <- counts_table %>%
as.matrix() %>%
# Get rel abu within samples
prop.table(margin = 2) %>%
tibble::as_tibble() %>%
dplyr::bind_cols("genus" = tax_table$genus) %>%
dplyr::relocate("genus") %>%
dplyr::group_by(.data$genus) %>%
# Sum rel abu within samples/columns for genera
dplyr::summarise(dplyr::across(.fns = sum, .cols = dplyr::everything())) %>%
# Sum everything but the first columm ("genus")
dplyr::mutate("allmeans" = rowMeans(dplyr::select(., -1))) %>%
dplyr::select("genus", "allmeans") %>%
dplyr::mutate("genus" = replace(.data$genus, is.na(.data$genus), "Unknown")) %>%
dplyr::arrange(dplyr::desc(.data$allmeans)) %>%
dplyr::mutate("lessthanXpct" = .data$allmeans < filter_prop)

# Identify species in other
othergenera <- all_relabu_genus %>%
dplyr::filter(.data$lessthanXpct == TRUE) %>%
dplyr::select("genus") %>% unlist() %>% unname()

## Use the identified species to update the tax, species tables

# Replace tax table
tax_table_other <- tax_table %>%
dplyr::mutate("genus" = replace(.data$genus, is.na(.data$genus), "Unknown")) %>%
dplyr::mutate(dplyr::across(c(.data$genus, .data$species),
function(x) replace(x, .data$genus %in% othergenera, "Other")))

# Resum the counts table
counts_other <- counts_table %>%
dplyr::bind_cols("species" = tax_table_other$species) %>%
dplyr::group_by(.data$species) %>%
dplyr::summarise(dplyr::across(.cols = dplyr::all_of(colnames(counts_table)),
.fns = sum)) %>%
tibble::column_to_rownames("species")

# Adjust tax table accordingly
tax_table_other2 <- tax_table_other %>%
dplyr::distinct(.data$species, .keep_all = TRUE) %>%
dplyr::arrange(.data$species)

rownames(tax_table_other2) <- tax_table_other2$species

## Create SE object
MAE_out <- create_formatted_MAE(counts_other, tax_table_other2, sam_table)
return(MAE_out)
}
2 changes: 1 addition & 1 deletion R/plot_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -333,5 +333,5 @@ plot_heatmap <- function(inputData, annotationData = NULL, plot_title = NULL,
column_order = column_order,
cluster_columns = TRUE, ...),
annotation_legend_side = "bottom")
return()
return(NULL)
}
2 changes: 1 addition & 1 deletion R/run_lm_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ test_models_lm <- function(tn, input_df, fixed_cov,
return(res_out)
}

#' Compute linear models (LMM) on microbiome data
#' Compute linear models (LM) on microbiome data
#'
#' This function takes an animalcules-formatted \code{MultiAssayExperiment} and
#' runs an independent linear model for each taxon. The model predicts taxon log
Expand Down
34 changes: 24 additions & 10 deletions man/create_formatted_MAE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions man/filter_animalcules_MAE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/run_lm_model.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# This file is part of the standard setup for testthat.
# It is recommended that you do not modify it.
#
# Where should you do additional test configuration?
# Learn more about the roles of various files in:
# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
# * https://testthat.r-lib.org/articles/special-files.html

library(testthat)
library(LegATo)

test_check("LegATo")
Loading

0 comments on commit 01931ee

Please sign in to comment.