Skip to content

Commit

Permalink
Update for review
Browse files Browse the repository at this point in the history
  • Loading branch information
aubreyodom committed Oct 1, 2024
1 parent 9225cad commit 54cb82d
Show file tree
Hide file tree
Showing 9 changed files with 3,538 additions and 45 deletions.
3,457 changes: 3,457 additions & 0 deletions .test.txt

Large diffs are not rendered by default.

62 changes: 44 additions & 18 deletions R/create_formatted_MAE.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
#' Create a formatted MultiAssayExperiment compatible with LegATo
#'
#' This function takes a counts_dat, tax_dat, or metadata_dat input and creates
#' This function takes either a counts_dat, tax_dat, and metadata_dat input OR
#' a TreeSummarizedExperiment input and creates
#' a specifically-formatted MAE object that is compatible for use with LegATo
#' and animalcules. Checks are performed on inputs to ensure that they can be
#' integrated properly.
#'
#' @param counts_dat A matrix, data.table, or data.frame consisting of microbial
#' raw counts data. The \code{colnames} should be sample names and the
#' \code{rownames} should be in the same order as the \code{tax_dat} entries.
#' Not required if \code{tree_SE} is passed in.
#' @param tax_dat A matrix, data.table, or data.frame of hierarchical taxonomic
#' data. Should have columns such as "family", "genus", "species" with each
#' row uniquely delineating a different taxon. The rows should be in the same
#' order as the rows of \code{counts_dat}.
#' order as the rows of \code{counts_dat}. Not required if \code{tree_SE} is
#' passed in.
#' @param metadata_dat A metadata table with \code{rownames} equivalent to the
#' samples that are the \code{colnames} of the \code{counts_dat}.
#' samples that are the \code{colnames} of the \code{counts_dat}. Not required
#' if \code{tree_SE} is passed in.
#' @param tree_SE A TreeSummarizedExperiment object with counts, taxonomy, and
#' metadata.
#'
#' @export
#' @returns A \code{MultiAssayExperiment} object.
Expand All @@ -35,20 +41,40 @@
#' out_MAE
#'

create_formatted_MAE <- function(counts_dat, tax_dat, metadata_dat) {
# Check that inputs are matrix, data.table, or data.frame?
# Check that the row and column names conform, same dimensions...
# All taxon names should be lowercase
se_mgx <- counts_dat %>% base::data.matrix() %>% S4Vectors::SimpleList() %>%
magrittr::set_names("MGX")
se_rowData <- tax_dat %>% base::data.frame() %>%
dplyr::mutate_all(as.character) %>% S4Vectors::DataFrame()
se_colData <- metadata_dat %>% S4Vectors::DataFrame()

microbe_se <- SummarizedExperiment::SummarizedExperiment(
assays = se_mgx, colData = se_colData, rowData = se_rowData)
MAE_out <- MultiAssayExperiment::MultiAssayExperiment(
experiments = S4Vectors::SimpleList(MicrobeGenetics = microbe_se),
colData = se_colData)
create_formatted_MAE <- function(counts_dat = NULL, tax_dat = NULL,
                                 metadata_dat = NULL, tree_SE = NULL) {
  # Build a MultiAssayExperiment holding a single "MicrobeGenetics"
  # SummarizedExperiment whose only assay is named "MGX".
  #
  # Inputs are taken from `tree_SE` when it is a TreeSummarizedExperiment;
  # otherwise `counts_dat`, `tax_dat`, and `metadata_dat` must all be supplied.
  #
  # Raises an error when the three tables are not all supplied, when one of
  # them is not a matrix / data.frame (data.table and tibble included), or
  # when their dimensions do not agree.
  if (inherits(tree_SE, "TreeSummarizedExperiment")) {
    # Only the first assay of the container is used as the counts table.
    counts_dat <- SummarizedExperiment::assays(tree_SE)[[1]]
    tax_dat <- SummarizedExperiment::rowData(tree_SE)
    metadata_dat <- SummarizedExperiment::colData(tree_SE) |> as.data.frame()
  } else {
    if (is.null(counts_dat) || is.null(tax_dat) || is.null(metadata_dat)) {
      stop("Please supply counts, taxonomy, and metadata tables.",
           call. = FALSE)
    }

    allowed <- c("matrix", "data.table", "data.frame", "tibble")
    # class() has length > 1 for matrices (c("matrix", "array")), data.tables
    # and tibbles, so `class(x) %in% allowed` is not a valid scalar condition
    # for `if`. is.matrix()/is.data.frame() always give one TRUE/FALSE and
    # cover data.table and tibble, which both inherit from data.frame.
    check_table <- function(x, arg_name) {
      if (!(is.matrix(x) || is.data.frame(x))) {
        stop(arg_name, " should be one of: ",
             paste(allowed, collapse = ", "), call. = FALSE)
      }
    }
    check_table(counts_dat, "counts_dat")
    check_table(metadata_dat, "metadata_dat")
    check_table(tax_dat, "tax_dat")

    if (nrow(tax_dat) != nrow(counts_dat)) {
      stop("The number of rows of tax_dat and counts_dat should be equal.",
           call. = FALSE)
    }
    if (nrow(metadata_dat) != ncol(counts_dat)) {
      stop("The number of rows of metadata_dat and columns of counts_dat ",
           "should be equal.", call. = FALSE)
    }
  }

  # Counts become a numeric matrix wrapped in a one-element list named "MGX".
  se_mgx <- counts_dat %>%
    base::data.matrix() %>%
    S4Vectors::SimpleList() %>%
    magrittr::set_names("MGX")
  # Every taxonomy column is stored as character.
  se_rowData <- tax_dat %>%
    base::data.frame() %>%
    dplyr::mutate_all(as.character) %>%
    S4Vectors::DataFrame()
  se_colData <- metadata_dat %>%
    S4Vectors::DataFrame()

  microbe_se <- SummarizedExperiment::SummarizedExperiment(
    assays = se_mgx, colData = se_colData, rowData = se_rowData)

  # The same sample metadata is attached to the experiment and to the MAE.
  MultiAssayExperiment::MultiAssayExperiment(
    experiments = S4Vectors::SimpleList(MicrobeGenetics = microbe_se),
    colData = se_colData)
}
9 changes: 4 additions & 5 deletions R/plot_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,10 @@ distinctColors <- function(n, hues = c("red", "cyan", "orange", "blue",
v <- seq(from = value.range[2], to = value.range[1], length = num.vs)

## Create all combinations of hues with saturation/value pairs
new.hsv <- c()
for (i in seq_len(num.vs)) {
temp <- rbind(hues.hsv[1, ], s[i], v[i])
new.hsv <- cbind(new.hsv, temp)
}
new.hsv <- dplyr::tibble(V1 = rep(hues.hsv[1, ], num.vs),
V2 = rep(s, each = length(hues)),
V3 = rep(v, each = length(hues))) |>
t() |> as.matrix() |> magrittr::set_rownames(NULL)

## Convert to HEX
col <- grDevices::hsv(new.hsv[1, ], new.hsv[2, ], new.hsv[3, ])
Expand Down
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,5 @@ LegATo requires R Version 4.3.
Install the development version of the package from Github:

```
if (!requireNamespace("devtools", quietly=TRUE))
install.packages("devtools")
devtools::install_github("wejlab/LegATo")
BiocManager::install("wejlab/LegATo")
```
2 changes: 1 addition & 1 deletion inst/script/extdata_explanations.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ output: html_document

This file consists of a MultiAssayExperiment object with raw data published in the following paper:

Odom-Mabey, Aubrey R., et al. "Characterization of longitudinal nasopharyngeal microbiome patterns in maternally HIV-exposed Zambian infants." Gates Open Research 6.143 (2022): 143.
Odom AR, Gill CJ, Pieciak R et al. Characterization of longitudinal nasopharyngeal microbiome patterns in maternally HIV-exposed Zambian infants [version 2; peer review: 2 approved with reservations]. Gates Open Res 2024, 6:143 (https://doi.org/10.12688/gatesopenres.14041.2)

The code to create the package data object is reproduced below.

Expand Down
22 changes: 17 additions & 5 deletions man/create_formatted_MAE.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file removed vignettes/Dolosigranulum_timepoint.png
Binary file not shown.
27 changes: 14 additions & 13 deletions vignettes/LegATo_vignette.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ You should note the following:

## Use `create_formatted_MAE()`

So once your data are formatted correctly, you can pretty easily use `create_formatted_MAE()` like so:
Once your data are formatted correctly, you can easily use `create_formatted_MAE()` like so:
```{R}
output <- create_formatted_MAE(counts_dat = counts,
tax_dat = tax,
Expand All @@ -125,6 +125,8 @@ MultiAssayExperiment::assays(output)
SummarizedExperiment::assays(output[["MicrobeGenetics"]])
```

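If your data are in the format of a TreeSummarizedExperiment, you can call `create_formatted_MAE(tree_SE = tree_SE)` to create an MAE output that is compatible with the package. The `tree_SE` argument must be passed by name, because the first positional argument of `create_formatted_MAE()` is `counts_dat`.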

## Adding information to your metadata later in your analysis

If information needs to be added to your data object at some point in the analysis, it is easiest to manipulate the raw data objects (potentially via `parse_MAE_SE()`) and then recreate the `MAE` object with `create_formatted_MAE()`.
Expand All @@ -133,7 +135,7 @@ If information needs to be added to your data object at some point in the analys

To illustrate the capabilities of LegATo, we will turn to a published dataset from the following paper:

Odom-Mabey AR, Gill CJ, Pieciak R et al. Characterization of longitudinal nasopharyngeal microbiome patterns in maternally HIV-exposed Zambian infants [version 1; peer review: 1 approved with reservations]. Gates Open Res 2022, 6:143 (https://doi.org/10.12688/gatesopenres.14041.1)
Odom AR, Gill CJ, Pieciak R et al. Characterization of longitudinal nasopharyngeal microbiome patterns in maternally HIV-exposed Zambian infants [version 2; peer review: 2 approved with reservations]. Gates Open Res 2024, 6:143 (https://doi.org/10.12688/gatesopenres.14041.2)

The raw dataset is archived in Zenodo:

Expand Down Expand Up @@ -180,7 +182,7 @@ dat_cleaned <- clean_MAE(dat_subsetted)
Many metagenomic pipelines identify taxon abundances at extremely small levels, which can be noisy to deal with in an analysis. The `filter_MAE` function smoothly transforms reads belonging to taxa that fall under the genus-level thresholds given by the `relabu_threshold` and `occur_pct_cutoff` arguments, which we will set as 0.05 and 10, respectively.

```{R}
dat_filt <- filter_MAE(dat_cleaned)
dat_filt <- filter_MAE(dat_cleaned, relabu_threshold = 0.05, occur_pct_cutoff = 10)
```

### Parse MAE to extract data
Expand Down Expand Up @@ -255,13 +257,13 @@ plot_alluvial(dat = dat_filt,
We can create spaghetti or volatility plots to elucidate changes over time on a sample level for a given taxon. This is advantageous as other visualization methods are often aggregates of multiple samples and lack granularity. These plots can be created with `plot_spaghetti()`.

```{R, results = "asis", message = FALSE}
p <- plot_spaghetti(dat = dat_filt,
plot_spaghetti(dat = dat_filt,
covariate_time = "timepoint",
covariate_1 = "HIVStatus",
unit_var = "Subject",
taxon_level = "genus",
which_taxon = "Staphylococcus",
palette_input= this_palette,
palette_input = this_palette,
title = "Spaghetti Plot",
subtitle = NULL) +
ggplot2::xlab("Infant Age (Days)") +
Expand Down Expand Up @@ -390,9 +392,8 @@ head(output) |> knitr::kable(caption = "GEE Outputs")

You can also create plots of the covariates, which will be saved to a folder specified by the user:

```{R}
tempfolder <- tempfile()
dir.create(tempfolder)
```{R, out.width = "50%", fig.align = "center", echo = FALSE}
tempfolder <- tempdir()
# Trying out plotting
output <- run_gee_model(dat_1, unit_var = "Subject",
Expand All @@ -404,12 +405,12 @@ output <- run_gee_model(dat_1, unit_var = "Subject",
plot_terms = "timepoint",
width = 6, height = 4, units = "in", scale = 0.7)
unlink(tempfolder, recursive = TRUE)
```
list.files(tempfolder)
knitr::include_graphics(file.path(tempfolder, "Dolosigranulum_timepoint.png"))
knitr::include_graphics(file.path(tempfolder, "Streptococcus_timepoint.png"))
```{R, out.width = "50%", fig.align = "center", echo = FALSE}
knitr::include_graphics("Dolosigranulum_timepoint.png")
knitr::include_graphics("Streptococcus_timepoint.png")
unlink(tempfolder, recursive = TRUE)
```

### Linear Mixed Models
Expand Down
Binary file removed vignettes/Streptococcus_timepoint.png
Binary file not shown.

0 comments on commit 54cb82d

Please sign in to comment.