diff --git a/lib/lcdbwf/R/helpers.R b/lib/lcdbwf/R/helpers.R index c57db9d07..1daebae59 100644 --- a/lib/lcdbwf/R/helpers.R +++ b/lib/lcdbwf/R/helpers.R @@ -9,6 +9,7 @@ library(heatmaply) library(readr) library(stringr) library(tibble) +library(limma) library(purrr) library(tidyr) @@ -893,3 +894,36 @@ write.clusterprofiler.results <- function(res, cprof.folder, label){ return(list(orig=filename.orig, split=filename.split)) } + +#' Remove batch effect +#' +#' based on the [removeBatchEffect](rdrr.io/bio/limma/src/R/removeBatchEffect.R) function from limma (Gordan Smyth et al.) +#' @param x Numeric matrix containing log-expression values for a series of samples +#' @param batches Vector of batch factors/vectors +#' @param covariates Matrix or vector of numeric covariates to be adjusted for +#' @param design Design matrix relating to treatment conditions to be preserved, usually the design matrix with all experimental factors other than the batch effects +#' @param ... Other arguments are passed to lmFit +removebatchEffect <- +function(x, batches=c(), covariates=NULL, design=matrix(1,ncol(x),1),...) +{ + if(length(batches) == 0 & is.null(covariates)){ + return(as.matrix(x)) + } + batches_processed <- c() + if(length(batches) != 0){ + for(batch in batches){ + batch <- as.factor(batch) + contrasts(batch) <- contr.sum(levels(batch)) + batch <- model.matrix(~batch)[,-1,drop=FALSE] + batches_processed <- rbind(batches_processed, batch) + } + } + if(!is.null(covariates)) { + covariates <- as.matrix(covariates) + } + X.batch <- cbind(batches_processed, covariates) + fit <- lmFit(x, cbind(design, X.batch),...) + beta <- fit$coefficients[,-(1:ncol(design)),drop=FALSE] + beta[is.na(beta)] <- 0 + as.matrix(x) - beta %*% t(X.batch) +} diff --git a/workflows/rnaseq/downstream/helpers.Rmd b/workflows/rnaseq/downstream/helpers.Rmd index e811fe971..e997ca3df 100644 --- a/workflows/rnaseq/downstream/helpers.Rmd +++ b/workflows/rnaseq/downstream/helpers.Rmd @@ -9,6 +9,7 @@ library(heatmaply) library(readr) library(stringr) library(tibble) +library(limma) library(purrr) library(tidyr) @@ -868,4 +869,38 @@ write.clusterprofiler.results <- function(res, cprof.folder, label){ write.table(res.split, file=filename.split, sep='\t', quote=FALSE, row.names=FALSE) return(list(orig=filename.orig, split=filename.split)) } + + +#' Remove batch effect +#' +#' based on the [removeBatchEffect](rdrr.io/bio/limma/src/R/removeBatchEffect.R) function from limma (Gordan Smyth et al.) +#' @param x Numeric matrix containing log-expression values for a series of samples +#' @param batches Vector of batch factors/vectors +#' @param covariates Matrix or vector of numeric covariates to be adjusted for +#' @param design Design matrix relating to treatment conditions to be preserved, usually the design matrix with all experimental factors other than the batch effects +#' @param ... Other arguments are passed to lmFit +removebatchEffect <- +function(x, batches=c(), covariates=NULL, design=matrix(1,ncol(x),1),...) +{ + if(length(batches) == 0 & is.null(covariates)){ + return(as.matrix(x)) + } + batches_processed <- c() + if(length(batches) != 0){ + for(batch in batches){ + batch <- as.factor(batch) + contrasts(batch) <- contr.sum(levels(batch)) + batch <- model.matrix(~batch)[,-1,drop=FALSE] + batches_processed <- rbind(batches_processed, batch) + } + } + if(!is.null(covariates)) { + covariates <- as.matrix(covariates) + } + X.batch <- cbind(batches_processed, covariates) + fit <- lmFit(x, cbind(design, X.batch),...) + beta <- fit$coefficients[,-(1:ncol(design)),drop=FALSE] + beta[is.na(beta)] <- 0 + as.matrix(x) - beta %*% t(X.batch) +} ``` diff --git a/workflows/rnaseq/downstream/test_batch_effects.Rmd b/workflows/rnaseq/downstream/test_batch_effects.Rmd new file mode 100644 index 000000000..820e5676b --- /dev/null +++ b/workflows/rnaseq/downstream/test_batch_effects.Rmd @@ -0,0 +1,46 @@ +# Test removebatchEffects function + +1. Install `airway` package, which has counts data for an experiment described + at + http://bioconductor.org/packages/release/data/experiment/html/airway.html: + +```{r} +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +BiocManager::install("airway") +``` + +2. Load data, show colData + +```{r} +library(airway) +library(DESeq2) +data(airway) +knitr::kable(colData(airway)) +``` + + +3. dds object; run vst; plot PCA: + +```{r} +dds <- DESeqDataSet(airway, design = ~ cell + dex) +rld <- vst(dds, blind=TRUE) +plotPCA(rld, 'cell') +``` + +4. Load helpers + +```{r child='helpers.Rmd', include=FALSE} +rmarkdown::render('helpers.Rmd', run_pandoc=FALSE) +``` + + +5. Attempted usage here...imagine we want to remove the effect of cell type: + +```{r} +coldata <- colData(rld) +no.cell <- removebatchEffect(assay(rld), batches=list(coldata$cell), design=model.matrix(~dex, data=coldata)) +rld.nocell <- rld +assay(rld.nocell) <- no.cell +plotPCA(rld.nocell, 'cell') +```