openproblems-bio · rcannood · Aug 20, 2025 · Aug 20, 2025 · Sep 25, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@
 * Added `methods/stacas` new method (PR #58).
     - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations.
 * Added `method/drvi` component (PR #61).
+
 * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68).
 
 * Added `metrics/cilisi` new metric component (PR #57).
@@ -14,6 +15,8 @@
         overcorrected datasets with removed cell type signals.
         We propose adding this metric to substitute iLISI.
 
+* Added `method/limma_removebatcheffect` component (PR #79).
+
 ## Minor changes
 
 * Un-pin the scPRINT version and update parameters (PR #51)

diff --git a/src/methods/limma_removebatcheffect/config.vsh.yaml b/src/methods/limma_removebatcheffect/config.vsh.yaml
@@ -0,0 +1,37 @@
+__merge__: /src/api/comp_method.yaml
+name: limma_removebatcheffect
+label: limma removeBatchEffect
+summary: Classical linear model-based batch correction from the limma package
+description: |
+  The removeBatchEffect function from the limma package performs linear model-based batch correction. 
+  It fits a linear model to remove batch effects while preserving the biological effects of interest.
+  This is a classical approach that works at the feature level to directly correct the expression values.
+
+  The method fits a linear model with batch as a covariate and removes the batch effects from the data 
+  while preserving biological variation. It is particularly useful for microarray and bulk RNA-seq data,
+  and has been adapted for single-cell RNA-seq applications.
+references:
+  # Ritchie ME, Phipson B, Wu D, Hu Y, Law CW, Shi W, Smyth GK.
+  # limma powers differential expression analyses for RNA-sequencing and microarray studies.
+  # Nucleic Acids Res. 2015;43(7):e47.
+  doi: 10.1093/nar/gkv007
+links:
+  repository: https://bioconductor.org/packages/limma/
+  documentation: https://bioconductor.org/packages/limma/
+info:
+  method_types: [feature]
+  preferred_normalization: log_cp10k
+resources:
+  - type: r_script
+    path: script.R
+engines:
+  - type: docker
+    image: openproblems/base_r:1
+    setup:
+      - type: r
+        bioc: limma
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowcpu, midmem, midtime]
diff --git a/src/methods/limma_removebatcheffect/script.R b/src/methods/limma_removebatcheffect/script.R
@@ -0,0 +1,65 @@
+cat("Loading dependencies\n")
+suppressPackageStartupMessages({
+  requireNamespace("anndata", quietly = TRUE)
+  library(Matrix, warn.conflicts = FALSE)
+  library(limma, warn.conflicts = FALSE)
+})
+
+## VIASH START
+par <- list(
+  input = 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
+  output = 'output.h5ad'
+)
+meta <- list(
+  name = "limma_removebatcheffect"
+)
+## VIASH END
+
+cat("Read input\n")
+adata <- anndata::read_h5ad(par$input)
+
+cat("Extract data and metadata\n")
+# Extract normalized data
+expr_data <- t(adata$layers[["normalized"]])
+batch_info <- adata$obs[["batch"]]
+obs <- adata$obs
+var <- adata$var
+
+# Convert to dgCMatrix if needed
+if (inherits(expr_data, "dgRMatrix")) {
+  dense_temp <- as.matrix(expr_data)
+  expr_data <- as(dense_temp, "dgCMatrix")
+}
+
+cat("Apply limma removeBatchEffect\n")
+# Create design matrix (intercept only, as we want to preserve all biological variation)
+design <- matrix(1, nrow = ncol(expr_data), ncol = 1)
+colnames(design) <- "Intercept"
+rownames(design) <- colnames(expr_data)
+
+# Apply batch correction using limma's removeBatchEffect
+corrected_data <- limma::removeBatchEffect(
+  x = expr_data,
+  batch = batch_info,
+  design = design
+)
+
+cat("Prepare output\n")
+# Create output AnnData object with corrected feature matrix
+output <- anndata::AnnData(
+  obs = obs[, c()],
+  var = var[, c()],
+  layers = list(
+    corrected_counts = t(corrected_data)
+  ),
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = meta$name
+  )
+)
+
+cat("Write output to file\n")
+zzz <- output$write_h5ad(par$output, compression = "gzip")
+
+cat("Finished\n")