From 31bb9fb6e39d2a6b004b29e9cabb598dfe117dce Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Wed, 20 Aug 2025 17:19:31 +0200
Subject: [PATCH 1/2] add limma

---
 .../limma_removebatcheffect/config.vsh.yaml  | 37 ++++++++++
 src/methods/limma_removebatcheffect/script.R | 74 +++++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 src/methods/limma_removebatcheffect/config.vsh.yaml
 create mode 100644 src/methods/limma_removebatcheffect/script.R

diff --git a/src/methods/limma_removebatcheffect/config.vsh.yaml b/src/methods/limma_removebatcheffect/config.vsh.yaml
new file mode 100644
index 00000000..f679da7b
--- /dev/null
+++ b/src/methods/limma_removebatcheffect/config.vsh.yaml
@@ -0,0 +1,37 @@
+__merge__: /src/api/comp_method.yaml
+name: limma_removebatcheffect
+label: limma removeBatchEffect
+summary: Classical linear model-based batch correction from the limma package
+description: |
+  The removeBatchEffect function from the limma package performs linear model-based batch correction.
+  It fits a linear model to remove batch effects while preserving the biological effects of interest.
+  This is a classical approach that works at the feature level to directly correct the expression values.
+
+  The method fits a linear model with batch as a covariate and removes the batch effects from the data
+  while preserving biological variation. It is particularly useful for microarray and bulk RNA-seq data,
+  and has been adapted for single-cell RNA-seq applications.
+references:
+  # Ritchie ME, Phipson B, Wu D, Hu Y, Law CW, Shi W, Smyth GK.
+  # limma powers differential expression analyses for RNA-sequencing and microarray studies.
+  # Nucleic Acids Res. 2015;43(7):e47.
+  doi: 10.1093/nar/gkv007
+links:
+  repository: https://bioconductor.org/packages/limma/
+  documentation: https://bioconductor.org/packages/limma/
+info:
+  method_types: [feature]
+  preferred_normalization: log_cp10k
+resources:
+  - type: r_script
+    path: script.R
+engines:
+  - type: docker
+    image: openproblems/base_r:1
+    setup:
+      - type: r
+        bioc: limma
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowcpu, midmem, midtime]
diff --git a/src/methods/limma_removebatcheffect/script.R b/src/methods/limma_removebatcheffect/script.R
new file mode 100644
index 00000000..7f799cde
--- /dev/null
+++ b/src/methods/limma_removebatcheffect/script.R
@@ -0,0 +1,74 @@
+cat("Loading dependencies\n")
+suppressPackageStartupMessages({
+  requireNamespace("anndata", quietly = TRUE)
+  library(Matrix, warn.conflicts = FALSE)
+  library(limma, warn.conflicts = FALSE)
+})
+
+## VIASH START
+par <- list(
+  input = 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
+  output = 'output.h5ad'
+)
+meta <- list(
+  name = "limma_removebatcheffect"
+)
+## VIASH END
+
+cat("Read input\n")
+adata <- anndata::read_h5ad(par$input)
+
+cat("Extract data and metadata\n")
+# Transpose the normalized layer so that columns are the units the design rows describe
+expr_data <- t(adata$layers[["normalized"]])
+batch_info <- adata$obs[["batch"]]
+obs <- adata$obs
+var <- adata$var
+
+# Fail fast if the batch column is absent: with a NULL batch removeBatchEffect
+# has nothing to remove, so uncorrected values would be written silently
+if (is.null(batch_info)) {
+  stop("Input is missing the 'batch' column in obs", call. = FALSE)
+}
+# Keep only batch levels that actually occur in the data
+batch_info <- droplevels(as.factor(batch_info))
+
+# Convert row-compressed input to column-compressed form directly, without
+# materialising a dense intermediate copy
+if (inherits(expr_data, "dgRMatrix")) {
+  expr_data <- as(expr_data, "CsparseMatrix")
+}
+
+cat("Apply limma removeBatchEffect\n")
+# Intercept-only design: no biological covariates are modelled explicitly,
+# so only the batch term is estimated and removed
+design <- matrix(1, nrow = ncol(expr_data), ncol = 1)
+colnames(design) <- "Intercept"
+rownames(design) <- colnames(expr_data)
+
+# Apply batch correction using limma's removeBatchEffect
+corrected_data <- limma::removeBatchEffect(
+  x = expr_data,
+  batch = batch_info,
+  design = design
+)
+
+cat("Prepare output\n")
+# Output holds the corrected matrix (transposed back) and no obs/var columns
+output <- anndata::AnnData(
+  obs = obs[, c()],
+  var = var[, c()],
+  layers = list(
+    corrected_counts = t(corrected_data)
+  ),
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = meta$name
+  )
+)
+
+cat("Write output to file\n")
+zzz <- output$write_h5ad(par$output, compression = "gzip")
+
+cat("Finished\n")

From 54d6200aab64336198a040b9e5d20554c2e9cf17 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt
Date: Wed, 20 Aug 2025 17:20:14 +0200
Subject: [PATCH 2/2] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 041bd729..25e0b706 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52).
 
 * Added `method/drvi` component (PR #61).
+* Added `method/limma_removebatcheffect` component (PR #79).
 
 ## Minor changes
 