diff --git a/CHANGELOG.md b/CHANGELOG.md index 890c4eb7..f9165618 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Added `methods/stacas` new method (PR #58). - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations. * Added `method/drvi` component (PR #61). + * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68). * Added `metrics/cilisi` new metric component (PR #57). @@ -14,6 +15,8 @@ overcorrected datasets with removed cell type signals. We propose adding this metric to substitute iLISI. +* Added `method/limma_removebatcheffect` component (PR #79). + ## Minor changes * Un-pin the scPRINT version and update parameters (PR #51) diff --git a/src/methods/limma_removebatcheffect/config.vsh.yaml b/src/methods/limma_removebatcheffect/config.vsh.yaml new file mode 100644 index 00000000..f679da7b --- /dev/null +++ b/src/methods/limma_removebatcheffect/config.vsh.yaml @@ -0,0 +1,37 @@ +__merge__: /src/api/comp_method.yaml +name: limma_removebatcheffect +label: limma removeBatchEffect +summary: Classical linear model-based batch correction from the limma package +description: | + The removeBatchEffect function from the limma package performs linear model-based batch correction. + It fits a linear model to remove batch effects while preserving the biological effects of interest. + This is a classical approach that works at the feature level to directly correct the expression values. + + The method fits a linear model with batch as a covariate and removes the batch effects from the data + while preserving biological variation. It is particularly useful for microarray and bulk RNA-seq data, + and has been adapted for single-cell RNA-seq applications. +references: + # Ritchie ME, Phipson B, Wu D, Hu Y, Law CW, Shi W, Smyth GK. + # limma powers differential expression analyses for RNA-sequencing and microarray studies. + # Nucleic Acids Res. 2015;43(7):e47. + doi: 10.1093/nar/gkv007 +links: + repository: https://bioconductor.org/packages/limma/ + documentation: https://bioconductor.org/packages/limma/ +info: + method_types: [feature] + preferred_normalization: log_cp10k +resources: + - type: r_script + path: script.R +engines: + - type: docker + image: openproblems/base_r:1 + setup: + - type: r + bioc: limma +runners: + - type: executable + - type: nextflow + directives: + label: [lowcpu, midmem, midtime] diff --git a/src/methods/limma_removebatcheffect/script.R b/src/methods/limma_removebatcheffect/script.R new file mode 100644 index 00000000..7f799cde --- /dev/null +++ b/src/methods/limma_removebatcheffect/script.R @@ -0,0 +1,65 @@ +cat("Loading dependencies\n") +suppressPackageStartupMessages({ + requireNamespace("anndata", quietly = TRUE) + library(Matrix, warn.conflicts = FALSE) + library(limma, warn.conflicts = FALSE) +}) + +## VIASH START +par <- list( + input = 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad', + output = 'output.h5ad' +) +meta <- list( + name = "limma_removebatcheffect" +) +## VIASH END + +cat("Read input\n") +adata <- anndata::read_h5ad(par$input) + +cat("Extract data and metadata\n") +# Extract normalized data +expr_data <- t(adata$layers[["normalized"]]) +batch_info <- adata$obs[["batch"]] +obs <- adata$obs +var <- adata$var + +# Convert to dgCMatrix if needed +if (inherits(expr_data, "dgRMatrix")) { + dense_temp <- as.matrix(expr_data) + expr_data <- as(dense_temp, "dgCMatrix") +} + +cat("Apply limma removeBatchEffect\n") +# Create design matrix (intercept only, as we want to preserve all biological variation) +design <- matrix(1, nrow = ncol(expr_data), ncol = 1) +colnames(design) <- "Intercept" +rownames(design) <- colnames(expr_data) + +# Apply batch correction using limma's removeBatchEffect +corrected_data <- limma::removeBatchEffect( + x = expr_data, + batch = batch_info, + design = design +) + +cat("Prepare output\n") +# Create output AnnData object with corrected feature matrix +output <- anndata::AnnData( + obs = obs[, c()], + var = var[, c()], + layers = list( + corrected_counts = t(corrected_data) + ), + uns = list( + dataset_id = adata$uns[["dataset_id"]], + normalization_id = adata$uns[["normalization_id"]], + method_id = meta$name + ) +) + +cat("Write output to file\n") +zzz <- output$write_h5ad(par$output, compression = "gzip") + +cat("Finished\n")