diff --git a/CHANGELOG.md b/CHANGELOG.md index 890c4eb7..049e2db9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ * Added `methods/stacas` new method (PR #58). - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations. * Added `method/drvi` component (PR #61). + +* Added `method/sca` component. + - Add Surprisal Component Analysis (SCA) for dimensionality reduction + * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68). * Added `metrics/cilisi` new metric component (PR #57). diff --git a/src/methods/sca/config.vsh.yaml b/src/methods/sca/config.vsh.yaml new file mode 100644 index 00000000..530682f1 --- /dev/null +++ b/src/methods/sca/config.vsh.yaml @@ -0,0 +1,45 @@ +__merge__: /src/api/comp_method.yaml +name: sca +label: SCA +summary: "SCA: recovering single-cell heterogeneity through information-based dimensionality reduction" +description: | + Surprisal Component Analysis (SCA) is a dimensionality reduction technique + for single-cell data which leverages mathematical information theory to + identify biologically informative axes of variation in single-cell + transcriptomic data, enabling recovery of rare and common cell types +references: + # DeMeo B & Berger B. + # SCA: recovering single-cell heterogeneity through information-based dimensionality reduction + # Genome Biol 24, 195 (2023). https://doi.org/10.1186/s13059-023-02998-7 + doi: 10.1186/s13059-023-02998-7 +links: + repository: https://github.com/bendemeo/shannonca + documentation: https://shannonca.readthedocs.io/en/latest/index.html +info: + method_types: [embedding] + preferred_normalization: log_cp10k +arguments: + - name: --iters + type: integer + default: 5 + description: Number of iterations of SCA. 
+  - name: --n_comps
+    type: integer
+    default: 100
+    description: Embedding dimension
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi:
+          - shannonca
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowcpu, highmem, midtime]
diff --git a/src/methods/sca/script.py b/src/methods/sca/script.py
new file mode 100644
index 00000000..0a814f11
--- /dev/null
+++ b/src/methods/sca/script.py
@@ -0,0 +1,61 @@
+import sys
+import anndata as ad
+from shannonca.dimred import reduce_scanpy
+
+
+## VIASH START
+# Development defaults for running this script from the repository root.
+# NOTE(review): Viash is expected to overwrite this block at build time.
+par = {
+    "input": "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
+    "output": "output.h5ad",
+    "iters": 5,
+    "n_comps": 100
+}
+meta = {
+    "name": "sca",
+    # Folder that holds read_anndata_partial.py (listed in config.vsh.yaml as
+    # /src/utils/read_anndata_partial.py). Without this key the sys.path line
+    # below raises KeyError when the script is run with these defaults.
+    "resources_dir": "src/utils",
+}
+## VIASH END
+
+# Make the shared reader in the resources directory importable.
+sys.path.append(meta["resources_dir"])
+from read_anndata_partial import read_anndata
+
+print(">> Read input", flush=True)
+# Request the normalized layer as X together with obs, var and uns.
+adata = read_anndata(
+    par["input"],
+    X='layers/normalized',
+    obs="obs",
+    var="var",
+    uns="uns"
+)
+
+print(">> Run SCA", flush=True)
+# The embedding is read back from obsm['X_sca'] below; presumably
+# reduce_scanpy stores it there because key_added='sca' (confirm in shannonca docs).
+reduce_scanpy(adata, keep_loadings=False, layer=None, key_added='sca', iters=par['iters'], n_comps=par['n_comps'])
+
+
+print("Store output", flush=True)
+# Keep only the obs/var indices (no columns), the embedding and provenance ids.
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    obsm={
+        "X_emb": adata.obsm['X_sca']
+    },
+    shape=adata.shape,
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        "method_id": meta["name"],
+    }
+)
+
+print("Write output to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
index 09905ad0..946d3f74 100644
--- a/src/workflows/run_benchmark/config.vsh.yaml
+++ b/src/workflows/run_benchmark/config.vsh.yaml
@@ -98,6 +98,7 @@ dependencies:
   - name: methods/liger
   - name: methods/mnnpy
   - name: methods/pyliger
+  - name: methods/sca
   - name: methods/scalex
   - name: methods/scanorama
   - name: methods/scanvi
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
index 6196f749..bbc6442d 100644
--- a/src/workflows/run_benchmark/main.nf
+++ b/src/workflows/run_benchmark/main.nf
@@ -26,6 +26,7 @@ methods = [
   liger,
   mnnpy,
   pyliger,
+  sca,
   scalex,
   scanorama,
   scanvi,