openproblems-bio · schafferde · Oct 19, 2025 · Oct 19, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@
 * Added `methods/stacas` new method (PR #58).
     - Add non-supervised version of STACAS tool for integration of single-cell transcriptomics data. This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations.
 * Added `method/drvi` component (PR #61).
+
+* Added `method/sca` component.
+    - Add Surprisal Component Analysis (SCA) for dimensionality reduction.
+
 * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68).
 
 * Added `metrics/cilisi` new metric component (PR #57).

diff --git a/src/methods/sca/config.vsh.yaml b/src/methods/sca/config.vsh.yaml
@@ -0,0 +1,45 @@
+__merge__: /src/api/comp_method.yaml
+name: sca
+label: SCA
+summary: "SCA: recovering single-cell heterogeneity through information-based dimensionality reduction"
+description: |
+  Surprisal Component Analysis (SCA) is a dimensionality reduction technique 
+  for single-cell data which leverages mathematical information theory to 
+  identify biologically informative axes of variation in single-cell
+  transcriptomic data, enabling recovery of rare and common cell types.
+references:
+  # DeMeo B & Berger B. 
+  # SCA: recovering single-cell heterogeneity through information-based dimensionality reduction
+  # Genome Biol 24, 195 (2023). https://doi.org/10.1186/s13059-023-02998-7
+  doi: 10.1186/s13059-023-02998-7
+links:
+  repository: https://github.com/bendemeo/shannonca
+  documentation: https://shannonca.readthedocs.io/en/latest/index.html
+info:
+  method_types: [embedding]
+  preferred_normalization: log_cp10k
+arguments:
+  - name: --iters
+    type: integer
+    default: 5
+    description: Number of iterations of SCA.
+  - name: --n_comps
+    type: integer
+    default: 100
+    description: Number of dimensions of the output embedding.
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi:
+          - shannonca
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowcpu, highmem, midtime]
diff --git a/src/methods/sca/script.py b/src/methods/sca/script.py
@@ -0,0 +1,68 @@
+import sys
+import anndata as ad
+from shannonca.dimred import reduce_scanpy
+
+
+## VIASH START
+# Example values for running this script directly; paths are relative.
+par = {
+    "input": "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
+    "output": "output.h5ad",
+    "iters": 5,
+    "n_comps": 100
+}
+meta = {
+    "name": "sca",
+    # Directory containing read_anndata_partial.py, which is imported below
+    # after being put on sys.path; without this key a direct run fails with
+    # a KeyError on meta["resources_dir"].
+    "resources_dir": "src/utils",
+}
+## VIASH END
+
+# The helper module must be on sys.path before it can be imported.
+sys.path.append(meta["resources_dir"])
+from read_anndata_partial import read_anndata
+
+print(">> Read input", flush=True)
+# Load the normalized layer as X, together with obs, var and uns.
+adata = read_anndata(
+    par["input"],
+    X='layers/normalized',
+    obs="obs",
+    var="var",
+    uns="uns"
+)
+
+print(">> Run SCA", flush=True)
+# With key_added='sca' the embedding is read back from adata.obsm['X_sca'] below.
+# NOTE(review): layer=None presumably makes SCA use adata.X (the normalized
+# layer loaded above) -- confirm against the shannonca documentation.
+reduce_scanpy(
+    adata,
+    keep_loadings=False,
+    layer=None,
+    key_added='sca',
+    iters=par['iters'],
+    n_comps=par['n_comps'],
+)
+
+print("Store output", flush=True)
+# Keep only the obs/var indices (no columns), the embedding, and the
+# identifying metadata copied from the input's uns.
+output = ad.AnnData(
+    obs=adata.obs[[]],
+    var=adata.var[[]],
+    obsm={
+        "X_emb": adata.obsm['X_sca']
+    },
+    shape=adata.shape,
+    uns={
+        "dataset_id": adata.uns["dataset_id"],
+        "normalization_id": adata.uns["normalization_id"],
+        "method_id": meta["name"],
+    }
+)
+
+print("Write output to file", flush=True)
+output.write_h5ad(par["output"], compression="gzip")
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
@@ -98,6 +98,7 @@ dependencies:
   - name: methods/liger
   - name: methods/mnnpy
   - name: methods/pyliger
+  - name: methods/sca
   - name: methods/scalex
   - name: methods/scanorama
   - name: methods/scanvi

diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
@@ -26,6 +26,7 @@ methods = [
   liger,
   mnnpy,
   pyliger,
+  sca,
   scalex,
   scanorama,
   scanvi,
-Original file line number
+Diff line change
@@ Expand Up / @@ -26,6 +26,7 @@ methods = [ @@
       liger,
       mnnpy,
       pyliger,
+      sca,
       scalex,
       scanorama,
       scanvi,
@@ Expand Down @@