openproblems-bio · schafferde · Oct 19, 2025 · Oct 19, 2025 · Oct 19, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,12 @@
 * Update scPRINT to use latest stable version (PR #70)
 * Fix kbet dependencies to numpy<2 and scipy<=1.13 (PR #78).
 
+* Split Scanorama into two methods/scores
+    - Split Scanorama into embedding (integrate) and count-correction (correct) modes, instead of running both together.
+        This makes clear what the reported score(s) are describing, and also corrects the misleadingly low score that
+        the combined method receives. The scores for each component are in line with their scores from v1, where the modes
+        were separated.
+
 # task_batch_integration 2.0.0
 
 A major update to the OpenProblems framework, switching from a Python-based framework to a Viash + Nextflow-based framework. This update features the same concepts as the previous version, but with a new implementation that is more flexible, scalable, and maintainable.

diff --git a/src/methods/scanorama/config.vsh.yaml → ...methods/scanorama_correct/config.vsh.yaml b/src/methods/scanorama/config.vsh.yaml → ...methods/scanorama_correct/config.vsh.yaml
@@ -1,6 +1,6 @@
 __merge__: /src/api/comp_method.yaml
-name: scanorama
-label: Scanorama
+name: scanorama_correct
+label: Scanorama-Correct
 summary: Efficient integration of heterogeneous single-cell transcriptomes using Scanorama
 description: |
   Scanorama enables batch-correction and integration of heterogeneous scRNA-seq datasets.
@@ -17,7 +17,7 @@ links:
   repository: https://github.com/brianhie/scanorama
   documentation: https://github.com/brianhie/scanorama#readme
 info:
-  method_types: [feature, embedding]
+  method_types: [feature]
   preferred_normalization: log_cp10k
 resources:
   - type: python_script

diff --git a/src/methods/scanorama/script.py → src/methods/scanorama_correct/script.py b/src/methods/scanorama/script.py → src/methods/scanorama_correct/script.py
@@ -8,8 +8,7 @@
     'output': 'output.h5ad',
 }
 meta = {
-    'name': 'foo',
-    'config': 'bar'
+    'name': 'scanorama-correct',
 }
 ## VIASH END
 
@@ -57,7 +56,7 @@ def merge_adata(*adata_list, **kwargs):
 batch_categories = adata.obs['batch'].cat.categories
 for i in batch_categories:
     split.append(adata[adata.obs['batch'] == i].copy())
-corrected = scanorama.correct_scanpy(split, return_dimred=True)
+corrected = scanorama.correct_scanpy(split, return_dimred=False)
 corrected = merge_adata(*corrected, batch_key='batch', batch_categories=batch_categories, index_unique=None)
 
 print("Store output", flush=True)
@@ -71,9 +70,6 @@ def merge_adata(*adata_list, **kwargs):
     },
     layers={
         'corrected_counts': corrected.X,
-    },
-    obsm={
-        'X_emb': corrected.obsm["X_scanorama"],
     }
 )
 

diff --git a/src/methods/scanorama_integrate/config.vsh.yaml b/src/methods/scanorama_integrate/config.vsh.yaml
@@ -0,0 +1,42 @@
+__merge__: /src/api/comp_method.yaml
+name: scanorama_integrate
+label: Scanorama-Integrate
+summary: Efficient integration of heterogeneous single-cell transcriptomes using Scanorama
+description: |
+  Scanorama enables batch-correction and integration of heterogeneous scRNA-seq datasets.
+  It is designed to be used in scRNA-seq pipelines downstream of noise-reduction methods,
+  including those for imputation and highly-variable gene filtering. The results from
+  Scanorama integration and batch correction can then be used as input to other tools
+  for scRNA-seq clustering, visualization, and analysis.
+references:
+  # Hie, B., Bryson, B. & Berger, B. Efficient integration of heterogeneous single-cell
+  # transcriptomes using Scanorama. Nat Biotechnol 37, 685–691 (2019).
+  # https://doi.org/10.1038/s41587-019-0113-3
+  doi: 10.1038/s41587-019-0113-3
+links:
+  repository: https://github.com/brianhie/scanorama
+  documentation: https://github.com/brianhie/scanorama#readme
+info:
+  method_types: [embedding]
+  preferred_normalization: log_cp10k
+arguments:
+  - name: --dimred
+    type: integer
+    default: 100
+    description: Embedding dimension
+resources:
+  - type: python_script
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: python
+        pypi:
+          - scanorama
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [hightime, highmem, lowcpu]
diff --git a/src/methods/scanorama_integrate/script.py b/src/methods/scanorama_integrate/script.py
@@ -0,0 +1,59 @@
+import sys
+import anndata as ad
+import scanorama
+import numpy as np
+
+## VIASH START
+par = {
+    'input': 'resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad',
+    'output': 'output.h5ad',
+    'dimred': 100  # embedding dimension, forwarded to integrate_scanpy below
+}
+meta = {
+    'name': 'scanorama-integrate',  # stored as uns['method_id'] in the output
+}
+## VIASH END
+
+sys.path.append(meta["resources_dir"])  # makes read_anndata_partial importable; NOTE(review): 'resources_dir' is not in the stub above, presumably injected by Viash - confirm
+from read_anndata_partial import read_anndata
+
+
+print('Read input', flush=True)
+adata = read_anndata(
+    par['input'],
+    X='layers/normalized',  # presumably loads the normalized layer as X - confirm against read_anndata
+    obs='obs',
+    var='var',
+    uns='uns'
+)
+
+print('Run scanorama', flush=True)
+split = []  # one AnnData copy per batch, in batch_categories order
+batch_categories = adata.obs['batch'].cat.categories
+for b in batch_categories:
+    split.append(adata[adata.obs['batch'] == b].copy())
+scanorama.integrate_scanpy(split, dimred=par["dimred"])  # return value unused; obsm["X_scanorama"] is read from each split entry below
+
+# From https://colab.research.google.com/drive/1CebA3Ow4jXITK0dW5el320KVTX_szhxG
+result = np.zeros((adata.shape[0], split[0].obsm["X_scanorama"].shape[1]))  # one row per cell of adata, one column per embedding dimension
+for i, b in enumerate(batch_categories):
+    result[adata.obs['batch'] == b] = split[i].obsm["X_scanorama"]  # same mask that built split[i], so rows return to their original positions
+
+
+print("Store output", flush=True)
+output = ad.AnnData(
+    obs=adata.obs[[]],  # empty column selection: keeps the obs index, drops all columns
+    var=adata.var[[]],  # empty column selection: keeps the var index, drops all columns
+    uns={
+        'dataset_id': adata.uns['dataset_id'],
+        'normalization_id': adata.uns['normalization_id'],
+        'method_id': meta['name'],
+    },
+    obsm={
+        'X_emb': result  # the integrated embedding is the only data this method outputs
+    },
+    shape=adata.shape,  # no X is passed, so the shape is given explicitly
+)
+
+print("Write output to file", flush=True)
+output.write(par['output'], compression='gzip')
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
@@ -99,7 +99,8 @@ dependencies:
   - name: methods/mnnpy
   - name: methods/pyliger
   - name: methods/scalex
-  - name: methods/scanorama
+  - name: methods/scanorama_correct
+  - name: methods/scanorama_integrate
   - name: methods/scanvi
   - name: methods/scgpt_finetuned
   - name: methods/scgpt_zeroshot

diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
@@ -27,7 +27,8 @@ methods = [
   mnnpy,
   pyliger,
   scalex,
-  scanorama,
+  scanorama_correct,
+  scanorama_integrate,
   scanvi,
   scgpt_finetuned.run(
     args: [model: file("s3://openproblems-work/cache/scGPT_human.zip")]