openproblems-bio · lazappi · Jul 8, 2025 · Jul 8, 2025 · Jul 8, 2025 · Jul 8, 2025
diff --git a/scripts/run_benchmark/run_full_local.sh b/scripts/run_benchmark/run_full_local.sh
@@ -26,7 +26,7 @@ input_states: resources/datasets/**/state.yaml
 rename_keys: 'input_dataset:output_dataset;input_solution:output_solution'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
-settings: '{"methods_exclude": ["uce", "scgpt_finetuned"]}'
+settings: '{"methods_exclude": ["uce", "scgpt_finetuned", "transcriptformer_mlflow"]}'
 HERE
 
 # run the benchmark

diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh
@@ -21,7 +21,7 @@ input_states: resources_test/task_batch_integration/**/state.yaml
 rename_keys: 'input_dataset:output_dataset;input_solution:output_solution'
 output_state: "state.yaml"
 publish_dir: "$publish_dir"
-settings: '{"methods_exclude": ["uce", "scgpt_finetuned"]}'
+settings: '{"methods_exclude": ["uce", "scgpt_finetuned", "transcriptformer_mlflow"]}'
 HERE
 
 nextflow run . \

diff --git a/src/methods/transcriptformer_mlflow/config.vsh.yaml b/src/methods/transcriptformer_mlflow/config.vsh.yaml
@@ -0,0 +1,67 @@
+__merge__: ../../api/base_method.yaml  # merged with the shared base method definition
+
+name: transcriptformer_mlflow  # same ID that run_full_local.sh / run_test_local.sh list in methods_exclude
+label: TranscriptFormer (MLflow model)
+summary: "Context-aware representations of single-cell transcriptomes by jointly modeling genes and transcripts"
+description: |
+  TranscriptFormer is designed to learn rich, context-aware representations of
+  single-cell transcriptomes while jointly modeling genes and transcripts using
+  a novel generative architecture.
+
+  It is a family of generative foundation models representing a cross-species
+  generative cell atlas trained on up to 112 million cells spanning 1.53 billion
+  years of evolution across 12 species.
+
+  Here, we use a version packaged as an MLflow model.
+references:
+  doi:
+    - 10.1101/2025.04.25.650731
+links:
+  documentation: https://github.com/czi-ai/transcriptformer#readme
+  repository: https://github.com/czi-ai/transcriptformer
+
+info:
+  method_types: [embedding]  # presumably the method's output is an embedding; confirm against the task API
+  preferred_normalization: counts  # presumably the counts are used as model input; confirm in script.py
+
+arguments:  # arguments declared in this file; others may come from the merged base definition
+  - name: --model
+    type: file  # NOTE(review): declared as a file, but the description calls it a "URI"; confirm non-local URIs are accepted
+    description: |
+      An MLflow model URI for the transcriptformer model. If it is a .zip or
+      .tar.gz file it will be extracted to a temporary directory.
+    required: true  # no default is given, so the caller must always supply the model
+
+resources:  # files bundled with the component
+  - type: python_script  # the only resource with an explicit type
+    path: script.py
+  - path: /src/utils/read_anndata_partial.py  # leading "/" presumably resolves from the project root; confirm
+  - path: /src/utils/exit_codes.py
+  - path: requirements.txt  # also added to the Docker image and installed in the engines section
+
+engines:  # Docker image build; the setup steps below run in the order listed
+  - type: docker
+    image: openproblems/base_pytorch_nvidia:1  # base image
+    setup:
+      - type: docker  # step 1: install uv from its installer script
+        add: https://astral.sh/uv/0.7.19/install.sh /uv-installer.sh  # installer pinned to uv 0.7.19
+        run: sh /uv-installer.sh && rm /uv-installer.sh  # run the installer, then delete it
+        env: PATH="/root/.local/bin/:$PATH"  # presumably where the installer places the uv binary
+      - type: docker  # step 2: create a Python 3.11 virtual environment at /opt/venv
+        run: uv venv --python 3.11 /opt/venv
+      - type: docker  # step 3: point the environment at /opt/venv and install requirements.txt
+        env:
+          - VIRTUAL_ENV=/opt/venv
+          - PATH="/opt/venv/bin:$PATH"
+        add: requirements.txt /requirements.txt
+        run: uv pip install -r /requirements.txt
+      - type: docker  # step 4: mlflow pinned to 3.1.0, installed separately from requirements.txt
+        run: uv pip install mlflow==3.1.0
+      - type: docker  # step 5: openproblems package from GitHub; NOTE(review): no tag or commit is pinned
+        run: uv pip install git+https://github.com/openproblems-bio/core#subdirectory=packages/python/openproblems
+
+runners:  # two runners are declared: executable and nextflow
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [hightime, highmem, midcpu, gpu]  # labels only; what each one allocates is defined outside this file