Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/run_benchmark/run_full_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ input_states: resources/datasets/**/state.yaml
rename_keys: 'input_dataset:output_dataset;input_solution:output_solution'
output_state: "state.yaml"
publish_dir: "$publish_dir"
settings: '{"methods_exclude": ["uce", "scgpt_finetuned"]}'
settings: '{"methods_exclude": ["uce", "scgpt_finetuned", "transcriptformer_mlflow"]}'
HERE

# run the benchmark
Expand Down
2 changes: 1 addition & 1 deletion scripts/run_benchmark/run_test_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ input_states: resources_test/task_batch_integration/**/state.yaml
rename_keys: 'input_dataset:output_dataset;input_solution:output_solution'
output_state: "state.yaml"
publish_dir: "$publish_dir"
settings: '{"methods_exclude": ["uce", "scgpt_finetuned"]}'
settings: '{"methods_exclude": ["uce", "scgpt_finetuned", "transcriptformer_mlflow"]}'
HERE

nextflow run . \
Expand Down
67 changes: 67 additions & 0 deletions src/methods/transcriptformer_mlflow/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Viash component config for the TranscriptFormer (MLflow model) method.
# Merge in the shared method definition referenced by this relative path.
__merge__: ../../api/base_method.yaml

# Component identity and human-readable metadata.
name: transcriptformer_mlflow
label: TranscriptFormer (MLflow model)
summary: "Context-aware representations of single-cell transcriptomes by jointly modeling genes and transcripts"
# Literal block scalar: line breaks and the blank lines separating the
# paragraphs are kept exactly as written.
description: |
  TranscriptFormer is designed to learn rich, context-aware representations of
  single-cell transcriptomes while jointly modeling genes and transcripts using
  a novel generative architecture.

  It is a family of generative foundation models representing a cross-species
  generative cell atlas trained on up to 112 million cells spanning 1.53 billion
  years of evolution across 12 species.

  Here, we use a version packaged as an MLflow model.
# Publication reference(s), given as a list of DOIs.
references:
  doi:
    - 10.1101/2025.04.25.650731
# Upstream project links.
links:
  documentation: https://github.com/czi-ai/transcriptformer#readme
  repository: https://github.com/czi-ai/transcriptformer

# Method-level metadata: this component is declared as an embedding method
# that prefers "counts" normalization.
info:
  method_types: [embedding]
  preferred_normalization: counts

# Component-specific arguments (declared in addition to whatever the merged
# base method definition provides).
arguments:
  # Required file argument pointing at the packaged model.
  - name: --model
    type: file
    description: |
      An MLflow model URI for the transcriptformer model. If it is a .zip or
      .tar.gz file it will be extracted to a temporary directory.
    required: true

# Files bundled with the component.
resources:
  # Entry-point script of the method.
  - type: python_script
    path: script.py
  # Shared helpers; the leading "/" is presumably resolved against the
  # project root rather than the filesystem root - confirm.
  - path: /src/utils/read_anndata_partial.py
  - path: /src/utils/exit_codes.py
  # Python dependency list installed during the docker setup.
  - path: requirements.txt

# Execution engine: a docker image built on top of the listed base image by
# applying the setup steps below in order.
engines:
  - type: docker
    image: openproblems/base_pytorch_nvidia:1
    setup:
      # Fetch the pinned uv 0.7.19 installer, run it, then delete it.
      # PATH is extended with /root/.local/bin/ (presumably where the
      # installer places the uv binary - confirm).
      - type: docker
        env:
          - PATH="/root/.local/bin/:$PATH"
        add: https://astral.sh/uv/0.7.19/install.sh /uv-installer.sh
        run: sh /uv-installer.sh && rm /uv-installer.sh
      # Create a Python 3.11 virtual environment at /opt/venv.
      - type: docker
        run: uv venv --python 3.11 /opt/venv
      # Point VIRTUAL_ENV and PATH at the new venv, copy in the requirements
      # file and install it with uv.
      - type: docker
        env:
          - VIRTUAL_ENV=/opt/venv
          - PATH="/opt/venv/bin:$PATH"
        add: requirements.txt /requirements.txt
        run: uv pip install -r /requirements.txt
      # MLflow is pinned to an exact version in its own step.
      - type: docker
        run: uv pip install mlflow==3.1.0
      # NOTE(review): installed from the repository's default branch with no
      # commit or tag pinned, so rebuilds may pick up different code.
      - type: docker
        run: uv pip install git+https://github.com/openproblems-bio/core#subdirectory=packages/python/openproblems

# Runners: a plain executable plus a Nextflow runner.
runners:
  - type: executable
  - type: nextflow
    directives:
      # Labels attached to the Nextflow process; what each label maps to is
      # defined outside this file.
      label: [hightime, highmem, midcpu, gpu]
Loading