diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93cf0ac5..f13c5ca2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -82,7 +82,22 @@ jobs: - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + - name: "Run pipeline in image mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode image + + - name: "Run pipeline in coordinate mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode coordinate + + - name: "Run pipeline in preview mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode preview + + - name: "Run pipeline in segfree mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode segfree diff --git a/README.md b/README.md index 0b53aba4..7cb1dad2 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,6 @@ -1. 
Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) - ## Usage On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialxe/results). @@ -61,7 +59,7 @@ nextflow run nf-core/spatialxe \ ## Run coordinate-based segmentation mode
-`PROSEG -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC` +`PROSEG -> PROSEG2BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC` ```bash nextflow run nf-core/spatialxe \ @@ -71,6 +69,30 @@ nextflow run nf-core/spatialxe \ --mode coordinate ``` +## Run segfree mode
+ +`BAYSOR_SEGFREE` + +```bash +nextflow run nf-core/spatialxe \ + -profile <docker/singularity/.../institute> \ + --input samplesheet.csv \ + --outdir <OUTDIR> \ + --mode segfree +``` + +## Run preview mode
+ +`BAYSOR_PREVIEW` + +```bash +nextflow run nf-core/spatialxe \ + -profile <docker/singularity/.../institute> \ + --input samplesheet.csv \ + --outdir <OUTDIR> \ + --mode preview +``` + > [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). diff --git a/conf/modules.config b/conf/modules.config index 9c1c0373..254a9d1a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,13 @@ process { ] } + withName: XENIUMRANGER_RESEGMENT { + publishDir = [ + path: "${params.outdir}/xeniumranger/resegment", + mode: params.publish_dir_mode + ] + } + withName: XENIUMRANGER_IMPORT_SEGMENTATION { publishDir = [ path: "${params.outdir}/xeniumranger/import_segementation", diff --git a/conf/test.config b/conf/test.config index 77b495da..4a5e3c94 100644 --- a/conf/test.config +++ b/conf/test.config @@ -12,19 +12,14 @@ process { - withLabel: process_high { - resourceLimits = [ - cpus: 8, - memory: '8.GB', - time: '1.h' - ] - } + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '2.h' + ] - withName: CELLPOSE { - resourceLimits = [ - cpus: 4, - memory: '8.GB' - ] + withName: UNTAR { + ext.prefix = "test_bundle" } } diff --git a/docs/images/spatialxe-metromap.png b/docs/images/spatialxe-metromap.png index c48c05ea..1279d4f8 100644 Binary files a/docs/images/spatialxe-metromap.png and b/docs/images/spatialxe-metromap.png differ diff --git a/docs/images/spatialxe-metromap.svg b/docs/images/spatialxe-metromap.svg index c4f2ab69..c963ad4e 100644 --- a/docs/images/spatialxe-metromap.svg +++ b/docs/images/spatialxe-metromap.svg @@ -1,4 +1 @@ - - - -
Xenium bundle
gene panel
relabel
import-segmentation
Bundle redefinition Xenium Ranger
segger
xeniumranger resegment
Segmentation refinement
Coordinates/ mask
FICTURE
Proseg
Segmentation-free approach
BOMS
Cellpose
Image-based segmentation approach
Baysor
tiff
csv
JSON
morphology
transcripts
Xenium bundle (redefined)
spatialxe QC
spatialxe meta
SpatialData Domain
html
QC reports
JSON
Metadata
Coordinates/ mask
RO-crate output
SpatialData integration
Approach: image-based (Cellpose, BOMS)
Approach: image-based and segmentation-free (Baysor)
Approach: segmentation-free (Proseg, FICTURE)
Optional step
Outputs
Xenium onboard analysis (XOA)
Inputs
\ No newline at end of file +
Xenium bundle
Xenium bundle
Xenium onboard analysis (XOA)
Xenium onboard analy...
gene panel
gene panel
relabel
relabel
QC reports
QC reports
Metadata
Metadata
SpoQC
SpoQC
spatialdata
spatialdata
MultiQC
MultiQC
Xenium bundle (redefined)
Xenium bundle (rede...
import-segmentation
import-segmentati...
resegment
resegment
baysor
baysor
cellpose
cellpose
baysor
baysor
cellpose
cellpose
Image-based segmentation
Image-based segmentation
morphology.ome.tif
morphology.ome.tif
morphology.ome.tif
morphology.ome.tif
segmentation polygons/csv/mask
segmentation pol...
tif
tif
tif
tif
json
json
html
html
html
html
prior segmentation mask
prior segmentat...
segger
segger
proseg
proseg
baysor
baysor
transcripts.parquet
transcripts.parquet
segmentation polygons/csv
segmentation pol...
parquet
parq...
Coordinate-based segmentation
Coordinate-based segmentation
spatialdata
spatialdata
spatialdata
spatialdata
Optional step
Optional step
Outputs
Outputs
Inputs
Inputs
Default workflow for image mode
Default workflow for image mode
Default workflow for coordinate mode
Default workflow for coordinate mode
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/usage.md b/docs/usage.md index 2f2ae944..80c1f6a0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -40,10 +40,10 @@ This runs the default image mode:
```bash nextflow run nf-core/spatialxe \ + -profile <docker/singularity/.../institute> \ --input ./samplesheet.csv \ --outdir ./results \ - --mode image \ - -profile <docker/singularity/.../institute> + --mode image ``` #### Coordinate-based (transcripts-based) segmentation mode @@ -53,10 +53,10 @@ This runs the default coordinate mode:
```bash nextflow run nf-core/spatialxe \ + -profile <docker/singularity/.../institute> \ --input ./samplesheet.csv \ --outdir ./results \ - --mode coordinate \ - -profile <docker/singularity/.../institute> + --mode coordinate ``` ### Image-based Segmentation mode (--mode image):
@@ -73,26 +73,26 @@ nextflow run nf-core/spatialxe \ #### Run Segmentation with the methods methods mentioned above :
-eg: To run proseg segmentation use the `coordinate` mode and the `proseg` segmentation method +e.g., to run proseg segmentation, use the `coordinate` mode and the `proseg` segmentation method (`--method proseg`) ```bash nextflow run nf-core/spatialxe \ + -profile <docker/singularity/.../institute> \ --input ./samplesheet.csv \ --outdir ./results \ --mode coordinate \ - --segmentation proseg \ - -profile <docker/singularity/.../institute> + --method proseg ``` -eg: To run cellpose segmentation use the `image` mode and the `cellpose` segmentation method +e.g., to run cellpose segmentation, use the `image` mode and the `cellpose` segmentation method (`--method cellpose`) ```bash nextflow run nf-core/spatialxe \ + -profile <docker/singularity/.../institute> \ --input ./samplesheet.csv \ --outdir ./results \ --mode image \ - --segmentation cellpose \ - -profile <docker/singularity/.../institute> + --method cellpose ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. diff --git a/modules.json b/modules.json index 920f97ee..d89c6fa9 100644 --- a/modules.json +++ b/modules.json @@ -11,11 +11,6 @@ "installed_by": ["modules"], "patch": "modules/nf-core/cellpose/cellpose.diff" }, - "gunzip": { - "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] - }, "multiqc": { "branch": "master", "git_sha": "7b50cb7be890e4b28cffb82e438cc6a8d7805d3f", @@ -24,7 +19,8 @@ "untar": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/untar/untar.diff" }, "unzip": { "branch": "master", diff --git a/modules/local/baysor/create_dataset/templates/create_dataset.py b/modules/local/baysor/create_dataset/templates/create_dataset.py index 4e33f4e5..21db550b 100644 --- a/modules/local/baysor/create_dataset/templates/create_dataset.py +++ b/modules/local/baysor/create_dataset/templates/create_dataset.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import csv import random @@ -41,18 +41,16 @@ def generate_dataset( # 
randomize csv rows to write for row in reader: - if random.random() < sample_fraction: + if random.random() < float(sample_fraction): writer.writerow(row) - # print(f"Sampled data written to {sampled_transcripts}") - return None @staticmethod - def generate_version_yml(version: str) -> None: + def generate_version_yml() -> None: with open("versions.yml", "w") as yml: yml.write('"${task.process}":\\n') - yml.write(f'Baysor-Preview Create Dataset: {version}"\\n') + yml.write('    Baysor-Preview Create Dataset: "0.7.1"\\n') return None @@ -64,17 +62,16 @@ def main() -> None: transcripts: str = "${transcripts}" sample_fraction: float = "${sample_fraction}" sampled_transcripts: str = "sampled_transcripts.csv" - version: str = "${VERSION}" # generate dataset - BaysorPreview.generate_dataset( + BaysorPreview.generate_dataset ( transcripts=transcripts, sampled_transcripts=sampled_transcripts, sample_fraction=sample_fraction ) # generate versions.yml - BaysorPreview.generate_version_yml(version=version) + BaysorPreview.generate_version_yml() return None diff --git a/modules/local/baysor/run/main.nf b/modules/local/baysor/run/main.nf index 619880a2..455308e2 100644 --- a/modules/local/baysor/run/main.nf +++ b/modules/local/baysor/run/main.nf @@ -58,10 +58,12 @@ process BAYSOR_RUN { """ touch segmentation.csv touch segmentation_polygons_2d.json + touch segmentation_polygons_3d.json touch segmentation_log.log touch segmentation_counts.loom touch segmentation_cell_stats.csv touch segmentation_params.dump.toml + touch segmentation_run.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/spatialconverter/parquet_to_csv/main.nf b/modules/local/spatialconverter/parquet_to_csv/main.nf index 8f59157e..b585aee8 100644 --- a/modules/local/spatialconverter/parquet_to_csv/main.nf +++ b/modules/local/spatialconverter/parquet_to_csv/main.nf @@ -4,26 +4,27 @@ process PARQUET_TO_CSV { container 
"ghcr.io/scverse/spatialdata:spatialdata0.3.0_spatialdata-io0.1.7_spatialdata-plot0.2.9" - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "PARQUET_TO_CSV module does not support Conda. Please use Docker / Singularity / Podman instead." - } - input: tuple val(meta), path(transcripts) + val(extension) output: - tuple val(meta), path("*.csv") , emit: transcripts_csv - path("versions.yml") , emit: versions + tuple val(meta), path("*.csv*"), emit: transcripts_csv + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when script: + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PARQUET_TO_CSV module does not support Conda. Please use Docker / Singularity / Podman instead." + } + template 'parquet_to_csv.py' stub: """ - touch ${transcripts} + touch ${transcripts}.csv cat <<-END_VERSIONS > versions.yml "${task.process}": spatialconverter: "${task.version}" diff --git a/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py b/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py index 3e5e91e8..3d4acbb1 100755 --- a/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py +++ b/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py @@ -1,17 +1,38 @@ #!/usr/bin/env python import pandas as pd +from pathlib import Path + + +def convert_parquet ( + transcripts: Path, + extension: str = '.csv' + ) -> None: + + df = pd.read_parquet(transcripts, engine = 'pyarrow') + + if extension == ".gz": + output = str(transcripts).replace(".parquet", ".csv.gz") + df.to_csv(f"{output}", compression='gzip', index=False) + else: + output = str(transcripts).replace(".parquet", ".csv") + df.to_csv(f"{output}", index=False) + + return None + if __name__ == '__main__': - print("[START]") - df = pd.read_parquet("${transcripts}") - output="${transcripts}".replace(".parquet",".csv") - df.to_csv(f"{output}", index=False) + +
transcripts: str = "${transcripts}" + extension: str = "${extension}" + + # generate transcripts.csv(.gz) + convert_parquet ( + transcripts=transcripts, + extension=extension + ) #Output version information with open("versions.yml", "w") as f: f.write('"${task.process}":\\n') f.write(f'spatialconverter: "v0.0.1"\\n') - - print("[FINISH]") - diff --git a/modules/local/spatialdata/merge/main.nf b/modules/local/spatialdata/merge/main.nf index 528e540d..dbcc00af 100644 --- a/modules/local/spatialdata/merge/main.nf +++ b/modules/local/spatialdata/merge/main.nf @@ -6,11 +6,11 @@ process SPATIALDATA_MERGE { input: tuple val(meta), path(ref_bundle, stageAs: "*") - tuple val(meta), path(add_bundle, stageAs: "*") + path(add_bundle, stageAs: "*") output: - tuple val(meta), path("spatialdata_spatialxe") , emit: spatialxe_bundle - path "versions.yml" , emit: versions + tuple val(meta), path("spatialdata_spatialxe"), emit: spatialxe_bundle + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -22,6 +22,7 @@ process SPATIALDATA_MERGE { } def args = task.ext.args ?: '' + template 'merge.py' stub: diff --git a/modules/local/spatialdata/meta/main.nf b/modules/local/spatialdata/meta/main.nf index 4f302df4..260372aa 100644 --- a/modules/local/spatialdata/meta/main.nf +++ b/modules/local/spatialdata/meta/main.nf @@ -4,30 +4,32 @@ process SPATIALDATA_META { container "heylf/spatialdata:0.2.6" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "SPATIALDATA_WRITE module does not support Conda. Please use Docker / Singularity / Podman instead." 
- } - input: tuple val(meta), path(spatialdata_bundle, stageAs: "*") - tuple val(meta), path(xenium_bundle, stageAs: "*") + path(xenium_bundle, stageAs: "*") output: - tuple val(meta), path("spatialdata_spatialxe_final") , emit: spatialxe_bundle - path "versions.yml" , emit: versions + tuple val(meta), path("spatialdata_spatialxe_final"), emit: spatialxe_bundle + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "SPATIALDATA_META module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + template 'meta.py' stub: + """ - mkdir -p "spatialdata_spatialxe/" - touch spatialdata_spatialxe/fake_file.txt + mkdir -p "spatialdata_spatialxe_final/" + touch "spatialdata_spatialxe_final/fake_file.txt" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/spatialdata/write/main.nf b/modules/local/spatialdata/write/main.nf index 19dd9abc..53d8cbb6 100644 --- a/modules/local/spatialdata/write/main.nf +++ b/modules/local/spatialdata/write/main.nf @@ -9,8 +9,8 @@ process SPATIALDATA_WRITE { val(outputfolder) output: - tuple val(meta), path("${outputfolder}") , emit: spatialdata - path "versions.yml" , emit: versions + tuple val(meta), path("${outputfolder}"), emit: spatialdata + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -26,9 +26,11 @@ process SPATIALDATA_WRITE { template 'write.py' stub: + + def outdir = "${outputfolder}" """ - mkdir -p "spatialdata/" - touch spatialdata/fake_file.txt + mkdir -p "${outdir}/" + touch "${outdir}/fake_file.txt" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml deleted file mode 100644 index 9b926b1f..00000000 --- 
a/modules/nf-core/gunzip/environment.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda -dependencies: - - conda-forge::coreutils=9.5 - - conda-forge::grep=3.11 - - conda-forge::gzip=1.13 - - conda-forge::lbzip2=2.5 - - conda-forge::sed=4.8 - - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf deleted file mode 100644 index 3ffc8e92..00000000 --- a/modules/nf-core/gunzip/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process GUNZIP { - tag "${archive}" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' - : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("${gunzip}"), emit: gunzip - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def extension = (archive.toString() - '.gz').tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".${extension}" - def prefix = task.ext.prefix ?: name - gunzip = prefix + ".${extension}" - """ - # Not calling gunzip itself because it creates files - # with the original group ownership rather than the - # default one for that user / the work directory - gzip \\ - -cd \\ - ${args} \\ - ${archive} \\ - > ${gunzip} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def extension = (archive.toString() - 
'.gz').tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".${extension}" - def prefix = task.ext.prefix ?: name - gunzip = prefix + ".${extension}" - """ - touch ${gunzip} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml deleted file mode 100644 index 69d31024..00000000 --- a/modules/nf-core/gunzip/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: gunzip -description: Compresses and decompresses files. -keywords: - - gunzip - - compression - - decompression -tools: - - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] - identifier: "" -input: - - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. 
[ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" -output: - - gunzip: - - meta: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - ${gunzip}: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" -maintainers: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" - - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test deleted file mode 100644 index 776211ad..00000000 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ /dev/null @@ -1,121 +0,0 @@ -nextflow_process { - - name "Test Process GUNZIP" - script "../main.nf" - process "GUNZIP" - tag "gunzip" - tag "modules_nfcore" - tag "modules" - - test("Should run without failures") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - prefix") { - - config './nextflow.config' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [], - file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - prefix - stub") { - - options '-stub' - config './nextflow.config' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap deleted file mode 100644 index a0f0e67e..00000000 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ /dev/null @@ -1,134 +0,0 @@ -{ - "Should run without failures - prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:22.080222697" - }, - "Should run without failures - stub": { - "content": [ - { - "0": [ - [ - [ - - ], - "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - [ - - ], - "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:14.593020264" - }, - "Should run without 
failures": { - "content": [ - { - "0": [ - [ - [ - - ], - "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - [ - - ], - "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:01.295397925" - }, - "Should run without failures - prefix": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:07.414271387" - } -} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config deleted file mode 100644 index dec77642..00000000 --- a/modules/nf-core/gunzip/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: GUNZIP { - ext.prefix = { "${meta.id}.xyz" } - } -} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml deleted file mode 100644 index fd3f6915..00000000 --- a/modules/nf-core/gunzip/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gunzip: - - modules/nf-core/gunzip/** diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index e712ebe6..550d8577 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -53,6 +53,10 @@ process UNTAR { prefix = task.ext.prefix ?: (meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ mkdir ${prefix} + touch ${prefix}/morphology.ome.tif + touch ${prefix}/transcripts.parquet + touch ${prefix}/gene_panel.json + ## Dry-run untaring the archive to get the files and place all in prefix if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then for i in `tar -tf ${archive}`; diff --git a/modules/nf-core/untar/untar.diff b/modules/nf-core/untar/untar.diff new file mode 100644 index 00000000..7076b9d5 --- /dev/null +++ b/modules/nf-core/untar/untar.diff @@ -0,0 +1,21 @@ +Changes in component 'nf-core/untar' +'modules/nf-core/untar/environment.yml' is unchanged +Changes in 'untar/main.nf': +--- modules/nf-core/untar/main.nf ++++ modules/nf-core/untar/main.nf +@@ -53,6 +53,10 @@ + prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} ++ touch ${prefix}/morphology.ome.tif ++ touch ${prefix}/transcripts.parquet ++ touch ${prefix}/gene_panel.json ++ + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + +'modules/nf-core/untar/meta.yml' is unchanged +'modules/nf-core/untar/tests/main.nf.test' is unchanged +'modules/nf-core/untar/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/nextflow.config b/nextflow.config index 02db8443..173c9b74 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,16 +12,17 @@ params { // Input options input = null // path to the samplesheet.csv containing meta,bundle,image outdir = null // path to generate pipeline results at - mode = null // run the pipeline either in `image` or `coordinate` modes - segmentation = null // name of the segmentation method to run + mode = null // run the pipeline either in `image` or `coordinate` or `segfree` or `preview` 
modes + method = null // name of the method to run for image or coordinate or segfree approaches gene_panel = null // path to gene panel json file if `relabel_genes` is true qupath_polygons = null // polygon segmentation results in GeoJSON format alignment_csv = null // image alignment file format a 3x3 transformation matrix, where the last row is [0,0,1] + cellpose_model = null // custom cellpose model to use for running or starting training + segmentation_mask = null // prior segmentation mask // execution specific sharpen_tiff = false // wether to sharpen the morphology-focus tiff nucleus_segmentation_only = false // to only run nucleus segmentation while running XR_IMP-SEG - generate_preview = false // generate preview with baysor and exit // Xeniumranger specific xeniumranger_only = false // to generate redefined bundle with just changing the xr specific params @@ -36,9 +37,6 @@ params { segger_accelerator = 'cpu' // either 'cuda' or 'cpu' segger_knn_method = 'kd_tree' // 'cuda' - ensure your system has CUDA installed and configured properly - // Cellpose specific - cellpose_model = 'nuclei' // model to use for running or starting training default - cyto3 - // Proseg specific format = 'xenium' // preset value set as `xenium` @@ -52,9 +50,6 @@ params { features = null // Baysor specific - baysor_run_image = true // run baysor with image/seg-mask - baysor_run_transcripts = false // run baysor with transcripts.csv.gz - baysor_preview = false // generate preview with baysor preview cmd // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index c1bd1276..aa9088be 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -31,12 +31,12 @@ }, "mode": { "type": "string", - "description": "Mode in which the pipeline is to be run. Either image-based or coordinate-based approach.", - "enum": ["image", "coordinate"] + "description": "Mode in which the pipeline is to be run. 
Either image-based segmentation, coordinate-based segmentation, segmentation-free analysis or data preview.", + "enum": ["image", "coordinate", "segfree", "preview"] }, - "segmentation": { + "method": { "type": "string", - "enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger"], + "enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger", "ficture"], "description": "Segmentation method to run." }, "gene_panel": { @@ -53,6 +53,16 @@ "description": "Image alignment file containing similarity transform matrix. (e.g., the _imagealignment.csv file exported from Xenium Explorer)", "format": "file-path" }, + "cellpose_model": { + "type": "string", + "description": "Model to use for running or starting training.", + "format": "file-path" + }, + "segmentation_mask": { + "type": "string", + "description": "Prior segmentation mask from other segmentation methods.", + "format": "file-path" + }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -73,10 +83,6 @@ "description": "Options for the segmentation layer of the spatialxe pipeline", "default": "", "properties": { - "generate_preview": { - "type": "boolean", - "description": "Whether to generate a preview of the dataset with the transcripts.csv.gz." - }, "segmentation_refinement": { "type": "boolean", "description": "Whether to run refinement on the image-based segmentation methods. Runs coordinate-based methods after the initial image-based segmentation run." @@ -129,12 +135,6 @@ "enum": ["kd_tree", "cuda"], "description": "Method for KNN computation. (e.g., cuda for GPU-based computation)" }, - "cellpose_model": { - "type": "string", - "default": "nuclei", - "enum": ["nuclei", "cyto3"], - "description": "Model to use for running or starting training. (eg. cyto3 or nuclei)" - }, "format": { "type": "string", "default": "xenium", @@ -171,19 +171,6 @@ "features": { "type": "string", "description": "List of features to be passed to the ficture method. 
(eg: TP53,OCIAD1,BCAS3,SOX)" - }, - "baysor_run_image": { - "type": "boolean", - "default": true, - "description": "Whether to run bayor with image/segmentation-mask." - }, - "baysor_run_transcripts": { - "type": "boolean", - "description": "Whether to run baysor with transcripts.csv.gz." - }, - "baysor_preview": { - "type": "boolean", - "description": "Whether to create a preview of the dataset with transcripts.csv.gz." } } }, diff --git a/subworkflows/local/baysor_generate_preview/main.nf b/subworkflows/local/baysor_generate_preview/main.nf index 62e41bff..749bce5b 100644 --- a/subworkflows/local/baysor_generate_preview/main.nf +++ b/subworkflows/local/baysor_generate_preview/main.nf @@ -2,31 +2,29 @@ // Run baysor create_dataset & preview // -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' include { BAYSOR_PREVIEW } from '../../../modules/local/baysor/preview/main' include { BAYSOR_CREATE_DATASET } from '../../../modules/local/baysor/create_dataset/main' +include { PARQUET_TO_CSV } from '../../../modules/local/spatialconverter/parquet_to_csv/main' workflow BAYSOR_GENERATE_PREVIEW { take: - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] - ch_config // channel: ["path-to-xenium.toml"] + ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] main: - ch_versions = Channel.empty() - ch_preview_html = Channel.empty() + ch_versions = Channel.empty() + ch_preview_html = Channel.empty() - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - - ch_unzipped_transcripts = GUNZIP.out.gunzip + // run parquet to csv + PARQUET_TO_CSV ( ch_transcripts_parquet, ".csv" ) + ch_versions = ch_versions.mix ( PARQUET_TO_CSV.out.versions ) // generate randomised sample data - BAYSOR_CREATE_DATASET ( ch_unzipped_transcripts, "0.3" ) + BAYSOR_CREATE_DATASET ( PARQUET_TO_CSV.out.transcripts_csv, 0.3 ) ch_versions = 
ch_versions.mix ( BAYSOR_CREATE_DATASET.out.versions ) // run baysor preview if param - generate_preview is true diff --git a/subworkflows/local/baysor_generate_segfree/main.nf b/subworkflows/local/baysor_generate_segfree/main.nf index c194341b..1e4b5f59 100644 --- a/subworkflows/local/baysor_generate_segfree/main.nf +++ b/subworkflows/local/baysor_generate_segfree/main.nf @@ -2,35 +2,30 @@ // Run baysor segfree // -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' -include { BAYSOR_SEGFREE } from '../../../modules/local/baysor/segfree/main' +include { BAYSOR_SEGFREE } from '../../../modules/local/baysor/segfree/main' workflow BAYSOR_GENERATE_SEGFREE { take: - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] + ch_transcripts_parquet // channel: [ val(meta), ["transcripts.parquet"] ] + ch_config // channel: [ ["path-to-xenium.toml"] ] main: ch_versions = Channel.empty() - ch_ncvs = Channel.empty() - - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - // run baysor segfree BAYSOR_SEGFREE ( - GUNZIP.out.gunzip + ch_transcripts_parquet, + ch_config ) - ch_versions = ch_versions.mix( BAYSOR_SEGFREE.out.versions ) + ch_versions = ch_versions.mix ( BAYSOR_SEGFREE.out.versions ) emit: - ncvs = ch_ncvs + ncvs = BAYSOR_SEGFREE.out.ncvs // channel: [ val(meta), ["ncvs.loom"] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/baysor_run_morphology_ome_tif/main.nf b/subworkflows/local/baysor_run_morphology_ome_tif/main.nf deleted file mode 100644 index 86a42f1e..00000000 --- a/subworkflows/local/baysor_run_morphology_ome_tif/main.nf +++ /dev/null @@ -1,99 +0,0 @@ -// -// Run baysor run & import-segmentation -// - -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' -include { RESOLIFT } from '../../../modules/local/resolift/main' -include { BAYSOR_RUN as BAYSOR_RUN_IMAGE } from 
'../../../modules/local/baysor/run/main' -include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' - - -workflow BAYSOR_RUN_MORPHOLOGY_OME_TIF { - - take: - - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] - ch_image // channel: [ val(meta), ["morphology_focus.tiff"] ] - ch_config // channel: ["path-to-xenium.toml"] - - main: - - ch_versions = Channel.empty() - - ch_enhanced_tiff = Channel.empty() - ch_segmentation = Channel.empty() - ch_polygons2d = Channel.empty() - ch_htmls = Channel.empty() - - ch_redefined_bundle = Channel.empty() - ch_unzipped_transcripts = Channel.empty() - - - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - - ch_unzipped_transcripts = GUNZIP.out.gunzip - - // sharpen morphology tiff if param `sharpen_tiff` is true - ch_just_image = Channel.empty() - if ( params.sharpen_tiff ) { - - RESOLIFT ( ch_image ) - ch_versions = ch_versions.mix( RESOLIFT.out.versions ) - - ch_enhanced_tiff = RESOLIFT.out.enhanced_tiff - ch_just_image = ch_enhanced_tiff.map { - _meta, image -> return [ image ] - } - - } else { - - // use the original morphology tiff from the bundle - ch_just_image = ch_image.map { - _meta, image -> return [ image ] - } - } - - // run baysor with morphology.tiff - BAYSOR_RUN_IMAGE ( - ch_unzipped_transcripts, - ch_just_image, - ch_config, - 30 - ) - ch_versions = ch_versions.mix( BAYSOR_RUN_IMAGE.out.versions ) - - ch_segmentation = BAYSOR_RUN_IMAGE.out.segmentation - ch_jus_segmentation = ch_segmentation.map { - _meta, segmentation -> return [ segmentation ] - } - ch_polygons2d = BAYSOR_RUN_IMAGE.out.polygons2d - ch_htmls = BAYSOR_RUN_IMAGE.out.htmls - // run xeniumranger import-segmentation - XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, - [], - [], - [], - ch_jus_segmentation, - ch_polygons2d, - "pixel" - ) - ch_versions = 
ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) - - ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle - - emit: - - enhanced_tiff = ch_enhanced_tiff // channel: [ val(meta), ["morphology.tiff"] ] - - segmentation = ch_segmentation // channel: [ val(meta), ["segmentation.csv"] ] - polygons2d = ch_polygons2d // channel: [ ["segmentation_polygons_2d.json"] ] - htmls = ch_htmls // channel: [ ["*.html"] ] - - redefined_bundle = ch_redefined_bundle // channel: [ val(meta), "redefined-xenium-bundle" ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf b/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf new file mode 100644 index 00000000..40a4924e --- /dev/null +++ b/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf @@ -0,0 +1,68 @@ +// +// Run baysor run & import-segmentation +// + +include { BAYSOR_RUN as BAYSOR_RUN_IMAGE } from '../../../modules/local/baysor/run/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + + +workflow BAYSOR_RUN_PRIOR_SEGMENTATION_MASK { + + take: + + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_segmentation_mask // channel: [ ["path-to-prior-segmentation-mask"] ] + ch_config // channel: [ "path-to-xenium.toml" ] + + main: + + ch_versions = Channel.empty() + + ch_segmentation = Channel.empty() + ch_polygons2d = Channel.empty() + ch_htmls = Channel.empty() + + ch_redefined_bundle = Channel.empty() + + + // run baysor with morphology.tiff + BAYSOR_RUN_IMAGE ( + ch_transcripts_parquet, + ch_segmentation_mask, + ch_config, + 30 + ) + ch_versions = ch_versions.mix( BAYSOR_RUN_IMAGE.out.versions ) + + ch_segmentation = BAYSOR_RUN_IMAGE.out.segmentation + ch_just_segmentation = ch_segmentation.map { + _meta, segmentation -> return [ 
segmentation ] + } + ch_polygons2d = BAYSOR_RUN_IMAGE.out.polygons2d + ch_htmls = BAYSOR_RUN_IMAGE.out.htmls + + // run xeniumranger import-segmentation + XENIUMRANGER_IMPORT_SEGMENTATION ( + ch_bundle_path, + [], + [], + [], + ch_just_segmentation, + ch_polygons2d, + "microns" + ) + ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + + ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle + + emit: + + segmentation = ch_segmentation // channel: [ val(meta), ["segmentation.csv"] ] + polygons2d = ch_polygons2d // channel: [ ["segmentation_polygons_2d.json"] ] + htmls = ch_htmls // channel: [ ["*.html"] ] + + redefined_bundle = ch_redefined_bundle // channel: [ val(meta), "redefined-xenium-bundle" ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/baysor_run_transcripts_csv/main.nf b/subworkflows/local/baysor_run_transcripts_parquet/main.nf similarity index 73% rename from subworkflows/local/baysor_run_transcripts_csv/main.nf rename to subworkflows/local/baysor_run_transcripts_parquet/main.nf index 386b55f7..7077eede 100644 --- a/subworkflows/local/baysor_run_transcripts_csv/main.nf +++ b/subworkflows/local/baysor_run_transcripts_parquet/main.nf @@ -2,19 +2,17 @@ // Run baysor run and import-segmentation // -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' include { BAYSOR_RUN as BAYSOR_RUN_TRANSCRIPTS } from '../../../modules/local/baysor/run/main' include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' -workflow BAYSOR_RUN_TRANSCRIPTS_CSV { +workflow BAYSOR_RUN_TRANSCRIPTS_PARQUET { take: - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] - ch_image // channel: [ val(meta), ["morphology_focus.tiff"] ] - ch_config // channel: ["path-to-xenium.toml"] + ch_bundle_path // channel: [ val(meta), ["xenium-bundle"] ] + ch_transcripts_parquet // 
channel: [ val(meta), ["transcripts.csv.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] main: @@ -25,18 +23,10 @@ workflow BAYSOR_RUN_TRANSCRIPTS_CSV { ch_htmls = Channel.empty() ch_redefined_bundle = Channel.empty() - ch_unzipped_transcripts = Channel.empty() - - - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - - ch_unzipped_transcripts = GUNZIP.out.gunzip // run baysor with transcripts.csv BAYSOR_RUN_TRANSCRIPTS ( - ch_unzipped_transcripts, + ch_transcripts_parquet, [], ch_config, 30 @@ -52,7 +42,7 @@ workflow BAYSOR_RUN_TRANSCRIPTS_CSV { // run xeniumranger import-segmentation XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], [], [], diff --git a/subworkflows/local/cellpose_baysor_import_segmentation/main.nf b/subworkflows/local/cellpose_baysor_import_segmentation/main.nf index 7abf8454..22f556b9 100644 --- a/subworkflows/local/cellpose_baysor_import_segmentation/main.nf +++ b/subworkflows/local/cellpose_baysor_import_segmentation/main.nf @@ -10,17 +10,17 @@ workflow CELLPOSE_BAYSOR_IMPORT_SEGMENTATION { take: - ch_image // channel: [ val(meta), ["path-to-morphology.ome.tif"] ] - ch_bundle // channel: [ val(meta), ["path-to-xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), ["path-to-transcripts.parquet"] ] - ch_config // channel: ["path-to-xenium.toml"] + ch_morphology_image // channel: [ val(meta), ["path-to-morphology.ome.tif"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] main: ch_versions = Channel.empty() // run cellpose to generate segmentation mask - CELLPOSE ( ch_image, []) + CELLPOSE ( ch_morphology_image, []) ch_versions = ch_versions.mix ( CELLPOSE.out.versions ) @@ -28,7 +28,7 @@ workflow CELLPOSE_BAYSOR_IMPORT_SEGMENTATION { ch_mask = CELLPOSE.out.mask.map { _meta, seg_mask -> [ seg_mask 
] } - BAYSOR_RUN ( ch_transcripts, ch_mask, ch_config, 30 ) + BAYSOR_RUN ( ch_transcripts_parquet, ch_mask, ch_config, 30 ) ch_versions = ch_versions.mix ( BAYSOR_RUN.out.versions ) @@ -39,7 +39,7 @@ workflow CELLPOSE_BAYSOR_IMPORT_SEGMENTATION { ch_polygons = BAYSOR_RUN.out.polygons2d XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], [], [], diff --git a/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf b/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf index 1b20888f..dd889264 100644 --- a/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf +++ b/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf @@ -10,27 +10,29 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { take: - ch_image // channel: [ val(meta), ["morphology.ome.tiff"] ] - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] + ch_morphology_image // channel: [ val(meta), ["path-to-morphology.ome.tiff"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] main: ch_versions = Channel.empty() + cellpose_model = params.cellpose_model ? 
(Channel.fromPath(params.cellpose_model, checkIfExists: true)) : [] + // sharpen morphology tiff if param - sharpen_tiff is true if ( params.sharpen_tiff ) { - RESOLIFT ( ch_image ) + RESOLIFT ( ch_morphology_image ) ch_versions = ch_versions.mix( RESOLIFT.out.versions ) // run cellpose on the enhanced tiff - CELLPOSE ( RESOLIFT.out.enhanced_tiff, params.cellpose_model ) + CELLPOSE ( RESOLIFT.out.enhanced_tiff, cellpose_model ) ch_versions = ch_versions.mix( CELLPOSE.out.versions ) } else { // run cellpose on the original tiff - CELLPOSE ( ch_image, params.cellpose_model ) + CELLPOSE ( ch_morphology_image, cellpose_model ) ch_versions = ch_versions.mix( CELLPOSE.out.versions ) } @@ -41,7 +43,7 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { cellpose_mask = CELLPOSE.out.mask.map { _meta, mask -> return [ mask ] } - cellpose_flows = CELLPOSE.out.flows.map { + _cellpose_flows = CELLPOSE.out.flows.map { _meta, flows -> return [ flows ] } @@ -49,7 +51,7 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { if ( params.nucleus_segmentation_only ) { XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], cellpose_mask, [], @@ -58,10 +60,11 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { "" ) ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + } else { XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], cellpose_mask, cellpose_cells, diff --git a/subworkflows/local/ficture_preprocess_model/main.nf b/subworkflows/local/ficture_preprocess_model/main.nf index 5790eef6..bf8e259a 100644 --- a/subworkflows/local/ficture_preprocess_model/main.nf +++ b/subworkflows/local/ficture_preprocess_model/main.nf @@ -2,24 +2,31 @@ // Run ficture preprocess and model modules // -include { FICTURE_PREPROCESS } from '../../../modules/local/ficture/preprocess/main' -include { FICTURE } from '../../../modules/local/ficture/model/main' -// include { XENIUMRANGER_IMPORT_SEGMENTATION } from 
'../../../modules/nf-core/xeniumranger/import-segmentation/main' +include { FICTURE_PREPROCESS } from '../../../modules/local/ficture/preprocess/main' +include { FICTURE } from '../../../modules/local/ficture/model/main' +include { PARQUET_TO_CSV } from '../../../modules/local/spatialconverter/parquet_to_csv/main' + workflow FICTURE_PREPROCESS_MODEL { take: - ch_transcripts // channel: [ val(meta), [ "transcripts.csv.gz" ] ] - ch_features // channel: [ "features" ] + ch_transcripts_parquet // channel: [ val(meta), [ "transcripts.parquet" ] ] + ch_features // channel: [ ["features"] ] main: ch_versions = Channel.empty() + // convert parquet to csv + PARQUET_TO_CSV ( ch_transcripts_parquet, ".csv" ) + ch_versions = ch_versions.mix ( PARQUET_TO_CSV.out.versions ) + // run ficture preprocessing + ch_transcripts = PARQUET_TO_CSV.out.transcripts_csv + FICTURE_PREPROCESS ( ch_transcripts, ch_features ) - ch_versions = ch_versions.mix( FICTURE_PREPROCESS.out.versions ) + ch_versions = ch_versions.mix ( FICTURE_PREPROCESS.out.versions ) // run the ficture wrapper pipeline ch_features_clean = Channel.empty() @@ -33,17 +40,6 @@ workflow FICTURE_PREPROCESS_MODEL { ) ch_versions = ch_versions.mix( FICTURE.out.versions ) - // run xeniumranger import-segmentation - // XENIUMRANGER_IMPORT_SEGMENTATION ( - // ch_bundle, - // [], - // [], - // [], - // ch_segmentation, - // BAYSOR_RUN.out.polygons2d, - // "microns" - // ) - emit: transcripts = FICTURE_PREPROCESS.out.transcripts // channel: [ val(meta), [ "*processed_transcripts.tsv.gz" ] ] diff --git a/subworkflows/local/proseg_preset_proseg2baysor/main.nf b/subworkflows/local/proseg_preset_proseg2baysor/main.nf index 6c3a2441..b9bda8c4 100644 --- a/subworkflows/local/proseg_preset_proseg2baysor/main.nf +++ b/subworkflows/local/proseg_preset_proseg2baysor/main.nf @@ -4,24 +4,28 @@ include { PROSEG } from '../../../modules/local/proseg/preset/main' include { PROSEG2BAYSOR } from '../../../modules/local/proseg/proseg2baysor/main' 
+include { PARQUET_TO_CSV } from '../../../modules/local/spatialconverter/parquet_to_csv/main' include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' workflow PROSEG_PRESET_PROSEG2BAYSOR { take: - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), [ "transcripts.csv.gz" ] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_parquet // channel: [ val(meta), [ "transcripts.parquet" ] ] main: ch_versions = Channel.empty() + // run parquet-to-csv + PARQUET_TO_CSV ( ch_transcripts_parquet, ".gz" ) + ch_versions = ch_versions.mix( PARQUET_TO_CSV.out.versions ) + // run proseg with the xenium format - PROSEG ( ch_transcripts ) + PROSEG ( PARQUET_TO_CSV.out.transcripts_csv ) ch_versions = ch_versions.mix( PROSEG.out.versions ) - // run proseg-to-baysor on the data generated with the proseg run PROSEG2BAYSOR ( PROSEG.out.cell_polygons_2d, PROSEG.out.transcript_metadata ) ch_versions = ch_versions.mix( PROSEG2BAYSOR.out.versions ) @@ -33,7 +37,7 @@ workflow PROSEG_PRESET_PROSEG2BAYSOR { // run xeniumranger import-segmentation XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], [], [], diff --git a/subworkflows/local/segger_create_train_predict/main.nf b/subworkflows/local/segger_create_train_predict/main.nf index 77b232e4..99f07c12 100644 --- a/subworkflows/local/segger_create_train_predict/main.nf +++ b/subworkflows/local/segger_create_train_predict/main.nf @@ -2,16 +2,16 @@ // Run segger create_dataset, train and predict modules & parquet_to_csv // -include { SEGGER_CREATE_DATASET } from '../../../modules/local/segger/create_dataset/main' include { SEGGER_TRAIN } from '../../../modules/local/segger/train/main' include { SEGGER_PREDICT } from '../../../modules/local/segger/predict/main' +include { SEGGER_CREATE_DATASET } from '../../../modules/local/segger/create_dataset/main' include { PARQUET_TO_CSV } from 
'../../../modules/local/spatialconverter/parquet_to_csv/main' workflow SEGGER_CREATE_TRAIN_PREDICT { take: - ch_basedir // channel: [ val(meta), [ basedir ] ] + ch_basedir // channel: [ val(meta), [ "basedir" ] ] ch_transcripts_parquet // channel: [ val(meta), [bundle + "/transcripts.parquet"]] main: diff --git a/subworkflows/local/spatialdata_write_meta_merge/main.nf b/subworkflows/local/spatialdata_write_meta_merge/main.nf index 18b20289..36635052 100644 --- a/subworkflows/local/spatialdata_write_meta_merge/main.nf +++ b/subworkflows/local/spatialdata_write_meta_merge/main.nf @@ -10,7 +10,7 @@ include { SPATIALDATA_META } from '../../ workflow SPATIALDATA_WRITE_META_MERGE { take: - ch_raw_bundle // channel: [ val(meta), [ "xenium-bundle" ] ] + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] ch_redefined_bundle // channel: [ val(meta), [ "redefined-xenium-bundle" ] ] main: @@ -18,39 +18,39 @@ workflow SPATIALDATA_WRITE_META_MERGE { ch_versions = Channel.empty() // write spatialdata object from the raw xenium bundle - raw_bundle_path = ch_raw_bundle.map { meta, file_path -> - return [ meta, file(file_path) ] - } SPATIALDATA_WRITE_RAW_BUNDLE ( - raw_bundle_path, + ch_bundle_path, 'spatialdata_raw' ) ch_versions = ch_versions.mix ( SPATIALDATA_WRITE_RAW_BUNDLE.out.versions ) // write spatialdata object after running IMP_SEG - redefined_bundle_path = ch_redefined_bundle.map { meta, file_path -> - return [ meta, file(file_path) ] - } SPATIALDATA_WRITE_REDEFINED_BUNDLE ( - redefined_bundle_path, + ch_redefined_bundle, 'spatialdata_redefined' ) ch_versions = ch_versions.mix ( SPATIALDATA_WRITE_REDEFINED_BUNDLE.out.versions ) // merge raw & redefined spatialdata objects + ch_just_redefined_bundle = SPATIALDATA_WRITE_REDEFINED_BUNDLE.out.spatialdata.map { + _meta, bundle -> return [ bundle ] + } SPATIALDATA_MERGE_RAW_REDEFINED ( SPATIALDATA_WRITE_RAW_BUNDLE.out.spatialdata, - SPATIALDATA_WRITE_REDEFINED_BUNDLE.out.spatialdata + 
ch_just_redefined_bundle ) ch_versions = ch_versions.mix ( SPATIALDATA_MERGE_RAW_REDEFINED.out.versions ) // write metadata with spatialdata object + ch_just_bundle_path = ch_bundle_path.map { + _meta, bundle -> return [ bundle ] + } SPATIALDATA_META ( SPATIALDATA_MERGE_RAW_REDEFINED.out.spatialxe_bundle, - ch_raw_bundle + ch_just_bundle_path ) ch_versions = ch_versions.mix ( SPATIALDATA_META.out.versions ) @@ -61,6 +61,5 @@ workflow SPATIALDATA_WRITE_META_MERGE { ch_sd_merged = SPATIALDATA_MERGE_RAW_REDEFINED.out.spatialxe_bundle // channel: [ val(meta), "spatialdata_spatialxe" ] ch_sd_meta = SPATIALDATA_META.out.spatialxe_bundle // channel: [ val(meta), "spatialdata_spatialxe_final" ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } - diff --git a/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf b/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf index 09c7ac0d..e9207233 100644 --- a/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf @@ -67,18 +67,39 @@ workflow PIPELINE_INITIALISATION { // Custom validation for pipeline parameters // validateInputParameters() + log.info "INFO Input params validated ✅ " // // Create channel from input file provided through params.input // - Channel + try { + + Channel .fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) .map { meta, bundle, image -> return [ [id: meta.id], bundle, image ] } .set { ch_samplesheet } + log.info "INFO Samplesheet validated ✅ " + + } catch (Exception e) { + + log.error "❌ Samplesheet validation failed: ${e.message}" + exit 1 + } + + + // + // Check and validate xenium bundle + // + if ( !workflow.profile.contains('test')) { + validateXeniumBundle(ch_samplesheet) + } + + emit: + samplesheet = ch_samplesheet versions = ch_versions @@ -129,7 +150,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline 
failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error "❌ Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" } } @@ -144,30 +165,112 @@ workflow PIPELINE_COMPLETION { def validateInputParameters() { // check if the segmentation method provided is valid for a mode - if ( params.mode == 'image' && params.segmentation ) { - if ( !params.image_seg_methods.contains(params.segmentation) ) { - error "Error: Invalid segmentation method: ${params.segmentation} provided for the `image` based mode. Options: ${params.image_seg_methods}" + if ( params.mode == 'image' && params.method ) { + if ( !params.image_seg_methods.contains(params.method) ) { + log.error "❌ Error: Invalid segmentation method: ${params.method} provided for the `image` based mode. Options: ${params.image_seg_methods}" + exit 1 } } - if ( params.mode == 'coordinate' && params.segmentation ) { - if ( !params.transcript_seg_methods.contains(params.segmentation) ) { - error "Error: Invalid segmentation method: ${params.segmentation} provided for the `coordinate` based mode. Options: ${params.transcript_seg_methods}" + if ( params.mode == 'coordinate' && params.method ) { + if ( !params.transcript_seg_methods.contains(params.method) ) { + log.error "❌ Error: Invalid segmentation method: `${params.method}` provided for the `coordinate` based mode. Options: ${params.transcript_seg_methods}" + exit 1 } } // check if --relabel_genes is true but --gene_panel is not provided if ( params.relabel_genes && !params.gene_panel ) { - log.warn "Relabel genes is enabled, but gene panel is not provided with the `--gene_panel`. Using `gene_panel.json` in the xenium bundle" + log.warn "⚠️ Relabel genes is enabled, but gene panel is not provided with the `--gene_panel`. Using `gene_panel.json` in the xenium bundle." 
} // check if --relabel_genes is true but --gene_panel is not provided if ( params.gene_panel && !params.relabel_genes ) { - log.warn "Gene panel provided, but relabel genes is disabled. Using `gene_panel.json` only to generate metadata" + log.warn "⚠️ Gene panel provided, but relabel genes is disabled. Using `gene_panel.json` only to generate metadata." + } + + // warn when the segmentation method is xeniumranger and nucleus_segmentation_only is disabled + if ( params.method == 'xeniumranger' && !params.nucleus_segmentation_only ) { + log.warn "⚠️ Nucleus segmentation is disabled. Running xeniumranger resegment module to redefine xenium bundle without nucleus segmentation." + log.warn "⚠️ Use --nucleus_segmentation_only to enable nucleus segmentation to redefine xenium bundle with import-segmentation module." + } + // image-based baysor runs need a prior mask supplied via --segmentation_mask + if ( params.mode == 'image' && params.method == 'baysor' ) { + if ( !params.segmentation_mask ) { + log.error "❌ Error: Missing path to segmentation mask. Image-based segmentation with the `baysor` method requires a segmentation mask with the `--segmentation_mask` option."
+ exit 1 + } } } +// +// Check and validate xenium bundle +// +def validateXeniumBundle(ch_samplesheet) { + + // define xenium bundle directory structure + def xenium_bundle = [ + "analysis.tar.gz", + "analysis.zarr.zip", + "analysis_summary.html", + "cell_boundaries.csv.gz", + "cell_boundaries.parquet", + "cell_feature_matrix.h5", + "cell_feature_matrix.tar.gz", + "cell_feature_matrix.zarr.zip", + "cells.csv.gz", + "cells.parquet", + "cells.zarr.zip", + "experiment.xenium", + "gene_panel.json", + "metrics_summary.csv", + "morphology.ome.tif", + "morphology_focus/", + "nucleus_boundaries.csv.gz", + "nucleus_boundaries.parquet", + "transcripts.parquet", + "transcripts.zarr.zip" + ] + + // get bundle path + def ch_bundle_path = ch_samplesheet.map { + _meta, bundle, _image -> + def bundle_path = file ( + bundle.toString().replaceFirst(/\/$/, ''), + ) + return bundle_path + } + + // check if the path exists + if ( !ch_bundle_path.map { it.exists() } ) { + error "❌ Error: Xenium bundle path not found. Check if the path provided in the samplesheet exists." 
+ exit 1 + } + + // if the path exists, check for the presence of xenium files + if ( ch_bundle_path.map { it.exists() } ) { + + ch_bundle_path.map { path -> + def missing_files = [] + + def allExist = xenium_bundle.every { filename -> + def fullPath = file("${path}/${filename}") + if (!fullPath.exists()) { + missing_files.add(filename) + return false + } + return true + } + + if (!allExist) { + log.error "❌ Missing file(s) at bundle path provided in the samplesheet: ${missing_files}" + exit 1 + } + } + } + log.info "INFO Xenium bundle validated ✅ \n" +} // // Generate methods description for MultiQC diff --git a/subworkflows/local/utils_stage_testdata/main.nf b/subworkflows/local/utils_stage_testdata/main.nf deleted file mode 100644 index 8a13590b..00000000 --- a/subworkflows/local/utils_stage_testdata/main.nf +++ /dev/null @@ -1,66 +0,0 @@ -// -// stage data for the test profile of the pipeline -// - -include { UNTAR } from '../../../modules/nf-core/untar/main' - - -workflow STAGE_TESTDATA { - - take: - ch_samplesheet - - main: - - ch_versions = Channel.empty() - ch_raw_bundle = Channel.empty() - ch_bundle_url = Channel.empty() - ch_image = Channel.empty() - ch_transcripts_csv = Channel.empty() - ch_transcripts_parquet = Channel.empty() - - // get xenium bundle path - ch_bundle_url = ch_samplesheet.map { meta, bundle, _image -> - return [ meta, file(bundle) ] - } - - // run the UNTAR module to create xenium bundle - UNTAR(ch_bundle_url) - ch_versions = ch_versions.mix ( UNTAR.out.versions ) - - ch_bundle_local_path = UNTAR.out.untar - ch_bundle_local_path.view() - - // get transcript.csv.gz - ch_transcripts_csv = ch_bundle_local_path.map { meta, bundle -> - def transcripts_csv = file(bundle + "/transcripts.csv.gz") - return [ meta, transcripts_csv ] - } - - // get transcript.parquet - ch_transcripts_parquet = ch_bundle_local_path.map { meta, bundle -> - def transcripts_parquet = file(bundle + "/transcripts.parquet") - return [ meta, transcripts_parquet ] - } - - 
// get morphology.ome.tif - ch_image = ch_bundle_local_path.map { meta, bundle -> - def morphology_image = file(bundle + "/morphology.ome.tif") - return [ meta, morphology_image ] - } - - // get baysor xenium config - ch_config = Channel.fromPath("${projectDir}/assets/config/xenium.toml", checkIfExists: true) - - - emit: - - ch_raw_bundle = ch_bundle_local_path // channel [ val(meta), ["xenium-bundle"] ] - ch_transcripts_csv = ch_transcripts_csv // channel [ val(meta), ["path-to-transcripts.csv.gz"] ] - ch_transcripts_parquet = ch_transcripts_parquet // channel [ val(meta), ["path-to-transcripts.csv.gz"] ] - ch_image = ch_image // channel [ val(meta), ["path-to-morphology.ome.tif"] ] - ch_config = ch_config // channel [ ["path-to-xenium.toml"] ] - - versions = ch_versions // channel [versions.yml] - -} diff --git a/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf index f1054b2e..461751ef 100644 --- a/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf +++ b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf @@ -11,14 +11,14 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { take: - ch_bundle // channel: [ val(meta), [ "xenium-bundle" ] ] + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] main: ch_versions = Channel.empty() ch_redefined_bundle = Channel.empty() - cells = ch_bundle.map { + cells = ch_bundle_path.map { _meta, bundle -> return [ bundle + "/cells.zarr.zip" ] } @@ -26,7 +26,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { if ( params.expansion_distance == 0 || params.expansion_distance != 5 ){ IMP_SEG_COUNT_MATRIX_EXP_DISTANCE ( - ch_bundle, + ch_bundle_path, [], cells, [], @@ -43,7 +43,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { if ( params.qupath_polygons && params.nucleus_segmentation_only ) { IMP_SEG_POLYGON_GEOJSON_INPUT ( - 
ch_bundle, + ch_bundle_path, [], params.qupath_polygons, [], @@ -58,7 +58,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { } else if ( params.qupath_polygons ) { IMP_SEG_POLYGON_GEOJSON_INPUT ( - ch_bundle, + ch_bundle_path, [], params.qupath_polygons, params.qupath_polygons, @@ -80,7 +80,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { if ( params.qupath_polygons && params.alignment_csv ) { IMP_SEG_TRANS_MATRIX_INPUT ( - ch_bundle, + ch_bundle_path, params.alignment_csv, params.qupath_polygons, params.qupath_polygons, diff --git a/subworkflows/local/xeniumranger_relabel_resegment/main.nf b/subworkflows/local/xeniumranger_relabel_resegment/main.nf index 657db843..1151e69a 100644 --- a/subworkflows/local/xeniumranger_relabel_resegment/main.nf +++ b/subworkflows/local/xeniumranger_relabel_resegment/main.nf @@ -9,17 +9,22 @@ workflow XENIUMRANGER_RELABEL_RESEGMENT { take: - ch_bundle // channel: [ val(meta), [ xenium-bundle-path ] ] - ch_gene_panel // channel: [ ["gene_panel.json"] ] + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] + ch_gene_panel // channel: [ ["gene_panel.json"] ] main: ch_versions = Channel.empty() - XENIUMRANGER_RELABEL ( ch_bundle, ch_gene_panel ) + XENIUMRANGER_RELABEL ( + ch_bundle_path, + ch_gene_panel + ) ch_versions = ch_versions.mix ( XENIUMRANGER_RELABEL.out.versions ) - XENIUMRANGER_RESEGMENT ( XENIUMRANGER_RELABEL.out.bundle ) + XENIUMRANGER_RESEGMENT ( + XENIUMRANGER_RELABEL.out.bundle + ) ch_versions = ch_versions.mix ( XENIUMRANGER_RESEGMENT.out.versions ) @@ -27,5 +32,5 @@ workflow XENIUMRANGER_RELABEL_RESEGMENT { redefined_bundle = XENIUMRANGER_RESEGMENT.out.bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf 
index 1000c165..6907ff50 100644 --- a/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf +++ b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf @@ -9,37 +9,48 @@ workflow XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF { take: - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_redefined_bundle = Channel.empty() // run resegment with changed config values - XENIUMRANGER_RESEGMENT ( ch_bundle ) + XENIUMRANGER_RESEGMENT ( ch_bundle_path ) ch_versions = ch_versions.mix( XENIUMRANGER_RESEGMENT.out.versions ) - // run import segmentation to redine - cells = ch_bundle.map { + // run import segmentation to redefine the xenium bundle along with nuclear segmentation + cells = XENIUMRANGER_RESEGMENT.out.bundle.map { _meta, bundle -> return [ bundle + "/cells.zarr.zip" ] } - XENIUMRANGER_IMPORT_SEGMENTATION ( - XENIUMRANGER_RESEGMENT.out.bundle, - [], - cells, - cells, - [], - [], - "pixel" - ) - ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + // adjust the nuclear expansion distance without altering nuclei detection + if ( params.nucleus_segmentation_only ) { + + XENIUMRANGER_IMPORT_SEGMENTATION ( + XENIUMRANGER_RESEGMENT.out.bundle, + [], + cells, + [], + [], + [], + "pixels" + ) + ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + + ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle + + } else { + + ch_redefined_bundle = XENIUMRANGER_RESEGMENT.out.bundle + } emit: - redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] + redefined_bundle = ch_redefined_bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/spatialxe.nf
b/workflows/spatialxe.nf index b31db6ea..3bffb276 100644 --- a/workflows/spatialxe.nf +++ b/workflows/spatialxe.nf @@ -5,42 +5,39 @@ */ // multiqc -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' // nf-core functionality -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_spatialxe_pipeline' -include { paramsSummaryMap } from 'plugin/nf-schema' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_spatialxe_pipeline' +include { paramsSummaryMap } from 'plugin/nf-schema' // nf-core modules -include { UNTAR } from '../modules/nf-core/untar/main' - -// testdata stagign subworkflow -include { STAGE_TESTDATA } from '../subworkflows/local/utils_stage_testdata/main' +include { UNTAR } from '../modules/nf-core/untar/main' // coordinate-based segmentation subworklfows -include { SEGGER_CREATE_TRAIN_PREDICT } from '../subworkflows/local/segger_create_train_predict/main' -include { PROSEG_PRESET_PROSEG2BAYSOR } from '../subworkflows/local/proseg_preset_proseg2baysor/main' -include { BAYSOR_GENERATE_PREVIEW } from '../subworkflows/local/baysor_generate_preview/main' -include { BAYSOR_RUN_TRANSCRIPTS_CSV } from '../subworkflows/local/baysor_run_transcripts_csv/main' +include { SEGGER_CREATE_TRAIN_PREDICT } from '../subworkflows/local/segger_create_train_predict/main' +include { PROSEG_PRESET_PROSEG2BAYSOR } from '../subworkflows/local/proseg_preset_proseg2baysor/main' +include { BAYSOR_GENERATE_PREVIEW } from '../subworkflows/local/baysor_generate_preview/main' +include { BAYSOR_RUN_TRANSCRIPTS_PARQUET } from 
'../subworkflows/local/baysor_run_transcripts_parquet/main' // image-based segmentation subworklfows -include { BAYSOR_RUN_MORPHOLOGY_OME_TIF } from '../subworkflows/local/baysor_run_morphology_ome_tif/main' -include { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/cellpose_resolift_morphology_ome_tif/main' -include { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION } from '../subworkflows/local/cellpose_baysor_import_segmentation/main' -include { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main' +include { BAYSOR_RUN_PRIOR_SEGMENTATION_MASK } from '../subworkflows/local/baysor_run_prior_segmentation_mask/main' +include { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/cellpose_resolift_morphology_ome_tif/main' +include { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION } from '../subworkflows/local/cellpose_baysor_import_segmentation/main' +include { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main' // segmentation-free subworkflows -include { BAYSOR_GENERATE_SEGFREE } from '../subworkflows/local/baysor_generate_segfree/main' -include { FICTURE_PREPROCESS_MODEL } from '../subworkflows/local/ficture_preprocess_model/main' +include { BAYSOR_GENERATE_SEGFREE } from '../subworkflows/local/baysor_generate_segfree/main' +include { FICTURE_PREPROCESS_MODEL } from '../subworkflows/local/ficture_preprocess_model/main' // xeniumranger subworkflows include { XENIUMRANGER_RELABEL_RESEGMENT } from '../subworkflows/local/xeniumranger_relabel_resegment/main' include { XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE } from '../subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main' // spatialdata subworkflows -include { SPATIALDATA_WRITE_META_MERGE } from '../subworkflows/local/spatialdata_write_meta_merge/main' +include { SPATIALDATA_WRITE_META_MERGE } from 
'../subworkflows/local/spatialdata_write_meta_merge/main' // TODO qc layer subworkflows @@ -65,14 +62,18 @@ workflow SPATIALXE { */ ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + + ch_input = Channel.empty() ch_bundle = Channel.empty() - ch_bundle_path = Channel.empty() + ch_config = Channel.empty() + ch_features = Channel.empty() ch_raw_bundle = Channel.empty() ch_gene_panel = Channel.empty() - ch_transcripts_parquet = Channel.empty() + ch_bundle_path = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_morphology_image = Channel.empty() ch_redefined_bundle = Channel.empty() - ch_config = Channel.empty() + ch_transcripts_parquet = Channel.empty() /* @@ -81,66 +82,95 @@ workflow SPATIALXE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - // check if its a test run if ( workflow.profile.contains('test') ) { - STAGE_TESTDATA ( - ch_samplesheet - ) + // get sample, xenium bundle and image path + ch_input_untar = ch_samplesheet.map { meta, bundle, _image -> + return [ meta, bundle ] + } - ch_raw_bundle = STAGE_TESTDATA.out.ch_raw_bundle - ch_transcripts = STAGE_TESTDATA.out.ch_transcripts_csv - ch_transcripts_parquet = STAGE_TESTDATA.out.ch_transcripts_parquet - ch_image = STAGE_TESTDATA.out.ch_image - ch_config = STAGE_TESTDATA.out.ch_config + // get testdata + UNTAR ( ch_input_untar ) + ch_versions = ch_versions.mix ( UNTAR.out.versions ) + + ch_untar_outs = UNTAR.out.untar.map { _meta, bundle -> + return [ bundle.toString() ] + } + + ch_samplesheet.combine(ch_untar_outs) + .map { meta, _url, image, test_bundle -> + return [meta, test_bundle, image] + } + .set { ch_input } } else { - // get samplesheet fields - ch_bundle_path = ch_samplesheet.map { meta, bundle, _image -> - return [ meta, file(bundle)] - } + // for all other profile runs + ch_input = ch_samplesheet + } - // get xenium bundle files - ch_bundle = ch_samplesheet.map { meta, bundle, _image -> - def bundle_files = 
file(bundle).toList().collect() - return [meta, bundle_files] - } + // path to bundle input + ch_bundle_path = ch_input.map { meta, bundle, _image -> + return [ meta, bundle ] + } - // get transcript.csv.gz - ch_transcripts = ch_samplesheet.map { meta, bundle, _image -> - def transcripts_csv = file(bundle.replaceFirst(/\/$/, '') + "/transcripts.csv.gz") - return [ meta, transcripts_csv ] - } + // get transcripts.parquet from the xenium bundle + ch_transcripts_parquet = ch_input.map { meta, bundle, _image -> + def transcripts_parquet = file ( + bundle.toString().replaceFirst(/\/$/, '') + "/transcripts.parquet", + checkIfExists: true + ) + return [ meta, transcripts_parquet ] + } - // get transcript.parquet - ch_transcripts_parquet = ch_samplesheet.map { meta, bundle, _image -> - def transcripts_parquet = file(bundle.replaceFirst(/\/$/, '') + "/transcripts.parquet") - return [ meta, transcripts_parquet ] - } + // get morphology.ome.tif from the xenium bundle + ch_morphology_image = ch_input.map { meta, bundle, image -> + def morphology_img = image ? file(image) : file ( + bundle.toString().replaceFirst(/\/$/, '') + "/morphology.ome.tif", + checkIfExists: true + ) + return [ meta, morphology_img ] + } - // get morphology.ome.tif - ch_image = ch_samplesheet.map { meta, bundle, image -> - def morphology_img = image ? 
file(image) : file(bundle.replaceFirst(/\/$/, '') + "/morphology.ome.tif") - return [ meta, morphology_img ] - } + // get baysor xenium config + ch_config = Channel.fromPath ( + "${projectDir}/assets/config/xenium.toml", + checkIfExists: true + ) - // get baysor xenium config - ch_config = Channel.fromPath("${projectDir}/assets/config/xenium.toml", checkIfExists: true) + // get segmentation mask if provided with --segmentation_mask for the baysor method + if ( params.segmentation_mask ) { + ch_segmentation_mask = Channel.fromPath ( + params.segmentation_mask, checkIfExists: true + ) + } - // get gene_panel.json if provided with --gene_panel, sets relabel_genes to true - if (( params.gene_panel )) { + // get a list of features if provided with the --features for the ficture method + if ( params.features ) { + ch_features = Channel.fromPath ( + params.features, + checkIfExists: true + ) + } - params.relabel_genes = true - ch_gene_panel = Channel.fromPath(params.gene_panel, checkIfExists: true) + // get gene_panel.json if provided with --gene_panel, sets relabel_genes to true + if (( params.gene_panel )) { - } else { + params.relabel_genes = true + ch_gene_panel = Channel.fromPath ( + params.gene_panel, + checkIfExists: true + ) - // gene panel to use if only --relabel_genes is provided - ch_gene_panel = ch_samplesheet.map { meta, bundle, _image -> - def gene_panel = file(bundle.replaceFirst(/\/$/, '') + "/gene_panel.json") - return [ meta, gene_panel ] - } + } else { + + // gene panel to use if only --relabel_genes is provided + ch_gene_panel = ch_input.map { meta, bundle, _image -> + def gene_panel = file ( + bundle.toString().replaceFirst(/\/$/, '') + "/gene_panel.json", + checkIfExists: true + ) + return [ meta, gene_panel ] } } @@ -154,7 +184,7 @@ workflow SPATIALXE { if ( params.relabel_genes ) { XENIUMRANGER_RELABEL_RESEGMENT ( - ch_bundle, + ch_bundle_path, ch_gene_panel ) ch_raw_bundle = XENIUMRANGER_RELABEL_RESEGMENT.out.redefined_bundle @@ -169,14 +199,12 
@@ workflow SPATIALXE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // run baysor preview if `generate_preview ` is true - if ( params.generate_preview && params.mode == 'coordinate' ) { + if ( params.mode == 'preview' ) { BAYSOR_GENERATE_PREVIEW ( - ch_transcripts, + ch_transcripts_parquet, ch_config ) - log.info "Preview generated at ${params.outdir}" - exit 0 } /* @@ -187,10 +215,10 @@ workflow SPATIALXE { if ( params.mode == 'image' ) { // trigger the default image-based workflow if no method is specified - if ( !params.segmentation ) { + if ( !params.method ) { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION ( - ch_image, + ch_morphology_image, ch_bundle_path, ch_transcripts_parquet, ch_config @@ -199,35 +227,35 @@ workflow SPATIALXE { } // check it the provided method is part of the methods list - if ( params.segmentation in params.image_seg_methods ) { + if ( params.method in params.image_seg_methods ) { // run xeniumranger resegment with morphology_ome.tif - if ( params.segmentation == 'xeniumranger' ) { + if ( params.method == 'xeniumranger' ) { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF ( - ch_raw_bundle + ch_bundle_path ) ch_redefined_bundle = XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF.out.redefined_bundle } // run baysor run with morphology_ome.tif - if ( params.segmentation == 'baysor' ) { + if ( params.method == 'baysor' ) { - BAYSOR_RUN_MORPHOLOGY_OME_TIF ( - ch_raw_bundle, - ch_transcripts, - ch_image, + BAYSOR_RUN_PRIOR_SEGMENTATION_MASK ( + ch_bundle_path, + ch_transcripts_parquet, + ch_segmentation_mask, ch_config ) - ch_redefined_bundle = BAYSOR_RUN_MORPHOLOGY_OME_TIF.out.redefined_bundle + ch_redefined_bundle = BAYSOR_RUN_PRIOR_SEGMENTATION_MASK.out.redefined_bundle } // run cellpose on the morphology_ome.tif - if ( params.segmentation == 'cellpose' ) { + if ( params.method == 'cellpose' ) { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF ( - ch_image, - ch_raw_bundle + ch_morphology_image, + ch_bundle_path ) 
ch_redefined_bundle = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.redefined_bundle } @@ -243,50 +271,49 @@ workflow SPATIALXE { if ( params.mode == 'coordinate' ) { // trigger the default transcripts-based workflow if no method is specified - if ( !params.segmentation ) { + if ( !params.method ) { PROSEG_PRESET_PROSEG2BAYSOR ( - ch_raw_bundle, - ch_transcripts + ch_bundle_path, + ch_transcripts_parquet ) ch_redefined_bundle = PROSEG_PRESET_PROSEG2BAYSOR.out.redefined_bundle } // check it the provided method is part of the methods list - if ( params.segmentation in params.transcript_seg_methods ) { + if ( params.method in params.transcript_seg_methods ) { - // run proseg with transcripts.csv.gz - if ( params.segmentation == 'proseg') { + // run proseg with transcripts.parquet + if ( params.method == 'proseg') { PROSEG_PRESET_PROSEG2BAYSOR ( - ch_raw_bundle, - ch_transcripts + ch_bundle_path, + ch_transcripts_parquet ) ch_redefined_bundle = PROSEG_PRESET_PROSEG2BAYSOR.out.redefined_bundle } - // run segger with transcripts.csv.gz - if ( params.segmentation == 'segger' ) { + // run segger with transcripts.parquet + if ( params.method == 'segger' ) { SEGGER_CREATE_TRAIN_PREDICT ( - ch_raw_bundle, + ch_bundle_path, ch_transcripts_parquet ) } - // run baysor with transcripts.csv.gz - if ( params.segmentation == 'baysor' ) { + // run baysor with transcripts.parquet + if ( params.method == 'baysor' ) { - BAYSOR_RUN_TRANSCRIPTS_CSV ( - ch_raw_bundle, - ch_transcripts, - ch_config, - [] + BAYSOR_RUN_TRANSCRIPTS_PARQUET ( + ch_bundle_path, + ch_transcripts_parquet, + ch_config ) - ch_redefined_bundle = BAYSOR_RUN_TRANSCRIPTS_CSV.out.redefined_bundle + ch_redefined_bundle = BAYSOR_RUN_TRANSCRIPTS_PARQUET.out.redefined_bundle } } @@ -301,7 +328,7 @@ workflow SPATIALXE { if ( params.xeniumranger_only ) { XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE ( - ch_raw_bundle + ch_bundle_path ) ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE.out.redefined_bundle } 
@@ -312,11 +339,17 @@ workflow SPATIALXE { SPATIALXE - SPATIALDATA / METADATA LAYER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - // run spatialdata modules to generate sd objects - SPATIALDATA_WRITE_META_MERGE ( - ch_raw_bundle, - ch_redefined_bundle - ) + + // run spatialdata modules to generate sd objects in image or coordinate mode + if ( params.mode == 'image' || params.mode == 'coordinate' ) { + + SPATIALDATA_WRITE_META_MERGE ( + ch_bundle_path, + ch_redefined_bundle + ) + + } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -325,6 +358,46 @@ workflow SPATIALXE { */ + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALXE - SEGMENTATION-FREE LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + if ( params.mode == 'segfree' ) { + + // trigger the default segfree workflow if no method is specified + if ( !params.method ) { + + BAYSOR_GENERATE_SEGFREE ( + ch_transcripts_parquet, + ch_config + ) + } + + // check if the provided method is part of the methods list + if ( params.method in params.segfree_methods ) { + + // run baysor with transcripts.parquet + if ( params.method == 'baysor' ) { + + BAYSOR_GENERATE_SEGFREE ( + ch_transcripts_parquet, + ch_config + ) + } + + // run ficture with transcripts.parquet + if ( params.method == 'ficture' ) { + + FICTURE_PREPROCESS_MODEL ( + ch_transcripts_parquet, + ch_features + ) + } + } + } + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~