diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93cf0ac5..f13c5ca2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -82,7 +82,22 @@ jobs: - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + - name: "Run pipeline in image mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode image + + - name: "Run pipeline in coordinate mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode coordinate + + - name: "Run pipeline in preview mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode preview + + - name: "Run pipeline in segfree mode with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub --outdir ./results --mode segfree diff --git a/README.md b/README.md index 0b53aba4..7cb1dad2 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,6 @@ -1. 
Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) - ## Usage On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialxe/results). @@ -61,7 +59,7 @@ nextflow run nf-core/spatialxe \ ## Run coordinate-based segmentation mode -`PROSEG -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC` +`PROSEG -> PROSEG2BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC` ```bash nextflow run nf-core/spatialxe \ @@ -71,6 +69,30 @@ nextflow run nf-core/spatialxe \ --mode coordinate ``` +## Run segfree mode + +`BAYSOR_SEGFREE` + +```bash +nextflow run nf-core/spatialxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode segfree +``` + +## Run preview mode + +`BAYSOR_PREVIEW` + +```bash +nextflow run nf-core/spatialxe \ + -profile \ + --input samplesheet.csv \ + --outdir \ + --mode preview +``` + > [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). 
diff --git a/conf/modules.config b/conf/modules.config index 9c1c0373..254a9d1a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,13 @@ process { ] } + withName: XENIUMRANGER_RESEGMENT { + publishDir = [ + path: "${params.outdir}/xeniumranger/resegment", + mode: params.publish_dir_mode + ] + } + withName: XENIUMRANGER_IMPORT_SEGMENTATION { publishDir = [ path: "${params.outdir}/xeniumranger/import_segementation", diff --git a/conf/test.config b/conf/test.config index 77b495da..4a5e3c94 100644 --- a/conf/test.config +++ b/conf/test.config @@ -12,19 +12,14 @@ process { - withLabel: process_high { - resourceLimits = [ - cpus: 8, - memory: '8.GB', - time: '1.h' - ] - } + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '2.h' + ] - withName: CELLPOSE { - resourceLimits = [ - cpus: 4, - memory: '8.GB' - ] + withName: UNTAR { + ext.prefix = "test_bundle" } } diff --git a/docs/images/spatialxe-metromap.png b/docs/images/spatialxe-metromap.png index c48c05ea..1279d4f8 100644 Binary files a/docs/images/spatialxe-metromap.png and b/docs/images/spatialxe-metromap.png differ diff --git a/docs/images/spatialxe-metromap.svg b/docs/images/spatialxe-metromap.svg index c4f2ab69..c963ad4e 100644 --- a/docs/images/spatialxe-metromap.svg +++ b/docs/images/spatialxe-metromap.svg @@ -1,4 +1 @@ - - - -Xenium bundlegene panelrelabelimport-segmentationBundle redefinition Xenium Rangerseggerxeniumranger resegmentSegmentation refinementCoordinates/ maskFICTUREProsegSegmentation-free approachBOMSCellposeImage-based segmentation approachBaysortiffcsvJSONmorphologytranscriptsXenium bundle (redefined)spatialxe QCspatialxe metaSpatialData DomainhtmlQC reportsJSONMetadataCoordinates/ maskRO-crate outputSpatialData integration Approach: image-based (Cellpose, BOMS)Approach: image-based and segmentation-free (Baysor)Approach: segmentation-free (Proseg, FICTURE) Optional stepOutputsXenium onboard analysis (XOA)Inputs \ No newline at end of file +Xenium bundleXenium 
bundleXenium onboard analysis (XOA)Xenium onboard analy...gene panelgene panelrelabelrelabelQC reportsQC reportsMetadataMetadataSpoQCSpoQCspatialdataspatialdataMultiQCMultiQCXenium bundle (redefined)Xenium bundle (rede...import-segmentationimport-segmentati...resegmentresegmentbaysorbaysorcellposecellposebaysorbaysorcellposecellposeImage-based segmentationImage-based segmentationmorphology.ome.tifmorphology.ome.tifmorphology.ome.tifmorphology.ome.tifsegmentation polygons/csv/masksegmentation pol...tiftiftiftifjsonjsonhtmlhtmlhtmlhtmlprior segmentation maskprior segmentat...seggerseggerprosegprosegbaysorbaysortranscripts.parquettranscripts.parquetsegmentation polygons/csvsegmentation pol...parquetparq...Coordinate-based segmentationCoordinate-based segmentationspatialdataspatialdataspatialdataspatialdataOptional stepOptional stepOutputsOutputsInputsInputsDefault workflow for image modeDefault workflow for image modeDefault workflow for coordinate modeDefault workflow for coordinate modeText is not SVG - cannot display \ No newline at end of file diff --git a/docs/usage.md b/docs/usage.md index 2f2ae944..80c1f6a0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -40,10 +40,10 @@ This runs the default image mode: ```bash nextflow run nf-core/spatialxe \ + -profile --input ./samplesheet.csv \ --outdir ./results \ - --mode image \ - -profile + --mode image ``` #### Coordinate-based (transcripts-based) segmentation mode @@ -53,10 +53,10 @@ This runs the default coordinate mode: ```bash nextflow run nf-core/spatialxe \ + -profile --input ./samplesheet.csv \ --outdir ./results \ - --mode coordinate \ - -profile + --mode coordinate ``` ### Image-based Segmentation mode (--mode image): @@ -73,26 +73,26 @@ nextflow run nf-core/spatialxe \ #### Run Segmentation with the methods methods mentioned above : -eg: To run proseg segmentation use the `coordinate` mode and the `proseg` segmentation method +eg: To run proseg segmentation use the `coordinate` mode and the `proseg` 
segmentation method (--method) ```bash nextflow run nf-core/spatialxe \ + -profile --input ./samplesheet.csv \ --outdir ./results \ --mode coordinate \ - --segmentation proseg \ - -profile + --method proseg ``` -eg: To run cellpose segmentation use the `image` mode and the `cellpose` segmentation method +eg: To run cellpose segmentation use the `image` mode and the `cellpose` segmentation method (--method) ```bash nextflow run nf-core/spatialxe \ + -profile --input ./samplesheet.csv \ --outdir ./results \ --mode image \ - --segmentation cellpose \ - -profile + --method cellpose ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. diff --git a/modules.json b/modules.json index 920f97ee..d89c6fa9 100644 --- a/modules.json +++ b/modules.json @@ -11,11 +11,6 @@ "installed_by": ["modules"], "patch": "modules/nf-core/cellpose/cellpose.diff" }, - "gunzip": { - "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] - }, "multiqc": { "branch": "master", "git_sha": "7b50cb7be890e4b28cffb82e438cc6a8d7805d3f", @@ -24,7 +19,8 @@ "untar": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/untar/untar.diff" }, "unzip": { "branch": "master", diff --git a/modules/local/baysor/create_dataset/templates/create_dataset.py b/modules/local/baysor/create_dataset/templates/create_dataset.py index 4e33f4e5..21db550b 100644 --- a/modules/local/baysor/create_dataset/templates/create_dataset.py +++ b/modules/local/baysor/create_dataset/templates/create_dataset.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import csv import random @@ -41,18 +41,16 @@ def generate_dataset( # randomize csv rows to write for row in reader: - if random.random() < sample_fraction: + if random.random() < float(sample_fraction): writer.writerow(row) - # 
print(f"Sampled data written to {sampled_transcripts}") - return None @staticmethod - def generate_version_yml(version: str) -> None: + def generate_version_yml() -> None: with open("versions.yml", "w") as yml: yml.write('"${task.process}":\\n') - yml.write(f'Baysor-Preview Create Dataset: {version}"\\n') + yml.write("Baysor-Preview Create Dataset: 0.7.1'\\n") return None @@ -64,17 +62,16 @@ def main() -> None: transcripts: str = "${transcripts}" sample_fraction: float = "${sample_fraction}" sampled_transcripts: str = "sampled_transcripts.csv" - version: str = "${VERSION}" # generate dataset - BaysorPreview.generate_dataset( + BaysorPreview.generate_dataset ( transcripts=transcripts, sampled_transcripts=sampled_transcripts, sample_fraction=sample_fraction ) # generate versions.yml - BaysorPreview.generate_version_yml(version=version) + BaysorPreview.generate_version_yml() return None diff --git a/modules/local/baysor/run/main.nf b/modules/local/baysor/run/main.nf index 619880a2..455308e2 100644 --- a/modules/local/baysor/run/main.nf +++ b/modules/local/baysor/run/main.nf @@ -58,10 +58,12 @@ process BAYSOR_RUN { """ touch segmentation.csv touch segmentation_polygons_2d.json + touch segmentation_polygons_3d.json touch segmentation_log.log touch segmentation_counts.loom touch segmentation_cell_stats.csv touch segmentation_params.dump.toml + touch segmentation_run.html cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/spatialconverter/parquet_to_csv/main.nf b/modules/local/spatialconverter/parquet_to_csv/main.nf index 8f59157e..b585aee8 100644 --- a/modules/local/spatialconverter/parquet_to_csv/main.nf +++ b/modules/local/spatialconverter/parquet_to_csv/main.nf @@ -4,26 +4,27 @@ process PARQUET_TO_CSV { container "ghcr.io/scverse/spatialdata:spatialdata0.3.0_spatialdata-io0.1.7_spatialdata-plot0.2.9" - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "PARQUET_TO_CSV module does not support Conda. 
Please use Docker / Singularity / Podman instead." - } - input: tuple val(meta), path(transcripts) + val(extension) output: - tuple val(meta), path("*.csv") , emit: transcripts_csv - path("versions.yml") , emit: versions + tuple val(meta), path("*.csv*"), emit: transcripts_csv + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when script: + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PARQUET_TO_CSV module does not support Conda. Please use Docker / Singularity / Podman instead." + } + template 'parquet_to_csv.py' stub: """ - touch ${transcripts} + touch ${transcripts}.csv cat <<-END_VERSIONS > versions.yml "${task.process}": spatialconverter: "${task.version}" diff --git a/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py b/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py index 3e5e91e8..3d4acbb1 100755 --- a/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py +++ b/modules/local/spatialconverter/parquet_to_csv/templates/parquet_to_csv.py @@ -1,17 +1,38 @@ #!/usr/bin/env python import pandas as pd +from pathlib import Path + + +def convert_parquet ( + transcripts: Path, + extension: str = '.csv' + ) -> None: + + df = pd.read_parquet(transcripts, engine = 'pyarrow') + + if extension == ".gz": + output = transcripts.replace(".parquet", ".csv.gz") + df.to_csv(f"{output}", compression='gzip', index=False) + else: + output = transcripts.replace(".parquet", ".csv") + df.to_csv(f"{output}", index=False) + + return None + if __name__ == '__main__': - print("[START]") - df = pd.read_parquet("${transcripts}") - output="${transcripts}".replace(".parquet",".csv") - df.to_csv(f"{output}", index=False) + + transcripts: str = "${transcripts}" + extension: str = "${extension}" + + # generate transcripts.csv(.gz) + convert_parquet ( + transcripts=transcripts, + extension=extension + ) #Output version information with open("versions.yml", 
"w") as f: f.write('"${task.process}":\\n') f.write(f'spatialconverter: "v0.0.1"\\n') - - print("[FINISH]") - diff --git a/modules/local/spatialdata/merge/main.nf b/modules/local/spatialdata/merge/main.nf index 528e540d..dbcc00af 100644 --- a/modules/local/spatialdata/merge/main.nf +++ b/modules/local/spatialdata/merge/main.nf @@ -6,11 +6,11 @@ process SPATIALDATA_MERGE { input: tuple val(meta), path(ref_bundle, stageAs: "*") - tuple val(meta), path(add_bundle, stageAs: "*") + path(add_bundle, stageAs: "*") output: - tuple val(meta), path("spatialdata_spatialxe") , emit: spatialxe_bundle - path "versions.yml" , emit: versions + tuple val(meta), path("spatialdata_spatialxe"), emit: spatialxe_bundle + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -22,6 +22,7 @@ process SPATIALDATA_MERGE { } def args = task.ext.args ?: '' + template 'merge.py' stub: diff --git a/modules/local/spatialdata/meta/main.nf b/modules/local/spatialdata/meta/main.nf index 4f302df4..260372aa 100644 --- a/modules/local/spatialdata/meta/main.nf +++ b/modules/local/spatialdata/meta/main.nf @@ -4,30 +4,32 @@ process SPATIALDATA_META { container "heylf/spatialdata:0.2.6" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "SPATIALDATA_WRITE module does not support Conda. Please use Docker / Singularity / Podman instead." 
- } - input: tuple val(meta), path(spatialdata_bundle, stageAs: "*") - tuple val(meta), path(xenium_bundle, stageAs: "*") + path(xenium_bundle, stageAs: "*") output: - tuple val(meta), path("spatialdata_spatialxe_final") , emit: spatialxe_bundle - path "versions.yml" , emit: versions + tuple val(meta), path("spatialdata_spatialxe_final"), emit: spatialxe_bundle + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + exit 1, "SPATIALDATA_META module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + template 'meta.py' stub: + """ - mkdir -p "spatialdata_spatialxe/" - touch spatialdata_spatialxe/fake_file.txt + mkdir -p "spatialdata_spatialxe_final/" + touch "spatialdata_spatialxe_final/fake_file.txt" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/spatialdata/write/main.nf b/modules/local/spatialdata/write/main.nf index 19dd9abc..53d8cbb6 100644 --- a/modules/local/spatialdata/write/main.nf +++ b/modules/local/spatialdata/write/main.nf @@ -9,8 +9,8 @@ process SPATIALDATA_WRITE { val(outputfolder) output: - tuple val(meta), path("${outputfolder}") , emit: spatialdata - path "versions.yml" , emit: versions + tuple val(meta), path("${outputfolder}"), emit: spatialdata + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -26,9 +26,11 @@ process SPATIALDATA_WRITE { template 'write.py' stub: + + def outdir = "${outputfolder}" """ - mkdir -p "spatialdata/" - touch spatialdata/fake_file.txt + mkdir -p "${outdir}/" + touch "${outdir}/fake_file.txt" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml deleted file mode 100644 index 9b926b1f..00000000 --- 
a/modules/nf-core/gunzip/environment.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda -dependencies: - - conda-forge::coreutils=9.5 - - conda-forge::grep=3.11 - - conda-forge::gzip=1.13 - - conda-forge::lbzip2=2.5 - - conda-forge::sed=4.8 - - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf deleted file mode 100644 index 3ffc8e92..00000000 --- a/modules/nf-core/gunzip/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process GUNZIP { - tag "${archive}" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' - : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("${gunzip}"), emit: gunzip - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def extension = (archive.toString() - '.gz').tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".${extension}" - def prefix = task.ext.prefix ?: name - gunzip = prefix + ".${extension}" - """ - # Not calling gunzip itself because it creates files - # with the original group ownership rather than the - # default one for that user / the work directory - gzip \\ - -cd \\ - ${args} \\ - ${archive} \\ - > ${gunzip} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def extension = (archive.toString() - 
'.gz').tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".${extension}" - def prefix = task.ext.prefix ?: name - gunzip = prefix + ".${extension}" - """ - touch ${gunzip} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml deleted file mode 100644 index 69d31024..00000000 --- a/modules/nf-core/gunzip/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: gunzip -description: Compresses and decompresses files. -keywords: - - gunzip - - compression - - decompression -tools: - - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] - identifier: "" -input: - - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. 
[ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" -output: - - gunzip: - - meta: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - ${gunzip}: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" -maintainers: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" - - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test deleted file mode 100644 index 776211ad..00000000 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ /dev/null @@ -1,121 +0,0 @@ -nextflow_process { - - name "Test Process GUNZIP" - script "../main.nf" - process "GUNZIP" - tag "gunzip" - tag "modules_nfcore" - tag "modules" - - test("Should run without failures") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - prefix") { - - config './nextflow.config' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [], - file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - prefix - stub") { - - options '-stub' - config './nextflow.config' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap deleted file mode 100644 index a0f0e67e..00000000 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ /dev/null @@ -1,134 +0,0 @@ -{ - "Should run without failures - prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:22.080222697" - }, - "Should run without failures - stub": { - "content": [ - { - "0": [ - [ - [ - - ], - "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - [ - - ], - "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:14.593020264" - }, - "Should run without 
failures": { - "content": [ - { - "0": [ - [ - [ - - ], - "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - [ - - ], - "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:01.295397925" - }, - "Should run without failures - prefix": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "1": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ], - "gunzip": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "versions": [ - "versions.yml:md5,d327e4a19a6d5c5e974136cef8999d8c" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-13T11:48:07.414271387" - } -} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config deleted file mode 100644 index dec77642..00000000 --- a/modules/nf-core/gunzip/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: GUNZIP { - ext.prefix = { "${meta.id}.xyz" } - } -} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml deleted file mode 100644 index fd3f6915..00000000 --- a/modules/nf-core/gunzip/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gunzip: - - modules/nf-core/gunzip/** diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index e712ebe6..550d8577 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -53,6 +53,10 @@ process UNTAR { prefix = task.ext.prefix ?: (meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ mkdir ${prefix} + touch ${prefix}/morphology.ome.tif + touch ${prefix}/transcripts.parquet + touch ${prefix}/gene_panel.json + ## Dry-run untaring the archive to get the files and place all in prefix if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then for i in `tar -tf ${archive}`; diff --git a/modules/nf-core/untar/untar.diff b/modules/nf-core/untar/untar.diff new file mode 100644 index 00000000..7076b9d5 --- /dev/null +++ b/modules/nf-core/untar/untar.diff @@ -0,0 +1,21 @@ +Changes in component 'nf-core/untar' +'modules/nf-core/untar/environment.yml' is unchanged +Changes in 'untar/main.nf': +--- modules/nf-core/untar/main.nf ++++ modules/nf-core/untar/main.nf +@@ -53,6 +53,10 @@ + prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} ++ touch ${prefix}/morphology.ome.tif ++ touch ${prefix}/transcripts.parquet ++ touch ${prefix}/gene_panel.json ++ + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + +'modules/nf-core/untar/meta.yml' is unchanged +'modules/nf-core/untar/tests/main.nf.test' is unchanged +'modules/nf-core/untar/tests/main.nf.test.snap' is unchanged +************************************************************ diff --git a/nextflow.config b/nextflow.config index 02db8443..173c9b74 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,16 +12,17 @@ params { // Input options input = null // path to the samplesheet.csv containing meta,bundle,image outdir = null // path to generate pipeline results at - mode = null // run the pipeline either in `image` or `coordinate` modes - segmentation = null // name of the segmentation method to run + mode = null // run the pipeline either in `image` or `coordinate` or `segfree` or `preview` 
modes + method = null // name of the method to run for image or coordinate or segfree approaches gene_panel = null // path to gene panel json file if `relabel_genes` is true qupath_polygons = null // polygon segmentation results in GeoJSON format alignment_csv = null // image alignment file format a 3x3 transformation matrix, where the last row is [0,0,1] + cellpose_model = null // custom cellpose model to use for running or starting training + segmentation_mask = null // prior segmentation mask // execution specific sharpen_tiff = false // wether to sharpen the morphology-focus tiff nucleus_segmentation_only = false // to only run nucleus segmentation while running XR_IMP-SEG - generate_preview = false // generate preview with baysor and exit // Xeniumranger specific xeniumranger_only = false // to generate redefined bundle with just changing the xr specific params @@ -36,9 +37,6 @@ params { segger_accelerator = 'cpu' // either 'cuda' or 'cpu' segger_knn_method = 'kd_tree' // 'cuda' - ensure your system has CUDA installed and configured properly - // Cellpose specific - cellpose_model = 'nuclei' // model to use for running or starting training default - cyto3 - // Proseg specific format = 'xenium' // preset value set as `xenium` @@ -52,9 +50,6 @@ params { features = null // Baysor specific - baysor_run_image = true // run baysor with image/seg-mask - baysor_run_transcripts = false // run baysor with transcripts.csv.gz - baysor_preview = false // generate preview with baysor preview cmd // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index c1bd1276..aa9088be 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -31,12 +31,12 @@ }, "mode": { "type": "string", - "description": "Mode in which the pipeline is to be run. Either image-based or coordinate-based approach.", - "enum": ["image", "coordinate"] + "description": "Mode in which the pipeline is to be run. 
Either image-based segmentation, coordinate-based segmentation, segmentation-free analysis or data preview.", + "enum": ["image", "coordinate", "segfree", "preview"] }, - "segmentation": { + "method": { "type": "string", - "enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger"], + "enum": ["cellpose", "xeniumranger", "baysor", "proseg", "segger", "ficture"], "description": "Segmentation method to run." }, "gene_panel": { @@ -53,6 +53,16 @@ "description": "Image alignment file containing similarity transform matrix. (e.g., the _imagealignment.csv file exported from Xenium Explorer)", "format": "file-path" }, + "cellpose_model": { + "type": "string", + "description": "Model to use for running or starting training.", + "format": "file-path" + }, + "segmentation_mask": { + "type": "string", + "description": "Prior segmentation mask from other segmentation methods.", + "format": "file-path" + }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -73,10 +83,6 @@ "description": "Options for the segmentation layer of the spatialxe pipeline", "default": "", "properties": { - "generate_preview": { - "type": "boolean", - "description": "Whether to generate a preview of the dataset with the transcripts.csv.gz." - }, "segmentation_refinement": { "type": "boolean", "description": "Whether to run refinement on the image-based segmentation methods. Runs coordinate-based methods after the initial image-based segmentation run." @@ -129,12 +135,6 @@ "enum": ["kd_tree", "cuda"], "description": "Method for KNN computation. (e.g., cuda for GPU-based computation)" }, - "cellpose_model": { - "type": "string", - "default": "nuclei", - "enum": ["nuclei", "cyto3"], - "description": "Model to use for running or starting training. (eg. cyto3 or nuclei)" - }, "format": { "type": "string", "default": "xenium", @@ -171,19 +171,6 @@ "features": { "type": "string", "description": "List of features to be passed to the ficture method. 
(eg: TP53,OCIAD1,BCAS3,SOX)" - }, - "baysor_run_image": { - "type": "boolean", - "default": true, - "description": "Whether to run bayor with image/segmentation-mask." - }, - "baysor_run_transcripts": { - "type": "boolean", - "description": "Whether to run baysor with transcripts.csv.gz." - }, - "baysor_preview": { - "type": "boolean", - "description": "Whether to create a preview of the dataset with transcripts.csv.gz." } } }, diff --git a/subworkflows/local/baysor_generate_preview/main.nf b/subworkflows/local/baysor_generate_preview/main.nf index 62e41bff..749bce5b 100644 --- a/subworkflows/local/baysor_generate_preview/main.nf +++ b/subworkflows/local/baysor_generate_preview/main.nf @@ -2,31 +2,29 @@ // Run baysor create_dataset & preview // -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' include { BAYSOR_PREVIEW } from '../../../modules/local/baysor/preview/main' include { BAYSOR_CREATE_DATASET } from '../../../modules/local/baysor/create_dataset/main' +include { PARQUET_TO_CSV } from '../../../modules/local/spatialconverter/parquet_to_csv/main' workflow BAYSOR_GENERATE_PREVIEW { take: - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] - ch_config // channel: ["path-to-xenium.toml"] + ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] main: - ch_versions = Channel.empty() - ch_preview_html = Channel.empty() + ch_versions = Channel.empty() + ch_preview_html = Channel.empty() - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - - ch_unzipped_transcripts = GUNZIP.out.gunzip + // run parquet to csv + PARQUET_TO_CSV ( ch_transcripts_parquet, ".csv" ) + ch_versions = ch_versions.mix ( PARQUET_TO_CSV.out.versions ) // generate randomised sample data - BAYSOR_CREATE_DATASET ( ch_unzipped_transcripts, "0.3" ) + BAYSOR_CREATE_DATASET ( PARQUET_TO_CSV.out.transcripts_csv, 0.3 ) ch_versions = 
ch_versions.mix ( BAYSOR_CREATE_DATASET.out.versions ) // run baysor preview if param - generate_preview is true diff --git a/subworkflows/local/baysor_generate_segfree/main.nf b/subworkflows/local/baysor_generate_segfree/main.nf index c194341b..1e4b5f59 100644 --- a/subworkflows/local/baysor_generate_segfree/main.nf +++ b/subworkflows/local/baysor_generate_segfree/main.nf @@ -2,35 +2,30 @@ // Run baysor segfree // -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' -include { BAYSOR_SEGFREE } from '../../../modules/local/baysor/segfree/main' +include { BAYSOR_SEGFREE } from '../../../modules/local/baysor/segfree/main' workflow BAYSOR_GENERATE_SEGFREE { take: - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] + ch_transcripts_parquet // channel: [ val(meta), ["transcripts.parquet"] ] + ch_config // channel: [ ["path-to-xenium.toml"] ] main: ch_versions = Channel.empty() - ch_ncvs = Channel.empty() - - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - // run baysor segfree BAYSOR_SEGFREE ( - GUNZIP.out.gunzip + ch_transcripts_parquet, + ch_config ) - ch_versions = ch_versions.mix( BAYSOR_SEGFREE.out.versions ) + ch_versions = ch_versions.mix ( BAYSOR_SEGFREE.out.versions ) emit: - ncvs = ch_ncvs + ncvs = BAYSOR_SEGFREE.out.ncvs // channel: [ val(meta), ["ncvs.loom"] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/baysor_run_morphology_ome_tif/main.nf b/subworkflows/local/baysor_run_morphology_ome_tif/main.nf deleted file mode 100644 index 86a42f1e..00000000 --- a/subworkflows/local/baysor_run_morphology_ome_tif/main.nf +++ /dev/null @@ -1,99 +0,0 @@ -// -// Run baysor run & import-segmentation -// - -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' -include { RESOLIFT } from '../../../modules/local/resolift/main' -include { BAYSOR_RUN as BAYSOR_RUN_IMAGE } from 
'../../../modules/local/baysor/run/main' -include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' - - -workflow BAYSOR_RUN_MORPHOLOGY_OME_TIF { - - take: - - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] - ch_image // channel: [ val(meta), ["morphology_focus.tiff"] ] - ch_config // channel: ["path-to-xenium.toml"] - - main: - - ch_versions = Channel.empty() - - ch_enhanced_tiff = Channel.empty() - ch_segmentation = Channel.empty() - ch_polygons2d = Channel.empty() - ch_htmls = Channel.empty() - - ch_redefined_bundle = Channel.empty() - ch_unzipped_transcripts = Channel.empty() - - - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - - ch_unzipped_transcripts = GUNZIP.out.gunzip - - // sharpen morphology tiff if param `sharpen_tiff` is true - ch_just_image = Channel.empty() - if ( params.sharpen_tiff ) { - - RESOLIFT ( ch_image ) - ch_versions = ch_versions.mix( RESOLIFT.out.versions ) - - ch_enhanced_tiff = RESOLIFT.out.enhanced_tiff - ch_just_image = ch_enhanced_tiff.map { - _meta, image -> return [ image ] - } - - } else { - - // use the original morphology tiff from the bundle - ch_just_image = ch_image.map { - _meta, image -> return [ image ] - } - } - - // run baysor with morphology.tiff - BAYSOR_RUN_IMAGE ( - ch_unzipped_transcripts, - ch_just_image, - ch_config, - 30 - ) - ch_versions = ch_versions.mix( BAYSOR_RUN_IMAGE.out.versions ) - - ch_segmentation = BAYSOR_RUN_IMAGE.out.segmentation - ch_jus_segmentation = ch_segmentation.map { - _meta, segmentation -> return [ segmentation ] - } - ch_polygons2d = BAYSOR_RUN_IMAGE.out.polygons2d - ch_htmls = BAYSOR_RUN_IMAGE.out.htmls - // run xeniumranger import-segmentation - XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, - [], - [], - [], - ch_jus_segmentation, - ch_polygons2d, - "pixel" - ) - ch_versions = 
ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) - - ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle - - emit: - - enhanced_tiff = ch_enhanced_tiff // channel: [ val(meta), ["morphology.tiff"] ] - - segmentation = ch_segmentation // channel: [ val(meta), ["segmentation.csv"] ] - polygons2d = ch_polygons2d // channel: [ ["segmentation_polygons_2d.json"] ] - htmls = ch_htmls // channel: [ ["*.html"] ] - - redefined_bundle = ch_redefined_bundle // channel: [ val(meta), "redefined-xenium-bundle" ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf b/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf new file mode 100644 index 00000000..40a4924e --- /dev/null +++ b/subworkflows/local/baysor_run_prior_segmentation_mask/main.nf @@ -0,0 +1,68 @@ +// +// Run baysor run & import-segmentation +// + +include { BAYSOR_RUN as BAYSOR_RUN_IMAGE } from '../../../modules/local/baysor/run/main' +include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' + + +workflow BAYSOR_RUN_PRIOR_SEGMENTATION_MASK { + + take: + + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_segmentation_mask // channel: [ ["path-to-prior-segmentation-mask"] ] + ch_config // channel: [ "path-to-xenium.toml" ] + + main: + + ch_versions = Channel.empty() + + ch_segmentation = Channel.empty() + ch_polygons2d = Channel.empty() + ch_htmls = Channel.empty() + + ch_redefined_bundle = Channel.empty() + + + // run baysor with the prior segmentation mask + BAYSOR_RUN_IMAGE ( + ch_transcripts_parquet, + ch_segmentation_mask, + ch_config, + 30 + ) + ch_versions = ch_versions.mix( BAYSOR_RUN_IMAGE.out.versions ) + + ch_segmentation = BAYSOR_RUN_IMAGE.out.segmentation + ch_just_segmentation = ch_segmentation.map { + _meta, segmentation -> return [
segmentation ] + } + ch_polygons2d = BAYSOR_RUN_IMAGE.out.polygons2d + ch_htmls = BAYSOR_RUN_IMAGE.out.htmls + + // run xeniumranger import-segmentation + XENIUMRANGER_IMPORT_SEGMENTATION ( + ch_bundle_path, + [], + [], + [], + ch_just_segmentation, + ch_polygons2d, + "microns" + ) + ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + + ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle + + emit: + + segmentation = ch_segmentation // channel: [ val(meta), ["segmentation.csv"] ] + polygons2d = ch_polygons2d // channel: [ ["segmentation_polygons_2d.json"] ] + htmls = ch_htmls // channel: [ ["*.html"] ] + + redefined_bundle = ch_redefined_bundle // channel: [ val(meta), "redefined-xenium-bundle" ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/baysor_run_transcripts_csv/main.nf b/subworkflows/local/baysor_run_transcripts_parquet/main.nf similarity index 73% rename from subworkflows/local/baysor_run_transcripts_csv/main.nf rename to subworkflows/local/baysor_run_transcripts_parquet/main.nf index 386b55f7..7077eede 100644 --- a/subworkflows/local/baysor_run_transcripts_csv/main.nf +++ b/subworkflows/local/baysor_run_transcripts_parquet/main.nf @@ -2,19 +2,17 @@ // Run baysor run and import-segmentation // -include { GUNZIP } from '../../../modules/nf-core/gunzip/main' include { BAYSOR_RUN as BAYSOR_RUN_TRANSCRIPTS } from '../../../modules/local/baysor/run/main' include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' -workflow BAYSOR_RUN_TRANSCRIPTS_CSV { +workflow BAYSOR_RUN_TRANSCRIPTS_PARQUET { take: - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), ["transcripts.csv.gz"] ] - ch_image // channel: [ val(meta), ["morphology_focus.tiff"] ] - ch_config // channel: ["path-to-xenium.toml"] + ch_bundle_path // channel: [ val(meta), ["xenium-bundle"] ] + ch_transcripts_parquet // 
channel: [ val(meta), ["transcripts.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] main: @@ -25,18 +23,10 @@ workflow BAYSOR_RUN_TRANSCRIPTS_CSV { ch_htmls = Channel.empty() ch_redefined_bundle = Channel.empty() - ch_unzipped_transcripts = Channel.empty() - - - // unzip transcripts.csv.gz - GUNZIP ( ch_transcripts ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - - ch_unzipped_transcripts = GUNZIP.out.gunzip // run baysor with transcripts.csv BAYSOR_RUN_TRANSCRIPTS ( - ch_unzipped_transcripts, + ch_transcripts_parquet, [], ch_config, 30 @@ -52,7 +42,7 @@ workflow BAYSOR_RUN_TRANSCRIPTS_CSV { // run xeniumranger import-segmentation XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], [], [], diff --git a/subworkflows/local/cellpose_baysor_import_segmentation/main.nf b/subworkflows/local/cellpose_baysor_import_segmentation/main.nf index 7abf8454..22f556b9 100644 --- a/subworkflows/local/cellpose_baysor_import_segmentation/main.nf +++ b/subworkflows/local/cellpose_baysor_import_segmentation/main.nf @@ -10,17 +10,17 @@ workflow CELLPOSE_BAYSOR_IMPORT_SEGMENTATION { take: - ch_image // channel: [ val(meta), ["path-to-morphology.ome.tif"] ] - ch_bundle // channel: [ val(meta), ["path-to-xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), ["path-to-transcripts.parquet"] ] - ch_config // channel: ["path-to-xenium.toml"] + ch_morphology_image // channel: [ val(meta), ["path-to-morphology.ome.tif"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_parquet // channel: [ val(meta), ["path-to-transcripts.parquet"] ] + ch_config // channel: ["path-to-xenium.toml"] main: ch_versions = Channel.empty() // run cellpose to generate segmentation mask - CELLPOSE ( ch_image, []) + CELLPOSE ( ch_morphology_image, []) ch_versions = ch_versions.mix ( CELLPOSE.out.versions ) @@ -28,7 +28,7 @@ workflow CELLPOSE_BAYSOR_IMPORT_SEGMENTATION { ch_mask = CELLPOSE.out.mask.map { _meta, seg_mask -> [ seg_mask
] } - BAYSOR_RUN ( ch_transcripts, ch_mask, ch_config, 30 ) + BAYSOR_RUN ( ch_transcripts_parquet, ch_mask, ch_config, 30 ) ch_versions = ch_versions.mix ( BAYSOR_RUN.out.versions ) @@ -39,7 +39,7 @@ workflow CELLPOSE_BAYSOR_IMPORT_SEGMENTATION { ch_polygons = BAYSOR_RUN.out.polygons2d XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], [], [], diff --git a/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf b/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf index 1b20888f..dd889264 100644 --- a/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf +++ b/subworkflows/local/cellpose_resolift_morphology_ome_tif/main.nf @@ -10,27 +10,29 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { take: - ch_image // channel: [ val(meta), ["morphology.ome.tiff"] ] - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] + ch_morphology_image // channel: [ val(meta), ["path-to-morphology.ome.tiff"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] main: ch_versions = Channel.empty() + cellpose_model = params.cellpose_model ? 
(Channel.fromPath(params.cellpose_model, checkIfExists: true)) : [] + // sharpen morphology tiff if param - sharpen_tiff is true if ( params.sharpen_tiff ) { - RESOLIFT ( ch_image ) + RESOLIFT ( ch_morphology_image ) ch_versions = ch_versions.mix( RESOLIFT.out.versions ) // run cellpose on the enhanced tiff - CELLPOSE ( RESOLIFT.out.enhanced_tiff, params.cellpose_model ) + CELLPOSE ( RESOLIFT.out.enhanced_tiff, cellpose_model ) ch_versions = ch_versions.mix( CELLPOSE.out.versions ) } else { // run cellpose on the original tiff - CELLPOSE ( ch_image, params.cellpose_model ) + CELLPOSE ( ch_morphology_image, cellpose_model ) ch_versions = ch_versions.mix( CELLPOSE.out.versions ) } @@ -41,7 +43,7 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { cellpose_mask = CELLPOSE.out.mask.map { _meta, mask -> return [ mask ] } - cellpose_flows = CELLPOSE.out.flows.map { + _cellpose_flows = CELLPOSE.out.flows.map { _meta, flows -> return [ flows ] } @@ -49,7 +51,7 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { if ( params.nucleus_segmentation_only ) { XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], cellpose_mask, [], @@ -58,10 +60,11 @@ workflow CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF { "" ) ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + } else { XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], cellpose_mask, cellpose_cells, diff --git a/subworkflows/local/ficture_preprocess_model/main.nf b/subworkflows/local/ficture_preprocess_model/main.nf index 5790eef6..bf8e259a 100644 --- a/subworkflows/local/ficture_preprocess_model/main.nf +++ b/subworkflows/local/ficture_preprocess_model/main.nf @@ -2,24 +2,31 @@ // Run ficture preprocess and model modules // -include { FICTURE_PREPROCESS } from '../../../modules/local/ficture/preprocess/main' -include { FICTURE } from '../../../modules/local/ficture/model/main' -// include { XENIUMRANGER_IMPORT_SEGMENTATION } from 
'../../../modules/nf-core/xeniumranger/import-segmentation/main' +include { FICTURE_PREPROCESS } from '../../../modules/local/ficture/preprocess/main' +include { FICTURE } from '../../../modules/local/ficture/model/main' +include { PARQUET_TO_CSV } from '../../../modules/local/spatialconverter/parquet_to_csv/main' + workflow FICTURE_PREPROCESS_MODEL { take: - ch_transcripts // channel: [ val(meta), [ "transcripts.csv.gz" ] ] - ch_features // channel: [ "features" ] + ch_transcripts_parquet // channel: [ val(meta), [ "transcripts.parquet" ] ] + ch_features // channel: [ ["features"] ] main: ch_versions = Channel.empty() + // convert parquet to csv + PARQUET_TO_CSV ( ch_transcripts_parquet, ".csv" ) + ch_versions = ch_versions.mix ( PARQUET_TO_CSV.out.versions ) + // run ficture preprocessing + ch_transcripts = PARQUET_TO_CSV.out.transcripts_csv + FICTURE_PREPROCESS ( ch_transcripts, ch_features ) - ch_versions = ch_versions.mix( FICTURE_PREPROCESS.out.versions ) + ch_versions = ch_versions.mix ( FICTURE_PREPROCESS.out.versions ) // run the ficture wrapper pipeline ch_features_clean = Channel.empty() @@ -33,17 +40,6 @@ workflow FICTURE_PREPROCESS_MODEL { ) ch_versions = ch_versions.mix( FICTURE.out.versions ) - // run xeniumranger import-segmentation - // XENIUMRANGER_IMPORT_SEGMENTATION ( - // ch_bundle, - // [], - // [], - // [], - // ch_segmentation, - // BAYSOR_RUN.out.polygons2d, - // "microns" - // ) - emit: transcripts = FICTURE_PREPROCESS.out.transcripts // channel: [ val(meta), [ "*processed_transcripts.tsv.gz" ] ] diff --git a/subworkflows/local/proseg_preset_proseg2baysor/main.nf b/subworkflows/local/proseg_preset_proseg2baysor/main.nf index 6c3a2441..b9bda8c4 100644 --- a/subworkflows/local/proseg_preset_proseg2baysor/main.nf +++ b/subworkflows/local/proseg_preset_proseg2baysor/main.nf @@ -4,24 +4,28 @@ include { PROSEG } from '../../../modules/local/proseg/preset/main' include { PROSEG2BAYSOR } from '../../../modules/local/proseg/proseg2baysor/main' 
+include { PARQUET_TO_CSV } from '../../../modules/local/spatialconverter/parquet_to_csv/main' include { XENIUMRANGER_IMPORT_SEGMENTATION } from '../../../modules/nf-core/xeniumranger/import-segmentation/main' workflow PROSEG_PRESET_PROSEG2BAYSOR { take: - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] - ch_transcripts // channel: [ val(meta), [ "transcripts.csv.gz" ] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] + ch_transcripts_parquet // channel: [ val(meta), [ "transcripts.parquet" ] ] main: ch_versions = Channel.empty() + // run parquet-to-csv + PARQUET_TO_CSV ( ch_transcripts_parquet, ".gz" ) + ch_versions = ch_versions.mix( PARQUET_TO_CSV.out.versions ) + // run proseg with the xenium format - PROSEG ( ch_transcripts ) + PROSEG ( PARQUET_TO_CSV.out.transcripts_csv ) ch_versions = ch_versions.mix( PROSEG.out.versions ) - // run proseg-to-baysor on the data generated with the proseg run PROSEG2BAYSOR ( PROSEG.out.cell_polygons_2d, PROSEG.out.transcript_metadata ) ch_versions = ch_versions.mix( PROSEG2BAYSOR.out.versions ) @@ -33,7 +37,7 @@ workflow PROSEG_PRESET_PROSEG2BAYSOR { // run xeniumranger import-segmentation XENIUMRANGER_IMPORT_SEGMENTATION ( - ch_bundle, + ch_bundle_path, [], [], [], diff --git a/subworkflows/local/segger_create_train_predict/main.nf b/subworkflows/local/segger_create_train_predict/main.nf index 77b232e4..99f07c12 100644 --- a/subworkflows/local/segger_create_train_predict/main.nf +++ b/subworkflows/local/segger_create_train_predict/main.nf @@ -2,16 +2,16 @@ // Run segger create_dataset, train and predict modules & parquet_to_csv // -include { SEGGER_CREATE_DATASET } from '../../../modules/local/segger/create_dataset/main' include { SEGGER_TRAIN } from '../../../modules/local/segger/train/main' include { SEGGER_PREDICT } from '../../../modules/local/segger/predict/main' +include { SEGGER_CREATE_DATASET } from '../../../modules/local/segger/create_dataset/main' include { PARQUET_TO_CSV } from 
'../../../modules/local/spatialconverter/parquet_to_csv/main' workflow SEGGER_CREATE_TRAIN_PREDICT { take: - ch_basedir // channel: [ val(meta), [ basedir ] ] + ch_basedir // channel: [ val(meta), [ "basedir" ] ] ch_transcripts_parquet // channel: [ val(meta), [bundle + "/transcripts.parquet"]] main: diff --git a/subworkflows/local/spatialdata_write_meta_merge/main.nf b/subworkflows/local/spatialdata_write_meta_merge/main.nf index 18b20289..36635052 100644 --- a/subworkflows/local/spatialdata_write_meta_merge/main.nf +++ b/subworkflows/local/spatialdata_write_meta_merge/main.nf @@ -10,7 +10,7 @@ include { SPATIALDATA_META } from '../../ workflow SPATIALDATA_WRITE_META_MERGE { take: - ch_raw_bundle // channel: [ val(meta), [ "xenium-bundle" ] ] + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] ch_redefined_bundle // channel: [ val(meta), [ "redefined-xenium-bundle" ] ] main: @@ -18,39 +18,39 @@ workflow SPATIALDATA_WRITE_META_MERGE { ch_versions = Channel.empty() // write spatialdata object from the raw xenium bundle - raw_bundle_path = ch_raw_bundle.map { meta, file_path -> - return [ meta, file(file_path) ] - } SPATIALDATA_WRITE_RAW_BUNDLE ( - raw_bundle_path, + ch_bundle_path, 'spatialdata_raw' ) ch_versions = ch_versions.mix ( SPATIALDATA_WRITE_RAW_BUNDLE.out.versions ) // write spatialdata object after running IMP_SEG - redefined_bundle_path = ch_redefined_bundle.map { meta, file_path -> - return [ meta, file(file_path) ] - } SPATIALDATA_WRITE_REDEFINED_BUNDLE ( - redefined_bundle_path, + ch_redefined_bundle, 'spatialdata_redefined' ) ch_versions = ch_versions.mix ( SPATIALDATA_WRITE_REDEFINED_BUNDLE.out.versions ) // merge raw & redefined spatialdata objects + ch_just_redefined_bundle = SPATIALDATA_WRITE_REDEFINED_BUNDLE.out.spatialdata.map { + _meta, bundle -> return [ bundle ] + } SPATIALDATA_MERGE_RAW_REDEFINED ( SPATIALDATA_WRITE_RAW_BUNDLE.out.spatialdata, - SPATIALDATA_WRITE_REDEFINED_BUNDLE.out.spatialdata + 
ch_just_redefined_bundle ) ch_versions = ch_versions.mix ( SPATIALDATA_MERGE_RAW_REDEFINED.out.versions ) // write metadata with spatialdata object + ch_just_bundle_path = ch_bundle_path.map { + _meta, bundle -> return [ bundle ] + } SPATIALDATA_META ( SPATIALDATA_MERGE_RAW_REDEFINED.out.spatialxe_bundle, - ch_raw_bundle + ch_just_bundle_path ) ch_versions = ch_versions.mix ( SPATIALDATA_META.out.versions ) @@ -61,6 +61,5 @@ workflow SPATIALDATA_WRITE_META_MERGE { ch_sd_merged = SPATIALDATA_MERGE_RAW_REDEFINED.out.spatialxe_bundle // channel: [ val(meta), "spatialdata_spatialxe" ] ch_sd_meta = SPATIALDATA_META.out.spatialxe_bundle // channel: [ val(meta), "spatialdata_spatialxe_final" ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } - diff --git a/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf b/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf index 09c7ac0d..e9207233 100644 --- a/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_spatialxe_pipeline/main.nf @@ -67,18 +67,39 @@ workflow PIPELINE_INITIALISATION { // Custom validation for pipeline parameters // validateInputParameters() + log.info "INFO Input params validated ✅ " // // Create channel from input file provided through params.input // - Channel + try { + + Channel .fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) .map { meta, bundle, image -> return [ [id: meta.id], bundle, image ] } .set { ch_samplesheet } + log.info "INFO Samplesheet validated ✅ " + + } catch (Exception e) { + + log.error "❌ Samplesheet validation failed: ${e.message}" + exit 1 + } + + + // + // Check and validate xenium bundle + // + if ( !workflow.profile.contains('test')) { + validateXeniumBundle(ch_samplesheet) + } + + emit: + samplesheet = ch_samplesheet versions = ch_versions @@ -129,7 +150,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline 
failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error "❌ Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" } } @@ -144,30 +165,112 @@ workflow PIPELINE_COMPLETION { def validateInputParameters() { // check if the segmentation method provided is valid for a mode - if ( params.mode == 'image' && params.segmentation ) { - if ( !params.image_seg_methods.contains(params.segmentation) ) { - error "Error: Invalid segmentation method: ${params.segmentation} provided for the `image` based mode. Options: ${params.image_seg_methods}" + if ( params.mode == 'image' && params.method ) { + if ( !params.image_seg_methods.contains(params.method) ) { + log.error "❌ Error: Invalid segmentation method: ${params.method} provided for the `image` based mode. Options: ${params.image_seg_methods}" + exit 1 } } - if ( params.mode == 'coordinate' && params.segmentation ) { - if ( !params.transcript_seg_methods.contains(params.segmentation) ) { - error "Error: Invalid segmentation method: ${params.segmentation} provided for the `coordinate` based mode. Options: ${params.transcript_seg_methods}" + if ( params.mode == 'coordinate' && params.method ) { + if ( !params.transcript_seg_methods.contains(params.method) ) { + log.error "❌ Error: Invalid segmentation method: `${params.method}` provided for the `coordinate` based mode. Options: ${params.transcript_seg_methods}" + exit 1 } } // check if --relabel_genes is true but --gene_panel is not provided if ( params.relabel_genes && !params.gene_panel ) { - log.warn "Relabel genes is enabled, but gene panel is not provided with the `--gene_panel`. Using `gene_panel.json` in the xenium bundle" + log.warn "⚠️ Relabel genes is enabled, but gene panel is not provided with the `--gene_panel`. Using `gene_panel.json` in the xenium bundle." 
} // check if --relabel_genes is true but --gene_panel is not provided if ( params.gene_panel && !params.relabel_genes ) { - log.warn "Gene panel provided, but relabel genes is disabled. Using `gene_panel.json` only to generate metadata" + log.warn "⚠️ Gene panel provided, but relabel genes is disabled. Using `gene_panel.json` only to generate metadata." + } + + // warn when the method is xeniumranger and nucleus_segmentation_only is disabled + if ( params.method == 'xeniumranger' && !params.nucleus_segmentation_only ) { + log.warn "⚠️ Nucleus segmentation is disabled. Running xeniumranger resegment module to redefine xenium bundle without nucleus segmentation." + log.warn "⚠️ Use --nucleus_segmentation_only to enable nucleus segmentation to redefine xenium bundle with import-segmentation module." + } + + if ( params.mode == 'image' && params.method == 'baysor' ) { + if ( !params.segmentation_mask ) { + log.error "❌ Error: Missing path to segmentation mask. Image-based segmentation with the `baysor` method requires a segmentation mask with the `--segmentation_mask` option."
+ exit 1 + } } } +// +// Check that every xenium bundle path exists and holds the expected files +// +def validateXeniumBundle(ch_samplesheet) { + + // define xenium bundle directory structure + def xenium_bundle = [ + "analysis.tar.gz", + "analysis.zarr.zip", + "analysis_summary.html", + "cell_boundaries.csv.gz", + "cell_boundaries.parquet", + "cell_feature_matrix.h5", + "cell_feature_matrix.tar.gz", + "cell_feature_matrix.zarr.zip", + "cells.csv.gz", + "cells.parquet", + "cells.zarr.zip", + "experiment.xenium", + "gene_panel.json", + "metrics_summary.csv", + "morphology.ome.tif", + "morphology_focus/", + "nucleus_boundaries.csv.gz", + "nucleus_boundaries.parquet", + "transcripts.parquet", + "transcripts.zarr.zip" + ] + + // get bundle path + def ch_bundle_path = ch_samplesheet.map { + _meta, bundle, _image -> + def bundle_path = file ( + bundle.toString().replaceFirst(/\/$/, ''), + ) + return bundle_path + } + + // a channel object is always truthy, so the checks below run per emitted + // bundle path inside an operator closure instead of on the channel itself + ch_bundle_path.subscribe { path -> + + // check if the path exists + if ( !path.exists() ) { + error "❌ Error: Xenium bundle path not found. Check if the path provided in the samplesheet exists."
+ } + + // if the path exists, check for the presence of xenium files + def missing_files = [] + + // visit every expected entry so that all missing files get reported + xenium_bundle.each { filename -> + def fullPath = file("${path}/${filename}") + if (!fullPath.exists()) { + missing_files.add(filename) + } + } + + if (missing_files) { + log.error "❌ Missing file(s) at bundle path provided in the samplesheet: ${missing_files}" + exit 1 + } + + // report success only after this bundle has actually been checked + log.info "INFO Xenium bundle validated ✅ \n" + } +} // // Generate methods description for MultiQC diff --git a/subworkflows/local/utils_stage_testdata/main.nf b/subworkflows/local/utils_stage_testdata/main.nf deleted file mode 100644 index 8a13590b..00000000 --- a/subworkflows/local/utils_stage_testdata/main.nf +++ /dev/null @@ -1,66 +0,0 @@ -// -// stage data for the test profile of the pipeline -// - -include { UNTAR } from '../../../modules/nf-core/untar/main' - - -workflow STAGE_TESTDATA { - - take: - ch_samplesheet - - main: - - ch_versions = Channel.empty() - ch_raw_bundle = Channel.empty() - ch_bundle_url = Channel.empty() - ch_image = Channel.empty() - ch_transcripts_csv = Channel.empty() - ch_transcripts_parquet = Channel.empty() - - // get xenium bundle path - ch_bundle_url = ch_samplesheet.map { meta, bundle, _image -> - return [ meta, file(bundle) ] - } - - // run the UNTAR module to create xenium bundle - UNTAR(ch_bundle_url) - ch_versions = ch_versions.mix ( UNTAR.out.versions ) - - ch_bundle_local_path = UNTAR.out.untar - ch_bundle_local_path.view() - - // get transcript.csv.gz - ch_transcripts_csv = ch_bundle_local_path.map { meta, bundle -> - def transcripts_csv = file(bundle + "/transcripts.csv.gz") - return [ meta, transcripts_csv ] - } - - // get transcript.parquet - ch_transcripts_parquet = ch_bundle_local_path.map { meta, bundle -> - def transcripts_parquet = file(bundle + "/transcripts.parquet") - return [ meta, transcripts_parquet ] - } - -
// get morphology.ome.tif - ch_image = ch_bundle_local_path.map { meta, bundle -> - def morphology_image = file(bundle + "/morphology.ome.tif") - return [ meta, morphology_image ] - } - - // get baysor xenium config - ch_config = Channel.fromPath("${projectDir}/assets/config/xenium.toml", checkIfExists: true) - - - emit: - - ch_raw_bundle = ch_bundle_local_path // channel [ val(meta), ["xenium-bundle"] ] - ch_transcripts_csv = ch_transcripts_csv // channel [ val(meta), ["path-to-transcripts.csv.gz"] ] - ch_transcripts_parquet = ch_transcripts_parquet // channel [ val(meta), ["path-to-transcripts.csv.gz"] ] - ch_image = ch_image // channel [ val(meta), ["path-to-morphology.ome.tif"] ] - ch_config = ch_config // channel [ ["path-to-xenium.toml"] ] - - versions = ch_versions // channel [versions.yml] - -} diff --git a/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf index f1054b2e..461751ef 100644 --- a/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf +++ b/subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main.nf @@ -11,14 +11,14 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { take: - ch_bundle // channel: [ val(meta), [ "xenium-bundle" ] ] + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] main: ch_versions = Channel.empty() ch_redefined_bundle = Channel.empty() - cells = ch_bundle.map { + cells = ch_bundle_path.map { _meta, bundle -> return [ bundle + "/cells.zarr.zip" ] } @@ -26,7 +26,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { if ( params.expansion_distance == 0 || params.expansion_distance != 5 ){ IMP_SEG_COUNT_MATRIX_EXP_DISTANCE ( - ch_bundle, + ch_bundle_path, [], cells, [], @@ -43,7 +43,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { if ( params.qupath_polygons && params.nucleus_segmentation_only ) { IMP_SEG_POLYGON_GEOJSON_INPUT ( - 
ch_bundle, + ch_bundle_path, [], params.qupath_polygons, [], @@ -58,7 +58,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { } else if ( params.qupath_polygons ) { IMP_SEG_POLYGON_GEOJSON_INPUT ( - ch_bundle, + ch_bundle_path, [], params.qupath_polygons, params.qupath_polygons, @@ -80,7 +80,7 @@ workflow XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE { if ( params.qupath_polygons && params.alignment_csv ) { IMP_SEG_TRANS_MATRIX_INPUT ( - ch_bundle, + ch_bundle_path, params.alignment_csv, params.qupath_polygons, params.qupath_polygons, diff --git a/subworkflows/local/xeniumranger_relabel_resegment/main.nf b/subworkflows/local/xeniumranger_relabel_resegment/main.nf index 657db843..1151e69a 100644 --- a/subworkflows/local/xeniumranger_relabel_resegment/main.nf +++ b/subworkflows/local/xeniumranger_relabel_resegment/main.nf @@ -9,17 +9,22 @@ workflow XENIUMRANGER_RELABEL_RESEGMENT { take: - ch_bundle // channel: [ val(meta), [ xenium-bundle-path ] ] - ch_gene_panel // channel: [ ["gene_panel.json"] ] + ch_bundle_path // channel: [ val(meta), [ "path-to-xenium-bundle" ] ] + ch_gene_panel // channel: [ ["gene_panel.json"] ] main: ch_versions = Channel.empty() - XENIUMRANGER_RELABEL ( ch_bundle, ch_gene_panel ) + XENIUMRANGER_RELABEL ( + ch_bundle_path, + ch_gene_panel + ) ch_versions = ch_versions.mix ( XENIUMRANGER_RELABEL.out.versions ) - XENIUMRANGER_RESEGMENT ( XENIUMRANGER_RELABEL.out.bundle ) + XENIUMRANGER_RESEGMENT ( + XENIUMRANGER_RELABEL.out.bundle + ) ch_versions = ch_versions.mix ( XENIUMRANGER_RESEGMENT.out.versions ) @@ -27,5 +32,5 @@ workflow XENIUMRANGER_RELABEL_RESEGMENT { redefined_bundle = XENIUMRANGER_RESEGMENT.out.bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf 
index 1000c165..6907ff50 100644 --- a/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf +++ b/subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main.nf @@ -9,37 +9,48 @@ workflow XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF { take: - ch_bundle // channel: [ val(meta), ["xenium-bundle"] ] + ch_bundle_path // channel: [ val(meta), ["path-to-xenium-bundle"] ] main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_redefined_bundle = Channel.empty() // run resegment with changed config values - XENIUMRANGER_RESEGMENT ( ch_bundle ) + XENIUMRANGER_RESEGMENT ( ch_bundle_path ) ch_versions = ch_versions.mix( XENIUMRANGER_RESEGMENT.out.versions ) - // run import segmentation to redine - cells = ch_bundle.map { + // run import segmentation to redefine the xenium bundle along with nuclear segmentation + cells = XENIUMRANGER_RESEGMENT.out.bundle.map { _meta, bundle -> return [ bundle + "/cells.zarr.zip" ] } - XENIUMRANGER_IMPORT_SEGMENTATION ( - XENIUMRANGER_RESEGMENT.out.bundle, - [], - cells, - cells, - [], - [], - "pixel" - ) - ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + // adjust the nuclear expansion distance without altering nuclei detection + if ( params.nucleus_segmentation_only ) { + + XENIUMRANGER_IMPORT_SEGMENTATION ( + XENIUMRANGER_RESEGMENT.out.bundle, + [], + cells, + [], + [], + [], + "pixels" + ) + ch_versions = ch_versions.mix( XENIUMRANGER_IMPORT_SEGMENTATION.out.versions ) + + ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle + + } else { + + ch_redefined_bundle = XENIUMRANGER_RESEGMENT.out.bundle + } emit: - redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION.out.bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] + redefined_bundle = ch_redefined_bundle // channel: [ val(meta), ["redefined-xenium-bundle"] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/spatialxe.nf
b/workflows/spatialxe.nf index b31db6ea..3bffb276 100644 --- a/workflows/spatialxe.nf +++ b/workflows/spatialxe.nf @@ -5,42 +5,39 @@ */ // multiqc -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' // nf-core functionality -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_spatialxe_pipeline' -include { paramsSummaryMap } from 'plugin/nf-schema' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_spatialxe_pipeline' +include { paramsSummaryMap } from 'plugin/nf-schema' // nf-core modules -include { UNTAR } from '../modules/nf-core/untar/main' - -// testdata stagign subworkflow -include { STAGE_TESTDATA } from '../subworkflows/local/utils_stage_testdata/main' +include { UNTAR } from '../modules/nf-core/untar/main' // coordinate-based segmentation subworklfows -include { SEGGER_CREATE_TRAIN_PREDICT } from '../subworkflows/local/segger_create_train_predict/main' -include { PROSEG_PRESET_PROSEG2BAYSOR } from '../subworkflows/local/proseg_preset_proseg2baysor/main' -include { BAYSOR_GENERATE_PREVIEW } from '../subworkflows/local/baysor_generate_preview/main' -include { BAYSOR_RUN_TRANSCRIPTS_CSV } from '../subworkflows/local/baysor_run_transcripts_csv/main' +include { SEGGER_CREATE_TRAIN_PREDICT } from '../subworkflows/local/segger_create_train_predict/main' +include { PROSEG_PRESET_PROSEG2BAYSOR } from '../subworkflows/local/proseg_preset_proseg2baysor/main' +include { BAYSOR_GENERATE_PREVIEW } from '../subworkflows/local/baysor_generate_preview/main' +include { BAYSOR_RUN_TRANSCRIPTS_PARQUET } from 
'../subworkflows/local/baysor_run_transcripts_parquet/main' // image-based segmentation subworklfows -include { BAYSOR_RUN_MORPHOLOGY_OME_TIF } from '../subworkflows/local/baysor_run_morphology_ome_tif/main' -include { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/cellpose_resolift_morphology_ome_tif/main' -include { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION } from '../subworkflows/local/cellpose_baysor_import_segmentation/main' -include { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main' +include { BAYSOR_RUN_PRIOR_SEGMENTATION_MASK } from '../subworkflows/local/baysor_run_prior_segmentation_mask/main' +include { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/cellpose_resolift_morphology_ome_tif/main' +include { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION } from '../subworkflows/local/cellpose_baysor_import_segmentation/main' +include { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF } from '../subworkflows/local/xeniumranger_resegment_morphology_ome_tif/main' // segmentation-free subworkflows -include { BAYSOR_GENERATE_SEGFREE } from '../subworkflows/local/baysor_generate_segfree/main' -include { FICTURE_PREPROCESS_MODEL } from '../subworkflows/local/ficture_preprocess_model/main' +include { BAYSOR_GENERATE_SEGFREE } from '../subworkflows/local/baysor_generate_segfree/main' +include { FICTURE_PREPROCESS_MODEL } from '../subworkflows/local/ficture_preprocess_model/main' // xeniumranger subworkflows include { XENIUMRANGER_RELABEL_RESEGMENT } from '../subworkflows/local/xeniumranger_relabel_resegment/main' include { XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE } from '../subworkflows/local/xeniumranger_import_segmentation_redefine_bundle/main' // spatialdata subworkflows -include { SPATIALDATA_WRITE_META_MERGE } from '../subworkflows/local/spatialdata_write_meta_merge/main' +include { SPATIALDATA_WRITE_META_MERGE } from 
'../subworkflows/local/spatialdata_write_meta_merge/main' // TODO qc layer subworkflows @@ -65,14 +62,18 @@ workflow SPATIALXE { */ ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + + ch_input = Channel.empty() ch_bundle = Channel.empty() - ch_bundle_path = Channel.empty() + ch_config = Channel.empty() + ch_features = Channel.empty() ch_raw_bundle = Channel.empty() ch_gene_panel = Channel.empty() - ch_transcripts_parquet = Channel.empty() + ch_bundle_path = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_morphology_image = Channel.empty() ch_redefined_bundle = Channel.empty() - ch_config = Channel.empty() + ch_transcripts_parquet = Channel.empty() /* @@ -81,66 +82,95 @@ workflow SPATIALXE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - // check if its a test run if ( workflow.profile.contains('test') ) { - STAGE_TESTDATA ( - ch_samplesheet - ) + // get sample, xenium bundle and image path + ch_input_untar = ch_samplesheet.map { meta, bundle, _image -> + return [ meta, bundle ] + } - ch_raw_bundle = STAGE_TESTDATA.out.ch_raw_bundle - ch_transcripts = STAGE_TESTDATA.out.ch_transcripts_csv - ch_transcripts_parquet = STAGE_TESTDATA.out.ch_transcripts_parquet - ch_image = STAGE_TESTDATA.out.ch_image - ch_config = STAGE_TESTDATA.out.ch_config + // get testdata + UNTAR ( ch_input_untar ) + ch_versions = ch_versions.mix ( UNTAR.out.versions ) + + ch_untar_outs = UNTAR.out.untar.map { _meta, bundle -> + return [ bundle.toString() ] + } + + ch_samplesheet.combine(ch_untar_outs) + .map { meta, _url, image, test_bundle -> + return [meta, test_bundle, image] + } + .set { ch_input } } else { - // get samplesheet fields - ch_bundle_path = ch_samplesheet.map { meta, bundle, _image -> - return [ meta, file(bundle)] - } + // for all other profile runs + ch_input = ch_samplesheet + } - // get xenium bundle files - ch_bundle = ch_samplesheet.map { meta, bundle, _image -> - def bundle_files = 
file(bundle).toList().collect() - return [meta, bundle_files] - } + // path to bundle input + ch_bundle_path = ch_input.map { meta, bundle, _image -> + return [ meta, bundle ] + } - // get transcript.csv.gz - ch_transcripts = ch_samplesheet.map { meta, bundle, _image -> - def transcripts_csv = file(bundle.replaceFirst(/\/$/, '') + "/transcripts.csv.gz") - return [ meta, transcripts_csv ] - } + // get transcript.parquet from the xenium bundle + ch_transcripts_parquet = ch_input.map { meta, bundle, _image -> + def transcripts_parquet = file ( + bundle.toString().replaceFirst(/\/$/, '') + "/transcripts.parquet", + checkIfExists: true + ) + return [ meta, transcripts_parquet ] + } - // get transcript.parquet - ch_transcripts_parquet = ch_samplesheet.map { meta, bundle, _image -> - def transcripts_parquet = file(bundle.replaceFirst(/\/$/, '') + "/transcripts.parquet") - return [ meta, transcripts_parquet ] - } + // get morphology.ome.tif from the xenium bundle + ch_morphology_image = ch_input.map { meta, bundle, image -> + def morphology_img = image ? file(image) : file ( + bundle.toString().replaceFirst(/\/$/, '') + "/morphology.ome.tif", + checkIfExists: true + ) + return [ meta, morphology_img ] + } - // get morphology.ome.tif - ch_image = ch_samplesheet.map { meta, bundle, image -> - def morphology_img = image ? 
file(image) : file(bundle.replaceFirst(/\/$/, '') + "/morphology.ome.tif") - return [ meta, morphology_img ] - } + // get baysor xenium config + ch_config = Channel.fromPath ( + "${projectDir}/assets/config/xenium.toml", + checkIfExists: true + ) - // get baysor xenium config - ch_config = Channel.fromPath("${projectDir}/assets/config/xenium.toml", checkIfExists: true) + // get segmentation mask if provided with --segmentation_mask for the baysor method + if ( params.segmentation_mask ) { + ch_segmentation_mask = Channel.fromPath ( + params.segmentation_mask, checkIfExists: true + ) + } - // get gene_panel.json if provided with --gene_panel, sets relabel_genes to true - if (( params.gene_panel )) { + // get a list of features if provided with the --features for the ficture method + if ( params.features ) { + ch_features = Channel.fromPath ( + params.features, + checkIfExists: true + ) + } - params.relabel_genes = true - ch_gene_panel = Channel.fromPath(params.gene_panel, checkIfExists: true) + // get gene_panel.json if provided with --gene_panel, sets relabel_genes to true + if (( params.gene_panel )) { - } else { + params.relabel_genes = true + ch_gene_panel = Channel.fromPath ( + params.gene_panel, + checkIfExists: true + ) - // gene panel to use if only --relabel_genes is provided - ch_gene_panel = ch_samplesheet.map { meta, bundle, _image -> - def gene_panel = file(bundle.replaceFirst(/\/$/, '') + "/gene_panel.json") - return [ meta, gene_panel ] - } + } else { + + // gene panel to use if only --relabel_genes is provided + ch_gene_panel = ch_input.map { meta, bundle, _image -> + def gene_panel = file ( + bundle.toString().replaceFirst(/\/$/, '') + "/gene_panel.json", + checkIfExists: true + ) + return [ meta, gene_panel ] } } @@ -154,7 +184,7 @@ workflow SPATIALXE { if ( params.relabel_genes ) { XENIUMRANGER_RELABEL_RESEGMENT ( - ch_bundle, + ch_bundle_path, ch_gene_panel ) ch_raw_bundle = XENIUMRANGER_RELABEL_RESEGMENT.out.redefined_bundle @@ -169,14 +199,12 
@@ workflow SPATIALXE { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // run baysor preview if `generate_preview ` is true - if ( params.generate_preview && params.mode == 'coordinate' ) { + if ( params.mode == 'preview' ) { BAYSOR_GENERATE_PREVIEW ( - ch_transcripts, + ch_transcripts_parquet, ch_config ) - log.info "Preview generated at ${params.outdir}" - exit 0 } /* @@ -187,10 +215,10 @@ workflow SPATIALXE { if ( params.mode == 'image' ) { // trigger the default image-based workflow if no method is specified - if ( !params.segmentation ) { + if ( !params.method ) { CELLPOSE_BAYSOR_IMPORT_SEGMENTATION ( - ch_image, + ch_morphology_image, ch_bundle_path, ch_transcripts_parquet, ch_config @@ -199,35 +227,35 @@ workflow SPATIALXE { } // check it the provided method is part of the methods list - if ( params.segmentation in params.image_seg_methods ) { + if ( params.method in params.image_seg_methods ) { // run xeniumranger resegment with morphology_ome.tif - if ( params.segmentation == 'xeniumranger' ) { + if ( params.method == 'xeniumranger' ) { XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF ( - ch_raw_bundle + ch_bundle_path ) ch_redefined_bundle = XENIUMRANGER_RESEGMENT_MORPHOLOGY_OME_TIF.out.redefined_bundle } // run baysor run with morphology_ome.tif - if ( params.segmentation == 'baysor' ) { + if ( params.method == 'baysor' ) { - BAYSOR_RUN_MORPHOLOGY_OME_TIF ( - ch_raw_bundle, - ch_transcripts, - ch_image, + BAYSOR_RUN_PRIOR_SEGMENTATION_MASK ( + ch_bundle_path, + ch_transcripts_parquet, + ch_segmentation_mask, ch_config ) - ch_redefined_bundle = BAYSOR_RUN_MORPHOLOGY_OME_TIF.out.redefined_bundle + ch_redefined_bundle = BAYSOR_RUN_PRIOR_SEGMENTATION_MASK.out.redefined_bundle } // run cellpose on the morphology_ome.tif - if ( params.segmentation == 'cellpose' ) { + if ( params.method == 'cellpose' ) { CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF ( - ch_image, - ch_raw_bundle + ch_morphology_image, + ch_bundle_path ) 
ch_redefined_bundle = CELLPOSE_RESOLIFT_MORPHOLOGY_OME_TIF.out.redefined_bundle } @@ -243,50 +271,49 @@ workflow SPATIALXE { if ( params.mode == 'coordinate' ) { // trigger the default transcripts-based workflow if no method is specified - if ( !params.segmentation ) { + if ( !params.method ) { PROSEG_PRESET_PROSEG2BAYSOR ( - ch_raw_bundle, - ch_transcripts + ch_bundle_path, + ch_transcripts_parquet ) ch_redefined_bundle = PROSEG_PRESET_PROSEG2BAYSOR.out.redefined_bundle } // check it the provided method is part of the methods list - if ( params.segmentation in params.transcript_seg_methods ) { + if ( params.method in params.transcript_seg_methods ) { - // run proseg with transcripts.csv.gz - if ( params.segmentation == 'proseg') { + // run proseg with transcripts.parquet + if ( params.method == 'proseg') { PROSEG_PRESET_PROSEG2BAYSOR ( - ch_raw_bundle, - ch_transcripts + ch_bundle_path, + ch_transcripts_parquet ) ch_redefined_bundle = PROSEG_PRESET_PROSEG2BAYSOR.out.redefined_bundle } - // run segger with transcripts.csv.gz - if ( params.segmentation == 'segger' ) { + // run segger with transcripts.parquet + if ( params.method == 'segger' ) { SEGGER_CREATE_TRAIN_PREDICT ( - ch_raw_bundle, + ch_bundle_path, ch_transcripts_parquet ) } - // run baysor with transcripts.csv.gz - if ( params.segmentation == 'baysor' ) { + // run baysor with transcripts.parquet + if ( params.method == 'baysor' ) { - BAYSOR_RUN_TRANSCRIPTS_CSV ( - ch_raw_bundle, - ch_transcripts, - ch_config, - [] + BAYSOR_RUN_TRANSCRIPTS_PARQUET ( + ch_bundle_path, + ch_transcripts_parquet, + ch_config ) - ch_redefined_bundle = BAYSOR_RUN_TRANSCRIPTS_CSV.out.redefined_bundle + ch_redefined_bundle = BAYSOR_RUN_TRANSCRIPTS_PARQUET.out.redefined_bundle } } @@ -301,7 +328,7 @@ workflow SPATIALXE { if ( params.xeniumranger_only ) { XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE ( - ch_raw_bundle + ch_bundle_path ) ch_redefined_bundle = XENIUMRANGER_IMPORT_SEGMENTATION_REDEFINE_BUNDLE.out.redefined_bundle } 
@@ -312,11 +339,17 @@ workflow SPATIALXE { SPATIALXE - SPATIALDATA / METADATA LAYER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - // run spatialdata modules to generate sd objects - SPATIALDATA_WRITE_META_MERGE ( - ch_raw_bundle, - ch_redefined_bundle - ) + + // run spatialdata modules to generate sd objects in image or coordinate mode + if ( params.mode == 'image' || params.mode == 'coordinate' ) { + + SPATIALDATA_WRITE_META_MERGE ( + ch_bundle_path, + ch_redefined_bundle + ) + + } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -325,6 +358,46 @@ workflow SPATIALXE { */ + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALXE - SEGMENTATION-FREE LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + if ( params.mode == 'segfree' ) { + + // trigger the default segfree workflow if no method is specified + if ( !params.method ) { + + BAYSOR_GENERATE_SEGFREE ( + ch_transcripts_parquet, + ch_config + ) + } + + // check if the provided method is part of the methods list + if ( params.method in params.segfree_methods ) { + + // run baysor with transcripts.parquet + if ( params.method == 'baysor' ) { + + BAYSOR_GENERATE_SEGFREE ( + ch_transcripts_parquet, + ch_config + ) + } + + // run ficture with transcripts.parquet + if ( params.method == 'ficture' ) { + + FICTURE_PREPROCESS_MODEL ( + ch_transcripts_parquet, + ch_features + ) + } + } + } + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~