diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 30e66026..7a527a34 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,7 +11,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Set up Python 3.14 uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 @@ -28,7 +28,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 @@ -71,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | diff --git a/.python-version b/.python-version new file mode 100644 index 00000000..04e20791 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12.8 diff --git a/conf/modules.config b/conf/modules.config index f2c5bdd8..5b75b24e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -48,6 +48,7 @@ process { } + // ---------------------------- xeniumranger --------------------------------------------------- withName: XENIUMRANGER_RELABEL { @@ -148,6 +149,60 @@ process { ] } + // ---------------------------- segtraq ----------------------------------------- + withName: SEGTRAQ_BASELINE { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segtraq/baseline" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: SEGTRAQ_CLUSTERING_STABILITY { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segtraq/clustering_stability" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SEGTRAQ_REGION_SIMILARITY { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segtraq/region_similarity"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SEGTRAQ_VOLUME { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segtraq/volume"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SEGTRAQ_SUPERVISED { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segtraq/supervised"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SEGTRAQ_POINT_STATISTICS { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segtraq/point_statistics"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: SEGTRAQ_PLOTTING { + publishDir = [ + path: { "${params.outdir}/${params.mode}/segtraq/plotting"}, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + + + // ---------------------------- ficture ------------------------------------------ withName: FICTURE_PREPROCESS { diff --git a/modules/local/segtraq/Dockerfile b/modules/local/segtraq/Dockerfile new file mode 100644 index 00000000..2ae51634 --- /dev/null +++ b/modules/local/segtraq/Dockerfile @@ -0,0 +1,40 @@ +# Dockerfile to create container with segtraq +# SegTraQ: A Python toolkit for quantitative and visual quality control +# of segmentation and transcript assignment in spatial omics data. 
+# https://github.com/LazDaria/SegTraQ
+
+FROM python:3.12-slim
+
+LABEL authors="Priyal Tripathi" \
+    description="Docker image containing SegTraQ and its dependencies for segmentation QC"
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+ENV NUMBA_CACHE_DIR='tmp'
+ENV MPLCONFIGDIR='tmp/matplotlib'
+ENV XDG_CACHE_HOME='tmp'
+
+# Install system dependencies required by geopandas, rasterio, rtree, igraph
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    libgeos-dev \
+    libgdal-dev \
+    libspatialindex-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set the working directory
+WORKDIR /app
+
+# Install segtraq (pinned) and its key dependencies; specifiers are quoted so the shell does not read '>' as a redirect
+RUN pip install --no-cache-dir \
+    "segtraq==0.0.3" \
+    "spatialdata>=0.7.2" \
+    "spatialdata-io>=0.1.4" \
+    "anndata>=0.12" \
+    scanpy \
+    "squidpy>=1.6.2" \
+    geopandas \
+    igraph \
+    rtree \
+    rasterio \
+    "ovrlpy>=1.1.0"
diff --git a/modules/local/segtraq/baseline/main.nf b/modules/local/segtraq/baseline/main.nf
new file mode 100644
index 00000000..2f5fc56e
--- /dev/null
+++ b/modules/local/segtraq/baseline/main.nf
@@ -0,0 +1,44 @@
+process SEGTRAQ_BASELINE {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    container "quay.io/priyal_tripathi/segtraq:0.0.3"
+
+    input:
+    tuple val(meta), path(spatialdata_zarr)
+
+    output:
+    tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results
+    path ("versions.yml") , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error("SEGTRAQ_BASELINE module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + template 'baseline.py' + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_BASELINE module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "segtraq_qc/${prefix}" + touch "segtraq_qc/${prefix}/baseline_summary.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/segtraq/baseline/meta.yml b/modules/local/segtraq/baseline/meta.yml new file mode 100644 index 00000000..95aa4f38 --- /dev/null +++ b/modules/local/segtraq/baseline/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: segtraq_baseline +description: Run SegTraQ baseline QC metrics on a SpatialData object to assess + segmentation quality including cell counts, unassigned transcripts, transcripts/genes + per cell, transcript density, morphological features, and more. +keywords: + - segtraq + - quality control + - segmentation + - spatial transcriptomics + - baseline metrics +tools: + - custom: + description: SegTraQ - A Python toolkit for quantitative and visual quality + control of segmentation and transcript assignment in spatial omics data. + homepage: https://github.com/LazDaria/SegTraQ + documentation: https://lazdaria.github.io/SegTraQ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - spatialdata_zarr: + type: directory + description: | + Path to a SpatialData .zarr directory containing the spatial omics + data with segmentation results. 
+      pattern: "*.zarr"
+output:
+  - qc_results:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'sample' ]
+      - "segtraq_qc/${prefix}/":
+          type: directory
+          description: |
+            Directory containing SegTraQ baseline QC results including
+            baseline_summary.json with cell counts, transcript/gene stats,
+            and morphological features.
+          pattern: "segtraq_qc/*/"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@priyaltripathi"
+maintainers:
+  - "@priyaltripathi"
diff --git a/modules/local/segtraq/baseline/templates/baseline.py b/modules/local/segtraq/baseline/templates/baseline.py
new file mode 100644
index 00000000..91f6ef39
--- /dev/null
+++ b/modules/local/segtraq/baseline/templates/baseline.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+"""Compute baseline statistics on spatialdata object for QC."""
+
+import os
+import segtraq
+import spatialdata as sd
+import json
+import subprocess
+
+def _to_jsonable(value):
+    """Convert a SegTraQ metric into a value that json.dump can serialise.
+
+    Plain Python numbers are returned unchanged (no truncation to int).
+    Objects exposing to_json() (presumably pandas tables; confirm against the
+    SegTraQ API) are round-tripped through their own JSON encoder.
+    """
+    if hasattr(value, "to_json"):
+        return json.loads(value.to_json())
+    if hasattr(value, "tolist"):
+        # NumPy scalars/arrays become plain Python numbers/lists
+        return value.tolist()
+    return value
+
+def main():
+    print("[START] SegTraQ Baseline QC")
+    input_path = "${spatialdata_zarr}"
+    prefix = "${prefix}"
+    centroid_x_key = "${params.segtraq_centroid_x_key}"
+    centroid_y_key = "${params.segtraq_centroid_y_key}"
+    output_dir = f"segtraq_qc/{prefix}"
+    os.makedirs(output_dir, exist_ok=True)
+
+    # reading the spatial data
+    print(f"[INFO] Reading SpatialData object from: {input_path}")
+    sdata = sd.read_zarr(input_path)
+
+    # initializing the SegTraQ object; unset Nextflow params arrive as the strings "null"/""/"None"
+    cx_key = centroid_x_key if centroid_x_key not in ("null", "", "None") else None
+    cy_key = centroid_y_key if centroid_y_key not in ("null", "", "None") else None
+    print("[INFO] Initializing SegTraQ object")
+    st = segtraq.SegTraQ(
+        sdata,
+        images_key = None,
+        tables_area_key = None,
+        points_background_id =0,
+        tables_centroid_x_key=cx_key,
+        tables_centroid_y_key=cy_key,
+    )
+
+    print(f"[INFO] Computing baseline QC metrics")
+    summary = {}
+
+    # number of cells
+    n_cells = st.bl.num_cells()
+    summary["num_cells"] = int(n_cells)
+    print(f" num_cells: {n_cells}")
+
+    # number of transcripts
+    n_transcripts = st.bl.num_transcripts()
+    summary["num_transcripts"] = int(n_transcripts)
+    print(f" num_transcripts: {n_transcripts}")
+
+    # number of genes
+    n_genes = st.bl.num_genes()
+    summary["num_genes"] = int(n_genes)
+    print(f" num_genes: {n_genes}")
+
+    # percentage of unassigned transcripts (kept fractional; int() would truncate it)
+    percentage_unassgn_transcripts = st.bl.perc_unassigned_transcripts()
+    summary["percent_unassigned_transcripts"] = _to_jsonable(percentage_unassgn_transcripts)
+    print(f" percent_unassigned_transcripts: {percentage_unassgn_transcripts}")
+
+    # unassigned transcripts per gene
+    unassgn_transcripts_per_gene = st.bl.perc_unassigned_transcripts_per_gene()
+    summary["unassigned_transcripts_per_gene"] = _to_jsonable(unassgn_transcripts_per_gene)
+    print(f" unassigned_transcripts_per_gene: {unassgn_transcripts_per_gene}")
+
+    # transcripts per cell
+    transcripts_per_cell = st.bl.transcripts_per_cell()
+    summary["transcripts_per_cell"] = _to_jsonable(transcripts_per_cell)
+    print(f" transcripts_per_cell: {transcripts_per_cell}")
+
+    # genes per cell
+    genes_per_cell = st.bl.genes_per_cell()
+    summary["genes_per_cell"] = _to_jsonable(genes_per_cell)
+    print(f" genes_per_cell: {genes_per_cell}")
+
+    # transcript density
+    transcript_density = st.bl.transcript_density()
+    summary["transcript_density"] = _to_jsonable(transcript_density)
+    print(f" transcript_density: {transcript_density}")
+
+    # mean transcripts per gene per cell
+    mean_transcripts_per_gene_per_cell = st.bl.mean_transcripts_per_gene_per_cell()
+    summary["mean_transcripts_per_gene_per_cell"] = _to_jsonable(mean_transcripts_per_gene_per_cell)
+    print(f" mean_transcripts_per_gene_per_cell: {mean_transcripts_per_gene_per_cell}")
+
+    # morphological features
+    morpho_features = st.bl.morphological_features()
+    summary["morpho_features"] = _to_jsonable(morpho_features)
+    print(f" morpho_features: {morpho_features}")
+
+    # write the collected metrics as the module's JSON summary
+    with open(f"{output_dir}/baseline_summary.json", "w") as f:
+        json.dump(summary, f, 
indent=2) + print(f"[INFO] Summary written to {output_dir}/baseline_summary.json") + + version = subprocess.check_output( + ["pip", "show", "segtraq"], text=True + ) + segtraq_version = [l for l in version.splitlines() if l.startswith("Version:")][0].split(": ")[1] + + with open("versions.yml", "w") as f: + f.write('"${task.process}":\n') + f.write(f' segtraq: "{segtraq_version}"\n') + f.write(f' spatialdata: "{sd.__version__}"\n') + print("[FINISH] SegTraQ Baseline QC") + +if __name__ == "__main__": + main() diff --git a/modules/local/segtraq/clustering_stability/main.nf b/modules/local/segtraq/clustering_stability/main.nf new file mode 100644 index 00000000..ffc80e5c --- /dev/null +++ b/modules/local/segtraq/clustering_stability/main.nf @@ -0,0 +1,44 @@ +process SEGTRAQ_CLUSTERING_STABILITY { + tag "${meta.id}" + label 'process_medium' + + container "quay.io/priyal_tripathi/segtraq:0.0.3" + + input: + tuple val(meta), path(spatialdata_zarr) + + output: + tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_CLUSTERING_STABILITY module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + template('clustering_stability.py') + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_CLUSTERING_STABILITY module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "segtraq_qc/${prefix}" + touch "segtraq_qc/${prefix}/clustering_stability_summary.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/segtraq/clustering_stability/meta.yml b/modules/local/segtraq/clustering_stability/meta.yml new file mode 100644 index 00000000..3059edc9 --- /dev/null +++ b/modules/local/segtraq/clustering_stability/meta.yml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: segtraq_clustering_stability +description: Run SegTraQ clustering stability metrics on a SpatialData object to assess + the stability of cell type clusters across different segmentations or parameters. +keywords: + - segtraq + - quality control + - clustering + - stability + - spatial transcriptomics +tools: + - custom: + description: SegTraQ - A Python toolkit for quantitative and visual quality + control of segmentation and transcript assignment in spatial omics data. + homepage: https://github.com/LazDaria/SegTraQ + documentation: https://lazdaria.github.io/SegTraQ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - spatialdata_zarr: + type: directory + description: | + Path to a SpatialData .zarr directory containing the spatial omics + data with segmentation results. + pattern: "*.zarr" +output: + - qc_results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - "segtraq_qc/${prefix}/": + type: directory + description: | + Directory containing SegTraQ clustering stability QC results including + clustering_stability_summary.json. 
+ pattern: "segtraq_qc/*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyaltripathi" +maintainers: + - "@priyaltripathi" diff --git a/modules/local/segtraq/clustering_stability/templates/clustering_stability.py b/modules/local/segtraq/clustering_stability/templates/clustering_stability.py new file mode 100644 index 00000000..6fce693d --- /dev/null +++ b/modules/local/segtraq/clustering_stability/templates/clustering_stability.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +"""Compute clustering stability metrics on spatialdata object for QC.""" + +import os +import segtraq +import spatialdata as sd +import json +import subprocess +import scanpy as sc + + +def main(): + print("[START] SegTraQ Clustering Stability QC") + input_path = "${spatialdata_zarr}" + prefix = "${prefix}" + centroid_x_key = "${params.segtraq_centroid_x_key}" + centroid_y_key = "${params.segtraq_centroid_y_key}" + output_dir = f"segtraq_qc/{prefix}" + os.makedirs(output_dir, exist_ok=True) + + #reading the spatial data + print(f"[INFO] Reading SpatialData object from: {input_path}") + sdata = sd.read_zarr(input_path) + + #initialiizing segtraq object + cx_key = centroid_x_key if centroid_x_key not in ("null", "", "None") else None + cy_key = centroid_y_key if centroid_y_key not in ("null", "", "None") else None + print("[INFO] Initializing SegTraQ object") + st = segtraq.SegTraQ( + sdata, + images_key = None, + tables_area_key = None, + points_background_id =0, + tables_centroid_x_key=cx_key, + tables_centroid_y_key=cy_key, + ) + #normalizing and log-transforming for clustering stability metrics + print(f"[INFO] Normalizing data for clustering stability metrics") + adata = st.sdata.tables["table"] + if "counts" not in adata.layers: + adata.layers["counts"] = adata.X.copy() + # normalizing and log-transforming the counts + sc.pp.normalize_total(adata, inplace=True) + sc.pp.log1p(adata) + # computing a PCA and neighbors + 
sc.pp.pca(adata) + sc.pp.neighbors(adata) + + + #computing metrics + print(f"[INFO] Computing clustering stability QC metrics") + summary = {} + + #adjusted_rand_index + adjusted_rand_index = st.cs.adjusted_rand_index() + summary["adjusted_rand_index"] = float(adjusted_rand_index) + print(f" adjusted_rand_index: {adjusted_rand_index}") + + #cluster_connectedness + cluster_connectedness = st.cs.cluster_connectedness(use_weights=True) + summary["cluster_connectedness"] = float(cluster_connectedness) + print(f" cluster_connectedness: {cluster_connectedness}") + + #purity + purity = st.cs.purity() + summary["purity"] = float(purity) + print(f" purity: {purity}") + + #silhouette_score + silhouette_score = st.cs.silhouette_score() + summary["silhouette_score"] = float(silhouette_score) + print(f" silhouette_score: {silhouette_score}") + + #summary + with open(f"{output_dir}/clustering_stability_summary.json", "w") as f: + json.dump(summary, f, indent=2) + print(f"[INFO] Summary written to {output_dir}/clustering_stability_summary.json") + + version = subprocess.check_output( + ["pip", "show", "segtraq"], text=True + ) + segtraq_version = [l for l in version.splitlines() if l.startswith("Version:")][0].split(": ")[1] + + with open("versions.yml", "w") as f: + f.write('"${task.process}":\n') + f.write(f' segtraq: "{segtraq_version}"\n') + f.write(f' spatialdata: "{sd.__version__}"\n') + print("[FINISH] SegTraQ Clustering Stability QC") + +if __name__ == "__main__": + main() diff --git a/modules/local/segtraq/plotting/main.nf b/modules/local/segtraq/plotting/main.nf new file mode 100644 index 00000000..f2e94d97 --- /dev/null +++ b/modules/local/segtraq/plotting/main.nf @@ -0,0 +1,44 @@ +process SEGTRAQ_PLOTTING { + tag "${meta.id}" + label 'process_medium' + + container "quay.io/priyal_tripathi/segtraq:0.0.3" + + input: + tuple val(meta), path(spatialdata_zarr) + val cell_type_key + + output: + tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results + 
path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_PLOTTING module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + template 'plotting.py' + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_PLOTTING module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "segtraq_qc/${prefix}" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/segtraq/plotting/meta.yml b/modules/local/segtraq/plotting/meta.yml new file mode 100644 index 00000000..03ab45dc --- /dev/null +++ b/modules/local/segtraq/plotting/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: segtraq_plotting +description: Generate QC plots using the SegTraQ plotting module on a SpatialData object. + +keywords: + - segtraq + - quality control + - plotting + - scRNA + - spatial transcriptomics +tools: + - custom: + description: SegTraQ - A Python toolkit for quantitative and visual quality + control of segmentation and transcript assignment in spatial omics data. + homepage: https://github.com/LazDaria/SegTraQ + documentation: https://lazdaria.github.io/SegTraQ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample' ] + - spatialdata_zarr: + type: directory + description: | + Path to a SpatialData .zarr directory containing the spatial omics + data with segmentation results. + pattern: "*.zarr" + - cell_type_key: + type: string + description: | + The column name in the AnnData table's `.obs` that contains the cell type assignments. +output: + - qc_results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - plots: + type: directory + description: | + Directory containing SegTraQ plotting results including all produced plots (UMAPs, boxplots, etc.). + pattern: "segtraq_qc/*/" + - versions: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyaltripathi" +maintainers: + - "@priyaltripathi" diff --git a/modules/local/segtraq/plotting/templates/plotting.py b/modules/local/segtraq/plotting/templates/plotting.py new file mode 100644 index 00000000..9169148a --- /dev/null +++ b/modules/local/segtraq/plotting/templates/plotting.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python + +"""Generate QC plots using SegTraQ plotting module.""" + +import os +import json +import subprocess +import segtraq +import spatialdata as sd +import scanpy as sc +import matplotlib.pyplot as plt + +def main(): + print("[START] SegTraQ Plotting QC") + input_path = "${spatialdata_zarr}" + prefix = "${prefix}" + cell_type_key = "${cell_type_key}" + + centroid_x_key = "${params.segtraq_centroid_x_key}" + centroid_y_key = "${params.segtraq_centroid_y_key}" + + output_dir = f"segtraq_qc/{prefix}/plots" + os.makedirs(output_dir, exist_ok=True) + + print(f"[INFO] Reading SpatialData object from: {input_path}") + sdata = sd.read_zarr(input_path) + + cx_key = centroid_x_key if centroid_x_key not in ("null", "", "None") else None + cy_key = centroid_y_key if centroid_y_key not in ("null", "", "None") else None + cell_type_key = cell_type_key if cell_type_key not in ("null", "", "None") 
else None + + + st = segtraq.SegTraQ( + sdata, + tables_centroid_x_key=cx_key, + tables_centroid_y_key=cy_key + ) + + print("[INFO] Preprocessing data for plotting (Normalization, PCA, UMAP)") + st.filter_control_and_low_quality_transcripts() + + adata = st.sdata.tables[st.tables_key] + if "counts" not in adata.layers: + adata.layers["counts"] = adata.X.copy() + + sc.pp.normalize_total(adata, target_sum=1e4) + sc.pp.log1p(adata) + sc.pp.pca(adata) + sc.pp.neighbors(adata) + sc.tl.umap(adata) + + + st_dict = {prefix: st} + + print("[INFO] Generating plots...") + + if cell_type_key and cell_type_key in adata.obs.columns: + print(f" Plotting cell type proportions using: {cell_type_key}") + segtraq.pl.celltype_proportions(st_dict, celltype_col=cell_type_key) + plt.savefig(f"{output_dir}/celltype_proportions.png", bbox_inches='tight') + plt.close() + + segtraq.pl.umap(st_dict, color=cell_type_key, legend=True) + plt.savefig(f"{output_dir}/umap_cell_types.png", bbox_inches='tight') + plt.close() + + if 'transcript_count' in adata.obs.columns: + segtraq.pl.umap(st_dict, color="transcript_count", legend=True) + plt.savefig(f"{output_dir}/umap_transcript_count.png", bbox_inches='tight') + plt.close() + + if cell_type_key and cell_type_key in adata.obs.columns: + segtraq.pl.boxplot(st_dict, celltype_col=cell_type_key, value_key="transcript_count") + plt.savefig(f"{output_dir}/boxplot_transcripts_per_type.png", bbox_inches='tight') + plt.close() + + print(f"[INFO] Plots saved to {output_dir}") + + version = subprocess.check_output(["pip", "show", "segtraq"], text=True) + segtraq_version = [l for l in version.splitlines() if l.startswith("Version:")][0].split(": ")[1] + + with open("versions.yml", "w") as f: + f.write('"${task.process}":\n') + f.write(f' segtraq: "{segtraq_version}"\n') + f.write(f' spatialdata: "{sd.__version__}"\n') + + print("[FINISH] SegTraQ Plotting QC") + +if __name__ == "__main__": + main() diff --git a/modules/local/segtraq/point_statistics/main.nf 
b/modules/local/segtraq/point_statistics/main.nf new file mode 100644 index 00000000..3989b973 --- /dev/null +++ b/modules/local/segtraq/point_statistics/main.nf @@ -0,0 +1,45 @@ +process SEGTRAQ_POINT_STATISTICS { + tag "${meta.id}" + label 'process_medium' + + container "quay.io/priyal_tripathi/segtraq:0.0.3" + + input: + tuple val(meta), path(spatialdata_zarr) + path markers + + output: + tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_POINT_STATISTICS module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + template 'point_stats.py' + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_POINT_STATISTICS module does not support Conda. 
Please use Docker / Singularity / Podman instead.")
+    }
+
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    mkdir -p "segtraq_qc/${prefix}"
+    touch "segtraq_qc/${prefix}/point_stats_summary.json"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/segtraq/point_statistics/meta.yml b/modules/local/segtraq/point_statistics/meta.yml
new file mode 100644
index 00000000..bb08764e
--- /dev/null
+++ b/modules/local/segtraq/point_statistics/meta.yml
@@ -0,0 +1,55 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: segtraq_point_statistics
+description: Run SegTraQ point statistics on a SpatialData object to assess where marker-gene
+  transcripts sit within cells (distance to centroid and membrane, share inside the nucleus).
+keywords:
+  - segtraq
+  - quality control
+  - point statistics
+  - scRNA
+  - spatial transcriptomics
+tools:
+  - custom:
+      description: SegTraQ - A Python toolkit for quantitative and visual quality
+        control of segmentation and transcript assignment in spatial omics data.
+      homepage: https://github.com/LazDaria/SegTraQ
+      documentation: https://lazdaria.github.io/SegTraQ
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'sample' ]
+  - spatialdata_zarr:
+      type: directory
+      description: |
+        Path to a SpatialData .zarr directory containing the spatial omics
+        data with segmentation results.
+      pattern: "*.zarr"
+  - markers:
+      type: file
+      description: |
+        Path to a JSON file containing cell-type specific markers (positive and negative).
+      pattern: "*.json"
+output:
+  - qc_results:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'sample' ]
+      - "segtraq_qc/${prefix}/":
+          type: directory
+          description: |
+            Directory containing SegTraQ point statistics results including
+            point_stats_summary.json and point_statistics_table.csv.
+          pattern: "segtraq_qc/*/"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@priyaltripathi"
+maintainers:
+  - "@priyaltripathi"
diff --git a/modules/local/segtraq/point_statistics/templates/point_stats.py b/modules/local/segtraq/point_statistics/templates/point_stats.py
new file mode 100644
index 00000000..9e652a27
--- /dev/null
+++ b/modules/local/segtraq/point_statistics/templates/point_stats.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+"""Compute point statistics on spatialdata object for QC."""
+
+import segtraq
+import spatialdata as sd
+import json
+import os
+import subprocess
+
+def main():
+    print("[START] SegTraQ Point Statistics QC")
+    input_path = "${spatialdata_zarr}"
+    prefix = "${prefix}"
+    centroid_x_key = "${params.segtraq_centroid_x_key}"
+    centroid_y_key = "${params.segtraq_centroid_y_key}"
+    output_dir = f"segtraq_qc/{prefix}"
+    os.makedirs(output_dir, exist_ok=True)
+    markers_path = "${markers}"
+
+    # reading the spatial data
+    print(f"[INFO] Reading SpatialData object from: {input_path}")
+    sdata = sd.read_zarr(input_path)
+
+    # initializing the SegTraQ object
+    cx_key = centroid_x_key if centroid_x_key not in ("null", "", "None") else None
+    cy_key = centroid_y_key if centroid_y_key not in ("null", "", "None") else None
+    print("[INFO] Initializing SegTraQ object")
+
+    st = segtraq.SegTraQ(
+        sdata,
+        tables_centroid_x_key=cx_key,
+        tables_centroid_y_key=cy_key
+    )
+
+    st.filter_control_and_low_quality_transcripts()
+
+    # reading markers: union of every cell type's positive and negative genes
+    with open(markers_path, 'r') as f:
+        markers = json.load(f)
+    genes_to_test = list(set([g for ct in markers.values() for g in ct.get('positive', []) + ct.get('negative', [])]))
+
+    # computing point statistics
+    print(f"[INFO] Computing Point Statistics for QC")
+    summary = {} 
+ + if not genes_to_test: + print("[WARNING] No genes found in markers list. Skipping Point Statistics.") + else: + print(f" Running point statistics for {len(genes_to_test)} genes...") + + st.ps.distance_to_centroid(genes=genes_to_test, restrict_to_within_boundary=True) + centroid_cols = [f"distance_to_cell_centroid_norm_{g}" for g in genes_to_test + if f"distance_to_cell_centroid_norm_{g}" in sdata.tables["table"].obs.columns] + if centroid_cols: + mean_dist_cent = float(sdata.tables["table"].obs[centroid_cols].mean().mean()) + summary["mean_normalized_distance_to_centroid"] = mean_dist_cent + print(f" Mean normalized distance to centroid: {mean_dist_cent:.4f}") + + st.ps.distance_to_membrane(genes=genes_to_test, restrict_to_within_boundary=True) + membrane_cols = [f"distance_to_cell_membrane_norm_{g}" for g in genes_to_test + if f"distance_to_cell_membrane_norm_{g}" in sdata.tables["table"].obs.columns] + if membrane_cols: + mean_dist_memb = float(sdata.tables["table"].obs[membrane_cols].mean().mean()) + summary["mean_normalized_distance_to_membrane"] = mean_dist_memb + print(f" Mean normalized distance to membrane: {mean_dist_memb:.4f}") + + if "nucleus_boundaries" in sdata.shapes: + print(" Calculating compartment localization...") + st.ps.percentage_transcripts_in_compartments(genes=genes_to_test) + + nuc_cols = [f"pct_nucleus_{g}" for g in genes_to_test if f"pct_nucleus_{g}" in sdata.tables["table"].obs.columns] + if nuc_cols: + mean_nuc_pct = float(sdata.tables["table"].obs[nuc_cols].mean().mean()) + summary["mean_percentage_in_nucleus"] = mean_nuc_pct + print(f" Mean % in nucleus: {mean_nuc_pct:.2f}%") + else: + print(" [SKIP] Nucleus compartment analysis (no nucleus_boundaries found in shapes)") + + #summary + with open(f"{output_dir}/point_stats_summary.json", "w") as f: + json.dump(summary, f, indent=2) + print(f"[INFO] Summary written to {output_dir}/point_stats_summary.json") + + obs_csv_path = f"{output_dir}/point_statistics_table.csv" + 
sdata.tables["table"].obs.to_csv(obs_csv_path) + print(f" Point statistics table saved to {obs_csv_path}") + + version = subprocess.check_output( + ["pip", "show", "segtraq"], text=True + ) + segtraq_version = [l for l in version.splitlines() if l.startswith("Version:")][0].split(": ")[1] + + with open("versions.yml", "w") as f: + f.write('"${task.process}":\n') + f.write(f' segtraq: "{segtraq_version}"\n') + f.write(f' spatialdata: "{sd.__version__}"\n') + print("[FINISH] SegTraQ Point Statistics QC") + +if __name__ == "__main__": + main() diff --git a/modules/local/segtraq/region_similarity/main.nf b/modules/local/segtraq/region_similarity/main.nf new file mode 100644 index 00000000..61fc3983 --- /dev/null +++ b/modules/local/segtraq/region_similarity/main.nf @@ -0,0 +1,44 @@ +process SEGTRAQ_REGION_SIMILARITY { + tag "${meta.id}" + label 'process_medium' + + container "quay.io/priyal_tripathi/segtraq:0.0.3" + + input: + tuple val(meta), path(spatialdata_zarr) + + output: + tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_REGION_SIMILARITY module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + template 'rs.py' + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_REGION_SIMILARITY module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "segtraq_qc/${prefix}" + touch "segtraq_qc/${prefix}/region_similarity.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/segtraq/region_similarity/meta.yml b/modules/local/segtraq/region_similarity/meta.yml new file mode 100644 index 00000000..13b0c1d6 --- /dev/null +++ b/modules/local/segtraq/region_similarity/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: segtraq_region_similarity +description: Run SegTraQ region similarity metrics on a SpatialData object to assess + how similar the expression profiles are between subcellular regions. +keywords: + - segtraq + - quality control + - region + - similarity + - stability + - spatial transcriptomics +tools: + - custom: + description: SegTraQ - A Python toolkit for quantitative and visual quality + control of segmentation and transcript assignment in spatial omics data. + homepage: https://github.com/LazDaria/SegTraQ + documentation: https://lazdaria.github.io/SegTraQ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - spatialdata_zarr: + type: directory + description: | + Path to a SpatialData .zarr directory containing the spatial omics + data with segmentation results. + pattern: "*.zarr" +output: + - qc_results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - "segtraq_qc/${prefix}/": + type: directory + description: | + Directory containing SegTraQ region similarity QC results including + region_similarity_summary.json.
+ pattern: "segtraq_qc/*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyaltripathi" +maintainers: + - "@priyaltripathi" diff --git a/modules/local/segtraq/region_similarity/templates/rs.py b/modules/local/segtraq/region_similarity/templates/rs.py new file mode 100644 index 00000000..c0247159 --- /dev/null +++ b/modules/local/segtraq/region_similarity/templates/rs.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python + +"""Compute region similarity metrics on spatialdata object for QC.""" + +import os +import segtraq +import spatialdata as sd +import json +import subprocess +import pandas as pd + + +def main(): + print("[START] SegTraQ Region Similarity QC") + input_path = "${spatialdata_zarr}" + prefix = "${prefix}" + centroid_x_key = "${params.segtraq_centroid_x_key}" + centroid_y_key = "${params.segtraq_centroid_y_key}" + output_dir = f"segtraq_qc/{prefix}" + os.makedirs(output_dir, exist_ok=True) + + #reading the spatial data + print(f"[INFO] Reading SpatialData object from: {input_path}") + sdata = sd.read_zarr(input_path) + + #initialiizing segtraq object + cx_key = centroid_x_key if centroid_x_key not in ("null", "", "None") else None + cy_key = centroid_y_key if centroid_y_key not in ("null", "", "None") else None + print("[INFO] Initializing SegTraQ object") + st = segtraq.SegTraQ( + sdata, + images_key = None, + tables_area_key = None, + points_background_id = 0, + tables_centroid_x_key= cx_key, + tables_centroid_y_key= cy_key, + ) + + print(f"[INFO] checking the presence of cell and nuclear masks") + if hasattr(st.sdata, "shapes") and st.sdata.shapes: + print("Found shapes. 
Moving ahead.") + else: + raise ValueError("A nuclear and cell segmentation object in 'shapes' is required.") + + #computing metrics + print(f"[INFO] Computing region similarity QC metrics") + summary = {} + + #match nuclei to cells + nuclei_to_cells_df = st.rs.match_nuclei_to_cells() + csv_path = f"{output_dir}/match_nucleus_to_cell.csv" + nuclei_to_cells_df.to_csv(csv_path, index=False) + print(f" Saved full nuclei matching stats to {csv_path}") + mean_iou = float(nuclei_to_cells_df["iou"].mean()) + summary["mean_nucleus_cell_iou"] = mean_iou + print(f" mean nucleus-cell IoU: {mean_iou}") + + #similarity between border and neighbourhood + sim_border_neighborhood_df = st.rs.similarity_border_neighborhood() + csv_path = f"{output_dir}/similarity_border_neighbourhood.csv" + sim_border_neighborhood_df.to_csv(csv_path, index=False) + print(f" Saved similarity between border and neighborhood stats to {csv_path}") + mean_sim_cent_border = float(sim_border_neighborhood_df["similarity_center_border"].mean()) + summary["mean_similarity_center_border"] = mean_sim_cent_border + print(f" mean_similarity_center_border: {mean_sim_cent_border}") + mean_sim_border_neigh = float(sim_border_neighborhood_df["similarity_border_neighborhood"].mean()) + summary["mean_similarity_border_neighborhood"] = mean_sim_border_neigh + print(f" mean_similarity_border_neighborhood: {mean_sim_border_neigh}") + mean_ratio = float(sim_border_neighborhood_df["ratio_border_neighborhood_to_center"].mean()) + summary["mean_ratio_border_neighborhood_to_center"] = mean_ratio + print(f" mean_ratio_border_neighborhood_to_center: {mean_ratio}") + + #similarity between nucleus and cell + sim_nucleus_cell = st.rs.similarity_nucleus_cell() + if isinstance(sim_nucleus_cell, pd.Series): + sim_nucleus_cell = sim_nucleus_cell.to_frame(name="similarity_nucleus_cell") + sim_nucleus_cell.to_csv(f"{output_dir}/similarity_nucleus_cell.csv", index=False) + summary["mean_similarity_nucleus_cell"] = 
float(sim_nucleus_cell.iloc[:, -1].mean()) + + #similarity between nucleus and cytoplasm + sim_nucleus_cyto = st.rs.similarity_nucleus_cytoplasm() + if isinstance(sim_nucleus_cyto, pd.Series): + sim_nucleus_cyto = sim_nucleus_cyto.to_frame(name="similarity_nucleus_cytoplasm") + sim_nucleus_cyto.to_csv(f"{output_dir}/similarity_nucleus_cytoplasm.csv", index=False) + summary["mean_similarity_nucleus_cytoplasm"] = float(sim_nucleus_cyto.iloc[:, -1].mean()) + + #summary + with open(f"{output_dir}/region_similarity_summary.json", "w") as f: + json.dump(summary, f, indent=2) + print(f"[INFO] Summary written to {output_dir}/region_similarity_summary.json") + print(f"[INFO] Dataframes generated are stored within respective directories") + + version = subprocess.check_output( + ["pip", "show", "segtraq"], text=True + ) + segtraq_version = [l for l in version.splitlines() if l.startswith("Version:")][0].split(": ")[1] + + with open("versions.yml", "w") as f: + f.write('"${task.process}":\n') + f.write(f' segtraq: "{segtraq_version}"\n') + f.write(f' spatialdata: "{sd.__version__}"\n') + print("[FINISH] SegTraQ Region Similarity QC") + +if __name__ == "__main__": + main() diff --git a/modules/local/segtraq/supervised/main.nf b/modules/local/segtraq/supervised/main.nf new file mode 100644 index 00000000..d2167872 --- /dev/null +++ b/modules/local/segtraq/supervised/main.nf @@ -0,0 +1,46 @@ +process SEGTRAQ_SUPERVISED { + tag "${meta.id}" + label 'process_medium' + + container "quay.io/priyal_tripathi/segtraq:0.0.3" + + input: + tuple val(meta), path(spatialdata_zarr) + path markers + val cell_type_key + + output: + tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_SUPERVISED module does not 
support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + template 'supervised.py' + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_SUPERVISED module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "segtraq_qc/${prefix}" + touch "segtraq_qc/${prefix}/supervised_summary.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/segtraq/supervised/meta.yml b/modules/local/segtraq/supervised/meta.yml new file mode 100644 index 00000000..17bafcba --- /dev/null +++ b/modules/local/segtraq/supervised/meta.yml @@ -0,0 +1,59 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: segtraq_supervised +description: Run SegTraQ supervised metrics on a SpatialData object to assess marker + purity, mutually exclusive co-expression and neighbor contamination from cell-type markers. +keywords: + - segtraq + - quality control + - supervised + - scRNA + - spatial transcriptomics +tools: + - custom: + description: SegTraQ - A Python toolkit for quantitative and visual quality + control of segmentation and transcript assignment in spatial omics data. + homepage: https://github.com/LazDaria/SegTraQ + documentation: https://lazdaria.github.io/SegTraQ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - spatialdata_zarr: + type: directory + description: | + Path to a SpatialData .zarr directory containing the spatial omics + data with segmentation results.
+ pattern: "*.zarr" + - markers: + type: file + description: | + Path to a JSON file containing cell-type specific markers (positive and negative). + pattern: "*.json" + - cell_type_key: + type: string + description: | + The column name in the AnnData table's `.obs` that contains the cell type assignments. +output: + - qc_results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - "segtraq_qc/${prefix}/": + type: directory + description: | + Directory containing SegTraQ supervised metrics QC results including + supervised_summary.json. + pattern: "segtraq_qc/*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyaltripathi" +maintainers: + - "@priyaltripathi" diff --git a/modules/local/segtraq/supervised/templates/supervised.py b/modules/local/segtraq/supervised/templates/supervised.py new file mode 100644 index 00000000..81372e8f --- /dev/null +++ b/modules/local/segtraq/supervised/templates/supervised.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +"""Compute supervised metrics on spatialdata object for QC.""" + +import os +import segtraq +import spatialdata as sd +import json +import subprocess +import scanpy as sc +import pandas as pd + + +def main(): + print("[START] SegTraQ Supervised metrics QC") + input_path = "${spatialdata_zarr}" + prefix = "${prefix}" + cell_type_key = "${cell_type_key}" + markers_path = "${markers}" + + centroid_x_key = "${params.segtraq_centroid_x_key}" + centroid_y_key = "${params.segtraq_centroid_y_key}" + output_dir = f"segtraq_qc/{prefix}" + os.makedirs(output_dir, exist_ok=True) + + #loading markers + with open(markers_path, 'r') as f: + markers = json.load(f) + + #reading the spatial data + print(f"[INFO] Reading SpatialData object from: {input_path}") + sdata = sd.read_zarr(input_path) + + #initialiizing segtraq object + cx_key = centroid_x_key if centroid_x_key not in ("null", "", "None") else None + 
cy_key = centroid_y_key if centroid_y_key not in ("null", "", "None") else None + print("[INFO] Initializing SegTraQ object") + st = segtraq.SegTraQ( + sdata, + images_key = None, + tables_area_key = None, + points_background_id = 0, + tables_centroid_x_key= cx_key, + tables_centroid_y_key= cy_key, + ) + + #normalization + adata = sdata.tables[st.tables_key] + sc.pp.normalize_total(adata, target_sum=1e4) + sc.pp.log1p(adata) + + + #computing metrics + print(f"[INFO] Computing supervised QC metrics") + summary = {} + + #marker purity + marker_purity_df = st.sp.marker_purity( + cell_type_key=cell_type_key, + markers=markers + ) + csv_path = f"{output_dir}/marker_purity.csv" + marker_purity_df.to_csv(csv_path, index=False) + print(f" Saved marker purity stats to {csv_path}") + mean_positive_F1 = float(marker_purity_df["positive_F1"].mean()) + summary["mean_positive_F1"] = mean_positive_F1 + print(f" mean_positive_F1: {mean_positive_F1}") + mean_negative_F1 = float(marker_purity_df["negative_F1"].mean()) + summary["mean_negative_F1"] = mean_negative_F1 + print(f" mean_negative_F1: {mean_negative_F1}") + mean_F1_purity = float(marker_purity_df["F1_purity"].mean()) + summary["F1_purity"] = mean_F1_purity + print(f" mean_F1_purity: {mean_F1_purity}") + + #mutually_exclusive_coexpression_rate + mecr_df = st.sp.mutually_exclusive_coexpression_rate( + markers=markers + ) + csv_path = f"{output_dir}/mutually_exclusive_coexpression_rate.csv" + mecr_df.to_csv(csv_path, index=False) + print(f" Saved mutually exclusive coexpression rate stats to {csv_path}") + sig_count = (mecr_df["pvalue"] < 0.05).sum() + total_pairs = len(mecr_df) + summary["total_gene_pairs_tested"] = int(total_pairs) + summary["significant_exclusive_pairs_count"] = int(sig_count) + summary["percentage_significant_exclusivity"] = float((sig_count / total_pairs) * 100) if total_pairs > 0 else 0.0 + + #neighbor contamination + per_cell_df, strength_df, binary_df = st.sp.neighbor_contamination( + 
cell_type_key=cell_type_key, + markers=markers + ) + csv_path = f"{output_dir}/per_cell_contamination.csv" + per_cell_df.to_csv(csv_path, index=False) + csv_path = f"{output_dir}/matrix_contamination.csv" + strength_df.to_csv(csv_path, index=False) + csv_path = f"{output_dir}/binary_contamination.csv" + binary_df.to_csv(csv_path, index=False) + summary["mean_cell_contamination_fraction"] = float(per_cell_df["negative_marker_contamination_fraction"].mean()) + max_contam_val = binary_df.values.max() + summary["max_type_to_type_contamination_proportion"] = float(max_contam_val) + + + #summary + with open(f"{output_dir}/supervised_summary.json", "w") as f: + json.dump(summary, f, indent=2) + print(f"[INFO] Summary written to {output_dir}/supervised_summary.json") + print(f"[INFO] Dataframes generated are stored within respective directories") + + version = subprocess.check_output( + ["pip", "show", "segtraq"], text=True + ) + segtraq_version = [l for l in version.splitlines() if l.startswith("Version:")][0].split(": ")[1] + + with open("versions.yml", "w") as f: + f.write('"${task.process}":\n') + f.write(f' segtraq: "{segtraq_version}"\n') + f.write(f' spatialdata: "{sd.__version__}"\n') + print("[FINISH] SegTraQ Supervised QC") + +if __name__ == "__main__": + main() diff --git a/modules/local/segtraq/volume/main.nf b/modules/local/segtraq/volume/main.nf new file mode 100644 index 00000000..851a5684 --- /dev/null +++ b/modules/local/segtraq/volume/main.nf @@ -0,0 +1,44 @@ +process SEGTRAQ_VOLUME { + tag "${meta.id}" + label 'process_medium' + + container "quay.io/priyal_tripathi/segtraq:0.0.3" + + input: + tuple val(meta), path(spatialdata_zarr) + + output: + tuple val(meta), path("segtraq_qc/${prefix}/"), emit: qc_results + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 
1) { + error("SEGTRAQ_VOLUME module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + template 'vol.py' + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("SEGTRAQ_VOLUME module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p "segtraq_qc/${prefix}" + touch "segtraq_qc/${prefix}/volume_summary.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + segtraq: \$(pip show segtraq | grep Version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/segtraq/volume/meta.yml b/modules/local/segtraq/volume/meta.yml new file mode 100644 index 00000000..55835adf --- /dev/null +++ b/modules/local/segtraq/volume/meta.yml @@ -0,0 +1,50 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: segtraq_volume +description: Run SegTraQ volume metrics on a SpatialData object to assess + how well a segmentation method resolves cell overlaps in 3D. +keywords: + - segtraq + - quality control + - volume + - three dimensional + - spatial transcriptomics +tools: + - custom: + description: SegTraQ - A Python toolkit for quantitative and visual quality + control of segmentation and transcript assignment in spatial omics data. + homepage: https://github.com/LazDaria/SegTraQ + documentation: https://lazdaria.github.io/SegTraQ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - spatialdata_zarr: + type: directory + description: | + Path to a SpatialData .zarr directory containing the spatial omics + data with segmentation results. 
+ pattern: "*.zarr" +output: + - qc_results: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample' ] + - "segtraq_qc/${prefix}/": + type: directory + description: | + Directory containing SegTraQ volume QC results including + volume_summary.json. + pattern: "segtraq_qc/*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyaltripathi" +maintainers: + - "@priyaltripathi" diff --git a/modules/local/segtraq/volume/templates/vol.py b/modules/local/segtraq/volume/templates/vol.py new file mode 100644 index 00000000..4750fd53 --- /dev/null +++ b/modules/local/segtraq/volume/templates/vol.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +"""Compute 3D volume metrics on spatialdata object for QC.""" + +import os +import segtraq +import spatialdata as sd +import json +import subprocess +import pandas as pd + + +def main(): + print("[START] SegTraQ 3D Volume QC") + input_path = "${spatialdata_zarr}" + prefix = "${prefix}" + centroid_x_key = "${params.segtraq_centroid_x_key}" + centroid_y_key = "${params.segtraq_centroid_y_key}" + output_dir = f"segtraq_qc/{prefix}" + os.makedirs(output_dir, exist_ok=True) + + #reading the spatial data + print(f"[INFO] Reading SpatialData object from: {input_path}") + sdata = sd.read_zarr(input_path) + + #initialiizing segtraq object + cx_key = centroid_x_key if centroid_x_key not in ("null", "", "None") else None + cy_key = centroid_y_key if centroid_y_key not in ("null", "", "None") else None + print("[INFO] Initializing SegTraQ object") + st = segtraq.SegTraQ( + sdata, + images_key = None, + tables_area_key = None, + points_background_id = 0, + tables_centroid_x_key= cx_key, + tables_centroid_y_key= cy_key, + ) + + #computing metrics + print(f"[INFO] Computing 3D Volume QC metrics") + summary = {} + + #heterotypic overlap fraction + hetero_overlap_df = st.vl.fraction_heterotypic_overlap() + csv_path = 
f"{output_dir}/heterotypic_overlap_df.csv" + hetero_overlap_df.to_csv(csv_path, index=False) + print(f" Saved heterotypic overlap stats to {csv_path}") + mean_area = float(hetero_overlap_df["heterotypic_overlap_area"].mean()) + mean_frac = float(hetero_overlap_df["heterotypic_overlap_fraction"].mean()) + summary["mean_overlap_area"] = mean_area + summary["mean_overlap_fraction"] = mean_frac + print(f" mean_overlap_area: {mean_area}") + print(f" mean_overlap_fraction: {mean_frac}") + + + #top and bottom z consistency + sim_top_bottom_df = st.vl.similarity_top_bottom() + csv_path = f"{output_dir}/similarity_top_bottom.csv" + sim_top_bottom_df.to_csv(csv_path, index=False) + print(f" Saved top-bottom z consistency stats to {csv_path}") + cosine_sim_top_bottom_z = float(sim_top_bottom_df["cosine_sim_top_bottom_z"].mean()) + summary["cosine_sim_top_bottom_z"] = cosine_sim_top_bottom_z + print(f" cosine_sim_top_bottom_z: {cosine_sim_top_bottom_z}") + + + #mean VSI per cell + mean_vsi_df = st.vl.vertical_signal_integrity_per_cell() + if isinstance(mean_vsi_df, pd.Series): + mean_vsi_df = mean_vsi_df.to_frame(name="mean_vsi") + mean_vsi_df.to_csv(f"{output_dir}/mean_vsi.csv", index=False) + summary["mean_vsi"] = float(mean_vsi_df["mean_vsi"].mean()) + + #summary + with open(f"{output_dir}/volume_summary.json", "w") as f: + json.dump(summary, f, indent=2) + print(f"[INFO] Summary written to {output_dir}/volume_summary.json") + print(f"[INFO] Dataframes generated are stored within respective directories") + + version = subprocess.check_output( + ["pip", "show", "segtraq"], text=True + ) + segtraq_version = [l for l in version.splitlines() if l.startswith("Version:")][0].split(": ")[1] + + with open("versions.yml", "w") as f: + f.write('"${task.process}":\n') + f.write(f' segtraq: "{segtraq_version}"\n') + f.write(f' spatialdata: "{sd.__version__}"\n') + print("[FINISH] SegTraQ 3D Volume QC") + +if __name__ == "__main__": + main() diff --git a/nextflow.config 
b/nextflow.config index 473c41b8..42911acc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -74,6 +74,15 @@ params { run_qc = true // whether to run the qc layer of pipeline offtarget_probe_tracking = false // whether to run off-target probe tracking (provide probe_fasta, reference sequences, gene synonyms ) + //SegTraQ specific + run_segtraq = false // whether to run SegTraQ QC after segmentation + segtraq_centroid_x_key = 'x_centroid' // key for x centroid in tables + segtraq_centroid_y_key = 'y_centroid' // key for y centroid in tables + segtraq_modules = 'all' // comma-separated list of modules to run, or 'all' + segtraq_markers = null // Path to a JSON file containing cell-type specific markers + segtraq_cell_type_key = 'transferred_cell_type' // Key for cell type annotation in the AnnData table + + // utility modules csplit_x_bins = 2 // number of tiles along the x axis (total number of bins is product of x_bins * y_bins) csplit_y_bins = 2 // number of tiles along the y axis @@ -316,6 +325,14 @@ manifest { github: '@dongzehe', contribution: ['contributor'], orcid: '0000-0001-8259-7434' + ], + [ + name: 'Priyal Tripathi', + affiliation: 'Maitreyi College, New Delhi, India', + email: 'priyaltripathi2910@gmail.com', + github: '@priyalT', + contribution: ['contributor'], + orcid: '' ] ] homePage = 'https://github.com/nf-core/spatialxe' diff --git a/nextflow_schema.json b/nextflow_schema.json index e5144c04..f79b656e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -284,6 +284,36 @@ "type": "boolean", "description": "Restrict parallelizing a process. Eg. 
restrict running cellpose cell and nuclei segmentation together if the resources are limited.", "default": false + }, + "run_segtraq": { + "type": "boolean", + "description": "Whether to run SegTraQ baseline QC after segmentation.", + "default": false + }, + "segtraq_centroid_x_key": { + "type": "string", + "description": "Key for x centroid in SpatialData tables, used by SegTraQ.", + "default": "x_centroid" + }, + "segtraq_centroid_y_key": { + "type": "string", + "description": "Key for y centroid in SpatialData tables, used by SegTraQ.", + "default": "y_centroid" + }, + "segtraq_modules": { + "type": "string", + "description": "Comma-separated list of SegTraQ modules to run, or 'all' to run every module.", + "default": "all" + }, + "segtraq_markers": { + "type": "string", + "format": "file-path", + "description": "Path to a JSON file containing cell-type specific markers (positive and negative)." + }, + "segtraq_cell_type_key": { + "type": "string", + "description": "Key for cell type annotation in the AnnData table.", + "default": "transferred_cell_type" } } }, diff --git a/nf-test b/nf-test new file mode 100755 index 00000000..8ae605eb --- /dev/null +++ b/nf-test @@ -0,0 +1,59 @@ +#!/bin/bash +APP_HOME="$HOME/.nf-test" +APP_JAR="nf-test.jar" +APP_UPDATE_URL="https://code.askimed.com/install/nf-test" + +set -e + +FOLDER=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) + +update() { + if command -v curl > /dev/null 2>&1; then + curl -fsSL ${APP_UPDATE_URL} | bash + else + wget -qO- ${APP_UPDATE_URL} | bash + fi +} + +# nf-test uses the same logic as Nextflow to ensure to pick up the same jvm. +# https://github.com/nextflow-io/nextflow/blob/master/nextflow#L263 +if [[ "$NXF_JAVA_HOME" ]]; then + JAVA_HOME="$NXF_JAVA_HOME" + unset JAVA_CMD +fi +# Determine the Java command to use to start the JVM. +if [ !
-x "$JAVA_CMD" ] ; then + if [ -d "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVA_CMD="$JAVA_HOME/jre/sh/java" + else + JAVA_CMD="$JAVA_HOME/bin/java" + fi + elif [ -x /usr/libexec/java_home ]; then + JAVA_CMD="$(/usr/libexec/java_home -v 1.8+)/bin/java" + else + JAVA_CMD="$(which java)" || JAVA_CMD=java + fi +fi + +if test -f "${FOLDER}/${APP_JAR}"; then + FILE_PATH_JAR=${FOLDER}/${APP_JAR} +else + FILE_PATH_JAR=${APP_HOME}/${APP_JAR} +fi + +JAVA_ARGS="-Xmx10G" +if [[ "$NFT_JAVA_ARGS" ]]; then + JAVA_ARGS="$NFT_JAVA_ARGS" +fi + +export JAVA_PROGRAM_ARGS=`echo "$@"` + +if [ "${JAVA_PROGRAM_ARGS}" = "update" ]; then + echo "Updating application..." + cd "${FOLDER}" + update +else + exec ${JAVA_CMD} ${JAVA_ARGS} -jar "${FILE_PATH_JAR}" "$@" +fi diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 9e9b152e..0e62f46c 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-09-09T21:16:18+00:00", - "description": "

\n \n \n \"nf-core/spatialxe\"\n \n

\n\n[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/spatialxe)\n[![GitHub Actions CI Status](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/spatialxe/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/spatialxe)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23spatialxe-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/spatialxe)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/spatialxe** is a bioinformatics best-practice processing and quality control pipeline for Xenium data. **The pipeline is currently under developement and not completed yet!**. The current plan for the pipeline implementation is shown in the metromap below. Please note that the pipeline steps and methods might change as we move forward in the development cycle.\n\n![nf-core/spatialxe-metromap](docs/images/spatialxe-metromap.png)\n\n## Usage\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialxe/results).\n\n## Pipeline summary\n\n## Quick Start\n\n`samplesheet.csv`:\n\n```csv\nsample,bundle,image\ntest_sample,/path/to/xenium-bundle,/path/to/morphology.ome.tif\n```\n\nNow, you can run the pipeline using:\n\n## Run image-based segmentation mode
\n\n`CELLPOSE -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \n```\n\n## Run coordinate-based segmentation mode
\n\n`PROSEG -> PROSEG2BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode coordinate\n```\n\n## Run segfree mode
\n\n`BAYSOR_SEGFREE`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode segfree\n```\n\n## Run preview mode
\n\n`BAYSOR_PREVIEW`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode preview\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialxe/usage) and the [parameter documentation](https://nf-co.re/spatialxe/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/spatialxe/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/spatialxe/output).\n\n## Credits\n\nnf-core/spatialxe was originally written by [Sameesh Kher](https://github.com/khersameesh24) and [Florian Heyl](https://github.com/heylf).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- Tobias Krause\n- Kre\u0161imir Be\u0161tak (kbestak)\n- Matthias H\u00f6rtenhuber (mashehu)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#spatialxe` channel](https://nfcore.slack.com/channels/spatialxe) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated 
bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/spatialxe\"\n \n

\n\n[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/spatialxe)\n[![GitHub Actions CI Status](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/spatialxe/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/spatialxe/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/spatialxe)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23spatialxe-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/spatialxe)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/spatialxe** is a bioinformatics best-practice processing and quality control pipeline for Xenium data. **The pipeline is currently under developement and not completed yet!**. The current plan for the pipeline implementation is shown in the metromap below. Please note that the pipeline steps and methods might change as we move forward in the development cycle.\n\n![nf-core/spatialxe-metromap](docs/images/spatialxe-metromap.png)\n\n## Usage\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialxe/results).\n\n## Pipeline summary\n\n## Quick Start\n\n`samplesheet.csv`:\n\n```csv\nsample,bundle,image\ntest_sample,/path/to/xenium-bundle,/path/to/morphology.ome.tif\n```\n\nNow, you can run the pipeline using:\n\n## Run image-based segmentation mode
\n\n`CELLPOSE -> BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \n```\n\n## Run coordinate-based segmentation mode
\n\n`PROSEG -> PROSEG2BAYSOR -> XR-IMPORT_SEGMENTATION -> SPATIALDATA -> QC`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode coordinate\n```\n\n## Run segfree mode
\n\n`BAYSOR_SEGFREE`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode segfree\n```\n\n## Run preview mode
\n\n`BAYSOR_PREVIEW`\n\n```bash\nnextflow run nf-core/spatialxe \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode preview\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialxe/usage) and the [parameter documentation](https://nf-co.re/spatialxe/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/spatialxe/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/spatialxe/output).\n\n## Credits\n\nnf-core/spatialxe is mainly developed by [Sameesh Kher](https://github.com/khersameesh24), [Dongze He](https://github.com/an-altosian), and [Florian Heyl](https://github.com/heylf).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- Tobias Krause\n- Kre\u0161imir Be\u0161tak (kbestak)\n- Matthias H\u00f6rtenhuber (mashehu)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#spatialxe` channel](https://nfcore.slack.com/channels/spatialxe) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core 
framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/segtraq_qc/main.nf b/subworkflows/local/segtraq_qc/main.nf new file mode 100644 index 00000000..6bda7498 --- /dev/null +++ b/subworkflows/local/segtraq_qc/main.nf @@ -0,0 +1,82 @@ +// +// Run SegTraQ QC +// + +include { SEGTRAQ_BASELINE } from '../../../modules/local/segtraq/baseline/main' +include { SEGTRAQ_CLUSTERING_STABILITY } from '../../../modules/local/segtraq/clustering_stability/main' +include { SEGTRAQ_REGION_SIMILARITY } from '../../../modules/local/segtraq/region_similarity/main' +include { SEGTRAQ_VOLUME } from '../../../modules/local/segtraq/volume/main' +include { SEGTRAQ_SUPERVISED } from '../../../modules/local/segtraq/supervised/main' +include { SEGTRAQ_POINT_STATISTICS } from '../../../modules/local/segtraq/point_statistics/main' +include { SEGTRAQ_PLOTTING } from '../../../modules/local/segtraq/plotting/main' + + + +workflow SEGTRAQ_QC { + take: + ch_spatialdata // channel: [ val(meta), path("spatialdata.zarr") ] + ch_markers + ch_cell_type_key + + main: + + ch_versions = channel.empty() + ch_qc = channel.empty() + def modules_to_run = params.segtraq_modules == 'all' ? 
+ ['baseline', 'clustering_stability', 'region_similarity', 'volume', 'supervised', 'point_statistics', 'plotting'] : params.segtraq_modules.tokenize(',') + + + if ('baseline' in modules_to_run) { + SEGTRAQ_BASELINE(ch_spatialdata) + ch_versions = ch_versions.mix(SEGTRAQ_BASELINE.out.versions) + ch_qc = ch_qc.mix(SEGTRAQ_BASELINE.out.qc_results)} + + if ('clustering_stability' in modules_to_run) { + SEGTRAQ_CLUSTERING_STABILITY(ch_spatialdata) + ch_versions = ch_versions.mix(SEGTRAQ_CLUSTERING_STABILITY.out.versions) + ch_qc = ch_qc.mix(SEGTRAQ_CLUSTERING_STABILITY.out.qc_results)} + + if ('region_similarity' in modules_to_run) { + SEGTRAQ_REGION_SIMILARITY(ch_spatialdata) + ch_versions = ch_versions.mix(SEGTRAQ_REGION_SIMILARITY.out.versions) + ch_qc = ch_qc.mix(SEGTRAQ_REGION_SIMILARITY.out.qc_results)} + + if ('volume' in modules_to_run) { + SEGTRAQ_VOLUME(ch_spatialdata) + ch_versions = ch_versions.mix(SEGTRAQ_VOLUME.out.versions) + ch_qc = ch_qc.mix(SEGTRAQ_VOLUME.out.qc_results) + } + + if ('supervised' in modules_to_run) { + if (params.segtraq_markers) { + SEGTRAQ_SUPERVISED(ch_spatialdata, ch_markers, ch_cell_type_key) + ch_versions = ch_versions.mix(SEGTRAQ_SUPERVISED.out.versions) + ch_qc = ch_qc.mix(SEGTRAQ_SUPERVISED.out.qc_results) + } else { + log.warn "SegTraQ Supervised QC was requested but 'params.segtraq_markers' is not provided. Skipping." + } + } + if ('point_statistics' in modules_to_run) { + if (params.segtraq_markers) { + SEGTRAQ_POINT_STATISTICS(ch_spatialdata, ch_markers) + ch_versions = ch_versions.mix(SEGTRAQ_POINT_STATISTICS.out.versions) + ch_qc = ch_qc.mix(SEGTRAQ_POINT_STATISTICS.out.qc_results) + } else { + log.warn "SegTraQ Point Statistics QC was requested but 'params.segtraq_markers' is not provided. Skipping." 
+ } + } + if ('plotting' in modules_to_run) { + if (params.segtraq_cell_type_key) { + SEGTRAQ_PLOTTING(ch_spatialdata, ch_cell_type_key) + ch_versions = ch_versions.mix(SEGTRAQ_PLOTTING.out.versions) + ch_qc = ch_qc.mix(SEGTRAQ_PLOTTING.out.qc_results) + } else { + log.warn "SegTraQ Plotting was requested but 'params.segtraq_cell_type_key' is not provided. Skipping." + } + } + + + emit: + qc_results = ch_qc // channel: [ val(meta), path("segtraq_qc/*/") ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/segtraq_qc/meta.yml b/subworkflows/local/segtraq_qc/meta.yml new file mode 100644 index 00000000..03d7199d --- /dev/null +++ b/subworkflows/local/segtraq_qc/meta.yml @@ -0,0 +1,49 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "segtraq_qc" +description: Run SegTraQ quality control on SpatialData objects to assess segmentation + and transcript assignment quality in spatial omics data. +keywords: + - segtraq + - quality control + - segmentation + - spatial transcriptomics +components: + - segtraq/baseline + - segtraq/clustering_stability + - segtraq/region_similarity + - segtraq/volume + - segtraq/supervised + - segtraq/point_statistics +input: + - ch_spatialdata: + type: directory + description: | + Input channel containing the sample info and the SpatialData .zarr directory + Structure: [ val(meta), path("spatialdata.zarr") ] + pattern: "*.zarr" + - markers: + type: file + description: | + Path to a JSON file containing cell-type specific markers (positive and negative). + pattern: "*.json" + - cell_type_key: + type: string + description: | + The column name in the AnnData table's .obs that contains the cell type assignments. 
+ +output: + - qc_results: + type: directory + description: | + Directory containing SegTraQ QC results + Structure: [ val(meta), path("segtraq_qc/*/") ] + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@priyaltripathi" +maintainers: + - "@priyaltripathi" diff --git a/tests/coordinate_mode.nf.test.snap b/tests/coordinate_mode.nf.test.snap index 86c88518..92ecf5b3 100644 --- a/tests/coordinate_mode.nf.test.snap +++ b/tests/coordinate_mode.nf.test.snap @@ -3,7 +3,7 @@ "content": [ { "UNTAR": { - "untar": 1.34 + "untar": "bsdtar 3.5.3 - libarchive 3.7.4 zlib/1.2.12 liblzma/5.4.3 bz2lib/1.0.8" }, "Workflow": { "nf-core/spatialxe": "v1.0dev" @@ -58,6 +58,30 @@ "coordinate/untar/test_run/gene_panel.json", "coordinate/untar/test_run/morphology.ome.tif", "coordinate/untar/test_run/transcripts.parquet", + "coordinate/untar/test_run/xenium_bundle", + "coordinate/untar/test_run/xenium_bundle/.end-of-run", + "coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + "coordinate/untar/test_run/xenium_bundle/analysis.zarr.zip", + "coordinate/untar/test_run/xenium_bundle/analysis_summary.html", + "coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "coordinate/untar/test_run/xenium_bundle/cell_boundaries.parquet", + "coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.h5", + "coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.zarr.zip", + "coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "coordinate/untar/test_run/xenium_bundle/cells.parquet", + "coordinate/untar/test_run/xenium_bundle/cells.zarr.zip", + "coordinate/untar/test_run/xenium_bundle/experiment.xenium", + "coordinate/untar/test_run/xenium_bundle/gene_panel.json", + "coordinate/untar/test_run/xenium_bundle/metrics_summary.csv", + 
"coordinate/untar/test_run/xenium_bundle/morphology.ome.tif", + "coordinate/untar/test_run/xenium_bundle/morphology_focus", + "coordinate/untar/test_run/xenium_bundle/morphology_focus/morphology_focus_0000.ome.tif", + "coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.parquet", + "coordinate/untar/test_run/xenium_bundle/transcripts.parquet", + "coordinate/untar/test_run/xenium_bundle/transcripts.zarr.zip", "coordinate/untar/versions.yml", "coordinate/xeniumranger", "coordinate/xeniumranger/import_segementation", @@ -73,27 +97,127 @@ "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", "cell-polygons.geojson:md5,d41d8cd98f00b204e9800998ecf8427e", "transcript-metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,66d4df69da12a0f8425f221f94afe76b", + "versions.yml:md5,9ff039aad2e015c87487f1a7150d9887", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,9010b5ed380b88a5b20ca6dae73345cb", + "versions.yml:md5,07f7a043ac5687b8d7fc33891b040eec", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,1dfc19eaa8a0746f17aad700d797e323", + "versions.yml:md5,f2adc6b225c56fabe67e2c1facc94870", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,5ffb7137a403934431ca87be7ad84968", + "versions.yml:md5,216cd26b1224d93ab464338288619a26", "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,4054f048e726d8faf84c982f8180a9e0", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + 
"canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + "directory": false, + "file": true, + "freeSpace": 8630976512, + "hidden": false, + "name": "analysis.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 8630976512 + }, + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "directory": false, + "file": true, + "freeSpace": 8630976512, + "hidden": false, + "name": "aux_outputs.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 8630976512 + }, + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "directory": false, + "file": 
true, + "freeSpace": 8630976512, + "hidden": false, + "name": "cell_boundaries.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 8630976512 + }, + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "directory": false, + "file": true, + "freeSpace": 8630976512, + "hidden": false, + "name": "cell_feature_matrix.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 8630976512 + }, + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "directory": false, + "file": true, + "freeSpace": 8630976512, + "hidden": false, + "name": "cells.csv.gz", + "parent": 
"/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 8630976512 + }, + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "directory": false, + "file": true, + "freeSpace": 8630976512, + "hidden": false, + "name": "nucleus_boundaries.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/6f858b4571938d7b726c3c9366a0bf19/output/coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 8630976512 + }, + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,fdc4db112522423a19362acb1fd64258", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "versions.yml:md5,7e76e3dce07f8df00a8628e74c2e6157" ] ], 
+ "timestamp": "2026-03-13T19:16:13.805444", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-29T13:54:47.676492" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 98894a1a..9acebe4d 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -3,7 +3,7 @@ "content": [ { "UNTAR": { - "untar": 1.34 + "untar": "bsdtar 3.5.3 - libarchive 3.7.4 zlib/1.2.12 liblzma/5.4.3 bz2lib/1.0.8" }, "Workflow": { "nf-core/spatialxe": "v1.0dev" @@ -58,6 +58,30 @@ "coordinate/untar/test_run/gene_panel.json", "coordinate/untar/test_run/morphology.ome.tif", "coordinate/untar/test_run/transcripts.parquet", + "coordinate/untar/test_run/xenium_bundle", + "coordinate/untar/test_run/xenium_bundle/.end-of-run", + "coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + "coordinate/untar/test_run/xenium_bundle/analysis.zarr.zip", + "coordinate/untar/test_run/xenium_bundle/analysis_summary.html", + "coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "coordinate/untar/test_run/xenium_bundle/cell_boundaries.parquet", + "coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.h5", + "coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.zarr.zip", + "coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "coordinate/untar/test_run/xenium_bundle/cells.parquet", + "coordinate/untar/test_run/xenium_bundle/cells.zarr.zip", + "coordinate/untar/test_run/xenium_bundle/experiment.xenium", + "coordinate/untar/test_run/xenium_bundle/gene_panel.json", + "coordinate/untar/test_run/xenium_bundle/metrics_summary.csv", + "coordinate/untar/test_run/xenium_bundle/morphology.ome.tif", + "coordinate/untar/test_run/xenium_bundle/morphology_focus", + 
"coordinate/untar/test_run/xenium_bundle/morphology_focus/morphology_focus_0000.ome.tif", + "coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.parquet", + "coordinate/untar/test_run/xenium_bundle/transcripts.parquet", + "coordinate/untar/test_run/xenium_bundle/transcripts.zarr.zip", "coordinate/untar/versions.yml", "coordinate/xeniumranger", "coordinate/xeniumranger/import_segementation", @@ -73,27 +97,127 @@ "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", "cell-polygons.geojson:md5,d41d8cd98f00b204e9800998ecf8427e", "transcript-metadata.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,66d4df69da12a0f8425f221f94afe76b", + "versions.yml:md5,9ff039aad2e015c87487f1a7150d9887", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,9010b5ed380b88a5b20ca6dae73345cb", + "versions.yml:md5,07f7a043ac5687b8d7fc33891b040eec", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,1dfc19eaa8a0746f17aad700d797e323", + "versions.yml:md5,f2adc6b225c56fabe67e2c1facc94870", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,5ffb7137a403934431ca87be7ad84968", + "versions.yml:md5,216cd26b1224d93ab464338288619a26", "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,4054f048e726d8faf84c982f8180a9e0", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + "canonicalPath": 
"/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + "directory": false, + "file": true, + "freeSpace": 10247581696, + "hidden": false, + "name": "analysis.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/analysis.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 10247581696 + }, + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "directory": false, + "file": true, + "freeSpace": 10247581696, + "hidden": false, + "name": "aux_outputs.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 10247581696 + }, + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "directory": false, + "file": true, + "freeSpace": 
10247581696, + "hidden": false, + "name": "cell_boundaries.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 10247581696 + }, + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "directory": false, + "file": true, + "freeSpace": 10247581696, + "hidden": false, + "name": "cell_feature_matrix.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 10247581696 + }, + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "directory": false, + "file": true, + "freeSpace": 10247581696, + "hidden": false, + "name": "cells.csv.gz", + "parent": 
"/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/cells.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 10247581696 + }, + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "directory": false, + "file": true, + "freeSpace": 10247581696, + "hidden": false, + "name": "nucleus_boundaries.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/1db1ba8542b4ea4bd9c804e1af717e9/output/coordinate/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 10247581696 + }, + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,fdc4db112522423a19362acb1fd64258", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "versions.yml:md5,7e76e3dce07f8df00a8628e74c2e6157" ] ], + 
"timestamp": "2026-03-13T16:33:40.831817", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-29T13:55:25.39051" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/tests/image_mode.nf.test.snap b/tests/image_mode.nf.test.snap index 09a19eda..e7073921 100644 --- a/tests/image_mode.nf.test.snap +++ b/tests/image_mode.nf.test.snap @@ -3,7 +3,7 @@ "content": [ { "UNTAR": { - "untar": 1.34 + "untar": "bsdtar 3.5.3 - libarchive 3.7.4 zlib/1.2.12 liblzma/5.4.3 bz2lib/1.0.8" }, "Workflow": { "nf-core/spatialxe": "v1.0dev" @@ -65,6 +65,30 @@ "image/untar/test_run/gene_panel.json", "image/untar/test_run/morphology.ome.tif", "image/untar/test_run/transcripts.parquet", + "image/untar/test_run/xenium_bundle", + "image/untar/test_run/xenium_bundle/.end-of-run", + "image/untar/test_run/xenium_bundle/analysis.tar.gz", + "image/untar/test_run/xenium_bundle/analysis.zarr.zip", + "image/untar/test_run/xenium_bundle/analysis_summary.html", + "image/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "image/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "image/untar/test_run/xenium_bundle/cell_boundaries.parquet", + "image/untar/test_run/xenium_bundle/cell_feature_matrix.h5", + "image/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "image/untar/test_run/xenium_bundle/cell_feature_matrix.zarr.zip", + "image/untar/test_run/xenium_bundle/cells.csv.gz", + "image/untar/test_run/xenium_bundle/cells.parquet", + "image/untar/test_run/xenium_bundle/cells.zarr.zip", + "image/untar/test_run/xenium_bundle/experiment.xenium", + "image/untar/test_run/xenium_bundle/gene_panel.json", + "image/untar/test_run/xenium_bundle/metrics_summary.csv", + "image/untar/test_run/xenium_bundle/morphology.ome.tif", + "image/untar/test_run/xenium_bundle/morphology_focus", + "image/untar/test_run/xenium_bundle/morphology_focus/morphology_focus_0000.ome.tif", + 
"image/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "image/untar/test_run/xenium_bundle/nucleus_boundaries.parquet", + "image/untar/test_run/xenium_bundle/transcripts.parquet", + "image/untar/test_run/xenium_bundle/transcripts.zarr.zip", "image/untar/versions.yml", "image/utility", "image/utility/resize_tif", @@ -82,29 +106,129 @@ "versions.yml:md5,a072fc212a30ce4d87379d0871eff5fb", "morphology.ome_cells_masks.tif:md5,d41d8cd98f00b204e9800998ecf8427e", "morphology.ome_cells_seg.npy:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,98fed7928a2da3470a39e1f74022a899", + "versions.yml:md5,d0820ff27c0f37874e94c7922991053b", "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,9010b5ed380b88a5b20ca6dae73345cb", + "versions.yml:md5,07f7a043ac5687b8d7fc33891b040eec", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,1dfc19eaa8a0746f17aad700d797e323", + "versions.yml:md5,f2adc6b225c56fabe67e2c1facc94870", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "fake_file.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,5ffb7137a403934431ca87be7ad84968", + "versions.yml:md5,216cd26b1224d93ab464338288619a26", "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,4054f048e726d8faf84c982f8180a9e0", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/analysis.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/analysis.tar.gz", 
+ "directory": false, + "file": true, + "freeSpace": 8638631936, + "hidden": false, + "name": "analysis.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/analysis.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 8638631936 + }, + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "directory": false, + "file": true, + "freeSpace": 8638631936, + "hidden": false, + "name": "aux_outputs.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 8638631936 + }, + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "directory": false, + "file": true, + "freeSpace": 8638631936, + "hidden": false, + "name": "cell_boundaries.csv.gz", + "parent": 
"/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 8638631936 + }, + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "directory": false, + "file": true, + "freeSpace": 8638631936, + "hidden": false, + "name": "cell_feature_matrix.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 8638631936 + }, + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cells.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cells.csv.gz", + "directory": false, + "file": true, + "freeSpace": 8638631936, + "hidden": false, + "name": "cells.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle", + "path": 
"/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/cells.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 8638631936 + }, + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "directory": false, + "file": true, + "freeSpace": 8638631936, + "hidden": false, + "name": "nucleus_boundaries.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/de9fb8c060dbe28872fd2f7f09f3598/output/image/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 8638631936 + }, + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,fdc4db112522423a19362acb1fd64258", "resized_morphology.ome_cells_masks.tif.tif:md5,d41d8cd98f00b204e9800998ecf8427e", "versions.yml:md5,686bf4196185d2e13ae2e12221233b7e" ] ], + "timestamp": "2026-03-13T19:15:50.83961", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": 
"2025-10-29T13:22:11.312536" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/tests/preview_mode.nf.test.snap b/tests/preview_mode.nf.test.snap index d07650a5..ea1872c1 100644 --- a/tests/preview_mode.nf.test.snap +++ b/tests/preview_mode.nf.test.snap @@ -3,7 +3,7 @@ "content": [ { "UNTAR": { - "untar": 1.34 + "untar": "bsdtar 3.5.3 - libarchive 3.7.4 zlib/1.2.12 liblzma/5.4.3 bz2lib/1.0.8" }, "Workflow": { "nf-core/spatialxe": "v1.0dev" @@ -32,6 +32,30 @@ "preview/untar/test_run/gene_panel.json", "preview/untar/test_run/morphology.ome.tif", "preview/untar/test_run/transcripts.parquet", + "preview/untar/test_run/xenium_bundle", + "preview/untar/test_run/xenium_bundle/.end-of-run", + "preview/untar/test_run/xenium_bundle/analysis.tar.gz", + "preview/untar/test_run/xenium_bundle/analysis.zarr.zip", + "preview/untar/test_run/xenium_bundle/analysis_summary.html", + "preview/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "preview/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "preview/untar/test_run/xenium_bundle/cell_boundaries.parquet", + "preview/untar/test_run/xenium_bundle/cell_feature_matrix.h5", + "preview/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "preview/untar/test_run/xenium_bundle/cell_feature_matrix.zarr.zip", + "preview/untar/test_run/xenium_bundle/cells.csv.gz", + "preview/untar/test_run/xenium_bundle/cells.parquet", + "preview/untar/test_run/xenium_bundle/cells.zarr.zip", + "preview/untar/test_run/xenium_bundle/experiment.xenium", + "preview/untar/test_run/xenium_bundle/gene_panel.json", + "preview/untar/test_run/xenium_bundle/metrics_summary.csv", + "preview/untar/test_run/xenium_bundle/morphology.ome.tif", + "preview/untar/test_run/xenium_bundle/morphology_focus", + "preview/untar/test_run/xenium_bundle/morphology_focus/morphology_focus_0000.ome.tif", + "preview/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + 
"preview/untar/test_run/xenium_bundle/nucleus_boundaries.parquet", + "preview/untar/test_run/xenium_bundle/transcripts.parquet", + "preview/untar/test_run/xenium_bundle/transcripts.zarr.zip", "preview/untar/versions.yml", "preview/utility", "preview/utility/preview_data", @@ -58,7 +82,107 @@ "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,4054f048e726d8faf84c982f8180a9e0", + ".end-of-run:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/analysis.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/analysis.tar.gz", + "directory": false, + "file": true, + "freeSpace": 7554719744, + "hidden": false, + "name": "analysis.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/analysis.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 7554719744 + }, + "analysis.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "analysis_summary.html:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "directory": false, + "file": true, + "freeSpace": 7554719744, + "hidden": false, + "name": "aux_outputs.tar.gz", + "parent": 
"/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/aux_outputs.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 7554719744 + }, + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "directory": false, + "file": true, + "freeSpace": 7554719744, + "hidden": false, + "name": "cell_boundaries.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cell_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 7554719744 + }, + "cell_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cell_feature_matrix.h5:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "directory": false, + "file": true, + "freeSpace": 7554719744, + "hidden": false, + "name": "cell_feature_matrix.tar.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle", + "path": 
"/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cell_feature_matrix.tar.gz", + "totalSpace": 245107195904, + "usableSpace": 7554719744 + }, + "cell_feature_matrix.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cells.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cells.csv.gz", + "directory": false, + "file": true, + "freeSpace": 7554719744, + "hidden": false, + "name": "cells.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/cells.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 7554719744 + }, + "cells.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "cells.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "experiment.xenium:md5,d41d8cd98f00b204e9800998ecf8427e", + "gene_panel.json:md5,d41d8cd98f00b204e9800998ecf8427e", + "metrics_summary.csv:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + "morphology_focus_0000.ome.tif:md5,d41d8cd98f00b204e9800998ecf8427e", + { + "absolute": true, + "absolutePath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "canonicalPath": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "directory": false, + "file": true, + "freeSpace": 7554719744, + "hidden": false, + "name": 
"nucleus_boundaries.csv.gz", + "parent": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle", + "path": "/Users/priyaltripathi/spatialxe/.nf-test/tests/341e2e101c9b232de088210fc5c70002/output/preview/untar/test_run/xenium_bundle/nucleus_boundaries.csv.gz", + "totalSpace": 245107195904, + "usableSpace": 7554719744 + }, + "nucleus_boundaries.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.parquet:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcripts.zarr.zip:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,fdc4db112522423a19362acb1fd64258", "gene_structure_mqc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "noise_distribution_mqc.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "noise_level_mqc.png:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -69,10 +193,10 @@ "versions.yml:md5,b7a26f2cff61d87a77f2db813dbc851a" ] ], + "timestamp": "2026-03-13T19:16:47.409319", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" - }, - "timestamp": "2025-10-30T20:21:33.929224" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/workflows/spatialxe.nf b/workflows/spatialxe.nf index f22c4f5c..99d63222 100644 --- a/workflows/spatialxe.nf +++ b/workflows/spatialxe.nf @@ -44,6 +44,8 @@ include { SPATIALDATA_WRITE_META_MERGE } from '../subworkflo // TODO qc layer subworkflows include { OPT_FLIP_TRACK_STAT } from '../subworkflows/local/opt_flip_track_stat/main' +// SegTraQ workflow +include { SEGTRAQ_QC } from '../subworkflows/local/segtraq_qc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -452,6 +454,23 @@ workflow SPATIALXE { } } + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SPATIALXE - SegTraQ QC LAYER + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + // check to run the qc layer + if 
(params.run_segtraq && (params.mode == 'image' || params.mode == 'coordinate')) { + SEGTRAQ_QC( + SPATIALDATA_WRITE_META_MERGE.out.sd_redefined_bundle, + params.segtraq_markers ? file(params.segtraq_markers) : [], + params.segtraq_cell_type_key + ) + ch_versions = ch_versions.mix(SEGTRAQ_QC.out.versions) + } + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~