From ea46ad0222f7ab572b30638aab418a04e9f7fcc7 Mon Sep 17 00:00:00 2001
From: an-altosian
Date: Tue, 5 May 2026 16:10:25 +0000
Subject: [PATCH 1/3] refactor(modules): convert all 19 module-level bin
 scripts to Nextflow templates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nf-core core team requires that no module-level `bin/` directory
ships in a release (PR #139, comment r3185699629 from awgymer). Move
every per-module `resources/usr/bin/*.py` into `templates/*.py` and
invoke each via Nextflow's `template` directive in the `script:` block.

- 18 modules: mechanical conversion. Replace argparse with module-top
  Groovy interpolation (RAW_BUNDLE = "${raw_bundle}", etc.). For modules
  with task.ext.args (ficture/preprocess, segger/predict,
  segger/create_dataset, xenium_patch/stitch): inject ARGS = "${args}",
  rebuild sys.argv via shlex.split, and keep the existing argparse code
  path.
- segger/create_dataset: the NUMBA_CACHE_DIR setup moves from the shell
  prelude into Python, BEFORE the numba/torch imports (a file-level
  # ruff: noqa: E402 documents the load-order requirement).
- xenium_patch/stitch: stitch_transcripts.py and stitch_postprocess.py
  are merged into one templates/stitch.py orchestrator that calls both
  phases inline (the template directive takes a single file).

Containers, inputs, outputs, and stub blocks are unchanged across all
19 modules. Pipeline-level bin/ scripts (divide_transcripts.py and
stitch_transcripts.py, used by xenium_patch/divide and
utility/reconstruct_patches) are untouched; pipeline-level bin/ remains
nf-core compliant.
---
 modules/local/baysor/create_dataset/main.nf | 7 +-
 .../usr/bin => templates}/create_dataset.py | 29 +---
 modules/local/baysor/preprocess/main.nf | 11 +-
 .../preprocess_transcripts.py | 63 ++------
 modules/local/ficture/preprocess/main.nf | 9 +-
 .../bin => templates}/ficture_preprocess.py | 36 +++--
 modules/local/segger/create_dataset/main.nf | 14 +-
 .../bin => templates}/run_create_dataset.py | 61 ++++----
 modules/local/segger/predict/main.nf | 14 +-
 .../usr/bin => templates}/run_predict.py | 32 ++++
 modules/local/spatialdata/merge/main.nf | 8 +-
 .../bin => templates}/spatialdata_merge.py | 43 ++----
 modules/local/spatialdata/meta/main.nf | 9 +-
 .../usr/bin => templates}/spatialdata_meta.py | 32 ++--
 modules/local/spatialdata/write/main.nf | 10 +-
 .../bin => templates}/spatialdata_write.py | 40 +++--
 .../local/utility/convert_mask_uint32/main.nf | 7 +-
 .../bin => templates}/convert_mask_uint32.py | 23 +--
 .../utility/downscale_morphology/main.nf | 9 +-
 .../bin => templates}/downscale_morphology.py | 28 ++--
 modules/local/utility/extract_dapi/main.nf | 8 +-
 .../usr/bin => templates}/extract_dapi.py | 31 +---
 .../utility/extract_preview_data/main.nf | 6 +-
 .../usr/bin => templates}/extract_data.py | 29 +---
 modules/local/utility/get_coordinates/main.nf | 5 +-
 .../usr/bin => templates}/get_coordinates.py | 21 +--
 modules/local/utility/parquet_to_csv/main.nf | 7 +-
 .../usr/bin => templates}/parquet_to_csv.py | 36 +----
 modules/local/utility/resize_tif/main.nf | 9 +-
 .../usr/bin => templates}/resize_tif.py | 33 ++---
 modules/local/utility/segger2xr/main.nf | 9 +-
 .../usr/bin => templates}/segger2xr.py | 37 +----
 .../local/utility/split_transcripts/main.nf | 10 +-
 .../bin => templates}/split_transcripts.py | 46 ++----
 modules/local/utility/upscale_mask/main.nf | 8 +-
 .../usr/bin => templates}/upscale_mask.py | 24 +--
 modules/local/xenium_patch/stitch/main.nf | 12 +-
 .../resources/usr/bin/stitch_postprocess.py | 98 ------------
 .../stitch.py} | 140 ++++++++++++++++--
 39 files changed, 393 insertions(+), 661 deletions(-)
 rename modules/local/baysor/create_dataset/{resources/usr/bin => templates}/create_dataset.py (76%)
 mode change 100755 => 100644
 rename modules/local/baysor/preprocess/{resources/usr/bin => templates}/preprocess_transcripts.py (66%)
 mode change 100755 => 100644
 rename modules/local/ficture/preprocess/{resources/usr/bin => templates}/ficture_preprocess.py (83%)
 mode change 100755 => 100644
 rename modules/local/segger/create_dataset/{resources/usr/bin => templates}/run_create_dataset.py (87%)
 mode change 100755 => 100644
 rename modules/local/segger/predict/{resources/usr/bin => templates}/run_predict.py (83%)
 mode change 100755 => 100644
 rename modules/local/spatialdata/merge/{resources/usr/bin => templates}/spatialdata_merge.py (55%)
 mode change 100755 => 100644
 rename modules/local/spatialdata/meta/{resources/usr/bin => templates}/spatialdata_meta.py (78%)
 mode change 100755 => 100644
 rename modules/local/spatialdata/write/{resources/usr/bin => templates}/spatialdata_write.py (78%)
 mode change 100755 => 100644
 rename modules/local/utility/convert_mask_uint32/{resources/usr/bin => templates}/convert_mask_uint32.py (60%)
 mode change 100755 => 100644
 rename modules/local/utility/downscale_morphology/{resources/usr/bin => templates}/downscale_morphology.py (79%)
 mode change 100755 => 100644
 rename modules/local/utility/extract_dapi/{resources/usr/bin => templates}/extract_dapi.py (60%)
 mode change 100755 => 100644
 rename modules/local/utility/extract_preview_data/{resources/usr/bin => templates}/extract_data.py (90%)
 mode change 100755 => 100644
 rename modules/local/utility/get_coordinates/{resources/usr/bin => templates}/get_coordinates.py (71%)
 mode change 100755 => 100644
 rename modules/local/utility/parquet_to_csv/{resources/usr/bin => templates}/parquet_to_csv.py (57%)
 mode change 100755 => 100644
 rename modules/local/utility/resize_tif/{resources/usr/bin => templates}/resize_tif.py (80%)
 mode change 100755 => 100644
 rename modules/local/utility/segger2xr/{resources/usr/bin => templates}/segger2xr.py (90%)
 mode change 100755 => 100644
 rename modules/local/utility/split_transcripts/{resources/usr/bin => templates}/split_transcripts.py (68%)
 mode change 100755 => 100644
 rename modules/local/utility/upscale_mask/{resources/usr/bin => templates}/upscale_mask.py (71%)
 mode change 100755 => 100644
 delete mode 100755 modules/local/xenium_patch/stitch/resources/usr/bin/stitch_postprocess.py
 rename modules/local/xenium_patch/stitch/{resources/usr/bin/stitch_transcripts.py => templates/stitch.py} (85%)
 mode change 100755 => 100644

diff --git a/modules/local/baysor/create_dataset/main.nf b/modules/local/baysor/create_dataset/main.nf
index d4e0043e..91aa67a0 100644
--- a/modules/local/baysor/create_dataset/main.nf
+++ b/modules/local/baysor/create_dataset/main.nf
@@ -23,12 +23,7 @@ process BAYSOR_CREATE_DATASET {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    create_dataset.py \\
-        --transcripts ${transcripts} \\
-        --sample-fraction ${sample_fraction} \\
-        --prefix ${prefix}
-    """
+    template 'create_dataset.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/baysor/create_dataset/resources/usr/bin/create_dataset.py b/modules/local/baysor/create_dataset/templates/create_dataset.py
old mode 100755
new mode 100644
similarity index 76%
rename from modules/local/baysor/create_dataset/resources/usr/bin/create_dataset.py
rename to modules/local/baysor/create_dataset/templates/create_dataset.py
index 4e5a263a..bbf05320
--- a/modules/local/baysor/create_dataset/resources/usr/bin/create_dataset.py
+++ b/modules/local/baysor/create_dataset/templates/create_dataset.py
@@ -6,12 +6,16 @@
 writing the result to a new CSV file.
 """
 
-import argparse
 import csv
 import os
 import random
 from pathlib import Path
 
+# Nextflow-injected variables
+TRANSCRIPTS = "${transcripts}"
+SAMPLE_FRACTION = "${sample_fraction}"
+PREFIX = "${prefix}"
+
 
 class BaysorPreview():
     """
@@ -62,31 +66,14 @@ def main() -> None:
     """
     Run create dataset as nf module
     """
-    parser = argparse.ArgumentParser(
-        description="Create sampled dataset for Baysor preview"
-    )
-    parser.add_argument(
-        "--transcripts", required=True,
-        help="Path to transcripts CSV file"
-    )
-    parser.add_argument(
-        "--sample-fraction", required=True, type=float,
-        help="Fraction of rows to sample"
-    )
-    parser.add_argument(
-        "--prefix", required=True,
-        help="Output directory prefix"
-    )
-    args = parser.parse_args()
-
     sampled_transcripts = "sampled_transcripts.csv"
 
     # generate dataset
     BaysorPreview.generate_dataset(
-        transcripts=args.transcripts,
+        transcripts=TRANSCRIPTS,
         sampled_transcripts=sampled_transcripts,
-        sample_fraction=args.sample_fraction,
-        prefix=args.prefix
+        sample_fraction=float(SAMPLE_FRACTION),
+        prefix=PREFIX,
     )
 
     return None
diff --git a/modules/local/baysor/preprocess/main.nf b/modules/local/baysor/preprocess/main.nf
index 7b3c6ac8..b11479d1 100644
--- a/modules/local/baysor/preprocess/main.nf
+++ b/modules/local/baysor/preprocess/main.nf
@@ -29,16 +29,7 @@ process BAYSOR_PREPROCESS_TRANSCRIPTS {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    preprocess_transcripts.py \\
-        --transcripts ${transcripts} \\
-        --prefix ${prefix} \\
-        --min-qv ${min_qv} \\
-        --min-x ${min_x} \\
-        --max-x ${max_x} \\
-        --min-y ${min_y} \\
-        --max-y ${max_y}
-    """
+    template 'preprocess_transcripts.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/baysor/preprocess/resources/usr/bin/preprocess_transcripts.py b/modules/local/baysor/preprocess/templates/preprocess_transcripts.py
old mode 100755
new mode 100644
similarity index 66%
rename from modules/local/baysor/preprocess/resources/usr/bin/preprocess_transcripts.py
rename to modules/local/baysor/preprocess/templates/preprocess_transcripts.py
index 2662f83c..25bc41dc
--- a/modules/local/baysor/preprocess/resources/usr/bin/preprocess_transcripts.py
+++ b/modules/local/baysor/preprocess/templates/preprocess_transcripts.py
@@ -6,11 +6,19 @@
 removes negative control probes, and outputs filtered CSV for Baysor
 compatibility.
 """
-import argparse
 import os
 
 import pandas as pd
 
+# Nextflow-injected variables
+TRANSCRIPTS = "${transcripts}"
+PREFIX = "${prefix}"
+MIN_QV = "${min_qv}"
+MIN_X = "${min_x}"
+MAX_X = "${max_x}"
+MIN_Y = "${min_y}"
+MAX_Y = "${max_y}"
+
 
 def filter_transcripts(
     transcripts: str,
@@ -70,53 +78,14 @@ def main() -> None:
     """
     Run preprocess transcripts as nf module.
""" - parser = argparse.ArgumentParser( - description="Preprocess Xenium transcripts for Baysor" - ) - parser.add_argument( - "--transcripts", required=True, help="Path to transcripts parquet file" - ) - parser.add_argument("--prefix", required=True, help="Output directory prefix") - parser.add_argument( - "--min-qv", - type=float, - default=20.0, - help="Minimum Q-Score threshold (default: 20.0)", - ) - parser.add_argument( - "--min-x", - type=float, - default=0.0, - help="Minimum x-coordinate threshold (default: 0.0)", - ) - parser.add_argument( - "--max-x", - type=float, - default=24000.0, - help="Maximum x-coordinate threshold (default: 24000.0)", - ) - parser.add_argument( - "--min-y", - type=float, - default=0.0, - help="Minimum y-coordinate threshold (default: 0.0)", - ) - parser.add_argument( - "--max-y", - type=float, - default=24000.0, - help="Maximum y-coordinate threshold (default: 24000.0)", - ) - args = parser.parse_args() - filter_transcripts( - transcripts=args.transcripts, - min_qv=args.min_qv, - min_x=args.min_x, - max_x=args.max_x, - min_y=args.min_y, - max_y=args.max_y, - prefix=args.prefix, + transcripts=TRANSCRIPTS, + min_qv=float(MIN_QV), + min_x=float(MIN_X), + max_x=float(MAX_X), + min_y=float(MIN_Y), + max_y=float(MAX_Y), + prefix=PREFIX, ) return None diff --git a/modules/local/ficture/preprocess/main.nf b/modules/local/ficture/preprocess/main.nf index 7ec3c081..76005c10 100644 --- a/modules/local/ficture/preprocess/main.nf +++ b/modules/local/ficture/preprocess/main.nf @@ -21,15 +21,8 @@ process FICTURE_PREPROCESS { script: def args = task.ext.args ?: '' - def features_arg = features ? "--features ${features}" : "" - """ - ficture_preprocess.py \\ - --transcripts ${transcripts} \\ - ${features_arg} \\ - --negative-control-regex '${params.negative_control_regex}' \\ - ${args} - """ + template 'ficture_preprocess.py' stub: """ diff --git a/modules/local/ficture/preprocess/resources/usr/bin/ficture_preprocess.py b/modules/local/ficture/preprocess/templates/ficture_preprocess.py old mode 100755 new mode 100644 similarity index 83% rename from modules/local/ficture/preprocess/resources/usr/bin/ficture_preprocess.py rename to modules/local/ficture/preprocess/templates/ficture_preprocess.py index 2e0c687c..92ae9721 --- a/modules/local/ficture/preprocess/resources/usr/bin/ficture_preprocess.py +++ b/modules/local/ficture/preprocess/templates/ficture_preprocess.py @@ -1,18 +1,40 @@ #!/usr/bin/env python3 """Preprocess Xenium transcripts for FICTURE analysis.""" -import argparse import gzip import logging import os import re +import shlex import sys import pandas as pd +# Nextflow-injected variables +TRANSCRIPTS = "${transcripts}" +FEATURES = "${features}" +NEGATIVE_CONTROL_REGEX = "${params.negative_control_regex}" +ARGS = "${args}" + + +def main(): + """Run FICTURE preprocessing.""" + print("[START]") + + # Re-build argv so parse_known_args() can absorb any extra flags from task.ext.args + sys.argv = [ + sys.argv[0], + "--transcripts", + TRANSCRIPTS, + "--negative-control-regex", + NEGATIVE_CONTROL_REGEX, + ] + if FEATURES: + sys.argv += ["--features", FEATURES] + sys.argv += shlex.split(ARGS) + + import argparse -def parse_args(): - """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Preprocess Xenium transcripts for FICTURE" ) @@ -25,13 +47,7 @@ def parse_args(): parser.add_argument( "--negative-control-regex", default="", help="Regex for negative control probes" ) - return parser.parse_args() - - -def main(): - """Run FICTURE 
-    args = parse_args()
-    print("[START]")
+    args, _ = parser.parse_known_args()
 
     negctrl_regex = "BLANK|NegCon"
     if args.negative_control_regex:
diff --git a/modules/local/segger/create_dataset/main.nf b/modules/local/segger/create_dataset/main.nf
index 81320eff..2b03d2c9 100644
--- a/modules/local/segger/create_dataset/main.nf
+++ b/modules/local/segger/create_dataset/main.nf
@@ -29,19 +29,7 @@ process SEGGER_CREATE_DATASET {
         error("${params.format} is an invalid platform type.")
     }
 
-    """
-    export NUMBA_CACHE_DIR=\$PWD/.numba_cache
-    mkdir -p \$NUMBA_CACHE_DIR
-
-    run_create_dataset.py \\
-        --bundle-dir ${base_dir} \\
-        --output-dir ${prefix} \\
-        --sample-type ${params.format} \\
-        --tile-width ${params.tile_width} \\
-        --tile-height ${params.tile_height} \\
-        --n-workers ${task.cpus} \\
-        ${args}
-    """
+    template 'run_create_dataset.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/segger/create_dataset/resources/usr/bin/run_create_dataset.py b/modules/local/segger/create_dataset/templates/run_create_dataset.py
old mode 100755
new mode 100644
similarity index 87%
rename from modules/local/segger/create_dataset/resources/usr/bin/run_create_dataset.py
rename to modules/local/segger/create_dataset/templates/run_create_dataset.py
index c73ab006..28df3cff
--- a/modules/local/segger/create_dataset/resources/usr/bin/run_create_dataset.py
+++ b/modules/local/segger/create_dataset/templates/run_create_dataset.py
@@ -11,8 +11,14 @@
 Each WORKAROUND should be removable when the upstream segger bug is fixed.
 """
 
-import argparse
+# ruff: noqa: E402 -- NUMBA_CACHE_DIR must be set before subsequent imports
+# (segger / torch transitively pull in numba). Order matters; do not reorder.
 import os
+
+os.environ["NUMBA_CACHE_DIR"] = os.path.join(os.getcwd(), ".numba_cache")
+os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)
+
+import shlex
 import shutil
 import subprocess
 import sys
@@ -26,18 +32,14 @@
 
 SEGGER_CLI = "/workspace/segger_dev/src/segger/cli/create_dataset_fast.py"
 
-
-def parse_args():
-    p = argparse.ArgumentParser()
-    p.add_argument("--bundle-dir", required=True)
-    p.add_argument("--output-dir", required=True)
-    p.add_argument("--sample-type", required=True, choices=["xenium"])
-    p.add_argument("--tile-width", type=int, required=True)
-    p.add_argument("--tile-height", type=int, required=True)
-    p.add_argument("--n-workers", type=int, required=True)
-    # remaining args forwarded to segger CLI
-    args, extra = p.parse_known_args()
-    return args, extra
+# Nextflow-injected variables
+BUNDLE_DIR = "${base_dir}"
+OUTPUT_DIR = "${prefix}"
+SAMPLE_TYPE = "${params.format}"
+TILE_WIDTH = "${params.tile_width}"
+TILE_HEIGHT = "${params.tile_height}"
+N_WORKERS = "${task.cpus}"
+ARGS = "${args}"
 
 
 def prepare_bundle(bundle_dir):
@@ -110,22 +112,22 @@ def add_parquet_stats():
     print("=== End Debug ===\n")
 
 
-def run_segger_cli(args, extra):
+def run_segger_cli(output_dir, sample_type, tile_width, tile_height, n_workers, extra):
     cmd = [
         "python3",
         SEGGER_CLI,
         "--base_dir",
         "bundle_stats",
         "--data_dir",
-        args.output_dir,
+        output_dir,
         "--sample_type",
-        args.sample_type,
+        sample_type,
         "--tile_width",
-        str(args.tile_width),
+        str(tile_width),
         "--tile_height",
-        str(args.tile_height),
+        str(tile_height),
         "--n_workers",
-        str(args.n_workers),
+        str(n_workers),
         *extra,
     ]
     print(f"Running: {' '.join(cmd)}")
@@ -228,13 +230,9 @@ def fix_bd_x_nan(prefix):
 
 
 def main():
-    args, extra = parse_args()
+    extra = shlex.split(ARGS)
 
-    # Ensure numba cache dir is writable (env var should be set by caller, but belt-and-suspenders)
-    os.environ.setdefault("NUMBA_CACHE_DIR", os.path.join(os.getcwd(), ".numba_cache"))
-    os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)
-
-    prepare_bundle(args.bundle_dir)
+    prepare_bundle(BUNDLE_DIR)
 
     print("Adding statistics to parquet files...")
     add_parquet_stats()
@@ -243,10 +241,17 @@ def main():
     for item in sorted(Path("bundle_stats").iterdir()):
         print(f"  {item.name}")
 
-    run_segger_cli(args, extra)
+    run_segger_cli(
+        OUTPUT_DIR,
+        SAMPLE_TYPE,
+        int(TILE_WIDTH),
+        int(TILE_HEIGHT),
+        int(N_WORKERS),
+        extra,
+    )
 
-    filter_trainable_tiles_if_needed(args.output_dir)
-    fix_bd_x_nan(args.output_dir)
+    filter_trainable_tiles_if_needed(OUTPUT_DIR)
+    fix_bd_x_nan(OUTPUT_DIR)
 
 
 if __name__ == "__main__":
diff --git a/modules/local/segger/predict/main.nf b/modules/local/segger/predict/main.nf
index 0da7a594..d8384c74 100644
--- a/modules/local/segger/predict/main.nf
+++ b/modules/local/segger/predict/main.nf
@@ -26,18 +26,8 @@ process SEGGER_PREDICT {
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    run_predict.py \\
-        --models-dir ${models_dir} \\
-        --segger-data-dir ${segger_dataset} \\
-        --transcripts-file ${transcripts} \\
-        --benchmarks-dir benchmarks_dir \\
-        --batch-size ${params.batch_size_predict} \\
-        --use-cc ${params.cc_analysis} \\
-        --knn-method ${params.segger_knn_method} \\
-        --num-workers ${task.cpus} \\
-        ${args}
-    """
+
+    template 'run_predict.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/segger/predict/resources/usr/bin/run_predict.py b/modules/local/segger/predict/templates/run_predict.py
old mode 100755
new mode 100644
similarity index 83%
rename from modules/local/segger/predict/resources/usr/bin/run_predict.py
rename to modules/local/segger/predict/templates/run_predict.py
index 56a77ffc..ca2051a7
--- a/modules/local/segger/predict/resources/usr/bin/run_predict.py
+++ b/modules/local/segger/predict/templates/run_predict.py
@@ -12,12 +12,23 @@
 
 import argparse
 import os
+import shlex
 import subprocess
 import sys
 
 
 SEGGER_CLI = "/workspace/segger_dev/src/segger/cli/predict_fast.py"
 
+# Nextflow-injected variables
+MODELS_DIR = "${models_dir}"
+SEGGER_DATASET = "${segger_dataset}"
+TRANSCRIPTS = "${transcripts}"
+BATCH_SIZE = "${params.batch_size_predict}"
+USE_CC = "${params.cc_analysis}"
+KNN_METHOD = "${params.segger_knn_method}"
+NUM_WORKERS = "${task.cpus}"
+ARGS = "${args}"
+
 
 def parse_args():
     p = argparse.ArgumentParser()
@@ -113,6 +124,27 @@ def run_segger_cli(args, extra, gpu_ids):
 
 
 def main():
+    # Re-build argv from Nextflow-injected constants + any extra task.ext.args flags
+    sys.argv = [
+        sys.argv[0],
+        "--models-dir",
+        MODELS_DIR,
+        "--segger-data-dir",
+        SEGGER_DATASET,
+        "--transcripts-file",
+        TRANSCRIPTS,
+        "--benchmarks-dir",
+        "benchmarks_dir",
+        "--batch-size",
+        BATCH_SIZE,
+        "--use-cc",
+        USE_CC,
+        "--knn-method",
+        KNN_METHOD,
+        "--num-workers",
+        NUM_WORKERS,
+    ] + shlex.split(ARGS)
+
     args, extra = parse_args()
 
     # Limit cupy GPU memory to 80% so PyTorch has headroom for graph attention ops
diff --git a/modules/local/spatialdata/merge/main.nf b/modules/local/spatialdata/merge/main.nf
index 9a57f877..9ff4b112 100644
--- a/modules/local/spatialdata/merge/main.nf
+++ b/modules/local/spatialdata/merge/main.nf
@@ -25,13 +25,7 @@ process SPATIALDATA_MERGE {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    spatialdata_merge.py \\
-        --raw-bundle ${raw_bundle} \\
-        --redefined-bundle ${redefined_bundle} \\
-        --prefix ${prefix} \\
-        --output-folder ${outputfolder}
-    """
+    template 'spatialdata_merge.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/spatialdata/merge/resources/usr/bin/spatialdata_merge.py b/modules/local/spatialdata/merge/templates/spatialdata_merge.py
old mode 100755
new mode 100644
similarity index 55%
rename from modules/local/spatialdata/merge/resources/usr/bin/spatialdata_merge.py
rename to modules/local/spatialdata/merge/templates/spatialdata_merge.py
index 409d8c00..cd55e00c
--- a/modules/local/spatialdata/merge/resources/usr/bin/spatialdata_merge.py
+++ b/modules/local/spatialdata/merge/templates/spatialdata_merge.py
@@ -1,45 +1,36 @@
 #!/usr/bin/env python3
 """Merge two spatialdata bundles to create a layered spatialdata object."""
 
-import argparse
 import json
 import os
 import shutil
 
-import spatialdata
+import spatialdata  # noqa: F401 (kept so versions topic via `import spatialdata` is valid)
 
-
-def parse_args():
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(description="Merge two spatialdata bundles")
-    parser.add_argument("--raw-bundle", required=True, help="Path to raw spatialdata bundle")
-    parser.add_argument("--redefined-bundle", required=True, help="Path to redefined spatialdata bundle")
-    parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)")
-    parser.add_argument("--output-folder", required=True, help="Output folder name")
-    return parser.parse_args()
+# Nextflow-injected variables
+RAW_BUNDLE = "${raw_bundle}"
+REDEFINED_BUNDLE = "${redefined_bundle}"
+PREFIX = "${prefix}"
+OUTPUT_FOLDER = "${outputfolder}"
 
 
 def main():
     """Run spatialdata merge."""
-    args = parse_args()
     print("[START]")
 
-    output_dir = f"spatialdata/{args.prefix}/{args.output_folder}"
+    output_dir = f"spatialdata/{PREFIX}/{OUTPUT_FOLDER}"
 
-    # Ensure the output folder exists
     if os.path.exists(output_dir):
         shutil.rmtree(output_dir)
     os.makedirs(output_dir)
 
-    # Copy the entire reference bundle as is
-    for root, _, files in os.walk(args.raw_bundle):
-        rel_path = os.path.relpath(root, args.raw_bundle)
+    for root, _, files in os.walk(RAW_BUNDLE):
+        rel_path = os.path.relpath(root, RAW_BUNDLE)
         target_path = os.path.join(output_dir, rel_path)
         os.makedirs(target_path, exist_ok=True)
         for file in files:
             shutil.copy(os.path.join(root, file), os.path.join(target_path, file))
 
-    # Rename folders in Points, Shapes, and Tables to raw_*
     for category in ["points", "shapes", "tables"]:
         category_path = os.path.join(output_dir, category)
         if os.path.exists(category_path):
@@ -49,9 +40,8 @@ def main():
             new_path = os.path.join(category_path, f"raw_{folder}")
             os.rename(old_path, new_path)
 
-    # Copy folders from redefined_bundle and rename them as redefined_*
     for category in ["points", "shapes", "tables"]:
-        add_category_path = os.path.join(args.redefined_bundle, category)
+        add_category_path = os.path.join(REDEFINED_BUNDLE, category)
         output_category_path = os.path.join(output_dir, category)
         os.makedirs(output_category_path, exist_ok=True)
 
@@ -62,17 +52,16 @@ def main():
             shutil.copytree(src_folder, dest_folder)
 
     # Invalidate consolidated metadata in zarr.json -- the directory renames above
-    # made the element paths in the metadata stale (e.g., 'points/transcripts' ->
-    # 'points/raw_transcripts'). Without consolidated metadata, sd.read_zarr()
-    # discovers elements by scanning the filesystem directly.
+    # made the element paths in the metadata stale. Without consolidated metadata,
+    # sd.read_zarr() discovers elements by scanning the filesystem directly.
     zarr_json = os.path.join(output_dir, "zarr.json")
     if os.path.exists(zarr_json):
         with open(zarr_json) as f:
-            meta = json.load(f)
-            if "consolidated_metadata" in meta:
-                del meta["consolidated_metadata"]
+            meta_obj = json.load(f)
+            if "consolidated_metadata" in meta_obj:
+                del meta_obj["consolidated_metadata"]
         with open(zarr_json, "w") as f:
-            json.dump(meta, f)
+            json.dump(meta_obj, f)
         print("[NOTE] Removed stale consolidated metadata from zarr.json")
 
     print("[FINISH]")
diff --git a/modules/local/spatialdata/meta/main.nf b/modules/local/spatialdata/meta/main.nf
index 40e2efae..c3665b1f 100644
--- a/modules/local/spatialdata/meta/main.nf
+++ b/modules/local/spatialdata/meta/main.nf
@@ -25,14 +25,7 @@ process SPATIALDATA_META {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    spatialdata_meta.py \\
-        --spatialdata-bundle ${spatialdata_bundle} \\
-        --xenium-bundle ${xenium_bundle} \\
-        --prefix ${prefix} \\
-        --metadata '${meta}' \\
-        --output-folder ${outputfolder}
-    """
+    template 'spatialdata_meta.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/spatialdata/meta/resources/usr/bin/spatialdata_meta.py b/modules/local/spatialdata/meta/templates/spatialdata_meta.py
old mode 100755
new mode 100644
similarity index 78%
rename from modules/local/spatialdata/meta/resources/usr/bin/spatialdata_meta.py
rename to modules/local/spatialdata/meta/templates/spatialdata_meta.py
index 935f39b2..e7f4d3d2
--- a/modules/local/spatialdata/meta/resources/usr/bin/spatialdata_meta.py
+++ b/modules/local/spatialdata/meta/templates/spatialdata_meta.py
@@ -1,13 +1,12 @@
 #!/usr/bin/env python3
 """Add metadata to SpatialData bundle."""
 
-import argparse
 import json
 import sys
 
 import pandas as pd
 import spatialdata as sd
-import zarr
+import zarr  # noqa: F401 (preserved from original; ensures zarr is loaded before zarr.core.group)
 
 # Fix zarr v3 + anndata + numcodecs incompatibility:
 # anndata's string writer passes numcodecs.VLenUTF8 to zarr.Group.create_array,
@@ -17,6 +16,13 @@
 import numcodecs
 import zarr.core.group as _zarr_group
 
+# Nextflow-injected variables
+SPATIALDATA_BUNDLE = "${spatialdata_bundle}"
+XENIUM_BUNDLE = "${xenium_bundle}"
+PREFIX = "${prefix}"
+METADATA = "${meta}"
+OUTPUT_FOLDER = "${outputfolder}"
+
 _orig_create_array = _zarr_group.Group.create_array
 
 
@@ -70,27 +76,15 @@ def convert_arrow_to_numpy(sdata):
         _convert_df_arrow_to_numpy(adata.var)
 
 
-def parse_args():
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(description="Add metadata to SpatialData bundle")
-    parser.add_argument("--spatialdata-bundle", required=True, help="Path to spatialdata bundle")
-    parser.add_argument("--xenium-bundle", required=True, help="Path to xenium bundle")
-    parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)")
-    parser.add_argument("--metadata", required=True, help="Metadata string from Nextflow meta map")
-    parser.add_argument("--output-folder", required=True, help="Output folder name")
-    return parser.parse_args()
-
-
 def main():
     """Run spatialdata metadata addition."""
-    args = parse_args()
     print("[START]")
 
-    sdata = sd.read_zarr(args.spatialdata_bundle)
+    sdata = sd.read_zarr(SPATIALDATA_BUNDLE)
 
     # Convert metadata into dict
     print("[NOTE] Read in provenance ...")
-    metadata = args.metadata.strip("[]")  # Remove square brackets
+    metadata = METADATA.strip("[]")  # Remove square brackets
     pairs = metadata.split(", ")  # Split by comma and space
     metadata = {k: v for k, v in (pair.split(":") for pair in pairs)}  # Create dictionary
 
@@ -103,7 +97,7 @@ def main():
     # Add experimental metadata
     print("[NOTE] Read in experiment metadata ...")
     sdata['raw_table'].uns['experiment_xenium'] = ''
-    metadata_experiment = f'{args.xenium_bundle}/experiment.xenium'
+    metadata_experiment = f'{XENIUM_BUNDLE}/experiment.xenium'
     with open(metadata_experiment, "r") as f:
         metadata_experiment = json.load(f)
     sdata['raw_table'].uns['experiment_xenium'] = json.dumps(metadata_experiment)
@@ -111,13 +105,13 @@ def main():
 
     # Add gene panel metadata
     print("[NOTE] Read in gene panel metadata ...")
     sdata['raw_table'].uns['gene_panel'] = ''
-    metadata_gene_panel = f'{args.xenium_bundle}/gene_panel.json'
+    metadata_gene_panel = f'{XENIUM_BUNDLE}/gene_panel.json'
     with open(metadata_gene_panel, "r") as f:
         metadata_gene_panel = json.load(f)
     sdata['raw_table'].uns['gene_panel'] = json.dumps(metadata_gene_panel)
 
     convert_arrow_to_numpy(sdata)
-    sdata.write(f"spatialdata/{args.prefix}/{args.output_folder}", overwrite=True, consolidate_metadata=True, sdata_formats=None)
+    sdata.write(f"spatialdata/{PREFIX}/{OUTPUT_FOLDER}", overwrite=True, consolidate_metadata=True, sdata_formats=None)
 
     print("[FINISH]")
diff --git a/modules/local/spatialdata/write/main.nf b/modules/local/spatialdata/write/main.nf
index 43acc073..1144b7c9 100644
--- a/modules/local/spatialdata/write/main.nf
+++ b/modules/local/spatialdata/write/main.nf
@@ -27,15 +27,7 @@ process SPATIALDATA_WRITE {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    spatialdata_write.py \\
-        --bundle ${bundle} \\
-        --prefix ${prefix} \\
-        --output-folder ${outputfolder} \\
-        --segmented-object ${segmented_object} \\
-        --coordinate-space ${coordinate_space} \\
-        --format ${params.format}
-    """
+    template 'spatialdata_write.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/spatialdata/write/resources/usr/bin/spatialdata_write.py b/modules/local/spatialdata/write/templates/spatialdata_write.py
old mode 100755
new mode 100644
similarity index 78%
rename from modules/local/spatialdata/write/resources/usr/bin/spatialdata_write.py
rename to modules/local/spatialdata/write/templates/spatialdata_write.py
index 421e830f..9428dccd
--- a/modules/local/spatialdata/write/resources/usr/bin/spatialdata_write.py
+++ b/modules/local/spatialdata/write/templates/spatialdata_write.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python3
 """Write spatialdata object from segmentation format."""
 
-import argparse
 import sys
 
 import pandas as pd
-import spatialdata
+import spatialdata  # noqa: F401 (preserved from original; ensures spatialdata loads before spatialdata_io)
 from spatialdata_io import xenium
 
 # Fix zarr v3 + anndata + numcodecs incompatibility:
@@ -16,6 +15,14 @@
 import numcodecs
 import zarr.core.group as _zarr_group
 
+# Nextflow-injected variables
+BUNDLE = "${bundle}"
+PREFIX = "${prefix}"
+OUTPUT_FOLDER = "${outputfolder}"
+SEGMENTED_OBJECT = "${segmented_object}"
+COORDINATE_SPACE = "${coordinate_space}"
+FORMAT = "${params.format}"
+
 _orig_create_array = _zarr_group.Group.create_array
 
 
@@ -77,21 +84,8 @@ def convert_arrow_to_numpy(sdata):
         _convert_df_arrow_to_numpy(adata.var)
 
 
-def parse_args():
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(description="Write spatialdata object from segmentation format")
-    parser.add_argument("--bundle", required=True, help="Path to input bundle")
-    parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)")
-    parser.add_argument("--output-folder", required=True, help="Output folder name")
-    parser.add_argument("--segmented-object", required=True, help="Segmented object type (cells, nuclei, cells_and_nuclei)")
-    parser.add_argument("--coordinate-space", required=True, help="Coordinate space (pixels, microns)")
-    parser.add_argument("--format", required=True, help="Input format (xenium)")
-    return parser.parse_args()
-
-
 def main():
     """Run spatialdata write."""
-    args = parse_args()
     print("[START]")
 
     cells_as_circles = False
@@ -100,13 +94,13 @@ def main():
     cells_labels = False
     nucleus_labels = False
 
-    if args.segmented_object == "cells":
+    if SEGMENTED_OBJECT == "cells":
         cells_boundaries = True
         cells_labels = True
-    elif args.segmented_object == "nuclei":
+    elif SEGMENTED_OBJECT == "nuclei":
         nucleus_boundaries = True
         nucleus_labels = True
-    elif args.segmented_object == "cells_and_nuclei":
+    elif SEGMENTED_OBJECT == "cells_and_nuclei":
         cells_boundaries = True
         nucleus_boundaries = True
         cells_labels = True
@@ -115,7 +109,7 @@ def main():
         cells_as_circles = False
 
     # set sd variables based on the coordinate space
-    if args.coordinate_space == "pixels":
+    if COORDINATE_SPACE == "pixels":
         cells_labels = True
         nucleus_labels = True
         # Labels are sufficient in pixel space; boundaries can contain
@@ -124,16 +118,16 @@ def main():
         cells_boundaries = False
         nucleus_boundaries = False
 
-    if args.coordinate_space == "microns":
+    if COORDINATE_SPACE == "microns":
         cells_labels = False
         cells_boundaries = True
         nucleus_boundaries = False
         nucleus_labels = False
         cells_as_circles = False
 
-    if args.format == "xenium":
+    if FORMAT == "xenium":
         sd_xenium_obj = xenium(
-            args.bundle,
+            BUNDLE,
             cells_as_circles=cells_as_circles,
             cells_boundaries=cells_boundaries,
             nucleus_boundaries=nucleus_boundaries,
@@ -145,7 +139,7 @@ def main():
         )
         print(sd_xenium_obj)
         convert_arrow_to_numpy(sd_xenium_obj)
-        sd_xenium_obj.write(f"spatialdata/{args.prefix}/{args.output_folder}")
+        sd_xenium_obj.write(f"spatialdata/{PREFIX}/{OUTPUT_FOLDER}")
     else:
         sys.exit("[ERROR] Format not found")
 
diff --git a/modules/local/utility/convert_mask_uint32/main.nf b/modules/local/utility/convert_mask_uint32/main.nf
index 3f0333a7..b80acfad 100644
--- a/modules/local/utility/convert_mask_uint32/main.nf
+++ b/modules/local/utility/convert_mask_uint32/main.nf
@@ -34,11 +34,8 @@ process CONVERT_MASK_UINT32 {
 
     script:
     prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    convert_mask_uint32.py \\
-        --input ${mask} \\
-        --output ${prefix}_uint32_mask.tif
-    """
+
+    template 'convert_mask_uint32.py'
 
     stub:
     prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/modules/local/utility/convert_mask_uint32/resources/usr/bin/convert_mask_uint32.py b/modules/local/utility/convert_mask_uint32/templates/convert_mask_uint32.py
old mode 100755
new mode 100644
similarity index 60%
rename from modules/local/utility/convert_mask_uint32/resources/usr/bin/convert_mask_uint32.py
rename to modules/local/utility/convert_mask_uint32/templates/convert_mask_uint32.py
index 955ad4b7..f0a6835d
--- a/modules/local/utility/convert_mask_uint32/resources/usr/bin/convert_mask_uint32.py
+++ b/modules/local/utility/convert_mask_uint32/templates/convert_mask_uint32.py
@@ -7,11 +7,13 @@
 the input mask, casts it to uint32, and writes the result.
""" -import argparse - import numpy as np import tifffile +# Nextflow-injected variables +INPUT_PATH = "${mask}" +OUTPUT_PATH = "${prefix}_uint32_mask.tif" + def convert_mask_to_uint32(input_path: str, output_path: str) -> None: """ @@ -27,20 +29,5 @@ def convert_mask_to_uint32(input_path: str, output_path: str) -> None: print("Output dtype: uint32") -def parse_args() -> argparse.Namespace: - """Parse command-line arguments.""" - parser = argparse.ArgumentParser( - description="Convert a segmentation mask TIFF to uint32 dtype." - ) - parser.add_argument( - "--input", required=True, help="Path to input mask TIFF" - ) - parser.add_argument( - "--output", required=True, help="Path where uint32 mask will be written" - ) - return parser.parse_args() - - if __name__ == "__main__": - args = parse_args() - convert_mask_to_uint32(input_path=args.input, output_path=args.output) + convert_mask_to_uint32(input_path=INPUT_PATH, output_path=OUTPUT_PATH) diff --git a/modules/local/utility/downscale_morphology/main.nf b/modules/local/utility/downscale_morphology/main.nf index edaf3d67..39620bd6 100644 --- a/modules/local/utility/downscale_morphology/main.nf +++ b/modules/local/utility/downscale_morphology/main.nf @@ -40,13 +40,8 @@ process DOWNSCALE_MORPHOLOGY { def diameter = task.ext.diameter ?: 9 def diam_mean = 30 prefix = task.ext.prefix ?: "${meta.id}" - """ - downscale_morphology.py \\ - --image ${image} \\ - --diameter ${diameter} \\ - --diam-mean ${diam_mean} \\ - --prefix ${prefix} - """ + + template 'downscale_morphology.py' stub: prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/utility/downscale_morphology/resources/usr/bin/downscale_morphology.py b/modules/local/utility/downscale_morphology/templates/downscale_morphology.py old mode 100755 new mode 100644 similarity index 79% rename from modules/local/utility/downscale_morphology/resources/usr/bin/downscale_morphology.py rename to modules/local/utility/downscale_morphology/templates/downscale_morphology.py index 8544ecf3..2f0f1e7d --- a/modules/local/utility/downscale_morphology/resources/usr/bin/downscale_morphology.py +++ b/modules/local/utility/downscale_morphology/templates/downscale_morphology.py @@ -13,13 +13,18 @@ {prefix}/scale_info.json - Scale factor and original/new dimensions. """ -import argparse import json from pathlib import Path import tifffile from skimage.transform import resize +# Nextflow-injected variables +IMAGE_PATH = "${image}" +DIAMETER = float("${diameter}") +DIAM_MEAN = float("${diam_mean}") +PREFIX = "${prefix}" + # Cellpose network requires a minimum spatial size of 256 px. MIN_DIM = 256 @@ -81,23 +86,10 @@ def downscale_image( print(f"Done: downscaled.tif written, shape={img_ds.shape}") -def parse_args() -> argparse.Namespace: - """Parse command-line arguments.""" - parser = argparse.ArgumentParser( - description="Pre-downscale a morphology image for Cellpose." 
-    )
-    parser.add_argument("--image", required=True, help="Morphology TIFF input")
-    parser.add_argument("--diameter", type=float, required=True, help="Target object diameter")
-    parser.add_argument("--diam-mean", type=float, required=True, help="Cellpose model diam_mean")
-    parser.add_argument("--prefix", required=True, help="Output directory")
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
     downscale_image(
-        image_path=args.image,
-        diameter=args.diameter,
-        diam_mean=args.diam_mean,
-        prefix=args.prefix,
+        image_path=IMAGE_PATH,
+        diameter=DIAMETER,
+        diam_mean=DIAM_MEAN,
+        prefix=PREFIX,
     )
diff --git a/modules/local/utility/extract_dapi/main.nf b/modules/local/utility/extract_dapi/main.nf
index 79cce91f..206e6519 100644
--- a/modules/local/utility/extract_dapi/main.nf
+++ b/modules/local/utility/extract_dapi/main.nf
@@ -35,12 +35,8 @@ process EXTRACT_DAPI {
     script:
     prefix = task.ext.prefix ?: "${meta.id}"
    def channel_index = task.ext.channel_index ?: 0
-    """
-    extract_dapi.py \\
-        --input ${image} \\
-        --output ${prefix}_dapi.tif \\
-        --channel-index ${channel_index}
-    """
+
+    template 'extract_dapi.py'
 
     stub:
     prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/modules/local/utility/extract_dapi/resources/usr/bin/extract_dapi.py b/modules/local/utility/extract_dapi/templates/extract_dapi.py
old mode 100755
new mode 100644
similarity index 60%
rename from modules/local/utility/extract_dapi/resources/usr/bin/extract_dapi.py
rename to modules/local/utility/extract_dapi/templates/extract_dapi.py
index 3d60f563..f0d9b9f2
--- a/modules/local/utility/extract_dapi/resources/usr/bin/extract_dapi.py
+++ b/modules/local/utility/extract_dapi/templates/extract_dapi.py
@@ -8,10 +8,13 @@
 the requested channel, and writes the result.
 """
 
-import argparse
-
 import tifffile
 
+# Nextflow-injected variables
+INPUT_PATH = "${image}"
+OUTPUT_PATH = "${prefix}_dapi.tif"
+CHANNEL_INDEX = int("${channel_index}")
+
 
 def extract_channel(input_path: str, output_path: str, channel_index: int) -> None:
     """
@@ -34,27 +37,9 @@ def extract_channel(input_path: str, output_path: str, channel_index: int) -> None:
     print(f"Input shape: {orig_shape} -> extracted channel {channel_index}: {img.shape}")
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Extract a single channel from a multi-channel OME-TIFF."
-    )
-    parser.add_argument(
-        "--input", required=True, help="Path to multi-channel OME-TIFF morphology image"
-    )
-    parser.add_argument(
-        "--output", required=True, help="Path where the single-channel TIFF will be written"
-    )
-    parser.add_argument(
-        "--channel-index", type=int, default=0, help="Channel index to extract (default: 0)"
-    )
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
     extract_channel(
-        input_path=args.input,
-        output_path=args.output,
-        channel_index=args.channel_index,
+        input_path=INPUT_PATH,
+        output_path=OUTPUT_PATH,
+        channel_index=CHANNEL_INDEX,
     )
diff --git a/modules/local/utility/extract_preview_data/main.nf b/modules/local/utility/extract_preview_data/main.nf
index fb07df29..34d997fb 100644
--- a/modules/local/utility/extract_preview_data/main.nf
+++ b/modules/local/utility/extract_preview_data/main.nf
@@ -25,11 +25,7 @@ process EXTRACT_PREVIEW_DATA {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    extract_data.py \\
-        --preview-html ${preview_html} \\
-        --prefix ${prefix}
-    """
+    template 'extract_data.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/utility/extract_preview_data/resources/usr/bin/extract_data.py b/modules/local/utility/extract_preview_data/templates/extract_data.py
old mode 100755
new mode 100644
similarity index 90%
rename from modules/local/utility/extract_preview_data/resources/usr/bin/extract_data.py
rename to modules/local/utility/extract_preview_data/templates/extract_data.py
index 0ea737c2..d34fedaf
--- a/modules/local/utility/extract_preview_data/resources/usr/bin/extract_data.py
+++ b/modules/local/utility/extract_preview_data/templates/extract_data.py
@@ -6,7 +6,6 @@
 Baysor preview.html file, writing MultiQC-compatible TSV and PNG files.
 """
 
-import argparse
 import base64
 import html
 import json
@@ -18,6 +17,10 @@
 import pandas as pd
 from bs4 import BeautifulSoup
 
+# Nextflow-injected variables
+PREVIEW_HTML = "${preview_html}"
+PREFIX = "${prefix}"
+
 
 def get_png_files(soup: BeautifulSoup, outdir: Path) -> None:
     """Get png base64 images following specific h1 tags in preview.html"""
@@ -160,29 +163,9 @@ def write_tsvs(specs: Dict[str, str], outdir: Path) -> List[Path]:
     return written
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Extract preview data from Baysor preview HTML reports."
-    )
-    parser.add_argument(
-        "--preview-html",
-        required=True,
-        help="Path to Baysor preview HTML file",
-    )
-    parser.add_argument(
-        "--prefix",
-        required=True,
-        help="Output directory prefix (sample ID)",
-    )
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
-
-    input_path: Path = Path(args.preview_html)
-    outdir: Path = Path(args.prefix)
+    input_path: Path = Path(PREVIEW_HTML)
+    outdir: Path = Path(PREFIX)
 
     text = input_path.read_text(encoding="utf-8", errors="ignore")
     soup = BeautifulSoup(text, "html.parser")
diff --git a/modules/local/utility/get_coordinates/main.nf b/modules/local/utility/get_coordinates/main.nf
index 3fdd7862..f6b70571 100644
--- a/modules/local/utility/get_coordinates/main.nf
+++ b/modules/local/utility/get_coordinates/main.nf
@@ -24,10 +24,7 @@ process GET_TRANSCRIPTS_COORDINATES {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    get_coordinates.py \\
-        --transcripts ${transcripts}
-    """
+    template 'get_coordinates.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/utility/get_coordinates/resources/usr/bin/get_coordinates.py b/modules/local/utility/get_coordinates/templates/get_coordinates.py
old mode 100755
new mode 100644
similarity index 71%
rename from modules/local/utility/get_coordinates/resources/usr/bin/get_coordinates.py
rename to modules/local/utility/get_coordinates/templates/get_coordinates.py
index 8770f9a2..05c01567
--- a/modules/local/utility/get_coordinates/resources/usr/bin/get_coordinates.py
+++ b/modules/local/utility/get_coordinates/templates/get_coordinates.py
@@ -6,10 +6,11 @@
 bounding box (x_min, x_max, y_min, y_max) to stdout.
 """
 
-import argparse
-
 import pandas as pd
 
+# Nextflow-injected variables
+TRANSCRIPTS = "${transcripts}"
+
 
 def get_coordinates(parquet_path: str):
     """
@@ -41,20 +42,6 @@ def get_coordinates(parquet_path: str):
     )
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Get transcript coordinate bounds from a Parquet file."
-    )
-    parser.add_argument(
-        "--transcripts",
-        required=True,
-        help="Path to transcripts parquet file",
-    )
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
-    result = get_coordinates(args.transcripts)
+    result = get_coordinates(TRANSCRIPTS)
     print(",".join(str(v) for v in result))
diff --git a/modules/local/utility/parquet_to_csv/main.nf b/modules/local/utility/parquet_to_csv/main.nf
index 033ed00a..6df3da37 100644
--- a/modules/local/utility/parquet_to_csv/main.nf
+++ b/modules/local/utility/parquet_to_csv/main.nf
@@ -24,12 +24,7 @@ process PARQUET_TO_CSV {
     }
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    parquet_to_csv.py \\
-        --transcripts ${transcripts} \\
-        --extension ${extension} \\
-        --prefix ${prefix}
-    """
+    template 'parquet_to_csv.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/utility/parquet_to_csv/resources/usr/bin/parquet_to_csv.py b/modules/local/utility/parquet_to_csv/templates/parquet_to_csv.py
old mode 100755
new mode 100644
similarity index 57%
rename from modules/local/utility/parquet_to_csv/resources/usr/bin/parquet_to_csv.py
rename to modules/local/utility/parquet_to_csv/templates/parquet_to_csv.py
index bfa19c40..cb1d8107
--- a/modules/local/utility/parquet_to_csv/resources/usr/bin/parquet_to_csv.py
+++ b/modules/local/utility/parquet_to_csv/templates/parquet_to_csv.py
@@ -5,11 +5,15 @@
 Reads a Parquet file and writes it as CSV, optionally gzip-compressed.
 """
 
-import argparse
 from pathlib import Path
 
 import pandas as pd
 
+# Nextflow-injected variables
+TRANSCRIPTS = "${transcripts}"
+EXTENSION = "${extension}"
+PREFIX = "${prefix}"
+
 
 def convert_parquet(
     transcripts: str,
@@ -38,33 +42,9 @@ def convert_parquet(
     return None
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Convert a Parquet file to CSV format."
-    )
-    parser.add_argument(
-        "--transcripts",
-        required=True,
-        help="Input parquet filename",
-    )
-    parser.add_argument(
-        "--extension",
-        default=".csv",
-        help="Output extension: '.csv' or '.gz' (default: .csv)",
-    )
-    parser.add_argument(
-        "--prefix",
-        required=True,
-        help="Output directory prefix (sample ID)",
-    )
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
     convert_parquet(
-        transcripts=args.transcripts,
-        extension=args.extension,
-        prefix=args.prefix,
+        transcripts=TRANSCRIPTS,
+        extension=EXTENSION,
+        prefix=PREFIX,
     )
diff --git a/modules/local/utility/resize_tif/main.nf b/modules/local/utility/resize_tif/main.nf
index 6877af27..965d652e 100644
--- a/modules/local/utility/resize_tif/main.nf
+++ b/modules/local/utility/resize_tif/main.nf
@@ -25,14 +25,7 @@ process RESIZE_TIF {
 
     prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    resize_tif.py \\
-        --mask ${mask} \\
-        --transcripts ${transcripts} \\
-        --metadata ${metadata} \\
-        --prefix ${prefix} \\
-        --mask-filename ${mask}
-    """
+    template 'resize_tif.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/utility/resize_tif/resources/usr/bin/resize_tif.py b/modules/local/utility/resize_tif/templates/resize_tif.py
old mode 100755
new mode 100644
similarity index 80%
rename from modules/local/utility/resize_tif/resources/usr/bin/resize_tif.py
rename to modules/local/utility/resize_tif/templates/resize_tif.py
index 6cca640d..5674a094
--- a/modules/local/utility/resize_tif/resources/usr/bin/resize_tif.py
+++ b/modules/local/utility/resize_tif/templates/resize_tif.py
@@ -6,7 +6,6 @@
 space of Xenium transcript data using microns-per-pixel metadata.
 """
 
-import argparse
 import json
 import os
 from typing import Tuple
@@ -16,6 +15,13 @@
 import tifffile
 from skimage.transform import resize
 
+# Nextflow-injected variables
+MASK = "${mask}"
+TRANSCRIPTS = "${transcripts}"
+METADATA = "${metadata}"
+PREFIX = "${prefix}"
+MASK_FILENAME = "${mask}"
+
 
 def read_mask(mask_path: str) -> np.ndarray:
     """Read the segmentation mask from a TIFF file."""
@@ -107,28 +113,13 @@ def main(mask_path: str, transcripts_path: str, metadata_path: str, output_path:
     print(f"Saved resized mask -> {output_path}")
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Resize a segmentation TIFF mask to match transcript coordinates."
-    )
-    parser.add_argument("--mask", required=True, help="Path to segmentation mask TIFF")
-    parser.add_argument("--transcripts", required=True, help="Path to transcripts file")
-    parser.add_argument("--metadata", required=True, help="Path to metadata JSON")
-    parser.add_argument("--prefix", required=True, help="Output directory prefix")
-    parser.add_argument("--mask-filename", required=True, help="Original mask filename for output naming")
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
-
-    os.makedirs(args.prefix, exist_ok=True)
-    output_mask: str = os.path.join(args.prefix, f"resized_{args.mask_filename}.tif")
+    os.makedirs(PREFIX, exist_ok=True)
+    output_mask: str = os.path.join(PREFIX, f"resized_{MASK_FILENAME}.tif")
 
     main(
-        mask_path=args.mask,
-        transcripts_path=args.transcripts,
-        metadata_path=args.metadata,
+        mask_path=MASK,
+        transcripts_path=TRANSCRIPTS,
+        metadata_path=METADATA,
         output_path=output_mask,
     )
diff --git a/modules/local/utility/segger2xr/main.nf b/modules/local/utility/segger2xr/main.nf
index 1964469a..b562eed4 100644
--- a/modules/local/utility/segger2xr/main.nf
+++ b/modules/local/utility/segger2xr/main.nf
@@ -24,14 +24,9 @@ process SEGGER2XR {
         error "SEGGER2XR module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
 
-    def min_transcripts = task.ext.min_transcripts_per_cell ?: 3
+    min_transcripts = task.ext.min_transcripts_per_cell ?: 3
 
-    """
-    segger2xr.py \\
-        --transcripts ${transcripts} \\
-        --prefix ${meta.id} \\
-        --min-transcripts ${min_transcripts}
-    """
+    template 'segger2xr.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/utility/segger2xr/resources/usr/bin/segger2xr.py b/modules/local/utility/segger2xr/templates/segger2xr.py
old mode 100755
new mode 100644
similarity index 90%
rename from modules/local/utility/segger2xr/resources/usr/bin/segger2xr.py
rename to modules/local/utility/segger2xr/templates/segger2xr.py
index 22889e82..96f2d8df
--- a/modules/local/utility/segger2xr/resources/usr/bin/segger2xr.py
+++ b/modules/local/utility/segger2xr/templates/segger2xr.py
@@ -7,7 +7,6 @@
 and GeoJSON cell boundary polygons for xeniumranger import-segmentation.
 """
 
-import argparse
 import json
 from pathlib import Path
 from typing import List
@@ -15,6 +14,11 @@
 import pandas as pd
 from scipy.spatial import ConvexHull
 
+# Nextflow-injected variables
+TRANSCRIPTS = "${transcripts}"
+PREFIX = "${meta.id}"
+MIN_TRANSCRIPTS = int("${min_transcripts}")
+
 # Expected columns in transcripts.parquet
 REQUIRED_COLUMNS: List[str] = [
     "transcript_id",
@@ -214,34 +218,9 @@ def main(input_file: str, prefix: str, min_transcripts: int = 3) -> None:
     generate_viz_polygons(transcripts, f"{prefix}/segmentation_polygons.json", cell_map)
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Convert Segger prediction output to XeniumRanger-compatible format."
-    )
-    parser.add_argument(
-        "--transcripts",
-        required=True,
-        help="Path to Segger output transcripts parquet file",
-    )
-    parser.add_argument(
-        "--prefix",
-        required=True,
-        help="Output directory prefix (sample ID)",
-    )
-    parser.add_argument(
-        "--min-transcripts",
-        type=int,
-        default=3,
-        help="Minimum transcripts per cell (default: 3)",
-    )
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
     main(
-        input_file=args.transcripts,
-        prefix=args.prefix,
-        min_transcripts=args.min_transcripts,
+        input_file=TRANSCRIPTS,
+        prefix=PREFIX,
+        min_transcripts=MIN_TRANSCRIPTS,
     )
diff --git a/modules/local/utility/split_transcripts/main.nf b/modules/local/utility/split_transcripts/main.nf
index 5cfa0b65..3cc54042 100644
--- a/modules/local/utility/split_transcripts/main.nf
+++ b/modules/local/utility/split_transcripts/main.nf
@@ -23,15 +23,9 @@ process SPLIT_TRANSCRIPTS {
     if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
         error "SPLIT_TRANSCRIPTS module does not support Conda. Please use Docker / Singularity / Podman instead."
     }
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: "${meta.id}"
 
-    """
-    split_transcripts.py \\
-        --transcripts ${transcripts} \\
-        --x-bins ${x_bins} \\
-        --y-bins ${y_bins} \\
-        --prefix ${prefix}
-    """
+    template 'split_transcripts.py'
 
     stub:
     // Exit if running this module with -profile conda / -profile mamba
diff --git a/modules/local/utility/split_transcripts/resources/usr/bin/split_transcripts.py b/modules/local/utility/split_transcripts/templates/split_transcripts.py
old mode 100755
new mode 100644
similarity index 68%
rename from modules/local/utility/split_transcripts/resources/usr/bin/split_transcripts.py
rename to modules/local/utility/split_transcripts/templates/split_transcripts.py
index 275fbab1..1fa6db15
--- a/modules/local/utility/split_transcripts/resources/usr/bin/split_transcripts.py
+++ b/modules/local/utility/split_transcripts/templates/split_transcripts.py
@@ -6,12 +6,17 @@
 tiles, writing a splits.csv with tile boundaries.
 """
 
-import argparse
 import os
 from typing import List
 
 import pandas as pd
 
+# Nextflow-injected variables
+TRANSCRIPTS = "${transcripts}"
+X_BINS = "${x_bins}"
+Y_BINS = "${y_bins}"
+PREFIX = "${prefix}"
+
 
 def compute_quantile_ranges(df: pd.DataFrame, col: str, n_bins: int) -> List:
     """
@@ -69,41 +74,10 @@ def main(
     return None
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Split transcript coordinates into spatial tiles."
-    )
-    parser.add_argument(
-        "--transcripts",
-        required=True,
-        help="Path to transcripts parquet file",
-    )
-    parser.add_argument(
-        "--x-bins",
-        type=int,
-        required=True,
-        help="Number of bins along X axis",
-    )
-    parser.add_argument(
-        "--y-bins",
-        type=int,
-        required=True,
-        help="Number of bins along Y axis",
-    )
-    parser.add_argument(
-        "--prefix",
-        required=True,
-        help="Output directory prefix",
-    )
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
     main(
-        transcripts=args.transcripts,
-        x_bins=args.x_bins,
-        y_bins=args.y_bins,
-        prefix=args.prefix,
+        transcripts=TRANSCRIPTS,
+        x_bins=int(X_BINS),
+        y_bins=int(Y_BINS),
+        prefix=PREFIX,
     )
diff --git a/modules/local/utility/upscale_mask/main.nf b/modules/local/utility/upscale_mask/main.nf
index a201abf1..41b3ff91 100644
--- a/modules/local/utility/upscale_mask/main.nf
+++ b/modules/local/utility/upscale_mask/main.nf
@@ -34,12 +34,8 @@ process UPSCALE_MASK {
 
     script:
     prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    upscale_mask.py \\
-        --mask ${mask} \\
-        --scale-info ${scale_info} \\
-        --prefix ${prefix}
-    """
+
+    template 'upscale_mask.py'
 
     stub:
     prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/modules/local/utility/upscale_mask/resources/usr/bin/upscale_mask.py b/modules/local/utility/upscale_mask/templates/upscale_mask.py
old mode 100755
new mode 100644
similarity index 71%
rename from modules/local/utility/upscale_mask/resources/usr/bin/upscale_mask.py
rename to modules/local/utility/upscale_mask/templates/upscale_mask.py
index 6cc1694e..9c0ef0bb
--- a/modules/local/utility/upscale_mask/resources/usr/bin/upscale_mask.py
+++ b/modules/local/utility/upscale_mask/templates/upscale_mask.py
@@ -9,7 +9,6 @@
 Output: {prefix}/upscaled_{mask_basename}.tif
 """
 
-import argparse
 import json
 from pathlib import Path
 
@@ -17,6 +16,11 @@
 import tifffile
 from PIL import Image
 
+# Nextflow-injected variables
+MASK = "${mask}"
+SCALE_INFO = "${scale_info}"
+PREFIX = "${prefix}"
+
 
 def upscale_mask(mask_path: str, scale_info_path: str, prefix: str) -> None:
     """
@@ -52,21 +56,9 @@ def upscale_mask(mask_path: str, scale_info_path: str, prefix: str) -> None:
     )
 
 
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Upscale a Cellpose mask back to original resolution."
-    )
-    parser.add_argument("--mask", required=True, help="Downscaled mask TIFF")
-    parser.add_argument("--scale-info", required=True, help="scale_info.json from downscale step")
-    parser.add_argument("--prefix", required=True, help="Output directory")
-    return parser.parse_args()
-
-
 if __name__ == "__main__":
-    args = parse_args()
     upscale_mask(
-        mask_path=args.mask,
-        scale_info_path=args.scale_info,
-        prefix=args.prefix,
+        mask_path=MASK,
+        scale_info_path=SCALE_INFO,
+        prefix=PREFIX,
     )
diff --git a/modules/local/xenium_patch/stitch/main.nf b/modules/local/xenium_patch/stitch/main.nf
index d805a0f5..118e77a7 100644
--- a/modules/local/xenium_patch/stitch/main.nf
+++ b/modules/local/xenium_patch/stitch/main.nf
@@ -34,18 +34,8 @@ process XENIUM_PATCH_STITCH {
 
     script:
     def args = task.ext.args ?: ''
 
-    """
-    stitch_transcripts.py \\
-        --patches ${patches} \\
-        --output output \\
-        ${args}
-    # Post-process: ensure all GeoJSON geometries are Polygon and
-    # reconcile dropped cells in the transcript CSV.
- stitch_postprocess.py \\ - --geojson output/xr-cell-polygons.geojson \\ - --csv output/xr-transcript-metadata.csv - """ + template 'stitch.py' stub: """ diff --git a/modules/local/xenium_patch/stitch/resources/usr/bin/stitch_postprocess.py b/modules/local/xenium_patch/stitch/resources/usr/bin/stitch_postprocess.py deleted file mode 100755 index 7144b1ac..00000000 --- a/modules/local/xenium_patch/stitch/resources/usr/bin/stitch_postprocess.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python3 -""" -Post-process stitched per-patch segmentation outputs. - -Ensures every GeoJSON feature is a single Polygon: make_valid() and -sopa.solve_conflicts() can produce MultiPolygon, MultiLineString, or -GeometryCollection geometries that XeniumRanger rejects. Cells dropped -during cleanup are also reassigned to UNASSIGNED in the transcript CSV -so the two outputs stay consistent. -""" - -import argparse -import csv -import json - -import shapely -from shapely.geometry import mapping, shape - - -def clean_geojson(geojson_path: str) -> set: - """ - Force every feature to a single valid Polygon. - - Returns the set of cell ids whose features were dropped. - """ - with open(geojson_path) as f: - data = json.load(f) - - clean = [] - dropped_cells = set() - for feat in data["features"]: - geom = shape(feat["geometry"]) - if not geom.is_valid: - geom = shapely.make_valid(geom) - poly = None - if geom.geom_type == "Polygon": - poly = geom - elif geom.geom_type == "MultiPolygon": - poly = max(geom.geoms, key=lambda g: g.area) - elif geom.geom_type == "GeometryCollection": - polys = [g for g in geom.geoms if g.geom_type == "Polygon"] - if polys: - poly = max(polys, key=lambda g: g.area) - if poly is not None and not poly.is_empty: - feat["geometry"] = mapping(poly) - clean.append(feat) - else: - cell_id = feat.get("id") or feat.get("properties", {}).get("cell_id", "") - dropped_cells.add(str(cell_id)) - - print(f"GeoJSON: {len(clean)} kept, {len(dropped_cells)} dropped: {dropped_cells}") - data["features"] = clean - with open(geojson_path, "w") as f: - json.dump(data, f) - - return dropped_cells - - -def reassign_dropped(csv_path: str, dropped_cells: set) -> None: - """ - Reassign transcripts of dropped cells to UNASSIGNED in the CSV. - """ - if not dropped_cells: - return - - with open(csv_path) as f: - reader = csv.DictReader(f) - fieldnames = reader.fieldnames - rows = list(reader) - - reassigned = 0 - for row in rows: - if row["cell"] in dropped_cells: - row["cell"] = "" - row["is_noise"] = "1" - reassigned += 1 - - with open(csv_path, "w", newline="") as f: - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(rows) - print(f"CSV: {reassigned} transcripts reassigned to UNASSIGNED") - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments.""" - parser = argparse.ArgumentParser( - description="Clean stitched GeoJSON polygons and reconcile transcript CSV." 
- ) - parser.add_argument("--geojson", required=True, help="Path to xr-cell-polygons.geojson") - parser.add_argument("--csv", required=True, help="Path to xr-transcript-metadata.csv") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - dropped = clean_geojson(args.geojson) - reassign_dropped(args.csv, dropped) diff --git a/modules/local/xenium_patch/stitch/resources/usr/bin/stitch_transcripts.py b/modules/local/xenium_patch/stitch/templates/stitch.py old mode 100755 new mode 100644 similarity index 85% rename from modules/local/xenium_patch/stitch/resources/usr/bin/stitch_transcripts.py rename to modules/local/xenium_patch/stitch/templates/stitch.py index d9fb8d41..2306a7dc --- a/modules/local/xenium_patch/stitch/resources/usr/bin/stitch_transcripts.py +++ b/modules/local/xenium_patch/stitch/templates/stitch.py @@ -1,15 +1,25 @@ #!/usr/bin/env python3 -"""Stitch per-patch Baysor segmentation results into unified output. - -Standalone script that replaces the xenium_patch CLI package's stitch -functionality. Uses sopa's solve_conflicts() for overlap resolution. +"""Stitch per-patch segmentation results, then post-process the GeoJSON + CSV. + +Phase 1 (stitch_transcripts): + Stitch per-patch Baysor segmentation results into unified output. Uses + sopa's solve_conflicts() for overlap resolution at patch boundaries. + +Phase 2 (postprocess): + Ensures every GeoJSON feature is a single Polygon: make_valid() and + sopa.solve_conflicts() can produce MultiPolygon, MultiLineString, or + GeometryCollection geometries that XeniumRanger rejects. Cells dropped + during cleanup are also reassigned to UNASSIGNED in the transcript CSV + so the two outputs stay consistent. """ from __future__ import annotations import argparse +import csv import json import os +import shlex from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path @@ -24,6 +34,11 @@ from shapely.geometry import mapping, shape from sopa.segmentation.resolve import solve_conflicts +# Nextflow-injected variables +PATCHES = "${patches}" +ARGS = "${args}" # task.ext.args, may be empty + + # --------------------------------------------------------------------------- # Geometry helpers # --------------------------------------------------------------------------- @@ -44,7 +59,7 @@ def _ensure_polygon(geom) -> "shapely.Polygon | None": if geom.geom_type == "GeometryCollection": polys = [g for g in geom.geoms if g.geom_type == "Polygon"] return max(polys, key=lambda g: g.area) if polys else None - # LineString, MultiLineString, Point, etc. — not a polygon + # LineString, MultiLineString, Point, etc. -- not a polygon return None @@ -630,7 +645,7 @@ def _stitch_sopa_resolve( # --------------------------------------------------------------------------- -# Main orchestrator +# Main orchestrator (stitch phase) # --------------------------------------------------------------------------- @@ -764,11 +779,17 @@ def stitch_transcript_assignments( # --------------------------------------------------------------------------- -# CLI +# Phase 1 entry point: stitch_transcripts (replaces stitch_transcripts.py main) # --------------------------------------------------------------------------- -def main() -> None: +def stitch_transcripts(patches_dir: str, output_dir: str, args_str: str) -> None: + """Phase 1: parse extra CLI args and run the stitch orchestrator. 
+ + Mirrors the original ``stitch_transcripts.py`` argparse interface so that + any flags passed through ``task.ext.args`` are honored by the same + parser as before. + """ parser = argparse.ArgumentParser( description="Stitch per-patch Baysor segmentation results into unified output." ) @@ -794,13 +815,106 @@ def main() -> None: default="segmentation_polygons.json", help="GeoJSON filename within each patch (default: segmentation_polygons.json)", ) - args = parser.parse_args() + + argv = ["--patches", patches_dir, "--output", output_dir] + if args_str: + argv.extend(shlex.split(args_str)) + parsed = parser.parse_args(argv) stitch_transcript_assignments( - patches_dir=args.patches, - output_dir=args.output, - csv_filename=args.csv_filename, - geojson_filename=args.geojson_filename, + patches_dir=parsed.patches, + output_dir=parsed.output, + csv_filename=parsed.csv_filename, + geojson_filename=parsed.geojson_filename, + ) + + +# --------------------------------------------------------------------------- +# Phase 2: post-processing (from stitch_postprocess.py) +# --------------------------------------------------------------------------- + + +def clean_geojson(geojson_path: str) -> set: + """ + Force every feature to a single valid Polygon. + + Returns the set of cell ids whose features were dropped. + """ + with open(geojson_path) as f: + data = json.load(f) + + clean = [] + dropped_cells = set() + for feat in data["features"]: + geom = shape(feat["geometry"]) + if not geom.is_valid: + geom = shapely.make_valid(geom) + poly = None + if geom.geom_type == "Polygon": + poly = geom + elif geom.geom_type == "MultiPolygon": + poly = max(geom.geoms, key=lambda g: g.area) + elif geom.geom_type == "GeometryCollection": + polys = [g for g in geom.geoms if g.geom_type == "Polygon"] + if polys: + poly = max(polys, key=lambda g: g.area) + if poly is not None and not poly.is_empty: + feat["geometry"] = mapping(poly) + clean.append(feat) + else: + cell_id = feat.get("id") or feat.get("properties", {}).get("cell_id", "") + dropped_cells.add(str(cell_id)) + + print(f"GeoJSON: {len(clean)} kept, {len(dropped_cells)} dropped: {dropped_cells}") + data["features"] = clean + with open(geojson_path, "w") as f: + json.dump(data, f) + + return dropped_cells + + +def reassign_dropped(csv_path: str, dropped_cells: set) -> None: + """ + Reassign transcripts of dropped cells to UNASSIGNED in the CSV. 
+ """ + if not dropped_cells: + return + + with open(csv_path) as f: + reader = csv.DictReader(f) + fieldnames = reader.fieldnames + rows = list(reader) + + reassigned = 0 + for row in rows: + if row["cell"] in dropped_cells: + row["cell"] = "" + row["is_noise"] = "1" + reassigned += 1 + + with open(csv_path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + print(f"CSV: {reassigned} transcripts reassigned to UNASSIGNED") + + +def postprocess(geojson_path: str, csv_path: str) -> None: + """Phase 2 entry point: cleanup polygons and reconcile the CSV.""" + dropped = clean_geojson(geojson_path) + reassign_dropped(csv_path, dropped) + + +# --------------------------------------------------------------------------- +# Main: run both phases sequentially +# --------------------------------------------------------------------------- + + +def main() -> None: + stitch_transcripts(PATCHES, "output", ARGS) + postprocess( + "output/xr-cell-polygons.geojson", + "output/xr-transcript-metadata.csv", ) From dbd7a35df4b842a3e727a1cb0dca82c7582a61a6 Mon Sep 17 00:00:00 2001 From: an-altosian Date: Tue, 5 May 2026 16:43:39 +0000 Subject: [PATCH 2/3] fix(modules): switch from `template` directive to shell-call invocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI on PR #154 surfaced this hard Nextflow constraint: Process output of type 'eval' is only allowed with Bash process scripts -- Current interpreter: /usr/bin/env python3 The `template '.py'` directive sets the process interpreter to Python from the shebang, but every module emits a version string via an `eval('python3 -c ...')` topic channel — and `eval(...)` outputs only work when the process script body is Bash. All 19 modules failed identically. Fix: keep the Python files in `modules/local//templates/` (satisfying the no-module-level-bin requirement from PR #139's review) but invoke them from a shell `script:` block via `python3 \${moduleDir}/templates/.py --flag value`. The process stays Bash, `eval()` works, and the original argparse-based scripts are restored verbatim — no constants conversion, no script merging, no moved env preludes. For xenium_patch/stitch this means restoring the two original scripts (stitch_transcripts.py + stitch_postprocess.py) instead of the merged stitch.py. For segger/create_dataset, the NUMBA_CACHE_DIR shell prelude is restored to its original location in the .nf script: block. Net result vs. the previous attempt: same nf-core compliance (no module-level bin/), but invasive Python rewrites are reverted. The original behavior is preserved exactly. 
--- modules/local/baysor/create_dataset/main.nf | 7 +- .../templates/create_dataset.py | 29 +++- modules/local/baysor/preprocess/main.nf | 11 +- .../templates/preprocess_transcripts.py | 63 ++++++-- modules/local/ficture/preprocess/main.nf | 9 +- .../templates/ficture_preprocess.py | 36 ++--- modules/local/segger/create_dataset/main.nf | 14 +- .../templates/run_create_dataset.py | 61 ++++---- modules/local/segger/predict/main.nf | 14 +- .../segger/predict/templates/run_predict.py | 32 ---- modules/local/spatialdata/merge/main.nf | 8 +- .../merge/templates/spatialdata_merge.py | 43 ++++-- modules/local/spatialdata/meta/main.nf | 9 +- .../meta/templates/spatialdata_meta.py | 32 ++-- modules/local/spatialdata/write/main.nf | 10 +- .../write/templates/spatialdata_write.py | 40 ++--- .../local/utility/convert_mask_uint32/main.nf | 7 +- .../templates/convert_mask_uint32.py | 23 ++- .../utility/downscale_morphology/main.nf | 9 +- .../templates/downscale_morphology.py | 28 ++-- modules/local/utility/extract_dapi/main.nf | 8 +- .../extract_dapi/templates/extract_dapi.py | 31 +++- .../utility/extract_preview_data/main.nf | 6 +- .../templates/extract_data.py | 29 +++- modules/local/utility/get_coordinates/main.nf | 5 +- .../templates/get_coordinates.py | 21 ++- modules/local/utility/parquet_to_csv/main.nf | 7 +- .../templates/parquet_to_csv.py | 36 ++++- modules/local/utility/resize_tif/main.nf | 9 +- .../resize_tif/templates/resize_tif.py | 33 +++-- modules/local/utility/segger2xr/main.nf | 9 +- .../utility/segger2xr/templates/segger2xr.py | 37 ++++- .../local/utility/split_transcripts/main.nf | 10 +- .../templates/split_transcripts.py | 46 ++++-- modules/local/utility/upscale_mask/main.nf | 8 +- .../upscale_mask/templates/upscale_mask.py | 24 ++- modules/local/xenium_patch/stitch/main.nf | 12 +- .../stitch/templates/stitch_postprocess.py | 98 ++++++++++++ .../{stitch.py => stitch_transcripts.py} | 140 ++---------------- 39 files changed, 661 insertions(+), 393 deletions(-) create mode 100644 modules/local/xenium_patch/stitch/templates/stitch_postprocess.py rename modules/local/xenium_patch/stitch/templates/{stitch.py => stitch_transcripts.py} (85%) diff --git a/modules/local/baysor/create_dataset/main.nf b/modules/local/baysor/create_dataset/main.nf index 91aa67a0..b05350fa 100644 --- a/modules/local/baysor/create_dataset/main.nf +++ b/modules/local/baysor/create_dataset/main.nf @@ -23,7 +23,12 @@ process BAYSOR_CREATE_DATASET { prefix = task.ext.prefix ?: "${meta.id}" - template 'create_dataset.py' + """ + python3 ${moduleDir}/templates/create_dataset.py \\ + --transcripts ${transcripts} \\ + --sample-fraction ${sample_fraction} \\ + --prefix ${prefix} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/baysor/create_dataset/templates/create_dataset.py b/modules/local/baysor/create_dataset/templates/create_dataset.py index bbf05320..4e5a263a 100644 --- a/modules/local/baysor/create_dataset/templates/create_dataset.py +++ b/modules/local/baysor/create_dataset/templates/create_dataset.py @@ -6,16 +6,12 @@ writing the result to a new CSV file. 
""" +import argparse import csv import os import random from pathlib import Path -# Nextflow-injected variables -TRANSCRIPTS = "${transcripts}" -SAMPLE_FRACTION = "${sample_fraction}" -PREFIX = "${prefix}" - class BaysorPreview(): """ @@ -66,14 +62,31 @@ def main() -> None: """ Run create dataset as nf module """ + parser = argparse.ArgumentParser( + description="Create sampled dataset for Baysor preview" + ) + parser.add_argument( + "--transcripts", required=True, + help="Path to transcripts CSV file" + ) + parser.add_argument( + "--sample-fraction", required=True, type=float, + help="Fraction of rows to sample" + ) + parser.add_argument( + "--prefix", required=True, + help="Output directory prefix" + ) + args = parser.parse_args() + sampled_transcripts = "sampled_transcripts.csv" # generate dataset BaysorPreview.generate_dataset( - transcripts=TRANSCRIPTS, + transcripts=args.transcripts, sampled_transcripts=sampled_transcripts, - sample_fraction=float(SAMPLE_FRACTION), - prefix=PREFIX, + sample_fraction=args.sample_fraction, + prefix=args.prefix ) return None diff --git a/modules/local/baysor/preprocess/main.nf b/modules/local/baysor/preprocess/main.nf index b11479d1..1a5a42fe 100644 --- a/modules/local/baysor/preprocess/main.nf +++ b/modules/local/baysor/preprocess/main.nf @@ -29,7 +29,16 @@ process BAYSOR_PREPROCESS_TRANSCRIPTS { prefix = task.ext.prefix ?: "${meta.id}" - template 'preprocess_transcripts.py' + """ + python3 ${moduleDir}/templates/preprocess_transcripts.py \\ + --transcripts ${transcripts} \\ + --prefix ${prefix} \\ + --min-qv ${min_qv} \\ + --min-x ${min_x} \\ + --max-x ${max_x} \\ + --min-y ${min_y} \\ + --max-y ${max_y} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/baysor/preprocess/templates/preprocess_transcripts.py b/modules/local/baysor/preprocess/templates/preprocess_transcripts.py index 25bc41dc..2662f83c 100644 --- a/modules/local/baysor/preprocess/templates/preprocess_transcripts.py +++ b/modules/local/baysor/preprocess/templates/preprocess_transcripts.py @@ -6,19 +6,11 @@ removes negative control probes, and outputs filtered CSV for Baysor compatibility. """ +import argparse import os import pandas as pd -# Nextflow-injected variables -TRANSCRIPTS = "${transcripts}" -PREFIX = "${prefix}" -MIN_QV = "${min_qv}" -MIN_X = "${min_x}" -MAX_X = "${max_x}" -MIN_Y = "${min_y}" -MAX_Y = "${max_y}" - def filter_transcripts( transcripts: str, @@ -78,14 +70,53 @@ def main() -> None: """ Run preprocess transcripts as nf module. 
""" + parser = argparse.ArgumentParser( + description="Preprocess Xenium transcripts for Baysor" + ) + parser.add_argument( + "--transcripts", required=True, help="Path to transcripts parquet file" + ) + parser.add_argument("--prefix", required=True, help="Output directory prefix") + parser.add_argument( + "--min-qv", + type=float, + default=20.0, + help="Minimum Q-Score threshold (default: 20.0)", + ) + parser.add_argument( + "--min-x", + type=float, + default=0.0, + help="Minimum x-coordinate threshold (default: 0.0)", + ) + parser.add_argument( + "--max-x", + type=float, + default=24000.0, + help="Maximum x-coordinate threshold (default: 24000.0)", + ) + parser.add_argument( + "--min-y", + type=float, + default=0.0, + help="Minimum y-coordinate threshold (default: 0.0)", + ) + parser.add_argument( + "--max-y", + type=float, + default=24000.0, + help="Maximum y-coordinate threshold (default: 24000.0)", + ) + args = parser.parse_args() + filter_transcripts( - transcripts=TRANSCRIPTS, - min_qv=float(MIN_QV), - min_x=float(MIN_X), - max_x=float(MAX_X), - min_y=float(MIN_Y), - max_y=float(MAX_Y), - prefix=PREFIX, + transcripts=args.transcripts, + min_qv=args.min_qv, + min_x=args.min_x, + max_x=args.max_x, + min_y=args.min_y, + max_y=args.max_y, + prefix=args.prefix, ) return None diff --git a/modules/local/ficture/preprocess/main.nf b/modules/local/ficture/preprocess/main.nf index 76005c10..09b6fb4c 100644 --- a/modules/local/ficture/preprocess/main.nf +++ b/modules/local/ficture/preprocess/main.nf @@ -21,8 +21,15 @@ process FICTURE_PREPROCESS { script: def args = task.ext.args ?: '' + def features_arg = features ? "--features ${features}" : "" - template 'ficture_preprocess.py' + """ + python3 ${moduleDir}/templates/ficture_preprocess.py \\ + --transcripts ${transcripts} \\ + ${features_arg} \\ + --negative-control-regex '${params.negative_control_regex}' \\ + ${args} + """ stub: """ diff --git a/modules/local/ficture/preprocess/templates/ficture_preprocess.py b/modules/local/ficture/preprocess/templates/ficture_preprocess.py index 92ae9721..2e0c687c 100644 --- a/modules/local/ficture/preprocess/templates/ficture_preprocess.py +++ b/modules/local/ficture/preprocess/templates/ficture_preprocess.py @@ -1,40 +1,18 @@ #!/usr/bin/env python3 """Preprocess Xenium transcripts for FICTURE analysis.""" +import argparse import gzip import logging import os import re -import shlex import sys import pandas as pd -# Nextflow-injected variables -TRANSCRIPTS = "${transcripts}" -FEATURES = "${features}" -NEGATIVE_CONTROL_REGEX = "${params.negative_control_regex}" -ARGS = "${args}" - - -def main(): - """Run FICTURE preprocessing.""" - print("[START]") - - # Re-build argv so parse_known_args() can absorb any extra flags from task.ext.args - sys.argv = [ - sys.argv[0], - "--transcripts", - TRANSCRIPTS, - "--negative-control-regex", - NEGATIVE_CONTROL_REGEX, - ] - if FEATURES: - sys.argv += ["--features", FEATURES] - sys.argv += shlex.split(ARGS) - - import argparse +def parse_args(): + """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="Preprocess Xenium transcripts for FICTURE" ) @@ -47,7 +25,13 @@ def main(): parser.add_argument( "--negative-control-regex", default="", help="Regex for negative control probes" ) - args, _ = parser.parse_known_args() + return parser.parse_args() + + +def main(): + """Run FICTURE preprocessing.""" + args = parse_args() + print("[START]") negctrl_regex = "BLANK|NegCon" if args.negative_control_regex: diff --git 
a/modules/local/segger/create_dataset/main.nf b/modules/local/segger/create_dataset/main.nf index 2b03d2c9..bd81f7b8 100644 --- a/modules/local/segger/create_dataset/main.nf +++ b/modules/local/segger/create_dataset/main.nf @@ -29,7 +29,19 @@ process SEGGER_CREATE_DATASET { error("${params.format} is an invalid platform type.") } - template 'run_create_dataset.py' + """ + export NUMBA_CACHE_DIR=\$PWD/.numba_cache + mkdir -p \$NUMBA_CACHE_DIR + + python3 ${moduleDir}/templates/run_create_dataset.py \\ + --bundle-dir ${base_dir} \\ + --output-dir ${prefix} \\ + --sample-type ${params.format} \\ + --tile-width ${params.tile_width} \\ + --tile-height ${params.tile_height} \\ + --n-workers ${task.cpus} \\ + ${args} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/segger/create_dataset/templates/run_create_dataset.py b/modules/local/segger/create_dataset/templates/run_create_dataset.py index 28df3cff..c73ab006 100644 --- a/modules/local/segger/create_dataset/templates/run_create_dataset.py +++ b/modules/local/segger/create_dataset/templates/run_create_dataset.py @@ -11,14 +11,8 @@ Each WORKAROUND should be removable when the upstream segger bug is fixed. """ -# ruff: noqa: E402 -- NUMBA_CACHE_DIR must be set before subsequent imports -# (segger / torch transitively pull in numba). Order matters; do not reorder. +import argparse import os - -os.environ["NUMBA_CACHE_DIR"] = os.path.join(os.getcwd(), ".numba_cache") -os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True) - -import shlex import shutil import subprocess import sys @@ -32,14 +26,18 @@ SEGGER_CLI = "/workspace/segger_dev/src/segger/cli/create_dataset_fast.py" -# Nextflow-injected variables -BUNDLE_DIR = "${base_dir}" -OUTPUT_DIR = "${prefix}" -SAMPLE_TYPE = "${params.format}" -TILE_WIDTH = "${params.tile_width}" -TILE_HEIGHT = "${params.tile_height}" -N_WORKERS = "${task.cpus}" -ARGS = "${args}" + +def parse_args(): + p = argparse.ArgumentParser() + p.add_argument("--bundle-dir", required=True) + p.add_argument("--output-dir", required=True) + p.add_argument("--sample-type", required=True, choices=["xenium"]) + p.add_argument("--tile-width", type=int, required=True) + p.add_argument("--tile-height", type=int, required=True) + p.add_argument("--n-workers", type=int, required=True) + # remaining args forwarded to segger CLI + args, extra = p.parse_known_args() + return args, extra def prepare_bundle(bundle_dir): @@ -112,22 +110,22 @@ def add_parquet_stats(): print("=== End Debug ===\n") -def run_segger_cli(output_dir, sample_type, tile_width, tile_height, n_workers, extra): +def run_segger_cli(args, extra): cmd = [ "python3", SEGGER_CLI, "--base_dir", "bundle_stats", "--data_dir", - output_dir, + args.output_dir, "--sample_type", - sample_type, + args.sample_type, "--tile_width", - str(tile_width), + str(args.tile_width), "--tile_height", - str(tile_height), + str(args.tile_height), "--n_workers", - str(n_workers), + str(args.n_workers), *extra, ] print(f"Running: {' '.join(cmd)}") @@ -230,9 +228,13 @@ def fix_bd_x_nan(prefix): def main(): - extra = shlex.split(ARGS) + args, extra = parse_args() - prepare_bundle(BUNDLE_DIR) + # Ensure numba cache dir is writable (env var should be set by caller, but belt-and-suspenders) + os.environ.setdefault("NUMBA_CACHE_DIR", os.path.join(os.getcwd(), ".numba_cache")) + os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True) + + prepare_bundle(args.bundle_dir) print("Adding statistics to parquet files...") add_parquet_stats() @@ -241,17 
+243,10 @@ def main(): for item in sorted(Path("bundle_stats").iterdir()): print(f" {item.name}") - run_segger_cli( - OUTPUT_DIR, - SAMPLE_TYPE, - int(TILE_WIDTH), - int(TILE_HEIGHT), - int(N_WORKERS), - extra, - ) + run_segger_cli(args, extra) - filter_trainable_tiles_if_needed(OUTPUT_DIR) - fix_bd_x_nan(OUTPUT_DIR) + filter_trainable_tiles_if_needed(args.output_dir) + fix_bd_x_nan(args.output_dir) if __name__ == "__main__": diff --git a/modules/local/segger/predict/main.nf b/modules/local/segger/predict/main.nf index d8384c74..48195d53 100644 --- a/modules/local/segger/predict/main.nf +++ b/modules/local/segger/predict/main.nf @@ -26,8 +26,18 @@ process SEGGER_PREDICT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - - template 'run_predict.py' + """ + python3 ${moduleDir}/templates/run_predict.py \\ + --models-dir ${models_dir} \\ + --segger-data-dir ${segger_dataset} \\ + --transcripts-file ${transcripts} \\ + --benchmarks-dir benchmarks_dir \\ + --batch-size ${params.batch_size_predict} \\ + --use-cc ${params.cc_analysis} \\ + --knn-method ${params.segger_knn_method} \\ + --num-workers ${task.cpus} \\ + ${args} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/segger/predict/templates/run_predict.py b/modules/local/segger/predict/templates/run_predict.py index ca2051a7..56a77ffc 100644 --- a/modules/local/segger/predict/templates/run_predict.py +++ b/modules/local/segger/predict/templates/run_predict.py @@ -12,23 +12,12 @@ import argparse import os -import shlex import subprocess import sys SEGGER_CLI = "/workspace/segger_dev/src/segger/cli/predict_fast.py" -# Nextflow-injected variables -MODELS_DIR = "${models_dir}" -SEGGER_DATASET = "${segger_dataset}" -TRANSCRIPTS = "${transcripts}" -BATCH_SIZE = "${params.batch_size_predict}" -USE_CC = "${params.cc_analysis}" -KNN_METHOD = "${params.segger_knn_method}" -NUM_WORKERS = "${task.cpus}" -ARGS = "${args}" - def parse_args(): p = argparse.ArgumentParser() @@ -124,27 +113,6 @@ def run_segger_cli(args, extra, gpu_ids): def main(): - # Re-build argv from Nextflow-injected constants + any extra task.ext.args flags - sys.argv = [ - sys.argv[0], - "--models-dir", - MODELS_DIR, - "--segger-data-dir", - SEGGER_DATASET, - "--transcripts-file", - TRANSCRIPTS, - "--benchmarks-dir", - "benchmarks_dir", - "--batch-size", - BATCH_SIZE, - "--use-cc", - USE_CC, - "--knn-method", - KNN_METHOD, - "--num-workers", - NUM_WORKERS, - ] + shlex.split(ARGS) - args, extra = parse_args() # Limit cupy GPU memory to 80% so PyTorch has headroom for graph attention ops diff --git a/modules/local/spatialdata/merge/main.nf b/modules/local/spatialdata/merge/main.nf index 9ff4b112..9e063844 100644 --- a/modules/local/spatialdata/merge/main.nf +++ b/modules/local/spatialdata/merge/main.nf @@ -25,7 +25,13 @@ process SPATIALDATA_MERGE { prefix = task.ext.prefix ?: "${meta.id}" - template 'spatialdata_merge.py' + """ + python3 ${moduleDir}/templates/spatialdata_merge.py \\ + --raw-bundle ${raw_bundle} \\ + --redefined-bundle ${redefined_bundle} \\ + --prefix ${prefix} \\ + --output-folder ${outputfolder} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/spatialdata/merge/templates/spatialdata_merge.py b/modules/local/spatialdata/merge/templates/spatialdata_merge.py index cd55e00c..409d8c00 100644 --- a/modules/local/spatialdata/merge/templates/spatialdata_merge.py +++ b/modules/local/spatialdata/merge/templates/spatialdata_merge.py @@ 
-1,36 +1,45 @@ #!/usr/bin/env python3 """Merge two spatialdata bundles to create a layered spatialdata object.""" +import argparse import json import os import shutil -import spatialdata # noqa: F401 (kept so versions topic via `import spatialdata` is valid) +import spatialdata -# Nextflow-injected variables -RAW_BUNDLE = "${raw_bundle}" -REDEFINED_BUNDLE = "${redefined_bundle}" -PREFIX = "${prefix}" -OUTPUT_FOLDER = "${outputfolder}" + +def parse_args(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description="Merge two spatialdata bundles") + parser.add_argument("--raw-bundle", required=True, help="Path to raw spatialdata bundle") + parser.add_argument("--redefined-bundle", required=True, help="Path to redefined spatialdata bundle") + parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)") + parser.add_argument("--output-folder", required=True, help="Output folder name") + return parser.parse_args() def main(): """Run spatialdata merge.""" + args = parse_args() print("[START]") - output_dir = f"spatialdata/{PREFIX}/{OUTPUT_FOLDER}" + output_dir = f"spatialdata/{args.prefix}/{args.output_folder}" + # Ensure the output folder exists if os.path.exists(output_dir): shutil.rmtree(output_dir) os.makedirs(output_dir) - for root, _, files in os.walk(RAW_BUNDLE): - rel_path = os.path.relpath(root, RAW_BUNDLE) + # Copy the entire reference bundle as is + for root, _, files in os.walk(args.raw_bundle): + rel_path = os.path.relpath(root, args.raw_bundle) target_path = os.path.join(output_dir, rel_path) os.makedirs(target_path, exist_ok=True) for file in files: shutil.copy(os.path.join(root, file), os.path.join(target_path, file)) + # Rename folders in Points, Shapes, and Tables to raw_* for category in ["points", "shapes", "tables"]: category_path = os.path.join(output_dir, category) if os.path.exists(category_path): @@ -40,8 +49,9 @@ def main(): new_path = os.path.join(category_path, f"raw_{folder}") os.rename(old_path, new_path) + # Copy folders from redefined_bundle and rename them as redefined_* for category in ["points", "shapes", "tables"]: - add_category_path = os.path.join(REDEFINED_BUNDLE, category) + add_category_path = os.path.join(args.redefined_bundle, category) output_category_path = os.path.join(output_dir, category) os.makedirs(output_category_path, exist_ok=True) @@ -52,16 +62,17 @@ def main(): shutil.copytree(src_folder, dest_folder) # Invalidate consolidated metadata in zarr.json -- the directory renames above - # made the element paths in the metadata stale. Without consolidated metadata, - # sd.read_zarr() discovers elements by scanning the filesystem directly. + # made the element paths in the metadata stale (e.g., 'points/transcripts' -> + # 'points/raw_transcripts'). Without consolidated metadata, sd.read_zarr() + # discovers elements by scanning the filesystem directly. 
zarr_json = os.path.join(output_dir, "zarr.json") if os.path.exists(zarr_json): with open(zarr_json) as f: - meta_obj = json.load(f) - if "consolidated_metadata" in meta_obj: - del meta_obj["consolidated_metadata"] + meta = json.load(f) + if "consolidated_metadata" in meta: + del meta["consolidated_metadata"] with open(zarr_json, "w") as f: - json.dump(meta_obj, f) + json.dump(meta, f) print("[NOTE] Removed stale consolidated metadata from zarr.json") print("[FINISH]") diff --git a/modules/local/spatialdata/meta/main.nf b/modules/local/spatialdata/meta/main.nf index c3665b1f..54d9ae25 100644 --- a/modules/local/spatialdata/meta/main.nf +++ b/modules/local/spatialdata/meta/main.nf @@ -25,7 +25,14 @@ process SPATIALDATA_META { prefix = task.ext.prefix ?: "${meta.id}" - template 'spatialdata_meta.py' + """ + python3 ${moduleDir}/templates/spatialdata_meta.py \\ + --spatialdata-bundle ${spatialdata_bundle} \\ + --xenium-bundle ${xenium_bundle} \\ + --prefix ${prefix} \\ + --metadata '${meta}' \\ + --output-folder ${outputfolder} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/spatialdata/meta/templates/spatialdata_meta.py b/modules/local/spatialdata/meta/templates/spatialdata_meta.py index e7f4d3d2..935f39b2 100644 --- a/modules/local/spatialdata/meta/templates/spatialdata_meta.py +++ b/modules/local/spatialdata/meta/templates/spatialdata_meta.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 """Add metadata to SpatialData bundle.""" +import argparse import json import sys import pandas as pd import spatialdata as sd -import zarr # noqa: F401 (preserved from original; ensures zarr is loaded before zarr.core.group) +import zarr # Fix zarr v3 + anndata + numcodecs incompatibility: # anndata's string writer passes numcodecs.VLenUTF8 to zarr.Group.create_array, @@ -16,13 +17,6 @@ import numcodecs import zarr.core.group as _zarr_group -# Nextflow-injected variables -SPATIALDATA_BUNDLE = "${spatialdata_bundle}" -XENIUM_BUNDLE = "${xenium_bundle}" -PREFIX = "${prefix}" -METADATA = "${meta}" -OUTPUT_FOLDER = "${outputfolder}" - _orig_create_array = _zarr_group.Group.create_array @@ -76,15 +70,27 @@ def convert_arrow_to_numpy(sdata): _convert_df_arrow_to_numpy(adata.var) +def parse_args(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description="Add metadata to SpatialData bundle") + parser.add_argument("--spatialdata-bundle", required=True, help="Path to spatialdata bundle") + parser.add_argument("--xenium-bundle", required=True, help="Path to xenium bundle") + parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)") + parser.add_argument("--metadata", required=True, help="Metadata string from Nextflow meta map") + parser.add_argument("--output-folder", required=True, help="Output folder name") + return parser.parse_args() + + def main(): """Run spatialdata metadata addition.""" + args = parse_args() print("[START]") - sdata = sd.read_zarr(SPATIALDATA_BUNDLE) + sdata = sd.read_zarr(args.spatialdata_bundle) # Convert metadata into dict print("[NOTE] Read in provenance ...") - metadata = METADATA.strip("[]") # Remove square brackets + metadata = args.metadata.strip("[]") # Remove square brackets pairs = metadata.split(", ") # Split by comma and space metadata = {k: v for k, v in (pair.split(":") for pair in pairs)} # Create dictionary @@ -97,7 +103,7 @@ def main(): # Add experimental metadata print("[NOTE] Read in experiment metadata ...") sdata['raw_table'].uns['experiment_xenium'] = '' - 
metadata_experiment = f'{XENIUM_BUNDLE}/experiment.xenium' + metadata_experiment = f'{args.xenium_bundle}/experiment.xenium' with open(metadata_experiment, "r") as f: metadata_experiment = json.load(f) sdata['raw_table'].uns['experiment_xenium'] = json.dumps(metadata_experiment) @@ -105,13 +111,13 @@ def main(): # Add gene panel metadata print("[NOTE] Read in gene panel metadata ...") sdata['raw_table'].uns['gene_panel'] = '' - metadata_gene_panel = f'{XENIUM_BUNDLE}/gene_panel.json' + metadata_gene_panel = f'{args.xenium_bundle}/gene_panel.json' with open(metadata_gene_panel, "r") as f: metadata_gene_panel = json.load(f) sdata['raw_table'].uns['gene_panel'] = json.dumps(metadata_gene_panel) convert_arrow_to_numpy(sdata) - sdata.write(f"spatialdata/{PREFIX}/{OUTPUT_FOLDER}", overwrite=True, consolidate_metadata=True, sdata_formats=None) + sdata.write(f"spatialdata/{args.prefix}/{args.output_folder}", overwrite=True, consolidate_metadata=True, sdata_formats=None) print("[FINISH]") diff --git a/modules/local/spatialdata/write/main.nf b/modules/local/spatialdata/write/main.nf index 1144b7c9..050dcdd7 100644 --- a/modules/local/spatialdata/write/main.nf +++ b/modules/local/spatialdata/write/main.nf @@ -27,7 +27,15 @@ process SPATIALDATA_WRITE { prefix = task.ext.prefix ?: "${meta.id}" - template 'spatialdata_write.py' + """ + python3 ${moduleDir}/templates/spatialdata_write.py \\ + --bundle ${bundle} \\ + --prefix ${prefix} \\ + --output-folder ${outputfolder} \\ + --segmented-object ${segmented_object} \\ + --coordinate-space ${coordinate_space} \\ + --format ${params.format} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/spatialdata/write/templates/spatialdata_write.py b/modules/local/spatialdata/write/templates/spatialdata_write.py index 9428dccd..421e830f 100644 --- a/modules/local/spatialdata/write/templates/spatialdata_write.py +++ b/modules/local/spatialdata/write/templates/spatialdata_write.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 """Write spatialdata object from segmentation format.""" +import argparse import sys import pandas as pd -import spatialdata # noqa: F401 (preserved from original; ensures spatialdata loads before spatialdata_io) +import spatialdata from spatialdata_io import xenium # Fix zarr v3 + anndata + numcodecs incompatibility: @@ -15,14 +16,6 @@ import numcodecs import zarr.core.group as _zarr_group -# Nextflow-injected variables -BUNDLE = "${bundle}" -PREFIX = "${prefix}" -OUTPUT_FOLDER = "${outputfolder}" -SEGMENTED_OBJECT = "${segmented_object}" -COORDINATE_SPACE = "${coordinate_space}" -FORMAT = "${params.format}" - _orig_create_array = _zarr_group.Group.create_array @@ -84,8 +77,21 @@ def convert_arrow_to_numpy(sdata): _convert_df_arrow_to_numpy(adata.var) +def parse_args(): + """Parse command-line arguments.""" + parser = argparse.ArgumentParser(description="Write spatialdata object from segmentation format") + parser.add_argument("--bundle", required=True, help="Path to input bundle") + parser.add_argument("--prefix", required=True, help="Output prefix (sample ID)") + parser.add_argument("--output-folder", required=True, help="Output folder name") + parser.add_argument("--segmented-object", required=True, help="Segmented object type (cells, nuclei, cells_and_nuclei)") + parser.add_argument("--coordinate-space", required=True, help="Coordinate space (pixels, microns)") + parser.add_argument("--format", required=True, help="Input format (xenium)") + return parser.parse_args() + + def main(): """Run 
spatialdata write.""" + args = parse_args() print("[START]") cells_as_circles = False @@ -94,13 +100,13 @@ def main(): cells_labels = False nucleus_labels = False - if SEGMENTED_OBJECT == "cells": + if args.segmented_object == "cells": cells_boundaries = True cells_labels = True - elif SEGMENTED_OBJECT == "nuclei": + elif args.segmented_object == "nuclei": nucleus_boundaries = True nucleus_labels = True - elif SEGMENTED_OBJECT == "cells_and_nuclei": + elif args.segmented_object == "cells_and_nuclei": cells_boundaries = True nucleus_boundaries = True cells_labels = True @@ -109,7 +115,7 @@ def main(): cells_as_circles = False # set sd variables based on the coordinate space - if COORDINATE_SPACE == "pixels": + if args.coordinate_space == "pixels": cells_labels = True nucleus_labels = True # Labels are sufficient in pixel space; boundaries can contain @@ -118,16 +124,16 @@ def main(): cells_boundaries = False nucleus_boundaries = False - if COORDINATE_SPACE == "microns": + if args.coordinate_space == "microns": cells_labels = False cells_boundaries = True nucleus_boundaries = False nucleus_labels = False cells_as_circles = False - if FORMAT == "xenium": + if args.format == "xenium": sd_xenium_obj = xenium( - BUNDLE, + args.bundle, cells_as_circles=cells_as_circles, cells_boundaries=cells_boundaries, nucleus_boundaries=nucleus_boundaries, @@ -139,7 +145,7 @@ def main(): ) print(sd_xenium_obj) convert_arrow_to_numpy(sd_xenium_obj) - sd_xenium_obj.write(f"spatialdata/{PREFIX}/{OUTPUT_FOLDER}") + sd_xenium_obj.write(f"spatialdata/{args.prefix}/{args.output_folder}") else: sys.exit("[ERROR] Format not found") diff --git a/modules/local/utility/convert_mask_uint32/main.nf b/modules/local/utility/convert_mask_uint32/main.nf index b80acfad..78190dfe 100644 --- a/modules/local/utility/convert_mask_uint32/main.nf +++ b/modules/local/utility/convert_mask_uint32/main.nf @@ -34,8 +34,11 @@ process CONVERT_MASK_UINT32 { script: prefix = task.ext.prefix ?: "${meta.id}" - - template 'convert_mask_uint32.py' + """ + python3 ${moduleDir}/templates/convert_mask_uint32.py \\ + --input ${mask} \\ + --output ${prefix}_uint32_mask.tif + """ stub: prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/utility/convert_mask_uint32/templates/convert_mask_uint32.py b/modules/local/utility/convert_mask_uint32/templates/convert_mask_uint32.py index f0a6835d..955ad4b7 100644 --- a/modules/local/utility/convert_mask_uint32/templates/convert_mask_uint32.py +++ b/modules/local/utility/convert_mask_uint32/templates/convert_mask_uint32.py @@ -7,13 +7,11 @@ the input mask, casts it to uint32, and writes the result. """ +import argparse + import numpy as np import tifffile -# Nextflow-injected variables -INPUT_PATH = "${mask}" -OUTPUT_PATH = "${prefix}_uint32_mask.tif" - def convert_mask_to_uint32(input_path: str, output_path: str) -> None: """ @@ -29,5 +27,20 @@ def convert_mask_to_uint32(input_path: str, output_path: str) -> None: print("Output dtype: uint32") +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Convert a segmentation mask TIFF to uint32 dtype." 
+ ) + parser.add_argument( + "--input", required=True, help="Path to input mask TIFF" + ) + parser.add_argument( + "--output", required=True, help="Path where uint32 mask will be written" + ) + return parser.parse_args() + + if __name__ == "__main__": - convert_mask_to_uint32(input_path=INPUT_PATH, output_path=OUTPUT_PATH) + args = parse_args() + convert_mask_to_uint32(input_path=args.input, output_path=args.output) diff --git a/modules/local/utility/downscale_morphology/main.nf b/modules/local/utility/downscale_morphology/main.nf index 39620bd6..1423106f 100644 --- a/modules/local/utility/downscale_morphology/main.nf +++ b/modules/local/utility/downscale_morphology/main.nf @@ -40,8 +40,13 @@ process DOWNSCALE_MORPHOLOGY { def diameter = task.ext.diameter ?: 9 def diam_mean = 30 prefix = task.ext.prefix ?: "${meta.id}" - - template 'downscale_morphology.py' + """ + python3 ${moduleDir}/templates/downscale_morphology.py \\ + --image ${image} \\ + --diameter ${diameter} \\ + --diam-mean ${diam_mean} \\ + --prefix ${prefix} + """ stub: prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/utility/downscale_morphology/templates/downscale_morphology.py b/modules/local/utility/downscale_morphology/templates/downscale_morphology.py index 2f0f1e7d..8544ecf3 100644 --- a/modules/local/utility/downscale_morphology/templates/downscale_morphology.py +++ b/modules/local/utility/downscale_morphology/templates/downscale_morphology.py @@ -13,18 +13,13 @@ {prefix}/scale_info.json - Scale factor and original/new dimensions. """ +import argparse import json from pathlib import Path import tifffile from skimage.transform import resize -# Nextflow-injected variables -IMAGE_PATH = "${image}" -DIAMETER = float("${diameter}") -DIAM_MEAN = float("${diam_mean}") -PREFIX = "${prefix}" - # Cellpose network requires a minimum spatial size of 256 px. MIN_DIM = 256 @@ -86,10 +81,23 @@ def downscale_image( print(f"Done: downscaled.tif written, shape={img_ds.shape}") +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Pre-downscale a morphology image for Cellpose." 
+ ) + parser.add_argument("--image", required=True, help="Morphology TIFF input") + parser.add_argument("--diameter", type=float, required=True, help="Target object diameter") + parser.add_argument("--diam-mean", type=float, required=True, help="Cellpose model diam_mean") + parser.add_argument("--prefix", required=True, help="Output directory") + return parser.parse_args() + + if __name__ == "__main__": + args = parse_args() downscale_image( - image_path=IMAGE_PATH, - diameter=DIAMETER, - diam_mean=DIAM_MEAN, - prefix=PREFIX, + image_path=args.image, + diameter=args.diameter, + diam_mean=args.diam_mean, + prefix=args.prefix, ) diff --git a/modules/local/utility/extract_dapi/main.nf b/modules/local/utility/extract_dapi/main.nf index 206e6519..3ff3ae78 100644 --- a/modules/local/utility/extract_dapi/main.nf +++ b/modules/local/utility/extract_dapi/main.nf @@ -35,8 +35,12 @@ process EXTRACT_DAPI { script: prefix = task.ext.prefix ?: "${meta.id}" def channel_index = task.ext.channel_index ?: 0 - - template 'extract_dapi.py' + """ + python3 ${moduleDir}/templates/extract_dapi.py \\ + --input ${image} \\ + --output ${prefix}_dapi.tif \\ + --channel-index ${channel_index} + """ stub: prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/utility/extract_dapi/templates/extract_dapi.py b/modules/local/utility/extract_dapi/templates/extract_dapi.py index f0d9b9f2..3d60f563 100644 --- a/modules/local/utility/extract_dapi/templates/extract_dapi.py +++ b/modules/local/utility/extract_dapi/templates/extract_dapi.py @@ -8,12 +8,9 @@ the requested channel, and writes the result. """ -import tifffile +import argparse -# Nextflow-injected variables -INPUT_PATH = "${image}" -OUTPUT_PATH = "${prefix}_dapi.tif" -CHANNEL_INDEX = int("${channel_index}") +import tifffile def extract_channel(input_path: str, output_path: str, channel_index: int) -> None: @@ -37,9 +34,27 @@ def extract_channel(input_path: str, output_path: str, channel_index: int) -> No print(f"Input shape: {orig_shape} -> extracted channel {channel_index}: {img.shape}") +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Extract a single channel from a multi-channel OME-TIFF." 
+ ) + parser.add_argument( + "--input", required=True, help="Path to multi-channel OME-TIFF morphology image" + ) + parser.add_argument( + "--output", required=True, help="Path where the single-channel TIFF will be written" + ) + parser.add_argument( + "--channel-index", type=int, default=0, help="Channel index to extract (default: 0)" + ) + return parser.parse_args() + + if __name__ == "__main__": + args = parse_args() extract_channel( - input_path=INPUT_PATH, - output_path=OUTPUT_PATH, - channel_index=CHANNEL_INDEX, + input_path=args.input, + output_path=args.output, + channel_index=args.channel_index, ) diff --git a/modules/local/utility/extract_preview_data/main.nf b/modules/local/utility/extract_preview_data/main.nf index 34d997fb..c0ae5117 100644 --- a/modules/local/utility/extract_preview_data/main.nf +++ b/modules/local/utility/extract_preview_data/main.nf @@ -25,7 +25,11 @@ process EXTRACT_PREVIEW_DATA { prefix = task.ext.prefix ?: "${meta.id}" - template 'extract_data.py' + """ + python3 ${moduleDir}/templates/extract_data.py \\ + --preview-html ${preview_html} \\ + --prefix ${prefix} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/utility/extract_preview_data/templates/extract_data.py b/modules/local/utility/extract_preview_data/templates/extract_data.py index d34fedaf..0ea737c2 100644 --- a/modules/local/utility/extract_preview_data/templates/extract_data.py +++ b/modules/local/utility/extract_preview_data/templates/extract_data.py @@ -6,6 +6,7 @@ Baysor preview.html file, writing MultiQC-compatible TSV and PNG files. """ +import argparse import base64 import html import json @@ -17,10 +18,6 @@ import pandas as pd from bs4 import BeautifulSoup -# Nextflow-injected variables -PREVIEW_HTML = "${preview_html}" -PREFIX = "${prefix}" - def get_png_files(soup: BeautifulSoup, outdir: Path) -> None: """Get png base64 images following specific h1 tags in preview.html""" @@ -163,9 +160,29 @@ def write_tsvs(specs: Dict[str, str], outdir: Path) -> List[Path]: return written +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Extract preview data from Baysor preview HTML reports." 
+ ) + parser.add_argument( + "--preview-html", + required=True, + help="Path to Baysor preview HTML file", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix (sample ID)", + ) + return parser.parse_args() + + if __name__ == "__main__": - input_path: Path = Path(PREVIEW_HTML) - outdir: Path = Path(PREFIX) + args = parse_args() + + input_path: Path = Path(args.preview_html) + outdir: Path = Path(args.prefix) text = input_path.read_text(encoding="utf-8", errors="ignore") soup = BeautifulSoup(text, "html.parser") diff --git a/modules/local/utility/get_coordinates/main.nf b/modules/local/utility/get_coordinates/main.nf index f6b70571..e45d4798 100644 --- a/modules/local/utility/get_coordinates/main.nf +++ b/modules/local/utility/get_coordinates/main.nf @@ -24,7 +24,10 @@ process GET_TRANSCRIPTS_COORDINATES { prefix = task.ext.prefix ?: "${meta.id}" - template 'get_coordinates.py' + """ + python3 ${moduleDir}/templates/get_coordinates.py \\ + --transcripts ${transcripts} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/utility/get_coordinates/templates/get_coordinates.py b/modules/local/utility/get_coordinates/templates/get_coordinates.py index 05c01567..8770f9a2 100644 --- a/modules/local/utility/get_coordinates/templates/get_coordinates.py +++ b/modules/local/utility/get_coordinates/templates/get_coordinates.py @@ -6,10 +6,9 @@ bounding box (x_min, x_max, y_min, y_max) to stdout. """ -import pandas as pd +import argparse -# Nextflow-injected variables -TRANSCRIPTS = "${transcripts}" +import pandas as pd def get_coordinates(parquet_path: str): @@ -42,6 +41,20 @@ def get_coordinates(parquet_path: str): ) +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Get transcript coordinate bounds from a Parquet file." + ) + parser.add_argument( + "--transcripts", + required=True, + help="Path to transcripts parquet file", + ) + return parser.parse_args() + + if __name__ == "__main__": - result = get_coordinates(TRANSCRIPTS) + args = parse_args() + result = get_coordinates(args.transcripts) print(",".join(str(v) for v in result)) diff --git a/modules/local/utility/parquet_to_csv/main.nf b/modules/local/utility/parquet_to_csv/main.nf index 6df3da37..65d8e580 100644 --- a/modules/local/utility/parquet_to_csv/main.nf +++ b/modules/local/utility/parquet_to_csv/main.nf @@ -24,7 +24,12 @@ process PARQUET_TO_CSV { } prefix = task.ext.prefix ?: "${meta.id}" - template 'parquet_to_csv.py' + """ + python3 ${moduleDir}/templates/parquet_to_csv.py \\ + --transcripts ${transcripts} \\ + --extension ${extension} \\ + --prefix ${prefix} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/utility/parquet_to_csv/templates/parquet_to_csv.py b/modules/local/utility/parquet_to_csv/templates/parquet_to_csv.py index cb1d8107..bfa19c40 100644 --- a/modules/local/utility/parquet_to_csv/templates/parquet_to_csv.py +++ b/modules/local/utility/parquet_to_csv/templates/parquet_to_csv.py @@ -5,15 +5,11 @@ Reads a Parquet file and writes it as CSV, optionally gzip-compressed. 
""" +import argparse from pathlib import Path import pandas as pd -# Nextflow-injected variables -TRANSCRIPTS = "${transcripts}" -EXTENSION = "${extension}" -PREFIX = "${prefix}" - def convert_parquet( transcripts: str, @@ -42,9 +38,33 @@ def convert_parquet( return None +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Convert a Parquet file to CSV format." + ) + parser.add_argument( + "--transcripts", + required=True, + help="Input parquet filename", + ) + parser.add_argument( + "--extension", + default=".csv", + help="Output extension: '.csv' or '.gz' (default: .csv)", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix (sample ID)", + ) + return parser.parse_args() + + if __name__ == "__main__": + args = parse_args() convert_parquet( - transcripts=TRANSCRIPTS, - extension=EXTENSION, - prefix=PREFIX, + transcripts=args.transcripts, + extension=args.extension, + prefix=args.prefix, ) diff --git a/modules/local/utility/resize_tif/main.nf b/modules/local/utility/resize_tif/main.nf index 965d652e..a4989d68 100644 --- a/modules/local/utility/resize_tif/main.nf +++ b/modules/local/utility/resize_tif/main.nf @@ -25,7 +25,14 @@ process RESIZE_TIF { prefix = task.ext.prefix ?: "${meta.id}" - template 'resize_tif.py' + """ + python3 ${moduleDir}/templates/resize_tif.py \\ + --mask ${mask} \\ + --transcripts ${transcripts} \\ + --metadata ${metadata} \\ + --prefix ${prefix} \\ + --mask-filename ${mask} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/utility/resize_tif/templates/resize_tif.py b/modules/local/utility/resize_tif/templates/resize_tif.py index 5674a094..6cca640d 100644 --- a/modules/local/utility/resize_tif/templates/resize_tif.py +++ b/modules/local/utility/resize_tif/templates/resize_tif.py @@ -6,6 +6,7 @@ space of Xenium transcript data using microns-per-pixel metadata. """ +import argparse import json import os from typing import Tuple @@ -15,13 +16,6 @@ import tifffile from skimage.transform import resize -# Nextflow-injected variables -MASK = "${mask}" -TRANSCRIPTS = "${transcripts}" -METADATA = "${metadata}" -PREFIX = "${prefix}" -MASK_FILENAME = "${mask}" - def read_mask(mask_path: str) -> np.ndarray: """Read the segmentation mask from a TIFF file.""" @@ -113,13 +107,28 @@ def main(mask_path: str, transcripts_path: str, metadata_path: str, output_path: print(f"Saved resized mask -> {output_path}") +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Resize a segmentation TIFF mask to match transcript coordinates." 
+ ) + parser.add_argument("--mask", required=True, help="Path to segmentation mask TIFF") + parser.add_argument("--transcripts", required=True, help="Path to transcripts file") + parser.add_argument("--metadata", required=True, help="Path to metadata JSON") + parser.add_argument("--prefix", required=True, help="Output directory prefix") + parser.add_argument("--mask-filename", required=True, help="Original mask filename for output naming") + return parser.parse_args() + + if __name__ == "__main__": - os.makedirs(PREFIX, exist_ok=True) - output_mask: str = os.path.join(PREFIX, f"resized_{MASK_FILENAME}.tif") + args = parse_args() + + os.makedirs(args.prefix, exist_ok=True) + output_mask: str = os.path.join(args.prefix, f"resized_{args.mask_filename}.tif") main( - mask_path=MASK, - transcripts_path=TRANSCRIPTS, - metadata_path=METADATA, + mask_path=args.mask, + transcripts_path=args.transcripts, + metadata_path=args.metadata, output_path=output_mask, ) diff --git a/modules/local/utility/segger2xr/main.nf b/modules/local/utility/segger2xr/main.nf index b562eed4..daec2fba 100644 --- a/modules/local/utility/segger2xr/main.nf +++ b/modules/local/utility/segger2xr/main.nf @@ -24,9 +24,14 @@ process SEGGER2XR { error "SEGGER2XR module does not support Conda. Please use Docker / Singularity / Podman instead." } - min_transcripts = task.ext.min_transcripts_per_cell ?: 3 + def min_transcripts = task.ext.min_transcripts_per_cell ?: 3 - template 'segger2xr.py' + """ + python3 ${moduleDir}/templates/segger2xr.py \\ + --transcripts ${transcripts} \\ + --prefix ${meta.id} \\ + --min-transcripts ${min_transcripts} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/utility/segger2xr/templates/segger2xr.py b/modules/local/utility/segger2xr/templates/segger2xr.py index 96f2d8df..22889e82 100644 --- a/modules/local/utility/segger2xr/templates/segger2xr.py +++ b/modules/local/utility/segger2xr/templates/segger2xr.py @@ -7,6 +7,7 @@ and GeoJSON cell boundary polygons for xeniumranger import-segmentation. """ +import argparse import json from pathlib import Path from typing import List @@ -14,11 +15,6 @@ import pandas as pd from scipy.spatial import ConvexHull -# Nextflow-injected variables -TRANSCRIPTS = "${transcripts}" -PREFIX = "${meta.id}" -MIN_TRANSCRIPTS = int("${min_transcripts}") - # Expected columns in transcripts.parquet REQUIRED_COLUMNS: List[str] = [ "transcript_id", @@ -218,9 +214,34 @@ def main(input_file: str, prefix: str, min_transcripts: int = 3) -> None: generate_viz_polygons(transcripts, f"{prefix}/segmentation_polygons.json", cell_map) +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Convert Segger prediction output to XeniumRanger-compatible format." 
+ ) + parser.add_argument( + "--transcripts", + required=True, + help="Path to Segger output transcripts parquet file", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix (sample ID)", + ) + parser.add_argument( + "--min-transcripts", + type=int, + default=3, + help="Minimum transcripts per cell (default: 3)", + ) + return parser.parse_args() + + if __name__ == "__main__": + args = parse_args() main( - input_file=TRANSCRIPTS, - prefix=PREFIX, - min_transcripts=MIN_TRANSCRIPTS, + input_file=args.transcripts, + prefix=args.prefix, + min_transcripts=args.min_transcripts, ) diff --git a/modules/local/utility/split_transcripts/main.nf b/modules/local/utility/split_transcripts/main.nf index 3cc54042..6862643a 100644 --- a/modules/local/utility/split_transcripts/main.nf +++ b/modules/local/utility/split_transcripts/main.nf @@ -23,9 +23,15 @@ process SPLIT_TRANSCRIPTS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "SPLIT_TRANSCRIPTS module does not support Conda. Please use Docker / Singularity / Podman instead." } - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" - template 'split_transcripts.py' + """ + python3 ${moduleDir}/templates/split_transcripts.py \\ + --transcripts ${transcripts} \\ + --x-bins ${x_bins} \\ + --y-bins ${y_bins} \\ + --prefix ${prefix} + """ stub: // Exit if running this module with -profile conda / -profile mamba diff --git a/modules/local/utility/split_transcripts/templates/split_transcripts.py b/modules/local/utility/split_transcripts/templates/split_transcripts.py index 1fa6db15..275fbab1 100644 --- a/modules/local/utility/split_transcripts/templates/split_transcripts.py +++ b/modules/local/utility/split_transcripts/templates/split_transcripts.py @@ -6,17 +6,12 @@ tiles, writing a splits.csv with tile boundaries. """ +import argparse import os from typing import List import pandas as pd -# Nextflow-injected variables -TRANSCRIPTS = "${transcripts}" -X_BINS = "${x_bins}" -Y_BINS = "${y_bins}" -PREFIX = "${prefix}" - def compute_quantile_ranges(df: pd.DataFrame, col: str, n_bins: int) -> List: """ @@ -74,10 +69,41 @@ def main( return None +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Split transcript coordinates into spatial tiles." 
+ ) + parser.add_argument( + "--transcripts", + required=True, + help="Path to transcripts parquet file", + ) + parser.add_argument( + "--x-bins", + type=int, + required=True, + help="Number of bins along X axis", + ) + parser.add_argument( + "--y-bins", + type=int, + required=True, + help="Number of bins along Y axis", + ) + parser.add_argument( + "--prefix", + required=True, + help="Output directory prefix", + ) + return parser.parse_args() + + if __name__ == "__main__": + args = parse_args() main( - transcripts=TRANSCRIPTS, - x_bins=int(X_BINS), - y_bins=int(Y_BINS), - prefix=PREFIX, + transcripts=args.transcripts, + x_bins=args.x_bins, + y_bins=args.y_bins, + prefix=args.prefix, ) diff --git a/modules/local/utility/upscale_mask/main.nf b/modules/local/utility/upscale_mask/main.nf index 41b3ff91..f03464a8 100644 --- a/modules/local/utility/upscale_mask/main.nf +++ b/modules/local/utility/upscale_mask/main.nf @@ -34,8 +34,12 @@ process UPSCALE_MASK { script: prefix = task.ext.prefix ?: "${meta.id}" - - template 'upscale_mask.py' + """ + python3 ${moduleDir}/templates/upscale_mask.py \\ + --mask ${mask} \\ + --scale-info ${scale_info} \\ + --prefix ${prefix} + """ stub: prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/utility/upscale_mask/templates/upscale_mask.py b/modules/local/utility/upscale_mask/templates/upscale_mask.py index 9c0ef0bb..6cc1694e 100644 --- a/modules/local/utility/upscale_mask/templates/upscale_mask.py +++ b/modules/local/utility/upscale_mask/templates/upscale_mask.py @@ -9,6 +9,7 @@ Output: {prefix}/upscaled_{mask_basename}.tif """ +import argparse import json from pathlib import Path @@ -16,11 +17,6 @@ import tifffile from PIL import Image -# Nextflow-injected variables -MASK = "${mask}" -SCALE_INFO = "${scale_info}" -PREFIX = "${prefix}" - def upscale_mask(mask_path: str, scale_info_path: str, prefix: str) -> None: """ @@ -56,9 +52,21 @@ def upscale_mask(mask_path: str, scale_info_path: str, prefix: str) -> None: ) +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Upscale a Cellpose mask back to original resolution." + ) + parser.add_argument("--mask", required=True, help="Downscaled mask TIFF") + parser.add_argument("--scale-info", required=True, help="scale_info.json from downscale step") + parser.add_argument("--prefix", required=True, help="Output directory") + return parser.parse_args() + + if __name__ == "__main__": + args = parse_args() upscale_mask( - mask_path=MASK, - scale_info_path=SCALE_INFO, - prefix=PREFIX, + mask_path=args.mask, + scale_info_path=args.scale_info, + prefix=args.prefix, ) diff --git a/modules/local/xenium_patch/stitch/main.nf b/modules/local/xenium_patch/stitch/main.nf index 118e77a7..db687a9a 100644 --- a/modules/local/xenium_patch/stitch/main.nf +++ b/modules/local/xenium_patch/stitch/main.nf @@ -34,8 +34,18 @@ process XENIUM_PATCH_STITCH { script: def args = task.ext.args ?: '' + """ + python3 ${moduleDir}/templates/stitch_transcripts.py \\ + --patches ${patches} \\ + --output output \\ + ${args} - template 'stitch.py' + # Post-process: ensure all GeoJSON geometries are Polygon and + # reconcile dropped cells in the transcript CSV. 
+    python3 ${moduleDir}/templates/stitch_postprocess.py \\
+        --geojson output/xr-cell-polygons.geojson \\
+        --csv output/xr-transcript-metadata.csv
+    """

     stub:
     """
diff --git a/modules/local/xenium_patch/stitch/templates/stitch_postprocess.py b/modules/local/xenium_patch/stitch/templates/stitch_postprocess.py
new file mode 100644
index 00000000..7144b1ac
--- /dev/null
+++ b/modules/local/xenium_patch/stitch/templates/stitch_postprocess.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""
+Post-process stitched per-patch segmentation outputs.
+
+Ensures every GeoJSON feature is a single Polygon: make_valid() and
+sopa.solve_conflicts() can produce MultiPolygon, MultiLineString, or
+GeometryCollection geometries that XeniumRanger rejects. Transcripts
+of cells dropped during cleanup are also reassigned to UNASSIGNED in
+the transcript CSV so the two outputs stay consistent.
+"""
+
+import argparse
+import csv
+import json
+
+import shapely
+from shapely.geometry import mapping, shape
+
+
+def clean_geojson(geojson_path: str) -> set:
+    """
+    Force every feature to a single valid Polygon.
+
+    Returns the set of cell ids whose features were dropped.
+    """
+    with open(geojson_path) as f:
+        data = json.load(f)
+
+    clean = []
+    dropped_cells = set()
+    for feat in data["features"]:
+        geom = shape(feat["geometry"])
+        if not geom.is_valid:
+            geom = shapely.make_valid(geom)
+        poly = None
+        if geom.geom_type == "Polygon":
+            poly = geom
+        elif geom.geom_type == "MultiPolygon":
+            poly = max(geom.geoms, key=lambda g: g.area)
+        elif geom.geom_type == "GeometryCollection":
+            polys = [g for g in geom.geoms if g.geom_type == "Polygon"]
+            if polys:
+                poly = max(polys, key=lambda g: g.area)
+        if poly is not None and not poly.is_empty:
+            feat["geometry"] = mapping(poly)
+            clean.append(feat)
+        else:
+            cell_id = feat.get("id") or feat.get("properties", {}).get("cell_id", "")
+            dropped_cells.add(str(cell_id))
+
+    print(f"GeoJSON: {len(clean)} kept, {len(dropped_cells)} dropped: {dropped_cells}")
+    data["features"] = clean
+    with open(geojson_path, "w") as f:
+        json.dump(data, f)
+
+    return dropped_cells
+
+
+def reassign_dropped(csv_path: str, dropped_cells: set) -> None:
+    """
+    Reassign transcripts of dropped cells to UNASSIGNED in the CSV.
+    """
+    if not dropped_cells:
+        return
+
+    with open(csv_path) as f:
+        reader = csv.DictReader(f)
+        fieldnames = reader.fieldnames
+        rows = list(reader)
+
+    reassigned = 0
+    for row in rows:
+        if row["cell"] in dropped_cells:
+            row["cell"] = ""
+            row["is_noise"] = "1"
+            reassigned += 1
+
+    with open(csv_path, "w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+    print(f"CSV: {reassigned} transcripts reassigned to UNASSIGNED")
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command-line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Clean stitched GeoJSON polygons and reconcile transcript CSV."
+ ) + parser.add_argument("--geojson", required=True, help="Path to xr-cell-polygons.geojson") + parser.add_argument("--csv", required=True, help="Path to xr-transcript-metadata.csv") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + dropped = clean_geojson(args.geojson) + reassign_dropped(args.csv, dropped) diff --git a/modules/local/xenium_patch/stitch/templates/stitch.py b/modules/local/xenium_patch/stitch/templates/stitch_transcripts.py similarity index 85% rename from modules/local/xenium_patch/stitch/templates/stitch.py rename to modules/local/xenium_patch/stitch/templates/stitch_transcripts.py index 2306a7dc..d9fb8d41 100644 --- a/modules/local/xenium_patch/stitch/templates/stitch.py +++ b/modules/local/xenium_patch/stitch/templates/stitch_transcripts.py @@ -1,25 +1,15 @@ #!/usr/bin/env python3 -"""Stitch per-patch segmentation results, then post-process the GeoJSON + CSV. - -Phase 1 (stitch_transcripts): - Stitch per-patch Baysor segmentation results into unified output. Uses - sopa's solve_conflicts() for overlap resolution at patch boundaries. - -Phase 2 (postprocess): - Ensures every GeoJSON feature is a single Polygon: make_valid() and - sopa.solve_conflicts() can produce MultiPolygon, MultiLineString, or - GeometryCollection geometries that XeniumRanger rejects. Cells dropped - during cleanup are also reassigned to UNASSIGNED in the transcript CSV - so the two outputs stay consistent. +"""Stitch per-patch Baysor segmentation results into unified output. + +Standalone script that replaces the xenium_patch CLI package's stitch +functionality. Uses sopa's solve_conflicts() for overlap resolution. """ from __future__ import annotations import argparse -import csv import json import os -import shlex from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path @@ -34,11 +24,6 @@ from shapely.geometry import mapping, shape from sopa.segmentation.resolve import solve_conflicts -# Nextflow-injected variables -PATCHES = "${patches}" -ARGS = "${args}" # task.ext.args, may be empty - - # --------------------------------------------------------------------------- # Geometry helpers # --------------------------------------------------------------------------- @@ -59,7 +44,7 @@ def _ensure_polygon(geom) -> "shapely.Polygon | None": if geom.geom_type == "GeometryCollection": polys = [g for g in geom.geoms if g.geom_type == "Polygon"] return max(polys, key=lambda g: g.area) if polys else None - # LineString, MultiLineString, Point, etc. -- not a polygon + # LineString, MultiLineString, Point, etc. — not a polygon return None @@ -645,7 +630,7 @@ def _stitch_sopa_resolve( # --------------------------------------------------------------------------- -# Main orchestrator (stitch phase) +# Main orchestrator # --------------------------------------------------------------------------- @@ -779,17 +764,11 @@ def stitch_transcript_assignments( # --------------------------------------------------------------------------- -# Phase 1 entry point: stitch_transcripts (replaces stitch_transcripts.py main) +# CLI # --------------------------------------------------------------------------- -def stitch_transcripts(patches_dir: str, output_dir: str, args_str: str) -> None: - """Phase 1: parse extra CLI args and run the stitch orchestrator. - - Mirrors the original ``stitch_transcripts.py`` argparse interface so that - any flags passed through ``task.ext.args`` are honored by the same - parser as before. 
- """ +def main() -> None: parser = argparse.ArgumentParser( description="Stitch per-patch Baysor segmentation results into unified output." ) @@ -815,106 +794,13 @@ def stitch_transcripts(patches_dir: str, output_dir: str, args_str: str) -> None default="segmentation_polygons.json", help="GeoJSON filename within each patch (default: segmentation_polygons.json)", ) - - argv = ["--patches", patches_dir, "--output", output_dir] - if args_str: - argv.extend(shlex.split(args_str)) - parsed = parser.parse_args(argv) + args = parser.parse_args() stitch_transcript_assignments( - patches_dir=parsed.patches, - output_dir=parsed.output, - csv_filename=parsed.csv_filename, - geojson_filename=parsed.geojson_filename, - ) - - -# --------------------------------------------------------------------------- -# Phase 2: post-processing (from stitch_postprocess.py) -# --------------------------------------------------------------------------- - - -def clean_geojson(geojson_path: str) -> set: - """ - Force every feature to a single valid Polygon. - - Returns the set of cell ids whose features were dropped. - """ - with open(geojson_path) as f: - data = json.load(f) - - clean = [] - dropped_cells = set() - for feat in data["features"]: - geom = shape(feat["geometry"]) - if not geom.is_valid: - geom = shapely.make_valid(geom) - poly = None - if geom.geom_type == "Polygon": - poly = geom - elif geom.geom_type == "MultiPolygon": - poly = max(geom.geoms, key=lambda g: g.area) - elif geom.geom_type == "GeometryCollection": - polys = [g for g in geom.geoms if g.geom_type == "Polygon"] - if polys: - poly = max(polys, key=lambda g: g.area) - if poly is not None and not poly.is_empty: - feat["geometry"] = mapping(poly) - clean.append(feat) - else: - cell_id = feat.get("id") or feat.get("properties", {}).get("cell_id", "") - dropped_cells.add(str(cell_id)) - - print(f"GeoJSON: {len(clean)} kept, {len(dropped_cells)} dropped: {dropped_cells}") - data["features"] = clean - with open(geojson_path, "w") as f: - json.dump(data, f) - - return dropped_cells - - -def reassign_dropped(csv_path: str, dropped_cells: set) -> None: - """ - Reassign transcripts of dropped cells to UNASSIGNED in the CSV. 
-    """
-    if not dropped_cells:
-        return
-
-    with open(csv_path) as f:
-        reader = csv.DictReader(f)
-        fieldnames = reader.fieldnames
-        rows = list(reader)
-
-    reassigned = 0
-    for row in rows:
-        if row["cell"] in dropped_cells:
-            row["cell"] = ""
-            row["is_noise"] = "1"
-            reassigned += 1
-
-    with open(csv_path, "w", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=fieldnames)
-        writer.writeheader()
-        writer.writerows(rows)
-    print(f"CSV: {reassigned} transcripts reassigned to UNASSIGNED")
-
-
-def postprocess(geojson_path: str, csv_path: str) -> None:
-    """Phase 2 entry point: cleanup polygons and reconcile the CSV."""
-    dropped = clean_geojson(geojson_path)
-    reassign_dropped(csv_path, dropped)
-
-
-# ---------------------------------------------------------------------------
-# Main: run both phases sequentially
-# ---------------------------------------------------------------------------
-
-
-def main() -> None:
-    stitch_transcripts(PATCHES, "output", ARGS)
-    postprocess(
-        "output/xr-cell-polygons.geojson",
-        "output/xr-transcript-metadata.csv",
+        patches_dir=args.patches,
+        output_dir=args.output,
+        csv_filename=args.csv_filename,
+        geojson_filename=args.geojson_filename,
     )

From 4edccc3650eb3839a8f85f62702c8d903d734a1f Mon Sep 17 00:00:00 2001
From: an-altosian
Date: Thu, 7 May 2026 15:17:07 +0000
Subject: [PATCH 3/3] refactor(modules): move per-module Python scripts to
 pipeline-level bin/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the templates/-directive approach (which broke under the
Nextflow constraint that `output: eval(...)` channels are bash-only,
and therefore incompatible with `template 'foo.py'` selecting a Python
interpreter via its shebang). Pipeline-level bin/ is the cleanest path:
Nextflow auto-prepends the pipeline-level bin/ to PATH for every
process, scripts resolve by name, and process scripts stay bash so
eval() topic channels keep working.

Per-module changes (19 modules, 20 scripts):
- bin/_
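
For illustration, a minimal sketch of the bin/-on-PATH pattern the message
above describes, modeled on the SPLIT_TRANSCRIPTS module from the diffs
earlier in this series. The channel names and the eval() command are
invented for the example, and the eval()/topic output syntax assumes
Nextflow 24.04 or later; only split_transcripts.py and its flags come from
this patch set:

    process SPLIT_TRANSCRIPTS {
        input:
        tuple val(meta), path(transcripts)
        val x_bins
        val y_bins

        output:
        tuple val(meta), path("${meta.id}"), emit: tiles
        eval("python3 --version"), topic: versions

        script:
        // Nextflow prepends the pipeline-level bin/ to PATH for every task,
        // so the executable script resolves by name and the script block
        // stays plain bash -- the precondition for the eval() output above.
        """
        split_transcripts.py \\
            --transcripts ${transcripts} \\
            --x-bins ${x_bins} \\
            --y-bins ${y_bins} \\
            --prefix ${meta.id}
        """
    }

Because nothing in the script block names an interpreter, the bash-only
eval() command and the Python call coexist in the same task, which is the
combination the template-with-shebang approach could not express.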