From c30be3eb3228e3223c3f82aa3399b9973b35129b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96zlem=20Muslu?= Date: Wed, 28 Jan 2026 10:53:28 +0100 Subject: [PATCH 1/4] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6d7cb2a..4879e5b 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ OPTIONAL ARGUMENTS: ```bash $ bash variantmedium.sh \ - --samplehsheet \ + --samplesheet \ --outdir \ --profile conda ``` From 294453408b61a27469066de009fbc250f47dfbe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96zlem=20Muslu?= Date: Wed, 28 Jan 2026 16:52:09 +0100 Subject: [PATCH 2/4] Update main.nf --- subworkflows/parse_samplesheet/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/parse_samplesheet/main.nf b/subworkflows/parse_samplesheet/main.nf index d547fa6..feba6a4 100644 --- a/subworkflows/parse_samplesheet/main.nf +++ b/subworkflows/parse_samplesheet/main.nf @@ -35,7 +35,7 @@ workflow PARSE_SAMPLESHEET { validateSamplesheet(ch_samplesheet) log.info "[INFO] Samplesheet validated" - def sep = ch_samplesheet_file.name.endsWith('.tsv') ? '\t' : ',' + def sep = ch_samplesheet.name.endsWith('.tsv') ? '\t' : ',' ch_samplesheet .splitCsv(header: true, sep: sep) .map { row -> From 4250e09374653f7c3404281da4cda9f5f20f7e2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96zlem=20Muslu?= Date: Wed, 28 Jan 2026 16:56:07 +0100 Subject: [PATCH 3/4] Update main.nf --- subworkflows/parse_samplesheet/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/parse_samplesheet/main.nf b/subworkflows/parse_samplesheet/main.nf index feba6a4..9398882 100644 --- a/subworkflows/parse_samplesheet/main.nf +++ b/subworkflows/parse_samplesheet/main.nf @@ -35,7 +35,7 @@ workflow PARSE_SAMPLESHEET { validateSamplesheet(ch_samplesheet) log.info "[INFO] Samplesheet validated" - def sep = ch_samplesheet.name.endsWith('.tsv') ? 
'\t' : ',' + def sep = params.samplesheet.endsWith('.tsv') ? '\t' : ',' ch_samplesheet .splitCsv(header: true, sep: sep) .map { row -> From fd6784f621460bba306364592d8dc0b695755a01 Mon Sep 17 00:00:00 2001 From: khersameesh24 Date: Wed, 29 Apr 2026 14:28:36 +0200 Subject: [PATCH 4/4] testing changes --- conf/modules.config | 6 +-- modules/variantmedium/call/environment.yml | 3 +- modules/variantmedium/call/main.nf | 7 ++- nextflow.config | 27 ++++++++-- subworkflows/parse_samplesheet/main.nf | 60 ++++++++++++---------- variantmedium.sh | 16 ++++-- 6 files changed, 78 insertions(+), 41 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9732a20..3513d94 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -48,8 +48,6 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] - clusterOptions = '--gres=gpu:1' - accelerator = 'nvidia' } withName: CALL_VARIANTS_SNV { @@ -58,8 +56,8 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] + queue = 'GPU' clusterOptions = '--gres=gpu:1' - accelerator = 'nvidia' } withName: CALL_VARIANTS_INDEL { @@ -68,8 +66,8 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] + queue = 'GPU' clusterOptions = '--gres=gpu:1' - accelerator = 'nvidia' } } \ No newline at end of file diff --git a/modules/variantmedium/call/environment.yml b/modules/variantmedium/call/environment.yml index 58c9328..66b0f5e 100644 --- a/modules/variantmedium/call/environment.yml +++ b/modules/variantmedium/call/environment.yml @@ -10,4 +10,5 @@ dependencies: - pip - pip: - fire==0.5.0 - - torch==2.0.1 \ No newline at end of file + - --extra-index-url https://download.pytorch.org/whl/cu121 + - torch==2.1.0+cu121 \ No newline at end of file diff --git a/modules/variantmedium/call/main.nf b/modules/variantmedium/call/main.nf index d3586fb..3df9fd5 100644 --- a/modules/variantmedium/call/main.nf +++ b/modules/variantmedium/call/main.nf @@ -1,6 +1,9 @@ process CALL_VARIANTS { - label "process_high_memory" - + label ( + params.use_gpu ? "process_gpu" : // test the boolean itself: an interpolated "false" string is non-empty and therefore always truthy + error("GPU support is required for CALL_VARIANTS process. Please include the 'gpu' profile to run variantmedium call") + ) + conda "${moduleDir}/environment.yml" container "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a7/a73b7de4a8d00029f69b6cef20b74e1a1d6b48c1d7d5a65b5e55cf09c3fe6ce7/data" diff --git a/nextflow.config b/nextflow.config index 02cd286..2899294 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,12 +41,16 @@ params { // version - needs an update with each release version = '1.1.0' + // gpu specific + use_gpu = false + } process { - beforeScript = ''' - echo SLURM_JOB_ID=$SLURM_JOB_ID - ''' + resourceLimits = [ + cpus: 4, + memory: '16.GB', + ] } profiles { @@ -60,9 +64,24 @@ profiles { } slurm { process { - executor = 'slurm' + executor = 'slurm' } } + gpu { + params.use_gpu = true + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } +} + +// trace file and reports +report { + enabled = true + overwrite = true +} +trace { + enabled = true + overwrite = true } // Capture exit codes from upstream processes when piping diff --git 
a/subworkflows/parse_samplesheet/main.nf b/subworkflows/parse_samplesheet/main.nf index fddaf4c..b604298 100644 --- a/subworkflows/parse_samplesheet/main.nf +++ b/subworkflows/parse_samplesheet/main.nf @@ -3,55 +3,63 @@ // ------------------------------------------------------- def validateSamplesheet(samplesheet_ch) { samplesheet_ch.map { path -> - def header = path.text.readLines()[0] - def cols = header.split(/,|\t/) // handle CSV or TSV + def sep = path.name.endsWith('.tsv') ? '\t' : ',' + def lines = path.text.readLines() + if (!lines) error "Samplesheet is empty: ${path}" + + def header = lines[0].split(sep).collect { colname -> colname.trim() } def required = ['sample_name','pair_identifier','tumor_bam','normal_bam'] - def missing = required.findAll { it !in cols } + def missing = required.findAll { colname -> colname !in header } if (missing) { error "Samplesheet is missing required columns: ${missing.join(', ')}" } // Optional: check BAM files exist - path.text.readLines().tail().each { line -> - def vals = line.split(/,|\t/) + lines.tail().eachWithIndex { line, idx -> + def vals = line.split(sep).collect { val -> val.trim() } + if (vals.size() < 4) error "Line ${idx + 2} is malformed: ${line}" + def tumor = file(vals[2]) def normal = file(vals[3]) - if (!tumor.exists()) error "Tumor BAM missing: $tumor" - if (!normal.exists()) error "Normal BAM missing: $normal" + if (!tumor.exists()) error "Tumor BAM missing: ${tumor}" + if (!normal.exists()) error "Normal BAM missing: ${normal}" } + + return [path, sep] // pass separator for downstream use } } - + workflow PARSE_SAMPLESHEET { take: - ch_samplesheet // channel ["path-to-samplesheet"] + ch_samplesheet // channel ["path-to-samplesheet"] main: - - validateSamplesheet(ch_samplesheet) - log.info "[INFO] Samplesheet validated" - - ch_samplesheet - .splitCsv(header: true) - .map { row -> - def tumorPath = row.tumor_bam.trim() - def normalPath = row.normal_bam.trim() + // Validate samplesheet + def validated_ch 
= validateSamplesheet(ch_samplesheet) + log.info "[INFO] Samplesheet validated" - // get file object - def tumorFile = file(tumorPath) - def normalFile = file(normalPath) + // Split samplesheet into sample info + // flatMap emits one 4-element tuple per row; flatten() is recursive and would also unpack each tuple into loose values + validated_ch + .flatMap { path, sep -> + path.text.readLines().tail().collect { line -> + def vals = line.split(sep).collect { val -> val.trim() } - tuple(row.sample_name, row.pair_identifier, tumorFile, normalFile) + tuple( + vals[0], // sample_name + vals[1], // pair_identifier + file(vals[2]), // tumor_bam + file(vals[3]) // normal_bam + ) + } } .set { sample_info_ch } - - emit: + emit: ch_samples = sample_info_ch - -} \ No newline at end of file +} diff --git a/variantmedium.sh b/variantmedium.sh index 7f2e372..632dbbd 100644 --- a/variantmedium.sh +++ b/variantmedium.sh @@ -287,6 +287,7 @@ CMD=(nextflow run tron-bioinformatics/VariantMedium --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) # add report/trace args if requested CMD+=("${REPORT_ARGS[@]}") @@ -312,6 +313,7 @@ else --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") [[ -n "$MOUNT_PATH" ]] && CMD+=(--mount_path "${MOUNT_PATH}") @@ -329,7 +331,7 @@ else readarray -t REPORT_ARGS < <(generate_nf_report "$PIPELINE_STEP") CMD=(nextflow run tron-bioinformatics/tronflow-bam-preprocessing - -r v2.1.0 + -r v2.2.2 -profile "${PROFILE}" --input_files "${TSV_FOLDER}/preproc.tsv" --reference "${REF}" @@ -339,6 +341,7 @@ else --output "${OUTDIR}/output_01_01_preprocessed_bams" --skip_deduplication --skip_metrics + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -363,11 +366,12 @@ else [[ -f "$EXOME_BED" ]] && INTERVALS_PARAM=(--intervals "$EXOME_BED") CMD=(nextflow run tron-bioinformatics/tronflow-strelka2 + -r v0.2.4 -profile "${PROFILE}" --input_files "${TSV_FOLDER}/pairs_wo_reps.tsv" --reference "${REF}" --output 
"${OUTDIR}/output_01_02_candidates_strelka2" - -r v0.2.4 + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -394,12 +398,13 @@ else readarray -t REPORT_ARGS < <(generate_nf_report "$PIPELINE_STEP") CMD=(nextflow run tron-bioinformatics/tronflow-vcf-postprocessing - -r v3.1.2 + -r v3.1.4 -profile "${PROFILE}" --input_vcfs "${TSV_FOLDER}/vcfs.tsv" --input_bams "${TSV_FOLDER}/bams.tsv" --reference "${REF}" --output "${OUTDIR}/output_01_03_vcf_postprocessing" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -425,6 +430,7 @@ else --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") [[ -n "$RESUME" ]] && CMD+=("$RESUME") @@ -452,6 +458,7 @@ else --read_length 50 --max_mapq 60 --max_baseq 82 + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -474,6 +481,7 @@ CMD=(nextflow run tron-bioinformatics/VariantMedium --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") [[ -n "$RESUME" ]] && CMD+=("$RESUME") @@ -481,4 +489,4 @@ CMD+=("${REPORT_ARGS[@]}") run_step "3D DenseNet SNV/Indel calling" "${CMD[@]}" #--------------------------------------- log "🎉 Pipeline completed successfully!" -#--------------------------------------- \ No newline at end of file +#---------------------------------------