diff --git a/README.md b/README.md index 6d7cb2a..4879e5b 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ OPTIONAL ARGUMENTS: ```bash $ bash variantmedium.sh \ - --samplehsheet \ + --samplesheet \ --outdir \ --profile conda ``` diff --git a/conf/modules.config b/conf/modules.config index 9732a20..3513d94 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -48,8 +48,6 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] - clusterOptions = '--gres=gpu:1' - accelerator = 'nvidia' } withName: CALL_VARIANTS_SNV { @@ -58,8 +56,8 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] + queue = 'GPU' clusterOptions = '--gres=gpu:1' - accelerator = 'nvidia' } withName: CALL_VARIANTS_INDEL { @@ -68,8 +66,8 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] + queue = 'GPU' clusterOptions = '--gres=gpu:1' - accelerator = 'nvidia' } } \ No newline at end of file diff --git a/modules/variantmedium/call/environment.yml b/modules/variantmedium/call/environment.yml index 58c9328..66b0f5e 100644 --- a/modules/variantmedium/call/environment.yml +++ b/modules/variantmedium/call/environment.yml @@ -10,4 +10,5 @@ dependencies: - pip - pip: - fire==0.5.0 - - torch==2.0.1 \ No newline at end of file + - --extra-index-url https://download.pytorch.org/whl/cu121 + - torch==2.1.0+cu121 \ No newline at end of file diff --git a/modules/variantmedium/call/main.nf b/modules/variantmedium/call/main.nf index d3586fb..3df9fd5 100644 --- a/modules/variantmedium/call/main.nf +++ b/modules/variantmedium/call/main.nf @@ -1,6 +1,9 @@ process CALL_VARIANTS { - label "process_high_memory" - + label ( + params.use_gpu ? "process_gpu" : + error("GPU support is required for CALL_VARIANTS process. 
Please include the 'gpu' profile to run variantmedium call") + ) + conda "${moduleDir}/environment.yml" container "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a7/a73b7de4a8d00029f69b6cef20b74e1a1d6b48c1d7d5a65b5e55cf09c3fe6ce7/data" diff --git a/nextflow.config b/nextflow.config index 0171eda..14f0b6f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,12 +41,16 @@ params { // version - needs an update with each release version = '1.2.0' + // gpu specific + use_gpu = false + } process { - beforeScript = ''' - echo SLURM_JOB_ID=$SLURM_JOB_ID - ''' + resourceLimits = [ + cpus: 4, + memory: '16.GB', + ] } profiles { @@ -60,9 +64,24 @@ profiles { } slurm { process { - executor = 'slurm' + executor = 'slurm' } } + gpu { + params.use_gpu = true + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } +} + +// trace file and reports +report { + enabled = true + overwrite = true +} +trace { + enabled = true + overwrite = true } // Capture exit codes from upstream processes when piping diff --git a/subworkflows/parse_samplesheet/main.nf b/subworkflows/parse_samplesheet/main.nf index d547fa6..d39f3c1 100644 --- a/subworkflows/parse_samplesheet/main.nf +++ b/subworkflows/parse_samplesheet/main.nf @@ -3,57 +3,75 @@ // ------------------------------------------------------- def validateSamplesheet(samplesheet_ch) { samplesheet_ch.map { path -> - def header = path.text.readLines()[0] - def cols = header.split(/,|\t/) // handle CSV or TSV + def sep = path.name.endsWith('.tsv') ? 
'\t' : ',' + def lines = path.text.readLines() + if (!lines) error "Samplesheet is empty: ${path}" + + def header = lines[0].split(sep).collect { colname -> colname.trim() } def required = ['sample_name','pair_identifier','tumor_bam','normal_bam'] - def missing = required.findAll { it !in cols } + def missing = required.findAll { colname -> colname !in header } if (missing) { error "Samplesheet is missing required columns: ${missing.join(', ')}" } // Optional: check BAM files exist - path.text.readLines().tail().each { line -> - def vals = line.split(/,|\t/) + lines.tail().eachWithIndex { line, idx -> + def vals = line.split(sep).collect { val -> val.trim() } + if (vals.size() < 4) error "Line ${idx + 2} is malformed: ${line}" + def tumor = file(vals[2]) def normal = file(vals[3]) - if (!tumor.exists()) error "Tumor BAM missing: $tumor" - if (!normal.exists()) error "Normal BAM missing: $normal" + if (!tumor.exists()) error "Tumor BAM missing: ${tumor}" + if (!normal.exists()) error "Normal BAM missing: ${normal}" } + + return [path, sep] // pass separator for downstream use } } - + workflow PARSE_SAMPLESHEET { take: - ch_samplesheet // channel ["path-to-samplesheet"] + ch_samplesheet // channel ["path-to-samplesheet"] main: validateSamplesheet(ch_samplesheet) log.info "[INFO] Samplesheet validated" - def sep = ch_samplesheet_file.name.endsWith('.tsv') ? '\t' : ',' + def sep = params.samplesheet.endsWith('.tsv') ? 
'\t' : ',' ch_samplesheet .splitCsv(header: true, sep: sep) .map { row -> - def tumorPath = row.tumor_bam.trim() - def normalPath = row.normal_bam.trim() + // Validate samplesheet + def validated_ch = validateSamplesheet(ch_samplesheet) + log.info "[INFO] Samplesheet validated" - // get file object - def tumorFile = file(tumorPath) - def normalFile = file(normalPath) + // Split samplesheet into sample info + validated_ch + .map { path, sep -> + path.text.readLines().tail().collect { line -> + def vals = line.split(sep).collect { val -> val.trim() } - tuple(row.sample_name, row.pair_identifier, tumorFile, normalFile) + tuple( + vals[0], // sample_name + vals[1], // pair_identifier + file(vals[2]), // tumor_bam + file(vals[3]) // normal_bam + ) + } } + .flatMap() + .set { sample_info_ch } emit: ch_samples = sample_info_ch } diff --git a/variantmedium.sh b/variantmedium.sh index 7f2e372..632dbbd 100644 --- a/variantmedium.sh +++ b/variantmedium.sh @@ -287,6 +287,7 @@ CMD=(nextflow run tron-bioinformatics/VariantMedium --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) # add report/trace args if requested CMD+=("${REPORT_ARGS[@]}") @@ -312,6 +313,7 @@ else --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") [[ -n "$MOUNT_PATH" ]] && CMD+=(--mount_path "${MOUNT_PATH}") @@ -329,7 +331,7 @@ else readarray -t REPORT_ARGS < <(generate_nf_report "$PIPELINE_STEP") CMD=(nextflow run tron-bioinformatics/tronflow-bam-preprocessing - -r v2.1.0 + -r v2.2.2 -profile "${PROFILE}" --input_files "${TSV_FOLDER}/preproc.tsv" --reference "${REF}" @@ -339,6 +341,7 @@ else --output "${OUTDIR}/output_01_01_preprocessed_bams" --skip_deduplication --skip_metrics + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -363,11 +366,12 @@ else [[ -f "$EXOME_BED" ]] && INTERVALS_PARAM=(--intervals "$EXOME_BED") 
CMD=(nextflow run tron-bioinformatics/tronflow-strelka2 + -r v0.2.4 -profile "${PROFILE}" --input_files "${TSV_FOLDER}/pairs_wo_reps.tsv" --reference "${REF}" --output "${OUTDIR}/output_01_02_candidates_strelka2" - -r v0.2.4 + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -394,12 +398,13 @@ else readarray -t REPORT_ARGS < <(generate_nf_report "$PIPELINE_STEP") CMD=(nextflow run tron-bioinformatics/tronflow-vcf-postprocessing - -r v3.1.2 + -r v3.1.4 -profile "${PROFILE}" --input_vcfs "${TSV_FOLDER}/vcfs.tsv" --input_bams "${TSV_FOLDER}/bams.tsv" --reference "${REF}" --output "${OUTDIR}/output_01_03_vcf_postprocessing" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -425,6 +430,7 @@ else --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") [[ -n "$RESUME" ]] && CMD+=("$RESUME") @@ -452,6 +458,7 @@ else --read_length 50 --max_mapq 60 --max_baseq 82 + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") @@ -474,6 +481,7 @@ CMD=(nextflow run tron-bioinformatics/VariantMedium --samplesheet "${SAMPLESHEET}" --outdir "${OUTDIR}" --execution_step "${PIPELINE_STEP}" + -work-dir "${OUTDIR}/nxfwork" ) CMD+=("${REPORT_ARGS[@]}") [[ -n "$RESUME" ]] && CMD+=("$RESUME") @@ -481,4 +489,4 @@ CMD+=("${REPORT_ARGS[@]}") run_step "3D DenseNet SNV/Indel calling" "${CMD[@]}" #--------------------------------------- log "🎉 Pipeline completed successfully!" -#--------------------------------------- \ No newline at end of file +#---------------------------------------