Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ OPTIONAL ARGUMENTS:

```bash
$ bash variantmedium.sh \
--samplehsheet <path/to/samplesheet.csv> \
--samplesheet <path/to/samplesheet.csv> \
--outdir <path/to/pipeline-output-directory> \
--profile conda
```
Expand Down
6 changes: 2 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
clusterOptions = '--gres=gpu:1'
accelerator = 'nvidia'
}

withName: CALL_VARIANTS_SNV {
Expand All @@ -58,8 +56,8 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
queue = 'GPU'
clusterOptions = '--gres=gpu:1'
accelerator = 'nvidia'
}

withName: CALL_VARIANTS_INDEL {
Expand All @@ -68,8 +66,8 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
queue = 'GPU'
clusterOptions = '--gres=gpu:1'
accelerator = 'nvidia'
}

}
3 changes: 2 additions & 1 deletion modules/variantmedium/call/environment.yml
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will be updating torch in this environment myself, so please remove this part from the PR.

Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ dependencies:
- pip
- pip:
- fire==0.5.0
- torch==2.0.1
- --extra-index-url https://download.pytorch.org/whl/cu121
- torch==2.1.0+cu121
7 changes: 5 additions & 2 deletions modules/variantmedium/call/main.nf
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am adding CPU support back, so let's remove this as well, thanks!

Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
process CALL_VARIANTS {
label "process_high_memory"

label (
"${params.use_gpu}" ? "process_gpu" :
error("GPU support is required for CALL_VARIANTS process. Please include the 'gpu' profile to run variantmedium call")
)

conda "${moduleDir}/environment.yml"
container "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a7/a73b7de4a8d00029f69b6cef20b74e1a1d6b48c1d7d5a65b5e55cf09c3fe6ce7/data"

Expand Down
27 changes: 23 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,16 @@ params {
// version - needs an update with each release
version = '1.2.0'

// gpu specific
use_gpu = false

}

process {
beforeScript = '''
echo SLURM_JOB_ID=$SLURM_JOB_ID
'''
resourceLimits = [
cpus: 4,
memory: '16.GB',
]
}

profiles {
Expand All @@ -60,9 +64,24 @@ profiles {
}
slurm {
process {
executor = 'slurm'
executor = 'slurm'
}
}
gpu {
params.use_gpu = true
apptainer.runOptions = '--nv'
singularity.runOptions = '--nv'
}
}

// trace file and reports
report {
enabled = true
overwrite = true
}
trace {
enabled = true
overwrite = true
}

// Capture exit codes from upstream processes when piping
Expand Down
50 changes: 34 additions & 16 deletions subworkflows/parse_samplesheet/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,57 +3,75 @@
// -------------------------------------------------------
def validateSamplesheet(samplesheet_ch) {
samplesheet_ch.map { path ->
def header = path.text.readLines()[0]
def cols = header.split(/,|\t/) // handle CSV or TSV
def sep = path.name.endsWith('.tsv') ? '\t' : ','
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable sep was computed before the function call and can be passed as an argument to this function.


def lines = path.text.readLines()
if (!lines) error "Samplesheet is empty: ${path}"

def header = lines[0].split(sep).collect { colname -> colname.trim() }
def required = ['sample_name','pair_identifier','tumor_bam','normal_bam']
def missing = required.findAll { it !in cols }
def missing = required.findAll { colname -> colname !in header }
if (missing) {
error "Samplesheet is missing required columns: ${missing.join(', ')}"
}

// Optional: check BAM files exist
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existence of BAM paths is checked elsewhere already:

def validate_paths(df, skip_preprocessing):

path.text.readLines().tail().each { line ->
def vals = line.split(/,|\t/)
lines.tail().eachWithIndex { line, idx ->
def vals = line.split(sep).collect { val -> val.trim() }
if (vals.size() < 4) error "Line ${idx + 2} is malformed: ${line}"

def tumor = file(vals[2])
def normal = file(vals[3])

if (!tumor.exists()) error "Tumor BAM missing: $tumor"
if (!normal.exists()) error "Normal BAM missing: $normal"
if (!tumor.exists()) error "Tumor BAM missing: ${tumor}"
if (!normal.exists()) error "Normal BAM missing: ${normal}"
}

return [path, sep] // pass separator for downstream use
}
}


workflow PARSE_SAMPLESHEET {

take:
ch_samplesheet // channel ["path-to-samplesheet"]
ch_samplesheet // channel ["path-to-samplesheet"]

main:

validateSamplesheet(ch_samplesheet)
log.info "[INFO] Samplesheet validated"

def sep = ch_samplesheet_file.name.endsWith('.tsv') ? '\t' : ','
def sep = params.samplesheet.endsWith('.tsv') ? '\t' : ','
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here it seems the file extension is read from params.samplesheet, but the file that's being read is ch_samplesheet. This looks error-prone; please use the same file path for both.

Also, overall, there are variable names like samplesheet, ch_samplesheet, and samplesheet_ch. Are these all referring to the same file? If so, we can use the same name; if not, let's use more descriptive names.

ch_samplesheet
.splitCsv(header: true, sep: sep)
.map { row ->

def tumorPath = row.tumor_bam.trim()
def normalPath = row.normal_bam.trim()
// Validate samplesheet
def validated_ch = validateSamplesheet(ch_samplesheet)
log.info "[INFO] Samplesheet validated"

// get file object
def tumorFile = file(tumorPath)
def normalFile = file(normalPath)
// Split samplesheet into sample info
validated_ch
.map { path, sep ->
path.text.readLines().tail().collect { line ->
def vals = line.split(sep).collect { val -> val.trim() }

tuple(row.sample_name, row.pair_identifier, tumorFile, normalFile)
tuple(
vals[0], // sample_name
vals[1], // pair_identifier
file(vals[2]), // tumor_bam
file(vals[3]) // normal_bam
)
}
}
.flatten()
.set { sample_info_ch }


emit:

emit:
ch_samples = sample_info_ch

}
16 changes: 12 additions & 4 deletions variantmedium.sh
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ CMD=(nextflow run tron-bioinformatics/VariantMedium
--samplesheet "${SAMPLESHEET}"
--outdir "${OUTDIR}"
--execution_step "${PIPELINE_STEP}"
-work-dir "${OUTDIR}/nxfwork"
)
# add report/trace args if requested
CMD+=("${REPORT_ARGS[@]}")
Expand All @@ -312,6 +313,7 @@ else
--samplesheet "${SAMPLESHEET}"
--outdir "${OUTDIR}"
--execution_step "${PIPELINE_STEP}"
-work-dir "${OUTDIR}/nxfwork"
)
CMD+=("${REPORT_ARGS[@]}")
[[ -n "$MOUNT_PATH" ]] && CMD+=(--mount_path "${MOUNT_PATH}")
Expand All @@ -329,7 +331,7 @@ else
readarray -t REPORT_ARGS < <(generate_nf_report "$PIPELINE_STEP")

CMD=(nextflow run tron-bioinformatics/tronflow-bam-preprocessing
-r v2.1.0
-r v2.2.2
-profile "${PROFILE}"
--input_files "${TSV_FOLDER}/preproc.tsv"
--reference "${REF}"
Expand All @@ -339,6 +341,7 @@ else
--output "${OUTDIR}/output_01_01_preprocessed_bams"
--skip_deduplication
--skip_metrics
-work-dir "${OUTDIR}/nxfwork"
)
CMD+=("${REPORT_ARGS[@]}")

Expand All @@ -363,11 +366,12 @@ else
[[ -f "$EXOME_BED" ]] && INTERVALS_PARAM=(--intervals "$EXOME_BED")

CMD=(nextflow run tron-bioinformatics/tronflow-strelka2
-r v0.2.4
-profile "${PROFILE}"
--input_files "${TSV_FOLDER}/pairs_wo_reps.tsv"
--reference "${REF}"
--output "${OUTDIR}/output_01_02_candidates_strelka2"
-r v0.2.4
-work-dir "${OUTDIR}/nxfwork"
)
CMD+=("${REPORT_ARGS[@]}")

Expand All @@ -394,12 +398,13 @@ else
readarray -t REPORT_ARGS < <(generate_nf_report "$PIPELINE_STEP")

CMD=(nextflow run tron-bioinformatics/tronflow-vcf-postprocessing
-r v3.1.2
-r v3.1.4
-profile "${PROFILE}"
--input_vcfs "${TSV_FOLDER}/vcfs.tsv"
--input_bams "${TSV_FOLDER}/bams.tsv"
--reference "${REF}"
--output "${OUTDIR}/output_01_03_vcf_postprocessing"
-work-dir "${OUTDIR}/nxfwork"
)
CMD+=("${REPORT_ARGS[@]}")

Expand All @@ -425,6 +430,7 @@ else
--samplesheet "${SAMPLESHEET}"
--outdir "${OUTDIR}"
--execution_step "${PIPELINE_STEP}"
-work-dir "${OUTDIR}/nxfwork"
)
CMD+=("${REPORT_ARGS[@]}")
[[ -n "$RESUME" ]] && CMD+=("$RESUME")
Expand Down Expand Up @@ -452,6 +458,7 @@ else
--read_length 50
--max_mapq 60
--max_baseq 82
-work-dir "${OUTDIR}/nxfwork"
)
CMD+=("${REPORT_ARGS[@]}")

Expand All @@ -474,11 +481,12 @@ CMD=(nextflow run tron-bioinformatics/VariantMedium
--samplesheet "${SAMPLESHEET}"
--outdir "${OUTDIR}"
--execution_step "${PIPELINE_STEP}"
-work-dir "${OUTDIR}/nxfwork"
)
CMD+=("${REPORT_ARGS[@]}")
[[ -n "$RESUME" ]] && CMD+=("$RESUME")
[[ -n "$MOUNT_PATH" ]] && CMD+=(--mount_path "${MOUNT_PATH}")
run_step "3D DenseNet SNV/Indel calling" "${CMD[@]}"
#---------------------------------------
log "🎉 Pipeline completed successfully!"
#---------------------------------------
#---------------------------------------