Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/nf-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
NFT_VER: ${{ env.NFT_VER }}
with:
max_shards: 7
tags: ${{ github.event_name == 'pull_request' && 'small' || '' }}

- name: debug
run: |
Expand Down Expand Up @@ -100,6 +101,7 @@ jobs:
profile: ${{ matrix.profile }}
shard: ${{ matrix.shard }}
total_shards: ${{ env.TOTAL_SHARDS }}
tags: ${{ github.event_name == 'pull_request' && 'small' || '' }}

- name: Report test status
if: ${{ always() }}
Expand Down
19 changes: 18 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,20 @@ process {
]
}
withName: '.*:LONGPHASE_PHASE_SOMATIC' {
ext.prefix = { "somatic_smallvariants" }
ext.prefix = { "somatic_smallvariants_combined" }
ext.args = {
[
meta.platform == 'pb' ? '--pb' : '--ont',
"--indels",
].join(' ').trim()
}
// Intermediate output (somatic+germline combined); filtered version published below
publishDir = [
enabled: false
]
}
withName: '.*:PHASING_HAPLOTYPING:BCFTOOLS_VIEW' {
ext.prefix = { "somatic_smallvariants" }
publishDir = [
path: { "${params.outdir}/${meta.id}/variants/phased" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -430,6 +437,16 @@ process {
enabled: false
]
}
withName: '.*:GERMLINE_CONSENSUS:BCFTOOLS_SORT_CONSENSUS' {
    // Settings for the BCFTOOLS_SORT_CONSENSUS step inside GERMLINE_CONSENSUS.
    // -Oz     : write bgzip-compressed VCF
    // -W=tbi  : write a tabix (.tbi) index alongside the output
    ext.args   = { '-Oz -W=tbi' }
    ext.prefix = { "${meta.id}_germline_sorted" }
    // Output of this step is not copied to the results directory.
    publishDir = [
        enabled: false
    ]
}
withName: '.*:SOMATIC_CONSENSUS:BCFTOOLS_SORT_CONSENSUS' {
    // Settings for the BCFTOOLS_SORT_CONSENSUS step inside SOMATIC_CONSENSUS.
    // -Oz     : write bgzip-compressed VCF
    // -W=tbi  : write a tabix (.tbi) index alongside the output
    ext.args   = { '-Oz -W=tbi' }
    ext.prefix = { "${meta.id}_somatic_sorted" }
    // Output of this step is not copied to the results directory.
    publishDir = [
        enabled: false
    ]
}
withName: '.*:GERMLINE_CONSENSUS:SORT_POST_NORM' {
ext.prefix = { "${meta.id}.${meta.caller}_norm_sorted" }
ext.args = { '-Oz -W=tbi' }
Expand Down
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ params {
fasta = "https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/GRCh38_chr19.fasta.gz"

// Additional params
genome = "CHM13"
genome = "GRCh38"
vep_genome = "WBcel235"
vep_species = "caenorhabditis_elegans"
skip_wakhan = true
Expand Down
9 changes: 9 additions & 0 deletions modules/local/bcftools/view/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the local bcftools/view module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Each pinned package carries its own Renovate annotation; the annotation
  # must name the package on the line directly below it.
  # renovate: datasource=conda depName=bioconda/bcftools
  - bioconda::bcftools=1.22
  # renovate: datasource=conda depName=bioconda/htslib
  - bioconda::htslib=1.22.1
40 changes: 40 additions & 0 deletions modules/local/bcftools/view/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
 * BCFTOOLS_VIEW
 *
 * Runs `bcftools view -T <targets>` on an input VCF and writes a bgzip-compressed
 * VCF (`-Oz`) together with a tabix index (`-W=tbi`).
 *
 * Input tuple:
 *   meta         - sample metadata map; `meta.id` is used as the task tag and default prefix
 *   vcf          - VCF to be filtered
 *   tbi          - index of `vcf` (staged next to it; not referenced on the command line)
 *   targets      - file passed to `-T`
 *   targets_tbi  - index of `targets` (staged next to it; not referenced on the command line)
 *
 * Outputs:
 *   vcf               - [ meta, <prefix>.vcf.gz ]
 *   tbi               - [ meta, <prefix>.vcf.gz.tbi ]
 *   versions_bcftools - [ process name, 'bcftools', version string ] sent to the `versions` topic
 *
 * Configuration:
 *   task.ext.args   - extra arguments appended to the bcftools command line
 *   task.ext.prefix - output basename (default: meta.id)
 */
process BCFTOOLS_VIEW {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data'
        : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}"

    input:
    tuple val(meta), path(vcf), path(tbi), path(targets), path(targets_tbi)

    output:
    tuple val(meta), path("*.vcf.gz"), emit: vcf
    tuple val(meta), path("*.tbi"), emit: tbi
    tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Inputs are staged into the work directory (by default as symlinks); writing the
    // output under the same name would clobber the staged input while it is being read.
    if ("${vcf}" == "${prefix}.vcf.gz" || "${targets}" == "${prefix}.vcf.gz") {
        error "Input and output names are the same, set task.ext.prefix in the module configuration to disambiguate!"
    }
    """
    bcftools view \\
        -T ${targets} \\
        -Oz \\
        -W=tbi \\
        --threads ${task.cpus} \\
        ${args} \\
        -o ${prefix}.vcf.gz \\
        ${vcf}
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Same name-collision guard as the real script, so stub runs cannot overwrite staged inputs either.
    if ("${vcf}" == "${prefix}.vcf.gz" || "${targets}" == "${prefix}.vcf.gz") {
        error "Input and output names are the same, set task.ext.prefix in the module configuration to disambiguate!"
    }
    """
    echo '' | gzip > ${prefix}.vcf.gz
    touch ${prefix}.vcf.gz.tbi
    """
}
56 changes: 56 additions & 0 deletions modules/local/bcftools/view/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Module metadata for the local bcftools/view process.
name: bcftools_view
description: Filter VCF to positions defined by a targets file using bcftools view -T
keywords:
  - filtering
  - VCF
  - variant calling
tools:
  - view:
      description: VCF/BCF conversion, view, subset and filter VCF/BCF files.
      homepage: http://samtools.github.io/bcftools/bcftools.html
      documentation: http://www.htslib.org/doc/bcftools.html
      tool_dev_url: https://github.com/samtools/bcftools
      doi: "10.1093/bioinformatics/btp352"
      licence: ["MIT"]
      identifier: biotools:bcftools
input:
  - - meta:
        type: map
        description: Groovy Map containing sample information e.g. [ id:'test' ]
    - vcf:
        type: file
        description: Input VCF/BCF file to filter
        pattern: "*.{vcf.gz,vcf,bcf}"
    - tbi:
        type: file
        description: Tabix index of the input VCF
        pattern: "*.tbi"
    - targets:
        type: file
        description: VCF file used as position filter (-T)
        pattern: "*.{vcf.gz,vcf,bcf}"
    - targets_tbi:
        type: file
        description: Tabix index of the targets VCF
        pattern: "*.tbi"
output:
  vcf:
    - - meta:
          type: map
          description: Groovy Map containing sample information
      - "*.vcf.gz":
          type: file
          description: Filtered VCF file
          pattern: "*.vcf.gz"
  tbi:
    - - meta:
          type: map
          description: Groovy Map containing sample information
      - "*.tbi":
          type: file
          description: Tabix index of filtered VCF
          pattern: "*.tbi"
  # Mirrors the third output tuple declared in main.nf (emit: versions_bcftools).
  versions_bcftools:
    - - "${task.process}":
          type: string
          description: The name of the process
      - bcftools:
          type: string
          description: The name of the tool
      - "bcftools --version | sed '1!d; s/^.*bcftools //'":
          type: eval
          description: The expression to obtain the version of the tool
# main.nf publishes the version tuple to the `versions` topic.
topics:
  versions:
    - - "${task.process}":
          type: string
          description: The name of the process
      - bcftools:
          type: string
          description: The name of the tool
      - "bcftools --version | sed '1!d; s/^.*bcftools //'":
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@rforsyth"
maintainers:
  - "@rforsyth"
4 changes: 2 additions & 2 deletions modules/local/deepsomatic/postprocessvariants/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ process DEEPSOMATIC_POSTPROCESSVARIANTS {
error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead."
}
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
prefix = task.ext.prefix ?: "${meta.id}_somatic"

def regions = intervals ? "--regions ${intervals}" : ""
def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "")
Expand Down Expand Up @@ -121,7 +121,7 @@ process DEEPSOMATIC_POSTPROCESSVARIANTS {
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead."
}
prefix = task.ext.prefix ?: "${meta.id}"
prefix = task.ext.prefix ?: "${meta.id}_somatic"
"""
echo "" | gzip > ${prefix}.vcf.gz
touch ${prefix}.vcf.gz.tbi
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/deepvariant/postprocessvariants/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ params {
input = null

// Small variant calling options
germline_var_keep = ['deepvariant', 'clair']
somatic_var_keep = ['deepsomatic', 'clair']
germline_var_keep = ['clair']
somatic_var_keep = ['clair']
germline_var_combine = 'all'
somatic_var_combine = 'all'
prioritize_caller_germline = 'deepvariant'
prioritize_caller_somatic = 'deepsomatic'
prioritize_caller_germline = 'clair'
Comment thread
robert-a-forsyth marked this conversation as resolved.
prioritize_caller_somatic = 'clair'
Comment on lines 16 to +22
generate_gvcf = false

// PON Options
Expand Down
6 changes: 4 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"description": "List of germline variant callers to use. Must include at least one of [deepvariant, clair].",
"items": {
"type": "string",
"default": "['clair']",
"enum": ["deepvariant", "clair"]
},
Comment on lines 70 to 77
"minItems": 1
Expand All @@ -81,6 +82,7 @@
"description": "List of somatic variant callers to use. Must include at least one of [deepsomatic, clair].",
"items": {
"type": "string",
"default": "['clair']",
Comment on lines 83 to +85
"enum": ["deepsomatic", "clair"]
},
"minItems": 1
Expand All @@ -100,13 +102,13 @@
"prioritize_caller_germline": {
"type": "string",
"description": "When both germline callers are used, specifies which caller's format to use for variants called by both. Must be [deepvariant, clair].",
"default": "deepvariant",
"default": "clair",
"enum": ["deepvariant", "clair"]
},
"prioritize_caller_somatic": {
"type": "string",
"description": "When both somatic callers are used, specifies which caller's format to use for variants called by both. Must be [deepsomatic, clair].",
"default": "deepsomatic",
"default": "clair",
"enum": ["deepsomatic", "clair"]
}
}
Expand Down
13 changes: 8 additions & 5 deletions subworkflows/local/paired/paired_smallvar_germline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ workflow PAIRED_SMALLVAR_GERMLINE {

main:
germline_vcf = channel.empty()
// Normalise params.germline_var_keep to a List of caller names.
//  - a List (e.g. from a config file) is used as-is;
//  - anything else is treated as a string and split on commas, so both
//    `--germline_var_keep clair` and `--germline_var_keep deepvariant,clair` work.
//    Without the split, "deepvariant,clair" would become a one-element list that
//    matches neither caller and no germline VCF would be produced.
def germline_var_keep = params.germline_var_keep instanceof List ?
    params.germline_var_keep :
    (params.germline_var_keep ?: '').toString().tokenize(',').collect { it.trim() }.findAll { it }.unique()
// Per-caller result channels default to empty so they can be referenced even when a caller is not selected.
clair3_ch = channel.empty()
deepvariant_ch = channel.empty()

// COMBINE NORMAL BAMS WITH DOWNLOADED CLAIR3 MODELS
// Clair3 requires the model directory path; models are keyed by model name (meta.id)
if(params.germline_var_keep.contains('clair')) {
if(germline_var_keep.contains('clair')) {

// Extract model name from meta.id for combine-by key
clair3_models
Expand Down Expand Up @@ -81,7 +84,7 @@ workflow PAIRED_SMALLVAR_GERMLINE {
}

// DEEPVARIANT
if(params.germline_var_keep.contains('deepvariant')) {
if(germline_var_keep.contains('deepvariant')) {

//
// SUBWORKFLOW: DEEPVARIANT (nf-core)
Expand Down Expand Up @@ -128,7 +131,7 @@ workflow PAIRED_SMALLVAR_GERMLINE {

// COMBINE GERMLINE VARIATION
// If both callers requested: run consensus subworkflow; otherwise pass through single-caller output
if (params.germline_var_keep.size() > 1) {
if (germline_var_keep.size() > 1) {
// Mix both caller VCFs into a single channel for GERMLINE_CONSENSUS
clair3_ch
.mix(deepvariant_ch)
Expand All @@ -149,11 +152,11 @@ workflow PAIRED_SMALLVAR_GERMLINE {
.set{ germline_vcf }
// germline_vcf: [meta(+caller from consensus), vcf, tbi]
}
else if (params.germline_var_keep == ['clair']) {
else if (germline_var_keep == ['clair']) {
clair3_ch
.set{germline_vcf}
}
else if (params.germline_var_keep == ['deepvariant']) {
else if (germline_var_keep == ['deepvariant']) {
deepvariant_ch
.set{germline_vcf}
}
Expand Down
13 changes: 8 additions & 5 deletions subworkflows/local/paired/paired_smallvar_somatic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ workflow PAIRED_SMALLVAR_SOMATIC {

main:
somatic_vcf = channel.empty()
// Normalise params.somatic_var_keep to a List of caller names.
//  - a List (e.g. from a config file) is used as-is;
//  - anything else is treated as a string and split on commas, so both
//    `--somatic_var_keep clair` and `--somatic_var_keep deepsomatic,clair` work.
//    Without the split, "deepsomatic,clair" would become a one-element list that
//    matches neither caller and no somatic VCF would be produced.
def somatic_var_keep = params.somatic_var_keep instanceof List ?
    params.somatic_var_keep :
    (params.somatic_var_keep ?: '').toString().tokenize(',').collect { it.trim() }.findAll { it }.unique()
// Per-caller result channels default to empty so they can be referenced even when a caller is not selected.
clairs_ch = channel.empty()
deepsomatic_ch = channel.empty()

// CLAIRS: somatic SNV/indel calling from T/N paired BAMs
if(params.somatic_var_keep.contains('clair')) {
if(somatic_var_keep.contains('clair')) {
// Append ClairS model name (from meta) as the last element for CLAIRS module
tumor_normal_bams
.map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
Expand Down Expand Up @@ -79,7 +82,7 @@ workflow PAIRED_SMALLVAR_SOMATIC {
}

// DEEPSOMATIC: somatic variant calling using deep learning T/N model
if(params.somatic_var_keep.contains('deepsomatic')) {
if(somatic_var_keep.contains('deepsomatic')) {

// DeepSomatic expects [normal, tumor] order (opposite of input tuple)
tumor_normal_bams
Expand Down Expand Up @@ -118,7 +121,7 @@ workflow PAIRED_SMALLVAR_SOMATIC {

// COMBINE SOMATIC VARIATION
// If both callers requested: run consensus subworkflow; otherwise pass through single-caller output
if (params.somatic_var_keep.size() > 1) {
if (somatic_var_keep.size() > 1) {
clairs_ch
.mix(deepsomatic_ch)
.set{combine_somatic_ch}
Expand All @@ -138,11 +141,11 @@ workflow PAIRED_SMALLVAR_SOMATIC {
.set{ somatic_vcf }
// somatic_vcf: [meta(+caller from consensus), vcf, tbi]
}
else if (params.somatic_var_keep == ['clair']) {
else if (somatic_var_keep == ['clair']) {
clairs_ch
.set{somatic_vcf}
}
else if (params.somatic_var_keep == ['deepsomatic']) {
else if (somatic_var_keep == ['deepsomatic']) {
deepsomatic_ch
.set{somatic_vcf}
}
Expand Down
28 changes: 27 additions & 1 deletion subworkflows/local/phasing_haplotyping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_SOMATIC } from '../../module
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf'
include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main'
include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main'
include { BCFTOOLS_VIEW } from '../../modules/local/bcftools/view/main.nf'


workflow PHASING_HAPLOTYPING {
Expand Down Expand Up @@ -252,8 +253,33 @@ workflow PHASING_HAPLOTYPING {

LONGPHASE_PHASE_SOMATIC.out.snv_vcf
.join(LONGPHASE_PHASE_SOMATIC.out.snv_vcf_index)
.set{ phased_somatic_germline_vcf }
// phased_somatic_germline_vcf: [meta, vcf, tbi] -- Longphase-phased somatic+germline VCF (unfiltered)

//
// MODULE: BCFTOOLS_VIEW (label: process_medium)
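// Filter the phased somatic+germline VCF down to the positions present in the somatic call set.
// The original somatic VCF is passed as the targets (-T) file, so only records whose
// CHROM/POS appears in that VCF are retained; records at all other positions are dropped.
// NOTE: -T matches by position, not by allele, so a germline record that shares a position
// with a somatic call is also kept. Phase tags (PS/HP) on retained records are preserved.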
Comment on lines +260 to +264
// Input: [meta, phased_combined_vcf, phased_combined_tbi, somatic_vcf, somatic_tbi]
// Output: .vcf -- [meta, vcf.gz] -- phased somatic-only VCF
// .tbi -- [meta, tbi]
//
// Pair each phased combined VCF with the somatic VCF that shares its join key (first tuple element),
// producing the 5-element tuple expected by BCFTOOLS_VIEW.
phased_somatic_germline_vcf
    .join(somatic_vcf)
    .map { meta, combined_vcf, combined_tbi, targets_vcf, targets_tbi ->
        [ meta, combined_vcf, combined_tbi, targets_vcf, targets_tbi ]
    }
    .set { bcftools_view_input_ch }
// bcftools_view_input_ch: [meta, phased_combined_vcf, tbi, somatic_vcf, somatic_tbi]

BCFTOOLS_VIEW ( bcftools_view_input_ch )

BCFTOOLS_VIEW.out.vcf
.join(BCFTOOLS_VIEW.out.tbi)
.set{ phased_somatic_vcf }
// phased_somatic_vcf: [meta, vcf, tbi] -- Longphase-phased somatic (+ germline) VCF
// phased_somatic_vcf: [meta, vcf.gz, tbi] -- phased somatic-only VCF (germline removed)

// HAPLOTAGGING: tag each read in the BAM with its haplotype (HP tag) using the phased germline VCF
// All sample types (tumor, normal, tumor-only) are haplotagged using the germline phase blocks
Expand Down
Loading
Loading