@@ -15,16 +15,16 @@ READS = [filepath for filepath in Path(READSDIR).glob('**/*')]
1515
rule all:
    # Default target of the workflow. Only the FLAIR counts matrix is
    # requested; Snakemake resolves it through flair_quantify and the rules
    # that rule depends on. The commented-out entries are alternative or
    # intermediate targets kept around for debugging individual steps.
    input:
        # abundance = OUTDIR / "TALON" / 'filtered_talon_abundance_filtered.tsv',
        # GTF = OUTDIR / "TALON" / 'filtered_talon.gtf',
        # bam = expand(OUTDIR / "alignments" / "BAM" / "{sample}_sorted.bam", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed12 = expand(OUTDIR / "FLAIR" / "BED12" / "{sample}.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed = expand(OUTDIR / "FLAIR" / "BED12" / "{sample}.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed_corrected = expand(OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed",
        # gtf = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.gtf",
        # config = OUTDIR / "FLAIR" / "manifest.tsv",
        # Must match flair_quantify's declared output exactly (no stray
        # whitespace in any path component), otherwise no rule can produce it.
        abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
2828
2929
3030rule minimap2_align :
@@ -52,6 +52,24 @@ rule minimap2_align:
5252 {input.fq} > {output.sam_files}
5353 '''
5454
rule sam_to_bam:
    """
    Turns the minimap2 SAM alignment of one sample into a coordinate-sorted
    BAM file and builds its index.
    """
    input:
        sam = rules.minimap2_align.output
    output:
        bam = OUTDIR / "alignments" / "BAM" / "{sample}_sorted.bam"
    params:
        # Not referenced by the shell command below; kept so the rule's
        # params stay as they were.
        outdir = lambda wildcards: OUTDIR / "alignments" / "BAM" / wildcards.sample
    singularity:
        "docker://quay.io/biocontainers/samtools:1.14--hb421002_0"
    threads: 10
    shell:
        # `samtools sort` is given no input file, so it reads the BAM stream
        # coming from `samtools view` on stdin.
        '''
        samtools view -Sb {input.sam} | samtools sort -@ {threads} -o {output.bam}
        samtools index {output.bam}
        '''
5573
5674# TALON
5775rule get_SJs_from_gtf :
@@ -260,3 +278,163 @@ rule talon_create_GTF:
260278 --o {params.outdir}
261279 '''
262280
281+ #FLAIR
282+ # rule flair_bam_to_bed12:
283+ # input:
284+ # bam = rules.sam_to_bam.output.bam
285+ # params:
286+ # outdir = lambda wildcards: OUTDIR / "FLAIR" / "BED12" / wildcards.sample
287+ # output:
288+ # bed12 = OUTDIR / "FLAIR" / "BED12" / "{sample}.bed"
289+ # threads: 1
290+ # conda:
291+ # "envs/flair_conda_env.yaml"
292+ # shell:
293+ # '''
294+ # scripts/bam2Bed12.py --input_bam {input.bam} > {output.bed12}
295+ # '''
296+
297+
rule flair_align:
    """
    Runs `flair.py align` on one sample's FASTQ against the reference genome.
    The declared output is the per-sample BED file.
    """
    input:
        genome = genome_fasta,
        fq = READSDIR / "{sample}.fastq"
    output:
        bed = OUTDIR / "FLAIR" / "BED12" / "{sample}.bed"
    params:
        # Passed to --output: the declared BED path without its ".bed" suffix.
        outdir = lambda wildcards: OUTDIR / "FLAIR" / "BED12" / wildcards.sample
    singularity:
        "docker://quay.io/biocontainers/flair:1.5--hdfd78af_4"
    threads: 10
    shell:
        '''
        flair.py align \
        --genome {input.genome}\
        --reads {input.fq}\
        --threads {threads}\
        --nvrna \
        --version1.3 \
        --output {params.outdir}
        '''
323+
rule flair_correct:
    """
    Runs `flair.py correct` on one sample's aligned BED, using the reference
    genome and the existing annotation GTF to fix misaligned splice sites.
    """
    input:
        bed = rules.flair_align.output.bed,
        genome = genome_fasta,
        annotation = existing_annotation
    output:
        bed_corrected = OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed"
    params:
        # Splice-site correction window, taken from the workflow config.
        window = config["flair_correct_window"],
        # Passed to --output: the declared output path without its
        # "_all_corrected.bed" suffix.
        outdir = lambda wildcards: OUTDIR / "FLAIR" / "corrected" / wildcards.sample / wildcards.sample
    singularity:
        "docker://quay.io/biocontainers/flair:1.5--hdfd78af_4"
    threads: 10
    shell:
        '''
        flair.py correct \
        --genome {input.genome} \
        --query {input.bed} \
        --gtf {input.annotation} \
        --nvrna \
        --threads {threads} \
        --window {params.window} \
        --output {params.outdir}
        '''
352+
rule flair_concatenate:
    '''
    Combines the corrected BED12 files of all samples into one file.

    Input is one `<sample>_all_corrected.bed` per entry in READS (sample name
    = read file name without its last extension); output is their plain
    concatenation.
    '''
    input:
        bed_corrected = expand(OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS])
    output:
        bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed"
    # `cat` is single-threaded; reserving more cores would only block the
    # scheduler from running other jobs in parallel.
    threads: 1
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        # Truncate-and-write (`>`), not append (`>>`): appending to a file
        # left over from an earlier or interrupted run would duplicate every
        # record in the concatenated BED.
        '''
        cat {input.bed_corrected} > {output.bed_concatenated}
        '''
368+
rule flair_collapse:
    '''
    Defines high-confidence isoforms from corrected reads.

    Runs `flair.py collapse` on the concatenated corrected BED together with
    the raw reads, the reference genome and the existing annotation. The
    declared output is the isoform FASTA written under the
    `flair.collapse` output prefix.
    '''
    input:
        bed_concatenated = rules.flair_concatenate.output.bed_concatenated,
        genome = genome_fasta,
        annotation = existing_annotation,
        # The raw reads are real file dependencies of this step, so they are
        # declared as input (as flair_config already does) rather than as
        # params: Snakemake then checks they exist and re-runs the rule when
        # they change.
        reads = READS
    params:
        temp_dir = OUTDIR / "FLAIR" / "COLLAPSE" / "collapse_logs",
        # Output prefix handed to --output; the declared FASTA is
        # "<prefix>.isoforms.fa".
        outdir = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse",
        quality = config["flair_collapse_quality"]
    output:
        fa = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.fa"
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        '''
        flair.py collapse \
        --genome {input.genome} \
        --gtf {input.annotation} \
        --reads {input.reads} \
        --query {input.bed_concatenated} \
        --temp_dir {params.temp_dir} \
        --generate_map \
        --threads {threads} \
        --quality {params.quality} \
        --output {params.outdir}
        '''
400+
rule flair_config:
    '''
    Creates the read manifest consumed by flair_quantify.

    Writes one tab-separated row per read file:
        <dataset name> <TAB> condition <TAB> batch <TAB> <path to reads>
    where the dataset name is the read file name without its last extension
    and "condition" / "batch" are fixed placeholder values.
    '''
    input:
        reads = READS
    params:
        datasetnames = [".".join(read.name.split('.')[:-1]) for read in READS]
    output:
        config = OUTDIR / "FLAIR" / "manifest.tsv"
    threads: 1
    run:
        # Open the manifest once, in write mode, so a re-run replaces any
        # stale file instead of appending duplicate rows to it. The handle is
        # deliberately not called `config`, which would shadow the workflow's
        # global config dict inside this block.
        with open(output.config, 'w') as manifest:
            for name, read in zip(params.datasetnames, input.reads):
                # Fields are separated by bare tabs: any padding would become
                # part of the field, including the read path.
                manifest.write("%s\tcondition\tbatch\t%s\n" % (name, read))
416+
rule flair_quantify:
    '''
    Quantify FLAIR isoform usage across samples using minimap2.

    Runs `flair.py quantify` with the read manifest from flair_config and the
    collapsed isoform FASTA from flair_collapse, and writes the counts matrix
    declared as output.
    '''
    input:
        manifest = rules.flair_config.output.config,
        coll_fasta = rules.flair_collapse.output.fa
    params:
        quality = config["flair_abundance_quality"]
    output:
        abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        # Every line except the last must end with a backslash. Without one
        # after --quality, the shell would run `--output ...` as a separate
        # command and FLAIR would never receive the output path.
        '''
        flair.py quantify \
        --reads_manifest {input.manifest} \
        --isoforms {input.coll_fasta} \
        --threads {threads} \
        --tpm \
        --quality {params.quality} \
        --output {output.abundance}
        '''
0 commit comments