# UKP8_SNPAnalysis
# PBS job scripts for the UKP8 SNP analysis. Each section below carries its
# own "#PBS" header and runs one step of the workflow.
# Job: build the BWA index for the reference genome FASTA.
#PBS -S /bin/bash
#PBS -N j_bwa
#PBS -q batch
#PBS -l nodes=1:ppn=1:AMD
#PBS -l walltime=128:00:00

# Work from the directory the job was submitted from.
cd $PBS_O_WORKDIR
module load BWA/0.7.17-foss-2016b

# Reference FASTA to index.
reference_fasta=/scratch/gwc32007/crypto_genomes/cparvum_iowaII_genome.fasta
time bwa index "$reference_fasta"
# Job: align the UKP8 paired-end reads to the reference genome with BWA-MEM
# and write the alignments as a SAM file.
#PBS -S /bin/bash
#PBS -N j_bwa
#PBS -l walltime=128:00:00
# Request as many cores as bwa is told to use below (16 threads); asking for
# ppn=1 while running 16 threads oversubscribes the single allocated core.
#PBS -l nodes=1:ppn=16:AMD
#PBS -q batch
cd "$PBS_O_WORKDIR"
module load BWA/0.7.17-foss-2016b

# Keep this equal to the ppn value in the resource request above.
threads=16
reference_fasta=/scratch/gwc32007/crypto_genomes/cparvum_iowaII_genome.fasta
reads_dir=/scratch/gwc32007/crypto_fastq/UKP8
output_sam=/scratch/gwc32007/fastq_alignments/UKP8_aln.sam

# -M marks shorter split hits as secondary alignments.
time bwa mem -M -t "$threads" "$reference_fasta" \
  "$reads_dir/SRR6148259_1.fastq.gz" \
  "$reads_dir/SRR6148259_2.fastq.gz" \
  > "$output_sam"
# Job: convert the SAM alignment file into the more compact BAM format.
#PBS -S /bin/bash
#PBS -N j_s_samtools
#PBS -q batch
#PBS -l nodes=1:ppn=1:AMD
#PBS -l mem=100gb
#PBS -l walltime=480:00:00
cd "$PBS_O_WORKDIR"
module load SAMtools/1.6-foss-2016b

reference_fasta=/scratch/gwc32007/crypto_genomes/cparvum_iowaII_genome.fasta
aln_dir=/scratch/gwc32007/fastq_alignments

# -b: write BAM.
# -T: reference FASTA. The lowercase -t option expects a tab-delimited list of
#     reference names and lengths (such as a .fai file), not the FASTA itself,
#     so the FASTA is passed with -T instead.
time samtools view -b -T "$reference_fasta" \
  -o "$aln_dir/UKP8_aln.bam" \
  "$aln_dir/UKP8_aln.sam"
# Job: sort the BAM file with samtools sort (default ordering, i.e. by
# reference coordinate) and write the result to a new BAM.
#PBS -S /bin/bash
#PBS -N j_s_samtools
#PBS -q batch
#PBS -l walltime=480:00:00
#PBS -l mem=100gb
#PBS -l nodes=1:ppn=1:AMD

cd $PBS_O_WORKDIR
module load SAMtools/1.6-foss-2016b

aln_dir=/scratch/gwc32007/fastq_alignments
unsorted_bam="$aln_dir/UKP8_aln.bam"
sorted_bam="$aln_dir/UKP8_aln.sorted.bam"
time samtools sort "$unsorted_bam" -o "$sorted_bam"
# Job: flag duplicate reads in the sorted BAM with Picard MarkDuplicates.
# Duplicates are only marked, not removed (REMOVE_DUPLICATES=false).
#PBS -S /bin/bash
#PBS -N j_picard
#PBS -q batch
#PBS -l mem=25g
#PBS -l walltime=480:00:00
#PBS -l nodes=1:ppn=1:AMD

cd $PBS_O_WORKDIR
module load picard/2.16.0-Java-1.8.0_144

picard_home=/usr/local/apps/eb/picard/2.16.0-Java-1.8.0_144
aln_dir=/scratch/gwc32007/fastq_alignments

# The metrics file path is relative, so it is written to the current
# directory (the submission directory after the cd above).
time java -Xmx20g -classpath "$picard_home" -jar "$picard_home/picard.jar" MarkDuplicates \
  I="$aln_dir/UKP8_aln.sorted.bam" \
  O="$aln_dir/UKP8_aln.sorted_duplicates.bam" \
  M=marked_dup_metrics.txt \
  REMOVE_DUPLICATES=false
# Job: assign a single read group to every read in the duplicate-marked BAM
# using Picard AddOrReplaceReadGroups; output is coordinate-sorted.
#PBS -S /bin/bash
#PBS -N j_picard
#PBS -q batch
#PBS -l mem=25g
#PBS -l walltime=480:00:00
#PBS -l nodes=1:ppn=1:AMD

cd $PBS_O_WORKDIR
module load picard/2.16.0-Java-1.8.0_144

picard_home=/usr/local/apps/eb/picard/2.16.0-Java-1.8.0_144
aln_dir=/scratch/gwc32007/fastq_alignments

# NOTE(review): RGSM (sample name) is "20" although the files are named UKP8;
# confirm this is the intended sample label before merging gVCFs.
time java -Xmx20g -classpath "$picard_home" -jar "$picard_home/picard.jar" AddOrReplaceReadGroups \
  I="$aln_dir/UKP8_aln.sorted_duplicates.bam" \
  O="$aln_dir/UKP8_aln.dupl.read.sort.bam" \
  SORT_ORDER=coordinate \
  RGLB=lib1 \
  RGPL=illumina \
  RGSM=20 \
  RGPU=unit1 \
  VALIDATION_STRINGENCY=LENIENT
# Job: create the samtools FASTA index for the reference genome and the BAM
# index for the read-group-tagged alignment file.
#PBS -S /bin/bash
#PBS -N j_s_samtools
#PBS -q batch
#PBS -l walltime=480:00:00
#PBS -l mem=100gb
#PBS -l nodes=1:ppn=1:AMD

cd $PBS_O_WORKDIR
module load SAMtools/1.6-foss-2016b

reference_fasta=/scratch/gwc32007/crypto_genomes/cparvum_iowaII_genome.fasta
final_bam=/scratch/gwc32007/fastq_alignments/UKP8_aln.dupl.read.sort.bam

# Index the reference FASTA.
time samtools faidx "$reference_fasta"
# Index the BAM.
time samtools index "$final_bam"
# Job: create the sequence dictionary (.dict) for the reference genome with
# Picard CreateSequenceDictionary.
#
# No command may appear before the #PBS lines: qsub stops reading directives
# at the first executable line, so the module load belongs after them.
#PBS -S /bin/bash
#PBS -N j_picard
#PBS -q batch
#PBS -l nodes=1:ppn=1:AMD
#PBS -l walltime=480:00:00
#PBS -l mem=25g
cd "$PBS_O_WORKDIR"
module load picard/2.16.0-Java-1.8.0_144

picard_home=/usr/local/apps/eb/picard/2.16.0-Java-1.8.0_144
genome_dir=/scratch/gwc32007/crypto_genomes

time java -Xmx20g -classpath "$picard_home" -jar "$picard_home/picard.jar" CreateSequenceDictionary \
  R="$genome_dir/cparvum_iowaII_genome.fasta" \
  O="$genome_dir/cparvum_iowaII_genome.dict"
# Job: call variants on the processed UKP8 alignments with GATK
# HaplotypeCaller, emitting a gVCF (-ERC GVCF).
#PBS -S /bin/bash
#PBS -N j_gatk
#PBS -q batch
#PBS -l nodes=1:ppn=1
#PBS -l walltime=48:00:00
# The JVM below is given a 4 GB heap (-Xmx4g), so the job must request more
# than that; a 2 GB request lets the process outgrow its memory limit.
#PBS -l mem=6gb
module load GATK/3.8-0-Java-1.8.0_144

gatk_jar=/usr/local/apps/eb/GATK/3.8-0-Java-1.8.0_144/GenomeAnalysisTK.jar
reference_fasta=/scratch/gwc32007/crypto_genomes/cparvum_iowaII_genome.fasta
input_bam=/scratch/gwc32007/fastq_alignments/UKP8_aln.dupl.read.sort.bam
output_gvcf=/scratch/gwc32007/SNPGenie/gvcf_CURO/UKP8_output.g.vcf.gz

java -Xmx4g -jar "$gatk_jar" -T HaplotypeCaller \
  -R "$reference_fasta" \
  -I "$input_bam" \
  -o "$output_gvcf" \
  -ERC GVCF