forked from bioinfodlsu/attack_amr
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Bioinfo Commands.txt
164 lines (128 loc) · 6.8 KB
/
Bioinfo Commands.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
Run Snakemake:
conda activate snakemake
snakemake --use-conda --cores all
Run Miniwdl (Miniwdl installation guide: https://awesome-mice-0fe.notion.site/WDL-30db3e93cf2d401f857f03ed29ab8076)
miniwdl run translated.wdl --verbose --input inputs.json
Install Snakemake:
conda install -n base -c conda-forge mamba
mamba create -c conda-forge -c bioconda -n snakemake snakemake
Download Sequence File: (Sample)
wget ftp://ftp.sra.ebi.ac.uk/vol1/run/ERR701/ERR7015395/FH1_S162_R1_001.fastq.gz
Downsample sequences: (Sample)
zcat -c BFH1_S123_R1_trimmed.fastq.gz | head -n 12518768 | gzip > zBFH1_S123_R1_trimmed.fastq.gz
Downsample Metaphlan database: (Sample) SGB vs VSG??
sudo apt-get update
sudo apt-get install bzip2
bunzip2 mpa_vOct22_CHOCOPhlAnSGB_202212_VSG.fna.bz2
bunzip2 mpa_vOct22_CHOCOPhlAnSGB_202212_SGB.fna.bz2
head -n 1447453 mpa_vOct22_CHOCOPhlAnSGB_202212_VSG.fna | bzip2 > mpa_vOct22_CHOCOPhlAnSGB_202212_VSG.fna.bz2
wc -l mpa_vOct22_CHOCOPhlAnSGB_202212_SGB.fna
head -n 17035287 mpa_vOct22_CHOCOPhlAnSGB_202212_SGB.fna | bzip2 > mpa_vOct22_CHOCOPhlAnSGB_202212.fna.bz2
conda create -n bowtie2env
conda activate bowtie2env
mamba install -c bioconda bowtie2
cd metaphlan3
bowtie2-build --large-index mpa_vOct22_CHOCOPhlAnSGB_202212.fna mpa_vOct22_CHOCOPhlAnSGB_202212
Installation: (For testing only)
mamba install -c bioconda fastqc
conda create -n multiqcenv
mamba install -c bioconda multiqc
conda create -n cutadaptenv
mamba install -c bioconda cutadapt
conda create -n bowtie2env
mamba install -c bioconda bowtie2
mamba install -c bioconda samtools
conda create -n metaphlanenv
mamba install -c bioconda -c biobakery -c conda-forge python=3.8 metaphlan
conda create -n metaxaenv
mamba install -c bioconda metaxa
conda create -n cdhitenv
mamba install -c bioconda cd-hit
Remove environment: (Sample)
conda env list
conda activate snakemake
conda deactivate
conda env remove -n snakemake
Report:
snakemake --report report.html
Commands: (For testing only)
fastqc:
fastqc --quiet -t 2 --outdir data/FASTQC -f fastq data/FH1_S162_R1_001.fastq.gz
fastqc --quiet -t 2 --outdir data/FASTQC -f fastq data/FH1_S162_R2_001.fastq.gz
fastqc --quiet -t 2 --outdir trimmed_data/FASTQC -f fastq trimmed_data/FH1_S162_R1_001.fastq.gz
fastqc --quiet -t 2 --outdir trimmed_data/FASTQC -f fastq trimmed_data/FH1_S162_R2_001.fastq.gz
multiqc:
multiqc -f --interactive --quiet data/ -o data/
cutadapt:
cutadapt -a CTGTCTCTTATACACATCT -A CTGTCTCTTATACACATCT -O 10 -m 30 -q 20 \
data/FH1_S162_R1_001.fastq.gz data/FH1_S162_R2_001.fastq.gz -o trimmed_data/FH1_S162_R1_trimmed.fastq.gz -p trimmed_data/FH1_S162_R2_trimmed.fastq.gz > trimmed_data/FH1_S162.trimmed.txt
fastqc_trim:
fastqc --quiet -t 2 --outdir trimmed_data/FASTQC -f fastq trimmed_data/FH1_S162_R1_trimmed.fastq.gz
fastqc --quiet -t 2 --outdir trimmed_data/FASTQC -f fastq trimmed_data/FH1_S162_R2_trimmed.fastq.gz
multiqc_trim:
multiqc -f --interactive --quiet trimmed_data/ -o trimmed_data/
resfinder_db:
bowtie2-build resfinder_db/resfinder.fasta resfinder_db/resfinder
resfinder_mapping:
(bowtie2 -x resfinder_db/resfinder -1 trimmed_data/FH1_S162_R1_trimmed.fastq.gz -2 trimmed_data/FH1_S162_R2_trimmed.fastq.gz -p 1 -D 20 -R 3 -N 1 -L 20 -i S,1,0.50 | \
samtools view -Sb - > mapped_reads/FH1_S162_unfiltered.bam) 2> logs/bowtie2/FH1_S162.log
resfinder_filtering:
samtools view -h mapped_reads/FH1_S162_unfiltered.bam | awk 'BEGIN {{FS="\t"; OFS="\t"}} \
{{if (/^@/ && substr($2, 3, 1)==":") {{print}} \
else if (($7!="=" || $7=="=") && and($2, 0x40)) {{print}}}}' \
| samtools view -Shu - > mapped_reads/FH1_S162.bam
resfinder_sorting:
samtools sort -T sorted_reads/FH1_S162 \
-O bam mapped_reads/FH1_S162.bam > sorted_reads/FH1_S162.bam
resfinder_indexing:
samtools index sorted_reads/FH1_S162.bam
combine_results_1:
samtools idxstats sorted_reads/FH1_S162.bam | grep -v "\*" | cut -f1 > resfinder_out/gene_names
sed -i '1 i\GENE' resfinder_out/gene_names
combine_results_2:
samtools idxstats sorted_reads/FH1_S162.bam | grep -v "\*" | cut -f3 > resfinder_out/FH1_S162_counts
combine_results_3:
sed '1 i\FH1_S162' resfinder_out/FH1_S162_counts > resfinder_out/renamed_FH1_S162_counts
combine_results_4:
paste resfinder_out/gene_names resfinder_out/renamed_FH1_S162_counts > resfinder_out/ARG_genemat.txt
MGE_db:
bowtie2-build MGE_db/MGE.fasta MGE_db/MGE
MGE_mapping:
(bowtie2 -x MGE_db/MGE -1 trimmed_data/FH1_S162_R1_trimmed.fastq.gz -2 trimmed_data/FH1_S162_R2_trimmed.fastq.gz -p 6 -D 20 -R 3 -N 1 -L 20 -i S,1,0.50 | \
samtools view -Sb - > mapped_reads_MGE/FH1_S162_unfiltered.bam) 2> logs/MGE/FH1_S162.log
MGE_filtering:
samtools view -h mapped_reads_MGE/FH1_S162_unfiltered.bam | awk 'BEGIN {{FS="\t"; OFS="\t"}} \
{{if (/^@/ && substr($2, 3, 1)==":") {{print}} \
else if (($7!="=" || $7=="=") && and($2, 0x40)) {{print}}}}' \
| samtools view -Shu - > mapped_reads_MGE/FH1_S162.bam
MGE_sorting:
samtools sort -T sorted_reads_MGE/FH1_S162 \
-O bam mapped_reads_MGE/FH1_S162.bam > sorted_reads_MGE/FH1_S162.bam
MGE_indexing:
samtools index sorted_reads_MGE/FH1_S162.bam
combine_MGE_results_1:
samtools idxstats sorted_reads_MGE/FH1_S162.bam | grep -v "\*" | cut -f1 > MGE_out/gene_names
sed -i '1 i\GENE' MGE_out/gene_names
combine_MGE_results_2:
samtools idxstats sorted_reads_MGE/FH1_S162.bam | grep -v "\*" | cut -f3 > MGE_out/FH1_S162_counts
combine_MGE_results_3:
sed '1 i\FH1_S162' MGE_out/FH1_S162_counts > MGE_out/renamed_FH1_S162_counts
combine_MGE_results_4:
paste MGE_out/gene_names MGE_out/renamed_FH1_S162_counts > MGE_out/MGE_genemat.txt
metaphlan3:
metaphlan --bowtie2db metaphlan3/ trimmed_data/FH1_S162_R1_trimmed.fastq.gz,trimmed_data/FH1_S162_R2_trimmed.fastq.gz --nproc 6 --bowtie2out metaphlan3/FH1_S162.bowtie2.bz2 --sample_id FH1_S162 --input_type fastq > metaphlan3/FH1_S162_profile.txt
# to create counts
merge_metaphlan_tables.py *_profile.txt > metaphlan3_counts_merged_abundance_table.txt
sed -i 's/_profile//g' metaphlan3_counts_merged_abundance_table.txt
sed -n '2p' metaphlan3_counts_merged_abundance_table.txt > merged_counts_abundance_table_species.txt
grep -E "s__" metaphlan3_counts_merged_abundance_table.txt >> merged_counts_abundance_table_species.txt
tr '|' ';' <merged_counts_abundance_table_species.txt > merged_metaphlan_count_species.txt
# to create tax table
awk '{print $1}' merged_metaphlan_count_species.txt > tax_table_count_metaphlan3_species
grep -E "s__" tax_table_count_metaphlan3_species > tax_table_count_metaphlan3_species.txt
metaxa2:
metaxa2 -1 trimmed_data/FH1_S162_R1_trimmed.fastq.gz -2 trimmed_data/FH1_S162_R2_trimmed.fastq.gz -f fastq -z gzip -t b -o metaxa2/FH1_S162 --align none --graphical F --cpu 16 --plus
metaxa2_ttt -i metaxa2/FH1_S162.taxonomy.txt -t b -o metaxa2/FH1_S162
metaxa2_dc -o metaxa2/metaxa_genus.txt metaxa2/*level_6.txt
cd-hit:
cd-hit-est -i resfinder.fasta -o clusters_resfinder.fasta -c 0.90