-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathsettings.yaml.in
executable file
·210 lines (202 loc) · 6.03 KB
/
settings.yaml.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# Filesystem locations used by the pipeline. The values below are
# placeholders; replace each one with a real path.
locations:
  reads-dir: '/path/to/reads/'
  output-dir: '/path/to/output/'
  genome-fasta: '/path/to/genome.fasta'
  cdna-fasta: '/path/to/sample.cdna.fasta'
  gtf-file: '/path/to/sample.gtf'
# "organism" is required for GO term analysis. Leave it as an empty
# string to skip GO analysis. Otherwise set it to the initial of the
# genus followed by the species name, e.g. hsapiens, mmusculus,
# dmelanogaster, celegans.
organism: ''
mapping:
  # Aligner to use; options are star or hisat2.
  mapper: 'hisat2'
  # Genome build version identifier that is used in hisat2 index file names.
  genome_build: 'GRCm38'
coverage:
  # Coverage tool to use; options are megadepth or bamCoverage.
  tool: 'megadepth'
# Read counting is configured for GenomicAlignments::summarizeOverlaps.
# The feature type and the grouping attribute chosen below must match
# what is available in the GTF file. Most GTF files use "exon" or "CDS"
# as features, and "gene_id" and "transcript_id" for combining features.
counting:
  # Other options are "IntersectionStrict" and "IntersectionNotEmpty".
  counting_mode: 'Union'
  # Boolean. When set to TRUE, multi-mapping reads are not counted
  # (see the inter.feature argument of summarizeOverlaps).
  drop_nonunique: TRUE
  # Other options are "forward" and "reverse" for strand-specific
  # read counting.
  strandedness: 'unspecific'
  feature: 'exon'
  group_feature_by: 'gene_id'
  # How many reads to process at a time (this impacts memory consumption).
  yield_size: 2000000
#DEanalyses:
#  # The names of analyses can be anything, but they have to be unique
#  # for each combination of case/control group comparisons.
#  # Every analysis also needs a covariates line with a quoted character
#  # string value.
#
#  analysis1:
#    description: "A couple of sentences describing the purpose of this particular comparison and the selection of covariates."
#    # If multiple sample names are provided, they must be separated by commas.
#    case_sample_groups: "HBR"
#    control_sample_groups: "UHR"
#
#    # Comma-separated list of additional covariates to control for
#    # in differential expression analysis (e.g. batch, age,
#    # temperature, sequencing_technology etc.). Each one must correspond
#    # to a column in the sample_sheet.csv file.
#    covariates: ""
#
#    # Reports that are self-contained ship all JavaScript code as part of the result file.
#    # That result file will hence remain functional over time, which is the default setting.
#    # For a more modular approach that references external sources, set "FALSE".
#    self_contained: TRUE
#
#  analysis2:
#    description: "Every analysis has its distinct description, which is then shown in the introduction section of the report."
#    case_sample_groups: "UHR"
#    control_sample_groups: "HBR"
#
#    # Comma-separated list of additional covariates to control for
#    # in differential expression analysis (e.g. batch, age,
#    # temperature, sequencing_technology etc.). Each one must correspond
#    # to a column in the sample_sheet.csv file.
#    covariates: ''
# Settings for how the pipeline is executed and the resources declared
# for each rule.
execution:
  # Boolean; write true or false. (`yes`/`no` are only booleans under
  # YAML 1.1 and are read as plain strings by YAML 1.2 parsers.)
  submit-to-cluster: false
  jobs: 6
  nice: 19
  mem_mb: 64000
  # Options in this section are named after cluster submission settings;
  # presumably they only matter when submit-to-cluster is true — confirm.
  cluster:
    missing-file-timeout: 120
    memory: 8G
    stack: 128M
    queue: all.q
    # This is the literal string "none", not a YAML null.
    contact-email: none
    log-dir: '.'
    args: ''
  # Per-rule resource declarations.
  # NOTE(review): "memory" values carry no unit here; they look like
  # megabytes (compare mem_mb / disk_mb) — confirm against the consumer.
  rules:
    __default__:
      threads: 1
      memory: 2000
    translate_sample_sheet_for_report:
      threads: 1
      memory: 500
    trim_qc_reads_pe:
      threads: 1
      memory: 4000
    trim_qc_reads_se:
      threads: 1
      memory: 4000
    star_index:
      threads: 2
      memory: 32000
    hisat2_index:
      threads: 2
      memory: 32000
    salmon_index:
      threads: 8
      memory: 5000
    salmon_quant:
      threads: 8
      memory: 6000
    counts_from_SALMON:
      threads: 1
      memory: 200
    collate_read_counts:
      threads: 1
      memory: 200
    norm_counts_deseq:
      threads: 1
      memory: 1000
    star_map:
      threads: 2
      memory: 16000
    hisat2_map:
      threads: 2
      memory: 8000
      disk_mb: 7000
    index_bam:
      threads: 2
      memory: 500
    multiqc:
      threads: 2
      memory: 500
      disk_mb: 20000
    coverage_bamCoverage:
      threads: 1
      memory: 4000
    coverage_megadepth:
      threads: 2
      memory: 4000
    count_reads:
      threads: 1
      memory: 6000
    check_annotation_files:
      threads: 1
      memory: 6000
    report1:
      threads: 1
      memory: 4000
    deseq_collate_report1:
      threads: 1
      memory: 4000
    report2:
      threads: 1
      memory: 4000
    deseq_collate_report2:
      threads: 1
      memory: 4000
    report3:
      threads: 1
      memory: 4000
    deseq_collate_report3:
      threads: 1
      memory: 4000
# Executable and extra command line arguments for each external tool.
# The @NAME@ placeholders are expected to be substituted when the final
# settings file is generated from this template. They are quoted so that
# the template itself is valid YAML (a plain scalar may not start with
# "@") and so that the substituted value is always read as a string.
tools:
  gunzip:
    executable: '@GUNZIP@'
    args: ""
  multiqc:
    executable: '@MULTIQC@'
    args: ""
  star_map:
    executable: '@STAR@'
    args: ""
  star_index:
    executable: '@STAR@'
    args: ""
    # If STAR gives an error, the error message will likely suggest the
    # parameter to adjust, for example:
    # args: "--limitGenomeGenerateRAM 31000000000"
  hisat2:
    executable: '@HISAT2@'
    args: "--fast"
    # If low alignment rates are observed with the "--fast" setting,
    # other options to try are "--sensitive" or "--very-sensitive".
  hisat2-build:
    executable: '@HISAT2_BUILD@'
    args: ""
  samtools:
    executable: '@SAMTOOLS@'
    args: ""
  salmon_index:
    executable: '@SALMON@'
    args: "index"
  salmon_quant:
    executable: '@SALMON@'
    args: "quant"
  Rscript:
    executable: '@RSCRIPT@'
    args: "--vanilla"
  sed:
    executable: '@SED@'
    args: ""
  fastp:
    executable: '@FASTP@'
    args: "--adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
    # Please refer to the fastp manual about setting adapter sequences:
    # https://github.com/OpenGene/fastp#adapters
  bamCoverage:
    executable: '@BAMCOVERAGE@'
    args: "--normalizeUsing BPM --numberOfProcessors 2"
    # See https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html
    # for more detailed arguments used for normalising coverage data files.
    # NOTE(review): "--numberOfProcessors 2" is higher than the 1 thread
    # declared for coverage_bamCoverage under execution.rules — confirm
    # which value is intended.
  megadepth:
    executable: '@MEGADEPTH@'
    args: ""