-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.yml
119 lines (108 loc) · 4.25 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Run-wide settings.
run_params:
# Output destination for the run.
# NOTE(review): presumably a directory path - confirm against the consuming pipeline.
output: 'output_dir'
# Settings for footprint extraction: input files, peak handling, footprint
# filtering and motif matching.
footprint:
##### MANDATORY INPUT FILES #####
genome_fasta: ''
# Optional: provide a list of two files for differential scoring, e.g.: ["path/to/bigwig1.bw", "path/to/bigwig2.bw"]
# In that case only peaks unique to the first file are reported.
score_bigwig: ''
peak_bed: ''
# Leave empty to skip the extraction of unknown footprints.
# All footprints are then considered unknown and are therefore all used in motif discovery.
motif_file: ''
# peak parameters
# Number of base pairs by which every region in peak_bed is extended on both sides, to ensure the full peak is collected.
extend_region: 100
# footprint parameters
# Minimum local height of a footprint; anything lower is excluded as noise.
min_local_height: 0.05
# Accepted slope range to identify sloped peaks. E.g. 35 accepts thresholds between 15% and 85% of the observed slopes in the region.
percentile_slope: 35
# Minimum and maximum allowed footprint size
min_size: 4
max_size: 100
# Minimum average score a footprint must have.
min_score: 0.5
# Maximum gap size between two footprints for them to be merged
gap: 10
# Maximum gap depth between two footprints for them to be merged
gap_depth: 0.2
# motif parameters
# P-value-like filter for motif binding. Values closer to 0 mean a smaller distance between motif and binding site.
moods_threshold: 0.00005
# Motif discovery settings.
# NOTE(review): this copy of the file carries no indentation, so `motif:` and
# `threads:` each occur twice at the same level; confirm the intended nesting
# against the consuming pipeline before relying on this layout.
motif:
discovery:
# Number of consecutive retries if no motif is found
retry: 10
# Method used to escape a local optimum (no knee available for the filter)
# strongest: remove motif with lowest e-value
# weakest: remove motif with highest e-value
# random: randomly remove footprints
escape_method: 'strongest' # ['strongest', 'weakest', 'random']
# The type of knee function used for knee finding
knee_function: 'linear' # ['linear', 'curved']
# Whether the knee filter should be applied.
# NOTE(review): the key name suggests True keeps all motifs (i.e. skips the knee filter) - confirm the polarity.
keep_all: True
threads: 15
# Minimum and maximum allowed motif width
min_len: 4
max_len: 10 # max_len must be <= footprint min_size (ONLY for mod = 'oops')
# Minimum e-value a motif must have
# NOTE(review): for MEME E-values lower means more significant, so this is presumably an upper bound - confirm.
e_value: 0.05
# Sequence binding model
# oops: Exactly one binding position per sequence
# zoops: Zero or one binding position per sequence
# anr: Any number of non-overlapping binding positions per sequence
mod: 'anr' # ['oops', 'zoops', 'anr']
# How to calculate the motif's statistical significance
# See https://meme-suite.org/meme/doc/meme.html?man_type=web
objfun: 'classic' # ['classic', 'de', 'se', 'cd', 'ce', 'nc']
# Maximum number of motifs per iteration
nmotifs: 30
# Minimum and maximum number of sites a motif must have. Note: the maximum number of sites has a heavy impact on runtime and should be increased with caution.
minsites: 2
maxsites: 2000
# Number of allowed iterations
# -1 for unlimited
maxiter: -1
### control motifs ###
# Select known motifs to compare against
known:
# Whether to shuffle known motifs as an additional control group
shuffle: False
# Number of known motifs to be selected as control,
# chosen to have a high distance from each other
quantity: 15
# Additional selection of motifs of interest. Entries can be an id or a name, e.g.:
# - 'MA0677.1'
# or
# - 'Nr2f6'
# Left without a value by default, which YAML parses as null.
motifs:
# Post-processing of motifs: merging and logo rendering.
processing:
# Distance threshold for merging motifs. Motifs with a distance below this threshold are merged into a consensus motif.
threshold: 0.5
# Resolution (dpi) of the motif sequence logo
dpi: 300
# Evaluation settings.
# NOTE(review): the `motif:` key below repeats the name of the earlier `motif:`
# key; in this unindented copy the two collide - confirm it is meant to be
# nested under `evaluation`.
evaluation:
motif:
# Number of most similar database motifs that are stored in file.
top_x: 10
rescan:
# Strictness for matching binding sites to a motif.
pvalue: 0.0001
# NOTE(review): presumably the number of CPU cores used for rescanning - confirm.
cores: 15
# NOTE(review): undocumented; the meaning of `split` is not visible here - confirm with the consuming code.
split: 100
# Annotation settings (UROPA-based, per the template and flags below).
annotation:
gtf: '' # Leave empty to skip annotation
config_template: 'uropa_template.json'
# Select the groups to annotate (regular expressions):
# discovered = 'motif_\d+_motif_\d+'
# known_shuffled = 'shuffled_.+_shuffled_.+'
# all = '.+' (or leave empty)
annotate: ['.+']
# NOTE(review): presumably the number of threads used for annotation - confirm.
threads: 15
# Additional UROPA parameters (space-separated). See https://uropa-manual.readthedocs.io/output.html
flags: ""
# Only run when annotation is available
go_enrichment:
email: '' # Email needed to fetch NCBI data. The analysis is skipped if empty.