Skip to content

Commit 0bb3540

Browse files
Added extra validation of ENA fields using ENA docs
1 parent 84f625c commit 0bb3540

1 file changed

Lines changed: 108 additions & 12 deletions

File tree

assets/schema_input_reads.json

Lines changed: 108 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,24 @@
46 46
},
47 47
"platform": {
48 48
"type": "string",
49-
"pattern": "^\\S+$",
50-
"errorMessage": "Platform must be provided and cannot contain spaces",
51-
"description": "Sequencing platform (e.g., ILLUMINA, PACBIO_SMRT, OXFORD_NANOPORE, ION_TORRENT)"
49+
"enum": [
50+
"BGISEQ",
51+
"CAPILLARY",
52+
"DNBSEQ",
53+
"ELEMENT",
54+
"GENAPSYS",
55+
"GENEMIND",
56+
"HELICOS",
57+
"ILLUMINA",
58+
"ION_TORRENT",
59+
"LS454",
60+
"OXFORD_NANOPORE",
61+
"PACBIO_SMRT",
62+
"TAPESTRI",
63+
"ULTIMA",
64+
"VELA_DIAGNOSTICS"
65+
],
66+
"description": "Sequencing platform. Must be one of the ENA controlled vocabulary values listed in the enum."
52 67
},
53 68
"instrument": {
54 69
"type": "string",
@@ -58,21 +73,102 @@
58 73
},
59 74
"library_source": {
60 75
"type": "string",
61-
"pattern": "^\\S+$",
62-
"errorMessage": "Library source must be provided and cannot contain spaces",
63-
"description": "Library source (GENOMIC, METAGENOMIC, TRANSCRIPTOMIC, etc.)"
76+
"enum": [
77+
"GENOMIC",
78+
"GENOMIC SINGLE CELL",
79+
"TRANSCRIPTOMIC",
80+
"TRANSCRIPTOMIC SINGLE CELL",
81+
"METAGENOMIC",
82+
"METATRANSCRIPTOMIC",
83+
"SYNTHETIC",
84+
"VIRAL RNA",
85+
"OTHER"
86+
],
87+
"description": "Library source. Must be one of the ENA controlled vocabulary values listed in the enum."
64 88
},
65 89
"library_selection": {
66 90
"type": "string",
67-
"pattern": "^\\S+$",
68-
"errorMessage": "Library selection must be provided and cannot contain spaces",
69-
"description": "Library selection (RANDOM, PCR, cDNA, etc.)"
91+
"enum": [
92+
"RANDOM",
93+
"PCR",
94+
"RANDOM PCR",
95+
"RT-PCR",
96+
"HMPR",
97+
"MF",
98+
"repeat fractionation",
99+
"size fractionation",
100+
"MSLL",
101+
"cDNA",
102+
"cDNA_randomPriming",
103+
"cDNA_oligo_dT",
104+
"PolyA",
105+
"Oligo-dT",
106+
"Inverse rRNA",
107+
"Inverse rRNA selection",
108+
"ChIP",
109+
"ChIP-Seq",
110+
"MNase",
111+
"DNase",
112+
"Hybrid Selection",
113+
"Reduced Representation",
114+
"Restriction Digest",
115+
"5-methylcytidine antibody",
116+
"MBD2 protein methyl-CpG binding domain",
117+
"CAGE",
118+
"RACE",
119+
"MDA",
120+
"padlock probes capture method",
121+
"other",
122+
"unspecified"
123+
],
124+
"description": "Library selection. Must be one of the ENA controlled vocabulary values listed in the enum."
70 125
},
71 126
"library_strategy": {
72 127
"type": "string",
73-
"pattern": "^\\S+$",
74-
"errorMessage": "Library strategy must be provided and cannot contain spaces",
75-
"description": "Library strategy (WGS, RNA-Seq, AMPLICON, etc.)"
128+
"enum": [
129+
"WGS",
130+
"WGA",
131+
"WXS",
132+
"RNA-Seq",
133+
"snRNA-seq",
134+
"ssRNA-seq",
135+
"miRNA-Seq",
136+
"ncRNA-Seq",
137+
"FL-cDNA",
138+
"EST",
139+
"Hi-C",
140+
"ATAC-seq",
141+
"WCS",
142+
"RAD-Seq",
143+
"CLONE",
144+
"POOLCLONE",
145+
"AMPLICON",
146+
"CLONEEND",
147+
"FINISHING",
148+
"ChIP-Seq",
149+
"MNase-Seq",
150+
"Ribo-Seq",
151+
"DNase-Hypersensitivity",
152+
"Bisulfite-Seq",
153+
"CTS",
154+
"ChM-Seq",
155+
"GBS",
156+
"MRE-Seq",
157+
"MeDIP-Seq",
158+
"MBD-Seq",
159+
"NOMe-Seq",
160+
"Tn-Seq",
161+
"VALIDATION",
162+
"FAIRE-seq",
163+
"SELEX",
164+
"RIP-Seq",
165+
"ChIA-PET",
166+
"Synthetic-Long-Read",
167+
"Targeted-Capture",
168+
"Tethered Chromatin Conformation Capture",
169+
"OTHER"
170+
],
171+
"description": "Library strategy. Must be one of the ENA controlled vocabulary values listed in the enum."
76 172
},
77 173
"insert_size": {
78 174
"anyOf": [

0 commit comments

Comments
 (0)