Skip to content

Commit 24d608c

Browse files
committed
do the parsing, update schema
1 parent 8e3a749 commit 24d608c

File tree

7 files changed

+559
-69
lines changed

7 files changed

+559
-69
lines changed

import_specifications/schema/dts_manifest.json

+14-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,20 @@
1313
"type": "object",
1414
"properties": {
1515
"data_type": {"type": "string"},
16-
"parameters": {"type": "object"}
16+
"parameters": {
17+
"type": "object",
18+
"patternProperties": {
19+
".*": {
20+
"oneOf": [
21+
{"type": "string"},
22+
{"type": "number"},
23+
{"type": "boolean"},
24+
{"type": "null"}
25+
]
26+
}
27+
},
28+
"additionalProperties": false
29+
}
1730
},
1831
"required": ["data_type", "parameters"]
1932
}

staging_service/import_specifications/individual_parsers.py

+54-3
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,14 @@
6161
"null",
6262
]
6363

64+
_DTS_INSTRUCTIONS_KEY = "instructions"
65+
_DTS_INSTRUCTIONS_DATATYPE_KEY = "data_type"
66+
_DTS_INSTRUCTIONS_PARAMETERS_KEY = "parameters"
67+
_DTS_INSTRUCTIONS_REQUIRED_KEYS = [_DTS_INSTRUCTIONS_DATATYPE_KEY, _DTS_INSTRUCTIONS_PARAMETERS_KEY]
68+
_DTS_INSTRUCTIONS_PROTOCOL_KEY = "protocol"
69+
_DTS_INSTRUCTIONS_PROTOCOL = "KBase narrative import"
70+
_DTS_INSTRUCTIONS_OBJECTS_KEY = "objects"
71+
6472

6573
class _ParseException(Exception):
6674
pass
@@ -361,10 +369,18 @@ def parse_dts_manifest(path: Path, dts_manifest_schema: dict) -> ParseResults:
361369
err_str = err.message
362370
err_path = err.absolute_path
363371
if err_path:
364-
if isinstance(err_path[-1], int):
365-
err_path[-1] = f"item {err_path[-1]}"
366-
err_str += f" at {'/'.join(err_path)}"
372+
# paths can look like, say, ["instructions", "objects", 0, "data_type"]
373+
# convert that '0' to "item 0" to be slightly more readable to users.
374+
# kind of a mouthful below, but does that conversion in place
375+
err_path = [f"item {elem}" if isinstance(elem, int) else elem for elem in err_path]
376+
prep = "for"
377+
if len(err_path) > 1:
378+
prep = "at"
379+
err_str += f" {prep} {'/'.join(err_path)}"
367380
errors.append(Error(ErrorType.PARSE_FAIL, err_str, spcsrc))
381+
if not errors:
382+
results = _process_dts_manifest(manifest_json, spcsrc)
383+
368384
except jsonschema.exceptions.SchemaError:
369385
return _error(Error(ErrorType.OTHER, "Manifest schema is invalid", spcsrc))
370386
except json.JSONDecodeError:
@@ -373,9 +389,44 @@ def parse_dts_manifest(path: Path, dts_manifest_schema: dict) -> ParseResults:
373389
return _error(Error(ErrorType.FILE_NOT_FOUND, source_1=spcsrc))
374390
except IsADirectoryError:
375391
return _error(Error(ErrorType.PARSE_FAIL, "The given path is a directory", spcsrc))
392+
except _ParseException as err:
393+
return _error(err.args[0])
376394
if errors:
377395
return ParseResults(errors=tuple(errors))
378396
elif results:
379397
return ParseResults(frozendict(results))
380398
else:
381399
return _error(Error(ErrorType.PARSE_FAIL, "No import specification data in file", spcsrc))
400+
401+
402+
def _process_dts_manifest(
    manifest: dict[str, Any], spcsrc: SpecificationSource
) -> dict[str, ParseResult]:
    """Convert a schema-validated DTS manifest into per-data-type parse results.

    This assumes that the manifest has the correct structure, i.e. it has already
    been validated via jsonschema. A manifest missing any of the expected keys
    will raise a KeyError.

    manifest - the parsed manifest JSON.
    spcsrc - the specification source, attached to every result and to any error.

    Returns a dictionary where keys are data types and values are the ParseResult
    for that data type. Each ParseResult holds one frozendict of parameters per
    manifest object of that data type, in the order they appear in the manifest.

    Raises _ParseException, wrapping a PARSE_FAIL Error, if the instructions
    protocol does not match the expected protocol string.
    """
    instructions = manifest[_DTS_INSTRUCTIONS_KEY]
    # Make sure the protocol value matches before reading any objects.
    if instructions[_DTS_INSTRUCTIONS_PROTOCOL_KEY] != _DTS_INSTRUCTIONS_PROTOCOL:
        raise _ParseException(
            Error(
                ErrorType.PARSE_FAIL,
                f"The instructions protocol must be '{_DTS_INSTRUCTIONS_PROTOCOL}'",
                spcsrc,
            )
        )
    # Group the parameter sets by data type; several objects may share one type.
    params_by_datatype = {}
    for resource_obj in instructions[_DTS_INSTRUCTIONS_OBJECTS_KEY]:
        datatype = resource_obj[_DTS_INSTRUCTIONS_DATATYPE_KEY]
        parameters = frozendict(resource_obj[_DTS_INSTRUCTIONS_PARAMETERS_KEY])
        params_by_datatype.setdefault(datatype, []).append(parameters)
    # Package results as a dict of {datatype: ParseResult}
    return {
        datatype: ParseResult(spcsrc, tuple(parameter_sets))
        for datatype, parameter_sets in params_by_datatype.items()
    }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"name": "manifest",
3+
"resources": [
4+
{
5+
"id": "JDP:555518eb0d8785178e712d88",
6+
"name": "61564.assembled",
7+
"path": "img/submissions/61564/61564.assembled.gff",
8+
"format": "gff",
9+
"media_type": "text/plain",
10+
"bytes": 455161,
11+
"hash": "",
12+
"credit": {
13+
"comment": "",
14+
"content_url": "",
15+
"contributors": null,
16+
"credit_metadata_source": "",
17+
"dates": null,
18+
"descriptions": null,
19+
"funding": null,
20+
"identifier": "JDP:555518eb0d8785178e712d88",
21+
"license": {
22+
"id": "",
23+
"url": ""
24+
},
25+
"publisher": {
26+
"organization_id": "",
27+
"organization_name": ""
28+
},
29+
"related_identifiers": null,
30+
"resource_type": "dataset",
31+
"titles": null,
32+
"url": "",
33+
"version": ""
34+
}
35+
},
36+
{
37+
"id": "JDP:555518eb0d8785178e712d84",
38+
"name": "61564.assembled",
39+
"path": "img/submissions/61564/61564.assembled.fna",
40+
"format": "fasta",
41+
"media_type": "text/plain",
42+
"bytes": 6354414,
43+
"hash": "",
44+
"credit": {
45+
"comment": "",
46+
"content_url": "",
47+
"contributors": null,
48+
"credit_metadata_source": "",
49+
"dates": null,
50+
"descriptions": null,
51+
"funding": null,
52+
"identifier": "JDP:555518eb0d8785178e712d84",
53+
"license": {
54+
"id": "",
55+
"url": ""
56+
},
57+
"publisher": {
58+
"organization_id": "",
59+
"organization_name": ""
60+
},
61+
"related_identifiers": null,
62+
"resource_type": "dataset",
63+
"titles": null,
64+
"url": "",
65+
"version": ""
66+
}
67+
}
68+
],
69+
"instructions": {
70+
"protocol": "KBase narrative import",
71+
"objects": [
72+
{
73+
"data_type": "gff_metagenome",
74+
"parameters": {
75+
"param1": "value1",
76+
"param2": "value2"
77+
}
78+
}
79+
]
80+
}
81+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{
2+
"name": "manifest",
3+
"resources": [
4+
{
5+
"id": "JDP:555518eb0d8785178e712d88",
6+
"name": "61564.assembled",
7+
"path": "img/submissions/61564/61564.assembled.gff",
8+
"format": "gff"
9+
},
10+
{
11+
"id": "JDP:555518eb0d8785178e712d84",
12+
"name": "61564.assembled",
13+
"path": "img/submissions/61564/61564.assembled.fna",
14+
"format": "fasta"
15+
},
16+
{
17+
"id": "JDP:555518ec0d8785178e712d9f",
18+
"name": "61567.assembled",
19+
"path": "img/submissions/61567/61567.assembled.gff",
20+
"format": "gff"
21+
},
22+
{
23+
"id": "JDP:555518ec0d8785178e712d9b",
24+
"name": "61567.assembled",
25+
"path": "img/submissions/61567/61567.assembled.fna",
26+
"format": "fasta"
27+
}
28+
],
29+
"instructions": {
30+
"protocol": "KBase narrative import",
31+
"objects": [
32+
{
33+
"data_type": "gff_metagenome",
34+
"parameters": {
35+
"mg_param1": "value1",
36+
"mg_param2": "value2"
37+
}
38+
},
39+
{
40+
"data_type": "gff_metagenome",
41+
"parameters": {
42+
"mg_param1": "value3",
43+
"mg_param2": "value4"
44+
}
45+
},
46+
{
47+
"data_type": "gff_genome",
48+
"parameters": {
49+
"gen_param1": "value1",
50+
"gen_param2": "value2"
51+
}
52+
},
53+
{
54+
"data_type": "gff_genome",
55+
"parameters": {
56+
"gen_param1": "value3",
57+
"gen_param2": "value4"
58+
}
59+
}
60+
]
61+
}
62+
}

tests/import_specifications/test_data/manifest_small.json

+22-17
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,6 @@
3131
"titles": null,
3232
"url": "",
3333
"version": ""
34-
},
35-
"instructions": {
36-
"data_type": "gff_metagenome",
37-
"parameters": {
38-
"param1": "value1",
39-
"param2": "value2"
40-
}
4134
}
4235
},
4336
{
@@ -70,13 +63,6 @@
7063
"titles": null,
7164
"url": "",
7265
"version": ""
73-
},
74-
"instructions": {
75-
"data_type": "gff_metagenome",
76-
"parameters": {
77-
"param1": "value1",
78-
"param2": "value2"
79-
}
8066
}
8167
},
8268
{
@@ -109,14 +95,33 @@
10995
"titles": null,
11096
"url": "",
11197
"version": ""
98+
}
99+
}
100+
],
101+
"instructions": {
102+
"protocol": "KBase narrative import",
103+
"objects": [
104+
{
105+
"data_type": "gff_metagenome",
106+
"parameters": {
107+
"param1": "value1",
108+
"param2": "value2"
109+
}
112110
},
113-
"instructions": {
111+
{
112+
"data_type": "gff_metagenome",
113+
"parameters": {
114+
"param1": "value1",
115+
"param2": "value2"
116+
}
117+
},
118+
{
114119
"data_type": "gff_metagenome",
115120
"parameters": {
116121
"param1": "value1",
117122
"param2": "value2"
118123
}
119124
}
120-
}
121-
]
125+
]
126+
}
122127
}

0 commit comments

Comments
 (0)