diff --git a/deployment/conf/deployment.cfg b/deployment/conf/deployment.cfg index b6622c58..76d86cc7 100644 --- a/deployment/conf/deployment.cfg +++ b/deployment/conf/deployment.cfg @@ -4,4 +4,4 @@ DATA_DIR = /kb/deployment/lib/src/data/bulk/ AUTH_URL = https://ci.kbase.us/services/auth/api/V2/token CONCIERGE_PATH = /kbaseconcierge FILE_EXTENSION_MAPPINGS = /kb/deployment/conf/supported_apps_w_extensions.json -DTS_MANIFEST_SCHEMA = /kb/deployment/import_specifications/schema/dts_manifest_schema.json +DTS_MANIFEST_SCHEMA = /kb/module/import_specifications/schema/dts_manifest_schema.json diff --git a/staging_service/import_specifications/individual_parsers.py b/staging_service/import_specifications/individual_parsers.py index e0379caf..b13aba62 100644 --- a/staging_service/import_specifications/individual_parsers.py +++ b/staging_service/import_specifications/individual_parsers.py @@ -2,6 +2,7 @@ Contains parser functions for use with the file parser framework. """ +from collections import defaultdict import csv import json import math @@ -60,6 +61,11 @@ "null", ] +_DTS_INSTRUCTIONS_KEY = "instructions" +_DTS_INSTRUCTIONS_DATATYPE_KEY = "data_type" +_DTS_INSTRUCTIONS_PARAMETERS_KEY = "parameters" +_DTS_INSTRUCTIONS_OBJECTS_KEY = "objects" + class _ParseException(Exception): pass @@ -351,15 +357,27 @@ def parse_dts_manifest(path: Path, validator: Draft202012Validator) -> ParseResu results = {} try: with open(path, "r") as manifest: - manifest_json = json.load(manifest) - if not isinstance(manifest_json, dict): + manifest = json.load(manifest) + if not isinstance(manifest, dict): return _error(Error(ErrorType.PARSE_FAIL, "Manifest is not a dictionary", spcsrc)) - for err in validator.iter_errors(manifest_json): + for err in validator.iter_errors(manifest): err_str = err.message err_path = list(err.absolute_path) if err_path: err_str += f" at {err_path}" errors.append(Error(ErrorType.PARSE_FAIL, err_str, spcsrc)) + if not errors: + results = defaultdict(list) + instructions = manifest[_DTS_INSTRUCTIONS_KEY] + for resource_obj in instructions[_DTS_INSTRUCTIONS_OBJECTS_KEY]: + datatype = resource_obj[_DTS_INSTRUCTIONS_DATATYPE_KEY] + parameters = frozendict(resource_obj[_DTS_INSTRUCTIONS_PARAMETERS_KEY]) + results[datatype].append(parameters) + # Re-package results as a dict of {datatype: ParseResult} + results = { + datatype: ParseResult(spcsrc, tuple(paramlist)) + for datatype, paramlist in results.items() + } except json.JSONDecodeError: return _error(Error(ErrorType.PARSE_FAIL, "File must be in JSON format", spcsrc)) except FileNotFoundError: diff --git a/tests/import_specifications/test_data/manifest_errors.json b/tests/import_specifications/test_data/manifest_errors.json new file mode 100644 index 00000000..5960b51d --- /dev/null +++ b/tests/import_specifications/test_data/manifest_errors.json @@ -0,0 +1,81 @@ +{ + "name": "manifest", + "resources": [ + { + "id": "JDP:555518eb0d8785178e712d88", + "name": "61564.assembled", + "path": "img/submissions/61564/61564.assembled.gff", + "format": "gff", + "media_type": "text/plain", + "bytes": 455161, + "hash": "", + "credit": { + "comment": "", + "content_url": "", + "contributors": null, + "credit_metadata_source": "", + "dates": null, + "descriptions": null, + "funding": null, + "identifier": "JDP:555518eb0d8785178e712d88", + "license": { + "id": "", + "url": "" + }, + "publisher": { + "organization_id": "", + "organization_name": "" + }, + "related_identifiers": null, + "resource_type": "dataset", + "titles": null, + "url": "", + "version": "" + } + }, + { + "id": "JDP:555518eb0d8785178e712d84", + "name": "61564.assembled", + "path": "img/submissions/61564/61564.assembled.fna", + "format": "fasta", + "media_type": "text/plain", + "bytes": 6354414, + "hash": "", + "credit": { + "comment": "", + "content_url": "", + "contributors": null, + "credit_metadata_source": "", + "dates": null, + "descriptions": null, + "funding": null, + "identifier": "JDP:555518eb0d8785178e712d84", + "license": { + "id": "", + "url": "" + }, + "publisher": { + "organization_id": "", + "organization_name": "" + }, + "related_identifiers": null, + "resource_type": "dataset", + "titles": null, + "url": "", + "version": "" + } + } + ], + "instructions": { + "protocol": "KBase narrative import", + "objects": [ + { + "data_type": "gff_metagenome", + "parameters": { + "param1": "value1", + "param2": "value2" + } + } + ] + } + } diff --git a/tests/import_specifications/test_data/manifest_multiple.json b/tests/import_specifications/test_data/manifest_multiple.json new file mode 100644 index 00000000..393bfb84 --- /dev/null +++ b/tests/import_specifications/test_data/manifest_multiple.json @@ -0,0 +1,62 @@ +{ + "name": "manifest", + "resources": [ + { + "id": "JDP:555518eb0d8785178e712d88", + "name": "61564.assembled", + "path": "img/submissions/61564/61564.assembled.gff", + "format": "gff" + }, + { + "id": "JDP:555518eb0d8785178e712d84", + "name": "61564.assembled", + "path": "img/submissions/61564/61564.assembled.fna", + "format": "fasta" + }, + { + "id": "JDP:555518ec0d8785178e712d9f", + "name": "61567.assembled", + "path": "img/submissions/61567/61567.assembled.gff", + "format": "gff" + }, + { + "id": "JDP:555518ec0d8785178e712d9b", + "name": "61567.assembled", + "path": "img/submissions/61567/61567.assembled.fna", + "format": "fasta" + } + ], + "instructions": { + "protocol": "KBase narrative import", + "objects": [ + { + "data_type": "gff_metagenome", + "parameters": { + "mg_param1": "value1", + "mg_param2": "value2" + } + }, + { + "data_type": "gff_metagenome", + "parameters": { + "mg_param1": "value3", + "mg_param2": "value4" + } + }, + { + "data_type": "gff_genome", + "parameters": { + "gen_param1": "value1", + "gen_param2": "value2" + } + }, + { + "data_type": "gff_genome", + "parameters": { + "gen_param1": "value3", + "gen_param2": "value4" + } + } + ] + } + } diff --git a/tests/import_specifications/test_data/manifest_small.json b/tests/import_specifications/test_data/manifest_small.json index e87472d5..54a102f8 100644 --- a/tests/import_specifications/test_data/manifest_small.json +++ b/tests/import_specifications/test_data/manifest_small.json @@ -31,13 +31,6 @@ "titles": null, "url": "", "version": "" - }, - "instructions": { - "data_type": "gff_metagenome", - "parameters": { - "param1": "value1", - "param2": "value2" - } } }, { @@ -70,13 +63,6 @@ "titles": null, "url": "", "version": "" - }, - "instructions": { - "data_type": "gff_metagenome", - "parameters": { - "param1": "value1", - "param2": "value2" - } } }, { @@ -109,14 +95,33 @@ "titles": null, "url": "", "version": "" - }, - "instructions": { + } + } + ], + "instructions": { + "protocol": "KBase narrative import", + "objects": [ + { "data_type": "gff_metagenome", "parameters": { "param1": "value1", "param2": "value2" } + }, + { + "data_type": "gff_metagenome", + "parameters": { + "param1": "value3", + "param2": "value4" + } + }, + { + "data_type": "gff_metagenome", + "parameters": { + "param1": "value5", + "param2": "value6" + } } - } - ] + ] + } } diff --git a/tests/import_specifications/test_individual_parsers.py b/tests/import_specifications/test_individual_parsers.py index 8c6c3af4..61232d9f 100644 --- a/tests/import_specifications/test_individual_parsers.py +++ b/tests/import_specifications/test_individual_parsers.py @@ -24,6 +24,7 @@ _TEST_DATA_DIR = (Path(__file__).parent / "test_data").resolve() +_DTS_INSTRUCTIONS_PROTOCOL = "KBase narrative import" @pytest.fixture(scope="module", name="temp_dir") @@ -779,22 +780,6 @@ def test_excel_parse_fail_unequal_rows(): ) -def test_dts_manifest_parse_success(dts_validator: Draft202012Validator): - f = _get_test_file("manifest_small.json") - res = parse_dts_manifest(f, dts_validator) - # fails for now - assert res.results is None - assert res.errors == tuple( - [ - Error( - ErrorType.PARSE_FAIL, - "'instructions' is a required property", - SpecificationSource(f), - ) - ] - ) - - @pytest.fixture(scope="module") def write_dts_manifest(temp_dir: Generator[Path, None, None]) -> Callable[[dict | list], Path]: def manifest_writer(input_json: dict | list) -> Path: @@ -806,6 +791,52 @@ def manifest_writer(input_json: dict | list) -> Path: return manifest_writer +def test_dts_manifest_parse_success(dts_validator: Draft202012Validator): + f = _get_test_file("manifest_small.json") + res = parse_dts_manifest(f, dts_validator) + assert res.results + assert res.errors is None + assert list(res.results.keys()) == ["gff_metagenome"] + assert res.results["gff_metagenome"].source.file == f + assert res.results["gff_metagenome"].result == ( + frozendict({"param1": "value1", "param2": "value2"}), + frozendict({"param1": "value3", "param2": "value4"}), + frozendict({"param1": "value5", "param2": "value6"}), + ) + + +def test_dts_manifest_parse_multi_data_types_success(dts_validator: Draft202012Validator): + f = _get_test_file("manifest_multiple.json") + res = parse_dts_manifest(f, dts_validator) + assert res.results + assert res.errors is None + assert len(res.results.keys()) == 2 + expected = { + "gff_metagenome": ( + frozendict({"mg_param1": "value1", "mg_param2": "value2"}), + frozendict({"mg_param1": "value3", "mg_param2": "value4"}), + ), + "gff_genome": ( + frozendict( + { + "gen_param1": "value1", + "gen_param2": "value2", + } + ), + frozendict( + { + "gen_param1": "value3", + "gen_param2": "value4", + } + ), + ), + } + for key in expected.keys(): + assert key in res.results + assert res.results[key].source.file == f + assert res.results[key].result == expected[key] + + def _dts_manifest_parse_fail( input_file: Path, validator: Draft202012Validator, errors: list[Error] ): @@ -912,7 +943,7 @@ def test_dts_manifest_fail_with_path( { "resources": [], "instructions": { - "protocol": "KBase narrative import", + "protocol": _DTS_INSTRUCTIONS_PROTOCOL, "objects": [{"data_type": "foo", "parameters": {}}, {"parameters": {}}], }, } @@ -928,3 +959,243 @@ def test_dts_manifest_fail_with_path( ) ], ) + + +malformed_dict = [[], 1, "nope", None] + + +@pytest.mark.parametrize("bad_instruction", malformed_dict) +def test_dts_manifest_malformed_instructions( + write_dts_manifest: Callable[[dict | list], Path], + dts_validator: Draft202012Validator, + bad_instruction: list | int | str | None, +): + manifest_file = write_dts_manifest({"resources": [], "instructions": bad_instruction}) + err_val = bad_instruction + if isinstance(err_val, str): + err_val = f"'{err_val}'" + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + f"{err_val} is not of type 'object' at ['instructions']", + SpecificationSource(manifest_file), + ) + ], + ) + + +@pytest.mark.parametrize("bad_parameters", malformed_dict) +def test_dts_manifest_malformed_parameters( + write_dts_manifest: Callable[[dict | list], Path], + dts_validator: Draft202012Validator, + bad_parameters: list | int | str | None, +): + manifest_file = write_dts_manifest( + { + "resources": [], + "instructions": { + "protocol": _DTS_INSTRUCTIONS_PROTOCOL, + "objects": [{"data_type": "some_type", "parameters": bad_parameters}], + }, + } + ) + err_val = bad_parameters + if isinstance(err_val, str): + err_val = f"'{err_val}'" + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + f"{err_val} is not of type 'object' at ['instructions', 'objects', 0, 'parameters']", + SpecificationSource(manifest_file), + ) + ], + ) + + +missing_key_cases = [["data_type"], ["parameters"], ["data_type", "parameters"]] + + +@pytest.mark.parametrize("missing_keys", missing_key_cases) +def test_dts_manifest_missing_instruction_keys( + write_dts_manifest: Callable[[dict | list], Path], + dts_validator: Draft202012Validator, + missing_keys: list[str], +): + resource_obj = {"data_type": "some_type", "parameters": {"p1": "v1"}} + for key in missing_keys: + del resource_obj[key] + manifest_file = write_dts_manifest( + { + "resources": [], + "instructions": {"protocol": _DTS_INSTRUCTIONS_PROTOCOL, "objects": [resource_obj]}, + } + ) + error_list = [] + for key in missing_keys: + error_list.append( + Error( + ErrorType.PARSE_FAIL, + f"'{key}' is a required property at ['instructions', 'objects', 0]", + SpecificationSource(manifest_file), + ) + ) + _dts_manifest_parse_fail(manifest_file, dts_validator, error_list) + + +def test_dts_manifest_empty( + write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator +): + manifest_file = write_dts_manifest( + {"resources": [], "instructions": {"protocol": _DTS_INSTRUCTIONS_PROTOCOL, "objects": []}} + ) + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + "No import specification data in file", + SpecificationSource(manifest_file), + ) + ], + ) + + +@pytest.mark.parametrize("non_str", [{"a": "b"}, ["a", "b"], 1, None]) +def test_dts_manifest_fail_data_type_not_str( + write_dts_manifest: Callable[[dict | list], Path], + dts_validator: Draft202012Validator, + non_str: dict | list | int | None, +): + manifest_file = write_dts_manifest( + { + "resources": [], + "instructions": { + "protocol": _DTS_INSTRUCTIONS_PROTOCOL, + "objects": [{"data_type": non_str, "parameters": {}}], + }, + } + ) + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + f"{non_str} is not of type 'string' at ['instructions', 'objects', 0, 'data_type']", + SpecificationSource(manifest_file), + ) + ], + ) + + +def test_dts_manifest_missing_instructions_protocol( + write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator +): + manifest_file = write_dts_manifest({"resources": [], "instructions": {"objects": []}}) + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + "'protocol' is a required property at ['instructions']", + SpecificationSource(manifest_file), + ) + ], + ) + + +def test_dts_manifest_wrong_protocol( + write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator +): + manifest_file = write_dts_manifest( + {"resources": [], "instructions": {"protocol": "some wrong protocol", "objects": []}} + ) + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + f"'{_DTS_INSTRUCTIONS_PROTOCOL}' was expected at ['instructions', 'protocol']", + SpecificationSource(manifest_file), + ) + ], + ) + + +def test_dts_manifest_missing_objects( + write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator +): + manifest_file = write_dts_manifest( + {"resources": [], "instructions": {"protocol": _DTS_INSTRUCTIONS_PROTOCOL}} + ) + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + "'objects' is a required property at ['instructions']", + SpecificationSource(manifest_file), + ) + ], + ) + + +@pytest.mark.parametrize("not_dict", malformed_dict) +def test_dts_manifest_resource_not_dict( + write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator, not_dict +): + manifest_file = write_dts_manifest( + { + "resources": [], + "instructions": {"protocol": _DTS_INSTRUCTIONS_PROTOCOL, "objects": [not_dict]}, + } + ) + err_val = not_dict + if isinstance(err_val, str): + err_val = f"'{err_val}'" + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + f"{err_val} is not of type 'object' at ['instructions', 'objects', 0]", + SpecificationSource(manifest_file), + ) + ], + ) + + +def test_dts_manifest_parameter_not_primitive( + write_dts_manifest: Callable[[dict | list], Path], dts_validator: Draft202012Validator +): + manifest_file = write_dts_manifest( + { + "resources": [], + "instructions": { + "protocol": _DTS_INSTRUCTIONS_PROTOCOL, + "objects": [{"data_type": "some_type", "parameters": {"foo": ["not", "allowed"]}}], + }, + } + ) + _dts_manifest_parse_fail( + manifest_file, + dts_validator, + [ + Error( + ErrorType.PARSE_FAIL, + "['not', 'allowed'] is not valid under any of the given schemas at ['instructions', 'objects', 0, 'parameters', 'foo']", + SpecificationSource(manifest_file), + ) + ], + ) diff --git a/tests/test_app.py b/tests/test_app.py index 1f78e849..b18bf9d8 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1117,41 +1117,31 @@ async def test_bulk_specification_dts_success(): f"bulk_specification/?files={sub_dir}/{manifest_1} , {sub_dir}/{manifest_2}&dts" ) jsn = await resp.json() - # fails for now. will update when schema/parser is properly finished. assert jsn == { - "errors": [ - { - "type": "cannot_parse_file", - "file": f"{dts_dir}/{manifest_1}", - "message": "No import specification data in file", - "tab": None, - }, - { - "type": "cannot_parse_file", - "file": f"{dts_dir}/{manifest_2}", - "message": "No import specification data in file", - "tab": None, - }, - ] + "types": { + "gff_genome": [ + { + "fasta_file": "g_fasta_1", + "gff_file": "g_gff_1", + "genome_name": "genome_1", + }, + { + "fasta_file": "g_fasta_2", + "gff_file": "g_gff_2", + "genome_name": "genome_2", + }, + ], + "gff_metagenome": [ + {"fasta_file": "fasta_1", "gff_file": "gff_1", "genome_name": "mg_1"}, + {"fasta_file": "fasta_2", "gff_file": "gff_2", "genome_name": "mg_2"}, + ], + }, + "files": { + "gff_metagenome": {"file": f"testuser/{sub_dir}/{manifest_1}", "tab": None}, + "gff_genome": {"file": f"testuser/{sub_dir}/{manifest_2}", "tab": None}, + }, } - # soon will be this: - # assert json == { - # "types": { - # "gff_genome": [ - # {"fasta_file": "g_fasta_1", "gff_file": "g_gff_1", "genome_name": "genome_1"}, - # {"fasta_file": "g_fasta_2", "gff_file": "g_gff_2", "genome_name": "genome_2"} - # ], - # "gff_metagenome": [ - # {"fasta_file": "fasta_1", "gff_file": "gff_1", "genome_name": "mg_1"}, - # {"fasta_file": "fasta_2", "gff_file": "gff_2", "genome_name": "mg_2"} - # ] - # }, - # "files": { - # "gff_metagenome": {"file": f"testuser/{manifest_1}", "tab": None}, - # "gff_genome": {"file": f"testuser/{manifest_2}", "tab": None} - # } - # } - assert resp.status == 400 + assert resp.status == 200 async def test_bulk_specification_dts_fail_json_without_dts():