From 9c68cc0277f7077a9ad14929da8bdd1ee9892525 Mon Sep 17 00:00:00 2001 From: aschroed Date: Thu, 18 Apr 2024 14:09:56 -0400 Subject: [PATCH 1/3] Fix of bug I introduced that skipped validation error reporting --- tests/test_import_data.py | 57 ++++++++++++++++++++++++++++++------ wranglertools/import_data.py | 9 +++--- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/tests/test_import_data.py b/tests/test_import_data.py index 4f88a228..cfe8e105 100644 --- a/tests/test_import_data.py +++ b/tests/test_import_data.py @@ -280,14 +280,15 @@ def test_combine_set_expsets_with_existing(): def test_error_report(connection_mock): - # There are x errors, x of them are legit, need to be checked against the all aliases list, and excluded + # There are 6 errors in the err_dict, 5 of them are legit, 1 is checked against the all aliases list, and excluded err_dict = { "title": "Unprocessable Entity", "status": "error", "errors": [ {"location": "body", "description": "Test for error with no name"}, - {"name": "Schema: ", "location": "body", + {# This one should be excluded from report as this alias is in the sheet alias list + "name": "Schema: ", "location": "body", "description": "Unable to resolve link: siyuan-wang-lab:region_1MB_TAD_1"}, {"name": "Schema: ", "location": "body", "description": "Unable to resolve link: siyuan-wang-lab:region_5MB_TAD_2"}, @@ -307,14 +308,52 @@ def test_error_report(connection_mock): rep = imp.error_report(err_dict, "Vendor", ['dcic:insituhicagar', 'siyuan-wang-lab:region_1MB_TAD_1'], connection_mock) message = ''' ERROR vendor Test for error with no name -ERROR vendor Field 'Schema: ': Unable to resolve link: siyuan-wang-lab:region_5MB_TAD_2 -ERROR vendor Field 'Schema: genome_location.1': 'siyuan-wang-lab:region_5MB_TAD_2' not found -ERROR vendor Field 'age': 'at' is not of type 'number' -ERROR vendor Field 'sex': 'green' is not one of ['male', 'female', 'unknown', 'mixed'] +ERROR vendor Field 'Schema: ': Unable to resolve link: siyuan-wang-lab:region_5MB_TAD_2 +ERROR vendor Field 'Schema: genome_location.1': 'siyuan-wang-lab:region_5MB_TAD_2' not found +ERROR vendor Field 'age': 'at' is not of type 'number' +ERROR vendor Field 'sex': 'green' is not one of ['male', 'female', 'unknown', 'mixed'] +''' + assert rep.strip() == message.strip() + + +def test_error_with_alias_param_report(connection_mock): + # There are 6 errors in the err_dict, 5 of them are legit, 1 is checked against the all aliases list, and excluded + err_dict = { + "title": "Unprocessable Entity", + "status": "error", + "errors": [ + {"location": "body", + "description": "Test for error with no name"}, + {# This one should be excluded from report as this alias is in the sheet alias list + "name": "Schema: ", "location": "body", + "description": "Unable to resolve link: siyuan-wang-lab:region_1MB_TAD_1"}, + {"name": "Schema: ", "location": "body", + "description": "Unable to resolve link: siyuan-wang-lab:region_5MB_TAD_2"}, + {"location": "body", "name": "Schema: genome_location.1", + "description": "'siyuan-wang-lab:region_5MB_TAD_2' not found"}, + {"name": "protocol_documents", + "description": "'dcic:insituhicagar' not found", "location": "body"}, + {"name": "age", + "description": "'at' is not of type 'number'", "location": "body"}, + {"name": "sex", + "description": "'green' is not one of ['male', 'female', 'unknown', 'mixed']", "location": "body"} + ], + "code": 422, + "@type": ["ValidationFailure", "Error"], + "description": "Failed validation" + } + rep = imp.error_report(err_dict, "Vendor", ['dcic:insituhicagar', 'siyuan-wang-lab:region_1MB_TAD_1'], connection_mock, error_id='dcic:insituhicagar') + message = ''' +ERROR vendor Test for error with no name +ERROR vendor dcic:insituhicagar Field 'Schema: ': Unable to resolve link: siyuan-wang-lab:region_5MB_TAD_2 +ERROR vendor dcic:insituhicagar Field 'Schema: genome_location.1': 'siyuan-wang-lab:region_5MB_TAD_2' not found +ERROR vendor dcic:insituhicagar Field 'age': 'at' is not of type 'number' +ERROR vendor dcic:insituhicagar Field 'sex': 'green' is not one of ['male', 'female', 'unknown', 'mixed'] ''' assert rep.strip() == message.strip() + def test_error_conflict_report(connection_mock): # There is one conflict error err_dict = {"title": "Conflict", @@ -485,7 +524,7 @@ def test_workbook_reader_update_new_file_fastq_post_and_file_upload(capsys, mock all_aliases, dict_load, dict_rep, dict_set, True, []) args = imp.ff_utils.post_metadata.call_args out = capsys.readouterr()[0] - outlist = [i.strip() for i in out.split('\n') if i is not ""] + outlist = [i.strip() for i in out.split('\n') if i != ""] post_json_arg = args[0][0] assert post_json_arg['md5sum'] == '8f8cc612e5b2d25c52b1d29017e38f2b' assert message0 == outlist[0] @@ -533,7 +572,7 @@ def test_workbook_reader_patch_file_meta_and_file_upload(capsys, mocker, connect assert updated_post['@graph'][0]['upload_credentials'] == 'new_creds' # check for output message out = capsys.readouterr()[0] - outlist = [i.strip() for i in out.split('\n') if i is not ""] + outlist = [i.strip() for i in out.split('\n') if i != ""] assert message0 == outlist[0] assert message1 == outlist[1] @@ -681,7 +720,7 @@ def test_order_sorter(capsys): message1 = '''WARNING! Check the sheet names and the reference list "sheet_order"''' assert ordered_list == imp.order_sorter(test_list) out = capsys.readouterr()[0] - outlist = [i.strip() for i in out.split('\n') if i is not ""] + outlist = [i.strip() for i in out.split('\n') if i != ""] import sys if (sys.version_info > (3, 0)): assert message0 in outlist[0] diff --git a/wranglertools/import_data.py b/wranglertools/import_data.py index 8dc5ba3c..c1d21fd4 100755 --- a/wranglertools/import_data.py +++ b/wranglertools/import_data.py @@ -793,9 +793,8 @@ def error_report(error_dic, sheet, all_aliases, connection, error_id=''): nf_txt = 'not found' not_found = None alias_bit = None - if error_id: - alias_bit = error_id - elif utrl_txt in error_description: + #import pdb; pdb.set_trace() + if utrl_txt in error_description: alias_bit = error_description.replace(utrl_txt, '') elif error_description.endswith(nf_txt): alias_bit = error_description.replace(nf_txt, '').replace("'", '') @@ -805,8 +804,8 @@ def error_report(error_dic, sheet, all_aliases, connection, error_id=''): if not_found and not_found in all_aliases: continue error_field = err['name'] - report.append("{sheet:<30}Field '{er}': {des}" - .format(er=error_field, des=error_description, sheet="ERROR " + sheet.lower())) + report.append("{sheet:<30}{eid} Field '{er}': {des}" + .format(er=error_field, des=error_description, eid=error_id, sheet="ERROR " + sheet.lower())) # if there is a an access forbidden error elif error_dic.get('title') == 'Forbidden': error_description = error_dic['description'] From ee5f15cc3025a8ce2298417b1125f46ec8860b3e Mon Sep 17 00:00:00 2001 From: aschroed Date: Thu, 18 Apr 2024 14:15:10 -0400 Subject: [PATCH 2/3] version bump and update changelog --- CHANGELOG.rst | 8 ++++++++ pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 67bf3d4e..102898bb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,14 @@ Submit4DN Change Log ---------- +4.0.2 +===== + +`PR 175: error reporting bug fix `_ + +* bug fix of bug that did not properly report validation errors +* added a test for case where an alias is passed to error_report function + 4.0.1 ===== diff --git a/pyproject.toml b/pyproject.toml index 01f7185d..8a4a6807 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "Submit4DN" -version = "4.0.1" +version = "4.0.2" description = "Utility package for submitting data to the 4DN Data Portal" authors = ["4DN-DCIC Team "] license = "MIT" From 067cfcea05d87c06e3a1153b4eb08463053c2ae2 Mon Sep 17 00:00:00 2001 From: aschroed Date: Thu, 18 Apr 2024 14:17:35 -0400 Subject: [PATCH 3/3] removed commented out pdb --- wranglertools/import_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wranglertools/import_data.py b/wranglertools/import_data.py index c1d21fd4..7eaaae51 100755 --- a/wranglertools/import_data.py +++ b/wranglertools/import_data.py @@ -793,7 +793,6 @@ def error_report(error_dic, sheet, all_aliases, connection, error_id=''): nf_txt = 'not found' not_found = None alias_bit = None - #import pdb; pdb.set_trace() if utrl_txt in error_description: alias_bit = error_description.replace(utrl_txt, '') elif error_description.endswith(nf_txt):