Add a `url_update_email_check` rule that flags the outdated CDDIS support
email address (support-cddis@earthdata.nasa.gov) and recommends
support-cddis@nasa.gov instead; update the CDDIS concept ids used by the
downloader tests.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Indentation of context lines is assumed and must be confirmed against the
target files; the whitespace-only difference between the removed and added
`field_name = ...` lines in schema_validator.py could not be recovered.
Blob ids on the `index` lines are those of the original patch.

diff --git a/pyQuARC/code/schema_validator.py b/pyQuARC/code/schema_validator.py
index c743741e..fcdc2078 100644
--- a/pyQuARC/code/schema_validator.py
+++ b/pyQuARC/code/schema_validator.py
@@ -136,11 +136,10 @@ def _build_errors(error_log, paths):
             # For DIF, because the namespace is specified in the metadata file, lxml library
             # provides field name concatenated with the namespace,
             # the following 3 lines of code removes the namespace
-
             namespaces = re.findall(r"(\{http[^}]*\})", line)
             for namespace in namespaces:
                 line = line.replace(namespace, "")
-            field_name = re.search(r"Element\s'(.*)':", line)[1]
+            field_name = re.search(r"Element\s'(.*)':", line)[1]
             field_paths = [abs_path for abs_path in paths if field_name in abs_path]
             field_name = field_paths[0] if len(field_paths) == 1 else field_name
             message = re.search(r"Element\s'.+':\s(\[.*\])?(.*)", line)[2].strip()
diff --git a/pyQuARC/code/url_validator.py b/pyQuARC/code/url_validator.py
index 55a74e61..9b6befd6 100644
--- a/pyQuARC/code/url_validator.py
+++ b/pyQuARC/code/url_validator.py
@@ -117,3 +117,30 @@ def doi_link_update(value, bad_urls):
             validity = False
 
         return {"valid": validity, "value": value}
+
+    @staticmethod
+    @if_arg
+    def url_update_email_check(url, bad_urls=None):
+        """
+        Flags the outdated CDDIS support email address so that it gets updated
+
+        Args:
+            url (str): The contact value (an email address) to check
+            bad_urls (list, optional): Other values that should also be flagged
+
+        Returns:
+            (dict) An object with the validity of the check and the instance;
+            a flagged value is replaced with the current CDDIS support address
+        """
+        outdated_email = "support-cddis@earthdata.nasa.gov"
+        updated_email = "support-cddis@nasa.gov"
+        # Compare ignoring surrounding whitespace and letter case so that
+        # trivially different spellings of the outdated address are caught
+        normalized = url.strip().lower() if isinstance(url, str) else url
+        flagged = normalized == outdated_email or (
+            bad_urls is not None and url in bad_urls
+        )
+        if flagged:
+            # Invalid: report the address that should be used instead
+            return {"valid": False, "value": updated_email}
+        return {"valid": True, "value": url}
diff --git a/pyQuARC/schemas/check_messages.json b/pyQuARC/schemas/check_messages.json
index 444eb71e..4ee6cbea 100644
--- a/pyQuARC/schemas/check_messages.json
+++ b/pyQuARC/schemas/check_messages.json
@@ -47,6 +47,14 @@
         },
         "remediation": "This often indicates a broken link. If the URL is broken, recommend revising."
     },
+    "url_update_email_check": {
+        "failure": "The listed email contact information must be updated.",
+        "help": {
+            "message": "Recommend providing the updated contact information as per the data product.",
+            "url": "https://wiki.earthdata.nasa.gov/display/CMR/Data+Center"
+        },
+        "remediation": "Recommend changing the contact information to 'support-cddis@nasa.gov'. "
+    },
     "shortname_uniqueness": {
         "failure": "The EntryTitle/DataSetId `{}` is identical to the ShortName `{}`.",
         "help": {
diff --git a/pyQuARC/schemas/checks.json b/pyQuARC/schemas/checks.json
index 778f4da3..d921ac28 100644
--- a/pyQuARC/schemas/checks.json
+++ b/pyQuARC/schemas/checks.json
@@ -24,6 +24,11 @@
         "check_function": "health_and_status_check",
         "available": true
     },
+    "url_update_email_check": {
+        "data_type": "url",
+        "check_function": "url_update_email_check",
+        "available": true
+    },
     "string_compare": {
         "data_type": "string",
         "check_function": "compare",
diff --git a/pyQuARC/schemas/rule_mapping.json b/pyQuARC/schemas/rule_mapping.json
index 95ab58b5..3409ca63 100644
--- a/pyQuARC/schemas/rule_mapping.json
+++ b/pyQuARC/schemas/rule_mapping.json
@@ -5564,5 +5564,20 @@
         },
         "severity": "warning",
         "check_id": "one_item_presence_check"
-    }
+    },
+    "url_update_email_check": {
+        "rule_name": "URL Email address check",
+        "fields_to_apply": {
+            "umm-c": [
+                {
+                    "fields": [
+                        "DataCenters/ContactGroups/ContactInformation/ContactMechanisms/Value",
+                        "DataCenters/ContactGroups/ContactInformation/ContactInstruction"
+                    ]
+                }
+            ]
+        },
+        "severity": "info",
+        "check_id": "url_update_email_check"
+    }
 }
\ No newline at end of file
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index 5a6ca777..ca1762c8 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -9,11 +9,11 @@ class TestDownloader:
     def setup_method(self):
         self.concept_ids = {
             "collection": {
-                "real": "C1000000010-CDDIS",
+                "real": "C1000000042-CDDIS",
                 "dummy": "C123456-LPDAAC_ECS",
             },
             "granule": {
-                "real": "G1001434969-CDDIS",
+                "real": "G1018577631-CDDIS",
                 "dummy": "G1000000002-CMR_PROV",
             },
             "invalid": "asdfasdf",