diff --git a/pyQuARC/main.py b/pyQuARC/main.py index 6995b50c..49458433 100644 --- a/pyQuARC/main.py +++ b/pyQuARC/main.py @@ -138,6 +138,35 @@ def _cmr_query(self): query = f"{orig_query}&page_num={page_num}" return concept_ids + + + def _get_collection_version(self, concept_id): + """ + Fetches collection information from CMR for a given concept_id. + Args: + concept_id (str): The concept ID to query. + + Returns: + dict: {"revision_id": str | None, "metadata_version": str | None } A dict of Revision ID and Metadata Version of the collection. + """ + failure_return_value = {"revision_id": None, "metadata_version": None} + try: + url = f"{self.cmr_host}/search/concepts/{concept_id}.umm_json" + headers = get_headers() + response = requests.get(url, headers=headers) + response.raise_for_status() + + data = response.json() if response.content else {} + return { + "revision_id": response.headers.get("CMR-Revision-Id"), + "metadata_version": data.get("MetadataSpecification", {}).get("Version"), + } + + except Exception as e: + # Unified error handling — return dict even on failure + print(f"Error fetching collection info for {concept_id}: {str(e)}") + return failure_return_value + def _validate_with_cmr(self, concept_id, metadata_content): """ @@ -181,8 +210,17 @@ def validate(self): if self.concept_ids: for concept_id in tqdm(self.concept_ids): + # If no version specified, get the latest version + # Get both revision and metadata version in one call + info = self._get_collection_version(concept_id) + version_to_use = self.version or info["revision_id"] + + metadata_version = info["metadata_version"] + if metadata_version: + print(f"Collection {concept_id} schema version: {metadata_version}") + downloader = Downloader( - concept_id, self.metadata_format, self.version, self.cmr_host + concept_id, self.metadata_format, version_to_use, self.cmr_host ) if not (content := downloader.download()): self.errors.append( @@ -193,6 +231,7 @@ def validate(self): } ) continue + content = content.encode() cmr_response = self._validate_with_cmr(concept_id, content) validation_errors, pyquarc_errors = checker.run(content) @@ -220,8 +259,10 @@ def validate(self): "pyquarc_errors": pyquarc_errors, } ) + return self.errors + @staticmethod def _error_message(messages): severities = ["error", "warning", "info"] @@ -287,7 +328,10 @@ def display_results(self): f"\n\t {COLOR['title']}{COLOR['bright']} pyQuARC ERRORS: {END}\n" ) for error in pyquarc_errors: - error_prompt += f"\t\t ERROR: {error['type']}. Details: {error['details']} \n" + error_prompt += ( + f"\t\t ERROR: {error.get('message', 'No message available')} \n" + f"\t\t DETAILS: {error.get('details', 'No details available')} \n" + ) if cmr_validation := error.get("cmr_validation"): cmr_error_msg = self._format_cmr_error(cmr_validation) @@ -388,3 +432,4 @@ def display_results(self): ) results = arc.validate() arc.display_results() + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 30aec17c..6432dc89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ colorama==0.4.4 idna==2.10 jsonschema==4.17.3 -lxml==4.9.1 +lxml==5.3.0 #4.9.1 pytest==5.4.3 pytz==2020.1 requests==2.24.0