Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 53 additions & 8 deletions pyQuARC/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,39 @@ def _cmr_query(self):
query = f"{orig_query}&page_num={page_num}"

return concept_ids


def _get_collection_version(self, concept_id):
    """
    Fetch the revision ID and metadata schema version of a concept from CMR.

    Requests ``{cmr_host}/search/concepts/{concept_id}.umm_json`` and reads
    the revision from the ``CMR-Revision-Id`` response header and the schema
    version from ``MetadataSpecification.Version`` in the JSON body.

    Args:
        concept_id (str): The concept ID to query.

    Returns:
        dict: ``{"revision_id": str | None, "metadata_version": str | None}``.
            Both values are None when CMR answers with a non-200 status or
            the request itself fails. ``metadata_version`` alone is None when
            the body is empty, cannot be parsed, or carries no
            ``MetadataSpecification.Version``.
    """
    info = {"revision_id": None, "metadata_version": None}

    try:
        url = f"{self.cmr_host}/search/concepts/{concept_id}.umm_json"
        response = requests.get(url, headers=get_headers())

        if response.status_code != 200:
            print(
                f"Warning: Could not fetch data for {concept_id}. Status: {response.status_code}"
            )
            return info

        # Record the header value before touching the body, so a malformed
        # body does not throw away a revision ID that CMR did provide.
        info["revision_id"] = response.headers.get("CMR-Revision-Id")

        data = response.json() if response.content else {}
        # Only dig into the body when it has the expected mapping shape.
        spec = data.get("MetadataSpecification") if isinstance(data, dict) else None
        if isinstance(spec, dict):
            info["metadata_version"] = spec.get("Version")

    except Exception as e:
        # Deliberately best-effort: callers always get a dict back, with
        # whatever could be determined before the failure.
        print(f"Error fetching collection info for {concept_id}: {e}")

    return info


def _validate_with_cmr(self, concept_id, metadata_content):
"""
Expand Down Expand Up @@ -181,8 +214,19 @@ def validate(self):

if self.concept_ids:
for concept_id in tqdm(self.concept_ids):
# If no version specified, get the latest version
# Get both revision and metadata version in one call
info = self._get_collection_version(concept_id)
version_to_use = self.version or info["revision_id"]
metadata_version = info["metadata_version"]

if version_to_use:
print(f"Using latest revision {version_to_use} for {concept_id}")
if metadata_version:
print(f"Collection {concept_id} schema version: {metadata_version}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove these print statements


downloader = Downloader(
concept_id, self.metadata_format, self.version, self.cmr_host
concept_id, self.metadata_format, version_to_use, self.cmr_host
)
if not (content := downloader.download()):
self.errors.append(
Expand All @@ -193,18 +237,13 @@ def validate(self):
}
)
continue

content = content.encode()
cmr_response = self._validate_with_cmr(concept_id, content)
validation_errors, pyquarc_errors = checker.run(content)
self.errors.append(
{
"concept_id": concept_id,
"errors": validation_errors,
"cmr_validation": {
"errors": cmr_response.json().get("errors", []),
# TODO: show warnings
"warnings": cmr_response.json().get("warnings", [])
},
"pyquarc_errors": pyquarc_errors,
}
)
Expand All @@ -220,8 +259,10 @@ def validate(self):
"pyquarc_errors": pyquarc_errors,
}
)

return self.errors


@staticmethod
def _error_message(messages):
severities = ["error", "warning", "info"]
Expand Down Expand Up @@ -287,7 +328,10 @@ def display_results(self):
f"\n\t {COLOR['title']}{COLOR['bright']} pyQuARC ERRORS: {END}\n"
)
for error in pyquarc_errors:
error_prompt += f"\t\t ERROR: {error['type']}. Details: {error['details']} \n"
error_prompt += (
f"\t\t ERROR: {error.get('message', 'No message available')} \n"
f"\t\t DETAILS: {error.get('details', 'No details available')} \n"
)

if cmr_validation := error.get("cmr_validation"):
cmr_error_msg = self._format_cmr_error(cmr_validation)
Expand Down Expand Up @@ -388,3 +432,4 @@ def display_results(self):
)
results = arc.validate()
arc.display_results()

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
colorama==0.4.4
idna==2.10
jsonschema==4.17.3
lxml==4.9.1
lxml==5.3.0 #4.9.1
pytest==5.4.3
pytz==2020.1
requests==2.24.0
Expand Down