From decb5c9637758c845241065effa610b686526aeb Mon Sep 17 00:00:00 2001 From: Ziad Ashraf Date: Fri, 27 Feb 2026 13:54:58 +0200 Subject: [PATCH] [BUG] map AIBuilder artifact fields and sanitize tokenized URLs --- .../aibuilder/aibuilder_mappings.py | 3 + .../aibuilder/aibuilder_mlmodel_connector.py | 145 +++++++++++++----- .../test_aibuilder_mlmodel_connector.py | 43 +++++- 3 files changed, 145 insertions(+), 46 deletions(-) diff --git a/src/connectors/aibuilder/aibuilder_mappings.py b/src/connectors/aibuilder/aibuilder_mappings.py index c010371af..c6afbffb6 100644 --- a/src/connectors/aibuilder/aibuilder_mappings.py +++ b/src/connectors/aibuilder/aibuilder_mappings.py @@ -6,4 +6,7 @@ "creator": "publisher", "description": "description", "keyword": "tags", + "version": "version", + "distribution": "artifacts", + "license": "license", } diff --git a/src/connectors/aibuilder/aibuilder_mlmodel_connector.py b/src/connectors/aibuilder/aibuilder_mlmodel_connector.py index 18d50a266..62614555c 100644 --- a/src/connectors/aibuilder/aibuilder_mlmodel_connector.py +++ b/src/connectors/aibuilder/aibuilder_mlmodel_connector.py @@ -75,6 +75,7 @@ def get_response(self, url) -> dict | list | RecordError: response = requests.get(url, timeout=REQUEST_TIMEOUT) except Exception as e: return RecordError(identifier=None, error=e) + safe_url = _sanitize_url(url, self.token) if response.status_code == status.HTTP_200_OK: return response.json() else: @@ -83,7 +84,7 @@ def get_response(self, url) -> dict | list | RecordError: msg = "Unauthorized token." else: msg = response.reason - err_msg = f"Error while fetching {url} from AIBuilder: ({status_code}) {msg}" + err_msg = f"Error while fetching {safe_url} from AIBuilder: ({status_code}) {msg}" logging.error(err_msg) err = HTTPError(err_msg) return RecordError(identifier=None, error=err) @@ -100,13 +101,12 @@ def _mlmodel_from_solution( attributes received in a `dict`. """ - if not set(mlmodel_mapping.values()) <= set(solution.keys()): + required_fields = ["platform_resource_identifier", "name"] + if any(_mapped_value(solution, key) is None for key in required_fields): err_msg = "Bad structure on the received solution." return RecordError(identifier=id, error=err_msg) - identifier = "" - if "platform_resource_identifier" in mlmodel_mapping.keys(): - identifier = solution[mlmodel_mapping["platform_resource_identifier"]] + identifier = _mapped_value(solution, "platform_resource_identifier", "") if not identifier: err_msg = "The platform identifier is mandatory." @@ -116,56 +116,40 @@ def _mlmodel_from_solution( err_msg = f"The identifier {identifier} does not correspond with the fetched solution." return RecordError(identifier=id, error=err_msg) - name = "" - if "name" in mlmodel_mapping.keys(): - name = solution[mlmodel_mapping["name"]] + name = _mapped_value(solution, "name", "") if not name: err_msg = "The name field is mandatory." return RecordError(identifier=id, error=err_msg) - date_published = "" - if "date_published" in mlmodel_mapping.keys(): - date_published = solution[mlmodel_mapping["date_published"]] + date_published = _mapped_value(solution, "date_published", "") - # TODO: Review the AIBuilder schema to map version - version = "" - if "version" in mlmodel_mapping.keys(): - version = solution[mlmodel_mapping["version"]] + version = _mapped_value(solution, "version") or _version_from_artifacts(solution) - description = "" - if "description" in mlmodel_mapping.keys(): - description = _description_format(solution[mlmodel_mapping["description"]]) + description_raw = _mapped_value(solution, "description", "") + description = _description_format(description_raw) - # TODO: Review the AIBuilder schema to map distribution - distribution = [] - if "distribution" in mlmodel_mapping.keys(): - distribution = _distribution_format(solution[mlmodel_mapping["distribution"]]) + distribution = _distribution_format(_mapped_value(solution, "distribution")) - tags = [] - if "keyword" in mlmodel_mapping.keys(): - tags = solution[mlmodel_mapping["keyword"]] + tags = _as_list(_mapped_value(solution, "keyword")) - # TODO: Review the AIBuilder schema to map license - license = "" - if "license" in mlmodel_mapping.keys(): - license = solution[mlmodel_mapping["license"]] + license = _mapped_value(solution, "license", "") related_resources = {} - if "contact" in mlmodel_mapping.keys(): + if _mapped_value(solution, "contact") is not None: pydantic_class_contact = resource_create(Contact) contact_names = [ pydantic_class_contact(name=name) - for name in _as_list(solution[mlmodel_mapping["contact"]]) + for name in _as_list(_mapped_value(solution, "contact")) ] related_resources["contact"] = contact_names - if "creator" in mlmodel_mapping.keys(): + if _mapped_value(solution, "creator") is not None: pydantic_class_creator = resource_create(Contact) creator_names = [ pydantic_class_creator(name=name) - for name in _as_list(solution[mlmodel_mapping["creator"]]) + for name in _as_list(_mapped_value(solution, "creator")) ] related_resources["creator"] = creator_names @@ -175,7 +159,7 @@ def _mlmodel_from_solution( platform_resource_identifier=identifier, name=name, date_published=date_published, - same_as=url, # TODO: Review the concept of having the TOKEN inside the url!!! + same_as=url, is_accessible_for_free=True, version=version, aiod_entry=AIoDEntryCreate(), @@ -213,6 +197,13 @@ def fetch( self.is_concluded = True yield None, response return + if not isinstance(response, list): + self.is_concluded = True + yield ( + None, + RecordError(identifier=None, error="Bad structure on the received catalog list."), + ) + return try: catalog_list = [catalog["catalogId"] for catalog in response] @@ -235,6 +226,15 @@ def fetch( self.is_concluded = num_catalog == len(catalog_list) - 1 yield None, response continue + if not isinstance(response, list): + self.is_concluded = num_catalog == len(catalog_list) - 1 + yield ( + None, + RecordError( + identifier=None, error="Bad structure on the received solution list." + ), + ) + continue try: solutions_list = [ @@ -254,9 +254,7 @@ def fetch( for num_solution, solution in enumerate(solutions_list): url_get_solution = f"{API_URL}/get_solution?fullId={solution}&apiToken={self.token}" - url_to_show = ( - f"{API_URL}/get_solution?fullId={solution}&apiToken=AIBUILDER_API_TOKEN" - ) + url_to_show = _public_solution_url(solution) response = self.get_response(url_get_solution) if isinstance(response, RecordError): self.is_concluded = ( @@ -265,6 +263,18 @@ def fetch( ) yield None, response continue + if not isinstance(response, dict): + self.is_concluded = ( + num_catalog == len(catalog_list) - 1 + and num_solution == len(solutions_list) - 1 + ) + yield ( + None, + RecordError( + identifier=solution, error="Bad structure on the received solution." + ), + ) + continue try: self.is_concluded = ( @@ -293,9 +303,64 @@ def _description_format(description: str) -> Text: return Text(plain=description) -# TODO: Review the AIBuilder schema to map distribution -def _distribution_format(distribution) -> list[RunnableDistribution]: - return [] +def _distribution_format(distribution: Any) -> list[RunnableDistribution]: + if not isinstance(distribution, list): + return [] + formatted: list[RunnableDistribution] = [] + for artifact in distribution: + if not isinstance(artifact, dict): + continue + content_size_kb = None + size = artifact.get("size") + if isinstance(size, int): + content_size_kb = size + elif isinstance(size, str) and size.isdigit(): + content_size_kb = int(size) + formatted.append( + RunnableDistribution( + checksum=None, + checksum_algorithm=None, + copyright=None, + name=artifact.get("name"), + description=artifact.get("description"), + content_url=artifact.get("uri") or artifact.get("filename"), + content_size_kb=content_size_kb, + encoding_format=artifact.get("artifactTypeCode"), + technology_readiness_level=None, + installation_time_milliseconds=None, + deployment_time_milliseconds=None, + ) + ) + return formatted + + +def _public_solution_url(solution_id: str) -> str: + return f"{API_URL}/get_solution?fullId={solution_id}" + + +def _sanitize_url(url: str, token: str) -> str: + if token: + return url.replace(token, "AIBUILDER_API_TOKEN") + return url + + +def _mapped_value(solution: dict, field: str, default: Any = None) -> Any: + key = mlmodel_mapping.get(field) + if key is None: + return default + return solution.get(key, default) + + +def _version_from_artifacts(solution: dict) -> str | None: + artifacts = solution.get("artifacts") + if not isinstance(artifacts, list): + return None + for artifact in artifacts: + if isinstance(artifact, dict): + version = artifact.get("version") + if isinstance(version, str) and version: + return version + return None def _as_list(value: Any | list[Any]) -> list[Any]: diff --git a/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py b/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py index f55d821cc..f84c9ef7e 100644 --- a/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py +++ b/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py @@ -21,6 +21,11 @@ catalog_solutions_url = f"{API_URL}/get_catalog_solutions?catalogId=1&apiToken={TOKEN}" solution_1_url = f"{API_URL}/get_solution?fullId=1&apiToken={TOKEN}" solution_2_url = f"{API_URL}/get_solution?fullId=2&apiToken={TOKEN}" +catalog_list_url_safe = f"{API_URL}/get_catalog_list?apiToken=AIBUILDER_API_TOKEN" +catalog_solutions_url_safe = ( + f"{API_URL}/get_catalog_solutions?catalogId=1&apiToken=AIBUILDER_API_TOKEN" +) +solution_1_url_safe = f"{API_URL}/get_solution?fullId=1&apiToken=AIBUILDER_API_TOKEN" mocked_datetime_from = datetime.fromisoformat("2023-09-01T00:00:00Z") mocked_datetime_to = datetime.fromisoformat("2023-09-01T00:00:01Z") @@ -55,6 +60,11 @@ def test_fetch_happy_path(): assert resource.resource.platform_resource_identifier == str(i + 1) assert resource.resource.name == f"Mocking Full Solution {i + 1}" assert resource.resource.date_published == mocked_datetime_from + assert resource.resource.version == "1.0.0" + assert resource.resource.same_as == f"{API_URL}/get_solution?fullId={i + 1}" + assert len(resource.resource.distribution) == 1 + assert resource.resource.distribution[0].name == f"Mocking artifact {i + 1}" + assert resource.resource.distribution[0].encoding_format == "PJ" assert resource.resource.description == Text(plain=f"The mocked full solution {i + 1}.") assert set(resource.resource.keyword) == {f"Mocked tag {i + 1}."} assert resource.resource.is_accessible_for_free @@ -94,6 +104,11 @@ def test_fetch_happy_path_unaware_datetime(): assert resource.resource.platform_resource_identifier == str(i + 1) assert resource.resource.name == f"Mocking Full Solution {i + 1}" assert resource.resource.date_published == mocked_datetime_from + assert resource.resource.version == "1.0.0" + assert resource.resource.same_as == f"{API_URL}/get_solution?fullId={i + 1}" + assert len(resource.resource.distribution) == 1 + assert resource.resource.distribution[0].name == f"Mocking artifact {i + 1}" + assert resource.resource.distribution[0].encoding_format == "PJ" assert resource.resource.description == Text(plain=f"The mocked full solution {i + 1}.") assert set(resource.resource.keyword) == {f"Mocked tag {i + 1}."} assert resource.resource.is_accessible_for_free @@ -101,7 +116,9 @@ def test_fetch_happy_path_unaware_datetime(): def test_unautorized_token_error(): error = {"error": {"message": "Unauthorized token."}} - err_msg = f"Error while fetching {catalog_list_url} from AIBuilder: (401) Unauthorized token." + err_msg = ( + f"Error while fetching {catalog_list_url_safe} from AIBuilder: (401) Unauthorized token." + ) fetched_resources = [] with responses.RequestsMock() as mocked_requests: mocked_requests.add(responses.GET, catalog_list_url, json=error, status=401) @@ -119,7 +136,9 @@ def test_unautorized_token_error(): def test_catalog_list_http_error(): error = {"error": {"message": "HTTP Error."}} - err_msg = f"Error while fetching {catalog_list_url} from AIBuilder: (500) Internal Server Error" + err_msg = ( + f"Error while fetching {catalog_list_url_safe} from AIBuilder: (500) Internal Server Error" + ) fetched_resources = [] with responses.RequestsMock() as mocked_requests: mocked_requests.add(responses.GET, catalog_list_url, json=error, status=500) @@ -175,9 +194,7 @@ def test_empty_catalog_list(): def test_catalog_solutions_http_error(): catalog_list_path = os.path.join(test_resources_path, "catalog_list.json") error = {"error": {"message": "HTTP Error."}} - err_msg = ( - f"Error while fetching {catalog_solutions_url} from AIBuilder: (500) Internal Server Error" - ) + err_msg = f"Error while fetching {catalog_solutions_url_safe} from AIBuilder: (500) Internal Server Error" fetched_resources = [] with responses.RequestsMock() as mocked_requests: with open(catalog_list_path, "r") as f: @@ -244,7 +261,9 @@ def test_solution_http_error(): catalog_list_path = os.path.join(test_resources_path, "catalog_list.json") catalog_solutions_path = os.path.join(test_resources_path, "catalog_solutions.json") error = {"error": {"message": "HTTP Error."}} - err_msg = f"Error while fetching {solution_1_url} from AIBuilder: (500) Internal Server Error" + err_msg = ( + f"Error while fetching {solution_1_url_safe} from AIBuilder: (500) Internal Server Error" + ) solution_2_path = os.path.join(test_resources_path, "solution_2.json") fetched_resources = [] with responses.RequestsMock() as mocked_requests: @@ -275,6 +294,10 @@ def test_solution_http_error(): assert resource.resource.platform_resource_identifier == "2" assert resource.resource.name == "Mocking Full Solution 2" assert resource.resource.date_published == mocked_datetime_from + assert resource.resource.version == "1.0.0" + assert resource.resource.same_as == f"{API_URL}/get_solution?fullId=2" + assert len(resource.resource.distribution) == 1 + assert resource.resource.distribution[0].name == "Mocking artifact 2" assert resource.resource.description == Text(plain="The mocked full solution 2.") assert set(resource.resource.keyword) == {"Mocked tag 2."} assert resource.resource.is_accessible_for_free @@ -315,6 +338,10 @@ def test_solution_format_error(): assert resource.resource.platform_resource_identifier == "2" assert resource.resource.name == "Mocking Full Solution 2" assert resource.resource.date_published == mocked_datetime_from + assert resource.resource.version == "1.0.0" + assert resource.resource.same_as == f"{API_URL}/get_solution?fullId=2" + assert len(resource.resource.distribution) == 1 + assert resource.resource.distribution[0].name == "Mocking artifact 2" assert resource.resource.description == Text(plain="The mocked full solution 2.") assert set(resource.resource.keyword) == {"Mocked tag 2."} assert resource.resource.is_accessible_for_free @@ -356,6 +383,10 @@ def test_solution_last_modified_exception(): assert resource.resource.platform_resource_identifier == "2" assert resource.resource.name == "Mocking Full Solution 2" assert resource.resource.date_published == mocked_datetime_from + assert resource.resource.version == "1.0.0" + assert resource.resource.same_as == f"{API_URL}/get_solution?fullId=2" + assert len(resource.resource.distribution) == 1 + assert resource.resource.distribution[0].name == "Mocking artifact 2" assert resource.resource.description == Text(plain="The mocked full solution 2.") assert set(resource.resource.keyword) == {"Mocked tag 2."} assert resource.resource.is_accessible_for_free