From decb5c9637758c845241065effa610b686526aeb Mon Sep 17 00:00:00 2001
From: Ziad Ashraf <ziad.ashraf.cs@gmail.com>
Date: Fri, 27 Feb 2026 13:54:58 +0200
Subject: [PATCH] [BUG] map AIBuilder artifact fields and sanitize tokenized
 URLs

---
 .../aibuilder/aibuilder_mappings.py           |   3 +
 .../aibuilder/aibuilder_mlmodel_connector.py  | 145 +++++++++++++-----
 .../test_aibuilder_mlmodel_connector.py       |  43 +++++-
 3 files changed, 145 insertions(+), 46 deletions(-)

diff --git a/src/connectors/aibuilder/aibuilder_mappings.py b/src/connectors/aibuilder/aibuilder_mappings.py
index c010371af..c6afbffb6 100644
--- a/src/connectors/aibuilder/aibuilder_mappings.py
+++ b/src/connectors/aibuilder/aibuilder_mappings.py
@@ -6,4 +6,7 @@
     "creator": "publisher",
     "description": "description",
     "keyword": "tags",
+    "version": "version",
+    "distribution": "artifacts",
+    "license": "license",
 }
diff --git a/src/connectors/aibuilder/aibuilder_mlmodel_connector.py b/src/connectors/aibuilder/aibuilder_mlmodel_connector.py
index 18d50a266..62614555c 100644
--- a/src/connectors/aibuilder/aibuilder_mlmodel_connector.py
+++ b/src/connectors/aibuilder/aibuilder_mlmodel_connector.py
@@ -75,6 +75,7 @@ def get_response(self, url) -> dict | list | RecordError:
             response = requests.get(url, timeout=REQUEST_TIMEOUT)
         except Exception as e:
             return RecordError(identifier=None, error=e)
+        safe_url = _sanitize_url(url, self.token)
         if response.status_code == status.HTTP_200_OK:
             return response.json()
         else:
@@ -83,7 +84,7 @@ def get_response(self, url) -> dict | list | RecordError:
                 msg = "Unauthorized token."
             else:
                 msg = response.reason
-            err_msg = f"Error while fetching {url} from AIBuilder: ({status_code}) {msg}"
+            err_msg = f"Error while fetching {safe_url} from AIBuilder: ({status_code}) {msg}"
             logging.error(err_msg)
             err = HTTPError(err_msg)
             return RecordError(identifier=None, error=err)
@@ -100,13 +101,12 @@ def _mlmodel_from_solution(
         attributes received in a `dict`.
         """
 
-        if not set(mlmodel_mapping.values()) <= set(solution.keys()):
+        required_fields = ["platform_resource_identifier", "name"]
+        if any(_mapped_value(solution, key) is None for key in required_fields):
             err_msg = "Bad structure on the received solution."
             return RecordError(identifier=id, error=err_msg)
 
-        identifier = ""
-        if "platform_resource_identifier" in mlmodel_mapping.keys():
-            identifier = solution[mlmodel_mapping["platform_resource_identifier"]]
+        identifier = _mapped_value(solution, "platform_resource_identifier", "")
 
         if not identifier:
             err_msg = "The platform identifier is mandatory."
@@ -116,56 +116,40 @@ def _mlmodel_from_solution(
             err_msg = f"The identifier {identifier} does not correspond with the fetched solution."
             return RecordError(identifier=id, error=err_msg)
 
-        name = ""
-        if "name" in mlmodel_mapping.keys():
-            name = solution[mlmodel_mapping["name"]]
+        name = _mapped_value(solution, "name", "")
 
         if not name:
             err_msg = "The name field is mandatory."
             return RecordError(identifier=id, error=err_msg)
 
-        date_published = ""
-        if "date_published" in mlmodel_mapping.keys():
-            date_published = solution[mlmodel_mapping["date_published"]]
+        date_published = _mapped_value(solution, "date_published", "")
 
-        # TODO: Review the AIBuilder schema to map version
-        version = ""
-        if "version" in mlmodel_mapping.keys():
-            version = solution[mlmodel_mapping["version"]]
+        version = _mapped_value(solution, "version") or _version_from_artifacts(solution)
 
-        description = ""
-        if "description" in mlmodel_mapping.keys():
-            description = _description_format(solution[mlmodel_mapping["description"]])
+        description_raw = _mapped_value(solution, "description", "")
+        description = _description_format(description_raw)
 
-        # TODO: Review the AIBuilder schema to map distribution
-        distribution = []
-        if "distribution" in mlmodel_mapping.keys():
-            distribution = _distribution_format(solution[mlmodel_mapping["distribution"]])
+        distribution = _distribution_format(_mapped_value(solution, "distribution"))
 
-        tags = []
-        if "keyword" in mlmodel_mapping.keys():
-            tags = solution[mlmodel_mapping["keyword"]]
+        tags = _as_list(_mapped_value(solution, "keyword"))
 
-        # TODO: Review the AIBuilder schema to map license
-        license = ""
-        if "license" in mlmodel_mapping.keys():
-            license = solution[mlmodel_mapping["license"]]
+        license = _mapped_value(solution, "license", "")
 
         related_resources = {}
 
-        if "contact" in mlmodel_mapping.keys():
+        if _mapped_value(solution, "contact") is not None:
             pydantic_class_contact = resource_create(Contact)
             contact_names = [
                 pydantic_class_contact(name=name)
-                for name in _as_list(solution[mlmodel_mapping["contact"]])
+                for name in _as_list(_mapped_value(solution, "contact"))
             ]
             related_resources["contact"] = contact_names
 
-        if "creator" in mlmodel_mapping.keys():
+        if _mapped_value(solution, "creator") is not None:
             pydantic_class_creator = resource_create(Contact)
             creator_names = [
                 pydantic_class_creator(name=name)
-                for name in _as_list(solution[mlmodel_mapping["creator"]])
+                for name in _as_list(_mapped_value(solution, "creator"))
             ]
             related_resources["creator"] = creator_names
 
@@ -175,7 +159,7 @@ def _mlmodel_from_solution(
             platform_resource_identifier=identifier,
             name=name,
             date_published=date_published,
-            same_as=url,  # TODO: Review the concept of having the TOKEN inside the url!!!
+            same_as=url,
             is_accessible_for_free=True,
             version=version,
             aiod_entry=AIoDEntryCreate(),
@@ -213,6 +197,13 @@ def fetch(
             self.is_concluded = True
             yield None, response
             return
+        if not isinstance(response, list):
+            self.is_concluded = True
+            yield (
+                None,
+                RecordError(identifier=None, error="Bad structure on the received catalog list."),
+            )
+            return
 
         try:
             catalog_list = [catalog["catalogId"] for catalog in response]
@@ -235,6 +226,15 @@ def fetch(
                 self.is_concluded = num_catalog == len(catalog_list) - 1
                 yield None, response
                 continue
+            if not isinstance(response, list):
+                self.is_concluded = num_catalog == len(catalog_list) - 1
+                yield (
+                    None,
+                    RecordError(
+                        identifier=None, error="Bad structure on the received solution list."
+                    ),
+                )
+                continue
 
             try:
                 solutions_list = [
@@ -254,9 +254,7 @@ def fetch(
 
             for num_solution, solution in enumerate(solutions_list):
                 url_get_solution = f"{API_URL}/get_solution?fullId={solution}&apiToken={self.token}"
-                url_to_show = (
-                    f"{API_URL}/get_solution?fullId={solution}&apiToken=AIBUILDER_API_TOKEN"
-                )
+                url_to_show = _public_solution_url(solution)
                 response = self.get_response(url_get_solution)
                 if isinstance(response, RecordError):
                     self.is_concluded = (
@@ -265,6 +263,18 @@ def fetch(
                     )
                     yield None, response
                     continue
+                if not isinstance(response, dict):
+                    self.is_concluded = (
+                        num_catalog == len(catalog_list) - 1
+                        and num_solution == len(solutions_list) - 1
+                    )
+                    yield (
+                        None,
+                        RecordError(
+                            identifier=solution, error="Bad structure on the received solution."
+                        ),
+                    )
+                    continue
 
                 try:
                     self.is_concluded = (
@@ -293,9 +303,64 @@ def _description_format(description: str) -> Text:
     return Text(plain=description)
 
 
-# TODO: Review the AIBuilder schema to map distribution
-def _distribution_format(distribution) -> list[RunnableDistribution]:
-    return []
+def _distribution_format(distribution: Any) -> list[RunnableDistribution]:
+    """Convert AIBuilder artifact dicts to distributions; non-list input yields []."""
+    if not isinstance(distribution, list):
+        return []
+    formatted: list[RunnableDistribution] = []
+    for artifact in distribution:
+        if not isinstance(artifact, dict):
+            continue  # non-dict entries are skipped
+        size = artifact.get("size")
+        if isinstance(size, str):  # isdecimal(): isdigit() also accepts superscripts int() rejects
+            size = int(size) if size.isdecimal() else None
+        elif isinstance(size, bool) or not isinstance(size, int):
+            size = None  # bool is an int subclass but is not a size
+        formatted.append(
+            RunnableDistribution(
+                checksum=None,
+                checksum_algorithm=None,
+                copyright=None,
+                name=artifact.get("name"),
+                description=artifact.get("description"),
+                content_url=artifact.get("uri") or artifact.get("filename"),
+                content_size_kb=size,  # NOTE(review): unscaled; confirm AIBuilder's unit
+                encoding_format=artifact.get("artifactTypeCode"),
+                technology_readiness_level=None,
+                installation_time_milliseconds=None,
+                deployment_time_milliseconds=None,
+            )
+        )
+    return formatted
+
+
+def _public_solution_url(solution_id: str) -> str:
+    return f"{API_URL}/get_solution?fullId={solution_id}"  # no apiToken parameter
+
+
+def _sanitize_url(url: str, token: str) -> str:
+    """Return `url` with each occurrence of `token` swapped for a fixed placeholder."""
+    # A falsy token leaves the URL untouched: replacing "" would alter every position.
+    return url.replace(token, "AIBUILDER_API_TOKEN") if token else url
+
+
+def _mapped_value(solution: dict, field: str, default: Any = None) -> Any:
+    """Return the solution's value for `field` as keyed by `mlmodel_mapping`, or `default`."""
+    source_key = mlmodel_mapping.get(field)
+    # NOTE: a key that is present with a null value yields None, not `default`.
+    return default if source_key is None else solution.get(source_key, default)
+
+
+def _version_from_artifacts(solution: dict) -> str | None:
+    """Return the first non-empty string `version` found among the solution's artifacts.
+
+    Returns None when `artifacts` is not a list or no artifact dict has such a version.
+    """
+    artifacts = solution.get("artifacts")
+    if not isinstance(artifacts, list):
+        return None
+    candidates = (item.get("version") for item in artifacts if isinstance(item, dict))
+    return next((found for found in candidates if isinstance(found, str) and found), None)
 
 
 def _as_list(value: Any | list[Any]) -> list[Any]:
diff --git a/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py b/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py
index f55d821cc..f84c9ef7e 100644
--- a/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py
+++ b/src/tests/connectors/aibuilder/test_aibuilder_mlmodel_connector.py
@@ -21,6 +21,11 @@
 catalog_solutions_url = f"{API_URL}/get_catalog_solutions?catalogId=1&apiToken={TOKEN}"
 solution_1_url = f"{API_URL}/get_solution?fullId=1&apiToken={TOKEN}"
 solution_2_url = f"{API_URL}/get_solution?fullId=2&apiToken={TOKEN}"
+catalog_list_url_safe = f"{API_URL}/get_catalog_list?apiToken=AIBUILDER_API_TOKEN"
+catalog_solutions_url_safe = (
+    f"{API_URL}/get_catalog_solutions?catalogId=1&apiToken=AIBUILDER_API_TOKEN"
+)
+solution_1_url_safe = f"{API_URL}/get_solution?fullId=1&apiToken=AIBUILDER_API_TOKEN"
 mocked_datetime_from = datetime.fromisoformat("2023-09-01T00:00:00Z")
 mocked_datetime_to = datetime.fromisoformat("2023-09-01T00:00:01Z")
 
@@ -55,6 +60,11 @@ def test_fetch_happy_path():
         assert resource.resource.platform_resource_identifier == str(i + 1)
         assert resource.resource.name == f"Mocking Full Solution {i + 1}"
         assert resource.resource.date_published == mocked_datetime_from
+        assert resource.resource.version == "1.0.0"
+        assert resource.resource.same_as == f"{API_URL}/get_solution?fullId={i + 1}"
+        assert len(resource.resource.distribution) == 1
+        assert resource.resource.distribution[0].name == f"Mocking artifact {i + 1}"
+        assert resource.resource.distribution[0].encoding_format == "PJ"
         assert resource.resource.description == Text(plain=f"The mocked full solution {i + 1}.")
         assert set(resource.resource.keyword) == {f"Mocked tag {i + 1}."}
         assert resource.resource.is_accessible_for_free
@@ -94,6 +104,11 @@ def test_fetch_happy_path_unaware_datetime():
         assert resource.resource.platform_resource_identifier == str(i + 1)
         assert resource.resource.name == f"Mocking Full Solution {i + 1}"
         assert resource.resource.date_published == mocked_datetime_from
+        assert resource.resource.version == "1.0.0"
+        assert resource.resource.same_as == f"{API_URL}/get_solution?fullId={i + 1}"
+        assert len(resource.resource.distribution) == 1
+        assert resource.resource.distribution[0].name == f"Mocking artifact {i + 1}"
+        assert resource.resource.distribution[0].encoding_format == "PJ"
         assert resource.resource.description == Text(plain=f"The mocked full solution {i + 1}.")
         assert set(resource.resource.keyword) == {f"Mocked tag {i + 1}."}
         assert resource.resource.is_accessible_for_free
@@ -101,7 +116,9 @@ def test_fetch_happy_path_unaware_datetime():
 
 def test_unautorized_token_error():
     error = {"error": {"message": "Unauthorized token."}}
-    err_msg = f"Error while fetching {catalog_list_url} from AIBuilder: (401) Unauthorized token."
+    err_msg = (
+        f"Error while fetching {catalog_list_url_safe} from AIBuilder: (401) Unauthorized token."
+    )
     fetched_resources = []
     with responses.RequestsMock() as mocked_requests:
         mocked_requests.add(responses.GET, catalog_list_url, json=error, status=401)
@@ -119,7 +136,9 @@ def test_unautorized_token_error():
 
 def test_catalog_list_http_error():
     error = {"error": {"message": "HTTP Error."}}
-    err_msg = f"Error while fetching {catalog_list_url} from AIBuilder: (500) Internal Server Error"
+    err_msg = (
+        f"Error while fetching {catalog_list_url_safe} from AIBuilder: (500) Internal Server Error"
+    )
     fetched_resources = []
     with responses.RequestsMock() as mocked_requests:
         mocked_requests.add(responses.GET, catalog_list_url, json=error, status=500)
@@ -175,9 +194,7 @@ def test_empty_catalog_list():
 def test_catalog_solutions_http_error():
     catalog_list_path = os.path.join(test_resources_path, "catalog_list.json")
     error = {"error": {"message": "HTTP Error."}}
-    err_msg = (
-        f"Error while fetching {catalog_solutions_url} from AIBuilder: (500) Internal Server Error"
-    )
+    err_msg = f"Error while fetching {catalog_solutions_url_safe} from AIBuilder: (500) Internal Server Error"
     fetched_resources = []
     with responses.RequestsMock() as mocked_requests:
         with open(catalog_list_path, "r") as f:
@@ -244,7 +261,9 @@ def test_solution_http_error():
     catalog_list_path = os.path.join(test_resources_path, "catalog_list.json")
     catalog_solutions_path = os.path.join(test_resources_path, "catalog_solutions.json")
     error = {"error": {"message": "HTTP Error."}}
-    err_msg = f"Error while fetching {solution_1_url} from AIBuilder: (500) Internal Server Error"
+    err_msg = (
+        f"Error while fetching {solution_1_url_safe} from AIBuilder: (500) Internal Server Error"
+    )
     solution_2_path = os.path.join(test_resources_path, "solution_2.json")
     fetched_resources = []
     with responses.RequestsMock() as mocked_requests:
@@ -275,6 +294,10 @@ def test_solution_http_error():
     assert resource.resource.platform_resource_identifier == "2"
     assert resource.resource.name == "Mocking Full Solution 2"
     assert resource.resource.date_published == mocked_datetime_from
+    assert resource.resource.version == "1.0.0"
+    assert resource.resource.same_as == f"{API_URL}/get_solution?fullId=2"
+    assert len(resource.resource.distribution) == 1
+    assert resource.resource.distribution[0].name == "Mocking artifact 2"
     assert resource.resource.description == Text(plain="The mocked full solution 2.")
     assert set(resource.resource.keyword) == {"Mocked tag 2."}
     assert resource.resource.is_accessible_for_free
@@ -315,6 +338,10 @@ def test_solution_format_error():
     assert resource.resource.platform_resource_identifier == "2"
     assert resource.resource.name == "Mocking Full Solution 2"
     assert resource.resource.date_published == mocked_datetime_from
+    assert resource.resource.version == "1.0.0"
+    assert resource.resource.same_as == f"{API_URL}/get_solution?fullId=2"
+    assert len(resource.resource.distribution) == 1
+    assert resource.resource.distribution[0].name == "Mocking artifact 2"
     assert resource.resource.description == Text(plain="The mocked full solution 2.")
     assert set(resource.resource.keyword) == {"Mocked tag 2."}
     assert resource.resource.is_accessible_for_free
@@ -356,6 +383,10 @@ def test_solution_last_modified_exception():
     assert resource.resource.platform_resource_identifier == "2"
     assert resource.resource.name == "Mocking Full Solution 2"
     assert resource.resource.date_published == mocked_datetime_from
+    assert resource.resource.version == "1.0.0"
+    assert resource.resource.same_as == f"{API_URL}/get_solution?fullId=2"
+    assert len(resource.resource.distribution) == 1
+    assert resource.resource.distribution[0].name == "Mocking artifact 2"
     assert resource.resource.description == Text(plain="The mocked full solution 2.")
     assert set(resource.resource.keyword) == {"Mocked tag 2."}
     assert resource.resource.is_accessible_for_free