diff --git a/components/lif/mdr_dto/transformation_dto.py b/components/lif/mdr_dto/transformation_dto.py
index 8a0a072..b43d07b 100644
--- a/components/lif/mdr_dto/transformation_dto.py
+++ b/components/lif/mdr_dto/transformation_dto.py
@@ -1,8 +1,8 @@
-from pydantic import BaseModel
-from typing import List, Optional
 from datetime import datetime
+from typing import List, Optional
 
 from lif.datatypes.mdr_sql_model import ExpressionLanguageType
+from pydantic import BaseModel
 
 
 class TransformationAttributeDTO(BaseModel):
@@ -24,7 +24,7 @@ class Config:
 
 
 class CreateTransformationAttributeDTO(BaseModel):
-    AttributeId: int
+    AttributeId: Optional[int] = None  # No longer used
     EntityId: Optional[int] = None  # Existing column
     # AttributeName: Optional[str] = None
     # EntityName: Optional[str] = None
diff --git a/components/lif/mdr_services/entity_association_service.py b/components/lif/mdr_services/entity_association_service.py
index 0cd53ce..a835334 100644
--- a/components/lif/mdr_services/entity_association_service.py
+++ b/components/lif/mdr_services/entity_association_service.py
@@ -1,4 +1,5 @@
 from typing import List, Optional
+
 from fastapi import HTTPException
 from lif.datatypes.mdr_sql_model import DataModel, DataModelType, Entity, EntityAssociation, ExtInclusionsFromBaseDM
 from lif.mdr_dto.entity_association_dto import (
@@ -11,9 +12,8 @@
 from lif.mdr_services.helper_service import check_datamodel_by_id, check_entity_by_id
 from lif.mdr_utils.logger_config import get_logger
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlmodel import or_, select
 from sqlalchemy.orm import aliased
-
+from sqlmodel import or_, select
 
 logger = get_logger(__name__)
 
@@ -61,6 +61,24 @@ async def check_existing_association(
     return result.scalar_one_or_none() is not None
 
 
+async def check_entity_association_strict(
+    session: AsyncSession, parent_entity_id: int, child_entity_id: int, extended_by_data_model_id: int | None
+) -> None:
+    query = select(EntityAssociation).where(
+        EntityAssociation.ParentEntityId == parent_entity_id,
+        EntityAssociation.ChildEntityId == child_entity_id,
+        EntityAssociation.Deleted == False,
+        EntityAssociation.ExtendedByDataModelId == extended_by_data_model_id,
+    )
+    result = await session.execute(query)
+    exists = result.scalar_one_or_none() is not None
+    if not exists:
+        raise HTTPException(
+            status_code=404,
+            detail=f"No association between parent entity {parent_entity_id} and child entity {child_entity_id} found with extended-by data model '{extended_by_data_model_id}'",
+        )
+
+
 async def validate_entity_associations_for_transformation_attribute(
     session: AsyncSession, transformation_attribute: TransformationAttributeDTO
 ) -> bool:
diff --git a/components/lif/mdr_services/entity_attribute_association_service.py b/components/lif/mdr_services/entity_attribute_association_service.py
index 001e9f8..c5db218 100644
--- a/components/lif/mdr_services/entity_attribute_association_service.py
+++ b/components/lif/mdr_services/entity_attribute_association_service.py
@@ -1,6 +1,6 @@
 from typing import List, Optional, Tuple
+
 from fastapi import HTTPException
-from sqlalchemy import Select, func, or_, select
 from lif.datatypes.mdr_sql_model import (
     Attribute,
     DataModelType,
@@ -15,6 +15,7 @@
 )
 from lif.mdr_services.helper_service import check_attribute_by_id, check_datamodel_by_id, check_entity_by_id
 from lif.mdr_utils.logger_config import get_logger
+from sqlalchemy import Select, func, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 logger = get_logger(__name__)
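The new `*_strict` helpers invert the contract of the existing `check_existing_association`: instead of returning a bool, they return nothing on success and raise a 404 when no live row matches, so callers can validate inline. A minimal usage sketch, assuming a seeded `AsyncSession` and hypothetical IDs:

```python
from fastapi import HTTPException
from lif.mdr_services.entity_association_service import check_entity_association_strict


async def demo_strict_check(session) -> None:
    # Passes silently when a non-deleted association row matches.
    await check_entity_association_strict(
        session=session,
        parent_entity_id=1,
        child_entity_id=2,
        extended_by_data_model_id=None,  # None when the child originates in the anchor model
    )
    # Raises a 404 when no matching association exists.
    try:
        await check_entity_association_strict(
            session=session, parent_entity_id=1, child_entity_id=999, extended_by_data_model_id=None
        )
    except HTTPException as exc:
        assert exc.status_code == 404
```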
@@ -41,6 +42,24 @@ async def check_existing_association(
     return result.scalar_one_or_none() is not None
 
 
+async def check_entity_attribute_association_strict(
+    session: AsyncSession, entity_id: int, attribute_id: int, extended_by_data_model_id: int | None
+) -> None:
+    query = select(EntityAttributeAssociation).where(
+        EntityAttributeAssociation.EntityId == entity_id,
+        EntityAttributeAssociation.AttributeId == attribute_id,
+        EntityAttributeAssociation.Deleted == False,
+        EntityAttributeAssociation.ExtendedByDataModelId == extended_by_data_model_id,
+    )
+    result = await session.execute(query)
+    exists = result.scalar_one_or_none() is not None
+    if not exists:
+        raise HTTPException(
+            status_code=404,
+            detail=f"No association between attribute {attribute_id} and entity {entity_id} found with extended-by data model '{extended_by_data_model_id}'",
+        )
+
+
 async def create_entity_attribute_association(session: AsyncSession, data: CreateEntityAttributeAssociationDTO):
     # checking if provided entity id and attribute id exist or not
     entity = await check_entity_by_id(session=session, id=data.EntityId)
diff --git a/components/lif/mdr_services/inclusions_service.py b/components/lif/mdr_services/inclusions_service.py
index f05d0c9..dfc541e 100644
--- a/components/lif/mdr_services/inclusions_service.py
+++ b/components/lif/mdr_services/inclusions_service.py
@@ -1,12 +1,12 @@
 from fastapi import HTTPException
+from lif.datatypes.mdr_sql_model import DatamodelElementType, EntityAttributeAssociation, ExtInclusionsFromBaseDM
 from lif.mdr_dto.inclusion_dto import CreateInclusionDTO, InclusionDTO, UpdateInclusionDTO
 from lif.mdr_services.attribute_service import get_attribute_by_id
 from lif.mdr_services.entity_service import get_entity_by_id
 from lif.mdr_services.helper_service import check_datamodel_by_id
 from lif.mdr_utils.logger_config import get_logger
 from sqlalchemy.ext.asyncio import AsyncSession
-from lif.datatypes.mdr_sql_model import EntityAttributeAssociation, ExtInclusionsFromBaseDM
-from sqlmodel import select, func
+from sqlmodel import func, select
 
 logger = get_logger(__name__)
 
@@ -229,3 +229,19 @@ async def get_attribute_inclusions_by_data_model_id_and_entity_id(
     inclusions = result.scalars().all()
     inclusion_dtos = [InclusionDTO.from_orm(inclusion) for inclusion in inclusions]
     return inclusion_dtos
+
+
+async def check_existing_inclusion(
+    session: AsyncSession, type: DatamodelElementType, node_id: int, included_by_data_model_id: int
+) -> None:
+    query = select(ExtInclusionsFromBaseDM).where(
+        ExtInclusionsFromBaseDM.ExtDataModelId == included_by_data_model_id,
+        ExtInclusionsFromBaseDM.IncludedElementId == node_id,
+        ExtInclusionsFromBaseDM.ElementType == type,
+        ExtInclusionsFromBaseDM.Deleted == False,
+    )
+    result = await session.execute(query)
+    if result.scalar_one_or_none() is None:
+        raise HTTPException(
+            status_code=404, detail=f"Inclusion of {type} {node_id} not found in data model {included_by_data_model_id}"
+        )
diff --git a/components/lif/mdr_services/transformation_service.py b/components/lif/mdr_services/transformation_service.py
index c8356b2..315f7d2 100644
--- a/components/lif/mdr_services/transformation_service.py
+++ b/components/lif/mdr_services/transformation_service.py
@@ -1,7 +1,10 @@
 from typing import Dict, List
+
 from fastapi import HTTPException
 from lif.datatypes.mdr_sql_model import (
     DataModel,
+    DatamodelElementType,
+    DataModelType,
     EntityAttributeAssociation,
     Transformation,
     TransformationAttribute,
@@ -17,24 +20,29 @@
UpdateTransformationDTO,
 )
 from lif.mdr_dto.transformation_group_dto import (
-    TransformationGroupDTO,
     CreateTransformationGroupDTO,
+    TransformationGroupDTO,
     UpdateTransformationGroupDTO,
 )
 from lif.mdr_services.attribute_service import get_attribute_dto_by_id
-from lif.mdr_services.entity_association_service import validate_entity_associations_for_transformation_attribute
+from lif.mdr_services.entity_association_service import (
+    check_entity_association_strict,
+    validate_entity_associations_for_transformation_attribute,
+)
+from lif.mdr_services.entity_attribute_association_service import check_entity_attribute_association_strict
 from lif.mdr_services.entity_service import is_entity_by_unique_name
 from lif.mdr_services.helper_service import (
     check_attribute_by_id,
+    check_datamodel_by_id,
     check_entity_attribute_association,
     check_entity_by_id,
-    check_datamodel_by_id,
 )
+from lif.mdr_services.inclusions_service import check_existing_inclusion
 from lif.mdr_utils.logger_config import get_logger
+from sqlalchemy import and_
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlmodel import select, func
 from sqlalchemy.orm import aliased
-from sqlalchemy import and_
+from sqlmodel import func, select
 
 logger = get_logger(__name__)
 
@@ -57,30 +65,223 @@ async def validate_entity_id_path(session: AsyncSession, transformation_attribut
     )
 
 
+def parse_transformation_path(id_path: str) -> List[int]:
+    """
+    Parses a transformation path string into a list of integer IDs, where a positive value is an entity ID and a negative value is an attribute ID.
+
+    All IDs in the path are expected to be integers.
+
+    :param id_path:
+        Format is `id1,id2,...,idN`
+    :type id_path: str
+    :return: A list of entity (or attribute) IDs.
+    """
+    if not id_path:
+        msg = "Invalid EntityIdPath format. The path must not be empty."
+        logger.error(msg)
+        raise HTTPException(status_code=400, detail=msg)
+
+    ids = []
+    for id_str in id_path.split(","):
+        try:
+            ids.append(int(id_str))
+        except ValueError:
+            logger.error(
+                f"Invalid EntityIdPath format: '{id_path}'. IDs must be in the format 'id1,id2,...,idN' and all IDs must be integers."
+            )
+            raise HTTPException(
+                status_code=400,
+                detail="Invalid EntityIdPath format. IDs must be in the format 'id1,id2,...,idN' and all IDs must be integers.",
+            )
+
+    # `id_path` is non-empty and every comma-separated piece parsed, so `ids` always
+    # contains at least one element at this point.
+    return ids
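For reference, a few input/output pairs for the parser as implemented above (the 400s surface as FastAPI `HTTPException`s):

```python
from fastapi import HTTPException
from lif.mdr_services.transformation_service import parse_transformation_path

assert parse_transformation_path("18") == [18]
assert parse_transformation_path("18,7,-305") == [18, 7, -305]  # trailing negative ID marks an attribute

for bad_path in ("", "18,abc", "18,,7"):
    try:
        parse_transformation_path(bad_path)
    except HTTPException as exc:
        assert exc.status_code == 400
    else:
        raise AssertionError(f"expected a 400 for {bad_path!r}")
```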
+async def check_transformation_node_grounded_in_data_model(
+    session: AsyncSession,
+    anchor_data_model_id: int,
+    raw_node_id: int,
+    id_path: str,
+    fail_if_origin_data_model_is_not_the_anchor: bool,
+    is_last_node: bool,
+    is_self_contained_anchor_model: bool,
+) -> tuple[DatamodelElementType, bool]:
+    """
+    Checks whether the node identified by raw_node_id in the provided id_path is grounded in the anchor data model.
+
+    Args:
+        session (AsyncSession): DB session
+        anchor_data_model_id (int): Data Model ID for the anchor of this transformation attribute. Should be either the source or target data model for the transformation group.
+        raw_node_id (int): ID of the node to check. A negative ID indicates an attribute.
+        id_path (str): ID path representing the chain of entity/attribute IDs, with the final ID being an attribute ID if it is negative.
+        fail_if_origin_data_model_is_not_the_anchor (bool): If True, raises an exception if the node does not originate in the anchor data model.
+        is_last_node (bool): Whether this node is the last node in the path
+        is_self_contained_anchor_model (bool): Whether the anchor data model is self-contained and cannot include elements from other data models via inclusions (true for Base LIF and Source Schema)
+
+    Returns:
+        tuple[DatamodelElementType, bool]: The type of the node (Entity or Attribute) and whether the node originates in the anchor data model
+    """
+    node_type = DatamodelElementType.Attribute if is_last_node and raw_node_id < 0 else DatamodelElementType.Entity
+    if node_type == DatamodelElementType.Entity and raw_node_id < 0:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid EntityIdPath format: '{id_path}'. Only the last ID in the path can be an attribute ID (negative value).",
+        )
+
+    cleaned_node_id = abs(raw_node_id)
+    node_data_model_id = (
+        await check_entity_by_id(session=session, id=cleaned_node_id)
+        if node_type == DatamodelElementType.Entity
+        else await check_attribute_by_id(session=session, id=cleaned_node_id)
+    ).DataModelId
+
+    originates_in_anchor = anchor_data_model_id == node_data_model_id
+    if not originates_in_anchor:
+        signature = f"{node_type} {raw_node_id}({cleaned_node_id}) for data model {node_data_model_id} in the entityIdPath {id_path}"
+        if fail_if_origin_data_model_is_not_the_anchor:
+            message = f"{signature} should, but does not, originate in the anchor data model {anchor_data_model_id}."
+            logger.warning(message)
+            raise HTTPException(status_code=400, detail=message)
+
+        if is_self_contained_anchor_model:
+            message = f"{signature} does not originate in the anchor data model {anchor_data_model_id}, which is a self-contained data model. Therefore, it cannot be included via inclusions."
+            logger.warning(message)
+            raise HTTPException(status_code=400, detail=message)
+
+        logger.debug(
+            f"{signature} did not originate in the anchor data model {anchor_data_model_id}. Checking for inclusion."
+        )
+
+        # Will only be checked for Org LIF and Partner LIF anchor data models
+        await check_existing_inclusion(
+            session=session, type=node_type, node_id=cleaned_node_id, included_by_data_model_id=anchor_data_model_id
+        )
+
+        logger.debug(
+            f"{signature} is not directly part of, but is included in, the anchor data model {anchor_data_model_id}."
+        )
+
+    return node_type, originates_in_anchor
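The grounding check combines three signals: the node's sign, its position in the path, and the anchor model's type. A minimal sketch of that contract, assuming a seeded `AsyncSession` and hypothetical IDs (entity 7 and attribute 42 living in, or included into, data model 10):

```python
from lif.datatypes.mdr_sql_model import DatamodelElementType
from lif.mdr_services.transformation_service import check_transformation_node_grounded_in_data_model


async def demo_grounding(session) -> None:
    # The last node "-42" is negative, so it is treated as attribute 42.
    node_type, originates = await check_transformation_node_grounded_in_data_model(
        session=session,
        anchor_data_model_id=10,
        raw_node_id=-42,
        id_path="7,-42",
        fail_if_origin_data_model_is_not_the_anchor=False,  # only the first node in a path must originate in the anchor
        is_last_node=True,
        is_self_contained_anchor_model=False,  # Org LIF / Partner LIF anchors may pull nodes in via inclusions
    )
    assert node_type == DatamodelElementType.Attribute
    # `originates` is False when attribute 42 belongs to another data model; the call then
    # succeeds only if an ExtInclusionsFromBaseDM row includes it into model 10 (otherwise
    # check_existing_inclusion raises a 404), and it raises a 400 outright when the anchor
    # is Base LIF or a Source Schema.
```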
+async def check_transformation_attribute(session: AsyncSession, anchor_data_model: DataModel, id_path: str):
+    """
+    Confirms the provided ID path is valid for the given transformation attribute.
+
+    :param session: DB session
+    :param anchor_data_model: Data model for the anchor of this transformation attribute. Should be either the source or target data model for the transformation group.
+    :param id_path: ID path representing the chain of entity IDs, with the final ID possibly being an attribute ID (marked as such by a negative sign).
+
+    - The path must be in the correct format and contain at least one ID.
+    - The path may end with a non-deleted attribute; the rest must be non-deleted entities.
+    - Entities and attributes (via Entity/Attribute.DataModelId) must belong to the anchor data model or be included in it (ExtInclusionsFromBaseDM.ExtDataModelId).
+    - Entities and attributes must 'chain' together via the association tables. This differs based on the type of the anchor Data Model:
+      - Base LIF and Source Schema: the ExtendedByDataModelId in the association tables will be Null
+      - Org LIF and Partner LIF: the ExtendedByDataModelId in the association tables will be:
+        - the anchor Data Model ID if the entity/attribute does not originate in the anchor data model
+        - Null if the entity/attribute does originate in the anchor data model
+    """
+    transformation_path_ids = parse_transformation_path(id_path)
+    previous_id = None
+
+    for i, raw_node_id in enumerate(transformation_path_ids):
+        logger.debug(
+            f"Checking node {raw_node_id} in entityIdPath {id_path} with the anchor data model {anchor_data_model.Id}"
+        )
+
+        node_type, originates_in_anchor = await check_transformation_node_grounded_in_data_model(
+            session=session,
+            anchor_data_model_id=anchor_data_model.Id,
+            raw_node_id=raw_node_id,
+            id_path=id_path,
+            fail_if_origin_data_model_is_not_the_anchor=(
+                i == 0
+            ),  # Only fail if the first node does not originate in the anchor data model
+            is_last_node=(i == len(transformation_path_ids) - 1),
+            is_self_contained_anchor_model=anchor_data_model.Type
+            in [DataModelType.BaseLIF, DataModelType.SourceSchema],
+        )
+
+        extended_by_data_model_id = None if originates_in_anchor else anchor_data_model.Id
+
+        # The first node needs no association check; check the rest for associations
+        if i > 0:
+            if node_type == DatamodelElementType.Entity:
+                await check_entity_association_strict(
+                    session=session,
+                    parent_entity_id=previous_id,
+                    child_entity_id=raw_node_id,
+                    extended_by_data_model_id=extended_by_data_model_id,
+                )
+            else:
+                # Must be an attribute. Remove the negative sign and check the attribute association
+                await check_entity_attribute_association_strict(
+                    session=session,
+                    entity_id=previous_id,
+                    attribute_id=abs(raw_node_id),
+                    extended_by_data_model_id=extended_by_data_model_id,
+                )
+
+        # This will always be a positive ID, except possibly for the last node
+        previous_id = raw_node_id
+
+
 async def create_transformation(session: AsyncSession, data: CreateTransformationDTO):
+    # Once the UX is in place, only keep the `is_entity_id_path_v2` code flows
+    is_entity_id_path_v2 = bool(
+        data.TargetAttribute
+        and ("," in data.TargetAttribute.EntityIdPath or data.TargetAttribute.EntityIdPath.lstrip("-").isdigit())
+    )
+    logger.info(f"Creating transformation; is_entity_id_path_v2={is_entity_id_path_v2}")
+
     # Checking if transformation group exists
     transformation_group = await get_transformation_group_by_id(session=session, id=data.TransformationGroupId)
+    source_data_model = await check_datamodel_by_id(session=session, id=transformation_group.SourceDataModelId)
+    target_data_model = await check_datamodel_by_id(session=session, id=transformation_group.TargetDataModelId)
 
     # Validate source attributes
     for attribute in data.SourceAttributes:
-        src_attribute = await check_attribute_by_id(session=session, id=attribute.AttributeId)
-        if src_attribute.DataModelId != transformation_group.SourceDataModelId:
-            raise HTTPException(
-                status_code=400,
-                detail="The source attribute is not under the source data model for this transformation group.",
-            )
-        await check_entity_by_id(session=session, id=attribute.EntityId)
-        await check_entity_attribute_association(
-            session=session, entity_id=attribute.EntityId, attribute_id=attribute.AttributeId
-        )
+        if is_entity_id_path_v2:
+            await check_transformation_attribute(
+                session=session, anchor_data_model=source_data_model, id_path=attribute.EntityIdPath
+            )
+        else:
+            src_attribute = await check_attribute_by_id(session=session, id=attribute.AttributeId)
+            if src_attribute.DataModelId != transformation_group.SourceDataModelId:
+                raise HTTPException(
+                    status_code=400,
+                    detail="The source attribute is not under the source data model for this transformation group.",
+                )
+            await check_entity_by_id(session=session, id=attribute.EntityId)
+            await check_entity_attribute_association(
+                session=session, entity_id=attribute.EntityId, attribute_id=attribute.AttributeId
+            )
 
     # Validate target attributes
-    tar_attribute = await check_attribute_by_id(session=session, id=data.TargetAttribute.AttributeId)
-    # BS: removed check for same data model in target because an OrgLIF extends BaseLIF via inclusions and so can be mapped.
-    await check_entity_by_id(session=session, id=data.TargetAttribute.EntityId)
-    await check_entity_attribute_association(
-        session=session, entity_id=data.TargetAttribute.EntityId, attribute_id=data.TargetAttribute.AttributeId
-    )
+    if is_entity_id_path_v2:
+        await check_transformation_attribute(
+            session=session, anchor_data_model=target_data_model, id_path=data.TargetAttribute.EntityIdPath
+        )
+    else:
+        tar_attribute = await check_attribute_by_id(session=session, id=data.TargetAttribute.AttributeId)
+        # BS: removed check for same data model in target because an OrgLIF extends BaseLIF via inclusions and so can be mapped.
+        await check_entity_by_id(session=session, id=data.TargetAttribute.EntityId)
+        await check_entity_attribute_association(
+            session=session, entity_id=data.TargetAttribute.EntityId, attribute_id=data.TargetAttribute.AttributeId
+        )
 
     # Step 1: Create the Transformation
     transformation = Transformation(
@@ -103,8 +304,9 @@ async def create_transformation(session: AsyncSession, data: CreateTransformatio
     # Step 2: Create TransformationAttributes (Source and Target)
     source_attributes = []
     for attribute in data.SourceAttributes:
-        # Validate entity id path
-        await validate_entity_id_path(session, attribute)
+        if not is_entity_id_path_v2:
+            # Validate entity id path
+            await validate_entity_id_path(session, attribute)
 
         source_attribute = TransformationAttribute(
             TransformationId=transformation.Id,
@@ -122,8 +324,9 @@ async def create_transformation(session: AsyncSession, data: CreateTransformatio
         source_attributes.append(TransformationAttributeDTO.from_orm(source_attribute))
         session.add(source_attribute)
 
-    # Validate entity id path
-    await validate_entity_id_path(session, data.TargetAttribute)
+    if not is_entity_id_path_v2:
+        # Validate entity id path
+        await validate_entity_id_path(session, data.TargetAttribute)
 
     target_attribute = TransformationAttribute(
         TransformationId=transformation.Id,
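The v1/v2 switch hinges entirely on the shape of `TargetAttribute.EntityIdPath`. Here is the detection heuristic extracted as a standalone sketch (the helper name is illustrative, not part of the module):

```python
def is_v2_entity_id_path(entity_id_path: str) -> bool:
    """v2 paths are comma-separated integer IDs; v1 paths are dotted entity names."""
    return "," in entity_id_path or entity_id_path.lstrip("-").isdigit()


assert is_v2_entity_id_path("18,7,-305")            # ID chain ending in an attribute ID
assert is_v2_entity_id_path("-305")                 # a single (attribute) ID
assert not is_v2_entity_id_path("Person.Courses")   # legacy dotted-name path
```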
@@ -227,6 +430,18 @@ async def get_transformation_by_id(session: AsyncSession, transformation_id: int
 
 
 async def update_transformation(session: AsyncSession, transformation_id: int, data: UpdateTransformationDTO) -> dict:
+    # Once the UX is in place, only keep the `is_entity_id_path_v2` code flows
+    is_entity_id_path_v2 = bool(
+        data.TargetAttribute
+        and ("," in data.TargetAttribute.EntityIdPath or data.TargetAttribute.EntityIdPath.lstrip("-").isdigit())
+    )
+    logger.info(f"Updating transformation; is_entity_id_path_v2={is_entity_id_path_v2}")
+
     # Validate transformation
     transformation = await session.get(Transformation, transformation_id)
     if not transformation:
@@ -253,19 +468,24 @@ async def update_transformation(session: AsyncSession, transformation_id: int, d
     update_source_attribute_ids = []
+    if is_entity_id_path_v2:
+        # check_transformation_attribute expects the DataModel row, not its ID
+        source_data_model = await check_datamodel_by_id(session=session, id=transformation_group.SourceDataModelId)
     for attr in data.SourceAttributes:
         # Validate source attribute
-        attribute = await check_attribute_by_id(session=session, id=attr.AttributeId)
-        if attribute.DataModelId != transformation_group.SourceDataModelId:
-            raise HTTPException(
-                status_code=400,
-                detail="The source attribute is not under the source data model for this transformation group.",
-            )
-        await check_entity_by_id(session=session, id=attr.EntityId)
-        await check_entity_attribute_association(
-            session=session, entity_id=attr.EntityId, attribute_id=attr.AttributeId
-        )
+        if is_entity_id_path_v2:
+            await check_transformation_attribute(
+                session=session, anchor_data_model=source_data_model, id_path=attr.EntityIdPath
+            )
+        else:
+            attribute = await check_attribute_by_id(session=session, id=attr.AttributeId)
+            if attribute.DataModelId != transformation_group.SourceDataModelId:
+                raise HTTPException(
+                    status_code=400,
+                    detail="The source attribute is not under the source data model for this transformation group.",
+                )
+            await check_entity_by_id(session=session, id=attr.EntityId)
+            await check_entity_attribute_association(
+                session=session, entity_id=attr.EntityId, attribute_id=attr.AttributeId
+            )
 
-        # Validate entity path
-        await validate_entity_id_path(session, attr)
+            # Validate entity path
+            await validate_entity_id_path(session, attr)
 
         update_source_attribute_ids.append(attr.AttributeId)
 
@@ -337,19 +557,26 @@ async def update_transformation(session: AsyncSession, transformation_id: int, d
 
     if data.TargetAttribute:
         # Validate target attribute
-        tar_attribute = await check_attribute_by_id(session=session, id=data.TargetAttribute.AttributeId)
-        if tar_attribute.DataModelId != transformation_group.TargetDataModelId:
-            raise HTTPException(
-                status_code=400,
-                detail="The target attribute is not under the target data model for this transformation group.",
-            )
-        await check_entity_by_id(session=session, id=data.TargetAttribute.EntityId)
-        await check_entity_attribute_association(
-            session=session, entity_id=data.TargetAttribute.EntityId, attribute_id=data.TargetAttribute.AttributeId
-        )
+        if is_entity_id_path_v2:
+            # check_transformation_attribute expects the DataModel row, not its ID
+            target_data_model = await check_datamodel_by_id(session=session, id=transformation_group.TargetDataModelId)
+            await check_transformation_attribute(
+                session=session,
+                anchor_data_model=target_data_model,
+                id_path=data.TargetAttribute.EntityIdPath,
+            )
+        else:
+            tar_attribute = await check_attribute_by_id(session=session, id=data.TargetAttribute.AttributeId)
+            if tar_attribute.DataModelId != transformation_group.TargetDataModelId:
+                raise HTTPException(
+                    status_code=400,
+                    detail="The target attribute is not under the target data model for this transformation group.",
+                )
+            await check_entity_by_id(session=session, id=data.TargetAttribute.EntityId)
+            await check_entity_attribute_association(
+                session=session, entity_id=data.TargetAttribute.EntityId, attribute_id=data.TargetAttribute.AttributeId
+            )
 
-        # Validate entity path
-        await validate_entity_id_path(session, data.TargetAttribute)
+            # Validate entity path
+            await validate_entity_id_path(session, data.TargetAttribute)
 
         # Update target attribute
         if target_transformation_attribute:
diff --git a/test/bases/lif/mdr_restapi/conftest.py b/test/bases/lif/mdr_restapi/conftest.py
new file mode 100644
index 0000000..fcced16
--- /dev/null
+++ b/test/bases/lif/mdr_restapi/conftest.py
@@ -0,0 +1,145 @@
+import os
+import subprocess
+from pathlib import Path
+from urllib.parse import urlparse
+
+import pytest
+import testing.postgresql
+from httpx import ASGITransport, AsyncClient
+from lif.translator_restapi import
core as translator_core +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine +from sqlalchemy.orm import sessionmaker + +# +# Setup to test the end-to-end API with a real postgreSQL database +# + + +@pytest.fixture +def mdr_api_headers(): + """Standard headers for MDR API requests.""" + return {"X-API-Key": "changeme1"} + + +@pytest.fixture(scope="session") +def postgres_server(): + """Start a PostgreSQL server for testing (no Docker required). + + Requires PostgreSQL to be installed locally (e.g., `brew install postgresql` on macOS). + """ + + # Get the absolute path to the SQL file that is used to initialize the database + backup_sql_path = Path(__file__).parent.parent.parent.parent.parent / "projects/lif_mdr_database/backup.sql" + + try: + postgresql = testing.postgresql.Postgresql() + except RuntimeError as e: + pytest.skip(f"PostgreSQL not available locally: {e}") + + with postgresql: + # Initialize database with backup.sql using psql command + # (psycopg2 can't handle COPY ... FROM stdin with inline data) + parsed = urlparse(postgresql.url()) + env = os.environ.copy() + env["PGPASSWORD"] = parsed.password or "" + + result = subprocess.run( + [ + "psql", + "-h", + parsed.hostname, + "-p", + str(parsed.port), + "-U", + parsed.username, + "-d", + parsed.path.lstrip("/"), + "-f", + str(backup_sql_path), + ], + env=env, + capture_output=True, + text=True, + ) + if result.returncode != 0: + pytest.fail(f"Failed to load backup.sql: {result.stderr}") + + # Set environment variables for MDR + os.environ["POSTGRESQL_USER"] = parsed.username or "postgres" + os.environ["POSTGRESQL_PASSWORD"] = parsed.password or "" + os.environ["POSTGRESQL_HOST"] = parsed.hostname or "localhost" + os.environ["POSTGRESQL_PORT"] = str(parsed.port) + os.environ["POSTGRESQL_DB"] = parsed.path.lstrip("/") + + yield postgresql + + +@pytest.fixture(scope="function") +async def test_db_session(postgres_server): + """Create a new database session for each test.""" + # Convert psycopg2 URL to asyncpg URL format + parsed = urlparse(postgres_server.url()) + DATABASE_URL = ( + f"postgresql+asyncpg://{parsed.username}:{parsed.password or ''}@{parsed.hostname}:{parsed.port}{parsed.path}" + ) + + engine = create_async_engine(DATABASE_URL, echo=True) + async_session_maker = sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False) + + async with async_session_maker() as session: + yield session + + await engine.dispose() + + +@pytest.fixture(scope="function") +async def async_client_mdr(test_db_session): + """Create async HTTP client for testing MDR.""" + + # Leave imports here to force database setup with + # the test container environment variables + from lif.mdr_restapi import core + from lif.mdr_utils.database_setup import get_session + + # Override MDR's get_session dependency to use the test database session + # so the event loop is the same as the test, otherwise, errors such as "got + # Future attached + # to a different loop" get thrown. 
+ async def override_get_session(): + yield test_db_session + + core.app.dependency_overrides[get_session] = override_get_session + + async with AsyncClient(transport=ASGITransport(app=core.app), base_url="http://test") as client: + yield client + + # Clean up + core.app.dependency_overrides.clear() + + +@pytest.fixture(scope="function") +async def async_client_translator(async_client_mdr): + """Create async HTTP client for testing the Translator.""" + from lif.mdr_client import core as mdr_client_core + + # Store original function for cleanup + original_get_mdr_client = mdr_client_core._get_mdr_client + original_get_mdr_api_url = mdr_client_core._get_mdr_api_url + original_get_mdr_api_auth_token = mdr_client_core._get_mdr_api_auth_token + + # Override mdr_client's _get_mdr_client to use the test MDR app + async def override_get_mdr_client(): + yield async_client_mdr + + mdr_client_core._get_mdr_client = override_get_mdr_client + mdr_client_core._get_mdr_api_url = lambda: "http://test" + mdr_client_core._get_mdr_api_auth_token = lambda: "changeme1" + + # Create the translator client + async with AsyncClient(transport=ASGITransport(app=translator_core.app), base_url="http://test") as client: + yield client + + # Clean up - restore original function + mdr_client_core._get_mdr_client = original_get_mdr_client + mdr_client_core._get_mdr_api_url = original_get_mdr_api_url + mdr_client_core._get_mdr_api_auth_token = original_get_mdr_api_auth_token diff --git a/test/bases/lif/mdr_restapi/test_core.py b/test/bases/lif/mdr_restapi/test_core.py index c8d166f..ea9e03c 100644 --- a/test/bases/lif/mdr_restapi/test_core.py +++ b/test/bases/lif/mdr_restapi/test_core.py @@ -1,648 +1,12 @@ -import json -import os -import subprocess -from pathlib import Path -from urllib.parse import urlparse - import pytest -import testing.postgresql -from deepdiff import DeepDiff -from httpx import ASGITransport, AsyncClient -from lif.translator_restapi import core as translator_core -from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine -from sqlalchemy.orm import sessionmaker - -from test.utils.lif.mdr.api import create_data_model_by_upload, create_transformation, create_transformation_groups -from test.utils.lif.translator.api import create_translation - -HEADER_MDR_API_KEY_GRAPHQL = {"X-API-Key": "changeme1"} - - -def find_object_by_unique_name(schema_part: dict, unique_name: str) -> dict | None: - """ - Recursively search for an object with the given UniqueName. - - Returns: - The object dictionary if found, None otherwise - """ - - for _, value in schema_part.items(): - if isinstance(value, dict) and value.get("UniqueName") == unique_name: - return value - - # Recursively search - if isinstance(value, dict): - result = find_object_by_unique_name(value, unique_name) - if result: - return result - - return None - - -# -# Setup to test the end-to-end API with a real postgreSQL database -# - - -@pytest.fixture(scope="session") -def postgres_server(): - """Start a PostgreSQL server for testing (no Docker required). - - Requires PostgreSQL to be installed locally (e.g., `brew install postgresql` on macOS). 
- """ - - # Get the absolute path to the SQL file that is used to initialize the database - backup_sql_path = Path(__file__).parent.parent.parent.parent.parent / "projects/lif_mdr_database/backup.sql" - - try: - postgresql = testing.postgresql.Postgresql() - except RuntimeError as e: - pytest.skip(f"PostgreSQL not available locally: {e}") - - with postgresql: - # Initialize database with backup.sql using psql command - # (psycopg2 can't handle COPY ... FROM stdin with inline data) - parsed = urlparse(postgresql.url()) - env = os.environ.copy() - env["PGPASSWORD"] = parsed.password or "" - - result = subprocess.run( - [ - "psql", - "-h", parsed.hostname, - "-p", str(parsed.port), - "-U", parsed.username, - "-d", parsed.path.lstrip("/"), - "-f", str(backup_sql_path), - ], - env=env, - capture_output=True, - text=True, - ) - if result.returncode != 0: - pytest.fail(f"Failed to load backup.sql: {result.stderr}") - - # Set environment variables for MDR - os.environ["POSTGRESQL_USER"] = parsed.username or "postgres" - os.environ["POSTGRESQL_PASSWORD"] = parsed.password or "" - os.environ["POSTGRESQL_HOST"] = parsed.hostname or "localhost" - os.environ["POSTGRESQL_PORT"] = str(parsed.port) - os.environ["POSTGRESQL_DB"] = parsed.path.lstrip("/") - - yield postgresql - - -@pytest.fixture(scope="function") -async def test_db_session(postgres_server): - """Create a new database session for each test.""" - # Convert psycopg2 URL to asyncpg URL format - parsed = urlparse(postgres_server.url()) - DATABASE_URL = f"postgresql+asyncpg://{parsed.username}:{parsed.password or ''}@{parsed.hostname}:{parsed.port}{parsed.path}" - - engine = create_async_engine(DATABASE_URL, echo=True) - async_session_maker = sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False) - - async with async_session_maker() as session: - yield session - - await engine.dispose() - - -@pytest.fixture(scope="function") -async def async_client_mdr(test_db_session): - """Create async HTTP client for testing MDR.""" - - # Leave imports here to force database setup with - # the test container environment variables - from lif.mdr_restapi import core - from lif.mdr_utils.database_setup import get_session - - # Override MDR's get_session dependency to use the test database session - # so the event loop is the same as the test, otherwise, errors such as "got - # Future attached - # to a different loop" get thrown. 
- async def override_get_session(): - yield test_db_session - - core.app.dependency_overrides[get_session] = override_get_session - - async with AsyncClient(transport=ASGITransport(app=core.app), base_url="http://test") as client: - yield client - - # Clean up - core.app.dependency_overrides.clear() - - -@pytest.fixture(scope="function") -async def async_client_translator(async_client_mdr): - """Create async HTTP client for testing the Translator.""" - from lif.mdr_client import core as mdr_client_core - - # Store original function for cleanup - original_get_mdr_client = mdr_client_core._get_mdr_client - original_get_mdr_api_url = mdr_client_core._get_mdr_api_url - original_get_mdr_api_auth_token = mdr_client_core._get_mdr_api_auth_token - - # Override mdr_client's _get_mdr_client to use the test MDR app - async def override_get_mdr_client(): - yield async_client_mdr - - mdr_client_core._get_mdr_client = override_get_mdr_client - mdr_client_core._get_mdr_api_url = lambda: "http://test" - mdr_client_core._get_mdr_api_auth_token = lambda: "changeme1" - - # Create the translator client - async with AsyncClient(transport=ASGITransport(app=translator_core.app), base_url="http://test") as client: - yield client - - # Clean up - restore original function - mdr_client_core._get_mdr_client = original_get_mdr_client - mdr_client_core._get_mdr_api_url = original_get_mdr_api_url - mdr_client_core._get_mdr_api_auth_token = original_get_mdr_api_auth_token - - -# -# Test cases -# @pytest.mark.asyncio -async def test_test_auth_info_graphql_api_key(async_client_mdr): - response = await async_client_mdr.get("/test/auth-info", headers=HEADER_MDR_API_KEY_GRAPHQL) +async def test_test_auth_info_graphql_api_key(async_client_mdr, mdr_api_headers): + response = await async_client_mdr.get("/test/auth-info", headers=mdr_api_headers) assert response.status_code == 200, str(response.content) assert response.json() == { "auth_type": "API key", "authenticated_as": "microservice", "service_name": "graphql-service", } - - -@pytest.mark.asyncio -async def test_create_source_schema_datamodel_without_upload_success(async_client_mdr): - # Create data model without OpenAPI schema upload - - response = await async_client_mdr.post( - "/datamodels/", - headers=HEADER_MDR_API_KEY_GRAPHQL, - json={ - "DataModelVersion": "1.0", - "State": "Draft", - "CreationDate": "2025-12-02T21:01:00Z", - "ActivationDate": "2025-12-02T21:01:00Z", - "Name": "Test Source Schema Data Model", - "Type": "SourceSchema", - "BaseDataModelId": None, - "Description": "Test Source Schema Data Model description", - "Notes": "For testing", - "UseConsiderations": "Public use", - "Tags": "test1", - "Contributor": "JSmith", - "ContributorOrganization": "Acme", - "DeprecationDate": "2040-12-12T01:02:00Z", - }, - ) - assert response.status_code == 201, str(response.content) + str(response.text) + str(response.headers) - - # Extract ID from location header - - location = response.headers.get("location") - datamodel_id = int(location.split("/")[-1]) - - # Confirm creation response - - assert response.json() == { - "ActivationDate": "2025-12-02T21:01:00Z", - "BaseDataModelId": None, - "Contributor": "JSmith", - "ContributorOrganization": "Acme", - "CreationDate": "2025-12-02T21:01:00Z", - "DataModelVersion": "1.0", - "Deleted": False, - "DeprecationDate": "2040-12-12T01:02:00Z", - "Description": "Test Source Schema Data Model description", - "Id": datamodel_id, - "Name": "Test Source Schema Data Model", - "Notes": "For testing", - "State": "Draft", - "Tags": "test1", 
- "Type": "SourceSchema", - "UseConsiderations": "Public use", - } - - # Download full OpenAPI schema with metadata to verify creation - - retrieve_response = await async_client_mdr.get( - f"/datamodels/open_api_schema/{datamodel_id}?download=true&include_entity_md=true&include_attr_md=true&full_export=true", - headers=HEADER_MDR_API_KEY_GRAPHQL, - ) - assert retrieve_response.status_code == 200, str(retrieve_response.text) - - retrieved_schema = retrieve_response.json() - assert retrieved_schema == { - "components": {"schemas": {}}, - "info": { - "description": "OpenAPI Spec", - "title": "Machine-Readable Schema for Test Source Schema Data Model", - "version": "1.0", - }, - "openapi": "3.0.0", - "paths": {}, - }, "Retrieved schema does not match empty schema" - - -@pytest.mark.asyncio -async def test_create_source_schema_datamodel_with_duplicate_valuesets(async_client_mdr): - """ - Create data model with OpenAPI schema upload that contains duplicate valuesets. - - Should fail the creation call. - """ - - schema_path = Path(__file__).parent / "data_model_test_duplicate_valuesets.json" - create_response = await async_client_mdr.post( - "/datamodels/open_api_schema/upload", - headers=HEADER_MDR_API_KEY_GRAPHQL, - files={"file": ("filename.json", open(schema_path, "rb"), "application/json")}, - data={ - "data_model_version": "1.0", - "state": "Draft", - "activation_date": "2025-12-02T21:01:00Z", - "data_model_name": "Test Source Schema Data Model with Duplicate ValueSets", - "data_model_type": "SourceSchema", - }, - ) - - # Confirm creation response - - assert create_response.status_code == 500, str(create_response.text) + str(create_response.headers) - assert "IntegrityError" in create_response.json()["detail"], str(create_response.text) + str( - create_response.headers - ) - - -@pytest.mark.asyncio -async def test_create_source_schema_datamodel_with_duplicate_valuesetvalues(async_client_mdr): - """ - Create data model with OpenAPI schema upload that contains duplicate valueset values. - - Should fail the creation call. - """ - - schema_path = Path(__file__).parent / "data_model_test_duplicate_valuesetvalues.json" - create_response = await async_client_mdr.post( - "/datamodels/open_api_schema/upload", - headers=HEADER_MDR_API_KEY_GRAPHQL, - files={"file": ("filename.json", open(schema_path, "rb"), "application/json")}, - data={ - "data_model_version": "1.0", - "state": "Draft", - "activation_date": "2025-12-02T21:01:00Z", - "data_model_name": "Test Source Schema Data Model with Duplicate ValueSetValues", - "data_model_type": "SourceSchema", - }, - ) - - # Confirm creation response - - assert create_response.status_code == 500, str(create_response.text) + str(create_response.headers) - assert "IntegrityError" in create_response.json()["detail"], str(create_response.text) + str( - create_response.headers - ) - - -@pytest.mark.asyncio -async def test_transforms_deep_literal_attribute(async_client_mdr, async_client_translator): - """ - Transform a 'deep' literal attribute to another deep literal attribute. - - Source and Target are source schemas. 
- - """ - - # Create Source Data Model and extract IDs for the entity and attribute - - (source_data_model_id, source_schema) = await create_data_model_by_upload( - async_client_mdr=async_client_mdr, - schema_path=Path(__file__).parent / "data_model_test_transforms_deep_literal_attribute_source.json", - data_model_name="test_transforms_deep_literal_attribute_source", - data_model_type="SourceSchema", - ) - source_parent_entity_id = find_object_by_unique_name(source_schema, "person.courses")["Id"] - assert source_parent_entity_id is not None, "Could not find source parent entity ID for person.courses... " + str( - source_schema - ) - source_attribute_id = find_object_by_unique_name(source_schema, "person.courses.grade")["Id"] - assert source_attribute_id is not None, "Could not find source attribute ID for person.courses.grade... " + str( - source_schema - ) - - # Create Target Data Model and extract IDs for the entity and attribute - - (target_data_model_id, target_schema) = await create_data_model_by_upload( - async_client_mdr=async_client_mdr, - schema_path=Path(__file__).parent / "data_model_test_transforms_deep_literal_attribute_target.json", - data_model_name="test_transforms_deep_literal_attribute_target", - data_model_type="SourceSchema", - ) - target_parent_entity_id = find_object_by_unique_name(target_schema, "user.skills")["Id"] - assert target_parent_entity_id is not None, "Could not find target parent entity ID for user.skills... " + str( - target_schema - ) - target_attribute_id = find_object_by_unique_name(target_schema, "user.skills.genre")["Id"] - assert target_attribute_id is not None, "Could not find target attribute ID for user.skills.genre... " + str( - target_schema - ) - - # Create transform group between source and target - - transformation_group_id = await create_transformation_groups( - async_client_mdr=async_client_mdr, - source_data_model_id=source_data_model_id, - target_data_model_id=target_data_model_id, - group_name=test_transforms_deep_literal_attribute.__name__, - ) - - # Create transform - - _ = await create_transformation( - async_client_mdr=async_client_mdr, - transformation_group_id=transformation_group_id, - source_parent_entity_id=source_parent_entity_id, - source_attribute_id=source_attribute_id, - source_entity_path="Person.Courses", - target_parent_entity_id=target_parent_entity_id, - target_attribute_id=target_attribute_id, - target_entity_path="User.Skills", - mapping_expression='{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }', - transformation_name="User.Skills.Genre", - ) - - # Use the transform via the Translator endpoint - - translated_json = await create_translation( - async_client_translator=async_client_translator, - source_data_model_id=source_data_model_id, - target_data_model_id=target_data_model_id, - json_to_translate={"Person": {"Courses": {"Grade": "A", "Style": "Lecture"}}}, - headers=HEADER_MDR_API_KEY_GRAPHQL, - ) - assert translated_json == {"User": {"Skills": {"Genre": "A"}}} - - -@pytest.mark.asyncio -async def test_transforms_with_embeddings(async_client_mdr, async_client_translator): - """ - Transform source and target attributes both from their original location and their entity embedded location. - - Source and Target are source schemas. 
- - """ - - # Create Source Data Model and extract IDs for the entity and attribute - - (source_data_model_id, source_schema) = await create_data_model_by_upload( - async_client_mdr=async_client_mdr, - schema_path=Path(__file__).parent / "data_model_test_transforms_with_embeddings_source.json", - data_model_name="test_transforms_with_embeddings_source", - data_model_type="SourceSchema", - ) - - t1_source_parent_entity_id = find_object_by_unique_name(source_schema, "person.courses.skillsgainedfromcourses")[ - "Id" - ] - assert t1_source_parent_entity_id is not None, ( - "Could not find source parent entity ID of person.courses.skillsgainedfromcourses... " + str(source_schema) - ) - t1_source_attribute_id = find_object_by_unique_name( - source_schema, "person.courses.skillsgainedfromcourses.skilllevel" - )["Id"] - assert t1_source_attribute_id is not None, ( - "Could not find source attribute ID of person.courses.skillsgainedfromcourses.skilllevel... " - + str(source_schema) - ) - - t2_source_parent_entity_id = find_object_by_unique_name(source_schema, "person.employment.profession")["Id"] - assert t2_source_parent_entity_id is not None, ( - "Could not find source parent entity ID of person.employment.profession... " + str(source_schema) - ) - t2_source_attribute_id = find_object_by_unique_name( - source_schema, "person.employment.profession.durationatprofession" - )["Id"] - assert t2_source_attribute_id is not None, ( - "Could not find source attribute ID of person.employment.profession.durationatprofession... " - + str(source_schema) - ) - - t3_source_parent_entity_id = find_object_by_unique_name(source_schema, "person.courses.skillsgainedfromcourses")[ - "Id" - ] - assert t3_source_parent_entity_id is not None, ( - "Could not find source parent entity ID of person.courses.skillsgainedfromcourses... " + str(source_schema) - ) - t3_source_attribute_id = find_object_by_unique_name( - source_schema, "person.courses.skillsgainedfromcourses.skilllevel" - )["Id"] - assert t3_source_attribute_id is not None, ( - "Could not find source attribute ID of person.courses.skillsgainedfromcourses.skilllevel... " - + str(source_schema) - ) - - # Create Target Data Model and extract IDs for the entity and attribute - - (target_data_model_id, target_schema) = await create_data_model_by_upload( - async_client_mdr=async_client_mdr, - schema_path=Path(__file__).parent / "data_model_test_transforms_with_embeddings_target.json", - data_model_name="test_transforms_with_embeddings_target", - data_model_type="SourceSchema", - ) - t1_target_parent_entity_id = find_object_by_unique_name(target_schema, "user.abilities.skills")["Id"] - assert t1_target_parent_entity_id is not None, ( - "Could not find target parent entity ID of user.abilities.skills... " + str(target_schema) - ) - t1_target_attribute_id = find_object_by_unique_name(target_schema, "user.abilities.skills.levelofskillability")[ - "Id" - ] - assert t1_target_attribute_id is not None, ( - "Could not find target attribute ID of user.abilities.skills.levelofskillability... " + str(target_schema) - ) - - t2_target_parent_entity_id = find_object_by_unique_name(target_schema, "user.abilities.skills")["Id"] - assert t2_target_parent_entity_id is not None, ( - "Could not find target parent entity ID of user.abilities.skills... 
" + str(target_schema) - ) - t2_target_attribute_id = find_object_by_unique_name(target_schema, "user.abilities.skills.levelofskillability")[ - "Id" - ] - assert t2_target_attribute_id is not None, ( - "Could not find target attribute ID of user.abilities.skills.levelofskillability..." + str(target_schema) - ) - - t3_target_parent_entity_id = find_object_by_unique_name(target_schema, "user.preferences")["Id"] - assert t3_target_parent_entity_id is not None, ( - "Could not find target parent entity ID of user.preferences... " + str(target_schema) - ) - t3_target_attribute_id = find_object_by_unique_name(target_schema, "user.preferences.workpreference")["Id"] - assert t3_target_attribute_id is not None, ( - "Could not find target attribute ID of user.preferences.workpreference..." + str(target_schema) - ) - - # Create transform group between source and target - - transformation_group_id = await create_transformation_groups( - async_client_mdr=async_client_mdr, - source_data_model_id=source_data_model_id, - target_data_model_id=target_data_model_id, - group_name=test_transforms_deep_literal_attribute.__name__, - ) - - # Create transformations - - _ = await create_transformation( - async_client_mdr=async_client_mdr, - transformation_group_id=transformation_group_id, - source_parent_entity_id=t1_source_parent_entity_id, - source_attribute_id=t1_source_attribute_id, - source_entity_path="Person.Employment.SkillsGainedFromCourses", - target_parent_entity_id=t1_target_parent_entity_id, - target_attribute_id=t1_target_attribute_id, - target_entity_path="User.Workplace.Abilities.Skills", - mapping_expression='{ "User": { "Workplace": { "Abilities": { "Skills": { "LevelOfSkillAbility": Person.Employment.SkillsGainedFromCourses.SkillLevel } } } } }', - transformation_name="User.Workplace.Abilities.Skills.LevelOfSkillAbility", - ) - - _ = await create_transformation( - async_client_mdr=async_client_mdr, - transformation_group_id=transformation_group_id, - source_parent_entity_id=t2_source_parent_entity_id, - source_attribute_id=t2_source_attribute_id, - source_entity_path="Person.Employment.Profession", - target_parent_entity_id=t2_target_parent_entity_id, - target_attribute_id=t2_target_attribute_id, - target_entity_path="User.Abilities.Skills", - mapping_expression='{ "User": { "Abilities": { "Skills": { "LevelOfSkillAbility": Person.Employment.Profession.DurationAtProfession } } } }', - transformation_name="User.Abilities.Skills.LevelOfSkillAbility", - ) - - _ = await create_transformation( - async_client_mdr=async_client_mdr, - transformation_group_id=transformation_group_id, - source_parent_entity_id=t3_source_parent_entity_id, - source_attribute_id=t3_source_attribute_id, - source_entity_path="Person.Courses.SkillsGainedFromCourses", - target_parent_entity_id=t3_target_parent_entity_id, - target_attribute_id=t3_target_attribute_id, - target_entity_path="User.Preferences", - mapping_expression='{ "User": { "Preferences": { "WorkPreference": Person.Courses.SkillsGainedFromCourses.SkillLevel } } }', - transformation_name="User.Preferences.WorkPreference", - ) - - # Use the transformations via the Translator endpoint - - translated_json = await create_translation( - async_client_translator=async_client_translator, - source_data_model_id=source_data_model_id, - target_data_model_id=target_data_model_id, - json_to_translate={ - "Person": { - "Employment": { - "SkillsGainedFromCourses": {"SkillLevel": "Mastery"}, - "Profession": {"DurationAtProfession": "10 Years"}, - }, - "Courses": 
{"SkillsGainedFromCourses": {"SkillLevel": "Advanced"}}, - } - }, - headers=HEADER_MDR_API_KEY_GRAPHQL, - ) - assert translated_json == { - "User": { - "Workplace": {"Abilities": {"Skills": {"LevelOfSkillAbility": "Mastery"}}}, - "Abilities": {"Skills": {"LevelOfSkillAbility": "10 Years"}}, - "Preferences": {"WorkPreference": "Advanced"}, - } - } - - -@pytest.mark.asyncio -async def test_create_source_schema_datamodel_with_upload_success(async_client_mdr): - # Create data model with OpenAPI schema upload - - schema_path = Path(__file__).parent / "data_model_example_datasource_full_openapi_schema.json" - create_response = await async_client_mdr.post( - "/datamodels/open_api_schema/upload", - headers=HEADER_MDR_API_KEY_GRAPHQL, - files={"file": ("filename.json", open(schema_path, "rb"), "application/json")}, - data={ - "data_model_version": "1.0", - "state": "Draft", - "activation_date": "2025-12-02T21:01:00Z", - "data_model_name": "Test Source Schema Data Model with Upload", - "data_model_type": "SourceSchema", - }, - ) - - # Confirm creation response - - assert create_response.status_code == 201, str(create_response.text) + str(create_response.headers) - # Location header is not populated for this endpoint, so extract ID from response body - data_model_id = create_response.json()["Id"] - assert data_model_id is not None - assert isinstance(data_model_id, int) - - assert create_response.json() == { - "ActivationDate": "2025-12-02T21:01:00Z", - "BaseDataModelId": None, - "Contributor": None, - "ContributorOrganization": None, - "CreationDate": None, - "DataModelVersion": "1.0", - "Deleted": False, - "DeprecationDate": None, - "Description": None, - "Id": data_model_id, - "Name": "Test Source Schema Data Model with Upload", - "Notes": None, - "State": "Draft", - "Tags": None, - "Type": "SourceSchema", - "UseConsiderations": None, - } - - # Download full OpenAPI schema with metadata to verify upload - - retrieve_response = await async_client_mdr.get( - f"/datamodels/open_api_schema/{data_model_id}?download=true&include_entity_md=true&include_attr_md=true&full_export=true", - headers=HEADER_MDR_API_KEY_GRAPHQL, - ) - assert retrieve_response.status_code == 200, str(retrieve_response.text) - - retrieved_schema = retrieve_response.json() - with open(schema_path, "r") as f: - original_schema = json.load(f) - original_schema["info"]["title"] = "Machine-Readable Schema for Test Source Schema Data Model with Upload" - - diff = DeepDiff( - original_schema, - retrieved_schema, - ignore_order=True, - exclude_paths=[ - "root['components']['schemas']['person']['Id']", - "root['components']['schemas']['person']['DataModelId']", - "root['components']['schemas']['person']['properties']['id']['Id']", - "root['components']['schemas']['person']['properties']['id']['DataModelId']", - "root['components']['schemas']['person']['properties']['id']['EntityAttributeAssociationId']", - "root['components']['schemas']['person']['properties']['id']['EntityId']", - "root['components']['schemas']['person']['properties']['employment']['DataModelId']", - "root['components']['schemas']['person']['properties']['employment']['Id']", - "root['components']['schemas']['person']['properties']['employment']['EntityAssociationId']", - "root['components']['schemas']['person']['properties']['employment']['EntityAssociationParentEntityId']", - "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['DataModelId']", - 
"root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['Id']", - "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['EntityAssociationId']", - "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['EntityAssociationParentEntityId']", - "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['Id']", - "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['DataModelId']", - "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['EntityAttributeAssociationId']", - "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['EntityId']", - ], - ) - assert not diff, f"Retrieved schema does not match original: {diff}" diff --git a/test/bases/lif/mdr_restapi/test_datamodel_endpoint.py b/test/bases/lif/mdr_restapi/test_datamodel_endpoint.py new file mode 100644 index 0000000..d0050fd --- /dev/null +++ b/test/bases/lif/mdr_restapi/test_datamodel_endpoint.py @@ -0,0 +1,224 @@ +import json +from pathlib import Path + +import pytest +from deepdiff import DeepDiff + + +@pytest.mark.asyncio +async def test_create_source_schema_datamodel_without_upload_success(async_client_mdr, mdr_api_headers): + # Create data model without OpenAPI schema upload + + response = await async_client_mdr.post( + "/datamodels/", + headers=mdr_api_headers, + json={ + "DataModelVersion": "1.0", + "State": "Draft", + "CreationDate": "2025-12-02T21:01:00Z", + "ActivationDate": "2025-12-02T21:01:00Z", + "Name": "Test Source Schema Data Model", + "Type": "SourceSchema", + "BaseDataModelId": None, + "Description": "Test Source Schema Data Model description", + "Notes": "For testing", + "UseConsiderations": "Public use", + "Tags": "test1", + "Contributor": "JSmith", + "ContributorOrganization": "Acme", + "DeprecationDate": "2040-12-12T01:02:00Z", + }, + ) + assert response.status_code == 201, str(response.content) + str(response.text) + str(response.headers) + + # Extract ID from location header + + location = response.headers.get("location") + datamodel_id = int(location.split("/")[-1]) + + # Confirm creation response + + assert response.json() == { + "ActivationDate": "2025-12-02T21:01:00Z", + "BaseDataModelId": None, + "Contributor": "JSmith", + "ContributorOrganization": "Acme", + "CreationDate": "2025-12-02T21:01:00Z", + "DataModelVersion": "1.0", + "Deleted": False, + "DeprecationDate": "2040-12-12T01:02:00Z", + "Description": "Test Source Schema Data Model description", + "Id": datamodel_id, + "Name": "Test Source Schema Data Model", + "Notes": "For testing", + "State": "Draft", + "Tags": "test1", + "Type": "SourceSchema", + "UseConsiderations": "Public use", + } + + # Download full OpenAPI schema with metadata to verify creation + + retrieve_response = await async_client_mdr.get( + f"/datamodels/open_api_schema/{datamodel_id}?download=true&include_entity_md=true&include_attr_md=true&full_export=true", + headers=mdr_api_headers, + ) + assert retrieve_response.status_code == 200, str(retrieve_response.text) + + retrieved_schema = retrieve_response.json() + assert retrieved_schema == { + "components": {"schemas": {}}, + "info": { + "description": "OpenAPI Spec", + "title": 
"Machine-Readable Schema for Test Source Schema Data Model", + "version": "1.0", + }, + "openapi": "3.0.0", + "paths": {}, + }, "Retrieved schema does not match empty schema" + + +@pytest.mark.asyncio +async def test_create_source_schema_datamodel_with_duplicate_valuesets(async_client_mdr, mdr_api_headers): + """ + Create data model with OpenAPI schema upload that contains duplicate valuesets. + + Should fail the creation call. + """ + + schema_path = Path(__file__).parent / "data_model_test_duplicate_valuesets.json" + create_response = await async_client_mdr.post( + "/datamodels/open_api_schema/upload", + headers=mdr_api_headers, + files={"file": ("filename.json", open(schema_path, "rb"), "application/json")}, + data={ + "data_model_version": "1.0", + "state": "Draft", + "activation_date": "2025-12-02T21:01:00Z", + "data_model_name": "Test Source Schema Data Model with Duplicate ValueSets", + "data_model_type": "SourceSchema", + }, + ) + + # Confirm creation response + + assert create_response.status_code == 500, str(create_response.text) + str(create_response.headers) + assert "IntegrityError" in create_response.json()["detail"], str(create_response.text) + str( + create_response.headers + ) + + +@pytest.mark.asyncio +async def test_create_source_schema_datamodel_with_duplicate_valuesetvalues(async_client_mdr, mdr_api_headers): + """ + Create data model with OpenAPI schema upload that contains duplicate valueset values. + + Should fail the creation call. + """ + + schema_path = Path(__file__).parent / "data_model_test_duplicate_valuesetvalues.json" + create_response = await async_client_mdr.post( + "/datamodels/open_api_schema/upload", + headers=mdr_api_headers, + files={"file": ("filename.json", open(schema_path, "rb"), "application/json")}, + data={ + "data_model_version": "1.0", + "state": "Draft", + "activation_date": "2025-12-02T21:01:00Z", + "data_model_name": "Test Source Schema Data Model with Duplicate ValueSetValues", + "data_model_type": "SourceSchema", + }, + ) + + # Confirm creation response + + assert create_response.status_code == 500, str(create_response.text) + str(create_response.headers) + assert "IntegrityError" in create_response.json()["detail"], str(create_response.text) + str( + create_response.headers + ) + + +@pytest.mark.asyncio +async def test_create_source_schema_datamodel_with_upload_success(async_client_mdr, mdr_api_headers): + # Create data model with OpenAPI schema upload + + schema_path = Path(__file__).parent / "data_model_example_datasource_full_openapi_schema.json" + create_response = await async_client_mdr.post( + "/datamodels/open_api_schema/upload", + headers=mdr_api_headers, + files={"file": ("filename.json", open(schema_path, "rb"), "application/json")}, + data={ + "data_model_version": "1.0", + "state": "Draft", + "activation_date": "2025-12-02T21:01:00Z", + "data_model_name": "Test Source Schema Data Model with Upload", + "data_model_type": "SourceSchema", + }, + ) + + # Confirm creation response + + assert create_response.status_code == 201, str(create_response.text) + str(create_response.headers) + # Location header is not populated for this endpoint, so extract ID from response body + data_model_id = create_response.json()["Id"] + assert data_model_id is not None + assert isinstance(data_model_id, int) + + assert create_response.json() == { + "ActivationDate": "2025-12-02T21:01:00Z", + "BaseDataModelId": None, + "Contributor": None, + "ContributorOrganization": None, + "CreationDate": None, + "DataModelVersion": "1.0", + "Deleted": False, + 
"DeprecationDate": None, + "Description": None, + "Id": data_model_id, + "Name": "Test Source Schema Data Model with Upload", + "Notes": None, + "State": "Draft", + "Tags": None, + "Type": "SourceSchema", + "UseConsiderations": None, + } + + # Download full OpenAPI schema with metadata to verify upload + + retrieve_response = await async_client_mdr.get( + f"/datamodels/open_api_schema/{data_model_id}?download=true&include_entity_md=true&include_attr_md=true&full_export=true", + headers=mdr_api_headers, + ) + assert retrieve_response.status_code == 200, str(retrieve_response.text) + + retrieved_schema = retrieve_response.json() + with open(schema_path, "r") as f: + original_schema = json.load(f) + original_schema["info"]["title"] = "Machine-Readable Schema for Test Source Schema Data Model with Upload" + + diff = DeepDiff( + original_schema, + retrieved_schema, + ignore_order=True, + exclude_paths=[ + "root['components']['schemas']['person']['Id']", + "root['components']['schemas']['person']['DataModelId']", + "root['components']['schemas']['person']['properties']['id']['Id']", + "root['components']['schemas']['person']['properties']['id']['DataModelId']", + "root['components']['schemas']['person']['properties']['id']['EntityAttributeAssociationId']", + "root['components']['schemas']['person']['properties']['id']['EntityId']", + "root['components']['schemas']['person']['properties']['employment']['DataModelId']", + "root['components']['schemas']['person']['properties']['employment']['Id']", + "root['components']['schemas']['person']['properties']['employment']['EntityAssociationId']", + "root['components']['schemas']['person']['properties']['employment']['EntityAssociationParentEntityId']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['DataModelId']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['Id']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['EntityAssociationId']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['EntityAssociationParentEntityId']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['Id']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['DataModelId']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['EntityAttributeAssociationId']", + "root['components']['schemas']['person']['properties']['employment']['properties']['preferences']['properties']['preferred_org_types']['EntityId']", + ], + ) + assert not diff, f"Retrieved schema does not match original: {diff}" diff --git a/test/bases/lif/mdr_restapi/test_transformation_endpoint.py b/test/bases/lif/mdr_restapi/test_transformation_endpoint.py new file mode 100644 index 0000000..4f0c3e5 --- /dev/null +++ b/test/bases/lif/mdr_restapi/test_transformation_endpoint.py @@ -0,0 +1,480 @@ +import inspect + +import pytest + +from test.utils.lif.datasets.transform_deep_literal_attribute.loader import DatasetTransformDeepLiteralAttribute +from test.utils.lif.datasets.transform_with_embeddings.loader import DatasetTransformWithEmbeddings +from test.utils.lif.mdr.api import convert_unique_names_to_id_path, create_transformation, update_transformation +from 
test.utils.lif.translator.api import create_translation + + +# Old tests use the name.dot.name format for entityIdPath; once that API logic is removed, these tests can be deleted. +@pytest.mark.asyncio +async def test_transforms_deep_literal_attribute_old_api_format( + async_client_mdr, async_client_translator, mdr_api_headers +): + """ + Transform a 'deep' literal attribute to another deep literal attribute using the old API format for entityIdPath. + + Source and Target are source schemas. + + """ + + test_case_name = inspect.currentframe().f_code.co_name + + # General setup for dataset deep_literal_attribute + + dataset_transform_deep_literal_attribute = await DatasetTransformDeepLiteralAttribute.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transform - Old API format for entityIdPath + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_deep_literal_attribute.transformation_group_id, + source_parent_entity_id=dataset_transform_deep_literal_attribute.source_parent_entity_id, + source_attribute_id=dataset_transform_deep_literal_attribute.source_attribute_id, + source_entity_path="Person.Courses", + target_parent_entity_id=dataset_transform_deep_literal_attribute.target_parent_entity_id, + target_attribute_id=dataset_transform_deep_literal_attribute.target_attribute_id, + target_entity_path="User.Skills", + mapping_expression='{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }', + transformation_name="User.Skills.Genre", + ) + + # Use the transform via the Translator endpoint + + translated_json = await create_translation( + async_client_translator=async_client_translator, + source_data_model_id=dataset_transform_deep_literal_attribute.source_data_model_id, + target_data_model_id=dataset_transform_deep_literal_attribute.target_data_model_id, + json_to_translate={"Person": {"Courses": {"Grade": "A", "Style": "Lecture"}}}, + headers=mdr_api_headers, + ) + assert translated_json == {"User": {"Skills": {"Genre": "A"}}} + + +@pytest.mark.asyncio +async def test_transforms_deep_literal_attribute(async_client_mdr, async_client_translator, mdr_api_headers): + """ + Transform a 'deep' literal attribute to another deep literal attribute. + + Source and Target are source schemas.
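+ Entity paths are numeric ID paths built with convert_unique_names_to_id_path (comma-separated IDs; a trailing negative ID marks an attribute).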
+ + """ + + test_case_name = inspect.currentframe().f_code.co_name + + # General setup for dataset deep_literal_attribute + + dataset_transform_deep_literal_attribute = await DatasetTransformDeepLiteralAttribute.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transform + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_deep_literal_attribute.transformation_group_id, + source_parent_entity_id=None, + source_attribute_id=dataset_transform_deep_literal_attribute.source_attribute_id, + source_entity_path=convert_unique_names_to_id_path( + dataset_transform_deep_literal_attribute.source_schema, + ["person", "person.courses", "person.courses.grade"], + True, + ), + target_parent_entity_id=None, + target_attribute_id=dataset_transform_deep_literal_attribute.target_attribute_id, + target_entity_path=convert_unique_names_to_id_path( + dataset_transform_deep_literal_attribute.target_schema, ["user", "user.skills", "user.skills.genre"], True + ), + mapping_expression='{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }', + transformation_name="User.Skills.Genre", + ) + + # Use the transform via the Translator endpoint + + translated_json = await create_translation( + async_client_translator=async_client_translator, + source_data_model_id=dataset_transform_deep_literal_attribute.source_data_model_id, + target_data_model_id=dataset_transform_deep_literal_attribute.target_data_model_id, + json_to_translate={"Person": {"Courses": {"Grade": "A", "Style": "Lecture"}}}, + headers=mdr_api_headers, + ) + assert translated_json == {"User": {"Skills": {"Genre": "A"}}} + + +@pytest.mark.asyncio +async def test_transforms_into_target_entity(async_client_mdr, async_client_translator, mdr_api_headers): + """ + Transform a 'deep' literal attribute into a target entity. + + Source and Target are source schemas. 
+ + """ + + test_case_name = inspect.currentframe().f_code.co_name + + # General setup for dataset deep_literal_attribute + + dataset_transform_deep_literal_attribute = await DatasetTransformDeepLiteralAttribute.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transform + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_deep_literal_attribute.transformation_group_id, + source_parent_entity_id=None, + source_attribute_id=dataset_transform_deep_literal_attribute.source_attribute_id, + source_entity_path=convert_unique_names_to_id_path( + dataset_transform_deep_literal_attribute.source_schema, + ["person", "person.courses", "person.courses.grade"], + True, + ), + target_parent_entity_id=None, + target_attribute_id=dataset_transform_deep_literal_attribute.target_attribute_id, + target_entity_path=convert_unique_names_to_id_path( + dataset_transform_deep_literal_attribute.target_schema, ["user"], False + ), + mapping_expression='{ "User": Person.Courses.Grade }', + transformation_name="User.Skills.Genre", + ) + + # Use the transform via the Translator endpoint + + translated_json = await create_translation( + async_client_translator=async_client_translator, + source_data_model_id=dataset_transform_deep_literal_attribute.source_data_model_id, + target_data_model_id=dataset_transform_deep_literal_attribute.target_data_model_id, + json_to_translate={"Person": {"Courses": {"Grade": "A", "Style": "Lecture"}}}, + headers=mdr_api_headers, + ) + assert translated_json == {"User": "A"} + + +@pytest.mark.asyncio +async def test_create_transform_fail_empty_source_attribute_path(async_client_mdr, async_client_translator): + """ + Confirms an empty source attribute path is rejected. + + Source and Target are source schemas. + + """ + + test_case_name = inspect.currentframe().f_code.co_name + + # General setup for dataset deep_literal_attribute + + dataset_transform_deep_literal_attribute = await DatasetTransformDeepLiteralAttribute.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transform + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_deep_literal_attribute.transformation_group_id, + source_parent_entity_id=dataset_transform_deep_literal_attribute.source_parent_entity_id, + source_attribute_id=dataset_transform_deep_literal_attribute.source_attribute_id, + source_entity_path="", # This is the point of the test! + target_parent_entity_id=dataset_transform_deep_literal_attribute.target_parent_entity_id, + target_attribute_id=dataset_transform_deep_literal_attribute.target_attribute_id, + target_entity_path="0,0", # Doesn't matter for this test + mapping_expression='{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }', + transformation_name="User.Skills.Genre", + expected_status_code=400, + expected_response={"detail": "Invalid EntityIdPath format. The path must not be empty."}, + ) + + +@pytest.mark.asyncio +async def test_create_transform_fail_non_numeric_source_attribute_path_entry(async_client_mdr, async_client_translator): + """ + Confirms only numeric IDs in the source attribute path are allowed. 
+ + Source and Target are source schemas. + + """ + + test_case_name = inspect.currentframe().f_code.co_name + + # General setup for dataset deep_literal_attribute (source sourceSchema, target sourceSchema, transform group, and relevant IDs) + + dataset_transform_deep_literal_attribute = await DatasetTransformDeepLiteralAttribute.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transform + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_deep_literal_attribute.transformation_group_id, + source_parent_entity_id=dataset_transform_deep_literal_attribute.source_parent_entity_id, + source_attribute_id=dataset_transform_deep_literal_attribute.source_attribute_id, + source_entity_path="a,b", # This is the point of the test! + target_parent_entity_id=dataset_transform_deep_literal_attribute.target_parent_entity_id, + target_attribute_id=dataset_transform_deep_literal_attribute.target_attribute_id, + target_entity_path="0,0", # Doesn't matter for this test + mapping_expression='{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }', + transformation_name="User.Skills.Genre", + expected_status_code=400, + expected_response={ + "detail": "Invalid EntityIdPath format. IDs must be in the format 'id1,id2,...,idN' and all IDs must be integers." + }, + ) + + +# Old tests use the name.dot.name format for entityIdPath; once that API logic is removed, these tests can be deleted. +@pytest.mark.asyncio +async def test_transforms_with_embeddings_old_api_format(async_client_mdr, async_client_translator, mdr_api_headers): + """ + Transform source and target attributes both from their original location and their entity embedded location. + + Source and Target are source schemas.
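+ Three transformations (one per flow) are created and then exercised together in a single translation call.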
+ + """ + + test_case_name = inspect.currentframe().f_code.co_name + + dataset_transform_with_embeddings = await DatasetTransformWithEmbeddings.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transformations + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_with_embeddings.transformation_group_id, + source_parent_entity_id=dataset_transform_with_embeddings.flow1_source_parent_entity_id, + source_attribute_id=dataset_transform_with_embeddings.flow1_source_attribute_id, + source_entity_path="Person.Employment.SkillsGainedFromCourses", + target_parent_entity_id=dataset_transform_with_embeddings.flow1_target_parent_entity_id, + target_attribute_id=dataset_transform_with_embeddings.flow1_target_attribute_id, + target_entity_path="User.Workplace.Abilities.Skills", + mapping_expression='{ "User": { "Workplace": { "Abilities": { "Skills": { "LevelOfSkillAbility": Person.Employment.SkillsGainedFromCourses.SkillLevel } } } } }', + transformation_name="User.Workplace.Abilities.Skills.LevelOfSkillAbility", + ) + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_with_embeddings.transformation_group_id, + source_parent_entity_id=dataset_transform_with_embeddings.flow2_source_parent_entity_id, + source_attribute_id=dataset_transform_with_embeddings.flow2_source_attribute_id, + source_entity_path="Person.Employment.Profession", + target_parent_entity_id=dataset_transform_with_embeddings.flow2_target_parent_entity_id, + target_attribute_id=dataset_transform_with_embeddings.flow2_target_attribute_id, + target_entity_path="User.Abilities.Skills", + mapping_expression='{ "User": { "Abilities": { "Skills": { "LevelOfSkillAbility": Person.Employment.Profession.DurationAtProfession } } } }', + transformation_name="User.Abilities.Skills.LevelOfSkillAbility", + ) + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_with_embeddings.transformation_group_id, + source_parent_entity_id=dataset_transform_with_embeddings.flow3_source_parent_entity_id, + source_attribute_id=dataset_transform_with_embeddings.flow3_source_attribute_id, + source_entity_path="Person.Courses.SkillsGainedFromCourses", + target_parent_entity_id=dataset_transform_with_embeddings.flow3_target_parent_entity_id, + target_attribute_id=dataset_transform_with_embeddings.flow3_target_attribute_id, + target_entity_path="User.Preferences", + mapping_expression='{ "User": { "Preferences": { "WorkPreference": Person.Courses.SkillsGainedFromCourses.SkillLevel } } }', + transformation_name="User.Preferences.WorkPreference", + ) + + # Use the transformations via the Translator endpoint + + translated_json = await create_translation( + async_client_translator=async_client_translator, + source_data_model_id=dataset_transform_with_embeddings.source_data_model_id, + target_data_model_id=dataset_transform_with_embeddings.target_data_model_id, + json_to_translate={ + "Person": { + "Employment": { + "SkillsGainedFromCourses": {"SkillLevel": "Mastery"}, + "Profession": {"DurationAtProfession": "10 Years"}, + }, + "Courses": {"SkillsGainedFromCourses": {"SkillLevel": "Advanced"}}, + } + }, + headers=mdr_api_headers, + ) + assert translated_json == { + "User": { + "Workplace": {"Abilities": 
{"Skills": {"LevelOfSkillAbility": "Mastery"}}}, + "Abilities": {"Skills": {"LevelOfSkillAbility": "10 Years"}}, + "Preferences": {"WorkPreference": "Advanced"}, + } + } + + +@pytest.mark.asyncio +async def test_transforms_with_embeddings(async_client_mdr, async_client_translator, mdr_api_headers): + """ + Transform source and target attributes both from their original location and their entity embedded location. + + Source and Target are source schemas. + + """ + + test_case_name = inspect.currentframe().f_code.co_name + + dataset_transform_with_embeddings = await DatasetTransformWithEmbeddings.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transformations + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_with_embeddings.transformation_group_id, + source_parent_entity_id=None, + source_attribute_id=dataset_transform_with_embeddings.flow1_source_attribute_id, + source_entity_path=dataset_transform_with_embeddings.flow1_source_entity_id_path, + target_parent_entity_id=None, + target_attribute_id=dataset_transform_with_embeddings.flow1_target_attribute_id, + target_entity_path=dataset_transform_with_embeddings.flow1_target_entity_id_path, + mapping_expression='{ "User": { "Workplace": { "Abilities": { "Skills": { "LevelOfSkillAbility": Person.Employment.SkillsGainedFromCourses.SkillLevel } } } } }', + transformation_name="User.Workplace.Abilities.Skills.LevelOfSkillAbility", + ) + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_with_embeddings.transformation_group_id, + source_parent_entity_id=None, + source_attribute_id=dataset_transform_with_embeddings.flow2_source_attribute_id, + source_entity_path=dataset_transform_with_embeddings.flow2_source_entity_id_path, + target_parent_entity_id=None, + target_attribute_id=dataset_transform_with_embeddings.flow2_target_attribute_id, + target_entity_path=dataset_transform_with_embeddings.flow2_target_entity_id_path, + mapping_expression='{ "User": { "Abilities": { "Skills": { "LevelOfSkillAbility": Person.Employment.Profession.DurationAtProfession } } } }', + transformation_name="User.Abilities.Skills.LevelOfSkillAbility", + ) + + _ = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_with_embeddings.transformation_group_id, + source_parent_entity_id=None, + source_attribute_id=dataset_transform_with_embeddings.flow3_source_attribute_id, + source_entity_path=dataset_transform_with_embeddings.flow3_source_entity_id_path, + target_parent_entity_id=None, + target_attribute_id=dataset_transform_with_embeddings.flow3_target_attribute_id, + target_entity_path=dataset_transform_with_embeddings.flow3_target_entity_id_path, + mapping_expression='{ "User": { "Preferences": { "WorkPreference": Person.Courses.SkillsGainedFromCourses.SkillLevel } } }', + transformation_name="User.Preferences.WorkPreference", + ) + + # Use the transformations via the Translator endpoint + + translated_json = await create_translation( + async_client_translator=async_client_translator, + source_data_model_id=dataset_transform_with_embeddings.source_data_model_id, + target_data_model_id=dataset_transform_with_embeddings.target_data_model_id, + json_to_translate={ + "Person": { + "Employment": { + 
"SkillsGainedFromCourses": {"SkillLevel": "Mastery"}, + "Profession": {"DurationAtProfession": "10 Years"}, + }, + "Courses": {"SkillsGainedFromCourses": {"SkillLevel": "Advanced"}}, + } + }, + headers=mdr_api_headers, + ) + assert translated_json == { + "User": { + "Workplace": {"Abilities": {"Skills": {"LevelOfSkillAbility": "Mastery"}}}, + "Abilities": {"Skills": {"LevelOfSkillAbility": "10 Years"}}, + "Preferences": {"WorkPreference": "Advanced"}, + } + } + + +@pytest.mark.asyncio +async def test_update_transform_only_expression(async_client_mdr, async_client_translator, mdr_api_headers): + """ + Confirms a transformation update can occur for just the expression. + + Source and Target are source schemas. + + """ + + test_case_name = inspect.currentframe().f_code.co_name + + # General setup for dataset deep_literal_attribute (source sourceSchema, target sourceSchema, transform group, and relevant IDs) + + dataset_transform_deep_literal_attribute = await DatasetTransformDeepLiteralAttribute.prepare( + async_client_mdr=async_client_mdr, + source_data_model_name=f"{test_case_name}_source", + target_data_model_name=f"{test_case_name}_target", + transformation_group_name=f"{test_case_name}_transform_group", + ) + + # Create transform + + transformation = await create_transformation( + async_client_mdr=async_client_mdr, + transformation_group_id=dataset_transform_deep_literal_attribute.transformation_group_id, + source_parent_entity_id=dataset_transform_deep_literal_attribute.source_parent_entity_id, + source_attribute_id=dataset_transform_deep_literal_attribute.source_attribute_id, + source_entity_path=dataset_transform_deep_literal_attribute.source_entity_id_path, + target_parent_entity_id=dataset_transform_deep_literal_attribute.target_parent_entity_id, + target_attribute_id=dataset_transform_deep_literal_attribute.target_attribute_id, + target_entity_path=dataset_transform_deep_literal_attribute.target_entity_id_path, + mapping_expression='{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }', + transformation_name="User.Skills.Genre", + ) + + # Use the transform via the Translator endpoint to prove original translation + + json_to_translate = {"Person": {"Courses": {"Grade": "K"}}} + translated_json = await create_translation( + async_client_translator=async_client_translator, + source_data_model_id=dataset_transform_deep_literal_attribute.source_data_model_id, + target_data_model_id=dataset_transform_deep_literal_attribute.target_data_model_id, + json_to_translate=json_to_translate, + headers=mdr_api_headers, + ) + assert translated_json == {"User": {"Skills": {"Genre": "K"}}} + + _ = await update_transformation( + async_client_mdr=async_client_mdr, + original_transformation=transformation, + expression='{ "User": { "Skills": { "Genre": Person.Courses } } }', + ) + + # Use the transform via the Translator endpoint to prove the updated expression + + translated_json = await create_translation( + async_client_translator=async_client_translator, + source_data_model_id=dataset_transform_deep_literal_attribute.source_data_model_id, + target_data_model_id=dataset_transform_deep_literal_attribute.target_data_model_id, + json_to_translate=json_to_translate, + headers=mdr_api_headers, + ) + assert translated_json == {"User": {"Skills": {"Genre": {"Grade": "K"}}}} diff --git a/test/utils/lif/datasets/__init__.py b/test/utils/lif/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/lif/datasets/transform_deep_literal_attribute/__init__.py 
b/test/utils/lif/datasets/transform_deep_literal_attribute/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/lif/datasets/transform_deep_literal_attribute/loader.py b/test/utils/lif/datasets/transform_deep_literal_attribute/loader.py new file mode 100644 index 0000000..1ac0e25 --- /dev/null +++ b/test/utils/lif/datasets/transform_deep_literal_attribute/loader.py @@ -0,0 +1,98 @@ +from dataclasses import dataclass +from pathlib import Path + +from httpx import AsyncClient + +from test.utils.lif.mdr.api import ( + convert_unique_names_to_id_path, + create_data_model_by_upload, + create_transformation_groups, + find_object_property_by_unique_name, +) + + +@dataclass +class DatasetTransformDeepLiteralAttribute: + source_data_model_id: str + source_parent_entity_id: str + source_attribute_id: str + source_entity_id_path: str + source_schema: dict + target_data_model_id: str + target_parent_entity_id: str + target_attribute_id: str + target_entity_id_path: str + target_schema: dict + transformation_group_id: str + + @classmethod + async def prepare( + cls, + async_client_mdr: AsyncClient, + source_data_model_name: str, + target_data_model_name: str, + transformation_group_name: str, + ) -> "DatasetTransformDeepLiteralAttribute": + """Prepare the dataset by creating source/target data models and transformation group.""" + + # Create Source Data Model and extract IDs for the entity and attribute + + (source_data_model_id, source_schema) = await create_data_model_by_upload( + async_client_mdr=async_client_mdr, + schema_path=Path(__file__).parent / "transform_deep_literal_attribute_source.json", + data_model_name=source_data_model_name, + data_model_type="SourceSchema", + ) + source_parent_entity_id = find_object_property_by_unique_name(source_schema, "person.courses", "Id") + assert source_parent_entity_id is not None, ( + "Could not find source parent entity ID for person.courses... " + str(source_schema) + ) + source_attribute_id = find_object_property_by_unique_name(source_schema, "person.courses.grade", "Id") + assert source_attribute_id is not None, "Could not find source attribute ID for person.courses.grade... " + str( + source_schema + ) + source_entity_id_path = convert_unique_names_to_id_path( + source_schema, ["person", "person.courses", "person.courses.grade"], True + ) + + # Create Target Data Model and extract IDs for the entity and attribute + + (target_data_model_id, target_schema) = await create_data_model_by_upload( + async_client_mdr=async_client_mdr, + schema_path=Path(__file__).parent / "transform_deep_literal_attribute_target.json", + data_model_name=target_data_model_name, + data_model_type="SourceSchema", + ) + target_parent_entity_id = find_object_property_by_unique_name(target_schema, "user.skills", "Id") + assert target_parent_entity_id is not None, "Could not find target parent entity ID for user.skills... " + str( + target_schema + ) + target_attribute_id = find_object_property_by_unique_name(target_schema, "user.skills.genre", "Id") + assert target_attribute_id is not None, "Could not find target attribute ID for user.skills.genre... 
" + str( + target_schema + ) + target_entity_id_path = convert_unique_names_to_id_path( + target_schema, ["user", "user.skills", "user.skills.genre"], True + ) + + # Create transform group between source and target + + transformation_group_id = await create_transformation_groups( + async_client_mdr=async_client_mdr, + source_data_model_id=source_data_model_id, + target_data_model_id=target_data_model_id, + group_name=transformation_group_name, + ) + return cls( + source_data_model_id=source_data_model_id, + source_parent_entity_id=source_parent_entity_id, + source_attribute_id=source_attribute_id, + source_entity_id_path=source_entity_id_path, + source_schema=source_schema, + target_data_model_id=target_data_model_id, + target_parent_entity_id=target_parent_entity_id, + target_attribute_id=target_attribute_id, + target_entity_id_path=target_entity_id_path, + target_schema=target_schema, + transformation_group_id=transformation_group_id, + ) diff --git a/test/bases/lif/mdr_restapi/data_model_test_transforms_deep_literal_attribute_source.json b/test/utils/lif/datasets/transform_deep_literal_attribute/transform_deep_literal_attribute_source.json similarity index 100% rename from test/bases/lif/mdr_restapi/data_model_test_transforms_deep_literal_attribute_source.json rename to test/utils/lif/datasets/transform_deep_literal_attribute/transform_deep_literal_attribute_source.json diff --git a/test/bases/lif/mdr_restapi/data_model_test_transforms_deep_literal_attribute_target.json b/test/utils/lif/datasets/transform_deep_literal_attribute/transform_deep_literal_attribute_target.json similarity index 100% rename from test/bases/lif/mdr_restapi/data_model_test_transforms_deep_literal_attribute_target.json rename to test/utils/lif/datasets/transform_deep_literal_attribute/transform_deep_literal_attribute_target.json diff --git a/test/utils/lif/datasets/transform_with_embeddings/__init__.py b/test/utils/lif/datasets/transform_with_embeddings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/utils/lif/datasets/transform_with_embeddings/loader.py b/test/utils/lif/datasets/transform_with_embeddings/loader.py new file mode 100644 index 0000000..5166521 --- /dev/null +++ b/test/utils/lif/datasets/transform_with_embeddings/loader.py @@ -0,0 +1,222 @@ +from dataclasses import dataclass +from pathlib import Path + +from httpx import AsyncClient + +from test.utils.lif.mdr.api import ( + convert_unique_names_to_id_path, + create_data_model_by_upload, + create_transformation_groups, + find_object_property_by_unique_name, +) + + +@dataclass +class DatasetTransformWithEmbeddings: + source_data_model_id: str + source_schema: dict + flow1_source_parent_entity_id: str + flow1_source_attribute_id: str + flow1_source_entity_id_path: str + flow2_source_parent_entity_id: str + flow2_source_attribute_id: str + flow2_source_entity_id_path: str + flow3_source_parent_entity_id: str + flow3_source_attribute_id: str + flow3_source_entity_id_path: str + target_data_model_id: str + target_schema: dict + flow1_target_parent_entity_id: str + flow1_target_attribute_id: str + flow1_target_entity_id_path: str + flow2_target_parent_entity_id: str + flow2_target_attribute_id: str + flow2_target_entity_id_path: str + flow3_target_parent_entity_id: str + flow3_target_attribute_id: str + flow3_target_entity_id_path: str + transformation_group_id: str + + @classmethod + async def prepare( + cls, + async_client_mdr: AsyncClient, + source_data_model_name: str, + target_data_model_name: str, + 
transformation_group_name: str, + ) -> "DatasetTransformWithEmbeddings": + """Prepare the dataset by creating source/target data models and transformation group.""" + + # Create Source Data Model and extract IDs for the entity and attribute + + (source_data_model_id, source_schema) = await create_data_model_by_upload( + async_client_mdr=async_client_mdr, + schema_path=Path(__file__).parent / "transform_with_embeddings_source.json", + data_model_name=source_data_model_name, + data_model_type="SourceSchema", + ) + + flow1_source_parent_entity_id = find_object_property_by_unique_name( + source_schema, "person.courses.skillsgainedfromcourses", "Id" + ) + assert flow1_source_parent_entity_id is not None, ( + "Could not find source parent entity ID of person.courses.skillsgainedfromcourses... " + str(source_schema) + ) + flow1_source_attribute_id = find_object_property_by_unique_name( + source_schema, "person.courses.skillsgainedfromcourses.skilllevel", "Id" + ) + assert flow1_source_attribute_id is not None, ( + "Could not find source attribute ID of person.courses.skillsgainedfromcourses.skilllevel... " + + str(source_schema) + ) + flow1_source_entity_id_path = convert_unique_names_to_id_path( + source_schema, + [ + "person", + "person.courses", + "person.courses.skillsgainedfromcourses", + "person.courses.skillsgainedfromcourses.skilllevel", + ], + True, + ) + + flow2_source_parent_entity_id = find_object_property_by_unique_name( + source_schema, "person.employment.profession", "Id" + ) + assert flow2_source_parent_entity_id is not None, ( + "Could not find source parent entity ID of person.employment.profession... " + str(source_schema) + ) + flow2_source_attribute_id = find_object_property_by_unique_name( + source_schema, "person.employment.profession.durationatprofession", "Id" + ) + assert flow2_source_attribute_id is not None, ( + "Could not find source attribute ID of person.employment.profession.durationatprofession... " + + str(source_schema) + ) + flow2_source_entity_id_path = convert_unique_names_to_id_path( + source_schema, + [ + "person", + "person.employment", + "person.employment.profession", + "person.employment.profession.durationatprofession", + ], + True, + ) + + flow3_source_parent_entity_id = find_object_property_by_unique_name( + source_schema, "person.courses.skillsgainedfromcourses", "Id" + ) + assert flow3_source_parent_entity_id is not None, ( + "Could not find source parent entity ID of person.courses.skillsgainedfromcourses... " + str(source_schema) + ) + flow3_source_attribute_id = find_object_property_by_unique_name( + source_schema, "person.courses.skillsgainedfromcourses.skilllevel", "Id" + ) + assert flow3_source_attribute_id is not None, ( + "Could not find source attribute ID of person.courses.skillsgainedfromcourses.skilllevel... 
" + + str(source_schema) + ) + flow3_source_entity_id_path = convert_unique_names_to_id_path( + source_schema, + [ + "person", + "person.courses", + "person.courses.skillsgainedfromcourses", + "person.courses.skillsgainedfromcourses.skilllevel", + ], + True, + ) + + # Create Target Data Model and extract IDs for the entity and attribute + + (target_data_model_id, target_schema) = await create_data_model_by_upload( + async_client_mdr=async_client_mdr, + schema_path=Path(__file__).parent / "transform_with_embeddings_target.json", + data_model_name=f"{source_data_model_name}_target", + data_model_type="SourceSchema", + ) + flow1_target_parent_entity_id = find_object_property_by_unique_name( + target_schema, "user.abilities.skills", "Id" + ) + assert flow1_target_parent_entity_id is not None, ( + "Could not find target parent entity ID of user.abilities.skills... " + str(target_schema) + ) + flow1_target_attribute_id = find_object_property_by_unique_name( + target_schema, "user.abilities.skills.levelofskillability", "Id" + ) + assert flow1_target_attribute_id is not None, ( + "Could not find target attribute ID of user.abilities.skills.levelofskillability... " + str(target_schema) + ) + flow1_target_entity_id_path = convert_unique_names_to_id_path( + target_schema, + ["user", "user.abilities", "user.abilities.skills", "user.abilities.skills.levelofskillability"], + True, + ) + + flow2_target_parent_entity_id = find_object_property_by_unique_name( + target_schema, "user.abilities.skills", "Id" + ) + assert flow2_target_parent_entity_id is not None, ( + "Could not find target parent entity ID of user.abilities.skills... " + str(target_schema) + ) + flow2_target_attribute_id = find_object_property_by_unique_name( + target_schema, "user.abilities.skills.levelofskillability", "Id" + ) + assert flow2_target_attribute_id is not None, ( + "Could not find target attribute ID of user.abilities.skills.levelofskillability..." + str(target_schema) + ) + flow2_target_entity_id_path = convert_unique_names_to_id_path( + target_schema, + ["user", "user.abilities", "user.abilities.skills", "user.abilities.skills.levelofskillability"], + True, + ) + + flow3_target_parent_entity_id = find_object_property_by_unique_name(target_schema, "user.preferences", "Id") + assert flow3_target_parent_entity_id is not None, ( + "Could not find target parent entity ID of user.preferences... " + str(target_schema) + ) + flow3_target_attribute_id = find_object_property_by_unique_name( + target_schema, "user.preferences.workpreference", "Id" + ) + assert flow3_target_attribute_id is not None, ( + "Could not find target attribute ID of user.preferences.workpreference..." 
+ str(target_schema) + ) + flow3_target_entity_id_path = convert_unique_names_to_id_path( + target_schema, ["user", "user.preferences", "user.preferences.workpreference"], True + ) + + # Create transform group between source and target + + transformation_group_id = await create_transformation_groups( + async_client_mdr=async_client_mdr, + source_data_model_id=source_data_model_id, + target_data_model_id=target_data_model_id, + group_name=transformation_group_name, + ) + + return cls( + source_data_model_id=source_data_model_id, + source_schema=source_schema, + flow1_source_parent_entity_id=flow1_source_parent_entity_id, + flow1_source_attribute_id=flow1_source_attribute_id, + flow1_source_entity_id_path=flow1_source_entity_id_path, + flow2_source_parent_entity_id=flow2_source_parent_entity_id, + flow2_source_attribute_id=flow2_source_attribute_id, + flow2_source_entity_id_path=flow2_source_entity_id_path, + flow3_source_parent_entity_id=flow3_source_parent_entity_id, + flow3_source_attribute_id=flow3_source_attribute_id, + flow3_source_entity_id_path=flow3_source_entity_id_path, + target_data_model_id=target_data_model_id, + flow1_target_parent_entity_id=flow1_target_parent_entity_id, + flow1_target_attribute_id=flow1_target_attribute_id, + flow1_target_entity_id_path=flow1_target_entity_id_path, + flow2_target_parent_entity_id=flow2_target_parent_entity_id, + flow2_target_attribute_id=flow2_target_attribute_id, + flow2_target_entity_id_path=flow2_target_entity_id_path, + flow3_target_parent_entity_id=flow3_target_parent_entity_id, + flow3_target_attribute_id=flow3_target_attribute_id, + flow3_target_entity_id_path=flow3_target_entity_id_path, + transformation_group_id=transformation_group_id, + target_schema=target_schema, + ) diff --git a/test/bases/lif/mdr_restapi/data_model_test_transforms_with_embeddings_source.json b/test/utils/lif/datasets/transform_with_embeddings/transform_with_embeddings_source.json similarity index 100% rename from test/bases/lif/mdr_restapi/data_model_test_transforms_with_embeddings_source.json rename to test/utils/lif/datasets/transform_with_embeddings/transform_with_embeddings_source.json diff --git a/test/bases/lif/mdr_restapi/data_model_test_transforms_with_embeddings_target.json b/test/utils/lif/datasets/transform_with_embeddings/transform_with_embeddings_target.json similarity index 100% rename from test/bases/lif/mdr_restapi/data_model_test_transforms_with_embeddings_target.json rename to test/utils/lif/datasets/transform_with_embeddings/transform_with_embeddings_target.json diff --git a/test/utils/lif/mdr/api.py b/test/utils/lif/mdr/api.py index 5259861..199784a 100644 --- a/test/utils/lif/mdr/api.py +++ b/test/utils/lif/mdr/api.py @@ -1,11 +1,56 @@ +import copy from pathlib import Path -from typing import Tuple +from typing import Optional, Tuple +from deepdiff import DeepDiff from httpx import AsyncClient HEADER_MDR_API_KEY_GRAPHQL = {"X-API-Key": "changeme1"} +def find_object_property_by_unique_name(schema: dict, unique_name: str, property_name: str) -> str | None: + """ + Recursively search for an object with the given UniqueName. 
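+ Walks nested dictionaries depth-first and stops at the first matching object.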
+ + Returns: + The value of property_name on the matching object, or None if no object with the given UniqueName is found. + + Raises: + AssertionError: If a matching object is found but does not contain property_name. + """ + + for _, value in schema.items(): + if isinstance(value, dict): + # Check if this dict has the matching UniqueName + if value.get("UniqueName") == unique_name: + if property_name in value: + return value[property_name] + else: + # Found the object but the property doesn't exist + raise AssertionError( + f"Property '{property_name}' not found in object with UniqueName '{unique_name}'. Object: {value}" + ) + + # Recursively search this dict + result = find_object_property_by_unique_name(value, unique_name, property_name) + if result is not None: + return result + + # Return None if not found (let caller decide to assert) + return None + + +def convert_unique_names_to_id_path(schema: dict, unique_names: list[str], ends_in_an_attribute: bool) -> str: + """ + Convert a list of UniqueNames into a comma-separated EntityIdPath of numeric IDs. + + If ends_in_an_attribute is True, the last ID is negated to mark it as an attribute ID. + """ + assert len(unique_names) > 0, "unique_names list must not be empty" + sub_schema = schema.get("components", {}).get("schemas", None) + assert sub_schema is not None, f"Could not find components.schemas in schema: {schema}" + id_path_list = [] + for unique_name in unique_names: + object_id = find_object_property_by_unique_name(sub_schema, unique_name, "Id") + assert object_id is not None, f"Could not find object ID for UniqueName '{unique_name}' in schema: {sub_schema}" + id_path_list.append(str(object_id)) + if ends_in_an_attribute: + id_path_list[-1] = str(int(id_path_list[-1]) * -1) # Mark the last ID as an attribute (a negative id) + return ",".join(id_path_list) + + async def create_data_model_by_upload( *, async_client_mdr: AsyncClient, @@ -132,6 +177,8 @@ async def create_transformation( mapping_expression: str, # '{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }' transformation_name: str, # "User.Skills.Genre", headers: dict = HEADER_MDR_API_KEY_GRAPHQL, + expected_status_code: int = 201, + expected_response: Optional[dict] = None, ) -> str: """ Helper function to create a transform between a single source attribute and a target attribute @@ -178,50 +225,99 @@ async def create_transformation( ) # Confirm transform response and gather ID - - assert response.status_code == 201, str(response.text) + str(response.headers) - transformation_id = response.json()["Id"] - assert response.json() == { - "Id": transformation_id, - "TransformationGroupId": transformation_group_id, - "Name": transformation_name, - "Expression": mapping_expression, - "ExpressionLanguage": "JSONata", - "Notes": None, - "Alignment": None, - "CreationDate": None, - "ActivationDate": None, - "DeprecationDate": None, - "Contributor": None, - "ContributorOrganization": None, - "SourceAttributes": [ + response_json = response.json() + if expected_status_code == 201: + assert response.status_code == 201, str(response.text) + str(response.headers) + transformation_id = response_json["Id"] + diff = DeepDiff( + response_json, { - "AttributeId": source_attribute_id, - "EntityId": source_parent_entity_id, - "AttributeName": None, - "AttributeType": "Source", + "Id": transformation_id, + "TransformationGroupId": transformation_group_id, + "Name": transformation_name, + "Expression": mapping_expression, + "ExpressionLanguage": "JSONata", "Notes": None, + "Alignment": None, "CreationDate": None, "ActivationDate": None, "DeprecationDate": None, "Contributor": None, "ContributorOrganization": None, - "EntityIdPath": source_entity_path, - } - ], - "TargetAttribute": { - "AttributeId": target_attribute_id, - "EntityId": target_parent_entity_id, - "AttributeName": None, - "AttributeType": "Target", - "Notes": 
None, - "CreationDate": None, - "ActivationDate": None, - "DeprecationDate": None, - "Contributor": None, - "ContributorOrganization": None, - "EntityIdPath": target_entity_path, - }, - } + "SourceAttributes": [ + { + "AttributeId": source_attribute_id, + "EntityId": source_parent_entity_id, + "AttributeName": None, + "AttributeType": "Source", + "Notes": None, + "CreationDate": None, + "ActivationDate": None, + "DeprecationDate": None, + "Contributor": None, + "ContributorOrganization": None, + "EntityIdPath": source_entity_path, + } + ], + "TargetAttribute": { + "AttributeId": target_attribute_id, + "EntityId": target_parent_entity_id, + "AttributeName": None, + "AttributeType": "Target", + "Notes": None, + "CreationDate": None, + "ActivationDate": None, + "DeprecationDate": None, + "Contributor": None, + "ContributorOrganization": None, + "EntityIdPath": target_entity_path, + }, + }, + ) + assert diff == {}, diff + return response_json + else: + assert response.status_code == expected_status_code, str(response.text) + str(response.headers) + if expected_response is not None: + assert response.json() == expected_response, str(response.text) + str(response.headers) + return response.json() + + +async def update_transformation( + *, + async_client_mdr: AsyncClient, + original_transformation: dict, + expression: Optional[str], # '{ "User": { "Skills": { "Genre": Person.Courses.Grade } } }' + headers: dict = HEADER_MDR_API_KEY_GRAPHQL, + expected_status_code: int = 200, + expected_response: Optional[dict] = None, +) -> Optional[str]: + """ + Helper function to update a transform + + Currently only supports updating the Expression field. More to come! + """ + + expected_transformation = copy.deepcopy(original_transformation) + update_payload = {"TransformationGroupId": original_transformation["TransformationGroupId"]} + + if expression: + expected_transformation["Expression"] = expression + update_payload["Expression"] = expression + + response = await async_client_mdr.put( + f"/transformation_groups/transformations/{original_transformation['Id']}", headers=headers, json=update_payload + ) + + # Confirm transform response - return transformation_id + if expected_status_code == 200: + assert response.status_code == 200, str(response.text) + str(response.headers) + diff = DeepDiff(response.json(), expected_transformation) + assert diff == {}, diff + else: + assert response.status_code == expected_status_code, str(response.text) + str(response.headers) + if expected_response is not None: + assert response.json() == expected_response, str(response.text) + str(response.headers) + return response.text + return None