From 73622119242a5a118e3673e1750070509b459f9d Mon Sep 17 00:00:00 2001
From: Dumitru <d.prijilevschi@gmail.com>
Date: Thu, 16 Jan 2025 12:07:49 +0200
Subject: [PATCH] implement feature tests

---
 .../notice_metadata_processor/conftest.py     |  42 +++++++
 .../notice_normalisation.feature              |  26 ++++
 .../notice_normalisation.py                   | 119 ++++++++++++++++++
 tests/features/notice_packager/conftest.py    |  28 ++++-
 .../test_template_generator.feature           |  10 ++
 .../test_template_generator.py                |  49 ++++++++
 .../test_metadata_normaliser.py               |   9 +-
 7 files changed, 277 insertions(+), 6 deletions(-)
 create mode 100644 tests/features/notice_metadata_processor/notice_normalisation.feature
 create mode 100644 tests/features/notice_metadata_processor/notice_normalisation.py
 create mode 100644 tests/features/notice_packager/test_template_generator.feature
 create mode 100644 tests/features/notice_packager/test_template_generator.py
diff --git a/tests/features/notice_metadata_processor/conftest.py b/tests/features/notice_metadata_processor/conftest.py
index e73e498e..129699aa 100644
--- a/tests/features/notice_metadata_processor/conftest.py
+++ b/tests/features/notice_metadata_processor/conftest.py
@@ -1,8 +1,13 @@
+import pathlib
+
 import pytest
 
 from ted_sws import config
+from ted_sws.core.model.manifestation import XMLManifestation
+from ted_sws.core.model.notice import Notice
 from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem, \
     MappingSuiteRepositoryMongoDB
+from ted_sws.data_sampler.services.notice_xml_indexer import index_notice
 from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice
 from tests import TEST_DATA_PATH
 from tests.fakes.fake_repository import FakeNoticeRepository
@@ -34,6 +39,7 @@ def normalised_notice(notice_2020):
     normalise_notice(notice=notice)
     return notice
 
+
 @pytest.fixture
 def normalised_eForm_notice(indexed_eform_notice_622690):
     notice = indexed_eform_notice_622690.copy()
@@ -51,3 +57,39 @@ def mapping_suite_repository_with_mapping_suite(notice_eligibility_repository_pa
 def clean_mapping_suite_repository(mongodb_client):
     mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client)
     return mapping_suite_repository
+
+
+@pytest.fixture
+def sample_ef_html_unsafe_notice_path() -> pathlib.Path:  # location of the EF sample notice XML
+    return TEST_DATA_PATH.joinpath("notice_normalisation", "ef_html_unsafe_notice.xml")
+
+
+@pytest.fixture
+def sample_indexed_ef_html_unsafe_notice(
+        sample_ef_html_unsafe_notice_path: pathlib.Path) -> Notice:
+    """Build a Notice from the EF sample XML (read as UTF-8, locale-independent) and index it."""
+    notice: Notice = Notice(ted_id=sample_ef_html_unsafe_notice_path.name)
+    xml_content: str = sample_ef_html_unsafe_notice_path.read_text(encoding="utf-8")
+    notice.set_xml_manifestation(XMLManifestation(object_data=xml_content))
+    return index_notice(notice)
+
+
+@pytest.fixture
+def sample_sf_html_unsafe_notice_path() -> pathlib.Path:  # location of the SF sample notice XML
+    return TEST_DATA_PATH.joinpath("notice_normalisation", "sf_html_unsafe_notice.xml")
+
+
+@pytest.fixture
+def sample_indexed_sf_html_unsafe_notice(
+        sample_sf_html_unsafe_notice_path: pathlib.Path) -> Notice:
+    """Build a Notice from the SF sample XML (read as UTF-8, locale-independent) and index it."""
+    notice: Notice = Notice(ted_id=sample_sf_html_unsafe_notice_path.name)
+    xml_content: str = sample_sf_html_unsafe_notice_path.read_text(encoding="utf-8")
+    notice.set_xml_manifestation(XMLManifestation(object_data=xml_content))
+    return index_notice(notice)
+
+
+@pytest.fixture
+def html_incompatible_str() -> str:
+    """Provide a sample text holding a bare ampersand and a <br /> tag."""
+    return "Construction work & planning <br />"
diff --git a/tests/features/notice_metadata_processor/notice_normalisation.feature b/tests/features/notice_metadata_processor/notice_normalisation.feature
new file mode 100644
index 00000000..fe130375
--- /dev/null
+++ b/tests/features/notice_metadata_processor/notice_normalisation.feature
@@ -0,0 +1,26 @@
+Feature: Notice Normalisation
+  As a User
+  I want to normalise notice data
+  So that it meets the required format
+
+  Scenario: Normalising notice with spaces in notice ID
+    Given an EF notice with spaces in notice ID
+    And an SF notice with spaces in notice ID
+    When the EF notice is normalised
+    Then the EF notice ID should not contain leading or trailing spaces
+    When the SF notice is normalised
+    Then the SF notice ID should not contain leading or trailing spaces
+
+  Scenario: Processing HTML incompatible string
+    Given an HTML incompatible string
+    When the string cannot be parsed as XML
+    And the string is converted to HTML compatible format
+    Then the resulting string should be well-formed XML
+
+  Scenario: Normalising notice with HTML incompatible title
+    Given an EF notice with HTML incompatible title
+    And an SF notice with HTML incompatible title
+    When the EF notice is normalised
+    Then all EF notice titles should be valid XML
+    When the SF notice is normalised
+    Then all SF notice titles should be valid XML
\ No newline at end of file
diff --git a/tests/features/notice_metadata_processor/notice_normalisation.py b/tests/features/notice_metadata_processor/notice_normalisation.py
new file mode 100644
index 00000000..8b5af7b3
--- /dev/null
+++ b/tests/features/notice_metadata_processor/notice_normalisation.py
@@ -0,0 +1,119 @@
+from xml.etree import ElementTree
+from xml.etree.ElementTree import ParseError
+
+import pytest
+from pytest_bdd import scenario, given, when, then
+
+from ted_sws.core.model.metadata import LanguageTaggedString
+from ted_sws.core.model.notice import Notice
+from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import get_html_compatible_string
+from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice
+
+
+def html_str(content: str) -> str:  # wraps content in an XML declaration plus a <body> root element
+    return f"""<?xml version="1.0" encoding="UTF-8"?> <body>{content}</body>"""
+
+
+@scenario('notice_normalisation.feature', 'Normalising notice with spaces in notice ID')
+def test_normalising_notice_with_spaces():  # NOTE(review): file lacks the test_ prefix - confirm pytest collects it
+    """Bind the 'Normalising notice with spaces in notice ID' scenario from the feature file."""
+
+
+@scenario('notice_normalisation.feature', 'Processing HTML incompatible string')
+def test_processing_html_incompatible_string():
+    """Bind the 'Processing HTML incompatible string' scenario from the feature file."""
+
+
+@scenario('notice_normalisation.feature', 'Normalising notice with HTML incompatible title')
+def test_normalising_notice_with_html_incompatible_title():
+    """Bind the 'Normalising notice with HTML incompatible title' scenario from the feature file."""
+
+
+# Shared fixtures and steps
+@given('an EF notice with spaces in notice ID')
+def an_ef_notice_with_spaces_in_notice_id(sample_indexed_ef_html_unsafe_notice: Notice) -> Notice:
+    """Return the indexed EF sample notice unchanged; no target_fixture is registered here."""
+    return sample_indexed_ef_html_unsafe_notice
+
+
+@given('an SF notice with spaces in notice ID')
+def an_sf_notice_with_spaces_in_notice_id(sample_indexed_sf_html_unsafe_notice: Notice) -> Notice:
+    """Return the indexed SF sample notice unchanged; no target_fixture is registered here."""
+    return sample_indexed_sf_html_unsafe_notice
+
+
+@when('the EF notice is normalised', target_fixture='normalised_ef_notice')
+def the_ef_notice_is_normalised(sample_indexed_ef_html_unsafe_notice: Notice) -> Notice:
+    """Normalise the EF sample notice; the result is exposed as 'normalised_ef_notice'."""
+    return normalise_notice(sample_indexed_ef_html_unsafe_notice)
+
+
+@when('the SF notice is normalised', target_fixture='normalised_sf_notice')
+def the_sf_notice_is_normalised(sample_indexed_sf_html_unsafe_notice: Notice) -> Notice:
+    """Normalise the SF sample notice; the result is exposed as 'normalised_sf_notice'."""
+    return normalise_notice(sample_indexed_sf_html_unsafe_notice)
+
+
+@then('the EF notice ID should not contain leading or trailing spaces')
+def the_ef_notice_id_should_not_contain_leading_or_trailing_spaces(normalised_ef_notice: Notice) -> None:
+    """Assert the EF notice_publication_number is left unchanged by str.strip()."""
+    assert normalised_ef_notice.normalised_metadata.notice_publication_number.strip() == \
+           normalised_ef_notice.normalised_metadata.notice_publication_number
+
+
+@then('the SF notice ID should not contain leading or trailing spaces')
+def the_sf_notice_id_should_not_contain_leading_or_trailing_spaces(normalised_sf_notice: Notice) -> None:
+    """Assert the SF notice_publication_number is left unchanged by str.strip()."""
+    assert normalised_sf_notice.normalised_metadata.notice_publication_number.strip() == \
+           normalised_sf_notice.normalised_metadata.notice_publication_number
+
+
+# HTML incompatible string scenario steps
+@given('an HTML incompatible string', target_fixture='incompatible_string')
+def an_html_incompatible_string(html_incompatible_str: str) -> str:
+    """Expose the html_incompatible_str fixture value as 'incompatible_string'."""
+    return html_incompatible_str
+
+
+@when('the string cannot be parsed as XML')
+def the_string_cannot_be_parsed_as_xml(html_incompatible_str: str):
+    """Check the raw string is malformed once wrapped in a root element, as the Then step wraps it."""
+    with pytest.raises(ParseError):
+        ElementTree.fromstring(html_str(html_incompatible_str))
+
+
+@when('the string is converted to HTML compatible format', target_fixture='compatible_string')
+def the_string_is_converted_to_html_compatible_format(html_incompatible_str: str) -> LanguageTaggedString:
+    """Run get_html_compatible_string on the sample text; the result is exposed as 'compatible_string'."""
+    return get_html_compatible_string(LanguageTaggedString(text=html_incompatible_str))
+
+
+@then('the resulting string should be well-formed XML')
+def the_resulting_string_should_be_well_formed_xml(compatible_string: LanguageTaggedString) -> None:
+    """Parse the converted text wrapped by html_str; a ParseError fails the step."""
+    ElementTree.fromstring(html_str(compatible_string.text))
+
+
+# HTML incompatible title scenario steps
+@given('an EF notice with HTML incompatible title')
+def an_ef_notice_with_html_incompatible_title(sample_indexed_ef_html_unsafe_notice: Notice) -> Notice:
+    """Return the indexed EF sample notice unchanged; no target_fixture is registered here."""
+    return sample_indexed_ef_html_unsafe_notice
+
+
+@given('an SF notice with HTML incompatible title')
+def an_sf_notice_with_html_incompatible_title(sample_indexed_sf_html_unsafe_notice: Notice) -> Notice:
+    """Return the indexed SF sample notice unchanged; no target_fixture is registered here."""
+    return sample_indexed_sf_html_unsafe_notice
+
+
+@then('all EF notice titles should be valid XML')
+def all_ef_notice_titles_should_be_valid_xml(normalised_ef_notice: Notice) -> None:
+    """Parse every normalised EF title wrapped by html_str; any ParseError fails the step."""
+    [ElementTree.fromstring(html_str(title.text)) for title in normalised_ef_notice.normalised_metadata.title]
+
+
+@then('all SF notice titles should be valid XML')
+def all_sf_notice_titles_should_be_valid_xml(normalised_sf_notice: Notice) -> None:
+    """Parse every normalised SF title wrapped by html_str; any ParseError fails the step."""
+    [ElementTree.fromstring(html_str(title.text)) for title in normalised_sf_notice.normalised_metadata.title]
diff --git a/tests/features/notice_packager/conftest.py b/tests/features/notice_packager/conftest.py
index 309d705f..461b9e6a 100644
--- a/tests/features/notice_packager/conftest.py
+++ b/tests/features/notice_packager/conftest.py
@@ -1,7 +1,10 @@
+import json
+from typing import Dict
+
 import pytest
 
-from ted_sws.core.model.manifestation import RDFManifestation
 from ted_sws.core.model.notice import NoticeStatus, Notice
+from ted_sws.notice_packager.model.metadata import PackagerMetadata
 from tests import TEST_DATA_PATH
 
 
@@ -12,3 +15,26 @@ def package_eligible_notice(publicly_available_notice) -> Notice:
             TEST_DATA_PATH / "notice_packager" / "templates" / "2021_S_004_003545_0.notice.rdf").read_text()
     notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_PACKAGING)
     return notice
+
+
+@pytest.fixture
+def template_sample_metadata_json() -> Dict:
+    """Load the template metadata JSON test file; read_text closes the file handle after reading."""
+    return json.loads((TEST_DATA_PATH / "notice_packager" / "template_metadata.json").read_text(encoding="utf-8"))
+
+
+@pytest.fixture
+def template_sample_metadata(template_sample_metadata_json: Dict) -> PackagerMetadata:
+    """Build a PackagerMetadata by unpacking the template JSON dict as keyword arguments."""
+    return PackagerMetadata(**template_sample_metadata_json)
+
+@pytest.fixture
+def work_id_str() -> str:
+    """Return the literal string "work_id"."""
+    return "work_id"
+
+
+@pytest.fixture
+def work_id_predicate() -> str:
+    """Return the URI of the CDM work_id predicate (fragment: work_id)."""
+    return "http://publications.europa.eu/ontology/cdm#work_id"
\ No newline at end of file
diff --git a/tests/features/notice_packager/test_template_generator.feature b/tests/features/notice_packager/test_template_generator.feature
new file mode 100644
index 00000000..cc7d9ab0
--- /dev/null
+++ b/tests/features/notice_packager/test_template_generator.feature
@@ -0,0 +1,10 @@
+
+Feature: Template Generator
+  The system is able to generate METS packages based on jinja templates.
+
+  Scenario: Template Generator generates METS DMD RDF that has work_id
+    Given a PackagerMetadata
+    And a work_id predicate
+    When METS DMD RDF generator is executed
+    Then METS DMD RDF is a valid RDF
+    And work_id persist in METS DMD RDF
\ No newline at end of file
diff --git a/tests/features/notice_packager/test_template_generator.py b/tests/features/notice_packager/test_template_generator.py
new file mode 100644
index 00000000..cd6f4735
--- /dev/null
+++ b/tests/features/notice_packager/test_template_generator.py
@@ -0,0 +1,49 @@
+"""Tests for Template Generator's METS DMD RDF generation."""
+
+from urllib.parse import urlparse, ParseResult
+from pytest_bdd import given, when, then, scenario
+from rdflib import Graph
+
+from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator
+from ted_sws.notice_packager.model.metadata import PackagerMetadata
+
+
+@scenario('test_template_generator.feature', 'Template Generator generates METS DMD RDF that has work_id')
+def test_package_a_ted_notice_in_a_mets_package() -> None:
+    """Bind the 'Template Generator generates METS DMD RDF that has work_id' scenario."""
+
+
+@given('a PackagerMetadata')
+def a_packager_metadata(template_sample_metadata: PackagerMetadata) -> None:
+    """Assert the template_sample_metadata fixture is a truthy PackagerMetadata instance."""
+    assert template_sample_metadata
+    assert isinstance(template_sample_metadata, PackagerMetadata)
+
+
+@given('a work_id predicate')
+def a_work_id_predicate(work_id_predicate: str, work_id_str: str) -> None:
+    """Sanity-check the predicate fixture: non-empty str, has a network location, fragment equals work_id_str."""
+    assert work_id_predicate
+    assert isinstance(work_id_predicate, str)
+    assert work_id_str in work_id_predicate
+    valid_url: ParseResult = urlparse(work_id_predicate)
+    assert valid_url.netloc
+    assert valid_url.fragment == work_id_str
+
+
+@when("METS DMD RDF generator is executed", target_fixture="mets_xml_dmd_rdf")
+def mets_dmd_rdf_generator_is_executed(template_sample_metadata: PackagerMetadata) -> str:
+    """Call TemplateGenerator.mets_xml_dmd_rdf_generator; the output is exposed as 'mets_xml_dmd_rdf'."""
+    return TemplateGenerator.mets_xml_dmd_rdf_generator(template_sample_metadata)
+
+
+@then("METS DMD RDF is a valid RDF")
+def mets_dmd_rdf_is_a_valid_rdf(mets_xml_dmd_rdf: str) -> None:
+    """Parse the generated document with rdflib using format="xml"; a parser error fails the step."""
+    Graph().parse(data=mets_xml_dmd_rdf, format="xml")
+
+
+@then("work_id persist in METS DMD RDF")
+def work_id_persist_in_mets_dmd_rdf(mets_xml_dmd_rdf: str, work_id_str: str) -> None:
+    """Assert work_id_str occurs as a substring of the generated document (plain text check)."""
+    assert work_id_str in mets_xml_dmd_rdf
\ No newline at end of file
diff --git a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py
index dad59604..9ebd48ee 100644
--- a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py
+++ b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py
@@ -1,4 +1,3 @@
-import pathlib
 from xml.etree import ElementTree
 from xml.etree.ElementTree import ParseError
 
@@ -20,9 +19,11 @@
     extract_and_normalise_notice_metadata
 from ted_sws.resources.mapping_files_registry import MappingFilesRegistry
 
+
 def html_str(content: str) -> str:
     return f"""<?xml version="1.0" encoding="UTF-8"?> <body>{content}</body>"""
 
+
 def test_metadata_normaliser_by_notice(indexed_notice):
     notice = normalise_notice(indexed_notice)
     assert notice.normalised_metadata
@@ -261,17 +262,15 @@ def test_get_html_compatible_string(html_incompatible_str: str):
 
     compatible_str: LanguageTaggedString = get_html_compatible_string(LanguageTaggedString(text=html_incompatible_str))
 
-
     # Parse to check if str is well-formed (HTML-safe sequences or elements)
     ElementTree.fromstring(html_str(compatible_str.text))
 
 
 def test_normalising_notice_with_html_incompatible_title(sample_indexed_ef_html_unsafe_notice: Notice,
-                                                     sample_indexed_sf_html_unsafe_notice: Notice):
-
+                                                         sample_indexed_sf_html_unsafe_notice: Notice):
     normalised_ef_notice: Notice = normalise_notice(sample_indexed_ef_html_unsafe_notice)
 
-    [ElementTree.fromstring(html_str(title.text)) for title in normalised_ef_notice.normalised_metadata.title  ]
+    [ElementTree.fromstring(html_str(title.text)) for title in normalised_ef_notice.normalised_metadata.title]
 
     normalised_sf_notice: Notice = normalise_notice(sample_indexed_sf_html_unsafe_notice)