From 73622119242a5a118e3673e1750070509b459f9d Mon Sep 17 00:00:00 2001 From: Dumitru Date: Thu, 16 Jan 2025 12:07:49 +0200 Subject: [PATCH] implement feature tests --- .../notice_metadata_processor/conftest.py | 42 +++++++ .../notice_normalisation.feature | 26 ++++ .../notice_normalisation.py | 119 ++++++++++++++++++ tests/features/notice_packager/conftest.py | 28 ++++- .../test_template_generator.feature | 10 ++ .../test_template_generator.py | 49 ++++++++ .../test_metadata_normaliser.py | 9 +- 7 files changed, 277 insertions(+), 6 deletions(-) create mode 100644 tests/features/notice_metadata_processor/notice_normalisation.feature create mode 100644 tests/features/notice_metadata_processor/notice_normalisation.py create mode 100644 tests/features/notice_packager/test_template_generator.feature create mode 100644 tests/features/notice_packager/test_template_generator.py diff --git a/tests/features/notice_metadata_processor/conftest.py b/tests/features/notice_metadata_processor/conftest.py index e73e498e..129699aa 100644 --- a/tests/features/notice_metadata_processor/conftest.py +++ b/tests/features/notice_metadata_processor/conftest.py @@ -1,8 +1,13 @@ +import pathlib + import pytest from ted_sws import config +from ted_sws.core.model.manifestation import XMLManifestation +from ted_sws.core.model.notice import Notice from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem, \ MappingSuiteRepositoryMongoDB +from ted_sws.data_sampler.services.notice_xml_indexer import index_notice from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice from tests import TEST_DATA_PATH from tests.fakes.fake_repository import FakeNoticeRepository @@ -34,6 +39,7 @@ def normalised_notice(notice_2020): normalise_notice(notice=notice) return notice + @pytest.fixture def normalised_eForm_notice(indexed_eform_notice_622690): notice = indexed_eform_notice_622690.copy() @@ -51,3 +57,39 @@ def mapping_suite_repository_with_mapping_suite(notice_eligibility_repository_pa def clean_mapping_suite_repository(mongodb_client): mapping_suite_repository = MappingSuiteRepositoryMongoDB(mongodb_client=mongodb_client) return mapping_suite_repository + + +@pytest.fixture +def sample_ef_html_unsafe_notice_path() -> pathlib.Path: + return TEST_DATA_PATH / "notice_normalisation" / "ef_html_unsafe_notice.xml" + + +@pytest.fixture +def sample_indexed_ef_html_unsafe_notice( + sample_ef_html_unsafe_notice_path: pathlib.Path) -> Notice: + notice: Notice = Notice(ted_id=sample_ef_html_unsafe_notice_path.name) + notice.set_xml_manifestation( + XMLManifestation(object_data=sample_ef_html_unsafe_notice_path.read_text())) + + return index_notice(notice) + + +@pytest.fixture +def sample_sf_html_unsafe_notice_path() -> pathlib.Path: + return TEST_DATA_PATH / "notice_normalisation" / "sf_html_unsafe_notice.xml" + + +@pytest.fixture +def sample_indexed_sf_html_unsafe_notice( + sample_sf_html_unsafe_notice_path: pathlib.Path) -> Notice: + notice: Notice = Notice(ted_id=sample_sf_html_unsafe_notice_path.name) + notice.set_xml_manifestation( + XMLManifestation(object_data=sample_sf_html_unsafe_notice_path.read_text())) + + return index_notice(notice) + + +@pytest.fixture +def html_incompatible_str() -> str: + """Provides a test string containing HTML incompatible characters.""" + return "Construction work & planning
" diff --git a/tests/features/notice_metadata_processor/notice_normalisation.feature b/tests/features/notice_metadata_processor/notice_normalisation.feature new file mode 100644 index 00000000..fe130375 --- /dev/null +++ b/tests/features/notice_metadata_processor/notice_normalisation.feature @@ -0,0 +1,26 @@ +Feature: Notice Normalisation + As a User + I want to normalize notice data + So that it meets the required format + + Scenario: Normalising notice with spaces in notice ID + Given an EF notice with spaces in notice ID + And an SF notice with spaces in notice ID + When the EF notice is normalised + Then the EF notice ID should not contain leading or trailing spaces + When the SF notice is normalised + Then the SF notice ID should not contain leading or trailing spaces + + Scenario: Processing HTML incompatible string + Given an HTML incompatible string + When the string cannot be parsed as XML + And the string is converted to HTML compatible format + Then the resulting string should be well-formed XML + + Scenario: Normalising notice with HTML incompatible title + Given an EF notice with HTML incompatible title + And an SF notice with HTML incompatible title + When the EF notice is normalised + Then all EF notice titles should be valid XML + When the SF notice is normalised + Then all SF notice titles should be valid XML \ No newline at end of file diff --git a/tests/features/notice_metadata_processor/notice_normalisation.py b/tests/features/notice_metadata_processor/notice_normalisation.py new file mode 100644 index 00000000..8b5af7b3 --- /dev/null +++ b/tests/features/notice_metadata_processor/notice_normalisation.py @@ -0,0 +1,119 @@ +from xml.etree import ElementTree +from xml.etree.ElementTree import ParseError + +import pytest +from pytest_bdd import scenario, given, when, then + +from ted_sws.core.model.metadata import LanguageTaggedString +from ted_sws.core.model.notice import Notice +from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import get_html_compatible_string +from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice + + +def html_str(content: str) -> str: + return f""" {content}""" + + +@scenario('notice_normalisation.feature', 'Normalising notice with spaces in notice ID') +def test_normalising_notice_with_spaces(): + """Test normalisation of notices with spaces in ID.""" + + +@scenario('notice_normalisation.feature', 'Processing HTML incompatible string') +def test_processing_html_incompatible_string(): + """Test processing of HTML incompatible strings.""" + + +@scenario('notice_normalisation.feature', 'Normalising notice with HTML incompatible title') +def test_normalising_notice_with_html_incompatible_title(): + """Test normalisation of notices with HTML incompatible titles.""" + + +# Shared fixtures and steps +@given('an EF notice with spaces in notice ID') +def an_ef_notice_with_spaces_in_notice_id(sample_indexed_ef_html_unsafe_notice: Notice) -> Notice: + """Provide EF notice fixture.""" + return sample_indexed_ef_html_unsafe_notice + + +@given('an SF notice with spaces in notice ID') +def an_sf_notice_with_spaces_in_notice_id(sample_indexed_sf_html_unsafe_notice: Notice) -> Notice: + """Provide SF notice fixture.""" + return sample_indexed_sf_html_unsafe_notice + + +@when('the EF notice is normalised', target_fixture='normalised_ef_notice') +def the_ef_notice_is_normalised(sample_indexed_ef_html_unsafe_notice: Notice) -> Notice: + """Normalize EF notice.""" + return normalise_notice(sample_indexed_ef_html_unsafe_notice) + + +@when('the SF notice is normalised', target_fixture='normalised_sf_notice') +def the_sf_notice_is_normalised(sample_indexed_sf_html_unsafe_notice: Notice) -> Notice: + """Normalize SF notice.""" + return normalise_notice(sample_indexed_sf_html_unsafe_notice) + + +@then('the EF notice ID should not contain leading or trailing spaces') +def the_ef_notice_id_should_not_contain_leading_or_trailing_spaces(normalised_ef_notice: Notice): + """Verify EF notice ID has no extra spaces.""" + assert normalised_ef_notice.normalised_metadata.notice_publication_number.strip() == \ + normalised_ef_notice.normalised_metadata.notice_publication_number + + +@then('the SF notice ID should not contain leading or trailing spaces') +def the_sf_notice_id_should_not_contain_leading_or_trailing_spaces(normalised_sf_notice: Notice): + """Verify SF notice ID has no extra spaces.""" + assert normalised_sf_notice.normalised_metadata.notice_publication_number.strip() == \ + normalised_sf_notice.normalised_metadata.notice_publication_number + + +# HTML incompatible string scenario steps +@given('an HTML incompatible string', target_fixture='incompatible_string') +def an_html_incompatible_string(html_incompatible_str: str) -> str: + """Provide HTML incompatible string fixture.""" + return html_incompatible_str + + +@when('the string cannot be parsed as XML') +def the_string_cannot_be_parsed_as_xml(html_incompatible_str: str): + """Verify string cannot be parsed as XML.""" + with pytest.raises(ParseError): + ElementTree.fromstring(html_incompatible_str) + + +@when('the string is converted to HTML compatible format', target_fixture='compatible_string') +def the_string_is_converted_to_html_compatible_format(html_incompatible_str: str) -> LanguageTaggedString: + """Convert string to HTML compatible format.""" + return get_html_compatible_string(LanguageTaggedString(text=html_incompatible_str)) + + +@then('the resulting string should be well-formed XML') +def the_resulting_string_should_be_well_formed_xml(compatible_string: LanguageTaggedString): + """Verify string is well-formed XML.""" + ElementTree.fromstring(html_str(compatible_string.text)) + + +# HTML incompatible title scenario steps +@given('an EF notice with HTML incompatible title') +def an_ef_notice_with_html_incompatible_title(sample_indexed_ef_html_unsafe_notice: Notice) -> Notice: + """Provide EF notice with incompatible title fixture.""" + return sample_indexed_ef_html_unsafe_notice + + +@given('an SF notice with HTML incompatible title') +def an_sf_notice_with_html_incompatible_title(sample_indexed_sf_html_unsafe_notice: Notice) -> Notice: + """Provide SF notice with incompatible title fixture.""" + return sample_indexed_sf_html_unsafe_notice + + +@then('all EF notice titles should be valid XML') +def all_ef_notice_titles_should_be_valid_xml(normalised_ef_notice: Notice): + """Verify all EF notice titles are valid XML.""" + [ElementTree.fromstring(html_str(title.text)) for title in normalised_ef_notice.normalised_metadata.title] + + +@then('all SF notice titles should be valid XML') +def all_sf_notice_titles_should_be_valid_xml(normalised_sf_notice: Notice): + """Verify all SF notice titles are valid XML.""" + [ElementTree.fromstring(html_str(title.text)) for title in normalised_sf_notice.normalised_metadata.title] diff --git a/tests/features/notice_packager/conftest.py b/tests/features/notice_packager/conftest.py index 309d705f..461b9e6a 100644 --- a/tests/features/notice_packager/conftest.py +++ b/tests/features/notice_packager/conftest.py @@ -1,7 +1,10 @@ +import json +from typing import Dict + import pytest -from ted_sws.core.model.manifestation import RDFManifestation from ted_sws.core.model.notice import NoticeStatus, Notice +from ted_sws.notice_packager.model.metadata import PackagerMetadata from tests import TEST_DATA_PATH @@ -12,3 +15,26 @@ def package_eligible_notice(publicly_available_notice) -> Notice: TEST_DATA_PATH / "notice_packager" / "templates" / "2021_S_004_003545_0.notice.rdf").read_text() notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_PACKAGING) return notice + + +@pytest.fixture +def template_sample_metadata_json() -> Dict: + """Load template metadata from JSON file.""" + return json.load((TEST_DATA_PATH / "notice_packager" / "template_metadata.json").open()) + + +@pytest.fixture +def template_sample_metadata(template_sample_metadata_json) -> PackagerMetadata: + """Create PackagerMetadata from JSON.""" + return PackagerMetadata(**template_sample_metadata_json) + +@pytest.fixture +def work_id_str() -> str: + """Returns the work_id as str""" + return "work_id" + + +@pytest.fixture +def work_id_predicate() -> str: + """Returns the URI predicate for the CDM work identifier.""" + return "http://publications.europa.eu/ontology/cdm#work_id" \ No newline at end of file diff --git a/tests/features/notice_packager/test_template_generator.feature b/tests/features/notice_packager/test_template_generator.feature new file mode 100644 index 00000000..cc7d9ab0 --- /dev/null +++ b/tests/features/notice_packager/test_template_generator.feature @@ -0,0 +1,10 @@ + +Feature: Template Generator + The system is able to generate METS packages based on jinja templates. + + Scenario: Template Generator generates METS DMD RDF that has work_id + Given a PackagerMetadata + And a work_id predicate + When METS DMD RDF generator is executed + Then METS DMD RDF is a valid RDF + And work_id persist in METS DMD RDF \ No newline at end of file diff --git a/tests/features/notice_packager/test_template_generator.py b/tests/features/notice_packager/test_template_generator.py new file mode 100644 index 00000000..cd6f4735 --- /dev/null +++ b/tests/features/notice_packager/test_template_generator.py @@ -0,0 +1,49 @@ +"""Tests for Template Generator's METS DMD RDF generation.""" + +from urllib.parse import urlparse, ParseResult +from pytest_bdd import given, when, then, scenario +from rdflib import Graph + +from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator +from ted_sws.notice_packager.model.metadata import PackagerMetadata + + +@scenario('test_template_generator.feature', 'Template Generator generates METS DMD RDF that has work_id') +def test_package_a_ted_notice_in_a_mets_package() -> None: + """Test METS package generation with work_id.""" + + +@given('a PackagerMetadata') +def a_packager_metadata(template_sample_metadata: PackagerMetadata) -> None: + """Verify PackagerMetadata existence.""" + assert template_sample_metadata + assert isinstance(template_sample_metadata, PackagerMetadata) + + +@given('a work_id predicate') +def a_work_id_predicate(work_id_predicate: str, work_id_str: str) -> None: + """Check work_id predicate validity.""" + assert work_id_predicate + assert isinstance(work_id_predicate, str) + assert work_id_str in work_id_predicate + valid_url: ParseResult = urlparse(work_id_predicate) + assert valid_url.netloc + assert valid_url.fragment == work_id_str + + +@when("METS DMD RDF generator is executed", target_fixture="mets_xml_dmd_rdf") +def mets_dmd_rdf_generator_is_executed(template_sample_metadata: PackagerMetadata) -> str: + """Generate METS DMD RDF.""" + return TemplateGenerator.mets_xml_dmd_rdf_generator(template_sample_metadata) + + +@then("METS DMD RDF is a valid RDF") +def mets_dmd_rdf_is_a_valid_rdf(mets_xml_dmd_rdf: str) -> None: + """Validate RDF format.""" + Graph().parse(data=mets_xml_dmd_rdf, format="xml") + + +@then("work_id persist in METS DMD RDF") +def work_id_persist_in_mets_dmd_rdf(mets_xml_dmd_rdf: str, work_id_str: str) -> None: + """Verify work_id presence in RDF.""" + assert work_id_str in mets_xml_dmd_rdf \ No newline at end of file diff --git a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py index dad59604..9ebd48ee 100644 --- a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py +++ b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py @@ -1,4 +1,3 @@ -import pathlib from xml.etree import ElementTree from xml.etree.ElementTree import ParseError @@ -20,9 +19,11 @@ extract_and_normalise_notice_metadata from ted_sws.resources.mapping_files_registry import MappingFilesRegistry + def html_str(content: str) -> str: return f""" {content}""" + def test_metadata_normaliser_by_notice(indexed_notice): notice = normalise_notice(indexed_notice) assert notice.normalised_metadata @@ -261,17 +262,15 @@ def test_get_html_compatible_string(html_incompatible_str: str): compatible_str: LanguageTaggedString = get_html_compatible_string(LanguageTaggedString(text=html_incompatible_str)) - # Parse to check if str is well-formed (HTML-safe sequences or elements) ElementTree.fromstring(html_str(compatible_str.text)) def test_normalising_notice_with_html_incompatible_title(sample_indexed_ef_html_unsafe_notice: Notice, - sample_indexed_sf_html_unsafe_notice: Notice): - + sample_indexed_sf_html_unsafe_notice: Notice): normalised_ef_notice: Notice = normalise_notice(sample_indexed_ef_html_unsafe_notice) - [ElementTree.fromstring(html_str(title.text)) for title in normalised_ef_notice.normalised_metadata.title ] + [ElementTree.fromstring(html_str(title.text)) for title in normalised_ef_notice.normalised_metadata.title] normalised_sf_notice: Notice = normalise_notice(sample_indexed_sf_html_unsafe_notice)