diff --git a/mycore-mods/src/main/java/org/mycore/mods/merger/MCRModspersonNameMerger.java b/mycore-mods/src/main/java/org/mycore/mods/merger/MCRModspersonNameMerger.java new file mode 100644 index 0000000000..bf32655fea --- /dev/null +++ b/mycore-mods/src/main/java/org/mycore/mods/merger/MCRModspersonNameMerger.java @@ -0,0 +1,112 @@ +package org.mycore.mods.merger; + +import java.util.Objects; + +import org.jdom2.Element; +import org.mycore.common.MCRConstants; + +/** + * Compares and merges mods:name elements. Extends MCRNameMerger and provides + * additional functionality: + *
<ol>
+ *   <li>It can handle mods:alternativeName and considers them when determining
+ *   if two names are probably the same.</li>
+ *   <li>If first name is the same and one of the family names contains the other,
+ *   the names are considered as "probablySameAs".
+ *   This takes double-barreled names (Doppelnamen) into account.</li>
+ * </ol>
+ */
+public class MCRModspersonNameMerger extends MCRNameMerger {
+
+    private static final String ALTERNATIVE_NAME = "alternativeName";
+
+    private static final String TYPE = "type";
+
+    private static final String NAME_PART = "namePart";
+
+    /**
+     * Decides whether the name wrapped by the given merger probably denotes the same name as this one.
+     * The checks run in this order:
+     * name ids reported as contradicting rule a match out; equal sets of all names confirm it;
+     * with equal family names the decision is based on the initials and given names;
+     * with differing family names the names still match if the given names are equal and one
+     * family name contains the other (double-barreled names).
+     * If either side has no family name, the last case never matches.
+     *
+     * @param e the merger to compare with
+     * @return true if both mergers probably describe the same name; false otherwise, or if the
+     *         given merger is not a {@link MCRNameMerger}
+     */
+    @Override
+    public boolean isProbablySameAs(MCRMerger e) {
+        if (!(e instanceof MCRNameMerger other)) {
+            return false;
+        }
+
+        if (haveContradictingNameIds(this.nameIds, other.nameIds)) {
+            return false;
+        } else if (this.allNames.equals(other.allNames)) {
+            return true;
+        } else if (Objects.equals(familyName, other.familyName)) {
+            if (initials.isEmpty() && other.initials.isEmpty()) {
+                return true; // same family name, no given name, no initials, then assumed same
+            } else if (!haveAtLeastOneCommon(this.initials, other.initials)) {
+                return false;
+            } else if (this.givenNames.isEmpty() || other.givenNames.isEmpty()) {
+                return true;
+            } else {
+                return haveAtLeastOneCommon(this.givenNames, other.givenNames);
+            }
+        } else {
+            // double-barreled name with same given names assumes same;
+            // the family names differ here, so at most one of them is null, and a missing
+            // family name can neither contain nor be contained in the other one
+            return this.familyName != null && other.familyName != null
+                && this.givenNames.equals(other.givenNames)
+                && (this.familyName.contains(other.familyName) || other.familyName.contains(this.familyName));
+        }
+    }
+
+    /**
+     * Checks if this merger has an alternativeName-element that is
+     * {@link MCRNameMerger#isProbablySameAs(MCRMerger) probably the same as} the other given merger.
+     * @param other the other merger
+     * @return returns true if the other merger is also a {@link MCRNameMerger} and if this merger has
+     * an alternative name that matches the other
+     */
+    public boolean hasAlternativeNameSameAs(MCRMerger other) {
+        if (!(other instanceof MCRNameMerger)) {
+            return false;
+        }
+        // every mods:alternativeName child gets its own merger from the factory and is compared to the other name
+        return this.element.getChildren(ALTERNATIVE_NAME, MCRConstants.MODS_NAMESPACE)
+            .stream()
+            .map(MCRMergerFactory::buildFrom)
+            .anyMatch(altMerger -> altMerger.isProbablySameAs(other));
+    }
+
+    /**
+     * Merges the contents of the element wrapped by the other merger into a new alternativeName element
+     * in the element wrapped by this merger. Should only be called if this.isProbablySameAs(other).
+     * The alternative name is only added if the two names are not exactly the same and if the
+     * alternative name doesn't yet exist in the element wrapped by this merger.
+     * Only the family name and given names are merged into the alternativeName element: all given
+     * names first, followed by the first family name. A missing family name is left out instead of
+     * being written as an empty name part, and nothing is added if the other name has neither
+     * given nor family name parts.
+     * @param e the other merger
+     */
+    public void mergeAsAlternativeName(MCRMerger e) {
+        if (!(e instanceof MCRNameMerger other)) {
+            return;
+        }
+        if (this.allNames.equals(other.allNames) || this.hasAlternativeNameSameAs(e)) {
+            return;
+        }
+        Element alternativeName = new Element(ALTERNATIVE_NAME, MCRConstants.MODS_NAMESPACE);
+
+        // given names are copied in the order they appear in the other element
+        for (Element namePart : other.element.getChildren(NAME_PART, MCRConstants.MODS_NAMESPACE)) {
+            if ("given".equals(namePart.getAttributeValue(TYPE))) {
+                alternativeName.addContent(buildNamePart("given", namePart.getText()));
+            }
+        }
+
+        // only the first family name is taken over, and only if there is one
+        other.element.getChildren(NAME_PART, MCRConstants.MODS_NAMESPACE).stream()
+            .filter(namePart -> "family".equals(namePart.getAttributeValue(TYPE)))
+            .findFirst()
+            .ifPresent(namePart -> alternativeName.addContent(buildNamePart("family", namePart.getText())));
+
+        // do not pollute the target with an alternativeName that carries no name parts at all
+        if (!alternativeName.getChildren().isEmpty()) {
+            this.element.addContent(alternativeName);
+        }
+    }
+
+    /**
+     * Builds a new mods:namePart element with the given type attribute and text content.
+     */
+    private static Element buildNamePart(String type, String text) {
+        return new Element(NAME_PART, MCRConstants.MODS_NAMESPACE).setAttribute(TYPE, type).addContent(text);
+    }
+}
diff --git a/mycore-mods/src/main/java/org/mycore/mods/merger/MCRNameMerger.java b/mycore-mods/src/main/java/org/mycore/mods/merger/MCRNameMerger.java index 1e3b1ca989..b6fc871092 100644 --- a/mycore-mods/src/main/java/org/mycore/mods/merger/MCRNameMerger.java +++ b/mycore-mods/src/main/java/org/mycore/mods/merger/MCRNameMerger.java @@ -37,16 +37,20 @@ * @author Frank Lützenkirchen */ public class MCRNameMerger extends MCRMerger { + + private static final String TYPE = "type"; - private String familyName; + private static final String NAME_PART = "namePart"; - private Set givenNames = new HashSet<>(); + protected String familyName; - private Set initials = new HashSet<>(); + protected Set givenNames = new HashSet<>(); - private Set allNames = new HashSet<>(); + protected Set initials = new HashSet<>(); - private Map> nameIds = new HashMap<>(); + protected Set allNames = new HashSet<>(); + + protected Map> nameIds = new HashMap<>(); @Override public void setElement(Element element) { @@ -69,8 +73,8 @@ private void setFromDisplayForm(Element element) { } private void setFromNameParts(Element modsName) { - for (Element namePart : modsName.getChildren("namePart", MCRConstants.MODS_NAMESPACE)) { - String type = namePart.getAttributeValue("type"); + for (Element namePart : modsName.getChildren(NAME_PART, MCRConstants.MODS_NAMESPACE)) { + String type = namePart.getAttributeValue(TYPE); String nameFragment = namePart.getText().replaceAll("\\p{Zs}+", " "); if (Objects.equals(type, "family")) { @@ -83,7 +87,7 @@ private void setFromNameParts(Element modsName) { continue; } else if (Objects.equals(type, "termsOfAddress")) { continue; - } else if ("personal".equals(modsName.getAttributeValue("type"))) { + } else if ("personal".equals(modsName.getAttributeValue(TYPE))) { 
setFromCombinedName(nameFragment); } else { setFamilyName(nameFragment); @@ -185,13 +189,13 @@ public boolean isProbablySameAs(MCRMerger e) { } } - private boolean haveAtLeastOneCommon(Set a, Set b) { + protected boolean haveAtLeastOneCommon(Set a, Set b) { Set intersection = new HashSet<>(a); intersection.retainAll(b); return !intersection.isEmpty(); } - private boolean haveContradictingNameIds(Map> a, Map> b) { + protected boolean haveContradictingNameIds(Map> a, Map> b) { Set intersection; boolean foundContradictingNameIds = false; for (String type : a.keySet()) { @@ -210,7 +214,7 @@ private boolean haveContradictingNameIds(Map> a, Map ids; diff --git a/mycore-mods/src/main/resources/components/mods/config/mycore.properties b/mycore-mods/src/main/resources/components/mods/config/mycore.properties index a3ea58adc4..c1d1e853d3 100644 --- a/mycore-mods/src/main/resources/components/mods/config/mycore.properties +++ b/mycore-mods/src/main/resources/components/mods/config/mycore.properties @@ -155,6 +155,7 @@ MCR.MODS.Merger.relatedItem=org.mycore.mods.merger.MCRRelatedItemMerger MCR.MODS.Merger.classification=org.mycore.mods.merger.MCRCategoryMerger MCR.MODS.Merger.genre=org.mycore.mods.merger.MCRCategoryMerger MCR.MODS.Merger.typeOfResource=org.mycore.mods.merger.MCRCategoryMerger +MCR.MODS.Merger.alternativeName=org.mycore.mods.merger.MCRModspersonNameMerger MCR.MODS.Merger.default=org.mycore.mods.merger.MCRMerger # Maximum number of characters to compare from two abstracts diff --git a/mycore-mods/src/test/java/org/mycore/mods/merger/MCRModspersonNameMergerTest.java b/mycore-mods/src/test/java/org/mycore/mods/merger/MCRModspersonNameMergerTest.java new file mode 100644 index 0000000000..fbb5a608be --- /dev/null +++ b/mycore-mods/src/test/java/org/mycore/mods/merger/MCRModspersonNameMergerTest.java @@ -0,0 +1,145 @@ +package org.mycore.mods.merger; + +import org.jaxen.JaxenException; +import org.jdom2.Element; +import org.junit.jupiter.api.Test; +import 
org.mycore.common.MCRConstants;
+import org.mycore.common.xml.MCRNodeBuilder;
+import org.mycore.test.MyCoReTest;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Tests name comparison, alternative name lookup and alternative name merging
+ * of {@link MCRModspersonNameMerger}.
+ */
+@MyCoReTest
+public class MCRModspersonNameMergerTest {
+
+    /**
+     * Compares personal names in several spellings (umlaut transcriptions, initials, split name parts,
+     * double-barreled family names) and checks that a null element is rejected with a NullPointerException.
+     */
+    @Test
+    public void testIsProbablySameAs() throws Exception {
+        MCRNameMerger a = buildNameEntry("[mods:namePart='Thomas Müller']");
+        MCRNameMerger b = buildNameEntry("[mods:namePart='thomas Mueller']");
+        assertTrue(a.isProbablySameAs(b));
+
+        MCRNameMerger c = buildNameEntry("[mods:namePart='Muller, T.']");
+        assertTrue(a.isProbablySameAs(c));
+
+        MCRNameMerger d = buildNameEntry("[mods:namePart='Mueller, T']");
+        assertTrue(a.isProbablySameAs(d));
+
+        MCRNameMerger e = buildNameEntry("[mods:namePart='Müller, Egon']");
+        assertFalse(a.isProbablySameAs(e));
+
+        MCRNameMerger f = buildNameEntry("[mods:namePart='Thorsten Mueller']");
+        assertTrue(c.isProbablySameAs(f));
+        assertFalse(a.isProbablySameAs(f));
+
+        MCRNameMerger g = buildNameEntry("[mods:namePart='Thorsten Egon Mueller']");
+        assertTrue(e.isProbablySameAs(g));
+        assertTrue(f.isProbablySameAs(g));
+
+        MCRNameMerger h = buildNameEntry(
+            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Müller']");
+        assertTrue(h.isProbablySameAs(a));
+        assertTrue(h.isProbablySameAs(d));
+
+        MCRNameMerger i = buildNameEntry("[mods:namePart[@type='given']='T.'][mods:namePart[@type='family']='Müller']"
+            + "[mods:namePart[@type='termsOfAddress']='Jun.']");
+        assertTrue(i.isProbablySameAs(h));
+        assertTrue(i.isProbablySameAs(a));
+        assertTrue(i.isProbablySameAs(d));
+
+        // double-barreled family name with a different given name
+        MCRNameMerger j = buildNameEntry("[mods:namePart='Thorsten Müller-Doppelname']");
+        assertFalse(j.isProbablySameAs(a));
+        assertFalse(j.isProbablySameAs(g));
+        assertFalse(j.isProbablySameAs(i));
+        assertTrue(j.isProbablySameAs(f));
+
+        // double-barreled family name with the same given name
+        MCRNameMerger k = buildNameEntry("[mods:namePart='Thomas Müller-Doppelname']");
+        assertTrue(k.isProbablySameAs(a));
+        assertTrue(k.isProbablySameAs(b));
+        assertTrue(k.isProbablySameAs(h));
+        assertFalse(k.isProbablySameAs(c));
+        assertFalse(k.isProbablySameAs(d));
+        assertFalse(k.isProbablySameAs(i));
+
+        assertThrows(NullPointerException.class, () -> new MCRModspersonNameMerger().setElement(null));
+    }
+
+    /**
+     * Checks that an existing mods:alternativeName child is matched against other names, and that
+     * a name without any alternativeName child never reports a match.
+     */
+    @Test
+    public void testHasAlternativeNameSameAs() throws JaxenException {
+        Element modsNameElement = new MCRNodeBuilder()
+            .buildElement("mods:name[@type='personal'][mods:namePart[@type='given']='Thomas']"
+                + "[mods:namePart[@type='family']='Müller']", null, null);
+
+        Element altNameElement = buildAlternativeNameElement("Thomas", "Meyer");
+        modsNameElement.addContent(altNameElement);
+
+        MCRModspersonNameMerger a = new MCRModspersonNameMerger();
+        a.setElement(modsNameElement);
+        MCRModspersonNameMerger b = buildNameEntry(
+            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Meyer']");
+        MCRModspersonNameMerger c = buildNameEntry(
+            "[mods:namePart[@type='given']='T'][mods:namePart[@type='family']='Meyer']");
+        MCRModspersonNameMerger d = buildNameEntry(
+            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Mayer']");
+
+        assertTrue(a.hasAlternativeNameSameAs(b));
+        assertTrue(a.hasAlternativeNameSameAs(c));
+        assertFalse(a.hasAlternativeNameSameAs(d));
+        assertFalse(b.hasAlternativeNameSameAs(c));
+    }
+
+    /**
+     * Merges a name with a different family name as alternative name and checks that the original
+     * name parts stay untouched while one alternativeName with given and family name is added.
+     */
+    @Test
+    public void testMergeAsAlternativeName() throws JaxenException {
+        MCRModspersonNameMerger a = buildNameEntry(
+            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Müller']");
+        MCRModspersonNameMerger b = buildNameEntry(
+            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Meyer']");
+
+        a.mergeAsAlternativeName(b);
+
+        Element mergedModsName = a.element;
+
+        List<Element> nameParts = mergedModsName.getChildren("namePart", MCRConstants.MODS_NAMESPACE);
+        assertEquals(2, nameParts.size());
+
+        assertEquals("Thomas", nameParts.getFirst().getText());
+        assertEquals("Müller", nameParts.get(1).getText());
+
+        List<Element> alternativeNames = mergedModsName.getChildren("alternativeName", MCRConstants.MODS_NAMESPACE);
+        assertEquals(1, alternativeNames.size());
+        Element alternativeName = alternativeNames.getFirst();
+        nameParts = alternativeName.getChildren("namePart", MCRConstants.MODS_NAMESPACE);
+        assertEquals(2, nameParts.size());
+
+        assertEquals("Thomas", nameParts.getFirst().getText());
+        assertEquals("Meyer", nameParts.get(1).getText());
+    }
+
+    /**
+     * Builds a personal mods:name element from the given XPath predicates and wraps it in a merger.
+     */
+    private MCRModspersonNameMerger buildNameEntry(String predicates) throws JaxenException {
+        Element modsName = new MCRNodeBuilder().buildElement("mods:name[@type='personal']" + predicates, null, null);
+        MCRModspersonNameMerger ne = new MCRModspersonNameMerger();
+        ne.setElement(modsName);
+        return ne;
+    }
+
+    /**
+     * Builds a mods:alternativeName element holding a family name part followed by a given name part.
+     */
+    private Element buildAlternativeNameElement(String givenName, String familyName) {
+        Element altNameElement = new Element("alternativeName", MCRConstants.MODS_NAMESPACE);
+
+        Element altFamilyNameElement = new Element("namePart", MCRConstants.MODS_NAMESPACE);
+        altFamilyNameElement.setAttribute("type", "family");
+        altFamilyNameElement.setText(familyName);
+
+        Element altGivenNameElement = new Element("namePart", MCRConstants.MODS_NAMESPACE);
+        altGivenNameElement.setAttribute("type", "given");
+        altGivenNameElement.setText(givenName);
+
+        altNameElement.setContent(Arrays.asList(altFamilyNameElement, altGivenNameElement));
+
+        return altNameElement;
+    }
+}
diff --git a/mycore-mods/src/test/java/org/mycore/mods/merger/MCRNameMergerTest.java b/mycore-mods/src/test/java/org/mycore/mods/merger/MCRNameMergerTest.java index dd475bd8ee..9b5c32603f 100644 --- a/mycore-mods/src/test/java/org/mycore/mods/merger/MCRNameMergerTest.java +++ b/mycore-mods/src/test/java/org/mycore/mods/merger/MCRNameMergerTest.java @@ 
-30,7 +30,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.assertThrows; @MyCoReTest public class MCRNameMergerTest { @@ -69,12 +69,7 @@ public void testIsProbablySameAs() throws Exception { assertTrue(i.isProbablySameAs(a)); assertTrue(i.isProbablySameAs(d)); - try { - new MCRNameMerger().setElement(null); - fail("No name should result in NPE while creating a MCRNameMerger"); - } catch (NullPointerException ex) { - // exception excepted - } + assertThrows(NullPointerException.class, () -> new MCRNameMerger().setElement(null)); } @Test diff --git a/mycore-mods/src/test/resources/mycore.properties b/mycore-mods/src/test/resources/mycore.properties index 83ef0c6e96..570a257f7e 100644 --- a/mycore-mods/src/test/resources/mycore.properties +++ b/mycore-mods/src/test/resources/mycore.properties @@ -64,3 +64,5 @@ MCR.Category.XPathMapping.Pattern.title-contains=mods:titleInfo[mods:title[conta # MCRMODSLinkProviderTest#testHandleObjectCreated() MCR.Metadata.Type.modsperson=true +# MCRModspersonNameMergerTest#testHasAlternativeNameSameAs() +MCR.MODS.Merger.alternativeName=org.mycore.mods.merger.MCRModspersonNameMerger