Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package org.mycore.mods.merger;

import java.util.Objects;

import org.jdom2.Element;
import org.mycore.common.MCRConstants;

/**
 * Compares and merges mods:name elements. Extends MCRNameMerger and provides
 * additional functionality:
 * <ol>
 * <li>It can handle mods:alternativeName and considers them when determining
 * if two names are probably the same.</li>
 * <li>If the given names are the same and one of the family names contains the other,
 * the names are considered as "probablySameAs".
 * This takes double-barreled names (Doppelnamen) into account.</li>
 * </ol>
 */
public class MCRModspersonNameMerger extends MCRNameMerger {

    private static final String ALTERNATIVE_NAME = "alternativeName";

    private static final String TYPE = "type";

    private static final String NAME_PART = "namePart";

    private static final String TYPE_GIVEN = "given";

    private static final String TYPE_FAMILY = "family";

    /**
     * Decides whether the name wrapped by this merger and the name wrapped by the other merger
     * probably denote the same person.
     * <p>
     * The checks are applied in this order: contradicting name identifiers rule a match out,
     * identical sets of all names confirm it, equal family names are decided by comparing initials
     * and given names, and finally differing family names still match if the given names are equal
     * and one family name contains the other (double-barreled names).
     *
     * @param e the other merger
     * @return true if the other merger is a {@link MCRNameMerger} and both names are probably the same
     */
    @Override
    public boolean isProbablySameAs(MCRMerger e) {
        if (!(e instanceof MCRNameMerger other)) {
            return false;
        }

        if (haveContradictingNameIds(this.nameIds, other.nameIds)) {
            return false;
        } else if (this.allNames.equals(other.allNames)) {
            return true;
        } else if (Objects.equals(familyName, other.familyName)) {
            if (initials.isEmpty() && other.initials.isEmpty()) {
                return true; // same family name, no given name, no initials, then assumed same
            } else if (!haveAtLeastOneCommon(this.initials, other.initials)) {
                return false;
            } else if (this.givenNames.isEmpty() || other.givenNames.isEmpty()) {
                return true;
            } else {
                return haveAtLeastOneCommon(this.givenNames, other.givenNames);
            }
        } else {
            // double-barreled name with same given names assumes same
            return this.givenNames.equals(other.givenNames)
                && oneContainsTheOther(this.familyName, other.familyName);
        }
    }

    /**
     * Checks if this merger has an alternativeName-element that is
     * {@link MCRNameMerger#isProbablySameAs(MCRMerger) probably the same as} the other given merger.
     * @param other the other merger
     * @return returns true if the other merger is also a {@link MCRNameMerger} and if this merger has
     * an alternative name that matches the other
     */
    public boolean hasAlternativeNameSameAs(MCRMerger other) {
        if (!(other instanceof MCRNameMerger)) {
            return false;
        }
        return this.element.getChildren(ALTERNATIVE_NAME, MCRConstants.MODS_NAMESPACE)
            .stream()
            .map(MCRMergerFactory::buildFrom)
            .anyMatch(altMerger -> altMerger.isProbablySameAs(other));
    }

    /**
     * Merges the contents of the element wrapped by the other merger into a new alternativeName element
     * in the element wrapped by this merger. Should only be called if this.isProbablySameAs(other).
     * The alternative name is only added if the two names are not exactly the same and if the
     * alternative name doesn't yet exist in the element wrapped by this merger.
     * Only the family name and given names are merged into the alternativeName element: every
     * namePart of type "given" is copied, followed by the first namePart of type "family" if there is one.
     * If the other name has neither, nothing is added.
     * @param e the other merger
     */
    public void mergeAsAlternativeName(MCRMerger e) {
        if (!(e instanceof MCRNameMerger other)) {
            return;
        }
        if (this.allNames.equals(other.allNames) || this.hasAlternativeNameSameAs(e)) {
            return;
        }

        Element alternativeName = new Element(ALTERNATIVE_NAME, MCRConstants.MODS_NAMESPACE);

        // given names first, in document order
        for (Element namePart : other.element.getChildren(NAME_PART, MCRConstants.MODS_NAMESPACE)) {
            if (TYPE_GIVEN.equals(namePart.getAttributeValue(TYPE))) {
                alternativeName.addContent(buildNamePart(TYPE_GIVEN, namePart.getText()));
            }
        }

        // then the first family name; do not write a family namePart when the other name has none
        other.element.getChildren(NAME_PART, MCRConstants.MODS_NAMESPACE)
            .stream()
            .filter(namePart -> TYPE_FAMILY.equals(namePart.getAttributeValue(TYPE)))
            .findFirst()
            .map(Element::getText)
            .ifPresent(text -> alternativeName.addContent(buildNamePart(TYPE_FAMILY, text)));

        // an alternativeName without any namePart carries no information, so it is not attached
        if (alternativeName.getContentSize() > 0) {
            this.element.addContent(alternativeName);
        }
    }

    /**
     * Null-safe containment check used for double-barreled family names.
     * @return true if both strings are present and one of them contains the other
     */
    private static boolean oneContainsTheOther(String first, String second) {
        if (first == null || second == null) {
            // a missing family name can never be "part of" the other one
            return false;
        }
        return first.contains(second) || second.contains(first);
    }

    /** Creates a mods:namePart element of the given type holding the given text. */
    private static Element buildNamePart(String type, String text) {
        return new Element(NAME_PART, MCRConstants.MODS_NAMESPACE)
            .setAttribute(TYPE, type)
            .addContent(text);
    }
}
26 changes: 15 additions & 11 deletions mycore-mods/src/main/java/org/mycore/mods/merger/MCRNameMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,20 @@
* @author Frank Lützenkirchen
*/
public class MCRNameMerger extends MCRMerger {

private static final String TYPE = "type";

private String familyName;
private static final String NAME_PART = "namePart";

private Set<String> givenNames = new HashSet<>();
protected String familyName;

private Set<String> initials = new HashSet<>();
protected Set<String> givenNames = new HashSet<>();

private Set<String> allNames = new HashSet<>();
protected Set<String> initials = new HashSet<>();

private Map<String, Set<String>> nameIds = new HashMap<>();
protected Set<String> allNames = new HashSet<>();

protected Map<String, Set<String>> nameIds = new HashMap<>();

@Override
public void setElement(Element element) {
Expand All @@ -69,8 +73,8 @@ private void setFromDisplayForm(Element element) {
}

private void setFromNameParts(Element modsName) {
for (Element namePart : modsName.getChildren("namePart", MCRConstants.MODS_NAMESPACE)) {
String type = namePart.getAttributeValue("type");
for (Element namePart : modsName.getChildren(NAME_PART, MCRConstants.MODS_NAMESPACE)) {
String type = namePart.getAttributeValue(TYPE);
String nameFragment = namePart.getText().replaceAll("\\p{Zs}+", " ");

if (Objects.equals(type, "family")) {
Expand All @@ -83,7 +87,7 @@ private void setFromNameParts(Element modsName) {
continue;
} else if (Objects.equals(type, "termsOfAddress")) {
continue;
} else if ("personal".equals(modsName.getAttributeValue("type"))) {
} else if ("personal".equals(modsName.getAttributeValue(TYPE))) {
setFromCombinedName(nameFragment);
} else {
setFamilyName(nameFragment);
Expand Down Expand Up @@ -185,13 +189,13 @@ public boolean isProbablySameAs(MCRMerger e) {
}
}

private boolean haveAtLeastOneCommon(Set<String> a, Set<String> b) {
protected boolean haveAtLeastOneCommon(Set<String> a, Set<String> b) {
Set<String> intersection = new HashSet<>(a);
intersection.retainAll(b);
return !intersection.isEmpty();
}

private boolean haveContradictingNameIds(Map<String, Set<String>> a, Map<String, Set<String>> b) {
protected boolean haveContradictingNameIds(Map<String, Set<String>> a, Map<String, Set<String>> b) {
Set<String> intersection;
boolean foundContradictingNameIds = false;
for (String type : a.keySet()) {
Expand All @@ -210,7 +214,7 @@ private boolean haveContradictingNameIds(Map<String, Set<String>> a, Map<String,

private void collectNameIds(Element modsName) {
for (Element nameId : modsName.getChildren("nameIdentifier", MCRConstants.MODS_NAMESPACE)) {
String type = nameId.getAttributeValue("type");
String type = nameId.getAttributeValue(TYPE);
String id = nameId.getText();

Set<String> ids;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ MCR.MODS.Merger.relatedItem=org.mycore.mods.merger.MCRRelatedItemMerger
MCR.MODS.Merger.classification=org.mycore.mods.merger.MCRCategoryMerger
MCR.MODS.Merger.genre=org.mycore.mods.merger.MCRCategoryMerger
MCR.MODS.Merger.typeOfResource=org.mycore.mods.merger.MCRCategoryMerger
MCR.MODS.Merger.alternativeName=org.mycore.mods.merger.MCRModspersonNameMerger
MCR.MODS.Merger.default=org.mycore.mods.merger.MCRMerger

# Maximum number of characters to compare from two abstracts
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package org.mycore.mods.merger;

import org.jaxen.JaxenException;
import org.jdom2.Element;
import org.junit.jupiter.api.Test;
import org.mycore.common.MCRConstants;
import org.mycore.common.xml.MCRNodeBuilder;
import org.mycore.test.MyCoReTest;

import java.util.Arrays;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertThrows;

/**
 * Tests for {@link MCRModspersonNameMerger}: name similarity (including double-barreled
 * family names), detection of matching mods:alternativeName elements and merging of another
 * name as an alternative name.
 */
@MyCoReTest
public class MCRModspersonNameMergerTest {

    /**
     * Exercises {@link MCRModspersonNameMerger#isProbablySameAs(MCRMerger)} with combined,
     * inverted, abbreviated, typed and double-barreled name variants.
     */
    @Test
    public void testIsProbablySameAs() throws Exception {
        MCRNameMerger thomasMueller = buildNameEntry("[mods:namePart='Thomas Müller']");
        MCRNameMerger lowerCaseThomas = buildNameEntry("[mods:namePart='thomas Mueller']");
        assertTrue(thomasMueller.isProbablySameAs(lowerCaseThomas));

        MCRNameMerger mullerWithInitialDot = buildNameEntry("[mods:namePart='Muller, T.']");
        assertTrue(thomasMueller.isProbablySameAs(mullerWithInitialDot));

        MCRNameMerger muellerWithInitial = buildNameEntry("[mods:namePart='Mueller, T']");
        assertTrue(thomasMueller.isProbablySameAs(muellerWithInitial));

        MCRNameMerger egonMueller = buildNameEntry("[mods:namePart='Müller, Egon']");
        assertFalse(thomasMueller.isProbablySameAs(egonMueller));

        MCRNameMerger thorstenMueller = buildNameEntry("[mods:namePart='Thorsten Mueller']");
        assertTrue(mullerWithInitialDot.isProbablySameAs(thorstenMueller));
        assertFalse(thomasMueller.isProbablySameAs(thorstenMueller));

        MCRNameMerger thorstenEgonMueller = buildNameEntry("[mods:namePart='Thorsten Egon Mueller']");
        assertTrue(egonMueller.isProbablySameAs(thorstenEgonMueller));
        assertTrue(thorstenMueller.isProbablySameAs(thorstenEgonMueller));

        MCRNameMerger typedThomasMueller = buildNameEntry(
            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Müller']");
        assertTrue(typedThomasMueller.isProbablySameAs(thomasMueller));
        assertTrue(typedThomasMueller.isProbablySameAs(muellerWithInitial));

        MCRNameMerger typedInitialMuellerJunior = buildNameEntry(
            "[mods:namePart[@type='given']='T.']"
                + "[mods:namePart[@type='family']='Müller']"
                + "[mods:namePart[@type='termsOfAddress']='Jun.']");
        assertTrue(typedInitialMuellerJunior.isProbablySameAs(typedThomasMueller));
        assertTrue(typedInitialMuellerJunior.isProbablySameAs(thomasMueller));
        assertTrue(typedInitialMuellerJunior.isProbablySameAs(muellerWithInitial));

        // double-barreled family name, given name "Thorsten"
        MCRNameMerger thorstenDoubleBarreled = buildNameEntry("[mods:namePart='Thorsten Müller-Doppelname']");
        assertFalse(thorstenDoubleBarreled.isProbablySameAs(thomasMueller));
        assertFalse(thorstenDoubleBarreled.isProbablySameAs(thorstenEgonMueller));
        assertFalse(thorstenDoubleBarreled.isProbablySameAs(typedInitialMuellerJunior));
        assertTrue(thorstenDoubleBarreled.isProbablySameAs(thorstenMueller));

        // double-barreled family name, given name "Thomas"
        MCRNameMerger thomasDoubleBarreled = buildNameEntry("[mods:namePart='Thomas Müller-Doppelname']");
        assertTrue(thomasDoubleBarreled.isProbablySameAs(thomasMueller));
        assertTrue(thomasDoubleBarreled.isProbablySameAs(lowerCaseThomas));
        assertTrue(thomasDoubleBarreled.isProbablySameAs(typedThomasMueller));
        assertFalse(thomasDoubleBarreled.isProbablySameAs(mullerWithInitialDot));
        assertFalse(thomasDoubleBarreled.isProbablySameAs(muellerWithInitial));
        assertFalse(thomasDoubleBarreled.isProbablySameAs(typedInitialMuellerJunior));

        // a merger without an element is expected to fail with a NullPointerException
        assertThrows(NullPointerException.class, () -> new MCRModspersonNameMerger().setElement(null));
    }

    /**
     * Verifies that an existing mods:alternativeName is matched against other names.
     */
    @Test
    public void testHasAlternativeNameSameAs() throws JaxenException {
        String namePath = "mods:name[@type='personal'][mods:namePart[@type='given']='Thomas']"
            + "[mods:namePart[@type='family']='Müller']";
        Element nameWithAlternative = new MCRNodeBuilder().buildElement(namePath, null, null);
        nameWithAlternative.addContent(buildAlternativeNameElement("Thomas", "Meyer"));

        MCRModspersonNameMerger muellerAliasMeyer = new MCRModspersonNameMerger();
        muellerAliasMeyer.setElement(nameWithAlternative);

        MCRModspersonNameMerger thomasMeyer = buildNameEntry(
            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Meyer']");
        MCRModspersonNameMerger initialMeyer = buildNameEntry(
            "[mods:namePart[@type='given']='T'][mods:namePart[@type='family']='Meyer']");
        MCRModspersonNameMerger thomasMayer = buildNameEntry(
            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Mayer']");

        assertTrue(muellerAliasMeyer.hasAlternativeNameSameAs(thomasMeyer));
        assertTrue(muellerAliasMeyer.hasAlternativeNameSameAs(initialMeyer));
        assertFalse(muellerAliasMeyer.hasAlternativeNameSameAs(thomasMayer));
        // a name without any alternativeName never matches
        assertFalse(thomasMeyer.hasAlternativeNameSameAs(initialMeyer));
    }

    /**
     * Verifies that merging another name adds exactly one mods:alternativeName holding the
     * other given and family name, while the original name parts stay untouched.
     */
    @Test
    public void testMergeAsAlternativeName() throws JaxenException {
        MCRModspersonNameMerger target = buildNameEntry(
            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Müller']");
        MCRModspersonNameMerger source = buildNameEntry(
            "[mods:namePart[@type='given']='Thomas'][mods:namePart[@type='family']='Meyer']");

        target.mergeAsAlternativeName(source);

        Element merged = target.element;
        assertTwoNameParts(merged, "Thomas", "Müller");

        List<Element> alternatives = merged.getChildren("alternativeName", MCRConstants.MODS_NAMESPACE);
        assertEquals(1, alternatives.size());
        assertTwoNameParts(alternatives.getFirst(), "Thomas", "Meyer");
    }

    /** Asserts that the parent has exactly two mods:namePart children with the given texts, in order. */
    private void assertTwoNameParts(Element parent, String expectedFirst, String expectedSecond) {
        List<Element> parts = parent.getChildren("namePart", MCRConstants.MODS_NAMESPACE);
        assertEquals(2, parts.size());
        assertEquals(expectedFirst, parts.getFirst().getText());
        assertEquals(expectedSecond, parts.get(1).getText());
    }

    /** Builds a personal mods:name from the given XPath predicates and wraps it in a merger. */
    private MCRModspersonNameMerger buildNameEntry(String predicates) throws JaxenException {
        String xPath = "mods:name[@type='personal']" + predicates;
        MCRModspersonNameMerger merger = new MCRModspersonNameMerger();
        merger.setElement(new MCRNodeBuilder().buildElement(xPath, null, null));
        return merger;
    }

    /** Builds a mods:alternativeName containing a family namePart followed by a given namePart. */
    private Element buildAlternativeNameElement(String givenName, String familyName) {
        Element family = buildNamePart("family", familyName);
        Element given = buildNamePart("given", givenName);

        Element alternative = new Element("alternativeName", MCRConstants.MODS_NAMESPACE);
        alternative.setContent(Arrays.asList(family, given));
        return alternative;
    }

    /** Creates a single typed mods:namePart element with the given text. */
    private Element buildNamePart(String type, String text) {
        Element part = new Element("namePart", MCRConstants.MODS_NAMESPACE);
        part.setAttribute("type", type);
        part.setText(text);
        return part;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assertions.assertThrows;

@MyCoReTest
public class MCRNameMergerTest {
Expand Down Expand Up @@ -69,12 +69,7 @@ public void testIsProbablySameAs() throws Exception {
assertTrue(i.isProbablySameAs(a));
assertTrue(i.isProbablySameAs(d));

try {
new MCRNameMerger().setElement(null);
fail("No name should result in NPE while creating a MCRNameMerger");
} catch (NullPointerException ex) {
// exception excepted
}
assertThrows(NullPointerException.class, () -> new MCRNameMerger().setElement(null));
}

@Test
Expand Down
2 changes: 2 additions & 0 deletions mycore-mods/src/test/resources/mycore.properties
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,5 @@ MCR.Category.XPathMapping.Pattern.title-contains=mods:titleInfo[mods:title[conta

# MCRMODSLinkProviderTest#testHandleObjectCreated()
MCR.Metadata.Type.modsperson=true
# MCRModspersonNameMergerTest#testHasAlternativeNameSameAs()
MCR.MODS.Merger.alternativeName=org.mycore.mods.merger.MCRModspersonNameMerger