Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 92 additions & 50 deletions src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Licensed to the Apache Software Foundation (ASF) under one
package gr.forth.ics.isl.x3ml.engine;

import gr.forth.ics.isl.x3ml.X3MLEngine;
import gr.forth.ics.isl.x3ml.engine.X3ML.GeneratedType;
import org.w3c.dom.Node;
import static gr.forth.ics.isl.x3ml.engine.X3ML.ArgValue;
import static gr.forth.ics.isl.x3ml.engine.X3ML.Condition;
Expand All @@ -32,8 +33,11 @@ Licensed to the Apache Software Foundation (ASF) under one
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;

import org.w3c.dom.Attr;
import static gr.forth.ics.isl.x3ml.X3MLEngine.exception;
Expand Down Expand Up @@ -170,9 +174,7 @@ public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMul
});
put(variable_deprecated,VariableScope.WITHIN_MAPPING, generatedValue);
context.putGeneratedValue(extractXPath(node) + unique+"-"+variable, generatedValue);
if(X3MLEngine.ENABLE_ASSOCIATION_TABLE){
this.createAssociationTable(generatedValue, null, extractAssocTableXPath(node));
}
this.createAssociationTable(generatedValue, generator, node);
}
}else{
// String nodeName = extractXPath(node) + unique+"-"+typeAwareVar;
Expand All @@ -197,21 +199,9 @@ public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMul
}
}
});
GeneratedValue genArg=null;
if(generator.getName().equalsIgnoreCase("Literal")){
genArg = context.policy().generate(generator, new Generator.ArgValues() {
@Override
public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMultipleValues) {
return context.input().evaluateArgument2(node, index, generator, name, sourceType);

}
});
}
log.debug("put generated value: {}\t{}", nodeName, generatedValue);
context.putGeneratedValue(nodeName, generatedValue);
if(X3MLEngine.ENABLE_ASSOCIATION_TABLE){
this.createAssociationTable(generatedValue, genArg, extractAssocTableXPath(node));
}
this.createAssociationTable(generatedValue, generator, node);
}
}
}
Expand Down Expand Up @@ -254,20 +244,8 @@ public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMul
}
}
});
GeneratedValue genArg=null;
if(generator.getName().equalsIgnoreCase("Literal")){
genArg = context.policy().generate(generator, new Generator.ArgValues() {
@Override
public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMultipleValues) {
return context.input().evaluateArgument2(node, index, generator, name, sourceType);

}
});
}
context.putGeneratedValue(nodeName, generatedValue);
if(X3MLEngine.ENABLE_ASSOCIATION_TABLE){
this.createAssociationTable(generatedValue, genArg, extractAssocTableXPath(node));
}
this.createAssociationTable(generatedValue, generator, node);
}
}
}
Expand Down Expand Up @@ -315,16 +293,6 @@ public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMul
}
}
});
GeneratedValue genArg=null;
if(generator.getName().equalsIgnoreCase("Literal")){
genArg = context.policy().generate(generator, new Generator.ArgValues() {
@Override
public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMultipleValues) {
return context.input().evaluateArgument2(node, index, generator, name, sourceType);

}
});
}
}

context.putGeneratedValue(nodeName, generatedValue);
Expand All @@ -338,21 +306,95 @@ public boolean conditionFails(Condition condition, GeneratorContext context) {
return condition != null && condition.failure(context);
}

private void createAssociationTable(GeneratedValue generatedValue, GeneratedValue generatedArg, String xpathProper){
String value="";
if(generatedValue.type == X3ML.GeneratedType.LITERAL){
value="\""+generatedValue.text+"\"";
private void createAssociationTable(GeneratedValue generatedValue, GeneratorElement generator, Node node){
if(X3MLEngine.ENABLE_ASSOCIATION_TABLE) {
String xpathProper = extractAssocTableXPath(node);

String value="";
if(generatedValue.type == GeneratedType.LITERAL || generatedValue.type == GeneratedType.TYPED_LITERAL) {
// we assume that there is argument named text for generators that generate Literal or Typed Literals
// and that this argument is of type xpath
String generatedArg =
generator.getArgs()
.stream()
.filter(arg -> SourceType.xpath.name().equals(arg.type))
.findFirst()
.map(arg -> this.rewriteArgXPath(arg.value))
.orElse(null);

value="\""+generatedValue.text+"\"";
if(generatedArg != null)
xpathProper+="/"+generatedArg;
else
xpathProper+="/text()";
}
else if(generatedValue.type == X3ML.GeneratedType.URI) {
value=generatedValue.text;
}

if(generatedArg!=null)
xpathProper+="/"+generatedArg.text;
else
xpathProper+="/text()";
}
else if(generatedValue.type == X3ML.GeneratedType.URI)
value=generatedValue.text;
if(xpathProper!=null){ //Needs a little more inspection this
AssociationTable.addEntry(xpathProper,value);
}
}
}

/** Matches a path segment that already ends with a numeric positional predicate, e.g. {@code a[2]}. */
private static final Pattern NUMERIC_INDEX_PATTERN = Pattern.compile(".*\\[\\d+\\]$");
/** Matches a path segment that ends with a parenthesised group, e.g. a function call such as {@code text()}. */
private static final Pattern FUNCTION_PATTERN = Pattern.compile(".*\\(.*\\)$");

/**
 * Rewrites an argument xpath so that every step without a numeric index points to the first
 * matching element, because that is the default behaviour of non-merging generators when there
 * are multiple intermediary elements.
 *
 * <p>The expression is split on every {@code /} that is outside square brackets, parentheses
 * and quoted string literals. {@code [1]} is appended to each resulting segment unless the
 * segment is empty, is {@code .} or {@code ..}, already ends with a numeric index, or ends with
 * a parenthesised group (function call). Empty segments are kept, so a leading {@code /} and
 * the abbreviated {@code //} step survive unchanged.
 *
 * @param xpath the xpath expression to rewrite; may be {@code null}
 * @return the rewritten expression; {@code null} or the empty string is returned unchanged
 */
public String rewriteArgXPath(String xpath) {
    if (xpath == null || xpath.isEmpty()) {
        return xpath;
    }

    // We need to add [1] to every step without an index, but must not mess up function calls
    // or predicates, so we only split on "/" found at nesting depth zero and outside literals.
    List<String> segments = new ArrayList<>();
    int segmentStart = 0;
    int bracketDepth = 0;
    int parenthesisDepth = 0;
    char openQuote = 0; // 0 while outside a string literal, otherwise the quote char that opened it

    for (int i = 0; i < xpath.length(); i++) {
        char ch = xpath.charAt(i);
        if (openQuote != 0) {
            // Inside a string literal nothing is structural; only the matching quote ends it.
            if (ch == openQuote) {
                openQuote = 0;
            }
            continue;
        }
        switch (ch) {
            case '"':
            case '\'':
                openQuote = ch;
                break;
            case '[':
                bracketDepth++;
                break;
            case ']':
                bracketDepth--;
                break;
            case '(':
                parenthesisDepth++;
                break;
            case ')':
                parenthesisDepth--;
                break;
            case '/':
                if (bracketDepth == 0 && parenthesisDepth == 0) {
                    // An empty segment (leading "/" or the middle of "//") is recorded on
                    // purpose: joining on "/" later restores the original slashes exactly.
                    segments.add(xpath.substring(segmentStart, i));
                    segmentStart = i + 1;
                }
                break;
            default:
                break;
        }
    }
    segments.add(xpath.substring(segmentStart)); // the last segment

    for (int i = 0; i < segments.size(); i++) {
        String segment = segments.get(i);
        // Skip empty segments, relative steps, function calls and already indexed steps.
        if (!segment.isEmpty() && !segment.equals(".") && !segment.equals("..")
                && !NUMERIC_INDEX_PATTERN.matcher(segment).matches()
                && !FUNCTION_PATTERN.matcher(segment).matches()) {
            segments.set(i, segment + "[1]");
        }
    }

    // Re-construct the xpath.
    return String.join("/", segments);
}

/**Adds a new entry in the association table with the given XPATH expression and
Expand Down
52 changes: 0 additions & 52 deletions src/main/java/gr/forth/ics/isl/x3ml/engine/XPathInput.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,58 +151,6 @@ public X3ML.ArgValue evaluateArgument(Node node, int index, GeneratorElement gen
}
return value;
}

/**
 * Resolves the value of the generator argument called {@code argName}.
 *
 * <p>Arguments without a name are given the name {@code "text"} while searching. When several
 * arguments carry the requested name, the last one wins. The source type of the matching
 * argument (falling back to {@code defaultType}) decides how the value is built:
 * <ul>
 *   <li>xpath: the argument value with every "/" replaced by "[1]/", paired with the language
 *       taken from the source node or, failing that, from the mapping;</li>
 *   <li>constant: the argument value as is, with the mapping language;</li>
 *   <li>position: the given {@code index} as a string;</li>
 *   <li>entireInput: the entire xpath input, with the mapping language.</li>
 * </ul>
 *
 * @return the argument value, or {@code null} when an xpath/constant argument is not declared
 *         or the xpath argument value is empty
 */
public X3ML.ArgValue evaluateArgument2(Node node, int index, GeneratorElement generatorElement, String argName, SourceType defaultType) {
    X3ML.GeneratorArg matchedArg = null;
    SourceType effectiveType = defaultType;

    if (generatorElement.getArgs() != null) {
        for (X3ML.GeneratorArg candidate : generatorElement.getArgs()) {
            // unnamed arguments are treated as the "text" argument
            if (candidate.name == null) {
                candidate.name = "text";
            }
            if (!candidate.name.equals(argName)) {
                continue;
            }
            matchedArg = candidate;
            effectiveType = sourceType(candidate.type, defaultType);
        }
    }

    switch (effectiveType) {
        case position:
            return argVal(String.valueOf(index), null);

        case entireInput: {
            X3ML.ArgValue wholeInput = argVal(this.getEntireXpathInput(), languageFromMapping);
            entireInputExportedRefUri = domainURIForNamedgraps;
            return wholeInput;
        }

        case constant:
            if (matchedArg == null) {
                return null;
            }
            return argVal(matchedArg.value, languageFromMapping);

        case xpath: {
            if (matchedArg == null) {
                return null;
            }
            // the language is looked up before the value is inspected, as it always was
            String sourceLanguage = getLanguageFromSource(node);
            String lang = sourceLanguage != null ? sourceLanguage : languageFromMapping;
            if (matchedArg.value.isEmpty()) {
                return null;
            }
            X3ML.ArgValue resolved = argVal(matchedArg.value.replaceAll("/", "[1]/"), lang);
            if (resolved.string.isEmpty()) {
                throw exception("Empty result for arg " + matchedArg.name + " at node " + node.getNodeName() + " in generator\n" + generatorElement);
            }
            return resolved;
        }

        default:
            throw new RuntimeException("Not implemented");
    }
}

/** Returns the value that can be found in the corresponding node, after the evaluation
* of the given XPath expression. More specifically it returns the results after
Expand Down
73 changes: 73 additions & 0 deletions src/test/java/eu/delving/x3ml/TestAssociationTable.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*==============================================================================
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
==============================================================================*/
package eu.delving.x3ml;

import static eu.delving.x3ml.AllTests.document;
import static eu.delving.x3ml.AllTests.engine;
import static eu.delving.x3ml.AllTests.policy;
import static eu.delving.x3ml.AllTests.resource;
import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import gr.forth.ics.isl.x3ml.X3MLEngine;
import gr.forth.ics.isl.x3ml.engine.GeneratorContext;
import gr.forth.ics.isl.x3ml_reverse_utils.AssociationTable;

/**
 * Tests for the association table produced while executing X3ML mappings.
 */
public class TestAssociationTable {

    /** Value of the global flag before this class ran, so that it can be put back afterwards. */
    private static boolean associationTableWasEnabled;

    @BeforeClass
    public static void setUp() {
        associationTableWasEnabled = X3MLEngine.ENABLE_ASSOCIATION_TABLE;
        X3MLEngine.ENABLE_ASSOCIATION_TABLE = true;
    }

    @AfterClass
    public static void tearDown() {
        // Because this flag is a static variable we need to make sure that we reset it after we
        // are done with the association table tests. Restore the previous value instead of
        // forcing false, so whatever was configured before this class ran is left untouched.
        X3MLEngine.ENABLE_ASSOCIATION_TABLE = associationTableWasEnabled;
    }

    @Before
    public void before() {
        // every test starts from an empty association table
        AssociationTable.clearAssociationTable();
    }

    @Test
    public void testCustomLiteralGenerator() throws IOException {
        // test to check that proper xpath is generated not only for default Literal
        // generator but also for a custom one like DateNormalizer
        X3MLEngine engine = engine("/association_table/01_date-mappings.x3ml");
        X3MLEngine.Output output = engine.execute(document("/association_table/01_date-input.xml"),
                policy("/association_table/01_date-generator-policy.xml"));
        output.close();

        String expected = IOUtils.toString(
                resource("/association_table/01_date-expected-association-table.xml"),
                StandardCharsets.UTF_8);
        assertEquals(expected, GeneratorContext.exportAssociationTableToString());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<associationTableEntries>
<associationTableEntry>
<xpath>/record[1]</xpath>
<value>http://vocab.getty.edu/aat/300133025</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]</xpath>
<value>https://artresearch.net/resource/frick/work/991013309269707141</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/controlfield[2]</xpath>
<value>https://artresearch.net/resource/frick/work/991013309269707141/production</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/controlfield[2]</xpath>
<value>https://artresearch.net/resource/frick/work/991013309269707141/production/timespan</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/controlfield[2]/substring(substring-after(text(), 'k'), 1, 4)</xpath>
<value>"1527-12-31T23:59:59"</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/controlfield[2]/substring(substring-after(text(), 'k'), 5, 4)</xpath>
<value>"1527-01-01T00:00:00"</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[2]/subfield[1]</xpath>
<value>https://artresearch.net/resource/frick/work/991013309269707141/production-timespan-appellation/F38A46F2-D8E8-3427-8010-70AA0BAB25FC</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[2]/subfield[1]/text()</xpath>
<value>"1527."</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[3]/subfield[5]</xpath>
<value>https://artresearch.net/resource/frick/type/5B02D2E1-391B-3BF8-9813-758D731C7183</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[3]/subfield[5]</xpath>
<value>https://artresearch.net/resource/frick/work/991013309269707141/acquisition/94D0DF61-CB3E-3D02-B669-4A6F2A4F189F</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[3]/subfield[5]/string-join((../subfield[@code="a"]/text(),"-"),' ')</xpath>
<value>"Location: Frick Collection, - Acquisition: Henry Clay Frick Bequest, - -"</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[4]/subfield[1]</xpath>
<value>https://artresearch.net/resource/frick/type/8E0AC9AA-3D9D-357A-984E-9B1E52105392</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[4]/subfield[1]</xpath>
<value>https://artresearch.net/resource/frick/work/991013309269707141/subject/54F23224-5B7B-3276-8C6D-16F231A0A4BB</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[4]/subfield[1]/../../datafield[@ind2="7" and @tag="650"][1]/subfield[@code="a"][1]/text()</xpath>
<value>"Portraits: Men: With hands: With hats: Head to right."</value>
</associationTableEntry>
<associationTableEntry>
<xpath>/record[1]/datafield[@tag="245"][1]/subfield[@code="a"][1]/text()</xpath>
<value>"Sir Thomas More."</value>
</associationTableEntry>
</associationTableEntries>
Loading