Skip to content

Commit 0b94e56

Browse files
authored
Fix BASE resolution for formats that support it (#222)
1 parent d1a7d17 commit 0b94e56

File tree

7 files changed

+101
-29
lines changed

7 files changed

+101
-29
lines changed

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ import eu.neverblink.jelly.core.proto.google.v1 as google
1414
import eu.neverblink.jelly.core.proto.v1.*
1515
import eu.neverblink.jelly.core.utils.IoUtils
1616
import org.apache.jena.riot.system.StreamRDFWriter
17-
import org.apache.jena.riot.{Lang, RIOT}
17+
import org.apache.jena.riot.RIOT
1818

1919
import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream}
2020
import scala.util.Using
@@ -77,7 +77,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
7777
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint
7878

7979
val defaultAction: (InputStream, OutputStream) => Unit =
80-
langToJelly(RdfFormat.NQuads.jenaLang, _, _)
80+
langToJelly(RdfFormat.NQuads, _, _)
8181

8282
private def loadOptionsFromFile(filename: String): RdfStreamOptions =
8383
val inputStream = new FileInputStream(filename)
@@ -114,20 +114,20 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
114114
override def matchFormatToAction(
115115
format: RdfFormat.Readable,
116116
): Option[(InputStream, OutputStream) => Unit] = format match {
117-
case f: RdfFormat.Jena.Readable => Some(langToJelly(f.jenaLang, _, _))
117+
case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _))
118118
case f: RdfFormat.JellyText.type => Some(jellyTextToJelly)
119119
}
120120

121121
/** This method reads the file, rewrites it to Jelly and writes it to some output stream
122-
* @param jenaLang
122+
* @param format
123123
* Language that should be converted to Jelly
124124
* @param inputStream
125125
* InputStream
126126
* @param outputStream
127127
* OutputStream
128128
*/
129129
private def langToJelly(
130-
jenaLang: Lang,
130+
format: RdfFormat.Jena,
131131
inputStream: InputStream,
132132
outputStream: OutputStream,
133133
): Unit =
@@ -189,8 +189,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
189189
JellyStreamWriter(JenaConverterFactory.getInstance(), variant, outputStream)
190190

191191
RiotParserUtil.parse(
192-
getOptions.rdfPerformanceOptions.validateTerms.getOrElse(false),
193-
jenaLang,
192+
getOptions.rdfPerformanceOptions.resolveIris,
193+
format,
194194
inputStream,
195195
jellyWriter,
196196
)

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,8 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]:
250250
val output = StreamRdfCollector()
251251
Using.resource(IoUtil.inputStream(fileName)) { is =>
252252
RiotParserUtil.parse(
253-
getOptions.rdfPerformanceOptions.validateTerms.getOrElse(true),
254-
format.jenaLang,
253+
getOptions.rdfPerformanceOptions.resolveIris,
254+
format,
255255
is,
256256
output,
257257
)

src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import org.apache.jena.riot.{Lang, RDFLanguages}
66
sealed trait RdfFormat:
77
val fullName: String
88
val cliOptions: List[String]
9+
val supportsBaseIri: Boolean
910

1011
object RdfFormat:
1112

@@ -29,6 +30,7 @@ object RdfFormat:
2930
override val fullName: String = "N-Quads"
3031
override val cliOptions: List[String] = List("nq", "nquads")
3132
override val jenaLang: Lang = RDFLanguages.NQUADS
33+
override val supportsBaseIri: Boolean = false
3234

3335
case object NTriples
3436
extends RdfFormat.Jena.StreamWriteable,
@@ -37,16 +39,19 @@ object RdfFormat:
3739
override val fullName: String = "N-Triples"
3840
override val cliOptions: List[String] = List("nt", "ntriples")
3941
override val jenaLang: Lang = RDFLanguages.NTRIPLES
42+
override val supportsBaseIri: Boolean = false
4043

4144
case object Turtle extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
4245
override val fullName: String = "Turtle"
4346
override val cliOptions: List[String] = List("ttl", "turtle")
4447
override val jenaLang: Lang = RDFLanguages.TURTLE
48+
override val supportsBaseIri: Boolean = true
4549

4650
case object TriG extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
4751
override val fullName: String = "TriG"
4852
override val cliOptions: List[String] = List("trig")
4953
override val jenaLang: Lang = RDFLanguages.TRIG
54+
override val supportsBaseIri: Boolean = true
5055

5156
case object RdfProto
5257
extends RdfFormat.Jena.StreamWriteable,
@@ -55,6 +60,7 @@ object RdfFormat:
5560
override val fullName: String = "RDF Protobuf"
5661
override val cliOptions: List[String] = List("jenaproto", "jena-proto")
5762
override val jenaLang: Lang = RDFLanguages.RDFPROTO
63+
override val supportsBaseIri: Boolean = false
5864

5965
case object Thrift
6066
extends RdfFormat.Jena.StreamWriteable,
@@ -63,23 +69,27 @@ object RdfFormat:
6369
override val fullName: String = "RDF Thrift"
6470
override val cliOptions: List[String] = List("jenathrift", "jena-thrift")
6571
override val jenaLang: Lang = RDFLanguages.RDFTHRIFT
72+
override val supportsBaseIri: Boolean = false
6673

6774
case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
6875
override val fullName: String = "RDF/XML"
6976
override val cliOptions: List[String] = List("rdfxml", "rdf-xml")
7077
override val jenaLang: Lang = RDFLanguages.RDFXML
78+
override val supportsBaseIri: Boolean = true
7179

7280
case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
7381
override val fullName: String = "JSON-LD"
7482
override val cliOptions: List[String] = List("jsonld", "json-ld")
7583
override val jenaLang: Lang = RDFLanguages.JSONLD
84+
override val supportsBaseIri: Boolean = true
7685

7786
// We do not ever want to write or read from Jelly to Jelly
7887
// So better not have it as Writeable or Readable, just mark that it's integrated into Jena
7988
case object JellyBinary extends RdfFormat.Jena, RdfFormat.SupportsGeneralizedRdf:
8089
override val fullName: String = "Jelly binary"
8190
override val cliOptions: List[String] = List("jelly")
8291
override val jenaLang: Lang = JellyLanguage.JELLY
92+
override val supportsBaseIri: Boolean = false
8393

8494
case object JellyText
8595
extends RdfFormat,
@@ -89,6 +99,7 @@ object RdfFormat:
8999
override val fullName: String = "Jelly text"
90100
override val cliOptions: List[String] = List("jelly-text")
91101
val extension = ".jelly.txt"
102+
override val supportsBaseIri: Boolean = false
92103

93104
private val rdfFormats: List[RdfFormat] =
94105
List(NQuads, NTriples, JellyBinary, JellyText, Turtle, TriG, RdfProto, Thrift, RdfXml, JsonLd)

src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,14 @@ import caseapp.HelpMessage
66
*/
77
case class RdfPerformanceOptions(
88
@HelpMessage(
9-
"Enable term validation and IRI resolution (slower). Default: false for all commands except 'rdf validate'.",
9+
"Resolve IRIs with regard to the base specified in the input document. " +
10+
"Disabling this will result in faster parsing of Turtle, JSON-LD and RDF/XML, but will " +
11+
"also potentially result in relative IRIs in the output. " +
12+
"Default: true (ignored for formats that don't support base IRIs).",
13+
)
14+
resolveIris: Boolean = true,
15+
@HelpMessage(
16+
"Enable term validation (slower). Default: false for all commands except 'rdf validate'.",
1017
)
1118
validateTerms: Option[Boolean] = None,
1219
)

src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
package eu.neverblink.jelly.cli.util.jena
22

33
import org.apache.jena.graph.impl.LiteralLabel
4-
import org.apache.jena.irix.{IRIProviderAny, SystemIRIx}
54

65
import scala.util.Try
76

87
object JenaSystemOptions:
9-
/** Enable faster parsing by disabling strict IRI and literal validation.
8+
/** Enable faster parsing by disabling strict literal validation.
109
* @return
1110
* A Success if the operation was successful, or a Failure with the exception if not. The
1211
* operation may fail in environments where reflection is not supported. The failure can be
@@ -21,13 +20,9 @@ object JenaSystemOptions:
2120
toggle(true)
2221

2322
private def toggle(enable: Boolean): Try[Unit] =
24-
val valueMode = if enable then
25-
SystemIRIx.reset()
26-
"EAGER"
27-
else
28-
// Set the IRI provider to one that does no validation or resolving whatsoever
29-
SystemIRIx.setProvider(IRIProviderAny.stringProvider())
30-
"LAZY"
23+
val valueMode =
24+
if enable then "EAGER"
25+
else "LAZY"
3126

3227
// Disable/enable eager computation of literal values, which does strict checking.
3328
// This requires reflection as the field is private static final.
src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
package eu.neverblink.jelly.cli.util.jena.riot
22

3-
import org.apache.jena.riot.{Lang, RDFParser, RDFParserRegistry, RIOT}
3+
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat
4+
import org.apache.jena.riot.lang.LabelToNode
5+
import org.apache.jena.riot.{RDFParser, RDFParserRegistry, RIOT}
46
import org.apache.jena.riot.system.StreamRDF
57

68
import java.io.InputStream
@@ -9,19 +11,24 @@ import java.io.InputStream
911
*/
1012
object RiotParserUtil:
1113
def parse(
12-
enableTermValidation: Boolean,
13-
lang: Lang,
14+
resolveIris: Boolean,
15+
format: RdfFormat.Jena,
1416
source: InputStream,
1517
output: StreamRDF,
16-
): Unit =
17-
if enableTermValidation then
18-
// Standard parser with validation enabled
18+
): Unit = {
19+
// Only really enable IRI resolution if the format supports it
20+
if resolveIris && format.supportsBaseIri then
21+
// Parser with full IRI resolution
1922
RDFParser.source(source)
20-
.lang(lang)
23+
.lang(format.jenaLang)
24+
.labelToNode(LabelToNode.createUseLabelAsGiven())
25+
.checking(false)
26+
.strict(false)
2127
.parse(output)
2228
else
2329
// Fast parser with validation disabled
2430
RDFParserRegistry
25-
.getFactory(lang)
26-
.create(lang, FastParserProfile())
27-
.read(source, "", lang.getContentType, output, RIOT.getContext)
31+
.getFactory(format.jenaLang)
32+
.create(format.jenaLang, FastParserProfile())
33+
.read(source, "", format.jenaLang.getContentType, output, RIOT.getContext)
34+
}

src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,4 +883,56 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
883883
)
884884
}
885885
}
886+
887+
"handle IRI resolution" when {
888+
"IRI resolution enabled (default), input TTL stream" in withEmptyJellyFile { j =>
889+
val input =
890+
"""BASE <http://example.org/>
891+
|<a> <http://example.org/p> <b> .
892+
|""".stripMargin
893+
RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes))
894+
RdfToJelly.runTestCommand(
895+
List("rdf", "to-jelly", "--in-format=ttl", "--to", j),
896+
)
897+
val content = translateJellyBack(new FileInputStream(j))
898+
val stmts = content.listStatements().asScala.toSeq
899+
stmts.size should be(1)
900+
stmts.head.getSubject.getURI should be("http://example.org/a")
901+
stmts.head.getPredicate.getURI should be("http://example.org/p")
902+
stmts.head.getObject.asResource().getURI should be("http://example.org/b")
903+
}
904+
905+
"IRI resolution disabled, input TTL stream" in withEmptyJellyFile { j =>
906+
val input =
907+
"""BASE <http://example.org/>
908+
|<a> <http://example.org/p> <b> .
909+
|""".stripMargin
910+
RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes))
911+
RdfToJelly.runTestCommand(
912+
List("rdf", "to-jelly", "--in-format=ttl", "--resolve-iris=false", "--to", j),
913+
)
914+
val content = translateJellyBack(new FileInputStream(j))
915+
val stmts = content.listStatements().asScala.toSeq
916+
stmts.size should be(1)
917+
stmts.head.getSubject.getURI should be("a")
918+
stmts.head.getPredicate.getURI should be("http://example.org/p")
919+
stmts.head.getObject.asResource().getURI should be("b")
920+
}
921+
922+
"IRI resolution enabled (but ignored), input NT stream" in withEmptyJellyFile { j =>
923+
val input =
924+
"""<a> <http://example.org/p> <b> .
925+
|""".stripMargin
926+
RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes))
927+
RdfToJelly.runTestCommand(
928+
List("rdf", "to-jelly", "--to", j),
929+
)
930+
val content = translateJellyBack(new FileInputStream(j))
931+
val stmts = content.listStatements().asScala.toSeq
932+
stmts.size should be(1)
933+
stmts.head.getSubject.getURI should be("a")
934+
stmts.head.getPredicate.getURI should be("http://example.org/p")
935+
stmts.head.getObject.asResource().getURI should be("b")
936+
}
937+
}
886938
}

0 commit comments

Comments
 (0)