diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
index 3366f2b..84fd30b 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
@@ -14,7 +14,7 @@ import eu.neverblink.jelly.core.proto.google.v1 as google
import eu.neverblink.jelly.core.proto.v1.*
import eu.neverblink.jelly.core.utils.IoUtils
import org.apache.jena.riot.system.StreamRDFWriter
-import org.apache.jena.riot.{Lang, RIOT}
+import org.apache.jena.riot.RIOT
import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream}
import scala.util.Using
@@ -77,7 +77,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint
val defaultAction: (InputStream, OutputStream) => Unit =
- langToJelly(RdfFormat.NQuads.jenaLang, _, _)
+ langToJelly(RdfFormat.NQuads, _, _)
private def loadOptionsFromFile(filename: String): RdfStreamOptions =
val inputStream = new FileInputStream(filename)
@@ -114,12 +114,12 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
override def matchFormatToAction(
format: RdfFormat.Readable,
): Option[(InputStream, OutputStream) => Unit] = format match {
- case f: RdfFormat.Jena.Readable => Some(langToJelly(f.jenaLang, _, _))
+ case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _))
case f: RdfFormat.JellyText.type => Some(jellyTextToJelly)
}
/** This method reads the file, rewrites it to Jelly and writes it to some output stream
- * @param jenaLang
+ * @param format
* Language that should be converted to Jelly
* @param inputStream
* InputStream
@@ -127,7 +127,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
* OutputStream
*/
private def langToJelly(
- jenaLang: Lang,
+ format: RdfFormat.Jena,
inputStream: InputStream,
outputStream: OutputStream,
): Unit =
@@ -189,8 +189,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
JellyStreamWriter(JenaConverterFactory.getInstance(), variant, outputStream)
RiotParserUtil.parse(
- getOptions.rdfPerformanceOptions.validateTerms.getOrElse(false),
- jenaLang,
+ getOptions.rdfPerformanceOptions.resolveIris,
+ format,
inputStream,
jellyWriter,
)
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala
index a99c85f..985ec46 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala
@@ -250,8 +250,8 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]:
val output = StreamRdfCollector()
Using.resource(IoUtil.inputStream(fileName)) { is =>
RiotParserUtil.parse(
- getOptions.rdfPerformanceOptions.validateTerms.getOrElse(true),
- format.jenaLang,
+ getOptions.rdfPerformanceOptions.resolveIris,
+ format,
is,
output,
)
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala
index 2e3ddcd..3aae663 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala
@@ -6,6 +6,7 @@ import org.apache.jena.riot.{Lang, RDFLanguages}
sealed trait RdfFormat:
val fullName: String
val cliOptions: List[String]
+ val supportsBaseIri: Boolean
object RdfFormat:
@@ -29,6 +30,7 @@ object RdfFormat:
override val fullName: String = "N-Quads"
override val cliOptions: List[String] = List("nq", "nquads")
override val jenaLang: Lang = RDFLanguages.NQUADS
+ override val supportsBaseIri: Boolean = false
case object NTriples
extends RdfFormat.Jena.StreamWriteable,
@@ -37,16 +39,19 @@ object RdfFormat:
override val fullName: String = "N-Triples"
override val cliOptions: List[String] = List("nt", "ntriples")
override val jenaLang: Lang = RDFLanguages.NTRIPLES
+ override val supportsBaseIri: Boolean = false
case object Turtle extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
override val fullName: String = "Turtle"
override val cliOptions: List[String] = List("ttl", "turtle")
override val jenaLang: Lang = RDFLanguages.TURTLE
+ override val supportsBaseIri: Boolean = true
case object TriG extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
override val fullName: String = "TriG"
override val cliOptions: List[String] = List("trig")
override val jenaLang: Lang = RDFLanguages.TRIG
+ override val supportsBaseIri: Boolean = true
case object RdfProto
extends RdfFormat.Jena.StreamWriteable,
@@ -55,6 +60,7 @@ object RdfFormat:
override val fullName: String = "RDF Protobuf"
override val cliOptions: List[String] = List("jenaproto", "jena-proto")
override val jenaLang: Lang = RDFLanguages.RDFPROTO
+ override val supportsBaseIri: Boolean = false
case object Thrift
extends RdfFormat.Jena.StreamWriteable,
@@ -63,16 +69,19 @@ object RdfFormat:
override val fullName: String = "RDF Thrift"
override val cliOptions: List[String] = List("jenathrift", "jena-thrift")
override val jenaLang: Lang = RDFLanguages.RDFTHRIFT
+ override val supportsBaseIri: Boolean = false
case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
override val fullName: String = "RDF/XML"
override val cliOptions: List[String] = List("rdfxml", "rdf-xml")
override val jenaLang: Lang = RDFLanguages.RDFXML
+ override val supportsBaseIri: Boolean = true
case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
override val fullName: String = "JSON-LD"
override val cliOptions: List[String] = List("jsonld", "json-ld")
override val jenaLang: Lang = RDFLanguages.JSONLD
+ override val supportsBaseIri: Boolean = true
// We do not ever want to write or read from Jelly to Jelly
// So better not have it as Writeable or Readable, just mark that it's integrated into Jena
@@ -80,6 +89,7 @@ object RdfFormat:
override val fullName: String = "Jelly binary"
override val cliOptions: List[String] = List("jelly")
override val jenaLang: Lang = JellyLanguage.JELLY
+ override val supportsBaseIri: Boolean = false
case object JellyText
extends RdfFormat,
@@ -89,6 +99,7 @@ object RdfFormat:
override val fullName: String = "Jelly text"
override val cliOptions: List[String] = List("jelly-text")
val extension = ".jelly.txt"
+ override val supportsBaseIri: Boolean = false
private val rdfFormats: List[RdfFormat] =
List(NQuads, NTriples, JellyBinary, JellyText, Turtle, TriG, RdfProto, Thrift, RdfXml, JsonLd)
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala
index 166c09f..454632b 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala
@@ -6,7 +6,14 @@ import caseapp.HelpMessage
*/
case class RdfPerformanceOptions(
@HelpMessage(
- "Enable term validation and IRI resolution (slower). Default: false for all commands except 'rdf validate'.",
+ "Resolve IRIs with regard to the base specified in the input document. " +
+ "Disabling this will result in faster parsing of Turtle, JSON-LD and RDF/XML, but will " +
+ "also potentially result in relative IRIs in the output. " +
+ "Default: true (ignored for formats that don't support base IRIs).",
+ )
+ resolveIris: Boolean = true,
+ @HelpMessage(
+ "Enable term validation (slower). Default: false for all commands except 'rdf validate'.",
)
validateTerms: Option[Boolean] = None,
)
diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala
index 2a7683b..483546a 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala
@@ -1,12 +1,11 @@
package eu.neverblink.jelly.cli.util.jena
import org.apache.jena.graph.impl.LiteralLabel
-import org.apache.jena.irix.{IRIProviderAny, SystemIRIx}
import scala.util.Try
object JenaSystemOptions:
- /** Enable faster parsing by disabling strict IRI and literal validation.
+ /** Enable faster parsing by disabling strict literal validation.
* @return
* A Success if the operation was successful, or a Failure with the exception if not. The
* operation may fail in environments where reflection is not supported. The failure can be
@@ -21,13 +20,9 @@ object JenaSystemOptions:
toggle(true)
private def toggle(enable: Boolean): Try[Unit] =
- val valueMode = if enable then
- SystemIRIx.reset()
- "EAGER"
- else
- // Set the IRI provider to one that does no validation or resolving whatsoever
- SystemIRIx.setProvider(IRIProviderAny.stringProvider())
- "LAZY"
+ val valueMode =
+ if enable then "EAGER"
+ else "LAZY"
// Disable/enable eager computation of literal values, which does strict checking.
// This requires reflection as the field is private static final.
diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala
index 7004fa3..99d43e2 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala
@@ -1,6 +1,8 @@
package eu.neverblink.jelly.cli.util.jena.riot
-import org.apache.jena.riot.{Lang, RDFParser, RDFParserRegistry, RIOT}
+import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat
+import org.apache.jena.riot.lang.LabelToNode
+import org.apache.jena.riot.{RDFParser, RDFParserRegistry, RIOT}
import org.apache.jena.riot.system.StreamRDF
import java.io.InputStream
@@ -9,19 +11,24 @@ import java.io.InputStream
*/
object RiotParserUtil:
def parse(
- enableTermValidation: Boolean,
- lang: Lang,
+ resolveIris: Boolean,
+ format: RdfFormat.Jena,
source: InputStream,
output: StreamRDF,
- ): Unit =
- if enableTermValidation then
- // Standard parser with validation enabled
+ ): Unit = {
+ // Only really enable IRI resolution if the format supports it
+ if resolveIris && format.supportsBaseIri then
+ // Parser with full IRI resolution
RDFParser.source(source)
- .lang(lang)
+ .lang(format.jenaLang)
+ .labelToNode(LabelToNode.createUseLabelAsGiven())
+ .checking(false)
+ .strict(false)
.parse(output)
else
// Fast parser with validation disabled
RDFParserRegistry
- .getFactory(lang)
- .create(lang, FastParserProfile())
- .read(source, "", lang.getContentType, output, RIOT.getContext)
+ .getFactory(format.jenaLang)
+ .create(format.jenaLang, FastParserProfile())
+ .read(source, "", format.jenaLang.getContentType, output, RIOT.getContext)
+ }
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
index 9507e00..c9aace6 100644
--- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
@@ -883,4 +883,56 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
)
}
}
+
+ "handle IRI resolution" when {
+ "IRI resolution enabled (default), input TTL stream" in withEmptyJellyFile { j =>
+ val input =
+ """BASE <http://example.org/>
+ |<a> <http://example.org/p> <b> .
+ |""".stripMargin
+ RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes))
+ RdfToJelly.runTestCommand(
+ List("rdf", "to-jelly", "--in-format=ttl", "--to", j),
+ )
+ val content = translateJellyBack(new FileInputStream(j))
+ val stmts = content.listStatements().asScala.toSeq
+ stmts.size should be(1)
+ stmts.head.getSubject.getURI should be("http://example.org/a")
+ stmts.head.getPredicate.getURI should be("http://example.org/p")
+ stmts.head.getObject.asResource().getURI should be("http://example.org/b")
+ }
+
+ "IRI resolution disabled, input TTL stream" in withEmptyJellyFile { j =>
+ val input =
+ """BASE <http://example.org/>
+ |<a> <http://example.org/p> <b> .
+ |""".stripMargin
+ RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes))
+ RdfToJelly.runTestCommand(
+ List("rdf", "to-jelly", "--in-format=ttl", "--resolve-iris=false", "--to", j),
+ )
+ val content = translateJellyBack(new FileInputStream(j))
+ val stmts = content.listStatements().asScala.toSeq
+ stmts.size should be(1)
+ stmts.head.getSubject.getURI should be("a")
+ stmts.head.getPredicate.getURI should be("http://example.org/p")
+ stmts.head.getObject.asResource().getURI should be("b")
+ }
+
+ "IRI resolution enabled (but ignored), input NT stream" in withEmptyJellyFile { j =>
+ val input =
+ """<a> <http://example.org/p> <b> .
+ |""".stripMargin
+ RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes))
+ RdfToJelly.runTestCommand(
+ List("rdf", "to-jelly", "--to", j),
+ )
+ val content = translateJellyBack(new FileInputStream(j))
+ val stmts = content.listStatements().asScala.toSeq
+ stmts.size should be(1)
+ stmts.head.getSubject.getURI should be("a")
+ stmts.head.getPredicate.getURI should be("http://example.org/p")
+ stmts.head.getObject.asResource().getURI should be("b")
+ }
+ }
}