diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 3366f2b..84fd30b 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -14,7 +14,7 @@ import eu.neverblink.jelly.core.proto.google.v1 as google import eu.neverblink.jelly.core.proto.v1.* import eu.neverblink.jelly.core.utils.IoUtils import org.apache.jena.riot.system.StreamRDFWriter -import org.apache.jena.riot.{Lang, RIOT} +import org.apache.jena.riot.RIOT import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream} import scala.util.Using @@ -77,7 +77,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint val defaultAction: (InputStream, OutputStream) => Unit = - langToJelly(RdfFormat.NQuads.jenaLang, _, _) + langToJelly(RdfFormat.NQuads, _, _) private def loadOptionsFromFile(filename: String): RdfStreamOptions = val inputStream = new FileInputStream(filename) @@ -114,12 +114,12 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable override def matchFormatToAction( format: RdfFormat.Readable, ): Option[(InputStream, OutputStream) => Unit] = format match { - case f: RdfFormat.Jena.Readable => Some(langToJelly(f.jenaLang, _, _)) + case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _)) case f: RdfFormat.JellyText.type => Some(jellyTextToJelly) } /** This method reads the file, rewrites it to Jelly and writes it to some output stream - * @param jenaLang + * @param format * Language that should be converted to Jelly * @param inputStream * InputStream @@ -127,7 +127,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable * OutputStream */ private def langToJelly( - jenaLang: Lang, + format: RdfFormat.Jena, 
inputStream: InputStream, outputStream: OutputStream, ): Unit = @@ -189,8 +189,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable JellyStreamWriter(JenaConverterFactory.getInstance(), variant, outputStream) RiotParserUtil.parse( - getOptions.rdfPerformanceOptions.validateTerms.getOrElse(false), - jenaLang, + getOptions.rdfPerformanceOptions.resolveIris, + format, inputStream, jellyWriter, ) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala index a99c85f..985ec46 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala @@ -250,8 +250,8 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]: val output = StreamRdfCollector() Using.resource(IoUtil.inputStream(fileName)) { is => RiotParserUtil.parse( - getOptions.rdfPerformanceOptions.validateTerms.getOrElse(true), - format.jenaLang, + getOptions.rdfPerformanceOptions.resolveIris, + format, is, output, ) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala index 2e3ddcd..3aae663 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala @@ -6,6 +6,7 @@ import org.apache.jena.riot.{Lang, RDFLanguages} sealed trait RdfFormat: val fullName: String val cliOptions: List[String] + val supportsBaseIri: Boolean object RdfFormat: @@ -29,6 +30,7 @@ object RdfFormat: override val fullName: String = "N-Quads" override val cliOptions: List[String] = List("nq", "nquads") override val jenaLang: Lang = RDFLanguages.NQUADS + override val supportsBaseIri: Boolean = false case object NTriples extends RdfFormat.Jena.StreamWriteable, @@ -37,16 +39,19 @@ object RdfFormat: override val 
fullName: String = "N-Triples" override val cliOptions: List[String] = List("nt", "ntriples") override val jenaLang: Lang = RDFLanguages.NTRIPLES + override val supportsBaseIri: Boolean = false case object Turtle extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: override val fullName: String = "Turtle" override val cliOptions: List[String] = List("ttl", "turtle") override val jenaLang: Lang = RDFLanguages.TURTLE + override val supportsBaseIri: Boolean = true case object TriG extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: override val fullName: String = "TriG" override val cliOptions: List[String] = List("trig") override val jenaLang: Lang = RDFLanguages.TRIG + override val supportsBaseIri: Boolean = true case object RdfProto extends RdfFormat.Jena.StreamWriteable, @@ -55,6 +60,7 @@ object RdfFormat: override val fullName: String = "RDF Protobuf" override val cliOptions: List[String] = List("jenaproto", "jena-proto") override val jenaLang: Lang = RDFLanguages.RDFPROTO + override val supportsBaseIri: Boolean = false case object Thrift extends RdfFormat.Jena.StreamWriteable, @@ -63,16 +69,19 @@ object RdfFormat: override val fullName: String = "RDF Thrift" override val cliOptions: List[String] = List("jenathrift", "jena-thrift") override val jenaLang: Lang = RDFLanguages.RDFTHRIFT + override val supportsBaseIri: Boolean = false case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable: override val fullName: String = "RDF/XML" override val cliOptions: List[String] = List("rdfxml", "rdf-xml") override val jenaLang: Lang = RDFLanguages.RDFXML + override val supportsBaseIri: Boolean = true case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable: override val fullName: String = "JSON-LD" override val cliOptions: List[String] = List("jsonld", "json-ld") override val jenaLang: Lang = RDFLanguages.JSONLD + override val supportsBaseIri: Boolean = true // We do not ever want to write or read 
from Jelly to Jelly // So better not have it as Writeable or Readable, just mark that it's integrated into Jena @@ -80,6 +89,7 @@ object RdfFormat: override val fullName: String = "Jelly binary" override val cliOptions: List[String] = List("jelly") override val jenaLang: Lang = JellyLanguage.JELLY + override val supportsBaseIri: Boolean = false case object JellyText extends RdfFormat, @@ -89,6 +99,7 @@ object RdfFormat: override val fullName: String = "Jelly text" override val cliOptions: List[String] = List("jelly-text") val extension = ".jelly.txt" + override val supportsBaseIri: Boolean = false private val rdfFormats: List[RdfFormat] = List(NQuads, NTriples, JellyBinary, JellyText, Turtle, TriG, RdfProto, Thrift, RdfXml, JsonLd) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala index 166c09f..454632b 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala @@ -6,7 +6,14 @@ import caseapp.HelpMessage */ case class RdfPerformanceOptions( @HelpMessage( - "Enable term validation and IRI resolution (slower). Default: false for all commands except 'rdf validate'.", + "Resolve IRIs with regard to the base specified in the input document. " + + "Disabling this will result in faster parsing of Turtle, JSON-LD and RDF/XML, but will " + + "also potentially result in relative IRIs in the output. " + + "Default: true (ignored for formats that don't support base IRIs).", + ) + resolveIris: Boolean = true, + @HelpMessage( + "Enable term validation (slower). 
Default: false for all commands except 'rdf validate'.", ) validateTerms: Option[Boolean] = None, ) diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala index 2a7683b..483546a 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala @@ -1,12 +1,11 @@ package eu.neverblink.jelly.cli.util.jena import org.apache.jena.graph.impl.LiteralLabel -import org.apache.jena.irix.{IRIProviderAny, SystemIRIx} import scala.util.Try object JenaSystemOptions: - /** Enable faster parsing by disabling strict IRI and literal validation. + /** Enable faster parsing by disabling strict literal validation. * @return * A Success if the operation was successful, or a Failure with the exception if not. The * operation may fail in environments where reflection is not supported. The failure can be @@ -21,13 +20,9 @@ object JenaSystemOptions: toggle(true) private def toggle(enable: Boolean): Try[Unit] = - val valueMode = if enable then - SystemIRIx.reset() - "EAGER" - else - // Set the IRI provider to one that does no validation or resolving whatsoever - SystemIRIx.setProvider(IRIProviderAny.stringProvider()) - "LAZY" + val valueMode = + if enable then "EAGER" + else "LAZY" // Disable/enable eager computation of literal values, which does strict checking. // This requires reflection as the field is private static final. 
diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala index 7004fa3..99d43e2 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala @@ -1,6 +1,8 @@ package eu.neverblink.jelly.cli.util.jena.riot -import org.apache.jena.riot.{Lang, RDFParser, RDFParserRegistry, RIOT} +import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat +import org.apache.jena.riot.lang.LabelToNode +import org.apache.jena.riot.{RDFParser, RDFParserRegistry, RIOT} import org.apache.jena.riot.system.StreamRDF import java.io.InputStream @@ -9,19 +11,24 @@ import java.io.InputStream */ object RiotParserUtil: def parse( - enableTermValidation: Boolean, - lang: Lang, + resolveIris: Boolean, + format: RdfFormat.Jena, source: InputStream, output: StreamRDF, - ): Unit = - if enableTermValidation then - // Standard parser with validation enabled + ): Unit = { + // Only really enable IRI resolution if the format supports it + if resolveIris && format.supportsBaseIri then + // Parser with full IRI resolution RDFParser.source(source) - .lang(lang) + .lang(format.jenaLang) + .labelToNode(LabelToNode.createUseLabelAsGiven()) + .checking(false) + .strict(false) .parse(output) else // Fast parser with validation disabled RDFParserRegistry - .getFactory(lang) - .create(lang, FastParserProfile()) - .read(source, "", lang.getContentType, output, RIOT.getContext) + .getFactory(format.jenaLang) + .create(format.jenaLang, FastParserProfile()) + .read(source, "", format.jenaLang.getContentType, output, RIOT.getContext) + } diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index 9507e00..c9aace6 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ 
b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -883,4 +883,56 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: ) } } + + "handle IRI resolution" when { + "IRI resolution enabled (default), input TTL stream" in withEmptyJellyFile { j => + val input = + """BASE <http://example.org/> + |<a> <http://example.org/p> <b> . + |""".stripMargin + RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes)) + RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--in-format=ttl", "--to", j), + ) + val content = translateJellyBack(new FileInputStream(j)) + val stmts = content.listStatements().asScala.toSeq + stmts.size should be(1) + stmts.head.getSubject.getURI should be("http://example.org/a") + stmts.head.getPredicate.getURI should be("http://example.org/p") + stmts.head.getObject.asResource().getURI should be("http://example.org/b") + } + + "IRI resolution disabled, input TTL stream" in withEmptyJellyFile { j => + val input = + """BASE <http://example.org/> + |<a> <http://example.org/p> <b> . + |""".stripMargin + RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes)) + RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--in-format=ttl", "--resolve-iris=false", "--to", j), + ) + val content = translateJellyBack(new FileInputStream(j)) + val stmts = content.listStatements().asScala.toSeq + stmts.size should be(1) + stmts.head.getSubject.getURI should be("a") + stmts.head.getPredicate.getURI should be("http://example.org/p") + stmts.head.getObject.asResource().getURI should be("b") + } + + "IRI resolution enabled (but ignored), input NT stream" in withEmptyJellyFile { j => + val input = + """<a> <http://example.org/p> <b> . 
+ |""".stripMargin + RdfToJelly.setStdIn(ByteArrayInputStream(input.getBytes)) + RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--to", j), + ) + val content = translateJellyBack(new FileInputStream(j)) + val stmts = content.listStatements().asScala.toSeq + stmts.size should be(1) + stmts.head.getSubject.getURI should be("a") + stmts.head.getPredicate.getURI should be("http://example.org/p") + stmts.head.getObject.asResource().getURI should be("b") + } + } }