From 7b65d7cded5f7843ce9274145449de1089c52a17 Mon Sep 17 00:00:00 2001 From: niegrzybkowski Date: Fri, 18 Jul 2025 17:45:17 +0200 Subject: [PATCH 1/7] Initial implementation --- .../jelly/cli/command/rdf/RdfToJelly.scala | 16 ++++++++++++++-- .../util/RdfJellySerializationOptions.scala | 9 ++++++++- .../cli/command/rdf/RdfToJellySpec.scala | 19 +++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index cb67aff..9e4a79f 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -10,12 +10,12 @@ import eu.neverblink.jelly.convert.jena.JenaConverterFactory import eu.neverblink.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage, JellyStreamWriter} import eu.neverblink.jelly.core.{JellyOptions, RdfProtoDeserializationError} import eu.neverblink.jelly.core.proto.google.v1 as google -import eu.neverblink.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType, RdfStreamOptions} +import eu.neverblink.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType, RdfStreamFrame, RdfStreamOptions} import org.apache.jena.riot.lang.LabelToNode import org.apache.jena.riot.system.StreamRDFWriter import org.apache.jena.riot.{Lang, RDFParser, RIOT} -import java.io.{BufferedReader, InputStream, InputStreamReader, OutputStream} +import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream} import scala.util.Using object RdfToJellyPrint extends RdfCommandPrintUtil[RdfFormat.Readable]: @@ -45,6 +45,10 @@ case class RdfToJellyOptions( @ExtraName("in-format") inputFormat: Option[String] = None, @Recurse jellySerializationOptions: RdfJellySerializationOptions = RdfJellySerializationOptions(), + @HelpMessage( + "Jelly file to load and copy serialization options from." + ) + optionsFrom: Option[String] = None, @HelpMessage( "Target number of rows per frame – the writer may slightly exceed that. Default: 256", ) @@ -72,8 +76,16 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable val defaultAction: (InputStream, OutputStream) => Unit = langToJelly(RdfFormat.NQuads.jenaLang, _, _) + def loadOptionsFromFile(filename: String): RdfStreamOptions = + val inputStream = new FileInputStream(filename) + val frame = Using(inputStream) {content => + RdfStreamFrame.parseDelimitedFrom(content) + } + frame.get.getRows.iterator().next().getOptions + override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit = // Infer before touching options + options.optionsFrom.map(loadOptionsFromFile).foreach(options.jellySerializationOptions.setOptions) options.jellySerializationOptions.inferGeneralized( options.inputFormat, remainingArgs.remaining.headOption, diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala index 6bf33d3..88be5b2 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala @@ -41,8 +41,13 @@ case class RdfJellySerializationOptions( `opt.logicalType`: Option[String] = None, ): private object inferred: + var options: Option[RdfStreamOptions] = None var generalized: Boolean = false + def setOptions(rdfStreamOptions: RdfStreamOptions): Unit = inferred.options = Some(rdfStreamOptions) + + + def inferGeneralized(inputFormat: Option[String], filename: Option[String]): Unit = val explicitFormat = inputFormat.flatMap(RdfFormat.find) val implicitFormat = filename.flatMap(RdfFormat.inferFormat) @@ -52,7 +57,7 @@ case class RdfJellySerializationOptions( case _ => false } - lazy val asRdfStreamOptions: RdfStreamOptions = + private def makeStreamOptions(): RdfStreamOptions = val logicalIri = `opt.logicalType` .map(_.trim).filter(_.nonEmpty) .map { @@ -92,3 +97,5 @@ case class RdfJellySerializationOptions( .setMaxDatatypeTableSize(`opt.maxDatatypeTableSize`) .setPhysicalType(physicalType) .setLogicalType(logicalType.getOrElse(LogicalStreamType.UNSPECIFIED)) + + lazy val asRdfStreamOptions: RdfStreamOptions = inferred.options.getOrElse(makeStreamOptions()) diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index e098417..cf7f1bc 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -454,6 +454,25 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: }, jenaLang = RDFLanguages.NTRIPLES, ) + + "loading options from another file" in withSpecificJellyFile( optionsFile => withFullJenaFile(jenaFile => { + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + "--options-from", + optionsFile, + jenaFile, + ), + ) + val frames = readJellyFile(new FileInputStream(optionsFile)) + val opts = frames.head.getRows.asScala.head.getOptions + val newFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes)) + val newOpts = newFrames.head.getRows.asScala.head.getOptions + opts should equal(newOpts) + + }, jenaLang = RDFLanguages.NTRIPLES), + fileName = "options2.jelly") } "Turtle" in { val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.TURTLE) From 68741e50788c9610b8e47071d4c860c55c2edbc3 Mon Sep 17 00:00:00 2001 From: niegrzybkowski Date: Mon, 21 Jul 2025 08:46:34 +0200 Subject: [PATCH 2/7] Formatting --- .../jelly/cli/command/rdf/RdfToJelly.scala | 10 +++-- .../util/RdfJellySerializationOptions.scala | 6 +-- .../cli/command/rdf/RdfToJellySpec.scala | 42 +++++++++++-------- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 9e4a79f..8451c44 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -10,7 +10,7 @@ import eu.neverblink.jelly.convert.jena.JenaConverterFactory import eu.neverblink.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage, JellyStreamWriter} import eu.neverblink.jelly.core.{JellyOptions, RdfProtoDeserializationError} import eu.neverblink.jelly.core.proto.google.v1 as google -import eu.neverblink.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType, RdfStreamFrame, RdfStreamOptions} +import eu.neverblink.jelly.core.proto.v1.* import org.apache.jena.riot.lang.LabelToNode import org.apache.jena.riot.system.StreamRDFWriter import org.apache.jena.riot.{Lang, RDFParser, RIOT} @@ -46,7 +46,7 @@ case class RdfToJellyOptions( @Recurse jellySerializationOptions: RdfJellySerializationOptions = RdfJellySerializationOptions(), @HelpMessage( - "Jelly file to load and copy serialization options from." + "Jelly file to load and copy serialization options from.", ) optionsFrom: Option[String] = None, @HelpMessage( @@ -78,14 +78,16 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable def loadOptionsFromFile(filename: String): RdfStreamOptions = val inputStream = new FileInputStream(filename) - val frame = Using(inputStream) {content => + val frame = Using(inputStream) { content => RdfStreamFrame.parseDelimitedFrom(content) } frame.get.getRows.iterator().next().getOptions override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit = // Infer before touching options - options.optionsFrom.map(loadOptionsFromFile).foreach(options.jellySerializationOptions.setOptions) + options.optionsFrom.map(loadOptionsFromFile).foreach( + options.jellySerializationOptions.setOptions, + ) options.jellySerializationOptions.inferGeneralized( options.inputFormat, remainingArgs.remaining.headOption, diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala index 88be5b2..8bcb7ec 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala @@ -44,9 +44,9 @@ case class RdfJellySerializationOptions( var options: Option[RdfStreamOptions] = None var generalized: Boolean = false - def setOptions(rdfStreamOptions: RdfStreamOptions): Unit = inferred.options = Some(rdfStreamOptions) - - + def setOptions(rdfStreamOptions: RdfStreamOptions): Unit = inferred.options = Some( + rdfStreamOptions, + ) def inferGeneralized(inputFormat: Option[String], filename: Option[String]): Unit = val explicitFormat = inputFormat.flatMap(RdfFormat.find) diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index cf7f1bc..6d7a463 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -455,24 +455,30 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: jenaLang = RDFLanguages.NTRIPLES, ) - "loading options from another file" in withSpecificJellyFile( optionsFile => withFullJenaFile(jenaFile => { - RdfToJelly.runTestCommand( - List( - "rdf", - "to-jelly", - "--options-from", - optionsFile, - jenaFile, - ), - ) - val frames = readJellyFile(new FileInputStream(optionsFile)) - val opts = frames.head.getRows.asScala.head.getOptions - val newFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes)) - val newOpts = newFrames.head.getRows.asScala.head.getOptions - opts should equal(newOpts) - - }, jenaLang = RDFLanguages.NTRIPLES), - fileName = "options2.jelly") + "loading options from another file" in withSpecificJellyFile( + optionsFile => + withFullJenaFile( + jenaFile => { + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + "--options-from", + optionsFile, + jenaFile, + ), + ) + val frames = readJellyFile(new FileInputStream(optionsFile)) + val opts = frames.head.getRows.asScala.head.getOptions + val newFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes)) + val newOpts = newFrames.head.getRows.asScala.head.getOptions + opts should equal(newOpts) + + }, + jenaLang = RDFLanguages.NTRIPLES, + ), + fileName = "options2.jelly", + ) } "Turtle" in { val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.TURTLE) From 515ef3323d75f265d9e3f2aafa93235979199c0c Mon Sep 17 00:00:00 2001 From: niegrzybkowski Date: Mon, 21 Jul 2025 08:50:23 +0200 Subject: [PATCH 3/7] Whoops --- .../eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index 6d7a463..3b448f3 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -477,7 +477,7 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: }, jenaLang = RDFLanguages.NTRIPLES, ), - fileName = "options2.jelly", + fileName = "options.jelly", ) } "Turtle" in { From d90c50c30ccfe1c7a7540e87baa8bb6d5a33e957 Mon Sep 17 00:00:00 2001 From: niegrzybkowski Date: Tue, 22 Jul 2025 14:19:23 +0200 Subject: [PATCH 4/7] Add delimiting detection from option file --- .../neverblink/jelly/cli/command/rdf/RdfToJelly.scala | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 8451c44..969eb2d 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -11,6 +11,7 @@ import eu.neverblink.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage, import eu.neverblink.jelly.core.{JellyOptions, RdfProtoDeserializationError} import eu.neverblink.jelly.core.proto.google.v1 as google import eu.neverblink.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.utils.IoUtils import org.apache.jena.riot.lang.LabelToNode import org.apache.jena.riot.system.StreamRDFWriter import org.apache.jena.riot.{Lang, RDFParser, RIOT} @@ -76,11 +77,13 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable val defaultAction: (InputStream, OutputStream) => Unit = langToJelly(RdfFormat.NQuads.jenaLang, _, _) - def loadOptionsFromFile(filename: String): RdfStreamOptions = + private def loadOptionsFromFile(filename: String): RdfStreamOptions = val inputStream = new FileInputStream(filename) - val frame = Using(inputStream) { content => - RdfStreamFrame.parseDelimitedFrom(content) - } + val response = IoUtils.autodetectDelimiting(inputStream) + val frame = + if response.isDelimited then Using(response.newInput())(RdfStreamFrame.parseDelimitedFrom) + else Using(response.newInput())(RdfStreamFrame.parseFrom) + frame.get.getRows.iterator().next().getOptions override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit = From 67429a3d1c69d63840de45e41c235494ef9441e7 Mon Sep 17 00:00:00 2001 From: niegrzybkowski Date: Tue, 22 Jul 2025 14:23:55 +0200 Subject: [PATCH 5/7] Change the default behavior to allow overriding options loaded from a file --- .../jelly/cli/command/rdf/RdfToJelly.scala | 2 +- .../util/RdfJellySerializationOptions.scala | 73 ++++++++++++++----- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 969eb2d..0eaa138 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -47,7 +47,7 @@ case class RdfToJellyOptions( @Recurse jellySerializationOptions: RdfJellySerializationOptions = RdfJellySerializationOptions(), @HelpMessage( - "Jelly file to load and copy serialization options from.", + "Jelly file to copy serialization options from. Options can be overridden with command line --opt.* options. Default: (unset)", ) optionsFrom: Option[String] = None, @HelpMessage( diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala index 8bcb7ec..2884bb9 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala @@ -6,28 +6,37 @@ import eu.neverblink.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType, import eu.neverblink.jelly.core.utils.LogicalStreamTypeUtils import eu.neverblink.jelly.core.JellyOptions +private val `default.opt.streamName`: String = "" +private val `default.opt.rdfStar`: Boolean = true +private val `default.opt.maxNameTableSize`: Int = JellyOptions.BIG_STRICT.getMaxNameTableSize +private val `default.opt.maxPrefixTableSize`: Int = JellyOptions.BIG_STRICT.getMaxPrefixTableSize +private val `default.opt.maxDatatypeTableSize`: Int = + JellyOptions.BIG_STRICT.getMaxDatatypeTableSize + /** Options for serializing in Jelly-RDF */ case class RdfJellySerializationOptions( @HelpMessage("Name of the output stream (in metadata). Default: (empty)") - `opt.streamName`: String = "", + `opt.streamName`: Option[String] = None, @HelpMessage( "Whether the stream may contain generalized triples, quads, or datasets. Default: (true for N-Triples/N-Quads and Jena binary formats, false otherwise)", ) `opt.generalizedStatements`: Option[Boolean] = None, - @HelpMessage("Whether the stream may contain RDF-star statements. Default: true") - `opt.rdfStar`: Boolean = true, @HelpMessage( - "Maximum size of the name lookup table. Default: " + JellyOptions.BIG_STRICT.getMaxNameTableSize, + "Whether the stream may contain RDF-star statements. Default: " + `default.opt.rdfStar`, + ) + `opt.rdfStar`: Option[Boolean] = None, + @HelpMessage( + "Maximum size of the name lookup table. Default: " + `default.opt.maxNameTableSize`, ) - `opt.maxNameTableSize`: Int = JellyOptions.BIG_STRICT.getMaxNameTableSize, + `opt.maxNameTableSize`: Option[Int] = None, @HelpMessage( - "Maximum size of the prefix lookup table. Default: " + JellyOptions.BIG_STRICT.getMaxPrefixTableSize, + "Maximum size of the prefix lookup table. Default: " + `default.opt.maxPrefixTableSize`, ) - `opt.maxPrefixTableSize`: Int = JellyOptions.BIG_STRICT.getMaxPrefixTableSize, + `opt.maxPrefixTableSize`: Option[Int] = None, @HelpMessage( - "Maximum size of the datatype lookup table. Default: " + JellyOptions.BIG_STRICT.getMaxDatatypeTableSize, + "Maximum size of the datatype lookup table. Default: " + `default.opt.maxDatatypeTableSize`, ) - `opt.maxDatatypeTableSize`: Int = JellyOptions.BIG_STRICT.getMaxDatatypeTableSize, + `opt.maxDatatypeTableSize`: Option[Int] = None, @HelpMessage( "Physical stream type. One of: TRIPLES, QUADS, GRAPHS. " + "Default: either TRIPLES or QUADS, depending on the input format.", @@ -57,7 +66,7 @@ case class RdfJellySerializationOptions( case _ => false } - private def makeStreamOptions(): RdfStreamOptions = + private lazy val logicalType: Option[LogicalStreamType] = val logicalIri = `opt.logicalType` .map(_.trim).filter(_.nonEmpty) .map { @@ -77,7 +86,10 @@ case class RdfJellySerializationOptions( `opt.logicalType`.get, Some("Logical type must be either a full RDF-STaX IRI or a name like `FLAT_QUADS`"), ) - val physicalType = `opt.physicalType`.map(_.trim.toUpperCase) match + logicalType + + private lazy val physicalType: PhysicalStreamType = + `opt.physicalType`.map(_.trim.toUpperCase) match case Some("TRIPLES") => PhysicalStreamType.TRIPLES case Some("QUADS") => PhysicalStreamType.QUADS case Some("GRAPHS") => PhysicalStreamType.GRAPHS @@ -88,14 +100,41 @@ case class RdfJellySerializationOptions( Some("Physical type must be one of: TRIPLES, QUADS, GRAPHS"), ) case None => PhysicalStreamType.UNSPECIFIED + + private def makeStreamOptions(): RdfStreamOptions = RdfStreamOptions.newInstance() - .setStreamName(`opt.streamName`) + .setStreamName(`opt.streamName`.getOrElse(`default.opt.streamName`)) .setGeneralizedStatements(`opt.generalizedStatements`.getOrElse(inferred.generalized)) - .setRdfStar(`opt.rdfStar`) - .setMaxNameTableSize(`opt.maxNameTableSize`) - .setMaxPrefixTableSize(`opt.maxPrefixTableSize`) - .setMaxDatatypeTableSize(`opt.maxDatatypeTableSize`) + .setRdfStar(`opt.rdfStar`.getOrElse(`default.opt.rdfStar`)) + .setMaxNameTableSize(`opt.maxNameTableSize`.getOrElse(`default.opt.maxNameTableSize`)) + .setMaxPrefixTableSize(`opt.maxPrefixTableSize`.getOrElse(`default.opt.maxPrefixTableSize`)) + .setMaxDatatypeTableSize( + `opt.maxDatatypeTableSize`.getOrElse(`default.opt.maxDatatypeTableSize`), + ) .setPhysicalType(physicalType) .setLogicalType(logicalType.getOrElse(LogicalStreamType.UNSPECIFIED)) - lazy val asRdfStreamOptions: RdfStreamOptions = inferred.options.getOrElse(makeStreamOptions()) + private lazy val optionsFromFileWithOverrides: Option[RdfStreamOptions] = + inferred.options.map(x => { + val cloned = x.clone() + if `opt.generalizedStatements`.isDefined then + cloned.setGeneralizedStatements(`opt.generalizedStatements`.get) + if `opt.streamName`.isDefined then // comment to stop scalafmt from making this a mess + cloned.setStreamName(`opt.streamName`.get) + if `opt.rdfStar`.isDefined then // comment to stop scalafmt from making this a mess + cloned.setRdfStar(`opt.rdfStar`.get) + if `opt.maxNameTableSize`.isDefined then + cloned.setMaxNameTableSize(`opt.maxNameTableSize`.get) + if `opt.maxPrefixTableSize`.isDefined then + cloned.setMaxPrefixTableSize(`opt.maxPrefixTableSize`.get) + if `opt.maxDatatypeTableSize`.isDefined then + cloned.setMaxDatatypeTableSize(`opt.maxDatatypeTableSize`.get) + if `opt.physicalType`.isDefined then // comment to stop scalafmt from making this a mess + cloned.setPhysicalType(physicalType) + if `opt.logicalType`.isDefined then + cloned.setLogicalType(logicalType.getOrElse(LogicalStreamType.UNSPECIFIED)) + cloned + }) + + lazy val asRdfStreamOptions: RdfStreamOptions = + optionsFromFileWithOverrides.getOrElse(makeStreamOptions()) From 8ea5cb56db9a281c5f704300cf3bec4c43727765 Mon Sep 17 00:00:00 2001 From: niegrzybkowski Date: Tue, 22 Jul 2025 14:24:29 +0200 Subject: [PATCH 6/7] Add tests for overriding the options and for loading from non-delimited files --- src/test/resources/optionsNonDelimited.jelly | 11 +++++++++ .../cli/command/rdf/RdfToJellySpec.scala | 23 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 src/test/resources/optionsNonDelimited.jelly diff --git a/src/test/resources/optionsNonDelimited.jelly b/src/test/resources/optionsNonDelimited.jelly new file mode 100644 index 0000000..9076949 --- /dev/null +++ b/src/test/resources/optionsNonDelimited.jelly @@ -0,0 +1,11 @@ + + + H P–X px + Rhttp://example.org/resource/ +Jr1 + Rhttp://example.org/property/ +J  announcedAt + Rhttp://example.org/location/ +Jl1 + +*J \ No newline at end of file diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index 3b448f3..bfd19ad 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -479,6 +479,29 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: ), fileName = "options.jelly", ) + "loading options from non-delimited file" in withSpecificJellyFile( + optionsFile => + withFullJenaFile( + jenaFile => { + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + "--options-from", + optionsFile, + jenaFile, + ), + ) + val frame = Using(new FileInputStream(optionsFile))(RdfStreamFrame.parseFrom).get + val opts = frame.getRows.asScala.head.getOptions + val newFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes)) + val newOpts = newFrames.head.getRows.asScala.head.getOptions + opts should equal(newOpts) + }, + jenaLang = RDFLanguages.NTRIPLES, + ), + fileName = "optionsNonDelimited.jelly", + ) } "Turtle" in { val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.TURTLE) From be57496d27d779b43db85f80c055f5b12c38cb86 Mon Sep 17 00:00:00 2001 From: niegrzybkowski Date: Tue, 22 Jul 2025 15:01:29 +0200 Subject: [PATCH 7/7] I swear scalafmt ate my homework --- .../cli/command/rdf/RdfToJellySpec.scala | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index bfd19ad..b7ed437 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -479,6 +479,34 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: ), fileName = "options.jelly", ) + + "loading options from another and overriding" in withSpecificJellyFile( + optionsFile => + withFullJenaFile( + jenaFile => { + RdfToJelly.runTestCommand( + List( + "rdf", + "to-jelly", + "--options-from", + optionsFile, + jenaFile, + "--opt.rdf-star", + "false", + ), + ) + val frames = readJellyFile(new FileInputStream(optionsFile)) + val opts = frames.head.getRows.asScala.head.getOptions + val newFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes)) + val newOpts = newFrames.head.getRows.asScala.head.getOptions + opts shouldNot equal(newOpts) + opts.clone().setRdfStar(true) should equal(newOpts) + }, + jenaLang = RDFLanguages.NTRIPLES, + ), + fileName = "options.jelly", + ) + "loading options from non-delimited file" in withSpecificJellyFile( optionsFile => withFullJenaFile(