Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ import eu.neverblink.jelly.convert.jena.JenaConverterFactory
import eu.neverblink.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage, JellyStreamWriter}
import eu.neverblink.jelly.core.{JellyOptions, RdfProtoDeserializationError}
import eu.neverblink.jelly.core.proto.google.v1 as google
import eu.neverblink.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType, RdfStreamOptions}
import eu.neverblink.jelly.core.proto.v1.*
import eu.neverblink.jelly.core.utils.IoUtils
import org.apache.jena.riot.lang.LabelToNode
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.riot.{Lang, RDFParser, RIOT}

import java.io.{BufferedReader, InputStream, InputStreamReader, OutputStream}
import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream}
import scala.util.Using

object RdfToJellyPrint extends RdfCommandPrintUtil[RdfFormat.Readable]:
Expand Down Expand Up @@ -45,6 +46,10 @@ case class RdfToJellyOptions(
@ExtraName("in-format") inputFormat: Option[String] = None,
@Recurse
jellySerializationOptions: RdfJellySerializationOptions = RdfJellySerializationOptions(),
@HelpMessage(
"Jelly file to copy serialization options from. Options can be overridden with command line --opt.* options. Default: (unset)",
)
optionsFrom: Option[String] = None,
@HelpMessage(
"Target number of rows per frame – the writer may slightly exceed that. Default: 256",
)
Expand Down Expand Up @@ -72,8 +77,20 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
/** Default conversion action: feeds the input to `langToJelly` using the N-Quads Jena language. */
val defaultAction: (InputStream, OutputStream) => Unit =
  (in, out) => langToJelly(RdfFormat.NQuads.jenaLang, in, out)

/** Reads the stream options carried by the first row of the first frame of a Jelly file.
  *
  * Both delimited and non-delimited files are accepted; the variant is detected with
  * `IoUtils.autodetectDelimiting`.
  *
  * @param filename
  *   path of the Jelly file to copy the options from
  * @return
  *   the options stored in the first row of the first frame
  * @throws NoSuchElementException
  *   if no frame could be read or the first frame contains no rows
  */
private def loadOptionsFromFile(filename: String): RdfStreamOptions =
  // Own the file handle for the whole operation, so it is released even when
  // delimiter detection or parsing throws before the wrapped stream gets closed.
  Using.resource(new FileInputStream(filename)) { fileStream =>
    val response = IoUtils.autodetectDelimiting(fileStream)
    val frame = Using.resource(response.newInput()) { in =>
      if response.isDelimited then RdfStreamFrame.parseDelimitedFrom(in)
      else RdfStreamFrame.parseFrom(in)
    }
    // Option(...) guards against a null frame (presumably what a delimited parser
    // yields on an empty input — TODO confirm); an empty frame is rejected as well.
    Option(frame)
      .map(_.getRows.iterator())
      .filter(_.hasNext)
      .map(_.next().getOptions)
      .getOrElse(
        throw new NoSuchElementException(
          s"No stream options found in Jelly file '$filename': the first frame is missing or empty",
        ),
      )
  }

override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit =
// Infer before touching options
options.optionsFrom.map(loadOptionsFromFile).foreach(
options.jellySerializationOptions.setOptions,
)
options.jellySerializationOptions.inferGeneralized(
options.inputFormat,
remainingArgs.remaining.headOption,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,37 @@ import eu.neverblink.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType,
import eu.neverblink.jelly.core.utils.LogicalStreamTypeUtils
import eu.neverblink.jelly.core.JellyOptions

// Fallback values for the `opt.*` serialization flags. They are applied with `getOrElse`
// when a flag is not given on the command line, and are also concatenated into the
// help messages of the corresponding flags.
private val `default.opt.streamName`: String = ""
private val `default.opt.rdfStar`: Boolean = true
// The lookup table size defaults are taken from the JellyOptions.BIG_STRICT preset.
private val `default.opt.maxNameTableSize`: Int = JellyOptions.BIG_STRICT.getMaxNameTableSize
private val `default.opt.maxPrefixTableSize`: Int = JellyOptions.BIG_STRICT.getMaxPrefixTableSize
private val `default.opt.maxDatatypeTableSize`: Int =
JellyOptions.BIG_STRICT.getMaxDatatypeTableSize

/** Options for serializing in Jelly-RDF */
case class RdfJellySerializationOptions(
@HelpMessage("Name of the output stream (in metadata). Default: (empty)")
`opt.streamName`: String = "",
`opt.streamName`: Option[String] = None,
@HelpMessage(
"Whether the stream may contain generalized triples, quads, or datasets. Default: (true for N-Triples/N-Quads and Jena binary formats, false otherwise)",
)
`opt.generalizedStatements`: Option[Boolean] = None,
@HelpMessage("Whether the stream may contain RDF-star statements. Default: true")
`opt.rdfStar`: Boolean = true,
@HelpMessage(
"Maximum size of the name lookup table. Default: " + JellyOptions.BIG_STRICT.getMaxNameTableSize,
"Whether the stream may contain RDF-star statements. Default: " + `default.opt.rdfStar`,
)
`opt.rdfStar`: Option[Boolean] = None,
@HelpMessage(
"Maximum size of the name lookup table. Default: " + `default.opt.maxNameTableSize`,
)
`opt.maxNameTableSize`: Int = JellyOptions.BIG_STRICT.getMaxNameTableSize,
`opt.maxNameTableSize`: Option[Int] = None,
@HelpMessage(
"Maximum size of the prefix lookup table. Default: " + JellyOptions.BIG_STRICT.getMaxPrefixTableSize,
"Maximum size of the prefix lookup table. Default: " + `default.opt.maxPrefixTableSize`,
)
`opt.maxPrefixTableSize`: Int = JellyOptions.BIG_STRICT.getMaxPrefixTableSize,
`opt.maxPrefixTableSize`: Option[Int] = None,
@HelpMessage(
"Maximum size of the datatype lookup table. Default: " + JellyOptions.BIG_STRICT.getMaxDatatypeTableSize,
"Maximum size of the datatype lookup table. Default: " + `default.opt.maxDatatypeTableSize`,
)
`opt.maxDatatypeTableSize`: Int = JellyOptions.BIG_STRICT.getMaxDatatypeTableSize,
`opt.maxDatatypeTableSize`: Option[Int] = None,
@HelpMessage(
"Physical stream type. One of: TRIPLES, QUADS, GRAPHS. " +
"Default: either TRIPLES or QUADS, depending on the input format.",
Expand All @@ -41,8 +50,13 @@ case class RdfJellySerializationOptions(
`opt.logicalType`: Option[String] = None,
):
// Mutable state that is filled in after the options object has been constructed.
private object inferred:
// Options supplied through setOptions; None until setOptions is called.
var options: Option[RdfStreamOptions] = None
// Fallback for `opt.generalizedStatements` when that flag is not given.
// NOTE(review): presumably assigned by inferGeneralized — the assignment is not visible here.
var generalized: Boolean = false

/** Stores externally supplied stream options to be used as the base for the final options.
  *
  * @param rdfStreamOptions
  *   options to remember; flags given explicitly on the command line are applied on top of them
  */
def setOptions(rdfStreamOptions: RdfStreamOptions): Unit =
  inferred.options = Some(rdfStreamOptions)

def inferGeneralized(inputFormat: Option[String], filename: Option[String]): Unit =
val explicitFormat = inputFormat.flatMap(RdfFormat.find)
val implicitFormat = filename.flatMap(RdfFormat.inferFormat)
Expand All @@ -52,7 +66,7 @@ case class RdfJellySerializationOptions(
case _ => false
}

lazy val asRdfStreamOptions: RdfStreamOptions =
private lazy val logicalType: Option[LogicalStreamType] =
val logicalIri = `opt.logicalType`
.map(_.trim).filter(_.nonEmpty)
.map {
Expand All @@ -72,7 +86,10 @@ case class RdfJellySerializationOptions(
`opt.logicalType`.get,
Some("Logical type must be either a full RDF-STaX IRI or a name like `FLAT_QUADS`"),
)
val physicalType = `opt.physicalType`.map(_.trim.toUpperCase) match
logicalType

private lazy val physicalType: PhysicalStreamType =
`opt.physicalType`.map(_.trim.toUpperCase) match
case Some("TRIPLES") => PhysicalStreamType.TRIPLES
case Some("QUADS") => PhysicalStreamType.QUADS
case Some("GRAPHS") => PhysicalStreamType.GRAPHS
Expand All @@ -83,12 +100,41 @@ case class RdfJellySerializationOptions(
Some("Physical type must be one of: TRIPLES, QUADS, GRAPHS"),
)
case None => PhysicalStreamType.UNSPECIFIED

/** Builds the stream options from the command-line flags alone.
  *
  * Every flag that was not given falls back to its `default.opt.*` value, except for
  * `opt.generalizedStatements`, which falls back to the inferred value.
  *
  * @return
  *   a freshly created options instance
  */
private def makeStreamOptions(): RdfStreamOptions =
  // Each setter is called exactly once, always with an unwrapped value: the flags are
  // Option-typed, so they must go through getOrElse before reaching the setter.
  RdfStreamOptions.newInstance()
    .setStreamName(`opt.streamName`.getOrElse(`default.opt.streamName`))
    .setGeneralizedStatements(`opt.generalizedStatements`.getOrElse(inferred.generalized))
    .setRdfStar(`opt.rdfStar`.getOrElse(`default.opt.rdfStar`))
    .setMaxNameTableSize(`opt.maxNameTableSize`.getOrElse(`default.opt.maxNameTableSize`))
    .setMaxPrefixTableSize(`opt.maxPrefixTableSize`.getOrElse(`default.opt.maxPrefixTableSize`))
    .setMaxDatatypeTableSize(
      `opt.maxDatatypeTableSize`.getOrElse(`default.opt.maxDatatypeTableSize`),
    )
    .setPhysicalType(physicalType)
    .setLogicalType(logicalType.getOrElse(LogicalStreamType.UNSPECIFIED))

/** Options supplied through `setOptions`, with every explicitly given command-line flag
  * applied on top of a copy of them. `None` when no options were supplied.
  */
private lazy val optionsFromFileWithOverrides: Option[RdfStreamOptions] =
  inferred.options.map { base =>
    // Work on a copy so the supplied options object is never mutated.
    val merged = base.clone()
    `opt.generalizedStatements`.foreach(value => merged.setGeneralizedStatements(value))
    `opt.streamName`.foreach(value => merged.setStreamName(value))
    `opt.rdfStar`.foreach(value => merged.setRdfStar(value))
    `opt.maxNameTableSize`.foreach(value => merged.setMaxNameTableSize(value))
    `opt.maxPrefixTableSize`.foreach(value => merged.setMaxPrefixTableSize(value))
    `opt.maxDatatypeTableSize`.foreach(value => merged.setMaxDatatypeTableSize(value))
    // The stream types go through the parsed lazy values rather than the raw strings,
    // and are only evaluated when the corresponding flag was actually given.
    `opt.physicalType`.foreach(_ => merged.setPhysicalType(physicalType))
    `opt.logicalType`.foreach { _ =>
      merged.setLogicalType(logicalType.getOrElse(LogicalStreamType.UNSPECIFIED))
    }
    merged
  }

/** The effective stream options: the supplied options with command-line overrides when
  * available, otherwise options built from the flags and their defaults.
  */
lazy val asRdfStreamOptions: RdfStreamOptions =
  optionsFromFileWithOverrides match
    case Some(fromFile) => fromFile
    case None => makeStreamOptions()
11 changes: 11 additions & 0 deletions src/test/resources/optionsNonDelimited.jelly
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@


 H�P�X px
Rhttp://example.org/resource/
Jr1
Rhttp://example.org/property/
J announcedAt
Rhttp://example.org/location/
Jl1

*J
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,82 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
},
jenaLang = RDFLanguages.NTRIPLES,
)

// The options written to the output must equal the ones stored in the --options-from file.
"loading options from another file" in withSpecificJellyFile(
  sourceOptionsPath =>
    withFullJenaFile(
      rdfInputPath => {
        val cliArgs = List("rdf", "to-jelly", "--options-from", sourceOptionsPath, rdfInputPath)
        RdfToJelly.runTestCommand(cliArgs)

        // Options row of the reference file
        val expectedOptions =
          readJellyFile(new FileInputStream(sourceOptionsPath)).head.getRows.asScala.head.getOptions
        // Options row of the freshly produced output
        val producedFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes))
        val actualOptions = producedFrames.head.getRows.asScala.head.getOptions

        expectedOptions should equal(actualOptions)
      },
      jenaLang = RDFLanguages.NTRIPLES,
    ),
  fileName = "options.jelly",
)

// Options copied from a file can be changed by an explicit --opt.* flag.
"loading options from another and overriding" in withSpecificJellyFile(
  sourceOptionsPath =>
    withFullJenaFile(
      rdfInputPath => {
        // NOTE(review): `--opt.rdf-star` and `false` are passed as two separate arguments,
        // yet the final assertion expects rdfStar == true in the output. Confirm how the
        // argument parser treats the trailing `false` and whether `--opt.rdf-star=false`
        // was intended.
        val cliArgs = List(
          "rdf",
          "to-jelly",
          "--options-from",
          sourceOptionsPath,
          rdfInputPath,
          "--opt.rdf-star",
          "false",
        )
        RdfToJelly.runTestCommand(cliArgs)

        // Options row of the reference file
        val originalOptions =
          readJellyFile(new FileInputStream(sourceOptionsPath)).head.getRows.asScala.head.getOptions
        // Options row of the freshly produced output
        val producedFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes))
        val overriddenOptions = producedFrames.head.getRows.asScala.head.getOptions

        // The output differs from the reference file only in the rdfStar setting.
        originalOptions shouldNot equal(overriddenOptions)
        originalOptions.clone().setRdfStar(true) should equal(overriddenOptions)
      },
      jenaLang = RDFLanguages.NTRIPLES,
    ),
  fileName = "options.jelly",
)

// Same as the plain --options-from case, but the reference file holds a single
// non-delimited frame, so it is parsed here with RdfStreamFrame.parseFrom.
"loading options from non-delimited file" in withSpecificJellyFile(
  sourceOptionsPath =>
    withFullJenaFile(
      rdfInputPath => {
        val cliArgs = List("rdf", "to-jelly", "--options-from", sourceOptionsPath, rdfInputPath)
        RdfToJelly.runTestCommand(cliArgs)

        // Options row of the reference file
        val referenceFrame =
          Using(new FileInputStream(sourceOptionsPath))(RdfStreamFrame.parseFrom).get
        val expectedOptions = referenceFrame.getRows.asScala.head.getOptions
        // Options row of the freshly produced output
        val producedFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes))
        val actualOptions = producedFrames.head.getRows.asScala.head.getOptions

        expectedOptions should equal(actualOptions)
      },
      jenaLang = RDFLanguages.NTRIPLES,
    ),
  fileName = "optionsNonDelimited.jelly",
)
}
"Turtle" in {
val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.TURTLE)
Expand Down