Skip to content

Commit d97e3f0

Browse files
authored
Add the --merge-graphs option (#254)
* It works, let me check test coverage
* Doc fixes
* fix behavior inconsistency
1 parent 5129665 commit d97e3f0

File tree

5 files changed

+180
-79
lines changed

5 files changed

+180
-79
lines changed

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala

Lines changed: 49 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,11 @@ object RdfFromJellyPrint extends RdfCommandPrintUtil[RdfFormat.Writeable]:
3131
"If no input file is specified, the input is read from stdin.\n" +
3232
"If no output file is specified, the output is written to stdout.\n" +
3333
"If an error is detected, the program will exit with a non-zero code.\n" +
34-
"Otherwise, the program will exit with code 0.\n" +
34+
"Otherwise, the program will exit with code 0.\n\n" +
3535
"Note: this command works in a streaming manner where possible and scales well to\n" +
3636
"large files. Non-streaming formats (e.g. RDF/XML) by default work on a\n" +
3737
"frame-by-frame basis, but they can be combined into one dataset with the\n" +
38-
"--combine option. RDF/XML will only serialize the default model.",
38+
"--combine option.",
3939
)
4040
@ArgsName("<file-to-convert>")
4141
case class RdfFromJellyOptions(
@@ -56,10 +56,18 @@ case class RdfFromJellyOptions(
5656
)
5757
takeFrames: String = "",
5858
@HelpMessage(
59-
"Add to combine the results into one dataset, when using a non-streaming output format. " +
60-
"Ignored otherwise. Take care with input size, as this option will load everything into memory.",
59+
"Add to combine all stream frames into one dataset, when using a non-streaming output format. " +
60+
"Ignored otherwise. Take care with input size, as this option will load everything into memory. " +
61+
"Default: false.",
6162
)
6263
combine: Boolean = false,
64+
@HelpMessage(
65+
"Discard the named graph information, treating the input as triples in the default graph. " +
66+
"This allows you to convert a Jelly file containing quads to Turtle/N-Triples in a lossy manner. " +
67+
"This option has no impact on frame boundaries. To merge frames, use the --combine option. " +
68+
"Default: false.",
69+
)
70+
mergeGraphs: Boolean = false,
6371
@Recurse
6472
rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(),
6573
) extends HasJellyCommandOptions
@@ -72,8 +80,14 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
7280

7381
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Writeable] = RdfFromJellyPrint
7482

75-
val defaultAction: (InputStream, OutputStream) => Unit =
76-
(in, out) => jellyToLang(in, StreamRDFWriter.getWriterStream(out, RdfFormat.NQuads.jenaLang))
83+
val defaultAction: WriteAction =
84+
(in, out, opt) =>
85+
jellyToLang(
86+
in,
87+
StreamRDFWriter.getWriterStream(out, RdfFormat.NQuads.jenaLang),
88+
RdfFormat.NQuads,
89+
opt,
90+
)
7791

7892
private def takeFrames: IndexRange = IndexRange(getOptions.takeFrames, "--take-frames")
7993

@@ -84,34 +98,34 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
8498
takeFrames
8599
val (inputStream, outputStream) =
86100
this.getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile)
87-
parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile)
101+
parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile, options)
88102

89103
override def matchFormatToAction(
90104
format: RdfFormat.Writeable,
91-
): Option[(InputStream, OutputStream) => Unit] =
105+
): Option[WriteAction] =
92106
(format, getOptions.combine) match
93107
case (j: RdfFormat.Jena.StreamWriteable, _) =>
94-
Some((in, out) => jellyToLang(in, StreamRDFWriter.getWriterStream(out, j.jenaLang)))
108+
Some((in, out, opt) =>
109+
jellyToLang(in, StreamRDFWriter.getWriterStream(out, j.jenaLang), j, opt),
110+
)
95111
case (j: RdfFormat.Jena.BatchWriteable, true) =>
96-
Some((in, out) =>
97-
StreamRdfCombiningBatchWriter(out, j.jenaLang).runAndOutput(x => jellyToLang(in, x)),
112+
Some((in, out, opt) =>
113+
StreamRdfCombiningBatchWriter(out, j.jenaLang).runAndOutput(x =>
114+
jellyToLang(in, x, j, opt),
115+
),
98116
)
99117
case (j: RdfFormat.Jena.BatchWriteable, false) =>
100-
Some((in, out) => jellyToLang(in, StreamRdfBatchWriter(out, j.jenaLang)))
118+
Some((in, out, opt) => jellyToLang(in, StreamRdfBatchWriter(out, j.jenaLang), j, opt))
101119
case (RdfFormat.JellyText, _) => Some(jellyBinaryToText)
102120

103121
/** This method reads the Jelly file, rewrites it to specified format and writes it to some output
104122
* stream
105-
* @param jenaLang
106-
* Language that jelly should be converted to
107-
* @param inputStream
108-
* InputStream
109-
* @param outputStream
110-
* OutputStream
111123
*/
112124
private def jellyToLang(
113125
inputStream: InputStream,
114126
writer: StreamRDF,
127+
format: RdfFormat,
128+
options: RdfFromJellyOptions,
115129
): Unit =
116130
// Whether the output is active at this moment
117131
var outputEnabled = false
@@ -125,7 +139,18 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
125139
}
126140

127141
override def handleQuad(subject: Node, predicate: Node, `object`: Node, graph: Node): Unit = {
128-
if outputEnabled then writer.quad(Quad.create(graph, subject, predicate, `object`))
142+
if outputEnabled then
143+
if options.mergeGraphs then writer.triple(Triple.create(subject, predicate, `object`))
144+
else if format.supportsQuads then
145+
writer.quad(Quad.create(graph, subject, predicate, `object`))
146+
else if Quad.isDefaultGraph(graph) then
147+
writer.triple(Triple.create(subject, predicate, `object`))
148+
else
149+
throw new CriticalException(
150+
f"Encountered a quad in the input ($subject $predicate ${`object`} $graph), " +
151+
f"but the output format ($format) does not support quads. Either choose a different output format " +
152+
"or use the --merge-graphs option to merge all named graphs into the default graph.",
153+
)
129154
}
130155
}
131156

@@ -153,13 +178,12 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
153178

154179
/** This method reads the Jelly file, rewrites it to Jelly text and writes it to some output
155180
* stream
156-
* @param inputStream
157-
* InputStream
158-
* @param outputStream
159-
* OutputStream
160181
*/
161-
private def jellyBinaryToText(inputStream: InputStream, outputStream: OutputStream): Unit =
162-
182+
private def jellyBinaryToText(
183+
inputStream: InputStream,
184+
outputStream: OutputStream,
185+
opt: RdfFromJellyOptions,
186+
): Unit =
163187
inline def writeFrameToOutput(f: RdfStreamFrame, frameIndex: Int): Unit =
164188
// we want to write a comment to the file before each frame
165189
val comment = f"# Frame $frameIndex\n"

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfSerDesCommand.scala

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -17,52 +17,43 @@ abstract class RdfSerDesCommand[
1717
F <: RdfFormat: Typeable,
1818
] extends JellyCommand[T]:
1919

20+
type WriteAction = (InputStream, OutputStream, T) => Unit
21+
2022
override final def group = "rdf"
2123

2224
/** What is the default action if no formats specified */
23-
val defaultAction: (InputStream, OutputStream) => Unit
25+
val defaultAction: WriteAction
2426

2527
/** The print util responsible for handling the specific formats etc the command requires */
2628
lazy val printUtil: RdfCommandPrintUtil[F]
2729

2830
/** The method responsible for matching the format to a given action */
29-
def matchFormatToAction(format: F): Option[(InputStream, OutputStream) => Unit]
31+
def matchFormatToAction(format: F): Option[WriteAction]
3032

3133
/** This method takes care of proper error handling and takes care of the parameter priorities in
3234
* matching the input to a given format conversion
33-
*
34-
* @param inputStream
35-
* InputStream
36-
* @param outputStream
37-
* OutputStream
38-
* @param format
39-
* Option[String]
40-
* @param fileName
41-
* Option[String]
42-
* @throws JellyDeserializationError
43-
* @throws JenaRiotException
44-
* @throws InvalidJellyFile
4535
*/
4636
final def parseFormatArgs(
4737
inputStream: InputStream,
4838
outputStream: OutputStream,
4939
format: Option[String],
5040
fileName: Option[String],
41+
opt: T,
5142
): Unit =
5243
try {
5344
val explicitFormat = if (format.isDefined) RdfFormat.find(format.get) else None
5445
val implicitFormat =
5546
if (fileName.isDefined) RdfFormat.inferFormat(fileName.get) else None
5647
(explicitFormat, implicitFormat) match {
5748
case (Some(f: F), _) =>
58-
matchFormatToAction(f).get(inputStream, outputStream)
49+
matchFormatToAction(f).get(inputStream, outputStream, opt)
5950
// If format explicitly defined but does not match any available actions or formats, we throw an error
6051
case (_, _) if format.isDefined =>
6152
throw InvalidFormatSpecified(format.get, printUtil.validFormatsString)
6253
case (_, Some(f: F)) =>
63-
matchFormatToAction(f).get(inputStream, outputStream)
54+
matchFormatToAction(f).get(inputStream, outputStream, opt)
6455
// If format not explicitly defined but implicitly not understandable we default to this
65-
case (_, _) => defaultAction(inputStream, outputStream)
56+
case (_, _) => defaultAction(inputStream, outputStream, opt)
6657
}
6758
} catch
6859
case e: RiotException =>

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
7676

7777
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint
7878

79-
val defaultAction: (InputStream, OutputStream) => Unit =
80-
langToJelly(RdfFormat.NQuads, _, _)
79+
val defaultAction: WriteAction =
80+
langToJelly(RdfFormat.NQuads, _, _, _)
8181

8282
private def loadOptionsFromFile(filename: String): RdfStreamOptions =
8383
val inputStream = new FileInputStream(filename)
@@ -108,13 +108,14 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
108108
outputStream,
109109
options.inputFormat,
110110
remainingArgs.remaining.headOption,
111+
options,
111112
)
112113
if !isQuietMode then checkAndWarnTypeCombination()
113114

114115
override def matchFormatToAction(
115116
format: RdfFormat.Readable,
116-
): Option[(InputStream, OutputStream) => Unit] = format match {
117-
case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _))
117+
): Option[WriteAction] = format match {
118+
case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _, _))
118119
case f: RdfFormat.JellyText.type => Some(jellyTextToJelly)
119120
}
120121

@@ -130,6 +131,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
130131
format: RdfFormat.Jena,
131132
inputStream: InputStream,
132133
outputStream: OutputStream,
134+
opt: RdfToJellyOptions,
133135
): Unit =
134136
val jellyOpt = getOptions.jellySerializationOptions.asRdfStreamOptions
135137
// Configure the writer
@@ -202,7 +204,11 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
202204
* @param outputStream
203205
* Jelly binary output stream
204206
*/
205-
private def jellyTextToJelly(inputStream: InputStream, outputStream: OutputStream): Unit =
207+
private def jellyTextToJelly(
208+
inputStream: InputStream,
209+
outputStream: OutputStream,
210+
opt: RdfToJellyOptions,
211+
): Unit =
206212
if !isQuietMode then
207213
printLine(
208214
"WARNING: The Jelly text format is not stable and may change in incompatible " +

src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@ sealed trait RdfFormat:
77
val fullName: String
88
val cliOptions: List[String]
99
val supportsBaseIri: Boolean
10+
val supportsQuads: Boolean
11+
override final def toString: String = fullName
1012

1113
object RdfFormat:
1214

@@ -31,6 +33,7 @@ object RdfFormat:
3133
override val cliOptions: List[String] = List("nq", "nquads")
3234
override val jenaLang: Lang = RDFLanguages.NQUADS
3335
override val supportsBaseIri: Boolean = false
36+
override val supportsQuads: Boolean = true
3437

3538
case object NTriples
3639
extends RdfFormat.Jena.StreamWriteable,
@@ -40,18 +43,21 @@ object RdfFormat:
4043
override val cliOptions: List[String] = List("nt", "ntriples")
4144
override val jenaLang: Lang = RDFLanguages.NTRIPLES
4245
override val supportsBaseIri: Boolean = false
46+
override val supportsQuads: Boolean = false
4347

4448
case object Turtle extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
4549
override val fullName: String = "Turtle"
4650
override val cliOptions: List[String] = List("ttl", "turtle")
4751
override val jenaLang: Lang = RDFLanguages.TURTLE
4852
override val supportsBaseIri: Boolean = true
53+
override val supportsQuads: Boolean = false
4954

5055
case object TriG extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
5156
override val fullName: String = "TriG"
5257
override val cliOptions: List[String] = List("trig")
5358
override val jenaLang: Lang = RDFLanguages.TRIG
5459
override val supportsBaseIri: Boolean = true
60+
override val supportsQuads: Boolean = true
5561

5662
case object RdfProto
5763
extends RdfFormat.Jena.StreamWriteable,
@@ -61,6 +67,7 @@ object RdfFormat:
6167
override val cliOptions: List[String] = List("jenaproto", "jena-proto")
6268
override val jenaLang: Lang = RDFLanguages.RDFPROTO
6369
override val supportsBaseIri: Boolean = false
70+
override val supportsQuads: Boolean = true
6471

6572
case object Thrift
6673
extends RdfFormat.Jena.StreamWriteable,
@@ -70,18 +77,21 @@ object RdfFormat:
7077
override val cliOptions: List[String] = List("jenathrift", "jena-thrift")
7178
override val jenaLang: Lang = RDFLanguages.RDFTHRIFT
7279
override val supportsBaseIri: Boolean = false
80+
override val supportsQuads: Boolean = true
7381

7482
case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
7583
override val fullName: String = "RDF/XML"
7684
override val cliOptions: List[String] = List("rdfxml", "rdf-xml")
7785
override val jenaLang: Lang = RDFLanguages.RDFXML
7886
override val supportsBaseIri: Boolean = true
87+
override val supportsQuads: Boolean = false
7988

8089
case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
8190
override val fullName: String = "JSON-LD"
8291
override val cliOptions: List[String] = List("jsonld", "json-ld")
8392
override val jenaLang: Lang = RDFLanguages.JSONLD
8493
override val supportsBaseIri: Boolean = true
94+
override val supportsQuads: Boolean = true
8595

8696
// We do not ever want to write or read from Jelly to Jelly
8797
// So better not have it as Writeable or Readable, just mark that it's integrated into Jena
@@ -90,6 +100,7 @@ object RdfFormat:
90100
override val cliOptions: List[String] = List("jelly")
91101
override val jenaLang: Lang = JellyLanguage.JELLY
92102
override val supportsBaseIri: Boolean = false
103+
override val supportsQuads: Boolean = true
93104

94105
case object JellyText
95106
extends RdfFormat,
@@ -100,6 +111,7 @@ object RdfFormat:
100111
override val cliOptions: List[String] = List("jelly-text")
101112
val extension = ".jelly.txt"
102113
override val supportsBaseIri: Boolean = false
114+
override val supportsQuads: Boolean = true
103115

104116
private val rdfFormats: List[RdfFormat] =
105117
List(NQuads, NTriples, JellyBinary, JellyText, Turtle, TriG, RdfProto, Thrift, RdfXml, JsonLd)

0 commit comments

Comments
 (0)