Skip to content

Commit 4881238

Browse files
authored
rdf to-jelly: add converting from Jelly text format (#61)
* rdf to-jelly: add converting from Jelly text format Issue: #56 * clean up build.sbt * Fix formatting * Refactor jellyTextToJelly
1 parent 7a02427 commit 4881238

File tree

7 files changed

+197
-15
lines changed

7 files changed

+197
-15
lines changed

src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import scala.compiletime.uninitialized
88

99
case class JellyCommandOptions(
1010
@HelpMessage("Add to run command in debug mode") debug: Boolean = false,
11+
@HelpMessage("Add to silence any warnings") quiet: Boolean = false,
1112
)
1213

1314
trait HasJellyCommandOptions:
@@ -81,6 +82,11 @@ abstract class JellyCommand[T <: HasJellyCommandOptions: {Parser, Help}] extends
8182
*/
8283
final def isDebugMode: Boolean = this.getOptions.common.debug
8384

85+
/** Tells whether the command runs in quiet mode, i.e. with the `quiet` common option switched on
86+
* (the option that is meant to silence warnings).
87+
*/
88+
final def isQuietMode: Boolean = getOptions.common.quiet
89+
8490
/** Runs the command in test mode from the outside app parsing level
8591
* @param args
8692
* the command line arguments

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormat.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,14 @@ sealed trait RdfFormat:
1010
object RdfFormat:
1111

1212
sealed trait Writeable extends RdfFormat
13+
sealed trait Readable extends RdfFormat
1314

1415
sealed trait Jena extends RdfFormat:
1516
val jenaLang: Lang
1617

1718
object Jena:
1819
sealed trait Writeable extends Jena, RdfFormat.Writeable
19-
sealed trait Readable extends Jena
20+
sealed trait Readable extends Jena, RdfFormat.Readable
2021

2122
case object NQuads extends RdfFormat.Jena.Writeable, RdfFormat.Jena.Readable:
2223
override val fullName: String = "N-Quads"
@@ -65,7 +66,7 @@ object RdfFormat:
6566
override val cliOptions: List[String] = List("jelly")
6667
override val jenaLang: Lang = JellyLanguage.JELLY
6768

68-
case object JellyText extends RdfFormat, RdfFormat.Writeable:
69+
case object JellyText extends RdfFormat, RdfFormat.Writeable, RdfFormat.Readable:
6970
override val fullName: String = "Jelly text format"
7071
override val cliOptions: List[String] = List("jelly-text")
7172
val extension = ".jelly.txt"

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala

Lines changed: 68 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ import caseapp.*
33
import eu.neverblink.jelly.cli.*
44
import eu.neverblink.jelly.cli.command.rdf.RdfFormat.*
55
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
6+
import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
67
import org.apache.jena.riot.system.StreamRDFWriter
78
import org.apache.jena.riot.{Lang, RDFParser, RIOT}
89

9-
import java.io.{InputStream, OutputStream}
10+
import java.io.{BufferedReader, InputStream, InputStreamReader, OutputStream}
11+
import scala.util.Using
1012

11-
object RdfToJellyPrint extends RdfCommandPrintUtil[RdfFormat.Jena.Readable]:
13+
object RdfToJellyPrint extends RdfCommandPrintUtil[RdfFormat.Readable]:
1214
override val defaultFormat: RdfFormat = RdfFormat.NQuads
1315

1416
case class RdfToJellyOptions(
@@ -38,13 +40,13 @@ case class RdfToJellyOptions(
3840
delimited: Boolean = true,
3941
) extends HasJellyCommandOptions
4042

41-
object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable]:
43+
object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Readable]:
4244

4345
override def names: List[List[String]] = List(
4446
List("rdf", "to-jelly"),
4547
)
4648

47-
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Jena.Readable] = RdfToJellyPrint
49+
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint
4850

4951
val defaultAction: (InputStream, OutputStream) => Unit =
5052
langToJelly(RdfFormat.NQuads.jenaLang, _, _)
@@ -62,9 +64,11 @@ object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable]
6264
)
6365

6466
override def matchFormatToAction(
65-
format: RdfFormat.Jena.Readable,
66-
): Option[(InputStream, OutputStream) => Unit] =
67-
Some(langToJelly(format.jenaLang, _, _))
67+
format: RdfFormat.Readable,
68+
): Option[(InputStream, OutputStream) => Unit] = format match {
69+
case f: RdfFormat.Jena.Readable => Some(langToJelly(f.jenaLang, _, _))
70+
case f: RdfFormat.JellyText.type => Some(jellyTextToJelly)
71+
}
6872

6973
/** This method reads the file, rewrites it to Jelly and writes it to some output stream
7074
* @param jenaLang
@@ -97,3 +101,60 @@ object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable]
97101
writerContext,
98102
)
99103
RDFParser.source(inputStream).lang(jenaLang).parse(jellyWriter)
104+
105+
/** Convert Jelly text to Jelly binary, decoding the text input as UTF-8.
106+
* @param inputStream
107+
* Jelly text input stream; it is closed together with its readers once the conversion ends
108+
* @param outputStream
109+
* Jelly binary output stream; frames are written delimited or not, following the delimited option
110+
*/
111+
private def jellyTextToJelly(inputStream: InputStream, outputStream: OutputStream): Unit =
112+
if !isQuietMode then // the stability warning is skipped only in quiet mode
113+
printLine(
114+
"WARNING: The Jelly text format is not stable and may change in incompatible " +
115+
"ways in the future.\nIt's only intended for testing and development.\n" +
116+
"NEVER use it in production.\nUse --quiet to silence this warning.",
117+
true, // presumably routes the message to the error output — TODO confirm against printLine
118+
)
119+
Using.resource(InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8)) { r1 => // explicit charset: never depend on the platform default
120+
Using.resource(BufferedReader(r1)) { reader =>
121+
jellyTextStreamAsFrames(reader)
122+
.map(txt => RdfStreamFrame.fromAscii(txt))
123+
.foreach(frame => {
124+
if getOptions.delimited then frame.writeDelimitedTo(outputStream)
125+
else frame.writeTo(outputStream)
126+
})
127+
}
128+
}
129+
130+
/** Split a Jelly text stream into entries ending on a "}" line and regroup them into frame texts.
131+
* @param reader
132+
* the reader to read from; it must stay open while the returned iterator is being consumed
133+
* @return
134+
* an iterator of Jelly text frames: groups of rowsPerFrame entries when delimited, else one frame
135+
*/
136+
private def jellyTextStreamAsFrames(reader: BufferedReader): Iterator[String] =
137+
val buffer = new StringBuilder() // accumulates the lines of the entry currently being read
138+
val rows = Iterator.continually(()).map { _ => // each step reads one line; the flag says "keep going"
139+
reader.readLine() match {
140+
case null =>
141+
val s = buffer.toString()
142+
buffer.clear()
143+
(Some(s), false) // end of input: takeWhile below stops before this element, so text after the last "}" line is dropped
144+
case line if line.startsWith("}") => // ends the current entry (assumes nested closing braces are indented — TODO confirm)
145+
buffer.append(line)
146+
buffer.append("\n")
147+
val s = buffer.toString()
148+
buffer.clear()
149+
(Some(s), true)
150+
case line =>
151+
buffer.append(line)
152+
buffer.append("\n")
153+
(None, true)
154+
}
155+
}.takeWhile(_._2).collect({ case (Some(row), _) => row }) // stop at end of input, keep only completed entries
156+
157+
// The only options we can respect in this mode are the frame size and the delimited flag
158+
// The others are ignored, because we are doing a 1:1 conversion
159+
if getOptions.delimited then rows.grouped(getOptions.rowsPerFrame).map(_.mkString("\n"))
160+
else Iterator(rows.mkString("\n"))

src/test/scala/eu/neverblink/jelly/cli/command/helpers/DataGenHelper.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package eu.neverblink.jelly.cli.command.helpers
22

33
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
4+
import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
45
import org.apache.jena.rdf.model.{Model, ModelFactory, ResourceFactory}
56
import org.apache.jena.riot.{Lang, RDFDataMgr, RDFLanguages}
67

@@ -43,6 +44,17 @@ object DataGenHelper:
4344
RDFDataMgr.write(outputStream, model, JellyLanguage.JELLY)
4445
outputStream.toByteArray
4546

47+
/** Generate a Jelly frame in the Text Format, taken from the first frame of the generated bytes.
48+
* @param nTriples
49+
* number of triples to generate
50+
* @return
51+
* the protobuf text form of that first frame (any further frames in the bytes are not included)
52+
*/
53+
def generateJellyText(nTriples: Int): String =
54+
val bytes = generateJellyBytes(nTriples)
55+
val frame = RdfStreamFrame.parseDelimitedFrom(ByteArrayInputStream(bytes))
56+
frame.getOrElse(throw IllegalStateException("Generated Jelly data contains no frames")).toProtoString // fail with a clear message instead of a bare Option.get
57+
4658
/** This method generates a Jelly byte input stream with nTriples
4759
* @param nTriples
4860
* number of triples to generate

src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ trait TestFixtureHelper extends BeforeAndAfterAll:
2424

2525
/** The number of triples to generate for the tests
2626
*/
27-
protected val testCardinality: Integer
27+
protected val testCardinality: Int
2828

2929
private def getFileExtension(format: Lang = RDFLanguages.NQUADS): String =
3030
format.getFileExtensions.get(0)
@@ -53,6 +53,14 @@ trait TestFixtureHelper extends BeforeAndAfterAll:
5353
testCode(tempFile.toString)
5454
} finally { tempFile.toFile.delete() }
5555

56+
def withFullJellyTextFile(testCode: (String) => Any): Unit = // runs testCode with the path of a temporary Jelly text fixture file
57+
val tempFile = Files.createTempFile(tmpDir, randomUUID.toString, ".jelly.txt")
58+
try { // fixture generation runs inside try, so the temp file is deleted even when it fails
59+
val text = DataGenHelper.generateJellyText(testCardinality)
60+
Files.write(tempFile, text.getBytes(java.nio.charset.StandardCharsets.UTF_8)) // explicit charset, independent of the platform default
61+
testCode(tempFile.toString)
62+
} finally { tempFile.toFile.delete() }
63+
5664
def withEmptyRandomFile(testCode: (String) => Any): Unit =
5765
val tempFile = Files.createTempFile(tmpDir, randomUUID.toString, ".random")
5866
try {

src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import scala.util.Using
1414

1515
class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
1616

17-
protected val testCardinality: Integer = 33
17+
protected val testCardinality: Int = 33
1818

1919
"rdf from-jelly command" should {
2020
"handle conversion of Jelly to NTriples" when {

src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala

Lines changed: 98 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ package eu.neverblink.jelly.cli.command.rdf
33
import eu.neverblink.jelly.cli.command.helpers.{DataGenHelper, TestFixtureHelper}
44
import eu.neverblink.jelly.cli.{ExitException, InvalidArgument, InvalidFormatSpecified}
55
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
6-
import eu.ostrzyciel.jelly.core.{IoUtils, JellyOptions}
76
import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamFrame}
7+
import eu.ostrzyciel.jelly.core.{IoUtils, JellyOptions}
88
import org.apache.jena.rdf.model.{Model, ModelFactory}
99
import org.apache.jena.riot.{RDFLanguages, RDFParser}
1010
import org.scalatest.matchers.should.Matchers
@@ -15,7 +15,7 @@ import scala.util.Using
1515

1616
class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
1717

18-
protected val testCardinality: Integer = 33
18+
protected val testCardinality: Int = 33
1919

2020
def translateJellyBack(inputStream: InputStream): Model =
2121
Using(inputStream) { content =>
@@ -289,6 +289,100 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
289289
},
290290
jenaLang = RDFLanguages.JSONLD,
291291
)
292+
293+
"Jelly text format (implicit format)" in withFullJellyTextFile { inFile => // no --in-format: the input format is not given explicitly
294+
withEmptyJellyFile { outFile =>
295+
val (out, err) =
296+
RdfToJelly.runTestCommand(
297+
List(
298+
"rdf",
299+
"to-jelly",
300+
inFile,
301+
"--to",
302+
outFile,
303+
),
304+
)
305+
val content = translateJellyBack(new FileInputStream(outFile))
306+
content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) should be(true) // assert the result: a bare containsAll call checks nothing
307+
RdfToJelly.getErrString should include("WARNING: The Jelly text format is not stable")
308+
}
309+
}
310+
311+
"Jelly text format (warning disabled)" in withFullJellyTextFile { inFile => // same conversion, run with --quiet
312+
withEmptyJellyFile { outFile =>
313+
val (out, err) =
314+
RdfToJelly.runTestCommand(
315+
List(
316+
"rdf",
317+
"to-jelly",
318+
inFile,
319+
"--to",
320+
outFile,
321+
"--quiet", // the option under test
322+
),
323+
)
324+
RdfToJelly.getErrString should be("") // with --quiet nothing at all may reach the error output
325+
}
326+
}
327+
328+
"Jelly text format (explicit format parameter)" in withFullJellyTextFile { inFile => // input format forced through --in-format
329+
withEmptyJellyFile { outFile =>
330+
val (out, err) =
331+
RdfToJelly.runTestCommand(
332+
List(
333+
"rdf",
334+
"to-jelly",
335+
inFile,
336+
"--in-format=jelly-text",
337+
"--to",
338+
outFile,
339+
),
340+
)
341+
val content = translateJellyBack(new FileInputStream(outFile))
342+
content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements()) should be(true) // assert the result: without it this test verified nothing
343+
}
344+
}
345+
346+
"Jelly text format (non-delimited output)" in withFullJellyTextFile { inFile => // checks the --delimited=false output shape
347+
withEmptyJellyFile { outFile =>
348+
val (out, err) =
349+
RdfToJelly.runTestCommand(
350+
List(
351+
"rdf",
352+
"to-jelly",
353+
inFile,
354+
"--delimited=false", // ask for non-delimited output
355+
"--to",
356+
outFile,
357+
),
358+
)
359+
val (delimited, is) = IoUtils.autodetectDelimiting(new FileInputStream(outFile)) // NOTE(review): this stream is never closed
360+
delimited should be(false)
361+
val frame = RdfStreamFrame.parseFrom(is) // the whole non-delimited output is parsed as a single frame
362+
frame.rows.size should be > 0
363+
}
364+
}
365+
366+
"Jelly text format (delimited, multiple frames)" in withFullJellyTextFile { inFile => // checks regrouping of rows into frames
367+
withEmptyJellyFile { outFile =>
368+
val (out, err) =
369+
RdfToJelly.runTestCommand(
370+
List(
371+
"rdf",
372+
"to-jelly",
373+
inFile,
374+
"--rows-per-frame=1", // every output frame should carry exactly one row
375+
"--to",
376+
outFile,
377+
),
378+
)
379+
val (delimited, is) = IoUtils.autodetectDelimiting(new FileInputStream(outFile)) // NOTE(review): `is` is unused afterwards and never closed
380+
delimited should be(true)
381+
val frames = readJellyFile(new FileInputStream(outFile))
382+
frames.size should be > testCardinality // presumably non-triple rows (options, lookup entries) add frames beyond one per triple — TODO confirm
383+
for frame <- frames do frame.rows.size should be(1)
384+
}
385+
}
292386
}
293387
"throw proper exception" when {
294388
"invalid format is specified" in withFullJenaFile { f =>
@@ -305,13 +399,13 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
305399
"invalid format out of existing is specified" in withFullJenaFile { f =>
306400
val e =
307401
intercept[ExitException] {
308-
RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "jelly-text"))
402+
RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "jelly"))
309403
}
310404
e.code should be(1)
311405
e.cause.get shouldBe a[InvalidFormatSpecified]
312406
val cause = e.cause.get.asInstanceOf[InvalidFormatSpecified]
313407
cause.validFormats should be(RdfToJellyPrint.validFormatsString)
314-
cause.format should be("jelly-text")
408+
cause.format should be("jelly")
315409
}
316410
"invalid logical stream type is specified" in withFullJenaFile { f =>
317411
val e =

0 commit comments

Comments
 (0)