diff --git a/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala b/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala
index 7f648a7..63a6c2c 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala
@@ -8,6 +8,7 @@ import scala.compiletime.uninitialized
 
 case class JellyCommandOptions(
     @HelpMessage("Add to run command in debug mode") debug: Boolean = false,
+    @HelpMessage("Add to silence any warnings") quiet: Boolean = false,
 )
 
 trait HasJellyCommandOptions:
@@ -81,6 +82,11 @@ abstract class JellyCommand[T <: HasJellyCommandOptions: {Parser, Help}] extends
     */
   final def isDebugMode: Boolean = this.getOptions.common.debug
 
+  /** Returns information about whether the command is in quiet mode (which suppresses all warnings)
+    * or not
+    */
+  final def isQuietMode: Boolean = this.getOptions.common.quiet
+
   /** Runs the command in test mode from the outside app parsing level
     * @param args
     *   the command line arguments
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormat.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormat.scala
index f56240f..3abddc0 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormat.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormat.scala
@@ -10,13 +10,14 @@ sealed trait RdfFormat:
 
 object RdfFormat:
   sealed trait Writeable extends RdfFormat
+  sealed trait Readable extends RdfFormat
 
   sealed trait Jena extends RdfFormat:
     val jenaLang: Lang
 
   object Jena:
     sealed trait Writeable extends Jena, RdfFormat.Writeable
-    sealed trait Readable extends Jena
+    sealed trait Readable extends Jena, RdfFormat.Readable
 
   case object NQuads extends RdfFormat.Jena.Writeable, RdfFormat.Jena.Readable:
     override val fullName: String = "N-Quads"
@@ -65,7 +66,7 @@ object RdfFormat:
     override val cliOptions: List[String] = List("jelly")
     override val jenaLang: Lang = JellyLanguage.JELLY
 
-  case object JellyText extends RdfFormat, RdfFormat.Writeable:
+  case object JellyText extends RdfFormat, RdfFormat.Writeable, RdfFormat.Readable:
     override val fullName: String = "Jelly text format"
     override val cliOptions: List[String] = List("jelly-text")
     val extension = ".jelly.txt"
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
index 9d96a1c..a50f468 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
@@ -3,12 +3,15 @@ import caseapp.*
 import eu.neverblink.jelly.cli.*
 import eu.neverblink.jelly.cli.command.rdf.RdfFormat.*
 import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
+import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
 import org.apache.jena.riot.system.StreamRDFWriter
 import org.apache.jena.riot.{Lang, RDFParser, RIOT}
 
-import java.io.{InputStream, OutputStream}
+import java.io.{BufferedReader, InputStream, InputStreamReader, OutputStream}
+import java.nio.charset.StandardCharsets
+import scala.util.Using
 
-object RdfToJellyPrint extends RdfCommandPrintUtil[RdfFormat.Jena.Readable]:
+object RdfToJellyPrint extends RdfCommandPrintUtil[RdfFormat.Readable]:
   override val defaultFormat: RdfFormat = RdfFormat.NQuads
 
 case class RdfToJellyOptions(
@@ -38,13 +41,13 @@ case class RdfToJellyOptions(
     delimited: Boolean = true,
 ) extends HasJellyCommandOptions
 
-object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable]:
+object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Readable]:
 
   override def names: List[List[String]] = List(
     List("rdf", "to-jelly"),
   )
 
-  lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Jena.Readable] = RdfToJellyPrint
+  lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint
 
   val defaultAction: (InputStream, OutputStream) => Unit =
     langToJelly(RdfFormat.NQuads.jenaLang, _, _)
@@ -62,9 +65,11 @@ object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable]
     )
 
   override def matchFormatToAction(
-      format: RdfFormat.Jena.Readable,
-  ): Option[(InputStream, OutputStream) => Unit] =
-    Some(langToJelly(format.jenaLang, _, _))
+      format: RdfFormat.Readable,
+  ): Option[(InputStream, OutputStream) => Unit] = format match {
+    case f: RdfFormat.Jena.Readable => Some(langToJelly(f.jenaLang, _, _))
+    case RdfFormat.JellyText => Some(jellyTextToJelly)
+  }
 
   /** This method reads the file, rewrites it to Jelly and writes it to some output stream
     * @param jenaLang
@@ -97,3 +102,60 @@ object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Jena.Readable]
       writerContext,
     )
     RDFParser.source(inputStream).lang(jenaLang).parse(jellyWriter)
+
+  /** Convert Jelly text to Jelly binary. Unless quiet mode is on, a stability warning is printed
+    * first. The input is decoded as UTF-8 regardless of the platform default charset.
+    * @param inputStream
+    *   Jelly text input stream
+    * @param outputStream
+    *   Jelly binary output stream
+    */
+  private def jellyTextToJelly(inputStream: InputStream, outputStream: OutputStream): Unit =
+    if !isQuietMode then
+      printLine(
+        "WARNING: The Jelly text format is not stable and may change in incompatible " +
+          "ways in the future.\nIt's only intended for testing and development.\n" +
+          "NEVER use it in production.\nUse --quiet to silence this warning.",
+        true,
+      )
+    Using.resource(InputStreamReader(inputStream, StandardCharsets.UTF_8)) { r1 =>
+      Using.resource(BufferedReader(r1)) { reader =>
+        jellyTextStreamAsFrames(reader)
+          .map(txt => RdfStreamFrame.fromAscii(txt))
+          .foreach(frame => {
+            if getOptions.delimited then frame.writeDelimitedTo(outputStream)
+            else frame.writeTo(outputStream)
+          })
+      }
+    }
+
+  /** Iterate over a Jelly text stream and return the frames as strings to be parsed.
+    * @param reader
+    *   the reader to read from
+    * @return
+    *   an iterator of Jelly text frames
+    */
+  private def jellyTextStreamAsFrames(reader: BufferedReader): Iterator[String] =
+    val buffer = new StringBuilder()
+    val rows = Iterator.continually(()).map { _ =>
+      reader.readLine() match {
+        case null =>
+          // End of input: text buffered after the last line starting with "}" is never emitted
+          (None, false)
+        case line if line.startsWith("}") =>
+          buffer.append(line)
+          buffer.append("\n")
+          val s = buffer.toString()
+          buffer.clear()
+          (Some(s), true)
+        case line =>
+          buffer.append(line)
+          buffer.append("\n")
+          (None, true)
+      }
+    }.takeWhile(_._2).collect({ case (Some(row), _) => row })
+
+    // The only options we can respect in this mode are the frame size and the delimited flag
+    // The others are ignored, because we are doing a 1:1 conversion
+    if getOptions.delimited then rows.grouped(getOptions.rowsPerFrame).map(_.mkString("\n"))
+    else Iterator(rows.mkString("\n"))
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/DataGenHelper.scala b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/DataGenHelper.scala
index 4cd3051..fe396f6 100644
--- a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/DataGenHelper.scala
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/DataGenHelper.scala
@@ -1,6 +1,7 @@
 package eu.neverblink.jelly.cli.command.helpers
 
 import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
+import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
 import org.apache.jena.rdf.model.{Model, ModelFactory, ResourceFactory}
 import org.apache.jena.riot.{Lang, RDFDataMgr, RDFLanguages}
 
@@ -43,6 +44,17 @@ object DataGenHelper:
     RDFDataMgr.write(outputStream, model, JellyLanguage.JELLY)
     outputStream.toByteArray
 
+  /** Generate a Jelly frame in the Text Format.
+    * @param nTriples
+    *   number of triples to generate
+    * @return
+    *   the first frame parsed from the generated Jelly bytes, rendered with toProtoString
+    */
+  def generateJellyText(nTriples: Int): String =
+    val bytes = generateJellyBytes(nTriples)
+    val frame = RdfStreamFrame.parseDelimitedFrom(ByteArrayInputStream(bytes))
+    frame.map(_.toProtoString).getOrElse(throw IllegalStateException("No Jelly frame generated"))
+
   /** This method generates a Jelly byte input stream with nTriples
     * @param nTriples
     *   number of triples to generate
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala
index 5d9799f..f08de69 100644
--- a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala
@@ -24,7 +24,7 @@ trait TestFixtureHelper extends BeforeAndAfterAll:
 
   /** The number of triples to generate for the tests
     */
-  protected val testCardinality: Integer
+  protected val testCardinality: Int
 
   private def getFileExtension(format: Lang = RDFLanguages.NQUADS): String =
     format.getFileExtensions.get(0)
@@ -53,6 +53,15 @@ trait TestFixtureHelper extends BeforeAndAfterAll:
       testCode(tempFile.toString)
     } finally { tempFile.toFile.delete() }
 
+  def withFullJellyTextFile(testCode: (String) => Any): Unit =
+    val tempFile = Files.createTempFile(tmpDir, randomUUID.toString, ".jelly.txt")
+    try {
+      // Generate and write inside the try so the temp file is deleted even if setup fails
+      val text = DataGenHelper.generateJellyText(testCardinality)
+      Files.write(tempFile, text.getBytes(java.nio.charset.StandardCharsets.UTF_8))
+      testCode(tempFile.toString)
+    } finally { tempFile.toFile.delete() }
+
   def withEmptyRandomFile(testCode: (String) => Any): Unit =
     val tempFile = Files.createTempFile(tmpDir, randomUUID.toString, ".random")
     try {
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala
index 3aab8fd..0bec3b4 100644
--- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala
@@ -14,7 +14,7 @@ import scala.util.Using
 
 class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
 
-  protected val testCardinality: Integer = 33
+  protected val testCardinality: Int = 33
 
   "rdf from-jelly command" should {
     "handle conversion of Jelly to NTriples" when {
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
index 04cae1c..c6ac1c3 100644
--- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
@@ -3,8 +3,8 @@ package eu.neverblink.jelly.cli.command.rdf
 import eu.neverblink.jelly.cli.command.helpers.{DataGenHelper, TestFixtureHelper}
 import eu.neverblink.jelly.cli.{ExitException, InvalidArgument, InvalidFormatSpecified}
 import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
-import eu.ostrzyciel.jelly.core.{IoUtils, JellyOptions}
 import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamFrame}
+import eu.ostrzyciel.jelly.core.{IoUtils, JellyOptions}
 import org.apache.jena.rdf.model.{Model, ModelFactory}
 import org.apache.jena.riot.{RDFLanguages, RDFParser}
 import org.scalatest.matchers.should.Matchers
@@ -15,7 +15,7 @@ import scala.util.Using
 
 class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
 
-  protected val testCardinality: Integer = 33
+  protected val testCardinality: Int = 33
 
   def translateJellyBack(inputStream: InputStream): Model =
     Using(inputStream) { content =>
@@ -289,6 +289,102 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
         },
         jenaLang = RDFLanguages.JSONLD,
       )
+
+      "Jelly text format (implicit format)" in withFullJellyTextFile { inFile =>
+        withEmptyJellyFile { outFile =>
+          val (out, err) =
+            RdfToJelly.runTestCommand(
+              List(
+                "rdf",
+                "to-jelly",
+                inFile,
+                "--to",
+                outFile,
+              ),
+            )
+          val content = translateJellyBack(new FileInputStream(outFile))
+          val expected = DataGenHelper.generateTripleModel(testCardinality).listStatements()
+          content.containsAll(expected) should be(true)
+          RdfToJelly.getErrString should include("WARNING: The Jelly text format is not stable")
+        }
+      }
+
+      "Jelly text format (warning disabled)" in withFullJellyTextFile { inFile =>
+        withEmptyJellyFile { outFile =>
+          val (out, err) =
+            RdfToJelly.runTestCommand(
+              List(
+                "rdf",
+                "to-jelly",
+                inFile,
+                "--to",
+                outFile,
+                "--quiet",
+              ),
+            )
+          RdfToJelly.getErrString should be("")
+        }
+      }
+
+      "Jelly text format (explicit format parameter)" in withFullJellyTextFile { inFile =>
+        withEmptyJellyFile { outFile =>
+          val (out, err) =
+            RdfToJelly.runTestCommand(
+              List(
+                "rdf",
+                "to-jelly",
+                inFile,
+                "--in-format=jelly-text",
+                "--to",
+                outFile,
+              ),
+            )
+          val content = translateJellyBack(new FileInputStream(outFile))
+          val expected = DataGenHelper.generateTripleModel(testCardinality).listStatements()
+          content.containsAll(expected) should be(true)
+        }
+      }
+
+      "Jelly text format (non-delimited output)" in withFullJellyTextFile { inFile =>
+        withEmptyJellyFile { outFile =>
+          val (out, err) =
+            RdfToJelly.runTestCommand(
+              List(
+                "rdf",
+                "to-jelly",
+                inFile,
+                "--delimited=false",
+                "--to",
+                outFile,
+              ),
+            )
+          val (delimited, is) = IoUtils.autodetectDelimiting(new FileInputStream(outFile))
+          delimited should be(false)
+          val frame = RdfStreamFrame.parseFrom(is)
+          frame.rows.size should be > 0
+        }
+      }
+
+      "Jelly text format (delimited, multiple frames)" in withFullJellyTextFile { inFile =>
+        withEmptyJellyFile { outFile =>
+          val (out, err) =
+            RdfToJelly.runTestCommand(
+              List(
+                "rdf",
+                "to-jelly",
+                inFile,
+                "--rows-per-frame=1",
+                "--to",
+                outFile,
+              ),
+            )
+          val (delimited, is) = IoUtils.autodetectDelimiting(new FileInputStream(outFile))
+          delimited should be(true)
+          val frames = readJellyFile(new FileInputStream(outFile))
+          frames.size should be > testCardinality
+          for frame <- frames do frame.rows.size should be(1)
+        }
+      }
     }
     "throw proper exception" when {
       "invalid format is specified" in withFullJenaFile { f =>
@@ -305,13 +401,13 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
 
       "invalid format out of existing is specified" in withFullJenaFile { f =>
         val e = intercept[ExitException] {
-          RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "jelly-text"))
+          RdfToJelly.runTestCommand(List("rdf", "to-jelly", f, "--in-format", "jelly"))
         }
         e.code should be(1)
         e.cause.get shouldBe a[InvalidFormatSpecified]
         val cause = e.cause.get.asInstanceOf[InvalidFormatSpecified]
         cause.validFormats should be(RdfToJellyPrint.validFormatsString)
-        cause.format should be("jelly-text")
+        cause.format should be("jelly")
       }
       "invalid logical stream type is specified" in withFullJenaFile { f =>
         val e =