Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.rdf.util.*
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.*
import eu.neverblink.jelly.cli.util.args.IndexRange
import eu.neverblink.jelly.cli.util.jena.StreamRdfBatchSink
import eu.neverblink.jelly.convert.jena.JenaConverterFactory
import eu.neverblink.jelly.core.JellyOptions
import eu.neverblink.jelly.core.RdfHandler.AnyStatementHandler
import eu.neverblink.jelly.core.proto.v1.RdfStreamFrame
import eu.neverblink.jelly.core.proto.google.v1 as google
import org.apache.jena.graph.{Node, Triple}
import org.apache.jena.riot.Lang
import org.apache.jena.riot.system.StreamRDF
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.sparql.core.Quad

Expand Down Expand Up @@ -58,7 +59,7 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Writeable] = RdfFromJellyPrint

val defaultAction: (InputStream, OutputStream) => Unit =
jellyToLang(RdfFormat.NQuads.jenaLang, _, _)
(in, out) => jellyToLang(in, StreamRDFWriter.getWriterStream(out, RdfFormat.NQuads.jenaLang))

private def takeFrames: IndexRange = IndexRange(getOptions.takeFrames, "--take-frames")

Expand All @@ -73,7 +74,10 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
format: RdfFormat.Writeable,
): Option[(InputStream, OutputStream) => Unit] =
format match
case j: RdfFormat.Jena.Writeable => Some(jellyToLang(j.jenaLang, _, _))
case j: RdfFormat.Jena.Writeable =>
Some((in, out) => jellyToLang(in, StreamRDFWriter.getWriterStream(out, j.jenaLang)))
case j: RdfFormat.Jena.BatchWriteable =>
Some((in, out) => jellyToLang(in, StreamRdfBatchSink(out, j.jenaLang)))
case RdfFormat.JellyText => Some(jellyBinaryToText)

/** This method reads the Jelly file, rewrites it to specified format and writes it to some output
Expand All @@ -86,11 +90,9 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
* OutputStream
*/
private def jellyToLang(
jenaLang: Lang,
inputStream: InputStream,
outputStream: OutputStream,
writer: StreamRDF,
): Unit =
val writer = StreamRDFWriter.getWriterStream(outputStream, jenaLang)
// Whether the output is active at this moment
var outputEnabled = false
val handler = new AnyStatementHandler[Node] {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ object RdfFormat:
object Jena:
sealed trait Writeable extends Jena, RdfFormat.Writeable
sealed trait Readable extends Jena, RdfFormat.Readable
sealed trait BatchWriteable extends Jena, RdfFormat.Writeable

case object NQuads extends RdfFormat.Jena.Writeable, RdfFormat.Jena.Readable:
override val fullName: String = "N-Quads"
Expand Down Expand Up @@ -49,12 +50,12 @@ object RdfFormat:
override val cliOptions: List[String] = List("jenathrift", "jena-thrift")
override val jenaLang: Lang = RDFLanguages.RDFTHRIFT

case object RdfXml extends RdfFormat.Jena.Readable:
case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
override val fullName: String = "RDF/XML"
override val cliOptions: List[String] = List("rdfxml", "rdf-xml")
override val jenaLang: Lang = RDFLanguages.RDFXML

case object JsonLd extends RdfFormat.Jena.Readable:
case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
override val fullName: String = "JSON-LD"
override val cliOptions: List[String] = List("jsonld", "json-ld")
override val jenaLang: Lang = RDFLanguages.JSONLD
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package eu.neverblink.jelly.cli.util.jena

import org.apache.jena.riot.system.StreamRDF
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.sparql.core.Quad
import org.apache.jena.graph.Triple
import org.apache.jena.rdf.model.Model
import org.apache.jena.riot.system.StreamRDFLib
import java.io.OutputStream
import org.apache.jena.riot.Lang
import org.apache.jena.riot.RDFDataMgr

/** A StreamRDF implementation that collects everything into a Model. When finishing, formats
* everything according to the lang, and emits it to the outputStream. This is meant to be a
* fallback for non-streaming RDF formats, as it requires all data to be loaded in memory.
*/
class StreamRdfBatchSink(val outputStream: OutputStream, val lang: Lang) extends StreamRDF:
private val model: Model = ModelFactory.createDefaultModel()
private val modelStream: StreamRDF = StreamRDFLib.graph(model.getGraph)
override def quad(quad: Quad): Unit = modelStream.quad(quad)
override def triple(triple: Triple): Unit = modelStream.triple(triple)
override def prefix(prefix: String, iri: String): Unit = modelStream.prefix(prefix, iri)
override def base(base: String): Unit = modelStream.base(base)
override def finish(): Unit = RDFDataMgr.write(outputStream, model, lang)
override def start(): Unit = ()
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import eu.neverblink.jelly.cli.command.helpers.*
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat
import eu.neverblink.jelly.core.proto.v1.{PhysicalStreamType, RdfStreamFrame}
import eu.neverblink.jelly.core.{JellyOptions, JellyTranscoderFactory}
import org.apache.jena.riot.RDFLanguages
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

Expand All @@ -15,6 +14,8 @@ import java.nio.file.attribute.PosixFilePermissions
import java.nio.file.{Files, Paths}
import scala.io.Source
import scala.util.Using
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.rdf.model.ModelFactory

class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:

Expand Down Expand Up @@ -217,6 +218,21 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
}
}

for lang <- Seq(RdfFormat.JsonLd, RdfFormat.RdfXml) do
s"handle conversion of Jelly binary to ${lang.fullName}" when {
"input stream to output stream" in {
val input = DataGenHelper.generateJellyInputStream(testCardinality)
RdfFromJelly.setStdIn(input)
val model = DataGenHelper.generateTripleModel(testCardinality)
val (out, err) = RdfFromJelly.runTestCommand(
List("rdf", "from-jelly", "--out-format", lang.cliOptions.head),
)
val newModel = ModelFactory.createDefaultModel()
RDFDataMgr.read(newModel, new ByteArrayInputStream(out.getBytes()), lang.jenaLang)
model.isIsomorphicWith(newModel) shouldBe true
}
}

"throw proper exception" when {
"input file is not found" in {
val nonExist = "non-existing-file"
Expand Down Expand Up @@ -339,34 +355,6 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
}
}

"readable but not writable format supplied" in withFullJellyFile { j =>
withEmptyJenaFile(
testCode = { q =>
val exception =
intercept[ExitException] {
RdfFromJelly.runTestCommand(
List(
"rdf",
"from-jelly",
j,
"--to",
q,
"--out-format",
RdfFormat.RdfXml.cliOptions.head,
),
)
}
val msg = InvalidFormatSpecified(
RdfFormat.RdfXml.cliOptions.head,
RdfFromJellyPrint.validFormatsString,
)
RdfFromJelly.getErrString should include(msg.getMessage)
exception.code should be(1)
},
jenaLang = RDFLanguages.RDFXML,
)
}

"invalid --take-frames argument provided" in {
val e = intercept[ExitException] {
RdfFromJelly.runTestCommand(
Expand Down