Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/main/scala/eu/neverblink/jelly/cli/App.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package eu.neverblink.jelly.cli
import caseapp.*
import eu.neverblink.jelly.cli.command.*
import eu.neverblink.jelly.cli.command.rdf.*
import eu.neverblink.jelly.cli.util.jena.riot.CliRiot
import org.apache.jena.sys.JenaSystem

/** Main entrypoint.
Expand All @@ -11,6 +12,8 @@ object App extends CommandsEntryPoint:

// Initialize Jena now to avoid race conditions later
JenaSystem.init()
// Initialize the CLI Riot parsers
CliRiot.initialize()

override def enableCompletionsCommand: Boolean = true

Expand Down
8 changes: 4 additions & 4 deletions src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ abstract class JellyCommand[T <: HasJellyCommandOptions: {Parser, Help}] extends

private var isTest = false
private var options: Option[T] = None
final protected[cli] var out = System.out
final protected[cli] var err = System.err
final protected[cli] var in = System.in
final protected[cli] var out: PrintStream = System.out
final protected[cli] var err: PrintStream = System.err
final protected[cli] var in: InputStream = System.in

private var osOut: ByteArrayOutputStream = uninitialized
private var osErr: ByteArrayOutputStream = uninitialized
Expand Down Expand Up @@ -119,7 +119,7 @@ abstract class JellyCommand[T <: HasJellyCommandOptions: {Parser, Help}] extends
if isTest then in
else System.in

final def setStdIn(data: ByteArrayInputStream): Unit =
final def setStdIn(data: InputStream): Unit =
in = data

final def getOutStream: OutputStream =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package eu.neverblink.jelly.cli.util.jena.riot

import org.apache.jena.atlas.web.ContentType
import org.apache.jena.riot.{RDFLanguages, RDFParserRegistry, ReaderRIOT}
import org.apache.jena.riot.lang.{LangRIOT, RiotParsers}
import org.apache.jena.riot.system.{ParserProfile, StreamRDF}
import org.apache.jena.riot.tokens.{Tokenizer, TokenizerText}
import org.apache.jena.sparql.util.Context

import java.io.{InputStream, Reader}

/** Registration utilities for jelly-cli's overrides of Apache Jena's Riot components (e.g.,
* parsers).
*
* The initialize() method must be called before using any of the parsers, right after
* JenaSystem.init().
*/
object CliRiot:
  // Only read and written inside CliRiot.synchronized; set once the factories are registered.
  private var initialized = false

  /** Installs jelly-cli's generalized N-Triples and N-Quads readers into Jena's Riot parser
    * factories and registry.
    *
    * The whole body runs under CliRiot's monitor, and only the first call performs the
    * registration; later calls do nothing.
    */
  def initialize(): Unit = CliRiot.synchronized {
    // Expression-style guard rather than an early `return` out of the synchronized block.
    if !initialized then
      RiotParsers.factoryNT = (_, parserProfile) => NTriplesReader(parserProfile)
      RiotParsers.factoryNQ = (_, parserProfile) => NQuadsReader(parserProfile)
      RDFParserRegistry.registerLangTriples(RDFLanguages.NTRIPLES, RiotParsers.factoryNT)
      RDFParserRegistry.registerLangQuads(RDFLanguages.NQUADS, RiotParsers.factoryNQ)
      initialized = true
  }

  /** Base reader for parsing N-Triples and N-Quads. Heavily inspired by the Jena Riot code:
    * https://github.com/apache/jena/blob/bd97ad4cf731ade857926787dd2df735644a354b/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java
    * @param parserProfile
    *   parser profile; supplies the error handler for the tokenizer and is handed to the parser
    */
  private abstract class BaseReader(parserProfile: ParserProfile) extends ReaderRIOT:
    /** Creates the language-specific parser that consumes `tokenizer` and writes to `output`. */
    def create(tokenizer: Tokenizer, output: StreamRDF, context: Context): LangRIOT

    /** Tokenizes the byte stream and runs the parser produced by [[create]] over it.
      *
      * `baseURI` and `ct` are accepted to satisfy the ReaderRIOT signature but are not used here.
      */
    final def read(
        in: InputStream,
        baseURI: String,
        ct: ContentType,
        output: StreamRDF,
        context: Context,
    ): Unit =
      val tok = TokenizerText.create()
        .source(in)
        .errorHandler(parserProfile.getErrorHandler)
        .build()
      create(tok, output, context).parse()

    /** Tokenizes the character stream and runs the parser produced by [[create]] over it.
      *
      * `baseURI` and `ct` are accepted to satisfy the ReaderRIOT signature but are not used here.
      */
    final def read(
        reader: Reader,
        baseURI: String,
        ct: ContentType,
        output: StreamRDF,
        context: Context,
    ): Unit =
      val tok = TokenizerText.create()
        .source(reader)
        .errorHandler(parserProfile.getErrorHandler)
        .build()
      create(tok, output, context).parse()

  /** Reader that delegates to [[LangNTriplesGeneralized]]; `context` is not used. */
  private final class NTriplesReader(parserProfile: ParserProfile)
      extends BaseReader(parserProfile):
    override def create(
        tokenizer: Tokenizer,
        output: StreamRDF,
        context: Context,
    ): LangRIOT = new LangNTriplesGeneralized(tokenizer, parserProfile, output)

  /** Reader that delegates to [[LangNQuadsGeneralized]]; `context` is not used. */
  private final class NQuadsReader(parserProfile: ParserProfile) extends BaseReader(parserProfile):
    override def create(
        tokenizer: Tokenizer,
        output: StreamRDF,
        context: Context,
    ): LangRIOT = new LangNQuadsGeneralized(tokenizer, parserProfile, output)
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package eu.neverblink.jelly.cli.util.jena.riot

import org.apache.jena.graph.Node
import org.apache.jena.riot.system.{ParserProfile, StreamRDF}
import org.apache.jena.riot.tokens.{Token, TokenType, Tokenizer}
import org.apache.jena.riot.{Lang, RDFLanguages}
import org.apache.jena.sparql.core.Quad

/** Parser for generalized N-Quads. Heavily inspired by the Jena Riot code:
* https://github.com/apache/jena/blob/bd97ad4cf731ade857926787dd2df735644a354b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNQuads.java
*/
final class LangNQuadsGeneralized(tokens: Tokenizer, profile: ParserProfile, dest: StreamRDF)
    extends LangNTupleGeneralized[Quad](tokens, profile, dest):

  // Graph term of the most recently parsed quad; null when that quad had no graph term.
  private var currentGraph: Node = null

  override def getLang: Lang = RDFLanguages.NQUADS

  /** Consumes the whole token stream, handing every non-null parsed quad to the destination. */
  override protected def runParser(): Unit =
    while hasNext do
      val quad = parseOne
      if quad != null then dest.quad(quad)

  /** Parses one statement: subject, predicate, object, an optional graph term, then DOT. */
  override protected def parseOne: Quad =
    val subjectToken = nextToken
    val subject = parseNode(subjectToken)
    val predicate = parseNode(nextToken)
    val obj = parseNode(nextToken)
    var tail = nextToken // either the graph term or the terminating DOT
    if tail.getType eq TokenType.EOF then
      exception(tail, "Premature end of file: Quad not terminated by DOT: %s", tail)
    // Subject, predicate and object were already converted to nodes above, so tokenAsNode
    // resolved them (and resolves the graph term below) against the graph recorded by the
    // previous statement; currentGraph is only updated afterwards.
    val graph: Node =
      if tail.getType eq TokenType.DOT then
        currentGraph = null
        Quad.defaultGraphNodeGenerated
      else
        val named = parseNode(tail)
        tail = nextToken
        currentGraph = named
        named
    // Whatever follows the (optional) graph term must close the statement.
    if tail.getType ne TokenType.DOT then
      exception(tail, "Quad not terminated by DOT: %s", tail)
    profile.createQuad(
      graph,
      subject,
      predicate,
      obj,
      subjectToken.getLine,
      subjectToken.getColumn,
    )

  /** Converts a token to a node via the parser profile, passing the current graph along. */
  override protected def tokenAsNode(token: Token): Node =
    profile.create(currentGraph, token)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package eu.neverblink.jelly.cli.util.jena.riot

import org.apache.jena.graph.{Node, Triple}
import org.apache.jena.riot.system.{ParserProfile, StreamRDF}
import org.apache.jena.riot.tokens.{Token, TokenType, Tokenizer}
import org.apache.jena.riot.{Lang, RDFLanguages}

/** Parser for generalized N-Triples. Heavily inspired by the Jena Riot code:
* https://github.com/apache/jena/blob/bd97ad4cf731ade857926787dd2df735644a354b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNTriples.java
*/
final class LangNTriplesGeneralized(tokens: Tokenizer, profile: ParserProfile, dest: StreamRDF)
    extends LangNTupleGeneralized[Triple](tokens, profile, dest):

  override def getLang: Lang = RDFLanguages.NTRIPLES

  /** Consumes the whole token stream, handing every non-null parsed triple to the destination. */
  override protected def runParser(): Unit =
    while hasNext do
      val parsed = parseOne
      if parsed != null then dest.triple(parsed)

  /** Parses one generalized triple and requires the token after it to be a DOT. */
  override protected def parseOne: Triple =
    val parsed = parseTripleGeneralized
    val terminator = nextToken
    if terminator.getType ne TokenType.DOT then
      exception(terminator, "Triple not terminated by DOT: %s", terminator)
    parsed

  /** Converts a token to a node via the parser profile; no graph is passed (null). */
  override protected def tokenAsNode(token: Token): Node =
    profile.create(null, token)
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package eu.neverblink.jelly.cli.util.jena.riot

import org.apache.jena.graph.{Node, NodeFactory, Triple}
import org.apache.jena.riot.lang.LangNTuple
import org.apache.jena.riot.system.{ParserProfile, StreamRDF}
import org.apache.jena.riot.tokens.{Token, TokenType, Tokenizer}

/** Base class for parsing N-Triples and N-Quads. Heavily inspired by the Jena Riot code:
* https://github.com/apache/jena/blob/bd97ad4cf731ade857926787dd2df735644a354b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNTuple.java
*/
abstract class LangNTupleGeneralized[T](tokens: Tokenizer, profile: ParserProfile, dest: StreamRDF)
extends LangNTuple[T](tokens, profile, dest):

/** Turns an already-read token into a node.
  *
  * An EOF token is reported through `exception`. A token of type LT2 starts a triple term, which
  * is parsed recursively; any other token is checked with `checkRDFTerm` and converted with
  * `tokenAsNode`.
  */
protected final def parseNode(token: Token): Node =
  if token.isEOF then exception(token, "Premature end of file: %s", token)
  if !token.hasType(TokenType.LT2) then
    checkRDFTerm(token)
    tokenAsNode(token)
  else parseTripleTermGeneralized

/** Reads three consecutive terms and builds a triple from them via the parser profile.
  *
  * The line and column reported to the profile are those of the first (subject) token.
  */
protected final def parseTripleGeneralized: Triple =
  val firstToken = nextToken
  val subject = parseNode(firstToken)
  val predicate = parseNode(nextToken)
  val obj = parseNode(nextToken)
  profile.createTriple(subject, predicate, obj, firstToken.getLine, firstToken.getColumn)

/** Parses the body of a triple term and returns it as a node.
  *
  * Reads one generalized triple, then requires the following token to be of type GT2; any other
  * token is reported through `exception`. The parsed triple is wrapped with
  * `NodeFactory.createTripleNode`.
  */
protected final def parseTripleTermGeneralized: Node =
val t = parseTripleGeneralized
val x = nextToken
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why value names like x or t, this is not very informative

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's the original names from Jena... let's go with that for now.

// x is the token right after the inner triple; it has to be the closing marker of the term.
if (x.getType ne TokenType.GT2) exception(x, "Triple term not terminated by >>: %s", x)
NodeFactory.createTripleNode(t)
8 changes: 8 additions & 0 deletions src/test/resources/generalized.nq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<http://example.org/resource/r1> _:b1 <http://example.org/resource/r2> .
"Resource 1" <http://example.org/property/p> <http://example.org/resource/r3> .
<http://example.org/resource/r3> "Property Label" <http://example.org/resource/r1> .
_:b1 << _:b1 _:b2 _:b3 >> <http://example.org/resource/r4> .
<http://example.org/resource/r1> _:b1 <http://example.org/resource/r2> _:b1 .
"Resource 1" <http://example.org/property/p> <http://example.org/resource/r3> "literal graph"^^<http://example.org> .
<http://example.org/resource/r3> "Property Label" <http://example.org/resource/r1> <http://example.org> .
_:b1 << _:b1 _:b2 _:b3 >> <http://example.org/resource/r4> "literal"@en .
4 changes: 4 additions & 0 deletions src/test/resources/generalized.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<http://example.org/resource/r1> _:b1 <http://example.org/resource/r2> .
"Resource 1" <http://example.org/property/p> <http://example.org/resource/r3> .
<http://example.org/resource/r3> "Property Label" <http://example.org/resource/r1> .
_:b1 << _:b1 _:b2 _:b3 >> <http://example.org/resource/r4> .
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package eu.neverblink.jelly.cli.command.helpers

import eu.neverblink.jelly.cli.util.jena.riot.CliRiot
import eu.ostrzyciel.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage}
import org.apache.jena.riot.{Lang, RDFDataMgr, RDFFormat, RDFLanguages}
import org.apache.jena.sys.JenaSystem
Expand All @@ -18,6 +19,7 @@ trait TestFixtureHelper extends BeforeAndAfterAll:

TestFixtureHelper.synchronized {
JenaSystem.init()
CliRiot.initialize()
}

private val tmpDir: Path = Files.createTempDirectory("jelly-cli")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamFrame}
import eu.ostrzyciel.jelly.core.{IoUtils, JellyOptions}
import org.apache.jena.rdf.model.{Model, ModelFactory}
import org.apache.jena.riot.{RDFLanguages, RDFParser}
import org.apache.jena.sparql.core.DatasetGraphFactory
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

Expand Down Expand Up @@ -72,6 +73,30 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
content.containsAll(tripleModel.listStatements())
}

"input stream to output stream, generalized RDF (N-Triples)" in {
  // Open the fixture directly from the classpath: going through getResource(...).getPath yields
  // a URL-encoded path, which does not name a real file when it contains e.g. spaces.
  val inputStream = getClass.getResourceAsStream("/generalized.nt")
  assert(inputStream != null, "test resource /generalized.nt not found on the classpath")
  try
    RdfToJelly.setStdIn(inputStream)
    val (out, err) = RdfToJelly.runTestCommand(
      List("rdf", "to-jelly", "--in-format=nt"),
    )
    // Decode the command's Jelly output again and count the statements that came through.
    val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes)
    val content = translateJellyBack(newIn)
    content.size() should be(4)
  finally inputStream.close() // release the fixture stream even when the assertion fails
}

// Feeds the generalized N-Quads fixture to the command through stdin, parses the Jelly output
// into a dataset and checks how many graphs and default-graph triples came through.
"input stream to output stream, generalized RDF (N-Quads)" in {
// NOTE(review): getResource(...).getPath returns a URL-style path; presumably this breaks when
// the resource location contains characters that get percent-encoded (e.g. spaces) — confirm.
// The stream opened here is not closed anywhere in this test.
val inputStream = new FileInputStream(getClass.getResource("/generalized.nq").getPath)
RdfToJelly.setStdIn(inputStream)
val (out, err) = RdfToJelly.runTestCommand(
List("rdf", "to-jelly", "--in-format=nq"),
)
// Parse the bytes the command produced as Jelly into a fresh dataset.
val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes)
val ds = DatasetGraphFactory.create()
RDFParser.source(newIn).lang(JellyLanguage.JELLY).parse(ds)
ds.size() should be(4) // 4 named graphs
ds.getDefaultGraph.size() should be(4) // 4 triples in the default graph
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this test seems kind of roundabout to me. Why do we have a separate test nq file only to check at the end the number of quads? wouldn't it make more sense to create an nq file dynamically from a set of statements and then compare the final graph to the first set of statements, or try to translate the above graph to nquad and compare the two .nq files/strings?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point is to just check if the parser works. Generating this dynamically is a pain, I'd prefer to have it in a file.

}

"an input stream to file" in withEmptyJellyFile { j =>
val input = DataGenHelper.generateJenaInputStream(testCardinality)
RdfToJelly.setStdIn(input)
Expand Down Expand Up @@ -211,6 +236,7 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
}
}
}

"handle conversion of other formats to Jelly" when {
"NTriples" in {
val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.NTRIPLES)
Expand Down Expand Up @@ -385,6 +411,7 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
}
}
}

"throw proper exception" when {
"invalid format is specified" in withFullJenaFile { f =>
val e =
Expand Down