Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions .github/workflows/aot-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,19 @@ jobs:

- name: Test the binary
run: |
target/graalvm-native-image/jelly-cli version && \
set -eux

# See if it runs at all
target/graalvm-native-image/jelly-cli version

# Make sure reflection works
target/graalvm-native-image/jelly-cli version | grep "JVM reflection: supported"

# Test RDF conversions
echo '_:b <http://t.org/> _:b .' | target/graalvm-native-image/jelly-cli \
rdf to-jelly --in-format=nt > out.jelly && \
[ -s out.jelly ] &&
target/graalvm-native-image/jelly-cli \
[ -s out.jelly ]
target/graalvm-native-image/jelly-cli \
rdf from-jelly --out-format=jelly-text out.jelly > out.txt && \
[ -s out.txt ]
target/graalvm-native-image/jelly-cli \
Expand All @@ -45,6 +53,9 @@ jobs:
echo '{"@graph":[{"@id":"http://e.org/r","http://e.org/p":{"@value":"v"}}]}' | \
target/graalvm-native-image/jelly-cli rdf to-jelly --in-format "jsonld" > jsonld.jelly && \
[ -s jsonld.jelly ]
echo '<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Seq rdf:about="http://example.org/favourite-fruit"></rdf:Seq></rdf:RDF>' | \
target/graalvm-native-image/jelly-cli rdf to-jelly --in-format "rdfxml" > rdfxml.jelly && \
[ -s rdfxml.jelly ]

- name: Upload binary
uses: actions/upload-artifact@v4
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scala.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:

- name: Build and test
shell: bash
run: sbt -v +test
run: sbt -v +test test-serial:test

test-assembly:
runs-on: ubuntu-latest
Expand Down
13 changes: 11 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ lazy val graalOptions = Seq(
// For the release build, optimize for speed and make a build report
if (isDevBuild) Seq("-Ob") else Seq("-O3", "--emit build-report"),
).flatten ++ Seq(
"--features=eu.neverblink.jelly.cli.graal.ProtobufFeature",
"--features=eu.neverblink.jelly.cli.graal.ProtobufFeature," +
"eu.neverblink.jelly.cli.graal.JenaInternalsFeature",
"-H:ReflectionConfigurationFiles=" + file("graal.json").getAbsolutePath,
// Needed to skip initializing all charsets.
// See: https://github.com/Jelly-RDF/cli/issues/154
Expand All @@ -35,11 +36,14 @@ lazy val graalOptions = Seq(
"-H:+UsePredicates", // SkipFlow optimization -- will be default in GraalVM 25
)

lazy val TestSerial = config("test-serial") extend Test

lazy val root = (project in file("."))
.enablePlugins(
BuildInfoPlugin,
GraalVMNativeImagePlugin,
)
.configs(TestSerial)
.settings(
name := "jelly-cli",
libraryDependencies ++= Seq(
Expand All @@ -51,7 +55,7 @@ lazy val root = (project in file("."))
("eu.neverblink.jelly" % "jelly-jena" % jellyV).excludeAll(ExclusionRule("org.apache.jena")),
"eu.neverblink.jelly" % "jelly-core-protos-google" % jellyV,
"com.github.alexarchambault" %% "case-app" % "2.1.0",
"org.scalatest" %% "scalatest" % "3.2.19" % Test,
"org.scalatest" %% "scalatest" % "3.2.19" % "test,test-serial",
"org.yaml" % "snakeyaml" % "2.4" % Test,
// For native-image reflection compatibility
"org.graalvm.sdk" % "graal-sdk" % graalvmV % "provided",
Expand Down Expand Up @@ -79,6 +83,11 @@ lazy val root = (project in file("."))
case _ => MergeStrategy.first
},

// Serial tests should not run in parallel.
// They are used for tests that manipulate global state, like system properties.
inConfig(TestSerial)(Defaults.testSettings),
TestSerial / parallelExecution := false,

// GraalVM settings
Compile / mainClass := Some("eu.neverblink.jelly.cli.App"),
// Do a fast build if it's a dev build
Expand Down
14 changes: 14 additions & 0 deletions src/main/scala/eu/neverblink/jelly/cli/command/Version.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ package eu.neverblink.jelly.cli.command

import caseapp.*
import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.util.jena.JenaSystemOptions

import scala.util.{Failure, Success}

@HelpMessage(
"Prints the version of the jelly-cli utility and the Jelly-JVM library.",
Expand All @@ -23,10 +26,21 @@ object Version extends JellyCommand[VersionOptions]:
.find(_.startsWith("org.apache.jena:jena-core:")).get.split(":")(2)
val jellyV = BuildInfo.libraryDependencies
.find(_.startsWith("eu.neverblink.jelly:jelly-jena:")).get.split(":")(2)
val reflectionSupported = JenaSystemOptions.disableTermValidation()
printLine(f"""
|jelly-cli ${BuildInfo.version}
|----------------------------------------------
|Jelly-JVM $jellyV
|Apache Jena $jenaV
|JVM ${System.getProperty("java.vm.name")} ${System.getProperty("java.vm.version")}
|----------------------------------------------
|""".stripMargin.trim)
reflectionSupported match {
case Failure(ex) =>
printLine("[ ] JVM reflection: not supported. Parsing will be slower.")
if getOptions.common.debug then
printLine(" The exception was:")
ex.printStackTrace(out)
else printLine(" Run with --debug for details.")
case Success(_) => printLine("[X] JVM reflection: supported. Parsing optimizations enabled.")
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.rdf.util.*
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.*
import eu.neverblink.jelly.cli.util.args.IndexRange
import eu.neverblink.jelly.cli.util.jena.StreamRdfBatchWriter
import eu.neverblink.jelly.cli.util.jena.{
JenaSystemOptions,
StreamRdfBatchWriter,
StreamRdfCombiningBatchWriter,
}
import eu.neverblink.jelly.convert.jena.JenaConverterFactory
import eu.neverblink.jelly.core.JellyOptions
import eu.neverblink.jelly.core.RdfHandler.AnyStatementHandler
Expand All @@ -18,7 +22,6 @@ import org.apache.jena.sparql.core.Quad

import java.io.{InputStream, OutputStream}
import scala.jdk.CollectionConverters.*
import eu.neverblink.jelly.cli.util.jena.StreamRdfCombiningBatchWriter

object RdfFromJellyPrint extends RdfCommandPrintUtil[RdfFormat.Writeable]:
override val defaultFormat: RdfFormat = RdfFormat.NQuads
Expand Down Expand Up @@ -57,6 +60,8 @@ case class RdfFromJellyOptions(
"Ignored otherwise. Take care with input size, as this option will load everything into memory.",
)
combine: Boolean = false,
@Recurse
rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(),
) extends HasJellyCommandOptions

object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writeable]:
Expand All @@ -73,6 +78,8 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
private def takeFrames: IndexRange = IndexRange(getOptions.takeFrames, "--take-frames")

override def doRun(options: RdfFromJellyOptions, remainingArgs: RemainingArgs): Unit =
if !options.rdfPerformanceOptions.validateTerms.getOrElse(false) then
JenaSystemOptions.disableTermValidation()
// Parse options now to make sure they are valid
takeFrames
val (inputStream, outputStream) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ import com.google.protobuf.TextFormat
import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.rdf.util.*
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.*
import eu.neverblink.jelly.cli.util.jena.riot.JellyStreamWriterGraphs
import eu.neverblink.jelly.cli.util.jena.JenaSystemOptions
import eu.neverblink.jelly.cli.util.jena.riot.{JellyStreamWriterGraphs, RiotParserUtil}
import eu.neverblink.jelly.convert.jena.JenaConverterFactory
import eu.neverblink.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage, JellyStreamWriter}
import eu.neverblink.jelly.core.{JellyOptions, RdfProtoDeserializationError}
import eu.neverblink.jelly.core.proto.google.v1 as google
import eu.neverblink.jelly.core.proto.v1.*
import eu.neverblink.jelly.core.utils.IoUtils
import org.apache.jena.riot.lang.LabelToNode
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.riot.{Lang, RDFParser, RIOT}
import org.apache.jena.riot.{Lang, RIOT}

import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream}
import scala.util.Using
Expand Down Expand Up @@ -64,6 +64,8 @@ case class RdfToJellyOptions(
"frame – make sure you know what you are doing. Default: true",
)
delimited: Boolean = true,
@Recurse
rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(),
) extends HasJellyCommandOptions

object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable]:
Expand All @@ -87,6 +89,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
frame.get.getRows.iterator().next().getOptions

override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit =
if !options.rdfPerformanceOptions.validateTerms.getOrElse(false) then
JenaSystemOptions.disableTermValidation()
// Infer before touching options
options.optionsFrom.map(loadOptionsFromFile).foreach(
options.jellySerializationOptions.setOptions,
Expand Down Expand Up @@ -184,10 +188,12 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
.build()
JellyStreamWriter(JenaConverterFactory.getInstance(), variant, outputStream)

RDFParser.source(inputStream)
.lang(jenaLang)
.labelToNode(LabelToNode.createUseLabelAsGiven())
.parse(jellyWriter)
RiotParserUtil.parse(
getOptions.rdfPerformanceOptions.validateTerms.getOrElse(false),
jenaLang,
inputStream,
jellyWriter,
)
jellyWriter.finish()

/** Convert Jelly text to Jelly binary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import eu.neverblink.jelly.cli.command.rdf.util.*
import eu.neverblink.jelly.cli.util.args.IndexRange
import eu.neverblink.jelly.cli.util.io.IoUtil
import eu.neverblink.jelly.cli.util.jena.*
import eu.neverblink.jelly.cli.util.jena.riot.RiotParserUtil
import eu.neverblink.jelly.convert.jena.JenaConverterFactory
import eu.neverblink.jelly.core.JellyOptions
import eu.neverblink.jelly.core.RdfHandler.AnyStatementHandler
import eu.neverblink.jelly.core.proto.v1.{RdfStreamFrame, RdfStreamOptions}
import org.apache.jena.graph.{Node, Triple}
import org.apache.jena.riot.RDFParser
import org.apache.jena.riot.system.StreamRDFLib
import org.apache.jena.sparql.core.Quad

Expand Down Expand Up @@ -63,6 +63,8 @@ case class RdfValidateOptions(
"Possible values: 'either', 'true', 'false'. Default: 'either'.",
)
delimited: String = "either",
@Recurse
rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(),
) extends HasJellyCommandOptions

object RdfValidate extends JellyCommand[RdfValidateOptions]:
Expand Down Expand Up @@ -90,6 +92,8 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]:
options.compareToRdfFile.map(n => getRdfForComparison(n, options.compareToFormat))
val (inputStream, _) = getIoStreamsFromOptions(remainingArgs.remaining.headOption, None)
val (delimited, frameIterator) = JellyUtil.iterateRdfStreamWithDelimitingInfo(inputStream)
if !options.rdfPerformanceOptions.validateTerms.getOrElse(true) then
JenaSystemOptions.disableTermValidation()

// Step 1: Validate delimiting
validateDelimiting(delimiting, delimited)
Expand Down Expand Up @@ -245,8 +249,11 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]:
}
val output = StreamRdfCollector()
Using.resource(IoUtil.inputStream(fileName)) { is =>
RDFParser.source(is)
.lang(format.jenaLang)
.parse(output)
RiotParserUtil.parse(
getOptions.rdfPerformanceOptions.validateTerms.getOrElse(true),
format.jenaLang,
is,
output,
)
}
output
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package eu.neverblink.jelly.cli.command.rdf.util

import caseapp.HelpMessage

/** Performance-related options for RDF processing.
  *
  * @param validateTerms
  *   Whether term validation and IRI resolution should be enabled. `None` means the user did not
  *   specify the flag, in which case each command applies its own default (see the help message:
  *   disabled everywhere except 'rdf validate').
  */
case class RdfPerformanceOptions(
    @HelpMessage(
      "Enable term validation and IRI resolution (slower). Default: false for all commands except 'rdf validate'.",
    )
    validateTerms: Option[Boolean] = None,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package eu.neverblink.jelly.cli.graal

import org.apache.jena.graph.impl.LiteralLabel
import org.graalvm.nativeimage.hosted.{Feature, RuntimeReflection}

/** GraalVM native-image feature that registers Jena's internal `LiteralLabel.ValueMode` class
  * and its `LAZY` constant for runtime reflection, so that they can be looked up reflectively in
  * the native binary.
  */
class JenaInternalsFeature extends Feature:
  import Feature.*

  override def getDescription: String =
    "Registers Jena internals for reflection. Needed for JenaSystemOptions to disable a few " +
      "checks during RDF parsing."

  /** Registers the nested `ValueMode` class of [[LiteralLabel]] and its `LAZY` field.
    *
    * @throws NoSuchElementException
    *   if `LiteralLabel` declares no nested class named `ValueMode`
    * @throws NoSuchFieldException
    *   if `ValueMode` declares no field named `LAZY`
    */
  override def beforeAnalysis(access: BeforeAnalysisAccess): Unit =
    val labelClass = classOf[LiteralLabel]
    // ValueMode is looked up by name among the declared nested classes, because it cannot be
    // referenced directly from here. Fail with an explicit message instead of a bare
    // Option.get, so that a build broken by a Jena change is easy to diagnose.
    val valueModeClass = labelClass.getDeclaredClasses
      .find(_.getSimpleName == "ValueMode")
      .getOrElse(
        throw new NoSuchElementException(
          s"Nested class 'ValueMode' not found in ${labelClass.getName}. " +
            "The Jena internals may have changed.",
        ),
      )
    RuntimeReflection.register(valueModeClass)
    RuntimeReflection.register(valueModeClass.getDeclaredField("LAZY"))
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package eu.neverblink.jelly.cli.util.jena

import org.apache.jena.graph.impl.LiteralLabel
import org.apache.jena.irix.{IRIProviderAny, SystemIRIx}

import scala.util.Try

object JenaSystemOptions:
  /** Speeds up parsing by switching off strict IRI and literal validation in Jena.
    *
    * @return
    *   Success when the reflective part of the switch worked, otherwise a Failure carrying the
    *   exception. This can happen in environments without reflection support. Callers may ignore
    *   the failure; parsing will just be slower.
    */
  def disableTermValidation(): Try[Unit] = toggle(enable = false)

  /** Switches term validation back on. For use only in tests.
    */
  def resetTermValidation(): Try[Unit] = toggle(enable = true)

  /** Applies the requested validation state to Jena's global settings.
    *
    * The IRI provider is changed first (outside of the returned Try), then the literal value
    * mode is changed through reflection (inside the returned Try).
    */
  private def toggle(enable: Boolean): Try[Unit] =
    if enable then SystemIRIx.reset()
    else
      // Install an IRI provider that does no validation or resolving whatsoever
      SystemIRIx.setProvider(IRIProviderAny.stringProvider())

    // Name of the ValueMode constant to install: eager computation of literal values does
    // strict checking, lazy computation skips it.
    val modeName = if enable then "EAGER" else "LAZY"

    // The target field and the ValueMode type are not accessible from here, so they are
    // reached through reflection.
    Try {
      val labelClass = classOf[LiteralLabel]
      val modeHolder = labelClass.getDeclaredField("valueMode")
      val modeEnum = labelClass.getDeclaredClasses.find(_.getSimpleName == "ValueMode").get
      val modeConstant = modeEnum.getDeclaredField(modeName)
      modeConstant.setAccessible(true)
      modeHolder.setAccessible(true)
      // Both fields are read/written statically, hence the null receiver
      modeHolder.set(null, modeConstant.get(null))
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package eu.neverblink.jelly.cli.util.jena.riot

import org.apache.jena.irix.IRIxResolver
import org.apache.jena.riot.RIOT
import org.apache.jena.riot.lang.LabelToNode
import org.apache.jena.riot.system.*

/** Jena RIOT parser profile with optimizations for speed:
  *   - No IRI resolution
  *   - No error logging
  *   - Passing blank node labels as-is
  *   - No extra checks
  *
  * All settings are fixed in the super-constructor call below; the class takes no parameters.
  */
final class FastParserProfile
    extends ParserProfileStd(
      // Caching node factory with the default cache size; blank node labels are used as given
      FactoryRDFCaching(FactoryRDFCaching.DftNodeCacheSize, LabelToNode.createUseLabelAsGiven()),
      // Error handler variant that does not log
      ErrorHandlerFactory.errorHandlerNoLogging,
      // Resolver with no base IRI, resolution switched off, and relative IRIs allowed
      IRIxResolver.create().noBase().resolve(false).allowRelative(true).build(),
      // Fresh, empty prefix map
      PrefixMapStd(),
      RIOT.getContext,
      // NOTE(review): presumably the `checking` and `strictMode` flags of ParserProfileStd,
      // both turned off -- confirm against the Jena constructor signature.
      false,
      false,
    ):

  /** Skip IRI resolution for speed.
    *
    * @param uriStr
    *   the IRI string as read by the parser
    * @param line
    *   unused
    * @param col
    *   unused
    * @return
    *   `uriStr`, unchanged
    */
  override def resolveIRI(uriStr: String, line: Long, col: Long): String = uriStr
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package eu.neverblink.jelly.cli.util.jena.riot

import org.apache.jena.riot.{Lang, RDFParser, RDFParserRegistry, RIOT}
import org.apache.jena.riot.system.StreamRDF

import java.io.InputStream

/** Utility for creating Jena RDF parsers in jelly-cli.
  */
object RiotParserUtil:
  /** Parses `source` in the given language and sends the result to `output`.
    *
    * @param enableTermValidation
    *   if true, the standard Jena `RDFParser` is used; if false, the parser registered for
    *   `lang` is created directly with a [[FastParserProfile]]
    * @param lang
    *   the RDF language of the input
    * @param source
    *   the stream to read from; it is not closed here
    * @param output
    *   the sink receiving the parsed statements
    * @throws org.apache.jena.riot.RiotException
    *   on the fast path, if no parser factory is registered for `lang`
    */
  def parse(
      enableTermValidation: Boolean,
      lang: Lang,
      source: InputStream,
      output: StreamRDF,
  ): Unit =
    if enableTermValidation then
      // Standard parser with validation enabled
      RDFParser.source(source)
        .lang(lang)
        .parse(output)
    else
      // Fast parser with validation disabled.
      // The registry lookup yields null for a language without a registered parser; report
      // that explicitly instead of failing with a NullPointerException.
      val factory = Option(RDFParserRegistry.getFactory(lang)).getOrElse(
        throw new org.apache.jena.riot.RiotException(s"No parser registered for language: $lang"),
      )
      factory
        .create(lang, FastParserProfile())
        .read(source, "", lang.getContentType, output, RIOT.getContext)
Loading