diff --git a/build.sbt b/build.sbt index 359d8d5..41a52b1 100644 --- a/build.sbt +++ b/build.sbt @@ -38,6 +38,7 @@ lazy val root = (project in file(".")) "eu.ostrzyciel.jelly" %% "jelly-jena" % jellyV, "com.github.alexarchambault" %% "case-app" % "2.1.0-M30", "org.scalatest" %% "scalatest" % "3.2.19" % Test, + "org.yaml" % "snakeyaml" % "2.4" % Test, ), scalacOptions ++= Seq( "-Wunused:imports", diff --git a/src/main/scala/eu/neverblink/jelly/cli/App.scala b/src/main/scala/eu/neverblink/jelly/cli/App.scala index 9b869f6..1585481 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/App.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/App.scala @@ -22,4 +22,5 @@ object App extends CommandsEntryPoint: Version, RdfFromJelly, RdfToJelly, + RdfInspect, ) diff --git a/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala b/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala index c741178..b7f6dd2 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala @@ -1,6 +1,5 @@ package eu.neverblink.jelly.cli -import com.google.protobuf.InvalidProtocolBufferException import org.apache.jena.riot.RiotException /** Contains a set of common jelly-cli exceptions with custom output messages. @@ -22,7 +21,7 @@ case class JellyTranscodingError(message: String) extends CriticalException(s"Jelly transcoding error: $message") case class JenaRiotException(e: RiotException) extends CriticalException(s"Jena RDF I/O exception: ${e.getMessage}") -case class InvalidJellyFile(e: InvalidProtocolBufferException) +case class InvalidJellyFile(e: Exception) extends CriticalException(s"Invalid Jelly file: ${e.getMessage}") case class InvalidFormatSpecified(format: String, validFormats: String) extends CriticalException( diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala index b111488..aa6db16 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala @@ -3,9 +3,9 @@ import caseapp.* import eu.neverblink.jelly.cli.* import eu.neverblink.jelly.cli.command.rdf.RdfFormat.* import eu.neverblink.jelly.cli.command.rdf.RdfFormat.Jena.* +import eu.neverblink.jelly.cli.util.JellyUtil import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame -import eu.ostrzyciel.jelly.core.IoUtils import org.apache.jena.riot.system.StreamRDFWriter import org.apache.jena.riot.{Lang, RDFParser} @@ -25,7 +25,7 @@ case class RdfFromJellyOptions( @ExtraName("out-format") outputFormat: Option[String] = None, ) extends HasJellyCommandOptions -object RdfFromJelly extends RdfCommand[RdfFromJellyOptions, RdfFormat.Writeable]: +object RdfFromJelly extends RdfTranscodeCommand[RdfFromJellyOptions, RdfFormat.Writeable]: override def names: List[List[String]] = List( List("rdf", "from-jelly"), @@ -83,30 +83,10 @@ object RdfFromJelly extends RdfCommand[RdfFromJellyOptions, RdfFormat.Writeable] outputStream.write(frame.getBytes) try { - iterateRdfStream(inputStream, outputStream).zipWithIndex.foreach { + JellyUtil.iterateRdfStream(inputStream).zipWithIndex.foreach { case (maybeFrame, frameIndex) => writeFrameToOutput(maybeFrame, frameIndex) } } finally { outputStream.flush() } - - /** This method reads the Jelly file and returns an iterator of RdfStreamFrame - * @param inputStream - * @param outputStream - * @return - */ - private def iterateRdfStream( - inputStream: InputStream, - outputStream: OutputStream, - ): Iterator[RdfStreamFrame] = - IoUtils.autodetectDelimiting(inputStream) match - case (false, newIn) => - // Non-delimited Jelly file - // In this case, we can only read one frame - Iterator(RdfStreamFrame.parseFrom(newIn)) - case (true, newIn) => - // Delimited Jelly file - // In this case, we can read multiple frames - Iterator.continually(RdfStreamFrame.parseDelimitedFrom(newIn)) - .takeWhile(_.isDefined).map(_.get) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala new file mode 100644 index 0000000..8065104 --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala @@ -0,0 +1,91 @@ +package eu.neverblink.jelly.cli.command.rdf + +import caseapp.{ExtraName, Recurse} +import caseapp.core.RemainingArgs +import eu.neverblink.jelly.cli.util.{FrameInfo, JellyUtil, MetricsPrinter} +import eu.neverblink.jelly.cli.* +import eu.ostrzyciel.jelly.core.proto.v1.* + +import java.io.InputStream + +case class RdfInspectOptions( + @Recurse + common: JellyCommandOptions = JellyCommandOptions(), + @ExtraName("to") outputFile: Option[String] = None, + @ExtraName("per-frame") perFrame: Boolean = false, +) extends HasJellyCommandOptions + +object RdfInspect extends JellyCommand[RdfInspectOptions]: + + override def names: List[List[String]] = List( + List("rdf", "inspect"), + ) + + override final def group = "rdf" + + override def doRun(options: RdfInspectOptions, remainingArgs: RemainingArgs): Unit = + val (inputStream, outputStream) = + this.getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile) + val (streamOpts, frameIterator) = inspectJelly(inputStream) + if options.perFrame then MetricsPrinter.printPerFrame(streamOpts, frameIterator, outputStream) + else MetricsPrinter.printAggregate(streamOpts, frameIterator, outputStream) + + private def inspectJelly( + inputStream: InputStream, + ): (RdfStreamOptions, Iterator[FrameInfo]) = + + inline def computeMetrics( + frame: RdfStreamFrame, + frameIndex: Int, + ): FrameInfo = + val metrics = new FrameInfo(frameIndex) + frame.rows.foreach(r => metricsForRow(r, metrics)) + metrics + + try { + val allRows = JellyUtil.iterateRdfStream(inputStream).buffered + // we need to check if the first frame contains options + val streamOptions = checkOptions(allRows.headOption) + // We compute the metrics for each frame + // and then sum them all during the printing if desired + val frameIterator = allRows.zipWithIndex.map { case (maybeFrame, frameIndex) => + computeMetrics(maybeFrame, frameIndex) + } + (streamOptions, frameIterator) + } catch { + case e: Exception => + throw InvalidJellyFile(e) + } + + private def metricsForRow( + row: RdfStreamRow, + metadata: FrameInfo, + ): Unit = + row.row match { + case r: RdfTriple => metadata.tripleCount += 1 + case r: RdfQuad => metadata.quadCount += 1 + case r: RdfNameEntry => metadata.nameCount += 1 + case r: RdfPrefixEntry => metadata.prefixCount += 1 + case r: RdfNamespaceDeclaration => metadata.namespaceCount += 1 + case r: RdfDatatypeEntry => metadata.datatypeCount += 1 + case r: RdfGraphStart => metadata.graphStartCount += 1 + case r: RdfGraphEnd => metadata.graphEndCount += 1 + case r: RdfStreamOptions => metadata.optionCount += 1 + } + + /** Checks whether the first frame in the stream contains options and returns them. + * @param headFrame + * The first frame in the stream as an option. + * @return + * The options from the first frame. + * @throws RuntimeException + * If the first frame does not contain options or if there are no frames in the stream. + */ + private def checkOptions(headFrame: Option[RdfStreamFrame]): RdfStreamOptions = + if headFrame.isEmpty then throw new RuntimeException("No frames in the stream.") + if headFrame.get.rows.isEmpty then throw new RuntimeException("No rows in the frame.") + val frameRows = headFrame.get.rows + frameRows.head.row match { + case r: RdfStreamOptions => r + case _ => throw new RuntimeException("First row of the frame is not an options row.") + } diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index a50f468..276c13b 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -40,7 +40,7 @@ case class RdfToJellyOptions( delimited: Boolean = true, ) extends HasJellyCommandOptions -object RdfToJelly extends RdfCommand[RdfToJellyOptions, RdfFormat.Readable]: +object RdfToJelly extends RdfTranscodeCommand[RdfToJellyOptions, RdfFormat.Readable]: override def names: List[List[String]] = List( List("rdf", "to-jelly"), diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommand.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfTranscodeCommand.scala similarity index 95% rename from src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommand.scala rename to src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfTranscodeCommand.scala index 2696a0f..71a3c42 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommand.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfTranscodeCommand.scala @@ -12,8 +12,8 @@ import java.io.{InputStream, OutputStream} /** This abstract class is responsible for the common logic in both RDF parsing commands */ -abstract class RdfCommand[T <: HasJellyCommandOptions: {Parser, Help}, F <: RdfFormat](using - tt: TypeTest[RdfFormat, F], +abstract class RdfTranscodeCommand[T <: HasJellyCommandOptions: {Parser, Help}, F <: RdfFormat]( + using tt: TypeTest[RdfFormat, F], ) extends JellyCommand[T]: override final def group = "rdf" diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/JellyUtil.scala b/src/main/scala/eu/neverblink/jelly/cli/util/JellyUtil.scala new file mode 100644 index 0000000..5924b84 --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/util/JellyUtil.scala @@ -0,0 +1,27 @@ +package eu.neverblink.jelly.cli.util + +import eu.ostrzyciel.jelly.core.IoUtils +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame + +import java.io.InputStream + +object JellyUtil: + /** This method reads the Jelly file and returns an iterator of RdfStreamFrame + * + * @param inputStream + * @param outputStream + * @return + */ + def iterateRdfStream( + inputStream: InputStream, + ): Iterator[RdfStreamFrame] = + IoUtils.autodetectDelimiting(inputStream) match + case (false, newIn) => + // Non-delimited Jelly file + // In this case, we can only read one frame + Iterator(RdfStreamFrame.parseFrom(newIn)) + case (true, newIn) => + // Delimited Jelly file + // In this case, we can read multiple frames + Iterator.continually(RdfStreamFrame.parseDelimitedFrom(newIn)) + .takeWhile(_.isDefined).map(_.get) diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/MetricsPrinter.scala b/src/main/scala/eu/neverblink/jelly/cli/util/MetricsPrinter.scala new file mode 100644 index 0000000..82f0d9d --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/util/MetricsPrinter.scala @@ -0,0 +1,131 @@ +package eu.neverblink.jelly.cli.util + +import eu.neverblink.jelly.cli.util.YamlDocBuilder.* +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions + +import java.io.OutputStream + +/** This class is used to store the metrics for a single frame + */ +final class FrameInfo(val frameIndex: Int): + var frameCount: Int = 1 + var optionCount: Int = 0 + var nameCount: Int = 0 + var namespaceCount: Int = 0 + var tripleCount: Int = 0 + var quadCount: Int = 0 + var prefixCount: Int = 0 + var datatypeCount: Int = 0 + var graphStartCount: Int = 0 + var graphEndCount: Int = 0 + + def +=(other: FrameInfo): FrameInfo = { + this.frameCount += 1 + this.optionCount += other.optionCount + this.nameCount += other.nameCount + this.namespaceCount += other.namespaceCount + this.tripleCount += other.tripleCount + this.quadCount += other.quadCount + this.prefixCount += other.prefixCount + this.datatypeCount += other.datatypeCount + this.graphStartCount += other.graphStartCount + this.graphEndCount += other.graphEndCount + this + } + +end FrameInfo + +object MetricsPrinter: + + def printPerFrame( + options: RdfStreamOptions, + iterator: Iterator[FrameInfo], + o: OutputStream, + ): Unit = + printOptions(options, o) + val builder = + YamlDocBuilder.build( + YamlMap( + "frames" -> YamlBlank(), + ), + ) + val fullString = builder.getString + o.write(fullString.getBytes) + iterator.foreach { frame => + val yamlFrame = YamlListElem(formatStatsIndex(frame)) + val fullString = YamlDocBuilder.build(yamlFrame, builder.currIndent).getString + o.write(fullString.getBytes) + o.write(System.lineSeparator().getBytes) + } + + def printAggregate( + options: RdfStreamOptions, + iterator: Iterator[FrameInfo], + o: OutputStream, + ): Unit = { + printOptions(options, o) + val sumCounts = iterator.reduce((a, b) => a += b) + val fullString = + YamlDocBuilder.build( + YamlMap( + "frames" -> formatStatsCount(sumCounts), + ), + ).getString + o.write(fullString.getBytes) + } + + private def printOptions( + printOptions: RdfStreamOptions, + o: OutputStream, + ): Unit = + val options = formatOptions(options = printOptions) + val fullString = + YamlDocBuilder.build( + YamlMap( + "stream_options" -> options, + ), + ).getString + o.write(fullString.getBytes) + o.write(System.lineSeparator().getBytes) + + private def formatOptions( + options: RdfStreamOptions, + ): YamlMap = + YamlMap( + "stream_name" -> YamlString(options.streamName), + "physical_type" -> YamlEnum(options.physicalType.toString, options.physicalType.value), + "generalized_statements" -> YamlBool(options.generalizedStatements), + "rdf_star" -> YamlBool(options.rdfStar), + "max_name_table_size" -> YamlInt(options.maxNameTableSize), + "max_prefix_table_size" -> YamlInt(options.maxPrefixTableSize), + "max_datatype_table_size" -> YamlInt(options.maxDatatypeTableSize), + "logical_type" -> YamlEnum(options.logicalType.toString, options.logicalType.value), + "version" -> YamlInt(options.version), + ) + + private def formatStatsIndex( + frame: FrameInfo, + ): YamlMap = + YamlMap(Seq(("frame_index", YamlInt(frame.frameIndex))) ++ formatStats(frame)*) + + private def formatStatsCount( + frame: FrameInfo, + ): YamlMap = + YamlMap(Seq(("frame_count", YamlInt(frame.frameCount))) ++ formatStats(frame)*) + + private def formatStats( + frame: FrameInfo, + ): Seq[(String, YamlValue)] = + Seq( + ("option_count", YamlInt(frame.optionCount)), + ("triple_count", YamlInt(frame.tripleCount)), + ("quad_count", YamlInt(frame.quadCount)), + ("graph_start_count", YamlInt(frame.graphStartCount)), + ("graph_end_count", YamlInt(frame.graphEndCount)), + ("namespace_count", YamlInt(frame.namespaceCount)), + ("name_count", YamlInt(frame.nameCount)), + ("prefix_count", YamlInt(frame.prefixCount)), + ("datatype_count", YamlInt(frame.datatypeCount)), + ) + +end MetricsPrinter diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/YamlDocBuilder.scala b/src/main/scala/eu/neverblink/jelly/cli/util/YamlDocBuilder.scala new file mode 100644 index 0000000..0f7f657 --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/util/YamlDocBuilder.scala @@ -0,0 +1,84 @@ +package eu.neverblink.jelly.cli.util + +import scala.collection.mutable + +class YamlDocBuilder(var currIndent: Int = 0): + import YamlDocBuilder.* + private val sb = new StringBuilder + + def getString: String = sb.toString + + private def build(root: YamlValue, indent: Int = currIndent): Unit = + if indent > currIndent then currIndent = indent + root match + case YamlString(v) => + sb.append(quoteAndEscape(v)) + case YamlInt(v) => + sb.append(v) + case YamlBool(v) => + sb.append(v.toString) + case YamlEnum(v, i) => + sb.append(f"${v} (${i})") + case YamlList(v) => + v.zipWithIndex.foreach { (e, index) => + this.build(e, indent) + if e != v.last then sb.append(System.lineSeparator()) + } + case YamlListElem(v) => + sb.append(System.lineSeparator()) + sb.append(" " * indent).append("- ") + this.build(v, indent + 1) + case YamlMap(v) => + v.zipWithIndex.foreach { case ((k, e), ix) => + if ix != 0 then sb.append(" " * indent) + sb.append(k) + sb.append(": ") + if e.isInstanceOf[YamlMap] then + // If a map nested inside a map we have to indent it properly + sb.append(System.lineSeparator()) + sb.append(" " * (indent + 1)) + this.build(e, indent + 1) + sb.append(System.lineSeparator()) + else this.build(e, indent + 1) + if ix != v.size - 1 then sb.append(System.lineSeparator()) + } + case YamlBlank() => () + +object YamlDocBuilder: + /** A lightweight YAML document builder based on + * https://github.com/RiverBench/ci-worker/blob/2d57a085f65a6eabbfe76f2de6794f025b211f4e/src/main/scala/util/YamlDocBuilder.scala#L4 + */ + + sealed trait YamlValue + sealed trait YamlScalar extends YamlValue + case class YamlBlank() extends YamlScalar + case class YamlEnum(v: String, i: Int) extends YamlScalar + case class YamlInt(v: Int) extends YamlScalar + case class YamlBool(v: Boolean) extends YamlScalar + case class YamlString(v: String) extends YamlScalar + + case class YamlList(v: Seq[YamlListElem]) extends YamlValue + case class YamlListElem(v: YamlValue) extends YamlValue + + object YamlMap: + def apply(v: (String, YamlValue)*): YamlMap = YamlMap(v.to(mutable.LinkedHashMap)) + def apply(k: String, v: String): YamlMap = YamlMap(mutable.LinkedHashMap(k -> YamlString(v))) + def apply(k: String, v: Int): YamlMap = YamlMap(mutable.LinkedHashMap(k -> YamlInt(v))) + def apply(k: String, v: YamlValue): YamlMap = YamlMap(mutable.LinkedHashMap(k -> v)) + + case class YamlMap(v: mutable.LinkedHashMap[String, YamlValue]) extends YamlValue + + def build(root: YamlValue, indent: Int = 0): YamlDocBuilder = + val builder = YamlDocBuilder(currIndent = indent) + builder.build(root) + builder + + private def quoteAndEscape(s: String): String = + "\"" + escape(s) + "\"" + + private def escape(s: String): String = + s.replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t") diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala index f08de69..1570391 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala @@ -1,7 +1,7 @@ package eu.neverblink.jelly.cli.command.helpers -import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage -import org.apache.jena.riot.{Lang, RDFDataMgr, RDFLanguages} +import eu.ostrzyciel.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage} +import org.apache.jena.riot.{Lang, RDFDataMgr, RDFFormat, RDFLanguages} import org.apache.jena.sys.JenaSystem import org.scalatest.BeforeAndAfterAll import org.scalatest.wordspec.AnyWordSpec @@ -67,11 +67,15 @@ trait TestFixtureHelper extends BeforeAndAfterAll: testCode(tempFile.toString) } finally { tempFile.toFile.delete() } - def withFullJellyFile(testCode: (String) => Any): Unit = + def withFullJellyFile(testCode: (String) => Any, frameSize: Int = 256): Unit = val extension = getFileExtension(JellyLanguage.JELLY) val tempFile = Files.createTempFile(tmpDir, randomUUID.toString, f".${extension}") + val customFormat = new RDFFormat( + JellyLanguage.JELLY, + JellyFormatVariant(frameSize = frameSize), + ) val model = DataGenHelper.generateTripleModel(testCardinality) - RDFDataMgr.write(new FileOutputStream(tempFile.toFile), model, JellyLanguage.JELLY) + RDFDataMgr.write(new FileOutputStream(tempFile.toFile), model, customFormat) try { testCode(tempFile.toString) } finally { tempFile.toFile.delete() } diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspectSpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspectSpec.scala new file mode 100644 index 0000000..f2ab433 --- /dev/null +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspectSpec.scala @@ -0,0 +1,75 @@ +package eu.neverblink.jelly.cli.command.rdf + +import eu.neverblink.jelly.cli.{ExitException, InvalidJellyFile} +import eu.neverblink.jelly.cli.command.helpers.TestFixtureHelper + +import scala.jdk.CollectionConverters.* +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import org.yaml.snakeyaml.Yaml + +import java.util + +class RdfInspectSpec extends AnyWordSpec with Matchers with TestFixtureHelper: + protected val testCardinality: Int = 33 + + "rdf inspect command" should { + "be able to return aggregate of all frames as a valid Yaml" in withFullJellyFile { j => + val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", j)) + val yaml = new Yaml() + val parsed = yaml.load(out).asInstanceOf[java.util.Map[String, Any]] + parsed.get("stream_options") should not be None + val options = parsed.get("stream_options").asInstanceOf[java.util.Map[String, Any]] + options.get("max_name_table_size") should be(128) + parsed.get("frames") shouldBe a[util.LinkedHashMap[?, ?]] + val frames = parsed.get("frames").asInstanceOf[java.util.LinkedHashMap[String, Any]] + frames.get("triple_count") should be(testCardinality) + } + "be able to return all frames separately as a valid Yaml" in withFullJellyFile( + testCode = { j => + val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", "--per-frame", j)) + val yaml = new Yaml() + val parsed = yaml.load(out).asInstanceOf[java.util.Map[String, Any]] + parsed.get("stream_options") should not be None + parsed.get("frames") shouldBe a[util.ArrayList[Map[String, Int]]] + val frames = + parsed.get("frames").asInstanceOf[util.ArrayList[util.HashMap[String, Int]]].asScala + frames.length should be <= 5 + frames.map(_.get("triple_count")).sum should be(testCardinality) + }, + frameSize = 15, + ) + "handle properly separate frame metrics for a singular frame" in withFullJellyFile { j => + val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", "--per-frame", j)) + val yaml = new Yaml() + val parsed = yaml.load(out).asInstanceOf[java.util.Map[String, Any]] + parsed.get("stream_options") should not be None + parsed.get("frames") shouldBe a[util.ArrayList[?]] + val frames = + parsed.get("frames").asInstanceOf[util.ArrayList[util.HashMap[String, Int]]].asScala + frames.length should be(1) + frames.map(_.get("triple_count")).sum should be(testCardinality) + } + "handle properly frame count when aggregating multiple frames" in withFullJellyFile( + testCode = { j => + val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", j)) + val yaml = new Yaml() + val parsed = yaml.load(out).asInstanceOf[java.util.Map[String, Any]] + parsed.get("stream_options") should not be None + val options = parsed.get("stream_options").asInstanceOf[java.util.Map[String, Any]] + options.get("max_name_table_size") should be(128) + parsed.get("frames") shouldBe a[util.LinkedHashMap[?, ?]] + val frames = parsed.get("frames").asInstanceOf[java.util.LinkedHashMap[String, Any]] + frames.get("triple_count") should be(testCardinality) + frames.get("frame_count") should be(5) + }, + frameSize = 15, + ) + "throw an error if the input file is not a valid Jelly file" in withEmptyJellyFile { j => + val exception = intercept[ExitException] { + RdfInspect.runTestCommand(List("rdf", "inspect", j, "--debug")) + } + val msg = InvalidJellyFile(RuntimeException("")).getMessage + exception.getMessage should include(msg) + } + }