Review notes (text before the first "diff --git" line is ignored by git apply):
* build.sbt: "-Werror" and the managed jelly-jena dependency are commented out while SNAPSHOT
  jars are added under lib/. NOTE(review): this looks like a temporary debugging setup;
  confirm before merging.
* build.sbt context-line indentation was reconstructed; apply with --ignore-whitespace if needed.
* HammerTranscoderSpec.scala was revised in review, so the blob id on its index line is stale.

diff --git a/build.sbt b/build.sbt
index 12f7f03..1faa9ca 100644
--- a/build.sbt
+++ b/build.sbt
@@ -35,7 +35,7 @@ lazy val root = (project in file("."))
       "org.slf4j" % "slf4j-simple" % "2.0.17",
       "org.apache.jena" % "jena-core" % jenaV,
       "org.apache.jena" % "jena-arq" % jenaV,
-      "eu.neverblink.jelly" % "jelly-jena" % jellyV,
+      // "eu.neverblink.jelly" % "jelly-jena" % jellyV,
       "eu.neverblink.jelly" % "jelly-core-protos-google" % jellyV,
       "com.github.alexarchambault" %% "case-app" % "2.1.0-M30",
       "org.scalatest" %% "scalatest" % "3.2.19" % Test,
@@ -43,7 +43,7 @@ lazy val root = (project in file("."))
     ),
     scalacOptions ++= Seq(
       "-Wunused:imports",
-      "-Werror",
+      // "-Werror",
       "-feature",
       "-deprecation",
       "-unchecked",
diff --git a/lib/jelly-core-3.0.0+8-68e912b2+20250525-1022-SNAPSHOT.jar b/lib/jelly-core-3.0.0+8-68e912b2+20250525-1022-SNAPSHOT.jar
new file mode 100644
index 0000000..dc3d525
Binary files /dev/null and b/lib/jelly-core-3.0.0+8-68e912b2+20250525-1022-SNAPSHOT.jar differ
diff --git a/lib/jelly-jena-3.0.0+8-68e912b2+20250525-1022-SNAPSHOT.jar b/lib/jelly-jena-3.0.0+8-68e912b2+20250525-1022-SNAPSHOT.jar
new file mode 100644
index 0000000..d5323d4
Binary files /dev/null and b/lib/jelly-jena-3.0.0+8-68e912b2+20250525-1022-SNAPSHOT.jar differ
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/HammerTranscoderSpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/HammerTranscoderSpec.scala
new file mode 100644
index 0000000..57010ea
--- /dev/null
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/HammerTranscoderSpec.scala
@@ -0,0 +1,118 @@
+package eu.neverblink.jelly.cli.command.rdf
+
+import com.google.protobuf.InvalidProtocolBufferException
+import eu.neverblink.jelly.cli.command.helpers.DataGenHelper
+import eu.neverblink.jelly.core.internal.BaseJellyOptions.{
+  BIG_DT_TABLE_SIZE,
+  BIG_NAME_TABLE_SIZE,
+  BIG_PREFIX_TABLE_SIZE,
+}
+import eu.neverblink.jelly.core.{JellyOptions, JellyTranscoderFactory}
+import eu.neverblink.jelly.core.proto.v1.{PhysicalStreamType, RdfStreamFrame, RdfStreamOptions}
+import eu.neverblink.jelly.core.proto.google.v1 as google
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
+import scala.collection.mutable.ArrayBuffer
+import scala.util.Random
+import scala.util.control.NonFatal
+
+/** Stress test for the fast merging transcoder.
+  *
+  * Each iteration generates random Jelly bytes, transcodes the parsed frame and checks that the
+  * output can be read back with the `proto.google.v1` classes. Failures are collected instead of
+  * aborting on the first one: diagnostics are printed for the first failure only, a summary is
+  * printed at the end, and the last recorded failure (if any) is rethrown to fail the test.
+  */
+class HammerTranscoderSpec extends AnyWordSpec, Matchers:
+  "hammer" should {
+    "transcode" in {
+      // One entry per iteration: None on success, Some(error) on failure.
+      val outcomes = ArrayBuffer[Option[Throwable]]()
+      var printed = false
+
+      for i <- 1 to 10_000 do
+        val j1 = DataGenHelper.generateJellyBytes(Random.nextInt(100) + 2)
+        val f1 = RdfStreamFrame.parseDelimitedFrom(ByteArrayInputStream(j1))
+        val os = ByteArrayOutputStream()
+        val options = JellyOptions.BIG_GENERALIZED.clone
+          .setPhysicalType(PhysicalStreamType.TRIPLES)
+        val transcoder = JellyTranscoderFactory.fastMergingTranscoderUnsafe(options)
+        val frames = 1
+        val transcoded =
+          for _ <- 0 until frames yield transcoder.ingestFrame(f1).writeDelimitedTo(os)
+        val bytes = os.toByteArray
+        val input = ByteArrayInputStream(bytes)
+
+        val failure: Option[Throwable] =
+          try
+            // Reading stops at the first null result, which is treated as end of input.
+            val parsed = Iterator
+              .continually(google.RdfStreamFrame.parseDelimitedFrom(input))
+              .takeWhile(_ != null)
+              .toSeq
+            parsed.size should be(frames)
+            None
+          catch
+            // NonFatal still matches ScalaTest assertion failures, but lets fatal JVM errors
+            // (OutOfMemoryError, LinkageError, ...) abort the run right away.
+            case NonFatal(e) => Some(e)
+        outcomes.append(failure)
+
+        // Dump diagnostics for the first failure only, to keep the output readable.
+        for e <- failure if !printed do
+          println(s"Error in transcoding $i: ${e.getMessage}")
+          println(s"Original: ${j1.map(b => f"$b%02x").mkString(" ")}")
+          println(s"Transcoded: ${bytes.map(b => f"$b%02x").mkString(" ")}")
+          println(s"preset cached size: ${JellyOptions.BIG_GENERALIZED.getCachedSize}")
+          println(s"modified preset cached size: ${options.getCachedSize}")
+          println(
+            s"transcoded cached size: ${transcoded.head.getRows.iterator().next().getCachedSize}",
+          )
+          printed = true
+
+      val failures = outcomes.flatten
+      println(s"Errors: ${failures.size} of ${outcomes.size}")
+
+      // A region is a maximal run of consecutive iterations with the same outcome.
+      val failed = outcomes.map(_.isDefined)
+      val regions =
+        if failed.isEmpty then 0
+        else 1 + failed.zip(failed.drop(1)).count((prev, next) => prev != next)
+      println(s"Regions: $regions")
+
+      println("Throwing last error if found...")
+      failures.lastOption.foreach { e =>
+        e match
+          case ipbe: InvalidProtocolBufferException =>
+            // The unfinished message may be null; guard so that a NullPointerException does not
+            // mask the real error.
+            Option(ipbe.getUnfinishedMessage).foreach(println)
+          case _ => ()
+        throw e
+      }
+    }
+
+// Earlier variant, kept for reference: one test case per iteration, several frames per stream.
+//    for i <- 1 to 10_000 do
+//      f"transcode $i" in {
+//        val j1 = DataGenHelper.generateJellyBytes(Random.nextInt(100) + 2)
+//        val f1 = RdfStreamFrame.parseDelimitedFrom(ByteArrayInputStream(j1))
+//        val os = ByteArrayOutputStream()
+//        val transcoder = JellyTranscoderFactory.fastMergingTranscoderUnsafe(
+//          JellyOptions.BIG_GENERALIZED.clone
+//            .setPhysicalType(PhysicalStreamType.TRIPLES),
+//        )
+//        val frames = Random.nextInt(40) + 3
+//        for _ <- 0 until frames do transcoder.ingestFrame(f1).writeDelimitedTo(os)
+//        val bytes = os.toByteArray
+//        val input = ByteArrayInputStream(bytes)
+//        val parsed = Iterator.continually(
+//          google.RdfStreamFrame.parseDelimitedFrom(input),
+//        )
+//          .takeWhile(frame => frame != null)
+//          .toSeq
+//        parsed.size should be(frames)
+//      }
+  }