Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package eu.neverblink.jelly.cli.command.rdf

import com.google.protobuf.InvalidProtocolBufferException
import org.apache.jena.riot.RiotException
import eu.neverblink.jelly.cli.*
import caseapp.*
import eu.ostrzyciel.jelly.core.{RdfProtoSerializationError, RdfProtoDeserializationError}

import java.io.{InputStream, OutputStream}

/** Common base for the RDF conversion commands: it decides which conversion to run from the
  * explicitly requested format or the file name, and translates library failures into CLI errors.
  */
abstract class RdfCommand[T <: HasJellyOptions: {Parser, Help}] extends JellyCommand[T]:

  override def group = "rdf"

  /** Conversion to run when no format is given explicitly and none can be inferred. */
  def defaultAction: (InputStream, OutputStream) => Unit

  /** The print util responsible for handling the specific formats etc. the command requires */
  lazy val printUtil: RdfCommandPrintUtil

  /** Maps a format to the conversion this command offers for it, or None if it offers none. */
  def matchToAction(option: RdfFormatOption): Option[(InputStream, OutputStream) => Unit]

  /** Picks the conversion to run and executes it, with proper error handling.
    *
    * Priority of the parameters:
    *   1. an explicitly given format: it must be known and supported by this command, otherwise
    *      [[InvalidFormatSpecified]] is thrown (the file name is not consulted);
    *   1. the format inferred from the file name, if this command supports it;
    *   1. [[defaultAction]].
    *
    * @param inputStream
    *   stream the conversion reads from
    * @param outputStream
    *   stream the conversion writes to
    * @param format
    *   format requested explicitly on the command line, if any
    * @param fileName
    *   file name used to infer the format when none was requested explicitly
    * @throws InvalidFormatSpecified
    *   if an explicit format is unknown or not supported by this command
    * @throws JellyDeserializationError
    * @throws JellySerializationError
    * @throws JenaRiotException
    * @throws InvalidJellyFile
    */
  def parseFormatArgs(
      inputStream: InputStream,
      outputStream: OutputStream,
      format: Option[String],
      fileName: Option[String],
  ): Unit =
    try
      val action: (InputStream, OutputStream) => Unit = format match
        // If the format is explicitly defined but does not match any available action or format,
        // we throw an error instead of guessing.
        case Some(requested) =>
          RdfFormatOption
            .find(requested)
            .flatMap(matchToAction)
            .getOrElse(throw InvalidFormatSpecified(requested, printUtil.validFormatsString))
        // If the format is not explicitly defined, try the file name; if that is not
        // understandable either, fall back to the default.
        case None =>
          fileName
            .flatMap(RdfFormatOption.inferFormat)
            .flatMap(matchToAction)
            .getOrElse(defaultAction)
      action(inputStream, outputStream)
    catch
      case e: RiotException =>
        throw JenaRiotException(e)
      case e: InvalidProtocolBufferException =>
        throw InvalidJellyFile(e)
      case e: RdfProtoDeserializationError =>
        throw JellyDeserializationError(e.getMessage)
      case e: RdfProtoSerializationError =>
        throw JellySerializationError(e.getMessage)
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
package eu.neverblink.jelly.cli.command.rdf

enum RdfFormatOption(val cliOptions: List[String], val fullName: String):
case NQuads extends RdfFormatOption(List("nq", "nt", "nquads", "ntriples"), "N-Quads")
case JellyBinary extends RdfFormatOption(List("jelly"), "Jelly binary format")
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
import org.apache.jena.riot.RDFLanguages

/** RDF formats that can be named on the command line.
  *
  * @param cliOptions
  *   the option strings that select this format
  * @param fullName
  *   the display name of the format
  */
enum RdfFormatOption(val cliOptions: List[String], val fullName: String):
  case NQuads extends RdfFormatOption(List("nq", "nt", "nquads", "ntriples"), "N-Quads")
  case JellyBinary extends RdfFormatOption(List("jelly"), "Jelly binary format")
  case JellyText extends RdfFormatOption(List("jelly-text"), "Jelly text format")

object RdfFormatOption:
Expand All @@ -15,3 +29,16 @@ object RdfFormatOption:
*/
def find(cliOption: String): Option[RdfFormatOption] =
  // First format (in declaration order) whose option strings include the given one.
  RdfFormatOption.values.collectFirst {
    case candidate if candidate.cliOptions.contains(cliOption) => candidate
  }

/** Infers the format from a file name.
  *
  * The content type guessed by Jena's `RDFLanguages.guessContentType` is compared against the
  * N-Quads and Jelly content types first; if neither matches, a `.jelly.txt` suffix selects the
  * Jelly text format.
  *
  * @param fileName
  *   the file name to inspect
  * @return
  *   the inferred format, or None if the name matches none of the checks
  */
def inferFormat(fileName: String): Option[RdfFormatOption] =
  val guessed = RDFLanguages.guessContentType(fileName)
  if guessed == RDFLanguages.NQUADS.getContentType then Some(RdfFormatOption.NQuads)
  else if guessed == JellyLanguage.JELLY.getContentType then Some(RdfFormatOption.JellyBinary)
  else if fileName.endsWith(".jelly.txt") then Some(RdfFormatOption.JellyText)
  else None
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package eu.neverblink.jelly.cli.command.rdf
import caseapp.*
import com.google.protobuf.InvalidProtocolBufferException
import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.rdf.RdfFormatOption.*
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
import eu.ostrzyciel.jelly.core.{IoUtils, RdfProtoDeserializationError}
import eu.ostrzyciel.jelly.core.IoUtils
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.riot.{RDFLanguages, RDFParser, RiotException}
import org.apache.jena.riot.{RDFLanguages, RDFParser}

import java.io.{InputStream, OutputStream}

Expand All @@ -26,55 +25,28 @@ case class RdfFromJellyOptions(
@ExtraName("out-format") outputFormat: Option[String] = None,
) extends HasJellyOptions

object RdfFromJelly extends JellyCommand[RdfFromJellyOptions]:
override def group = "rdf"
object RdfFromJelly extends RdfCommand[RdfFromJellyOptions]:

/** Names of this command: the single path `rdf from-jelly`. */
override def names: List[List[String]] = List("rdf" :: "from-jelly" :: Nil)

/** Print util of this command; bound to RdfFromJellyPrint. */
lazy val printUtil: RdfCommandPrintUtil = RdfFromJellyPrint

/** Default conversion of this command: jellyToNQuad. */
def defaultAction: (InputStream, OutputStream) => Unit = jellyToNQuad

/** Runs the command: obtains the input and output streams from the first remaining argument and
* the output file option, then runs the conversion on them.
*/
override def doRun(options: RdfFromJellyOptions, remainingArgs: RemainingArgs): Unit =
val (inputStream, outputStream) =
this.getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile)
// NOTE(review): this call and the parseFormatArgs call below both run a conversion on the same
// streams. This looks like the pre-refactor line of a diff shown next to its replacement —
// confirm that only one of the two is meant to remain.
doConversion(inputStream, outputStream, options.outputFormat)
// The output file name is passed along with the explicit output format option.
parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile)

/** Selects the conversion for the requested output format, runs it, and maps library failures
  * to CLI errors. With no format requested, the input is converted to N-Quads.
  *
  * @param inputStream
  *   stream the Jelly data is read from
  * @param outputStream
  *   stream the converted data is written to
  * @param format
  *   output format requested on the command line, if any
  * @throws JellyDeserializationError
  * @throws JenaRiotException
  * @throws InvalidJellyFile
  */
private def doConversion(
    inputStream: InputStream,
    outputStream: OutputStream,
    format: Option[String],
): Unit =
  try
    val convert: (InputStream, OutputStream) => Unit = format match
      // default option if no parameter supplied
      case None => jellyToNQuad
      case Some(requested) =>
        RdfFormatOption.find(requested) match
          case Some(JellyText) => jellyBinaryToText
          case Some(NQuads) => jellyToNQuad
          // if anything else, it's an invalid option
          case _ =>
            throw InvalidFormatSpecified(requested, RdfFromJellyPrint.validFormatsString)
    convert(inputStream, outputStream)
  catch
    case e: RdfProtoDeserializationError =>
      throw JellyDeserializationError(e.getMessage)
    case e: RiotException =>
      throw JenaRiotException(e)
    case e: InvalidProtocolBufferException =>
      throw InvalidJellyFile(e)
/** Conversions this command offers: Jelly text and N-Quads output; None for any other format. */
override def matchToAction(
    option: RdfFormatOption,
): Option[(InputStream, OutputStream) => Unit] =
  val supported: PartialFunction[RdfFormatOption, (InputStream, OutputStream) => Unit] = {
    case JellyText => jellyBinaryToText
    case NQuads => jellyToNQuad
  }
  supported.lift(option)

/** This method reads the Jelly file, rewrites it to NQuads and writes it to some output stream
* @param inputStream
Expand Down
60 changes: 18 additions & 42 deletions src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
package eu.neverblink.jelly.cli.command.rdf
import caseapp.*
import com.google.protobuf.InvalidProtocolBufferException
import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.rdf.RdfFormatOption.*
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
import eu.ostrzyciel.jelly.core.RdfProtoSerializationError
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.riot.{RDFLanguages, RDFParser, RiotException}
import org.apache.jena.riot.{RDFLanguages, RDFParser}

import java.io.{InputStream, OutputStream}

Expand All @@ -25,54 +23,32 @@ case class RdfToJellyOptions(
@ExtraName("in-format") inputFormat: Option[String] = None,
) extends HasJellyOptions

object RdfToJelly extends JellyCommand[RdfToJellyOptions]:
override def group = "rdf"
object RdfToJelly extends RdfCommand[RdfToJellyOptions]:

/** Names of this command: the single path `rdf to-jelly`. */
override def names: List[List[String]] = List("rdf" :: "to-jelly" :: Nil)

/** Print util of this command; bound to RdfToJellyPrint. */
lazy val printUtil: RdfCommandPrintUtil = RdfToJellyPrint

/** Default conversion of this command: nQuadToJelly. */
def defaultAction: (InputStream, OutputStream) => Unit = nQuadToJelly

/** Runs the command: obtains the input and output streams from the first remaining argument and
* the output file option, then runs the conversion on them.
*/
override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit =
val (inputStream, outputStream) =
getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile)
// NOTE(review): this call and the parseFormatArgs call below both run a conversion on the same
// streams. This looks like the pre-refactor line of a diff shown next to its replacement —
// confirm that only one of the two is meant to remain.
doConversion(inputStream, outputStream, options.inputFormat)
// The first remaining argument (the input file, if any) is passed along with the explicit input
// format option.
parseFormatArgs(
inputStream,
outputStream,
options.inputFormat,
remainingArgs.remaining.headOption,
)

/** Validates the requested format, converts the input to Jelly, and maps library failures to CLI
  * errors. Only N-Quads is accepted; with no format requested, N-Quads is assumed.
  *
  * @param inputStream
  *   stream the RDF data is read from
  * @param outputStream
  *   stream the Jelly data is written to
  * @param format
  *   format requested on the command line, if any
  * @throws JellySerializationError
  * @throws JenaRiotException
  * @throws InvalidJellyFile
  */
private def doConversion(
    inputStream: InputStream,
    outputStream: OutputStream,
    format: Option[String],
): Unit =
  try
    // A requested format that does not resolve to N-Quads is an invalid option.
    format
      .filterNot(requested => RdfFormatOption.find(requested).contains(NQuads))
      .foreach(requested =>
        throw InvalidFormatSpecified(requested, RdfToJellyPrint.validFormatsString),
      )
    nQuadToJelly(inputStream, outputStream)
  catch
    case e: RdfProtoSerializationError =>
      throw JellySerializationError(e.getMessage)
    case e: RiotException =>
      throw JenaRiotException(e)
    case e: InvalidProtocolBufferException =>
      throw InvalidJellyFile(e)
/** Conversions this command offers: N-Quads input only; None for any other format. */
override def matchToAction(
    option: RdfFormatOption,
): Option[(InputStream, OutputStream) => Unit] =
  if option == NQuads then Some(nQuadToJelly) else None

/** This method reads the NQuad file, rewrites it to Jelly and writes it to some output stream
* @param inputStream
Expand Down
3 changes: 2 additions & 1 deletion src/main/scala/eu/neverblink/jelly/cli/util/IoUtil.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ object IoUtil:
val file = File(fileName)
val suppFile = file.getParentFile
val parentFile = if (suppFile != null) suppFile else File(".")
if !parentFile.canWrite || !file.canWrite then throw OutputFileCannotBeCreated(fileName)
if !parentFile.canWrite || (file.exists() && !file.canWrite) then
throw OutputFileCannotBeCreated(fileName)
FileOutputStream(file, true)
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,21 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
out.length should be(0)
}
}
"a file to file when defaulting to nQuads" in withFullJellyFile { jellyPath =>
  withEmptyRandomFile { targetPath =>
    // No --out-format is passed; the written file is compared against generated N-Quads.
    val expectedQuads =
      DataGenHelper.generateNQuadString(testCardinality).split("\n").map(_.trim).sorted
    val (stdOut, _) =
      RdfFromJelly.runTestCommand(List("rdf", "from-jelly", jellyPath, "--to", targetPath))
    val writtenQuads =
      Using.resource(Source.fromFile(targetPath))(_.getLines().map(_.trim).toList.sorted)
    writtenQuads should contain theSameElementsAs expectedQuads
    stdOut.length should be(0)
  }
}
"an input stream to file" in withEmptyQuadFile { q =>
val input = DataGenHelper.generateJellyInputStream(testCardinality)
RdfFromJelly.setStdIn(input)
Expand Down Expand Up @@ -97,6 +112,42 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
"rows".r.findAllIn(out).length should be(70)
"http://example.org/predicate/".r.findAllIn(out).length should be(1)
}
// No --out-format is passed here; per the test name the output type is expected to be inferred.
// NOTE(review): presumably withEmptyJellyTextFile yields a path ending in .jelly.txt — confirm.
"a file to file when inferred type" in withFullJellyFile { j =>
withEmptyJellyTextFile { t =>
val (out, err) =
RdfFromJelly.runTestCommand(
List(
"rdf",
"from-jelly",
j,
"--to",
t,
),
)
// Read back what the command wrote to the target file.
val inTxt = Using.resource(Source.fromFile(t)) { content =>
content.getLines().mkString("\n")
}
// Text expected to appear in the written file (checked with `include` below).
val outString =
"""# Frame 0
|rows {
| options {
| stream_name: ""
| physical_type: PHYSICAL_STREAM_TYPE_TRIPLES
| generalized_statements: true
| rdf_star: true
| max_name_table_size: 128
| max_prefix_table_size: 16
| max_datatype_table_size: 16
| logical_type: LOGICAL_STREAM_TYPE_FLAT_TRIPLES
| version: 1
| }
|}""".stripMargin
inTxt should include(outString)
// Occurrence counts of "rows" and of the predicate IRI prefix in the written text.
"rows".r.findAllIn(inTxt).length should be(70)
"http://example.org/predicate/".r.findAllIn(inTxt).length should be(1)
}

}
}
"throw proper exception" when {
"input file is not found" in {
Expand Down Expand Up @@ -191,5 +242,29 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
exception.code should be(1)
}
}
"invalid but known output format supplied" in withFullJellyFile { jellyPath =>
  withEmptyJellyFile { targetPath =>
    // The Jelly binary option exists in RdfFormatOption; the command is expected to reject it.
    val rejectedFormat = RdfFormatOption.JellyBinary.cliOptions.head
    val args =
      List("rdf", "from-jelly", jellyPath, "--to", targetPath, "--out-format", rejectedFormat)
    val exception = intercept[ExitException](RdfFromJelly.runTestCommand(args))
    val expectedError =
      InvalidFormatSpecified(rejectedFormat, RdfFromJellyPrint.validFormatsString)
    RdfFromJelly.getErrString should include(expectedError.getMessage)
    exception.code should be(1)
  }
}
}
}
Loading