Skip to content

Commit 47eb6bc

Browse files
Initial implementation with context parameters
1 parent f819c31 commit 47eb6bc

File tree

3 files changed

+232
-69
lines changed

3 files changed

+232
-69
lines changed

src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import scala.jdk.CollectionConverters.*
1111
import java.io.InputStream
1212
@HelpMessage(
1313
"Prints statistics about a Jelly-RDF stream.\n" +
14-
"Statistics include: Jelly stream options and counts of various row types, " +
14+
"Statistics include: Jelly stream options and counts/sizes of various row types, " +
1515
"including triples, quads, names, prefixes, " +
1616
"namespaces, datatypes, and graphs.\n" +
1717
"Output statistics are returned as a valid YAML. \n" +
@@ -40,6 +40,10 @@ case class RdfInspectOptions(
4040
"term position ('term'), or doesn't aggregate ('all').",
4141
)
4242
detail: Option[String] = None,
43+
@HelpMessage(
44+
"Report the size (in bytes) of rows and other elements, rather than their counts.",
45+
)
46+
size: Boolean = false,
4347
) extends HasJellyCommandOptions
4448

4549
object RdfInspect extends JellyCommand[RdfInspectOptions]:
@@ -61,6 +65,8 @@ object RdfInspect extends JellyCommand[RdfInspectOptions]:
6165
throw InvalidArgument("--detail", value, Some("Must be one of 'all', 'node', 'term'"))
6266
case None => MetricsPrinter.allFormatter
6367
}
68+
val statCollector = if options.size then FrameInfo.SizeStatistic else FrameInfo.CountStatistic
69+
given FrameInfo.StatisticCollector = statCollector
6470
val (streamOpts, frameIterator) = inspectJelly(inputStream, options.detail.isDefined)
6571
val metricsPrinter = new MetricsPrinter(formatter)
6672
if options.perFrame then metricsPrinter.printPerFrame(streamOpts, frameIterator, outputStream)
@@ -69,7 +75,7 @@ object RdfInspect extends JellyCommand[RdfInspectOptions]:
6975
private def inspectJelly(
7076
inputStream: InputStream,
7177
detail: Boolean,
72-
): (RdfStreamOptions, Iterator[FrameInfo]) =
78+
)(using FrameInfo.StatisticCollector): (RdfStreamOptions, Iterator[FrameInfo]) =
7379

7480
inline def computeMetrics(
7581
frame: RdfStreamFrame,

src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/MetricsPrinter.scala

Lines changed: 96 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,34 @@ import com.google.protobuf.ByteString
44
import eu.neverblink.jelly.cli.util.io.YamlDocBuilder
55
import eu.neverblink.jelly.cli.util.io.YamlDocBuilder.*
66
import eu.neverblink.jelly.core.proto.v1.*
7+
import eu.neverblink.protoc.java.runtime.ProtoMessage
78

89
import java.io.OutputStream
910
import scala.language.postfixOps
1011

12+
object FrameInfo:
  /** Strategy that decides how much a single inspected element contributes to a statistic.
    *
    * The value returned by `name()` is used as the suffix of the keys in the formatted output
    * (for example `triple_count` or `triple_size`).
    */
  trait StatisticCollector:
    /** Contribution of a single protobuf message. */
    def measure(r: ProtoMessage[?]): Long

    /** Contribution of a plain string. Needed as bnodes are plain strings. */
    def measure(r: String): Long

    /** Name of the statistic, appended to the output keys. */
    def name(): String

  /** Counts elements: every measured element contributes exactly 1. */
  case object CountStatistic extends StatisticCollector:
    override def measure(r: ProtoMessage[?]): Long = 1
    override def measure(r: String): Long = 1
    override def name(): String = "count"

  /** Sums sizes: every measured element contributes its size in bytes. */
  case object SizeStatistic extends StatisticCollector:
    override def measure(r: ProtoMessage[?]): Long = r.getSerializedSize

    // Protobuf strings are UTF-8 on the wire, so the charset is given explicitly instead of
    // relying on the JVM default (which may differ and miscount non-ASCII labels).
    // The extra 1 accounts for the field tag.
    // NOTE(review): a length-delimited protobuf field also carries a varint length prefix,
    // which is not counted here -- confirm whether that omission is intended.
    override def measure(r: String): Long =
      r.getBytes(java.nio.charset.StandardCharsets.UTF_8).length + 1

    override def name(): String = "size"
27+
1128
/** This class is used to store the metrics for a single frame
1229
*/
13-
class FrameInfo(val frameIndex: Long, val metadata: Map[String, ByteString]):
30+
class FrameInfo(val frameIndex: Long, val metadata: Map[String, ByteString])(using
31+
statCollector: FrameInfo.StatisticCollector,
32+
):
1433
var frameCount: Long = 1
15-
private object count:
34+
private object stat:
1635
var option: Long = 0
1736
var name: Long = 0
1837
var namespace: Long = 0
@@ -25,15 +44,15 @@ class FrameInfo(val frameIndex: Long, val metadata: Map[String, ByteString]):
2544

2645
def +=(other: FrameInfo): FrameInfo = {
2746
this.frameCount += 1
28-
this.count.option += other.count.option
29-
this.count.name += other.count.name
30-
this.count.namespace += other.count.namespace
31-
this.count.triple += other.count.triple
32-
this.count.quad += other.count.quad
33-
this.count.prefix += other.count.prefix
34-
this.count.datatype += other.count.datatype
35-
this.count.graphStart += other.count.graphStart
36-
this.count.graphEnd += other.count.graphEnd
47+
this.stat.option += other.stat.option
48+
this.stat.name += other.stat.name
49+
this.stat.namespace += other.stat.namespace
50+
this.stat.triple += other.stat.triple
51+
this.stat.quad += other.stat.quad
52+
this.stat.prefix += other.stat.prefix
53+
this.stat.datatype += other.stat.datatype
54+
this.stat.graphStart += other.stat.graphStart
55+
this.stat.graphEnd += other.stat.graphEnd
3756
this
3857
}
3958

@@ -49,77 +68,87 @@ class FrameInfo(val frameIndex: Long, val metadata: Map[String, ByteString]):
4968
case r: RdfStreamOptions => handleOption(r)
5069
}
5170

52-
protected def handleTriple(r: RdfTriple): Unit = count.triple += 1
53-
protected def handleQuad(r: RdfQuad): Unit = count.quad += 1
54-
protected def handleNameEntry(r: RdfNameEntry): Unit = count.name += 1
55-
protected def handlePrefixEntry(r: RdfPrefixEntry): Unit = count.prefix += 1
56-
protected def handleNamespaceDeclaration(r: RdfNamespaceDeclaration): Unit = count.namespace += 1
57-
protected def handleDatatypeEntry(r: RdfDatatypeEntry): Unit = count.datatype += 1
58-
protected def handleGraphStart(r: RdfGraphStart): Unit = count.graphStart += 1
59-
protected def handleGraphEnd(r: RdfGraphEnd): Unit = count.graphEnd += 1
60-
protected def handleOption(r: RdfStreamOptions): Unit = count.option += 1
61-
62-
def format(): Seq[(String, Long)] = Seq(
63-
("option_count", count.option),
64-
("triple_count", count.triple),
65-
("quad_count", count.quad),
66-
("graph_start_count", count.graphStart),
67-
("graph_end_count", count.graphEnd),
68-
("namespace_count", count.namespace),
69-
("name_count", count.name),
70-
("prefix_count", count.prefix),
71-
("datatype_count", count.datatype),
72-
)
71+
// Row handlers: each one adds the collector's measurement of the row to the matching statistic.

/** Accounts for a triple row. */
protected def handleTriple(r: RdfTriple): Unit =
  stat.triple += statCollector.measure(r)

/** Accounts for a quad row. */
protected def handleQuad(r: RdfQuad): Unit =
  stat.quad += statCollector.measure(r)

/** Accounts for a name table entry. */
protected def handleNameEntry(r: RdfNameEntry): Unit =
  stat.name += statCollector.measure(r)

/** Accounts for a prefix table entry. */
protected def handlePrefixEntry(r: RdfPrefixEntry): Unit =
  stat.prefix += statCollector.measure(r)

/** Accounts for a namespace declaration. */
protected def handleNamespaceDeclaration(r: RdfNamespaceDeclaration): Unit =
  stat.namespace += statCollector.measure(r)

/** Accounts for a datatype table entry. */
protected def handleDatatypeEntry(r: RdfDatatypeEntry): Unit =
  stat.datatype += statCollector.measure(r)

/** Accounts for a graph start marker. */
protected def handleGraphStart(r: RdfGraphStart): Unit =
  stat.graphStart += statCollector.measure(r)

/** Accounts for a graph end marker. */
protected def handleGraphEnd(r: RdfGraphEnd): Unit =
  stat.graphEnd += statCollector.measure(r)

/** Accounts for a stream options row. */
protected def handleOption(r: RdfStreamOptions): Unit =
  stat.option += statCollector.measure(r)
83+
84+
/** Lists the row statistics of this frame as (key, value) pairs.
  *
  * Each key is the row kind followed by an underscore and the collector's name.
  */
def format(): Seq[(String, Long)] = {
  val suffix = statCollector.name()
  Seq(
    "option" -> stat.option,
    "triple" -> stat.triple,
    "quad" -> stat.quad,
    "graph_start" -> stat.graphStart,
    "graph_end" -> stat.graphEnd,
    "namespace" -> stat.namespace,
    "name" -> stat.name,
    "prefix" -> stat.prefix,
    "datatype" -> stat.datatype,
  ).map { case (kind, value) => s"${kind}_$suffix" -> value }
}
7398

7499
end FrameInfo
75100

76101
/** Class containing statistics for each node type. Combines nodes allowed in triple terms (IRI,
77102
* blank node, literal, triple) and graph term in quads (IRI, blank node, literal, default graph).
78103
* For simplicity, this class does not validate these constraints.
79104
*/
80-
class NodeDetailInfo:
81-
private object count:
105+
class NodeDetailInfo(using statCollector: FrameInfo.StatisticCollector):
106+
private object stat:
82107
var iri: Long = 0
83108
var bnode: Long = 0
84109
var literal: Long = 0
85110
var triple: Long = 0
86111
var defaultGraph: Long = 0
87112

88113
def handle(o: Object): Unit = o match {
89-
case r: RdfIri => count.iri += 1
90-
case r: String => count.bnode += 1 // bnodes are strings
91-
case r: RdfLiteral => count.literal += 1
92-
case r: RdfTriple => count.triple += 1
93-
case r: RdfDefaultGraph => count.defaultGraph += 1
114+
case r: RdfIri => stat.iri += statCollector.measure(r)
115+
case r: String => stat.bnode += statCollector.measure(r) // bnodes are strings
116+
case r: RdfLiteral => stat.literal += statCollector.measure(r)
117+
case r: RdfTriple => stat.triple += statCollector.measure(r)
118+
case r: RdfDefaultGraph => stat.defaultGraph += statCollector.measure(r)
94119
}
95120

96-
def format(): Seq[(String, Long)] = Seq(
97-
("iri_count", count.iri),
98-
("bnode_count", count.bnode),
99-
("literal_count", count.literal),
100-
("triple_count", count.triple),
101-
("default_graph_count", count.defaultGraph),
102-
).filter(_._2 > 0)
121+
/** Lists the per-node-type statistics as (key, value) pairs, keeping only positive values.
  *
  * Each key is the node type followed by an underscore and the collector's name.
  */
def format(): Seq[(String, Long)] = {
  val suffix = statCollector.name()
  Seq(
    "iri" -> stat.iri,
    "bnode" -> stat.bnode,
    "literal" -> stat.literal,
    "triple" -> stat.triple,
    "default_graph" -> stat.defaultGraph,
  ).collect { case (kind, value) if value > 0 => s"${kind}_$suffix" -> value }
}
103131

104132
def +=(other: NodeDetailInfo): NodeDetailInfo = {
105-
this.count.iri += other.count.iri
106-
this.count.bnode += other.count.bnode
107-
this.count.literal += other.count.literal
108-
this.count.triple += other.count.triple
109-
this.count.defaultGraph += other.count.defaultGraph
133+
this.stat.iri += other.stat.iri
134+
this.stat.bnode += other.stat.bnode
135+
this.stat.literal += other.stat.literal
136+
this.stat.triple += other.stat.triple
137+
this.stat.defaultGraph += other.stat.defaultGraph
110138
this
111139
}
112140

113-
def total(): Long = count.iri
114-
+ count.bnode
115-
+ count.literal
116-
+ count.triple
117-
+ count.defaultGraph
141+
def total(): Long = stat.iri
142+
+ stat.bnode
143+
+ stat.literal
144+
+ stat.triple
145+
+ stat.defaultGraph
118146

119147
end NodeDetailInfo
120148

121-
class FrameDetailInfo(frameIndex: Long, metadata: Map[String, ByteString])
122-
extends FrameInfo(frameIndex, metadata):
149+
class FrameDetailInfo(frameIndex: Long, metadata: Map[String, ByteString])(using
150+
statCollector: FrameInfo.StatisticCollector,
151+
) extends FrameInfo(frameIndex, metadata):
123152
private object term:
124153
val subjectInfo = new NodeDetailInfo()
125154
val predicateInfo = new NodeDetailInfo()
@@ -168,12 +197,15 @@ class FrameDetailInfo(frameIndex: Long, metadata: Map[String, ByteString])
168197
out += term.graphInfo
169198
out.format()
170199

171-
def formatGroupByTerm(): Seq[(String, Long)] = Seq(
172-
"subject_count" -> term.subjectInfo.total(),
173-
"predicate_count" -> term.predicateInfo.total(),
174-
"object_count" -> term.objectInfo.total(),
175-
"graph_count" -> term.graphInfo.total(),
176-
)
200+
/** Lists one total per term position (subject, predicate, object, graph).
  *
  * Each key is the term position followed by an underscore and the collector's name.
  */
def formatGroupByTerm(): Seq[(String, Long)] = {
  val suffix = statCollector.name()
  Seq(
    "subject" -> term.subjectInfo,
    "predicate" -> term.predicateInfo,
    "object" -> term.objectInfo,
    "graph" -> term.graphInfo,
  ).map { case (position, info) => s"${position}_$suffix" -> info.total() }
}
177209

178210
end FrameDetailInfo
179211

0 commit comments

Comments
 (0)