@@ -4,15 +4,34 @@ import com.google.protobuf.ByteString
44import eu .neverblink .jelly .cli .util .io .YamlDocBuilder
55import eu .neverblink .jelly .cli .util .io .YamlDocBuilder .*
66import eu .neverblink .jelly .core .proto .v1 .*
7+ import eu .neverblink .protoc .java .runtime .ProtoMessage
78
89import java .io .OutputStream
910import scala .language .postfixOps
1011
/** Companion of the [[FrameInfo]] class; holds the pluggable statistic strategies. */
object FrameInfo:
  import java.nio.charset.StandardCharsets

  /** Strategy deciding how much one observed element contributes to a statistic. */
  trait StatisticCollector:
    /** Contribution of a single protobuf message. */
    def measure(r: ProtoMessage[?]): Long

    /** Contribution of a single plain string (needed as bnodes are plain strings). */
    def measure(r: String): Long

    /** Suffix that is appended to the metric keys when the statistics are formatted. */
    def name(): String

  /** Counts occurrences: every element contributes exactly 1. */
  case object CountStatistic extends StatisticCollector:
    override def measure(r: ProtoMessage[?]): Long = 1
    override def measure(r: String): Long = 1
    override def name(): String = "count"

  /** Sums sizes: messages contribute their serialized size, strings their encoded size. */
  case object SizeStatistic extends StatisticCollector:
    override def measure(r: ProtoMessage[?]): Long = r.getSerializedSize

    // Encoded string size + tag. The charset is pinned to UTF-8 so the result does not
    // depend on the JVM's platform-default charset for non-ASCII strings.
    // NOTE(review): only one extra byte (the tag) is added; the length varint of a
    // length-delimited field is not counted — confirm this is the intended accounting.
    override def measure(r: String): Long = r.getBytes(StandardCharsets.UTF_8).length + 1

    override def name(): String = "size"
27+
1128/** This class is used to store the metrics for a single frame
1229 */
13- class FrameInfo (val frameIndex : Long , val metadata : Map [String , ByteString ]):
30+ class FrameInfo (val frameIndex : Long , val metadata : Map [String , ByteString ])(using
31+ statCollector : FrameInfo .StatisticCollector ,
32+ ):
1433 var frameCount : Long = 1
15- private object count :
34+ private object stat :
1635 var option : Long = 0
1736 var name : Long = 0
1837 var namespace : Long = 0
@@ -25,15 +44,15 @@ class FrameInfo(val frameIndex: Long, val metadata: Map[String, ByteString]):
2544
/** Folds the statistics of `other` into this instance and returns this instance.
  *
  * `frameCount` grows by exactly one per call, independent of `other.frameCount`; every
  * per-category total of `other` is added to the matching total here.
  */
def +=(other: FrameInfo): FrameInfo =
  frameCount += 1
  stat.option += other.stat.option
  stat.name += other.stat.name
  stat.namespace += other.stat.namespace
  stat.triple += other.stat.triple
  stat.quad += other.stat.quad
  stat.prefix += other.stat.prefix
  stat.datatype += other.stat.datatype
  stat.graphStart += other.stat.graphStart
  stat.graphEnd += other.stat.graphEnd
  this
3958
@@ -49,77 +68,87 @@ class FrameInfo(val frameIndex: Long, val metadata: Map[String, ByteString]):
4968 case r : RdfStreamOptions => handleOption(r)
5069 }
5170
// Row handlers: each one adds the collector's measurement of the row to its own total.

protected def handleTriple(r: RdfTriple): Unit =
  stat.triple += statCollector.measure(r)

protected def handleQuad(r: RdfQuad): Unit =
  stat.quad += statCollector.measure(r)

protected def handleNameEntry(r: RdfNameEntry): Unit =
  stat.name += statCollector.measure(r)

protected def handlePrefixEntry(r: RdfPrefixEntry): Unit =
  stat.prefix += statCollector.measure(r)

protected def handleNamespaceDeclaration(r: RdfNamespaceDeclaration): Unit =
  stat.namespace += statCollector.measure(r)

protected def handleDatatypeEntry(r: RdfDatatypeEntry): Unit =
  stat.datatype += statCollector.measure(r)

protected def handleGraphStart(r: RdfGraphStart): Unit =
  stat.graphStart += statCollector.measure(r)

protected def handleGraphEnd(r: RdfGraphEnd): Unit =
  stat.graphEnd += statCollector.measure(r)

protected def handleOption(r: RdfStreamOptions): Unit =
  stat.option += statCollector.measure(r)

/** Lists every per-category total as a `(key, value)` pair, in a fixed order.
  *
  * Each key is the category label followed by `_` and the collector's name.
  */
def format(): Seq[(String, Long)] =
  val suffix = statCollector.name()
  Seq(
    "option" -> stat.option,
    "triple" -> stat.triple,
    "quad" -> stat.quad,
    "graph_start" -> stat.graphStart,
    "graph_end" -> stat.graphEnd,
    "namespace" -> stat.namespace,
    "name" -> stat.name,
    "prefix" -> stat.prefix,
    "datatype" -> stat.datatype,
  ).map((label, value) => (s"${label}_$suffix", value))
7398
7499end FrameInfo
75100
/** Statistics broken down by node type. Covers both the nodes allowed as triple terms (IRI,
  * blank node, literal, triple) and the graph term of quads (IRI, blank node, literal, default
  * graph). To keep things simple, no validation of those constraints is performed here.
  */
class NodeDetailInfo(using statCollector: FrameInfo.StatisticCollector):
  private object stat:
    var iri: Long = 0
    var bnode: Long = 0
    var literal: Long = 0
    var triple: Long = 0
    var defaultGraph: Long = 0

  /** Adds the collector's measurement of `o` to the total for its node type.
    *
    * Only the five types matched below are handled; any other value fails the match.
    */
  def handle(o: Object): Unit = o match
    case iri: RdfIri => stat.iri += statCollector.measure(iri)
    case bnode: String => stat.bnode += statCollector.measure(bnode) // bnodes are strings
    case literal: RdfLiteral => stat.literal += statCollector.measure(literal)
    case triple: RdfTriple => stat.triple += statCollector.measure(triple)
    case graph: RdfDefaultGraph => stat.defaultGraph += statCollector.measure(graph)

  /** Lists the per-type totals as `(key, value)` pairs, leaving out totals that are not
    * positive. Each key is the type label followed by `_` and the collector's name.
    */
  def format(): Seq[(String, Long)] =
    val suffix = statCollector.name()
    Seq(
      "iri" -> stat.iri,
      "bnode" -> stat.bnode,
      "literal" -> stat.literal,
      "triple" -> stat.triple,
      "default_graph" -> stat.defaultGraph,
    ).collect { case (label, value) if value > 0 => (s"${label}_$suffix", value) }

  /** Adds every per-type total of `other` to this instance and returns this instance. */
  def +=(other: NodeDetailInfo): NodeDetailInfo =
    stat.iri += other.stat.iri
    stat.bnode += other.stat.bnode
    stat.literal += other.stat.literal
    stat.triple += other.stat.triple
    stat.defaultGraph += other.stat.defaultGraph
    this

  /** Sum of all five per-type totals. */
  def total(): Long =
    stat.iri + stat.bnode + stat.literal + stat.triple + stat.defaultGraph

end NodeDetailInfo
120148
121- class FrameDetailInfo (frameIndex : Long , metadata : Map [String , ByteString ])
122- extends FrameInfo (frameIndex, metadata):
149+ class FrameDetailInfo (frameIndex : Long , metadata : Map [String , ByteString ])(using
150+ statCollector : FrameInfo .StatisticCollector ,
151+ ) extends FrameInfo (frameIndex, metadata):
123152 private object term :
124153 val subjectInfo = new NodeDetailInfo ()
125154 val predicateInfo = new NodeDetailInfo ()
@@ -168,12 +197,15 @@ class FrameDetailInfo(frameIndex: Long, metadata: Map[String, ByteString])
168197 out += term.graphInfo
169198 out.format()
170199
/** Lists the overall total for each term position (subject, predicate, object, graph) as
  * `(key, value)` pairs. Each key is the position label followed by `_` and the collector's
  * name.
  */
def formatGroupByTerm(): Seq[(String, Long)] =
  val suffix = statCollector.name()
  Seq(
    "subject" -> term.subjectInfo,
    "predicate" -> term.predicateInfo,
    "object" -> term.objectInfo,
    "graph" -> term.graphInfo,
  ).map((label, info) => (s"${label}_$suffix", info.total()))
177209
178210end FrameDetailInfo
179211
0 commit comments