Skip to content

Commit 2a2ce0e

Browse files
committed
Add tutorial's EventIterator to the core distribution.
1 parent 05bf546 commit 2a2ce0e

File tree

1 file changed

+148
-0
lines changed
  • core/src/main/scala/org/dianahep/histogrammar/tutorial

1 file changed

+148
-0
lines changed
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright 2016 DIANA-HEP
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package org.dianahep.histogrammar.tutorial
16+
17+
/**
18+
* Most of the examples must use some dataset, so I prepared a sample of CMS public data to be read with no dependencies. The data came from the CERN Open Data portal; it is 50 fb⁻¹ of highly processed particle physics data from the CMS experiment, with 469,384 events selected by the 24 GeV/c isolated muon trigger.
19+
*
20+
* For convenience, it has been converted to compressed JSON. The code that reads it into classes is provided below for you to copy-paste, rather than a JAR to load, because I want you to see how it’s done and you may want to modify it.
21+
*/
22+
package cmsdata {
23+
// class definitions
24+
25+
/**
 * A four-vector described by three Cartesian components and an energy.
 *
 * Implementations supply `px`, `py`, `pz` and `E`; every derived quantity
 * below is computed from those four members each time it is called.
 */
trait LorentzVector extends Serializable {
  // abstract members; must be defined by subclasses
  def px: Double
  def py: Double
  def pz: Double
  def E: Double

  // methods common to all LorentzVectors

  /** Transverse magnitude: `sqrt(px^2 + py^2)`. */
  def pt: Double = Math.sqrt(px*px + py*py)

  /** Magnitude of the three-vector: `sqrt(px^2 + py^2 + pz^2)`. */
  def p: Double = Math.sqrt(px*px + py*py + pz*pz)

  /**
   * Invariant mass: `sqrt(E^2 - px^2 - py^2 - pz^2)`.
   *
   * The result is NaN when the argument of the square root is negative.
   */
  def mass: Double = Math.sqrt(E*E - px*px - py*py - pz*pz)

  /**
   * Pseudorapidity: `0.5 * ln((p + pz) / (p - pz))`.
   *
   * No special-casing is done for `p == pz` or `p == 0`; those inputs follow
   * ordinary floating-point division and logarithm rules.
   */
  def eta: Double = 0.5*Math.log((p + pz)/(p - pz))

  /** Azimuthal angle, computed as `atan2(py, px)`. */
  def phi: Double = Math.atan2(py, px)

  /**
   * Component-wise sum of this vector and `two`.
   *
   * The four components of the result are computed once, when the sum is
   * created, and stored. Defining them as methods would re-read both operands
   * on every access, so the cost of reading a component would grow with the
   * number of vectors that had been added together.
   *
   * @param two the vector to add to this one
   * @return a new vector whose components are the sums of the operands' components
   */
  def +(two: LorentzVector): LorentzVector = {
    val one = this
    // create a subclass and an instance in one block
    new LorentzVector {
      val px: Double = one.px + two.px
      val py: Double = one.py + two.py
      val pz: Double = one.pz + two.pz
      val E: Double = one.E + two.E
      override def toString(): String = s"LorentzVector($px, $py, $pz, $E)"
    }
  }
}
52+
53+
// particle class definitions are now one-liners
54+
55+
/**
 * A jet: a [[LorentzVector]] with one additional value, `btag`.
 *
 * @param px   x component of the four-vector
 * @param py   y component of the four-vector
 * @param pz   z component of the four-vector
 * @param E    energy component of the four-vector
 * @param btag presumably a b-tagging discriminant; confirm against the dataset description
 */
case class Jet(
  px: Double,
  py: Double,
  pz: Double,
  E: Double,
  btag: Double
) extends LorentzVector
56+
57+
/**
 * A muon: a [[LorentzVector]] with two additional values, `q` and `iso`.
 *
 * @param px  x component of the four-vector
 * @param py  y component of the four-vector
 * @param pz  z component of the four-vector
 * @param E   energy component of the four-vector
 * @param q   presumably the electric charge; confirm against the dataset description
 * @param iso presumably an isolation measure; confirm against the dataset description
 */
case class Muon(
  px: Double,
  py: Double,
  pz: Double,
  E: Double,
  q: Int,
  iso: Double
) extends LorentzVector
58+
59+
/**
 * An electron: a [[LorentzVector]] with two additional values, `q` and `iso`.
 *
 * @param px  x component of the four-vector
 * @param py  y component of the four-vector
 * @param pz  z component of the four-vector
 * @param E   energy component of the four-vector
 * @param q   presumably the electric charge; confirm against the dataset description
 * @param iso presumably an isolation measure; confirm against the dataset description
 */
case class Electron(
  px: Double,
  py: Double,
  pz: Double,
  E: Double,
  q: Int,
  iso: Double
) extends LorentzVector
60+
61+
/**
 * A photon: a [[LorentzVector]] with one additional value, `iso`.
 *
 * @param px  x component of the four-vector
 * @param py  y component of the four-vector
 * @param pz  z component of the four-vector
 * @param E   energy component of the four-vector
 * @param iso presumably an isolation measure; confirm against the dataset description
 */
case class Photon(
  px: Double,
  py: Double,
  pz: Double,
  E: Double,
  iso: Double
) extends LorentzVector
62+
63+
/**
 * A two-component quantity (`px`, `py`) together with its magnitude.
 *
 * NOTE(review): the name suggests missing transverse energy; confirm against
 * the dataset description.
 *
 * @param px x component
 * @param py y component
 */
case class MET(px: Double, py: Double) {
  /** Magnitude of the (`px`, `py`) pair: `sqrt(px^2 + py^2)`. */
  def pt: Double = {
    val sumOfSquares = px*px + py*py
    Math.sqrt(sumOfSquares)
  }
}
66+
67+
/**
 * One record of the dataset: the particle collections of an event plus two
 * event-level values.
 *
 * @param jets               jets in the event
 * @param muons              muons in the event
 * @param electrons          electrons in the event
 * @param photons            photons in the event
 * @param met                the event's [[MET]] value
 * @param numPrimaryVertices value read from the record's "numPrimaryVertices" field
 */
case class Event(
  jets: Seq[Jet],
  muons: Seq[Muon],
  electrons: Seq[Electron],
  photons: Seq[Photon],
  met: MET,
  numPrimaryVertices: Long
)
68+
69+
// event data iterator
70+
/**
 * Iterates over [[Event]] objects decoded from a gzip-compressed stream that
 * holds one JSON record per line.
 *
 * The stream is opened when the iterator is constructed and one event is
 * always read ahead, so creating an instance already performs I/O. Once no
 * further event can be produced, the scanner (and with it the underlying
 * streams) is closed.
 *
 * @param location URL of the gzip-compressed, line-delimited JSON data
 */
case class EventIterator(location: String = "http://histogrammar.org/docs/data/triggerIsoMu24_50fb-1.json.gz") extends Iterator[Event] {
  import org.dianahep.histogrammar.json._

  // use Java libraries to stream and decompress data on-the-fly; the charset is
  // named explicitly so that decoding does not depend on the platform default
  @transient val scanner: java.util.Scanner = new java.util.Scanner(
    new java.util.zip.GZIPInputStream(
      new java.net.URL(location).openStream),
    "UTF-8")

  // read one ahead so that hasNext can effectively "peek"
  //
  // Returns None, which ends the iteration, when the scanner has no more lines
  // or when the line does not parse to a JSON object.
  // NOTE(review): the catch also covers eventFromJson, so a
  // NoSuchElementException raised while decoding a record (for example a key
  // lookup on a Map that lacks the key) ends the iteration silently as well.
  // Kept as in the original; confirm whether that is intended.
  private def getNext(): Option[Event] = {
    val upcoming: Option[Event] =
      try {
        Json.parse(scanner.nextLine) collect {
          case event: JsonObject => eventFromJson(event)
        }
      }
      catch {
        case _: java.util.NoSuchElementException => None
      }
    // getNext is never called again once it has returned None (next() fails
    // before reaching it), so the input can be released at this point.
    if (upcoming.isEmpty)
      scanner.close()
    upcoming
  }

  private var theNext: Option[Event] = getNext()

  // iterator interface

  /** True while a pre-read event is waiting to be returned by `next()`. */
  def hasNext: Boolean = theNext.isDefined

  /**
   * Returns the pre-read event and reads the following one.
   *
   * @throws java.util.NoSuchElementException if the iterator is exhausted
   */
  def next(): Event = {
    val out = theNext.getOrElse(
      throw new java.util.NoSuchElementException("next on empty iterator"))
    theNext = getNext()
    out
  }

  /** Builds a [[Jet]] from the fields "px", "py", "pz", "E" and "btag". */
  def jetFromJson(params: Map[String, JsonNumber]): Jet =
    Jet(
      params("px").toDouble,
      params("py").toDouble,
      params("pz").toDouble,
      params("E").toDouble,
      params("btag").toDouble)

  /** Builds a [[Muon]] from the fields "px", "py", "pz", "E", "q" and "iso". */
  def muonFromJson(params: Map[String, JsonNumber]): Muon =
    Muon(
      params("px").toDouble,
      params("py").toDouble,
      params("pz").toDouble,
      params("E").toDouble,
      params("q").toInt,
      params("iso").toDouble)

  /** Builds an [[Electron]] from the fields "px", "py", "pz", "E", "q" and "iso". */
  def electronFromJson(params: Map[String, JsonNumber]): Electron =
    Electron(
      params("px").toDouble,
      params("py").toDouble,
      params("pz").toDouble,
      params("E").toDouble,
      params("q").toInt,
      params("iso").toDouble)

  /** Builds a [[Photon]] from the fields "px", "py", "pz", "E" and "iso". */
  def photonFromJson(params: Map[String, JsonNumber]): Photon =
    Photon(
      params("px").toDouble,
      params("py").toDouble,
      params("pz").toDouble,
      params("E").toDouble,
      params("iso").toDouble)

  /** Builds a [[MET]] from the fields "px" and "py". */
  def metFromJson(params: Map[String, JsonNumber]): MET =
    MET(params("px").toDouble, params("py").toDouble)

  /**
   * Builds an [[Event]] from one JSON record.
   *
   * The record is expected to hold arrays under "jets", "muons", "electrons"
   * and "photons", an object under "MET" and an integer under
   * "numPrimaryVertices". Array elements that are not JSON objects are skipped.
   *
   * @param params the JSON object for one event
   */
  def eventFromJson(params: JsonObject): Event = {
    val JsonArray(jets @ _*) = params("jets")
    val JsonArray(muons @ _*) = params("muons")
    val JsonArray(electrons @ _*) = params("electrons")
    val JsonArray(photons @ _*) = params("photons")
    val met = params("MET").asInstanceOf[JsonObject]
    val JsonInt(numPrimaryVertices) = params("numPrimaryVertices")
    Event(
      jets collect {case j: JsonObject => jetFromJson(j.to[JsonNumber].toMap)},
      muons collect {case j: JsonObject => muonFromJson(j.to[JsonNumber].toMap)},
      electrons collect {case j: JsonObject => electronFromJson(j.to[JsonNumber].toMap)},
      photons collect {case j: JsonObject => photonFromJson(j.to[JsonNumber].toMap)},
      metFromJson(met.to[JsonNumber].toMap),
      numPrimaryVertices)
  }
}
148+
}

0 commit comments

Comments
 (0)