Skip to content

Commit d397e73

Browse files
authored
Merge pull request #35 from histogrammar/1.0.x
1.0.x
2 parents c219764 + bbe9535 commit d397e73

File tree

12 files changed

+838
-120
lines changed

12 files changed

+838
-120
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ os:
44
- linux
55

66
python:
7-
- 2.6
87
- 2.7
98
- 3.4
109
- 3.5
10+
- 3.6
11+
- 3.7
1112

1213
addons:
1314
apt:

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@ Python implementation of Histogrammar
55

66
See [histogrammar.org](http://histogrammar.org) for a complete introduction to Histogrammar.
77

8-
This is a pure Python implementation for Python versions 2.6, 2.7, and 3.4.
8+
This is a pure Python implementation for Python versions 2.7, 3.4, 3.5, 3.6, and 3.7.
9+
10+
Latest Python release: v1.0.10 (Sep 2019).
11+
12+
Support for Histogrammar is provided on a best-effort basis only.
13+
914

1015
Installation
1116
============

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@
4747
# built documents.
4848
#
4949
# The short X.Y version.
50-
version = "1.0.8"
50+
version = "1.0.10"
5151
# The full version, including alpha/beta/rc tags.
52-
release = "1.0.8"
52+
release = "1.0.10"
5353

5454
# The language for content autogenerated by Sphinx. Refer to documentation
5555
# for a list of supported languages.

histogrammar/plot/matplotlib.py

Lines changed: 0 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -67,42 +67,6 @@ def plotmatplotlib(self, name=None, **kwargs):
6767

6868
return ax
6969

70-
def num_bins(self):
71-
"""
72-
Returns number of bins
73-
"""
74-
return len(self.values)
75-
76-
def bin_width(self):
77-
"""
78-
Returns bin width
79-
"""
80-
return (self.high - self.low) / self.num_bins()
81-
82-
def bin_entries(self):
83-
"""
84-
Returns bin values
85-
"""
86-
import numpy as np
87-
return np.array([x.entries for x in self.values])
88-
89-
def bin_edges(self):
90-
"""
91-
Returns bin edges
92-
"""
93-
import numpy as np
94-
num_bins = self.num_bins()
95-
edges = np.linspace(self.low, self.high, num_bins + 1)
96-
return edges
97-
98-
def bin_centers(self):
99-
"""
100-
Returns bin centers
101-
"""
102-
import numpy as np
103-
return np.array([sum(self.range(i))/2.0 for i in self.indexes])
104-
105-
10670
class SparselyHistogramMethods(object):
10771
def plotmatplotlib(self, name=None, **kwargs):
10872
"""
@@ -129,58 +93,6 @@ def plotmatplotlib(self, name=None, **kwargs):
12993

13094
return ax
13195

132-
def num_bins(self):
133-
"""
134-
Returns number of bins
135-
"""
136-
if self.minBin is None or self.maxBin is None:
137-
return 0
138-
nbins = self.maxBin - self.minBin + 1
139-
return nbins
140-
141-
def bin_width(self):
142-
"""
143-
Returns bin width
144-
"""
145-
return self.binWidth
146-
147-
def bin_edges(self):
148-
"""
149-
Returns bin_edges
150-
"""
151-
import numpy as np
152-
153-
if self.minBin is None or self.maxBin is None:
154-
edges = np.array([self.origin, self.origin + 1])
155-
else:
156-
num_bins = self.maxBin - self.minBin + 1
157-
edges = np.linspace(self.low, self.high, num_bins + 1)
158-
return edges
159-
160-
def bin_entries(self):
161-
"""
162-
Returns bin values
163-
"""
164-
import numpy as np
165-
166-
if self.minBin is None or self.maxBin is None:
167-
entries = [self.bins[0].entries]
168-
else:
169-
entries = [self.bins[i].entries if i in self.bins else 0.0 \
170-
for i in range(self.minBin, self.maxBin + 1)]
171-
return np.array(entries)
172-
173-
def bin_centers(self):
174-
"""
175-
Returns bin centers
176-
"""
177-
import numpy as np
178-
179-
bin_edges = self.bin_edges()
180-
centers = [(bin_edges[i]+bin_edges[i+1])/2. for i in range(len(bin_edges)-1)]
181-
return np.array(centers)
182-
183-
18496
class CategorizeHistogramMethods(object):
18597
def plotmatplotlib(self, name=None, **kwargs):
18698
"""
@@ -223,28 +135,6 @@ def xtick(lab):
223135

224136
return ax
225137

226-
def bin_entries(self):
227-
"""
228-
Returns bin values
229-
"""
230-
import numpy as np
231-
return np.array([self.bins[i].entries for i in self.bins])
232-
233-
def bin_labels(self):
234-
"""
235-
Returns bin labels
236-
"""
237-
import numpy as np
238-
labels = []
239-
for i,key in enumerate(self.bins.keys()):
240-
try:
241-
label = str(key)
242-
except:
243-
label = 'bin_%d' % i
244-
labels.append(label)
245-
return np.asarray(labels)
246-
247-
248138
class ProfileMethods(object):
249139
def plotmatplotlib(self, name=None, **kwargs):
250140
""" Plotting method for Bin of Average

histogrammar/primitives/bin.py

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ def ed(low, high, entries, values, underflow, overflow, nanflow):
8484
out = Bin(len(values), float(low), float(high), None, None, underflow, overflow, nanflow)
8585
out.entries = float(entries)
8686
out.values = values
87+
out.contentType = values[0].name
8788
return out.specialize()
8889

8990
@staticmethod
@@ -134,8 +135,10 @@ def __init__(self, num, low, high, quantity, value=Count(), underflow=Count(), o
134135
self.quantity = serializable(quantity)
135136
if value is None:
136137
self.values = [None] * num
138+
self.contentType = "Count"
137139
else:
138140
self.values = [value.zero() for i in xrange(num)]
141+
self.contentType = value.name
139142
self.underflow = underflow.copy()
140143
self.overflow = overflow.copy()
141144
self.nanflow = nanflow.copy()
@@ -622,5 +625,204 @@ def __ne__(self, other): return not self == other
622625
def __hash__(self):
    """Hash the range, quantity, entry count, per-bin values and the three flow counters."""
    # the list of per-bin values is frozen into a tuple so that it is hashable
    identity = (
        self.low,
        self.high,
        self.quantity,
        self.entries,
        tuple(self.values),
        self.underflow,
        self.overflow,
        self.nanflow,
    )
    return hash(identity)
624627

628+
@property
def n_bins(self):
    """Number of bins, read from ``self.num``.

    Offered under the same name as on SparselyBin and Categorize.
    """
    count = self.num
    return count
632+
633+
@property
def n_dim(self):
    """Histogram dimension

    :returns: dimension of the histogram, as reported by ``get_n_dim``
    :rtype: int
    """
    dimension = get_n_dim(self)
    return dimension
641+
642+
def num_bins(self, low=None, high=None):
    """
    Returns number of bins

    Possible to set range with low and high params. A bin is counted when
    the requested range touches it; a ``high`` that coincides with a lower
    bin edge does not select the bin starting at that edge.

    The count is derived from bin indices rather than from dividing a float
    range by the bin width, because that quotient can fall just below the
    true integer and be truncated to one bin too few.

    :param low: lower edge of range, default is None
    :param high: higher edge of range, default is None
    :returns: number of bins in range
    :rtype: int
    :raises RuntimeError: if both edges are given and low is greater than high
    """
    import numpy as np
    # trivial case first: no range requested, every bin counts
    if low is None and high is None:
        return len(self.values)
    # catch weird cases
    if low is not None and high is not None:
        if low > high:
            raise RuntimeError('low {low} greater than high {high}'.format(low=low, high=high))
        if low < self.low and high < self.low:
            # note: all these data end up in the underflow bin, with no real index
            return 0
        if low >= self.high and high >= self.high:
            # note: all these data end up in the overflow bin, with no real index
            return 0
    # index of the first selected bin
    if low is None or low < self.low:
        minBin = 0
    else:
        # NOTE(review): assumes low < self.high here, as in the original code -- confirm
        minBin = self.bin(low)
    # index of the last selected bin
    if high is None or high >= self.high:
        maxBin = len(self.values) - 1
    else:
        # NOTE(review): assumes high >= self.low here, as in the original code -- confirm
        maxBin = self.bin(high)
        # high sitting on the lower edge of its bin: that bin is not selected
        if np.isclose(high, self.low + self.bin_width() * maxBin):
            maxBin -= 1
    return maxBin - minBin + 1
684+
685+
def bin_width(self):
    """
    Returns bin width: the span from low to high divided by the number of bins
    """
    span = self.high - self.low
    return span / len(self.values)
690+
691+
def bin_entries(self, low=None, high=None, xvalues=None):
    """
    Returns bin values

    Possible to set range with low and high params, and list of selected x-values.
    When ``xvalues`` is non-empty it takes precedence and ``low``/``high`` are ignored.

    :param low: lower edge of range, default is None
    :param high: higher edge of range, default is None
    :param xvalues: list of x-values to get entries of, alternative to low and high;
        None (the default) or an empty sequence means "not given"
    :returns: numpy array with numbers of entries for selected bins
    :rtype: numpy.array
    :raises RuntimeError: if no x-values are given, both edges are given and
        low is greater than high
    """
    import numpy as np
    # None replaces the former mutable [] default
    if xvalues is None:
        xvalues = []
    # entries at requested list of x-values
    if len(xvalues) > 0:
        entries = []
        for x in xvalues:
            # look the index up once; values without a valid bin index count as 0.0
            index = self.bin(x)
            entries.append(self.values[index].entries if index in self.indexes else 0.0)
        return np.array(entries)
    # trivial case
    if low is None and high is None:
        return np.array([x.entries for x in self.values])
    # catch weird cases
    if low is not None and high is not None:
        if low > high:
            raise RuntimeError('low {low} greater than high {high}'.format(low=low, high=high))
        if low < self.low and high < self.low:
            # note: all these data end up in the underflow bin
            return np.array([])
        if low >= self.high and high >= self.high:
            # note: all these data end up in the overflow bin
            return np.array([])
    # lowest edge
    if low is None or low < self.low:
        minBin = 0
    else:
        # NOTE(review): assumes low < self.high here, as in the original code -- confirm
        minBin = self.bin(low)
    # highest edge
    if high is None or high >= self.high:
        maxBin = len(self.values) - 1
    else:
        # NOTE(review): assumes high >= self.low here, as in the original code -- confirm
        maxBin = self.bin(high)
        # high sitting on the lower edge of its bin: that bin is not selected
        if np.isclose(high, self.low + self.bin_width() * maxBin):
            maxBin -= 1
    return np.array([self.values[i].entries for i in range(minBin, maxBin + 1)])
734+
735+
def bin_edges(self, low=None, high=None):
    """
    Returns bin edges

    The requested range is widened to whole bins: the first edge is the lower
    edge of the bin holding ``low`` and the last edge is the upper edge of the
    last selected bin.

    :param low: lower edge of range, default is None
    :param high: higher edge of range, default is None
    :returns: numpy array with bin edges for selected range
    :rtype: numpy.array
    """
    import numpy as np
    # one more edge than there are selected bins
    n_edges = self.num_bins(low, high) + 1
    low_given = low is not None
    high_given = high is not None

    # no range requested: edges of the whole axis
    if not low_given and not high_given:
        return np.linspace(self.low, self.high, n_edges)

    # both edges requested: reject or short-circuit the degenerate ranges
    if low_given and high_given:
        if low > high:
            raise RuntimeError('low {low} greater than high {high}'.format(low=low, high=high))
        if low < self.low and high < self.low:
            # everything falls into the underflow bin
            return np.linspace(self.low, self.low, n_edges)
        if low >= self.high and high >= self.high:
            # everything falls into the overflow bin
            return np.linspace(self.high, self.high, n_edges)

    # first edge: lower edge of the bin that holds low
    if low_given and not low < self.low:
        first_bin = self.bin(low)
        first_edge = self.low + self.bin_width() * first_bin
    else:
        first_edge = self.low

    # last edge: upper edge of the last selected bin
    if high_given and not high >= self.high:
        last_bin = self.bin(high)
        # high sitting on the lower edge of its bin: that bin is not selected
        if np.isclose(high, self.low + self.bin_width() * last_bin):
            last_bin -= 1
        last_edge = self.low + self.bin_width() * (last_bin + 1)
    else:
        last_edge = self.high

    return np.linspace(first_edge, last_edge, n_edges)
776+
777+
def bin_centers(self, low=None, high=None):
    """
    Returns bin centers

    Each center is the midpoint of the pair returned by ``self.range(i)``.

    :param low: lower edge of range, default is None
    :param high: higher edge of range, default is None
    :returns: numpy array with bin centers for selected range
    :rtype: numpy.array
    """
    import numpy as np

    def midpoint(index):
        return sum(self.range(index)) / 2.0

    low_given = low is not None
    high_given = high is not None

    # no range requested: centers of every bin
    if not low_given and not high_given:
        return np.array([midpoint(i) for i in self.indexes])

    # both edges requested: reject or short-circuit the degenerate ranges
    if low_given and high_given:
        if low > high:
            raise RuntimeError('low {low} greater than high {high}'.format(low=low, high=high))
        if low < self.low and high < self.low:
            # everything falls into the underflow bin
            return np.array([])
        if low >= self.high and high >= self.high:
            # everything falls into the overflow bin
            return np.array([])

    # index of the first selected bin
    if low_given and not low < self.low:
        first_bin = self.bin(low)
    else:
        first_bin = 0

    # index of the last selected bin
    if high_given and not high >= self.high:
        last_bin = self.bin(high)
        # high sitting on the lower edge of its bin: that bin is not selected
        if np.isclose(high, self.low + self.bin_width() * last_bin):
            last_bin -= 1
    else:
        last_bin = len(self.values) - 1

    return np.array([midpoint(i) for i in range(first_bin, last_bin + 1)])
814+
815+
@property
def mpv(self):
    """Return bin-center of most probable value

    The most probable value is the center of the bin with the most entries.
    """
    entries = self.bin_entries()
    centers = self.bin_centers()

    # max() keeps the first maximal item it meets, so a tie resolves to the lowest index
    fullest = max(range(len(entries)), key=lambda idx: entries[idx])
    return centers[fullest]
826+
625827
Factory.register(Bin)
626828

0 commit comments

Comments
 (0)