-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy path2Dbinning.py
executable file
·205 lines (175 loc) · 6.52 KB
/
2Dbinning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/usr/bin/env python
from __future__ import division
import math, sys, utilities, time, re
from optparse import OptionParser
from os import path
ttotstart = time.time()
parser = OptionParser()
parser.add_option('-f', '--file', dest='data_file', help='Input data file',
default=None)
parser.add_option('-o', '--output', dest='output_file', default=None,
help='Binned output file')
parser.add_option('-b', '--bins', dest='bins', default=None,
help='Number of bins [NxM]')
parser.add_option('-x', '--xbinrange', dest='xbinrange', default=None,
help='Range for bins in the x-direction')
parser.add_option('-y', '--ybinrange', dest='ybinrange', default=None,
help='Range for bins in the y-direction')
parser.add_option('-n', '--normalize', dest='normalize', default=False,
action='store_true', help='Normalize the histograms')
parser.add_option('-g', '--gnuplot', dest='gnuplot', default=None,
help='Name of gnuplot script to write.')
parser.add_option('-c', '--columns', dest='columns', default=None,
help='Which two columns to analyze [##,##] starting from 1')
parser.add_option('-d', '--delimiter', dest='delimiter', default=None,
help='The string/character that delimits fields in the ' +
'input data file')
opt, args = parser.parse_args()
# Set up regular expressions to parse the input commands.
numbins_re = re.compile(r'(\d+)x(\d+)')
binrange_re = re.compile(r'([+-]{0,1}\d+.*\d*)-([+-]{0,1}\d+.*\d*)')
columns_re = re.compile(r'(\d+),(\d+)')
# Process the command-line flags
if not opt.output_file:
parser.print_help()
sys.exit(0)
if opt.bins:
optmatch = numbins_re.match(opt.bins)
if not optmatch:
print 'Bad format for -b/--bins: INTxINT'
sys.exit(1)
bins = [int(i) for i in optmatch.groups()]
else:
bins = [0,0]
if opt.xbinrange:
optmatch = binrange_re.match(opt.xbinrange)
if not optmatch:
print 'Bad format for -x/--xbinrange: FLOAT-FLOAT'
sys.exit(1)
binrange = [float(x) for x in optmatch.groups()]
else:
binrange = [0,0]
if opt.ybinrange:
optmatch = binrange_re.match(opt.ybinrange)
if not optmatch:
print 'Bad format for -y/--ybinrange: FLOAT-FLOAT'
sys.exit(1)
binrange.extend([float(x) for x in optmatch.groups()])
else:
binrange.extend([0,0])
if opt.columns:
optmatch = columns_re.match(opt.columns)
if not optmatch:
print 'Bad format for -c/--columns: INT,INT'
sys.exit(1)
col1, col2 = tuple([int(i) for i in optmatch.groups()])
# Adjust for indexing
col1, col2 = col1 - 1, col2 - 1
else:
col1 = 0
col2 = 1
maxcol = max(col1, col2) + 1
if not opt.data_file:
input_data = sys.stdin
else:
if not path.exists(opt.data_file):
print 'Error: Could not find %s!' % opt.data_file
sys.exit(1)
input_data = open(opt.data_file)
phipsibins = [] # the list of all bins in both directions
data1 = [] # array of 2-element arrays that contains every pair of points
data2 = [] # array of 2-element arrays that contains every pair of points
discarded = 0 # count number of points that have been discarded
pointweight = 1.0 # how much a single point is worth. 1 if not normalized.
script_name = opt.gnuplot
# load data into file
for line in input_data:
if opt.delimiter is None:
words = line.split()
else:
words = line.split(opt.delimiter)
if len(words) < maxcol: # skip any lines that don't have enough values
continue
try: # skip any lines where at least 1 of first 2 columns are not floats
data1.append(float(words[col1].strip()))
data2.append(float(words[col2].strip()))
except ValueError:
print words
continue
input_data.close()
# Set up default ranges
if (binrange[0] == 0 and binrange[1] == 0) or \
(binrange[2] == 0 and binrange[3] == 0):
xmaxminholder = utilities.minmax(data1)
ymaxminholder = utilities.minmax(data2)
binrange[0] = math.floor(xmaxminholder[0])
binrange[1] = math.ceil(xmaxminholder[1])
binrange[2] = math.floor(ymaxminholder[0])
binrange[3] = math.ceil(ymaxminholder[1])
# Set up default number of bins according to "Scott's Choice"
if bins[0] == 0 or bins[1] == 0:
xinttmp = 3.5 * utilities.stdev(data1,'no') / float(len(data1)) ** (1/3)
yinttmp = 3.5 * utilities.stdev(data2,'no') / float(len(data2)) ** (1/3)
bins[0] = int(math.ceil((binrange[1] - binrange[0])/xinttmp))
bins[1] = int(math.ceil((binrange[3] - binrange[2])/yinttmp))
if opt.normalize:
pointweight /= float(len(data1)) * (
binrange[1]-binrange[0])*(binrange[3]-binrange[2])/(bins[0]*bins[1])
xinterval = (binrange[1] - binrange[0])/bins[0]
yinterval = (binrange[3] - binrange[2])/bins[1]
# create a large 1-D array with every bin
# (x1y1 x1y2 ... x1yN x2y1 x2y2 ... ... xNyN)
for x in range(bins[0]):
for y in range(bins[1]):
phipsibins.append(0)
skipped = 0
for x in range(len(data1)):
xval = data1[x]
yval = data2[x]
if xval > binrange[1] or xval < binrange[0] or yval > binrange[3] or \
yval < binrange[2]:
discarded += 1
continue
xval -= binrange[0]
yval -= binrange[2]
binnum = int(math.floor(xval/xinterval) * bins[1] + math.floor(yval/yinterval))
try:
phipsibins[binnum] += pointweight
except IndexError:
skipped += 1
if skipped > 0:
print 'Warning: %d points did not fit into the range.' % skipped
xprintval = binrange[0]
yprintval = binrange[2]
outputfile = open(opt.output_file,'w')
for x in range(len(phipsibins)):
if x != 0 and x % bins[1] == 0:
xprintval += xinterval
yprintval = binrange[2]
outputfile.write('\n')
outputfile.write('%s %s %s\n' % (xprintval,yprintval,phipsibins[x]))
yprintval += yinterval
outputfile.close()
if script_name:
script = open(script_name,'w')
script.write("""unset surface
set contour base
set cntrparam levels 20
set cntrparam bspline
set cntrparam order 7
set view 0,0
unset ztics\n""")
script.write("splot '%s' w l\n" % opt.output_file)
script.close()
ttotend = time.time()
print 'Binning Results:\n'
print 'Time Taken: %.4f sec.' % (ttotend - ttotstart)
print 'Data file: ' + opt.data_file
print 'Output file: ' + opt.output_file
if script_name:
print 'GNUPLOT script: ' + script_name
print 'Numbers of Bins: %d x %d' % (bins[0], bins[1])
print 'Bin X-Range: %.4f - %.4f' % (binrange[0], binrange[1])
print 'Bin Y-Range: %.4f - %.4f' % (binrange[2], binrange[3])
print 'Points Omitted: %d' % discarded
print 'Done!'