forked from Spinmob/spinmob
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_data.py
More file actions
2141 lines (1596 loc) · 76.6 KB
/
_data.py
File metadata and controls
2141 lines (1596 loc) · 76.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os as _os
import shutil as _shutil
import _dialogs
# do this so all the scripts will work with all the numpy functions
import numpy as _n
import scipy.optimize as _opt
import pylab as _p
import textwrap as _textwrap
import spinmob as _s
import time as _time
#############################################################
# Class for storing / manipulating / saving / loading data
#############################################################
class databox:
    """
    Container for tabular data: an ordered set of named columns plus an
    ordered set of header (key, value) pairs, with ascii load/save and
    eval()-based scripting support.
    """
    # this is used by the load_file to rename some of the annoying
    # column names that aren't consistent between different types of data files (and older data files)
    # or to just rename columns with difficult-to-remember ckeys.
    obnoxious_ckeys = {}
    #obnoxious_ckeys = {"example_annoying1" : "unified_name1",
    # "example_annoying2" : "unified_name2"}

    directory = "default_directory"  # default directory for file dialogs
    path = "(no path)"               # path of the last loaded/saved file
    debug = False # Use this to print debug info in various places
    delimiter = None # delimiter of the ascii file. If "None" this will just use any whitespace

    # NOTE(review): these are class-level mutable defaults. __init__ rebinds
    # headers/columns/ckeys/hkeys per instance via clear_columns()/clear_headers(),
    # but extra_globals is not rebound there, so it is shared by all instances
    # until an instance assigns its own — confirm whether that sharing is intended.
    headers = {} # this dictionary will hold the header information
    columns = {} # this dictionary will hold the data columns
    ckeys = [] # we need a special list of column keys to keep track of their order during data assembly
    hkeys = [] # ordered list of header keys
    extra_globals = {}
def __setitem__(self, n, x):
    """
    Sets the n'th column to x (n can also be a column name string).
    """
    # named column: insert or overwrite by key
    if type(n) is str:
        self.insert_column(data_array=x, ckey=n, index=None)
        return

    # integer index beyond the existing columns: auto-generate a name
    if type(n) in [int, long] and n > len(self.ckeys)-1:
        self.insert_column(data_array=x, ckey='_column'+str(len(self.ckeys)), index=None)
        return

    # otherwise overwrite the existing column's data in place
    self.columns[self.ckeys[n]] = _n.array(x)
def __len__(self):
    """Returns the number of columns in the databox."""
    return len(self.ckeys)
def __getslice__(self, i, j):
    """
    Returns columns i through j-1 as a list (Python 2 slice protocol).
    """
    # clamp the stop index to the number of columns
    stop = min(j, len(self))
    return [self[k] for k in range(i, stop)]
def __init__(self, delimiter=None, debug=False, **kwargs):
    """
    Creates a new, empty databox.

    delimiter   The delimiter the file uses. None (default) means
                "Try to figure it out" (reasonably smart)
    debug       Displays some partial debug information while running

    **kwargs are sent to self.h()
    """
    # this keeps the dictionaries from getting all jumbled with each other
    self.clear_columns()
    self.clear_headers()

    # BUGFIX: extra_globals was previously only a class-level attribute, so
    # execute_script()'s self.extra_globals.update(g) leaked user globals
    # between every databox instance. Give each instance its own dict.
    self.extra_globals = {}

    self.debug = debug
    self.delimiter = delimiter

    # BUGFIX: the docstring promised **kwargs would be sent to self.h(),
    # but they were silently discarded. Honor the documented contract.
    if len(kwargs): self.h(**kwargs)
def __repr__(self):
    """Short summary showing the header and column counts."""
    return "<databox instance: %d headers, %d columns>" % (len(self.hkeys), len(self.ckeys))
def more_info(self):
"""
Prints out more information about the databox.
"""
print "\nDatabox Instance", self.path
print "\nHeader"
for h in self.hkeys: print " "+h+":", self.h(h)
s = "\nColumns ("+str(len(self.ckeys))+"): "
for c in self.ckeys: s = s+c+", "
print s[:-2]
def _globals(self):
    """
    Builds the globals dictionary used for eval() statements: a copy of
    numpy's namespace, plus shortcuts to this databox, plus any
    user-supplied extras (which take precedence).
    """
    # copy numpy's namespace so we never mutate the module itself
    g = dict(_n.__dict__)

    # required shortcuts for scripts
    g.update({'h':self.h, 'c':self.c, 'd':self, 'self':self})

    # user-supplied extras override everything above
    g.update(self.extra_globals)
    return g
def load_file(self, path="ask", first_data_line="auto", filters="*.*", text="Select a file, FACEPANTS.", default_directory=None, header_only=False, quiet=False):
    """
    Clears the databox, loads a file, storing the header info in
    self.headers, and the data in self.columns.

    path               file path, or "ask" to pop up a file dialog.
    first_data_line    If "auto", the first data line is assumed to be the
                       first line where all the elements are numbers.
                       If you specify an index (starting at 0), the columns
                       need not be numbers: everything above is header,
                       everything below is data.
    filters, text      dialog filter string and prompt (for path="ask").
    default_directory  starting dialog directory (None => self.directory).
    header_only        stop after parsing the header.
    quiet              suppress status/error prints.

    In both cases, the line used to label the columns will always be the
    last header line with the same (or more) number of elements as the
    first data line.

    Returns self on success, None on abort or missing file.
    """
    if default_directory is None: default_directory = self.directory

    if path == "ask":
        path = _dialogs.open_single(filters=filters,
                                    default_directory=self.directory,
                                    text=text)
    self.path = path

    if path is None:
        if not quiet: print "Aborted."
        return None

    # make sure the file exists!
    if not _os.path.exists(path):
        if not quiet: print "ERROR: "+repr(path)+" does not exist."
        return None

    # clear all the existing data
    self.clear()

    # open said file for reading, read in all the lines and close
    # NOTE(review): 'rU' (universal newlines) is Python-2 era; deprecated later.
    f = open(path, 'rU')
    lines = f.readlines()
    f.close()

    # Determine the delimiter
    if self.delimiter is None:

        # loop from the end of the file until we get something other than white space
        for n in range(len(lines)):

            # strip away the white space
            s = lines[-n-1].strip()

            # if this line has any content
            if len(s) > 0:

                # try the different delimiter schemes until we find one
                # that produces a number. Otherwise it's ambiguous.
                if _s.fun.is_a_number(s.split(None)[0]): self.delimiter = None
                elif _s.fun.is_a_number(s.split(',') [0]): self.delimiter = ','
                elif _s.fun.is_a_number(s.split(';') [0]): self.delimiter = ';'

                # quit the loop!
                break

    ##### read in the header information
    self.header_lines = []
    for n in range(len(lines)):

        # split the line by the delimiter
        s = lines[n].strip().split(self.delimiter)

        # remove a trailing whitespace entry if it exists.
        if len(s) and s[-1].strip() == '': s.pop(-1)

        # first check and see if this is a data line (all elements are numbers)
        if first_data_line == "auto" and _s.fun.elements_are_numbers(s):

            # we've reached the first data line
            first_data_line = n

            # quit the header loop
            break;

        ### now we know it's a header line

        # save the lines for the avid user.
        self.header_lines.append(lines[n].strip())

        # store the hkey and the rest of it
        if len(s):
            hkey = s[0]
            if self.delimiter is None: remainder = ' '.join(s[1:])
            else: remainder = self.delimiter.join(s[1:])

            # first thing to try is simply evaluating the remaining string
            # NOTE(review): eval() of file contents — only load trusted files.
            try: self.insert_header(hkey, eval(remainder, self._globals()))

            # otherwise store the string
            except: self.insert_header(hkey, remainder)

    # now we have a valid set of column ckeys one way or another, and we know first_data_line.
    if header_only: return self

    # Make sure first_data_line isn't None (which happens if there's no data)
    if first_data_line == "auto":
        if not quiet: print "\nCould not find a line of pure data! Perhaps check the delimiter?"
        if not quiet: print "The default delimiter is whitespace. For csv files, set delimiter=','\n"
        return self

    ##### at this point we've found the first_data_line,

    # look for the ckeys

    # special case: no header
    if first_data_line == 0: ckeys = []

    # start by assuming it's the previous line
    else: ckeys = lines[first_data_line-1].strip().split(self.delimiter)

    # count the number of actual data columns for comparison
    column_count = len(lines[first_data_line].strip().split(self.delimiter))

    # check to see if ckeys is equal in length to the
    # number of data columns. If it isn't, it's a false ckeys line
    if len(ckeys) >= column_count:

        # it is close enough
        # if we have too many column keys, mention it
        while len(ckeys) > column_count:
            extra = ckeys.pop(-1)
            if not quiet: print "Extra ckey: "+extra

    else:
        # it is an invalid ckeys line. Generate our own!
        ckeys = []
        for m in range(0, column_count): ckeys.append("c"+str(m))

    # last step with ckeys: make sure they're all different!
    self.ckeys = []
    while len(ckeys):

        # remove the key
        ckey = ckeys.pop(0)

        # if there is a duplicate, append "_<n>" until unique
        if (ckey in ckeys) or (ckey in self.ckeys):
            # increase the label index until it's unique
            n=0
            while (ckey+"_"+str(n) in ckeys) or (ckey+"_"+str(n) in self.ckeys): n+=1
            ckey = ckey+"_"+str(n)

        self.ckeys.append(ckey)

    # initialize the columns arrays
    # I did benchmarks and there's not much improvement by using numpy-arrays here.
    for label in self.ckeys: self.columns[label] = []

    # define a quick function to convert i's to j's
    # (complex numbers written with "i" then parse via python's "j" notation)
    def fix(x): return x.replace('i','j')

    # loop over the remaining data lines, converting to numbers
    z = _n.genfromtxt((fix(x) for x in lines[first_data_line:]),
                      delimiter=self.delimiter,
                      dtype=complex)

    # genfromtxt returns a 1D array if there is only one data line.
    # highly confusing behavior, numpy!
    if len(_n.shape(z)) == 1: z = _n.array([z])

    # fix for different behavior of genfromtxt on single columns
    if len(z.shape) == 2: z = z.transpose()
    else: z = [z]

    # Add all the columns
    for n in range(len(self.ckeys)):

        # if any of the imaginary components are non-zero, use complex
        if _n.any(_n.imag(z[n])): self[n] = z[n]
        else: self[n] = _n.real(z[n])

    # now, as an added bonus, rename some of the obnoxious headers
    for k in self.obnoxious_ckeys:
        if self.columns.has_key(k):
            self.columns[self.obnoxious_ckeys[k]] = self.columns[k]

    return self
def save_file(self, path="ask", filters="*.dat", force_overwrite=False, header_only=False, delimiter='use current'):
"""
This will save all the header info and columns to an ascii file with
the specified path.
filters="*.dat" File filter for the file dialog (for path="ask")
force_overwrite=False Normally, if the file * exists, this will copy that
to *.backup. If the backup already exists, this
function will abort. Setting this to True will
force overwriting the backup file.
header_only=False Only output the header?
delimiter="use current" This will set the delimiter of the output file
"use current" means use self.delimiter
"""
# This is the final path. We now write to a temporary file in the user
# directory, then move it to the destination. This (hopefully) fixes
# problems with sync programs.
if path == "ask": path = _dialogs.save(filters, default_directory=self.directory)
if path in ["", None]:
print "Aborted."
return False
# Save the path for future reference
self.path=path
# if the path exists, make a backup
if _os.path.exists(path) and not force_overwrite:
_os.rename(path,path+".backup")
# get the delimiter
if delimiter == "use current":
if self.delimiter is None: delimiter = "\t"
else: delimiter = self.delimiter
# figure out the temporary path
temporary_path = _os.path.join(_s.settings.path_home, "temp-"+str(int(1e3*_time.time()))+'-'+str(int(1e9*_n.random.rand(1))))
# open the file and write the header
f = open(temporary_path, 'w')
for k in self.hkeys: f.write(k + delimiter + repr(self.headers[k]) + "\n")
f.write('\n')
# if we're only supposed to write the header
if header_only: return
# now write the ckeys
elements = []
for ckey in self.ckeys: elements.append(str(ckey))
f.write(delimiter.join(elements) + "\n")
# now loop over the data
for n in range(0, len(self[0])):
# loop over each column
elements = []
for m in range(0, len(self.ckeys)):
# write the data if there is any, otherwise, placeholder ("x")
if n < len(self[m]):
elements.append(str(self[m][n]))
else:
elements.append('_')
f.write(delimiter.join(elements) + "\n")
f.close()
# now move it
_shutil.move(temporary_path, path)
def get_data_point(self, n):
    """
    Returns the n'th data point (starting at 0) from all columns.

    n    index of the data point to return.
    """
    # collect the n'th entry of every column, in column order
    return [self[k][n] for k in self.ckeys]
def pop_data_point(self, n):
    """
    Removes and returns the n'th data point (starting at 0) from all
    columns.

    n    index of the data point to pop.
    """
    popped = []
    for ckey in self.ckeys:

        # work on a list copy so we can pop, then store the column back
        values = list(self.c(ckey))
        popped.append(values.pop(n))
        self.insert_column(_n.array(values), ckey)

    return popped
def insert_data_point(self, new_data, index=None):
"""
Inserts a data point at index n.
new_data a list or array of new data points, one for each column.
index where to insert the point(s) in each column. None => append.
"""
if not len(new_data) == len(self.columns) and not len(self.columns)==0:
print "ERROR: new_data must have as many elements as there are columns."
return
# otherwise, we just auto-add this data point as new columns
elif len(self.columns)==0:
for i in range(len(new_data)): self[i] = [new_data[i]]
# otherwise it matches length so just insert it.
else:
for i in range(len(new_data)):
# get the array and turn it into a list
data = list(self[i])
# append or insert
if index is None: data.append( new_data[i])
else: data.insert(index, new_data[i])
# reconvert to an array
self[i] = _n.array(data)
def append_data_point(self, new_data):
    """
    Appends the supplied data point to the end of every column.

    new_data    a list or array of new data points, one for each column.
    """
    # appending is just inserting with no index
    return self.insert_data_point(new_data, index=None)
def execute_script(self, script, g={}):
    """
    Runs a script, returning the result.

    script    a string script, or a (possibly nested) list of scripts,
              evaluated recursively into a list of results.
    g         extra globals merged into self.extra_globals before
              evaluation. (NOTE(review): mutable default argument; it is
              only read here, never mutated, so this is safe but fragile.)

    Scripts are of the form:

      "3.0 + x/y - self[0] where x=3.0*c('my_column')+h('setting'); y=c(1)"

    "self" refers to the data object, giving access to everything, enabling
    complete control over the universe. c() and h() give quick reference
    to self.c() and self.h() to get columns and header lines.

    Additionally, these scripts can see all of the numpy functions like sin,
    cos, sqrt, etc.

    Finally, if you would like access to additional globals, set
    self.extra_globals to the appropriate globals dictionary or add globals
    using insert_global(). Setting g=globals() will automatically insert
    your globals into this databox instance.

    There are a few shorthand scripts available as well. You can simply type
    a column name such as "my_column" or a column number like 2. However, I
    only added this functionality as a shortcut, and something like
    "2.0*a where a=F" will not work unless F is defined somehow. I figure
    since you're already writing a complicated script, you don't want to
    accidentally shortcut your way into using a column instead of a constant!
    Use "2.0*a where a=c('F')" instead.

    NOTE: You shouldn't try to use variables like 'c=...' or 'h=...' because
    they are already column and header functions!
    """
    # add any extra user-supplied global variables for the eventual eval() call.
    self.extra_globals.update(g)

    # If the script is not a list of scripts, return the script value.
    # This is the termination of a recursive call.
    if not _s.fun.is_iterable(script):

        # special case
        if script is None: return None

        # get the expression and variables dictionary
        [expression, v] = self._parse_script(script)

        # if there was a problem parsing the script
        if v is None:
            print "ERROR: Could not parse '"+script+"'"
            return None

        # get all the numpy stuff too
        g = self._globals()
        g.update(v)

        # otherwise, evaluate the script using python's eval command
        # NOTE(review): eval() of a user-supplied string — trusted input only.
        return eval(expression, g)

    # Otherwise, this is a list of (lists of) scripts. Make the recursive call.
    output = []
    for s in script: output.append(self.execute_script(s))
    return output

# Define this so you can quickly call a script, e.g. d('c(0)*2')
__call__ = execute_script
def _parse_script(self, script, n=0):
"""
This takes a script such as "a/b where a=c('current'), b=3.3" and returns
["a/b", {"a":self.columns["current"], "b":3.3}]
You can also just use an integer for script to reference columns by number
or use the column label as the script.
n is for internal use. Don't use it. In fact, don't use this function, user.
"""
if n > 1000:
print "This script ran recursively 1000 times!"
a = raw_input("<enter> or (q)uit: ")
if a.strip().lower() in ['q', 'quit']:
script = None
if script is None: return [None, None]
# check if the script is simply an integer
if type(script) in [int,long]:
if script<0: script = script+len(self.ckeys)
return ["___"+str(script), {"___"+str(script):self[script]}]
# the scripts would like to use calls like "h('this')/3.0*c('that')",
# so to make eval() work we should add these functions to a local list
# first split up by "where"
split_script = script.split(" where ")
########################################
# Scripts without a "where" statement:
########################################
# if it's a simple script, like "column0" or "c(3)/2.0"
if len(split_script) is 1:
if self.debug: print "script of length 1"
# try to evaluate the script
# first try to evaluate it as a simple column label
if n==0 and script in self.ckeys:
# only try this on the zero'th attempt
# if this is a recursive call, there can be ambiguities if the
# column names are number strings
return ['___', {'___':self[script]}]
# Otherwise, evaluate it.
try:
b = eval(script, self._globals())
return ['___', {'___':b}]
except:
print
print "ERROR: Could not evaluate '"+str(script)+"'"
return [None, None]
#######################################
# Full-on fancy scripts
#######################################
# otherwise it's a complicated script like "c(1)-a/2 where a=h('this')"
# tidy up the expression
expression = split_script[0].strip()
# now split the variables list up by ,
varsplit = split_script[1].split(';')
# loop over the entries in the list of variables, storing the results
# of evaluation in the "stuff" dictionary
stuff = dict()
for var in varsplit:
# split each entry by the "=" sign
s = var.split("=")
if len(s) == 1:
print s, "has no '=' in it"
return [None, None]
# tidy up into "variable" and "column label"
v = s[0].strip()
c = s[1].strip()
# now try to evaluate c, given our current globbies
# recursively call this sub-script. At the end of all this mess
# we want the final return value to be the first expression
# and a full dictionary of variables to fill it
[x,y] = self._parse_script(c, n+1)
# if it's not working, just quit out.
if y is None: return [None, None]
stuff[v] = y[x]
# at this point we've found or generated the list
return [expression, stuff]
def copy_headers(self, other_databox):
    """
    Copies every header entry from other_databox into this databox.
    """
    for hkey in other_databox.hkeys:
        self.insert_header(hkey, other_databox.h(hkey))
def copy_columns(self, other_databox):
    """
    Copies every column from other_databox into this databox.
    """
    for ckey in other_databox.ckeys:
        self.insert_column(other_databox[ckey], ckey)
def copy_all(self, other_databox):
    """
    Copies both the headers and the columns from other_databox.
    """
    self.copy_headers(other_databox)
    self.copy_columns(other_databox)
def insert_global(self, thing, name=None):
    """
    Adds (or overwrites) the supplied object in self.extra_globals, making
    it visible to execute_script() / _parse_script() etc.

    If name=None, thing.__name__ is used.
    """
    # default to the object's own name
    if name is None:
        name = thing.__name__
    self.extra_globals[name] = thing
def insert_header(self, hkey, value, index=None):
    """
    Inserts/overwrites a header value and its key.

    If hkey is an integer, self.hkeys[hkey] is used.
    index    where to insert a brand-new key. None => append to the end.
    """
    # an integer means "the existing key at that position"
    if type(hkey) in [int, long]:
        hkey = self.hkeys[hkey]

    # store the value (keys are always stored as strings)
    self.headers[str(hkey)] = value

    # register the key if it's new
    if hkey not in self.hkeys:
        if index is None:
            self.hkeys.append(str(hkey))
        else:
            self.hkeys.insert(index, str(hkey))
def pop_header(self, hkey):
"""
This will remove and return the specified header value.
You can specify either a key string or an index.
"""
# try the integer approach first to allow negative values
if type(hkey) is not str:
return self.headers.pop(self.hkeys.pop(hkey))
else:
# find the key integer and pop it
hkey = self.hkeys.index(hkey)
# if we didn't find the column, quit
if hkey < 0:
print "Column does not exist (yes, we looked)."
return
# pop it!
return self.headers.pop(self.hkeys.pop(hkey))
def pop_column(self, ckey):
"""
This will remove and return the data in the specified column.
You can specify either a key string or an index.
"""
# try the integer approach first to allow negative values
if type(ckey) is not str:
return self.columns.pop(self.ckeys.pop(ckey))
else:
# find the key integer and pop it
ckey = self.ckeys.index(ckey)
# if we didn't find the column, quit
if ckey < 0:
print "Column does not exist (yes, we looked)."
return
# pop it!
return self.columns.pop(self.ckeys.pop(ckey))
def insert_column(self, data_array, ckey='temp', index=None):
    """
    Inserts/overwrites a column, converting the supplied data to a numpy
    array.

    data_array   data; can be a list, but will be converted to numpy array
    ckey         column name; an integer means self.ckeys[ckey]
    index        where to insert this column. None => append to end.
    """
    # an integer means "the existing key at that position"
    if type(ckey) in [int, long]:
        ckey = self.ckeys[ckey]

    # store (or overwrite) the data
    self.columns[ckey] = _n.array(data_array)

    # register the key if it's new
    if ckey not in self.ckeys:
        if index is None:
            self.ckeys.append(ckey)
        else:
            self.ckeys.insert(index, ckey)
def append_column(self, data_array, ckey='temp'):
"""
This will append a new column and fill it with the data from the
the supplied array.
data_array data; can be a list, but will be converted to numpy array
ckey name of the column.
"""
if not type(ckey) is str:
print "ERROR: ckey should be a string!"
return
if ckey in self.ckeys:
print "ERROR: ckey '"+ckey+"' already exists!"
return
self.insert_column(data_array, ckey)
def clear_columns(self):
    """
    Removes all the ckeys and columns, leaving the databox column-less.
    """
    self.ckeys   = []
    self.columns = {}
def clear_headers(self):
    """
    Removes all the hkeys and header entries.
    """
    self.hkeys   = []
    self.headers = {}
def clear(self):
    """
    Removes everything from the databox: headers and columns alike.
    """
    self.clear_columns()
    self.clear_headers()
def rename_header(self, old_name, new_name):
    """
    Renames a header entry. The supplied names need to be strings.
    """
    # swap the key in the ordered list, then move the stored value
    position = self.hkeys.index(old_name)
    self.hkeys[position] = new_name
    self.headers[new_name] = self.headers.pop(old_name)
def rename_column(self, column, new_name):
    """
    Renames a column.

    column can be an integer index or the old column name.
    """
    # resolve an integer index to the existing name
    if type(column) is not str:
        column = self.ckeys[column]

    # swap the key in the ordered list, then move the stored data
    position = self.ckeys.index(column)
    self.ckeys[position] = new_name
    self.columns[new_name] = self.columns.pop(column)
def trim(self, *conditions):
    """
    Returns a new databox containing only the data points satisfying the
    supplied conditions. Conditions can be truth arrays (having the same
    length as the columns!) or scripted strings.

    Example:
      d1 = spinmob.data.load()
      d2 = d1.trim( (2<d1[0]) & (d1[0]<10) | (d1[3]==22), 'sin(d[2])*h("gain")<32.2')

    Note this will not modify the databox; rather it will generate a new
    one with the same header information and return it.
    """
    # evaluate any scripted string conditions into truth arrays
    conditions = list(conditions)
    for i in range(len(conditions)):
        if type(conditions[i]) is str:
            conditions[i] = self.execute_script(conditions[i])

    # fresh databox with the same options and headers
    trimmed = databox(delimiter=self.delimiter)
    trimmed.copy_headers(self)

    # trim it up, send it out.
    kept = _s.fun.trim_data_uber(self, conditions)
    for i in range(len(kept)):
        trimmed.append_column(kept[i], self.ckeys[i])

    return trimmed
def update_headers(self, dictionary, keys=None):
    """
    Updates the header with the supplied dictionary. If keys=None the
    dictionary's own (unsorted) key order is used; otherwise the supplied
    list of keys is looped over in order.
    """
    if keys is None:
        keys = dictionary.keys()
    for k in keys:
        self.insert_header(k, dictionary[k])
def c(self, n):
    """
    Returns a column: by key if n is a string, otherwise by index into
    self.ckeys. Returns [] when there are no columns at all.
    """
    if len(self.columns) == 0: return []
    if type(n) is str:
        return self.columns[n]
    return self.columns[self.ckeys[n]]

# indexing a databox returns columns
__getitem__ = c
def h(self, *args, **kwargs):
    """
    Header getter/setter.

    args[0]    a key string (exact match, or a *fragment* contained in a
               key) or an integer index into self.hkeys; the matching
               header value is returned.
    kwargs     key=value pairs inserted into the header first.

    Returns the found header value, or None (with a printed complaint)
    when no key matches; returns None when called with kwargs only.
    """
    # first loop over kwargs if there are any to set header elements
    for k in kwargs.keys():
        self.insert_header(k, kwargs[k])

    # Meow search for a key if specified
    if len(args):
        # this can be shortened. Eventually, it'd be nice to get a tuple back!
        hkey = args[0]

        # if this is an index
        if type(hkey) in [int, long]: return self.headers[self.hkeys[hkey]]

        # if this is an exact match
        elif hkey in self.hkeys: return self.headers[hkey]

        # Look for a fragment.
        else:
            for k in self.hkeys:
                if k.find(hkey) >= 0:
                    return self.headers[k]

            # nothing matched: complain and list the candidates
            print
            print "ERROR: Couldn't find '"+str(hkey) + "' in header."
            print "Possible values:"
            for k in self.hkeys: print k
            print
            return None
###########################################
# Class for fitting data
###########################################
class fitter():
    """
    Object for fitting data to (lists of) functions. See __init__ for the
    full workflow description.
    """
    f = None # list of functions
    bg = None # list of background functions (for subtracting etc)

    _f_raw = None # raw argument passed to set_functions()
    _bg_raw = None # raw argument passed to set_functions()

    _set_xdata = None # definitions from which data is derived during fits
    _set_ydata = None
    _set_eydata = None
    # NOTE(review): class-level mutable dict, shared by all instances
    # until an instance rebinds it — confirm that sharing is intended.
    _set_data_globals = dict(_n.__dict__) # defaults to numpy

    _xdata_massaged = None # internal storage of trimmed data sets (used for fitting)
    _ydata_massaged = None
    _eydata_massaged = None

    _settings = None # dictionary containing all the fitter settings

    results = None # full output from the fitter.
def __init__(self, f=['a*x*cos(b*x)+c', 'a*x+c'], p='a=1.5, b, c=-2', c=None, bg=None, g=None, **kwargs):
    """
    Creates an object for fitting data to functions.

    f  = function or list of functions
    p  = comma-delimited list of fit parameters
    c  = comma-delimited list of constants
    bg = optional background function or list of functions
    g  = optional globals dictionary for evaluating functions

    f, p, bg are sent to set_functions()
    **kwargs are sent to settings

    Typical workflow:
      my_fitter = fitter('a*x+b', 'a,b')    # creates the fitter object
      my_fitter.set_data([1,2,3],[1,2,1])   # sets the data to be fit
      my_fitter.fit()                       # does the fitting

    Tips:
      Do not set data directly; use set_data(), which clears the fit
      results. Otherwise the fit results will not match the existing data.

      When errors are completely unknown, try autoscale_errors_and_fit()
      repeatedly until the reduced chi squareds of all the data sets
      are approximately 1. This is one way to more-or-less estimate
      the error from the data itself.
    """
    # make sure all the awesome stuff from numpy is visible.
    # BUGFIX: copy numpy's namespace rather than aliasing it. The old code
    # set self._globals = _n.__dict__ and then update()'d it below, which
    # injected the user's globals directly into the numpy module for the
    # whole process. databox._globals() already copies for this reason.
    self._globals = dict(_n.__dict__)

    # update the globals dictionary with user-supplied stuff
    if not g is None: self._globals.update(g)

    self._pnames    = []
    self._cnames    = []
    self._fnames    = []
    self._bgnames   = []
    self._pguess    = []
    self._constants = []

    # default settings
    self._settings = dict(autoplot        = True,  # whether we always plot when changing stuff
                          plot_fit        = True,  # include f in plots?
                          plot_bg         = True,  # include bg in plots?
                          plot_ey         = True,  # include error bars?
                          plot_guess      = True,  # include the guess?
                          plot_guess_zoom = False, # zoom to include plot?
                          subtract_bg     = False, # subtract bg from plots?
                          first_figure    = 0,     # first figure number to use
                          fpoints         = 1000,  # number of points to use when plotting f
                          xmin            = None,  # list of limits for trimming x-data
                          xmax            = None,  # list of limits for trimming x-data
                          ymin            = None,  # list of limits for trimming y-data
                          ymax            = None,  # list of limits for trimming y-data
                          xlabel          = None,  # list of x labels
                          ylabel          = None,  # list of y labels
                          xscale          = 'linear', # axis scale type
                          yscale          = 'linear', # axis scale type
                          scale_eydata    = 1.0,   # by how much should we scale the eydata?
                          coarsen         = 1,     # how much to coarsen the data

                          # styles of plots
                          style_data  = dict(marker='+', color='b', ls=''),
                          style_fit   = dict(marker='', color='r', ls='-'),
                          style_guess = dict(marker='', color='0.25', ls='-'),
                          style_bg    = dict(marker='', color='k', ls='-'))

    # Silence warnings
    self._settings['silent'] = False

    # settings that don't require a re-fit
    self._safe_settings = list(['bg_names', 'fpoints', 'f_names',
                                'plot_bg', 'plot_ey', 'plot_guess', 'plot_fit',
                                'silent', 'style_bg', 'style_data', 'style_guess',
                                'style_fit', 'subtract_bg', 'xscale', 'yscale',
                                'xlabel', 'ylabel'])

    # settings that should not be lists in general (i.e. not one per data set)
    self._single_settings = list(['autoplot', 'first_figure'])

    # set the functions
    self.set_functions(f, p, c, bg)

    # update the default settings
    for k in kwargs.keys(): self[k] = kwargs[k]
def set(self, **kwargs):