-
Notifications
You must be signed in to change notification settings - Fork 5
Add wd1 processing #41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 6 commits
e28d086
726691e
a19f3dd
0f5b647
80f813d
fee38cc
b5e8f5e
7db1f47
4a23efa
05677db
aeb83f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |||||||||||||||||
|
|
||||||||||||||||||
| from packs.core.io import read_config_file | ||||||||||||||||||
| from packs.proc.processing_utils import process_bin_WD2 | ||||||||||||||||||
| from packs.proc.processing_utils import process_bin_WD1 | ||||||||||||||||||
| from packs.core.core_utils import check_test | ||||||||||||||||||
|
|
||||||||||||||||||
| def proc(config_file): | ||||||||||||||||||
|
|
@@ -22,6 +23,8 @@ def proc(config_file): | |||||||||||||||||
| case 'decode': | ||||||||||||||||||
| if conf_dict['wavedump_edition'] == 2: | ||||||||||||||||||
| process_bin_WD2(**arg_dict) | ||||||||||||||||||
| elif conf_dict['wavedump_edition'] == 1: | ||||||||||||||||||
| process_bin_WD1(**arg_dict) | ||||||||||||||||||
|
Comment on lines
24
to
+27
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also test the new case? :)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's done in the test |
||||||||||||||||||
| else: | ||||||||||||||||||
| raise RuntimeError(f"wavedump edition {conf_dict['wavedump_edition']} decoding isn't currently implemented.") | ||||||||||||||||||
| case default: | ||||||||||||||||||
|
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -14,6 +14,8 @@ | |||||||||
|
|
||||||||||
| # imports start from MULE/ | ||||||||||
| from packs.core.core_utils import flatten | ||||||||||
| from packs.core.core_utils import MalformedHeaderError | ||||||||||
| from packs.core.io import writer | ||||||||||
|
||||||||||
| from packs.types import types | ||||||||||
|
|
||||||||||
| """ | ||||||||||
|
|
@@ -369,13 +371,130 @@ def check_save_path(save_path : str, | |||||||||
| return save_path | ||||||||||
|
|
||||||||||
|
|
||||||||||
| def process_event_lazy_WD1(file_object : BinaryIO, | ||||||||||
| sample_size : int): | ||||||||||
|
|
||||||||||
| ''' | ||||||||||
| WAVEDUMP 1: Generator that outputs each event iteratively from an opened binary file | ||||||||||
| Parameters | ||||||||||
| ---------- | ||||||||||
| file_object (obj) : Opened file object | ||||||||||
| sample_size (int) : Time difference between each sample in waveform (2ns for V1730B digitiser) | ||||||||||
| Returns | ||||||||||
| ------- | ||||||||||
| data (generator) : Generator object containing one event's worth of data | ||||||||||
| across each event | ||||||||||
| ''' | ||||||||||
|
|
||||||||||
| # read first header | ||||||||||
| header = np.fromfile(file_object, dtype = 'i', count = 6) | ||||||||||
|
|
||||||||||
|
Comment on lines
+390
to
+391
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess these are fixed for WD2 right?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's fixed for WD1; WD2 uses an adaptively sized header, but since each file in Wavedump1 is a channel, this issue doesn't occur. |
||||||||||
| # header to check against | ||||||||||
| sanity_header = header.copy() | ||||||||||
|
|
||||||||||
| # continue only if data exists | ||||||||||
|
Comment on lines
+393
to
+395
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why copy it before knowing if it has anything?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because we overwrite the header variable in the next steps and compare it against this 'initial sanity check' header. If it is malformed, an error is raised. This code could be restructured to check whether the header is empty before copying, but copying these headers once isn't particularly expensive. |
||||||||||
| while len(header) > 0: | ||||||||||
|
|
||||||||||
| # alter header to match expected size | ||||||||||
| header[0] = header[0] - 24 | ||||||||||
| event_size = header[0] // sample_size | ||||||||||
|
|
||||||||||
| # collect waveform, no of samples and timestamp | ||||||||||
| yield (np.fromfile(file_object, dtype = np.dtype('<H'), count = event_size), event_size, header[-1]) | ||||||||||
|
|
||||||||||
| # collect next header | ||||||||||
| header = np.fromfile(file_object, dtype = 'i', count = 6) | ||||||||||
|
|
||||||||||
| # check if header has correct number of elements and correct information ONCE. | ||||||||||
| if sanity_header is not None: | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comparison should be made at the beginning and not compared all the time; this object is unchanged, right?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, given you already did the while with this in the 1st iteration technically you've checked this already
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is only made at the beginning in the first iteration, if it passes the checks |
||||||||||
| if len(header) == 6: | ||||||||||
| if all([header[0] == sanity_header[0], # event size | ||||||||||
| header[4] == sanity_header[4] + 1, # event number +1 | ||||||||||
| header[5] > sanity_header[5] # timestamp increases | ||||||||||
| ]): | ||||||||||
| sanity_header = None | ||||||||||
| else: | ||||||||||
| raise MalformedHeaderError(sanity_header, header) | ||||||||||
| else: | ||||||||||
| raise MalformedHeaderError(sanity_header, header) | ||||||||||
|
Comment on lines
+418
to
+419
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, if it's None, why pass it to the exception?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can be a bit more explicit on the error message
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't pass into the exception if it's `None`. |
||||||||||
| print("Processing Finished!") | ||||||||||
|
|
||||||||||
|
|
||||||||||
| def process_bin_WD1(file_path : str, | ||||||||||
| save_path : str, | ||||||||||
| sample_size : int, | ||||||||||
| overwrite : Optional[bool] = False, | ||||||||||
| print_mod : Optional[int] = -1): | ||||||||||
|
|
||||||||||
| ''' | ||||||||||
| WAVEDUMP 1: Takes a binary file and outputs the containing information in a h5 file. | ||||||||||
| This only works for individual channels at the moment, as wavedump 1 saves each channel | ||||||||||
| as a separate file. | ||||||||||
| For particularly large waveforms/number of events. You can 'chunk' the data such that | ||||||||||
| each dataset holds `counts` events. | ||||||||||
| # Makeup of the header (header[n]) where n is: | ||||||||||
| # 0 - event size (ns in our case, with extra 24 samples) | ||||||||||
| # 1 - board ID | ||||||||||
| # 2 - pattern (not sure exactly what this means) | ||||||||||
| # 3 - board channel | ||||||||||
| # 4 - event counter | ||||||||||
| # 5 - Time-tag for the trigger | ||||||||||
| # Each of which is a signed 4byte integer | ||||||||||
| Parameters | ||||||||||
| ---------- | ||||||||||
| file_path (str) : Path to binary file | ||||||||||
| save_path (str) : Path to saved file | ||||||||||
| sample_size (int) : Size of each sample in an event (2 ns in the case of V1730B digitiser) | ||||||||||
| overwrite (bool) : Boolean for overwriting pre-existing files | ||||||||||
| counts (int) : The number of events per chunks. -1 implies no chunking of data. | ||||||||||
|
||||||||||
| Returns | ||||||||||
| ------- | ||||||||||
| None | ||||||||||
| ''' | ||||||||||
|
|
||||||||||
|
|
||||||||||
| # lets build it here first and break it up later | ||||||||||
| # destroy the group within the file if you're overwriting | ||||||||||
| save_path = check_save_path(save_path, overwrite) | ||||||||||
| print(save_path) | ||||||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. messages can be a bit more helpful
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These will be addressed in #47 |
||||||||||
|
|
||||||||||
|
|
||||||||||
| # open file for reading | ||||||||||
| with open(file_path, 'rb') as file: | ||||||||||
|
|
||||||||||
| # open writer object | ||||||||||
| with writer(save_path, 'RAW', overwrite) as write: | ||||||||||
|
|
||||||||||
| for i, (waveform, samples, timestamp) in enumerate(process_event_lazy_WD1(file, sample_size)): | ||||||||||
|
|
||||||||||
| if (i % print_mod == 0) and (print_mod != -1): | ||||||||||
| print(f"Event {i}") | ||||||||||
|
Comment on lines
+470
to
+471
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess that -1 means do not print, why not check that once and not for all lines?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What would you suggest? If you check it once, you'll still need a flag to check if the event numbers should be printed right? |
||||||||||
|
|
||||||||||
| # enforce structure upon data | ||||||||||
| e_dtype = types.event_info_type | ||||||||||
| wf_dtype = types.rwf_type_WD1(samples) | ||||||||||
|
||||||||||
| e_dtype = types.event_info_type | |
| wf_dtype = types.rwf_type_WD1(samples) | |
| e_dtype = types.event_info_type | |
| wf_dtype = types.rwf_type_WD1(samples) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Careful when expanding to multiple channels, cause you will, right? :P
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently the method for multiple channels is implementing multiple configs, but yes this will need to be considered if expanded to multiple channels.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Issue #50 addresses this.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| [required] | ||
|
|
||
| process = 'decode' | ||
| wavedump_edition = 1 | ||
| file_path = '/path/to/file.bin' | ||
| save_path = '/path/to/file.h5' | ||
|
|
||
| [optional] | ||
|
|
||
| overwrite = True | ||
| counts = -1 | ||
|
||
| print_mod = 100 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| [required] | ||
| process = 'decode' | ||
| wavedump_edition = 1 | ||
| file_path = '/home/casper/Documents/MULE/packs/tests/data/one_channel_WD1.dat' | ||
| save_path = '/home/casper/Documents/MULE/packs/tests/data/one_channel_WD1_tmp.h5' | ||
|
Comment on lines
+4
to
+5
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shall these paths be more generic? :)
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could try and tie them into the provided environment variables for the MULE directory, but these are just sample configs. They're not meant to work out of the box, but provide a template to work upon.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This has also led me to realise that these should probably be tied to temporary paths, I'll figure this out.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These file_paths and save_paths are rewritten in the tests, so there's no issue here. This does tie into an older piece of technical debt (#32) that I've now resolved for WD1, but it still needs to be resolved for WD2. |
||
| sample_size = 2 | ||
|
|
||
| [optional] | ||
| overwrite = True | ||
| print_mod = 100 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| import os | ||
| import sys | ||
|
|
||
| import numpy as np | ||
|
|
@@ -11,6 +12,8 @@ | |
| from pytest import warns | ||
| from pytest import fixture | ||
|
|
||
| from packs.proc.processing_utils import process_event_lazy_WD1 | ||
| from packs.proc.processing_utils import process_bin_WD1 | ||
| from packs.proc.processing_utils import read_defaults_WD2 | ||
| from packs.proc.processing_utils import process_header | ||
| from packs.proc.processing_utils import read_binary | ||
|
|
@@ -22,8 +25,11 @@ | |
| from packs.types.types import rwf_type | ||
| from packs.types.types import event_info_type | ||
|
|
||
| from packs.core.core_utils import MalformedHeaderError | ||
|
|
||
| from packs.core.io import load_rwf_info | ||
| from packs.core.io import load_evt_info | ||
| from packs.core.io import reader | ||
|
|
||
| from packs.types import types | ||
| from hypothesis import given | ||
|
|
@@ -181,3 +187,53 @@ def test_decode_produces_expected_output(config, inpt, output, comparison, MULE_ | |
| assert load_evt_info(save_path).equals(load_evt_info(comparison_path)) | ||
| assert load_rwf_info(save_path, samples).equals(load_rwf_info(comparison_path, samples)) | ||
|
|
||
|
|
||
| @mark.parametrize("config, inpt, output, comparison", [("process_WD1_1channel.conf", "one_channel_WD1.dat", "one_channel_WD1_tmp.h5", "one_channel_WD1.h5")]) | ||
| def test_WD1_decode_produces_expected_output(config, inpt, output, comparison, MULE_dir, data_dir): | ||
| ''' | ||
| This test will be merged with test_decode_produces_expected_output() | ||
| once WD2 processing has been updated to match lazy method of WD1 | ||
| ''' | ||
|
|
||
| # ensure path is correct | ||
| file_path = data_dir + inpt | ||
| save_path = data_dir + output | ||
| comparison_path = data_dir + comparison | ||
| config_path = data_dir + "configs/" + config | ||
|
|
||
| # rewrite paths to files | ||
| cnfg = configparser.ConfigParser() | ||
| cnfg.read(config_path) | ||
| cnfg.set('required', 'file_path', "'" + file_path + "'") # need to add comments around for config reasons | ||
| cnfg.set('required', 'save_path', "'" + save_path + "'") | ||
|
|
||
| with open(config_path, 'w') as cfgfile: | ||
| cnfg.write(cfgfile) | ||
|
|
||
| # run processing pack decode | ||
| run_pack = ['python3', MULE_dir + "/bin/mule", "proc", config_path] | ||
| subprocess.run(run_pack) | ||
|
|
||
| # the event info can be read out like a normal h5, the RWF cannot due to how they're structured | ||
| assert pd.read_hdf(save_path, 'RAW/event_info').equals(pd.read_hdf(comparison_path, 'RAW/event_info')) | ||
| assert [x for x in reader(save_path, 'RAW', 'rwf')] == [x for x in reader(comparison_path, 'RAW', 'rwf')] | ||
|
|
||
|
|
||
| def test_lazy_loading_malformed_data(MULE_dir): | ||
|
||
| ''' | ||
| Test that a file you pass through with no appropriate header is flagged if it's | ||
| not functioning correctly. | ||
| ATM the check for this is: | ||
| - event number goes up +1 events | ||
| - number of samples stays the same across two events | ||
| - timestamp increases between events | ||
| These may not always hold, but will ensure the test works as expected | ||
| ''' | ||
|
|
||
| data_path = MULE_dir + "/packs/tests/data/malformed_data.bin" | ||
|
|
||
| with raises(MalformedHeaderError): | ||
| with open(data_path, 'rb') as file: | ||
| a = process_event_lazy_WD1(file, sample_size = 2) | ||
| next(a) | ||
| next(a) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Guess you mean exception?