nu-ZOO · jwaiton · Apr 20, 2025 · Apr 20, 2025 · Apr 20, 2025 · Apr 20, 2025
diff --git a/packs/core/core_utils.py b/packs/core/core_utils.py
@@ -12,3 +12,19 @@ def check_test(file):
         return True
     else:
         return False
+
+
+# THIS SHOULD BE MOVED ELSEWHERE
+class MalformedHeaderError(Exception):
+    '''
+    Header created for when two headers don't match up consecutively.
+    Created initially for WD1 processing, but should be back-ported for WD2
+    '''
+
+    def __init__(self, header1, header2):
+        self.header1 = header1
+        self.header2 = header2
+
+    def __str__(self):
+        return f"MalformedHeaderError: Headers don't output expected result. Ensure the .dat file provided is formatted correctly.\nFirst Header {self.header1}\nSecond Header {self.header2}"
+
diff --git a/packs/proc/proc.py b/packs/proc/proc.py
@@ -2,6 +2,7 @@
 
 from packs.core.io                import read_config_file
 from packs.proc.processing_utils  import process_bin_WD2
+from packs.proc.processing_utils  import process_bin_WD1
 from packs.core.core_utils        import check_test
 
 def proc(config_file):
@@ -22,6 +23,8 @@ def proc(config_file):
         case 'decode':
             if conf_dict['wavedump_edition'] == 2:
                 process_bin_WD2(**arg_dict)
+            elif conf_dict['wavedump_edition'] == 1:
+                process_bin_WD1(**arg_dict)
-            if conf_dict['wavedump_edition'] == 2:
-                process_bin_WD2(**arg_dict)
-            elif conf_dict['wavedump_edition'] == 1:
-                process_bin_WD1(**arg_dict)
+if conf_dict['wavedump_edition']   == 2:
+    process_bin_WD2(**arg_dict)
+elif conf_dict['wavedump_edition'] == 1:
+    process_bin_WD1(**arg_dict)
-            if conf_dict['wavedump_edition'] == 2:
-                process_bin_WD2(**arg_dict)
-            elif conf_dict['wavedump_edition'] == 1:
-                process_bin_WD1(**arg_dict)
+if conf_dict['wavedump_edition']   == 2:
+    process_bin_WD2(**arg_dict)
+elif conf_dict['wavedump_edition'] == 1:
+    process_bin_WD1(**arg_dict)
             else:
                 raise RuntimeError(f"wavedump edition {conf_dict['wavedump_edition']} decoding isn't currently implemented.")
         case default:

diff --git a/packs/proc/processing_utils.py b/packs/proc/processing_utils.py
@@ -14,6 +14,8 @@
 
 # imports start from MULE/
 from packs.core.core_utils import flatten
+from packs.core.core_utils import MalformedHeaderError
+from packs.core.io import writer
 from packs.types import types
 
 """
@@ -369,13 +371,130 @@ def check_save_path(save_path  :  str,
     return save_path
 
 
+def process_event_lazy_WD1(file_object  :  BinaryIO,
+                           sample_size  :  int):
+
+    '''
+    WAVEDUMP 1: Generator that outputs each event iteratively from an opened binary file
+    Parameters
+    ----------
+        file_object  (obj)  :  Opened file object
+        sample_size  (int)  :  Time difference between each sample in waveform (2ns for V1730B digitiser)
+    Returns
+    -------
+        data  (generator)  :  Generator object containing one event's worth of data
+                              across each event
+    '''
+
+    # read first header
+    header = np.fromfile(file_object, dtype = 'i', count = 6)
+
+    # header to check against
+    sanity_header = header.copy()
+
+    # continue only if data exists
+    while len(header) > 0:
+
+        # alter header to match expected size
+        header[0] = header[0] - 24
+        event_size = header[0] // sample_size
+
+        # collect waveform, no of samples and timestamp
+        yield (np.fromfile(file_object, dtype = np.dtype('<H'), count = event_size), event_size, header[-1])
+
+        # collect next header
+        header = np.fromfile(file_object, dtype = 'i', count = 6)
+
+        # check if header has correct number of elements and correct information ONCE.
+        if sanity_header is not None:
+            if len(header) == 6:
+                if all([header[0] == sanity_header[0], # event size
+                    header[4] == sanity_header[4] + 1,  # event number +1
+                    header[5] > sanity_header[5]        # timestamp increases
+                    ]):
+                    sanity_header = None
+                else:
+                    raise MalformedHeaderError(sanity_header, header)
+            else:
+                raise MalformedHeaderError(sanity_header, header)
+    print("Processing Finished!")
+
+
+def process_bin_WD1(file_path    :  str,
+                    save_path    :  str,
+                    sample_size  :  int,
+                    overwrite    :  Optional[bool] = False,
+                    print_mod    :  Optional[int] = -1):
+
+    '''
+    WAVEDUMP 1: Takes a binary file and outputs the containing information in a h5 file.
+    This only works for individual channels at the moment, as wavedump 1 saves each channel
+    as a separate file.
+    For particularly large waveforms/number of events. You can 'chunk' the data such that
+    each dataset holds `counts` events.
+    # Makeup of the header (header[n]) where n is:
+    # 0 - event size (ns in our case, with extra 24 samples)
+    # 1 - board ID
+    # 2 - pattern (not sure exactly what this means)
+    # 3 - board channel
+    # 4 - event counter
+    # 5 - Time-tag for the trigger
+    # Each of which is a signed 4byte integer
+    Parameters
+    ----------
+        file_path    (str)   :  Path to binary file
+        save_path    (str)   :  Path to saved file
+        sample_size  (int)   :  Size of each sample in an event (2 ns in the case of V1730B digitiser)
+        overwrite    (bool)  :  Boolean for overwriting pre-existing files
+        counts       (int)   :  The number of events per chunks. -1 implies no chunking of data.
+    Returns
+    -------
+        None
+    '''
+
+
+    # lets build it here first and break it up later
+    # destroy the group within the file if you're overwriting
+    save_path = check_save_path(save_path, overwrite)
+    print(save_path)
+
+
+    # open file for reading
+    with open(file_path, 'rb') as file:
+
+        # open writer object
+        with writer(save_path, 'RAW', overwrite) as write:
+
+            for i, (waveform, samples, timestamp) in enumerate(process_event_lazy_WD1(file, sample_size)):
+
+                if (i % print_mod == 0) and (print_mod != -1):
+                    print(f"Event {i}")
+
+                # enforce stucture upon data
+                e_dtype = types.event_info_type
+                wf_dtype = types.rwf_type_WD1(samples)
-                e_dtype = types.event_info_type
-                wf_dtype = types.rwf_type_WD1(samples)
+                e_dtype  = types.event_info_type
+                wf_dtype = types.rwf_type_WD1(samples)
-                e_dtype = types.event_info_type
-                wf_dtype = types.rwf_type_WD1(samples)
+                e_dtype  = types.event_info_type
+                wf_dtype = types.rwf_type_WD1(samples)
+
+                event_info = np.array((i, timestamp, samples, sample_size, 1), dtype = e_dtype)
+                waveforms = np.array((i, 0, waveform), dtype = wf_dtype)
+
+                # first run-through, collect the header information to extract table size
+                if i == 0:
+                    file_size     = os.path.getsize(file_path)
+                    waveform_size = (samples * 2) + (4*6)
+                    num_of_events = int(file_size / waveform_size)
+
+                # add data to df lazily
+                write('event_info', event_info, (True, num_of_events, i))
+                write('rwf', waveforms, (True, num_of_events, i))
+
+
 def process_bin_WD2(file_path  :  str,
                     save_path  :  str,
                     overwrite  :  Optional[bool] = False,
                     counts     :  Optional[int]  = -1):
 
     '''
-    Takes a binary file and outputs the containing waveform information in a h5 file.
+    WAVEDUMP 2: Takes a binary file and outputs the containing waveform information in a h5 file.
 
     For particularly large waveforms/number of events. You can 'chunk' the data such that
     each dataset holds `counts` events.

diff --git a/packs/tests/data/configs/process_WD1.conf b/packs/tests/data/configs/process_WD1.conf
@@ -0,0 +1,12 @@
+[required]
+
+process          = 'decode'
+wavedump_edition = 1
+file_path        = '/path/to/file.bin'
+save_path        = '/path/to/file.h5'
+
+[optional]
+
+overwrite        = True
+counts           = -1  
+print_mod        = 100
diff --git a/packs/tests/data/configs/process_WD1_1channel.conf b/packs/tests/data/configs/process_WD1_1channel.conf
@@ -0,0 +1,11 @@
+[required]
+process = 'decode'
+wavedump_edition = 1
+file_path = '/home/casper/Documents/MULE/packs/tests/data/one_channel_WD1.dat'
+save_path = '/home/casper/Documents/MULE/packs/tests/data/one_channel_WD1_tmp.h5'
+sample_size = 2
+
+[optional]
+overwrite = True
+print_mod = 100
+
diff --git a/packs/tests/data/one_channel_WD1.dat b/packs/tests/data/one_channel_WD1.dat
diff --git a/packs/tests/data/one_channel_WD1.h5 b/packs/tests/data/one_channel_WD1.h5
diff --git a/packs/tests/processing_test.py b/packs/tests/processing_test.py
@@ -1,3 +1,4 @@
+import os
 import sys
 
 import numpy as np
@@ -11,6 +12,8 @@
 from pytest                        import warns
 from pytest                        import fixture
 
+from packs.proc.processing_utils   import process_event_lazy_WD1
+from packs.proc.processing_utils   import process_bin_WD1
 from packs.proc.processing_utils   import read_defaults_WD2
 from packs.proc.processing_utils   import process_header
 from packs.proc.processing_utils   import read_binary
@@ -22,8 +25,11 @@
 from packs.types.types             import rwf_type
 from packs.types.types             import event_info_type
 
+from packs.core.core_utils         import MalformedHeaderError
+
 from packs.core.io                 import load_rwf_info
 from packs.core.io                 import load_evt_info
+from packs.core.io                 import reader
 
 from packs.types                   import types
 from hypothesis                    import given
@@ -181,3 +187,53 @@ def test_decode_produces_expected_output(config, inpt, output, comparison, MULE_
     assert load_evt_info(save_path).equals(load_evt_info(comparison_path))
     assert load_rwf_info(save_path, samples).equals(load_rwf_info(comparison_path, samples))
 
+
+@mark.parametrize("config, inpt, output, comparison", [("process_WD1_1channel.conf", "one_channel_WD1.dat", "one_channel_WD1_tmp.h5", "one_channel_WD1.h5")])
+def test_WD1_decode_produces_expected_output(config, inpt, output, comparison, MULE_dir, data_dir):
+    '''
+    This test will be merged with test_decode_produces_expected_output()
+    once WD2 processing has been updated to match lazy method of WD1
+    '''
+
+    # ensure path is correct
+    file_path       = data_dir + inpt
+    save_path       = data_dir + output
+    comparison_path = data_dir + comparison
+    config_path     = data_dir + "configs/" + config
+
+    # rewrite paths to files
+    cnfg = configparser.ConfigParser()
+    cnfg.read(config_path)
+    cnfg.set('required', 'file_path', "'" +  file_path + "'") # need to add comments around for config reasons
+    cnfg.set('required', 'save_path', "'" +  save_path + "'")
+
+    with open(config_path, 'w') as cfgfile:
+        cnfg.write(cfgfile)
+
+    # run processing pack decode
+    run_pack = ['python3', MULE_dir + "/bin/mule", "proc", config_path]
+    subprocess.run(run_pack)
+
+    # the event info can be read out like a normal h5, the RWF cannot due to how they're structured
+    assert pd.read_hdf(save_path, 'RAW/event_info').equals(pd.read_hdf(comparison_path, 'RAW/event_info'))
+    assert [x for x in reader(save_path, 'RAW', 'rwf')] == [x for x in reader(comparison_path, 'RAW', 'rwf')]
+
+
+def test_lazy_loading_malformed_data(MULE_dir):
+    '''
+    Test that a file you pass through with no appropriate header is flagged if it's
+    not functioning correctly.
+    ATM the check for this is:
+    - event number goes up +1 events
+    - number of samples stays the same across two events
+    - timestamp increases between events
+    These may not always hold, but will ensure the test works as expected
+    '''
+
+    data_path = MULE_dir + "/packs/tests/data/malformed_data.bin"
+
+    with raises(MalformedHeaderError):
+        with open(data_path, 'rb') as file:
+            a = process_event_lazy_WD1(file, sample_size = 2)
+            next(a)
+            next(a)
diff --git a/packs/types/types.py b/packs/types/types.py
@@ -31,6 +31,15 @@ def rwf_type(samples  :  int) -> np.dtype:
             ('rwf', np.float32, (samples,))
         ])
 
+def rwf_type_WD1(samples  :  int) -> np.dtype:
+    '''
+    WAVEDUMP 1: Generates the data-type for raw waveforms
+    '''
+
+    return np.dtype([('event_number', int),
+                     ('channels', int),
+                     ('rwf', np.uint16, (samples))])
+
 
 def generate_wfdtype(channels, samples):
     '''