12 changes: 12 additions & 0 deletions packs/configs/process_WD1.conf
@@ -0,0 +1,12 @@
[required]

process = 'decode'
wavedump_edition = 1
file_path = '/path/to/file.bin'
save_path = '/path/to/file.h5'

[optional]

overwrite = True
counts = -1
print_mod = 100
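
For reference, `read_config_file()` in packs/core/io.py (below) evaluates every key with ast.literal_eval. A hedged sketch of the arguments this config would produce, assuming the sections are flattened into one dict:

# Assumed result of read_config_file('packs/configs/process_WD1.conf'),
# based on the ast.literal_eval parsing visible in packs/core/io.py.
{
    'process': 'decode',
    'wavedump_edition': 1,
    'file_path': '/path/to/file.bin',
    'save_path': '/path/to/file.h5',
    'overwrite': True,
    'counts': -1,
    'print_mod': 100,
}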
16 changes: 16 additions & 0 deletions packs/core/core_utils.py
@@ -12,3 +12,19 @@ def check_test(file):
        return True
    else:
        return False

# THIS SHOULD BE MOVED ELSEWHERE
class MalformedHeaderError(Exception):
    '''
    Error raised when two consecutive headers do not match.

    Created initially for WD1 processing, but should be back-ported to WD2.
    '''

    def __init__(self, header1, header2):
        self.header1 = header1
        self.header2 = header2

    def __str__(self):
        return f"MalformedHeaderError: headers do not match the expected result. Ensure the .dat file provided is formatted correctly.\nFirst header: {self.header1}\nSecond header: {self.header2}"

129 changes: 129 additions & 0 deletions packs/core/io.py
@@ -1,9 +1,18 @@
import pandas as pd
import numpy as np

import h5py
import ast
import configparser

from contextlib import contextmanager

from typing import Optional
from typing import Generator
from typing import Union
from typing import Tuple

from functools import partial

from packs.types import types

@@ -99,3 +108,123 @@ def read_config_file(file_path : str) -> dict:
            arg_dict[key] = ast.literal_eval(config[section][key])

    return arg_dict


@contextmanager
def writer(path : str,
           group : str,
           overwrite : bool = True) -> Generator:
    '''
    Outer function for a lazy h5 writer that iteratively writes to a dataset, with the layout:

        FILE.h5 -> GROUP/DATASET

    Includes overwrite functionality, which will delete and recreate the **group** if it already exists.

    Parameters
    ----------

    path (str)       : File path
    group (str)      : Group within the h5 file
    overwrite (bool) : Whether to overwrite a previous group (OPTIONAL)

    Returns
    -------

    write (func) : write function described in write()


    Fixed size is for when the number of rows is known in advance: the dataset is
    pre-allocated, saving precious IO operations. The fixed_size argument to write()
    then becomes a tuple of (True, N_ROWS, INDEX); otherwise it is False.
    '''


    # open the file if it exists (create it otherwise), then create the group or overwrite it
    h5f = h5py.File(path, 'a')
    try:
        if overwrite:
            if group in h5f:
                del h5f[group]

        gr = h5f.require_group(group)

        def write(dataset : str,
                  data : np.ndarray,
                  fixed_size : Union[bool, Tuple[bool, int, int]] = False) -> None:
            '''
            Writes an ndarray to the dataset within the group defined in writer().

            fixed_size is used to speed up writing: if enabled, it
            creates a dataset of a fixed size rather than
            growing it iteratively.

            Parameters
            ----------

            dataset (str)  : Dataset name to write to
            data (ndarray) : Data to write*
            fixed_size (Union[bool, Tuple[bool, int, int]])
                           : Mode that is either enabled or disabled.
                             False (disabled) -> Iteratively grows the dataset at runtime
                             Tuple (enabled)  -> Requires a tuple containing
                                                 (True, number of events, index to write to)
                             This method is best seen in action in `process_bin_WD1()`.

            * Data should be a numpy structured array, as seen in the WD1 and WD2 processing
            '''
            if fixed_size is False:
                # create the dataset if it doesn't exist; if it does, grow it by one row
                if dataset in gr:
                    dset = gr[dataset]
                    dset.resize((dset.shape[0] + 1, *dset.shape[1:]))
                    dset[-1] = data
                else:
                    max_shape = (None,) + data.shape
                    dset = gr.require_dataset(dataset, shape = (1,) + data.shape,
                                              maxshape = max_shape, dtype = data.dtype,
                                              chunks = True)
                    dset[0] = data
            else:
                index = fixed_size[2]
                # dataset of fixed size, pre-allocated on first write
                if dataset in gr:
                    dset = gr[dataset]
                else:
                    dset = gr.require_dataset(dataset, shape = (fixed_size[1],) + data.shape,
                                              maxshape = (fixed_size[1],) + data.shape,
                                              dtype = data.dtype, chunks = True)
                dset[index] = data

        yield write

    finally:
        h5f.close()
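
A minimal usage sketch of the two write modes; the group/dataset names and the structured dtype below are illustrative assumptions, not part of the module:

# Usage sketch (names and dtype are illustrative assumptions).
row_dtype = np.dtype([('timestamp', 'u8'), ('adc', 'f4')])
row = np.zeros((), dtype = row_dtype)

with writer('/path/to/file.h5', 'RAW') as write:
    # growing mode: the dataset gains one row per call
    write('events', row)

    # fixed-size mode: pre-allocate 1000 rows, then write by index
    for i in range(1000):
        write('waveforms', row, fixed_size = (True, 1000, i))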

def reader(path : str,
           group : str,
           dataset : str) -> Generator:
    '''
    A lazy h5 reader that iteratively reads from a dataset, with the layout:

        FILE.h5 -> GROUP/DATASET

    Parameters
    ----------

    path (str)    : File path
    group (str)   : Group name within the h5 file
    dataset (str) : Dataset name within the group

    Returns
    -------

    row (generator) : Generator that yields the next row of the dataset each time it is advanced.
    '''

    with h5py.File(path, 'r') as h5f:
        gr = h5f[group]
        dset = gr[dataset]

        for row in dset:
            yield row
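
A short usage sketch, iterating rows lazily; the file, group, and dataset names are assumptions:

# Usage sketch (file/group/dataset names are illustrative assumptions).
for row in reader('/path/to/file.h5', 'RAW', 'events'):
    print(row)  # one structured-array row at a time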
4 changes: 3 additions & 1 deletion packs/proc/proc.py
@@ -1,7 +1,7 @@
import os

from packs.core.io import read_config_file
from packs.proc.processing_utils import process_bin_WD2, process_bin_WD1
from packs.core.core_utils import check_test

def proc(config_file):
@@ -22,6 +22,8 @@ def proc(config_file):
        case 'decode':
            if conf_dict['wavedump_edition'] == 2:
                process_bin_WD2(**arg_dict)
            elif conf_dict['wavedump_edition'] == 1:
                process_bin_WD1(**arg_dict)
            else:
                raise RuntimeError(f"wavedump edition {conf_dict['wavedump_edition']} decoding isn't currently implemented.")
        case default:
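
With this branch in place, WD1 decoding would presumably be driven by the new config file added above; a hedged sketch of the call (the actual entry point may differ):

# Sketch: drive WD1 decoding from the new config (entry point may differ).
from packs.proc.proc import proc

proc('packs/configs/process_WD1.conf')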