12 changes: 12 additions & 0 deletions packs/configs/process_WD1.conf
@@ -0,0 +1,12 @@
[required]

process = 'decode'
wavedump_edition = 1
file_path = '/path/to/file.bin'
save_path = '/path/to/file.h5'

[optional]

overwrite = True
counts = -1
print_mod = 100
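
For reference, `read_config_file()` in packs/core/io.py (below) evaluates every key with ast.literal_eval. A hedged sketch of the arguments this config would produce, assuming the sections are flattened into one dict:

# Assumed result of read_config_file('packs/configs/process_WD1.conf'),
# based on the ast.literal_eval parsing visible in packs/core/io.py.
{
    'process': 'decode',
    'wavedump_edition': 1,
    'file_path': '/path/to/file.bin',
    'save_path': '/path/to/file.h5',
    'overwrite': True,
    'counts': -1,
    'print_mod': 100,
}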
16 changes: 16 additions & 0 deletions packs/core/core_utils.py
@@ -12,3 +12,19 @@ def check_test(file):
        return True
    else:
        return False

# THIS SHOULD BE MOVED ELSEWHERE
class MalformedHeaderError(Exception):
    '''
    Error raised when two consecutive headers do not match.

    Created initially for WD1 processing, but should be back-ported to WD2.
    '''

    def __init__(self, header1, header2):
        self.header1 = header1
        self.header2 = header2

    def __str__(self):
        return f"MalformedHeaderError: headers do not match the expected result. Ensure the .dat file provided is formatted correctly.\nFirst header: {self.header1}\nSecond header: {self.header2}"

129 changes: 129 additions & 0 deletions packs/core/io.py
@@ -1,9 +1,18 @@
import pandas as pd
import numpy as np

import h5py
import ast
import configparser

from contextlib import contextmanager

from typing import Optional
from typing import Generator
from typing import Union
from typing import Tuple

from functools import partial

from packs.types import types

@@ -99,3 +108,123 @@ def read_config_file(file_path : str) -> dict:
            arg_dict[key] = ast.literal_eval(config[section][key])

    return arg_dict


@contextmanager
def writer(path : str,
           group : str,
           overwrite : bool = True) -> Generator:
    '''
    Outer function for a lazy h5 writer that iteratively writes to a dataset, with the layout:

        FILE.h5 -> GROUP/DATASET

    Includes overwrite functionality, which will delete and recreate the **group** if it already exists.

    Parameters
    ----------

    path (str)       : File path
    group (str)      : Group within the h5 file
    overwrite (bool) : Whether to overwrite a previous group (OPTIONAL)

    Returns
    -------

    write (func) : write function described in write()


    Fixed size is for when the number of rows is known in advance: the dataset is
    pre-allocated, saving precious IO operations. The fixed_size argument to write()
    then becomes a tuple of (True, N_ROWS, INDEX); otherwise it is False.
    '''


    # open the file if it exists (create it otherwise), then create the group or overwrite it
    h5f = h5py.File(path, 'a')
    try:
        if overwrite:
            if group in h5f:
                del h5f[group]

        gr = h5f.require_group(group)

        def write(dataset : str,
                  data : np.ndarray,
                  fixed_size : Union[bool, Tuple[bool, int, int]] = False) -> None:
            '''
            Writes an ndarray to the dataset within the group defined in writer().

            fixed_size is used to speed up writing: if enabled, it
            creates a dataset of a fixed size rather than
            growing it iteratively.

            Parameters
            ----------

            dataset (str)  : Dataset name to write to
            data (ndarray) : Data to write*
            fixed_size (Union[bool, Tuple[bool, int, int]])
                           : Mode that is either enabled or disabled.
                             False (disabled) -> Iteratively grows the dataset at runtime
                             Tuple (enabled)  -> Requires a tuple containing
                                                 (True, number of events, index to write to)
                             This method is best seen in action in `process_bin_WD1()`.

            * Data should be a numpy structured array, as seen in the WD1 and WD2 processing
            '''
            if fixed_size is False:
                # create the dataset if it doesn't exist; if it does, grow it by one row
                if dataset in gr:
                    dset = gr[dataset]
                    dset.resize((dset.shape[0] + 1, *dset.shape[1:]))
                    dset[-1] = data
                else:
                    max_shape = (None,) + data.shape
                    dset = gr.require_dataset(dataset, shape = (1,) + data.shape,
                                              maxshape = max_shape, dtype = data.dtype,
                                              chunks = True)
                    dset[0] = data
            else:
                index = fixed_size[2]
                # dataset of fixed size, pre-allocated on first write
                if dataset in gr:
                    dset = gr[dataset]
                else:
                    dset = gr.require_dataset(dataset, shape = (fixed_size[1],) + data.shape,
                                              maxshape = (fixed_size[1],) + data.shape,
                                              dtype = data.dtype, chunks = True)
                dset[index] = data

        yield write

    finally:
        h5f.close()
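
A minimal usage sketch of the two write modes; the group/dataset names and the structured dtype below are illustrative assumptions, not part of the module:

# Usage sketch (names and dtype are illustrative assumptions).
row_dtype = np.dtype([('timestamp', 'u8'), ('adc', 'f4')])
row = np.zeros((), dtype = row_dtype)

with writer('/path/to/file.h5', 'RAW') as write:
    # growing mode: the dataset gains one row per call
    write('events', row)

    # fixed-size mode: pre-allocate 1000 rows, then write by index
    for i in range(1000):
        write('waveforms', row, fixed_size = (True, 1000, i))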

def reader(path : str,
           group : str,
           dataset : str) -> Generator:
    '''
    A lazy h5 reader that iteratively reads from a dataset, with the layout:

        FILE.h5 -> GROUP/DATASET

    Parameters
    ----------

    path (str)    : File path
    group (str)   : Group name within the h5 file
    dataset (str) : Dataset name within the group

    Returns
    -------

    row (generator) : Generator that yields the next row of the dataset each time it is advanced.
    '''

    with h5py.File(path, 'r') as h5f:
        gr = h5f[group]
        dset = gr[dataset]

        for row in dset:
            yield row
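
A short usage sketch, iterating rows lazily; the file, group, and dataset names are assumptions:

# Usage sketch (file/group/dataset names are illustrative assumptions).
for row in reader('/path/to/file.h5', 'RAW', 'events'):
    print(row)  # one structured-array row at a time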
4 changes: 3 additions & 1 deletion packs/proc/proc.py
@@ -1,7 +1,7 @@
import os

from packs.core.io import read_config_file
from packs.proc.processing_utils import process_bin_WD2, process_bin_WD1
from packs.core.core_utils import check_test

def proc(config_file):
@@ -22,6 +22,8 @@ def proc(config_file):
        case 'decode':
            if conf_dict['wavedump_edition'] == 2:
                process_bin_WD2(**arg_dict)
            elif conf_dict['wavedump_edition'] == 1:
                process_bin_WD1(**arg_dict)
            else:
                raise RuntimeError(f"wavedump edition {conf_dict['wavedump_edition']} decoding isn't currently implemented.")
        case default:
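
With this branch in place, WD1 decoding would presumably be driven by the new config file added above; a hedged sketch of the call (the actual entry point may differ):

# Sketch: drive WD1 decoding from the new config (entry point may differ).
from packs.proc.proc import proc

proc('packs/configs/process_WD1.conf')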