diff --git a/.gitignore b/.gitignore index 82f9275..7c146c8 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,9 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + + +# ignore the new .txt files generated in test +/packs/tests/data/repetitive_data/test_*.txt +# temporary h5 files get ignored +*tmp.h5 \ No newline at end of file diff --git a/MULE_environment.yml b/MULE_environment.yml index a41f81b..fcd0da9 100644 --- a/MULE_environment.yml +++ b/MULE_environment.yml @@ -2,23 +2,23 @@ name: MULE-3.12-10-24 channels: - defaults dependencies: - - python=3.12 - - numpy - - iminuit - - matplotlib - - seaborn - - tables - - h5py - - pandas - - tqdm - - scipy - - cython - - pytables - - coverage - - pytest - - hypothesis - - jupyterlab - - jupyter - - flaky - - sphinx + - python = 3.12 + - numpy = 1.26.4 + - iminuit = 2.18.0 + - matplotlib = 3.9.2 + - seaborn = 0.13.2 + - tables = 3.9.2 + - h5py = 3.11.0 + - pandas = 2.2.2 + - tqdm = 4.66.5 + - scipy = 1.13.1 + - cython = 3.0.11 + - pytables = 3.10.1 + - coverage = 7.6.1 + - pytest = 7.4.4 + - hypothesis = 6.111.0 + - jupyterlab = 4.2.5 + - jupyter = 1.0.0 + - flaky = 3.7.0 + - sphinx = 7.3.7 prefix: /home/e78368jw/anaconda3/envs/MULE-3.12-10-24 diff --git a/bin/mule b/bin/mule index 992c0b0..5f14110 100755 --- a/bin/mule +++ b/bin/mule @@ -27,10 +27,10 @@ MULE pack runtime executable Use 'mule --help' for more information ======================================''', formatter_class=argparse.RawTextHelpFormatter) -parser.add_argument("pack", choices = ['acq','proc', 'test'], help = '''The pack implemented: - acq - Acquisition of data using wavedump 1 - proc - Processing of data - test - Testing directory (IGNORE) +parser.add_argument("pack", choices = ['acq','proc', 'tests'], help = '''The pack implemented: + acq - Acquisition of data using wavedump 1 + proc - 
import ast
import configparser
import warnings

import pandas as pd


def acq(config_file):
    '''
    Entry point of the acquisition pack (currently a placeholder).

    Parameters
    ----------

    config_file (str)  :  Config argument handed over by `bin/mule`

    Returns
    -------

    None
    '''
    print("This works as expected: acquisition")
    print("In here you should read the config provided")

    # the placeholder config name only verifies that the executable runs
    if check_test(config_file):
        return


def flatten(xss):
    '''
    Flattens a 2D list
    eg: [[0,1,2,3], [4,5,6,7]] -> [0,1,2,3,4,5,6,7]

    Parameters
    ----------

    xss (iterable of iterables)  :  Nested collection, exactly one level deep

    Returns
    -------

    (list)  :  All inner elements, in their original order
    '''
    return [x for xs in xss for x in xs]


def check_test(file):
    '''
    Quick check for the placeholder test config.

    Parameters
    ----------

    file (str)  :  Config argument handed over by `bin/mule`

    Returns
    -------

    (bool)  :  True only when `file` is exactly "test_config"
    '''
    is_test = (file == "test_config")
    if is_test:
        print("Test config executable run successfully")
    return is_test


def _chunk_sort_key(name):
    '''
    Sort key that orders chunk datasets ('ei_0', 'ei_10', 'ei_20', ...) by their
    numeric suffix. Names without a numeric suffix are placed last, by name.
    '''
    suffix = name.rpartition('_')[2]
    if suffix.isdigit():
        return (0, int(suffix), name)
    return (1, 0, name)


def _load_group_rows(file_path, group_name):
    '''
    Collects every row of every dataset stored under `group_name` as plain lists.

    Raises KeyError if the group does not exist in the file.
    '''
    # deferred import: only the h5 loaders need h5py
    import h5py

    rows = []
    with h5py.File(file_path, 'r') as f:
        group = f[group_name]
        # Sort chunks numerically. Relying on the group's own iteration order would
        # place e.g. 'ei_100' before 'ei_20' if members are visited by name.
        for key in sorted(group.keys(), key = _chunk_sort_key):
            # read each dataset in one go instead of one h5 access per row
            rows.extend(list(row) for row in group[key][()])
    return rows


def load_evt_info(file_path, merge = False):
    '''
    Loads in a processed WD .h5 file as pandas DataFrame, extracting event information tables.

    Parameters
    ----------

    file_path (str)  :  Path to saved data
    merge (bool)     :  Flag for merging chunked data (currently unused; all chunks
                        are always concatenated)

    Returns
    -------

    (pd.DataFrame)  :  Dataframe of event information
    '''
    # deferred import: project-local dtype definitions
    from packs.types import types

    rows = _load_group_rows(file_path, 'event_information')
    return pd.DataFrame(rows, columns = (types.event_info_type).names)


def load_rwf_info(file_path  :  str,
                  samples    :  int) -> pd.DataFrame:
    '''
    Loads in a processed WD .h5 file as pandas dataframe, extracting raw waveform tables.
    Samples must be provided, and can be found using `load_evt_info()`.

    Parameters
    ----------

    file_path (str)  :  Path to saved data
    samples (int)    :  Number of samples in each raw waveform

    Returns
    -------

    (pd.DataFrame)  :  Dataframe of raw waveform information
    '''
    # deferred import: project-local dtype definitions
    from packs.types import types

    rows = _load_group_rows(file_path, 'rwf')
    return pd.DataFrame(rows, columns = (types.rwf_type(samples)).names)


def read_config_file(file_path  :  str) -> dict:
    '''
    Read config file passed in via 'mule' and extract relevant information for pack.
    Example:

    >> mule proc config.conf

    This function collects the relevant information from `config.conf` and passes it to the `proc` pack.

    Every value is parsed with `ast.literal_eval`, so strings must be quoted in the
    config file. Errors from the config parser and from `ast.literal_eval` (for
    example on an empty entry) are propagated to the caller. Keys from all sections
    are merged into one flat dictionary; a key repeated in a later section replaces
    the earlier value.

    Parameters
    ----------

    file_path (str)  :  Path to config file

    Returns
    -------

    arg_dict (dict)  :  Dictionary of relevant arguments for the pack
    '''
    # setup config parser
    config = configparser.ConfigParser()

    # `ConfigParser.read` silently skips files it cannot open and returns the list
    # of files it did parse, so an empty result means the config was never read.
    if not config.read(file_path):
        warnings.warn(f"Config file could not be read: {file_path}. No arguments were loaded.")

    arg_dict = {}
    for section in config.sections():
        for key, raw_value in config[section].items():
            # the config should be written in such a way that the python evaluator
            # can determine its type
            #
            # we can setup stricter rules at some other time
            arg_dict[key] = ast.literal_eval(raw_value)

    return arg_dict
import os
import sys
import warnings

from typing import BinaryIO
from typing import Optional

import numpy as np


# NOTE(review): the `def` line of this function is garbled in the reviewed text; the
# parameter list below is reconstructed from its docstring and its call sites
# (`read_defaults_WD2(file, byte_order)`) -- confirm against the original file.
def read_defaults_WD2(file        :  BinaryIO,
                      byte_order  :  str) -> (int, int, int, int):
    '''
    Provided with an open WD2 binary file, will provide the header information.

    Parameters
    ----------

    file (BufferedReader)  :  Opened file
    byte_order (str)       :  Byte order ('little' or 'big'); any other value makes
                              `int.from_bytes` raise a ValueError

    Returns
    -------

    event_number (int)     :  First event number extracted from file
    timestamp (int)        :  Timestamp of first event
    samples (int)          :  Number of samples
    sampling_period (int)  :  The time value of 1 sample in ns
    '''
    # header layout: uint32, uint64, uint32, uint64
    event_number    = int.from_bytes(file.read(4), byteorder=byte_order)
    timestamp       = int.from_bytes(file.read(8), byteorder=byte_order)
    samples         = int.from_bytes(file.read(4), byteorder=byte_order)
    sampling_period = int.from_bytes(file.read(8), byteorder=byte_order)

    return (event_number, timestamp, samples, sampling_period)


def process_header(file_path   :  str,
                   byte_order  :  Optional[str] = None) -> (np.dtype, int, int, int):
    '''
    Collect the relevant information from the file's header, and determine if its valid

    Header is formatted for WD2 as shown:
        Event number    -> uint32 (4 bytes)
        Timestamp       -> uint64 (8 bytes)
        Samples         -> uint32 (4 bytes)
        Sampling Period -> uint64 (8 bytes)
        (OPTIONAL)
        Channels        -> int32 (4 bytes)

    Waveform data is 4-byte float (float32).

    The optional channels field is detected by assuming it is present, skipping one
    event's worth of waveform data and checking that the next header is consistent
    with the first one. If it is not, the file is treated as single channel.

    The `byte_order` should generally be left alone, but I have left it as an optional argument
    as there may be situations in which the data is recorded as little-endian and the computer you're
    processing it on is big-endian.

    Parameters
    ----------

    file_path (str)   :  Path to binary file
    byte_order (str)  :  Byte order

    Returns
    -------

    wdtype (ndtype)        :  Custom data type for extracting information from
                              binary files
    samples (int)          :  Number of samples per event
    sampling_period (int)  :  The time value of 1 sample in ns
    channels (int)         :  Number of channels in the data

    Raises
    ------

    NameError     :  `byte_order` is neither None, 'little' nor 'big'
    RuntimeError  :  The header reports zero samples per waveform
    '''

    # ensure you're using the right byteorder defined by your machine.
    # If you take the data from one machine to another of differing endianness,
    # you may have issues here!
    if byte_order is None:
        warnings.warn("Warning: No byte order provided. This may cause issues if transferring data between machines.")
        byte_order = sys.byteorder
    elif byte_order not in ('little', 'big'):
        raise NameError(f'Invalid byte order provided: {byte_order}. Please provide the correct byte order for your machine.')

    # the context manager closes the file even if reading the header raises
    with open(file_path, 'rb') as file:
        event_number, timestamp, samples, sampling_period = read_defaults_WD2(file, byte_order)
        # attempt to read channels
        channels = int.from_bytes(file.read(4), byteorder=byte_order)

        # then skip a full collection of data, and see if the following header makes sense.
        # if it explicitly breaks, assume 1 channel, raise a warning and continue.
        try:
            file.read(4*samples*channels)
            event_number_1, _, samples_1, sampling_period_1 = read_defaults_WD2(file, byte_order)
        except (MemoryError, OverflowError):
            # a single-channel file has waveform bytes where the channel count is
            # expected, so the requested read can be too large to allocate
            # (MemoryError) or too large for a C ssize_t (OverflowError)
            warnings.warn("process_header() unable to read file, defaulting to 1-channel description.\nIf this is not what you expect, please ensure your data was collected correctly.")
            event_number_1 = samples_1 = sampling_period_1 = -1

    # check that event header is as expected
    if (event_number_1 - 1 == event_number) and (samples_1 == samples) and (sampling_period_1 == sampling_period):
        print(f"{channels} channels detected. Processing accordingly...")
    else:
        print(f"Single channel detected. If you're expecting more channels, something has gone wrong.\nProcessing accordingly...")
        channels = 1

    # this is a check to ensure that if you've screwed up the acquisition, it warns you adequately
    if samples == 0:
        raise RuntimeError(r"Unable to decode raw waveforms that have sample size zero. In wavedump 2, when collecting data from a single channel make sure that 'multiple channels per file' isn't checked.")

    # deferred import: project-local dtype definitions
    from packs.types import types

    # collect data types
    wdtype = types.generate_wfdtype(channels, samples)
    return wdtype, samples, sampling_period, channels


def read_binary(file    :  BinaryIO,
                wdtype  :  np.dtype,
                counts  :  Optional[int] = -1,
                offset  :  Optional[int] = 0) -> np.ndarray:
    '''
    Reads the binary in with the expected format/offset

    Parameters
    ----------

    file (BufferedReader or str)  :  Opened file, or a path to the file (both are
                                     handed straight to `np.fromfile`)
    wdtype (ndtype)               :  Custom data type for extracting information from
                                     binary files
    counts (int)                  :  How many events you want to read in. -1 sets it to take all events.
    offset (int)                  :  Offset in bytes at which to start reading the data. Used for
                                     chunking purposes and so should by default be set to zero if
                                     not chunking.

    Returns
    -------
    data (ndarray)  :  Unformatted data from binary file

    '''
    # be aware, you're passing through the open file object
    return np.fromfile(file, dtype=wdtype, count = counts, offset = offset)


def format_wfs(data      :  np.ndarray,
               wdtype    :  np.dtype,
               samples   :  int,
               channels  :  int) -> (np.ndarray, np.ndarray):
    '''
    Formats the data for saving purposes.

    Each input row becomes one event-information row and `channels` waveform rows.

    Parameters
    ----------

    data (ndarray)   :  Unformatted data from binary file
    wdtype (ndtype)  :  Custom data type for extracting information from
                        unformatted data (not used by the formatting itself)
    samples (int)    :  Number of samples in each waveform list
    channels (int)   :  Number of channels in the data

    Returns
    -------
    event_information (ndarray)  :  Reformatted event information
    waveform (ndarray)           :  Reformatted waveforms

    '''
    # deferred import: project-local dtype definitions
    from packs.types import types

    event_information = []
    waveform          = []

    for row in data:
        fields = list(row)
        if channels == 1:
            # single channel files carry no channel field: keep the four header
            # values and add channel = 1 for each row
            event_information.append(tuple(fields[:4]) + (1,))
            waveform.append((fields[0], 0, fields[-1]))
        else:
            event_information.append(tuple(fields[:5]))
            # the last `channels` fields hold the waveforms; walk them first to last
            # and label each with (channel field - distance from the end)
            waveform.extend((fields[0], fields[4] - i, fields[-i])
                            for i in range(channels, 0, -1))

    # convert to structured numpy arrays
    event_information = np.array(event_information, dtype = types.event_info_type)
    waveform          = np.array(waveform, dtype = types.rwf_type(samples))

    return event_information, waveform


def save_data(event_information  :  np.ndarray,
              rwf                :  np.ndarray,
              save_path          :  str,
              event_number       :  Optional[int] = 0):
    '''
    Produces the h5 files given the event information and raw waveforms

    Parameters
    ----------

    event_information (ndarray)  :  Event information from the binary file
    rwf (ndarray)                :  Raw waveforms from the binary file
    save_path (str)              :  Path to saved file
    event_number (int)           :  The first event number in the file (generally).
                                    0 starts a fresh file, any other value appends.

    Returns
    -------

    None

    '''
    # deferred import: only saving needs h5py
    import h5py

    # check if first set of events, if so 'w', otherwise 'a'
    mode = 'w' if event_number == 0 else 'a'

    # the context manager closes the file whatever happens, and (unlike closing in a
    # `finally`) does not hide the original error when the file cannot be opened
    with h5py.File(save_path, mode) as h5f:
        # creates groups if they dont exist
        evt_info = h5f.require_group('event_information')
        rwf_grp  = h5f.require_group('rwf')

        evt_info.create_dataset('ei_' + str(event_number), data=event_information)
        # write waveforms
        rwf_grp.create_dataset('rwf_' + str(event_number), data=rwf)


def check_save_path(save_path  :  str,
                    overwrite  :  bool):
    '''
    Checks that the save_path is valid/doesn't already exist and if it does, either `overwrite` it
    or create an additional file with a number added.

    Parameters
    ----------

    save_path (str)   :  Path to saved file
    overwrite (bool)  :  Boolean for overwriting pre-existing files

    Returns
    -------
    save_path (str)  :  Valid path to saved file, either unmodified or altered to add 'N'
                        directly before the extension, where N (1 to 100) is the first
                        number that gives an unused path

    Raises
    ------

    RuntimeError  :  None of the 100 numbered candidates is free
    '''
    if overwrite or not os.path.exists(save_path):
        return save_path

    name, ext = os.path.splitext(save_path)

    # every candidate is checked before giving up, including number 100
    for counter in range(1, 101):
        candidate = name + str(counter) + ext
        if not os.path.exists(candidate):
            return candidate

    raise RuntimeError("Writing to file went over 100 loops to find a unique name. Sort out your files!")


def process_bin_WD2(file_path  :  str,
                    save_path  :  str,
                    overwrite  :  Optional[bool] = False,
                    counts     :  Optional[int] = -1):

    '''
    Takes a binary file and outputs the containing waveform information in a h5 file.

    For particularly large waveforms/number of events. You can 'chunk' the data such that
    each dataset holds `counts` events.

    Parameters
    ----------

    file_path (str)   :  Path to binary file
    save_path (str)   :  Path to saved file
    overwrite (bool)  :  Boolean for overwriting pre-existing files
    counts (int)      :  The number of events per chunks. -1 implies no chunking of data.

    Returns
    -------
    None when the data is not chunked, True once chunked processing has finished

    Raises
    ------

    ValueError  :  `counts` is zero or below -1
    '''
    # a chunk size of zero reads nothing, so nothing would ever be written
    if counts == 0 or counts < -1:
        raise ValueError(f"Invalid counts provided: {counts}. Use -1 for no chunking or a positive number of events per chunk.")

    # Ensure save path is clear
    save_path = check_save_path(save_path, overwrite)
    print(f'\nData input : {file_path}\nData output : {save_path}')

    # collect binary information
    wdtype, samples, sampling_period, channels = process_header(file_path)

    # create header length (bytes) for processing
    header_size = 24 if channels == 1 else 28

    # Process data chunked or unchunked
    if counts == -1:
        print("No chunking selected...")
        # read in data
        with open(file_path, 'rb') as file:
            data = read_binary(file, wdtype)

        # format_data
        event_info, rwf = format_wfs(data, wdtype, samples, channels)

        # save data
        save_data(event_info, rwf, save_path)
        return

    print(f"Chunking by {counts}...")
    # size in bytes of one event: float32 waveforms for every channel plus its header
    event_size = (samples * channels * 4) + header_size

    # collect data based on desired splitting
    counter = 0
    while True:
        # offset equivalent to the number of events already passed
        data = read_binary(file_path, wdtype, counts, counter * event_size)

        # check binary has content in it
        if len(data) == 0:
            print("Processing Finished!")
            return True

        # format_data
        event_info, rwf = format_wfs(data, wdtype, samples, channels)

        # save data
        save_data(event_info, rwf, save_path, counter)
        counter += counts
b/packs/tests/data/configs/incorrect_format.conf new file mode 100644 index 0000000..fad24da --- /dev/null +++ b/packs/tests/data/configs/incorrect_format.conf @@ -0,0 +1,8 @@ +[required] + +test_1 = 'a string' +test_2 = 6.03 + +[optional] + +5 diff --git a/packs/tests/data/configs/malformed_header.conf b/packs/tests/data/configs/malformed_header.conf new file mode 100644 index 0000000..7883f5b --- /dev/null +++ b/packs/tests/data/configs/malformed_header.conf @@ -0,0 +1,9 @@ +[required + +test_1 = 'a string' +test_2 = 6.03 + +[optional] + +test_3 = 5 +test_4 = True \ No newline at end of file diff --git a/packs/tests/data/configs/nonexistent_WD_version.conf b/packs/tests/data/configs/nonexistent_WD_version.conf new file mode 100644 index 0000000..e3f1607 --- /dev/null +++ b/packs/tests/data/configs/nonexistent_WD_version.conf @@ -0,0 +1,9 @@ +[required] +process = 'decode' +wavedump_edition = 984 +file_path = 'test_file.bin' +save_path = 'test_file.h5' + +[optional] +overwrite = True + diff --git a/packs/tests/data/configs/nonexistent_process.conf b/packs/tests/data/configs/nonexistent_process.conf new file mode 100644 index 0000000..edaa14f --- /dev/null +++ b/packs/tests/data/configs/nonexistent_process.conf @@ -0,0 +1,9 @@ +[required] +process = 'make_balloons' +wavedump_edition = 2 +file_path = 'test_file.bin' +save_path = 'test_file.h5' + +[optional] +overwrite = True + diff --git a/packs/tests/data/configs/process_WD2_1channel.conf b/packs/tests/data/configs/process_WD2_1channel.conf new file mode 100644 index 0000000..82dfe2a --- /dev/null +++ b/packs/tests/data/configs/process_WD2_1channel.conf @@ -0,0 +1,9 @@ +[required] +process = 'decode' +wavedump_edition = 2 +file_path = '/home/e78368jw/Documents/MULE/packs/tests/data/one_channel_WD2.bin' +save_path = '/home/e78368jw/Documents/MULE/packs/tests/data/one_channel_tmp.h5' + +[optional] +overwrite = True + diff --git a/packs/tests/data/configs/process_WD2_3channel.conf 
b/packs/tests/data/configs/process_WD2_3channel.conf new file mode 100644 index 0000000..e82645e --- /dev/null +++ b/packs/tests/data/configs/process_WD2_3channel.conf @@ -0,0 +1,10 @@ +[required] +process = 'decode' +wavedump_edition = 2 +file_path = '/home/e78368jw/Documents/MULE/packs/tests/data/three_channels_WD2.bin' +save_path = '/home/e78368jw/Documents/MULE/packs/tests/data/three_channels_tmp.h5' + +[optional] +overwrite = True +counts = 10 + diff --git a/packs/tests/data/configs/single_multi_chan.conf b/packs/tests/data/configs/single_multi_chan.conf new file mode 100644 index 0000000..857dfa7 --- /dev/null +++ b/packs/tests/data/configs/single_multi_chan.conf @@ -0,0 +1,9 @@ +[required] +process = 'decode' +wavedump_edition = 2 +file_path = 'single_multi_chan.bin' +save_path = 'single_multi_chan.h5' + +[optional] +overwrite = True + diff --git a/packs/tests/data/configs/test_config.conf b/packs/tests/data/configs/test_config.conf new file mode 100644 index 0000000..6470cfb --- /dev/null +++ b/packs/tests/data/configs/test_config.conf @@ -0,0 +1,9 @@ +[required] + +test_1 = 'a string' +test_2 = 6.03 + +[optional] + +test_3 = 5 +test_4 = True \ No newline at end of file diff --git a/packs/tests/data/false_data.npy b/packs/tests/data/false_data.npy new file mode 100644 index 0000000..42bdd8b Binary files /dev/null and b/packs/tests/data/false_data.npy differ diff --git a/packs/tests/data/malformed_data.bin b/packs/tests/data/malformed_data.bin new file mode 100644 index 0000000..4f9dffc Binary files /dev/null and b/packs/tests/data/malformed_data.bin differ diff --git a/packs/tests/data/one_channel_WD2.bin b/packs/tests/data/one_channel_WD2.bin new file mode 100644 index 0000000..633bd04 Binary files /dev/null and b/packs/tests/data/one_channel_WD2.bin differ diff --git a/packs/tests/data/one_channel_WD2.h5 b/packs/tests/data/one_channel_WD2.h5 new file mode 100644 index 0000000..bd35f1f Binary files /dev/null and b/packs/tests/data/one_channel_WD2.h5 differ 
import os
import sys

import numpy as np
import pandas as pd
import subprocess

import configparser

from pytest import mark
from pytest import raises
from pytest import warns

from packs.proc.processing_utils import read_defaults_WD2
from packs.proc.processing_utils import process_header
from packs.proc.processing_utils import read_binary
from packs.proc.processing_utils import format_wfs
from packs.proc.processing_utils import check_save_path
from packs.proc.processing_utils import save_data

from packs.types.types import generate_wfdtype
from packs.types.types import rwf_type
from packs.types.types import event_info_type

from packs.core.io import load_rwf_info
from packs.core.io import load_evt_info

from packs.types import types
from hypothesis import given
from hypothesis.strategies import integers


def _mule_dir():
    # root of the MULE checkout, taken from the environment
    return str(os.environ['MULE_DIR'])


def _data_file(name):
    # path to a file inside the shared test-data directory
    return _mule_dir() + '/packs/tests/data/' + name


@given(integers(min_value = 1, max_value = 1000000))
def test_rwf_type_has_correct_shape(samples):
    x = rwf_type(samples)

    assert x['rwf'].shape[0] == samples


def test_header_components_read_as_expected():

    file = _data_file('three_channels_WD2.bin')

    evt_num  = 0
    tstamp   = 1998268
    smpls    = 1000
    smpl_prd = 8

    with open(file, 'rb') as f:
        event_number, timestamp, samples, sampling_period = read_defaults_WD2(f, sys.byteorder)

    assert event_number    == evt_num
    assert timestamp       == tstamp
    assert samples         == smpls
    assert sampling_period == smpl_prd


def test_header_processed_correctly():

    file = _data_file('three_channels_WD2.bin')

    smpls    = 1000
    smpl_prd = 8
    channels = 3
    wdtype   = generate_wfdtype(channels, smpls) # 3 channels in this case

    result = process_header(file)

    assert result[0] == wdtype
    assert result[1] == smpls
    assert result[2] == smpl_prd
    assert result[3] == channels


def test_header_works_when_data_malformed():
    # this test would normally cause a memory error as the data
    # provided is singular channel, and `process_header()` tests
    # for single channel behaviour by analysing it as multi-channel
    # and returning a single channel response if the header breaks.

    # Here, the header will crash out due to a channels value > 10^10.
    # This has been fixed quickly in process_header, but should be
    # optimised in a different fashion.

    file = _data_file('malformed_data.bin')

    with warns(UserWarning):
        process_header(file)


@mark.parametrize("function, error, takes_path", [(process_header, NameError, True),
                                                  (read_defaults_WD2, ValueError, False)])
def test_endian_error_when_reading(function, error, takes_path):

    file = _data_file('three_channels_WD2.bin')

    byte_order = 'Big' # invalid: only 'little' and 'big' are accepted

    with raises(error):
        if takes_path:
            # process_header opens the file itself, so it is given the path
            function(file, byte_order)
        else:
            with open(file, 'rb') as f:
                function(f, byte_order)


def test_invalid_file_for_reading():

    file = _data_file('false_data.npy')

    x = read_binary(file, types.generate_wfdtype(1, 1000))

    # if you've malformed the data types on a non-binary file, the result should be empty
    # but this may not always be the case.
    assert len(x) == 0


def test_formatting_works():

    file_path = _data_file('three_channels_WD2.bin')

    # collect relevant data from output
    check_file     = _data_file('three_channels_WD2.h5')
    check_rwf      = load_rwf_info(check_file, 1000)
    check_evt_info = load_evt_info(check_file)

    channels = 3
    samples  = 1000

    wdtype = types.generate_wfdtype(channels, samples)

    with open(file_path, 'rb') as file:
        # read in data
        data = read_binary(file, wdtype)

    evt_info, rwf = format_wfs(data, wdtype, samples, channels)

    # modify into dataframes for appropriate comparison
    rwf      = pd.DataFrame(list(map(list, rwf)), columns = rwf_type(samples).names)
    evt_info = pd.DataFrame(list(map(list, evt_info)), columns = event_info_type.names)

    assert rwf.equals(check_rwf)
    assert evt_info.equals(check_evt_info)


def test_ensure_new_path_created():

    data_path     = _data_file('three_channels_WD2.h5')
    new_data_path = _data_file('three_channels_WD21.h5')

    found_path = check_save_path(data_path, overwrite = False)

    assert found_path == new_data_path


def test_runtime_error_when_too_many_save_files(tmp_path):
    # the 101 clashing files live in pytest's temporary directory so that the
    # test leaves nothing behind in the repository
    base_file = tmp_path / 'test_.txt'
    base_file.touch()
    for i in range(1, 101):
        (tmp_path / f'test_{i}.txt').touch()

    with raises(RuntimeError):
        check_save_path(str(base_file), overwrite=False)


@mark.parametrize("config, inpt, output, comparison", [("process_WD2_1channel.conf", "one_channel_WD2.bin", "one_channel_tmp.h5", "one_channel_WD2.h5"),
                                                       ("process_WD2_3channel.conf", "three_channels_WD2.bin", "three_channels_tmp.h5", "three_channels_WD2.h5")])
def test_decode_produces_expected_output(config, inpt, output, comparison, tmp_path):

    # ensure path is correct
    file_path       = _data_file(inpt)
    comparison_path = _data_file(comparison)
    config_path     = _data_file("configs/" + config)

    # the output and the adjusted config are written to a temporary directory,
    # so the tracked config files are never modified by running the tests
    save_path  = str(tmp_path / output)
    tmp_config = str(tmp_path / config)

    # collect samples from header
    _, samples, _, _ = process_header(file_path)

    # rewrite paths to files
    cnfg = configparser.ConfigParser()
    cnfg.read(config_path)
    cnfg.set('required', 'file_path', "'" + file_path + "'") # values need quotes around them to be parsed as strings
    cnfg.set('required', 'save_path', "'" + save_path + "'")

    with open(tmp_config, 'w') as cfgfile:
        cnfg.write(cfgfile)

    # run processing pack decode with the interpreter running the tests;
    # `check = True` makes a failing pack fail the test at this point
    run_pack = [sys.executable, _mule_dir() + "/bin/mule", "proc", tmp_config]
    subprocess.run(run_pack, check = True)

    # check that the resulting dataframe is as expected
    assert load_evt_info(save_path).equals(load_evt_info(comparison_path))
    assert load_rwf_info(save_path, samples).equals(load_rwf_info(comparison_path, samples))
import os
import subprocess
import configparser

from pytest import mark
from pytest import raises

from packs.core.io import read_config_file


@mark.parametrize("pack", ["acq", "proc", "tests"])
def test_executable_runs_successfully(pack):
    '''
    Checks that `bin/mule` can be launched for every available pack and
    exits cleanly when handed the placeholder `test_config` config.
    '''
    mule_root = str(os.environ['MULE_DIR'])
    # config will need to be improved
    command = ["python3", f"{mule_root}/bin/mule", str(pack), "test_config"]

    # ensure output is successful (no errors)

    # this should be more complex in the future, such as if the config (which
    # will be a default for the tests) returns the test flag return a number, or
    # something in particular from subprocess
    completed = subprocess.run(command)
    assert completed.returncode == 0


def test_incorrect_pack_returns_error():
    '''
    Asking `bin/mule` for a pack that does not exist must end with a
    non-zero exit status.
    '''
    mule_root = str(os.environ['MULE_DIR'])

    # give an incorrect pack
    command = ["python3", f"{mule_root}/bin/mule", "donkey", "config"]

    # check = True converts the non-zero exit status into an exception
    with raises(subprocess.CalledProcessError):
        subprocess.run(command, check = True)


def test_config_read_correctly():
    '''
    The reference config must be read into a dictionary holding one
    string, one float, one integer and one boolean entry.
    '''
    mule_root = str(os.environ['MULE_DIR'])
    config_file = f"{mule_root}/packs/tests/data/configs/test_config.conf"

    expected = {
        'test_1': 'a string',
        'test_2': 6.03,
        'test_3': 5,
        'test_4': True,
    }

    loaded = read_config_file(config_file)

    assert loaded == expected
import numpy as np


# per-event header information stored alongside the waveforms
event_info_type = np.dtype([
    ('event_number', np.uint32),
    ('timestamp', np.uint64),
    ('samples', np.uint32),
    ('sampling_period', np.uint64),
    ('channels', np.int32),
    ])


def rwf_type(samples : int) -> np.dtype:
    """
    Generates the data-type for raw waveforms

    Parameters
    ----------

        samples  (int)  :  Number of samples per waveform

    Returns
    -------

        (np.dtype)  :  Desired data type for processing, with the fields
                       `event_number`, `channels` and `rwf` (a float32
                       array of length `samples`)
    """
    return np.dtype([
        ('event_number', np.uint32),
        ('channels', np.int32),
        ('rwf', np.float32, (samples,))
        ])


def generate_wfdtype(channels : int, samples : int) -> np.dtype:
    '''
    Generates the dtype for collecting the binary data based on samples and
    number of channels.

    Parameters
    ----------

        channels  (int)  :  Number of channels in the data
        samples   (int)  :  Number of samples per waveform

    Returns
    -------

        (np.dtype)  :  Four header fields followed by one float32 array of
                       length `samples` per channel (`chan_1`, `chan_2`, ...).
                       Only when `channels` is greater than one is a
                       `channels` field placed between the header and the
                       waveforms; any other value gives the single channel
                       layout holding `chan_1` alone.
    '''
    # header fields shared by both layouts, written once so that the two
    # layouts cannot drift apart
    fields = [
        ('event_number', np.uint32),
        ('timestamp', np.uint64),
        ('samples', np.uint32),
        ('sampling_period', np.uint64),
        ]

    if channels > 1:
        # multi-channel records carry an explicit channel count
        fields.append(('channels', np.int32))
        n_waveforms = channels
    else:
        n_waveforms = 1

    fields.extend((f'chan_{i}', np.float32, (samples,))
                  for i in range(1, n_waveforms + 1))

    return np.dtype(fields)