diff --git a/additional_tests/README.md b/additional_tests/README.md deleted file mode 100644 index 66b53e92..00000000 --- a/additional_tests/README.md +++ /dev/null @@ -1,2 +0,0 @@ -This directory is for python tests that do not fit with the nextflow + unittest directory organization. -Or any other test file that is intended to be run every time a PR is merged. \ No newline at end of file diff --git a/additional_tests/__pycache__/test_launch_interpret_json.cpython-311-pytest-7.4.0.pyc b/additional_tests/__pycache__/test_launch_interpret_json.cpython-311-pytest-7.4.0.pyc deleted file mode 100644 index e001c624..00000000 Binary files a/additional_tests/__pycache__/test_launch_interpret_json.cpython-311-pytest-7.4.0.pyc and /dev/null differ diff --git a/additional_tests/test_launch_interpret_json.py b/additional_tests/test_launch_interpret_json.py deleted file mode 100644 index 515bd374..00000000 --- a/additional_tests/test_launch_interpret_json.py +++ /dev/null @@ -1,175 +0,0 @@ -import unittest -from src.stimulus.cli.interpret_json import interpret_json - -""" -to run this test you need to put a relative import in the JsonSchema import line in launch_interpret_json.py. - -To explain this further launch_interpret_json.py is meant to be launched as it is: -python3 launch_interpret_json.py - -So it can not have relative imports inside, (it can but it gets complicated quickly). -But here to test it we have to import it, and once we do that it will throw an error because the absolute import it has for -the JsonSchema class will not be resolved. - -Basically there is no simple way to have the louncher in bin keeping the opverall directory organization -and a nice set of tests for it that live in another directory. Hence the need to manually mnodify that file when the need for test arise. - -TODO find a clean solution at the above problem. -""" - -# initialize unittest class -class TestInterpretJson(unittest.TestCase): - - def test_interpret_json_with_empty_json(self): - d = {"experiment": "MyCustomExperiment"} - out_l = [{"experiment": "MyCustomExperiment", "noise": None, "split": None}] - self.assertEqual(interpret_json(d), out_l) - - - def test_interpret_json_without_noise_arg(self): - d = { - "experiment": "MyCustomExperiment", - "split": [ - { - "name": "RandomSplitter", - "params": [{"split": [[0.6, 0.2, 0.2], [0.7, 0.15, 0.15]]}] - }, - { - "name": "SomeSplitter", - "params": "default" - }, - { - "name": "SomeSplitter1", - "params": ["default"] - }]} - out_l = [{'experiment': 'MyCustomExperiment', 'noise': None, 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': None, 'split': {'name': 'RandomSplitter', 'params': {'split': [0.6, 0.2, 0.2]}}}, - {'experiment': 'MyCustomExperiment', 'noise': None, 'split': {'name': 'RandomSplitter', 'params': {'split': [0.7, 0.15, 0.15]}}}, - {'experiment': 'MyCustomExperiment', 'noise': None, 'split': {'name': 'SomeSplitter', 'params': {}}}, - {'experiment': 'MyCustomExperiment', 'noise': None, 'split': {'name': 'SomeSplitter1', 'params': {}}}] - self.assertEqual(interpret_json(d), out_l) - - - def test_interpret_json_without_split_arg_column_wise(self): - d = { - "experiment": "MyCustomExperiment", - "interpret_params_mode": "column_wise", - "noise": [ - { - "column_name": "hello:input1:dna", - "name": ["UniformTextMasker", "AnotherNoiser", "AnotherNoiser"], - "params": [{"probability": [0.1, 0.2]}, "default", {"probability": [0.12, 0.22], "seed": [0, 0]}] - }, - { - "column_name": "hello:input2:prot", - "name": ["UniformTextMasker", "AnotherNoiser1"], - "params": ["default", {"p": [1, 2], "s": [3, 4]}] - }, - { - "column_name": "hello:label:int", - "name": ["YetAnotherNoiser"], - "params": "default" - }]} - out_l =[{'experiment': 'MyCustomExperiment', 'noise': None, 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'UniformTextMasker', 'params': {'probability': 0.1}}, {'column_name': 'hello:input2:prot', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'UniformTextMasker', 'params': {'probability': 0.2}}, {'column_name': 'hello:input2:prot', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'UniformTextMasker', 'params': {'probability': 0.1}}, {'column_name': 'hello:input2:prot', 'name': 'AnotherNoiser1', 'params': {'p': 1, 's': 3}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'UniformTextMasker', 'params': {'probability': 0.2}}, {'column_name': 'hello:input2:prot', 'name': 'AnotherNoiser1', 'params': {'p': 2, 's': 4}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {}}, {'column_name': 'hello:input2:prot', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {}}, {'column_name': 'hello:input2:prot', 'name': 'AnotherNoiser1', 'params': {'p': 1, 's': 3}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {}}, {'column_name': 'hello:input2:prot', 'name': 'AnotherNoiser1', 'params': {'p': 2, 's': 4}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'probability': 0.12, 'seed': 0}}, {'column_name': 'hello:input2:prot', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'probability': 0.22, 'seed': 0}}, {'column_name': 'hello:input2:prot', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'probability': 0.12, 'seed': 0}}, {'column_name': 'hello:input2:prot', 'name': 'AnotherNoiser1', 'params': {'p': 1, 's': 3}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'probability': 0.22, 'seed': 0}}, {'column_name': 'hello:input2:prot', 'name': 'AnotherNoiser1', 'params': {'p': 2, 's': 4}}, {'column_name': 'hello:label:int', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': None}] - self.assertEqual(interpret_json(d), out_l) - - - - def test_interpret_json_with_custom_dict(self): - d = { - "experiment": "MyCustomExperiment", - "custom": [ - { - "noise": [ - { - "column_name": "input1", - "name": "UniformTextMasker", - "params": {"probability": 0.1} - }, - { - "column_name": "input2", - "name": "GaussianNoise", - "params": {"mean": 0.5, "std": 0.1} - }], - "split": [ - { - "name": "RandomSplitter", - "params": {"split": [0.6, 0.4, 0]} - }]}, - { - "noise": [ - { - "column_name": "input2", - "name": "UniformTextMasker", - "params": {"probability": 0.1} - }, - { - "column_name": "float", - "name": "GaussianNoise", - "params": {"mean": 0.5, "std": 0.1} - }], - "split": [ - { - "name": "RandomSplitter", - "params": {"split": [0.6, 0.8, 0.1]} - }]}]} - - out_l = [{'experiment': 'MyCustomExperiment', 'noise': None, 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'input1', 'name': 'UniformTextMasker', 'params': {'probability': 0.1}}, {'column_name': 'input2', 'name': 'GaussianNoise', 'params': {'mean': 0.5, 'std': 0.1}}], 'split': [{'name': 'RandomSplitter', 'params': {'split': [0.6, 0.4, 0]}}]}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'input2', 'name': 'UniformTextMasker', 'params': {'probability': 0.1}}, {'column_name': 'float', 'name': 'GaussianNoise', 'params': {'mean': 0.5, 'std': 0.1}}], 'split': [{'name': 'RandomSplitter', 'params': {'split': [0.6, 0.8, 0.1]}}]}] - self.assertEqual(interpret_json(d), out_l) - - - def test_interpret_json_with_column_wise(self): - d = { - "experiment": "MyCustomExperiment", - "interpret_params_mode": "column_wise", - "noise": [ - { - "column_name": "hello:input1:dna", - "name": ["UniformTextMasker", "AnotherNoiser"], - "params": ["default", {"mean": [0.5, 0.6], "std": [0.1, 0.2]}] - }, - { - "column_name": "hello:input2:prot", - "name": ["YetAnotherNoiser"], - "params": ["default", {"p1": [1, 2]}] - }], - "split": [ - { - "name": "RandomSplitter", - "params": [{"split": [[0.6, 0.2, 0.2], [0.7, 0.15, 0.15]]}] - }, - { - "name": "SomeSplitter", - "params": "default" - }]} - - - out_list = [ - {'experiment': 'MyCustomExperiment', 'noise': None, 'split': None}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'RandomSplitter', 'params': {'split': [0.6, 0.2, 0.2]}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'RandomSplitter', 'params': {'split': [0.7, 0.15, 0.15]}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'UniformTextMasker', 'params': {}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'SomeSplitter', 'params': {}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'mean': 0.5, 'std': 0.1}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'RandomSplitter', 'params': {'split': [0.6, 0.2, 0.2]}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'mean': 0.5, 'std': 0.1}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'RandomSplitter', 'params': {'split': [0.7, 0.15, 0.15]}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'mean': 0.5, 'std': 0.1}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'SomeSplitter', 'params': {}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'mean': 0.6, 'std': 0.2}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'RandomSplitter', 'params': {'split': [0.6, 0.2, 0.2]}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'mean': 0.6, 'std': 0.2}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'RandomSplitter', 'params': {'split': [0.7, 0.15, 0.15]}}}, - {'experiment': 'MyCustomExperiment', 'noise': [{'column_name': 'hello:input1:dna', 'name': 'AnotherNoiser', 'params': {'mean': 0.6, 'std': 0.2}}, {'column_name': 'hello:input2:prot', 'name': 'YetAnotherNoiser', 'params': {}}], 'split': {'name': 'SomeSplitter', 'params': {}}} - ] - - d_to_test = interpret_json(d) - self.assertEqual(len(d_to_test), 10) - self.assertEqual(d_to_test, out_list) diff --git a/tests/unittest_encoders.py b/tests/data/encoding/unittest_encoders.py similarity index 100% rename from tests/unittest_encoders.py rename to tests/data/encoding/unittest_encoders.py diff --git a/tests/unittest_splitters.py b/tests/data/splitters/unittest_splitters.py similarity index 100% rename from tests/unittest_splitters.py rename to tests/data/splitters/unittest_splitters.py diff --git a/tests/test_csv_loader.py b/tests/data/test_csv_loader.py similarity index 100% rename from tests/test_csv_loader.py rename to tests/data/test_csv_loader.py diff --git a/tests/test_csv_processing.py b/tests/data/test_csv_processing.py similarity index 100% rename from tests/test_csv_processing.py rename to tests/data/test_csv_processing.py diff --git a/tests/test_handlertorch.py b/tests/data/test_handlertorch.py similarity index 100% rename from tests/test_handlertorch.py rename to tests/data/test_handlertorch.py diff --git a/tests/test_data_transformers.py b/tests/data/transform/test_data_transformers.py similarity index 100% rename from tests/test_data_transformers.py rename to tests/data/transform/test_data_transformers.py diff --git a/tests/unittest_experiments.py b/tests/data/unittest_experiments.py similarity index 100% rename from tests/unittest_experiments.py rename to tests/data/unittest_experiments.py diff --git a/tests/unittest_raytune_learner.py b/tests/learner/unittest_raytune_learner.py similarity index 100% rename from tests/unittest_raytune_learner.py rename to tests/learner/unittest_raytune_learner.py diff --git a/tests/test_cli_check_model.sh b/tests/test_cli_check_model.sh deleted file mode 100755 index 4dc0ce61..00000000 --- a/tests/test_cli_check_model.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -# Test script for stimulus CLI applications - -set -e # Exit immediately if a command exits with a non-zero status - -# Set up paths -TEST_DIR=$(pwd)/tests -DATA_LOC=$TEST_DIR/test_data/titanic/titanic_stimulus.csv -JSON_LOC=$TEST_DIR/test_data/titanic/titanic_stimulus.json -MODEL_LOC=$TEST_DIR/test_model/titanic_model.py -CONFIG_LOC=$TEST_DIR/test_model/titanic_model_cpu.yaml - -# Run the command -stimulus-check-model -d $DATA_LOC -m $MODEL_LOC -e $JSON_LOC -c $CONFIG_LOC --gpus 0 - -echo "stimulus-check-model test passed" - diff --git a/tests/test_cli_model_tuning.sh b/tests/test_cli_model_tuning.sh deleted file mode 100755 index 103363cc..00000000 --- a/tests/test_cli_model_tuning.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# Test script for stimulus CLI applications - -set -e # Exit immediately if a command exits with a non-zero status - -# Set up paths -TEST_DIR=$(pwd)/tests -DATA_LOC=$TEST_DIR/test_output/titanic_stimulus_split.csv -JSON_LOC=$TEST_DIR/test_output/titanic_stimulus-split-RandomSplitter_0.7_0.15_0.15.json -MODEL_LOC=$TEST_DIR/test_model/titanic_model.py -CONFIG_LOC=$TEST_DIR/test_model/titanic_model_cpu.yaml -OUTPUT_LOC=$TEST_DIR/ - -# Run the command -stimulus-tuning -c $CONFIG_LOC -m $MODEL_LOC -d $DATA_LOC -e $JSON_LOC -o $OUTPUT_LOC --gpus 0 - -echo "stimulus-tuning test passed" - diff --git a/tests/test_cli_split.sh b/tests/test_cli_split.sh deleted file mode 100755 index 3e539e1c..00000000 --- a/tests/test_cli_split.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# Test script for stimulus CLI applications - -set -e # Exit immediately if a command exits with a non-zero status - -# Set up paths -TEST_DIR=$(pwd)/tests -DATA_LOC=$TEST_DIR/test_data/titanic/titanic_stimulus.csv -JSON_LOC=$TEST_DIR/test_output/titanic_stimulus-split-RandomSplitter_0.7_0.15_0.15.json -OUTPUT_LOC=$TEST_DIR/test_output/titanic_stimulus_split.csv - -# Run the command -stimulus-split-csv -c $DATA_LOC -j $JSON_LOC -o $OUTPUT_LOC - -echo "stimulus-split-csv test passed" \ No newline at end of file diff --git a/tests/unittest_csv.py b/tests/unittest_csv.py deleted file mode 100644 index bf920746..00000000 --- a/tests/unittest_csv.py +++ /dev/null @@ -1,205 +0,0 @@ -import json -import os -import unittest -from abc import ABC - -import numpy as np -import numpy.testing as npt -import polars as pl - -from src.stimulus.data.csv import CsvLoader, CsvProcessing -from src.stimulus.data.experiments import DnaToFloatExperiment, ProtDnaToFloatExperiment - - -class TestCsvProcessing(ABC): - """Base class for testing CsvProcessing.""" - - def setUp(self): - # raising a NotImplementedError to ensure that the subclass has implemented the setUp method. - if type(self) is TestCsvProcessing: - raise NotImplementedError("TestCsvProcessing is a base class and should not be instantiated directly.") - self.csv_processing = None - self.configs = None - self.data_length = None - - def test_len(self): - """Test if data is loaded with correct shape.""" - self.assertEqual(len(self.csv_processing.data), self.data_length) - - def test_add_split(self): - """Test adding split to the data.""" - self.csv_processing.add_split(self.configs["split"]) - self._test_random_splitter(self.expected_splits) - - def test_transform(self): - """Test data transformation.""" - self.csv_processing.transform(self.configs["transform"]) - self._test_transformed_data() - - def _test_random_splitter(self, expected_splits): - for i in range(self.data_length): - self.assertEqual(self.csv_processing.data["split:split:int"][i], expected_splits[i]) - - def _test_transformed_data(self): - raise NotImplementedError("Subclasses should implement this method.") - - def _test_column_values(self, column_name, expected_values): - observed_values = list(self.csv_processing.data[column_name]) - observed_values = [round(v, 6) if isinstance(v, float) else v for v in observed_values] - self.assertEqual(observed_values, expected_values) - - -class TestDnaToFloatCsvProcessing(TestCsvProcessing, unittest.TestCase): - """Test CsvProcessing for DnaToFloatExperiment.""" - - def setUp(self): - np.random.seed(123) - pl.set_random_seed(123) - self.experiment = DnaToFloatExperiment() - self.csv_path = os.path.abspath("tests/test_data/dna_experiment/test.csv") - self.csv_processing = CsvProcessing(self.experiment, self.csv_path) - self.csv_shuffle_long_path = os.path.abspath("tests/test_data/dna_experiment/test_shuffling_long.csv") - self.csv_shuffle_long = CsvProcessing(self.experiment, self.csv_shuffle_long_path) - self.csv_shuffle_long_shuffled_path = os.path.abspath( - "tests/test_data/dna_experiment/test_shuffling_long_shuffled.csv", - ) - self.csv_shuffle_long_shuffled = CsvProcessing(self.experiment, self.csv_shuffle_long_shuffled_path) - with open("tests/test_data/dna_experiment/test_config.json") as f: - self.configs = json.load(f) - self.data_length = 2 - self.expected_splits = [1, 0] - - def _test_transformed_data(self): - self.data_length *= 2 - self._test_column_values("pet:meta:str", ["cat", "dog", "cat", "dog"]) - self._test_column_values("hola:label:float", [12.676405, 12.540016, 12.676405, 12.540016]) - self._test_column_values( - "hello:input:dna", - ["ACTGACTGATCGATNN", "ACTGACTGATCGATNN", "NNATCGATCAGTCAGT", "NNATCGATCAGTCAGT"], - ) - # self._test_column_values("split:split:int", [1, 0, 1, 0]) - - def test_shuffle_labels(self): - """Test shuffling of labels.""" - self.csv_shuffle_long.shuffle_labels(seed=42) - npt.assert_array_equal( - self.csv_shuffle_long.data["hola:label:float"], - self.csv_shuffle_long_shuffled.data["hola:label:float"], - ) - - -class TestProtDnaToFloatCsvProcessing(TestCsvProcessing): - """Test CsvProcessing for ProtDnaToFloatExperiment.""" - - def setUp(self): - self.experiment = ProtDnaToFloatExperiment() - self.csv_path = os.path.abspath("tests/test_data/prot_dna_experiment/test.csv") - self.csv_processing = CsvProcessing(self.experiment, self.csv_path) - with open("tests/test_data/prot_dna_experiment/test_config.json") as f: - self.configs = json.load(f) - self.data_length = 2 - self.expected_splits = [1, 0] - - def _test_transformed_data(self): - self.data_length *= 2 - self._test_column_values("pet:meta:str", ["cat", "dog", "cat", "dog"]) - self._test_column_values("hola:label:float", [12.676405, 12.540016, 12.676405, 12.540016]) - self._test_column_values( - "hello:input:dna", - ["ACTGACTGATCGATNN", "ACTGACTGATCGATNN", "NNATCGATCAGTCAGT", "NNATCGATCAGTCAGT"], - ) - self._test_column_values("split:split:int", [1, 0, 1, 0]) - self._test_column_values( - "bonjour:input:prot", - ["GPRTTIKAKQLETLX", "GPRTTIKAKQLETLX", "GPRTTIKAKQLETLX", "GPRTTIKAKQLETLX"], - ) - - -class TestCsvLoader(ABC): - """Base class for testing CsvLoader.""" - - def setUp(self): - self.csv_loader = None - self.data_shape = None - self.data_shape_split = None - self.shape_splits = None - - def test_len(self): - """Test the length of the dataset.""" - self.assertEqual(len(self.csv_loader), self.data_shape[0]) - - def test_parse_csv_to_input_label_meta(self): - """Test parsing of CSV to input, label, and meta.""" - self.assertIsInstance(self.csv_loader.input, dict) - self.assertIsInstance(self.csv_loader.label, dict) - self.assertIsInstance(self.csv_loader.meta, dict) - - def test_get_encoded_item_unique(self): - """Test getting a single encoded item.""" - encoded_item = self.csv_loader[0] - self._assert_encoded_item(encoded_item, expected_length=1) - - def test_get_encoded_item_multiple(self): - """Test getting multiple encoded items.""" - encoded_item = self.csv_loader[slice(0, 2)] - self._assert_encoded_item(encoded_item, expected_length=2) - - def test_load_with_split(self): - """Test loading with split.""" - self.csv_loader_split = CsvLoader(self.experiment, self.csv_path_split) - self.assertEqual(len(self.csv_loader_split), self.data_shape_split[0]) - - for i in [0, 1, 2]: - self.csv_loader_split = CsvLoader(self.experiment, self.csv_path_split, split=i) - self.assertEqual(len(self.csv_loader_split.input["hello:dna"]), self.shape_splits[i]) - - with self.assertRaises(ValueError): - CsvLoader(self.experiment, self.csv_path_split, split=3) - - def test_get_all_items(self): - """Test getting all items.""" - input_data, label_data, meta_data = self.csv_loader.get_all_items() - self.assertIsInstance(input_data, dict) - self.assertIsInstance(label_data, dict) - self.assertIsInstance(meta_data, dict) - - def _assert_encoded_item(self, encoded_item, expected_length): - self.assertEqual(len(encoded_item), 3) - for i in range(3): - self.assertIsInstance(encoded_item[i], dict) - for key in encoded_item[i].keys(): - self.assertIsInstance(encoded_item[i][key], np.ndarray) - if ( - expected_length > 1 - ): # If the expected length is 0, this will fail as we are trying to find the length of an object size 0. - self.assertEqual(len(encoded_item[i][key]), expected_length) - - -class TestDnaToFloatCsvLoader(TestCsvLoader, unittest.TestCase): - """Test CsvLoader for DnaToFloatExperiment.""" - - def setUp(self): - self.csv_path = os.path.abspath("tests/test_data/dna_experiment/test.csv") - self.csv_path_split = os.path.abspath("tests/test_data/dna_experiment/test_with_split.csv") - self.experiment = DnaToFloatExperiment() - self.csv_loader = CsvLoader(self.experiment, self.csv_path) - self.data_shape = [2, 3] - self.data_shape_split = [48, 4] - self.shape_splits = {0: 16, 1: 16, 2: 16} - - -class TestProtDnaToFloatCsvLoader(TestCsvLoader, unittest.TestCase): - """Test CsvLoader for ProtDnaToFloatExperiment.""" - - def setUp(self): - self.csv_path = os.path.abspath("tests/test_data/prot_dna_experiment/test.csv") - self.csv_path_split = os.path.abspath("tests/test_data/prot_dna_experiment/test_with_split.csv") - self.experiment = ProtDnaToFloatExperiment() - self.csv_loader = CsvLoader(self.experiment, self.csv_path) - self.data_shape = [2, 4] - self.data_shape_split = [3, 5] - self.shape_splits = {0: 1, 1: 1, 2: 1} - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unittest_data_transformers.py b/tests/unittest_data_transformers.py deleted file mode 100644 index 9df8a3a2..00000000 --- a/tests/unittest_data_transformers.py +++ /dev/null @@ -1,119 +0,0 @@ -import unittest -from abc import ABC, abstractmethod - -from src.stimulus.data.transform.data_transformation_generators import ( - GaussianChunk, - GaussianNoise, - ReverseComplement, - UniformTextMasker, -) - - -class TestDataTransformer(ABC): - """Base class for testing data transformers.""" - - @abstractmethod - def setUp(self): - self.transformer = None - - def test_transform_single(self): - """Test transforming a single item.""" - transformed_data = self.transformer.transform(self.single_input, **self.single_params) - self.assertIsInstance(transformed_data, self.expected_type) - self.assertEqual(transformed_data, self.expected_single_output) - - def test_transform_all_single_item(self): - """Test transforming a list with a single item.""" - transformed_data = self.transformer.transform_all([self.single_input], **self.single_params) - self.assertIsInstance(transformed_data, list) - self.assertIsInstance(transformed_data[0], self.expected_type) - self.assertEqual(transformed_data, [self.expected_single_output]) - - def test_transform_all_multiple_items(self): - """Test transforming a list with multiple items.""" - transformed_data = self.transformer.transform_all(self.multiple_inputs, **self.multiple_params) - self.assertIsInstance(transformed_data, list) - for item in transformed_data: - self.assertIsInstance(item, self.expected_type) - self.assertEqual(transformed_data, self.expected_multiple_outputs) - - -class TestUniformTextMasker(TestDataTransformer, unittest.TestCase): - def setUp(self): - self.transformer = UniformTextMasker(mask="N") - self.single_input = "ACGTACGT" - self.single_params = {"seed": 42, "probability": 0.1} - self.expected_type = str - self.expected_single_output = "ACGTACNT" - self.multiple_inputs = ["ATCGATCGATCG", "ATCG"] - self.multiple_params = {"seed": 42, "probability": 0.1} - self.expected_multiple_outputs = ["ATCGATNGATNG", "ATCG"] - - -class TestGaussianNoise(TestDataTransformer, unittest.TestCase): - def setUp(self): - self.transformer = GaussianNoise() - self.single_input = 5.0 - self.single_params = {"seed": 42, "mean": 0, "std": 1} - self.expected_type = float - self.expected_single_output = 5.4967141530112327 - self.multiple_inputs = [1.0, 2.0, 3.0] - self.multiple_params = {"seed": 42, "mean": 0, "std": 1} - self.expected_multiple_outputs = [1.4967141530112327, 2.0211241446210543, 3.7835298641951802] - - def test_transform_single(self): - transformed_data = self.transformer.transform(self.single_input, **self.single_params) - self.assertIsInstance(transformed_data, self.expected_type) - self.assertAlmostEqual(transformed_data, self.expected_single_output, places=7) - - def test_transform_all_multiple_items(self): - transformed_data = self.transformer.transform_all(self.multiple_inputs, **self.multiple_params) - self.assertIsInstance(transformed_data, list) - for item, expected in zip(transformed_data, self.expected_multiple_outputs): - self.assertIsInstance(item, self.expected_type) - self.assertAlmostEqual(item, expected, places=7) - - -class TestReverseComplement(TestDataTransformer, unittest.TestCase): - def setUp(self): - self.transformer = ReverseComplement() - self.single_input = "ACCCCTACGTNN" - self.single_params = {} - self.expected_type = str - self.expected_single_output = "NNACGTAGGGGT" - self.multiple_inputs = ["ACCCCTACGTNN", "ACTGA"] - self.multiple_params = {} - self.expected_multiple_outputs = ["NNACGTAGGGGT", "TCAGT"] - - -class TestGaussianChunk(TestDataTransformer, unittest.TestCase): - def setUp(self): - self.transformer = GaussianChunk() - self.single_input = "AGCATGCTAGCTAGATCAAAATCGATGCATGCTAGCGGCGCGCATGCATGAGGAGACTGAC" - self.single_params = {"seed": 42, "chunk_size": 10, "std": 1} - self.expected_type = str - self.expected_single_output = "TGCATGCTAG" - self.multiple_inputs = [ - "AGCATGCTAGCTAGATCAAAATCGATGCATGCTAGCGGCGCGCATGCATGAGGAGACTGAC", - "AGCATGCTAGCTAGATCAAAATCGATGCATGCTAGCGGCGCGCATGCATGAGGAGACTGAC", - ] - self.multiple_params = {"seed": 42, "chunk_size": 10, "std": 1} - self.expected_multiple_outputs = ["TGCATGCTAG", "GCATGCTAGC"] - - def test_transform_single(self): - transformed_data = self.transformer.transform(self.single_input, **self.single_params) - self.assertIsInstance(transformed_data, self.expected_type) - self.assertEqual(len(transformed_data), 10) - self.assertEqual(transformed_data, self.expected_single_output) - - def test_transform_all_multiple_items(self): - transformed_data = self.transformer.transform_all(self.multiple_inputs, **self.multiple_params) - self.assertIsInstance(transformed_data, list) - for item in transformed_data: - self.assertIsInstance(item, self.expected_type) - self.assertEqual(len(item), 10) - self.assertEqual(transformed_data, self.expected_multiple_outputs) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unittest_handlertorch.py b/tests/unittest_handlertorch.py deleted file mode 100644 index 14c9e979..00000000 --- a/tests/unittest_handlertorch.py +++ /dev/null @@ -1,121 +0,0 @@ -import os -import unittest -from abc import ABC -from typing import Any, Dict - -import torch - -from src.stimulus.data.experiments import DnaToFloatExperiment, ProtDnaToFloatExperiment, TitanicExperiment -from src.stimulus.data.handlertorch import TorchDataset - - -class TestTorchDataset(ABC): - """Base class for testing TorchDataset.""" - - def setUp(self): - if type(self) is TestTorchDataset: - raise NotImplementedError("TestTorchDataset is a base class and should not be instantiated directly.") - self.torchdataset = None - self.expected_len = None - self.expected_input_shape = None - self.expected_label_shape = None - self.expected_item_shape = None - - def test_len(self): - """Test the length of the dataset.""" - self.assertEqual(len(self.torchdataset), self.expected_len) - - def test_convert_dict_to_dict_of_tensor(self): - """Test conversion of dict to dict of tensors.""" - self._test_convert_dict_to_dict_of_tensor(self.torchdataset.input, self.expected_input_shape) - self._test_convert_dict_to_dict_of_tensor(self.torchdataset.label, self.expected_label_shape) - - def test_get_item(self): - """Test getting items from the dataset.""" - self._test_get_item_shape(0, self.expected_item_shape) - self._test_get_item_shape(slice(0, 2), {k: [2] + v for k, v in self.expected_item_shape.items()}) - - def _test_convert_dict_to_dict_of_tensor(self, data: Dict[str, torch.Tensor], expected_shape: Dict[str, list]): - for key in data: - self.assertIsInstance(data[key], torch.Tensor) - self.assertEqual(data[key].shape, torch.Size(expected_shape[key])) - - def _test_get_item_shape(self, idx: Any, expected_size: Dict[str, list]): - x, y, meta = self.torchdataset[idx] - self.assertIsInstance(x, dict) - self.assertIsInstance(y, dict) - self.assertIsInstance(meta, dict) - for key, value in {**x, **y, **meta}.items(): - if key in expected_size: - self.assertEqual(value.shape, torch.Size(expected_size[key])) - - -class TestDnaToFloatTorchDatasetSameLength(TestTorchDataset, unittest.TestCase): - """Test TorchDataset for DnaToFloatExperiment with same length sequences.""" - - def setUp(self): - super().setUp() - self.torchdataset = TorchDataset( - csvpath=os.path.abspath("tests/test_data/dna_experiment/test.csv"), - experiment=DnaToFloatExperiment(), - ) - self.expected_len = 2 - self.expected_input_shape = {"hello": [2, 16, 4]} - self.expected_label_shape = {"hola": [2]} - self.expected_item_shape = {"hello": [16, 4]} - - -class TestDnaToFloatTorchDatasetDifferentLength(TestTorchDataset, unittest.TestCase): - """Test TorchDataset for DnaToFloatExperiment with different length sequences.""" - - def setUp(self): - super().setUp() - self.torchdataset = TorchDataset( - csvpath=os.path.abspath("tests/test_data/dna_experiment/test_unequal_dna_float.csv"), - experiment=DnaToFloatExperiment(), - ) - self.expected_len = 4 - self.expected_input_shape = {"hello": [4, 31, 4]} - self.expected_label_shape = {"hola": [4]} - self.expected_item_shape = {"hello": [31, 4]} - - -class TestProtDnaToFloatTorchDatasetSameLength(TestTorchDataset, unittest.TestCase): - """Test TorchDataset for ProtDnaToFloatExperiment with same length sequences.""" - - def setUp(self): - super().setUp() - self.torchdataset = TorchDataset( - csvpath=os.path.abspath("tests/test_data/prot_dna_experiment/test.csv"), - experiment=ProtDnaToFloatExperiment(), - ) - self.expected_len = 2 - self.expected_input_shape = {"hello": [2, 16, 4], "bonjour": [2, 15, 20]} - self.expected_label_shape = {"hola": [2]} - self.expected_item_shape = {"hello": [16, 4], "bonjour": [15, 20]} - - -class TestTitanicTorchDataset(TestTorchDataset, unittest.TestCase): - """Test TorchDataset for TitanicExperiment.""" - - def setUp(self): - super().setUp() - self.torchdataset = TorchDataset( - csvpath=os.path.abspath("tests/test_data/titanic/titanic_stimulus.csv"), - experiment=TitanicExperiment(), - ) - self.expected_len = 712 - # Add expected shapes for Titanic dataset if known - self.expected_input_shape = {} # Fill this with the expected input shape - self.expected_label_shape = {} # Fill this with the expected label shape - self.expected_item_shape = {} # Fill this with the expected item shape - - def test_convert_dict_to_dict_of_tensor(self): - """Override this method if Titanic dataset has different requirements.""" - - def test_get_item(self): - """Override this method if Titanic dataset has different requirements.""" - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unittest_performance.py b/tests/utils/unittest_performance.py similarity index 100% rename from tests/unittest_performance.py rename to tests/utils/unittest_performance.py