From 91b0b87509cf85d94665c25fd96eae76e0e18b05 Mon Sep 17 00:00:00 2001 From: Igor Trujnara Date: Wed, 29 Jan 2025 15:47:43 +0100 Subject: [PATCH 1/3] feat (typing): Add typing module. Add a typing module, which contains all classes from the package for typing purposes, as well as some aliases. --- src/stimulus/typing/__init__.py | 83 +++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/stimulus/typing/__init__.py diff --git a/src/stimulus/typing/__init__.py b/src/stimulus/typing/__init__.py new file mode 100644 index 00000000..f8abc492 --- /dev/null +++ b/src/stimulus/typing/__init__.py @@ -0,0 +1,83 @@ +"""Typing for Stimulus Python API. + +This module contains all Stimulus types which will be used for variable typing +and likely not instantiated, as well as aliases for other types to use for typing purposes. + +The aliases from this module should be used for typing purposes only. +""" +# ruff: noqa: F401 + +from typing import TypeAlias + +# these imports mostly alias everything +from stimulus.analysis.analysis_default import Analysis, AnalysisPerformanceTune, AnalysisRobustness +from stimulus.data.data_handlers import ( + DatasetHandler, + DatasetLoader, + DatasetManager, + DatasetProcessor, + EncodeManager, + SplitManager, + TransformManager, +) +from stimulus.data.encoding.encoders import AbstractEncoder as Encoder +from stimulus.data.handlertorch import TorchDataset +from stimulus.data.loaders import EncoderLoader, SplitLoader, TransformLoader +from stimulus.data.splitters.splitters import AbstractSplitter as Splitter +from stimulus.data.transform.data_transformation_generators import AbstractDataTransformer as Transform +from stimulus.learner.predict import PredictWrapper +from stimulus.learner.raytune_learner import CheckpointDict, TuneModel, TuneWrapper +from stimulus.learner.raytune_parser import RayTuneMetrics, RayTuneOptimizer, RayTuneResult, TuneParser +from stimulus.utils.performance import Performance +from 
stimulus.utils.yaml_data import ( + YamlColumns, + YamlColumnsEncoder, + YamlConfigDict, + YamlGlobalParams, + YamlSchema, + YamlSplit, + YamlSubConfigDict, + YamlTransform, + YamlTransformColumns, + YamlTransformColumnsTransformation, +) +from stimulus.utils.yaml_model_schema import ( + CustomTunableParameter, + Data, + Loss, + Model, + RayTuneModel, + RunParams, + Scheduler, + TunableParameter, + Tune, + TuneParams, + YamlRayConfigLoader, +) + +# data/data_handlers.py + +DataManager: TypeAlias = DatasetManager | EncodeManager | SplitManager | TransformManager + +# data/experiments.py + +Loader: TypeAlias = DatasetLoader | EncoderLoader | TransformLoader | SplitLoader + +# learner/raytune_parser.py + +RayTuneData: TypeAlias = RayTuneMetrics | RayTuneOptimizer | RayTuneResult + +# utils/yaml_data.py + +YamlData: TypeAlias = ( + YamlColumns + | YamlColumnsEncoder + | YamlConfigDict + | YamlGlobalParams + | YamlSchema + | YamlSplit + | YamlSubConfigDict + | YamlTransform + | YamlTransformColumns + | YamlTransformColumnsTransformation +) From 8f82101d69232531d9bd7c3f26968cb682e607a4 Mon Sep 17 00:00:00 2001 From: Igor Trujnara Date: Wed, 29 Jan 2025 15:49:25 +0100 Subject: [PATCH 2/3] refactor (data): Rename experiments.py to loaders.py Rename the file previously called experiments.py to loaders.py in order to limit confusion. 
--- src/stimulus/cli/split_csv.py | 2 +- src/stimulus/cli/transform_csv.py | 2 +- src/stimulus/data/data_handlers.py | 10 ++--- src/stimulus/data/handlertorch.py | 5 ++- .../data/{experiments.py => loaders.py} | 0 src/stimulus/utils/launch_utils.py | 2 +- tests/data/test_data_handlers.py | 40 +++++++++---------- tests/data/test_experiment.py | 16 ++++---- tests/data/test_handlertorch.py | 14 +++---- 9 files changed, 46 insertions(+), 45 deletions(-) rename src/stimulus/data/{experiments.py => loaders.py} (100%) diff --git a/src/stimulus/cli/split_csv.py b/src/stimulus/cli/split_csv.py index a0382b63..98dcce81 100755 --- a/src/stimulus/cli/split_csv.py +++ b/src/stimulus/cli/split_csv.py @@ -6,7 +6,7 @@ import yaml from stimulus.data.data_handlers import DatasetProcessor, SplitManager -from stimulus.data.experiments import SplitLoader +from stimulus.data.loaders import SplitLoader from stimulus.utils.yaml_data import YamlSubConfigDict diff --git a/src/stimulus/cli/transform_csv.py b/src/stimulus/cli/transform_csv.py index d08b0b11..2e2ff5fd 100755 --- a/src/stimulus/cli/transform_csv.py +++ b/src/stimulus/cli/transform_csv.py @@ -6,7 +6,7 @@ import yaml from stimulus.data.data_handlers import DatasetProcessor, TransformManager -from stimulus.data.experiments import TransformLoader +from stimulus.data.loaders import TransformLoader from stimulus.utils.yaml_data import YamlSubConfigDict diff --git a/src/stimulus/data/data_handlers.py b/src/stimulus/data/data_handlers.py index 9a582ab3..c58e3dda 100644 --- a/src/stimulus/data/data_handlers.py +++ b/src/stimulus/data/data_handlers.py @@ -29,7 +29,7 @@ import torch import yaml -from stimulus.data import experiments +from stimulus.data import loaders from stimulus.utils import yaml_data @@ -157,7 +157,7 @@ class EncodeManager: def __init__( self, - encoder_loader: experiments.EncoderLoader, + encoder_loader: loaders.EncoderLoader, ) -> None: """Initialize the EncodeManager. 
@@ -219,7 +219,7 @@ class TransformManager: def __init__( self, - transform_loader: experiments.TransformLoader, + transform_loader: loaders.TransformLoader, ) -> None: """Initialize the TransformManager.""" self.transform_loader = transform_loader @@ -245,7 +245,7 @@ class SplitManager: def __init__( self, - split_loader: experiments.SplitLoader, + split_loader: loaders.SplitLoader, ) -> None: """Initialize the SplitManager.""" self.split_loader = split_loader @@ -396,7 +396,7 @@ def __init__( self, config_path: str, csv_path: str, - encoder_loader: experiments.EncoderLoader, + encoder_loader: loaders.EncoderLoader, split: Union[int, None] = None, ) -> None: """Initialize the DatasetLoader.""" diff --git a/src/stimulus/data/handlertorch.py b/src/stimulus/data/handlertorch.py index 0c608072..c42078a4 100644 --- a/src/stimulus/data/handlertorch.py +++ b/src/stimulus/data/handlertorch.py @@ -4,7 +4,8 @@ from torch.utils.data import Dataset -from src.stimulus.data import data_handlers, experiments +from stimulus.data import data_handlers +from stimulus.data import loaders class TorchDataset(Dataset): @@ -14,7 +15,7 @@ def __init__( self, config_path: str, csv_path: str, - encoder_loader: experiments.EncoderLoader, + encoder_loader: loaders.EncoderLoader, split: Optional[int] = None, ) -> None: """Initialize the TorchDataset. 
diff --git a/src/stimulus/data/experiments.py b/src/stimulus/data/loaders.py similarity index 100% rename from src/stimulus/data/experiments.py rename to src/stimulus/data/loaders.py diff --git a/src/stimulus/utils/launch_utils.py b/src/stimulus/utils/launch_utils.py index 13574a77..2d6f74ea 100644 --- a/src/stimulus/utils/launch_utils.py +++ b/src/stimulus/utils/launch_utils.py @@ -5,7 +5,7 @@ import os from typing import Union -import stimulus.data.experiments as exp +import stimulus.data.loaders as exp def import_class_from_file(file_path: str) -> type: diff --git a/tests/data/test_data_handlers.py b/tests/data/test_data_handlers.py index 7c4bfba7..725f275b 100644 --- a/tests/data/test_data_handlers.py +++ b/tests/data/test_data_handlers.py @@ -3,7 +3,7 @@ import pytest import yaml -from stimulus.data import experiments +from stimulus.data import loaders from stimulus.data.data_handlers import ( DatasetLoader, DatasetManager, @@ -82,7 +82,7 @@ def dump_single_split_config_to_disk() -> str: ## Loader fixtures @pytest.fixture -def encoder_loader(generate_sub_configs: list[YamlConfigDict]) -> experiments.EncoderLoader: +def encoder_loader(generate_sub_configs: list[YamlConfigDict]) -> loaders.EncoderLoader: """Create encoder loader with initialized encoders. Args: @@ -91,13 +91,13 @@ def encoder_loader(generate_sub_configs: list[YamlConfigDict]) -> experiments.En Returns: experiments.EncoderLoader: Initialized encoder loader """ - loader = experiments.EncoderLoader() + loader = loaders.EncoderLoader() loader.initialize_column_encoders_from_config(generate_sub_configs[0].columns) return loader @pytest.fixture -def transform_loader(generate_sub_configs: list[YamlConfigDict]) -> experiments.TransformLoader: +def transform_loader(generate_sub_configs: list[YamlConfigDict]) -> loaders.TransformLoader: """Create transform loader with initialized transformers. Args: @@ -106,13 +106,13 @@ def transform_loader(generate_sub_configs: list[YamlConfigDict]) -> experiments. 
Returns: experiments.TransformLoader: Initialized transform loader """ - loader = experiments.TransformLoader() + loader = loaders.TransformLoader() loader.initialize_column_data_transformers_from_config(generate_sub_configs[0].transforms) return loader @pytest.fixture -def split_loader(generate_sub_configs: list[YamlConfigDict]) -> experiments.SplitLoader: +def split_loader(generate_sub_configs: list[YamlConfigDict]) -> loaders.SplitLoader: """Create split loader with initialized splitter. Args: @@ -121,7 +121,7 @@ def split_loader(generate_sub_configs: list[YamlConfigDict]) -> experiments.Spli Returns: experiments.SplitLoader: Initialized split loader """ - loader = experiments.SplitLoader() + loader = loaders.SplitLoader() loader.initialize_splitter_from_config(generate_sub_configs[0].split) return loader @@ -166,21 +166,21 @@ def test_dataset_manager_get_transform_logic(dump_single_split_config_to_disk: s # Test EncodeManager def test_encode_manager_init() -> None: """Test initialization of EncodeManager.""" - encoder_loader = experiments.EncoderLoader() + encoder_loader = loaders.EncoderLoader() manager = EncodeManager(encoder_loader) assert hasattr(manager, "encoder_loader") def test_encode_manager_initialize_encoders() -> None: """Test encoder initialization.""" - encoder_loader = experiments.EncoderLoader() + encoder_loader = loaders.EncoderLoader() manager = EncodeManager(encoder_loader) assert hasattr(manager, "encoder_loader") def test_encode_manager_encode_numeric() -> None: """Test numeric encoding.""" - encoder_loader = experiments.EncoderLoader() + encoder_loader = loaders.EncoderLoader() intencoder = encoder_loader.get_encoder("NumericEncoder") encoder_loader.set_encoder_as_attribute("test_col", intencoder) manager = EncodeManager(encoder_loader) @@ -192,21 +192,21 @@ def test_encode_manager_encode_numeric() -> None: # Test TransformManager def test_transform_manager_init() -> None: """Test initialization of TransformManager.""" - transform_loader = 
experiments.TransformLoader() + transform_loader = loaders.TransformLoader() manager = TransformManager(transform_loader) assert hasattr(manager, "transform_loader") def test_transform_manager_initialize_transforms() -> None: """Test transform initialization.""" - transform_loader = experiments.TransformLoader() + transform_loader = loaders.TransformLoader() manager = TransformManager(transform_loader) assert hasattr(manager, "transform_loader") def test_transform_manager_transform_column() -> None: """Test column transformation.""" - transform_loader = experiments.TransformLoader() + transform_loader = loaders.TransformLoader() dummy_config = YamlTransform( transformation_name="GaussianNoise", columns=[ @@ -230,19 +230,19 @@ def test_transform_manager_transform_column() -> None: # Test SplitManager -def test_split_manager_init(split_loader: experiments.SplitLoader) -> None: +def test_split_manager_init(split_loader: loaders.SplitLoader) -> None: """Test initialization of SplitManager.""" manager = SplitManager(split_loader) assert hasattr(manager, "split_loader") -def test_split_manager_initialize_splits(split_loader: experiments.SplitLoader) -> None: +def test_split_manager_initialize_splits(split_loader: loaders.SplitLoader) -> None: """Test split initialization.""" manager = SplitManager(split_loader) assert hasattr(manager, "split_loader") -def test_split_manager_apply_split(split_loader: experiments.SplitLoader) -> None: +def test_split_manager_apply_split(split_loader: loaders.SplitLoader) -> None: """Test applying splits to data.""" manager = SplitManager(split_loader) data = {"col": range(100)} @@ -271,7 +271,7 @@ def test_dataset_processor_init( def test_dataset_processor_apply_split( dump_single_split_config_to_disk: str, titanic_csv_path: str, - split_loader: experiments.SplitLoader, + split_loader: loaders.SplitLoader, ) -> None: """Test applying splits in DatasetProcessor.""" processor = DatasetProcessor( @@ -288,7 +288,7 @@ def 
test_dataset_processor_apply_split( def test_dataset_processor_apply_transformation_group( dump_single_split_config_to_disk: str, titanic_csv_path: str, - transform_loader: experiments.TransformLoader, + transform_loader: loaders.TransformLoader, ) -> None: """Test applying transformation groups.""" processor = DatasetProcessor( @@ -318,7 +318,7 @@ def test_dataset_processor_apply_transformation_group( def test_dataset_loader_init( dump_single_split_config_to_disk: str, titanic_csv_path: str, - encoder_loader: experiments.EncoderLoader, + encoder_loader: loaders.EncoderLoader, ) -> None: """Test initialization of DatasetLoader.""" loader = DatasetLoader( @@ -336,7 +336,7 @@ def test_dataset_loader_init( def test_dataset_loader_get_dataset( dump_single_split_config_to_disk: str, titanic_csv_path: str, - encoder_loader: experiments.EncoderLoader, + encoder_loader: loaders.EncoderLoader, ) -> None: """Test getting dataset from loader.""" loader = DatasetLoader( diff --git a/tests/data/test_experiment.py b/tests/data/test_experiment.py index da786677..15954217 100644 --- a/tests/data/test_experiment.py +++ b/tests/data/test_experiment.py @@ -3,7 +3,7 @@ import pytest import yaml -from stimulus.data import experiments +from stimulus.data import loaders from stimulus.data.encoding.encoders import AbstractEncoder from stimulus.data.splitters import splitters from stimulus.data.transform import data_transformation_generators @@ -74,7 +74,7 @@ def test_get_encoder(text_onehot_encoder_params: tuple[str, dict[str, str]]) -> Args: text_onehot_encoder_params: Tuple of encoder name and parameters """ - experiment = experiments.EncoderLoader() + experiment = loaders.EncoderLoader() encoder_name, encoder_params = text_onehot_encoder_params encoder = experiment.get_encoder(encoder_name, encoder_params) assert isinstance(encoder, AbstractEncoder) @@ -86,7 +86,7 @@ def test_set_encoder_as_attribute(text_onehot_encoder_params: tuple[str, dict[st Args: text_onehot_encoder_params: Tuple 
of encoder name and parameters """ - experiment = experiments.EncoderLoader() + experiment = loaders.EncoderLoader() encoder_name, encoder_params = text_onehot_encoder_params encoder = experiment.get_encoder(encoder_name, encoder_params) experiment.set_encoder_as_attribute("ciao", encoder) @@ -101,7 +101,7 @@ def test_build_experiment_class_encoder_dict(dna_experiment_sub_yaml: yaml_data. Args: dna_experiment_sub_yaml: DNA experiment sub-configuration """ - experiment = experiments.EncoderLoader() + experiment = loaders.EncoderLoader() config = dna_experiment_sub_yaml.columns experiment.initialize_column_encoders_from_config(config) assert hasattr(experiment, "hello") @@ -113,14 +113,14 @@ def test_build_experiment_class_encoder_dict(dna_experiment_sub_yaml: yaml_data. def test_get_data_transformer() -> None: """Test the get_data_transformer method of the TransformLoader class.""" - experiment = experiments.TransformLoader() + experiment = loaders.TransformLoader() transformer = experiment.get_data_transformer("ReverseComplement") assert isinstance(transformer, data_transformation_generators.ReverseComplement) def test_set_data_transformer_as_attribute() -> None: """Test the set_data_transformer_as_attribute method.""" - experiment = experiments.TransformLoader() + experiment = loaders.TransformLoader() transformer = experiment.get_data_transformer("ReverseComplement") experiment.set_data_transformer_as_attribute("col1", transformer) assert hasattr(experiment, "col1") @@ -135,7 +135,7 @@ def test_initialize_column_data_transformers_from_config( Args: dna_experiment_sub_yaml: DNA experiment sub-configuration """ - experiment = experiments.TransformLoader() + experiment = loaders.TransformLoader() config = dna_experiment_sub_yaml.transforms experiment.initialize_column_data_transformers_from_config(config) @@ -152,7 +152,7 @@ def test_initialize_splitter_from_config( Args: dna_experiment_sub_yaml: DNA experiment sub-configuration """ - experiment = 
experiments.SplitLoader() + experiment = loaders.SplitLoader() config = dna_experiment_sub_yaml.split experiment.initialize_splitter_from_config(config) assert hasattr(experiment, "split") diff --git a/tests/data/test_handlertorch.py b/tests/data/test_handlertorch.py index 4e4b20ca..d121ed39 100644 --- a/tests/data/test_handlertorch.py +++ b/tests/data/test_handlertorch.py @@ -5,7 +5,7 @@ import pytest import yaml -from stimulus.data import experiments, handlertorch +from stimulus.data import handlertorch, loaders from stimulus.utils import yaml_data @@ -44,9 +44,9 @@ def titanic_yaml_config(titanic_config_path: str) -> dict: @pytest.fixture -def titanic_encoder_loader(titanic_yaml_config: yaml_data.YamlSubConfigDict) -> experiments.EncoderLoader: +def titanic_encoder_loader(titanic_yaml_config: yaml_data.YamlSubConfigDict) -> loaders.EncoderLoader: """Get Titanic encoder loader.""" - loader = experiments.EncoderLoader() + loader = loaders.EncoderLoader() loader.initialize_column_encoders_from_config(titanic_yaml_config.columns) return loader @@ -54,7 +54,7 @@ def titanic_encoder_loader(titanic_yaml_config: yaml_data.YamlSubConfigDict) -> def test_init_handlertorch( titanic_config_path: str, titanic_csv_path: str, - titanic_encoder_loader: experiments.EncoderLoader, + titanic_encoder_loader: loaders.EncoderLoader, ) -> None: """Test TorchDataset initialization.""" handlertorch.TorchDataset( @@ -67,7 +67,7 @@ def test_init_handlertorch( def test_len_handlertorch( titanic_config_path: str, titanic_csv_path: str, - titanic_encoder_loader: experiments.EncoderLoader, + titanic_encoder_loader: loaders.EncoderLoader, ) -> None: """Test length functionality of TorchDataset. @@ -87,7 +87,7 @@ def test_len_handlertorch( def test_getitem_handlertorch_slice( titanic_config_path: str, titanic_csv_path: str, - titanic_encoder_loader: experiments.EncoderLoader, + titanic_encoder_loader: loaders.EncoderLoader, ) -> None: """Test slice indexing functionality of TorchDataset. 
@@ -108,7 +108,7 @@ def test_getitem_handlertorch_slice( def test_getitem_handlertorch_int( titanic_config_path: str, titanic_csv_path: str, - titanic_encoder_loader: experiments.EncoderLoader, + titanic_encoder_loader: loaders.EncoderLoader, ) -> None: """Test integer indexing functionality of TorchDataset. From 81bf5d2b2d3bfe87f8cb42ec4279b42534bf399a Mon Sep 17 00:00:00 2001 From: Igor Trujnara Date: Wed, 29 Jan 2025 15:57:59 +0100 Subject: [PATCH 3/3] test (typing): Add tests for typing. Add tests for the typing module. --- tests/typing/__init__.py | 1 + tests/typing/test_typing.py | 94 +++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 tests/typing/__init__.py create mode 100644 tests/typing/test_typing.py diff --git a/tests/typing/__init__.py b/tests/typing/__init__.py new file mode 100644 index 00000000..a987b788 --- /dev/null +++ b/tests/typing/__init__.py @@ -0,0 +1 @@ +"""Test directory for the typing module.""" diff --git a/tests/typing/test_typing.py b/tests/typing/test_typing.py new file mode 100644 index 00000000..fed2f568 --- /dev/null +++ b/tests/typing/test_typing.py @@ -0,0 +1,94 @@ +"""The test suite for the typing module. + +As the typing module only contains types, the tests only check imports. 
+""" +# ruff: noqa: F401 + +import pytest + + +def test_analysis_types() -> None: + """Test the analysis types.""" + try: + from stimulus.typing import Analysis, AnalysisPerformanceTune, AnalysisRobustness + except ImportError: + pytest.fail("Failed to import Analysis types") + + +def test_data_handlers_types() -> None: + """Test the data handlers types.""" + try: + from stimulus.typing import ( + DatasetHandler, + DatasetLoader, + DatasetManager, + DatasetProcessor, + EncodeManager, + SplitManager, + TransformManager, + ) + except ImportError: + pytest.fail("Failed to import Data Handlers types") + + +def test_learner_types() -> None: + """Test the learner types.""" + try: + from stimulus.typing import ( + PredictWrapper, + RayTuneMetrics, + RayTuneOptimizer, + RayTuneResult, + TuneModel, + TuneParser, + TuneWrapper, + ) + except ImportError: + pytest.fail("Failed to import Learner types") + + +def test_yaml_data_types() -> None: + """Test the YAML data types.""" + try: + from stimulus.typing import ( + YamlColumns, + YamlColumnsEncoder, + YamlConfigDict, + YamlGlobalParams, + YamlSchema, + YamlSplit, + YamlSubConfigDict, + YamlTransform, + YamlTransformColumns, + YamlTransformColumnsTransformation, + ) + except ImportError: + pytest.fail("Failed to import YAML Data types") + + +def test_yaml_model_schema_types() -> None: + """Test the YAML model schema types.""" + try: + from stimulus.typing import ( + CustomTunableParameter, + Data, + Loss, + Model, + RayTuneModel, + RunParams, + Scheduler, + TunableParameter, + Tune, + TuneParams, + YamlRayConfigLoader, + ) + except ImportError: + pytest.fail("Failed to import YAML Model Schema types") + + +def test_type_aliases() -> None: + """Test the type aliases.""" + try: + from stimulus.typing import DataManager, Loader, RayTuneData, YamlData + except ImportError: + pytest.fail("Failed to import Type Aliases")