diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2a0fb7d..a91edc5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -43,10 +43,10 @@ repos:
   - id: flake8
     additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ]

-- repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.1.11
-  hooks:
-  - id: ruff
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.1.11
+  hooks:
+  - id: ruff

 - repo: https://github.com/pre-commit/mirrors-isort
   rev: v5.10.1
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c1c99a1..f28e174 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -28,18 +28,15 @@ Next, the Melusine team, or the community, will give you a feedback on whether y

 ## Fork to code in your personal Melusine repo

-The first step is to get our MAIF repository on your personal GitHub repositories. To do so, use the "Fork" button.
+The first step is to get our MAIF repository into your personal GitHub repositories. To do so, use the "Fork" button on the GitHub landing page of the Melusine project.

-fork this repository

 ## Clone your forked repository

-clone your forked repository
-
 Click on the "Code" button to copy the url of your repository, and next, you can paste this url to clone your forked repository.

 ```
-git clone https://github.com/YOUR_GITHUB_PROFILE/melusine.git
+git clone https://github.com/your-github-username/melusine.git
 ```

 ## Make sure that your repository is up-to-date
@@ -137,9 +134,7 @@ Your branch is now available on your remote forked repository, with your changes

 A pull request allows you to ask the Melusine team to review your changes, and merge your changes into the master branch of the official repository.

-To create one, on the top of your forked repository, you will find a button "Compare & pull request"
-
-pull request
+To create one, at the top of your forked repository, you will find a "Compare & pull request" button.

 As you can see, you can select on the right side which branch of your forked repository you want to associate to the pull request.
@@ -150,12 +145,7 @@ On the left side, you will find the official Melusine repository. Due to increas
 - Head repository: your-github-username/melusine
 - Head branch: your-contribution-branch

-clone your forked repository
-
 Once you have selected the right branch, let's create the pull request with the green button "Create pull request".
-
-clone your forked repository
-
 In the description, a template is initialized with all information you have to give about what you are doing on what your PR is doing. Please follow this to write your PR content.
diff --git a/melusine/__init__.py b/melusine/__init__.py
index e33be5c..49745e1 100644
--- a/melusine/__init__.py
+++ b/melusine/__init__.py
@@ -8,7 +8,7 @@

 __all__ = ["config"]

-VERSION = (3, 1, 0)
+VERSION = (3, 1, 1)
 __version__ = ".".join(map(str, VERSION))

 # ------------------------------- #
diff --git a/melusine/pipeline.py b/melusine/pipeline.py
index bead973..3a3ed10 100644
--- a/melusine/pipeline.py
+++ b/melusine/pipeline.py
@@ -8,6 +8,7 @@
 import copy
 import importlib
+import warnings
 from typing import Iterable, TypeVar

 from sklearn.pipeline import Pipeline

@@ -35,7 +36,7 @@ class MelusinePipeline(Pipeline):
     """

     OBJ_NAME: str = "name"
-    OBJ_KEY: str = "config_key"
+    OBJ_CONFIG_KEY: str = "config_key"
     OBJ_PARAMS: str = "parameters"
     STEPS_KEY: str = "steps"
     OBJ_CLASS: str = "class_name"
@@ -194,10 +195,10 @@ def from_config(
         # Get config dict
         if config_key and not config_dict:
             raw_config_dict = config[config_key]
-            config_dict = cls.parse_pipeline_config(raw_config_dict)
+            config_dict = cls.parse_pipeline_config(config_dict=raw_config_dict)

         elif config_dict and not config_key:
-            config_dict = cls.parse_pipeline_config(config_dict)
+            config_dict = cls.parse_pipeline_config(config_dict=config_dict)

         else:
             raise ValueError("You should specify one and only one of 'config_key' and 'config_dict'")
@@ -218,10 +219,33 @@ def from_config(
             # Step arguments
             obj_params = obj_meta[cls.OBJ_PARAMS]

-            if issubclass(obj_class, IoMixin):
+            # Warning suppression option
+            suppress_warnings: bool = kwargs.pop("suppress_warnings", False)
+
+            try:
                 obj = obj_class.from_config(config_dict=obj_params)
-            else:
-                raise TypeError(f"Object {obj_class} does not inherit from the SaverMixin class")  # pragma: no cover
+                if not issubclass(obj_class, IoMixin) and not suppress_warnings:
+                    type_warn_msg: str = f"""
+                    It seems you are not using a melusine object in your melusine pipeline, but object '{obj_class}' (class {type(obj_class)}) for step '{step_name}'.
+                    The expected behavior is not guaranteed and may break in future versions of melusine.
+
+                    Recommended usage:
+                    - Exclusive use of melusine objects (MelusineTransformer, MelusineDetector, ...) in a melusine pipeline
+
+                    To suppress this warning, instantiate the melusine pipeline with the suppress_warnings argument set to True (MelusinePipeline.from_config(..., suppress_warnings=True)).
+
+                    Visit the Melusine open-source project (https://github.com/MAIF/melusine) and the documentation for more information.
+                    """
+                    warnings.warn(message=type_warn_msg, category=DeprecationWarning, stacklevel=2)
+            except AttributeError as err:
+                if not hasattr(obj_class, "from_config"):
+                    raise AttributeError(
+                        f"Object '{obj_class}' does not implement a 'from_config' method. Consider inheriting from the Melusine IoMixin class or using a MelusineTransformer class."
+                    ) from None
+                else:
+                    raise AttributeError(f"Error loading class '{obj_class}':\n{str(err)}").with_traceback(
+                        err.__traceback__
+                    ) from err

             # Add step to pipeline
             steps.append((step_name, obj))
@@ -251,17 +275,17 @@ def validate_step_config(cls, step: dict[str, Any]) -> dict[str, Any]:
                 f"Pipeline step conf should have a {cls.OBJ_MODULE} key and a {cls.OBJ_CLASS} key."
             )

-        if step.get(cls.OBJ_KEY):
+        if step.get(cls.OBJ_CONFIG_KEY):
             return {
                 cls.OBJ_CLASS: step[cls.OBJ_CLASS],
                 cls.OBJ_MODULE: step[cls.OBJ_MODULE],
-                cls.OBJ_KEY: step[cls.OBJ_KEY],
+                cls.OBJ_CONFIG_KEY: step[cls.OBJ_CONFIG_KEY],
             }

         if not step.get(cls.OBJ_NAME) or not step.get(cls.OBJ_PARAMS):
             raise PipelineConfigurationError(
-                f"Pipeline step conf should have a {cls.OBJ_NAME} key and a {cls.OBJ_KEY} key "
-                f"(unless a {cls.OBJ_KEY} is specified)."
+                f"Pipeline step conf should have a {cls.OBJ_NAME} key and a {cls.OBJ_PARAMS} key "
+                f"(unless a {cls.OBJ_CONFIG_KEY} is specified)."
             )

         if not isinstance(step[cls.OBJ_PARAMS], dict):
@@ -325,11 +349,12 @@ def parse_pipeline_config(cls, config_dict: dict[str, Any]) -> dict[str, Any]:
         steps = []
         for step in config_dict[cls.STEPS_KEY]:
             # Step defined from the config
-            config_key = step.get(cls.OBJ_KEY)
+            config_key = step.get(cls.OBJ_CONFIG_KEY)
+            config_name = step.get(cls.OBJ_NAME)
             if config_key:
-                # Use config key as step name
-                step[cls.OBJ_NAME] = config_key
-                _ = step.pop(cls.OBJ_KEY)
+                # Use the config key as the step name if no explicit name is provided
+                step[cls.OBJ_NAME] = config_name or config_key
+                _ = step.pop(cls.OBJ_CONFIG_KEY)

                 # Update step parameters
                 step[cls.OBJ_PARAMS] = config[config_key]
diff --git a/tests/pipeline/test_pipeline_basic.py b/tests/pipeline/test_pipeline_basic.py
index d2b2f25..73db444 100644
--- a/tests/pipeline/test_pipeline_basic.py
+++ b/tests/pipeline/test_pipeline_basic.py
@@ -1,6 +1,8 @@
 """
 Example script to fit a minimal preprocessing pipeline
 """
+from contextlib import nullcontext as does_not_raise
+from unittest import mock

 import pandas as pd
 import pytest
@@ -34,7 +36,7 @@ def test_pipeline_basic(dataframe_basic):
 @pytest.mark.usefixtures("use_test_config")
 def test_pipeline_from_config(dataframe_basic):
     """
-    Train a pipeline using transformers defined in a pipeline config file.
+    Train a pipeline using scikit-learn transformers defined in a pipeline config file.
     """
     # Input data
     df = dataframe_basic.copy()
@@ -79,7 +81,7 @@ def test_pipeline_from_config(dataframe_basic):
 @pytest.mark.usefixtures("use_test_config")
 def test_pipeline_from_dict(dataframe_basic):
     """
-    Train a pipeline using transformers defined in a pipeline config file.
+    Train a pipeline using scikit-learn transformers defined in a pipeline config file.
     """
     # Input data
     df = dataframe_basic.copy()
@@ -125,7 +127,7 @@ def test_pipeline_from_dict(dataframe_basic):
 @pytest.mark.usefixtures("use_test_config")
 def test_missing_config_key():
     """
-    Train a pipeline using transformers defined in a pipeline config file.
+    Train a pipeline using scikit-learn transformers defined in a pipeline config file.
     """
     # Set config keys
     normalizer_name = "normalizer"
@@ -159,7 +161,7 @@ def test_missing_config_key():
 @pytest.mark.usefixtures("use_test_config")
 def test_invalid_config_key():
     """
-    Train a pipeline using transformers defined in a pipeline config file.
+    Train a pipeline using scikit-learn transformers defined in a pipeline config file.
     """

     incorrect_config_key = "INCORRECT_CONFIG_KEY"
@@ -263,7 +265,7 @@ def test_invalid_config_key():
 )
 def test_pipeline_config_error(pipeline_conf):
     """
-    Train a pipeline using transformers defined in a pipeline config file.
+    Train a pipeline using scikit-learn transformers defined in a pipeline config file.
""" # Create pipeline from a json config file (using config key "my_pipeline") with pytest.raises(PipelineConfigurationError): @@ -313,3 +315,117 @@ def test_missing_input_field(dataframe_basic): # Fit the pipeline and transform the data with pytest.raises(ValueError, match=rf"(?s){tokenizer_name}.*{missing_field_name}"): pipe.transform(df) + + +@pytest.mark.usefixtures("use_test_config") +def test_pipeline_from_config_with_error(): + """ + Instanciate a Melusine Pipeline not using scikit-learn transformers defined in a pipeline config file (raising errors). + """ + # Set config keys + normalizer_key = "test_normalizer" + pipeline_key = "test_pipeline" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processors", + "config_key": normalizer_key, + }, + ] + } + + test_conf_dict = config.dict() + test_conf_dict[pipeline_key] = conf_pipeline_basic + config.reset(config_dict=test_conf_dict) + + with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, pytest.raises( + AttributeError, match=r"Object '.+' does not implement 'from_config'.+to inherit it from the Melusine IoMixin.+" + ): + + class WrongNormalizerWithoutMethodFromConfig: + def __init__(self, form, input_columns, lowercase, output_columns) -> None: + """Mock class without from_config method so we raise error.""" + return None + + mock_import_class.return_value = WrongNormalizerWithoutMethodFromConfig + + # Create pipeline from a json config file + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True) + + with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, pytest.raises( + AttributeError + ) as e: + + class WrongNormalizerWithMethodFromConfig: + def __init__(self) -> None: + return None + + @classmethod + def from_config(cls, config_dict: dict): + """Mock method from_config so we have a valid 'duck typed' class but with attribute error.""" + + return cls.class_constant_not_defined_raising_attribute_error + + mock_import_class.return_value = WrongNormalizerWithMethodFromConfig + + # Create pipeline from a json config file + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True) + + assert "does not implement 'from_config'" not in str(e) + + +@pytest.mark.usefixtures("use_test_config") +def test_pipeline_from_config_with_warning(recwarn): + """ + Instanciate a Melusine Pipeline not using scikit-learn transformers defined in a pipeline config file (raising warnings). 
+ """ + # Set config keys + normalizer_key = "test_normalizer" + pipeline_key = "test_pipeline" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processors", + "config_key": normalizer_key, + }, + ] + } + + test_conf_dict = config.dict() + test_conf_dict[pipeline_key] = conf_pipeline_basic + config.reset(config_dict=test_conf_dict) + + with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class: + + class WrongNormalizerWithMethodFromConfig: + def __init__(self) -> None: + return None + + @classmethod + def from_config(cls, config_dict: dict): + """Mock method from_config so we have a valid 'duck typed' class but with invalid functional behavior.""" + return cls + + mock_import_class.return_value = WrongNormalizerWithMethodFromConfig + + with does_not_raise(): + # Create pipeline from a json config file and suppress warning + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=True) + + assert len(recwarn) == 0 + + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=False) + + assert len(recwarn) == 1 + + with pytest.warns( + DeprecationWarning, match=r".+It seems you are not using a melusine object in your melusine pipeline.+" + ): + # Create pipeline from a json config file + _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=False)