diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2a0fb7d..a91edc5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -43,10 +43,10 @@ repos:
- id: flake8
additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ]
-- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.1.11
- hooks:
- - id: ruff
+- repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.1.11
+ hooks:
+ - id: ruff
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c1c99a1..f28e174 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -28,18 +28,15 @@ Next, the Melusine team, or the community, will give you a feedback on whether y
## Fork to code in your personal Melusine repo
-The first step is to get our MAIF repository on your personal GitHub repositories. To do so, use the "Fork" button.
+The first step is to get our MAIF repository on your personal GitHub repositories. To do so, use the "Fork" button on Github landing page of melusine project.
-
## Clone your forked repository
-
-
Click on the "Code" button to copy the url of your repository, and next, you can paste this url to clone your forked repository.
```
-git clone https://github.com/YOUR_GITHUB_PROFILE/melusine.git
+git clone https://github.com//melusine.git
```
## Make sure that your repository is up-to-date
@@ -137,9 +134,7 @@ Your branch is now available on your remote forked repository, with your changes
A pull request allows you to ask the Melusine team to review your changes, and merge your changes into the master branch of the official repository.
-To create one, on the top of your forked repository, you will find a button "Compare & pull request"
-
-
+To create one, on the top of your forked repository, you will find a button "Compare & pull request".
As you can see, you can select on the right side which branch of your forked repository you want to associate to the pull request.
@@ -150,12 +145,7 @@ On the left side, you will find the official Melusine repository. Due to increas
- Head repository: your-github-username/melusine
- Head branch: your-contribution-branch
-
-
Once you have selected the right branch, let's create the pull request with the green button "Create pull request".
-
-
-
In the description, a template is initialized with all information you have to give about what you are doing on what your PR is doing.
Please follow this to write your PR content.
diff --git a/melusine/__init__.py b/melusine/__init__.py
index e33be5c..49745e1 100644
--- a/melusine/__init__.py
+++ b/melusine/__init__.py
@@ -8,7 +8,7 @@
__all__ = ["config"]
-VERSION = (3, 1, 0)
+VERSION = (3, 1, 1)
__version__ = ".".join(map(str, VERSION))
# ------------------------------- #
diff --git a/melusine/pipeline.py b/melusine/pipeline.py
index bead973..3a3ed10 100644
--- a/melusine/pipeline.py
+++ b/melusine/pipeline.py
@@ -8,6 +8,7 @@
import copy
import importlib
+import warnings
from typing import Iterable, TypeVar
from sklearn.pipeline import Pipeline
@@ -35,7 +36,7 @@ class MelusinePipeline(Pipeline):
"""
OBJ_NAME: str = "name"
- OBJ_KEY: str = "config_key"
+ OBJ_CONFIG_KEY: str = "config_key"
OBJ_PARAMS: str = "parameters"
STEPS_KEY: str = "steps"
OBJ_CLASS: str = "class_name"
@@ -194,10 +195,10 @@ def from_config(
# Get config dict
if config_key and not config_dict:
raw_config_dict = config[config_key]
- config_dict = cls.parse_pipeline_config(raw_config_dict)
+ config_dict = cls.parse_pipeline_config(config_dict=raw_config_dict)
elif config_dict and not config_key:
- config_dict = cls.parse_pipeline_config(config_dict)
+ config_dict = cls.parse_pipeline_config(config_dict=config_dict)
else:
raise ValueError("You should specify one and only one of 'config_key' and 'config_dict'")
@@ -218,10 +219,33 @@ def from_config(
# Step arguments
obj_params = obj_meta[cls.OBJ_PARAMS]
- if issubclass(obj_class, IoMixin):
+ # Verbose option
+ suppress_warnings: Any = kwargs.pop("suppress_warnings", False)
+
+ try:
obj = obj_class.from_config(config_dict=obj_params)
- else:
- raise TypeError(f"Object {obj_class} does not inherit from the SaverMixin class") # pragma: no cover
+ if not issubclass(obj_class, IoMixin) and not suppress_warnings:
+ type_warn_msg: str = f"""
+ It seems you are not using a melusine object in your melusine pipeline, but object '{obj_class}' (class {type(obj_class)}) for step '{step_name}'.
+ The expected behavior is not guaranteed and can break in future version of melusine.
+
+ Recommended usage:
+ - Exclusive usage of melusine class Pipeline with melusine objects (MelusineTransformer, MelusineDetector...)
+
+ To suppress this warning, instanciate melusine Pipeline with suppress_warnings argument at True (MelusinePipeline.from_config(..., suppress_warnings=True)).
+
+ Visit Melusine Open-Source project: https://github.com/MAIF/melusine and the documentation for more information.
+ """
+ warnings.warn(message=type_warn_msg, category=DeprecationWarning, stacklevel=2)
+ except AttributeError as err:
+ if not hasattr(obj_class, "from_config"):
+ raise AttributeError(
+ f"Object '{obj_class}' does not implement 'from_config' method, maybe consider to inherit it from the Melusine IoMixin class or use a MelusineTransformer class."
+ ) from None
+ else:
+ raise AttributeError(f"Error in loading class: '{obj_class}'\\n{str(err)}").with_traceback(
+ err.__traceback__
+ ) from err
# Add step to pipeline
steps.append((step_name, obj))
@@ -251,17 +275,17 @@ def validate_step_config(cls, step: dict[str, Any]) -> dict[str, Any]:
f"Pipeline step conf should have a {cls.OBJ_MODULE} key and a {cls.OBJ_CLASS} key."
)
- if step.get(cls.OBJ_KEY):
+ if step.get(cls.OBJ_CONFIG_KEY):
return {
cls.OBJ_CLASS: step[cls.OBJ_CLASS],
cls.OBJ_MODULE: step[cls.OBJ_MODULE],
- cls.OBJ_KEY: step[cls.OBJ_KEY],
+ cls.OBJ_CONFIG_KEY: step[cls.OBJ_CONFIG_KEY],
}
if not step.get(cls.OBJ_NAME) or not step.get(cls.OBJ_PARAMS):
raise PipelineConfigurationError(
- f"Pipeline step conf should have a {cls.OBJ_NAME} key and a {cls.OBJ_KEY} key "
- f"(unless a {cls.OBJ_KEY} is specified)."
+ f"Pipeline step conf should have a {cls.OBJ_NAME} key and a {cls.OBJ_CONFIG_KEY} key "
+ f"(unless a {cls.OBJ_CONFIG_KEY} is specified)."
)
if not isinstance(step[cls.OBJ_PARAMS], dict):
@@ -325,11 +349,12 @@ def parse_pipeline_config(cls, config_dict: dict[str, Any]) -> dict[str, Any]:
steps = []
for step in config_dict[cls.STEPS_KEY]:
# Step defined from the config
- config_key = step.get(cls.OBJ_KEY)
+ config_key = step.get(cls.OBJ_CONFIG_KEY)
+ config_name = step.get(cls.OBJ_NAME)
if config_key:
- # Use config key as step name
- step[cls.OBJ_NAME] = config_key
- _ = step.pop(cls.OBJ_KEY)
+ # Use config key as step name if no name is provided in config
+ step[cls.OBJ_NAME] = config_name or config_key
+ _ = step.pop(cls.OBJ_CONFIG_KEY)
# Update step parameters
step[cls.OBJ_PARAMS] = config[config_key]
diff --git a/tests/pipeline/test_pipeline_basic.py b/tests/pipeline/test_pipeline_basic.py
index d2b2f25..73db444 100644
--- a/tests/pipeline/test_pipeline_basic.py
+++ b/tests/pipeline/test_pipeline_basic.py
@@ -1,6 +1,8 @@
"""
Example script to fit a minimal preprocessing pipeline
"""
+from contextlib import nullcontext as does_not_raise
+from unittest import mock
import pandas as pd
import pytest
@@ -34,7 +36,7 @@ def test_pipeline_basic(dataframe_basic):
@pytest.mark.usefixtures("use_test_config")
def test_pipeline_from_config(dataframe_basic):
"""
- Train a pipeline using transformers defined in a pipeline config file.
+ Train a pipeline using scikit-learn transformers defined in a pipeline config file.
"""
# Input data
df = dataframe_basic.copy()
@@ -79,7 +81,7 @@ def test_pipeline_from_config(dataframe_basic):
@pytest.mark.usefixtures("use_test_config")
def test_pipeline_from_dict(dataframe_basic):
"""
- Train a pipeline using transformers defined in a pipeline config file.
+ Train a pipeline using scikit-learn transformers defined in a pipeline config file.
"""
# Input data
df = dataframe_basic.copy()
@@ -125,7 +127,7 @@ def test_pipeline_from_dict(dataframe_basic):
@pytest.mark.usefixtures("use_test_config")
def test_missing_config_key():
"""
- Train a pipeline using transformers defined in a pipeline config file.
+ Train a pipeline using scikit-learn transformers defined in a pipeline config file.
"""
# Set config keys
normalizer_name = "normalizer"
@@ -159,7 +161,7 @@ def test_missing_config_key():
@pytest.mark.usefixtures("use_test_config")
def test_invalid_config_key():
"""
- Train a pipeline using transformers defined in a pipeline config file.
+ Train a pipeline using scikit-learn transformers defined in a pipeline config file.
"""
incorrect_config_key = "INCORRECT_CONFIG_KEY"
@@ -263,7 +265,7 @@ def test_invalid_config_key():
)
def test_pipeline_config_error(pipeline_conf):
"""
- Train a pipeline using transformers defined in a pipeline config file.
+ Train a pipeline using scikit-learn transformers defined in a pipeline config file.
"""
# Create pipeline from a json config file (using config key "my_pipeline")
with pytest.raises(PipelineConfigurationError):
@@ -313,3 +315,117 @@ def test_missing_input_field(dataframe_basic):
# Fit the pipeline and transform the data
with pytest.raises(ValueError, match=rf"(?s){tokenizer_name}.*{missing_field_name}"):
pipe.transform(df)
+
+
+@pytest.mark.usefixtures("use_test_config")
+def test_pipeline_from_config_with_error():
+ """
+ Instanciate a Melusine Pipeline not using scikit-learn transformers defined in a pipeline config file (raising errors).
+ """
+ # Set config keys
+ normalizer_key = "test_normalizer"
+ pipeline_key = "test_pipeline"
+
+ # Pipeline configuration
+ conf_pipeline_basic = {
+ "steps": [
+ {
+ "class_name": "Normalizer",
+ "module": "melusine.processors",
+ "config_key": normalizer_key,
+ },
+ ]
+ }
+
+ test_conf_dict = config.dict()
+ test_conf_dict[pipeline_key] = conf_pipeline_basic
+ config.reset(config_dict=test_conf_dict)
+
+ with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, pytest.raises(
+ AttributeError, match=r"Object '.+' does not implement 'from_config'.+to inherit it from the Melusine IoMixin.+"
+ ):
+
+ class WrongNormalizerWithoutMethodFromConfig:
+ def __init__(self, form, input_columns, lowercase, output_columns) -> None:
+ """Mock class without from_config method so we raise error."""
+ return None
+
+ mock_import_class.return_value = WrongNormalizerWithoutMethodFromConfig
+
+ # Create pipeline from a json config file
+ _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True)
+
+ with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class, pytest.raises(
+ AttributeError
+ ) as e:
+
+ class WrongNormalizerWithMethodFromConfig:
+ def __init__(self) -> None:
+ return None
+
+ @classmethod
+ def from_config(cls, config_dict: dict):
+ """Mock method from_config so we have a valid 'duck typed' class but with attribute error."""
+
+ return cls.class_constant_not_defined_raising_attribute_error
+
+ mock_import_class.return_value = WrongNormalizerWithMethodFromConfig
+
+ # Create pipeline from a json config file
+ _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True)
+
+ assert "does not implement 'from_config'" not in str(e)
+
+
+@pytest.mark.usefixtures("use_test_config")
+def test_pipeline_from_config_with_warning(recwarn):
+ """
+ Instanciate a Melusine Pipeline not using scikit-learn transformers defined in a pipeline config file (raising warnings).
+ """
+ # Set config keys
+ normalizer_key = "test_normalizer"
+ pipeline_key = "test_pipeline"
+
+ # Pipeline configuration
+ conf_pipeline_basic = {
+ "steps": [
+ {
+ "class_name": "Normalizer",
+ "module": "melusine.processors",
+ "config_key": normalizer_key,
+ },
+ ]
+ }
+
+ test_conf_dict = config.dict()
+ test_conf_dict[pipeline_key] = conf_pipeline_basic
+ config.reset(config_dict=test_conf_dict)
+
+ with mock.patch("melusine.pipeline.MelusinePipeline.import_class") as mock_import_class:
+
+ class WrongNormalizerWithMethodFromConfig:
+ def __init__(self) -> None:
+ return None
+
+ @classmethod
+ def from_config(cls, config_dict: dict):
+ """Mock method from_config so we have a valid 'duck typed' class but with invalid functional behavior."""
+ return cls
+
+ mock_import_class.return_value = WrongNormalizerWithMethodFromConfig
+
+ with does_not_raise():
+ # Create pipeline from a json config file and suppress warning
+ _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=True)
+
+ assert len(recwarn) == 0
+
+ _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=False)
+
+ assert len(recwarn) == 1
+
+ with pytest.warns(
+ DeprecationWarning, match=r".+It seems you are not using a melusine object in your melusine pipeline.+"
+ ):
+ # Create pipeline from a json config file
+ _ = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True, suppress_warnings=False)