Skip to content

Commit

Permalink
Merge branch 'master' into feature/transformers_integration
Browse files Browse the repository at this point in the history
  • Loading branch information
meriemjebali authored Jan 26, 2025
2 parents 5e637a4 + b205b4a commit 1f65ab3
Show file tree
Hide file tree
Showing 30 changed files with 1,701 additions and 1,136 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ repos:
- id: flake8
additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11
hooks:
- id: ruff
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11
hooks:
- id: ruff

- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.10.1
Expand Down
16 changes: 3 additions & 13 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,15 @@ Next, the Melusine team, or the community, will give you a feedback on whether y

## Fork to code in your personal Melusine repo

The first step is to get our MAIF repository on your personal GitHub repositories. To do so, use the "Fork" button.
The first step is to get our MAIF repository on your personal GitHub repositories. To do so, use the "Fork" button on the GitHub landing page of the Melusine project.

<img src="https://raw.githubusercontent.com/MAIF/melusine/master/docs/assets/images/contributing/fork_melusine.PNG" alt="fork this repository" />

## Clone your forked repository

<img align="right" width="300" src="https://raw.githubusercontent.com/MAIF/melusine/master/docs/assets/images/contributing/clone_melusine.PNG" alt="clone your forked repository" />

Click on the "Code" button to copy the url of your repository, and next, you can paste this url to clone your forked repository.

```
git clone https://github.com/YOUR_GITHUB_PROFILE/melusine.git
git clone https://github.com/<YOUR_GITHUB_PROFILE>/melusine.git
```

## Make sure that your repository is up-to-date
Expand Down Expand Up @@ -137,9 +134,7 @@ Your branch is now available on your remote forked repository, with your changes

A pull request allows you to ask the Melusine team to review your changes, and merge your changes into the master branch of the official repository.

To create one, on the top of your forked repository, you will find a button "Compare & pull request"

<img src="https://raw.githubusercontent.com/MAIF/melusine/master/docs/assets/images/contributing/melusine-compare-pr.png" alt="pull request" />
To create one, on the top of your forked repository, you will find a button "Compare & pull request".

As you can see, you can select on the right side which branch of your forked repository you want to associate to the pull request.

Expand All @@ -150,12 +145,7 @@ On the left side, you will find the official Melusine repository. Due to increas
- Head repository: your-github-username/melusine
- Head branch: your-contribution-branch

<img src="https://raw.githubusercontent.com/MAIF/melusine/master/docs/assets/images/contributing/melusine-pr-branch.png" alt="clone your forked repository" />

Once you have selected the right branch, let's create the pull request with the green button "Create pull request".

<img src="https://raw.githubusercontent.com/MAIF/melusine/master/docs/assets/images/contributing/melusine-pr-description.png" alt="clone your forked repository" />

In the description, a template is initialized with all the information you have to provide about what your PR is doing.

Please follow this to write your PR content.
Expand Down
4 changes: 0 additions & 4 deletions docs/tutorials/08_MelusineRegex.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ from melusine.base import MelusineRegex


class AnnoyingEmailsRegex(MelusineRegex):

@property
def positive(self) -> Union[str, Dict[str, str]]:
return dict(
Expand Down Expand Up @@ -65,7 +64,6 @@ from melusine.base import MelusineRegex


class AnnoyingEmailsRegex(MelusineRegex):

@property
def positive(self) -> Union[str, Dict[str, str]]:
return dict(
Expand Down Expand Up @@ -192,7 +190,6 @@ from melusine.base import MelusineRegex


class AnnoyingEmailsRegex(MelusineRegex):

@property
def positive(self) -> Union[str, Dict[str, str]]:
return dict(
Expand Down Expand Up @@ -237,7 +234,6 @@ That is where neutral regex can be of use. Whenever a neutral regex is matched, i

```python
class IfritAlertRegex(MelusineRegex):

@property
def positive(self) -> Union[str, Dict[str, str]]:
return dict(
Expand Down
2 changes: 1 addition & 1 deletion melusine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

__all__ = ["config"]

VERSION = (3, 1, 0)
VERSION = (3, 2, 0)
__version__ = ".".join(map(str, VERSION))

# ------------------------------- #
Expand Down
36 changes: 35 additions & 1 deletion melusine/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from sklearn.base import BaseEstimator, TransformerMixin

from melusine.backend import backend
from melusine.io import IoMixin
from melusine.io_mixin import IoMixin

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -100,6 +100,23 @@ def parse_column_list(columns: str | Iterable[str]) -> list[str]:
columns = [columns]
return list(columns)

def fit(self, X: MelusineDataset, y: Any = None) -> MelusineTransformer:
"""Fit the transformer (no-op reference implementation).

Nothing is learned from the data: neither argument is used and the
instance is returned unchanged.
NOTE(review): presumably present so the object exposes the usual
fit/transform estimator interface -- confirm against the callers.

Parameters
----------
X : The training input samples (not used by this implementation).
y : The target values (class labels in classification, real numbers in
    regression). Not used by this implementation.

Returns
-------
self : object
    Returns self.
"""
return self

def transform(self, data: MelusineDataset) -> MelusineDataset:
"""
Transform input data.
Expand Down Expand Up @@ -197,6 +214,23 @@ def transform_methods(self) -> list[Callable]:
List of methods to be called by the transform method.
"""

def fit(self, X: MelusineDataset, y: Any = None) -> MelusineTransformer:
"""Fit the object (no-op reference implementation).

Nothing is learned from the data: neither argument is used and the
instance is returned unchanged.
NOTE(review): presumably present so the object exposes the usual
fit/transform estimator interface -- confirm against the callers.

Parameters
----------
X : The training input samples (not used by this implementation).
y : The target values (class labels in classification, real numbers in
    regression). Not used by this implementation.

Returns
-------
self : object
    Returns self.
"""
return self

def transform(self, df: MelusineDataset) -> MelusineDataset:
"""
Re-definition of super().transform() => specific detector's implementation
Expand Down
3 changes: 3 additions & 0 deletions melusine/conf/pipelines/demo_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ demo_pipeline:
- class_name: ContentTagger
config_key: content_tagger
module: melusine.processors
- class_name: RefinedTagger
config_key: refined_tagger
module: melusine.processors
- class_name: TextExtractor
config_key: text_extractor
module: melusine.processors
Expand Down
3 changes: 3 additions & 0 deletions melusine/conf/pipelines/preprocessing_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ preprocessing_pipeline:
- class_name: ContentTagger
config_key: content_tagger
module: melusine.processors
- class_name: RefinedTagger
config_key: refined_tagger
module: melusine.processors
- class_name: TransferredEmailProcessor
config_key: transferred_email_processor
module: melusine.processors
Expand Down
2 changes: 2 additions & 0 deletions melusine/conf/processors/refined_tagger.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
refined_tagger:
default_tag: BODY
22 changes: 4 additions & 18 deletions melusine/detectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"""

from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List

from melusine.base import MelusineDetector, MelusineItem, MelusineRegex
from melusine.message import Message
Expand Down Expand Up @@ -95,19 +95,12 @@ def pre_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineIte
target_tags={self.BODY_PART}, stop_at={self.GREETINGS_PART}
)

# Extract the THANKS part in the last message
thanks_parts: List[Tuple[str, str]] = row[self.messages_column][0].extract_parts(target_tags={self.THANKS_PART})

# Compute THANKS text
if not thanks_parts:
thanks_text: str = ""
else:
thanks_text = "\n".join(x[1] for x in thanks_parts)
# Extract the THANKS text in the last message
thanks_text = row[self.messages_column][0].extract_text(target_tags={self.THANKS_PART})

# Save debug data
if debug_mode:
debug_dict = {
self.THANKS_PARTS_COL: thanks_parts,
self.THANKS_TEXT_COL: thanks_text,
self.HAS_BODY: has_body,
}
Expand Down Expand Up @@ -236,20 +229,13 @@ def pre_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineIte
"""
# Last message body
last_message: Message = row[self.messages_column][0]
body_parts = last_message.extract_last_body()

if body_parts:
row[self.CONST_TEXT_COL_NAME] = "\n".join(text for tag, text in body_parts)
else:
row[self.CONST_TEXT_COL_NAME] = ""
row[self.CONST_TEXT_COL_NAME] = last_message.extract_text(target_tags=("BODY",), stop_at=("GREETINGS",))

# Prepare and save debug data
if debug_mode:
debug_dict: Dict[str, Any] = {
self.CONST_DEBUG_TEXT_KEY: row[self.CONST_TEXT_COL_NAME],
}
if self.messages_column:
debug_dict[self.CONST_DEBUG_PARTS_KEY] = body_parts
row[self.debug_dict_col].update(debug_dict)

return row
Expand Down
2 changes: 1 addition & 1 deletion melusine/io/__init__.py → melusine/io_mixin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
The melusine.io_mixin module includes classes for input/output data.
"""

from melusine.io._classes import IoMixin
from melusine.io_mixin._classes import IoMixin

__all__ = ["IoMixin"]
4 changes: 0 additions & 4 deletions melusine/io/_classes.py → melusine/io_mixin/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ class IoMixin:
Defines generic load methods.
"""

def __init__(self, **kwargs: Any):
"""Initialize attribute."""
self.json_exclude_list: list[str] = ["_func", "json_exclude_list"]

@classmethod
def from_config(
cls: type[T],
Expand Down
Loading

0 comments on commit 1f65ab3

Please sign in to comment.