Skip to content

Commit 44b8a63

Browse files
committed
❇️ Add hooks to MelusineRegex
1 parent a265c56 commit 44b8a63

File tree

2 files changed

+108
-6
lines changed

2 files changed

+108
-6
lines changed

melusine/base.py

+61
Original file line numberDiff line numberDiff line change
@@ -308,12 +308,16 @@ class MissingFieldError(Exception):
308308
"""
309309

310310

311+
# Type alias for match data: a dict with string keys whose values are lists of
# per-match dicts.
# NOTE(review): keys are presumably the names of the regex patterns that
# matched — confirm against the code that builds the match data.
MatchData = dict[str, list[dict[str, Any]]]
312+
313+
311314
class MelusineRegex(ABC):
312315
"""
313316
Class to standardise text pattern detection using regex.
314317
"""
315318

316319
REGEX_FLAGS: re.RegexFlag = re.IGNORECASE | re.MULTILINE
320+
PAIRED_MATCHING_PREFIX: str = "_"
317321

318322
# Match fields
319323
MATCH_RESULT: str = "match_result"
@@ -563,6 +567,63 @@ def _describe_match_field(match_field_data: dict[str, list[dict[str, Any]]]) ->
563567
print("The following text matched positively:")
564568
_describe_match_field(match_data[self.POSITIVE_MATCH_FIELD])
565569

570+
def apply_paired_matching(self, negative_match_data: MatchData, positive_match_data: MatchData) -> bool:
    """
    Decide whether the negative matches are effective under paired matching.

    A negative key that starts with ``PAIRED_MATCHING_PREFIX`` is "paired":
    it is only effective when the key obtained by stripping the prefix is
    present in the positive match data. A negative key without the prefix
    is always effective.

    Args:
        negative_match_data: Negative match data, keyed by match name.
        positive_match_data: Positive match data, keyed by match name.

    Returns:
        effective_negative_match: True if at least one negative match is
            effective. Always False when either input is empty.
    """
    if not positive_match_data or not negative_match_data:
        return False

    prefix = self.PAIRED_MATCHING_PREFIX
    # Strip the prefix by its actual length (not a hard-coded single character)
    # so that overriding PAIRED_MATCHING_PREFIX with a longer value still works.
    prefix_length = len(prefix)

    return any(
        not key.startswith(prefix) or key[prefix_length:] in positive_match_data
        for key in negative_match_data
    )
593+
594+
def pre_match_hook(self, text: str) -> str:
    """
    Hook applied to the text before the Melusine regex match.

    This implementation is a pass-through: the text is returned untouched.

    Args:
        text: Text about to be matched.

    Returns:
        _: The text to run the match on (here, the input itself).
    """
    return text
605+
606+
def post_match_hook(self, match_dict: dict[str, Any]) -> dict[str, Any]:
    """
    Hook applied to the match results after the Melusine regex match.

    The overall match result is recomputed: it is positive when there is
    positive match data and paired matching does not report an effective
    negative match. The input dict is updated in place and returned.

    Args:
        match_dict: Match results holding the negative and positive match fields.

    Returns:
        _: The same dict, with its match result entry refreshed.
    """
    negative_data = match_dict[self.NEGATIVE_MATCH_FIELD]
    positive_data = match_dict[self.POSITIVE_MATCH_FIELD]

    # A negative match only cancels the result if paired matching deems it effective.
    negative_is_effective = self.apply_paired_matching(negative_data, positive_data)

    match_dict[self.MATCH_RESULT] = bool(positive_data) and not negative_is_effective
    return match_dict
626+
566627
def test(self) -> None:
567628
"""
568629
Test the MelusineRegex on the match_list and no_match_list.

tests/base/test_melusine_regex.py

+47-6
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,12 @@ def no_match_list(self) -> List[str]:
4646

4747
def test_erroneous_substitution_pattern():
4848
with pytest.raises(ValueError):
49-
regex = VirusRegex(substitution_pattern="12345")
49+
_ = VirusRegex(substitution_pattern="12345")
5050

5151

5252
def test_method_test():
5353
regex = VirusRegex()
5454
regex.test()
55-
assert True
5655

5756

5857
def test_match_method():
@@ -94,7 +93,7 @@ def test_describe_method(capfd):
9493

9594
# Negative match on bug (group NEGATIVE_BUG) and ignore ladybug and corona virus
9695
regex.describe("The computer virus in the ladybug software caused a bug in the corona virus dashboard")
97-
out, err = capfd.readouterr()
96+
out, _ = capfd.readouterr()
9897
assert "NEGATIVE_BUG" in out
9998
assert "start" not in out
10099

@@ -103,18 +102,18 @@ def test_describe_method(capfd):
103102
"The computer virus in the ladybug software caused a bug in the corona virus dashboard",
104103
position=True,
105104
)
106-
out, err = capfd.readouterr()
105+
out, _ = capfd.readouterr()
107106
assert "match result is : NEGATIVE" in out
108107
assert "NEGATIVE_BUG" in out
109108
assert "start" in out
110109

111110
regex.describe("This is a dangerous virus")
112-
out, err = capfd.readouterr()
111+
out, _ = capfd.readouterr()
113112
assert "match result is : POSITIVE" in out
114113
assert "start" not in out
115114

116115
regex.describe("Nada")
117-
out, err = capfd.readouterr()
116+
out, _ = capfd.readouterr()
118117
assert "The input text did not match anything" in out
119118

120119

@@ -151,3 +150,45 @@ def no_match_list(self):
151150
regex = SomeRegex()
152151
assert regex.neutral is None
153152
assert regex.negative is None
153+
154+
155+
class PairedMatchRegex(MelusineRegex):
    """
    Regex fixture used to test paired matching.

    The negative pattern "_test_1" shares its name (minus the leading
    underscore) with the positive pattern "test_1"; "generic" has no prefix.
    """

    @property
    def positive(self) -> Union[str, Dict[str, str]]:
        """Named positive patterns."""
        return dict(
            test_1=r"pos_pattern_1",
            test_2=r"pos_pattern_2",
        )

    @property
    def negative(self) -> Optional[Union[str, Dict[str, str]]]:
        """Named negative patterns: one underscore-prefixed, one without prefix."""
        return dict(
            _test_1=r"neg_pattern_1",
            generic=r"neg_pattern_2",
        )

    @property
    def match_list(self) -> List[str]:
        """Texts expected to match."""
        expected_matches = [
            "Test pos_pattern_1",
            "pos_pattern_2",
            # neg_pattern_1 is listed under "_test_1", yet this text is expected to match
            "pos_pattern_2 and neg_pattern_1",
        ]
        return expected_matches

    @property
    def no_match_list(self) -> List[str]:
        """Texts expected not to match."""
        expected_non_matches = [
            "test",
            "Test pos_pattern_1 and neg_pattern_1",
            "pos_pattern_2 and neg_pattern_2",
            "pos_pattern_1 and neg_pattern_2",
        ]
        return expected_non_matches
190+
191+
192+
def test_paired_matching_test():
    """Run the regex self-test on the paired-matching fixture's match lists."""
    PairedMatchRegex().test()

0 commit comments

Comments
 (0)