Skip to content

Commit 0d715f0

Browse files
committed
Refactor error messages in RegexParser for clarity and consistency
Add new grammar tests for improved coverage. Bump to 0.9.2
1 parent 0619033 commit 0d715f0

12 files changed

+131
-23
lines changed

regex_enumerator/regex_parser.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def _parseRegex(self, to_close: bool) -> RegexTree:
4242
if self.index < len(self.regex) and self.regex[self.index] == '?':
4343
self.index += 1
4444
if self.index >= len(self.regex):
45-
self._raise_error("Invalid named group")
45+
self._raise_error("Invalid group")
4646
elif self.regex[self.index] == '<':
4747
self.index += 1
4848
name = ''
@@ -97,11 +97,12 @@ def _parseRegex(self, to_close: bool) -> RegexTree:
9797
continue
9898
if isinstance(reference, str):
9999
if reference not in named_groups:
100-
self._raise_error("Invalid back reference")
100+
self._raise_error("Named back reference not found")
101101
group = named_groups[reference]
102102
else:
103103
if reference < 1 or reference > len(ordered_groups):
104-
self._raise_error("Invalid back reference")
104+
self._raise_error(
105+
"Positional back reference not found")
105106
group = ordered_groups[reference - 1]
106107
min_len, max_len = self._parseQuantifier()
107108
reference = BackReference(
@@ -130,13 +131,13 @@ def _parseBackReferenceLookahead(self) -> str | int | None:
130131
self.index += 1
131132
name = ''
132133
if len(self.regex) <= self.index or self.regex[self.index] != '<':
133-
self._raise_error("Invalid back reference")
134+
self._raise_error("Invalid named back reference")
134135
self.index += 1
135136
while self.index < len(self.regex) and self.regex[self.index] != '>':
136137
name += self.regex[self.index]
137138
self.index += 1
138-
if len(self.regex) <= self.index or self.regex[self.index] != '>':
139-
self._raise_error("Invalid back reference")
139+
if len(self.regex) <= self.index or self.regex[self.index] != '>' or name == '':
140+
self._raise_error("Invalid named back reference")
140141
self.index += 1
141142
return name
142143
case char if char.isdigit():
@@ -169,21 +170,21 @@ def _parseEscapeChar(self) -> str:
169170
case 'f': return '\f'
170171
case 'x':
171172
if len(self.regex) < self.index + 1 or self.regex[self.index] not in self.HEX:
172-
raise ValueError('Invalid escape character')
173+
self._raise_error('Invalid ASCII escape character')
173174
if len(self.regex) < self.index + 2 or self.regex[self.index + 1] not in self.HEX:
174175
num = int(self.regex[self.index], 16)
175176
self.index += 1
176177
else:
177178
num = int(self.regex[self.index: self.index + 2], 16)
178179
self.index += 2
179180
if num < 32 or num > 126:
180-
self._raise_error(f"Invalid escape character {num}")
181+
self._raise_error(f"Invalid ASCII escape character {num}")
181182
return chr(num)
182183
case 'u':
183184
code = []
184185
for _ in range(4):
185186
if len(self.regex) <= self.index or self.regex[self.index] not in self.HEX:
186-
self._raise_error("Invalid escape character")
187+
self._raise_error("Invalid unicode escape character")
187188
code.append(self.regex[self.index])
188189
self.index += 1
189190
num = int(''.join(code), 16)

regex_enumerator/regex_tree.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ def __init__(self, alternatives: list[Alternative], min_len: int, max_len: int |
255255
self._index_repetition = 0
256256
self._done_repetition = False
257257
self._current_chars: set[str] = self._calculate_chars()
258-
self.current: set[str] = self._calculate_first() if not self.done else set()
258+
self.current: set[str] = self._calculate_first(
259+
) if not self.done else set()
259260

260261
def _calculate_first(self) -> set[str]:
261262
if self._max_len is not None and self._index_repetition + self._min_len >= self._max_len:
@@ -271,7 +272,8 @@ def _calculate_first(self) -> set[str]:
271272
result = {pfx + sfx for pfx in result for sfx in self._current_chars}
272273

273274
for _ in range(self._index_repetition):
274-
result.update({pfx + sfx for pfx in result for sfx in self._current_chars})
275+
result.update(
276+
{pfx + sfx for pfx in result for sfx in self._current_chars})
275277

276278
return result
277279

@@ -327,14 +329,12 @@ def next(self) -> set[str]:
327329
return self.current
328330

329331
def _calculate(self) -> set[str]:
332+
assert self._index_repetition != 0
330333
if self._max_len is not None and self._index_repetition + self._min_len >= self._max_len:
331334
self._done_repetition = True
332335
if self._done_charset:
333336
self.done = True
334337

335-
if self._index_repetition + self._min_len == 0:
336-
return {''}
337-
338338
result = set(self._current_chars)
339339
for _ in range(1, self._min_len + self._index_repetition):
340340
result = {pfx + sfx for pfx in result for sfx in self._current_chars}

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='regex_enumerator',
8-
version='0.9.1',
8+
version='0.9.2',
99
packages=find_packages(include=['regex_enumerator', 'regex_enumerator.*']),
1010
description='Enumerate all strings that match a given regex',
1111
author='Vincenzo Greco',

tests/test_alternative.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from regex_enumerator import RegexEnumerator
21
from .test_function import f_finite, f_infinite
32

43

tests/test_backreference.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from regex_enumerator import RegexEnumerator
21
from .test_function import f_finite, f_infinite
32

43

tests/test_char_classes.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from regex_enumerator import RegexEnumerator
21
from .test_function import f_finite, f_infinite
32

43

tests/test_escape_char.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from regex_enumerator import RegexEnumerator
21
from .test_function import f_finite
32

43

tests/test_grammar.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import pytest
2+
from regex_enumerator import RegexEnumerator
3+
from regex_enumerator.regex_parser import RegexError
4+
5+
6+
def test_grammar_group():
7+
with pytest.raises(RegexError, match='Invalid group'):
8+
RegexEnumerator(r'(?)')
9+
10+
with pytest.raises(RegexError, match='Invalid group'):
11+
RegexEnumerator(r'(?')
12+
13+
with pytest.raises(RegexError, match='Invalid named group'):
14+
RegexEnumerator(r'(?<>)')
15+
16+
with pytest.raises(RegexError, match='Invalid named group'):
17+
RegexEnumerator(r'(?<')
18+
19+
with pytest.raises(RegexError, match='Duplicate named group'):
20+
RegexEnumerator(r'(?<name>a)(?<name>b)')
21+
22+
with pytest.raises(RegexError, match='Invalid group'):
23+
RegexEnumerator(r'(?a)')
24+
25+
with pytest.raises(RegexError, match='Unmatched closing parenthesis'):
26+
RegexEnumerator(r'a)')
27+
28+
with pytest.raises(RegexError, match='Unmatched opening parenthesis'):
29+
RegexEnumerator(r'(a')
30+
31+
32+
def test_grammar_backreference():
33+
with pytest.raises(RegexError, match='Named back reference not found'):
34+
RegexEnumerator(r'\k<name>')
35+
36+
with pytest.raises(RegexError, match='Positional back reference not found'):
37+
RegexEnumerator(r'\1')
38+
39+
with pytest.raises(RegexError, match='Incomplete escape sequence'):
40+
RegexEnumerator('\\')
41+
42+
with pytest.raises(RegexError, match='Invalid named back reference'):
43+
RegexEnumerator(r'\k')
44+
45+
with pytest.raises(RegexError, match='Invalid named back reference'):
46+
RegexEnumerator(r'\k<')
47+
48+
with pytest.raises(RegexError, match='Invalid named back reference'):
49+
RegexEnumerator(r'\ka')
50+
51+
with pytest.raises(RegexError, match='Invalid named back reference'):
52+
RegexEnumerator(r'\k<>')
53+
54+
55+
def test_grammar_escape_character():
56+
with pytest.raises(RegexError, match='Incomplete escape sequence'):
57+
RegexEnumerator('[\\')
58+
59+
with pytest.raises(RegexError, match='Invalid ASCII escape character'):
60+
RegexEnumerator(r'\x')
61+
62+
with pytest.raises(RegexError, match='Invalid ASCII escape character'):
63+
RegexEnumerator(r'\xh')
64+
65+
with pytest.raises(RegexError, match='Invalid ASCII escape character 0'):
66+
RegexEnumerator(r'[\x0]')
67+
68+
with pytest.raises(RegexError, match='Invalid unicode escape character'):
69+
RegexEnumerator(r'\u0')
70+
71+
with pytest.raises(RegexError, match='Invalid unicode escape character'):
72+
RegexEnumerator(r'\un')
73+
74+
with pytest.raises(RegexError, match='Unicode property not supported'):
75+
RegexEnumerator(r'\p{L}')
76+
77+
78+
def test_grammar_charclass():
79+
with pytest.raises(RegexError, match='Unclosed character class'):
80+
RegexEnumerator(r'[')
81+
82+
with pytest.raises(RegexError, match='Unclosed character class'):
83+
RegexEnumerator(r'[a')
84+
85+
86+
def test_grammar_quantifiers():
87+
with pytest.raises(RegexError, match='Invalid quantifier'):
88+
RegexEnumerator(r'a{')
89+
90+
with pytest.raises(RegexError, match='Invalid quantifier'):
91+
RegexEnumerator(r'a{a}')
92+
93+
with pytest.raises(RegexError, match='Invalid quantifier'):
94+
RegexEnumerator(r'a{1')
95+
96+
with pytest.raises(RegexError, match='Invalid quantifier'):
97+
RegexEnumerator(r'a{1 d')
98+
99+
with pytest.raises(RegexError, match='Invalid quantifier'):
100+
RegexEnumerator(r'a{1, f')
101+
102+
with pytest.raises(RegexError, match='Max length cannot be less than min length in quantifier'):
103+
RegexEnumerator(r'a{2,1}')
104+
105+
with pytest.raises(RegexError, match='Invalid quantifier'):
106+
RegexEnumerator(r'a{1,2 d')

tests/test_groups.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from regex_enumerator import RegexEnumerator
21
from .test_function import f_finite, f_infinite
32

43

tests/test_literals.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from regex_enumerator import RegexEnumerator
21
from .test_function import f_finite, f_infinite
32

43

tests/test_mixed.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ def test_done():
3131

3232
f_finite(regex, possibilities)
3333

34+
def test_empty_additional_charset():
35+
regexEnumerator = RegexEnumerator(r'')
36+
assert regexEnumerator.next() == ''
37+
assert regexEnumerator.next() == None
38+
regexEnumerator = RegexEnumerator(r'', precompute=False)
39+
assert regexEnumerator.next() == ''
40+
assert regexEnumerator.next() == None
41+
3442

3543
def test_weak_password():
3644
regex = r'[Ll][Oo0][Vv][Ee3]([Yy][Oo0][Uu])?(2023|2024|123)?[!1.]{1,2}'

tests/test_not_capturing_groups.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from regex_enumerator import RegexEnumerator
2-
from .test_function import f_finite, f_infinite
1+
from .test_function import f_finite
32

43

54
def test_not_capturing_groups():

0 commit comments

Comments
 (0)