Skip to content

Commit 81c6f00

Browse files
committed
Bugfix: group string generation
Bump to version 0.8.2
1 parent 1a6d789 commit 81c6f00

File tree

8 files changed

+156
-38
lines changed

8 files changed

+156
-38
lines changed

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ print(re.next()) # a2b
2929
## What is supported
3030

3131
- [x] Character classes
32-
- [x] Quantifiers for character classes
33-
- [x] Quantifiers for groups
32+
- [x] Quantifiers (greedy)
3433
- [x] Groups (named and unnamed)
3534
- [x] Alternation
3635
- [x] Escaped characters

regex_enumerator/regex_tree.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,20 @@ def add_reference(self, reference: BackReference):
209209
return
210210
self.references.append(reference)
211211

212+
def _calculate_using_new_charset(self) -> set[str]:
213+
assert not self.done
214+
if self._done_repetition and self._done_charset:
215+
self.done = True
216+
217+
if self._index_repetition + self._min_len == 0:
218+
return {''}
219+
220+
result = set(self._current_chars)
221+
for _ in range(1, self._min_len + self._index_repetition):
222+
result.update({pfx + sfx for pfx in result for sfx in self._current_chars})
223+
224+
return result
225+
212226
def next(self) -> set[str]:
213227
assert not self.done
214228

@@ -219,12 +233,13 @@ def next(self) -> set[str]:
219233

220234
if self._gen_charset:
221235
_: set[str] = self._next_charset()
222-
# optimize it by using only the new charset
236+
# Optimization: use the new charset to calculate the next set of strings
237+
res: set[str] = self._calculate_using_new_charset()
223238
else:
224239
if not self._done_repetition:
225240
self._index_repetition += 1
241+
res: set[str] = self._calculate()
226242

227-
res: set[str] = self._calculate()
228243
self._gen_charset = not self._gen_charset
229244
new_res = res - self.current
230245
if len(new_res) == 0:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='regex_enumerator',
8-
version='0.8.1',
8+
version='0.8.2',
99
packages=find_packages(include=['regex_enumerator', 'regex_enumerator.*']),
1010
description='Enumerate all strings that match a given regex',
1111
author='Vincenzo Greco',

tests/test_alternative.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,24 @@ def test_alternative_with_character_class_and_literal():
4242
possibilities = ['', 'd']
4343

4444
f_finite(regexEnumerator, possibilities)
45+
46+
47+
def test_alternation_with_character_classes_and_literals():
48+
regexEnumerator = RegexEnumerator(r'(a|[0-2])')
49+
possibilities = ['a', '0', '1', '2']
50+
51+
f_finite(regexEnumerator, possibilities)
52+
53+
54+
def test_nested_alternation():
55+
regexEnumerator = RegexEnumerator(r'((a|b)|c)')
56+
possibilities = ['a', 'b', 'c']
57+
58+
f_finite(regexEnumerator, possibilities)
59+
60+
61+
def test_alternation_with_grouping():
62+
regexEnumerator = RegexEnumerator(r'(a(b|c)d|x)')
63+
possibilities = ['abd', 'acd', 'x']
64+
65+
f_finite(regexEnumerator, possibilities)

tests/test_backreference.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,41 @@ def test_zero_width_backreference():
4343

4444
f_finite(regexEnumerator, possibilities)
4545

46+
4647
def test_10_backreference():
4748
regexEnumerator = RegexEnumerator(r'(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\10')
4849
possibilities = ['abcdefghijj']
4950

5051
f_finite(regexEnumerator, possibilities)
52+
53+
54+
def test_multiple_backreferences():
55+
regexEnumerator = RegexEnumerator(r'(a)(b)\2\1')
56+
possibilities = ['abba']
57+
58+
f_finite(regexEnumerator, possibilities)
59+
60+
61+
def test_backreference_with_mismatch():
62+
regexEnumerator = RegexEnumerator(r'(a)(b)\1')
63+
possibilities = ['aba']
64+
65+
f_finite(regexEnumerator, possibilities)
66+
67+
68+
def test_named_group_with_backreference():
69+
regexEnumerator = RegexEnumerator(r'(?<letter>[ab])\k<letter>')
70+
possibilities = [
71+
'aa', 'bb'
72+
]
73+
74+
f_finite(regexEnumerator, possibilities)
75+
76+
77+
def test_named_group_infinite_repetition_with_backreference():
78+
regexEnumerator = RegexEnumerator(r'(?<letter>[ab])+\k<letter>')
79+
possibilities = [
80+
'aa', 'bb', 'abab', 'baba', 'aaaa', 'bbbb'
81+
]
82+
83+
f_infinite(regexEnumerator, possibilities)

tests/test_char_classes.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,12 @@ def test_unicode_character_class():
114114
possibilities = ['à', 'á', 'â', 'ã', 'ä', 'å']
115115

116116
f_finite(regexEnumerator, possibilities)
117+
118+
119+
def test_additional_charset():
120+
regexEnumerator = RegexEnumerator(
121+
r'[^\w\d\s]', additional_charset=['γ', 'β', 'α'])
122+
possibilities = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':',
123+
';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~', 'α', 'β', 'γ']
124+
125+
f_finite(regexEnumerator, possibilities)

tests/test_groups.py

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,64 +2,105 @@
22
from .test_function import f_finite, f_infinite
33

44

5-
def test_single_group_literal_char():
5+
def test_single_capturing_group_with_literal():
66
regexEnumerator = RegexEnumerator(r'(a)')
77
possibilities = ['a']
88

99
f_finite(regexEnumerator, possibilities)
1010

1111

12-
def test_single_character_class():
12+
def test_single_capturing_group_with_class_single_char():
1313
regexEnumerator = RegexEnumerator(r'([a])')
1414
possibilities = ['a']
1515

1616
f_finite(regexEnumerator, possibilities)
1717

1818

19-
def test_multiple_character_class():
19+
def test_single_capturing_group_with_class_multi_char():
2020
regexEnumerator = RegexEnumerator(r'([a-c])')
2121
possibilities = ['a', 'b', 'c']
2222

2323
f_finite(regexEnumerator, possibilities)
2424

2525

26-
def test_group_with_zero_or_more_quantifier():
26+
def test_capturing_group_with_star_quantifier():
2727
regexEnumerator = RegexEnumerator(r'(a)*')
2828
possibilities = ['', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa']
2929

3030
f_infinite(regexEnumerator, possibilities)
3131

3232

33-
def test_named_group():
33+
def test_named_capturing_group_with_optional_subgroup():
3434
regexEnumerator = RegexEnumerator(r'(?<name>a[bcd](e)?)')
3535
possibilities = ['ab', 'abe', 'ac', 'ace', 'ad', 'ade']
3636

3737
f_finite(regexEnumerator, possibilities)
3838

3939

40-
def test_group_with_range_quantifier_after_literal():
40+
def test_literal_followed_by_group_with_star_quantifier():
4141
regexEnumerator = RegexEnumerator(r'a(b)*')
4242
possibilities = ['a' + 'b' * i for i in range(6)]
4343

4444
f_infinite(regexEnumerator, possibilities)
4545

4646

47-
def test_2_groups_with_range_quantifier():
47+
def test_two_capturing_groups_with_star_quantifiers():
4848
regexEnumerator = RegexEnumerator(r'(a)*(b)*')
4949
possibilities = ['a' * i + 'b' * j for i in range(6) for j in range(6)]
5050

5151
f_infinite(regexEnumerator, possibilities)
5252

5353

54-
def test_nested_groups():
54+
def test_nested_capturing_groups():
5555
regexEnumerator = RegexEnumerator(r'(a(b(c)))')
5656
possibilities = ['abc']
5757

5858
f_finite(regexEnumerator, possibilities)
5959

6060

61-
def test_group_of_groups():
61+
def test_capturing_groups_in_sequence():
6262
regexEnumerator = RegexEnumerator(r'((a)(b))')
6363
possibilities = ['ab']
6464

6565
f_finite(regexEnumerator, possibilities)
66+
67+
68+
def test_non_capturing_group():
69+
regexEnumerator = RegexEnumerator(r'(?:a|b)*')
70+
possibilities = ['', 'a', 'b', 'aa', 'ab', 'ba', 'bb']
71+
72+
f_infinite(regexEnumerator, possibilities)
73+
74+
75+
def test_non_capturing_group_with_quantifier():
76+
regexEnumerator = RegexEnumerator(r'(?:ab)+')
77+
possibilities = ['ab', 'abab', 'ababab']
78+
79+
f_infinite(regexEnumerator, possibilities)
80+
81+
82+
def test_named_capturing_group_with_quantifier():
83+
regexEnumerator = RegexEnumerator(r'(?<chars>[ab]{1,2})')
84+
possibilities = ['a', 'b', 'aa', 'ab', 'ba', 'bb']
85+
86+
f_finite(regexEnumerator, possibilities)
87+
88+
89+
def test_nested_non_capturing_groups():
90+
regexEnumerator = RegexEnumerator(r'(?:a(?:b(?:c)))?')
91+
possibilities = ['', 'abc']
92+
93+
f_finite(regexEnumerator, possibilities)
94+
95+
96+
def test_group_for_quantifier_scope():
97+
regexEnumerator = RegexEnumerator(r'(ab)+')
98+
possibilities = ['ab', 'abab', 'ababab']
99+
100+
f_infinite(regexEnumerator, possibilities)
101+
102+
def test_group_with_char_class_infinite_repetition():
103+
regexEnumerator = RegexEnumerator(r'([ab])+')
104+
possibilities = ['a', 'b', 'aa', 'ab', 'ba', 'bb']
105+
106+
f_infinite(regexEnumerator, possibilities)

tests/test_single_char.py renamed to tests/test_literals.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,80 +2,80 @@
22
from .test_function import f_finite, f_infinite
33

44

5-
def test_empty_regex():
5+
def test_empty_pattern_yields_empty_string():
66
regexEnumerator = RegexEnumerator(r'')
77
possibilities = ['']
8-
98
f_finite(regexEnumerator, possibilities)
109

1110

12-
def test_single_literal_char():
11+
def test_single_literal_character():
1312
regexEnumerator = RegexEnumerator(r'a')
1413
possibilities = ['a']
15-
1614
f_finite(regexEnumerator, possibilities)
1715

1816

19-
def test_zero_or_more_quantifier():
17+
def test_zero_or_more_quantifier_on_single_char():
2018
regexEnumerator = RegexEnumerator(r'a*')
2119
possibilities = ['', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa']
22-
2320
f_infinite(regexEnumerator, possibilities)
2421

2522

26-
def test_one_or_more_quantifier():
23+
def test_one_or_more_quantifier_on_single_char():
2724
regexEnumerator = RegexEnumerator(r'a+')
2825
possibilities = ['a', 'aa', 'aaa', 'aaaa', 'aaaaa']
29-
3026
f_infinite(regexEnumerator, possibilities)
3127

3228

33-
def test_zero_or_one_quantifier():
29+
def test_zero_or_one_quantifier_on_single_char():
3430
regexEnumerator = RegexEnumerator(r'a?')
3531
possibilities = ['', 'a']
36-
3732
f_finite(regexEnumerator, possibilities)
3833

3934

40-
def test_exact_repetition_quantifier():
35+
def test_exact_repetition_quantifier_on_single_char():
4136
regexEnumerator = RegexEnumerator(r'a{2}')
4237
possibilities = ['aa']
43-
4438
f_finite(regexEnumerator, possibilities)
4539

4640

47-
def test_min_repetition_quantifier():
41+
def test_minimum_repetition_quantifier_on_single_char():
4842
regexEnumerator = RegexEnumerator(r'a{2,}')
4943
possibilities = ['aa', 'aaa', 'aaaa', 'aaaaa']
50-
5144
f_infinite(regexEnumerator, possibilities)
5245

5346

54-
def test_min_max_repetition_quantifier():
47+
def test_min_max_repetition_quantifier_on_single_char():
48+
# `a{2,4}` yields 'aa', 'aaa', 'aaaa'.
5549
regexEnumerator = RegexEnumerator(r'a{2,4}')
5650
possibilities = ['aa', 'aaa', 'aaaa']
57-
5851
f_finite(regexEnumerator, possibilities)
5952

6053

61-
def test_zero_repetition_quantifier():
54+
def test_zero_times_repetition_quantifier_on_single_char():
6255
regexEnumerator = RegexEnumerator(r'a{0}')
6356
possibilities = ['']
64-
6557
f_finite(regexEnumerator, possibilities)
6658

6759

68-
def test_literal_special_characters():
60+
def test_escaped_literal_special_characters():
6961
regexEnumerator = RegexEnumerator(r'\*\+\?')
7062
possibilities = ['*+?']
63+
f_finite(regexEnumerator, possibilities)
7164

65+
66+
def test_single_character_class():
67+
regexEnumerator = RegexEnumerator(r'[abc]')
68+
possibilities = ['a', 'b', 'c']
7269
f_finite(regexEnumerator, possibilities)
7370

7471

75-
def test_additional_charset():
76-
regexEnumerator = RegexEnumerator(
77-
r'[^\w\d\s]', additional_charset=['γ', 'β', 'α'])
78-
possibilities = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':',
79-
';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~', 'α', 'β', 'γ']
72+
def test_single_escaped_character():
73+
regexEnumerator = RegexEnumerator(r'\n')
74+
possibilities = ['\n']
75+
f_finite(regexEnumerator, possibilities)
76+
8077

78+
def test_literal_dot_character():
79+
regexEnumerator = RegexEnumerator(r'\.')
80+
possibilities = ['.']
8181
f_finite(regexEnumerator, possibilities)

0 commit comments

Comments
 (0)