Skip to content

Commit 30b01fe

Browse files
committed
Bump version to 0.9.0 Update README with precomputation details
Refactor tests for precomputation
1 parent 8a52e6b commit 30b01fe

11 files changed

+230
-199
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ assert '¢' in result
7575
assert '£' in result
7676
```
7777

78+
## Precomputation
79+
80+
As an optimization, the library precomputes the strings produced by each element of the regex pattern, provided that element does not repeat indefinitely. Precomputation makes the first call to `next()` slower; to disable it and reduce that initial delay, set `precompute=False` when creating the `RegexEnumerator`.
81+
82+
```python
83+
from regex_enumerator import RegexEnumerator
84+
85+
re = RegexEnumerator(r'a[0-9]b', precompute=False)
86+
```
87+
7888
## How it works
7989

8090
This library works by parsing the regex pattern into a tree structure. Once parsed, it performs a breadth-first search (BFS) on the tree to generate all matching strings. This ensures it does not get stuck on unbounded quantifiers for character classes or groups.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='regex_enumerator',
8-
version='0.8.5',
8+
version='0.9.0',
99
packages=find_packages(include=['regex_enumerator', 'regex_enumerator.*']),
1010
description='Enumerate all strings that match a given regex',
1111
author='Vincenzo Greco',

tests/test_alternative.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,69 +3,70 @@
33

44

55
def test_two_alternatives():
6-
regexEnumerator = RegexEnumerator(r'a|b')
6+
regex = r'a|b'
77
possibilities = ['a', 'b']
88

9-
f_finite(regexEnumerator, possibilities)
9+
f_finite(regex, possibilities)
1010

1111

1212
def test_alternatives_with_quantifier_on_second_option():
13-
regexEnumerator = RegexEnumerator(r'a|b*')
13+
regex = r'a|b*'
1414
possibilities = ['a', '', 'b', 'bb', 'bbb', 'bbbb', 'bbbbb']
1515

16-
f_infinite(regexEnumerator, possibilities)
16+
f_infinite(regex, possibilities)
1717

1818

1919
def test_alternatives_with_quantifier_plus_on_first_option():
20-
regexEnumerator = RegexEnumerator(r'a+|b')
20+
regex = r'a+|b'
2121
possibilities = ['b', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa']
2222

23-
f_infinite(regexEnumerator, possibilities)
23+
f_infinite(regex, possibilities)
2424

2525

2626
def test_multiple_alternatives():
27-
regexEnumerator = RegexEnumerator(r'a|b|c')
27+
regex = r'a|b|c'
2828
possibilities = ['a', 'b', 'c']
2929

30-
f_finite(regexEnumerator, possibilities)
30+
f_finite(regex, possibilities)
3131

3232

3333
def test_alternative_with_literal_and_character_class():
34-
regexEnumerator = RegexEnumerator(r'a|[b-d]')
34+
regex = r'a|[b-d]'
3535
possibilities = ['a', 'b', 'c', 'd']
3636

37-
f_finite(regexEnumerator, possibilities)
37+
f_finite(regex, possibilities)
3838

3939

4040
def test_alternative_with_character_class_and_literal():
41-
regexEnumerator = RegexEnumerator(r'[a-c]{ 0}|d')
41+
regex = r'[a-c]{ 0}|d'
4242
possibilities = ['', 'd']
4343

44-
f_finite(regexEnumerator, possibilities)
44+
f_finite(regex, possibilities)
4545

4646

4747
def test_alternation_with_character_classes_and_literals():
48-
regexEnumerator = RegexEnumerator(r'(a|[0-2])')
48+
regex = r'(a|[0-2])'
4949
possibilities = ['a', '0', '1', '2']
5050

51-
f_finite(regexEnumerator, possibilities)
51+
f_finite(regex, possibilities)
5252

5353

5454
def test_nested_alternation():
55-
regexEnumerator = RegexEnumerator(r'((a|b)|c)')
55+
regex = r'((a|b)|c)'
5656
possibilities = ['a', 'b', 'c']
57-
58-
f_finite(regexEnumerator, possibilities)
57+
58+
f_finite(regex, possibilities)
5959

6060

6161
def test_alternation_with_grouping():
62-
regexEnumerator = RegexEnumerator(r'(a(b|c)d|x)')
62+
regex = r'(a(b|c)d|x)'
6363
possibilities = ['abd', 'acd', 'x']
64-
65-
f_finite(regexEnumerator, possibilities)
64+
65+
f_finite(regex, possibilities)
66+
6667

6768
def test_same_alternative_twice():
68-
regexEnumerator = RegexEnumerator(r'a{1,2}|a{1,2}')
69+
regex = r'a{1,2}|a{1,2}'
6970
possibilities = ['a', 'aa']
7071

71-
f_finite(regexEnumerator, possibilities)
72+
f_finite(regex, possibilities)

tests/test_backreference.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,81 +3,81 @@
33

44

55
def test_backreference():
6-
regexEnumerator = RegexEnumerator(r'(a)\1')
6+
regex = r'(a)\1'
77
possibilities = ['aa']
88

9-
f_finite(regexEnumerator, possibilities)
9+
f_finite(regex, possibilities)
1010

1111

1212
def test_backreference_with_group_quantifier():
13-
regexEnumerator = RegexEnumerator(r'(a)+\1')
13+
regex = r'(a)+\1'
1414
possibilities = ['aa' * i for i in range(1, 6)]
1515

16-
f_infinite(regexEnumerator, possibilities)
16+
f_infinite(regex, possibilities)
1717

1818

1919
def test_backreference_with_quantifier():
20-
regexEnumerator = RegexEnumerator(r'(a)\1+')
20+
regex = r'(a)\1+'
2121
possibilities = ['a' * i + 'a' for i in range(1, 6)]
2222

23-
f_infinite(regexEnumerator, possibilities)
23+
f_infinite(regex, possibilities)
2424

2525

2626
def test_backreference_with_named_group():
27-
regexEnumerator = RegexEnumerator(r'(?<name>[a-b])\k<name>')
27+
regex = r'(?<name>[a-b])\k<name>'
2828
possibilities = ['aa', 'bb']
2929

30-
f_finite(regexEnumerator, possibilities)
30+
f_finite(regex, possibilities)
3131

3232

3333
def test_backreference_with_named_group_and_quantifier():
34-
regexEnumerator = RegexEnumerator(r'(?<name>[a-b])\k<name>{1, 2}')
34+
regex = r'(?<name>[a-b])\k<name>{1, 2}'
3535
possibilities = ['aa', 'bb', 'aaa', 'bbb']
3636

37-
f_finite(regexEnumerator, possibilities)
37+
f_finite(regex, possibilities)
3838

3939

4040
def test_zero_width_backreference():
41-
regexEnumerator = RegexEnumerator(r'(a)?\1{0}')
41+
regex = r'(a)?\1{0}'
4242
possibilities = ['a', '']
4343

44-
f_finite(regexEnumerator, possibilities)
44+
f_finite(regex, possibilities)
4545

4646

4747
def test_10_backreference():
48-
regexEnumerator = RegexEnumerator(r'(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\10')
48+
regex = r'(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\10'
4949
possibilities = ['abcdefghijj']
5050

51-
f_finite(regexEnumerator, possibilities)
51+
f_finite(regex, possibilities)
5252

5353

5454
def test_multiple_backreferences():
55-
regexEnumerator = RegexEnumerator(r'(a)(b)\2\1')
55+
regex = r'(a)(b)\2\1'
5656
possibilities = ['abba']
5757

58-
f_finite(regexEnumerator, possibilities)
58+
f_finite(regex, possibilities)
5959

6060

6161
def test_backreference_with_mismatch():
62-
regexEnumerator = RegexEnumerator(r'(a)(b)\1')
62+
regex = r'(a)(b)\1'
6363
possibilities = ['aba']
6464

65-
f_finite(regexEnumerator, possibilities)
65+
f_finite(regex, possibilities)
6666

6767

6868
def test_named_group_with_backreference():
69-
regexEnumerator = RegexEnumerator(r'(?<letter>[ab])\k<letter>')
69+
regex = r'(?<letter>[ab])\k<letter>'
7070
possibilities = [
7171
'aa', 'bb'
7272
]
7373

74-
f_finite(regexEnumerator, possibilities)
74+
f_finite(regex, possibilities)
7575

7676

7777
def test_named_group_infinite_repetition_with_backreference():
78-
regexEnumerator = RegexEnumerator(r'(?<letter>[ab])+\k<letter>')
78+
regex = r'(?<letter>[ab])+\k<letter>'
7979
possibilities = [
8080
'aa', 'bb', 'abab', 'baba', 'aaaa', 'bbbb'
8181
]
8282

83-
f_infinite(regexEnumerator, possibilities)
83+
f_infinite(regex, possibilities)

tests/test_char_classes.py

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,129 +3,131 @@
33

44

55
def test_single_character_class():
6-
regexEnumerator = RegexEnumerator(r'[a]')
6+
regex = r'[a]'
77
possibilities = ['a']
88

9-
f_finite(regexEnumerator, possibilities)
9+
f_finite(regex, possibilities)
1010

1111

1212
def test_character_class_with_two_literals():
13-
regexEnumerator = RegexEnumerator(r'[ab]')
13+
regex = r'[ab]'
1414
possibilities = ['a', 'b']
1515

16-
f_finite(regexEnumerator, possibilities)
16+
f_finite(regex, possibilities)
1717

1818

1919
def test_character_class_with_zero_or_more_quantifier():
20-
regexEnumerator = RegexEnumerator(r'[a]*')
20+
regex = r'[a]*'
2121
possibilities = ['', 'a', 'aa', 'aaa', 'aaaa', 'aaaaa']
2222

23-
f_infinite(regexEnumerator, possibilities)
23+
f_infinite(regex, possibilities)
2424

2525

2626
def test_range_character_class():
27-
regexEnumerator = RegexEnumerator(r'[a-c]')
27+
regex = r'[a-c]'
2828
possibilities = ['a', 'b', 'c']
2929

30-
f_finite(regexEnumerator, possibilities)
30+
f_finite(regex, possibilities)
3131

3232

3333
def test_range_character_class_with_repetition():
34-
regexEnumerator = RegexEnumerator(r'[a-c]{1,2}')
34+
regex = r'[a-c]{1,2}'
3535
possibilities = ['a', 'b', 'c', 'aa', 'ab',
3636
'ac', 'ba', 'bb', 'bc', 'ca', 'cb', 'cc']
3737

38-
f_finite(regexEnumerator, possibilities)
38+
f_finite(regex, possibilities)
3939

4040

4141
def test_range_character_class_with_zero_repetition():
42-
regexEnumerator = RegexEnumerator(r'[a-c]{0}')
42+
regex = r'[a-c]{0}'
4343
possibilities = ['']
4444

45-
f_finite(regexEnumerator, possibilities)
45+
f_finite(regex, possibilities)
4646

4747

4848
def test_range_character_class_with_one_or_more_quantifier():
49-
regexEnumerator = RegexEnumerator(r'[a-b]+')
49+
regex = r'[a-b]+'
5050
possibilities = ['a', 'b', 'aa', 'ab', 'ba', 'bb', 'aaa',
5151
'aab', 'aba', 'abb', 'baa', 'bab', 'bba', 'bbb']
5252

53-
f_infinite(regexEnumerator, possibilities)
53+
f_infinite(regex, possibilities)
5454

5555

5656
def test_two_ranges_with_optional_quantifier():
57-
regexEnumerator = RegexEnumerator(r'[a-cf-g]?')
57+
regex = r'[a-cf-g]?'
5858
possibilities = ['', 'a', 'b', 'c', 'f', 'g']
5959

60-
f_finite(regexEnumerator, possibilities)
60+
f_finite(regex, possibilities)
6161

6262

6363
def test_literal_in_character_class():
64-
regexEnumerator = RegexEnumerator(r'[.]')
64+
regex = r'[.]'
6565
possibilities = ['.']
6666

67-
f_finite(regexEnumerator, possibilities)
67+
f_finite(regex, possibilities)
6868

6969

7070
def test_negated_character_class():
71-
regexEnumerator = RegexEnumerator(r'[^a]')
71+
regex = r'[^a]'
7272
possibilities = [chr(i) for i in range(32, 127) if chr(i) != 'a']
7373

74-
f_finite(regexEnumerator, possibilities)
74+
f_finite(regex, possibilities)
7575

7676

7777
def test_character_class_with_escaped_special_char_at_start():
78-
regexEnumerator = RegexEnumerator(r'[\]-a]')
78+
regex = r'[\]-a]'
7979
possibilities = [chr(i) for i in range(93, 98)]
8080

81-
f_finite(regexEnumerator, possibilities)
81+
f_finite(regex, possibilities)
8282

8383

8484
def test_character_class_with_escaped_special_char_at_end():
85-
regexEnumerator = RegexEnumerator(r'[Z-\]]')
85+
regex = r'[Z-\]]'
8686
possibilities = [chr(i) for i in range(90, 94)]
8787

88-
f_finite(regexEnumerator, possibilities)
88+
f_finite(regex, possibilities)
8989

9090

9191
def test_character_class_with_escape_sequence():
92-
regexEnumerator = RegexEnumerator(r'[\d]')
92+
regex = r'[\d]'
9393
possibilities = [str(i) for i in range(10)]
9494

95-
f_finite(regexEnumerator, possibilities)
95+
f_finite(regex, possibilities)
9696

9797

9898
def test_incomplete_range_character_class():
99-
regexEnumerator = RegexEnumerator(r'[a-]')
99+
regex = r'[a-]'
100100
possibilities = ['a', '-']
101101

102-
f_finite(regexEnumerator, possibilities)
102+
f_finite(regex, possibilities)
103103

104104

105105
def test_2_ranges():
106-
regexEnumerator = RegexEnumerator(r'[1a-crf-g3]')
106+
regex = r'[1a-crf-g3]'
107107
possibilities = ['1', 'a', 'b', 'c', 'f', 'g', 'r', '3']
108108

109-
f_finite(regexEnumerator, possibilities)
109+
f_finite(regex, possibilities)
110110

111111

112112
def test_unicode_character_class():
113-
regexEnumerator = RegexEnumerator(r'[à-å]')
113+
regex = r'[à-å]'
114114
possibilities = ['à', 'á', 'â', 'ã', 'ä', 'å']
115115

116-
f_finite(regexEnumerator, possibilities)
116+
f_finite(regex, possibilities)
117117

118118

119119
def test_additional_charset():
120-
regexEnumerator = RegexEnumerator(
121-
r'[^\w\d\s]', additional_charset=['γ', 'β', 'α'])
120+
regex = r'[^\w\d\s]'
121+
additional_charset = ['γ', 'β', 'α']
122122
possibilities = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':',
123123
';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~', 'α', 'β', 'γ']
124124

125-
f_finite(regexEnumerator, possibilities)
125+
f_finite(regex, possibilities, additional_charset)
126+
126127

127128
def test_charclass_with_quantifier_from_0():
128-
regexEnumerator = RegexEnumerator(r'[b-d]{0,2}')
129-
possibilities = ['', 'b', 'c', 'd', 'bb', 'bc', 'bd', 'cb', 'cc', 'cd', 'db', 'dc', 'dd']
129+
regex = r'[b-d]{0,2}'
130+
possibilities = ['', 'b', 'c', 'd', 'bb', 'bc',
131+
'bd', 'cb', 'cc', 'cd', 'db', 'dc', 'dd']
130132

131-
f_finite(regexEnumerator, set(possibilities))
133+
f_finite(regex, set(possibilities))

0 commit comments

Comments
 (0)