Skip to content

Commit af4cb0a

Browse files
committed
Optimize backreference
Bump version to 0.9.4
1 parent d45160b commit af4cb0a

File tree

3 files changed

+26
-17
lines changed

3 files changed

+26
-17
lines changed

regex_enumerator/regex_tree.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,19 +99,14 @@ def __init__(self, reference: RegexTree, min_len: int, max_len: int | None, prec
9999
self.current: dict[str, list[str]
100100
] = self._calculate() if not self.done else {}
101101

102-
def update(self):
102+
def update_reference(self, new_strings: set[str]) -> None:
103103
if self._max_len is not None and self._min_len + self._index >= self._max_len and self.reference.done:
104104
self.done = True
105105

106-
for string in self.reference.current:
107-
if string in self.current:
108-
self.current[string].append(
109-
string * (self._min_len + self._index))
110-
else:
111-
result = []
112-
for i in range(self._min_len, self._min_len + self._index + 1):
113-
result.append(string * i)
114-
self.current[string] = result
106+
for string in new_strings:
107+
assert string not in self.current
108+
self.current[string] = [
109+
string * i for i in range(self._min_len, self._min_len + self._index + 1)]
115110

116111
def _calculate(self) -> dict[str, set[str]]:
117112
current_ref = self.reference.current
@@ -121,10 +116,8 @@ def _calculate(self) -> dict[str, set[str]]:
121116
result: dict[str, list[str]] = {}
122117

123118
for string in current_ref:
124-
partial = []
125-
for i in range(self._min_len, self._min_len + self._index + 1):
126-
partial.append(string * i)
127-
result[string] = partial
119+
result[string] = [
120+
string * i for i in range(self._min_len, self._min_len + self._index + 1)]
128121

129122
return result
130123

@@ -324,8 +317,8 @@ def next(self) -> set[str]:
324317
return result
325318

326319
for reference in self.references:
327-
reference.update()
328-
return self.current
320+
reference.update_reference(result)
321+
return result
329322

330323
def _calculate(self) -> set[str]:
331324
assert self._index_repetition != 0

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name='regex_enumerator',
8-
version='0.9.3',
8+
version='0.9.4',
99
packages=find_packages(include=['regex_enumerator', 'regex_enumerator.*']),
1010
description='Enumerate all strings that match a given regex',
1111
author='Vincenzo Greco',

tests/test_backreference.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,19 @@ def test_named_group_infinite_repetition_with_backreference():
8080
]
8181

8282
f_infinite(regex, possibilities)
83+
84+
85+
def test_backreference_with_group_quantifier_and_mismatch():
86+
regex = r'(a){1,3}\1{0,2}'
87+
possibilities = ['a', 'aa', 'aaa', 'aaaa', 'aaaaaa', 'aaaaaaaaa']
88+
89+
f_finite(regex, possibilities)
90+
91+
92+
def test_backreference_with_group_quantifier_and_mismatch_complex():
93+
regex = r'(a){1,3}[c-d]\1{0,2}'
94+
possibilities = ['ac', 'ad', 'aca', 'ada', 'acaa', 'adaa',
95+
'aac', 'aad', 'aacaa', 'aadaa', 'aacaaaa', 'aadaaaa',
96+
'aaac', 'aaad', 'aaacaaa', 'aaadaaa', 'aaacaaaaaa', 'aaadaaaaaa']
97+
98+
f_finite(regex, possibilities)

0 commit comments

Comments
 (0)