Skip to content

Commit 9b1f56e

Browse files
committed
perf: more rewrite rules
1 parent 5ef999c commit 9b1f56e

File tree

2 files changed

+92
-45
lines changed

2 files changed

+92
-45
lines changed

src/regex.ts

Lines changed: 86 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -75,23 +75,23 @@ export function concat(left: ExtRegex, right: ExtRegex): ExtRegex {
7575
if (equal(empty, left))
7676
// ∅ · r ≈ ∅
7777
return empty
78-
else if (equal(empty, right))
78+
if (equal(empty, right))
7979
// r · ∅ ≈ ∅
8080
return empty
81-
else if (left.type === "concat")
81+
if (left.type === "concat")
8282
// (r · s) · t ≈ r · (s · t)
8383
return concat(left.left, concat(left.right, right))
84-
else if (left.type === "epsilon")
84+
if (left.type === "epsilon")
8585
// ε · r ≈ r
8686
return right
87-
else if (right.type === "epsilon")
87+
if (right.type === "epsilon")
8888
// r · ε ≈ r
8989
return left
90-
else if (left.type === 'union' && equal(left.right, epsilon)) {
90+
if (left.type === 'union' && equal(left.right, epsilon)) {
9191
if (equal(left.left, right))
9292
// (r + ε) · r ≈ r · (r + ε)
9393
return concat(right, left)
94-
else if (right.type === 'concat' && equal(left.left, right.left))
94+
if (right.type === 'concat' && equal(left.left, right.left))
9595
// (r + ε) · (r · s) ≈ r · ((r + ε) · s)
9696
return concat(right.left, concat(left, right.right))
9797
}
@@ -102,78 +102,116 @@ export function concat(left: ExtRegex, right: ExtRegex): ExtRegex {
102102
if (equal(left.inner, right))
103103
// r* · r ≈ r · r*
104104
return concat(right, left)
105-
else if (right.type === 'concat' && equal(left.inner, right.left))
105+
if (right.type === 'concat' && equal(left.inner, right.left))
106106
// r* · (r · s) ≈ r · (r* · s)
107107
return concat(left.inner, concat(left, right.right))
108-
else if (right.type === 'star' && equal(left.inner, right.inner))
108+
if (right.type === 'star' && equal(left.inner, right.inner))
109109
// r* · r* ≈ r*
110110
return left
111-
else if (right.type === 'concat' && right.left.type === 'star' && equal(left, right.left))
111+
if (right.type === 'concat' && right.left.type === 'star' && equal(left, right.left))
112112
// r* · (r* · s) ≈ r* · s
113113
return concat(left, right.right)
114114
}
115115

116116
return withHash({ type: 'concat', left, right })
117117
}
118118

119+
/**
 * Splits `regex` into a `[head, tail]` pair such that `head · tail` is
 * equivalent to `regex`.
 *
 * Only a `concat` node has a real split (`left`, `right`). Every other node
 * is returned whole as the head with `epsilon` as the tail, i.e. `r ≈ r · ε`.
 *
 * Callers use the pair to factor out common prefixes, so the pair MUST be an
 * exact factorisation of the input.
 *
 * @param regex expression to split
 * @returns `[head, tail]` with `head · tail ≈ regex`
 */
function extractFront(regex: StdRegex): [StdRegex, StdRegex]
function extractFront(regex: ExtRegex): [ExtRegex, ExtRegex]
function extractFront(regex: ExtRegex): [ExtRegex, ExtRegex] {
  switch (regex.type) {
    case 'epsilon': return [regex, epsilon]
    case 'literal': return [regex, epsilon]
    case 'concat': return [regex.left, regex.right]
    case 'union': return [regex, epsilon]
    // A star must not be split as [inner, star]: r · r* is r⁺ and rejects ε,
    // whereas r* accepts it. Splitting that way made `a + a*` rewrite to
    // `a · a*`, which changes the language.
    case 'star': return [regex, epsilon]
    case 'intersection': return [regex, epsilon]
    case 'complement': return [regex, epsilon]
  }
  // Presumably an exhaustiveness helper that never returns — confirm at its definition.
  checkedAllCases(regex)
}
133+
134+
/**
 * Splits `regex` into an `[init, last]` pair such that `init · last` is
 * equivalent to `regex`.
 *
 * Only a `concat` node has a real split (`left`, `right`). Every other node
 * is returned whole as the last element with `epsilon` as the init, i.e.
 * `r ≈ ε · r`.
 *
 * Callers use the pair to factor out common suffixes, so the pair MUST be an
 * exact factorisation of the input.
 *
 * @param regex expression to split
 * @returns `[init, last]` with `init · last ≈ regex`
 */
function extractBack(regex: StdRegex): [StdRegex, StdRegex]
function extractBack(regex: ExtRegex): [ExtRegex, ExtRegex]
function extractBack(regex: ExtRegex): [ExtRegex, ExtRegex] {
  switch (regex.type) {
    case 'epsilon': return [epsilon, epsilon]
    case 'literal': return [epsilon, regex]
    case 'concat': return [regex.left, regex.right]
    case 'union': return [epsilon, regex]
    // A star must not be split as [star, inner]: r* · r is r⁺ and rejects ε,
    // whereas r* accepts it. Splitting that way made `(s · r) + r*` rewrite to
    // `(s + r*) · r`, which changes the language.
    case 'star': return [epsilon, regex]
    case 'intersection': return [epsilon, regex]
    case 'complement': return [epsilon, regex]
  }
  // Presumably an exhaustiveness helper that never returns — confirm at its definition.
  checkedAllCases(regex)
}
148+
119149
/**
 * Builds the union `left + right`, applying language-preserving rewrite
 * rules first and only constructing a `union` node (via `withHash`) when no
 * rule matches.
 *
 * Rules are tried top to bottom and the first match wins. Several rules
 * recurse into `union`/`concat`, so their order matters.
 *
 * @param left left operand
 * @param right right operand
 * @returns an expression equivalent to `left + right`
 */
export function union(left: StdRegex, right: StdRegex): StdRegex
export function union(left: ExtRegex, right: ExtRegex): ExtRegex
export function union(left: ExtRegex, right: ExtRegex): ExtRegex {
  if (left.type === 'union')
    // (r + s) + t ≈ r + (s + t)
    return union(left.left, union(left.right, right))
  if (equal(left, right))
    // r + r ≈ r
    return left
  if (equal(left, empty))
    // ∅ + r ≈ r
    return right
  if (left.type === 'epsilon')
    // ε + r ≈ r + ε
    return union(right, left)
  if (equal(empty, right))
    // r + ∅ ≈ r
    return left
  if (equal(left, complement(empty)))
    // ¬∅ + r ≈ ¬∅
    return complement(empty)
  if (equal(complement(empty), right))
    // r + ¬∅ ≈ ¬∅
    return complement(empty)
  if (left.type === 'literal' && right.type === 'literal')
    // R + S ≈ R ∪ S
    return literal(CharSet.union(left.charset, right.charset))

  if (left.type === 'star' && right.type === 'epsilon')
    // r* + ε ≈ r*
    return left
  if (right.type === 'star' && equal(left, right.inner))
    // r + r* ≈ r*   (NOT r · r*, which would lose ε)
    return right
  if (left.type === 'star' && equal(left.inner, right))
    // r* + r ≈ r*
    return left

  if (right.type === 'union') {
    if (equal(left, right.left))
      // r + (r + s) ≈ r + s
      return union(left, right.right)
    if (equal(left, right.right))
      // r + (s + r) ≈ r + s
      return union(left, right.left)
  }

  // Factoring is only sound when each operand really is `head · tail`
  // (resp. `init · last`). A star is not of that form: r* accepts ε while
  // r · r* and r* · r do not. So star operands are never factored.
  if (left.type !== 'star' && right.type !== 'star') {
    const [leftHead, leftTail] = extractFront(left)
    const [rightHead, rightTail] = extractFront(right)

    if (equal(leftHead, rightHead))
      // (r · s) + (r · t) ≈ r · (s + t)
      // (r · s) + r       ≈ r · (s + ε)
      // r + (r · s)       ≈ r · (ε + s)
      return concat(leftHead, union(leftTail, rightTail))

    const [leftInit, leftLast] = extractBack(left)
    const [rightInit, rightLast] = extractBack(right)

    if (equal(leftLast, rightLast))
      // (s · r) + (t · r) ≈ (s + t) · r
      // (s · r) + r       ≈ (s + ε) · r
      // r + (s · r)       ≈ (s + ε) · r
      return concat(union(leftInit, rightInit), leftLast)
  }

  return withHash({ type: 'union', left, right })
}
179217

@@ -189,6 +227,9 @@ export function star(inner: ExtRegex): ExtRegex {
189227
else if (equal(empty, inner))
190228
// ∅∗ ≈ ε
191229
return epsilon
230+
else if (inner.type === 'concat' && inner.left.type === 'star' && inner.right.type === 'star')
231+
// (r∗ · s∗)∗ = (r + s)∗
232+
return star(union(inner.left.inner, inner.right.inner))
192233
else
193234
return withHash({ type: "star", inner })
194235
}

test/regex.spec.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,15 +179,21 @@ describe('rewrite rules', () => {
179179
[/^(b|a)|a$/, /^([ab])$/],
180180
[/^(a|b)|a$/, /^([ab])$/],
181181
[/^(a?)?$/, /^(a?)$/],
182+
[/^(a*)?$/, /^(a*)$/],
183+
[/^(a|a*)$/, /^(aa*)$/],
182184
// union-of-concat rules:
183185
[/^ab|ac$/, /^(a[bc])$/],
184186
[/^ba|ca$/, /^([bc]a)$/],
185187
[/^ab|a$/, /^(ab?)$/],
186188
[/^ba|a$/, /^(b?a)$/],
187189
[/^a|ab$/, /^(ab?)$/],
188190
[/^a|ba$/, /^(b?a)$/],
191+
// TODO:
192+
// [/^(a|a{2}|a{3}|a{4}|a{5})$/, /^(a{1,5})$/],
193+
// [/^(a|a{2}|a{3}|a{4}|a{5}|b)$/, /^(a{1,5}|b)$/],
189194
// star rules:
190195
[/^(a*)*$/, /^(a*)$/],
196+
[/^(a*b*)*$/, /^([ab]*)$/],
191197
])('rewrites %s to %s', (source, target) => {
192198
expect(RE.toRegExp(parseRegExp(source))).toEqual(target)
193199
})

0 commit comments

Comments
 (0)