Skip to content

Commit fe60fdb

Browse files
committed
rewrite rule for sorting union members
Sorting union members automatically eliminates duplicates in unions. That reduces expression size and, in particular, avoids call-stack overflows in the benchmark for instances with otherwise very deep expression trees. However, the resulting expressions are sometimes even larger overall, and the sorting step also has a performance cost.
1 parent e4074db commit fe60fdb

File tree

2 files changed

+9
-36
lines changed

2 files changed

+9
-36
lines changed

benchmark/toStdRegex_output_length.js

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,8 @@ let avgMult = 0
77
let maxMult = -Infinity
88

99
const hardInstances = new Set([
10-
290, // call-stack overflow
11-
556, // ???
12-
658, // ???
13-
689, // call-stack overflow
14-
724, // ???
15-
777, // ???
16-
783, // out of memory
17-
787, // ???
18-
791, // ???
19-
831, // ???
20-
840, // stack overflow
21-
860, // ???
22-
871, // ???
2310
883, // ???
24-
884, // ???
25-
894, // ???
26-
900, // stack overflow
27-
908, // ???
28-
940, // ???
29-
948, // ???
30-
949, // ???
31-
954, // ???
32-
958, // ???
33-
961, // stack overflow
3411
964, // ???
35-
981, // ???
36-
991, // ???
37-
994, // ???
38-
996, // ???
3912
])
4013

4114
function run(inputRegExp, index) {
@@ -61,6 +34,8 @@ function run(inputRegExp, index) {
6134
avg. multiplier : ${avgMult}
6235
worst multiplier : ${maxMult}
6336
`)
37+
38+
console.log('#' + index, outputRegExp)
6439
} catch (err) {
6540
console.log('too many captures')
6641
}
@@ -73,10 +48,10 @@ const regexDatasetSorted = regexDataset.sort(
7348
(a,b) => a.source.length - b.source.length
7449
)
7550

76-
run(regexDatasetSorted[689], 689)
51+
run(regexDatasetSorted[689], 0)
7752

7853
// regexDatasetSorted
79-
// .filter(inst => !hardInstances.includes(inst))
54+
// .filter((inst, i) => !hardInstances.has(i))
8055
// .forEach(run)
8156

8257
console.log('time:', performance.now() - timeStart)

src/regex.ts

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -177,20 +177,18 @@ export function union(left: ExtRegex, right: ExtRegex): ExtRegex {
177177
// r* + ε = r*
178178
return left
179179

180+
// These rules sort union members and eliminate duplicates, e.g.
181+
// /b|a|b/ --> /a|b|b/ --> /a|b/
180182
if (right.type == 'union') {
181183
if (equal(left, right.left))
182184
// r + (r + s) = r + s
183185
return union(left, right.right)
184186
if (equal(left, right.right))
185187
// r + (s + r) = r + s
186188
return union(left, right.left)
187-
188-
// const [leftHead, leftTail] = extractFront(left)
189-
// const [rightHead, rightTail] = extractFront(right.left)
190-
// if (equal(leftHead, rightHead))
191-
// // (r · s) + ((r · t) + u) = (r · (s + t)) + u
192-
// return union(concat(left, union(leftTail, rightTail)), right.right)
193-
// // return concat(left, optional(union(leftTail, right.right)))
189+
if (left.hash > right.left.hash)
190+
// s + (r + t) = r + (s + t)
191+
return union(right.left, union(left, right.right))
194192
}
195193

196194
const [leftHead, leftTail] = extractFront(left)

0 commit comments

Comments
 (0)