Skip to content

Commit 6f868b0

Browse files
committed
fix(parser): union operands can be optional
Parser benchmark remaining parse errors: 128 / 749
1 parent ad47eb5 commit 6f868b0

File tree

2 files changed

+15
-7
lines changed

2 files changed

+15
-7
lines changed

src/regex-parser.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,6 @@ const regExpFlags = [
1717

1818
// type RegExpFlag = typeof regExpFlags[number]
1919

20-
// TODO:
21-
// - parse \uXXXX notation
22-
// - allow empty strings, e.g. regex like "(|)"
23-
// const emptyString = P.string('').map(() => RE.epsilon)
24-
2520
const startMarker = P.optional(P.string('^')).map(marker => {
2621
if (marker === undefined) {
2722
return RE.star(RE.anySingleChar)
@@ -225,6 +220,14 @@ function lookAheadOp(): P.Expr.BinaryOperator<RE.ExtRegex | undefined, RE.ExtReg
225220
)
226221
)
227222
}
223+
224+
/**
225+
* Parses expressions like `(a|b)`. The left and right operands are optional,
226+
* e.g. `(a|)` and `(|)` are also valid expressions.
227+
*/
228+
function unionOp(): P.Expr.BinaryOperator<RE.ExtRegex | undefined, RE.ExtRegex> {
229+
return P.string('|').map(_ => (left, right) => RE.union(left ?? RE.epsilon, right ?? RE.epsilon))
230+
}
228231

229232
function regex(): P.Parser<RE.ExtRegex> {
230233
return P.lazy(() => P.Expr.makeExprParser<RE.ExtRegex>(
@@ -236,7 +239,7 @@ function regex(): P.Parser<RE.ExtRegex> {
236239
{ type: 'postfix', op: P.string('?').map(_ => RE.optional) },
237240
{ type: 'infixRight', op: P.string('').map(_ => RE.concat) },
238241
{ type: 'infixRightOptional', op: lookAheadOp() },
239-
{ type: 'infixRight', op: P.string('|').map(_ => RE.union) },
242+
{ type: 'infixRightOptional', op: unionOp() },
240243
]
241244
))
242245
}

test/regex-parser.spec.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,15 @@ describe('parseRegexString', () => {
1717
[/^a+$/, RE.plus(RE.singleChar('a'))],
1818
[/^a?$/, RE.optional(RE.singleChar('a'))],
1919
[/^abc$/, RE.string('abc')],
20+
[/^ab*$/, RE.concat(RE.singleChar('a'), RE.star(RE.singleChar('b')))],
21+
// union:
2022
[/^a|b$/, RE.union(RE.singleChar('a'), RE.singleChar('b'))],
2123
[/^aa|bb$/, RE.union(RE.string('aa'), RE.string('bb'))],
2224
[/^(a|b)*$/, RE.star(RE.union(RE.singleChar('a'), RE.singleChar('b')))],
23-
[/^ab*$/, RE.concat(RE.singleChar('a'), RE.star(RE.singleChar('b')))],
25+
[/^(|a)$/, RE.optional(RE.singleChar('a'))],
26+
[/^(a||)$/, RE.optional(RE.singleChar('a'))],
27+
[/^(|a|)$/, RE.optional(RE.singleChar('a'))],
28+
[/^(|)$/, RE.epsilon],
2429
// bounded quantifier:
2530
[/^a{3}$/, RE.repeat(RE.singleChar('a'), 3)],
2631
[/^a{3,}$/, RE.repeat(RE.singleChar('a'), { min: 3 })],

0 commit comments

Comments
 (0)