Skip to content

Commit 3c27c80

Browse files
committed
fix(parser): only allow alphaNum ranges
Char ranges like /[a-z]/ are only allowed for alphanumeric characters, i.e. [a-z], [0-9], [A-Z], or any sub-range of those. Also, it turns out that in [a-] the dash is interpreted literally. Remaining parse errors in parser benchmark: 144/749.
1 parent c29f00b commit 3c27c80

File tree

2 files changed

+35
-8
lines changed

2 files changed

+35
-8
lines changed

src/regex-parser.ts

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ const wildcard = P.string('.').map(
4444

4545
const unescapedChar = P.satisfy(Range.neverMustBeEscaped)
4646

47+
const alphaNumChar = P.satisfy(char => /^[a-zA-Z0-9]$/.test(char))
48+
4749
const unescapedCharInsideBrackets = P.satisfy(Range.mustBeEscapedOrInBrackets)
4850
.map(CharSet.singleton)
4951

@@ -73,19 +75,38 @@ const escapeSequence = P.string('\\').andThen(_ => P.anyChar).map(escapedChar =>
7375
}
7476
})
7577

76-
const codePointRange: P.Parser<CharSet.CharSet> =
77-
unescapedChar.andThen(start =>
78-
P.optional(P.string('-').andThen(_ => unescapedChar))
79-
.map(end => CharSet.charRange(start, end ?? start))
80-
)
78+
// E.g. "a-z", "0-9", "A-Z"
79+
const alphaNumRange: P.Parser<CharSet.CharSet> = alphaNumChar.andThen(start =>
80+
P.optional(P.string('-')).andThen(dash => {
81+
if (dash === undefined) {
82+
// e.g. [a]
83+
return P.pure(CharSet.singleton(start))
84+
} else {
85+
return P.optional(alphaNumChar).map(end => {
86+
if (end === undefined) {
87+
// e.g. [a-] so dash is interpreted literally
88+
return CharSet.fromArray([start, dash])
89+
} else {
90+
// e.g. [a-z]
91+
return CharSet.charRange(start, end)
92+
}
93+
})
94+
}
95+
})
96+
)
8197

8298
const charSet = P.choice([
8399
P.between(
84100
// QUESTION: can brackets be nested?
85101
P.string('['),
86102
P.string(']'),
87103
P.optional(P.string('^')).andThen(negated =>
88-
P.many(P.choice([escapeSequence, codePointRange, unescapedCharInsideBrackets])).map(
104+
P.many(P.choice([
105+
escapeSequence, // e.g. "\$", "\]"
106+
alphaNumRange, // e.g. "a-z", "0-9" (will also match just "a", "3")
107+
unescapedCharInsideBrackets, // e.g. "$", "."
108+
unescapedChar.map(CharSet.singleton), // e.g. "#", "%"
109+
])).map(
89110
sets => {
90111
if (negated === undefined)
91112
return sets.reduce(CharSet.union, CharSet.empty)

test/regex-parser.spec.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ describe('parseRegexString', () => {
3131
[/^\.$/, RE.literal(CharSet.singleton('.'))],
3232
// char class from range:
3333
[/^[a-z]$/, RE.literal(CharSet.charRange('a', 'z'))],
34+
[/^[a-]$/, RE.literal(CharSet.fromArray(['a', '-']))],
3435
// negative char class:
3536
[/^[^abc]$/, RE.literal(CharSet.complement(CharSet.fromArray(['a', 'b', 'c'])))],
3637
// non-capturing groups
@@ -52,14 +53,19 @@ describe('parseRegexString', () => {
5253
// other special chars need escape even inside brackets:
5354
[/^[\\\]\/]$/, RE.literal(CharSet.fromArray([...'\\]/']))],
5455
])('can parse %s', (regexp, expected) => {
55-
expect(parseRegExp(regexp)).toEqual(expected)
56+
expect(parseRegExp(regexp).hash).toBe(expected.hash)
5657
})
5758

5859
it.each([
59-
['a+*'],
60+
// unclosed parenthesis:
6061
['(a'],
62+
// combined quantifiers:
63+
['a+*'],
6164
['a?{2}'],
6265
['a+{2}'],
66+
// TODO: invalid ranges:
67+
// ['[a-#]'],
68+
// ['[%-#]'],
6369
])('rejects invalid regex /%s/', (regexStr) => {
6470
expect(() => parseRegexString(regexStr)).toThrowError(ParseError)
6571
})

0 commit comments

Comments
 (0)