Skip to content

Commit bf0e4f4

Browse files
committed
feat(parser): support \uYYYY and \xYY
1 parent e753b37 commit bf0e4f4

File tree

3 files changed

+44
-27
lines changed

3 files changed

+44
-27
lines changed

src/code-point-range.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,14 @@ export function strictlyDisjoint(rangeA: CodePointRange, rangeB: CodePointRange)
4242
return isStrictlyBefore(rangeA, rangeB) || isStrictlyAfter(rangeA, rangeB)
4343
}
4444

45-
export function singleton(char: string): CodePointRange {
46-
const codePoint = char.codePointAt(0)
47-
assert(codePoint !== undefined && char.length <= 1, `Invalid character: ${char}`)
48-
return { start: codePoint, end: codePoint }
45+
/**
 * Builds the range that contains exactly one code point.
 *
 * @param char - Either a numeric code point, or a string holding exactly one
 *   character (one UTF-16 code unit, or one surrogate pair for astral
 *   characters).
 * @returns A range whose `start` and `end` are both that code point.
 * @throws If `char` is a number that is not an integer in `0..0x10FFFF`, or a
 *   string that is empty or holds more than one code point.
 */
export function singleton(char: string | number): CodePointRange {
  if (typeof char === 'number') {
    // Reject NaN, fractions and values outside the Unicode code space instead
    // of silently producing a meaningless range.
    assert(
      Number.isInteger(char) && char >= 0 && char <= 0x10FFFF,
      `Invalid code point: ${char}`
    )
    return { start: char, end: char }
  }

  const codePoint = char.codePointAt(0)
  // Compare against the UTF-16 length of the decoded code point rather than
  // `length <= 1`: an astral character is stored as a surrogate pair and has
  // length 2 although it is a single code point.
  assert(
    codePoint !== undefined && char.length === String.fromCodePoint(codePoint).length,
    `Invalid character: ${char}`
  )
  return { start: codePoint, end: codePoint }
}
5054

5155
export function size(range: CodePointRange): number {

src/parser.ts

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { identity, checkedAllCases } from './utils'
1+
import { identity, checkedAllCases, assert } from './utils'
22

33
/**
 * Outcome of running a parser.
 *
 * `value` carries the produced result; `restInput` is presumably the part of
 * the input that is still unconsumed (confirm against `Parser`).
 */
export type ParseResult<T> = {
  value: T
  restInput: string
}
44

@@ -144,6 +144,17 @@ export function some<T>(parser: Parser<T>): Parser<[T, ...T[]]> {
144144
)
145145
}
146146

147+
/**
 * Applies `parser` exactly `n` times, one after another, and collects the
 * results.
 *
 * @param n - Number of repetitions; must be a non-negative integer.
 * @param parser - Parser applied for every repetition.
 * @returns A parser producing the `n` values with the first parsed value at
 *   index 0. For `n === 0` this is `pure([])`.
 * @throws If `n` is negative or not an integer (raised when `count` is
 *   called, not when the returned parser runs).
 */
export function count<T>(n: number, parser: Parser<T>): Parser<T[]> {
  assert(Number.isInteger(n) && n >= 0)

  // Build the whole chain once, starting from the empty tail and wrapping it
  // in one more repetition per iteration. This avoids re-creating the tail
  // parser (and re-validating `n`) every time the returned parser is run.
  let repeated: Parser<T[]> = pure([])
  for (let i = 0; i < n; i++) {
    const tail = repeated
    repeated = parser.andThen(first => tail.map(rest => [first, ...rest]))
  }
  return repeated
}
157+
147158
export function optional<T>(parser: Parser<T>): Parser<T | undefined> {
148159
return new Parser(input => {
149160
try {
@@ -179,16 +190,14 @@ export function satisfy(predicate: (char: string) => boolean): Parser<string> {
179190
})
180191
}
181192

182-
export const digitChar: Parser<string> = satisfy(char => char.match(/^[0-9]$/) !== null)
193+
/** Parser built with `satisfy` whose predicate accepts a string made of exactly one ASCII digit (`0`-`9`). */
export const digitChar: Parser<string> = satisfy(function isDecimalDigit(candidate) {
  return /^[0-9]$/.test(candidate)
})
183194

184-
export const anyChar: Parser<string> = satisfy(_ => true)
195+
/** Parser built with `satisfy` whose predicate accepts a string made of exactly one hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`). */
export const hexChar: Parser<string> = satisfy(function isHexDigit(candidate) {
  return /^[0-9a-fA-F]$/.test(candidate)
})
185196

186-
function digitsToDecimal(digits: number[]): number {
187-
return digits.reduce((acc, digit) => 10*acc + digit, 0)
188-
}
197+
/** Parser built with `satisfy` whose predicate accepts every character it is given. */
export const anyChar: Parser<string> = satisfy(() => true)
189198

190199
/**
 * Parses one or more `digitChar` matches and converts them to a number.
 *
 * The matched digits are joined into one string and read as base 10, so the
 * usual `number` limits apply: values above `Number.MAX_SAFE_INTEGER` lose
 * precision.
 */
export const decimal: Parser<number> = some(digitChar).map(
  digits => parseInt(digits.join(''), 10)
)
193202

194203
/**

src/regex-parser.ts

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,27 +46,31 @@ const unescapedCharInsideBrackets = P.satisfy(Range.mustBeEscapedOrInBrackets)
4646

4747
/**
 * Thrown by the regex parser when it meets syntax it recognises but does not
 * support.
 */
export class UnsupportedSyntaxError extends Error {}
// Set the name on the prototype (as the built-in error classes do) so that
// logs and stack traces show `UnsupportedSyntaxError` instead of a bare
// `Error`, without changing the inherited constructor signature.
UnsupportedSyntaxError.prototype.name = 'UnsupportedSyntaxError'
4848

49-
const escapeSequence = P.string('\\').andThen(_ => P.anyChar).map(escapedChar => {
49+
/**
 * Parses a backslash escape: a literal `\`, the character after it and, for
 * `\x` / `\u`, the hex digits that follow.
 *
 * - `\w \W \s \S \d \D` yield the matching `CharSet` constant.
 * - `\t \r \n \v \f \0` yield the singleton set of that control character.
 * - `\xYY` and `\uYYYY` read exactly 2 / 4 hex digits and yield the singleton
 *   set of the code point they encode.
 * - Any other character yields the singleton set of that character itself.
 *
 * Throws `UnsupportedSyntaxError` from the `andThen` callback for `\b`, `\c`,
 * `\p` and `\P`.
 */
const escapeSequence = P.string('\\').andThen(_ => P.anyChar).andThen(escapedChar => {
  // Reads exactly `digitCount` hex digits and turns them into a one-code-point set.
  const hexEscape = (digitCount: number) =>
    P.count(digitCount, P.hexChar).map(digits =>
      CharSet.fromRange(Range.singleton(parseInt(digits.join(''), 16)))
    )

  switch (escapedChar) {
    case 'w': return P.pure(CharSet.wordChars)
    case 'W': return P.pure(CharSet.nonWordChars)
    case 's': return P.pure(CharSet.whiteSpaceChars)
    case 'S': return P.pure(CharSet.nonWhiteSpaceChars)
    case 'd': return P.pure(CharSet.digitChars)
    case 'D': return P.pure(CharSet.nonDigitChars)
    case 't': return P.pure(CharSet.singleton('\t')) // horizontal tab
    case 'r': return P.pure(CharSet.singleton('\r')) // carriage return
    case 'n': return P.pure(CharSet.singleton('\n')) // line feed
    case 'v': return P.pure(CharSet.singleton('\v')) // vertical tab
    case 'f': return P.pure(CharSet.singleton('\f')) // form feed
    case '0': return P.pure(CharSet.singleton('\0')) // NUL character
    // The backslash must be doubled in these messages: inside a string literal
    // `\b` is a backspace character and `\c` collapses to a plain `c`.
    case 'b': throw new UnsupportedSyntaxError('\\b word-boundary assertion not supported')
    case 'c': throw new UnsupportedSyntaxError('\\cX control characters not supported')
    case 'x': return hexEscape(2)
    case 'u': return hexEscape(4)
    case 'p': throw new UnsupportedSyntaxError('\\p not supported')
    case 'P': throw new UnsupportedSyntaxError('\\P not supported')
    default: return P.pure(CharSet.singleton(escapedChar)) // match character literally
  }
})
7276

0 commit comments

Comments
 (0)