@@ -68,7 +68,7 @@ export class Lexer {
6868 token = token . next ;
6969 } else {
7070 // Read the next token and form a link in the token linked-list.
71- const nextToken = readNextToken ( this , token ) ;
71+ const nextToken = readNextToken ( this , token . end ) ;
7272 // @ts -expect-error next is only mutable during parsing.
7373 token . next = nextToken ;
7474 // @ts -expect-error prev is only mutable during parsing.
@@ -161,10 +161,10 @@ function createToken(
161161 * punctuators immediately or calls the appropriate helper function for more
162162 * complicated tokens.
163163 */
164- function readNextToken ( lexer : Lexer , prev : Token ) : Token {
164+ function readNextToken ( lexer : Lexer , start : number ) : Token {
165165 const body = lexer . source . body ;
166166 const bodyLength = body . length ;
167- let position = prev . end ;
167+ let position = start ;
168168
169169 while ( position < bodyLength ) {
170170 const code = body . charCodeAt ( position ) ;
@@ -185,22 +185,22 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
185185 // - "Space (U+0020)"
186186 //
187187 // Comma :: ,
188- case 0x0009 : // \t
189- case 0x0020 : // <space>
190- case 0x002c : // ,
191- case 0xfeff : // <BOM>
188+ case 0xfeff : // <BOM>
189+ case 0x0009 : // \t
190+ case 0x0020 : // <space>
191+ case 0x002c : // ,
192192 ++ position ;
193193 continue ;
194194 // LineTerminator ::
195195 // - "New Line (U+000A)"
196196 // - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"]
197197 // - "Carriage Return (U+000D)" "New Line (U+000A)"
198- case 0x000a : // \n
198+ case 0x000a : // \n
199199 ++ position ;
200200 ++ lexer . line ;
201201 lexer . lineStart = position ;
202202 continue ;
203- case 0x000d : // \r
203+ case 0x000d : // \r
204204 if ( body . charCodeAt ( position + 1 ) === 0x000a ) {
205205 position += 2 ;
206206 } else {
@@ -210,7 +210,7 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
210210 lexer . lineStart = position ;
211211 continue ;
212212 // Comment
213- case 0x0023 : // #
213+ case 0x0023 : // #
214214 return readComment ( lexer , position ) ;
215215 // Token ::
216216 // - Punctuator
@@ -220,42 +220,42 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
220220 // - StringValue
221221 //
222222 // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
223- case 0x0021 : // !
223+ case 0x0021 : // !
224224 return createToken ( lexer , TokenKind . BANG , position , position + 1 ) ;
225- case 0x0024 : // $
225+ case 0x0024 : // $
226226 return createToken ( lexer , TokenKind . DOLLAR , position , position + 1 ) ;
227- case 0x0026 : // &
227+ case 0x0026 : // &
228228 return createToken ( lexer , TokenKind . AMP , position , position + 1 ) ;
229- case 0x0028 : // (
229+ case 0x0028 : // (
230230 return createToken ( lexer , TokenKind . PAREN_L , position , position + 1 ) ;
231- case 0x0029 : // )
231+ case 0x0029 : // )
232232 return createToken ( lexer , TokenKind . PAREN_R , position , position + 1 ) ;
233- case 0x002e : // .
233+ case 0x002e : // .
234234 if (
235235 body . charCodeAt ( position + 1 ) === 0x002e &&
236236 body . charCodeAt ( position + 2 ) === 0x002e
237237 ) {
238238 return createToken ( lexer , TokenKind . SPREAD , position , position + 3 ) ;
239239 }
240240 break ;
241- case 0x003a : // :
241+ case 0x003a : // :
242242 return createToken ( lexer , TokenKind . COLON , position , position + 1 ) ;
243- case 0x003d : // =
243+ case 0x003d : // =
244244 return createToken ( lexer , TokenKind . EQUALS , position , position + 1 ) ;
245- case 0x0040 : // @
245+ case 0x0040 : // @
246246 return createToken ( lexer , TokenKind . AT , position , position + 1 ) ;
247- case 0x005b : // [
247+ case 0x005b : // [
248248 return createToken ( lexer , TokenKind . BRACKET_L , position , position + 1 ) ;
249- case 0x005d : // ]
249+ case 0x005d : // ]
250250 return createToken ( lexer , TokenKind . BRACKET_R , position , position + 1 ) ;
251- case 0x007b : // {
251+ case 0x007b : // {
252252 return createToken ( lexer , TokenKind . BRACE_L , position , position + 1 ) ;
253- case 0x007c : // |
253+ case 0x007c : // |
254254 return createToken ( lexer , TokenKind . PIPE , position , position + 1 ) ;
255- case 0x007d : // }
255+ case 0x007d : // }
256256 return createToken ( lexer , TokenKind . BRACE_R , position , position + 1 ) ;
257257 // StringValue
258- case 0x0022 : // "
258+ case 0x0022 : // "
259259 if (
260260 body . charCodeAt ( position + 1 ) === 0x0022 &&
261261 body . charCodeAt ( position + 2 ) === 0x0022
@@ -265,9 +265,8 @@ function readNextToken(lexer: Lexer, prev: Token): Token {
265265 return readString ( lexer , position ) ;
266266 }
267267
268- // IntValue | FloatValue
269- // 0-9 | -
270- if ( ( code >= 0x0030 && code <= 0x0039 ) || code === 0x002d ) {
268+ // IntValue | FloatValue (Digit | -)
269+ if ( isDigit ( code ) || code === 0x002d ) {
271270 return readNumber ( lexer , position , code ) ;
272271 }
273272
@@ -305,7 +304,7 @@ function readComment(lexer: Lexer, start: number): Token {
305304 while ( position < bodyLength ) {
306305 const code = body . charCodeAt ( position ) ;
307306
308- // LineTerminator (\n or \r)
307+ // LineTerminator (\n | \r)
309308 if ( code === 0x000a || code === 0x000d ) {
310309 break ;
311310 }
@@ -331,9 +330,6 @@ function readComment(lexer: Lexer, start: number): Token {
331330 * Reads a number token from the source file, either a FloatValue or an IntValue
332331 * depending on whether a FractionalPart or ExponentPart is encountered.
333332 *
334- * Digit :: one of
335- * - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
336- *
337333 * IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
338334 *
339335 * IntegerPart ::
@@ -371,8 +367,7 @@ function readNumber(lexer: Lexer, start: number, firstCode: number): Token {
371367 // Zero (0)
372368 if ( code === 0x0030 ) {
373369 code = body . charCodeAt ( ++ position ) ;
374- // Digit (0-9)
375- if ( code >= 0x0030 && code <= 0x0039 ) {
370+ if ( isDigit ( code ) ) {
376371 throw syntaxError (
377372 lexer . source ,
378373 position ,
@@ -434,25 +429,26 @@ function readNumber(lexer: Lexer, start: number, firstCode: number): Token {
434429 * Returns the new position in the source after reading one or more digits.
435430 */
436431function readDigits ( lexer : Lexer , start : number , firstCode : number ) : number {
432+ if ( ! isDigit ( firstCode ) ) { // a run of digits must begin with a digit; fail before scanning
433+ throw syntaxError (
434+ lexer . source ,
435+ start ,
436+ `Invalid number, expected digit but got: ${ printCodePointAt (
437+ lexer ,
438+ start ,
439+ ) } .`,
440+ ) ;
441+ }
442+
437443 const body = lexer . source . body ;
438444 let position = start ;
439445 let code = firstCode ;
440446
441- // 0 - 9
442- if ( code >= 0x0030 && code <= 0x0039 ) {
443- do {
444- code = body . charCodeAt ( ++ position ) ;
445- } while ( code >= 0x0030 && code <= 0x0039 ) ; // 0 - 9
446- return position ;
447- }
448- throw syntaxError (
449- lexer . source ,
450- position ,
451- `Invalid number, expected digit but got: ${ printCodePointAt (
452- lexer ,
453- position ,
454- ) } .`,
455- ) ;
447+ do { // firstCode was validated above, so advance before testing again
448+ code = body . charCodeAt ( ++ position ) ;
449+ } while ( isDigit ( code ) ) ; // stops at the first non-digit (charCodeAt yields NaN past the end)
450+
451+ return position ; // index just past the last digit read
456452}
457453
458454/**
@@ -500,7 +496,7 @@ function readString(lexer: Lexer, start: number): Token {
500496 continue ;
501497 }
502498
503- // LineTerminator (\n or \r)
499+ // LineTerminator (\n | \r)
504500 if ( code === 0x000a || code === 0x000d ) {
505501 break ;
506502 }
@@ -545,40 +541,39 @@ function readEscapedUnicode(lexer: Lexer, position: number): EscapeSequence {
545541}
546542
547543/**
548- * Reads four hexadecimal chars and returns the integer that 16bit hexadecimal
549- * string represents. For example, "000f" will return 15, and "dead" will
550- * return 57005.
544+ * Reads four hexadecimal characters and returns the non-negative integer that
545+ * 16-bit hexadecimal string represents. For example, "000f" will return 15, and
546+ * "dead" will return 57005.
551547 *
552548 * Returns a negative number if any char was not a valid hexadecimal digit.
553- *
554- * This is implemented by noting that hexValue() returns -1 on error,
555- * which means the result of ORing the hexValue() will also be negative.
556549 */
557550function read16BitHexCode ( body : string , position : number ) : number {
551+ // readHexDigit() returns -1 on error. ORing a negative value with any other
552+ // value always produces a negative value.
558553 return (
559- ( hexValue ( body . charCodeAt ( position ) ) << 12 ) |
560- ( hexValue ( body . charCodeAt ( position + 1 ) ) << 8 ) |
561- ( hexValue ( body . charCodeAt ( position + 2 ) ) << 4 ) |
562- hexValue ( body . charCodeAt ( position + 3 ) )
554+ ( readHexDigit ( body . charCodeAt ( position ) ) << 12 ) |
555+ ( readHexDigit ( body . charCodeAt ( position + 1 ) ) << 8 ) |
556+ ( readHexDigit ( body . charCodeAt ( position + 2 ) ) << 4 ) |
557+ readHexDigit ( body . charCodeAt ( position + 3 ) )
563558 ) ;
564559}
565560
566561/**
567- * Converts a hex character to its integer value.
562+ * Reads a hexadecimal character and returns its positive integer value (0-15) .
568563 *
569564 * '0' becomes 0, '9' becomes 9
570565 * 'A' becomes 10, 'F' becomes 15
571566 * 'a' becomes 10, 'f' becomes 15
572567 *
573- * Any other input returns -1 .
568+ * Returns -1 if the provided character code was not a valid hexadecimal digit .
574569 */
575- function hexValue ( code : number ) : number {
576- return code >= 0x0030 && code <= 0x0039
577- ? code - 0x0030 // 0-9
578- : code >= 0x0041 && code <= 0x0046
579- ? code - 0x0037 // A-F
580- : code >= 0x0061 && code <= 0x0066
581- ? code - 0x0057 // a-f
570+ function readHexDigit ( code : number ) : number {
571+ return code >= 0x0030 && code <= 0x0039 // 0-9
572+ ? code - 0x0030
573+ : code >= 0x0041 && code <= 0x0046 // A-F
574+ ? code - 0x0037
575+ : code >= 0x0061 && code <= 0x0066 // a-f
576+ ? code - 0x0057
582577 : - 1 ;
583578}
584579
@@ -718,15 +713,6 @@ function readBlockString(lexer: Lexer, start: number): Token {
718713 * - Letter
719714 * - Digit
720715 * - `_`
721- *
722- * Letter :: one of
723- * - `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M`
724- * - `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z`
725- * - `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m`
726- * - `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z`
727- *
728- * Digit :: one of
729- * - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
730716 */
731717function readName ( lexer : Lexer , start : number ) : Token {
732718 const body = lexer . source . body ;
@@ -736,17 +722,13 @@ function readName(lexer: Lexer, start: number): Token {
736722 while ( position < bodyLength ) {
737723 const code = body . charCodeAt ( position ) ;
738724 // NameContinue
739- if (
740- ( code >= 0x0061 && code <= 0x007a ) || // a-z
741- ( code >= 0x0041 && code <= 0x005a ) || // A-Z
742- ( code >= 0x0030 && code <= 0x0039 ) || // 0-9
743- code === 0x005f // _
744- ) {
725+ if ( isLetter ( code ) || isDigit ( code ) || code === 0x005f ) {
745726 ++ position ;
746727 } else {
747728 break ;
748729 }
749730 }
731+
750732 return createToken (
751733 lexer ,
752734 TokenKind . NAME ,
@@ -756,11 +738,28 @@ function readName(lexer: Lexer, start: number): Token {
756738 ) ;
757739}
758740
759- // a-z | A-Z | _
760741function isNameStart ( code : number ) : boolean {
742+ return isLetter ( code ) || code === 0x005f ; // Letter | _
743+ }
744+
745+ /**
746+ * Digit :: one of
747+ * - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
748+ */
749+ function isDigit ( code : number ) : boolean {
750+ return code >= 0x0030 && code <= 0x0039 ; // 0-9
751+ }
752+
753+ /**
754+ * Letter :: one of
755+ * - `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M`
756+ * - `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z`
757+ * - `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m`
758+ * - `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z`
759+ */
760+ function isLetter ( code : number ) : boolean {
761761 return (
762- ( code >= 0x0061 && code <= 0x007a ) ||
763- ( code >= 0x0041 && code <= 0x005a ) ||
764- code === 0x005f
762+ ( code >= 0x0061 && code <= 0x007a ) || // a-z
763+ ( code >= 0x0041 && code <= 0x005a ) // A-Z
765764 ) ;
766765 }
0 commit comments