From f79d727ca896b0a460999e1f2d35b7bbe0ec5add Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Aug 2022 19:08:00 +0300 Subject: [PATCH 1/5] init --- src/tokenizer.ts | 49 +++++++++++++++++++++--------------------------- src/util/text.ts | 9 +++++++-- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index da75224ff4..1dfc02c4b4 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -31,6 +31,8 @@ import { isIdentifierPart, isDecimal, isOctal, + isHex, + isHexPart, isHighSurrogate, isLowSurrogate } from "./util"; @@ -1313,30 +1315,24 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); + var value = i64_zero; var i64_4 = i64_new(4); var nextValue = value; var overflowOccurred = false; while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._9) { - // value = (value << 4) + c - CharCode._0; + if (isDecimal(c)) { + // (value << 4) + c - CharCode._0 nextValue = i64_add( i64_shl(value, i64_4), i64_new(c - CharCode._0) ); - } else if (c >= CharCode.A && c <= CharCode.F) { - // value = (value << 4) + 10 + c - CharCode.A; + } else if (isHexPart(c)) { + // (value << 4) + (c | 32) + (10 - CharCode.a) nextValue = i64_add( i64_shl(value, i64_4), - i64_new(10 + c - CharCode.A) - ); - } else if (c >= CharCode.a && c <= CharCode.f) { - // value = (value << 4) + 10 + c - CharCode.a; - nextValue = i64_add( - i64_shl(value, i64_4), - i64_new(10 + c - CharCode.a) + i64_new((c | 32) + (10 - CharCode.a)) ); } else if (c == CharCode._) { if (sepEnd == pos) { @@ -1386,14 +1382,14 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); + var value = i64_zero; var i64_10 = i64_new(10); var nextValue = value; var overflowOccurred = false; while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._9) { + if 
(isDecimal(c)) { // value = value * 10 + c - CharCode._0; nextValue = i64_add( i64_mul(value, i64_10), @@ -1451,7 +1447,7 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); + var value = i64_zero; var i64_3 = i64_new(3); var nextValue = value; var overflowOccurred = false; @@ -1511,21 +1507,20 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; var start = pos; var sepEnd = start; - var value = i64_new(0); - var i64_1 = i64_new(1); + var value = i64_zero; var nextValue = value; var overflowOccurred = false; while (pos < end) { let c = text.charCodeAt(pos); if (c == CharCode._0) { - // value = (value << 1); - nextValue = i64_shl(value, i64_1); + // value << 1 | 0 + nextValue = i64_shl(value, i64_one); } else if (c == CharCode._1) { - // value = (value << 1) + 1; - nextValue = i64_add( - i64_shl(value, i64_1), - i64_1 + // value << 1 | 1 + nextValue = i64_or( + i64_shl(value, i64_one), + i64_one ); } else if (c == CharCode._) { if (sepEnd == pos) { @@ -1665,12 +1660,10 @@ export class Tokenizer extends DiagnosticEmitter { var end = this.end; while (pos < end) { let c = text.charCodeAt(pos++); - if (c >= CharCode._0 && c <= CharCode._9) { + if (isDecimal(c)) { value = (value << 4) + c - CharCode._0; - } else if (c >= CharCode.A && c <= CharCode.F) { - value = (value << 4) + c + (10 - CharCode.A); - } else if (c >= CharCode.a && c <= CharCode.f) { - value = (value << 4) + c + (10 - CharCode.a); + } else if (isHexPart(c)) { + value = (value << 4) + (c | 32) + (10 - CharCode.a); } else if (~startIfTaggedTemplate) { this.pos = --pos; return text.substring(startIfTaggedTemplate, pos); diff --git a/src/util/text.ts b/src/util/text.ts index 2c25418f03..65d66a3619 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -234,10 +234,15 @@ export function isOctal(c: i32): bool { return c >= CharCode._0 && c <= CharCode._7; } +/** Tests if the specified character code is a 
valid hexadecimal symbol [a-f]. */ +export function isHexPart(c: i32): bool { + let c0 = c | 32; // unify uppercases and lowercases a|A - f|F + return c0 >= CharCode.a && c0 <= CharCode.f; +} + /** Tests if the specified character code is a valid hexadecimal digit. */ export function isHex(c: i32): bool { - let c0 = c | 32; // unify uppercases and lowercases a|A - f|F - return isDecimal(c) || (c0 >= CharCode.a && c0 <= CharCode.f); + return isDecimal(c) || isHexPart(c); } /** Tests if the specified character code is trivially alphanumeric. */ From a5640f18d3f55866d63d44907a03ddd308697287 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Aug 2022 19:16:52 +0300 Subject: [PATCH 2/5] fix --- src/tokenizer.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 1dfc02c4b4..687f9629dd 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -31,7 +31,6 @@ import { isIdentifierPart, isDecimal, isOctal, - isHex, isHexPart, isHighSurrogate, isLowSurrogate From a12fe7cf68c71a6a712d587d7c1b74262bf8e9e4 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Aug 2022 19:18:22 +0300 Subject: [PATCH 3/5] same for octal --- src/tokenizer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 687f9629dd..ba36b7084e 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1453,8 +1453,8 @@ export class Tokenizer extends DiagnosticEmitter { while (pos < end) { let c = text.charCodeAt(pos); - if (c >= CharCode._0 && c <= CharCode._7) { - // value = (value << 3) + c - CharCode._0; + if (isOctal(c)) { + // (value << 3) + c - CharCode._0 nextValue = i64_add( i64_shl(value, i64_3), i64_new(c - CharCode._0) From 0841adbd7e4adaee7ee6f1fee9ed290ba7131125 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Aug 2022 20:51:12 +0300 Subject: [PATCH 4/5] rename isHexPart to isHex --- src/tokenizer.ts | 6 +++--- src/util/text.ts | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff 
--git a/src/tokenizer.ts b/src/tokenizer.ts index ba36b7084e..9de5f44064 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -31,7 +31,7 @@ import { isIdentifierPart, isDecimal, isOctal, - isHexPart, + isHex, isHighSurrogate, isLowSurrogate } from "./util"; @@ -1327,7 +1327,7 @@ export class Tokenizer extends DiagnosticEmitter { i64_shl(value, i64_4), i64_new(c - CharCode._0) ); - } else if (isHexPart(c)) { + } else if (isHex(c)) { // (value << 4) + (c | 32) + (10 - CharCode.a) nextValue = i64_add( i64_shl(value, i64_4), @@ -1661,7 +1661,7 @@ export class Tokenizer extends DiagnosticEmitter { let c = text.charCodeAt(pos++); if (isDecimal(c)) { value = (value << 4) + c - CharCode._0; - } else if (isHexPart(c)) { + } else if (isHex(c)) { value = (value << 4) + (c | 32) + (10 - CharCode.a); } else if (~startIfTaggedTemplate) { this.pos = --pos; diff --git a/src/util/text.ts b/src/util/text.ts index 65d66a3619..b92b1f8bc7 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -235,14 +235,14 @@ export function isOctal(c: i32): bool { } /** Tests if the specified character code is a valid hexadecimal symbol [a-f]. */ -export function isHexPart(c: i32): bool { +export function isHex(c: i32): bool { let c0 = c | 32; // unify uppercases and lowercases a|A - f|F return c0 >= CharCode.a && c0 <= CharCode.f; } /** Tests if the specified character code is a valid hexadecimal digit. */ -export function isHex(c: i32): bool { - return isDecimal(c) || isHexPart(c); +export function isHexOrDecimal(c: i32): bool { + return isDecimal(c) || isHex(c); } /** Tests if the specified character code is trivially alphanumeric. 
*/ From e332f0dd3dcee9b8bfad539d798c638cde197547 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Fri, 26 Aug 2022 20:59:14 +0300 Subject: [PATCH 5/5] rename isHex to isHexBase --- src/tokenizer.ts | 6 +++--- src/util/text.ts | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 9de5f44064..b931a98438 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -31,7 +31,7 @@ import { isIdentifierPart, isDecimal, isOctal, - isHex, + isHexBase, isHighSurrogate, isLowSurrogate } from "./util"; @@ -1327,7 +1327,7 @@ export class Tokenizer extends DiagnosticEmitter { i64_shl(value, i64_4), i64_new(c - CharCode._0) ); - } else if (isHex(c)) { + } else if (isHexBase(c)) { // (value << 4) + (c | 32) + (10 - CharCode.a) nextValue = i64_add( i64_shl(value, i64_4), @@ -1661,7 +1661,7 @@ export class Tokenizer extends DiagnosticEmitter { let c = text.charCodeAt(pos++); if (isDecimal(c)) { value = (value << 4) + c - CharCode._0; - } else if (isHex(c)) { + } else if (isHexBase(c)) { value = (value << 4) + (c | 32) + (10 - CharCode.a); } else if (~startIfTaggedTemplate) { this.pos = --pos; diff --git a/src/util/text.ts b/src/util/text.ts index b92b1f8bc7..60fe6a5410 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -235,14 +235,14 @@ export function isOctal(c: i32): bool { } /** Tests if the specified character code is a valid hexadecimal symbol [a-f]. */ -export function isHex(c: i32): bool { +export function isHexBase(c: i32): bool { let c0 = c | 32; // unify uppercases and lowercases a|A - f|F return c0 >= CharCode.a && c0 <= CharCode.f; } /** Tests if the specified character code is a valid hexadecimal digit. */ export function isHexOrDecimal(c: i32): bool { - return isDecimal(c) || isHex(c); + return isDecimal(c) || isHexBase(c); } /** Tests if the specified character code is trivially alphanumeric. */