From 8957d3367a565f78c3e49771c9e13fc4879f7bb6 Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sat, 26 Nov 2016 20:45:07 -0800 Subject: [PATCH 1/8] implement isURLCodePoint Perhaps you were waiting since it's not a pretty thing to have to implement. I'm doing it for my own purposes, so here's a version in what I think is your style if you want it. --- src/url-state-machine.js | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index ee6dcd7df..bb0ff17d1 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -43,6 +43,41 @@ function isASCIIHex(c) { return isASCIIDigit(c) || (c >= 0x41 && c <= 0x46) || (c >= 0x61 && c <= 0x66); } +function isURLCodePoint(c) { + return ( + isASCIIAlphanumeric(c) + || c == 0x21 + || c == 0x24 + || (c >= 0x26 && c <= 0x2F) + || c == 0x3A + || c == 0x3B + || c == 0x3D + || c == 0x3F + || c == 0x40 + || c == 0x5F + || c == 0x7E + || (c >= 0xA0 && c <= 0xD7FF) + || (c >= 0xE000 && c <= 0xFDCF) + || (c >= 0xFDF0 && c <= 0xFFFD) + || (c >= 0x10000 && c <= 0x1FFFD) + || (c >= 0x20000 && c <= 0x2FFFD) + || (c >= 0x30000 && c <= 0x3FFFD) + || (c >= 0x40000 && c <= 0x4FFFD) + || (c >= 0x50000 && c <= 0x5FFFD) + || (c >= 0x60000 && c <= 0x6FFFD) + || (c >= 0x70000 && c <= 0x7FFFD) + || (c >= 0x80000 && c <= 0x8FFFD) + || (c >= 0x90000 && c <= 0x9FFFD) + || (c >= 0xA0000 && c <= 0xAFFFD) + || (c >= 0xB0000 && c <= 0xBFFFD) + || (c >= 0xC0000 && c <= 0xCFFFD) + || (c >= 0xD0000 && c <= 0xDFFFD) + || (c >= 0xE0000 && c <= 0xEFFFD) + || (c >= 0xF0000 && c <= 0xFFFFD) + || (c >= 0x100000 && c <= 0x10FFFD) + ); +} + function isSingleDot(buffer) { return buffer === "." || buffer.toLowerCase() === "%2e"; } @@ -946,7 +981,9 @@ URLStateMachine.prototype["parse path"] = function parsePath(c) { this.state = "fragment"; } } else { - // TODO: If c is not a URL code point and not "%", parse error. 
+ if(!isURLCodePoint(c) && c !== p("%")) { + this.parseError = true; + } if (c === p("%") && (!isASCIIHex(this.input[this.pointer + 1]) || From bb23d431b15efe3b57ff6aad0eac1d3cef2cfe0f Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sat, 26 Nov 2016 20:48:40 -0800 Subject: [PATCH 2/8] Update url-state-machine.js --- src/url-state-machine.js | 60 ++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index bb0ff17d1..168e3f695 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -45,36 +45,36 @@ function isASCIIHex(c) { function isURLCodePoint(c) { return ( - isASCIIAlphanumeric(c) - || c == 0x21 - || c == 0x24 - || (c >= 0x26 && c <= 0x2F) - || c == 0x3A - || c == 0x3B - || c == 0x3D - || c == 0x3F - || c == 0x40 - || c == 0x5F - || c == 0x7E - || (c >= 0xA0 && c <= 0xD7FF) - || (c >= 0xE000 && c <= 0xFDCF) - || (c >= 0xFDF0 && c <= 0xFFFD) - || (c >= 0x10000 && c <= 0x1FFFD) - || (c >= 0x20000 && c <= 0x2FFFD) - || (c >= 0x30000 && c <= 0x3FFFD) - || (c >= 0x40000 && c <= 0x4FFFD) - || (c >= 0x50000 && c <= 0x5FFFD) - || (c >= 0x60000 && c <= 0x6FFFD) - || (c >= 0x70000 && c <= 0x7FFFD) - || (c >= 0x80000 && c <= 0x8FFFD) - || (c >= 0x90000 && c <= 0x9FFFD) - || (c >= 0xA0000 && c <= 0xAFFFD) - || (c >= 0xB0000 && c <= 0xBFFFD) - || (c >= 0xC0000 && c <= 0xCFFFD) - || (c >= 0xD0000 && c <= 0xDFFFD) - || (c >= 0xE0000 && c <= 0xEFFFD) - || (c >= 0xF0000 && c <= 0xFFFFD) - || (c >= 0x100000 && c <= 0x10FFFD) + isASCIIAlphanumeric(c) || + c == 0x21 || + c == 0x24 || + (c >= 0x26 && c <= 0x2F) || + c == 0x3A || + c == 0x3B || + c == 0x3D || + c == 0x3F || + c == 0x40 || + c == 0x5F || + c == 0x7E || + (c >= 0xA0 && c <= 0xD7FF) || + (c >= 0xE000 && c <= 0xFDCF) || + (c >= 0xFDF0 && c <= 0xFFFD) || + (c >= 0x10000 && c <= 0x1FFFD) || + (c >= 0x20000 && c <= 0x2FFFD) || + (c >= 0x30000 && c <= 0x3FFFD) || + (c >= 0x40000 && c <= 0x4FFFD) || + (c >= 
0x50000 && c <= 0x5FFFD) || + (c >= 0x60000 && c <= 0x6FFFD) || + (c >= 0x70000 && c <= 0x7FFFD) || + (c >= 0x80000 && c <= 0x8FFFD) || + (c >= 0x90000 && c <= 0x9FFFD) || + (c >= 0xA0000 && c <= 0xAFFFD) || + (c >= 0xB0000 && c <= 0xBFFFD) || + (c >= 0xC0000 && c <= 0xCFFFD) || + (c >= 0xD0000 && c <= 0xDFFFD) || + (c >= 0xE0000 && c <= 0xEFFFD) || + (c >= 0xF0000 && c <= 0xFFFFD) || + (c >= 0x100000 && c <= 0x10FFFD) ); } From 6e619fd5994d83753c942e3d266db16d404db5c6 Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sat, 26 Nov 2016 20:52:32 -0800 Subject: [PATCH 3/8] implement isURLCodePoint --- src/url-state-machine.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index 168e3f695..6b1a0ab21 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -46,16 +46,16 @@ function isASCIIHex(c) { function isURLCodePoint(c) { return ( isASCIIAlphanumeric(c) || - c == 0x21 || - c == 0x24 || + c === 0x21 || + c === 0x24 || (c >= 0x26 && c <= 0x2F) || - c == 0x3A || - c == 0x3B || - c == 0x3D || - c == 0x3F || - c == 0x40 || - c == 0x5F || - c == 0x7E || + c === 0x3A || + c === 0x3B || + c === 0x3D || + c === 0x3F || + c === 0x40 || + c === 0x5F || + c === 0x7E || (c >= 0xA0 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFDCF) || (c >= 0xFDF0 && c <= 0xFFFD) || From 93643a6c8f5b347ec68be4e4ee69dc781b37cf98 Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sat, 26 Nov 2016 20:54:28 -0800 Subject: [PATCH 4/8] implement isURLCodePoint --- src/url-state-machine.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index 6b1a0ab21..50dee9daf 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -981,7 +981,7 @@ URLStateMachine.prototype["parse path"] = function parsePath(c) { this.state = "fragment"; } } else { - if(!isURLCodePoint(c) && c !== p("%")) { + if (!isURLCodePoint(c) && c !== 
p("%")) { this.parseError = true; } From a9d913390cbb41c364621bded92cc8fd8c7578f0 Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sun, 27 Nov 2016 02:17:44 -0800 Subject: [PATCH 5/8] implement isURLCodePoint() as per https://url.spec.whatwg.org/#url-code-points --- src/url-state-machine.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index 50dee9daf..c80740c1a 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -1012,8 +1012,7 @@ URLStateMachine.prototype["parse cannot-be-a-base-URL path"] = function parseCan this.url.fragment = ""; this.state = "fragment"; } else { - // TODO: Add: not a URL code point - if (!isNaN(c) && c !== p("%")) { + if (!isNaN(c) && !isURLCodePoint(c) && c !== p("%")) { this.parseError = true; } @@ -1053,7 +1052,10 @@ URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) { this.state = "fragment"; } } else { - // TODO: If c is not a URL code point and not "%", parse error. + if (!isURLCodePoint(c) && c !== p('%')) { + this.parseError = true; + } + if (c === p("%") && (!isASCIIHex(this.input[this.pointer + 1]) || !isASCIIHex(this.input[this.pointer + 2]))) { @@ -1071,7 +1073,10 @@ URLStateMachine.prototype["parse fragment"] = function parseFragment(c, cStr) { } else if (c === 0x0) { this.parseError = true; } else { - // TODO: If c is not a URL code point and not "%", parse error. 
+ if (!isURLCodePoint(c) && c !== '%') { + this.parseError = true; + } + if (c === p("%") && (!isASCIIHex(this.input[this.pointer + 1]) || !isASCIIHex(this.input[this.pointer + 2]))) { From 0ac0e1d5eab572818a07995c02e79fcd5d564550 Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sun, 27 Nov 2016 02:22:27 -0800 Subject: [PATCH 6/8] implement isURLCodePoint() changing single quotes to double quotes --- src/url-state-machine.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index c80740c1a..c6377a607 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -1052,7 +1052,7 @@ URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) { this.state = "fragment"; } } else { - if (!isURLCodePoint(c) && c !== p('%')) { + if (!isURLCodePoint(c) && c !== p("%")) { this.parseError = true; } @@ -1073,7 +1073,7 @@ URLStateMachine.prototype["parse fragment"] = function parseFragment(c, cStr) { } else if (c === 0x0) { this.parseError = true; } else { - if (!isURLCodePoint(c) && c !== '%') { + if (!isURLCodePoint(c) && c !== "%") { this.parseError = true; } From 6329e6b8142585e90407bcdc821e172318d0ac65 Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sun, 27 Nov 2016 15:03:38 -0800 Subject: [PATCH 7/8] implement isURLCodePoint() changed initial codePoint checks to check literal characters passed through p() function; storing them in array --- src/url-state-machine.js | 58 +++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index c80740c1a..39439ecc8 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -43,38 +43,34 @@ function isASCIIHex(c) { return isASCIIDigit(c) || (c >= 0x41 && c <= 0x46) || (c >= 0x61 && c <= 0x66); } +const urlCodePoints = [ + p("!"), p("$"), p("'"), p("("), p(")"), p("*"), + p("+"), p(","), p("-"), p("."), p("/"), p(":"), + 
p(";"), p("="), p("?"), p("@"), p("_"), p("~") +]; function isURLCodePoint(c) { return ( isASCIIAlphanumeric(c) || - c === 0x21 || - c === 0x24 || - (c >= 0x26 && c <= 0x2F) || - c === 0x3A || - c === 0x3B || - c === 0x3D || - c === 0x3F || - c === 0x40 || - c === 0x5F || - c === 0x7E || - (c >= 0xA0 && c <= 0xD7FF) || - (c >= 0xE000 && c <= 0xFDCF) || - (c >= 0xFDF0 && c <= 0xFFFD) || - (c >= 0x10000 && c <= 0x1FFFD) || - (c >= 0x20000 && c <= 0x2FFFD) || - (c >= 0x30000 && c <= 0x3FFFD) || - (c >= 0x40000 && c <= 0x4FFFD) || - (c >= 0x50000 && c <= 0x5FFFD) || - (c >= 0x60000 && c <= 0x6FFFD) || - (c >= 0x70000 && c <= 0x7FFFD) || - (c >= 0x80000 && c <= 0x8FFFD) || - (c >= 0x90000 && c <= 0x9FFFD) || - (c >= 0xA0000 && c <= 0xAFFFD) || - (c >= 0xB0000 && c <= 0xBFFFD) || - (c >= 0xC0000 && c <= 0xCFFFD) || - (c >= 0xD0000 && c <= 0xDFFFD) || - (c >= 0xE0000 && c <= 0xEFFFD) || - (c >= 0xF0000 && c <= 0xFFFFD) || - (c >= 0x100000 && c <= 0x10FFFD) + urlCodePoints.indexOf(c) !== -1 || + (c >= 0xA0 && c <= 0xD7FF) || + (c >= 0xE000 && c <= 0xFDCF) || + (c >= 0xFDF0 && c <= 0xFFFD) || + (c >= 0x10000 && c <= 0x1FFFD) || + (c >= 0x20000 && c <= 0x2FFFD) || + (c >= 0x30000 && c <= 0x3FFFD) || + (c >= 0x40000 && c <= 0x4FFFD) || + (c >= 0x50000 && c <= 0x5FFFD) || + (c >= 0x60000 && c <= 0x6FFFD) || + (c >= 0x70000 && c <= 0x7FFFD) || + (c >= 0x80000 && c <= 0x8FFFD) || + (c >= 0x90000 && c <= 0x9FFFD) || + (c >= 0xA0000 && c <= 0xAFFFD) || + (c >= 0xB0000 && c <= 0xBFFFD) || + (c >= 0xC0000 && c <= 0xCFFFD) || + (c >= 0xD0000 && c <= 0xDFFFD) || + (c >= 0xE0000 && c <= 0xEFFFD) || + (c >= 0xF0000 && c <= 0xFFFFD) || + (c >= 0x100000 && c <= 0x10FFFD) ); } @@ -1052,7 +1048,7 @@ URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) { this.state = "fragment"; } } else { - if (!isURLCodePoint(c) && c !== p('%')) { + if (!isURLCodePoint(c) && c !== p("%")) { this.parseError = true; } @@ -1073,7 +1069,7 @@ URLStateMachine.prototype["parse fragment"] = 
function parseFragment(c, cStr) { } else if (c === 0x0) { this.parseError = true; } else { - if (!isURLCodePoint(c) && c !== '%') { + if (!isURLCodePoint(c) && c !== p("%")) { this.parseError = true; } From 734970e58dbca26011c9490adc447ba871bf5ab0 Mon Sep 17 00:00:00 2001 From: Cameron Steele Date: Sun, 27 Nov 2016 15:07:33 -0800 Subject: [PATCH 8/8] implement isURLCodePoint() fixing indentation; also add the missing "&" (U+0026) URL code point to urlCodePoints --- src/url-state-machine.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/url-state-machine.js b/src/url-state-machine.js index ae14be30a..05f9746a5 100644 --- a/src/url-state-machine.js +++ b/src/url-state-machine.js @@ -44,9 +44,9 @@ function isASCIIHex(c) { } const urlCodePoints = [ - p("!"), p("$"), p("'"), p("("), p(")"), p("*"), - p("+"), p(","), p("-"), p("."), p("/"), p(":"), - p(";"), p("="), p("?"), p("@"), p("_"), p("~") + p("!"), p("$"), p("&"), p("'"), p("("), p(")"), p("*"), + p("+"), p(","), p("-"), p("."), p("/"), p(":"), + p(";"), p("="), p("?"), p("@"), p("_"), p("~") ]; function isURLCodePoint(c) { return (