Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url: refactor pathToFileURL to native #55476

Merged
merged 2 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 15 additions & 77 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ const {
ObjectSetPrototypeOf,
ReflectGetOwnPropertyDescriptor,
ReflectOwnKeys,
RegExpPrototypeSymbolReplace,
SafeMap,
SafeSet,
StringPrototypeCharAt,
Expand Down Expand Up @@ -779,6 +778,8 @@ function isURL(self) {
* for invalid URL inputs.
*/
const kParseURLSymbol = Symbol('kParseURL');
const kCreateURLFromPosixPathSymbol = Symbol('kCreateURLFromPosixPath');
const kCreateURLFromWindowsPathSymbol = Symbol('kCreateURLFromWindowsPath');

class URL {
#context = new URLContext();
Expand Down Expand Up @@ -812,8 +813,17 @@ class URL {
base = `${base}`;
}

const raiseException = parseSymbol !== kParseURLSymbol;
const href = bindingUrl.parse(input, base, raiseException);
let href;
if (arguments.length < 3) {
href = bindingUrl.parse(input, base, true);
} else {
const raiseException = parseSymbol !== kParseURLSymbol;
const interpretAsWindowsPath = parseSymbol === kCreateURLFromWindowsPathSymbol;
const pathToFileURL = interpretAsWindowsPath || (parseSymbol === kCreateURLFromPosixPathSymbol);
href = pathToFileURL ?
bindingUrl.pathToFileURL(input, interpretAsWindowsPath, base) :
bindingUrl.parse(input, base, raiseException);
}
if (href) {
this.#updateContext(href);
}
Expand Down Expand Up @@ -1500,76 +1510,9 @@ function fileURLToPath(path, options = kEmptyObject) {
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
}

// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
//
// One pattern per character that encodePathChars() percent-encodes. Every
// pattern carries the `g` flag so that a single replace call rewrites all
// occurrences of that character in the path.
const percentRegEx = /%/g;
const newlineRegEx = /\n/g;
const carriageReturnRegEx = /\r/g;
const tabRegEx = /\t/g;
const quoteRegEx = /"/g;
const hashRegex = /#/g;
const spaceRegEx = / /g;
const questionMarkRegex = /\?/g;
const openSquareBracketRegEx = /\[/g;
// Also used by pathToFileURL() to turn back-slashes into '/' for UNC paths.
const backslashRegEx = /\\/g;
const closeSquareBracketRegEx = /]/g;
const caretRegEx = /\^/g;
const verticalBarRegEx = /\|/g;
const tildeRegEx = /~/g;

/**
 * Percent-encodes the characters of a file path that must not appear
 * verbatim in the path of a `file:` URL.
 * @param {string} filepath Path whose characters should be escaped.
 * @param {{ windows?: boolean }} [options] When `options.windows` is truthy,
 *   back-slashes are left untouched.
 * @returns {string} The path with every listed character replaced by its
 *   `%XX` escape.
 */
function encodePathChars(filepath, options = kEmptyObject) {
  // Replaces every match of `pattern` with `escaped`, but only when `needle`
  // occurs in the current path, so untouched paths never hit the RegExp.
  const escapeIfPresent = (needle, pattern, escaped) => {
    if (StringPrototypeIncludes(filepath, needle)) {
      filepath = RegExpPrototypeSymbolReplace(pattern, filepath, escaped);
    }
  };

  // '%' has to be handled first: every later replacement introduces a '%'
  // that must not be escaped a second time.
  escapeIfPresent('%', percentRegEx, '%25');

  escapeIfPresent('\t', tabRegEx, '%09');
  escapeIfPresent('\n', newlineRegEx, '%0A');
  escapeIfPresent('\r', carriageReturnRegEx, '%0D');
  escapeIfPresent(' ', spaceRegEx, '%20');
  escapeIfPresent('"', quoteRegEx, '%22');
  escapeIfPresent('#', hashRegex, '%23');
  escapeIfPresent('?', questionMarkRegex, '%3F');
  escapeIfPresent('[', openSquareBracketRegEx, '%5B');
  // Back-slashes must be special-cased on Windows, where they are treated as path separator.
  if (!options.windows) {
    escapeIfPresent('\\', backslashRegEx, '%5C');
  }
  escapeIfPresent(']', closeSquareBracketRegEx, '%5D');
  escapeIfPresent('^', caretRegEx, '%5E');
  escapeIfPresent('|', verticalBarRegEx, '%7C');
  escapeIfPresent('~', tildeRegEx, '%7E');

  return filepath;
}

function pathToFileURL(filepath, options = kEmptyObject) {
const windows = options?.windows ?? isWindows;
if (windows && StringPrototypeStartsWith(filepath, '\\\\')) {
const outURL = new URL('file://');
// UNC path format: \\server\share\resource
// Handle extended UNC path and standard UNC path
// "\\?\UNC\" path prefix should be ignored.
Expand All @@ -1592,12 +1535,7 @@ function pathToFileURL(filepath, options = kEmptyObject) {
);
}
const hostname = StringPrototypeSlice(filepath, prefixLength, hostnameEndIndex);
outURL.hostname = domainToASCII(hostname);
outURL.pathname = encodePathChars(
RegExpPrototypeSymbolReplace(backslashRegEx, StringPrototypeSlice(filepath, hostnameEndIndex), '/'),
{ windows },
);
return outURL;
return new URL(StringPrototypeSlice(filepath, hostnameEndIndex), hostname, kCreateURLFromWindowsPathSymbol);
}
let resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath);
// path.resolve strips trailing slashes so we must add them back
Expand All @@ -1608,7 +1546,7 @@ function pathToFileURL(filepath, options = kEmptyObject) {
resolved[resolved.length - 1] !== path.sep)
resolved += '/';

return new URL(`file://${encodePathChars(resolved, { windows })}`);
return new URL(resolved, undefined, windows ? kCreateURLFromWindowsPathSymbol : kCreateURLFromPosixPathSymbol);
}

function toPathIfFileURL(fileURLOrPath) {
Expand Down
104 changes: 104 additions & 0 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,108 @@ void BindingData::Deserialize(v8::Local<v8::Context> context,
CHECK_NOT_NULL(binding);
}

#ifndef LARGEST_ASCII_CHAR_CODE_TO_ENCODE
#define LARGEST_ASCII_CHAR_CODE_TO_ENCODE '~'
#endif

// Compile-time percent-encoding table for path bytes, indexed by byte value
// in [0, LARGEST_ASCII_CHAR_CODE_TO_ENCODE]. Every entry is a NUL-terminated
// string of at most 3 chars: "%XX" (upper-case hex) for a byte that has to be
// escaped, or the byte itself otherwise.
//
// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
constexpr auto lookup_table = []() constexpr {
  // Each entry is an array that can hold up to 3 chars + null terminator
  std::array<std::array<char, 4>, LARGEST_ASCII_CHAR_CODE_TO_ENCODE + 1>
      table{};

  constexpr char kHexDigits[] = "0123456789ABCDEF";
  constexpr char kCharsToEncode[] = {'\0',
                                     '\t',
                                     '\n',
                                     '\r',
                                     ' ',
                                     '"',
                                     '#',
                                     '%',
                                     '?',
                                     '[',
                                     '\\',
                                     ']',
                                     '^',
                                     '|',
                                     '~'};

  // Start with the identity mapping. The index is a size_t rather than a
  // uint8_t so the loop terminates even if the (overridable) upper bound is
  // raised to 255.
  for (size_t code = 0; code < table.size(); ++code) {
    table[code][0] = static_cast<char>(code);
  }

  // Overwrite the unsafe characters with their escape. The hex digits are
  // derived from the byte value, so they cannot disagree with the character.
  for (const char unsafe : kCharsToEncode) {
    const size_t code = static_cast<unsigned char>(unsafe);
    if (code >= table.size()) continue;  // Upper bound was lowered.
    table[code][0] = '%';
    table[code][1] = kHexDigits[code >> 4];
    table[code][2] = kHexDigits[code & 0xF];
  }

  return table;
}();

enum class OS { WINDOWS, POSIX };

// Builds the textual "file://..." URL for a file system path.
//
// Bytes above LARGEST_ASCII_CHAR_CODE_TO_ENCODE are copied through unchanged.
// For OS::WINDOWS a back-slash becomes '/'. Every other byte is replaced by
// its lookup_table entry, i.e. either the byte itself or its "%XX" escape.
std::string EncodePathChars(std::string_view input_str, OS operating_system) {
  constexpr std::string_view kFilePrefix = "file://";
  const bool backslash_is_separator = operating_system == OS::WINDOWS;

  std::string encoded;
  // Room for the prefix plus the input; escapes may still grow the string.
  encoded.reserve(kFilePrefix.size() + input_str.size());
  encoded.append(kFilePrefix);

  for (const char ch : input_str) {
    const auto code = static_cast<unsigned char>(ch);
    if (code > LARGEST_ASCII_CHAR_CODE_TO_ENCODE) [[unlikely]] {
      encoded.push_back(ch);
    } else if (backslash_is_separator && ch == '\\') {
      encoded.push_back('/');
    } else {
      encoded.append(lookup_table[code].data());
    }
  }

  return encoded;
}

// Binding for `pathToFileURL(input, windows[, hostname])`.
//
// args[0] (string)  - file system path; must not be empty.
// args[1] (boolean) - true to treat the path as a Windows path (OS::WINDOWS),
//                     false for OS::POSIX.
// args[2] (string, optional) - hostname to set on the resulting URL; only
//                     consulted for Windows paths.
//
// On success the parsed URL's components are stored through
// BindingData::UpdateComponents() and the href is returned. If the encoded
// path cannot be parsed, or the supplied hostname is rejected, an invalid-URL
// error is thrown instead.
void BindingData::PathToFileURL(const FunctionCallbackInfo<Value>& args) {
  CHECK_GE(args.Length(), 2);  // input, windows
  CHECK(args[0]->IsString());
  CHECK(args[1]->IsBoolean());

  Realm* realm = Realm::GetCurrent(args);
  BindingData* binding_data = realm->GetBindingData<BindingData>();
  Isolate* isolate = realm->isolate();
  OS os = args[1]->IsTrue() ? OS::WINDOWS : OS::POSIX;

  Utf8Value input(isolate, args[0]);
  auto input_str = input.ToStringView();
  CHECK(!input_str.empty());

  auto out =
      ada::parse<ada::url_aggregator>(EncodePathChars(input_str, os), nullptr);

  if (!out) {
    // NOTE(review): confirm that ThrowInvalidURL's third parameter is meant
    // to receive nullptr for "no base".
    return ThrowInvalidURL(realm->env(), input.ToStringView(), nullptr);
  }

  if (os == OS::WINDOWS && args.Length() > 2 && !args[2]->IsUndefined())
      [[unlikely]] {
    CHECK(args[2]->IsString());
    Utf8Value hostname(isolate, args[2]);
    // The hostname is caller-supplied data, so a rejected value is reported
    // as an error rather than taking the whole process down with a CHECK.
    if (!out->set_hostname(hostname.ToStringView())) {
      return ThrowInvalidURL(realm->env(), hostname.ToStringView(), nullptr);
    }
  }

  binding_data->UpdateComponents(out->get_components(), out->type);

  args.GetReturnValue().Set(
      ToV8Value(realm->context(), out->get_href(), isolate).ToLocalChecked());
}

void BindingData::DomainToASCII(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_GE(args.Length(), 1); // input
Expand Down Expand Up @@ -371,6 +473,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
SetMethodNoSideEffect(isolate, target, "format", Format);
SetMethodNoSideEffect(isolate, target, "getOrigin", GetOrigin);
SetMethod(isolate, target, "parse", Parse);
SetMethod(isolate, target, "pathToFileURL", PathToFileURL);
aduh95 marked this conversation as resolved.
Show resolved Hide resolved
SetMethod(isolate, target, "update", Update);
SetFastMethodNoSideEffect(
isolate, target, "canParse", CanParse, {fast_can_parse_methods_, 2});
Expand All @@ -391,6 +494,7 @@ void BindingData::RegisterExternalReferences(
registry->Register(Format);
registry->Register(GetOrigin);
registry->Register(Parse);
registry->Register(PathToFileURL);
registry->Register(Update);
registry->Register(CanParse);
registry->Register(FastCanParse);
Expand Down
1 change: 1 addition & 0 deletions src/node_url.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class BindingData : public SnapshotableObject {
static void Format(const v8::FunctionCallbackInfo<v8::Value>& args);
static void GetOrigin(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Parse(const v8::FunctionCallbackInfo<v8::Value>& args);
static void PathToFileURL(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Update(const v8::FunctionCallbackInfo<v8::Value>& args);

static void CreatePerIsolateProperties(IsolateData* isolate_data,
Expand Down
Loading