Skip to content

Commit

Permalink
module: eliminate performance cost of detection for cjs entry
Browse files Browse the repository at this point in the history
  • Loading branch information
GeoffreyBooth committed Mar 15, 2024
1 parent b360532 commit 278b15f
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 10 deletions.
12 changes: 12 additions & 0 deletions lib/internal/modules/cjs/loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,18 @@ const cjsParseCache = new SafeWeakMap();
*/
const cjsExportsCache = new SafeWeakMap();

/**
* Map of CJS modules where the initial attempt to parse as CommonJS failed;
* we want to retry as ESM but avoid reading the module from disk again.
* @type {SafeMap<string, string>} filename: contents
*/
const cjsRetryAsESMCache = new SafeMap();

// Set first due to cycle with ESM loader functions.
module.exports = {
cjsExportsCache,
cjsParseCache,
cjsRetryAsESMCache,
initializeCJS,
Module,
wrapSafe,
Expand Down Expand Up @@ -1293,6 +1301,10 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) {

return result.function;
} catch (err) {
if (getOptionValue('--experimental-detect-module')) {
cjsRetryAsESMCache.set(filename, content);
}

if (process.mainModule === cjsModuleInstance) {
const { enrichCJSError } = require('internal/modules/esm/translators');
enrichCJSError(err, content, filename);
Expand Down
32 changes: 32 additions & 0 deletions lib/internal/modules/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,37 @@ function normalizeReferrerURL(referrerName) {
}


const esmSyntaxErrorMessages = new SafeSet([
'Cannot use import statement outside a module', // `import` statements
"Unexpected token 'export'", // `export` statements
"Cannot use 'import.meta' outside a module", // `import.meta` references
]);
const throwsOnlyInCommonJSErrorMessages = new SafeSet([
"Identifier 'module' has already been declared",
"Identifier 'exports' has already been declared",
"Identifier 'require' has already been declared",
"Identifier '__filename' has already been declared",
"Identifier '__dirname' has already been declared",
'await is only valid in async functions and the top level bodies of modules', // Top-level `await`
]);
/**
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
* @param {string} source Module contents
*/
function shouldRetryAsESM(errorMessage, source) {
if (esmSyntaxErrorMessages.has(errorMessage)) {
return true;
}

if (throwsOnlyInCommonJSErrorMessages.has(errorMessage)) {
return /** @type {boolean} */(internalBinding('contextify').shouldRetryAsESM(source));
}

return false;
}


// Whether we have started executing any user-provided CJS code.
// This is set right before we call the wrapped CJS code (not after,
// in case we are half-way in the execution when internals check this).
Expand All @@ -339,6 +370,7 @@ module.exports = {
loadBuiltinModule,
makeRequireFunction,
normalizeReferrerURL,
shouldRetryAsESM,
stripBOM,
toRealPath,
hasStartedUserCJSExecution() {
Expand Down
34 changes: 24 additions & 10 deletions lib/internal/modules/run_main.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ const {
globalThis,
} = primordials;

const { containsModuleSyntax } = internalBinding('contextify');
const { getNearestParentPackageJSONType } = internalBinding('modules');
const { getOptionValue } = require('internal/options');
const { checkPackageJSONIntegrity } = require('internal/modules/package_json_reader');
Expand Down Expand Up @@ -87,10 +86,6 @@ function shouldUseESMLoader(mainPath) {

// No package.json or no `type` field.
if (response === undefined || response[0] === 'none') {
if (getOptionValue('--experimental-detect-module')) {
// If the first argument of `containsModuleSyntax` is undefined, it will read `mainPath` from the file system.
return containsModuleSyntax(undefined, mainPath);
}
return false;
}

Expand Down Expand Up @@ -162,7 +157,30 @@ function runEntryPointWithESMLoader(callback) {
function executeUserEntryPoint(main = process.argv[1]) {
const resolvedMain = resolveMainPath(main);
const useESMLoader = shouldUseESMLoader(resolvedMain);
if (useESMLoader) {

// We might retry as ESM if the CommonJS loader throws because the file is ESM, so
// try the CommonJS loader first unless we know it's supposed to be parsed as ESM.
let retryAsESM = false;
if (!useESMLoader) {
// Module._load is the monkey-patchable CJS module loader.
const { Module } = require('internal/modules/cjs/loader');
try {
Module._load(main, null, true);
} catch (error) {
if (getOptionValue('--experimental-detect-module')) {
const { cjsRetryAsESMCache } = require('internal/modules/cjs/loader');
const source = cjsRetryAsESMCache.get(resolvedMain);
cjsRetryAsESMCache.delete(resolvedMain);
const { shouldRetryAsESM } = require('internal/modules/helpers');
retryAsESM = shouldRetryAsESM(error.message, source);
}
if (!retryAsESM) {
throw error;
}
}
}

if (useESMLoader || retryAsESM) {
const mainPath = resolvedMain || main;
const mainURL = pathToFileURL(mainPath).href;

Expand All @@ -171,10 +189,6 @@ function executeUserEntryPoint(main = process.argv[1]) {
// even after the event loop stops running.
return cascadedLoader.import(mainURL, undefined, { __proto__: null }, true);
});
} else {
// Module._load is the monkey-patchable CJS module loader.
const { Module } = require('internal/modules/cjs/loader');
Module._load(main, null, true);
}
}

Expand Down
84 changes: 84 additions & 0 deletions src/node_contextify.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,13 +345,15 @@ void ContextifyContext::CreatePerIsolateProperties(
SetMethod(isolate, target, "makeContext", MakeContext);
SetMethod(isolate, target, "compileFunction", CompileFunction);
SetMethod(isolate, target, "containsModuleSyntax", ContainsModuleSyntax);
SetMethod(isolate, target, "shouldRetryAsESM", ShouldRetryAsESM);
}

void ContextifyContext::RegisterExternalReferences(
ExternalReferenceRegistry* registry) {
registry->Register(MakeContext);
registry->Register(CompileFunction);
registry->Register(ContainsModuleSyntax);
registry->Register(ShouldRetryAsESM);
registry->Register(PropertyGetterCallback);
registry->Register(PropertySetterCallback);
registry->Register(PropertyDescriptorCallback);
Expand Down Expand Up @@ -1566,6 +1568,88 @@ void ContextifyContext::ContainsModuleSyntax(
args.GetReturnValue().Set(should_retry_as_esm);
}

void ContextifyContext::ShouldRetryAsESM(
const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
Isolate* isolate = env->isolate();
Local<Context> context = env->context();

if (args.Length() != 1) {
return THROW_ERR_MISSING_ARGS(
env, "shouldRetryAsESM needs 1 argument");
}
// Argument 1: source code
Local<String> code;
CHECK(args[0]->IsString());
code = args[0].As<String>();

Local<String> script_id = String::NewFromUtf8(isolate, "throwaway").ToLocalChecked();
Local<Symbol> id_symbol = Symbol::New(isolate, script_id);

Local<PrimitiveArray> host_defined_options =
GetHostDefinedOptions(isolate, id_symbol);
ScriptCompiler::Source source = GetCommonJSSourceInstance(
isolate, code, script_id, 0, 0, host_defined_options, nullptr);
ScriptCompiler::CompileOptions options = GetCompileOptions(source);

TryCatchScope try_catch(env);
ShouldNotAbortOnUncaughtScope no_abort_scope(env);

// Try parsing where instead of the CommonJS wrapper we use an async function
// wrapper. If the parse succeeds, then any CommonJS parse error for this
// module was caused by either a top-level declaration of one of the CommonJS
// module variables, or a top-level `await`.
code =
String::Concat(isolate,
String::NewFromUtf8(isolate, "(async function() {")
.ToLocalChecked(),
code);
code = String::Concat(
isolate,
code,
String::NewFromUtf8(isolate, "})();").ToLocalChecked());

ScriptCompiler::Source wrapped_source = GetCommonJSSourceInstance(
isolate, code, script_id, 0, 0, host_defined_options, nullptr);

std::vector<Local<String>> params = GetCJSParameters(env->isolate_data());
std::ignore = ScriptCompiler::CompileFunction(
context,
&wrapped_source,
params.size(),
params.data(),
0,
nullptr,
options,
v8::ScriptCompiler::NoCacheReason::kNoCacheNoReason);

bool should_retry_as_esm = false;
if (!try_catch.HasTerminated()) {
if (try_catch.HasCaught()) {
// If on the second parse an error is thrown by ESM syntax, then
// what happened was that the user had top-level `await` or a
// top-level declaration of one of the CommonJS module variables
// above the first `import` or `export`.
Utf8Value message_value(
env->isolate(), try_catch.Message()->Get());
auto message_view = message_value.ToStringView();
for (const auto& error_message : esm_syntax_error_messages) {
if (message_view.find(error_message) !=
std::string_view::npos) {
should_retry_as_esm = true;
break;
}
}
} else {
// No errors thrown in the second parse, so most likely the error
// was caused by a top-level `await` or a top-level declaration of
// one of the CommonJS module variables.
should_retry_as_esm = true;
}
}
args.GetReturnValue().Set(should_retry_as_esm);
}

static void CompileFunctionForCJSLoader(
const FunctionCallbackInfo<Value>& args) {
CHECK(args[0]->IsString());
Expand Down
2 changes: 2 additions & 0 deletions src/node_contextify.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ class ContextifyContext : public BaseObject {
const v8::ScriptCompiler::Source& source);
static void ContainsModuleSyntax(
const v8::FunctionCallbackInfo<v8::Value>& args);
static void ShouldRetryAsESM(
const v8::FunctionCallbackInfo<v8::Value>& args);
static void WeakCallback(
const v8::WeakCallbackInfo<ContextifyContext>& data);
static void PropertyGetterCallback(
Expand Down
17 changes: 17 additions & 0 deletions test/es-module/test-esm-detect-ambiguous.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {
describe('string input', { concurrency: true }, () => {
it('permits ESM syntax in --eval input without requiring --input-type=module', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'import { version } from "node:process"; console.log(version);',
Expand All @@ -23,6 +24,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('permits ESM syntax in STDIN input without requiring --input-type=module', async () => {
const child = spawn(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
]);
child.stdin.end('console.log(typeof import.meta.resolve)');
Expand All @@ -32,6 +34,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('should be overridden by --input-type', async () => {
const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--input-type=commonjs',
'--eval',
Expand All @@ -46,6 +49,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('should be overridden by --experimental-default-type', async () => {
const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--experimental-default-type=commonjs',
'--eval',
Expand All @@ -60,6 +64,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('does not trigger detection via source code `eval()`', async () => {
const { code, signal, stdout, stderr } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'eval("import \'nonexistent\';");',
Expand Down Expand Up @@ -101,6 +106,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {
]) {
it(testName, async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
entryPath,
]);
Expand Down Expand Up @@ -142,6 +148,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {
]) {
it(testName, async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
entryPath,
]);
Expand All @@ -156,6 +163,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {
it('should not hint wrong format in resolve hook', async () => {
let writeSync;
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--no-warnings',
'--loader',
Expand Down Expand Up @@ -194,6 +202,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {
]) {
it(testName, async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
entryPath,
]);
Expand Down Expand Up @@ -223,6 +232,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {
]) {
it(testName, async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
entryPath,
]);
Expand All @@ -239,6 +249,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {
describe('syntax that errors in CommonJS but works in ESM', { concurrency: true }, () => {
it('permits top-level `await`', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'await Promise.resolve(); console.log("executed");',
Expand All @@ -252,6 +263,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('permits top-level `await` above import/export syntax', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'await Promise.resolve(); import "node:os"; console.log("executed");',
Expand All @@ -265,6 +277,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('still throws on `await` in an ordinary sync function', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'function fn() { await Promise.resolve(); } fn();',
Expand All @@ -278,6 +291,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('throws on undefined `require` when top-level `await` triggers ESM parsing', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'const fs = require("node:fs"); await Promise.resolve();',
Expand All @@ -291,6 +305,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('permits declaration of CommonJS module variables', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
fixtures.path('es-modules/package-without-type/commonjs-wrapper-variables.js'),
]);
Expand All @@ -303,6 +318,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('permits declaration of CommonJS module variables above import/export', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'const module = 3; import "node:os"; console.log("executed");',
Expand All @@ -316,6 +332,7 @@ describe('--experimental-detect-module', { concurrency: true }, () => {

it('still throws on double `const` declaration not at the top level', async () => {
const { stdout, stderr, code, signal } = await spawnPromisified(process.execPath, [
'--no-warnings',
'--experimental-detect-module',
'--eval',
'function fn() { const require = 1; const require = 2; } fn();',
Expand Down

0 comments on commit 278b15f

Please sign in to comment.