diff --git a/lit-actions/ext/js/99_patches.js b/lit-actions/ext/js/99_patches.js index 3296b988..6c908793 100644 --- a/lit-actions/ext/js/99_patches.js +++ b/lit-actions/ext/js/99_patches.js @@ -1,4 +1,8 @@ -import { op_increment_fetch_count, op_panic } from 'ext:core/ops'; +import { + op_eval_context, + op_increment_fetch_count, + op_panic, +} from 'ext:core/ops'; // Import modules to suppress build error: // "Following modules were not evaluated; make sure they are imported from other code" @@ -28,6 +32,29 @@ import * as _actions from 'ext:lit_actions/02_litActionsSDK.js'; // Expose Deno's built-in panic op for testing globalThis.LitTest = { op_panic }; +// Route user code through op_eval_context so V8's eval-context code cache +// (wired into WorkerServiceOptions.v8_code_cache) sees it. execute_script +// bypasses that cache, so on repeated executions V8 reparses and recompiles +// the bundled action from source every time (CPL-264). The outer stub still +// parses per execution but its body is one string literal, so V8 only pays +// for source-string scanning, not for compiling the bundled action body. +// +// The helper deletes itself from globalThis as its first action so user code +// (which runs inside op_eval_context below) cannot reach it; this preserves +// the `--disallow-code-generation-from-strings` posture by not handing actions +// a string-eval primitive. Each request runs in a fresh worker, so the delete +// has no cross-request effect. +globalThis.__litEvalCached = (source, specifier) => { + delete globalThis.__litEvalCached; + // `op_eval_context` returns `[result, [thrown, isNativeError, isCompileError]]`. + // The wrapper in `ext:core/01_core.js` reshapes it into an object, but we're + // calling the raw op here so we must index by position. 
+ const [, error] = op_eval_context(source, specifier); + if (error) { + throw error[0]; + } +}; + // expose "global" because it was available in the old deno version // but is not available in the new one, and we don't want to break // existing code that expects it to be available diff --git a/lit-actions/server/bundler.rs b/lit-actions/server/bundler.rs index 6a60d115..e65dd937 100644 --- a/lit-actions/server/bundler.rs +++ b/lit-actions/server/bundler.rs @@ -17,11 +17,18 @@ use anyhow::{Context, Result, anyhow, bail}; use deno_core::ModuleSpecifier; use futures::future::try_join_all; use swc_bundler::{Bundler, Config, Hook, Load, ModuleData, ModuleRecord, ModuleType, Resolve}; -use swc_common::{FileName, GLOBALS, Globals, Span, SyntaxContext, sync::Lrc}; -use swc_ecma_ast::{EsVersion, KeyValueProp, Module, ModuleDecl, ModuleItem}; +use swc_common::{ + FileName, GLOBALS, Globals, SourceMapper, Span, Spanned, SyntaxContext, sync::Lrc, +}; +use swc_ecma_ast::{ + CallExpr, Callee, EsVersion, Expr, ExprOrSpread, Ident, IdentName, ImportDecl, ImportPhase, + ImportSpecifier, ImportStarAsSpecifier, KeyValueProp, Lit, MemberExpr, MemberProp, Module, + ModuleDecl, ModuleItem, Str, +}; use swc_ecma_codegen::{Emitter, text_writer::JsWriter}; use swc_ecma_loader::resolve::Resolution; use swc_ecma_parser::{EsSyntax, Syntax, parse_file_as_module}; +use swc_ecma_visit::{VisitMut, VisitMutWith}; use tracing::{debug, info, instrument}; use crate::cdn_module_loader::{CdnModuleLoader, MAX_MODULE_COUNT}; @@ -43,16 +50,31 @@ pub(crate) async fn bundle_user_code( imports: &[ParsedImport], loader: &CdnModuleLoader, ) -> Result { + // Pre-bundle pass: rewrite literal `import("...")` calls into static + // `import * as __litDyn_ from "..."` + `Promise.resolve(__litDyn_)` + // so swc_bundler will inline the module and the runtime never re-enters + // the module loader for them. 
Non-literal `import(expr)` calls are + // rejected here because the bundler-only design (CPL-262/CPL-264) cannot + // pre-fetch unknown specifiers at cache time. + let (rewritten_entry, dynamic_specifiers) = rewrite_literal_dynamic_imports(user_code) + .context("scanning entry for literal dynamic imports")?; + let initial_urls: Vec = imports .iter() .map(|imp| { resolve_entry_specifier(&imp.specifier) .with_context(|| format!("invalid import specifier: {}", imp.specifier)) }) + .chain(dynamic_specifiers.iter().map(|spec| { + resolve_entry_specifier(spec) + .with_context(|| format!("invalid dynamic import specifier: {spec}")) + })) .collect::>()?; info!( - initial_imports = initial_urls.len(), + static_imports = imports.len(), + dynamic_imports = dynamic_specifiers.len(), + initial_urls = initial_urls.len(), "bundler: walking CDN dependency graph" ); let walk_started = Instant::now(); @@ -63,8 +85,7 @@ pub(crate) async fn bundle_user_code( "bundler: dependency graph walk complete" ); - let entry_src = user_code.to_string(); - tokio::task::spawn_blocking(move || run_swc_bundler(entry_src, sources)) + tokio::task::spawn_blocking(move || run_swc_bundler(rewritten_entry, sources)) .await .context("bundler task join failed")? } @@ -193,6 +214,170 @@ async fn walk_deps( Ok(sources) } +/// Rewrite literal `import("...")` calls in `source` into a static +/// `import * as __litDyn_ from ""` declaration plus +/// `Promise.resolve(__litDyn_)` at the call site. Returns the rewritten +/// source plus the list of newly-introduced specifiers (in alias index order). +/// +/// The rewrite preserves the `Promise` type that the original +/// `import()` call returned, so destructuring (`const { foo } = await import(..)`) +/// continues to work. +/// +/// Errors if the entry contains an `import(...)` whose argument is not a string +/// literal: the bundler cannot pre-fetch unknown specifiers, and the bundler-only +/// design rejects runtime module resolution. 
+/// +/// SEMANTIC CHANGE — eager evaluation. A literal `import("X")` originally +/// loaded and evaluated module X lazily, only when the call site executed. After +/// rewrite, X becomes a top-level static import: it is fetched, parsed, and +/// evaluated *unconditionally at script start*, even if the original `import()` +/// call site was never reached (e.g. inside a branch never taken). Side effects +/// of X's top-level code (and of any module X transitively imports) therefore +/// run earlier and always. This matches the bundler-only design (CPL-262/264): +/// every dependency a Lit Action *might* use must be known at cache-write time, +/// so lazy/conditional `import()` is not a supported part of the Lit Action JS +/// subset. Authors needing conditional execution should branch on the namespace +/// after the await rather than relying on `import()` being skipped. +/// +/// NOTE: this rewrite only runs on the user's entry source, not on fetched CDN +/// modules. CDN modules that contain dynamic `import()` calls are still left to +/// fail at execution time via `CdnModuleLoader::load`. In practice jsDelivr's +/// `+esm` bundles compile dependencies down to static imports, so this gap is +/// rarely hit; if it becomes a problem, walk_deps can be extended to apply the +/// same rewrite to each fetched module. 
+fn rewrite_literal_dynamic_imports(source: &str) -> Result<(String, Vec)> { + let cm: Lrc = Default::default(); + let fm = cm.new_source_file( + Lrc::new(FileName::Custom("entry-dyn-rewrite".into())), + source.to_string(), + ); + let mut errors = vec![]; + let mut module = parse_file_as_module( + &fm, + Syntax::Es(EsSyntax::default()), + EsVersion::Es2022, + None, + &mut errors, + ) + .map_err(|e| anyhow!("parse error during dynamic-import scan: {e:?}"))?; + + let mut visitor = DynamicImportRewriter::default(); + module.visit_mut_with(&mut visitor); + + if !visitor.non_literal_spans.is_empty() { + let count = visitor.non_literal_spans.len(); + let first = visitor.non_literal_spans[0]; + let loc = cm.lookup_char_pos(first.lo); + let snippet = cm + .span_to_snippet(first) + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "".to_string()); + bail!( + "{count} dynamic import(...) call(s) with non-literal specifiers; only string-literal specifiers can be bundled at cache-write time. First offender at line {}, col {}: `{snippet}`", + loc.line, + loc.col_display + 1, + ); + } + + if visitor.specifiers.is_empty() { + return Ok((source.to_string(), Vec::new())); + } + + let mut prepended: Vec = visitor + .specifiers + .iter() + .enumerate() + .map(|(i, spec)| { + ModuleItem::ModuleDecl(ModuleDecl::Import(ImportDecl { + span: Span::default(), + specifiers: vec![ImportSpecifier::Namespace(ImportStarAsSpecifier { + span: Span::default(), + local: Ident::new( + format!("__litDyn_{i}").into(), + Span::default(), + SyntaxContext::empty(), + ), + })], + src: Box::new(Str::from(spec.clone())), + type_only: false, + with: None, + phase: ImportPhase::Evaluation, + })) + }) + .collect(); + prepended.append(&mut module.body); + module.body = prepended; + + let rewritten = codegen_module(&cm, &module)?; + Ok((rewritten, visitor.specifiers)) +} + +#[derive(Default)] +struct DynamicImportRewriter { + /// Specifiers found, in source order. 
Each occurrence gets its own alias + /// (swc_bundler dedups identical specifiers across the bundle anyway). + specifiers: Vec, + /// Spans of `import(expr)` calls whose argument is not a string literal. + /// We surface these as a hard error from the caller, with line/col + snippet + /// from the first offender so the user knows where to refactor. + non_literal_spans: Vec, +} + +impl VisitMut for DynamicImportRewriter { + fn visit_mut_call_expr(&mut self, call: &mut CallExpr) { + call.visit_mut_children_with(self); + + if !matches!(call.callee, Callee::Import(_)) { + return; + } + if call.args.len() != 1 || call.args[0].spread.is_some() { + // Span the whole call so the snippet captures `import(...)` shape. + self.non_literal_spans.push(call.span); + return; + } + let spec = match &*call.args[0].expr { + Expr::Lit(Lit::Str(s)) => s.value.to_string(), + _ => { + // Span the offending argument expression so the snippet shows + // exactly what couldn't be evaluated at cache-write time. + self.non_literal_spans.push(call.args[0].expr.span()); + return; + } + }; + + let i = self.specifiers.len(); + self.specifiers.push(spec); + + let alias = Ident::new( + format!("__litDyn_{i}").into(), + Span::default(), + SyntaxContext::empty(), + ); + + // Replace the CallExpr in place with: Promise.resolve() + *call = CallExpr { + span: Span::default(), + ctxt: SyntaxContext::empty(), + callee: Callee::Expr(Box::new(Expr::Member(MemberExpr { + span: Span::default(), + obj: Box::new(Expr::Ident(Ident::new( + "Promise".into(), + Span::default(), + SyntaxContext::empty(), + ))), + prop: MemberProp::Ident(IdentName::new("resolve".into(), Span::default())), + }))), + args: vec![ExprOrSpread { + spread: None, + expr: Box::new(Expr::Ident(alias)), + }], + type_args: None, + }; + } +} + /// Parse a JS source and return every static import/re-export specifier. /// Dynamic `import()` calls are intentionally not followed. 
Any surviving /// dynamic import is rejected by `CdnModuleLoader::load` at execution time, @@ -606,6 +791,100 @@ mod tests { ); } + #[test] + fn rewrite_dynamic_imports_noop_when_none_present() { + let src = "async function main() { return 1; }"; + let (out, specs) = rewrite_literal_dynamic_imports(src).unwrap(); + assert!(specs.is_empty()); + assert_eq!(out, src); + } + + #[test] + fn rewrite_dynamic_imports_inlines_literal_call() { + let src = r#"async function main() { + const ns = await import("zod@3.22.4/+esm"); + return ns.foo; +}"#; + let (out, specs) = rewrite_literal_dynamic_imports(src).unwrap(); + assert_eq!(specs, vec!["zod@3.22.4/+esm"]); + assert!( + out.contains(r#"import * as __litDyn_0 from "zod@3.22.4/+esm""#), + "expected static import alias prepended; got: {out}" + ); + assert!( + out.contains("Promise.resolve(__litDyn_0)"), + "expected dynamic call rewritten; got: {out}" + ); + assert!( + !out.contains(r#"import("zod"#), + "literal import() should be gone; got: {out}" + ); + } + + #[test] + fn rewrite_dynamic_imports_assigns_unique_alias_per_call() { + let src = r#"async function main() { + const a = await import("a@1.0.0/+esm"); + const b = await import("b@1.0.0/+esm"); + return [a, b]; +}"#; + let (out, specs) = rewrite_literal_dynamic_imports(src).unwrap(); + assert_eq!(specs, vec!["a@1.0.0/+esm", "b@1.0.0/+esm"]); + assert!(out.contains("__litDyn_0")); + assert!(out.contains("__litDyn_1")); + } + + #[test] + fn rewrite_dynamic_imports_rejects_non_literal_specifier() { + let src = r#"async function main(spec) { + return await import(spec); +}"#; + let err = rewrite_literal_dynamic_imports(src).unwrap_err(); + let msg = format!("{err:#}"); + assert!( + msg.contains("non-literal"), + "expected non-literal rejection; got: {msg}" + ); + // Error must point at where to refactor: line + col + offending snippet. 
+ assert!(msg.contains("line 2"), "missing line info; got: {msg}"); + assert!(msg.contains("`spec`"), "missing snippet; got: {msg}"); + } + + /// End-to-end through the bundler: a dynamic `import("...")` call must be + /// fully inlined so the output script contains no `import(` of any kind. + #[test] + fn run_swc_bundler_inlines_literal_dynamic_import() { + // Drive bundle_user_code's machinery without actually fetching: build + // the rewritten entry + sources by hand and run run_swc_bundler. + let entry = r#"async function main() { + const ns = await import("https://cdn.jsdelivr.net/npm/dyn@1.0.0/+esm"); + return ns.greet(); +}"#; + let (rewritten, specs) = rewrite_literal_dynamic_imports(entry).unwrap(); + assert_eq!(specs, vec!["https://cdn.jsdelivr.net/npm/dyn@1.0.0/+esm"]); + + let mut sources = HashMap::new(); + sources.insert( + "https://cdn.jsdelivr.net/npm/dyn@1.0.0/+esm".to_string(), + "export function greet() { return 'hi from dyn'; }".to_string(), + ); + + let bundled = run_swc_bundler(rewritten, sources).unwrap(); + assert!( + !bundled.contains("import("), + "leftover dynamic import: {bundled}" + ); + assert!(!bundled.contains("import "), "leftover import: {bundled}"); + assert!( + bundled.contains("hi from dyn"), + "dyn body not inlined: {bundled}" + ); + assert!( + bundled.contains("Promise.resolve"), + "Promise.resolve wrap missing: {bundled}" + ); + } + /// Build a `CdnModuleLoader` whose cache is pre-seeded with the given /// `(url, source)` pairs and whose integrity manifest is empty. 
With no /// expected hash, the cache-hit path in `fetch_module_bytes` returns the @@ -614,9 +893,10 @@ mod tests { fn cached_loader)>>(entries: I) -> CdnModuleLoader { use std::sync::{Arc, RwLock}; - use crate::cdn_module_loader::ModuleCache; use lit_actions_ext::bindings::LoadedModules; + use crate::cdn_module_loader::ModuleCache; + let cache: ModuleCache = Arc::new(RwLock::new(HashMap::new())); { let mut w = cache.write().unwrap(); @@ -653,7 +933,9 @@ mod tests { (b_url.clone(), b_src.into_bytes()), ]); - let sources = walk_deps(&loader, &[entry_url.clone()]).await.unwrap(); + let sources = walk_deps(&loader, std::slice::from_ref(&entry_url)) + .await + .unwrap(); assert_eq!(sources.len(), 3, "expected 3 modules, got {sources:?}"); assert!(sources.contains_key(&entry_url)); diff --git a/lit-actions/server/import_rewriter.rs b/lit-actions/server/import_rewriter.rs index 5aa9fea9..0bc367d3 100644 --- a/lit-actions/server/import_rewriter.rs +++ b/lit-actions/server/import_rewriter.rs @@ -31,10 +31,12 @@ pub(crate) struct ParsedImport { /// Result of rewriting imports in user code. pub(crate) struct RewriteResult { - /// The user code with import statements removed. Non-test callers currently - /// ignore this (the bundler consumes the original code with imports intact), - /// but tests assert on the stripped form to pin the scanner's behavior. - #[allow(dead_code)] + /// The user code with import statements removed. Populated only under + /// `#[cfg(test)]` to pin the scanner's stripping behavior; production + /// callers consume the original code (the bundler keeps `import`s intact + /// so SWC can map bindings to inlined definitions), so we skip the + /// per-miss rebuild on the hot path. + #[cfg(test)] pub code: String, /// The parsed imports in order of appearance. 
pub imports: Vec, @@ -53,6 +55,7 @@ pub(crate) fn rewrite_imports(code: &str) -> RewriteResult { let bytes = preamble.as_bytes(); let mut imports = Vec::new(); + #[cfg(test)] let mut ranges_to_remove: Vec<(usize, usize)> = Vec::new(); let mut pos = 0; @@ -115,7 +118,10 @@ pub(crate) fn rewrite_imports(code: &str) -> RewriteResult { if bytes.get(end) == Some(&b'\n') { end += 1; } + #[cfg(test)] ranges_to_remove.push((start, end)); + #[cfg(not(test))] + let _ = start; imports.push(imp); pos = end; continue; @@ -124,16 +130,22 @@ pub(crate) fn rewrite_imports(code: &str) -> RewriteResult { pos += 1; } - // Rebuild code with import statements removed - let mut result = String::with_capacity(code.len()); - let mut last = 0; - for &(start, end) in &ranges_to_remove { - result.push_str(&code[last..start]); - last = end; - } - result.push_str(&code[last..]); + #[cfg(test)] + let result = { + // Rebuild code with import statements removed (test-only; production + // callers do not consume the stripped form). 
+ let mut result = String::with_capacity(code.len()); + let mut last = 0; + for &(start, end) in &ranges_to_remove { + result.push_str(&code[last..start]); + last = end; + } + result.push_str(&code[last..]); + result + }; RewriteResult { + #[cfg(test)] code: result, imports, } diff --git a/lit-actions/server/lib.rs b/lit-actions/server/lib.rs index 83709420..91d933db 100644 --- a/lit-actions/server/lib.rs +++ b/lit-actions/server/lib.rs @@ -2,6 +2,7 @@ mod bundler; pub mod cdn_module_loader; mod import_rewriter; mod runtime; +mod v8_code_cache; pub mod worker_pool; pub mod server; diff --git a/lit-actions/server/runtime.rs b/lit-actions/server/runtime.rs index 30bdc41a..a369b44c 100644 --- a/lit-actions/server/runtime.rs +++ b/lit-actions/server/runtime.rs @@ -3,7 +3,7 @@ use std::mem::size_of; use std::path::{Path, PathBuf}; use std::rc::Rc; use std::sync::Once; -use std::sync::{Arc, RwLock}; +use std::sync::{Arc, LazyLock, RwLock}; use std::thread; use std::time::{Duration, Instant}; @@ -13,6 +13,7 @@ use deno_core::{JsRuntime, v8}; use crate::bundler; use crate::cdn_module_loader::{CdnModuleLoader, LoadedModules, ModuleCache}; use crate::import_rewriter; +use crate::v8_code_cache::SharedV8CodeCache; use deno_resolver::npm::{DenoInNpmPackageChecker, ManagedNpmResolver}; use deno_runtime::{ BootstrapOptions, WorkerLogLevel, @@ -42,6 +43,24 @@ const EXECUTION_TERMINATED_ERROR: &str = "Uncaught Error: execution terminated"; const MAX_ACTION_CODE_CACHE_BYTES: usize = 100 * 1024 * 1024; const ACTION_CODE_CACHE_TTL: Duration = Duration::from_secs(30 * 60); +// Permissions are identical for every execution (deny everything except outbound +// `fetch`), so we build the descriptor parser and the `Permissions` instance +// once and clone per execution instead of rebuilding them inside +// `build_worker_base`. 
+static PERMISSION_DESC_PARSER: LazyLock>> = + LazyLock::new(|| Arc::new(RuntimePermissionDescriptorParser::new(RealSys))); + +static BASE_PERMISSIONS: LazyLock = LazyLock::new(|| { + Permissions::from_options( + PERMISSION_DESC_PARSER.as_ref(), + &PermissionsOptions { + allow_net: Some(vec![]), + ..Default::default() + }, + ) + .expect("valid permissions") +}); + pub(crate) type ActionCodeCache = Arc>; pub(crate) fn new_action_code_cache() -> ActionCodeCache { @@ -204,6 +223,10 @@ pub(crate) struct PoolSharedState { /// Heap limit in MB. Pool workers always use `DEFAULT_MEMORY_LIMIT_MB`; /// the legacy cold path uses whatever the caller specified. pub memory_limit_mb: usize, + /// Shared V8 code cache wired into every `MainWorker`'s + /// `WorkerServiceOptions`. Pool and legacy paths share the same cache, + /// so a code-cache hit warmed by either path benefits the other. + pub v8_code_cache: SharedV8CodeCache, } /// A `MainWorker` that has been bootstrapped from the V8 snapshot but has @@ -255,20 +278,25 @@ async fn prepare_action_code( code: &str, context: &ActionCodePrepareContext<'_>, ) -> Result { - // Detect the user's static imports. If there are none, the raw code is - // already executable as a script. Otherwise, hand off to the pre-execution - // bundler, which inlines every CDN dep into the entry source so the cached - // form contains no `import` call of any kind — eliminating Deno's - // `resolve`/`load` pipeline from the hot path (CPL-262). + // Detect the user's static imports. If there are none AND no `import(` + // substring is present, the raw code is already executable as a script. + // Otherwise, hand off to the pre-execution bundler, which inlines every + // CDN dep into the entry source so the cached form contains no `import` + // call of any kind — eliminating Deno's `resolve`/`load` pipeline from + // the hot path (CPL-262). 
// - // `rewrite_imports` is used only to decide whether bundling is necessary - // and to seed the dep-graph walk; the bundler consumes the **original** - // code (with imports intact) so SWC can map each imported binding to its - // inlined definition. - let import_rewriter::RewriteResult { code: _, imports } = - import_rewriter::rewrite_imports(code); - - if imports.is_empty() { + // The `import(` substring check is intentionally permissive — false + // positives (e.g. the literal text inside a string or comment) just route + // through the bundler, which is harmless. The bundler then parses the + // source with SWC and either inlines literal `import("...")` calls or + // surfaces a clear error for non-literal dynamic imports. + // + // `rewrite_imports` is used only to seed the static-import dep walk; the + // bundler consumes the **original** code (with imports intact) so SWC can + // map each imported binding to its inlined definition. + let import_rewriter::RewriteResult { imports, .. } = import_rewriter::rewrite_imports(code); + + if imports.is_empty() && !code.contains("import(") { return Ok(CachedActionCode { code: code.to_string(), loaded_modules: Vec::new(), @@ -382,7 +410,7 @@ pub(crate) fn build_worker_base(shared: &PoolSharedState) -> Result Result, RealSys> { @@ -420,7 +445,7 @@ pub(crate) fn build_worker_base(shared: &PoolSharedState) -> Result, http_client: Arc, ) -> Result<()> { @@ -614,6 +640,7 @@ pub(crate) async fn execute_js( lockfile_path, http_client, memory_limit_mb: memory_limit_mb.unwrap_or(DEFAULT_MEMORY_LIMIT_MB), + v8_code_cache, }); let mut prepared = build_worker_base(&shared) @@ -786,11 +813,24 @@ async fn execute_with_worker_inner( .await? 
}; record_loaded_modules(&loaded_modules, &cached_code); - let code = cached_code.to_executable_code(&js_func_params); + let user_code = cached_code.to_executable_code(&js_func_params); + + // Route user code through op_eval_context (via the __litEvalCached helper + // baked into 99_patches.js) so Deno's `eval_context_code_cache_cbs` — + // wired to our SharedV8CodeCache — gets to see the source. execute_script + // bypasses that cache entirely, forcing V8 to reparse and recompile the + // bundled action on every request. The outer stub still parses per + // execution, but its body is one string literal, so V8 only pays for + // source-string scanning rather than compiling the bundled action body + // (CPL-264). + let user_code_literal = + serde_json::to_string(&user_code).context("Could not serialize user code for eval stub")?; + let stub = + format!("__litEvalCached({user_code_literal}, \"file:///user_provided_script.js\");"); if let Err(e) = worker .js_runtime - .execute_script("", code) + .execute_script("", stub) { // Delay error handling if the controller has terminated the isolate, // in which case halt_isolate_rx will tell the reason. 
@@ -1051,6 +1091,7 @@ mod tests { lockfile_path: None, http_client: CdnModuleLoader::build_http_client(), memory_limit_mb: DEFAULT_MEMORY_LIMIT_MB, + v8_code_cache: crate::v8_code_cache::new_v8_code_cache(), }; let w1 = build_worker_base(&shared).expect("first worker built"); diff --git a/lit-actions/server/server.rs b/lit-actions/server/server.rs index e0bcd788..709ed7dd 100644 --- a/lit-actions/server/server.rs +++ b/lit-actions/server/server.rs @@ -7,6 +7,7 @@ use crate::cdn_module_loader::{CdnModuleLoader, ModuleCache}; use crate::runtime::{ ActionCodeCache, DEFAULT_MEMORY_LIMIT_MB, PoolSharedState, new_action_code_cache, }; +use crate::v8_code_cache::{SharedV8CodeCache, new_v8_code_cache}; use crate::worker_pool::{WorkItem, WorkerPool}; use anyhow::Result; @@ -48,6 +49,9 @@ pub struct Server { module_cache: ModuleCache, /// Shared cache for action code prepared from the incoming code CID. action_code_cache: ActionCodeCache, + /// Shared V8 code cache: handed to each MainWorker so compiled bytecode + /// for cached actions is reused instead of being recompiled every run. + v8_code_cache: SharedV8CodeCache, /// Path to integrity.lock file for TOFU auto-pinning. lockfile_path: Option, /// Shared HTTP client for CDN fetches (connection pooling across executions). 
@@ -73,6 +77,7 @@ impl Server { module_cache: ModuleCache, lockfile_path: Option, http_client: Arc, + v8_code_cache: SharedV8CodeCache, ) -> Arc { let shared = Arc::new(PoolSharedState { integrity_manifest, @@ -81,6 +86,7 @@ impl Server { lockfile_path, http_client, memory_limit_mb: DEFAULT_MEMORY_LIMIT_MB, + v8_code_cache, }); WorkerPool::new(pool_size_from_env(), shared) } @@ -93,12 +99,14 @@ impl Server { ) -> Self { let module_cache: ModuleCache = Arc::new(RwLock::new(HashMap::new())); let http_client = CdnModuleLoader::build_http_client(); + let v8_code_cache = new_v8_code_cache(); let pool = Self::build_pool( integrity_manifest.clone(), strict_imports, module_cache.clone(), lockfile_path.clone(), http_client.clone(), + v8_code_cache.clone(), ); Self { server_type: ServerType::Production, @@ -106,6 +114,7 @@ impl Server { strict_imports, module_cache, action_code_cache: new_action_code_cache(), + v8_code_cache, lockfile_path, http_client, pool, @@ -116,12 +125,14 @@ impl Server { let module_cache: ModuleCache = Arc::new(RwLock::new(HashMap::new())); let http_client = CdnModuleLoader::build_http_client(); let integrity_manifest = Arc::new(RwLock::new(HashMap::new())); + let v8_code_cache = new_v8_code_cache(); let pool = Self::build_pool( integrity_manifest.clone(), false, module_cache.clone(), None, http_client.clone(), + v8_code_cache.clone(), ); Self { server_type: ServerType::Test, @@ -129,6 +140,7 @@ impl Server { strict_imports: false, module_cache, action_code_cache: new_action_code_cache(), + v8_code_cache, lockfile_path: None, http_client, pool, @@ -147,6 +159,7 @@ struct DispatchState { strict_imports: bool, module_cache: ModuleCache, action_code_cache: ActionCodeCache, + v8_code_cache: SharedV8CodeCache, lockfile_path: Option, http_client: Arc, } @@ -160,6 +173,7 @@ impl DispatchState { strict_imports: server.strict_imports, module_cache: server.module_cache.clone(), action_code_cache: server.action_code_cache.clone(), + v8_code_cache: 
server.v8_code_cache.clone(), lockfile_path: server.lockfile_path.clone(), http_client: server.http_client.clone(), } @@ -352,6 +366,7 @@ fn spawn_legacy(dispatch: &DispatchState, work: WorkItem, memory_limit_mb: Optio let module_cache = dispatch.module_cache.clone(); let lockfile_path = dispatch.lockfile_path.clone(); let http_client = dispatch.http_client.clone(); + let v8_code_cache = dispatch.v8_code_cache.clone(); std::thread::spawn(move || { if work.request_id.is_some() || work.correlation_id.is_some() { @@ -389,6 +404,7 @@ fn spawn_legacy(dispatch: &DispatchState, work: WorkItem, memory_limit_mb: Optio strict_imports, module_cache, action_code_cache, + v8_code_cache, lockfile_path, http_client, ) diff --git a/lit-actions/server/v8_code_cache.rs b/lit-actions/server/v8_code_cache.rs new file mode 100644 index 00000000..2d7cd9bd --- /dev/null +++ b/lit-actions/server/v8_code_cache.rs @@ -0,0 +1,302 @@ +//! Process-wide, in-memory V8 code cache for compiled user scripts. +//! +//! When a Lit Action's bundled JS lands on the hot path, V8 otherwise +//! re-parses and re-compiles it on every execution — even though the source +//! is identical across executions once it's in the `ActionCodeCache`. +//! +//! `deno_runtime` exposes a `CodeCache` trait that lets us hand V8 the +//! already-compiled bytecode on subsequent runs. We keep it in memory with +//! the same 100MB bound as the action-code cache, so the two caches together +//! give "full caching of JavaScript" for cached actions (CPL-264). 
+ +use std::borrow::Borrow; +use std::collections::HashMap; +use std::hash::{Hash, Hasher}; +use std::mem::size_of; +use std::sync::{Arc, RwLock}; + +use deno_core::ModuleSpecifier; +use deno_runtime::code_cache::{CodeCache, CodeCacheType}; + +const MAX_V8_CODE_CACHE_BYTES: usize = 100 * 1024 * 1024; + +pub type SharedV8CodeCache = Arc; + +pub fn new_v8_code_cache() -> SharedV8CodeCache { + Arc::new(V8CodeCache::default()) +} + +#[derive(Default)] +pub struct V8CodeCache { + inner: RwLock, +} + +#[derive(Default)] +struct V8CodeCacheState { + entries: HashMap>, + total_bytes: usize, +} + +#[derive(Clone)] +struct V8CodeCacheKey { + specifier: String, + kind: u8, + source_hash: u64, +} + +/// Borrowed view of a key. Used so `get_sync` can probe the map without +/// allocating a `String` for the specifier on every lookup (V8 calls into +/// the code cache once per ES module load). +struct V8CodeCacheKeyRef<'a> { + specifier: &'a str, + kind: u8, + source_hash: u64, +} + +/// Glue trait that lets `V8CodeCacheKey` and `V8CodeCacheKeyRef<'_>` share +/// a single `Hash`/`Eq` impl via a trait object, enabling `HashMap::get` +/// against a borrowed key without allocation. 
+trait V8CodeCacheKeyAccess { + fn parts(&self) -> (&str, u8, u64); +} + +impl V8CodeCacheKeyAccess for V8CodeCacheKey { + fn parts(&self) -> (&str, u8, u64) { + (&self.specifier, self.kind, self.source_hash) + } +} + +impl V8CodeCacheKeyAccess for V8CodeCacheKeyRef<'_> { + fn parts(&self) -> (&str, u8, u64) { + (self.specifier, self.kind, self.source_hash) + } +} + +impl Hash for dyn V8CodeCacheKeyAccess + '_ { + fn hash(&self, state: &mut H) { + let (s, k, h) = self.parts(); + s.hash(state); + k.hash(state); + h.hash(state); + } +} + +impl PartialEq for dyn V8CodeCacheKeyAccess + '_ { + fn eq(&self, other: &Self) -> bool { + self.parts() == other.parts() + } +} + +impl Eq for dyn V8CodeCacheKeyAccess + '_ {} + +// HashMap calls `Hash`/`PartialEq` on the stored owned key when computing +// buckets and resolving collisions. Forward through the trait object so the +// owned and borrowed shapes hash + compare identically. +impl Hash for V8CodeCacheKey { + fn hash(&self, state: &mut H) { + (self as &dyn V8CodeCacheKeyAccess).hash(state); + } +} + +impl PartialEq for V8CodeCacheKey { + fn eq(&self, other: &Self) -> bool { + (self as &dyn V8CodeCacheKeyAccess) == (other as &dyn V8CodeCacheKeyAccess) + } +} + +impl Eq for V8CodeCacheKey {} + +impl<'a> Borrow for V8CodeCacheKey { + fn borrow(&self) -> &(dyn V8CodeCacheKeyAccess + 'a) { + self + } +} + +fn kind_to_u8(kind: CodeCacheType) -> u8 { + match kind { + CodeCacheType::EsModule => 0, + CodeCacheType::Script => 1, + } +} + +fn entry_size(specifier: &str, data_len: usize) -> usize { + size_of::() + specifier.len() + data_len +} + +impl CodeCache for V8CodeCache { + fn get_sync( + &self, + specifier: &ModuleSpecifier, + code_cache_type: CodeCacheType, + source_hash: u64, + ) -> Option> { + let inner = self.inner.read().ok()?; + let key_ref = V8CodeCacheKeyRef { + specifier: specifier.as_str(), + kind: kind_to_u8(code_cache_type), + source_hash, + }; + inner + .entries + .get(&key_ref as &dyn V8CodeCacheKeyAccess) + 
.cloned() + } + + fn set_sync( + &self, + specifier: ModuleSpecifier, + code_cache_type: CodeCacheType, + source_hash: u64, + data: &[u8], + ) { + let key = V8CodeCacheKey { + specifier: specifier.to_string(), + kind: kind_to_u8(code_cache_type), + source_hash, + }; + + let new_size = entry_size(&key.specifier, data.len()); + if new_size > MAX_V8_CODE_CACHE_BYTES { + return; + } + + let Ok(mut inner) = self.inner.write() else { + return; + }; + + // Probe-then-decide: never drop the previously-cached bytecode unless + // we're certain the replacement will be admitted. Otherwise a near-full + // cache could turn a cache hit into a permanent miss for that key. + let old_size = inner + .entries + .get(&key) + .map(|old| entry_size(&key.specifier, old.len())) + .unwrap_or(0); + let bytes_after_remove = inner.total_bytes.saturating_sub(old_size); + + if bytes_after_remove + new_size > MAX_V8_CODE_CACHE_BYTES { + if old_size > 0 { + // Replacement wouldn't fit. Keep the existing entry rather + // than dropping a working cache hit for a write we can't honor. + return; + } + // Fresh insert that doesn't fit at the 100MB ceiling. There's no + // LRU yet (CPL-264 follow-up), so do the simplest recovery: wipe + // the cache and let the live action set repopulate naturally. + // Without this, the cache becomes permanently full and never + // accepts another action's bytecode. 
+ inner.entries.clear(); + inner.total_bytes = 0; + } else if old_size > 0 { + inner.entries.remove(&key); + inner.total_bytes = bytes_after_remove; + } + + inner.total_bytes += new_size; + inner.entries.insert(key, data.to_vec()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn specifier() -> ModuleSpecifier { + ModuleSpecifier::parse("file:///test.js").unwrap() + } + + fn specifier_n(i: usize) -> ModuleSpecifier { + ModuleSpecifier::parse(&format!("file:///test{i}.js")).unwrap() + } + + #[test] + fn round_trip() { + let cache = V8CodeCache::default(); + let s = specifier(); + assert!(cache.get_sync(&s, CodeCacheType::Script, 42).is_none()); + cache.set_sync(s.clone(), CodeCacheType::Script, 42, b"bytecode"); + assert_eq!( + cache.get_sync(&s, CodeCacheType::Script, 42).as_deref(), + Some(b"bytecode".as_ref()) + ); + } + + #[test] + fn kind_and_hash_are_part_of_key() { + let cache = V8CodeCache::default(); + let s = specifier(); + cache.set_sync(s.clone(), CodeCacheType::Script, 1, b"a"); + assert!(cache.get_sync(&s, CodeCacheType::EsModule, 1).is_none()); + assert!(cache.get_sync(&s, CodeCacheType::Script, 2).is_none()); + } + + #[test] + fn set_replaces_existing_entry() { + let cache = V8CodeCache::default(); + let s = specifier(); + cache.set_sync(s.clone(), CodeCacheType::Script, 1, b"first"); + cache.set_sync(s.clone(), CodeCacheType::Script, 1, b"second"); + assert_eq!( + cache.get_sync(&s, CodeCacheType::Script, 1).as_deref(), + Some(b"second".as_ref()) + ); + } + + /// Replacement-larger-than-remaining must NOT drop the existing cached + /// entry — otherwise a near-full cache turns a cache hit into a + /// permanent miss for that key. + #[test] + fn replacement_too_large_keeps_existing_entry() { + let cache = V8CodeCache::default(); + // Force a near-full state via a large initial entry. 
+ let big = vec![0u8; MAX_V8_CODE_CACHE_BYTES - 1024]; + let s1 = specifier_n(1); + cache.set_sync(s1.clone(), CodeCacheType::Script, 1, &big); + assert!(cache.get_sync(&s1, CodeCacheType::Script, 1).is_some()); + + // Insert a small entry, then attempt to replace it with one that + // would overflow the budget when summed with the existing big entry. + let s2 = specifier_n(2); + cache.set_sync(s2.clone(), CodeCacheType::Script, 1, b"small"); + let too_big_replacement = vec![0u8; 4096]; + cache.set_sync(s2.clone(), CodeCacheType::Script, 1, &too_big_replacement); + + // Existing cache hit for s2 must be preserved, not dropped. + assert_eq!( + cache.get_sync(&s2, CodeCacheType::Script, 1).as_deref(), + Some(b"small".as_ref()) + ); + } + + /// Once the cache is at the 100MB ceiling, a fresh insert must trigger + /// recovery (clear), not become a permanent rejection. + #[test] + fn fresh_insert_at_ceiling_recovers() { + let cache = V8CodeCache::default(); + let big = vec![0u8; MAX_V8_CODE_CACHE_BYTES - 1024]; + cache.set_sync(specifier_n(1), CodeCacheType::Script, 1, &big); + assert!( + cache + .get_sync(&specifier_n(1), CodeCacheType::Script, 1) + .is_some() + ); + + // Fresh key that would push past MAX. Existing entries get evicted + // and the fresh entry is admitted. 
+ let medium = vec![0u8; 8 * 1024]; + cache.set_sync(specifier_n(2), CodeCacheType::Script, 1, &medium); + + assert!( + cache + .get_sync(&specifier_n(1), CodeCacheType::Script, 1) + .is_none(), + "old entry should have been evicted" + ); + assert!( + cache + .get_sync(&specifier_n(2), CodeCacheType::Script, 1) + .is_some(), + "new entry should have been admitted after recovery" + ); + } +} diff --git a/lit-actions/server/worker_pool.rs b/lit-actions/server/worker_pool.rs index ea51fbb3..6e2144cf 100644 --- a/lit-actions/server/worker_pool.rs +++ b/lit-actions/server/worker_pool.rs @@ -463,6 +463,7 @@ mod tests { lockfile_path: None, http_client: CdnModuleLoader::build_http_client(), memory_limit_mb: DEFAULT_MEMORY_LIMIT_MB, + v8_code_cache: crate::v8_code_cache::new_v8_code_cache(), }) } diff --git a/lit-actions/tests/it.rs b/lit-actions/tests/it.rs index 9d593c14..1b17c47b 100644 --- a/lit-actions/tests/it.rs +++ b/lit-actions/tests/it.rs @@ -281,6 +281,36 @@ async fn lit_namespace_protection(mut client: TestClient) { assert!(client.received::().success); } +/// User code must not see the internal `__litEvalCached` helper that wraps +/// the action source for V8's eval-context code cache (CPL-264). The helper +/// deletes itself from globalThis as its first action; if that ever regresses, +/// user code would gain a string-eval primitive that bypasses +/// `--disallow-code-generation-from-strings`. +#[rstest] +#[tokio::test] +async fn lit_eval_cached_is_hidden_from_user_code(mut client: TestClient) { + let code = indoc! 
{r#"
+        async function main() {
+          console.log(
+            typeof __litEvalCached,
+            typeof globalThis.__litEvalCached,
+          )
+        }
+    "#};
+
+    client
+        .respond_with(PrintResponse {})
+        .execute_js(code)
+        .await
+        .unwrap();
+
+    assert_eq!(
+        client.received::().message.trim_end(),
+        "undefined undefined"
+    );
+    assert!(client.received::().success);
+}
+
 #[rstest]
 #[tokio::test]
 async fn js_params(mut client: TestClient) {
@@ -536,13 +566,19 @@ async fn reference_error(mut client: TestClient) {
         .execute_js("async function main() { nonexisting_function() }")
         .await;
 
+    // User code now runs through `__litEvalCached` (op_eval_context) so V8's
+    // script code cache is reachable for the bundled action (CPL-264). Side
+    // effects on error output: the script name is the URL specifier used by
+    // op_eval_context, and two wrapper frames appear at the tail.
     assert_eq!(
         res.unwrap_err().to_string(),
         indoc! {r#"
            Uncaught (in promise) ReferenceError: nonexisting_function is not defined
-                at main (<anon>:2:33)
-                at <anon>:6:28
-                at <anon>:10:11
+                at main (file:///user_provided_script.js:2:33)
+                at file:///user_provided_script.js:6:28
+                at file:///user_provided_script.js:10:11
+                at globalThis.__litEvalCached (ext:lit_actions/99_patches.js:52:21)
+                at <anon>:1:1
         "#}
         .trim()
     );
@@ -560,13 +596,17 @@ async fn throw_error(mut client: TestClient) {
     "#};
     let res = client.execute_js(code).await;
 
+    // See `reference_error` for why the stack format differs from
+    // pre-CPL-264 output.
     assert_eq!(
         res.unwrap_err().to_string(),
         indoc! {r#"
             Uncaught (in promise) Error: boom
-                at main (<anon>:3:7)
-                at <anon>:9:28
-                at <anon>:13:11
+                at main (file:///user_provided_script.js:3:7)
+                at file:///user_provided_script.js:9:28
+                at file:///user_provided_script.js:13:11
+                at globalThis.__litEvalCached (ext:lit_actions/99_patches.js:52:21)
+                at <anon>:1:1
         "#}
         .trim(),
     );
@@ -585,9 +625,11 @@
         res.unwrap_err().to_string(),
         indoc!
{r#"
             Uncaught (in promise) Error: boom
-                at main (<anon>:3:11)
-                at <anon>:9:28
-                at <anon>:13:11
+                at main (file:///user_provided_script.js:3:11)
+                at file:///user_provided_script.js:9:28
+                at file:///user_provided_script.js:13:11
+                at globalThis.__litEvalCached (ext:lit_actions/99_patches.js:52:21)
+                at <anon>:1:1
         "#}
         .trim(),
     );