diff --git a/.gitignore b/.gitignore index 37233c0..a89f9a9 100644 --- a/.gitignore +++ b/.gitignore @@ -134,8 +134,7 @@ elfconv-v* !tests/browser/*.js !tests/browser/*.html !tests/browser/package.json -tests/browser/wasm-out/ -tests/browser/wasm-out-bash/ +tests/browser/wasm-out*/ tests/browser/test-results/ # AI diff --git a/browser/js-kernel.js b/browser/js-kernel.js index bc5ca41..c35d491 100644 --- a/browser/js-kernel.js +++ b/browser/js-kernel.js @@ -1793,6 +1793,195 @@ var Module = (() => { processData(url) } }; + + // Expose helpers for loadPackage (preload mount support) + Module["addRunDependency"] = addRunDependency; + Module["removeRunDependency"] = removeRunDependency; + Module["FS_createPath"] = (parent, path, canRead, canWrite) => { + var fullPath = parent === "/" ? "/" + path : parent + "/" + path; + var parts = fullPath.split("/").filter(p => p); + var currentPath = ""; + for (var i = 0; i < parts.length; i++) { + currentPath += "/" + parts[i]; + try { + FS.mkdir(currentPath); + } catch (e) { + if (e.errno !== 20) throw e; + } + } + }; + Module["FS_createDataFile"] = (parent, name, data, canRead, canWrite, canOwn) => { + var path = name; + if (parent) { + parent = typeof parent == "string" ? parent : FS.getPath(parent); + path = name ? 
PATH.join2(parent, name) : parent + } + var mode = FS_getMode(canRead, canWrite); + try { + var existing = FS.lookupPath(path); + if (existing && existing.node) { + FS.unlink(path); + } + } catch (e) { /* node doesn't exist, fine */ } + var node = FS.create(path, mode); + if (data) { + if (typeof data == "string") { + var arr = new Array(data.length); + for (var i = 0, len = data.length; i < len; ++i) arr[i] = data.charCodeAt(i); + data = arr + } + if (node.node_ops && node.node_ops.setattr) { + node.contents = new Uint8Array(data); + node.usedBytes = data.length; + node.timestamp = Date.now(); + } else { + FS.chmod(node, mode | 146); + var stream = FS.open(node, 577); + FS.write(stream, data, 0, data.length, 0, canOwn); + FS.close(stream); + FS.chmod(node, mode); + } + } + }; + + // loadPackage: fetch a .data file and extract preloaded files into MEMFS + Module["expectedDataFileDownloads"] ??= 0; + Module["loadPackage"] = function (metadata) { + var PACKAGE_NAME = metadata["packageDataName"]; + var REMOTE_PACKAGE_BASE = PACKAGE_NAME; + var REMOTE_PACKAGE_NAME = Module["locateFile"] ? Module["locateFile"](REMOTE_PACKAGE_BASE, "") : REMOTE_PACKAGE_BASE; + var REMOTE_PACKAGE_SIZE = metadata["remote_package_size"]; + + function fetchRemotePackage(packageName, packageSize, callback, errback) { + Module["dataFileDownloads"] ??= {}; + fetch(packageName).then(response => { + if (!response.ok) { + errback?.(new Error(`${response.status}: ${response.url}`)); + return; + } + if (!response.body && response.arrayBuffer) { + return response.arrayBuffer().then(callback, errback) + } + const reader = response.body.getReader(); + const chunks = []; + const headers = response.headers; + const total = Number(headers.get("Content-Length") ?? 
packageSize); + let loaded = 0; + + const handleChunk = ({ done, value }) => { + if (!done) { + chunks.push(value); + loaded += value.length; + Module["dataFileDownloads"][packageName] = { + loaded, + total + }; + let totalLoaded = 0; + let totalSize = 0; + for (const download of Object.values(Module["dataFileDownloads"])) { + totalLoaded += download.loaded; + totalSize += download.total + } + Module["setStatus"]?.(`Downloading data... (${totalLoaded}/${totalSize})`); + return reader.read().then(handleChunk, errback); + } else { + const packageData = new Uint8Array(chunks.map(c => c.length).reduce((a, b) => a + b, 0)); + let offset = 0; + for (const chunk of chunks) { + packageData.set(chunk, offset); + offset += chunk.length + } + callback(packageData.buffer); + } + }; + + Module["setStatus"]?.("Downloading data..."); + reader.read().then(handleChunk, errback); + }).catch(cause => { + errback?.(new Error(`Network Error: ${packageName}`, { cause })); + }) + } + + function handleError(error) { + console.error("package error:", error) + } + var fetchedCallback = null; + var fetched = Module["getPreloadedPackage"] ? 
Module["getPreloadedPackage"](REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE) : null; + if (!fetched) fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE, data => { + if (fetchedCallback) { + fetchedCallback(data); + fetchedCallback = null + } else { + fetched = data + } + }, handleError); + + var runWithFSExecuted = false; + function runWithFS(Module) { + if (runWithFSExecuted) { + return; + } + runWithFSExecuted = true; + + function assert(check, msg) { + if (!check) throw msg + (new Error).stack + } + + function DataRequest(start, end, audio) { + this.start = start; + this.end = end; + this.audio = audio + } + DataRequest.prototype = { + requests: {}, + open: function (mode, name) { + this.name = name; + this.requests[name] = this; + Module["addRunDependency"](`fp ${this.name}`) + }, + send: function () { }, + onload: function () { + var byteArray = this.byteArray.subarray(this.start, this.end); + this.finish(byteArray) + }, + finish: function (byteArray) { + var that = this; + Module["FS_createDataFile"](this.name, null, byteArray, true, true, true); + Module["removeRunDependency"](`fp ${that.name}`); + this.requests[this.name] = null + } + }; + var files = metadata["files"]; + for (var i = 0; i < files.length; ++i) { + new DataRequest(files[i]["start"], files[i]["end"], files[i]["audio"] || 0).open("GET", files[i]["filename"]) + } + + function processPackageData(arrayBuffer) { + assert(arrayBuffer, "Loading data file failed."); + assert(arrayBuffer.constructor.name === ArrayBuffer.name, "bad input to processPackageData"); + var byteArray = new Uint8Array(arrayBuffer); + DataRequest.prototype.byteArray = byteArray; + var files = metadata["files"]; + for (var i = 0; i < files.length; ++i) { + DataRequest.prototype.requests[files[i].filename].onload() + } + Module["removeRunDependency"]("datafile_" + PACKAGE_NAME) + } + Module["addRunDependency"]("datafile_" + PACKAGE_NAME); + Module["preloadResults"] ??= {}; + Module["preloadResults"][PACKAGE_NAME] = { + 
fromCache: false + }; + if (fetched) { + processPackageData(fetched); + fetched = null + } else { + fetchedCallback = processPackageData + } + } + runWithFS(Module); + }; + var FS_modeStringToFlags = str => { var flagModes = { r: 0, @@ -5166,8 +5355,27 @@ var Module = (() => { return } - function doRun() { + async function doRun() { initRuntime(); + // Load preloaded files from manifest if present + try { + var manifestUrl = Module["preloadManifestUrl"] || "preload-manifest.json"; + var resp = await fetch(manifestUrl); + if (resp.ok) { + var manifest = await resp.json(); + for (var pkg of manifest) { + if (pkg.directories) { + for (var dir of pkg.directories) { + Module["FS_createPath"]("/", dir.substring(1), true, true); + } + } + Module["loadPackage"](pkg); + if (runDependencies > 0) { + await new Promise(resolve => { dependenciesFulfilled = resolve; }); + } + } + } + } catch (e) { /* no manifest = no preload, normal */ } preMain(); readyPromiseResolve(Module); postRun() diff --git a/runtime/syscalls/SyscallBrowser.cpp b/runtime/syscalls/SyscallBrowser.cpp index 8db8797..4013d2b 100644 --- a/runtime/syscalls/SyscallBrowser.cpp +++ b/runtime/syscalls/SyscallBrowser.cpp @@ -753,8 +753,6 @@ EM_JS(uint32_t, ___syscall_sendfile, - x86-64: syscall NR: rax, return: rax, args: rdi, rsi, rdx, r10, r8, r9 ref: https://blog.rchapman.org/posts/Linux_System_Call_Table_for_x86_64/ */ -uint64_t CNT = 0; - void RuntimeManager::SVCBrowserCall(uint8_t *arena_ptr) { errno = 0; #if defined(ELFC_RUNTIME_SYSCALL_DEBUG) diff --git a/scripts/elfconv.sh b/scripts/elfconv.sh index e1dc907..5cff51a 100755 --- a/scripts/elfconv.sh +++ b/scripts/elfconv.sh @@ -40,7 +40,7 @@ setting() { CLANGFLAGS="${OPTFLAGS} -std=c++20 -static -I${ROOT_DIR}/backend/remill/include -I${ROOT_DIR}" # emscripten EMCC=em++ - EMCC_OPTION="-sASYNCIFY=0 -sINITIAL_MEMORY=536870912 -sSTACK_SIZE=16MB -sPTHREAD_POOL_SIZE=0 -pthread -sALLOW_MEMORY_GROWTH -sEXPORT_ES6 -sENVIRONMENT=web,worker $PRELOAD" + 
EMCC_OPTION="-sASYNCIFY=0 -sINITIAL_MEMORY=536870912 -sSTACK_SIZE=16MB -sPTHREAD_POOL_SIZE=0 -pthread -sALLOW_MEMORY_GROWTH -sEXPORT_ES6 -sENVIRONMENT=web,worker" EMCCFLAGS="${OPTFLAGS} -I${ROOT_DIR}/backend/remill/include -I${ROOT_DIR}" # wasi WASISDKCC="${WASI_SDK_PATH}/bin/clang++" @@ -154,9 +154,9 @@ prepare_js() { exit 1 fi - # --preload-file generates the mapped data file `exe.data`. - if [[ -f "exe.data" ]]; then - cp -p exe.data ${BROWSER_DIR} + # copy preload manifest and data files if they exist + if [[ -f "${CUR_DIR}/preload-manifest.json" ]]; then + echo -e "[${GREEN}INFO${NC}] Preload manifest found, copying data files." && cp -p "${CUR_DIR}/preload-manifest.json" "${CUR_DIR}"/*.data ${BROWSER_DIR} fi rm "${CUR_DIR}/process.js" @@ -233,13 +233,8 @@ main() { *-wasm) RUNTIME_MACRO="${RUNTIME_MACRO} -DTARGET_IS_BROWSER=1 -DELFNAME=\"${ELFNAME}\"" MAINOBJ="${CUR_DIR}/${ELFNAME}.wasm.o" - PRELOAD= MAINGENJS="${CUR_DIR}/${ELFNAME}.generated.js" - - if [[ -n "${MOUNT_SETTING}" ]]; then - PRELOAD="--preload-file ${MOUNT_SETTING}" - fi - + if [[ -z "${NO_COMPILED}" ]]; then ${EMCC} ${EMCCFLAGS} ${RUNTIME_MACRO} -c ${MAINIR} -o ${MAINOBJ} echo -e "[${GREEN}INFO${NC}] built ${MAINOBJ}" @@ -247,8 +242,14 @@ main() { echo -e "[${GREEN}INFO${NC}] NO_COPMILED is ON." fi - # creates wasm - ${EMCC} ${EMCCFLAGS} ${RUNTIME_MACRO} ${EMCC_OPTION} ${PRELOAD} -o ${MAINGENJS} ${MAINOBJ} ${ELFCONV_COMMON_RUNTIMES} ${RUNTIME_DIR}/syscalls/SyscallBrowser.cpp + # creates wasm (no --preload-file; preloading is handled by pack-preload.py + js-kernel.js) + ${EMCC} ${EMCCFLAGS} ${RUNTIME_MACRO} ${EMCC_OPTION} -o ${MAINGENJS} ${MAINOBJ} ${ELFCONV_COMMON_RUNTIMES} ${RUNTIME_DIR}/syscalls/SyscallBrowser.cpp + + # generate preload .data and manifest if MOUNT_SETTING is specified + if [[ -n "${MOUNT_SETTING}" ]]; then + echo -e "[${GREEN}INFO${NC}] Packing preload data for: ${MOUNT_SETTING}" + python3 "${ROOT_DIR}/scripts/pack-preload.py" ${MOUNT_SETTING} -o "${CUR_DIR}" + fi echo -e "[${GREEN}INFO${NC}] built ${ELFNAME}.wasm and ${ELFNAME}.js and ${ELFNAME}.html." 
# prepare Js and Wasm diff --git a/scripts/pack-preload.py b/scripts/pack-preload.py new file mode 100644 index 0000000..82421d8 --- /dev/null +++ b/scripts/pack-preload.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +Pack host directories into .data files with a preload manifest for js-kernel.js. + +Usage: + python3 pack-preload.py @ [-o ] + +Example: + python3 pack-preload.py /root/cpython/Lib@/lib/python3.15 -o ./out + +Generates: + /.data - concatenated binary blob + /preload-manifest.json - metadata for js-kernel.js loadPackage +""" + +import argparse +import json +import os +import sys + + +def parse_mount_spec(spec): + """Parse 'host_dir@mount_point' into (host_dir, mount_point).""" + if "@" not in spec: + print(f"Error: mount spec must be 'host_dir@mount_point', got: {spec}", file=sys.stderr) + sys.exit(1) + host_dir, mount_point = spec.rsplit("@", 1) + if not os.path.isdir(host_dir): + print(f"Error: host directory not found: {host_dir}", file=sys.stderr) + sys.exit(1) + if not mount_point.startswith("/"): + print(f"Error: mount point must be absolute path: {mount_point}", file=sys.stderr) + sys.exit(1) + return host_dir, mount_point + + +def collect_files(host_dir, mount_point): + """Walk host_dir and collect (host_path, mount_path) pairs for regular files.""" + result = [] + for dirpath, dirnames, filenames in os.walk(host_dir): + dirnames[:] = [d for d in dirnames if not d.startswith(".")] + for fname in filenames: + if fname.startswith("."): + continue + host_path = os.path.join(dirpath, fname) + if not os.path.isfile(host_path): + continue + rel = os.path.relpath(host_path, host_dir) + mount_path = mount_point.rstrip("/") + "/" + rel + result.append((host_path, mount_path)) + result.sort(key=lambda x: x[1]) + return result + + +def collect_directories(host_dir, mount_point): + """Walk host_dir and collect all directories (including empty ones).""" + dirs = set() + dirs.add(mount_point.rstrip("/") or "/") + for dirpath, dirnames, filenames in 
os.walk(host_dir): + dirnames[:] = [d for d in dirnames if not d.startswith(".")] + rel = os.path.relpath(dirpath, host_dir) + if rel == ".": + continue + mount_dir = mount_point.rstrip("/") + "/" + rel + dirs.add(mount_dir) + all_dirs = set() + for d in dirs: + while d and d != "/": + all_dirs.add(d) + d = os.path.dirname(d) + return sorted(all_dirs) + + +def pack_data(file_pairs, output_path): + """Concatenate all files into a single .data blob, return file metadata.""" + files_meta = [] + offset = 0 + with open(output_path, "wb") as out: + for host_path, mount_path in file_pairs: + size = os.path.getsize(host_path) + with open(host_path, "rb") as f: + while True: + chunk = f.read(1024 * 1024) + if not chunk: + break + out.write(chunk) + files_meta.append({ + "filename": mount_path, + "start": offset, + "end": offset + size, + }) + offset += size + return files_meta, offset + + +def main(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("mount_specs", nargs="+", + help="Mount specs: host_dir@mount_point") + parser.add_argument("-o", "--output-dir", required=True, + help="Output directory for .data and manifest") + args = parser.parse_args() + + output_dir = args.output_dir + if not os.path.isdir(output_dir): + os.makedirs(output_dir, exist_ok=True) + + manifest = [] + + for spec in args.mount_specs: + host_dir, mount_point = parse_mount_spec(spec) + file_pairs = collect_files(host_dir, mount_point) + directories = collect_directories(host_dir, mount_point) + + if not file_pairs and not directories: + print(f"Warning: no files found in {host_dir}", file=sys.stderr) + continue + + data_name = mount_point.strip("/").replace("/", "_") + ".data" + data_path = os.path.join(output_dir, data_name) + + files_meta, total_size = pack_data(file_pairs, data_path) + + entry = { + "packageDataName": data_name, + "remote_package_size": total_size, + "directories": directories, + "files": 
files_meta, + } + manifest.append(entry) + + print(f"Packed {len(file_pairs)} files, {len(directories)} dirs ({total_size} bytes) -> {data_name}") + + manifest_path = os.path.join(output_dir, "preload-manifest.json") + with open(manifest_path, "w") as f: + json.dump(manifest, f) + + print(f"Manifest written to {manifest_path}") + + +if __name__ == "__main__": + main() diff --git a/tests/browser/bash-busybox.spec.js b/tests/browser/bash-busybox.spec.js index 27f2ff0..6b1fc6c 100644 --- a/tests/browser/bash-busybox.spec.js +++ b/tests/browser/bash-busybox.spec.js @@ -168,3 +168,47 @@ for (const [batchName, cmds] of allBatches) { expect(results.fail).toEqual([]); }); } + +// Preloaded host directory mount tests (fixtures/testdir mounted at /mnt/test) +test('preload: cat reads mounted file', async ({ page }) => { + test.setTimeout(180000); + await page.goto('/'); + await waitForTerminalContent(page, 'bash-static.wasm', 90000); + + await typeCommand(page, 'cat /mnt/test/hello.txt'); + const out = await waitForTerminalContent(page, 'Hello from preloaded file'); + expect(await out.jsonValue()).toContain('Hello from preloaded file'); +}); + +test('preload: cat reads nested mounted file', async ({ page }) => { + test.setTimeout(180000); + await page.goto('/'); + await waitForTerminalContent(page, 'bash-static.wasm', 90000); + + await typeCommand(page, 'cat /mnt/test/subdir/nested.txt'); + const out = await waitForTerminalContent(page, 'Nested content here'); + expect(await out.jsonValue()).toContain('Nested content here'); +}); + +test('preload: ls lists mounted directory', async ({ page }) => { + test.setTimeout(180000); + await page.goto('/'); + await waitForTerminalContent(page, 'bash-static.wasm', 90000); + + await typeCommand(page, 'ls /mnt/test/'); + const out = await waitForTerminalContent(page, 'hello.txt'); + const text = await out.jsonValue(); + expect(text).toContain('hello.txt'); + expect(text).toContain('lines.txt'); + expect(text).toContain('subdir'); +}); + 
+test('preload: wc verifies mounted file content', async ({ page }) => { + test.setTimeout(180000); + await page.goto('/'); + await waitForTerminalContent(page, 'bash-static.wasm', 90000); + + await typeCommand(page, 'wc -l /mnt/test/lines.txt'); + const out = await waitForTerminalContent(page, '3'); + expect(await out.jsonValue()).toContain('3'); +}); diff --git a/tests/browser/build.sh b/tests/browser/build.sh index 1eb5580..8409fd1 100755 --- a/tests/browser/build.sh +++ b/tests/browser/build.sh @@ -24,6 +24,13 @@ build_bash() { TARGET=aarch64-wasm ECV_OUT_DIR="${BASH_OUT_DIR}" \ "${ROOT_DIR}/scripts/dev.sh" "${ROOT_DIR}/examples/examples-repos/busybox/busybox" echo "Browser Wasm artifacts (bash+busybox) built in ${BASH_OUT_DIR}" + + # Pack preload test fixtures into wasm-out-bash + local FIXTURE_DIR="${SCRIPT_DIR}/fixtures/testdir" + if [[ -d "${FIXTURE_DIR}" ]]; then + python3 "${ROOT_DIR}/scripts/pack-preload.py" "${FIXTURE_DIR}@/mnt/test" -o "${BASH_OUT_DIR}" + echo "Preload test fixtures packed into ${BASH_OUT_DIR}" + fi } case "${PROJECT}" in diff --git a/tests/browser/fixtures/testdir/hello.txt b/tests/browser/fixtures/testdir/hello.txt new file mode 100644 index 0000000..ab7d766 --- /dev/null +++ b/tests/browser/fixtures/testdir/hello.txt @@ -0,0 +1 @@ +Hello from preloaded file diff --git a/tests/browser/fixtures/testdir/lines.txt b/tests/browser/fixtures/testdir/lines.txt new file mode 100644 index 0000000..83db48f --- /dev/null +++ b/tests/browser/fixtures/testdir/lines.txt @@ -0,0 +1,3 @@ +line1 +line2 +line3 diff --git a/tests/browser/fixtures/testdir/subdir/nested.txt b/tests/browser/fixtures/testdir/subdir/nested.txt new file mode 100644 index 0000000..882ba37 --- /dev/null +++ b/tests/browser/fixtures/testdir/subdir/nested.txt @@ -0,0 +1 @@ +Nested content here diff --git a/tests/browser/test-server.js b/tests/browser/test-server.js index f1f5382..3b42a51 100644 --- a/tests/browser/test-server.js +++ b/tests/browser/test-server.js @@ -12,6 +12,7 
@@ const MIME_TYPES = { '.wasm': 'application/wasm', '.css': 'text/css', '.json': 'application/json', + '.data': 'application/octet-stream', }; const TEST_HTML = path.resolve(__dirname, process.env.TEST_HTML || 'test-main.html');