-
Notifications
You must be signed in to change notification settings - Fork 331
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Python: Set up read only file system backed by packages tar (#1555)
This is a startup performance optimization. Instead of copying the data around, this uses the tar file in place as the backing for the files.
- Loading branch information
Showing
15 changed files
with
345 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import { default as Reader } from "pyodide-internal:packages_tar_reader"; | ||
|
||
// This is based on the description of the tar file format on Wikipedia, | ||
// plus some trial and error with real tar files. | ||
// https://en.wikipedia.org/wiki/Tar_(computing)#File_format | ||
|
||
|
||
// Shared decoder instance (TextDecoder decodes UTF-8 by default).
const decoder = new TextDecoder();

/**
 * Decode a NUL-terminated string from a byte buffer.
 *
 * Everything from the first 0 byte onwards is ignored; if the buffer contains
 * no 0 byte the whole buffer is decoded.
 *
 * @param {Uint8Array} buf - Bytes of a single header field.
 * @returns {string} The decoded text before the first NUL byte.
 */
function decodeString(buf) {
  const nullIdx = buf.indexOf(0);
  // subarray() creates a view, so the caller's buffer is neither copied nor
  // modified.
  const text = nullIdx >= 0 ? buf.subarray(0, nullIdx) : buf;
  return decoder.decode(text);
}

/**
 * Decode the string field occupying `size` bytes of `buf` starting at `offset`.
 *
 * @param {Uint8Array} buf - The buffer containing the field.
 * @param {number} offset - Index of the first byte of the field.
 * @param {number} size - Width of the field in bytes.
 * @returns {string} The field contents up to the first NUL byte.
 */
function decodeField(buf, offset, size) {
  return decodeString(buf.subarray(offset, offset + size));
}

/**
 * Decode a numeric field stored as octal ASCII digits.
 *
 * @param {Uint8Array} buf - The buffer containing the field.
 * @param {number} offset - Index of the first byte of the field.
 * @param {number} size - Width of the field in bytes.
 * @returns {number} The parsed value, or NaN when the field does not start
 *   with an octal number (for example when it is empty).
 */
function decodeNumber(buf, offset, size) {
  return Number.parseInt(decodeField(buf, offset, size), 8);
}
|
||
/**
 * Decode one 512-byte tar header block into an entry description.
 *
 * Fields read (byte offset, width): name (0, 100), mode (100, 8),
 * size (124, 12), modification time (136, 12), type flag (156, 1) and
 * name prefix (345, 155).
 *
 * @param {Uint8Array} buf - The header block.
 * @returns {{path: string, name: string, mode: number, size: number,
 *   modtime: number, type: number, parts: string[], children: undefined}}
 *   The decoded entry. `name` is initialised to the full path, and `parts` /
 *   `children` are placeholders left for the caller to fill in. Numeric
 *   fields are NaN when the corresponding header field is not a number.
 */
function decodeHeader(buf) {
  const nameBase = decodeField(buf, 0, 100);
  const namePrefix = decodeField(buf, 345, 155);
  // In the ustar format a non-empty prefix is joined to the name with a "/"
  // that is not stored in either field. Tolerate a prefix that already ends
  // in "/" so we never emit a doubled separator.
  let path = nameBase;
  if (namePrefix !== "") {
    path = namePrefix.endsWith("/")
      ? `${namePrefix}${nameBase}`
      : `${namePrefix}/${nameBase}`;
  }
  // Trim possible leading ./
  if (path.startsWith("./")) {
    path = path.slice(2);
  }
  const mode = decodeNumber(buf, 100, 8);
  const size = decodeNumber(buf, 124, 12);
  const modtime = decodeNumber(buf, 136, 12);
  // The type flag is a single ASCII character; non-digit flags become NaN.
  const type = Number(String.fromCharCode(buf[156]));
  return {
    path,
    name: path,
    mode,
    size,
    modtime,
    type,
    parts: [],
    children: undefined,
  };
}
|
||
/**
 * Scan the packages tar file through `Reader` and build a directory tree
 * describing its entries.
 *
 * Headers are read one 512-byte block at a time starting at offset 0; file
 * contents are never read, only their offset is recorded (`contentsOffset`).
 * Scanning stops at the first header whose mode field is not a number.
 *
 * Directories that are used as a parent without having an entry of their own
 * are created on the fly, and a directory entry that shows up after (or in
 * addition to) an existing node for the same path keeps that node's children.
 *
 * @returns {[object, string[]]} `[root, soFiles]` where `root` is the
 *   synthetic top-level directory node (children keyed by entry name in a
 *   Map) and `soFiles` lists the full paths of all entries ending in ".so".
 * @throws {Error} If an entry has a valid mode but an unusable size field.
 */
export function parseTarInfo() {
  // Stack of ancestors of `directory` (root first); its length is the depth
  // of `directory` in the tree.
  const directories = [];
  const soFiles = [];
  const root = {
    children: new Map(),
    mode: 0o777,
    type: 5,
    modtime: 0,
    size: 0,
    path: "",
    name: "",
    parts: [],
  };
  let directory = root;
  const buf = new Uint8Array(512);
  let offset = 0;
  while (true) {
    // Clear the reused buffer so bytes of the previous header can never be
    // parsed again. NOTE(review): this only matters if Reader.read can leave
    // part of the buffer unwritten (e.g. at end of file) -- confirm.
    buf.fill(0);
    Reader.read(offset, buf);
    const info = decodeHeader(buf);
    if (Number.isNaN(info.mode)) {
      // Invalid mode means we're done
      return [root, soFiles];
    }
    if (!Number.isFinite(info.size) || info.size < 0) {
      // Without a size we cannot locate the next header.
      throw new Error(
        `Invalid size for tar entry "${info.path}" at offset ${offset}`,
      );
    }
    const contentsOffset = offset + 512;
    // Skip the header block plus the contents rounded up to whole blocks.
    offset += 512 * Math.ceil(info.size / 512 + 1);
    if (info.path === "") {
      // skip possible leading ./ directory
      continue;
    }
    if (info.path.includes("PaxHeader")) {
      // Ignore PaxHeader extension entries; they are metadata, not files that
      // should appear in the tree.
      continue;
    }

    // For a directory "a/b/" this yields ["a", "b"]. For a file only the last
    // component loses its final character, which is harmless because for
    // files we only use the number of components.
    const parts = info.path.slice(0, -1).split("/");
    const parentDepth = parts.length - 1;

    // Navigate to the parent of the entry: go up until we're at a common
    // ancestor of the current position and the target, then back down.
    // Most tar files are lexicographically sorted, so the "go back down" step
    // usually does nothing, but some are a bit out of order.
    //
    // The depth check makes us leave a directory when its own path shows up
    // again, so that the entry is attached to its parent, not to itself.
    while (
      directories.length > 0 &&
      (directories.length > parentDepth ||
        !info.path.startsWith(directory.path))
    ) {
      directory = directories.pop();
    }
    for (let i = directories.length; i < parentDepth; i++) {
      let child = directory.children.get(parts[i]);
      if (child === undefined) {
        // The archive has no entry for this directory: synthesize one, using
        // the same placeholder metadata as the root node.
        const childParts = parts.slice(0, i + 1);
        child = {
          children: new Map(),
          mode: 0o777,
          type: 5,
          modtime: 0,
          size: 0,
          path: `${childParts.join("/")}/`,
          name: parts[i],
          parts: childParts,
        };
        directory.children.set(child.name, child);
      }
      directories.push(directory);
      directory = child;
    }
    if (info.type === 5) {
      // a directory
      directories.push(directory);
      info.parts = parts;
      info.name = info.parts.at(-1);
      // Keep children already collected for this path, if any.
      info.children =
        directory.children.get(info.name)?.children ?? new Map();
      directory.children.set(info.name, info);
      directory = info;
    } else {
      // hopefully a normal file, other type values (e.g., symlink) are stored
      // the same way
      info.contentsOffset = contentsOffset;
      info.name = info.path.slice(directory.path.length);
      if (info.name.endsWith(".so")) {
        soFiles.push(info.path);
      }
      directory.children.set(info.name, info);
    }
  }
}
Oops, something went wrong.