diff --git a/packages/hashi/src/H5P/H5PRunner.js b/packages/hashi/src/H5P/H5PRunner.js index f55d9ac9f7b..13ccf9887c8 100644 --- a/packages/hashi/src/H5P/H5PRunner.js +++ b/packages/hashi/src/H5P/H5PRunner.js @@ -137,9 +137,10 @@ export default class H5PRunner { // and for logging xAPI statements about the content. this.contentNamespace = CONTENT_ID; const start = performance.now(); + const largeFileUrlGenerator = filePath => `${this.zipcontentUrl}/${filePath}`; // First load the full H5P file // Store the zip locally for later reference - this.zip = new ZipFile(this.filepath); + this.zip = new ZipFile(this.filepath, { largeFileUrlGenerator }); // Recurse all the package dependencies return this.recurseDependencies('h5p.json', true).then(() => { // Once we have found all the dependencies, we call this diff --git a/packages/kolibri-zip/src/constants.js b/packages/kolibri-zip/src/constants.js new file mode 100644 index 00000000000..b5679efcf99 --- /dev/null +++ b/packages/kolibri-zip/src/constants.js @@ -0,0 +1,7 @@ +// ZIP End of Central Directory Record signature +export const EOCD_SIGNATURE = 0x06054b50; +export const EOCD_SIZE = 22; // Minimum size of end of central directory record +export const MAX_COMMENT_SIZE = 65535; // Maximum ZIP comment size +export const LARGE_FILE_THRESHOLD = 500 * 1024; // 500KB +export const LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50; +export const LOCAL_FILE_HEADER_FIXED_SIZE = 30; diff --git a/packages/kolibri-zip/src/index.js b/packages/kolibri-zip/src/index.js index a7e26a87ef6..0f7399ccbbf 100644 --- a/packages/kolibri-zip/src/index.js +++ b/packages/kolibri-zip/src/index.js @@ -1,14 +1,17 @@ -import { unzip, strFromU8, strToU8 } from 'fflate'; +import { inflate, strFromU8, strToU8 } from 'fflate'; import isPlainObject from 'lodash/isPlainObject'; -import loadBinary from './loadBinary'; import mimetypes from './mimetypes.json'; import { getAbsoluteFilePath, defaultFilePathMappers } from './fileUtils'; +import ZipMetadata from 
'./zipMetadata'; +import { LOCAL_FILE_HEADER_SIGNATURE, LOCAL_FILE_HEADER_FIXED_SIZE } from './constants'; +import { readUInt16LE, readUInt32LE } from './zipUtils'; class ExtractedFile { - constructor(name, obj) { + constructor(name, obj, urlGenerator = null) { this.name = name; this.obj = obj; this._url = null; + this._urlGenerator = urlGenerator; } get fileNameExt() { @@ -20,11 +23,19 @@ } toString() { + if (this._urlGenerator) { + throw new Error('Cannot convert large file to string'); + } return strFromU8(this.obj); } toUrl() { - if (!this._url) { + if (this._url) { + return this._url; + } + if (this._urlGenerator) { + this._url = this._urlGenerator(this.name); + } else { const blob = new Blob([this.obj.buffer], { type: this.mimeType }); this._url = URL.createObjectURL(blob); } @@ -32,118 +43,204 @@ } close() { - if (this._url) { + if (this._url && !this._urlGenerator) { URL.revokeObjectURL(this._url); } } } export default class ZipFile { - constructor(url, { filePathMappers } = { filePathMappers: defaultFilePathMappers }) { + constructor( + url, + { + filePathMappers = defaultFilePathMappers, + largeFileThreshold = 500 * 1024, + largeFileUrlGenerator = null, + } = {}, + ) { this._loadingError = null; this._extractedFileCache = {}; - this._fileLoadingPromise = loadBinary(url) - .then(data => { - this.zipData = new Uint8Array(data); + this._entriesMap = {}; + this._segmentInfo = {}; + this._segmentData = new Map(); + this._zipFileSize = null; + this.largeFileUrlGenerator = largeFileUrlGenerator; + this.filePathMappers = isPlainObject(filePathMappers) ? 
filePathMappers : {}; + this._metadata = new ZipMetadata(url, Object.keys(this.filePathMappers), largeFileThreshold); + // Initialize metadata loading + this._metadataPromise = this._metadata + .readCentralDirectory() + .then(({ entries, segments, totalSize }) => { + this._entriesMap = entries; + this._segmentInfo = segments; + this._zipFileSize = totalSize; }) .catch(err => { this._loadingError = err; + throw err; }); - this.filePathMappers = isPlainObject(filePathMappers) ? filePathMappers : {}; } - /* - * @param {ExtractedFile} file - The file to carry out replacement of references in - * @param {Object} visitedPaths - A map of paths that have already been visited to prevent a loop - * @return {Promise[ExtractedFile]} - A promise that resolves to the file with references replaced - */ - _replaceFiles(file, visitedPaths) { + async _loadSegment(segmentId) { + if (this._segmentData.has(segmentId)) { + return this._segmentData.get(segmentId); + } + + const segment = this._segmentInfo[segmentId]; + if (!segment) { + throw new Error(`Invalid segment ID: ${segmentId}`); + } + + const segmentData = this._metadata.readRange(segment.start, segment.end - segment.start); + this._segmentData.set(segmentId, segmentData); + return segmentData; + } + + async _replaceFiles(file, visitedPaths = {}) { const mapperClass = this.filePathMappers[file.fileNameExt]; if (!mapperClass) { - return Promise.resolve(file); + return file; } + visitedPaths = { ...visitedPaths }; visitedPaths[file.name] = true; + const mapper = new mapperClass(file); - // Filter out any paths that are in our already visited paths, as that means we are in a - // referential loop where one file has pointed us to another, which is now point us back - // to the source. - // Because we need to modify the file before we generate the URL, we can't resolve this loop. 
const paths = mapper .getPaths() .filter(path => !visitedPaths[getAbsoluteFilePath(file.name, path)]); + const absolutePathsMap = paths.reduce((acc, path) => { acc[getAbsoluteFilePath(file.name, path)] = path; return acc; }, {}); - return this._getFiles(file => absolutePathsMap[file.name], visitedPaths).then( - replacementFiles => { - const replacementFileMap = replacementFiles.reduce((acc, replacementFile) => { - acc[absolutePathsMap[replacementFile.name]] = replacementFile.toUrl(); - return acc; - }, {}); - const newFileContents = mapper.replacePaths(replacementFileMap); - file.obj = strToU8(newFileContents); - return file; - }, - ); + + const promises = Object.keys(absolutePathsMap).map(async absPath => { + const entry = this._entriesMap[absPath]; + if (!entry) return null; + return this._extractFile(entry, visitedPaths); + }); + + const replacementFiles = (await Promise.all(promises)).filter(Boolean); + + const replacementFileMap = replacementFiles.reduce((acc, replacementFile) => { + acc[absolutePathsMap[replacementFile.name]] = replacementFile.toUrl(); + return acc; + }, {}); + + const newFileContents = mapper.replacePaths(replacementFileMap); + file.obj = strToU8(newFileContents); + + return file; } - _getFiles(filterPredicate, visitedPaths = {}) { - const filter = file => !this._extractedFileCache[file.name] && filterPredicate(file); - return this._fileLoadingPromise.then(() => { - return new Promise((resolve, reject) => { - unzip(this.zipData, { filter }, (err, unzipped) => { - if (err) { - reject(err); - return; - } - const alreadyUnzipped = Object.values(this._extractedFileCache).filter(filterPredicate); - if (!unzipped && !alreadyUnzipped.length) { - reject('No files found'); - return; + async _extractFile(entry, visitedPaths = {}) { + // Return cached file if available + if (this._extractedFileCache[entry.fileName]) { + return this._extractedFileCache[entry.fileName]; + } + + // For large files, create and cache a URL generator file + if 
(entry.loadFromUrl) { + const extractedFile = new ExtractedFile(entry.fileName, null, this.largeFileUrlGenerator); + this._extractedFileCache[entry.fileName] = extractedFile; + return extractedFile; + } + + // Load the segment containing this file + const segmentData = await this._loadSegment(entry.segment); + + // Calculate the file's offset within the segment + const fileOffset = entry.localHeaderOffset - this._segmentInfo[entry.segment].start; + + // Verify local file header signature + const signature = readUInt32LE(segmentData, fileOffset); + if (signature !== LOCAL_FILE_HEADER_SIGNATURE) { + throw new Error(`Invalid local file header signature for ${entry.fileName}`); + } + + // Read variable-length fields from local header + const fileNameLength = readUInt16LE(segmentData, fileOffset + 26); + const extraFieldLength = readUInt16LE(segmentData, fileOffset + 28); + + // Calculate offset to compressed data + const dataOffset = + fileOffset + LOCAL_FILE_HEADER_FIXED_SIZE + fileNameLength + extraFieldLength; + + // Extract the compressed data from the segment + const compressedData = segmentData.subarray(dataOffset, dataOffset + entry.compressedSize); + + return new Promise((resolve, reject) => { + // Use inflate directly on the compressed data + inflate(compressedData, { size: entry.uncompressedSize }, async (err, inflated) => { + if (err) { + reject(new Error(`Failed to inflate ${entry.fileName}: ${err}`)); + return; + } + + try { + const extractedFile = new ExtractedFile(entry.fileName, inflated); + + // Only do replacement if this file hasn't been visited in the current chain + if (!visitedPaths[entry.fileName]) { + await this._replaceFiles(extractedFile, visitedPaths); } - Promise.all( - Object.entries(unzipped).map(([name, obj]) => { - const extractedFile = new ExtractedFile(name, obj); - return this._replaceFiles(extractedFile, visitedPaths).then(extractedFile => { - this._extractedFileCache[name] = extractedFile; - return extractedFile; - }); - }), - 
).then(extractedFiles => { - resolve(extractedFiles.concat(alreadyUnzipped)); - }); - }); + + this._extractedFileCache[entry.fileName] = extractedFile; + resolve(extractedFile); + } catch (e) { + reject(e); + } }); }); } - file(filename) { + async file(filename) { if (this._loadingError) { return Promise.reject(this._loadingError); } - if (this._extractedFileCache[filename]) { - return Promise.resolve(this._extractedFileCache[filename]); + + await this._metadataPromise; + const entry = this._entriesMap[filename]; + + if (!entry) { + throw new Error(`File not found: ${filename}`); } - return this._getFiles(file => file.name === filename).then(files => files[0]); + + return this._extractFile(entry); } - files(path) { + + async files(path) { if (this._loadingError) { return Promise.reject(this._loadingError); } - return this._getFiles(file => file.name.startsWith(path)); + + await this._metadataPromise; + const promises = Object.values(this._entriesMap) + .filter(entry => entry.fileName.startsWith(path)) + .map(entry => this._extractFile(entry)); + + return Promise.all(promises); } - filesFromExtension(extension) { + + async filesFromExtension(extension) { if (this._loadingError) { return Promise.reject(this._loadingError); } - return this._getFiles(file => file.name.endsWith(extension)); + + await this._metadataPromise; + const promises = Object.values(this._entriesMap) + .filter(entry => entry.fileName.endsWith(extension)) + .map(entry => this._extractFile(entry)); + + return Promise.all(promises); } close() { for (const file of Object.values(this._extractedFileCache)) { file.close(); } - this.zipData = null; + this._extractedFileCache = {}; + this._segmentData.clear(); + this._metadata = null; } } diff --git a/packages/kolibri-zip/src/loadBinary.js b/packages/kolibri-zip/src/loadBinary.js index 250bc245bba..6bd65113a30 100644 --- a/packages/kolibri-zip/src/loadBinary.js +++ b/packages/kolibri-zip/src/loadBinary.js @@ -1,37 +1,72 @@ /* - * Vendored from 
https://github.com/Stuk/jszip-utils/blob/master/lib/index.js + * Vendored and modified from https://github.com/Stuk/jszip-utils/blob/master/lib/index.js */ /** - * @param {string} path The path to the resource to GET. - * @return {Promise} + * Load binary data with support for HEAD requests and byte ranges + * @param {string} path The path to the resource to GET + * @param {Object} options Request options + * @param {string} options.method HTTP method (GET/HEAD) + * @param {number} options.start Start byte for range request + * @param {number} options.end End byte for range request + * @return {Promise} */ -export default function (path) { +export default function loadBinary(path, options = {}) { + const { method = 'GET', start, end } = options; + return new Promise((resolve, reject) => { try { const xhr = new window.XMLHttpRequest(); - xhr.open('GET', path, true); + // Handle network errors - needs to be set before open() + xhr.onerror = function () { + reject(new Error('Error initiating request: Network error')); + }; + + xhr.open(method, path, true); + + // Only set responseType for GET requests + // HEAD requests with responseType can fail in some browsers + if (method === 'GET') { + xhr.responseType = 'arraybuffer'; + } - xhr.responseType = 'arraybuffer'; + // Only add range header if both start and end are explicitly defined numbers + if (typeof start === 'number' && typeof end === 'number' && !isNaN(start) && !isNaN(end)) { + xhr.setRequestHeader('Range', `bytes=${start}-${end}`); + } xhr.onreadystatechange = function () { - // use `xhr` and not `this`... 
thanks IE - if (xhr.readyState === 4) { - if (xhr.status === 200 || xhr.status === 0) { - try { + if (xhr.readyState !== 4) return; + + // Success states + if (xhr.status === 200 || xhr.status === 206 || (method === 'HEAD' && xhr.status === 204)) { + try { + if (method === 'HEAD') { + resolve({ + contentLength: parseInt(xhr.getResponseHeader('Content-Length')), + acceptRanges: xhr.getResponseHeader('Accept-Ranges'), + }); + } else { resolve(xhr.response); - } catch (err) { - reject(new Error(err)); } - } else { - reject(new Error('Ajax error for ' + path + ' : ' + xhr.status + ' ' + xhr.statusText)); + } catch (err) { + reject(new Error(`Error processing response: ${err.message}`)); } + return; + } + + // Any other status including 0 is treated as an error + if (xhr.status === 0) { + reject(new Error('Error initiating request: Network error')); + } else { + reject(new Error(`HTTP error for ${path}: ${xhr.status} ${xhr.statusText}`)); } }; + xhr.send(); } catch (e) { - reject(new Error(e), null); + reject(new Error(`Error initiating request: ${e.message}`)); } }); } diff --git a/packages/kolibri-zip/src/zipMetadata.js b/packages/kolibri-zip/src/zipMetadata.js new file mode 100644 index 00000000000..25c10c5f584 --- /dev/null +++ b/packages/kolibri-zip/src/zipMetadata.js @@ -0,0 +1,196 @@ +import { strFromU8 } from 'fflate'; +import loadBinary from './loadBinary'; +import { + LARGE_FILE_THRESHOLD, + LOCAL_FILE_HEADER_FIXED_SIZE, + EOCD_SIZE, + EOCD_SIGNATURE, + MAX_COMMENT_SIZE, +} from './constants'; +import { readUInt32LE, readUInt16LE } from './zipUtils'; + +class ZipMetadata { + constructor(url, mapperExtensions = [], largeFileThreshold = LARGE_FILE_THRESHOLD) { + this.url = url; + this._supportsRanges = false; + this._fileSize = null; + this.mapperExtensions = mapperExtensions; + this.largeFileThreshold = largeFileThreshold; + } + + async readRange(start, length) { + if (!this._supportsRanges) { + throw new Error('Range requests are not supported.'); + } + + 
const buffer = await loadBinary(this.url, { + start, + end: start + length - 1, + }); + return new Uint8Array(buffer); + } + + shouldLoadFromUrl(entry) { + // Files that need mapping are never considered large + if (this.mapperExtensions.includes(entry.fileName.split('.').pop()?.toLowerCase())) { + return false; + } + return entry.uncompressedSize >= this.largeFileThreshold; + } + + createSegments(entries) { + // Sort files by offset + const sortedFiles = [...entries].sort((a, b) => a.localHeaderOffset - b.localHeaderOffset); + + const segments = []; + let currentSegment = null; + + for (const file of sortedFiles) { + const start = file.localHeaderOffset; + const headerSize = + LOCAL_FILE_HEADER_FIXED_SIZE + file.fileNameLength + (file.extraFieldLength || 0); + const end = start + file.compressedSize + headerSize; + + if (!currentSegment && !file.loadFromUrl) { + currentSegment = { start, end, id: segments.length }; + segments.push(currentSegment); + } else if (file.loadFromUrl) { + currentSegment = null; + } else { + currentSegment.end = end; + } + + file.segment = file.loadFromUrl ? 
null : currentSegment.id; + } + + return segments; + } + + async findEndOfCentralDirectory() { + // Start with 64KB instead of EOCD_SIZE + // To try to grab whole central directory in a single request + let readSize = 65536; // 64KB + let startPos = -readSize; + + try { + const { contentLength, acceptRanges } = await loadBinary(this.url, { method: 'HEAD' }); + this._fileSize = contentLength; + this._supportsRanges = acceptRanges === 'bytes'; + if (!this._supportsRanges) { + throw new Error('Server does not support range requests.'); + } + + // If file is smaller than our read size, adjust + if (contentLength < readSize) { + readSize = contentLength; + startPos = -readSize; + } + + // Search progressively larger chunks until we find EOCD + while (-startPos <= MAX_COMMENT_SIZE + EOCD_SIZE) { + const chunk = await this.readRange(contentLength + startPos, readSize); + + // Search for EOCD signature in chunk + for (let i = chunk.length - EOCD_SIZE; i >= 0; i--) { + const signature = readUInt32LE(chunk, i); + if (signature === EOCD_SIGNATURE) { + // Found EOCD + const eocd = chunk.slice(i); + const centralDirSize = readUInt32LE(eocd, 12); + const centralDirOffset = readUInt32LE(eocd, 16); + const totalEntries = readUInt16LE(eocd, 8); + + // Check if we already have the central directory in our chunk + if ( + centralDirOffset >= contentLength + startPos && + centralDirOffset + centralDirSize <= contentLength + ) { + // We already have the central directory data! 
+ const centralDirData = chunk.slice( + centralDirOffset - (contentLength + startPos), + centralDirOffset - (contentLength + startPos) + centralDirSize, + ); + + // Cache the central directory data for later use + this._cachedCentralDir = centralDirData; + } + + return { + centralDirOffset, + centralDirSize, + totalEntries, + }; + } + } + + // Double readSize but don't exceed file size + readSize = Math.min(readSize * 2, this._fileSize); + startPos = -readSize; + } + + throw new Error('Could not find ZIP central directory'); + } catch (error) { + throw new Error(`Failed to locate End of Central Directory record: ${error.message}`); + } + } + + async readCentralDirectory() { + const eocd = await this.findEndOfCentralDirectory(); + + // If we have cached central directory data, use it + let centralDir; + if (this._cachedCentralDir) { + centralDir = this._cachedCentralDir; + this._cachedCentralDir = null; // Free the memory + } else { + // Otherwise fetch it + centralDir = await this.readRange(eocd.centralDirOffset, eocd.centralDirSize); + } + + const entries = []; + let offset = 0; + + const CENTRAL_HEADER_SIGNATURE = 0x02014b50; + + while (offset < centralDir.length) { + const signature = readUInt32LE(centralDir, offset); + if (signature !== CENTRAL_HEADER_SIGNATURE) { + throw new Error(`Invalid central directory header signature: ${signature.toString(16)}`); + } + + const entry = { + compressedSize: readUInt32LE(centralDir, offset + 20), + uncompressedSize: readUInt32LE(centralDir, offset + 24), + fileNameLength: readUInt16LE(centralDir, offset + 28), + extraFieldLength: readUInt16LE(centralDir, offset + 30), + fileCommentLength: readUInt16LE(centralDir, offset + 32), + localHeaderOffset: readUInt32LE(centralDir, offset + 42), + compressionMethod: readUInt16LE(centralDir, offset + 10), + }; + + offset += 46; // Fixed-size portion of header + + // Read filename + entry.fileName = strFromU8(centralDir.slice(offset, offset + entry.fileNameLength)); + entry.loadFromUrl 
= this.shouldLoadFromUrl(entry); + offset += entry.fileNameLength + entry.extraFieldLength + entry.fileCommentLength; + + entries.push(entry); + } + + const segments = this.createSegments(entries); + + const entriesByName = entries.reduce((acc, entry) => { + acc[entry.fileName] = entry; + return acc; + }, {}); + + return { + entries: entriesByName, + segments, + totalSize: this._fileSize, + }; + } +} + +export default ZipMetadata; diff --git a/packages/kolibri-zip/src/zipUtils.js b/packages/kolibri-zip/src/zipUtils.js new file mode 100644 index 00000000000..5d254b5c7eb --- /dev/null +++ b/packages/kolibri-zip/src/zipUtils.js @@ -0,0 +1,12 @@ +export function readUInt32LE(buffer, offset) { + return ( + buffer[offset] | + (buffer[offset + 1] << 8) | + (buffer[offset + 2] << 16) | + (buffer[offset + 3] << 24) + ); +} + +export function readUInt16LE(buffer, offset) { + return buffer[offset] | (buffer[offset + 1] << 8); +} diff --git a/packages/kolibri-zip/test/loadBinary.spec.js b/packages/kolibri-zip/test/loadBinary.spec.js new file mode 100644 index 00000000000..8c19b8de383 --- /dev/null +++ b/packages/kolibri-zip/test/loadBinary.spec.js @@ -0,0 +1,112 @@ +import mock from 'xhr-mock'; +import loadBinary from '../src/loadBinary'; + +describe('loadBinary utility', () => { + const TEST_URL = 'test.file'; + const TEST_FILE_SIZE = 1024; + + beforeEach(() => { + mock.setup(); + }); + + afterEach(() => { + mock.teardown(); + }); + + test('makes HEAD request correctly', async () => { + mock.use('HEAD', TEST_URL, { + status: 200, + headers: { + 'Accept-Ranges': 'bytes', + 'Content-Length': TEST_FILE_SIZE.toString(), + }, + }); + + const result = await loadBinary(TEST_URL, { method: 'HEAD' }); + + expect(result).toEqual({ + contentLength: TEST_FILE_SIZE, + acceptRanges: 'bytes', + }); + }); + + test('handles range requests correctly', async () => { + const mockData = new Uint8Array([1, 2, 3, 4, 5]).buffer; + let rangeHeader; + mock.get(TEST_URL, (req, res) => { + rangeHeader = 
req.header('Range'); + + return res.status(206).header('Content-Range', 'bytes 0-4/1024').body(mockData); + }); + + const result = await loadBinary(TEST_URL, { start: 0, end: 4 }); + expect(new Uint8Array(result)).toEqual(new Uint8Array([1, 2, 3, 4, 5])); + expect(rangeHeader).toBe('bytes=0-4'); + }); + + test('handles full file GET request correctly', async () => { + const mockData = new Uint8Array([1, 2, 3, 4, 5]).buffer; + + let rangeHeader; + + mock.get(TEST_URL, (req, res) => { + rangeHeader = req.header('Range'); + return res.status(200).header('Content-Length', '5').body(mockData); + }); + + const result = await loadBinary(TEST_URL); + expect(new Uint8Array(result)).toEqual(new Uint8Array([1, 2, 3, 4, 5])); + expect(rangeHeader).toBe(null); + }); + + test('handles network errors properly', async () => { + mock.get(TEST_URL, { + status: 404, + reason: 'Not Found', + }); + + await expect(loadBinary(TEST_URL)).rejects.toThrow('HTTP error for test.file: 404 Not Found'); + }); + + test('handles server errors properly', async () => { + mock.get(TEST_URL, { + status: 500, + reason: 'Internal Server Error', + }); + + await expect(loadBinary(TEST_URL)).rejects.toThrow( + 'HTTP error for test.file: 500 Internal Server Error', + ); + }); + + test('handles non-206 response for range request', async () => { + mock.get(TEST_URL, { + status: 200, + body: new ArrayBuffer(10), + }); + + const result = await loadBinary(TEST_URL, { start: 0, end: 4 }); + expect(result).toBeInstanceOf(ArrayBuffer); + }); + + test('handles HEAD request with no Content-Length header', async () => { + mock.use('HEAD', TEST_URL, { + status: 200, + headers: { + 'Accept-Ranges': 'bytes', + }, + }); + + const result = await loadBinary(TEST_URL, { method: 'HEAD' }); + expect(result.contentLength).toBeNaN(); + expect(result.acceptRanges).toBe('bytes'); + }); + + test('handles request error events', async () => { + mock.get(TEST_URL, () => { + throw new Error('Network error'); + }); + + await 
expect(loadBinary(TEST_URL)).rejects.toThrow('Error initiating request: Network error'); + }); +}); diff --git a/packages/kolibri-zip/test/zipMetadata.spec.js b/packages/kolibri-zip/test/zipMetadata.spec.js new file mode 100644 index 00000000000..2f8b18669a4 --- /dev/null +++ b/packages/kolibri-zip/test/zipMetadata.spec.js @@ -0,0 +1,203 @@ +import mock from 'xhr-mock'; +import { zipSync, strToU8 } from 'fflate'; +import ZipMetadata from '../src/zipMetadata'; + +function setupMockZip(zipData) { + mock.reset(); // Clear all previous handlers + + mock.use('HEAD', 'test.zip', { + status: 200, + headers: { + 'Accept-Ranges': 'bytes', + 'Content-Length': zipData.length.toString(), + }, + }); + + mock.get('test.zip', (req, res) => { + const rangeHeader = req.header('Range'); + + if (!rangeHeader) { + return res.status(200).body(zipData.buffer); + } + + const [start, end] = rangeHeader.replace('bytes=', '').split('-').map(Number); + + if (start >= zipData.length) { + return res.status(416).reason('Requested range not satisfiable'); + } + + const slicedData = zipData.slice(start, Math.min(end + 1, zipData.length)); + + return res + .status(206) + .header('Content-Range', `bytes ${start}-${end}/${zipData.length}`) + .header('Content-Length', slicedData.length.toString()) + .body(slicedData.buffer); + }); +} + +describe('ZIP Metadata Reader', () => { + const TEST_URL = 'test.zip'; + + beforeEach(() => mock.setup()); + afterEach(() => mock.teardown()); + + test('reads Central Directory entries correctly', () => { + const html = '
<h1>Test</h1>
'; + const zipData = zipSync({ + 'test.txt': strToU8('Hello World'), + 'test2.html': strToU8(html), + }); + + setupMockZip(zipData); + + const reader = new ZipMetadata(TEST_URL); + return reader.readCentralDirectory().then(({ entries, segments, totalSize }) => { + expect(Object.keys(entries)).toHaveLength(2); + expect(entries['test.txt'].uncompressedSize).toBe(11); + expect(entries['test2.html'].uncompressedSize).toBe(13); // '
<h1>Test</h1>
' is 13 chars + expect(segments).toHaveLength(1); + expect(totalSize).toBe(zipData.length); + }); + }); + + test('reads Central Directory entries and creates segments correctly', () => { + const largeData = new Uint8Array(2000000).fill(0xff); + const zipData = zipSync({ + 'test.txt': strToU8('Small file 1'), + 'test2.html': strToU8('
<h1>Small file 2</h1>
'), + 'large.mp4': largeData, + 'test3.css': strToU8('.test { color: red; }'), + }); + + setupMockZip(zipData); + + const reader = new ZipMetadata(TEST_URL, ['css', 'html']); + return reader.readCentralDirectory().then(({ entries, segments }) => { + const largeFiles = Object.values(entries).filter(entry => entry.loadFromUrl); + expect(Object.keys(entries)).toHaveLength(4); + expect(segments.length).toBeGreaterThan(0); + expect(largeFiles).toHaveLength(1); + expect(largeFiles[0].fileName).toBe('large.mp4'); + expect(entries['large.mp4'].segment).toBeNull(); + }); + }); + + test('handles small files correctly', () => { + const content = 'Small file content'; + const zipData = zipSync({ + 'small.txt': strToU8(content), + }); + + setupMockZip(zipData); + + const reader = new ZipMetadata(TEST_URL); + return reader.readCentralDirectory().then(({ entries }) => { + expect(Object.keys(entries)).toHaveLength(1); + expect(entries['small.txt'].uncompressedSize).toBe(18); // 'Small file content' is 18 chars + }); + }); + + test('handles ZIP with comment correctly', () => { + const content = 'Test content'; + const zipData = zipSync( + { + 'test.txt': strToU8(content), + }, + { + comment: 'Test Comment', + }, + ); + + setupMockZip(zipData); + + const reader = new ZipMetadata(TEST_URL); + return reader.readCentralDirectory().then(({ entries }) => { + expect(Object.keys(entries)).toHaveLength(1); + expect(entries['test.txt'].uncompressedSize).toBe(12); // 'Test content' is 12 chars + }); + }); + + test('handles maximum size ZIP comment', () => { + const content = 'Test content'; + const maxComment = 'A'.repeat(65535); + const zipData = zipSync( + { + 'test.txt': strToU8(content), + }, + { + comment: maxComment, + }, + ); + + setupMockZip(zipData); + + const reader = new ZipMetadata(TEST_URL); + return reader.readCentralDirectory().then(({ entries }) => { + expect(Object.keys(entries)).toHaveLength(1); + expect(entries['test.txt'].uncompressedSize).toBe(12); // 'Test content' is 12 
chars + }); + }); + + test('handles empty ZIP file correctly', () => { + const zipData = zipSync({}); + setupMockZip(zipData); + + const reader = new ZipMetadata(TEST_URL); + return reader.readCentralDirectory().then(({ entries, segments }) => { + expect(Object.keys(entries)).toHaveLength(0); + expect(segments).toHaveLength(0); + }); + }); + + test('handles server not supporting ranges', () => { + const zipData = zipSync({ + 'test.txt': strToU8('Test'), + }); + + mock.use('HEAD', TEST_URL, { + status: 200, + headers: { + 'Content-Length': zipData.length.toString(), + }, + }); + + const reader = new ZipMetadata(TEST_URL); + return expect(reader.readCentralDirectory()).rejects.toThrow( + 'Server does not support range requests', + ); + }); + + test('handles interrupted requests', () => { + mock.get(TEST_URL, () => { + throw new Error('Request aborted'); + }); + + const reader = new ZipMetadata(TEST_URL); + return expect(reader.readCentralDirectory()).rejects.toThrow( + 'Failed to locate End of Central Directory record: Error initiating request: Network error', + ); + }); + + test('handles invalid central directory signature', () => { + const validZipData = zipSync({ + 'test.txt': strToU8('Test'), + }); + + const corruptedZipData = new Uint8Array(validZipData); + for (let i = 0; i < corruptedZipData.length - 4; i++) { + const view = new DataView(corruptedZipData.buffer); + if (view.getUint32(i, true) === 0x02014b50) { + view.setUint32(i, 0x12345678, true); + break; + } + } + + setupMockZip(corruptedZipData); + + const reader = new ZipMetadata(TEST_URL); + return expect(reader.readCentralDirectory()).rejects.toThrow( + 'Invalid central directory header signature', + ); + }); +});