From 0a262d7d73cbfc9b646298500ef9b0e52955daf9 Mon Sep 17 00:00:00 2001 From: shunkica Date: Tue, 24 Jun 2025 14:15:30 +0200 Subject: [PATCH] wip: add support for C14N --- binding/exported-functions.txt | 9 + binding/exported-runtime-functions.txt | 1 + src/c14n.mts | 368 +++++++++++++++++++++++++ src/document.mts | 39 +++ src/libxml2.mts | 94 ++++++- src/libxml2raw.d.mts | 31 +++ src/nodes.mts | 2 +- src/utils.mts | 92 ++++--- test/crossplatform/c14n.spec.mts | 167 +++++++++++ 9 files changed, 770 insertions(+), 33 deletions(-) create mode 100644 src/c14n.mts create mode 100644 test/crossplatform/c14n.spec.mts diff --git a/binding/exported-functions.txt b/binding/exported-functions.txt index d7aefea..e5a9020 100644 --- a/binding/exported-functions.txt +++ b/binding/exported-functions.txt @@ -4,12 +4,14 @@ _xmlAddChild _xmlAddNextSibling _xmlAddPrevSibling _xmlCleanupInputCallbacks +_xmlCopyNode _xmlCtxtParseDtd _xmlCtxtReadMemory _xmlCtxtSetErrorHandler _xmlCtxtValidateDtd _xmlDocGetRootElement _xmlDocSetRootElement +_xmlFree _xmlFreeDoc _xmlFreeDtd _xmlFreeNode @@ -74,3 +76,10 @@ _xmlXPathFreeObject _xmlXPathNewContext _xmlXPathRegisterNs _xmlXPathSetContextNode +_xmlC14NDocDumpMemory +_xmlC14NExecute +_xmlBufferCreate +_xmlOutputBufferCreateBuffer +_xmlBufferContent +_xmlOutputBufferClose +_xmlBufferFree diff --git a/binding/exported-runtime-functions.txt b/binding/exported-runtime-functions.txt index bf41392..c9c36ee 100644 --- a/binding/exported-runtime-functions.txt +++ b/binding/exported-runtime-functions.txt @@ -1,6 +1,7 @@ UTF8ToString addFunction getValue +setValue lengthBytesUTF8 stringToUTF8 HEAP32 diff --git a/src/c14n.mts b/src/c14n.mts new file mode 100644 index 0000000..757d046 --- /dev/null +++ b/src/c14n.mts @@ -0,0 +1,368 @@ +import { + addFunction, + getValue, DisposableMalloc, + UTF8ToString, xmlC14NDocDumpMemory, xmlC14NExecute, xmlCopyNode, + xmlDocSetRootElement, + XmlError, + xmlFree, + xmlFreeDoc, + xmlNewDoc, + xmlNewNs, + 
XmlOutputBufferHandler, DisposableXmlOutputBuffer, ContextStorage, +} from './libxml2.mjs'; +import { createNode, XmlElement, type XmlNode } from './nodes.mjs'; +import type { XmlDocPtr } from './libxml2raw.mjs'; +import { + CStringArrayWrapper, XmlNodeSetWrapper, +} from './utils.mjs'; +import { Pointer } from './libxml2raw.mjs'; + +export const XmlC14NMode = { + XML_C14N_1_0: 0, + XML_C14N_EXCLUSIVE_1_0: 1, + XML_C14N_1_1: 2, +} as const; + +export type C14NOptionsBase = { + /** The canonicalization mode to use + * @see {@link XmlC14NMode} + */ + mode: typeof XmlC14NMode[keyof typeof XmlC14NMode]; + /** The list of inclusive namespace prefixes (only for exclusive canonicalization) */ + inclusiveNamespacePrefixList?: string[]; + /** Whether to include comments in the canonicalized output + * @default false + */ + withComments?: boolean; +}; + +export type C14NOptionsDocument = C14NOptionsBase & { + node?: never; + nodeSet?: never; + isVisibleCallback?: never; + userData?: never; +}; + +export type C14NOptionsNode = C14NOptionsBase & { + node: XmlNode; + nodeSet?: never; + isVisibleCallback?: never; + userData?: never; +}; + +export type C14NOptionsNodeSet = C14NOptionsBase & { + nodeSet: XmlNode[]; + node?: never; + isVisibleCallback?: never; + userData?: never; +}; + +export type C14NOptionsCallback = C14NOptionsBase & { + node?: never; + nodeSet?: never; + isVisibleCallback: XmlC14NIsVisibleCallback; + userData?: T; +}; + +export type C14NOptions = + C14NOptionsDocument | C14NOptionsNode | C14NOptionsNodeSet | C14NOptionsCallback; + +/** + * Decide if a node should be included in the canonicalization. 
+ */
+export type XmlC14NIsVisibleCallback<T> = (userData: T, node: XmlNode, parent: XmlNode) => boolean;
+
+/**
+ * wrap the users is visible function
+ */
+export function getC14NIsVisibleCallback<T>(
+    cb: XmlC14NIsVisibleCallback<T>,
+    contextStorage: ContextStorage<T> | null,
+): Pointer {
+    const wrapper = (userDataPtr: number, nodePtr: number, parentPtr: number): number => {
+        const node = createNode(nodePtr);
+        const parent = createNode(parentPtr);
+        const userDataObj = contextStorage ? contextStorage.get(userDataPtr) : undefined;
+        return cb(userDataObj as T, node, parent) ? 1 : 0;
+    };
+    const funcPtr = addFunction(wrapper, 'iiii');
+    return funcPtr as Pointer;
+}
+
+/**
+ * Canonicalize the subtree rooted at `options.node` via a deep copy placed in a temporary document; output goes to `handler.write`, failures throw XmlError.
+ */
+export function canonicalizeWithNode(
+    docPtr: XmlDocPtr, // not read here: the subtree is canonicalized from the temporary copy
+    handler: XmlOutputBufferHandler,
+    options: C14NOptionsNode,
+): void {
+    using docTxtMem = new DisposableMalloc(4); // 4-byte out-slot that receives the result text pointer
+    let tempDoc: number | null = null;
+    let prefixArray: CStringArrayWrapper | null = null;
+
+    try {
+        // If inclusiveNamespaces is provided
+        if (options.inclusiveNamespacePrefixList) {
+            prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList);
+        }
+
+        // Create a temporary document for the subtree
+        tempDoc = xmlNewDoc();
+        if (!tempDoc) {
+            throw new XmlError('Failed to create new document for subtree');
+        }
+
+        // Make a deep copy of the node (1 = recursive copy)
+        const copiedNode = xmlCopyNode(options.node._nodePtr, 1);
+        if (!copiedNode) {
+            throw new XmlError('Failed to copy subtree node');
+        }
+
+        // Set the copied node as the root element of the new document
+        xmlDocSetRootElement(tempDoc, copiedNode);
+
+        // If inclusiveNamespaces is provided,
+        // we need to add the namespace declarations to the root element
+        const inclusivePrefixes = options.inclusiveNamespacePrefixList;
+        if (inclusivePrefixes) {
+            let currentNode: XmlElement | null = options.node.parent;
+            while (currentNode) {
+                Object.entries(currentNode.nsDeclarations).forEach(
+                    ([prefix, namespaceURI]) => {
+                        if (inclusivePrefixes.includes(prefix)) {
+                            const namespace = xmlNewNs(copiedNode, namespaceURI, prefix);
+                            if (!namespace) {
+                                throw new XmlError(`Failed to add namespace declaration "${prefix}"`);
+                            }
+                        }
+                    },
+                );
+                currentNode = currentNode.parent;
+            }
+        }
+
+        const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0;
+        const withComments = options.withComments ? 1 : 0;
+
+        const result = xmlC14NDocDumpMemory(
+            tempDoc,
+            0, // no nodeSet for single node
+            mode,
+            prefixArray ? prefixArray._ptr : 0,
+            withComments,
+            docTxtMem._ptr,
+        );
+
+        if (result < 0) {
+            throw new XmlError('Failed to canonicalize XML subtree');
+        }
+
+        const txtPtr = getValue(docTxtMem._ptr, 'i32');
+        if (!txtPtr) throw new XmlError('Failed to get canonicalized XML');
+        try { // free the returned text even when decoding or handler.write throws
+            const canonicalXml = UTF8ToString(txtPtr, result);
+            handler.write(new TextEncoder().encode(canonicalXml));
+        } finally {
+            xmlFree(txtPtr);
+        }
+    } finally {
+        if (tempDoc) {
+            xmlFreeDoc(tempDoc);
+        }
+        if (prefixArray) {
+            prefixArray.dispose();
+        }
+    }
+}
+
+/**
+ * Canonicalize an XML document with a node set
+ *
+ * TODO: I can't figure out how to add namespace nodes to the node set.
+ * (Error: Unsupported node type 18)
+ */
+export function canonicalizeWithNodeSet(
+    docPtr: XmlDocPtr,
+    handler: XmlOutputBufferHandler,
+    options: C14NOptionsNodeSet,
+): void {
+    using docTxtPtr = new DisposableMalloc(4);
+    let prefixArray: CStringArrayWrapper | null = null;
+    let nodeSet: XmlNodeSetWrapper | null = null;
+
+    try {
+        // If inclusiveNamespaces is provided
+        if (options.inclusiveNamespacePrefixList) {
+            prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList);
+        }
+
+        // Create nodeSet wrapper
+        nodeSet = new XmlNodeSetWrapper(options.nodeSet.map((item) => item._nodePtr));
+
+        const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0;
+        const withComments = options.withComments ?
1 : 0;
+
+        const result = xmlC14NDocDumpMemory(
+            docPtr,
+            nodeSet._ptr,
+            mode,
+            prefixArray ? prefixArray._ptr : 0,
+            withComments,
+            docTxtPtr._ptr,
+        );
+
+        if (result < 0) {
+            throw new XmlError('Failed to canonicalize XML with node set');
+        }
+
+        const txtPtr = getValue(docTxtPtr._ptr, 'i32');
+        if (!txtPtr) throw new XmlError('Failed to get canonicalized XML');
+        try { // free the returned text even when decoding or handler.write throws
+            const canonicalXml = UTF8ToString(txtPtr, result);
+            handler.write(new TextEncoder().encode(canonicalXml));
+        } finally {
+            xmlFree(txtPtr);
+        }
+    } finally {
+        if (prefixArray) {
+            prefixArray.dispose();
+        }
+        if (nodeSet) {
+            nodeSet.dispose();
+        }
+    }
+}
+
+/**
+ * Canonicalize the document, letting `options.isVisibleCallback` decide per node what is emitted; output goes to `handler.write`, failures throw XmlError.
+ */
+export function canonicalizeWithCallback<T>(
+    docPtr: XmlDocPtr,
+    handler: XmlOutputBufferHandler,
+    options: C14NOptionsCallback<T>,
+): void {
+    using outputBuffer = new DisposableXmlOutputBuffer();
+    let prefixArray: CStringArrayWrapper | null = null;
+    let contextStorage: ContextStorage<T> | null = null;
+    let callbackPtr: Pointer | null = null; // NOTE(review): this addFunction slot is never released; needs removeFunction exported from the wasm runtime
+    let userDataPtr = 0;
+
+    try {
+        // If inclusiveNamespaces is provided
+        if (options.inclusiveNamespacePrefixList) {
+            prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList);
+        }
+
+        // Set up callback and user data
+        if (options.userData !== undefined) {
+            contextStorage = new ContextStorage<T>();
+            userDataPtr = contextStorage.allocate(options.userData);
+        }
+
+        callbackPtr = getC14NIsVisibleCallback(options.isVisibleCallback, contextStorage);
+
+        const withComments = options.withComments ? 1 : 0;
+
+        const result = xmlC14NExecute(
+            docPtr,
+            callbackPtr,
+            userDataPtr,
+            options.mode,
+            prefixArray ? prefixArray._ptr : 0,
+            withComments,
+            outputBuffer.getOutputBufferPtr(),
+        );
+
+        if (result < 0) {
+            throw new XmlError('Failed to canonicalize XML with callback');
+        }
+
+        const canonicalXml = outputBuffer.getContent();
+
+        // TextEncoder (not Node's Buffer) so this path also works outside Node, like the sibling functions
+        handler.write(new TextEncoder().encode(canonicalXml));
+    } finally {
+        if (prefixArray) {
+            prefixArray.dispose();
+        }
+        if (contextStorage) {
+            contextStorage.free(userDataPtr);
+        }
+    }
+}
+
+/**
+ * Canonicalize an XML document (default mode - entire document)
+ */
+export function canonicalizeDocument(
+    docPtr: XmlDocPtr,
+    handler: XmlOutputBufferHandler,
+    options?: C14NOptionsBase,
+): void {
+    using docTxtPtr = new DisposableMalloc(4);
+    let prefixArray: CStringArrayWrapper | null = null;
+
+    try {
+        // If inclusiveNamespaces is provided
+        if (options && options.inclusiveNamespacePrefixList) {
+            prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList);
+        }
+
+        const mode = options && options.mode ? options.mode : XmlC14NMode.XML_C14N_1_0;
+        const withComments = options && options.withComments ? 1 : 0;
+
+        const result = xmlC14NDocDumpMemory(
+            docPtr,
+            0, // no nodeSet
+            mode,
+            prefixArray ?
prefixArray._ptr : 0, + withComments, + docTxtPtr._ptr, + ); + + if (result < 0) { + throw new XmlError('Failed to canonicalize XML'); + } + + const txtPtr = getValue(docTxtPtr._ptr, 'i32'); + if (!txtPtr) throw new XmlError('Failed to get canonicalized XML'); + + const canonicalXml = UTF8ToString(txtPtr, result); + const buffer = new TextEncoder().encode(canonicalXml); + handler.write(buffer); + + xmlFree(txtPtr); + } finally { + if (prefixArray) { + prefixArray.dispose(); + } + } +} + +// export function onlyATest(): string { +// const xmlString = 'text'; +// const doc = XmlDocument.fromString(xmlString); +// +// const buf = xmlBufferCreate(); +// const bufbuf = xmlOutputBufferCreateBuffer(buf, 0); +// +// const canonical = xmlC14NExecute( +// doc._ptr, +// 0, +// 0, +// 0, +// 0, +// 0, +// bufbuf, +// ); +// const errPtr = xmlGetLastError(); +// if (errPtr) { +// const code = getValue(errPtr + 16, 'i32'); // offset depends on struct layout +// const msgPtr = getValue(errPtr + 8, '*'); // check xmlError struct in libxml2 +// const msg = UTF8ToString(msgPtr); +// console.error('C14N error:', code, msg); +// } +// +// return canonical.toString(); +// } diff --git a/src/document.mts b/src/document.mts index f98be44..bf5cfcf 100644 --- a/src/document.mts +++ b/src/document.mts @@ -32,6 +32,13 @@ import type { XmlDocPtr, XmlParserCtxtPtr } from './libxml2raw.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; import { XmlDtd } from './dtd.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; +import { + canonicalizeDocument, + canonicalizeWithCallback, + canonicalizeWithNode, + canonicalizeWithNodeSet, + type C14NOptions, +} from './c14n.mjs'; export enum ParseOption { XML_PARSE_DEFAULT = 0, @@ -394,4 +401,36 @@ export class XmlDocument extends XmlDisposable { xmlXIncludeFreeContext(xinc); } } + + /** + * Canonicalize the XML document to a buffer and invoke the callbacks to process. 
+ * @param handler handlers to process the content in the buffer + * @param options Canonicalization options + * @see {@link toCanonicalString} + */ + canonicalize(handler: XmlOutputBufferHandler, options?: C14NOptions): void { + if (!options) { + canonicalizeDocument(this._ptr, handler); + } else if (options.node) { + canonicalizeWithNode(this._ptr, handler, options); + } else if (options.nodeSet) { + canonicalizeWithNodeSet(this._ptr, handler, options); + } else if (options.isVisibleCallback) { + canonicalizeWithCallback(this._ptr, handler, options); + } else { + canonicalizeDocument(this._ptr, handler, options); + } + } + + /** + * Canonicalize the XML document and return the result as a string. + * @param options Canonicalization options + * @returns The canonicalized XML as a string + * @see {@link canonicalize} + */ + toCanonicalString(options?: C14NOptions): string { + const handler = new XmlStringOutputBufferHandler(); + this.canonicalize(handler, options); + return handler.result; + } } diff --git a/src/libxml2.mts b/src/libxml2.mts index cb8493c..05334cd 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -13,11 +13,46 @@ import type { XmlXPathContextPtr, } from './libxml2raw.mjs'; import moduleLoader from './libxml2raw.mjs'; -import { ContextStorage } from './utils.mjs'; +import { disposeBy, XmlDisposable } from './disposable.mjs'; const libxml2 = await moduleLoader(); libxml2._xmlInitParser(); +// Export specific functions needed by other modules +export const { + getValue, setValue, UTF8ToString, lengthBytesUTF8, stringToUTF8, addFunction, +} = libxml2; + +/** + * Manage JS context object for wasm. + * + * In libxml2, a registration of callback often has a context/userdata pointer. + * But when it is in wasm, this pointer is essentially an integer. + * + * To support JS object as context/userdata, we store it in the map and access with an integer key. + * This key could be passed to the registration. 
+ * And the callback use this key to retrieve the real object.
+ */
+export class ContextStorage<T> {
+    private storage: Map<number, T> = new Map<number, T>();
+
+    private index = 0;
+
+    allocate(value: T): number {
+        this.index += 1;
+        this.storage.set(this.index, value);
+        return this.index;
+    }
+
+    free(index: number) {
+        this.storage.delete(index);
+    }
+
+    get(index: number): T {
+        return this.storage.get(index)!;
+    }
+}
+
 /**
  * The base class for exceptions in this library.
  *
@@ -619,13 +654,68 @@ export function xmlSaveSetIndentString(
     return withStringUTF8(indent, (buf) => libxml2._xmlSaveSetIndentString(ctxt, buf));
 }
 
+/**
+ * We probably don't want to expose malloc/free directly?
+ */
+@disposeBy(libxml2._free)
+export class DisposableMalloc extends XmlDisposable {
+    constructor(size: number) {
+        super(libxml2._malloc(size));
+    }
+}
+
+/**
+ * Pairs an xmlBuffer with an xmlOutputBuffer writing into it; single use (maybe also don't expose xmlBuffer* functions directly?).
+ * Don't reuse this buffer: getContent() closes the output side, after which only the cached string is available.
+ */
+@disposeBy(libxml2._xmlBufferFree)
+export class DisposableXmlOutputBuffer extends XmlDisposable {
+    private _content: string | null = null;
+
+    private _outputBufferPtr: number;
+
+    constructor() {
+        super(libxml2._xmlBufferCreate());
+        this._outputBufferPtr = libxml2._xmlOutputBufferCreateBuffer(this._ptr, 0);
+    }
+
+    getOutputBufferPtr(): Pointer {
+        return this._outputBufferPtr;
+    }
+
+    // Closes the output buffer on the first call and returns its content as a string; later calls return the cached string.
+    getContent(): string {
+        if (this._content !== null) { // compare with null: an empty string is a valid cached result
+            return this._content;
+        }
+        if (this._outputBufferPtr === 0) {
+            throw new XmlError('Output buffer has been closed');
+        }
+        libxml2._xmlOutputBufferClose(this._outputBufferPtr);
+        this._outputBufferPtr = 0;
+        const contentPtr = libxml2._xmlBufferContent(this._ptr);
+        this._content = libxml2.UTF8ToString(contentPtr);
+        return this._content;
+    }
+
+    [Symbol.dispose]() {
+        if (this._outputBufferPtr !== 0) {
+            libxml2._xmlOutputBufferClose(this._outputBufferPtr);
+            this._outputBufferPtr = 0;
+        }
+        super[Symbol.dispose]();
+    }
+}
+
 export const xmlAddChild = libxml2._xmlAddChild;
 export const xmlAddNextSibling = libxml2._xmlAddNextSibling;
 export const xmlAddPrevSibling = libxml2._xmlAddPrevSibling;
+export const xmlCopyNode = libxml2._xmlCopyNode;
 export const xmlCtxtSetErrorHandler = libxml2._xmlCtxtSetErrorHandler;
 export const xmlCtxtValidateDtd = libxml2._xmlCtxtValidateDtd;
 export const xmlDocGetRootElement = libxml2._xmlDocGetRootElement;
 export const xmlDocSetRootElement = libxml2._xmlDocSetRootElement;
+export const xmlFree = libxml2._xmlFree;
 export const xmlFreeDoc = libxml2._xmlFreeDoc;
 export const xmlFreeNode = libxml2._xmlFreeNode;
 export const xmlFreeDtd = libxml2._xmlFreeDtd;
@@ -670,3 +760,5 @@ export const xmlXPathFreeContext = libxml2._xmlXPathFreeContext;
 export const xmlXPathFreeObject = libxml2._xmlXPathFreeObject;
 export const xmlXPathNewContext = libxml2._xmlXPathNewContext;
 export const xmlXPathSetContextNode = libxml2._xmlXPathSetContextNode;
+export const xmlC14NDocDumpMemory = libxml2._xmlC14NDocDumpMemory;
+export const xmlC14NExecute = libxml2._xmlC14NExecute;
diff --git a/src/libxml2raw.d.mts b/src/libxml2raw.d.mts
index b41f2f0..0295aa2 100644
--- a/src/libxml2raw.d.mts
+++ b/src/libxml2raw.d.mts
@@ -39,6 +39,7 @@ export class LibXml2 {
     _xmlAddNextSibling(prev: XmlNodePtr, cur: XmlNodePtr): XmlNodePtr;
     _xmlAddPrevSibling(next: XmlNodePtr, cur: XmlNodePtr): XmlNodePtr;
_xmlCleanupInputCallbacks(): void; + _xmlCopyNode(node: XmlNodePtr, extended: number): XmlNodePtr; _xmlCtxtParseDtd( ctxt: XmlParserCtxtPtr, input: XmlParserInputPtr, @@ -63,6 +64,7 @@ export class LibXml2 { _xmlFreeParserCtxt(ctxt: XmlParserCtxtPtr): void; _xmlDocGetRootElement(doc: XmlDocPtr): XmlNodePtr; _xmlDocSetRootElement(doc: XmlDocPtr, root: XmlNodePtr): XmlNodePtr; + _xmlFree(ptr: Pointer): void; _xmlFreeDoc(Doc: XmlDocPtr): void; _xmlFreeDtd(dtd: XmlDtdPtr): void; _xmlGetIntSubset(doc: XmlDocPtr): XmlDtdPtr; @@ -71,6 +73,7 @@ export class LibXml2 { _xmlHasNsProp(node: XmlNodePtr, name: CString, namespace: CString): XmlAttrPtr; _xmlInitParser(): void; _xmlNewDoc(): XmlDocPtr; + _xmlNewDtd(): XmlDtdPtr; _xmlNewCDataBlock(doc: XmlDocPtr, content: CString, len: number): XmlNodePtr; _xmlNewDocComment(doc: XmlDocPtr, content: CString): XmlNodePtr; _xmlNewDocNode(doc: XmlDocPtr, ns: XmlNsPtr, name: CString, content: CString): XmlNodePtr; @@ -160,10 +163,38 @@ export class LibXml2 { _xmlSchemaValidateDoc(ctx: XmlSchemaValidCtxtPtr, doc: XmlDocPtr): number; _xmlSchemaValidateOneElement(ctx: XmlSchemaValidCtxtPtr, elem: XmlNodePtr): number; _xmlUnlinkNode(cur: XmlNodePtr): void; + _xmlC14NDocDumpMemory( + doc: XmlDocPtr, + nodeset: Pointer, + mode: number, + inclusiveNamespaces: Pointer, + withComments: number, + docTxtPtr: Pointer, + ): number; + _xmlC14NExecute( + doc: XmlDocPtr, + is_visible_callback: Pointer, + user_data: Pointer, + mode: number, + inclusive_ns_prefixes: Pointer, + with_comments: number, + buf: Pointer, + ): number; + // _xmlBufferCreate + // _xmlOutputBufferCreateBuffer + // _xmlBufferContent + // _xmlOutputBufferClose + // _xmlBufferFree + _xmlBufferCreate(): Pointer; + _xmlOutputBufferCreateBuffer(buffer: Pointer, encoder: Pointer): Pointer; + _xmlBufferContent(buffer: Pointer): Pointer; + _xmlOutputBufferClose(outputBuffer: Pointer): number; + _xmlBufferFree(buffer: Pointer): void; // runtime functions UTF8ToString(ptr: CString, 
maxBytesToRead?: number): string; addFunction(func: Function, sig: string): Pointer; getValue(ptr: Pointer, type: string): number; + setValue(ptr: Pointer, value: number, type: string): void; lengthBytesUTF8(str: string): number; stringToUTF8(str: string, outPtr: CString, maxBytesToWrite: number): CString; } diff --git a/src/nodes.mts b/src/nodes.mts index e966b86..b816d4e 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -88,7 +88,7 @@ export function forNodeType(nodeType: XmlNodeStruct.Type) { }; } -function createNode(nodePtr: XmlNodePtr): XmlNode { +export function createNode(nodePtr: XmlNodePtr): XmlNode { const nodeType = XmlNodeStruct.type(nodePtr); const Constructor = nodeConstructors.get(nodeType); diff --git a/src/utils.mts b/src/utils.mts index 601ad32..4073c19 100644 --- a/src/utils.mts +++ b/src/utils.mts @@ -1,36 +1,9 @@ -import { XmlInputProvider, XmlOutputBufferHandler } from './libxml2.mjs'; +import { + DisposableMalloc, + lengthBytesUTF8, setValue, stringToUTF8, XmlInputProvider, XmlOutputBufferHandler, +} from './libxml2.mjs'; import { Pointer } from './libxml2raw.mjs'; -/** - * Manage JS context object for wasm. - * - * In libxml2, a registration of callback often has a context/userdata pointer. - * But when it is in wasm, this pointer is essentially an integer. - * - * To support JS object as context/userdata, we store it in the map and access with an integer key. - * This key could be passed to the registration. - * And the callback use this key to retrieve the real object. 
- */ -export class ContextStorage { - private storage: Map = new Map(); - - private index = 0; - - allocate(value: T): number { - this.index += 1; - this.storage.set(this.index, value); - return this.index; - } - - free(index: number) { - this.storage.delete(index); - } - - get(index: number): T { - return this.storage.get(index)!; - } -} - const bufferContexts: Map = new Map(); let contextIndex = 1; @@ -142,3 +115,60 @@ export class XmlStringOutputBufferHandler implements XmlOutputBufferHandler { return this._result; } } + +/** + * Helper to create a C-style array of C strings + */ +export class CStringArrayWrapper extends DisposableMalloc { + private cStrings: DisposableMalloc[] = []; + + constructor(strings: string[]) { + // allocate pointer array (+1 for NULL terminator) + super((strings.length + 1) * 4); + + this.cStrings = strings.map((s) => { + const len = lengthBytesUTF8(s) + 1; + const mem = new DisposableMalloc(len); + stringToUTF8(s, mem._ptr, len); + return mem; + }); + + this.cStrings.forEach(({ _ptr }, i) => { + setValue(this._ptr + i * 4, _ptr, 'i32'); + }); + setValue(this._ptr + this.cStrings.length * 4, 0, 'i32'); + } + + [Symbol.dispose](): void { + this.cStrings.forEach((dm) => dm.dispose()); + super[Symbol.dispose](); + } +} + +/** + * Helper to create a libxml2 xmlNodeSet structure from an array of node pointers + */ +export class XmlNodeSetWrapper extends DisposableMalloc { + private nodeArrayMem: DisposableMalloc; + + constructor(nodes: number[]) { + super(12); // Allocate 12 bytes for the struct + const count = nodes.length; + + // allocate array of node pointers + this.nodeArrayMem = new DisposableMalloc(count * 4); + nodes.forEach((ptr, i) => { + setValue(this.nodeArrayMem._ptr + i * 4, ptr, 'i32'); + }); + + // allocate struct + setValue(this._ptr, count, 'i32'); // nodeNr + setValue(this._ptr + 4, count, 'i32'); // nodeMax + setValue(this._ptr + 8, this.nodeArrayMem._ptr, 'i32'); // nodeTab + } + + [Symbol.dispose](): void { + 
this.nodeArrayMem.dispose(); + super[Symbol.dispose](); + } +} diff --git a/test/crossplatform/c14n.spec.mts b/test/crossplatform/c14n.spec.mts new file mode 100644 index 0000000..beb2d0c --- /dev/null +++ b/test/crossplatform/c14n.spec.mts @@ -0,0 +1,167 @@ +import { assert, expect } from 'chai'; +import { + XmlDocument, diag, +} from '@libxml2-wasm/lib/index.mjs'; +import { + XmlC14NMode, +} from '@libxml2-wasm/lib/c14n.mjs'; + +const usingXmlDocument = (doc: XmlDocument, cb: (doc: XmlDocument) => void) => { + diag.configure({ enabled: true }); + try { + cb(doc); + } finally { + doc.dispose(); + const report = diag.report(); + diag.configure({ enabled: false }); + expect(report).to.deep.equal({}); + } +}; + +describe('C14N (XML Canonicalization)', () => { + describe('canonicalizeDocument', () => { + it('should canonicalize a simple XML document', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal(xmlString); + }); + }); + + it('should order attributes', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should sort namespace declarations', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should remove whitespace between attributes', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), 
(doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should replace self-closing tags with full tags', () => { + const xmlString = ''; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal(''); + + doc.dispose(); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should remove the XML declaration', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + }); + + describe('canonicalizeNode', () => { + it('should canonicalize only a specific subtree', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + + expect(node).to.not.be.null; + assert(node != null); + + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, node, + }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should include inclusive namespaces with exclusive canonicalization', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const inclusiveNamespaces = ['ns3']; + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + expect(node).to.not.be.null; + assert(node != null); + + const canonical = doc.toCanonicalString({ + node, + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + inclusiveNamespacePrefixList: 
inclusiveNamespaces, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + }); + + describe('canonicalizeNodeSet', () => { + it('should work with nodeset', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const nodes = doc.find('//ns1:child/namespace::* | //ns:sibling/namespace::*', { ns: 'uri:root', ns1: 'uri:ns1' }); + + expect(nodes).to.have.lengthOf(4); + + const canonical = doc.toCanonicalString( + { mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, nodeSet: nodes }, + ); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('other'); + }); + expect(diag.report()).to.deep.equal({}); + }); + }); + + describe('canonicalizeCallback', () => { + it('should work with isVisibleCallback', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + isVisibleCallback: () => true, + }); + expect(canonical).to.equal('text'); + }); + }); + }); +});