import {
  Tensor,
  Module, Parameter,
  Linear, ReLU,
  Adam, SGD, GradScaler,
  mseLoss,
} from '../dist/index.js';
import { assert, assertClose, section } from './helpers.js';

// ============================================================
// Module system
// ============================================================

section('Module system');

class TestNet extends Module {
  l1: Linear;
  l2: Linear;
  relu: ReLU;
  constructor() {
    super();
    this.l1 = new Linear(3, 4);
    this.l2 = new Linear(4, 2);
    this.relu = new ReLU();
  }
  forward(x: Tensor): Tensor {
    return this.l2.forward(this.relu.forward(this.l1.forward(x)));
  }
}

const net = new TestNet();

const params = net.parameters();
assert(params.length === 4, 'TestNet has 4 parameters (2 weights + 2 biases)');

const named = net.namedParameters();
assert(named.length === 4, 'namedParameters count');
const names = named.map(([n]: [string, any]) => n);
assert(names.some((n: string) => n.includes('l1')), 'namedParameters includes l1');
assert(names.some((n: string) => n.includes('l2')), 'namedParameters includes l2');

const kids = net.children();
assert(kids.length === 3, 'TestNet has 3 children (l1, l2, relu)');

const allMods = net.modules();
assert(allMods.length >= 4, 'modules() includes self + children');

net.eval();
assert(net.training === false, 'eval sets training=false');
net.train();
assert(net.training === true, 'train sets training=true');

const netInput = Tensor.rand([2, 3]);
const netOut = net.forward(netInput);
assert(netOut.shape[0] === 2 && netOut.shape[1] === 2, 'TestNet output shape');
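
// Hedged extra check: TestNet has no mode-dependent layers (no dropout or
// batch norm), so forward should behave identically in eval mode. This
// assumes eval()/train() only toggle the `training` flag for these layers.
net.eval();
const evalOut = net.forward(netInput);
assert(evalOut.shape[0] === 2 && evalOut.shape[1] === 2, 'forward works in eval mode');
net.train();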

// ============================================================
// Optimizers
// ============================================================

section('Optimizers');

// SGD
const sgdParam = Tensor.fromFloat32(new Float32Array([5, 5, 5, 5]), [2, 2]).setRequiresGrad(true);
const sgdParamObj = new Parameter(sgdParam);
const sgdTarget = Tensor.zeros([2, 2]);
const sgdLoss = sgdParamObj.value.sub(sgdTarget).pow(2).mean();
sgdLoss.backward();
const sgd = new SGD([sgdParamObj], 0.1);
const sgdBefore = sgdParamObj.value.toFloat32()[0];
sgd.step();
const sgdAfter = sgdParamObj.value.toFloat32()[0];
assert(sgdAfter < sgdBefore, 'SGD step reduces parameter toward target');
sgd.zeroGrad();
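
// Closed-form sanity check (a sketch, assuming vanilla SGD with no momentum):
// loss = mean((p - 0)^2) over 4 elements, so dloss/dp_i = 2 * p_i / 4 = 2.5,
// and one step with lr = 0.1 moves each entry from 5 to 5 - 0.25 = 4.75.
assertClose(sgdAfter, 4.75, 1e-4, 'SGD matches closed-form update (assumes no momentum)');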

// Adam
const adamParam = Tensor.fromFloat32(new Float32Array([5, 5, 5, 5]), [2, 2]).setRequiresGrad(true);
const adamParamObj = new Parameter(adamParam);
const adamTarget = Tensor.zeros([2, 2]);
const adamLoss = adamParamObj.value.sub(adamTarget).pow(2).mean();
adamLoss.backward();
const adam = new Adam([adamParamObj], { lr: 0.01 });
adam.step();
const adamAfter = adamParamObj.value.toFloat32()[0];
assert(adamAfter !== 5, 'Adam step changes parameter');
adam.zeroGrad();
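
// Closed-form sanity check (a sketch, assuming textbook Adam with bias
// correction): the bias-corrected first update is lr * g / (sqrt(g^2) + eps),
// i.e. roughly lr * sign(g), so the value should land near 5 - 0.01 = 4.99.
assertClose(adamAfter, 4.99, 1e-3, 'Adam first step is ~lr in magnitude (assumes bias correction)');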

// Adam returns grad norm
const adamParam2 = Tensor.fromFloat32(new Float32Array([3, 3]), [2]).setRequiresGrad(true);
const adamParamObj2 = new Parameter(adamParam2);
const adamLoss2 = adamParamObj2.value.pow(2).sum();
adamLoss2.backward();
const adam2 = new Adam([adamParamObj2], { lr: 0.01 });
const gradNorm = adam2.step();
assert(typeof gradNorm === 'number', 'Adam.step() returns grad norm');
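
// Hedged value check, assuming step() reports the global L2 norm of the raw
// gradients: loss = sum(p^2) with p = [3, 3] gives g = [6, 6], whose L2 norm
// is sqrt(72) ~= 8.485.
assertClose(gradNorm as number, Math.sqrt(72), 1e-3, 'grad norm matches sqrt(72) (assumes global L2 norm)');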

// GradScaler
const scaler = new GradScaler({ initScale: 1024 });
assert(scaler.getScale() === 1024, 'GradScaler initial scale');

const gsLossInput = Tensor.fromFloat32(new Float32Array([2, 3]), [2]);
const scaledLoss = scaler.scaleLoss(gsLossInput);
const scaledData = scaledLoss.toFloat32();
assertClose(scaledData[0], 2 * 1024, 1e-1, 'scaleLoss scales by initScale');
assertClose(scaledData[1], 3 * 1024, 1e-1, 'scaleLoss scales second element');
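
// Hedged non-mutation check, assuming scaleLoss returns a new tensor rather
// than scaling in place: the original input should be untouched.
const gsOriginal = gsLossInput.toFloat32();
assertClose(gsOriginal[0], 2, 1e-6, 'scaleLoss does not mutate its input');
assertClose(gsOriginal[1], 3, 1e-6, 'scaleLoss does not mutate second element');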

// ============================================================
// End-to-end training loop
// ============================================================

section('End-to-end training');

const trainX = Tensor.fromFloat32(new Float32Array([0, 1, 2, 3, 4, 5]), [6, 1]);
const trainY = Tensor.fromFloat32(new Float32Array([1, 3, 5, 7, 9, 11]), [6, 1]);
const regNet = new Linear(1, 1);
const regOptim = new Adam(regNet.parameters(), { lr: 0.05 });

let earlyLoss: number | null = null;
for (let i = 0; i < 200; i++) {
  regOptim.zeroGrad();
  const pred = regNet.forward(trainX);
  const loss = mseLoss(pred, trainY);
  if (i === 10) earlyLoss = loss.toFloat32()[0];
  loss.backward();
  regOptim.step();
}
const finalPred = regNet.forward(trainX);
const finalLoss = mseLoss(finalPred, trainY).toFloat32()[0];
assert(finalLoss < earlyLoss!, 'training reduces loss');
assert(finalLoss < 1.0, 'training converges to low loss');
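
// Hedged extrapolation check: the data follows y = 2x + 1, so a well-fit
// model should predict roughly 21 at x = 10. The tolerance is deliberately
// loose because small weight/bias errors are amplified outside the training range.
const extrapX = Tensor.fromFloat32(new Float32Array([10]), [1, 1]);
const extrapPred = regNet.forward(extrapX).toFloat32();
assertClose(extrapPred[0], 21, 2.0, 'trained model extrapolates y = 2x + 1 (loose tolerance)');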