From f9492c736ab28b86260c62259f6b222d0ed2daba Mon Sep 17 00:00:00 2001 From: Thiago Alves Date: Mon, 1 Jun 2026 07:47:31 -0400 Subject: [PATCH 1/4] fix(codegen): lower IEC numeric literals in VAR initializers (#133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hex/based literals in a VAR initial value were copied verbatim into the C++ constructor initializer list (`X(16#FF)`), producing invalid C++ ("stray '#' in program"). The expression-statement path lowers the same literals correctly; the declaration-initializer path did not. Root cause: PROGRAM/GLOBAL VAR initialisers reach codegen as raw IEC strings (project-model.expressionToString → rawValue), and getDefaultValue never lowered numeric strings. expressionToString also dropped unary expressions entirely, so a signed literal like `-5` silently became the type default (0). Checked every literal form in the initializer position and fixed them together: - based integers/bitstrings: 16#FF→0xFF, 8#17→017, 2#1010→0b1010 - IEC underscore separators stripped: 16#FF_FF→0xFFFF, 1_000→1000 - typed-literal prefixes stripped: INT#5→5, INT#16#10→0x10, BYTE#16#AB→0xAB, REAL#1.5→1.5 - signed literals preserved: -5, +3 (previously lost to 0) - decimals/reals/bool/string unchanged; non-numeric initialisers (enum names, named constants) pass through untouched New getDefaultValue helper lowerNumericInitializer reuses the existing iecBaseToCppLiteral so declaration initialisers and statement bodies lower identically. FB initialisers already routed through the expression path and were unaffected; struct fields emit valid decimal. Adds tests/backend/codegen-var-initializers.test.ts covering all forms, including the VAR_GLOBAL path. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/backend/codegen.ts | 51 ++++++ src/project-model.ts | 10 ++ .../backend/codegen-var-initializers.test.ts | 148 ++++++++++++++++++ 3 files changed, 209 insertions(+) create mode 100644 tests/backend/codegen-var-initializers.test.ts diff --git a/src/backend/codegen.ts b/src/backend/codegen.ts index 6a1320a..343f44b 100644 --- a/src/backend/codegen.ts +++ b/src/backend/codegen.ts @@ -5058,6 +5058,18 @@ export class CodeGenerator { const escaped = this.translateIECString(inner); return `u"${escaped}"`; } + // Lower IEC numeric literals (based 16#FF/8#17/2#1010, decimals + // with underscore separators, typed prefixes like INT#5, optional + // sign). PROGRAM/GLOBAL VAR initialisers arrive here as raw IEC + // strings; without this they're emitted verbatim (`X(16#FF)`, + // `X(1_000)`, `X(INT#5)`) and the C++ build fails. Mirrors the + // expression-statement path (formatIntegerLiteral). Returns null + // for non-numeric initialisers (enum names, constants), which then + // pass through unchanged. + const numeric = this.lowerNumericInitializer(initialValue); + if (numeric !== null) { + return numeric; + } return initialValue; } @@ -5101,6 +5113,45 @@ export class CodeGenerator { return ""; } + /** + * Lower an IEC numeric literal initializer string to a C++ literal. + * + * Handles based literals (16#FF, 8#17, 2#1010), decimals/reals with + * IEC underscore separators (1_000, 16#FF_FF), an optional leading + * sign (-5, +3), and an optional IEC type prefix (INT#5, BYTE#16#AB, + * REAL#1.5). Reuses {@link iecBaseToCppLiteral}, the same helper the + * expression path uses, so declaration initialisers and statement + * bodies lower identically. + * + * Returns `null` when `raw` is not a recognised numeric literal, so + * non-numeric initialisers (enum names, named constants) pass through + * unchanged at the call site. 
+ */ + private lowerNumericInitializer(raw: string): string | null { + let s = raw.trim(); + let sign = ""; + if (s.startsWith("-") || s.startsWith("+")) { + sign = s[0]!; + s = s.slice(1).trimStart(); + } + // Strip an optional IEC type prefix (TYPE#...). The leading + // identifier must start with a letter/underscore, which excludes + // radix markers like `16#` whose left side is numeric. + const typePrefix = /^[A-Za-z_][A-Za-z0-9_]*#(.+)$/.exec(s); + if (typePrefix) { + s = typePrefix[1]!; + } + const isNumeric = + /^16#[0-9A-Fa-f][0-9A-Fa-f_]*$/.test(s) || + /^8#[0-7][0-7_]*$/.test(s) || + /^2#[01][01_]*$/.test(s) || + /^[0-9][0-9_]*(\.[0-9][0-9_]*)?([eE][+-]?[0-9]+)?$/.test(s); + if (!isNumeric) { + return null; + } + return sign + iecBaseToCppLiteral(s); + } + /** * Collect all program instances from a configuration. */ diff --git a/src/project-model.ts b/src/project-model.ts index bf90d69..7bbe1c1 100644 --- a/src/project-model.ts +++ b/src/project-model.ts @@ -831,6 +831,16 @@ export class ProjectModelBuilder { const lit = expr; return lit.rawValue; } + if ( + expr.kind === "UnaryExpression" && + (expr.operator === "-" || expr.operator === "+") + ) { + // Preserve the sign on numeric literal initialisers (e.g. -5). + // Without this the operand is dropped and the initialiser silently + // falls back to the type's default (0). Codegen lowers the result. + const inner = this.expressionToString(expr.operand); + return inner === "" ? "" : `${expr.operator}${inner}`; + } if (expr.kind === "VariableExpression") { if (expr.fieldAccess.length > 0) { return `${expr.name}.${expr.fieldAccess.join(".")}`; diff --git a/tests/backend/codegen-var-initializers.test.ts b/tests/backend/codegen-var-initializers.test.ts new file mode 100644 index 0000000..1ddad9a --- /dev/null +++ b/tests/backend/codegen-var-initializers.test.ts @@ -0,0 +1,148 @@ +/** + * Regression tests for issue #133 — numeric literal lowering in VAR + * initializers. 
+ * + * IEC numeric literals used as VAR initial values reach the program / + * global codegen path as raw IEC strings (project-model stringifies the + * initializer expression). They must be lowered to valid C++ the same + * way the expression-statement path lowers them — otherwise the + * constructor initializer list emits e.g. `X(16#FF)` / `X(INT#5)` / + * `X(1_000)` verbatim and the generated C++ fails to compile + * (`stray '#' in program`, bad digit separators), or a signed literal + * like `-5` is silently dropped to the type default. + */ + +import { describe, it, expect } from "vitest"; +import { compile } from "../../dist/index.js"; + +function compileST(source: string): { + cppCode: string; + headerCode: string; + success: boolean; + errors: unknown[]; +} { + const result = compile(source); + return { + cppCode: result.cppCode, + headerCode: result.headerCode, + success: result.success, + errors: result.errors, + }; +} + +/** Extract the constructor initializer-list line for a program. */ +function initList(cpp: string): string { + const line = cpp.split("\n").find((l) => l.trimStart().startsWith(": ")); + return line ?? ""; +} + +describe("issue #133: VAR initializer literal lowering", () => { + it("lowers based integer/bitstring literals (16#, 8#, 2#)", () => { + const { cppCode, success } = compileST(` + PROGRAM P + VAR + h : UDINT := 16#FF; + o : UDINT := 8#17; + b : UDINT := 2#1010; + END_VAR + h := h; + END_PROGRAM + `); + expect(success).toBe(true); + const inits = initList(cppCode); + expect(inits).toContain("H(0xFF)"); + expect(inits).toContain("O(017)"); + expect(inits).toContain("B(0b1010)"); + // No raw IEC based-literal marker must survive in the init list. 
+ expect(inits).not.toContain("#"); + }); + + it("strips IEC underscore separators from initializers", () => { + const { cppCode, success } = compileST(` + PROGRAM P + VAR + x : UDINT := 16#FF_FF; + y : UDINT := 1_000; + END_VAR + x := x; + END_PROGRAM + `); + expect(success).toBe(true); + const inits = initList(cppCode); + expect(inits).toContain("X(0xFFFF)"); + expect(inits).toContain("Y(1000)"); + // The IEC underscore separators must be gone from the init list. + expect(inits).not.toContain("_"); + }); + + it("strips IEC typed-literal prefixes (INT#, BYTE#, REAL#)", () => { + const { cppCode, success } = compileST(` + PROGRAM P + VAR + a : INT := INT#5; + b : INT := INT#16#10; + c : BYTE := BYTE#16#AB; + d : REAL := REAL#1.5; + END_VAR + a := a; + END_PROGRAM + `); + expect(success).toBe(true); + const inits = initList(cppCode); + expect(inits).toContain("A(5)"); + expect(inits).toContain("B(0x10)"); + expect(inits).toContain("C(0xAB)"); + expect(inits).toContain("D(1.5)"); + expect(inits).not.toContain("#"); + }); + + it("preserves the sign on negative/positive literal initializers", () => { + const { cppCode, success } = compileST(` + PROGRAM P + VAR + n : INT := -5; + p : INT := +3; + END_VAR + n := n; + END_PROGRAM + `); + expect(success).toBe(true); + const inits = initList(cppCode); + expect(inits).toContain("N(-5)"); + // A dropped sign would regress to the default 0. 
+ expect(inits).not.toContain("N(0)"); + }); + + it("leaves decimals, reals and non-numeric initializers unchanged", () => { + const { cppCode, success } = compileST(` + PROGRAM P + VAR + d : UDINT := 255; + r : REAL := 1.5; + e : REAL := 1.5E3; + t : BOOL := TRUE; + END_VAR + d := d; + END_PROGRAM + `); + expect(success).toBe(true); + const inits = initList(cppCode); + expect(inits).toContain("D(255)"); + expect(inits).toContain("R(1.5)"); + expect(inits).toContain("E(1.5E3)"); + expect(inits).toContain("T(true)"); + }); + + it("lowers based literals in VAR_GLOBAL initializers too", () => { + const { cppCode, success } = compileST(` + CONFIGURATION Cfg + VAR_GLOBAL + g : UDINT := 16#CAFE; + END_VAR + END_CONFIGURATION + `); + expect(success).toBe(true); + expect(cppCode).toContain("0xCAFE"); + expect(cppCode).not.toContain("16#"); + }); +}); From 9f5a34aca83b3e4ff2bd1a7d543656e7670cd2a2 Mon Sep 17 00:00:00 2001 From: Thiago Alves Date: Mon, 1 Jun 2026 07:52:42 -0400 Subject: [PATCH 2/4] test(codegen): pin statement-body literal lowering (#133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verified that based literals, IEC underscore separators, typed-literal prefixes, signs (incl. -16#FF), and reals already lower correctly on the statement-body path (assignment RHS, conditions, arithmetic, array indices) — no code change needed there; the #133 bug was confined to the declaration-initializer path. These tests lock that in so the two paths can't drift. (BOOL#TRUE/BOOL#FALSE is non-lexable and non-standard; typed bool uses BOOL#1/BOOL#0.) 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/backend/codegen-literal-body.test.ts | 82 ++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 tests/backend/codegen-literal-body.test.ts diff --git a/tests/backend/codegen-literal-body.test.ts b/tests/backend/codegen-literal-body.test.ts new file mode 100644 index 0000000..4804915 --- /dev/null +++ b/tests/backend/codegen-literal-body.test.ts @@ -0,0 +1,82 @@ +/** + * Companion to codegen-var-initializers.test.ts (issue #133). + * + * The initializer fix lowers numeric literals on the declaration path so + * it matches the statement-body path. These tests pin down the body path + * itself — based literals, IEC underscore separators, typed-literal + * prefixes, signs, and reals must lower to valid C++ wherever a literal + * can appear (assignment RHS, conditions, arithmetic, array indices), + * so the two paths can't drift apart later. + */ + +import { describe, it, expect } from "vitest"; +import { compile } from "../../dist/index.js"; + +/** Compile a program body and return just the `run()` method text. 
*/ +function runBody(statements: string, vars: string): string { + const result = compile(` + PROGRAM P + VAR ${vars} END_VAR + ${statements} + END_PROGRAM + `); + expect(result.success).toBe(true); + const lines = result.cppCode.split("\n"); + const start = lines.findIndex((l) => l.includes("void Program_P::run")); + expect(start).toBeGreaterThanOrEqual(0); + const end = lines.findIndex((l, idx) => idx > start && l.trimEnd() === "}"); + return lines.slice(start, end + 1).join("\n"); +} + +describe("issue #133: statement-body literal lowering", () => { + it("lowers based integer/bitstring literals", () => { + const body = runBody("u := 16#FF; u := 8#17; u := 2#1010;", "u : UDINT;"); + expect(body).toContain("U = 0xFF;"); + expect(body).toContain("U = 017;"); + expect(body).toContain("U = 0b1010;"); + }); + + it("strips IEC underscore separators", () => { + const body = runBody("u := 16#FF_FF; u := 1_000;", "u : UDINT;"); + expect(body).toContain("U = 0xFFFF;"); + expect(body).toContain("U = 1000;"); + }); + + it("lowers typed-literal prefixes to static_cast", () => { + const body = runBody( + "i := INT#5; i := INT#16#10; bt := BYTE#16#AB; wd := WORD#16#1234;", + "i : INT; bt : BYTE; wd : WORD;", + ); + expect(body).toContain("static_cast(5)"); + expect(body).toContain("static_cast(0x10)"); + expect(body).toContain("static_cast(0xAB)"); + expect(body).toContain("static_cast(0x1234)"); + }); + + it("preserves signs, including on based literals", () => { + const body = runBody("i := -5; i := +3; li := -16#FF;", "i : INT; li : LINT;"); + expect(body).toContain("I = -5;"); + expect(body).toContain("I = +3;"); + expect(body).toContain("LI = -0xFF;"); + }); + + it("lowers based literals inside conditions, arithmetic and indices", () => { + const body = runBody( + "IF u > 16#10 THEN u := u + 16#01; END_IF; arr[2#11] := 16#AA;", + "u : UDINT; arr : ARRAY[0..15] OF INT;", + ); + expect(body).toContain("U > 0x10"); + expect(body).toContain("U + 0x01"); + 
expect(body).toContain("ARR[0b11] = 0xAA;"); + }); + + it("emits valid reals (decimal point or exponent)", () => { + const body = runBody( + "r := 1.5; r := 1.5E3; lr := 1.5E-10;", + "r : REAL; lr : LREAL;", + ); + expect(body).toContain("R = 1.5;"); + expect(body).toContain("R = 1500.0;"); + expect(body).toContain("LR = 1.5e-10;"); + }); +}); From 522545d1b0348bbe183fdb18f99ec4e549b6a6f1 Mon Sep 17 00:00:00 2001 From: Thiago Alves Date: Mon, 1 Jun 2026 08:00:22 -0400 Subject: [PATCH 3/4] fix(cli): locate runtime include dir on npm install (#134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `--build`/`--test` aborted with "Could not locate runtime include directory" on a global/npm install, even though the headers ship in the package. The package places the runtime at src/runtime/include (relative to the package root) but the CLI runs from dist/node, so reaching it needs two levels up (../../src/runtime/include). findRuntimeIncludeDir only climbed one (../src/runtime/include → dist/src/runtime/include, absent), so auto-discovery landed one directory short. It only worked when run from the project root, where the cwd-relative candidate happened to hit — which is never the case on a global install (cwd is the user's project). Add the two-level package-relative candidate to both the import.meta and __dirname blocks, mirroring the libs locator just below (which already uses ../../libs). Verified end-to-end: `--build` from a foreign cwd now locates the headers and produces a working binary. Strengthen the previously-lenient build-utils test (it mocked cwd to /tmp but only asserted "doesn't throw") into a real #134 regression that requires the package-relative lookup to resolve src/runtime/include, plus a -I cxx-flags fallback case. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/node/build-utils.ts | 8 +++++++ tests/backend/build-utils.test.ts | 35 ++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/node/build-utils.ts b/src/node/build-utils.ts index 21c34f9..741cc40 100644 --- a/src/node/build-utils.ts +++ b/src/node/build-utils.ts @@ -69,6 +69,11 @@ export function findRuntimeIncludeDir(cxxFlags: string): string | null { if (typeof import.meta?.url === "string") { const scriptDir = dirname(new URL(import.meta.url).pathname); candidates.push(resolve(scriptDir, "runtime", "include")); + // npm/built layout: dist/node/ → ../../src/runtime/include. + // (src/node/ dev layout resolves to the same src/runtime/include.) + candidates.push( + resolve(scriptDir, "..", "..", "src", "runtime", "include"), + ); candidates.push(resolve(scriptDir, "..", "src", "runtime", "include")); } } catch { @@ -78,6 +83,9 @@ export function findRuntimeIncludeDir(cxxFlags: string): string | null { // From __dirname (CJS bundle via esbuild) if (typeof __dirname === "string") { candidates.push(resolve(__dirname, "runtime", "include")); + candidates.push( + resolve(__dirname, "..", "..", "src", "runtime", "include"), + ); candidates.push(resolve(__dirname, "..", "src", "runtime", "include")); } diff --git a/tests/backend/build-utils.test.ts b/tests/backend/build-utils.test.ts index 9a6cb2d..3602b42 100644 --- a/tests/backend/build-utils.test.ts +++ b/tests/backend/build-utils.test.ts @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (C) 2025 Autonomy / OpenPLC Project +import { existsSync } from "node:fs"; +import { resolve } from "node:path"; import { describe, it, expect } from "vitest"; import { splitCxxFlags } from "../../dist/cxx-flags.js"; import { @@ -53,15 +55,38 @@ describe("findRuntimeIncludeDir", () => { expect(dir).toContain("include"); }); - it("returns null when not found and no -I flags", () => { - // Override CWD to a temp dir 
where runtime doesn't exist + it("locates the runtime via the package layout when cwd is elsewhere (#134)", () => { + // Regression for #134: on a global/npm install the cwd is the user's + // project, not the package, so the cwd-relative candidate can't help. + // The package-relative candidates (import.meta / __dirname) must still + // resolve the shipped src/runtime/include — the bundle runs from + // dist/node, so it has to climb two levels (../../src/runtime/include), + // not one. const origCwd = process.cwd; process.cwd = () => "/tmp"; try { - // This may or may not find it via __dirname / import.meta.url - // Just verify it doesn't throw const dir = findRuntimeIncludeDir(""); - expect(typeof dir === "string" || dir === null).toBe(true); + expect(dir).not.toBeNull(); + expect(dir).toContain("runtime"); + expect(dir).toContain("include"); + // And it must actually contain the canonical runtime header. + expect(existsSync(resolve(dir!, "iec_types.hpp"))).toBe(true); + } finally { + process.cwd = origCwd; + } + }); + + it("falls back to a -I path from cxx-flags when auto-discovery misses", () => { + const origCwd = process.cwd; + process.cwd = () => "/tmp"; + try { + const real = findRuntimeIncludeDir(""); + expect(real).not.toBeNull(); + // Point auto-discovery at nothing useful, but supply the real dir + // via -I; the flag fallback should recover it. 
+ const viaFlag = findRuntimeIncludeDir(`-I${real}`); + expect(viaFlag).not.toBeNull(); + expect(existsSync(resolve(viaFlag!, "iec_types.hpp"))).toBe(true); } finally { process.cwd = origCwd; } From b8bc3a8160110ae7e266cf395481ebc89e84f147 Mon Sep 17 00:00:00 2001 From: Thiago Alves Date: Mon, 1 Jun 2026 12:01:15 -0400 Subject: [PATCH 4/4] chore(release): v0.4.22 Co-Authored-By: Claude Opus 4.8 (1M context) --- package-lock.json | 4 ++-- package.json | 2 +- src/version-build.ts | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0536ba7..f288874 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "strucpp", - "version": "0.4.21", + "version": "0.4.22", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "strucpp", - "version": "0.4.21", + "version": "0.4.22", "license": "GPL-3.0-or-later", "dependencies": { "chevrotain": "^11.0.0" diff --git a/package.json b/package.json index e5d5531..0d4cd89 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "strucpp", - "version": "0.4.21", + "version": "0.4.22", "description": "IEC 61131-3 Structured Text to C++ Compiler", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/version-build.ts b/src/version-build.ts index a1c4b5d..9740b66 100644 --- a/src/version-build.ts +++ b/src/version-build.ts @@ -2,4 +2,4 @@ // Copyright (C) 2025 Autonomy / OpenPLC Project // AUTO-GENERATED by scripts/rebuild-libs.mjs from package.json. Do not edit by hand — // any changes are overwritten on the next build. -export const STRUCPP_VERSION_BUILD = "0.4.21"; +export const STRUCPP_VERSION_BUILD = "0.4.22";