diff --git a/.gitignore b/.gitignore index 80e438e95..e800adc8b 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,5 @@ node_modules/ *.iml book/ +**/src/bindings.rs +*.lit_test_times.txt* diff --git a/Makefile.toml b/Makefile.toml index 6160f9669..9f3ccfa76 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -336,6 +336,7 @@ args = [ "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/parse", "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/wasm-translation", "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/source-location", + "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/debugdump", ] dependencies = ["litcheck"] diff --git a/codegen/masm/src/lib.rs b/codegen/masm/src/lib.rs index 4cba3d36e..dfbed513d 100644 --- a/codegen/masm/src/lib.rs +++ b/codegen/masm/src/lib.rs @@ -46,6 +46,7 @@ pub fn register_dialect_hooks(context: &midenc_hir::Context) { info.register_operation_trait::(); info.register_operation_trait::(); info.register_operation_trait::(); + info.register_operation_trait::(); }); context.register_dialect_hook::(|info, _context| { info.register_operation_trait::(); diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index c1e31d76a..bdaf49022 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1229,6 +1229,93 @@ impl HirLowering for arith::Split { } } +impl HirLowering for builtin::DbgValue { + fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + use miden_core::{DebugVarInfo, DebugVarLocation, Felt}; + use midenc_hir::DIExpressionOp; + + // Get the variable info + let var = self.variable(); + + // Build the DebugVarLocation from DIExpression + let expr = self.expression(); + let value = self.value().as_value_ref(); + + // If the value is not on the stack and there's no expression info, + // skip emitting this debug info (the value has been optimized away) + let has_location_expr = expr.operations.first().is_some_and(|op| { + matches!( + op, + 
DIExpressionOp::WasmStack(_) + | DIExpressionOp::WasmLocal(_) + | DIExpressionOp::ConstU64(_) + | DIExpressionOp::ConstS64(_) + ) + }); + if !has_location_expr && emitter.stack.find(&value).is_none() { + // Value has been dropped and we have no other location info, skip + return Ok(()); + } + let value_location = if let Some(first_op) = expr.operations.first() { + match first_op { + DIExpressionOp::WasmStack(offset) => DebugVarLocation::Stack(*offset as u8), + DIExpressionOp::WasmLocal(idx) => DebugVarLocation::Local(*idx as u16), + DIExpressionOp::WasmGlobal(_) | DIExpressionOp::Deref => { + // For global or dereference, check the stack position of the value + if let Some(pos) = emitter.stack.find(&value) { + DebugVarLocation::Stack(pos as u8) + } else { + DebugVarLocation::Expression(vec![]) + } + } + DIExpressionOp::ConstU64(val) => DebugVarLocation::Const(Felt::new(*val)), + DIExpressionOp::ConstS64(val) => DebugVarLocation::Const(Felt::new(*val as u64)), + _ => { + // For other operations, try to find the value on the stack + if let Some(pos) = emitter.stack.find(&value) { + DebugVarLocation::Stack(pos as u8) + } else { + DebugVarLocation::Expression(vec![]) + } + } + } + } else { + // No expression, try to find the value on the stack + if let Some(pos) = emitter.stack.find(&value) { + DebugVarLocation::Stack(pos as u8) + } else { + // Value not found, use expression + DebugVarLocation::Expression(vec![]) + } + }; + + let mut debug_var = DebugVarInfo::new(var.name.to_string(), value_location); + + // Set arg_index if this is a parameter + if let Some(arg_index) = var.arg_index { + debug_var.set_arg_index(arg_index + 1); // Convert to 1-based + } + + // Set source location + if let Some(line) = core::num::NonZeroU32::new(var.line) { + use miden_assembly::debuginfo::{ColumnNumber, FileLineCol, LineNumber, Uri}; + let uri = Uri::new(var.file.as_str()); + let file_line_col = FileLineCol::new( + uri, + LineNumber::new(line.get()).unwrap_or_default(), + 
var.column.and_then(ColumnNumber::new).unwrap_or_default(), + ); + debug_var.set_location(file_line_col); + } + + // Emit the instruction + let inst = masm::Instruction::DebugVar(debug_var); + emitter.emit_op(masm::Op::Inst(Span::new(self.span(), inst))); + + Ok(()) + } +} + impl HirLowering for builtin::GlobalSymbol { fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { let context = self.as_operation().context(); diff --git a/docs/DebugInfoFormat.md b/docs/DebugInfoFormat.md new file mode 100644 index 000000000..ae646f71a --- /dev/null +++ b/docs/DebugInfoFormat.md @@ -0,0 +1,360 @@ +# Debug Info Format Specification + +This document describes the `.debug_info` custom section format used in MASP (Miden Assembly Package) files. This section contains source-level debug information that enables debuggers to map between Miden VM execution state and the original source code. + +## Overview + +The debug info section is stored as a custom section in the MASP package with the section ID `debug_info`. 
It is designed to be: + +- **Compact**: Uses index-based references and string deduplication +- **Self-contained**: All information needed for debugging is in this section +- **Extensible**: Version field allows for future format evolution + +## Section Structure + +The `.debug_info` section contains the following logical subsections: + +``` +┌─────────────────────────────────────────┐ +│ Debug Info Header │ +│ - version (u8) │ +├─────────────────────────────────────────┤ +│ .debug_str │ +│ - String table (deduplicated) │ +├─────────────────────────────────────────┤ +│ .debug_types │ +│ - Type definitions │ +├─────────────────────────────────────────┤ +│ .debug_files │ +│ - Source file information │ +├─────────────────────────────────────────┤ +│ .debug_functions │ +│ - Function metadata │ +│ - Variables (nested) │ +│ - Inlined calls (nested) │ +└─────────────────────────────────────────┘ +``` + +## Format Version + +Current version: **1** + +The version byte is the first field in the section and indicates the format version. Readers should reject sections with unsupported versions. + +--- + +## .debug_str - String Table + +The string table contains all strings used in the debug info, deduplicated to save space. Other sections reference strings by their index into this table. + +### Contents + +- File paths +- Function names +- Variable names +- Type names +- Linkage/mangled names + +### Example Output + +``` +.debug_str contents: + [ 0] "/Users/user/project/src/lib.rs" + [ 1] "my_function" + [ 2] "x" + [ 3] "result" +``` + +--- + +## .debug_types - Type Information + +The type table contains definitions for all types referenced by variables and functions. Types can reference other types by index, allowing for complex type hierarchies. 
+ +### Type Kinds + +| Tag | Kind | Description | +|-----|------|-------------| +| 0 | Primitive | Built-in scalar types | +| 1 | Pointer | Pointer to another type | +| 2 | Array | Fixed or dynamic array | +| 3 | Struct | Composite type with fields | +| 4 | Function | Function signature | +| 5 | Unknown | Opaque/unknown type | + +### Primitive Types + +| Value | Type | Size (bytes) | Size (felts) | +|-------|------|--------------|--------------| +| 0 | void | 0 | 0 | +| 1 | bool | 1 | 1 | +| 2 | i8 | 1 | 1 | +| 3 | u8 | 1 | 1 | +| 4 | i16 | 2 | 1 | +| 5 | u16 | 2 | 1 | +| 6 | i32 | 4 | 1 | +| 7 | u32 | 4 | 1 | +| 8 | i64 | 8 | 2 | +| 9 | u64 | 8 | 2 | +| 10 | i128 | 16 | 4 | +| 11 | u128 | 16 | 4 | +| 12 | f32 | 4 | 2 | +| 13 | f64 | 8 | 2 | +| 14 | felt | 8 | 1 | +| 15 | word | 32 | 4 | + +### Example Output + +``` +.debug_types contents: + [ 0] PRIMITIVE: i32 (size: 4 bytes, 1 felts) + [ 1] PRIMITIVE: felt (size: 8 bytes, 1 felts) + [ 2] POINTER -> i32 + [ 3] ARRAY [felt; 4] + [ 4] STRUCT Point (size: 16 bytes) + + 0: x : felt + + 8: y : felt +``` + +--- + +## .debug_files - Source File Information + +The file table contains information about source files referenced by functions and variables. + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| path_idx | u32 | Index into string table for file path | +| directory_idx | Option\ | Optional index for directory path | +| checksum | Option\<[u8; 32]\> | Optional SHA-256 checksum for verification | + +### Example Output + +``` +.debug_files contents: + [ 0] /Users/user/project/src/lib.rs + [ 1] /rustc/abc123.../library/core/src/panicking.rs + [ 2] unknown +``` + +--- + +## .debug_functions - Function Information + +The function table contains debug metadata for each function in the compiled program. 
+ +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| name_idx | u32 | Index into string table for function name | +| linkage_name_idx | Option\ | Optional mangled/linkage name | +| file_idx | u32 | Index into file table | +| line | u32 | Line number where function is defined | +| column | u32 | Column number | +| type_idx | Option\ | Optional function type (index into type table) | +| mast_root | Option\<[u8; 32]\> | MAST root digest linking to compiled code | +| variables | Vec | Local variables and parameters | +| inlined_calls | Vec | Inlined function call sites | + +### Variables + +Each function contains a list of variables (parameters and locals): + +| Field | Type | Description | +|-------|------|-------------| +| name_idx | u32 | Index into string table | +| type_idx | u32 | Index into type table | +| arg_index | u32 | 1-based parameter index (0 = local variable) | +| line | u32 | Declaration line | +| column | u32 | Declaration column | +| scope_depth | u32 | Lexical scope depth (0 = function scope) | + +### Inlined Calls + +For tracking inlined function calls: + +| Field | Type | Description | +|-------|------|-------------| +| callee_idx | u32 | Index into function table for inlined function | +| file_idx | u32 | Call site file | +| line | u32 | Call site line | +| column | u32 | Call site column | + +### Example Output + +``` +.debug_functions contents: + [ 0] FUNCTION: my_function + Location: /Users/user/project/src/lib.rs:10:1 + MAST root: 0xabcd1234... 
+ Variables (3): + - x (param #1): i32 @ 10:14 + - y (param #2): i32 @ 10:22 + - result (local): i32 @ 11:9 [scope depth: 1] + Inlined calls (1): + - helper_fn inlined at lib.rs:12:5 +``` + +--- + +## Usage + +### Generating Debug Info + +Compile with debug info enabled: + +```bash +midenc input.wasm --exe --debug full -o output.masp +``` + +For projects using `trim-paths`, use the `-Z trim-path-prefix` option to preserve absolute paths: + +```bash +midenc input.wasm --exe --debug full \ + -Z trim-path-prefix="/path/to/project" \ + -o output.masp +``` + +### Inspecting Debug Info + +Use the `miden-debugdump` tool to inspect debug info in a MASP file: + +```bash +# Full dump (includes all sections) +miden-debugdump output.masp + +# Summary only +miden-debugdump output.masp --summary + +# Specific section from .debug_info +miden-debugdump output.masp --section functions +miden-debugdump output.masp --section variables +miden-debugdump output.masp --section types +miden-debugdump output.masp --section files +miden-debugdump output.masp --section strings + +# Show DebugVar decorators from MAST (.debug_loc) +miden-debugdump output.masp --section locations + +# Verbose mode (shows additional details like raw decorator list) +miden-debugdump output.masp --section locations --verbose + +# Raw indices (for debugging the debug info itself) +miden-debugdump output.masp --raw +``` + +--- + +## Design Rationale + +### Index-Based References + +All cross-references use indices rather than embedding data directly. 
This: +- Enables string deduplication (file paths, names appear once) +- Reduces section size +- Allows efficient random access + +### Separation of Concerns + +The section is divided into logical subsections: +- **Strings**: Shared across all other sections +- **Types**: Can be referenced by multiple variables/functions +- **Files**: Shared by multiple functions +- **Functions**: Contains variables and inlined calls inline + +### Compatibility with DWARF + +The format is inspired by DWARF but simplified for Miden's needs: +- No complex DIE tree structure +- No location expressions (handled by `DebugVar` decorators in MAST) +- No line number tables (locations embedded in functions/variables) + +--- + +## Debug Variable Locations + +Debug information in MASP is split between two locations: the `.debug_info` custom section (documented above) and `Decorator::DebugVar` entries embedded in the MAST instruction stream. + +### Architecture Overview + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ MASP Package │ +├──────────────────────────────────────────────────────────────────┤ +│ MAST Forest │ +│ ├── MastNode[] │ +│ │ └── Decorator::DebugVar(DebugVarInfo) ← Runtime locations │ +│ │ • name: "x" │ +│ │ • value_location: Stack(0) / Local(2) / Memory(...) │ +│ │ • source location │ +│ └── String table (for names) │ +├──────────────────────────────────────────────────────────────────┤ +│ .debug_info Section (separate custom section) │ +│ ├── .debug_str (deduplicated strings) │ +│ ├── .debug_types (type definitions) │ +│ ├── .debug_files (source file paths) │ +│ └── .debug_functions (static metadata, variables, inlined) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Why Two Locations? 
+ +| Aspect | `Decorator::DebugVar` in MAST | `.debug_info` Section | +|--------|-------------------------------|----------------------| +| **Where stored** | Embedded in instruction stream | Custom section at end of MASP | +| **Purpose** | Runtime value location at specific execution points | Static metadata (types, files, function info) | +| **When used** | During execution, debugger reads variable values | To display type names, source files, etc. | +| **DWARF analog** | Location lists (`.debug_loc`) | `.debug_info` / `.debug_abbrev` | + +The `.debug_info` section tells you **what** variables exist (name, type, scope). The `DebugVar` decorators tell you **where** a variable's value is at a specific point during execution. + +### DebugVarInfo Structure + +Each `Decorator::DebugVar` contains a `DebugVarInfo` with the following fields: + +| Field | Type | Description | +|-------|------|-------------| +| name | String | Variable name | +| value_location | DebugVarLocation | Where to find the value | +| type_id | Option\ | Index into `.debug_types` | +| arg_index | Option\ | 1-based parameter index (if parameter) | +| location | Option\ | Source location of declaration | + +### DebugVarLocation Variants + +The `value_location` field describes where the variable's value can be found at runtime: + +| Variant | Encoding | Description | +|---------|----------|-------------| +| `Stack(u8)` | Tag 0 + u8 | Value is at stack position N (0 = top) | +| `Memory(u32)` | Tag 1 + u32 | Value is at memory word address | +| `Const(u64)` | Tag 2 + u64 | Value is a constant field element | +| `Local(u16)` | Tag 3 + u16 | Value is in local variable slot N | +| `Expression(Vec)` | Tag 4 + len + bytes | Complex location (DWARF-style expression) | + +### Example + +For a function like: +```rust +fn add(x: i32, y: i32) -> i32 { + let sum = x + y; + sum +} +``` + +The MAST will contain decorators like: +``` +# At function entry +Decorator::DebugVar { name: "x", value_location: Local(0), 
arg_index: Some(1), ... } +Decorator::DebugVar { name: "y", value_location: Local(1), arg_index: Some(2), ... } + +# After computing sum +Decorator::DebugVar { name: "sum", value_location: Stack(0), arg_index: None, ... } +``` + +A debugger pausing at a specific instruction can read these decorators to know where each variable's value is stored at that moment. + +--- diff --git a/docs/DebugInfoMetadata.md b/docs/DebugInfoMetadata.md new file mode 100644 index 000000000..bdf71aa21 --- /dev/null +++ b/docs/DebugInfoMetadata.md @@ -0,0 +1,216 @@ +# Debug Info Metadata Pipeline + +This note describes how the Miden compiler now threads source-level variable +metadata through HIR when compiling Wasm input. The goal is to make every HIR +function carry `DI*` attributes and `dbg.*` intrinsics that mirror the DWARF +records present in the Wasm binary, so downstream passes (or tooling consuming +serialized HIR) can reason about user variables. + +## High-Level Flow + +1. **DWARF ingestion** – while `ModuleEnvironment` parses the module, we retain + the full set of DWARF sections (`.debug_info`, `.debug_line`, etc.) and the + wasm name section. +2. **Metadata extraction** – before we translate functions, we walk the DWARF + using `addr2line` to determine source files and fall back to the wasm module + path when no debug info is present. We also load parameter/local names from + the name section. The result is a `FunctionDebugInfo` record containing a + `DICompileUnitAttr`, `DISubprogramAttr`, and a per-index list of + `DILocalVariableAttr`s. +3. **Translation-time tracking** – every `FuncTranslator` receives the + `FunctionDebugInfo` for the function it is translating. `FunctionBuilderExt` + attaches the compile-unit/subprogram attrs to the function op, records entry + parameters, and emits `builtin.dbg_value` intrinsics whenever locals change. +4. **Span-aware updates** – as each wasm operator is translated we store the + real `SourceSpan`. 
The first non-unknown span is used to retroactively patch + the compile unit, subprogram, and parameter variable records with real file, + line, and column information so the resulting HIR references locations in + the actual user file. + +The emitted HIR therefore contains both the SSA instructions and the debug +intrinsics that map values back to the user program. + +## HIR Metadata Constructs + +The core types live in `hir/src/attributes/debug.rs`: + +- `DICompileUnitAttr` – captures language, primary file, optional directory, + producer string, and optimized flag. Stored once per function/module. +- `DISubprogramAttr` – names the function, file, line/column, optional linkage + name, and flags indicating definition/local status. Does not embed the compile + unit to avoid redundancy - stored once per function. +- `DILocalVariableAttr` – describes parameters or locals, including the source + location, optional argument index, and optional `Type`. Does not embed the + scope to avoid redundancy - the scope is implied by the containing function. +- `DIExpressionAttr` – represents DWARF location expressions that describe how + to compute or locate a variable's value. +- `DIExpressionOp` – individual operations within a DIExpression, including: + - `WasmLocal(u32)` - Variable is in a WebAssembly local + - `WasmGlobal(u32)` - Variable is in a WebAssembly global + - `WasmStack(u32)` - Variable is on the WebAssembly operand stack + - `ConstU64(u64)` - Unsigned constant value + - Additional DWARF operations for complex expressions + +These attrs are exported from `midenc_hir` so clients can construct them +programmatically. The debug intrinsic (`builtin.dbg_value` from +`hir/src/dialects/builtin/ops/debug.rs`) consumes a `Value` plus the +metadata attributes. The `dbg_value` operation includes a `DIExpressionAttr` +field that describes the location or computation of the variable's value.
+ +## Collecting Metadata from Wasm + +`frontend/wasm/src/module/debug_info.rs` is the central collector. The key +steps are: + +1. Iterate over the bodies scheduled for translation (`ParsedModule::function_body_inputs`). +2. For each body, determine the source file and first line using `addr2line` and + store fallbacks (module path or `unknown`) when debug info is missing. +3. Construct `DICompileUnitAttr`/`DISubprogramAttr` and a `Vec>` + that covers both signature parameters and wasm locals. Parameter/local names + sourced from the name section are used when available; otherwise we emit + synthesized names (`arg{n}`, `local{n}`). +4. Store the result in a map `FxHashMap>>` + attached to `ParsedModule`. We use `RefCell` so later stages can patch the + attrs once the translator sees more accurate spans. + +## Using Metadata During Translation + +The translation machinery picks up those records as follows: + +- `build_ir.rs` moves the precomputed map onto the `FuncTranslator` invocation. +- `FuncTranslator::translate_body` installs the debug info on its + `FunctionBuilderExt` before any instructions are emitted. +- `FunctionBuilderExt::set_debug_metadata` attaches compile-unit/subprogram + attrs to the function op and resets its internal bookkeeping. +- Entry parameters are stored via `register_parameter` so we can emit + `dbg.value` instructions after we encounter the first real span (parameters + have no dedicated wasm operator with source ranges). +- Every wasm operator calls `builder.record_debug_span(span)` prior to emission; + the first non-unknown span updates the compile unit/subprogram attrs and + triggers parameter `dbg.value` emission so arguments are tied to the correct + location. +- `def_var_with_dbg` is the canonical entry point for `local.set` and + `local.tee`. It updates the SSA value and immediately emits a + `builtin.dbg_value` with the precise span of the store. +- Decoded `DW_AT_location` ranges are normalized into a per-function schedule. 
+ As the translator visits each wasm offset we opportunistically emit extra + `dbg.value` intrinsics so source variables track transitions between Wasm + locals without relying on `builtin.dbg_declare`. +- When present, `DW_AT_decl_line`/`DW_AT_decl_column` on variables override the + default span so we keep the original lexical definition sites instead of + inheriting the statement we first observed during translation. + +Locals declared in the wasm prologue receive an initial value but no debug +intrinsic until they are defined in user code. Subsequent writes insert +additional `dbg.value` ops so consumers can track value changes over time. + +## Example + +In the serialized HIR for the test pipeline you now see: + +```hir +builtin.dbg_value v0 #[expression = di.expression(DW_OP_WASM_local 0)] + #[variable = di.local_variable( + name = arg0, + file = /path/to/lib.rs, + line = 25, + column = 5, + arg = 1, + ty = i32 + )] # /path/to/lib.rs:25:5; +``` + +The `expression` attribute indicates that the variable is stored in WASM local 0. +When a variable moves between locations, additional `dbg_value` operations are +emitted with updated expressions: + +```hir +builtin.dbg_value v22 #[expression = di.expression(DW_OP_WASM_local 3)] + #[variable = di.local_variable(name = sum, ...)] +``` + +Both the attribute and the trailing comment reference the same source location +so downstream tooling can disambiguate the variable regardless of how it parses +HIR. + +## Fallback Behavior and Best-Effort Cases + +- If DWARF lookup fails entirely, we still emit attrs but populate + `file = unknown`, `line = 0`, and omit columns. As soon as a real span is + observed, those fields are patched. +- If the wasm name section lacks parameter/local names, we keep the generated + `arg{n}`/`local{n}` placeholders in the HIR. This mirrors LLVM’s behavior when + debug names are unavailable.
+ +## What we can do next and what are the limitations + +- **Location expressions** – We now decode `DW_AT_location` records for locals + and parameters, interpret simple Wasm location opcodes (including locals, + globals, and operand-stack slots), and attach them to `dbg.value` operations + as `DIExpressionAttr`. The system emits additional `dbg.value` intrinsics + whenever a variable's storage changes, with each operation containing the + appropriate expression. This allows modeling multi-location lifetimes where + variables move between different storage locations. Support for more complex + composite expressions (pieces, arithmetic operations, etc.) is implemented + but not fully utilized from DWARF parsing yet. +- **Lifetimes** – we reset the compile-unit/subprogram metadata to the first + span we encounter, but we do not track scopes or lexical block DIEs. Extending + the collector to read `DW_TAG_lexical_block` and other scope markers would + allow more precise lifetime modelling. +- **Cross-language inputs** – the language string comes from DWARF or defaults + to `"wasm"`. If the Wasm file was produced by Rust/C compilers we could read + `DW_AT_language` to provide richer values. +- **Incremental spans** – parameter debug entries currently use the first + non-unknown span in the function. For multi-file functions we might wish to + attach per-parameter spans using `DW_AT_decl_file`/`DW_AT_decl_line` if the + DWARF provides them. +- **MASM codegen** – The MASM backend emits `Decorator::DebugVar` entries + containing `DebugVarInfo` with variable names, runtime locations + (`DebugVarLocation::Stack`, `Local`, etc.), source positions, and type + information. These decorators are embedded in the MAST instruction stream, + enabling debuggers to track variable values at specific execution points. + +These refinements can be implemented without changing the public HIR surface; we +would only update the metadata collector and the builder helpers. 
+ +## Testing + +The debug info implementation is validated by lit tests in `tests/lit/debug/`: + +- **simple_debug.shtest** – verifies basic debug info for function parameters +- **function_metadata.shtest** – tests debug metadata on multi-parameter functions +- **variable_locations.shtest** – validates debug info tracking for variables in a loop +- additional tests in the same directory cover further scenarios + +Each test compiles a small Rust snippet with DWARF enabled (`-C debuginfo=2`), +runs it through `midenc compile --emit hir`, and uses `FileCheck` to verify that +`builtin.dbg_value` intrinsics are emitted with the correct `di.local_variable` +attributes containing variable names, file paths, line numbers, and types. + +To run the debug info tests (assuming `lit` is on your `PATH`): + +```bash +lit -va tests/lit/debug/ +``` + +Or to run a specific test: + +```bash +lit -va tests/lit/debug/simple_debug.shtest +``` + +## Summary + +- HIR now exposes DWARF-like metadata via reusable `DI*` attributes including + `DIExpressionAttr` for location expressions. +- The wasm frontend precomputes function metadata, keeps it mutable during + translation, and emits `dbg.value` intrinsics with location expressions for + every parameter/variable assignment. +- Location expressions (DW_OP_WASM_local, etc.) are preserved from DWARF and + attached to `dbg.value` operations, enabling accurate tracking of variables + as they move between different storage locations. +- The serialized HIR describes user variables with accurate file/line/column + information and storage locations, providing a foundation for future tooling + (debugging, diagnostics correlation, or IR-level analysis). +- The design avoids redundancy by not embedding scope hierarchies in each variable, + instead relying on structural containment to establish relationships.
diff --git a/frontend/wasm/src/code_translator/mod.rs b/frontend/wasm/src/code_translator/mod.rs index e83d3243e..58f0f675a 100644 --- a/frontend/wasm/src/code_translator/mod.rs +++ b/frontend/wasm/src/code_translator/mod.rs @@ -56,6 +56,8 @@ pub fn translate_operator( diagnostics: &DiagnosticsHandler, span: SourceSpan, ) -> WasmResult<()> { + builder.record_debug_span(span); + if !state.reachable { translate_unreachable_operator(op, builder, state, mod_types, diagnostics, span)?; return Ok(()); @@ -90,11 +92,11 @@ pub fn translate_operator( } else { val }; - builder.def_var(var, val); + builder.def_var_with_dbg(var, val, span); } Operator::LocalTee { local_index } => { let val = state.peek1(); - builder.def_var(Variable::from_u32(*local_index), val); + builder.def_var_with_dbg(Variable::from_u32(*local_index), val, span); } /********************************** Globals ****************************************/ Operator::GlobalGet { global_index } => { diff --git a/frontend/wasm/src/module/build_ir.rs b/frontend/wasm/src/module/build_ir.rs index 0fb225f69..94e2f3afa 100644 --- a/frontend/wasm/src/module/build_ir.rs +++ b/frontend/wasm/src/module/build_ir.rs @@ -15,7 +15,8 @@ use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic, Severity, use wasmparser::Validator; use super::{ - MemoryIndex, module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, + MemoryIndex, debug_info::collect_function_debug_info, + module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, }; use crate::{ WasmTranslationConfig, @@ -117,6 +118,14 @@ pub fn build_ir_module( ..Default::default() }) .into_diagnostic()?; + parsed_module.function_debug = collect_function_debug_info( + parsed_module, + module_types, + &parsed_module.module, + &addr2line, + context.diagnostics(), + ); + let mut func_translator = FuncTranslator::new(context.clone()); // Although this renders this parsed module invalid(without function // bodies), we don't support 
multiple module instances. Thus, this @@ -189,8 +198,12 @@ pub fn build_ir_module( continue; } - let FunctionBodyData { validator, body } = body_data; + let FunctionBodyData { + validator, body, .. + } = body_data; let mut func_validator = validator.into_validator(Default::default()); + let debug_info = parsed_module.function_debug.get(&func_index).cloned(); + func_translator.translate_body( &body, function_ref, @@ -201,6 +214,7 @@ pub fn build_ir_module( context.session(), &mut func_validator, _config, + debug_info, )?; } Ok(()) diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs new file mode 100644 index 000000000..9f02d5f7a --- /dev/null +++ b/frontend/wasm/src/module/debug_info.rs @@ -0,0 +1,697 @@ +use alloc::{rc::Rc, vec::Vec}; +use core::cell::RefCell; +use std::path::Path; + +use addr2line::Context; +use cranelift_entity::EntityRef; +use gimli::{self, AttributeValue, read::Operation}; +use log::debug; +use midenc_hir::{ + DICompileUnitAttr, DIExpressionAttr, DIExpressionOp, DILocalVariableAttr, DISubprogramAttr, + FxHashMap, SourceSpan, interner::Symbol, +}; +use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic}; + +use super::{ + FuncIndex, Module, + module_env::{DwarfReader, FunctionBodyData, ParsedModule}, + types::{WasmFuncType, convert_valtype, ir_type}, +}; +use crate::module::types::ModuleTypesBuilder; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocationDescriptor { + /// Inclusive start offset within the function's code, relative to the Wasm code section. + pub start: u64, + /// Exclusive end offset. `None` indicates the location is valid until the end of the function. 
+ pub end: Option, + pub storage: VariableStorage, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum VariableStorage { + Local(u32), + Global(u32), + Stack(u32), + ConstU64(u64), + Unsupported, +} + +impl VariableStorage { + pub fn as_local(&self) -> Option { + match self { + VariableStorage::Local(index) => Some(*index), + _ => None, + } + } + + pub fn to_expression_op(&self) -> DIExpressionOp { + match self { + VariableStorage::Local(idx) => DIExpressionOp::WasmLocal(*idx), + VariableStorage::Global(idx) => DIExpressionOp::WasmGlobal(*idx), + VariableStorage::Stack(idx) => DIExpressionOp::WasmStack(*idx), + VariableStorage::ConstU64(val) => DIExpressionOp::ConstU64(*val), + VariableStorage::Unsupported => { + DIExpressionOp::Unsupported(Symbol::intern("unsupported")) + } + } + } +} + +#[derive(Clone)] +pub struct LocalDebugInfo { + pub attr: DILocalVariableAttr, + pub locations: Vec, + pub expression: Option, +} + +#[derive(Clone)] +pub struct FunctionDebugInfo { + pub compile_unit: DICompileUnitAttr, + pub subprogram: DISubprogramAttr, + pub locals: Vec>, + pub function_span: Option, + pub location_schedule: Vec, + pub next_location_event: usize, +} + +#[derive(Default, Clone)] +struct DwarfLocalData { + name: Option, + locations: Vec, + decl_line: Option, + decl_column: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocationScheduleEntry { + pub offset: u64, + pub var_index: usize, + pub storage: VariableStorage, +} + +impl FunctionDebugInfo { + pub fn local_attr(&self, index: usize) -> Option<&DILocalVariableAttr> { + self.locals.get(index).and_then(|info| info.as_ref().map(|data| &data.attr)) + } +} + +pub fn collect_function_debug_info( + parsed_module: &ParsedModule, + module_types: &ModuleTypesBuilder, + module: &Module, + addr2line: &Context>, + diagnostics: &DiagnosticsHandler, +) -> FxHashMap>> { + let mut map = FxHashMap::default(); + + let dwarf_locals = collect_dwarf_local_data(parsed_module, module, diagnostics); + + debug!( + 
"Collecting function debug info for {} functions", + parsed_module.function_body_inputs.len() + ); + + for (defined_idx, body) in parsed_module.function_body_inputs.iter() { + let func_index = module.func_index(defined_idx); + let func_name = module.func_name(func_index); + if let Some(info) = build_function_debug_info( + parsed_module, + module_types, + module, + func_index, + body, + addr2line, + diagnostics, + dwarf_locals.get(&func_index), + ) { + debug!( + "Collected debug info for function {}: {} locals", + func_name.as_str(), + info.locals.len() + ); + map.insert(func_index, Rc::new(RefCell::new(info))); + } else { + debug!("No debug info collected for function {}", func_name.as_str()); + } + } + + debug!("Collected debug info for {} functions total", map.len()); + map +} + +#[allow(clippy::too_many_arguments)] +fn build_function_debug_info( + parsed_module: &ParsedModule, + module_types: &ModuleTypesBuilder, + module: &Module, + func_index: FuncIndex, + body: &FunctionBodyData, + addr2line: &Context>, + diagnostics: &DiagnosticsHandler, + dwarf_locals: Option<&FxHashMap>, +) -> Option { + let func_name = module.func_name(func_index); + + let (file_symbol, directory_symbol) = determine_file_symbols(parsed_module, addr2line, body); + let (line, column) = determine_location(addr2line, body.body_offset); + + let mut compile_unit = DICompileUnitAttr::new(Symbol::intern("wasm"), file_symbol); + compile_unit.directory = directory_symbol; + compile_unit.producer = Some(Symbol::intern("midenc-frontend-wasm")); + + let mut subprogram = DISubprogramAttr::new(func_name, compile_unit.file, line, column); + subprogram.is_definition = true; + + let wasm_signature = module_types[module.functions[func_index].signature].clone(); + let locals = build_local_debug_info( + module, + func_index, + &wasm_signature, + body, + &subprogram, + diagnostics, + dwarf_locals, + ); + let location_schedule = build_location_schedule(&locals); + + Some(FunctionDebugInfo { + compile_unit, + 
subprogram, + locals, + function_span: None, + location_schedule, + next_location_event: 0, + }) +}
+
+/// Interns the parent directory of `path`, if it has a non-empty one.
+///
+/// `Path::parent` returns `Some("")` for a bare file name such as `lib.rs`;
+/// that case is reported as `None` so that no empty directory symbol is
+/// produced. A parent that is not valid UTF-8 also yields `None`.
+fn parent_directory_symbol(path: &Path) -> Option<Symbol> {
+    path.parent()
+        .and_then(|parent| parent.to_str())
+        .filter(|parent| !parent.is_empty())
+        .map(Symbol::intern)
+}
+
+/// Chooses the file and directory symbols for a function body.
+///
+/// Sources are tried in this order:
+/// 1. the file reported by `addr2line` for `body.body_offset`;
+/// 2. the path of the Wasm file being translated;
+/// 3. the literal file name `"unknown"` with no directory.
+fn determine_file_symbols(
+    parsed_module: &ParsedModule,
+    addr2line: &Context>,
+    body: &FunctionBodyData,
+) -> (Symbol, Option) {
+    if let Some(location) = addr2line
+        .find_location(body.body_offset)
+        .ok()
+        .flatten()
+        .and_then(|loc| loc.file.map(|file| file.to_owned()))
+    {
+        let file_symbol = Symbol::intern(location.as_str());
+        (file_symbol, parent_directory_symbol(Path::new(location.as_str())))
+    } else if let Some(path) = parsed_module.wasm_file.path.as_ref() {
+        let file_symbol = Symbol::intern(path.to_string_lossy().as_ref());
+        (file_symbol, parent_directory_symbol(path))
+    } else {
+        (Symbol::intern("unknown"), None)
+    }
+}
+
+/// Returns `(line, column)` for `offset` as reported by `addr2line`.
+///
+/// Yields `(0, None)` when the lookup fails or finds nothing; a location
+/// without a line number reports line 0.
+fn determine_location(addr2line: &Context>, offset: u64) -> (u32, Option) {
+    match addr2line.find_location(offset).ok().flatten() {
+        Some(location) => {
+            let line = location.line.unwrap_or_default();
+            let column = location.column;
+            (line, column)
+        }
+        None => (0, None),
+    }
+}
+
+fn build_local_debug_info( + module: &Module, + func_index: FuncIndex, + wasm_signature: &WasmFuncType, + body: &FunctionBodyData, + subprogram: &DISubprogramAttr, + diagnostics: &DiagnosticsHandler, + dwarf_locals: Option<&FxHashMap>, +) -> Vec> { + let param_count = wasm_signature.params().len(); + let mut local_entries = Vec::new(); + if let Ok(mut locals_reader) = body.body.get_locals_reader().into_diagnostic() { + let decl_count = locals_reader.get_count(); + for _ in 0..decl_count { + if let Ok((count, ty)) = locals_reader.read().into_diagnostic() { + local_entries.push((count, ty)); + } + } + } + let local_count: usize = local_entries.iter().map(|(count, _)| *count as usize).sum(); + + let total = param_count + local_count; + let mut locals = vec![None;
total]; + + for (param_idx, wasm_ty) in wasm_signature.params().iter().enumerate() { + let index_u32 = param_idx as u32; + let dwarf_entry = dwarf_locals.and_then(|map| map.get(&index_u32)); + let mut name_symbol = module + .local_name(func_index, index_u32) + .unwrap_or_else(|| Symbol::intern(format!("arg{param_idx}"))); + if let Some(info) = dwarf_entry + && let Some(symbol) = info.name + { + name_symbol = symbol; + } + let mut attr = DILocalVariableAttr::new( + name_symbol, + subprogram.file, + subprogram.line, + subprogram.column, + ); + attr.arg_index = Some((param_idx + 1) as u32); + if let Ok(ty) = ir_type(*wasm_ty, diagnostics) { + attr.ty = Some(ty); + } + let dwarf_info = dwarf_entry.cloned(); + if let Some(info) = dwarf_info.as_ref() { + if let Some(line) = info.decl_line + && line != 0 + { + attr.line = line; + } + if info.decl_column.is_some() { + attr.column = info.decl_column; + } + } + let locations = dwarf_info.as_ref().map(|info| info.locations.clone()).unwrap_or_default(); + + // Create expression from the first location if available + let expression = if !locations.is_empty() { + let ops = vec![locations[0].storage.to_expression_op()]; + Some(DIExpressionAttr::with_ops(ops)) + } else { + None + }; + + locals[param_idx] = Some(LocalDebugInfo { + attr, + locations, + expression, + }); + } + + let mut next_local_index = param_count; + for (count, ty) in local_entries { + for _ in 0..count { + let index_u32 = next_local_index as u32; + let dwarf_entry = dwarf_locals.and_then(|map| map.get(&index_u32)); + let mut name_symbol = module + .local_name(func_index, index_u32) + .unwrap_or_else(|| Symbol::intern(format!("local{next_local_index}"))); + if let Some(info) = dwarf_entry + && let Some(symbol) = info.name + { + name_symbol = symbol; + } + let mut attr = DILocalVariableAttr::new( + name_symbol, + subprogram.file, + subprogram.line, + subprogram.column, + ); + let wasm_ty = convert_valtype(ty); + if let Ok(ir_ty) = ir_type(wasm_ty, diagnostics) { + 
attr.ty = Some(ir_ty); + } + let dwarf_info = dwarf_entry.cloned(); + if let Some(info) = dwarf_info.as_ref() { + if let Some(line) = info.decl_line + && line != 0 + { + attr.line = line; + } + if info.decl_column.is_some() { + attr.column = info.decl_column; + } + } + let locations = + dwarf_info.as_ref().map(|info| info.locations.clone()).unwrap_or_default(); + + // Create expression from the first location if available + let expression = if !locations.is_empty() { + let ops = vec![locations[0].storage.to_expression_op()]; + Some(DIExpressionAttr::with_ops(ops)) + } else { + None + }; + + locals[next_local_index] = Some(LocalDebugInfo { + attr, + locations, + expression, + }); + next_local_index += 1; + } + } + + locals +}
+
+/// Flattens the per-local location descriptors into one list ordered by
+/// start offset.
+///
+/// Only descriptors whose storage is a Wasm local are scheduled; slots of
+/// `locals` without debug info are skipped. The sort is stable, so entries
+/// with equal offsets keep their local-index order.
+///
+/// NOTE(review): `descriptor.start` comes from DWARF, and
+/// `collect_dwarf_local_data` subtracts the code section offset before it
+/// compares body offsets with DWARF addresses. Confirm that the offsets these
+/// entries are later compared against use the same origin.
+fn build_location_schedule(locals: &[Option]) -> Vec {
+    let mut schedule: Vec<_> = locals
+        .iter()
+        .enumerate()
+        .filter_map(|(var_index, slot)| slot.as_ref().map(|info| (var_index, info)))
+        .flat_map(|(var_index, info)| {
+            info.locations
+                .iter()
+                .filter(|descriptor| descriptor.storage.as_local().is_some())
+                .map(move |descriptor| LocationScheduleEntry {
+                    offset: descriptor.start,
+                    var_index,
+                    storage: descriptor.storage.clone(),
+                })
+        })
+        .collect();
+    schedule.sort_by_key(|entry| entry.offset);
+    schedule
+}
+
+fn collect_dwarf_local_data( + parsed_module: &ParsedModule, + module: &Module, + diagnostics: &DiagnosticsHandler, +) -> FxHashMap> { + let _ = diagnostics; + let dwarf = &parsed_module.debuginfo.dwarf; + + let mut func_by_name = FxHashMap::default(); + for (func_index, _) in module.functions.iter() { + let name = module.func_name(func_index).as_str().to_owned(); + func_by_name.insert(name, func_index); + } + + let mut low_pc_map = FxHashMap::default(); + let code_section_offset = parsed_module.wasm_file.code_section_offset; + for (defined_idx, body) in parsed_module.function_body_inputs.iter() { + let func_index = module.func_index(defined_idx); + let adjusted = body.body_offset.saturating_sub(code_section_offset); + low_pc_map.insert(adjusted,
func_index); + } + + let mut results: FxHashMap> = FxHashMap::default(); + let mut units = dwarf.units(); + loop { + let header = match units.next() { + Ok(Some(header)) => header, + Ok(None) => break, + Err(err) => { + debug!("failed to iterate DWARF units: {err:?}"); + break; + } + }; + let unit = match dwarf.unit(header) { + Ok(unit) => unit, + Err(err) => { + debug!("failed to load DWARF unit: {err:?}"); + continue; + } + }; + + let mut entries = unit.entries(); + loop { + let next = match entries.next_dfs() { + Ok(Some(data)) => data, + Ok(None) => break, + Err(err) => { + debug!("error while traversing DWARF entries: {err:?}"); + break; + } + }; + let (delta, entry) = next; + let _ = delta; // we don't need depth deltas explicitly. + + if entry.tag() == gimli::DW_TAG_subprogram { + let resolved = + resolve_subprogram_target(dwarf, &unit, &func_by_name, &low_pc_map, entry); + let Some((func_index, low_pc, high_pc)) = resolved else { + continue; + }; + + if let Err(err) = collect_subprogram_variables( + dwarf, + &unit, + entry.offset(), + func_index, + low_pc, + high_pc, + &mut results, + ) { + debug!("failed to gather variables for function {:?}: {err:?}", func_index); + } + } + } + } + + results +}
+
+/// Maps a `DW_TAG_subprogram` entry to a function of the module.
+///
+/// The entry's name (`DW_AT_name`, or `DW_AT_linkage_name` when no name has
+/// been read yet) is looked up in `func_by_name` first. If that fails, the
+/// entry's `DW_AT_low_pc` is looked up in `low_pc_map`.
+///
+/// Returns `(function, low_pc, high_pc)`, or `None` when neither lookup
+/// matches. `low_pc` is 0 when the function was matched by name and the entry
+/// carries no `DW_AT_low_pc`. Attribute parsing is best-effort: it stops
+/// silently at the first read error and uses what was gathered so far.
+fn resolve_subprogram_target>(
+    dwarf: &gimli::Dwarf,
+    unit: &gimli::Unit,
+    func_by_name: &FxHashMap,
+    low_pc_map: &FxHashMap,
+    entry: &gimli::DebuggingInformationEntry,
+) -> Option<(FuncIndex, u64, Option)> {
+    let mut maybe_name: Option = None;
+    let mut low_pc = None;
+    let mut high_pc = None;
+    // `DW_AT_high_pc` in offset form, kept until every attribute has been read.
+    let mut high_pc_offset = None;
+
+    let mut attrs = entry.attrs();
+    while let Ok(Some(attr)) = attrs.next() {
+        match attr.name() {
+            gimli::DW_AT_name => {
+                if let Ok(raw) = dwarf.attr_string(unit, attr.value())
+                    && let Ok(name) = raw.to_string_lossy()
+                {
+                    maybe_name = Some(name.into_owned());
+                }
+            }
+            gimli::DW_AT_linkage_name => {
+                if maybe_name.is_none()
+                    && let Ok(raw) = dwarf.attr_string(unit, attr.value())
+                    && let Ok(name) = raw.to_string_lossy()
+                {
+                    maybe_name = Some(name.into_owned());
+                }
+            }
+            gimli::DW_AT_low_pc => match attr.value() {
+                AttributeValue::Addr(addr) => low_pc = Some(addr),
+                AttributeValue::Udata(val) => low_pc = Some(val),
+                _ => {}
+            },
+            gimli::DW_AT_high_pc => match attr.value() {
+                AttributeValue::Addr(addr) => high_pc = Some(addr),
+                AttributeValue::Udata(size) => high_pc_offset = Some(size),
+                _ => {}
+            },
+            _ => {}
+        }
+    }
+
+    // An offset-form high_pc is relative to low_pc. Resolving it here rather
+    // than inside the loop keeps the result independent of attribute order.
+    if let (Some(base), Some(size)) = (low_pc, high_pc_offset) {
+        high_pc = Some(base.saturating_add(size));
+    }
+
+    if let Some(name) = maybe_name
+        && let Some(&func_index) = func_by_name.get(&name)
+    {
+        return Some((func_index, low_pc.unwrap_or_default(), high_pc));
+    }
+
+    if let Some(base) = low_pc
+        && let Some(&func_index) = low_pc_map.get(&base)
+    {
+        return Some((func_index, base, high_pc));
+    }
+
+    None
+}
+
+/// Walks every descendant of the subprogram entry at `offset` and records the
+/// variables and parameters found there under `func_index` in `results`.
+///
+/// Returns the first `gimli` error hit while traversing the entry tree.
+fn collect_subprogram_variables>(
+    dwarf: &gimli::Dwarf,
+    unit: &gimli::Unit,
+    offset: gimli::UnitOffset,
+    func_index: FuncIndex,
+    low_pc: u64,
+    high_pc: Option,
+    results: &mut FxHashMap>,
+) -> gimli::Result<()> {
+    let mut tree = unit.entries_tree(Some(offset))?;
+    let root = tree.root()?;
+    let mut children = root.children();
+    while let Some(child) = children.next()? {
+        walk_variable_nodes(dwarf, unit, child, func_index, low_pc, high_pc, results)?;
+    }
+    Ok(())
+}
+
+fn walk_variable_nodes>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + node: gimli::EntriesTreeNode, + func_index: FuncIndex, + low_pc: u64, + high_pc: Option, + results: &mut FxHashMap>, +) -> gimli::Result<()> { + let entry = node.entry(); + match entry.tag() { + gimli::DW_TAG_formal_parameter | gimli::DW_TAG_variable => { + if let Some((local_index, mut data)) = + decode_variable_entry(dwarf, unit, entry, low_pc, high_pc)?
+ { + let local_map = results.entry(func_index).or_default(); + let entry = local_map.entry(local_index).or_insert_with(DwarfLocalData::default); + entry.name = entry.name.or(data.name); + entry.decl_line = entry.decl_line.or(data.decl_line); + entry.decl_column = entry.decl_column.or(data.decl_column); + if !data.locations.is_empty() { + entry.locations.append(&mut data.locations); + } + } + } + _ => {} + } + + let mut children = node.children(); + while let Some(child) = children.next()? { + walk_variable_nodes(dwarf, unit, child, func_index, low_pc, high_pc, results)?; + } + Ok(()) +}
+
+/// Decodes one `DW_TAG_formal_parameter` / `DW_TAG_variable` entry.
+///
+/// Reads the entry's name, declaration line/column and `DW_AT_location`:
+/// * an expression location yields one descriptor covering `low_pc..high_pc`
+///   (the range of the enclosing subprogram);
+/// * a location-list reference yields one descriptor per list entry.
+///
+/// Only locations stored in a Wasm local are kept. Returns `Ok(None)` when the
+/// entry has no location attribute, when the attribute has another form, or
+/// when no kept location remains. Otherwise returns the index of the first
+/// kept local together with the gathered data.
+fn decode_variable_entry>(
+    dwarf: &gimli::Dwarf,
+    unit: &gimli::Unit,
+    entry: &gimli::DebuggingInformationEntry<'_, '_, R>,
+    low_pc: u64,
+    high_pc: Option,
+) -> gimli::Result> {
+    let mut name_symbol = None;
+    let mut location_attr = None;
+    let mut decl_line = None;
+    let mut decl_column = None;
+
+    let mut attrs = entry.attrs();
+    while let Some(attr) = attrs.next()? {
+        match attr.name() {
+            gimli::DW_AT_name => {
+                if let Ok(raw) = dwarf.attr_string(unit, attr.value())
+                    && let Ok(text) = raw.to_string_lossy()
+                {
+                    name_symbol = Some(Symbol::intern(text.as_ref()));
+                }
+            }
+            gimli::DW_AT_location => location_attr = Some(attr.value()),
+            // Values that do not fit in `u32` are ignored instead of being
+            // truncated into an unrelated line/column number.
+            gimli::DW_AT_decl_line => {
+                if let Some(line) = attr.udata_value().and_then(|line| u32::try_from(line).ok()) {
+                    decl_line = Some(line);
+                }
+            }
+            gimli::DW_AT_decl_column => {
+                if let Some(column) =
+                    attr.udata_value().and_then(|column| u32::try_from(column).ok())
+                {
+                    decl_column = Some(column);
+                }
+            }
+            _ => {}
+        }
+    }
+
+    let Some(location_value) = location_attr else {
+        return Ok(None);
+    };
+
+    let mut locations = Vec::new();
+
+    match location_value {
+        AttributeValue::Exprloc(expr) => {
+            if let Some(storage) = decode_storage_from_expression(&expr, unit)?
+                && storage.as_local().is_some()
+            {
+                locations.push(LocationDescriptor {
+                    start: low_pc,
+                    end: high_pc,
+                    storage,
+                });
+            }
+        }
+        AttributeValue::LocationListsRef(offset) => {
+            // Location-list ranges are relative to the base address of the
+            // compilation unit, not to the enclosing subprogram's low_pc.
+            let mut iter = dwarf.locations.locations(
+                offset,
+                unit.encoding(),
+                unit.low_pc,
+                &dwarf.debug_addr,
+                unit.addr_base,
+            )?;
+            while let Some(entry) = iter.next()? {
+                if let Some(storage) = decode_storage_from_expression(&entry.data, unit)?
+                    && storage.as_local().is_some()
+                {
+                    locations.push(LocationDescriptor {
+                        start: entry.range.begin,
+                        end: Some(entry.range.end),
+                        storage,
+                    });
+                }
+            }
+        }
+        _ => {}
+    }
+
+    // The first kept location decides which local this entry is filed under;
+    // with no kept location there is nothing to report.
+    let Some(local_index) = locations.iter().find_map(|desc| desc.storage.as_local()) else {
+        return Ok(None);
+    };
+    let data = DwarfLocalData {
+        name: name_symbol,
+        locations,
+        decl_line,
+        decl_column,
+    };
+    Ok(Some((local_index, data)))
+}
+
+fn decode_storage_from_expression>( + expr: &gimli::Expression, + unit: &gimli::Unit, +) -> gimli::Result> { + let mut operations = expr.clone().operations(unit.encoding()); + let mut storage = None; + while let Some(op) = operations.next()?
{ + match op { + Operation::WasmLocal { index } => storage = Some(VariableStorage::Local(index)), + Operation::WasmGlobal { index } => storage = Some(VariableStorage::Global(index)), + Operation::WasmStack { index } => storage = Some(VariableStorage::Stack(index)), + Operation::UnsignedConstant { value } => { + storage = Some(VariableStorage::ConstU64(value)) + } + Operation::StackValue => {} + _ => {} + } + } + + Ok(storage) +} + +fn func_local_index(func_index: FuncIndex, module: &Module) -> Option { + module.defined_func_index(func_index).map(|idx| idx.index()) +} diff --git a/frontend/wasm/src/module/func_translation_state.rs b/frontend/wasm/src/module/func_translation_state.rs index 154278798..0a3757372 100644 --- a/frontend/wasm/src/module/func_translation_state.rs +++ b/frontend/wasm/src/module/func_translation_state.rs @@ -5,10 +5,13 @@ //! //! Based on Cranelift's Wasm -> CLIF translator v11.0.0 +use alloc::rc::Rc; +use core::cell::RefCell; + use midenc_dialect_hir::HirOpBuilder; use midenc_hir::{BlockRef, Builder, OperationRef, Signature, SourceSpan, Type, ValueRef}; -use super::function_builder_ext::FunctionBuilderExt; +use super::{debug_info::FunctionDebugInfo, function_builder_ext::FunctionBuilderExt}; use crate::{error::WasmResult, module::types::BlockType}; /// Information about the presence of an associated `else` for an `if`, or the @@ -229,6 +232,8 @@ pub struct FuncTranslationState { /// Is the current translation state still reachable? This is false when translating operators /// like End, Return, or Unreachable. pub(crate) reachable: bool, + /// Optional debug metadata for the current function. 
+ pub(crate) debug_info: Option>>, } impl FuncTranslationState { @@ -238,6 +243,7 @@ impl FuncTranslationState { stack: Vec::new(), control_stack: Vec::new(), reachable: true, + debug_info: None, } } @@ -245,6 +251,7 @@ impl FuncTranslationState { debug_assert!(self.stack.is_empty()); debug_assert!(self.control_stack.is_empty()); self.reachable = true; + self.debug_info = None; } /// Initialize the state for compiling a function with the given signature. @@ -256,6 +263,10 @@ impl FuncTranslationState { self.push_block(exit_block, 0, sig.results().len()); } + pub(crate) fn set_debug_info(&mut self, info: Option>>) { + self.debug_info = info; + } + /// Push a value. pub(crate) fn push1(&mut self, val: ValueRef) { self.stack.push(val); diff --git a/frontend/wasm/src/module/func_translator.rs b/frontend/wasm/src/module/func_translator.rs index 4dbb32b98..25e026f93 100644 --- a/frontend/wasm/src/module/func_translator.rs +++ b/frontend/wasm/src/module/func_translator.rs @@ -21,8 +21,9 @@ use midenc_session::{ use wasmparser::{FuncValidator, FunctionBody, WasmModuleResources}; use super::{ - function_builder_ext::SSABuilderListener, module_env::ParsedModule, - module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, + debug_info::FunctionDebugInfo, function_builder_ext::SSABuilderListener, + module_env::ParsedModule, module_translation_state::ModuleTranslationState, + types::ModuleTypesBuilder, }; use crate::{ code_translator::translate_operator, @@ -70,12 +71,19 @@ impl FuncTranslator { session: &Session, func_validator: &mut FuncValidator, config: &crate::WasmTranslationConfig, + debug_info: Option>>, ) -> WasmResult<()> { let context = func.borrow().as_operation().context_rc(); let mut op_builder = midenc_hir::OpBuilder::new(context) .with_listener(SSABuilderListener::new(self.func_ctx.clone())); let mut builder = FunctionBuilderExt::new(func, &mut op_builder); + if let Some(info) = debug_info.clone() { + builder.set_debug_metadata(info); + } + + 
self.state.set_debug_info(debug_info); + let entry_block = builder.current_block(); builder.seal_block(entry_block); // Declare all predecessors known. @@ -135,6 +143,7 @@ fn declare_parameters( next_local += 1; let param_value = entry_block.borrow().arguments()[i]; + builder.register_parameter(local, param_value); builder.def_var(local, param_value); } next_local @@ -287,6 +296,10 @@ fn parse_function_body( end_span = span; } + // Record the debug span to trigger parameter debug emission and update + // compile unit/subprogram metadata + builder.record_debug_span(span); + translate_operator( &op, builder, @@ -297,6 +310,8 @@ fn parse_function_body( &session.diagnostics, span, )?; + + builder.apply_location_schedule(offset, span); } let pos = reader.original_position(); func_validator.finish(pos).into_diagnostic()?; diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 643e66ba8..71d515882 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -1,20 +1,26 @@ -use alloc::rc::Rc; +use alloc::{rc::Rc, vec::Vec}; use core::cell::RefCell; +use std::path::Path; -use cranelift_entity::SecondaryMap; +use cranelift_entity::{EntityRef as _, SecondaryMap}; +use log::warn; use midenc_dialect_arith::ArithOpBuilder; use midenc_dialect_cf::ControlFlowOpBuilder; use midenc_dialect_hir::HirOpBuilder; use midenc_dialect_ub::UndefinedBehaviorOpBuilder; use midenc_hir::{ - BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, Listener, ListenerType, + BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, Listener, ListenerType, Op, OpBuilder, OperationRef, ProgramPoint, RegionRef, Signature, SmallVec, SourceSpan, Type, ValueRef, dialects::builtin::{BuiltinOpBuilder, FunctionBuilder, FunctionRef}, + interner::Symbol, traits::{BranchOpInterface, Terminator}, }; -use crate::ssa::{SSABuilder, SideEffects, Variable}; +use crate::{ + 
module::debug_info::{FunctionDebugInfo, LocationScheduleEntry}, + ssa::{SSABuilder, SideEffects, Variable}, +}; /// Tracking variables and blocks for SSA construction. pub struct FunctionBuilderContext { @@ -122,6 +128,9 @@ impl Listener for SSABuilderListener { pub struct FunctionBuilderExt<'c, B: ?Sized + Builder> { inner: FunctionBuilder<'c, B>, func_ctx: Rc>, + debug_info: Option>>, + param_values: Vec<(Variable, ValueRef)>, + param_dbg_emitted: bool, } impl<'c> FunctionBuilderExt<'c, OpBuilder> { @@ -131,11 +140,153 @@ impl<'c> FunctionBuilderExt<'c, OpBuilder> { let inner = FunctionBuilder::new(func, builder); - Self { inner, func_ctx } + Self { + inner, + func_ctx, + debug_info: None, + param_values: Vec::new(), + param_dbg_emitted: false, + } } } impl FunctionBuilderExt<'_, B> { + const DI_COMPILE_UNIT_ATTR: &'static str = "di.compile_unit"; + const DI_SUBPROGRAM_ATTR: &'static str = "di.subprogram"; + + pub fn set_debug_metadata(&mut self, info: Rc>) { + self.debug_info = Some(info); + self.param_dbg_emitted = false; + self.refresh_function_debug_attrs(); + } + + fn emit_dbg_value_for_var(&mut self, var: Variable, value: ValueRef, span: SourceSpan) { + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = var.index(); + let (attr_opt, expr_opt) = { + let info = info.borrow(); + let local_info = info.locals.get(idx).and_then(|l| l.as_ref()); + match local_info { + Some(l) => (Some(l.attr.clone()), l.expression.clone()), + None => (None, None), + } + }; + let Some(mut attr) = attr_opt else { + return; + }; + + if let Some((file_symbol, _directory, line, column)) = self.span_to_location(span) { + attr.file = file_symbol; + if line != 0 { + attr.line = line; + } + attr.column = column; + } + + if let Err(err) = + BuiltinOpBuilder::builder_mut(self).dbg_value_with_expr(value, attr, expr_opt, span) + { + warn!("failed to emit dbg.value for local {idx}: {err:?}"); + } + } + + pub fn def_var_with_dbg(&mut self, var: Variable, val: ValueRef, 
span: SourceSpan) { + self.def_var(var, val); + self.emit_dbg_value_for_var(var, val, span); + } + + pub fn register_parameter(&mut self, var: Variable, value: ValueRef) { + self.param_values.push((var, value)); + } + + pub fn record_debug_span(&mut self, span: SourceSpan) { + if span == SourceSpan::UNKNOWN { + return; + } + let Some(info_rc) = self.debug_info.as_ref() else { + return; + }; + + if let Some((file_symbol, directory_symbol, line, column)) = self.span_to_location(span) { + { + let mut info = info_rc.borrow_mut(); + info.compile_unit.file = file_symbol; + info.compile_unit.directory = directory_symbol; + info.subprogram.file = file_symbol; + info.subprogram.line = line; + info.subprogram.column = column; + info.function_span.get_or_insert(span); + } + self.refresh_function_debug_attrs(); + self.emit_parameter_dbg_if_needed(span); + } + } + + pub fn apply_location_schedule(&mut self, offset: u64, span: SourceSpan) { + let Some(info_rc) = self.debug_info.as_ref() else { + return; + }; + + let updates = { + let mut info = info_rc.borrow_mut(); + let mut pending = Vec::new(); + while info.next_location_event < info.location_schedule.len() { + let entry = &info.location_schedule[info.next_location_event]; + if entry.offset > offset { + break; + } + pending.push(entry.clone()); + info.next_location_event += 1; + } + pending + }; + + for entry in updates { + self.emit_scheduled_dbg_value(entry, span); + } + } + + fn emit_scheduled_dbg_value(&mut self, entry: LocationScheduleEntry, span: SourceSpan) { + let var = Variable::new(entry.var_index); + let Ok(value) = self.try_use_var(var) else { + return; + }; + + // Create expression from the scheduled location + let expression = { + let ops = vec![entry.storage.to_expression_op()]; + Some(midenc_hir::DIExpressionAttr::with_ops(ops)) + }; + + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = entry.var_index; + let attr_opt = { + let info = info.borrow(); + info.local_attr(idx).cloned() + 
}; + let Some(mut attr) = attr_opt else { + return; + }; + + if let Some((file_symbol, _directory, line, column)) = self.span_to_location(span) { + attr.file = file_symbol; + if line != 0 { + attr.line = line; + } + attr.column = column; + } + + if let Err(err) = + BuiltinOpBuilder::builder_mut(self).dbg_value_with_expr(value, attr, expression, span) + { + warn!("failed to emit scheduled dbg.value for local {idx}: {err:?}"); + } + } + pub fn name(&self) -> Ident { *self.inner.func.borrow().name() } @@ -414,6 +565,61 @@ impl FunctionBuilderExt<'_, B> { inst_branch.change_branch_destination(old_block, new_block); self.func_ctx.borrow_mut().ssa.declare_block_predecessor(new_block, branch_inst); } + + fn refresh_function_debug_attrs(&mut self) { + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let info = info.borrow(); + let mut func = self.inner.func.borrow_mut(); + let op = func.as_operation_mut(); + op.set_intrinsic_attribute(Self::DI_COMPILE_UNIT_ATTR, Some(info.compile_unit.clone())); + op.set_intrinsic_attribute(Self::DI_SUBPROGRAM_ATTR, Some(info.subprogram.clone())); + } + + fn emit_parameter_dbg_if_needed(&mut self, span: SourceSpan) { + if self.param_dbg_emitted { + return; + } + self.param_dbg_emitted = true; + let params: Vec<_> = self.param_values.to_vec(); + for (var, value) in params { + let skip_due_to_schedule = if let Some(info_rc) = self.debug_info.as_ref() { + let info = info_rc.borrow(); + info.locals + .get(var.index()) + .and_then(|entry| entry.as_ref()) + .is_some_and(|entry| !entry.locations.is_empty()) + } else { + false + }; + if skip_due_to_schedule { + continue; + } + self.emit_dbg_value_for_var(var, value, span); + } + } + + fn span_to_location( + &self, + span: SourceSpan, + ) -> Option<(Symbol, Option, u32, Option)> { + if span == SourceSpan::UNKNOWN { + return None; + } + + let context = self.inner.builder().context(); + let session = context.session(); + let source_file = 
session.source_manager.get(span.source_id()).ok()?; + let uri = source_file.uri().as_str(); + let path = Path::new(uri); + let file_symbol = Symbol::intern(uri); + let directory_symbol = path.parent().and_then(|parent| parent.to_str()).map(Symbol::intern); + let location = source_file.location(span); + let line = location.line.to_u32(); + let column = location.column.to_u32(); + Some((file_symbol, directory_symbol, line, Some(column))) + } } impl<'f, B: ?Sized + Builder> ArithOpBuilder<'f, B> for FunctionBuilderExt<'f, B> { diff --git a/frontend/wasm/src/module/mod.rs b/frontend/wasm/src/module/mod.rs index d874e82de..d68dccf8a 100644 --- a/frontend/wasm/src/module/mod.rs +++ b/frontend/wasm/src/module/mod.rs @@ -12,6 +12,7 @@ use self::types::*; use crate::{component::SignatureIndex, error::WasmResult, unsupported_diag}; pub mod build_ir; +pub mod debug_info; pub mod func_translation_state; pub mod func_translator; pub mod function_builder_ext; @@ -322,6 +323,14 @@ impl Module { .unwrap_or(Symbol::intern(format!("func{}", index.as_u32()))) } + /// Returns the name of the given local (including parameters) if available in the name section. 
+ pub fn local_name(&self, func: FuncIndex, index: u32) -> Option { + self.name_section + .locals_names + .get(&func) + .and_then(|locals| locals.get(&index).copied()) + } + /// Sets the fallback name of this module, used if there is no module name in the name section pub fn set_name_fallback(&mut self, name_fallback: Cow<'static, str>) { self.name_fallback = Some(Ident::from(name_fallback.as_ref())); diff --git a/frontend/wasm/src/module/module_env.rs b/frontend/wasm/src/module/module_env.rs index cd22b54aa..4aea0329b 100644 --- a/frontend/wasm/src/module/module_env.rs +++ b/frontend/wasm/src/module/module_env.rs @@ -1,9 +1,9 @@ -use alloc::sync::Arc; +use alloc::{rc::Rc, sync::Arc}; use core::ops::Range; use std::path::PathBuf; use cranelift_entity::{PrimaryMap, packed_option::ReservedValue}; -use midenc_hir::{Ident, interner::Symbol}; +use midenc_hir::{FxHashMap, Ident, interner::Symbol}; use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic, Report, Severity}; use wasmparser::{ CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind, @@ -66,6 +66,10 @@ pub struct ParsedModule<'data> { /// DWARF debug information, if enabled, parsed from the module. pub debuginfo: DebugInfoData<'data>, + /// Precomputed debug metadata for functions + pub function_debug: + FxHashMap>>, + /// Set if debuginfo was found but it was not parsed due to `Tunables` /// configuration. 
pub has_unparsed_debuginfo: bool, @@ -87,6 +91,8 @@ pub struct FunctionBodyData<'a> { pub body: FunctionBody<'a>, /// Validator for the function body pub validator: FuncToValidate, + /// Offset in the original wasm binary where this function body starts + pub body_offset: u64, } #[derive(Default)] @@ -581,7 +587,12 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { params: sig.params().into(), }); } - self.result.function_body_inputs.push(FunctionBodyData { validator, body }); + let body_offset = body.range().start as u64; + self.result.function_body_inputs.push(FunctionBodyData { + validator, + body, + body_offset, + }); self.result.code_index += 1; Ok(()) } diff --git a/hir-transform/src/dead_debug_ops.rs b/hir-transform/src/dead_debug_ops.rs new file mode 100644 index 000000000..a695dd682 --- /dev/null +++ b/hir-transform/src/dead_debug_ops.rs @@ -0,0 +1,126 @@ +//! This pass removes debug operations (DbgValue) whose operands are no longer +//! live. This prevents issues during codegen where the operand stack state +//! becomes inconsistent due to debug ops referencing dropped values. + +use alloc::vec::Vec; + +use midenc_hir::{ + EntityMut, Operation, OperationName, OperationRef, Report, + dialects::builtin, + pass::{Pass, PassExecutionState, PostPassStatus}, +}; +use midenc_hir_analysis::analyses::LivenessAnalysis; + +/// Removes debug operations whose operands are dead. +/// +/// Debug operations like `DbgValue` reference SSA values to provide debug +/// information. However, these operations don't actually consume their operands; +/// they just observe them. This can cause issues during codegen when the +/// referenced value has been dropped from the operand stack. +/// +/// This pass removes debug ops whose operands are not live after the debug op. +/// If a value is live after the debug op, it will still be available on the +/// operand stack during codegen and can be safely observed. 
+pub struct RemoveDeadDebugOps;
+
+impl Pass for RemoveDeadDebugOps {
+    type Target = Operation;
+
+    /// Name of the pass.
+    fn name(&self) -> &'static str {
+        "remove-dead-debug-ops"
+    }
+
+    /// Argument string of the pass (same text as `name`).
+    fn argument(&self) -> &'static str {
+        "remove-dead-debug-ops"
+    }
+
+    /// Short description of the pass.
+    fn description(&self) -> &'static str {
+        "Removes debug operations whose operands are dead"
+    }
+
+    /// Returns `true` for every operation name.
+    fn can_schedule_on(&self, _name: &OperationName) -> bool {
+        true
+    }
+
+    /// Erases every collected debug op whose first operand is not live after
+    /// the debug op itself, then records on `state` whether anything changed.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the error returned when the liveness analysis cannot be
+    /// obtained from the analysis manager.
+    fn run_on_operation(
+        &mut self,
+        op: EntityMut<'_, Self::Target>,
+        state: &mut PassExecutionState,
+    ) -> Result<(), Report> {
+        // Keep only a reference and give up the mutable handle, so the operation
+        // (and everything nested in it) can be borrowed again below.
+        let op_ref = op.as_operation_ref();
+        drop(op);
+
+        // Collect all debug ops to potentially remove
+        let mut debug_ops_to_check: Vec<OperationRef> = Vec::new();
+
+        collect_debug_ops(&op_ref, &mut debug_ops_to_check);
+
+        if debug_ops_to_check.is_empty() {
+            state.set_post_pass_status(PostPassStatus::Unchanged);
+            return Ok(());
+        }
+
+        // Get liveness analysis
+        let analysis_manager = state.analysis_manager();
+        let liveness = analysis_manager.get_analysis::<LivenessAnalysis>()?;
+
+        let mut removed_any = false;
+
+        // Check each debug op and remove if its operand will be dead by codegen time
+        for mut debug_op in debug_ops_to_check {
+            let should_remove = {
+                let debug_op_borrowed = debug_op.borrow();
+
+                // Get the operand (first operand for DbgValue). An op without
+                // operands observes nothing, so it is left in place.
+                let operands = debug_op_borrowed.operands();
+                let Some(operand) = operands.iter().next() else {
+                    continue;
+                };
+                let operand_value = operand.borrow().as_value_ref();
+
+                // Only remove debug ops if their operand is not live after the debug op.
+                // If the value is live after, it will still be on the operand stack
+                // during codegen and can be safely observed by the debug op.
+                //
+                // Note: We previously also removed debug ops if the value had other uses,
+                // but this was too aggressive - if the value is live after the debug op,
+                // it doesn't matter how many uses it has; it's still available.
+                !liveness.is_live_after(operand_value, &debug_op_borrowed)
+            };
+
+            // The shared borrow above ends with the block, so the op can now be
+            // borrowed mutably and erased.
+            if should_remove {
+                debug_op.borrow_mut().erase();
+                removed_any = true;
+            }
+        }
+
+        state.set_post_pass_status(if removed_any {
+            PostPassStatus::Changed
+        } else {
+            PostPassStatus::Unchanged
+        });
+
+        Ok(())
+    }
+}
+
+/// Recursively collect all debug operations in the given operation
+///
+/// `op` itself is tested first, then every operation in every block of every
+/// region of `op`. Matches are appended to `debug_ops` in visiting order.
+///
+/// NOTE(review): only `DbgValue` is matched here; `DbgDeclare` ops are never
+/// collected by this pass - confirm that is intended.
+fn collect_debug_ops(op: &OperationRef, debug_ops: &mut Vec<OperationRef>) {
+    let op = op.borrow();
+
+    // Check if this is a debug op
+    if op.is::<builtin::DbgValue>() {
+        debug_ops.push(op.as_operation_ref());
+    }
+
+    // Recurse into regions
+    for region in op.regions() {
+        for block in region.body() {
+            for inner_op in block.body() {
+                collect_debug_ops(&inner_op.as_operation_ref(), debug_ops);
+            }
+        }
+    }
+}
diff --git a/hir-transform/src/lib.rs b/hir-transform/src/lib.rs
index e869c2578..68a898494 100644
--- a/hir-transform/src/lib.rs
+++ b/hir-transform/src/lib.rs
@@ -10,6 +10,7 @@ mod canonicalization;
 mod cfg_to_scf;
 //mod cse;
 //mod dce;
+mod dead_debug_ops;
 //mod inliner;
 mod sccp;
 mod sink;
@@ -21,6 +22,7 @@ mod spill;
 pub use self::{
     canonicalization::Canonicalizer,
     cfg_to_scf::{CFGToSCFInterface, transform_cfg_to_scf},
+    dead_debug_ops::RemoveDeadDebugOps,
     sccp::SparseConditionalConstantPropagation,
     sink::{ControlFlowSink, SinkOperandDefs},
     spill::{ReloadLike, SpillLike, TransformSpillsInterface, transform_spills},
diff --git a/hir/src/attributes.rs b/hir/src/attributes.rs
index 230ad2c62..9fdff061c 100644
--- a/hir/src/attributes.rs
+++ b/hir/src/attributes.rs
@@ -1,10 +1,11 @@
+mod debug;
 mod overflow;
 mod visibility;
 
 use alloc::{boxed::Box, collections::BTreeMap, vec, vec::Vec};
 use core::{any::Any, borrow::Borrow, fmt};
 
-pub use self::{overflow::Overflow, visibility::Visibility};
+pub use self::{debug::*, overflow::Overflow, visibility::Visibility};
 use crate::{Immediate, interner::Symbol};
 
 pub mod markers {
diff --git a/hir/src/attributes/debug.rs b/hir/src/attributes/debug.rs
new file mode 100644
index 000000000..0dee8171d
--- /dev/null
+++ b/hir/src/attributes/debug.rs
@@ -0,0 +1,254 @@
+use alloc::{format, vec::Vec};
+
+use crate::{
+    Type, define_attr_type,
+    formatter::{Document, PrettyPrint, const_text, text},
+    interner::Symbol,
+};
+
+/// Represents the compilation unit associated with debug information.
+///
+/// The fields in this struct are intentionally aligned with the subset of
+/// DWARF metadata we currently care about when tracking variable locations.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct DICompileUnitAttr {
+    /// Source language of the unit, as an interned symbol.
+    pub language: Symbol,
+    /// Source file of the unit, as an interned symbol.
+    pub file: Symbol,
+    /// Directory of the unit, when known.
+    pub directory: Option<Symbol>,
+    /// Producer of the unit, when known.
+    pub producer: Option<Symbol>,
+    /// Printed as the bare `optimized` flag when set.
+    pub optimized: bool,
+}
+
+define_attr_type!(DICompileUnitAttr);
+
+impl DICompileUnitAttr {
+    /// Creates a compile unit for `language` and `file`.
+    ///
+    /// `directory` and `producer` start out as `None` and `optimized` as
+    /// `false`; all fields are public and can be set afterwards.
+    pub fn new(language: Symbol, file: Symbol) -> Self {
+        Self {
+            language,
+            file,
+            directory: None,
+            producer: None,
+            optimized: false,
+        }
+    }
+}
+
+impl PrettyPrint for DICompileUnitAttr {
+    /// Renders `di.compile_unit(language = .., file = ..[, directory = ..][, producer = ..][, optimized])`.
+    fn render(&self) -> Document {
+        let mut doc = const_text("di.compile_unit(")
+            + text(format!("language = {}", self.language.as_str()))
+            + const_text(", file = ")
+            + text(self.file.as_str());
+
+        // Optional parts are appended only when present.
+        if let Some(directory) = self.directory {
+            doc = doc + const_text(", directory = ") + text(directory.as_str());
+        }
+        if let Some(producer) = self.producer {
+            doc = doc + const_text(", producer = ") + text(producer.as_str());
+        }
+        if self.optimized {
+            doc += const_text(", optimized");
+        }
+
+        doc + const_text(")")
+    }
+}
+
+/// Represents a subprogram (function) scope for debug information.
+/// The compile unit is not embedded but typically stored separately on the module.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct DISubprogramAttr {
+    /// Name of the subprogram.
+    pub name: Symbol,
+    /// Linkage name, when one is recorded.
+    pub linkage_name: Option<Symbol>,
+    /// Source file of the subprogram.
+    pub file: Symbol,
+    /// Source line of the subprogram.
+    pub line: u32,
+    /// Source column, when known.
+    pub column: Option<u32>,
+    /// Printed as the bare `definition` flag when set.
+    pub is_definition: bool,
+    /// Printed as the bare `local` flag when set.
+    pub is_local: bool,
+}
+
+define_attr_type!(DISubprogramAttr);
+
+impl DISubprogramAttr {
+    /// Creates a subprogram record for `name` at `file`:`line`[:`column`].
+    ///
+    /// The result has no linkage name, is marked as a definition and is not
+    /// marked local; all fields are public and can be adjusted afterwards.
+    pub fn new(name: Symbol, file: Symbol, line: u32, column: Option<u32>) -> Self {
+        Self {
+            name,
+            linkage_name: None,
+            file,
+            line,
+            column,
+            is_definition: true,
+            is_local: false,
+        }
+    }
+}
+
+impl PrettyPrint for DISubprogramAttr {
+    /// Renders `di.subprogram(name = .., file = .., line = ..[, column = ..][, linkage = ..][, definition][, local])`.
+    fn render(&self) -> Document {
+        let mut doc = const_text("di.subprogram(")
+            + text(format!("name = {}", self.name.as_str()))
+            + const_text(", file = ")
+            + text(self.file.as_str())
+            + const_text(", line = ")
+            + text(format!("{}", self.line));
+
+        // Optional parts are appended only when present.
+        if let Some(column) = self.column {
+            doc = doc + const_text(", column = ") + text(format!("{}", column));
+        }
+        if let Some(linkage) = self.linkage_name {
+            doc = doc + const_text(", linkage = ") + text(linkage.as_str());
+        }
+        if self.is_definition {
+            doc += const_text(", definition");
+        }
+        if self.is_local {
+            doc += const_text(", local");
+        }
+
+        doc + const_text(")")
+    }
+}
+
+/// Represents a local variable debug record.
+/// The scope (DISubprogramAttr) is not embedded but instead stored on the containing function.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct DILocalVariableAttr {
+    /// Name of the variable.
+    pub name: Symbol,
+    /// Argument index when the variable is a function parameter.
+    /// NOTE(review): presumably 0-based (the MASM lowering adds 1 before
+    /// emitting it) - confirm.
+    pub arg_index: Option<u32>,
+    /// Source file of the declaration.
+    pub file: Symbol,
+    /// Source line of the declaration.
+    pub line: u32,
+    /// Source column, when known.
+    pub column: Option<u32>,
+    /// Type of the variable, when known.
+    pub ty: Option<Type>,
+}
+
+define_attr_type!(DILocalVariableAttr);
+
+impl DILocalVariableAttr {
+    /// Creates a variable record for `name` at `file`:`line`[:`column`].
+    ///
+    /// `arg_index` and `ty` start out as `None`; both fields are public and
+    /// can be set afterwards.
+    pub fn new(name: Symbol, file: Symbol, line: u32, column: Option<u32>) -> Self {
+        Self {
+            name,
+            arg_index: None,
+            file,
+            line,
+            column,
+            ty: None,
+        }
+    }
+}
+
+impl PrettyPrint for DILocalVariableAttr {
+    /// Renders `di.local_variable(name = .., file = .., line = ..[, column = ..][, arg = ..][, ty = ..])`.
+    fn render(&self) -> Document {
+        let mut doc = const_text("di.local_variable(")
+            + text(format!("name = {}", self.name.as_str()))
+            + const_text(", file = ")
+            + text(self.file.as_str())
+            + const_text(", line = ")
+            + text(format!("{}", self.line));
+
+        // Optional parts are appended only when present.
+        if let Some(column) = self.column {
+            doc = doc + const_text(", column = ") + text(format!("{}", column));
+        }
+        if let Some(arg_index) = self.arg_index {
+            doc = doc + const_text(", arg = ") + text(format!("{}", arg_index));
+        }
+        if let Some(ty) = &self.ty {
+            doc = doc + const_text(", ty = ") + ty.render();
+        }
+
+        doc + const_text(")")
+    }
+}
+
+/// Represents DWARF expression operations for describing variable locations
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum DIExpressionOp {
+    /// DW_OP_WASM_location 0x00 - Variable is in a WebAssembly local
+    WasmLocal(u32),
+    /// DW_OP_WASM_location 0x01 - Variable is in a WebAssembly global
+    WasmGlobal(u32),
+    /// DW_OP_WASM_location 0x02 - Variable is on the WebAssembly operand stack
+    WasmStack(u32),
+    /// DW_OP_constu - Unsigned constant value
+    ConstU64(u64),
+    /// DW_OP_consts - Signed constant value
+    ConstS64(i64),
+    /// DW_OP_plus_uconst - Add unsigned constant to top of stack
+    PlusUConst(u64),
+    /// DW_OP_minus - Subtract top two stack values
+    Minus,
+    /// DW_OP_plus - Add top two stack values
+    Plus,
+    /// DW_OP_deref - Dereference the address at top of stack
+    Deref,
+    /// DW_OP_stack_value - The value on the stack is the value of the variable
+    StackValue,
+    /// DW_OP_piece - Describes a piece of a variable
+    Piece(u64),
+    /// DW_OP_bit_piece - Describes a piece of a variable in bits
+    BitPiece { size: u64, offset: u64 },
+    /// Placeholder for unsupported operations
+    Unsupported(Symbol),
+}
+
+/// Represents a DWARF expression that describes how to compute or locate a variable's value
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct DIExpressionAttr {
+    /// The operations of the expression, in order.
+    pub operations: Vec<DIExpressionOp>,
+}
+
+define_attr_type!(DIExpressionAttr);
+
+impl DIExpressionAttr {
+    /// Creates an expression with no operations.
+    pub fn new() -> Self {
+        Self {
+            operations: Vec::new(),
+        }
+    }
+
+    /// Creates an expression from the given operations.
+    pub fn with_ops(operations: Vec<DIExpressionOp>) -> Self {
+        Self { operations }
+    }
+
+    /// Returns `true` when the expression has no operations.
+    pub fn is_empty(&self) -> bool {
+        self.operations.is_empty()
+    }
+}
+
+impl Default for DIExpressionAttr {
+    /// Same as [`DIExpressionAttr::new`]: an expression with no operations.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl PrettyPrint for DIExpressionAttr {
+    /// Renders `di.expression(op, op, ..)`, or `di.expression()` when empty.
+    fn render(&self) -> Document {
+        if self.operations.is_empty() {
+            return const_text("di.expression()");
+        }
+
+        let mut doc = const_text("di.expression(");
+        for (i, op) in self.operations.iter().enumerate() {
+            // Separator goes before every operation except the first.
+            if i > 0 {
+                doc += const_text(", ");
+            }
+            doc += match op {
+                DIExpressionOp::WasmLocal(idx) => text(format!("DW_OP_WASM_local {}", idx)),
+                DIExpressionOp::WasmGlobal(idx) => text(format!("DW_OP_WASM_global {}", idx)),
+                DIExpressionOp::WasmStack(idx) => text(format!("DW_OP_WASM_stack {}", idx)),
+                DIExpressionOp::ConstU64(val) => text(format!("DW_OP_constu {}", val)),
+                DIExpressionOp::ConstS64(val) => text(format!("DW_OP_consts {}", val)),
+                DIExpressionOp::PlusUConst(val) => text(format!("DW_OP_plus_uconst {}", val)),
+                DIExpressionOp::Minus => const_text("DW_OP_minus"),
+                DIExpressionOp::Plus => const_text("DW_OP_plus"),
+                DIExpressionOp::Deref => const_text("DW_OP_deref"),
+                DIExpressionOp::StackValue => const_text("DW_OP_stack_value"),
+                DIExpressionOp::Piece(size) => text(format!("DW_OP_piece {}", size)),
+                DIExpressionOp::BitPiece { size, offset } => {
+                    text(format!("DW_OP_bit_piece {} {}", size, offset))
+                }
+                DIExpressionOp::Unsupported(name) => text(name.as_str()),
+            };
+        }
+        doc + const_text(")")
+    }
+}
diff --git a/hir/src/dialects/builtin.rs b/hir/src/dialects/builtin.rs
index d6bb2711b..7e3b8ec56 100644
--- a/hir/src/dialects/builtin.rs
+++ b/hir/src/dialects/builtin.rs
@@ -43,6 +43,8 @@ impl DialectRegistration for BuiltinDialect {
         info.register_operation::();
         info.register_operation::();
         info.register_operation::();
+        info.register_operation::();
+        info.register_operation::();
     }
 }
 
diff --git a/hir/src/dialects/builtin/builders.rs b/hir/src/dialects/builtin/builders.rs
index efd1e2fe2..ef548c953 100644
--- a/hir/src/dialects/builtin/builders.rs
+++ b/hir/src/dialects/builtin/builders.rs
@@ -7,7 +7,9 @@ pub use self::{component::*, function::*, module::*, world::*};
 use super::ops::*;
 use crate::{
     Builder, BuilderExt, Ident, Immediate, OpBuilder, Report, Signature, SourceSpan, Spanned, Type,
-    UnsafeIntrusiveEntityRef, ValueRef, Visibility, constants::ConstantData,
+    UnsafeIntrusiveEntityRef, ValueRef, Visibility,
+    attributes::{DIExpressionAttr, DILocalVariableAttr},
+    constants::ConstantData,
 };
 
 pub trait BuiltinOpBuilder<'f, B: ?Sized + Builder> {
@@ -84,6 +86,27 @@ pub trait BuiltinOpBuilder<'f, B: ?Sized + Builder> {
         op_builder(arg)
     }
 
+    fn dbg_value(
+        &mut self,
+        value: ValueRef,
+        variable: DILocalVariableAttr,
+        span: SourceSpan,
+    ) -> Result {
+        self.dbg_value_with_expr(value, variable, None, span)
+    }
+
+    fn dbg_value_with_expr(
+        &mut self,
+        value: ValueRef,
+        variable: DILocalVariableAttr,
+        expression: Option,
+        span: SourceSpan,
+    ) -> Result {
+        let expr = expression.unwrap_or_default();
+        let op_builder = self.builder_mut().create::(span);
+        op_builder(value, variable, expr)
+    }
+
     fn builder(&self) -> &B;
     fn builder_mut(&mut self) -> &mut B;
 }
diff --git a/hir/src/dialects/builtin/ops.rs b/hir/src/dialects/builtin/ops.rs
index 53f214cb5..c66ec6da1 100644
--- a/hir/src/dialects/builtin/ops.rs
+++ b/hir/src/dialects/builtin/ops.rs
@@ -1,5 +1,6 @@
mod cast; mod component; +mod debug; mod function; mod global_variable; mod interface; @@ -13,6 +14,7 @@ pub use self::{ Component, ComponentBuilder as PrimComponentBuilder, ComponentExport, ComponentId, ComponentInterface, ComponentRef, ModuleExport, ModuleInterface, }, + debug::{DbgDeclare, DbgDeclareRef, DbgValue, DbgValueRef}, function::{ Function, FunctionBuilder as PrimFunctionBuilder, FunctionRef, LocalVariable, Ret, RetImm, }, diff --git a/hir/src/dialects/builtin/ops/debug.rs b/hir/src/dialects/builtin/ops/debug.rs new file mode 100644 index 000000000..a6cecaf91 --- /dev/null +++ b/hir/src/dialects/builtin/ops/debug.rs @@ -0,0 +1,75 @@ +use crate::{ + UnsafeIntrusiveEntityRef, + attributes::{DIExpressionAttr, DILocalVariableAttr}, + derive::operation, + dialects::builtin::BuiltinDialect, + traits::AnyType, +}; + +pub type DbgValueRef = UnsafeIntrusiveEntityRef; +pub type DbgDeclareRef = UnsafeIntrusiveEntityRef; + +/// Records the value of an SSA operand for debug information consumers. +#[operation(dialect = BuiltinDialect)] +pub struct DbgValue { + #[operand] + value: AnyType, + #[attr] + variable: DILocalVariableAttr, + #[attr] + expression: DIExpressionAttr, +} + +/// Records the storage location of a source-level variable. 
+#[operation(dialect = BuiltinDialect)]
+pub struct DbgDeclare {
+    // The operand holding the variable's storage address; any type is accepted.
+    #[operand]
+    address: AnyType,
+    // The source-level variable being declared.
+    #[attr]
+    variable: DILocalVariableAttr,
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::{rc::Rc, string::ToString};
+
+    use crate::{
+        Builder, Context, OpPrinter, OpPrintingFlags, SourceSpan, Type,
+        attributes::DILocalVariableAttr,
+        dialects::builtin::{BuiltinDialect, BuiltinOpBuilder},
+        interner::Symbol,
+    };
+
+    /// Builds the variable record used by the test: `x` at `main.rs:12:7`,
+    /// argument index 0, type `i32`.
+    fn make_variable() -> DILocalVariableAttr {
+        let mut variable =
+            DILocalVariableAttr::new(Symbol::intern("x"), Symbol::intern("main.rs"), 12, Some(7));
+        variable.arg_index = Some(0);
+        variable.ty = Some(Type::I32);
+        variable
+    }
+
+    /// A `dbg_value` op built on a block argument keeps its variable attribute,
+    /// is appended at the end of the block, and prints the variable metadata.
+    #[test]
+    fn dbg_value_carries_metadata() {
+        let context = Rc::new(Context::default());
+        context.get_or_register_dialect::<BuiltinDialect>();
+
+        // A block with a single i32 argument provides the value to describe.
+        let block = context.create_block_with_params([Type::I32]);
+        let arg = block.borrow().arguments()[0];
+        let value = arg.borrow().as_value_ref();
+
+        let mut builder = context.clone().builder();
+        builder.set_insertion_point_to_end(block);
+
+        let variable = make_variable();
+        let dbg_value = builder
+            .dbg_value(value, variable.clone(), SourceSpan::UNKNOWN)
+            .expect("failed to create dbg.value op");
+
+        assert_eq!(dbg_value.borrow().variable(), &variable);
+        assert_eq!(block.borrow().back(), Some(dbg_value.as_operation_ref()));
+
+        let op = dbg_value.as_operation_ref();
+        let printed = op.borrow().print(&OpPrintingFlags::default(), context.as_ref()).to_string();
+        assert!(printed.contains("di.local_variable"));
+    }
+}
diff --git a/hir/src/lib.rs b/hir/src/lib.rs
index 451846177..13c137dcd 100644
--- a/hir/src/lib.rs
+++ b/hir/src/lib.rs
@@ -83,7 +83,8 @@ pub use midenc_session::diagnostics;
 
 pub use self::{
     attributes::{
-        ArrayAttr, Attribute, AttributeSet, AttributeValue, DictAttr, Overflow, SetAttr,
+        ArrayAttr, Attribute, AttributeSet, AttributeValue, DICompileUnitAttr, DIExpressionAttr,
+        DIExpressionOp, DILocalVariableAttr, DISubprogramAttr, DictAttr, Overflow, SetAttr,
         Visibility,
markers::*, }, direction::{Backward, Direction, Forward}, diff --git a/midenc-compile/Cargo.toml b/midenc-compile/Cargo.toml index 470c0bd0f..60097dc9a 100644 --- a/midenc-compile/Cargo.toml +++ b/midenc-compile/Cargo.toml @@ -32,6 +32,7 @@ log.workspace = true inventory.workspace = true midenc-codegen-masm.workspace = true miden-assembly.workspace = true +miden-debug-types.workspace = true miden-mast-package.workspace = true midenc-frontend-wasm.workspace = true midenc-dialect-scf.workspace = true diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs new file mode 100644 index 000000000..4902f9c2e --- /dev/null +++ b/midenc-compile/src/debug_info.rs @@ -0,0 +1,312 @@ +//! Debug info section builder for MASP packages. +//! +//! This module provides utilities for collecting debug information from the HIR +//! and building a `DebugInfoSection` that can be serialized into the `.debug_info` +//! custom section of a MASP package. + +use alloc::{collections::BTreeMap, string::ToString}; + +use miden_debug_types::{ColumnNumber, LineNumber}; +use miden_mast_package::debug_info::{ + DebugFileInfo, DebugFunctionInfo, DebugInfoSection, DebugPrimitiveType, DebugTypeInfo, + DebugVariableInfo, +}; +use midenc_hir::{DILocalVariableAttr, DISubprogramAttr, OpExt, Type, dialects::builtin}; + +/// Builder for constructing a `DebugInfoSection` from HIR components. 
+pub struct DebugInfoBuilder {
+    /// The section being assembled.
+    section: DebugInfoSection,
+    /// Maps source file paths to their indices in the file table
+    file_indices: BTreeMap<alloc::string::String, u32>,
+    /// Maps type keys (see `TypeKey`) to their indices in the type table
+    type_indices: BTreeMap<TypeKey, u32>,
+}
+
+/// A key for deduplicating types
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+enum TypeKey {
+    Primitive(u8), // Use discriminant instead of the enum directly
+    /// Pointer, keyed by the type-table index of the pointee.
+    Pointer(u32),
+    /// Array, keyed by the type-table index of the element and the element count.
+    Array(u32, Option<u32>),
+    Unknown,
+}
+
+impl Default for DebugInfoBuilder {
+    /// Same as [`DebugInfoBuilder::new`].
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DebugInfoBuilder {
+    /// Creates a new debug info builder.
+    pub fn new() -> Self {
+        Self {
+            section: DebugInfoSection::new(),
+            file_indices: BTreeMap::new(),
+            type_indices: BTreeMap::new(),
+        }
+    }
+
+    /// Adds a string to the string table and returns its index.
+    ///
+    /// NOTE(review): the `Into` target was lost in extraction and is
+    /// reconstructed as an owned string - confirm against the signature of
+    /// `DebugInfoSection::add_string`.
+    pub fn add_string(&mut self, s: impl Into<alloc::string::String>) -> u32 {
+        self.section.add_string(s)
+    }
+
+    /// Adds a file to the file table and returns its index.
+    ///
+    /// A path that was added before returns its existing index.
+    ///
+    /// NOTE(review): the cache is keyed on `path` alone, so a repeated path
+    /// with a different `directory` gets the first entry back - confirm that
+    /// is intended.
+    pub fn add_file(&mut self, path: &str, directory: Option<&str>) -> u32 {
+        if let Some(&idx) = self.file_indices.get(path) {
+            return idx;
+        }
+
+        let path_idx = self.section.add_string(path);
+        let directory_idx = directory.map(|d| self.section.add_string(d));
+
+        let mut file = DebugFileInfo::new(path_idx);
+        if let Some(dir_idx) = directory_idx {
+            file = file.with_directory(dir_idx);
+        }
+
+        let idx = self.section.add_file(file);
+        self.file_indices.insert(path.to_string(), idx);
+        idx
+    }
+
+    /// Adds a type to the type table and returns its index.
+    ///
+    /// The HIR type is converted first (which may itself add the pointee or
+    /// element type), then looked up by its key so that an equal type already
+    /// in the table is reused.
+    pub fn add_type(&mut self, ty: &Type) -> u32 {
+        let debug_type = hir_type_to_debug_type(ty, self);
+        let key = type_to_key(&debug_type);
+
+        if let Some(&idx) = self.type_indices.get(&key) {
+            return idx;
+        }
+
+        let idx = self.section.add_type(debug_type);
+        self.type_indices.insert(key, idx);
+        idx
+    }
+
+    /// Adds a primitive type and returns its index.
+    pub fn add_primitive_type(&mut self, prim: DebugPrimitiveType) -> u32 {
+        // Primitives are keyed by their discriminant; reuse an existing entry.
+        let key = TypeKey::Primitive(prim as u8);
+        if let Some(&idx) = self.type_indices.get(&key) {
+            return idx;
+        }
+
+        let idx = self.section.add_type(DebugTypeInfo::Primitive(prim));
+        self.type_indices.insert(key, idx);
+        idx
+    }
+
+    /// Collects debug information from an HIR component.
+    ///
+    /// Only the operations in the entry block of the component body are
+    /// inspected: modules, interfaces and functions found there are processed,
+    /// anything else is ignored.
+    pub fn collect_from_component(&mut self, component: &builtin::Component) {
+        // Traverse the component and collect debug info from all functions
+        let region = component.body();
+        let block = region.entry();
+
+        for op in block.body() {
+            if let Some(module) = op.downcast_ref::<builtin::Module>() {
+                self.collect_from_module(module);
+            } else if let Some(interface) = op.downcast_ref::<builtin::Interface>() {
+                self.collect_from_interface(interface);
+            } else if let Some(function) = op.downcast_ref::<builtin::Function>() {
+                self.collect_from_function(function);
+            }
+        }
+    }
+
+    /// Collects debug info from every function in the entry block of `module`.
+    fn collect_from_module(&mut self, module: &builtin::Module) {
+        let region = module.body();
+        let block = region.entry();
+
+        for op in block.body() {
+            if let Some(function) = op.downcast_ref::<builtin::Function>() {
+                self.collect_from_function(function);
+            }
+        }
+    }
+
+    /// Collects debug info from every function in the entry block of `interface`.
+    fn collect_from_interface(&mut self, interface: &builtin::Interface) {
+        let region = interface.body();
+        let block = region.entry();
+
+        for op in block.body() {
+            if let Some(function) = op.downcast_ref::<builtin::Function>() {
+                self.collect_from_function(function);
+            }
+        }
+    }
+
+    /// Records one function entry (file, name, optional linkage name, position
+    /// and variables) when the function carries a `di.subprogram` attribute.
+    /// Functions without that attribute add nothing to the section.
+    fn collect_from_function(&mut self, function: &builtin::Function) {
+        // Get function debug info from attributes
+        // Try to get DISubprogramAttr from the function's attributes
+        let subprogram: Option<DISubprogramAttr> = function
+            .get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram"))
+            .and_then(|attr| attr.downcast_ref::<DISubprogramAttr>().cloned());
+
+        let Some(subprogram) = subprogram else {
+            // No debug info for this function, just collect from body
+            self.collect_variables_from_function_body(function, None);
+            return;
+        };
+
+        // Add file
+        let file_idx = self.add_file(subprogram.file.as_str(), None);
+
+        // Add function name
+        let name_idx = self.add_string(subprogram.name.as_str());
+        let linkage_name_idx = subprogram.linkage_name.map(|s| self.add_string(s.as_str()));
+
+        // Create function info
+        // A line the constructor rejects falls back to the default line; a missing
+        // column is treated as column 1.
+        let line = LineNumber::new(subprogram.line).unwrap_or_default();
+        let column = ColumnNumber::new(subprogram.column.unwrap_or(1)).unwrap_or_default();
+
+        let mut func_info = DebugFunctionInfo::new(name_idx, file_idx, line, column);
+        if let Some(linkage_idx) = linkage_name_idx {
+            func_info = func_info.with_linkage_name(linkage_idx);
+        }
+
+        // Collect local variables from function body
+        self.collect_variables_from_function_body(function, Some(&mut func_info));
+
+        self.section.add_function(func_info);
+    }
+
+    /// Adds the variables described by `DbgValue` ops in the body of `function`
+    /// to `func_info`. With `func_info == None` this is a no-op.
+    fn collect_variables_from_function_body(
+        &mut self,
+        function: &builtin::Function,
+        func_info: Option<&mut DebugFunctionInfo>,
+    ) {
+        // Without a record to attach variables to there is nothing to collect, so
+        // return before touching the function body at all.
+        let Some(func_info) = func_info else {
+            return;
+        };
+
+        // Walk through the function body to find DbgValue operations
+        let entry = function.entry_block();
+        let entry_block = entry.borrow();
+        self.collect_variables_from_block(&entry_block, func_info);
+    }
+
+    /// Adds the variable of every `DbgValue` op in `block` to `func_info`, then
+    /// recurses into the entry block of each region nested in each op.
+    ///
+    /// NOTE(review): only the entry block of a region is visited; debug ops in
+    /// any other block of a multi-block region are not collected - confirm the
+    /// IR is single-block per region at this point.
+    fn collect_variables_from_block(
+        &mut self,
+        block: &midenc_hir::Block,
+        func_info: &mut DebugFunctionInfo,
+    ) {
+        for op in block.body() {
+            // Check if this is a DbgValue operation
+            if let Some(dbg_value) = op.downcast_ref::<builtin::DbgValue>()
+                && let Some(var_info) = self.extract_variable_info(dbg_value.variable())
+            {
+                func_info.add_variable(var_info);
+            }
+
+            // Recursively process nested regions
+            for region_idx in 0..op.num_regions() {
+                let region = op.region(region_idx);
+                let entry = region.entry();
+                self.collect_variables_from_block(&entry, func_info);
+            }
+        }
+    }
+
+    /// Converts an HIR variable attribute into a `DebugVariableInfo`, adding its
+    /// name and type to the section tables along the way.
+    ///
+    /// Always returns `Some` as written.
+    fn extract_variable_info(&mut self, var: &DILocalVariableAttr) -> Option<DebugVariableInfo> {
+        let name_idx = self.add_string(var.name.as_str());
+
+        // Add type if available
+        let type_idx = if let Some(ref ty) = var.ty {
+            self.add_type(ty)
+        } else {
+            self.add_primitive_type(DebugPrimitiveType::Felt) // Default to felt
+        };
+
+        // Same fallbacks as for functions: default line, column 1.
+        let line = LineNumber::new(var.line).unwrap_or_default();
+        let column = ColumnNumber::new(var.column.unwrap_or(1)).unwrap_or_default();
+
+        let mut var_info = DebugVariableInfo::new(name_idx, type_idx, line, column);
+
+        if let Some(arg_index) = var.arg_index {
+            var_info = var_info.with_arg_index(arg_index);
+        }
+
+        Some(var_info)
+    }
+
+    /// Builds and returns the final `DebugInfoSection`.
+    pub fn build(self) -> DebugInfoSection {
+        self.section
+    }
+
+    /// Returns whether any debug info has been collected.
+    pub fn is_empty(&self) -> bool {
+        self.section.is_empty()
+    }
+}
+
+/// Converts an HIR Type to a DebugTypeInfo.
+///
+/// Pointer and array types first register their pointee/element type through
+/// `builder` and refer to it by index. Types without a mapping become
+/// `DebugTypeInfo::Unknown`.
+fn hir_type_to_debug_type(ty: &Type, builder: &mut DebugInfoBuilder) -> DebugTypeInfo {
+    match ty {
+        Type::Unknown => DebugTypeInfo::Unknown,
+        Type::Never => DebugTypeInfo::Primitive(DebugPrimitiveType::Void),
+        Type::I1 => DebugTypeInfo::Primitive(DebugPrimitiveType::Bool),
+        Type::I8 => DebugTypeInfo::Primitive(DebugPrimitiveType::I8),
+        Type::U8 => DebugTypeInfo::Primitive(DebugPrimitiveType::U8),
+        Type::I16 => DebugTypeInfo::Primitive(DebugPrimitiveType::I16),
+        Type::U16 => DebugTypeInfo::Primitive(DebugPrimitiveType::U16),
+        Type::I32 => DebugTypeInfo::Primitive(DebugPrimitiveType::I32),
+        Type::U32 => DebugTypeInfo::Primitive(DebugPrimitiveType::U32),
+        Type::I64 => DebugTypeInfo::Primitive(DebugPrimitiveType::I64),
+        Type::U64 => DebugTypeInfo::Primitive(DebugPrimitiveType::U64),
+        Type::I128 => DebugTypeInfo::Primitive(DebugPrimitiveType::I128),
+        Type::U128 => DebugTypeInfo::Primitive(DebugPrimitiveType::U128),
+        Type::U256 => DebugTypeInfo::Unknown, // No direct mapping for U256
+        Type::F64 => DebugTypeInfo::Primitive(DebugPrimitiveType::F64),
+        Type::Felt => DebugTypeInfo::Primitive(DebugPrimitiveType::Felt),
+        Type::Ptr(ptr_type) => {
+            let pointee_idx = builder.add_type(ptr_type.pointee());
+            DebugTypeInfo::Pointer {
+                pointee_type_idx: pointee_idx,
+            }
+        }
+        Type::Array(array_type) => {
+            let element_idx = builder.add_type(array_type.element_type());
+            DebugTypeInfo::Array {
+                element_type_idx: element_idx,
+                // A length that does not fit in `u32` is recorded as an absent
+                // count instead of being silently truncated by an `as` cast.
+                count: u32::try_from(array_type.len()).ok(),
+            }
+        }
+        // For types we don't have direct mappings for, use Unknown
+        Type::Struct(_) | Type::List(_) | Type::Function(_) => DebugTypeInfo::Unknown,
+    }
+}
+
+/// Creates a key for type deduplication.
+///
+/// NOTE(review): the catch-all arm maps every remaining variant onto
+/// `TypeKey::Unknown`, so `add_type` would return the index of the first such
+/// entry rather than adding a new one, which is the opposite of "we don't
+/// deduplicate". Harmless while `hir_type_to_debug_type` never produces those
+/// variants; revisit when it does.
+fn type_to_key(ty: &DebugTypeInfo) -> TypeKey {
+    match ty {
+        DebugTypeInfo::Primitive(p) => TypeKey::Primitive(*p as u8),
+        DebugTypeInfo::Pointer { pointee_type_idx } => TypeKey::Pointer(*pointee_type_idx),
+        DebugTypeInfo::Array {
+            element_type_idx,
+            count,
+        } => TypeKey::Array(*element_type_idx, *count),
+        DebugTypeInfo::Unknown => TypeKey::Unknown,
+        // For complex types like structs and functions, we don't deduplicate
+        _ => TypeKey::Unknown,
+    }
+}
+
+/// Builds a `DebugInfoSection` from an HIR component if debug info is enabled.
+///
+/// Returns `None` when `emit_debug_decorators` is `false`, or when nothing was
+/// collected from `component`.
+pub fn build_debug_info_section(
+    component: &builtin::Component,
+    emit_debug_decorators: bool,
+) -> Option<DebugInfoSection> {
+    if !emit_debug_decorators {
+        return None;
+    }
+
+    let mut builder = DebugInfoBuilder::new();
+    builder.collect_from_component(component);
+
+    if builder.is_empty() {
+        None
+    } else {
+        Some(builder.build())
+    }
+}
diff --git a/midenc-compile/src/lib.rs b/midenc-compile/src/lib.rs
index 1512ca861..fd3a42d9f 100644
--- a/midenc-compile/src/lib.rs
+++ b/midenc-compile/src/lib.rs
@@ -6,6 +6,7 @@ extern crate alloc;
 extern crate std;
 
 mod compiler;
+pub mod debug_info;
 mod stage;
 mod stages;
 
diff --git a/midenc-compile/src/stages/assemble.rs b/midenc-compile/src/stages/assemble.rs
index 5ad8489e5..d98f8d3ec 100644
--- a/midenc-compile/src/stages/assemble.rs
+++ b/midenc-compile/src/stages/assemble.rs
@@ -1,4 +1,4 @@
-use alloc::{string::ToString, vec, vec::Vec};
+use alloc::{string::ToString, vec::Vec};
 
 use miden_assembly::ast::QualifiedProcedureName;
 use miden_mast_package::{Dependency, MastArtifact, Package,
PackageExport}; @@ -90,16 +90,26 @@ fn build_package(mast: MastArtifact, outputs: &CodegenOutput, session: &Session) miden_mast_package::PackageManifest::new(exports).with_dependencies(dependencies); let account_component_metadata_bytes = outputs.account_component_metadata_bytes.clone(); + let debug_info_bytes = outputs.debug_info_bytes.clone(); - let sections = match account_component_metadata_bytes { - Some(bytes) => { - vec![miden_mast_package::Section::new( - miden_mast_package::SectionId::ACCOUNT_COMPONENT_METADATA, - bytes, - )] - } - None => vec![], - }; + let mut sections = Vec::new(); + + // Add account component metadata section if present + if let Some(bytes) = account_component_metadata_bytes { + sections.push(miden_mast_package::Section::new( + miden_mast_package::SectionId::ACCOUNT_COMPONENT_METADATA, + bytes, + )); + } + + // Add debug info section if present + if let Some(bytes) = debug_info_bytes { + log::debug!("adding .debug_info section to package ({} bytes)", bytes.len()); + sections.push(miden_mast_package::Section::new( + miden_mast_package::SectionId::DEBUG_INFO, + bytes, + )); + } miden_mast_package::Package { name, diff --git a/midenc-compile/src/stages/codegen.rs b/midenc-compile/src/stages/codegen.rs index 382200fb9..4e03029ec 100644 --- a/midenc-compile/src/stages/codegen.rs +++ b/midenc-compile/src/stages/codegen.rs @@ -20,6 +20,8 @@ pub struct CodegenOutput { pub link_packages: BTreeMap>, /// The serialized AccountComponentMetadata (name, description, storage layout, etc.) 
pub account_component_metadata_bytes: Option>, + /// The serialized DebugInfoSection for the .debug_info custom section + pub debug_info_bytes: Option>, } /// Perform code generation on the possibly-linked output of previous stages @@ -73,11 +75,29 @@ impl Stage for CodegenStage { session.emit(OutputMode::Text, masm_component.as_ref()).into_diagnostic()?; } + // Build debug info section if debug decorators are enabled + let debug_info_bytes = if session.options.emit_debug_decorators() { + use miden_assembly::utils::Serializable; + + log::debug!("collecting debug info for .debug_info section"); + let debug_section = + crate::debug_info::build_debug_info_section(&component.borrow(), true); + debug_section.map(|section| { + let mut bytes = alloc::vec::Vec::new(); + section.write_into(&mut bytes); + log::debug!("built debug_info section: {} bytes", bytes.len()); + bytes + }) + } else { + None + }; + Ok(CodegenOutput { component: Arc::from(masm_component), link_libraries, link_packages, account_component_metadata_bytes: linker_output.account_component_metadata_bytes, + debug_info_bytes, }) } } diff --git a/midenc-compile/src/stages/rewrite.rs b/midenc-compile/src/stages/rewrite.rs index 2456f1ee7..f2a029a49 100644 --- a/midenc-compile/src/stages/rewrite.rs +++ b/midenc-compile/src/stages/rewrite.rs @@ -7,7 +7,7 @@ use midenc_hir::{ pass::{IRPrintingConfig, Nesting, PassManager}, patterns::{GreedyRewriteConfig, RegionSimplificationLevel}, }; -use midenc_hir_transform::{Canonicalizer, ControlFlowSink, SinkOperandDefs}; +use midenc_hir_transform::{Canonicalizer, ControlFlowSink, RemoveDeadDebugOps, SinkOperandDefs}; use super::*; @@ -69,6 +69,8 @@ impl Stage for ApplyRewritesStage { func_pm.add_pass(Box::new(SinkOperandDefs)); func_pm.add_pass(Box::new(ControlFlowSink)); func_pm.add_pass(Box::new(TransformSpills)); + // Remove debug ops whose operands are dead to prevent codegen issues + func_pm.add_pass(Box::new(RemoveDeadDebugOps)); } // Function passes for 
component-level functions { @@ -80,6 +82,8 @@ impl Stage for ApplyRewritesStage { func_pm.add_pass(Box::new(SinkOperandDefs)); func_pm.add_pass(Box::new(ControlFlowSink)); func_pm.add_pass(Box::new(TransformSpills)); + // Remove debug ops whose operands are dead to prevent codegen issues + func_pm.add_pass(Box::new(RemoveDeadDebugOps)); } } @@ -91,6 +95,18 @@ impl Stage for ApplyRewritesStage { log::trace!(target: "driver", "after rewrites: {}", input.world.borrow().as_operation()); log::debug!(target: "driver", "rewrites successful"); + // Emit HIR if requested + let session = context.session(); + if session.should_emit(midenc_session::OutputType::Hir) { + log::debug!(target: "driver", "emitting HIR component"); + session + .emit(midenc_session::OutputMode::Text, &*input.component.borrow()) + .into_diagnostic()?; + log::debug!(target: "driver", "HIR component emitted successfully"); + } else { + log::debug!(target: "driver", "HIR emission not requested"); + } + if context.session().rewrite_only() { log::debug!(target: "driver", "stopping compiler early (rewrite-only=true)"); Err(CompilerStopped.into()) diff --git a/tests/integration/expected/debug_variable_locations.hir b/tests/integration/expected/debug_variable_locations.hir new file mode 100644 index 000000000..7c01fbfa4 --- /dev/null +++ b/tests/integration/expected/debug_variable_locations.hir @@ -0,0 +1,47 @@ +builtin.component root_ns:root@1.0.0 { + builtin.module public @test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b { + public builtin.function @entrypoint(v0: i32) -> i32 { + ^block6(v0: i32): + v2 = arith.constant 0 : i32; + v3 = arith.constant 0 : i32; + builtin.dbg_value v3 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; + v4 = arith.constant 0 : i32; + builtin.dbg_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; + cf.br ^block8(v3, 
v0, v4); + ^block7(v1: i32): + + ^block8(v6: i32, v7: i32, v15: i32): + v8 = hir.bitcast v6 : u32; + v9 = hir.bitcast v7 : u32; + v10 = arith.lte v8, v9 : i1; + v11 = arith.zext v10 : u32; + v12 = hir.bitcast v11 : i32; + v13 = arith.constant 0 : i32; + v14 = arith.neq v12, v13 : i1; + cf.cond_br v14 ^block10, ^block11; + ^block9(v5: i32): + + ^block10: + v16 = arith.add v15, v6 : i32 #[overflow = wrapping]; + builtin.dbg_value v16 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; + v17 = arith.constant 1 : i32; + v18 = arith.add v6, v17 : i32 #[overflow = wrapping]; + builtin.dbg_value v18 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; + cf.br ^block8(v18, v7, v16); + ^block11: + builtin.ret v15; + }; + + builtin.global_variable private @#__stack_pointer : i32 { + builtin.ret_imm 1048576; + }; + + builtin.global_variable public @#gv1 : i32 { + builtin.ret_imm 1048576; + }; + + builtin.global_variable public @#gv2 : i32 { + builtin.ret_imm 1048576; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/src/rust_masm_tests/debug.rs b/tests/integration/src/rust_masm_tests/debug.rs new file mode 100644 index 000000000..4676a3d07 --- /dev/null +++ b/tests/integration/src/rust_masm_tests/debug.rs @@ -0,0 +1,27 @@ +use std::borrow::Cow; + +use midenc_expect_test::expect_file; + +use crate::{CompilerTestBuilder, testing::setup}; + +#[test] +fn variable_locations_schedule() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum += i; + i += 1; + } + sum + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + let mut test = builder.build(); + 
test.expect_ir_unoptimized(expect_file!["../../expected/debug_variable_locations.hir"]); +} diff --git a/tests/integration/src/rust_masm_tests/mod.rs b/tests/integration/src/rust_masm_tests/mod.rs index 649e4b58c..062479bb7 100644 --- a/tests/integration/src/rust_masm_tests/mod.rs +++ b/tests/integration/src/rust_masm_tests/mod.rs @@ -12,6 +12,7 @@ use crate::testing::eval_package; mod abi_transform; mod apps; +mod debug; mod debug_source_locations; mod examples; mod instructions; diff --git a/tests/lit/debug/function_metadata.rs b/tests/lit/debug/function_metadata.rs new file mode 100644 index 000000000..7bf99b7dc --- /dev/null +++ b/tests/lit/debug/function_metadata.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn multiply(x: u32, y: u32) -> u32 { + x * y +} diff --git a/tests/lit/debug/function_metadata.shtest b/tests/lit/debug/function_metadata.shtest new file mode 100644 index 000000000..23434d6fc --- /dev/null +++ b/tests/lit/debug/function_metadata.shtest @@ -0,0 +1,6 @@ +# Test that HIR includes source locations for function parameters +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/function_metadata.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/function_metadata.wasm\" && bin/midenc \"\$TMPDIR/function_metadata.wasm\" --entrypoint=multiply -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function @multiply +# CHECK: #loc("{{.*}}function_metadata.rs":{{[0-9]+}} diff --git a/tests/lit/debug/lit.suite.toml b/tests/lit/debug/lit.suite.toml new file mode 100644 index 000000000..f95cc52c0 --- /dev/null +++ b/tests/lit/debug/lit.suite.toml @@ -0,0 +1,5 @@ +name = "debug" +patterns = ["*.shtest"] +working_dir = 
"../../../" + +[format.shtest] diff --git a/tests/lit/debug/location_expressions.rs b/tests/lit/debug/location_expressions.rs new file mode 100644 index 000000000..c899970e3 --- /dev/null +++ b/tests/lit/debug/location_expressions.rs @@ -0,0 +1,25 @@ +// Test file to verify location expressions in debug info +// Using no_std to avoid runtime overhead + +#![no_std] +#![no_main] + +#[panic_handler] +fn panic(_: &core::panic::PanicInfo) -> ! { + loop {} +} + +#[no_mangle] +pub extern "C" fn test_expressions(p0: i32, p1: i32, p2: i32, p3: i32) -> i32 { + // These parameters should be in WASM locals 0, 1, 2, 3 + // The debug info expressions should show: + // p0 -> DW_OP_WASM_local 0 + // p1 -> DW_OP_WASM_local 1 + // p2 -> DW_OP_WASM_local 2 + // p3 -> DW_OP_WASM_local 3 + + // Simple arithmetic using all parameters + let sum1 = p0.wrapping_add(p1); + let sum2 = p2.wrapping_add(p3); + sum1.wrapping_add(sum2) +} \ No newline at end of file diff --git a/tests/lit/debug/location_expressions.shtest b/tests/lit/debug/location_expressions.shtest new file mode 100644 index 000000000..867d6e1b4 --- /dev/null +++ b/tests/lit/debug/location_expressions.shtest @@ -0,0 +1,9 @@ +# Test that debug info with source locations is properly represented in HIR +# This test verifies that operations include source location annotations +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/location_expressions.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/location_expressions.wasm\" && bin/midenc \"\$TMPDIR/location_expressions.wasm\" --entrypoint=test_expressions -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Test that the function exists with 4 parameters +# CHECK-LABEL: builtin.function @test_expressions({{.*}}: i32, {{.*}}: i32, {{.*}}: i32, {{.*}}: i32) -> i32 + +# Test that operations have source location annotations +# CHECK: #loc("{{.*}}location_expressions.rs":{{[0-9]+}} diff --git 
a/tests/lit/debug/simple_debug.rs b/tests/lit/debug/simple_debug.rs new file mode 100644 index 000000000..342241ecb --- /dev/null +++ b/tests/lit/debug/simple_debug.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn add(a: u32, b: u32) -> u32 { + a + b +} diff --git a/tests/lit/debug/simple_debug.shtest b/tests/lit/debug/simple_debug.shtest new file mode 100644 index 000000000..4df8ec78f --- /dev/null +++ b/tests/lit/debug/simple_debug.shtest @@ -0,0 +1,6 @@ +# Test that basic debug info source locations are emitted for a simple function +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/simple_debug.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/simple_debug.wasm\" && bin/midenc \"\$TMPDIR/simple_debug.wasm\" --entrypoint=add -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function @add +# CHECK: #loc("{{.*}}simple_debug.rs":{{[0-9]+}} diff --git a/tests/lit/debug/variable_locations.rs b/tests/lit/debug/variable_locations.rs new file mode 100644 index 000000000..623d268de --- /dev/null +++ b/tests/lit/debug/variable_locations.rs @@ -0,0 +1,21 @@ +#![no_std] +#![no_main] +#![allow(unused_unsafe)] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn entrypoint(n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum = sum + i; + i = i + 1; + } + sum +} diff --git a/tests/lit/debug/variable_locations.shtest b/tests/lit/debug/variable_locations.shtest new file mode 100644 index 000000000..5e6861546 --- /dev/null +++ b/tests/lit/debug/variable_locations.shtest @@ -0,0 +1,6 @@ +# Test that debug info tracks source locations in a loop +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/variable_locations.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/variable_locations.wasm\" && bin/midenc \"\$TMPDIR/variable_locations.wasm\" --entrypoint=entrypoint -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function @entrypoint +# CHECK: #loc("{{.*}}variable_locations.rs":{{[0-9]+}} diff --git a/tests/lit/debugdump/lit.suite.toml b/tests/lit/debugdump/lit.suite.toml new file mode 100644 index 000000000..162db014a --- /dev/null +++ b/tests/lit/debugdump/lit.suite.toml @@ -0,0 +1,5 @@ +name = "debugdump" +patterns = ["*.wat"] +working_dir = "../../../" + +[format.shtest] diff --git a/tests/lit/debugdump/locations-source-loc.wat b/tests/lit/debugdump/locations-source-loc.wat new file mode 100644 index 000000000..9b9441af3 --- /dev/null +++ b/tests/lit/debugdump/locations-source-loc.wat @@ -0,0 +1,25 @@ +;; Test that .debug_loc section shows DebugVar decorators with source locations +;; from a real Rust project compiled with debug info. 
+;; +;; RUN: cargo build --release --target wasm32-unknown-unknown --manifest-path tests/lit/source-location/test-project/Cargo.toml 2>&1 +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc 'tests/lit/source-location/test-project/target/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s + +;; Check header +;; CHECK: .debug_loc contents (DebugVar decorators from MAST): +;; CHECK: Total DebugVar decorators: 3 +;; CHECK: Unique variable names: 3 + +;; Check variable "arg0" - parameter from test_assertion function +;; CHECK: Variable: "arg0" +;; CHECK: 1 location entries: +;; CHECK: local[0] (param #2) + +;; Check variable "local3" - from panic handler +;; CHECK: Variable: "local3" +;; CHECK: 1 location entries: +;; CHECK: stack[0] + +;; Check variable "x" - parameter from entrypoint function +;; CHECK: Variable: "x" +;; CHECK: 1 location entries: +;; CHECK: local[0] (param #2) diff --git a/tests/lit/debugdump/locations.wat b/tests/lit/debugdump/locations.wat new file mode 100644 index 000000000..aead42315 --- /dev/null +++ b/tests/lit/debugdump/locations.wat @@ -0,0 +1,22 @@ +;; Test that .debug_loc section is present and handles empty case +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s + +;; Check header for .debug_loc section +;; CHECK: .debug_loc contents (DebugVar decorators from MAST): +;; For raw WAT files without debug info, we expect no decorators +;; CHECK: (no DebugVar decorators found) + +(module + (func $add (export "add") (param i32 i32) (result i32) + local.get 0 + local.get 1 + i32.add + ) + + (func $entrypoint (export "entrypoint") + i32.const 5 + i32.const 3 + call $add + drop + ) +) diff --git a/tests/lit/debugdump/simple.wat 
b/tests/lit/debugdump/simple.wat new file mode 100644 index 000000000..3d9312e19 --- /dev/null +++ b/tests/lit/debugdump/simple.wat @@ -0,0 +1,32 @@ +;; Test that miden-debugdump correctly parses and displays debug info from a .masp file +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\"" | filecheck %s + +;; Check header +;; CHECK: DEBUG INFO DUMP: +;; CHECK: Debug info version: 1 + +;; Check summary section is present +;; CHECK: .debug_info summary: +;; CHECK: Strings: +;; CHECK: Types: +;; CHECK: Files: +;; CHECK: Functions: + +;; Check that we have functions from the WAT +;; CHECK: .debug_functions contents: +;; CHECK: FUNCTION: add +;; CHECK: FUNCTION: multiply + +(module + (func $add (export "add") (param $a i32) (param $b i32) (result i32) + local.get $a + local.get $b + i32.add + ) + + (func $multiply (export "multiply") (param $x i32) (param $y i32) (result i32) + local.get $x + local.get $y + i32.mul + ) +) diff --git a/tests/lit/debugdump/summary.wat b/tests/lit/debugdump/summary.wat new file mode 100644 index 000000000..a127c8d07 --- /dev/null +++ b/tests/lit/debugdump/summary.wat @@ -0,0 +1,21 @@ +;; Test that miden-debugdump --summary shows only summary output +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --summary" | filecheck %s + +;; Check summary is present +;; CHECK: .debug_info summary: +;; CHECK: Strings:{{.*}}entries +;; CHECK: Types:{{.*}}entries +;; CHECK: Files:{{.*}}entries +;; CHECK: Functions:{{.*}}entries + +;; Make sure full dump sections are NOT present with --summary +;; CHECK-NOT: .debug_str contents: +;; CHECK-NOT: .debug_types contents: +;; CHECK-NOT: .debug_files contents: +;; CHECK-NOT: .debug_functions contents: + +(module + (func $test (export "test") (param i32) (result i32) + 
local.get 0 + ) +) diff --git a/tests/lit/lit.cfg.py b/tests/lit/lit.cfg.py new file mode 100644 index 000000000..75043fcf0 --- /dev/null +++ b/tests/lit/lit.cfg.py @@ -0,0 +1,51 @@ +import os +import sys +import shlex + +from lit.formats import ShTest +import lit.util + +config.name = "miden-lit" +config.test_format = ShTest() +config.suffixes = [".shtest", ".hir", ".wat"] + +source_root = os.path.dirname(__file__) +repo_root = os.path.abspath(os.path.join(source_root, os.pardir, os.pardir)) +config.test_source_root = source_root +config.test_exec_root = repo_root +bin_dir = os.path.join(repo_root, "bin") +config.environment["PATH"] = bin_dir + os.pathsep + config.environment.get("PATH", "") +# Use cargo run to ensure proper runtime environment +# Redirect cargo's stderr to suppress build warnings, but keep midenc's stderr +midenc_cmd = f"cargo run --manifest-path {shlex.quote(os.path.join(repo_root, 'Cargo.toml'))} --bin midenc 2>/dev/null --" +config.substitutions.append(("%midenc", midenc_cmd)) + +# Try to find FileCheck in common locations +filecheck = ( + lit.util.which("FileCheck") + or lit.util.which("filecheck") + or lit.util.which("llvm-filecheck") +) + +# Check homebrew LLVM locations if not found +if not filecheck: + homebrew_paths = [ + "/opt/homebrew/opt/llvm@20/bin/FileCheck", + "/opt/homebrew/opt/llvm/bin/FileCheck", + "/usr/local/opt/llvm/bin/FileCheck", + ] + for path in homebrew_paths: + if os.path.exists(path): + filecheck = path + break + +# Fall back to simple_filecheck.py only if system FileCheck not found +if not filecheck: + script = os.path.join(source_root, 'tools', 'simple_filecheck.py') + filecheck = f"{shlex.quote(sys.executable)} {shlex.quote(script)}" + +config.substitutions.append(("%filecheck", filecheck)) + +config.substitutions.append(("%S", source_root)) + +config.environment.setdefault("RUSTFLAGS", "") diff --git a/tests/lit/variable_locations.rs b/tests/lit/variable_locations.rs new file mode 100644 index 000000000..623d268de 
--- /dev/null +++ b/tests/lit/variable_locations.rs @@ -0,0 +1,21 @@ +#![no_std] +#![no_main] +#![allow(unused_unsafe)] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn entrypoint(n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum = sum + i; + i = i + 1; + } + sum +} diff --git a/tools/debugdump/Cargo.toml b/tools/debugdump/Cargo.toml new file mode 100644 index 000000000..930d9812a --- /dev/null +++ b/tools/debugdump/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "miden-debugdump" +version.workspace = true +rust-version.workspace = true +authors.workspace = true +description = "A tool to dump debug information from MASP packages" +repository.workspace = true +homepage.workspace = true +documentation.workspace = true +categories = ["development-tools", "command-line-utilities"] +keywords = ["debug", "miden", "dwarfdump"] +license.workspace = true +readme.workspace = true +edition.workspace = true +publish.workspace = true + +[[bin]] +name = "miden-debugdump" +path = "src/main.rs" + +[dependencies] +miden-mast-package.workspace = true +miden-core.workspace = true +clap.workspace = true +thiserror.workspace = true diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs new file mode 100644 index 000000000..b9df9be93 --- /dev/null +++ b/tools/debugdump/src/main.rs @@ -0,0 +1,638 @@ +//! miden-debugdump - A tool to dump debug information from MASP packages +//! +//! Similar to llvm-dwarfdump, this tool parses the `.debug_info` section +//! from compiled MASP packages and displays the debug metadata in a +//! human-readable format. 
+ +use std::{ + collections::BTreeMap, + fs::File, + io::{BufReader, Read}, + path::PathBuf, +}; + +use clap::{Parser, ValueEnum}; +use miden_core::{ + Decorator, + utils::{Deserializable, SliceReader}, +}; +use miden_mast_package::{ + MastForest, Package, SectionId, + debug_info::{ + DebugFileInfo, DebugFunctionInfo, DebugInfoSection, DebugPrimitiveType, DebugTypeInfo, + DebugVariableInfo, + }, +}; + +#[derive(Debug, thiserror::Error)] +enum Error { + #[error("failed to read file: {0}")] + Io(#[from] std::io::Error), + #[error("failed to parse package: {0}")] + Parse(String), + #[error("no debug_info section found in package")] + NoDebugInfo, +} + +/// A tool to dump debug information from MASP packages +#[derive(Parser, Debug)] +#[command( + name = "miden-debugdump", + about = "Dump debug information from MASP packages (similar to llvm-dwarfdump)", + version, + rename_all = "kebab-case" +)] +struct Cli { + /// Input MASP file to analyze + #[arg(required = true)] + input: PathBuf, + + /// Filter output to specific section + #[arg(short, long, value_enum)] + section: Option, + + /// Show all available information (verbose) + #[arg(short, long)] + verbose: bool, + + /// Show raw indices instead of resolved names + #[arg(long)] + raw: bool, + + /// Only show summary statistics + #[arg(long)] + summary: bool, +} + +#[derive(Debug, Clone, Copy, ValueEnum)] +enum DumpSection { + /// Show string table + Strings, + /// Show type information + Types, + /// Show source file information + Files, + /// Show function debug information + Functions, + /// Show variable information within functions + Variables, + /// Show variable location decorators from MAST (similar to DWARF .debug_loc) + Locations, +} + +fn main() { + if let Err(e) = run() { + eprintln!("error: {e}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Error> { + let cli = Cli::parse(); + + // Read the MASP file + let file = File::open(&cli.input)?; + let mut reader = BufReader::new(file); + let mut 
bytes = Vec::new(); + reader.read_to_end(&mut bytes)?; + + // Parse the package + let package = Package::read_from(&mut SliceReader::new(&bytes)) + .map_err(|e| Error::Parse(e.to_string()))?; + + // Get the MAST forest for location decorators + let mast_forest = package.mast.mast_forest(); + + // Find the debug_info section + let debug_section = package + .sections + .iter() + .find(|s| s.id == SectionId::DEBUG_INFO) + .ok_or(Error::NoDebugInfo)?; + + // Parse the debug info + let debug_info = DebugInfoSection::read_from(&mut SliceReader::new(&debug_section.data)) + .map_err(|e| Error::Parse(e.to_string()))?; + + // Print header + println!("{}", "=".repeat(80)); + println!("DEBUG INFO DUMP: {}", cli.input.display()); + println!( + "Package: {} (version: {})", + package.name, + package + .version + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| "unknown".into()) + ); + println!("Debug info version: {}", debug_info.version); + println!("{}", "=".repeat(80)); + println!(); + + if cli.summary { + print_summary(&debug_info, mast_forest); + return Ok(()); + } + + match cli.section { + Some(DumpSection::Strings) => print_strings(&debug_info), + Some(DumpSection::Types) => print_types(&debug_info, cli.raw), + Some(DumpSection::Files) => print_files(&debug_info, cli.raw), + Some(DumpSection::Functions) => print_functions(&debug_info, cli.raw, cli.verbose), + Some(DumpSection::Variables) => print_variables(&debug_info, cli.raw), + Some(DumpSection::Locations) => print_locations(mast_forest, &debug_info, cli.verbose), + None => { + // Print everything + print_summary(&debug_info, mast_forest); + println!(); + print_strings(&debug_info); + println!(); + print_types(&debug_info, cli.raw); + println!(); + print_files(&debug_info, cli.raw); + println!(); + print_functions(&debug_info, cli.raw, cli.verbose); + println!(); + print_locations(mast_forest, &debug_info, cli.verbose); + } + } + + Ok(()) +} + +fn print_summary(debug_info: &DebugInfoSection, mast_forest: 
&MastForest) { + println!(".debug_info summary:"); + println!(" Strings: {} entries", debug_info.strings.len()); + println!(" Types: {} entries", debug_info.types.len()); + println!(" Files: {} entries", debug_info.files.len()); + println!(" Functions: {} entries", debug_info.functions.len()); + + let total_vars: usize = debug_info.functions.iter().map(|f| f.variables.len()).sum(); + let total_inlined: usize = debug_info.functions.iter().map(|f| f.inlined_calls.len()).sum(); + println!(" Variables: {} total (across all functions)", total_vars); + println!(" Inlined: {} call sites", total_inlined); + + // Count DebugVar decorators in MAST + let debug_var_count = mast_forest + .decorators() + .iter() + .filter(|d| matches!(d, Decorator::DebugVar(_))) + .count(); + println!(" DebugVar decorators: {} in MAST", debug_var_count); +} + +fn print_strings(debug_info: &DebugInfoSection) { + println!(".debug_str contents:"); + println!("{:-<80}", ""); + for (idx, s) in debug_info.strings.iter().enumerate() { + println!(" [{:4}] \"{}\"", idx, s); + } +} + +fn print_types(debug_info: &DebugInfoSection, raw: bool) { + println!(".debug_types contents:"); + println!("{:-<80}", ""); + for (idx, ty) in debug_info.types.iter().enumerate() { + print!(" [{:4}] ", idx); + print_type(ty, debug_info, raw, 0); + println!(); + } +} + +fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, indent: usize) { + let pad = " ".repeat(indent); + match ty { + DebugTypeInfo::Primitive(prim) => { + print!("{}PRIMITIVE: {}", pad, primitive_name(*prim)); + print!(" (size: {} bytes, {} felts)", prim.size_in_bytes(), prim.size_in_felts()); + } + DebugTypeInfo::Pointer { pointee_type_idx } => { + if raw { + print!("{}POINTER -> type[{}]", pad, pointee_type_idx); + } else { + print!("{}POINTER -> ", pad); + if let Some(pointee) = debug_info.get_type(*pointee_type_idx) { + print_type_brief(pointee, debug_info); + } else { + print!("", pointee_type_idx); + } + } + } + 
DebugTypeInfo::Array { + element_type_idx, + count, + } => { + if raw { + print!("{}ARRAY [{}; {:?}]", pad, element_type_idx, count); + } else { + print!("{}ARRAY [", pad); + if let Some(elem) = debug_info.get_type(*element_type_idx) { + print_type_brief(elem, debug_info); + } else { + print!(""); + } + match count { + Some(n) => print!("; {}]", n), + None => print!("; ?]"), + } + } + } + DebugTypeInfo::Struct { + name_idx, + size, + fields, + } => { + let name = if raw { + format!("str[{}]", name_idx) + } else { + debug_info.get_string(*name_idx).unwrap_or("").to_string() + }; + print!("{}STRUCT {} (size: {} bytes)", pad, name, size); + if !fields.is_empty() { + println!(); + for field in fields { + let field_name = if raw { + format!("str[{}]", field.name_idx) + } else { + debug_info.get_string(field.name_idx).unwrap_or("").to_string() + }; + print!("{} +{:4}: {} : ", pad, field.offset, field_name); + if let Some(fty) = debug_info.get_type(field.type_idx) { + print_type_brief(fty, debug_info); + } else { + print!(""); + } + println!(); + } + } + } + DebugTypeInfo::Function { + return_type_idx, + param_type_indices, + } => { + print!("{}FUNCTION (", pad); + for (i, param_idx) in param_type_indices.iter().enumerate() { + if i > 0 { + print!(", "); + } + if raw { + print!("type[{}]", param_idx); + } else if let Some(pty) = debug_info.get_type(*param_idx) { + print_type_brief(pty, debug_info); + } else { + print!(""); + } + } + print!(") -> "); + match return_type_idx { + Some(idx) => { + if raw { + print!("type[{}]", idx); + } else if let Some(rty) = debug_info.get_type(*idx) { + print_type_brief(rty, debug_info); + } else { + print!(""); + } + } + None => print!("void"), + } + } + DebugTypeInfo::Unknown => { + print!("{}UNKNOWN", pad); + } + } +} + +fn print_type_brief(ty: &DebugTypeInfo, debug_info: &DebugInfoSection) { + match ty { + DebugTypeInfo::Primitive(prim) => print!("{}", primitive_name(*prim)), + DebugTypeInfo::Pointer { pointee_type_idx } => { + 
print!("*"); + if let Some(p) = debug_info.get_type(*pointee_type_idx) { + print_type_brief(p, debug_info); + } + } + DebugTypeInfo::Array { + element_type_idx, + count, + } => { + print!("["); + if let Some(e) = debug_info.get_type(*element_type_idx) { + print_type_brief(e, debug_info); + } + match count { + Some(n) => print!("; {}]", n), + None => print!("]"), + } + } + DebugTypeInfo::Struct { name_idx, .. } => { + print!("struct {}", debug_info.get_string(*name_idx).unwrap_or("?")); + } + DebugTypeInfo::Function { .. } => print!("fn(...)"), + DebugTypeInfo::Unknown => print!("?"), + } +} + +fn primitive_name(prim: DebugPrimitiveType) -> &'static str { + match prim { + DebugPrimitiveType::Void => "void", + DebugPrimitiveType::Bool => "bool", + DebugPrimitiveType::I8 => "i8", + DebugPrimitiveType::U8 => "u8", + DebugPrimitiveType::I16 => "i16", + DebugPrimitiveType::U16 => "u16", + DebugPrimitiveType::I32 => "i32", + DebugPrimitiveType::U32 => "u32", + DebugPrimitiveType::I64 => "i64", + DebugPrimitiveType::U64 => "u64", + DebugPrimitiveType::I128 => "i128", + DebugPrimitiveType::U128 => "u128", + DebugPrimitiveType::F32 => "f32", + DebugPrimitiveType::F64 => "f64", + DebugPrimitiveType::Felt => "felt", + DebugPrimitiveType::Word => "word", + } +} + +fn print_files(debug_info: &DebugInfoSection, raw: bool) { + println!(".debug_files contents:"); + println!("{:-<80}", ""); + for (idx, file) in debug_info.files.iter().enumerate() { + print_file(idx, file, debug_info, raw); + } +} + +fn print_file(idx: usize, file: &DebugFileInfo, debug_info: &DebugInfoSection, raw: bool) { + let path = if raw { + format!("str[{}]", file.path_idx) + } else { + debug_info.get_string(file.path_idx).unwrap_or("").to_string() + }; + + print!(" [{:4}] {}", idx, path); + + if let Some(dir_idx) = file.directory_idx { + let dir = if raw { + format!("str[{}]", dir_idx) + } else { + debug_info.get_string(dir_idx).unwrap_or("").to_string() + }; + print!(" (dir: {})", dir); + } + + if let 
Some(checksum) = &file.checksum { + print!(" [checksum: "); + for byte in &checksum[..4] { + print!("{:02x}", byte); + } + print!("...]"); + } + + println!(); +} + +fn print_functions(debug_info: &DebugInfoSection, raw: bool, verbose: bool) { + println!(".debug_functions contents:"); + println!("{:-<80}", ""); + for (idx, func) in debug_info.functions.iter().enumerate() { + print_function(idx, func, debug_info, raw, verbose); + println!(); + } +} + +fn print_function( + idx: usize, + func: &DebugFunctionInfo, + debug_info: &DebugInfoSection, + raw: bool, + verbose: bool, +) { + let name = if raw { + format!("str[{}]", func.name_idx) + } else { + debug_info.get_string(func.name_idx).unwrap_or("").to_string() + }; + + println!(" [{:4}] FUNCTION: {}", idx, name); + + // Linkage name + if let Some(linkage_idx) = func.linkage_name_idx { + let linkage = if raw { + format!("str[{}]", linkage_idx) + } else { + debug_info.get_string(linkage_idx).unwrap_or("").to_string() + }; + println!(" Linkage name: {}", linkage); + } + + // Location + let file_path = if raw { + format!("file[{}]", func.file_idx) + } else { + debug_info + .get_file(func.file_idx) + .and_then(|f| debug_info.get_string(f.path_idx)) + .unwrap_or("") + .to_string() + }; + println!(" Location: {}:{}:{}", file_path, func.line, func.column); + + // Type + if let Some(type_idx) = func.type_idx { + print!(" Type: "); + if raw { + println!("type[{}]", type_idx); + } else if let Some(ty) = debug_info.get_type(type_idx) { + print_type_brief(ty, debug_info); + println!(); + } else { + println!(""); + } + } + + // MAST root + if let Some(root) = &func.mast_root { + print!(" MAST root: 0x"); + for byte in root { + print!("{:02x}", byte); + } + println!(); + } + + // Variables + if !func.variables.is_empty() { + println!(" Variables ({}):", func.variables.len()); + for var in &func.variables { + print_variable(var, debug_info, raw, verbose); + } + } + + // Inlined calls + if !func.inlined_calls.is_empty() && verbose { + 
println!(" Inlined calls ({}):", func.inlined_calls.len()); + for call in &func.inlined_calls { + let callee = if raw { + format!("func[{}]", call.callee_idx) + } else { + debug_info + .functions + .get(call.callee_idx as usize) + .and_then(|f| debug_info.get_string(f.name_idx)) + .unwrap_or("") + .to_string() + }; + let call_file = if raw { + format!("file[{}]", call.file_idx) + } else { + debug_info + .get_file(call.file_idx) + .and_then(|f| debug_info.get_string(f.path_idx)) + .unwrap_or("") + .to_string() + }; + println!( + " - {} inlined at {}:{}:{}", + callee, call_file, call.line, call.column + ); + } + } +} + +fn print_variable( + var: &DebugVariableInfo, + debug_info: &DebugInfoSection, + raw: bool, + _verbose: bool, +) { + let name = if raw { + format!("str[{}]", var.name_idx) + } else { + debug_info.get_string(var.name_idx).unwrap_or("").to_string() + }; + + let kind = if var.is_parameter() { + format!("param #{}", var.arg_index) + } else { + "local".to_string() + }; + + print!(" - {} ({}): ", name, kind); + + if raw { + print!("type[{}]", var.type_idx); + } else if let Some(ty) = debug_info.get_type(var.type_idx) { + print_type_brief(ty, debug_info); + } else { + print!(""); + } + + print!(" @ {}:{}", var.line, var.column); + + if var.scope_depth > 0 { + print!(" [scope depth: {}]", var.scope_depth); + } + + println!(); +} + +fn print_variables(debug_info: &DebugInfoSection, raw: bool) { + println!(".debug_variables contents (all functions):"); + println!("{:-<80}", ""); + + for func in &debug_info.functions { + if func.variables.is_empty() { + continue; + } + + let func_name = debug_info.get_string(func.name_idx).unwrap_or(""); + println!(" Function: {}", func_name); + + for var in &func.variables { + print_variable(var, debug_info, raw, false); + } + println!(); + } +} + +/// Prints the .debug_loc section - variable location decorators from MAST +/// +/// This is analogous to DWARF's .debug_loc section which contains location +/// lists describing 
where a variable's value can be found at runtime. +fn print_locations(mast_forest: &MastForest, debug_info: &DebugInfoSection, verbose: bool) { + println!(".debug_loc contents (DebugVar decorators from MAST):"); + println!("{:-<80}", ""); + + // Collect all DebugVar decorators + let debug_vars: Vec<_> = mast_forest + .decorators() + .iter() + .enumerate() + .filter_map(|(idx, dec)| { + if let Decorator::DebugVar(info) = dec { + Some((idx, info)) + } else { + None + } + }) + .collect(); + + if debug_vars.is_empty() { + println!(" (no DebugVar decorators found)"); + return; + } + + // Group by variable name for a cleaner view + let mut by_name: BTreeMap<&str, Vec<(usize, &miden_core::DebugVarInfo)>> = BTreeMap::new(); + for (idx, info) in &debug_vars { + by_name.entry(info.name()).or_default().push((*idx, *info)); + } + + println!(" Total DebugVar decorators: {}", debug_vars.len()); + println!(" Unique variable names: {}", by_name.len()); + println!(); + + for (name, entries) in &by_name { + println!(" Variable: \"{}\"", name); + println!(" {} location entries:", entries.len()); + + for (decorator_idx, info) in entries { + print!(" [dec#{}] ", decorator_idx); + + // Print value location + print!("{}", info.value_location()); + + // Print argument info if present + if let Some(arg_idx) = info.arg_index() { + print!(" (param #{})", arg_idx); + } + + // Print type info if present and we can resolve it + if let Some(type_id) = info.type_id() { + if let Some(ty) = debug_info.get_type(type_id) { + print!(" : "); + print_type_brief(ty, debug_info); + } else { + print!(" : type[{}]", type_id); + } + } + + // Print source location if present + if let Some(loc) = info.location() { + print!(" @ {}:{}:{}", loc.uri, loc.line, loc.column); + } + + println!(); + } + println!(); + } + + // In verbose mode, also show raw decorator list + if verbose { + println!(" Raw decorator list (in order):"); + println!(" {:-<76}", ""); + for (idx, info) in &debug_vars { + println!(" [{:4}] {}", 
idx, info); + } + } +}