diff --git a/Cargo.lock b/Cargo.lock index e81c25be4..d1fed9e03 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2790,6 +2790,7 @@ dependencies = [ name = "midenc-codegen-masm" version = "0.6.0" dependencies = [ + "anyhow", "env_logger", "inventory", "itertools 0.14.0", @@ -2820,6 +2821,7 @@ dependencies = [ name = "midenc-compile" version = "0.6.0" dependencies = [ + "anyhow", "clap", "inventory", "log", @@ -2930,6 +2932,7 @@ dependencies = [ "midenc-hir-symbol", "midenc-session", "wasmparser 0.227.1", + "wasmprinter", "wat", ] @@ -3058,6 +3061,7 @@ dependencies = [ "midenc-hir-macros", "midenc-hir-symbol", "parking_lot", + "smallvec", "termcolor", ] diff --git a/Cargo.toml b/Cargo.toml index f0e5817bc..fc8139efa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -119,6 +119,7 @@ thiserror = { package = "miden-thiserror", version = "1.0" } toml = { version = "0.8", features = ["preserve_order"] } tokio = { version = "1.39.2", features = ["rt", "time", "macros", "rt-multi-thread"] } wat = "1.0.69" +wasmprinter = "0.227" wasmparser = { version = "0.227", default-features = false, features = [ "features", "component-model", diff --git a/README.md b/README.md index e3455c972..f83472607 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,15 @@ To run the compiler test suite: This will run all of the unit tests in the workspace, as well as all of our `lit` tests. +## Debugging + +### Emitting internal sources/artifacts + +- `MIDENC_EMIT`: Environment-variable equivalent of `--emit`. Accepts the same `KIND[=PATH]` syntax + (comma-delimited), where `PATH` is treated either as a directory (e.g. `MIDENC_EMIT=ir=target/emit`) or as a file (e.g. `MIDENC_EMIT=hir=my_name.hir`). +- `MIDENC_EMIT_MACRO_EXPAND[=<DIR>]`: When set, integration tests dump `cargo expand` + output for Rust fixtures to `.expanded.rs` files in `<DIR>` (or the CWD if empty/`1`). 
+ ## Docs The documentation in the `docs/external` folder is built using Docusaurus and is automatically absorbed into the main [miden-docs](https://github.com/0xMiden/miden-docs) repository for the main documentation website. Changes to the `next` branch trigger an automated deployment workflow. The docs folder requires npm packages to be installed before building. diff --git a/codegen/masm/Cargo.toml b/codegen/masm/Cargo.toml index 7222103d3..df969189e 100644 --- a/codegen/masm/Cargo.toml +++ b/codegen/masm/Cargo.toml @@ -16,6 +16,7 @@ default = ["std"] std = ["midenc-hir/std", "midenc-dialect-hir/std", "petgraph/std"] [dependencies] +anyhow.workspace = true inventory.workspace = true log.workspace = true miden-assembly.workspace = true diff --git a/codegen/masm/src/artifact.rs b/codegen/masm/src/artifact.rs index 1ba70a649..ed65b9e92 100644 --- a/codegen/masm/src/artifact.rs +++ b/codegen/masm/src/artifact.rs @@ -9,7 +9,7 @@ use miden_core::{Program, Word}; use miden_mast_package::{MastArtifact, Package, ProcedureName}; use midenc_hir::{constants::ConstantData, dialects::builtin, interner::Symbol}; use midenc_session::{ - Session, + Emit, OutputMode, OutputType, Session, Writer, diagnostics::{Report, SourceSpan, Span}, }; @@ -38,6 +38,29 @@ pub struct MasmComponent { pub modules: Vec>, } +impl Emit for MasmComponent { + fn name(&self) -> Option { + None + } + + fn output_type(&self, _mode: OutputMode) -> OutputType { + OutputType::Masm + } + + fn write_to( + &self, + mut writer: W, + mode: OutputMode, + _session: &Session, + ) -> anyhow::Result<()> { + if mode != OutputMode::Text { + anyhow::bail!("masm emission does not support binary mode"); + } + writer.write_fmt(core::format_args!("{self}"))?; + Ok(()) + } +} + /// Represents a read-only data segment, combined with its content digest #[derive(Clone, PartialEq, Eq)] pub struct Rodata { diff --git a/docs/external/src/guides/debugger.md b/docs/external/src/guides/debugger.md index 25b43c6f7..3344d587f 100644 
--- a/docs/external/src/guides/debugger.md +++ b/docs/external/src/guides/debugger.md @@ -27,10 +27,10 @@ The debugger may also be used as a library, but that is left as an exercise for ```shell # Compile a program to MAST from a rustc-generated Wasm module -midenc compile foo.wasm -o foo.masl +midenc compile foo.wasm -o foo.masp # Load that program into the debugger and start executing it -midenc debug foo.masl +midenc debug foo.masp ``` ## Program inputs @@ -48,7 +48,7 @@ To specify the contents of the operand stack, you can do so by following the raw Each operand must be a valid field element value, in either decimal or hexadecimal format. For example: ```shell -midenc debug foo.masl -- 1 2 0xdeadbeef +midenc debug foo.masp -- 1 2 0xdeadbeef ``` If you pass arguments via the command line in conjunction with `--inputs`, then the command line arguments diff --git a/frontend/wasm/Cargo.toml b/frontend/wasm/Cargo.toml index 271b39d9e..be2179c25 100644 --- a/frontend/wasm/Cargo.toml +++ b/frontend/wasm/Cargo.toml @@ -13,7 +13,7 @@ edition.workspace = true [features] default = ["std"] -std = ["wasmparser/std", "gimli/std", "midenc-hir-symbol/std"] +std = ["wasmparser/std", "gimli/std", "midenc-hir-symbol/std", "dep:wasmprinter"] [dependencies] anyhow.workspace = true @@ -32,6 +32,7 @@ midenc-hir-symbol.workspace = true midenc-session.workspace = true thiserror.workspace = true wasmparser.workspace = true +wasmprinter = { workspace = true, optional = true } [dev-dependencies] # Use local paths for dev-only dependency to avoid relying on crates.io during packaging diff --git a/frontend/wasm/src/emit.rs b/frontend/wasm/src/emit.rs new file mode 100644 index 000000000..45319b3e2 --- /dev/null +++ b/frontend/wasm/src/emit.rs @@ -0,0 +1,108 @@ +//! Utilities for emitting frontend-specific artifacts. + +use alloc::string::String; + +use midenc_session::{Emit, OutputMode, OutputType, Session, Writer}; + +/// A wrapper that emits WebAssembly text format (WAT). 
+#[derive(Debug, Copy, Clone)] +pub struct WatEmit<'a>(pub &'a str); + +impl Emit for WatEmit<'_> { + fn name(&self) -> Option { + None + } + + fn output_type(&self, _mode: OutputMode) -> OutputType { + OutputType::Wat + } + + fn write_to( + &self, + mut writer: W, + mode: OutputMode, + _session: &Session, + ) -> anyhow::Result<()> { + if mode != OutputMode::Text { + anyhow::bail!("wat emission does not support binary mode"); + } + writer.write_fmt(core::format_args!("{}", self.0))?; + Ok(()) + } +} + +/// Convert a WebAssembly binary to WAT text, filtering out highly variable custom sections. +#[cfg(feature = "std")] +pub fn wasm_to_wat(wasm_bytes: &[u8]) -> anyhow::Result { + use core::fmt; + + // Disable printing of the various custom sections, e.g. "producers", either because they + // contain strings which are highly variable (but not important), or because they are debug info + // related. + struct NoCustomSectionsPrinter(T); + impl wasmprinter::Print for NoCustomSectionsPrinter { + fn write_str(&mut self, s: &str) -> std::io::Result<()> { + self.0.write_str(s) + } + + fn newline(&mut self) -> std::io::Result<()> { + self.0.newline() + } + + fn start_line(&mut self, binary_offset: Option) { + self.0.start_line(binary_offset); + } + + fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> std::io::Result<()> { + self.0.write_fmt(args) + } + + fn print_custom_section( + &mut self, + name: &str, + binary_offset: usize, + data: &[u8], + ) -> std::io::Result { + match name { + "producers" | "target_features" => Ok(true), + debug if debug.starts_with(".debug") => Ok(true), + _ => self.0.print_custom_section(name, binary_offset, data), + } + } + + fn start_literal(&mut self) -> std::io::Result<()> { + self.0.start_literal() + } + + fn start_name(&mut self) -> std::io::Result<()> { + self.0.start_name() + } + + fn start_keyword(&mut self) -> std::io::Result<()> { + self.0.start_keyword() + } + + fn start_type(&mut self) -> std::io::Result<()> { + self.0.start_type() + 
} + + fn start_comment(&mut self) -> std::io::Result<()> { + self.0.start_comment() + } + + fn reset_color(&mut self) -> std::io::Result<()> { + self.0.reset_color() + } + + fn supports_async_color(&self) -> bool { + self.0.supports_async_color() + } + } + + // WAT text should be at least ~3x larger than the binary Wasm representation + let mut wat = String::with_capacity(wasm_bytes.len() * 3); + let config = wasmprinter::Config::new(); + let mut wasm_printer = NoCustomSectionsPrinter(wasmprinter::PrintFmtWrite(&mut wat)); + config.print(wasm_bytes, &mut wasm_printer)?; + Ok(wat) +} diff --git a/frontend/wasm/src/lib.rs b/frontend/wasm/src/lib.rs index b7eb35aa4..c84cee496 100644 --- a/frontend/wasm/src/lib.rs +++ b/frontend/wasm/src/lib.rs @@ -14,6 +14,7 @@ mod callable; mod code_translator; mod component; mod config; +mod emit; mod error; mod intrinsics; mod miden_abi; @@ -29,7 +30,9 @@ use midenc_hir::{Context, dialects::builtin}; use module::build_ir::translate_module_as_component; use wasmparser::WasmFeatures; -pub use self::{config::*, error::WasmError}; +#[cfg(feature = "std")] +pub use self::emit::wasm_to_wat; +pub use self::{config::*, emit::WatEmit, error::WasmError}; /// The output of the frontend Wasm translation stage pub struct FrontendOutput { diff --git a/midenc-compile/Cargo.toml b/midenc-compile/Cargo.toml index 5ee330626..470c0bd0f 100644 --- a/midenc-compile/Cargo.toml +++ b/midenc-compile/Cargo.toml @@ -26,6 +26,7 @@ std = [ ] [dependencies] +anyhow.workspace = true clap = { workspace = true, optional = true } log.workspace = true inventory.workspace = true diff --git a/midenc-compile/src/compiler.rs b/midenc-compile/src/compiler.rs index 413182e07..27e86b4e9 100644 --- a/midenc-compile/src/compiler.rs +++ b/midenc-compile/src/compiler.rs @@ -537,7 +537,10 @@ impl Compiler { // Initialize output types let mut output_types = OutputTypes::new(self.output_types).unwrap_or_else(|err| err.exit()); - if output_types.is_empty() { + let 
has_final_output = + output_types.keys().any(|ty| matches!(ty, OutputType::Mast | OutputType::Masp)); + if !has_final_output { + // By default, we always produce a final artifact; `--emit` selects additional outputs. output_types.insert(OutputType::Masp, output_file.clone()); } else if output_file.is_some() && output_types.get(&OutputType::Masp).is_some() { // The -o flag overrides --emit diff --git a/midenc-compile/src/stages/assemble.rs b/midenc-compile/src/stages/assemble.rs index f82aa48ca..5ad8489e5 100644 --- a/midenc-compile/src/stages/assemble.rs +++ b/midenc-compile/src/stages/assemble.rs @@ -2,7 +2,7 @@ use alloc::{string::ToString, vec, vec::Vec}; use miden_assembly::ast::QualifiedProcedureName; use miden_mast_package::{Dependency, MastArtifact, Package, PackageExport}; -use midenc_session::{Session, diagnostics::IntoDiagnostic}; +use midenc_session::Session; use super::*; @@ -43,8 +43,6 @@ impl Stage for AssembleStage { "successfully assembled mast artifact with digest {}", DisplayHex::new(&mast.digest().as_bytes()) ); - session.emit(OutputMode::Text, &mast).into_diagnostic()?; - session.emit(OutputMode::Binary, &mast).into_diagnostic()?; Ok(Artifact::Assembled(build_package(mast, &input, session))) } else { log::debug!( diff --git a/midenc-compile/src/stages/codegen.rs b/midenc-compile/src/stages/codegen.rs index ec5490311..382200fb9 100644 --- a/midenc-compile/src/stages/codegen.rs +++ b/midenc-compile/src/stages/codegen.rs @@ -53,11 +53,6 @@ impl Stage for CodegenStage { component.borrow().to_masm_component(analysis_manager).map(Box::new)?; let session = context.session(); - if session.should_emit(OutputType::Masm) { - for module in masm_component.modules.iter() { - session.emit(OutputMode::Text, module).into_diagnostic()?; - } - } // Ensure intrinsics modules are linked for intrinsics_module in required_intrinsics_modules(session) { @@ -74,6 +69,10 @@ impl Stage for CodegenStage { masm_component.modules.push(module); } + if 
session.should_emit(OutputType::Masm) { + session.emit(OutputMode::Text, masm_component.as_ref()).into_diagnostic()?; + } + Ok(CodegenOutput { component: Arc::from(masm_component), link_libraries, diff --git a/midenc-compile/src/stages/parse.rs b/midenc-compile/src/stages/parse.rs index d19e8203d..aab6c489f 100644 --- a/midenc-compile/src/stages/parse.rs +++ b/midenc-compile/src/stages/parse.rs @@ -1,4 +1,6 @@ #[cfg(feature = "std")] +use alloc::borrow::Cow; +#[cfg(feature = "std")] use alloc::string::ToString; use alloc::{format, rc::Rc, sync::Arc}; @@ -6,6 +8,8 @@ use miden_assembly::utils::Deserializable; #[cfg(feature = "std")] use miden_assembly::utils::ReadAdapter; #[cfg(feature = "std")] +use midenc_frontend_wasm::{WatEmit, wasm_to_wat}; +#[cfg(feature = "std")] use midenc_session::{FileName, Path}; use midenc_session::{ InputFile, InputType, @@ -122,13 +126,45 @@ impl Stage for ParseStage { ParseOutput::Module(ref module) => { context.session().emit(OutputMode::Text, module).into_diagnostic()?; } - ParseOutput::Wasm(_) | ParseOutput::Library(_) | ParseOutput::Package(_) => (), + #[cfg(feature = "std")] + ParseOutput::Wasm(ref wasm_input) => { + self.emit_wat_for_wasm_input(wasm_input, context.session())?; + } + #[cfg(not(feature = "std"))] + ParseOutput::Wasm(_) => (), + ParseOutput::Library(_) | ParseOutput::Package(_) => (), } Ok(parsed) } } impl ParseStage { + #[cfg(feature = "std")] + fn emit_wat_for_wasm_input(&self, input: &InputType, session: &Session) -> CompilerResult<()> { + if !session.should_emit(midenc_session::OutputType::Wat) { + return Ok(()); + } + + let wasm_bytes: Cow<'_, [u8]> = match input { + InputType::Real(path) => { + Cow::Owned(std::fs::read(path).into_diagnostic().wrap_err_with(|| { + format!("failed to read wasm input from '{}'", path.display()) + })?) + } + InputType::Stdin { input, .. 
} => Cow::Borrowed(input), + }; + + let wat = wasm_to_wat(wasm_bytes.as_ref()) + .into_diagnostic() + .wrap_err("failed to convert wasm to wat")?; + let artifact = WatEmit(&wat); + session + .emit(OutputMode::Text, &artifact) + .into_diagnostic() + .wrap_err("failed to emit wat output")?; + Ok(()) + } + #[cfg(feature = "std")] fn parse_wasm_from_wat_file(&self, path: &Path) -> CompilerResult { let wasm = wat::parse_file(path).into_diagnostic().wrap_err("failed to parse wat")?; diff --git a/midenc-session/Cargo.toml b/midenc-session/Cargo.toml index b867f2874..0a44a42d2 100644 --- a/midenc-session/Cargo.toml +++ b/midenc-session/Cargo.toml @@ -38,6 +38,7 @@ miden-stdlib.workspace = true midenc-hir-symbol.workspace = true midenc-hir-macros.workspace = true miden-lib.workspace = true +smallvec.workspace = true parking_lot = { workspace = true, optional = true } termcolor = { version = "1.4.1", optional = true } thiserror.workspace = true diff --git a/midenc-session/src/emit.rs b/midenc-session/src/emit.rs index c13b23681..27f395eb5 100644 --- a/midenc-session/src/emit.rs +++ b/midenc-session/src/emit.rs @@ -283,10 +283,8 @@ impl Emit for miden_assembly::Library { } fn output_type(&self, mode: OutputMode) -> OutputType { - match mode { - OutputMode::Text => OutputType::Mast, - OutputMode::Binary => OutputType::Masl, - } + let _ = mode; + OutputType::Mast } fn write_to( @@ -364,10 +362,8 @@ impl Emit for miden_core::Program { } fn output_type(&self, mode: OutputMode) -> OutputType { - match mode { - OutputMode::Text => OutputType::Mast, - OutputMode::Binary => OutputType::Masl, - } + let _ = mode; + OutputType::Mast } fn write_to( @@ -462,20 +458,26 @@ impl Emit for MastArtifact { } fn output_type(&self, mode: OutputMode) -> OutputType { - match mode { - OutputMode::Text => OutputType::Mast, - OutputMode::Binary => OutputType::Masl, - } + let _ = mode; + OutputType::Mast } fn write_to( &self, mut writer: W, - _mode: OutputMode, - _session: &Session, + mode: OutputMode, 
+ session: &Session, ) -> anyhow::Result<()> { - let mut writer = ByteWriterAdapter(&mut writer); - self.write_into(&mut writer); - Ok(()) + match mode { + OutputMode::Text => match self { + MastArtifact::Executable(program) => program.write_to(writer, mode, session), + MastArtifact::Library(lib) => lib.write_to(writer, mode, session), + }, + OutputMode::Binary => { + let mut writer = ByteWriterAdapter(&mut writer); + self.write_into(&mut writer); + Ok(()) + } + } } } diff --git a/midenc-session/src/lib.rs b/midenc-session/src/lib.rs index 0015f0579..f65fd1b55 100644 --- a/midenc-session/src/lib.rs +++ b/midenc-session/src/lib.rs @@ -304,7 +304,7 @@ impl Session { /// Get the [OutputFile] to write the assembled MAST output to pub fn out_file(&self) -> OutputFile { - let out_file = self.output_files.output_file(OutputType::Masl, None); + let out_file = self.output_files.output_file(OutputType::Masp, None); if let OutputFile::Real(ref path) = out_file { self.check_file_is_writeable(path); @@ -393,6 +393,9 @@ impl Session { if self.should_emit(ty) { match self.output_files.output_file(ty, name.map(|n| n.as_str())) { OutputFile::Real(path) => Some(path), + OutputFile::Directory(_) => { + unreachable!("OutputFiles::output_file never returns OutputFile::Directory") + } OutputFile::Stdout => None, } } else { @@ -410,6 +413,9 @@ impl Session { OutputFile::Real(path) => { item.write_to_file(&path, mode, self)?; } + OutputFile::Directory(_) => { + unreachable!("OutputFiles::output_file never returns OutputFile::Directory") + } OutputFile::Stdout => { let stdout = std::io::stdout().lock(); item.write_to(stdout, mode, self)?; diff --git a/midenc-session/src/outputs.rs b/midenc-session/src/outputs.rs index b9e45138b..5744ebdae 100644 --- a/midenc-session/src/outputs.rs +++ b/midenc-session/src/outputs.rs @@ -6,8 +6,42 @@ use alloc::{ string::String, }; +use smallvec::SmallVec; + use crate::{Path, PathBuf}; +/// Escape `name` for use as a single filesystem path component (e.g. 
a file stem). +/// +/// This is used when emitting artifacts whose names may contain characters that are legal in +/// compiler/session identifiers, but are problematic (or even invalid) as filenames on common +/// filesystems. +fn escape_path_component(name: &str) -> Cow<'_, str> { + if name.is_empty() { + return Cow::Borrowed("_"); + } + + let is_safe = name != "." + && name != ".." + && name.chars().all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-')); + if is_safe { + return Cow::Borrowed(name); + } + + let mut escaped = String::with_capacity(name.len()); + for ch in name.chars() { + if ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-') { + escaped.push(ch); + } else { + escaped.push('_'); + } + } + + match escaped.as_str() { + "" | "." | ".." => Cow::Borrowed("_"), + _ => Cow::Owned(escaped), + } +} + /// The type of output to produce for a given [OutputType], when multiple options are available #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum OutputMode { @@ -23,14 +57,14 @@ pub enum OutputMode { pub enum OutputType { /// The compiler will emit the parse tree of the input, if applicable Ast, + /// The compiler will emit WebAssembly text format (WAT), if applicable + Wat, /// The compiler will emit Miden IR Hir, /// The compiler will emit Miden Assembly text Masm, /// The compiler will emit a Merkalized Abstract Syntax Tree in text form Mast, - /// The compiler will emit a MAST library in binary form - Masl, /// The compiler will emit a MAST package in binary form #[default] Masp, @@ -38,16 +72,16 @@ pub enum OutputType { impl OutputType { /// Returns true if this output type is an intermediate artifact produced during compilation pub fn is_intermediate(&self) -> bool { - !matches!(self, Self::Mast | Self::Masl | Self::Masp) + !matches!(self, Self::Mast | Self::Masp) } pub fn extension(&self) -> &'static str { match self { Self::Ast => "ast", + Self::Wat => "wat", Self::Hir => "hir", Self::Masm => "masm", Self::Mast => 
"mast", - Self::Masl => "masl", Self::Masp => "masp", } } @@ -56,33 +90,39 @@ impl OutputType { format!( "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`", Self::Ast, + Self::Wat, Self::Hir, Self::Masm, Self::Mast, - Self::Masl, Self::Masp, ) } - pub fn all() -> [OutputType; 6] { - [ + pub const fn all() -> &'static [OutputType] { + &[ OutputType::Ast, + OutputType::Wat, OutputType::Hir, OutputType::Masm, OutputType::Mast, - OutputType::Masl, OutputType::Masp, ] } + + /// Returns the subset of [OutputType] values considered "intermediate" for convenience + /// emission (WAT, HIR, MASM). + pub const fn ir() -> &'static [OutputType] { + &[OutputType::Wat, OutputType::Hir, OutputType::Masm] + } } impl fmt::Display for OutputType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Ast => f.write_str("ast"), + Self::Wat => f.write_str("wat"), Self::Hir => f.write_str("hir"), Self::Masm => f.write_str("masm"), Self::Mast => f.write_str("mast"), - Self::Masl => f.write_str("masl"), Self::Masp => f.write_str("masp"), } } @@ -93,10 +133,10 @@ impl FromStr for OutputType { fn from_str(s: &str) -> Result { match s { "ast" => Ok(Self::Ast), + "wat" => Ok(Self::Wat), "hir" => Ok(Self::Hir), "masm" => Ok(Self::Masm), "mast" => Ok(Self::Mast), - "masl" => Ok(Self::Masl), "masp" => Ok(Self::Masp), _ => Err(()), } @@ -106,12 +146,18 @@ impl FromStr for OutputType { #[derive(Debug, Clone)] pub enum OutputFile { Real(PathBuf), + /// A directory in which to place outputs. + /// + /// This is distinct from [OutputFile::Real] because callers may want a path to be treated as a + /// directory even if it does not exist yet. 
+ Directory(PathBuf), Stdout, } impl OutputFile { pub fn parent(&self) -> Option<&Path> { match self { Self::Real(path) => path.parent(), + Self::Directory(path) => Some(path.as_ref()), Self::Stdout => None, } } @@ -119,6 +165,7 @@ impl OutputFile { pub fn filestem(&self) -> Option> { match self { Self::Real(path) => path.file_stem().map(|stem| stem.to_string_lossy()), + Self::Directory(_) => None, Self::Stdout => None, } } @@ -132,6 +179,7 @@ impl OutputFile { use std::io::IsTerminal; match self { Self::Real(_) => false, + Self::Directory(_) => false, Self::Stdout => std::io::stdout().is_terminal(), } } @@ -144,6 +192,7 @@ impl OutputFile { pub fn as_path(&self) -> Option<&Path> { match self { Self::Real(path) => Some(path.as_ref()), + Self::Directory(path) => Some(path.as_ref()), Self::Stdout => None, } } @@ -156,6 +205,15 @@ impl OutputFile { ) -> PathBuf { match self { Self::Real(path) => path.clone(), + Self::Directory(dir) => { + let dir = if dir.is_absolute() { + dir.clone() + } else { + outputs.cwd.join(dir) + }; + let stem = escape_path_component(name.unwrap_or(outputs.stem.as_str())); + dir.join(stem.as_ref()).with_extension(ty.extension()) + } Self::Stdout => outputs.temp_path(ty, name), } } @@ -164,6 +222,7 @@ impl fmt::Display for OutputFile { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Real(path) => write!(f, "{}", path.display()), + Self::Directory(path) => write!(f, "{}", path.display()), Self::Stdout => write!(f, "stdout"), } } @@ -184,7 +243,7 @@ pub struct OutputFiles { pub out_dir: PathBuf, /// If specified, the specific path at which to write the compiler output. /// - /// This _only_ applies to the final output, i.e. the `.masl` library or executable. + /// This _only_ applies to the final output, e.g. the `.masp` package. 
pub out_file: Option, /// The raw output types requested by the user on the command line pub outputs: OutputTypes, @@ -211,41 +270,56 @@ impl OutputFiles { /// Return the [OutputFile] representing where an output of `ty` type should be written, /// with an optional `name`, which overrides the file stem of the resulting path. pub fn output_file(&self, ty: OutputType, name: Option<&str>) -> OutputFile { - let default_name = name.unwrap_or(self.stem.as_str()); - self.outputs - .get(&ty) - .and_then(|p| p.to_owned()) - .map(|of| match of { - OutputFile::Real(path) => OutputFile::Real({ - let path = if path.is_absolute() { - path - } else { - self.cwd.join(path) - }; - if path.is_dir() { - path.join(default_name).with_extension(ty.extension()) - } else if let Some(name) = name { - path.with_stem_and_extension(name, ty.extension()) + let requested = self.outputs.contains_key(&ty); + let default_name = escape_path_component(name.unwrap_or(self.stem.as_str())); + match self.outputs.get(&ty).and_then(|p| p.to_owned()) { + Some(OutputFile::Real(path)) => OutputFile::Real({ + let path = if path.is_absolute() { + path + } else { + self.cwd.join(path) + }; + if path.is_dir() { + path.join(default_name.as_ref()).with_extension(ty.extension()) + } else if let Some(name) = name { + let name = escape_path_component(name); + path.with_stem_and_extension(name.as_ref(), ty.extension()) + } else { + path + } + }), + Some(OutputFile::Directory(dir)) => OutputFile::Real({ + let dir = if dir.is_absolute() { + dir + } else { + self.cwd.join(dir) + }; + dir.join(default_name.as_ref()).with_extension(ty.extension()) + }), + Some(OutputFile::Stdout) => OutputFile::Stdout, + None => { + // If the user requested an output type without specifying a destination, default to + // the session output directory (i.e. the working directory by default). Only + // compiler-internal temporaries use `tmp_dir`. 
+ let out = if ty.is_intermediate() { + if requested { + self.with_directory_and_extension(&self.out_dir, ty.extension()) } else { - path + self.with_directory_and_extension(&self.tmp_dir, ty.extension()) } - }), - out @ OutputFile::Stdout => out, - }) - .unwrap_or_else(|| { - let out = if ty.is_intermediate() { - self.with_directory_and_extension(&self.tmp_dir, ty.extension()) } else if let Some(output_file) = self.out_file.as_ref() { return output_file.clone(); } else { self.with_directory_and_extension(&self.out_dir, ty.extension()) }; OutputFile::Real(if let Some(name) = name { - out.with_stem(name) + let name = escape_path_component(name); + out.with_stem(name.as_ref()) } else { out }) - }) + } + } } /// Return the most appropriate file path for an output of `ty` type. @@ -256,6 +330,9 @@ impl OutputFiles { pub fn output_path(&self, ty: OutputType) -> PathBuf { match self.output_file(ty, None) { OutputFile::Real(path) => path, + OutputFile::Directory(_) => { + unreachable!("OutputFiles::output_file never returns OutputFile::Directory") + } OutputFile::Stdout => { if ty.is_intermediate() { self.with_directory_and_extension(&self.tmp_dir, ty.extension()) @@ -274,9 +351,8 @@ impl OutputFiles { /// /// The file path is always a child of `self.tmp_dir` pub fn temp_path(&self, ty: OutputType, name: Option<&str>) -> PathBuf { - self.tmp_dir - .join(name.unwrap_or(self.stem.as_str())) - .with_extension(ty.extension()) + let name = escape_path_component(name.unwrap_or(self.stem.as_str())); + self.tmp_dir.join(name.as_ref()).with_extension(ty.extension()) } /// Build a file path which is either: @@ -286,6 +362,14 @@ impl OutputFiles { pub fn with_extension(&self, extension: &str) -> PathBuf { match self.out_file.as_ref() { Some(OutputFile::Real(path)) => path.with_extension(extension), + Some(OutputFile::Directory(dir)) => { + let dir = if dir.is_absolute() { + dir.clone() + } else { + self.cwd.join(dir) + }; + self.with_directory_and_extension(&dir, extension) + } 
Some(OutputFile::Stdout) | None => { self.with_directory_and_extension(&self.out_dir, extension) } @@ -296,7 +380,8 @@ impl OutputFiles { /// `extension` #[inline] fn with_directory_and_extension(&self, directory: &Path, extension: &str) -> PathBuf { - directory.join(&self.stem).with_extension(extension) + let stem = escape_path_component(&self.stem); + directory.join(stem.as_ref()).with_extension(extension) } } @@ -316,21 +401,58 @@ impl OutputTypes { "--emit=all cannot be combined with other --emit types", )); } - if let Some(OutputFile::Real(path)) = &path - && path.extension().is_some() - { - return Err(clap::Error::raw( - clap::error::ErrorKind::ValueValidation, - "invalid path for --emit=all: must be a directory", - )); - } - for ty in OutputType::all() { + let path = match path { + None => None, + Some(OutputFile::Real(path)) => { + if path.extension().is_some() { + return Err(clap::Error::raw( + clap::error::ErrorKind::ValueValidation, + "invalid path for --emit=all: must be a directory", + )); + } + Some(OutputFile::Directory(path)) + } + Some(OutputFile::Directory(path)) => { + if path.extension().is_some() { + return Err(clap::Error::raw( + clap::error::ErrorKind::ValueValidation, + "invalid path for --emit=all: must be a directory", + )); + } + Some(OutputFile::Directory(path)) + } + Some(OutputFile::Stdout) => Some(OutputFile::Stdout), + }; + for &ty in OutputType::all() { map.insert(ty, path.clone()); } } + OutputTypeSpec::Subset { output_types, path } => { + // Emit a bundle of output types into the same destination. + for output_type in output_types { + match map.get(&output_type) { + // If the user already chose an explicit destination for this type, + // don't allow `ir`/`inter` to override it. 
+ Some(Some(_)) => { + return Err(clap::Error::raw( + clap::error::ErrorKind::ValueValidation, + format!( + "conflicting --emit options given for output type \ + '{output_type}'" + ), + )); + } + _ => { + // If the user requested the type without a destination, or hasn't + // requested it at all yet, route it to the `ir` directory. + map.insert(output_type, path.clone()); + } + } + } + } OutputTypeSpec::Typed { output_type, path } => { if path.is_some() { - if matches!(map.get(&output_type), Some(Some(OutputFile::Real(_)))) { + if matches!(map.get(&output_type), Some(Some(_))) { return Err(clap::Error::raw( clap::error::ErrorKind::ValueValidation, format!( @@ -388,27 +510,18 @@ impl OutputTypes { pub fn should_link(&self) -> bool { self.0.keys().any(|k| { - matches!( - k, - OutputType::Hir - | OutputType::Masm - | OutputType::Mast - | OutputType::Masl - | OutputType::Masp - ) + matches!(k, OutputType::Hir | OutputType::Masm | OutputType::Mast | OutputType::Masp) }) } pub fn should_codegen(&self) -> bool { - self.0.keys().any(|k| { - matches!(k, OutputType::Masm | OutputType::Mast | OutputType::Masl | OutputType::Masp) - }) + self.0 + .keys() + .any(|k| matches!(k, OutputType::Masm | OutputType::Mast | OutputType::Masp)) } pub fn should_assemble(&self) -> bool { - self.0 - .keys() - .any(|k| matches!(k, OutputType::Mast | OutputType::Masl | OutputType::Masp)) + self.0.keys().any(|k| matches!(k, OutputType::Mast | OutputType::Masp)) } } @@ -418,6 +531,14 @@ pub enum OutputTypeSpec { All { path: Option, }, + /// Emit a set of output types to a common destination (typically a directory). + /// + /// This is primarily intended for shorthand specifications like `--emit=ir[=PATH]`, but can + /// represent any two-or-more output types with a shared destination. 
+ Subset { + output_types: SmallVec<[OutputType; 3]>, + path: Option, + }, Typed { output_type: OutputType, path: Option, @@ -451,11 +572,12 @@ impl clap::builder::TypedValueParser for OutputTypeParser { Some(Box::new( [ PossibleValue::new("ast").help("Abstract Syntax Tree (text)"), + PossibleValue::new("wat").help("WebAssembly text format (text)"), PossibleValue::new("hir").help("High-level Intermediate Representation (text)"), PossibleValue::new("masm").help("Miden Assembly (text)"), PossibleValue::new("mast").help("Merkelized Abstract Syntax Tree (text)"), - PossibleValue::new("masl").help("Merkelized Abstract Syntax Tree (binary)"), PossibleValue::new("masp").help("Miden Assembly Package Format (binary)"), + PossibleValue::new("ir").help("WAT + HIR + MASM (text, optional directory)"), PossibleValue::new("all").help("All of the above"), ] .into_iter(), @@ -478,14 +600,36 @@ impl clap::builder::TypedValueParser for OutputTypeParser { Some((shorthand, path)) => (shorthand, Some(OutputFile::Real(PathBuf::from(path)))), }; if shorthand == "all" { + let path = match path { + None => None, + Some(OutputFile::Real(path)) => Some(OutputFile::Directory(path)), + Some(OutputFile::Stdout) => Some(OutputFile::Stdout), + Some(OutputFile::Directory(_)) => unreachable!("all path is parsed as real"), + }; return Ok(OutputTypeSpec::All { path }); } + if shorthand == "ir" { + let path = match path { + None => None, + Some(OutputFile::Real(path)) => Some(OutputFile::Directory(path)), + Some(OutputFile::Stdout) => { + return Err(Error::raw( + ErrorKind::InvalidValue, + format!("invalid output type: `{shorthand}=-` - expected `ir[=PATH]`"), + )); + } + Some(OutputFile::Directory(_)) => unreachable!("ir path is parsed as real"), + }; + let output_types = SmallVec::from_slice(OutputType::ir()); + return Ok(OutputTypeSpec::Subset { output_types, path }); + } let output_type = shorthand.parse::().map_err(|_| { Error::raw( ErrorKind::InvalidValue, format!( - "invalid output type: 
`{shorthand}` - expected one of: {display}", - display = OutputType::shorthand_display() + "invalid output type: `{shorthand}` - expected one of: {display}, `all`, \ + `ir[=PATH]`", + display = OutputType::shorthand_display(), ), ) })?; diff --git a/tests/integration/src/compiler_test.rs b/tests/integration/src/compiler_test.rs index eb1be3674..00728470a 100644 --- a/tests/integration/src/compiler_test.rs +++ b/tests/integration/src/compiler_test.rs @@ -194,6 +194,8 @@ impl CompilerTestBuilder { "-C".into(), "target-feature=+bulk-memory,+wide-arithmetic".into(), // Compile with panic=immediate-abort to avoid emitting any panic formatting code + "-Z".into(), + "unstable-options".into(), "-C".into(), "panic=immediate-abort".into(), // Remap the compiler workspace to `.` so that build outputs do not embed user- @@ -301,8 +303,10 @@ impl CompilerTestBuilder { /// Consume the builder, invoke any tools required to obtain the inputs for the test, and if /// successful, return a [CompilerTest], ready for evaluation. pub fn build(mut self) -> CompilerTest { + let source = self.source; + // Set up the command used to compile the test inputs (typically Rust -> Wasm) - let mut command = match self.source { + let mut command = match &source { CompilerTestInputType::CargoMiden(_) => { let mut cmd = Command::new("cargo"); cmd.arg("miden").arg("build"); @@ -312,18 +316,18 @@ impl CompilerTestBuilder { }; // Extract the directory in which source code is presumed to exist (or will be placed) - let project_dir = match self.source { - CompilerTestInputType::CargoMiden(CargoTest { - ref project_dir, .. - }) => Cow::Borrowed(project_dir.as_path()), - CompilerTestInputType::Rustc(RustcTest { ref target_dir, .. }) => target_dir + let project_dir = match &source { + CompilerTestInputType::CargoMiden(CargoTest { project_dir, .. }) => { + Cow::Borrowed(project_dir.as_path()) + } + CompilerTestInputType::Rustc(RustcTest { target_dir, .. 
}) => target_dir .as_deref() .map(Cow::Borrowed) .unwrap_or_else(|| Cow::Owned(std::env::temp_dir())), }; // Cargo-based source types share a lot of configuration in common - if let CompilerTestInputType::CargoMiden(ref config) = self.source { + if let CompilerTestInputType::CargoMiden(ref config) = source { let manifest_path = project_dir.join("Cargo.toml"); command.arg("--manifest-path").arg(manifest_path); if config.release { @@ -332,6 +336,7 @@ impl CompilerTestBuilder { } // All test source types support custom RUSTFLAGS + let mut rustflags_env = None::; if !self.rustflags.is_empty() { let mut flags = String::with_capacity( self.rustflags.iter().map(|flag| flag.len()).sum::() + self.rustflags.len(), @@ -342,15 +347,18 @@ impl CompilerTestBuilder { } flags.push_str(flag.as_ref()); } - command.env("RUSTFLAGS", flags); + command.env("RUSTFLAGS", &flags); + rustflags_env = Some(flags); } // Pipe output of command to terminal command.stdout(Stdio::piped()); // Build test - match self.source { - CompilerTestInputType::CargoMiden(..) 
=> { + match source { + CompilerTestInputType::CargoMiden(config) => { + maybe_dump_cargo_expand(&config, rustflags_env.as_deref()); + let mut args = vec![command.get_program().to_str().unwrap().to_string()]; let cmd_args: Vec = command .get_args() @@ -371,8 +379,6 @@ impl CompilerTestBuilder { } => (artifact_path, midenc_flags), other => panic!("Expected Wasm output, got {:?}", other), }; - // dbg!(&wasm_artifact_path); - // dbg!(&extra_midenc_flags); self.midenc_flags.append(&mut extra_midenc_flags); let artifact_name = wasm_artifact_path.file_stem().unwrap().to_str().unwrap().to_string(); @@ -388,7 +394,6 @@ impl CompilerTestBuilder { }, ) })); - // dbg!(&inputs); let context = setup::default_context(inputs, &self.midenc_flags); let session = context.session_rc(); @@ -854,7 +859,6 @@ impl CompilerTest { /// Compare the compiled MASM against the expected output pub fn expect_masm(&mut self, expected_masm_file: midenc_expect_test::ExpectFile) { let program = demangle(self.masm_src().as_str()); - std::println!("{program}"); expected_masm_file.assert_eq(&program); } @@ -1029,6 +1033,93 @@ fn wasm_to_wat(wasm_bytes: &[u8]) -> String { wat } +/// Run `cargo expand` for the given Cargo test fixture, and write the expanded Rust code to disk if +/// `MIDENC_EMIT_MACRO_EXPAND[=]` is set. +/// +/// When `MIDENC_EMIT_MACRO_EXPAND` is set with an empty value, the expanded output is written to +/// the current working directory. When set to `1`, it is treated as enabled and also defaults to +/// the current working directory. When set to a non-empty value other than `1`, it is treated as +/// the output directory. 
+fn maybe_dump_cargo_expand(test: &CargoTest, rustflags_env: Option<&str>) { + let Some(value) = std::env::var_os("MIDENC_EMIT_MACRO_EXPAND") else { + return; + }; + + let project_dir = if test.project_dir.is_absolute() { + test.project_dir.clone() + } else { + std::env::current_dir().unwrap().join(&test.project_dir) + }; + + let out_dir = if value.is_empty() || value == std::ffi::OsStr::new("1") { + std::env::current_dir().unwrap() + } else { + PathBuf::from(value) + }; + fs::create_dir_all(&out_dir).unwrap_or_else(|err| { + panic!( + "failed to create MIDENC_EMIT_MACRO_EXPAND output directory '{}': {err}", + out_dir.display() + ) + }); + + let filename = format!("{}.expanded.rs", sanitize_filename_component(test.name.as_ref())); + let out_file = out_dir.join(filename); + + let manifest_path = project_dir.join("Cargo.toml"); + + let mut cmd = Command::new("cargo"); + cmd.arg("expand") + .arg("--manifest-path") + .arg(&manifest_path) + // Match the target used by `cargo miden build` (and our compiler tests), so `cfg(target_*)` + // and target-specific `RUSTFLAGS` behave consistently. + .arg("--target") + .arg("wasm32-wasip2") + // Ensure the output we write doesn't include ANSI codes. 
+ .env("CARGO_TERM_COLOR", "never"); + + if test.release { + cmd.arg("--release"); + } + if let Some(rustflags_env) = rustflags_env { + cmd.env("RUSTFLAGS", rustflags_env); + } + + let output = cmd.output().unwrap_or_else(|err| { + panic!("failed to invoke 'cargo expand' (is cargo-expand installed?): {err}") + }); + if !output.status.success() { + panic!( + "'cargo expand' failed (status: {:?})\nstdout:\n{}\nstderr:\n{}", + output.status.code(), + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + + fs::write(&out_file, &output.stdout).unwrap_or_else(|err| { + panic!("failed to write expanded Rust code to '{}': {err}", out_file.display()) + }); + eprintln!("wrote expanded Rust code to '{}'", out_file.display()); +} + +/// Convert an arbitrary test name into a reasonable filename component. +fn sanitize_filename_component(name: &str) -> String { + let mut out = String::with_capacity(name.len()); + for ch in name.chars() { + match ch { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' => out.push(ch), + _ => out.push('_'), + } + } + if out.is_empty() { + "expanded".to_string() + } else { + out + } +} + fn hash_string(inputs: &str) -> String { let hash = ::digest(inputs.as_bytes()); format!("{hash:x}") diff --git a/tests/rust-apps-wasm/rust-sdk/assert-debug-test/Cargo.lock b/tests/rust-apps-wasm/rust-sdk/assert-debug-test/Cargo.lock index 17205c8a0..771e0a48e 100644 --- a/tests/rust-apps-wasm/rust-sdk/assert-debug-test/Cargo.lock +++ b/tests/rust-apps-wasm/rust-sdk/assert-debug-test/Cargo.lock @@ -1007,7 +1007,7 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "miden" -version = "0.7.1" +version = "0.8.0" dependencies = [ "miden-base", "miden-base-sys", @@ -1067,7 +1067,7 @@ dependencies = [ [[package]] name = "miden-base" -version = "0.7.1" +version = "0.8.0" dependencies = [ "miden-base-macros", "miden-base-sys", @@ -1076,7 +1076,7 @@ dependencies = [ [[package]] name = 
"miden-base-macros" -version = "0.7.1" +version = "0.8.0" dependencies = [ "heck", "miden-objects", @@ -1091,7 +1091,7 @@ dependencies = [ [[package]] name = "miden-base-sys" -version = "0.8.0" +version = "0.8.1" dependencies = [ "miden-stdlib-sys", ] @@ -1279,7 +1279,7 @@ dependencies = [ [[package]] name = "miden-sdk-alloc" -version = "0.7.0" +version = "0.8.0" [[package]] name = "miden-stdlib" @@ -1299,7 +1299,7 @@ dependencies = [ [[package]] name = "miden-stdlib-sys" -version = "0.7.1" +version = "0.8.0" [[package]] name = "miden-utils-diagnostics" diff --git a/tools/cargo-miden/src/commands/build.rs b/tools/cargo-miden/src/commands/build.rs index f02c84690..68767f06d 100644 --- a/tools/cargo-miden/src/commands/build.rs +++ b/tools/cargo-miden/src/commands/build.rs @@ -106,10 +106,17 @@ impl BuildCommand { midenc_flags.push(dep_path.to_string_lossy().to_string()); } - // Merge user-provided midenc options from parsed Compiler struct - // User options override target-derived defaults + // Merge user-provided build options + midenc_flags.extend_from_slice(&self.args); + // When debug info is enabled, automatically add -Ztrim-path-prefix to normalize + // source paths in debug information. let package_source_dir = cargo_package.manifest_path.parent().map(|p| p.as_std_path()); - midenc_flags = merge_midenc_flags(midenc_flags, &compiler_opts, package_source_dir); + if compiler_opts.debug != midenc_session::DebugInfo::None + && let Some(source_dir) = package_source_dir + { + let trim_prefix = format!("-Ztrim-path-prefix={}", source_dir.display()); + midenc_flags.push(trim_prefix); + } match build_output_type { OutputType::Wasm => Ok(Some(CommandOutput::BuildCommandOutput { @@ -232,61 +239,6 @@ fn build_cargo_args(cargo_opts: &CargoOptions) -> Vec { args } -/// Merges user-provided options with target-derived defaults. 
-/// -/// The following options are merged from user input: -/// - `--emit` options -/// - `--debug` option (and automatically adds `-Ztrim-path-prefix` when debug is enabled) -/// -/// All other options are determined by the detected target environment and project type. -fn merge_midenc_flags( - mut base: Vec, - compiler: &Compiler, - package_source_dir: Option<&Path>, -) -> Vec { - // Merge --emit options from user input - for spec in &compiler.output_types { - base.push("--emit".to_string()); - let spec_str = match spec { - midenc_session::OutputTypeSpec::All { path } => { - if let Some(p) = path { - format!("all={p}") - } else { - "all".to_string() - } - } - midenc_session::OutputTypeSpec::Typed { output_type, path } => { - if let Some(p) = path { - format!("{output_type}={p}") - } else { - output_type.to_string() - } - } - }; - base.push(spec_str); - } - - // Pass through the --debug flag to midenc - let debug_level = match compiler.debug { - midenc_session::DebugInfo::None => "none", - midenc_session::DebugInfo::Line => "line", - midenc_session::DebugInfo::Full => "full", - }; - base.push("--debug".to_string()); - base.push(debug_level.to_string()); - - // When debug info is enabled, automatically add -Ztrim-path-prefix to normalize - // source paths in debug information. 
- if compiler.debug != midenc_session::DebugInfo::None - && let Some(source_dir) = package_source_dir - { - let trim_prefix = format!("-Ztrim-path-prefix={}", source_dir.display()); - base.push(trim_prefix); - } - - base -} - fn run_cargo(wasi: &str, spawn_args: &[String], env: E) -> Result> where E: IntoIterator, diff --git a/tools/cargo-miden/src/dependencies.rs b/tools/cargo-miden/src/dependencies.rs index f21b66112..92cd5ee20 100644 --- a/tools/cargo-miden/src/dependencies.rs +++ b/tools/cargo-miden/src/dependencies.rs @@ -140,8 +140,6 @@ pub fn process_miden_dependencies( if cargo_opts.release { dep_build_args.push("--release".to_string()); } - // Dependencies should always be built as libraries - dep_build_args.push("--lib".to_string()); // We expect dependencies to *always* produce Masm libraries (.masp) let command_output = crate::run(dep_build_args.into_iter(), OutputType::Masm)