From 3e2d801cd1b63894dd9899ab00f82eba427d2fb0 Mon Sep 17 00:00:00 2001 From: Peter Duchovni Date: Fri, 16 May 2025 17:21:03 +1000 Subject: [PATCH 1/4] Add Format::Sequence (vec codegen backend), Expr::Append --- doodle-formats/src/format/png.rs | 2 +- generated/gencode.rs | 73 +++++------ src/codegen/mod.rs | 76 ++++++++++- src/codegen/rust_ast/mod.rs | 216 ++++++++++++++++++++++++------- src/codegen/rust_ast/rebind.rs | 34 +++-- src/codegen/rust_ast/resolve.rs | 85 ++++++++---- src/codegen/typed_decoder.rs | 17 ++- src/codegen/typed_format.rs | 22 +++- src/decoder.rs | 58 ++++++--- src/decoder/seq_kind.rs | 31 +++++ src/helper.rs | 12 +- src/lib.rs | 98 +++++++++----- src/loc_decoder.rs | 43 ++++-- src/output/flat.rs | 12 +- src/output/tree.rs | 106 ++++++++++++++- src/precedence.rs | 3 + src/prelude.rs | 11 ++ src/typecheck.rs | 23 ++++ 18 files changed, 725 insertions(+), 197 deletions(-) diff --git a/doodle-formats/src/format/png.rs b/doodle-formats/src/format/png.rs index 80ba7e8b..c578b375 100644 --- a/doodle-formats/src/format/png.rs +++ b/doodle-formats/src/format/png.rs @@ -466,7 +466,7 @@ pub fn main( module.define_format( "png.main", record_auto([ - ("__signature", is_bytes(PNG_SIGNATURE)), + ("signature", byte_seq(PNG_SIGNATURE)), ("ihdr", ihdr.call()), ("chunks", repeat(png_chunk.call_args(vec![var("ihdr")]))), ( diff --git a/generated/gencode.rs b/generated/gencode.rs index 4e22e13c..8b4ce61e 100644 --- a/generated/gencode.rs +++ b/generated/gencode.rs @@ -4242,9 +4242,10 @@ tag: (u8, u8, u8, u8), crc: u32 } -/// expected size: 184 +/// expected size: 208 #[derive(Debug, Clone)] pub struct png_main { +signature: Vec, ihdr: png_ihdr, chunks: Vec, idat: zlib_main, @@ -4951,72 +4952,72 @@ PResult::Ok(mpeg4_main { atoms }) } fn Decoder_png_main<>(_input: &mut Parser<'_>) -> Result { -{ -let field0 = ((|| { +let signature = { +let _seq0 = { let b = _input.read_byte()?; -PResult::Ok(if b == 137 { +if b == 137 { b } else { return Err(ParseError::ExcludedBranch(8253205784254894771u64)); -}) -})())?; -let field1 = ((|| { +} +}; +let _seq1 = { let b = _input.read_byte()?; -PResult::Ok(if b == 80 { +if b == 80 { b } else { return Err(ParseError::ExcludedBranch(1225514472166157741u64)); -}) -})())?; -let field2 = ((|| { +} +}; +let _seq2 = { let b = _input.read_byte()?; -PResult::Ok(if b == 78 { +if b == 78 { b } else { return Err(ParseError::ExcludedBranch(1224415506115142500u64)); -}) -})())?; -let field3 = ((|| { +} +}; +let _seq3 = { let b = _input.read_byte()?; -PResult::Ok(if b == 71 { +if b == 71 { b } else { return Err(ParseError::ExcludedBranch(16859485491091215361u64)); -}) -})())?; -let field4 = ((|| { +} +}; +let _seq4 = { let b = _input.read_byte()?; -PResult::Ok(if b == 13 { +if b == 13 { b } else { return Err(ParseError::ExcludedBranch(14898840355839773829u64)); -}) -})())?; -let field5 = ((|| { +} +}; +let _seq5 = { let b = _input.read_byte()?; -PResult::Ok(if b == 10 { +if b == 10 { b } else { return Err(ParseError::ExcludedBranch(9453951600195794313u64)); -}) -})())?; -let field6 = ((|| { +} +}; +let _seq6 = { let b = _input.read_byte()?; -PResult::Ok(if b == 26 { +if b == 26 { b } else { return Err(ParseError::ExcludedBranch(10036157788440812915u64)); -}) -})())?; -let field7 = ((|| { +} +}; +let _seq7 = { let b = _input.read_byte()?; -PResult::Ok(if b == 10 { +if b == 10 { b } else { return Err(ParseError::ExcludedBranch(6349531732377484771u64)); -}) -})())?; -(field0, field1, field2, field3, field4, field5, field6, field7) +} +}; +vec![_seq0, _seq1, _seq2, _seq3, _seq4, _seq5, _seq6, _seq7] }; let ihdr = (Decoder_png_ihdr(_input))?; let chunks = { @@ -5162,7 +5163,7 @@ break accum }; let iend = (Decoder_png_iend(_input))?; -PResult::Ok(png_main { ihdr, chunks, idat, more_chunks, iend }) +PResult::Ok(png_main { signature, ihdr, chunks, idat, more_chunks, iend }) } fn Decoder_riff_main<>(_input: &mut Parser<'_>) -> Result { diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 4fd2a409..57de1e08 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -313,6 +313,14 @@ impl CodeGen { "TypedDecoder::Tuple expected to have type RustType::AnonTuple(..) (or UNIT if empty), found {other:?}" ), } + TypedDecoder::Sequence(gt, elts) => match gt { + GenType::Inline(RustType::Atom(AtomType::Comp(CompType::Vec(_t)))) => { + let as_array = _t.prefer_array(elts.len()); + let elements = elts.iter().map(|elt| self.translate(elt.get_dec())).collect(); + CaseLogic::Sequential(SequentialLogic::AccumSeq { as_array, elements }) + }, + other => unreachable!("TypedDecoder::Sequence expected to have type CompType::Vec(..), found {other:?}"), + }, TypedDecoder::Repeat0While(_gt, tree_continue, single) => CaseLogic::Repeat( RepeatLogic::Repeat0ContinueOnMatch( @@ -839,7 +847,11 @@ fn embed_expr(expr: >Expr, info: ExprInfo) -> RustExpr { ) ) } - + TypedExpr::Append(_, seq0, seq1) => { + let lhs = embed_expr(seq0, info); + let rhs = embed_expr(seq1, info); + RustExpr::FunctionCall(Box::new(RustExpr::local("seq_append")), vec![lhs, rhs]) + } TypedExpr::SubSeq(_, seq, ix, len) => { let start_expr = embed_expr_dft(ix); let bind_ix = RustStmt::assign( @@ -2751,7 +2763,12 @@ enum RepeatLogic { /// Fused logic for a left-fold that is updated on each repeat, and contributes to the condition for termination /// /// Lambda order: termination-predicate, then update-function - AccumUntil(GenLambda, GenLambda, Typed, Typed>>), + AccumUntil( + GenLambda, + GenLambda, + Typed, + Typed>>, + ), } pub(crate) type Typed = (T, GenType); @@ -3134,7 +3151,10 @@ where loop_body.push(RustStmt::assign("elem", elt_expr)); loop_body.push(RustStmt::Expr(RustExpr::local("seq").call_method_with( "push", - [RustExpr::owned(RustExpr::local("elem"), elt_type.to_rust_type())], + [RustExpr::owned( + RustExpr::local("elem"), + elt_type.to_rust_type(), + )], ))); let new_acc = update.apply_pair( RustExpr::local("acc"), @@ -3161,6 +3181,10 @@ enum SequentialLogic { constructor: Option, elements: Vec>, }, + AccumSeq { + as_array: bool, + elements: Vec>, + }, } impl ToAst for SequentialLogic @@ -3171,6 +3195,27 @@ where fn to_ast(&self, ctxt: ProdCtxt<'_>) -> GenBlock { match self { + // REVIEW - in certain cases, we may be able to use fixed-sized arrays instead of vec, but that might complicate matters... + SequentialLogic::AccumSeq { as_array, elements } => { + if elements.is_empty() { + return GenBlock::simple_expr(RustExpr::VEC_NIL); + } + let mut stmts = Vec::new(); + let mut terms = Vec::new(); + + for (ix, cl) in elements.iter().enumerate() { + const LAB_PREFIX: &str = "_seq"; + let lab = Label::Owned(format!("{LAB_PREFIX}{ix}")); + stmts.push(GenStmt::BindOnce(lab.clone(), cl.to_ast(ctxt))); + terms.push(RustExpr::local(lab)); + } + let ret = Some(GenExpr::Embed(if *as_array { + RustExpr::ArrayLit(terms) + } else { + RustExpr::Macro(RustMacro::Vec(VecExpr::List(terms))) + })); + GenBlock { stmts, ret } + } SequentialLogic::AccumTuple { constructor, elements, @@ -4141,6 +4186,24 @@ impl<'a> Elaborator<'a> { let gt = self.get_gt_from_index(index); TypedFormat::Tuple(gt, t_elts) } + Format::Sequence(formats) => { + let index = self.get_and_increment_index(); + self.increment_index(); + let t_formats = match &formats[..] { + [] => unreachable!("empty list has no unambiguous type"), + [v] => vec![self.elaborate_format(v, dyn_scope)], + formats => { + let mut t_formats = Vec::with_capacity(formats.len()); + for t in formats { + let t_format = self.elaborate_format(t, dyn_scope); + t_formats.push(t_format); + } + t_formats + } + }; + let gt = self.get_gt_from_index(index); + TypedFormat::Sequence(gt, t_formats) + } Format::Repeat(inner) => { let index = self.get_and_increment_index(); let t_inner = self.elaborate_format(inner, dyn_scope); @@ -4484,7 +4547,14 @@ impl<'a> Elaborator<'a> { let gt = self.get_gt_from_index(index); TypedExpr::Seq(gt, t_elts) } + Expr::Append(lhs, rhs) => { + let t_lhs = self.elaborate_expr(lhs); + let t_rhs = self.elaborate_expr(rhs); + self.increment_index(); + let gt = self.get_gt_from_index(index); + TypedExpr::Append(gt, Box::new(t_lhs), Box::new(t_rhs)) + } Expr::RecordProj(e, fld) => { self.codegen.name_gen.ctxt.push_atom(NameAtom::DeadEnd); let t_e = self.elaborate_expr(e); diff --git a/src/codegen/rust_ast/mod.rs b/src/codegen/rust_ast/mod.rs index e0c76efb..396889b4 100644 --- a/src/codegen/rust_ast/mod.rs +++ b/src/codegen/rust_ast/mod.rs @@ -685,6 +685,15 @@ impl RustType { } } + /// Returns `true` if seq-formats ([`Format::Sequence`]) of type `Seq()` should prefer to use + /// fixed-size arrays (`[T; N]`) over vectors (`Vec`) during construction. An additional parameter, + /// the length of the sequence (`len`), is passed in to guide the decision, as simple types can be + /// preferable as vectors depending more on the length of the sequence than anything else. + pub(crate) fn prefer_array(&self, _n: usize) -> bool { + // REVIEW - currently, we would need to orchestrate the correct decision at multiple layers, which would take a lot of work + false + } + /// Returns `true` if `self` is a known-`Copy` `RustType`. /// /// # Note @@ -1399,6 +1408,17 @@ pub(crate) enum RustExpr { #[derive(Debug, Clone)] pub(crate) enum RustMacro { Matches(Box, Vec), + Vec(VecExpr), +} + +#[derive(Debug, Clone)] +pub(crate) enum VecExpr { + Nil, + #[expect(dead_code)] + Single(Box), + #[expect(dead_code)] + Repeat(Box, Box), + List(Vec), } impl RustExpr { @@ -1410,6 +1430,8 @@ impl RustExpr { pub const FALSE: Self = Self::PrimitiveLit(RustPrimLit::Boolean(false)); + pub const VEC_NIL: Self = RustExpr::Macro(RustMacro::Vec(VecExpr::Nil)); + /// Returns `Some(varname)` if `self` is a simple entity-reference to identifier `varname`, and /// `None` otherwise. pub fn as_local(&self) -> Option<&Label> { @@ -1490,12 +1512,15 @@ impl RustExpr { Self::Owned(OwnedRustExpr { expr, .. }) => match &*expr { Self::FieldAccess(..) => Self::Borrow(expr), _ => *expr, - } + }, other => Self::Borrow(Box::new(other)), } } - pub fn field(self, name: Name) -> Self where Name: Into