Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions src/bin/doodle/format/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,17 @@ pub fn main(module: &mut FormatModule, base: &BaseModule) -> FormatRef {
),
);

let ascii_str = module.define_format("text.string.ascii", repeat1(base.ascii_char_strict()));
let utf8_str = module.define_format("text.string.utf8", repeat(utf8_char.call()));
let ascii_char = module.define_format(
"text.char.ascii",
Format::Map(
Box::new(base.ascii_char_strict()),
Expr::Lambda("byte".into(), Box::new(Expr::AsChar(Box::new(var("byte"))))),
),
);

module.define_format(
"text.string",
Format::UnionNondet(vec![
("ascii".into(), ascii_str.call()),
("utf8".into(), utf8_str.call()),
]),
Format::RepeatFallback(Box::new(ascii_char.call()), Box::new(utf8_char.call())),
)
}

Expand Down
62 changes: 62 additions & 0 deletions src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub enum Value {
Mapped(Box<Value>, Box<Value>),
Branch(usize, Box<Value>),
Format(Box<Format>),
Fallback(bool, Box<Value>),
}

impl Value {
Expand Down Expand Up @@ -431,6 +432,7 @@ enum Decoder {
Record(Vec<(Cow<'static, str>, Decoder)>),
While(MatchTree, Box<Decoder>),
Until(MatchTree, Box<Decoder>),
RepeatFallback(MatchTree, Box<Decoder>, Box<Decoder>),
RepeatCount(Expr, Box<Decoder>),
RepeatUntilLast(Expr, Box<Decoder>),
RepeatUntilSeq(Expr, Box<Decoder>),
Expand Down Expand Up @@ -858,6 +860,38 @@ impl Decoder {
Err(format!("cannot build match tree for {:?}", format))
}
}
Format::RepeatFallback(narrow, wide) => {
if narrow.is_nullable(compiler.module) || wide.is_nullable(compiler.module) {
return Err(format!(
"Cannot repeat nullable format: Repeat({narrow:?} ⊂ {wide:?})"
));
}

let dnarrow = Box::new(Decoder::compile_next(
compiler,
narrow,
Rc::new(Next::Repeat(narrow, next.clone())),
)?);

let dwide = Box::new(Decoder::compile_next(
compiler,
wide,
Rc::new(Next::Repeat(wide, next.clone())),
)?);

// Under the precondition that narrow is a subset of wide, the union of the two matchtrees is just the
// matchtree for wide

let wide_star = Format::Repeat(wide.clone());
let f_wide = Format::Tuple(vec![(**wide).clone(), wide_star]);
let f_empty = Format::EMPTY;

if let Some(tree) = MatchTree::build(compiler.module, &[f_wide, f_empty], next) {
Ok(Decoder::RepeatFallback(tree, dnarrow, dwide))
} else {
Err(format!("Cannot build match tree for {format:?}"))
}
}
Format::Repeat1(a) => {
if a.is_nullable(compiler.module) {
return Err(format!("cannot repeat nullable format: {a:?}"));
Expand Down Expand Up @@ -1079,6 +1113,34 @@ impl Decoder {
}
Ok((Value::Seq(v), input))
}
Decoder::RepeatFallback(tree, subset, superset) => {
let mut input = input;
let mut v = Vec::new();
let mut decoder = subset;
let mut fellback = false;

while tree.matches(input).ok_or(ParseError::NoValidBranch {
offset: input.offset,
})? == 0
{
match decoder.parse(program, scope, input) {
Ok((va, next_input)) => {
input = next_input;
v.push(va);
}
err @ Err(_) => {
if fellback {
return err;
} else {
decoder = superset;
fellback = true;
}
}
}
}

Ok((Value::Fallback(fellback, Box::new(Value::Seq(v))), input))
}
Decoder::RepeatCount(expr, a) => {
let mut input = input;
let count = expr.eval_value(scope).unwrap_usize();
Expand Down
34 changes: 32 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,8 @@ pub enum Format {
RepeatUntilLast(Expr, Box<Format>),
/// Repeat a format until a condition is satisfied by the sequence
RepeatUntilSeq(Expr, Box<Format>),
/// Repeat an eager narrow format, but continue with a broader item if necessary upon recoverable failure
RepeatFallback(Box<Format>, Box<Format>),
/// Parse a format without advancing the stream position afterwards
Peek(Box<Format>),
/// Attempt to parse a format and fail if it succeeds
Expand Down Expand Up @@ -630,7 +632,7 @@ impl Format {
.map(|(_, f)| f.match_bounds(module))
.reduce(Bounds::add)
.unwrap_or(Bounds::exact(0)),
Format::Repeat(_) => Bounds::new(0, None),
Format::Repeat(_) | Format::RepeatFallback(_, _) => Bounds::new(0, None),
Format::Repeat1(f) => f.match_bounds(module) * Bounds::new(1, None),
Format::RepeatCount(expr, f) => f.match_bounds(module) * expr.bounds(),
Format::RepeatUntilLast(_, f) => f.match_bounds(module) * Bounds::new(1, None),
Expand Down Expand Up @@ -677,7 +679,7 @@ impl Format {
Format::Union(branches) => Format::iso_union_depends_on_next(branches, module),
Format::Tuple(fields) => fields.iter().any(|f| f.depends_on_next(module)),
Format::Record(fields) => fields.iter().any(|(_, f)| f.depends_on_next(module)),
Format::Repeat(_) => true,
Format::Repeat(_) | Format::RepeatFallback(_, _) => true,
Format::Repeat1(_) => true,
Format::RepeatCount(_, _f) => false,
Format::RepeatUntilLast(_, _f) => false,
Expand Down Expand Up @@ -734,6 +736,16 @@ impl Format {
}
}

/// Returns `true` if this format is a reference to a named format whose name contains `"char"`
///
/// Any format other than a `Format::ItemVar` yields `false`.
pub fn is_char_format(&self, module: &FormatModule) -> bool {
    // NOTE - currently only true for named formats matching `/.*char.*/`
    matches!(
        self,
        Format::ItemVar(level, _args) if module.get_name(*level).contains("char")
    )
}



/// Returns `true` if values associated to this format should be handled as multi-character ASCII strings
pub fn is_ascii_string_format(&self, module: &FormatModule) -> bool {
match self {
Expand Down Expand Up @@ -900,6 +912,11 @@ impl FormatModule {
let t = self.infer_format_type(scope, a)?;
Ok(ValueType::Seq(Box::new(t)))
}
Format::RepeatFallback(narrow, wide) => {
let mut t = self.infer_format_type(scope, narrow)?;
t = t.unify(&self.infer_format_type(scope, wide)?)?;
Ok(ValueType::Seq(Box::new(t)))
}
Format::Peek(a) => self.infer_format_type(scope, a),
Format::PeekNot(_a) => Ok(ValueType::Tuple(vec![])),
Format::Slice(_expr, a) => self.infer_format_type(scope, a),
Expand Down Expand Up @@ -1265,6 +1282,19 @@ impl<'a> MatchTreeStep<'a> {
Format::RepeatUntilSeq(_expr, _a) => {
Self::accept() // FIXME
}
Format::RepeatFallback(narrow, wide) => {
let tree = Self::add_next(module, next.clone());
tree.union(Self::add(
module,
narrow,
Rc::new(Next::Repeat(narrow, next.clone())),
))
.union(Self::add(
module,
wide,
Rc::new(Next::Repeat(wide, next.clone())),
))
}
Format::Peek(a) => {
let tree = Self::add_next(module, next.clone());
let peek = Self::add(module, a, Rc::new(Next::Empty));
Expand Down
13 changes: 13 additions & 0 deletions src/output/flat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ fn check_covered(
| Format::RepeatUntilSeq(_, format) => {
check_covered(module, path, format)?;
}
Format::RepeatFallback(narrow, wide) => {
check_covered(module, path, narrow)?;
check_covered(module, path, wide)?;
}
Format::Peek(_) => {} // FIXME
Format::PeekNot(_) => {} // FIXME
Format::Slice(_, format) => {
Expand Down Expand Up @@ -279,6 +283,15 @@ impl<'module, W: io::Write> Context<'module, W> {
}
_ => panic!("expected sequence, found {value:?}"),
},
Format::RepeatFallback(_narrow, _wide) => match value {
Value::Seq(values) => {
for _v in values {
(); // FIXME
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am slightly unclear on the semantics of check_covered, and so I am not sure how to implement this...

}
Ok(())
}
_ => panic!("expected sequence"),
},
Format::Peek(format) => self.write_flat(scope, value, format),
Format::PeekNot(format) => self.write_flat(scope, value, format),
Format::Slice(_, format) => self.write_flat(scope, value, format),
Expand Down
41 changes: 41 additions & 0 deletions src/output/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ impl<'module> MonoidalPrinter<'module> {
Value::Char(_) => true,
Value::Bool(_) => true,
Value::U8(_) | Value::U16(_) | Value::U32(_) => true,
Value::Fallback(is_fallback, v) => match format {
Some(Format::RepeatFallback(a, b)) => {
let format = if *is_fallback { b } else { a };
self.is_atomic_value(v, Some(format))
}
_ => self.is_atomic_value(v, None),
},
Value::Tuple(values) => values.is_empty(),
Value::Record(fields) => fields.is_empty(),
Value::Seq(values) => values.is_empty(),
Expand Down Expand Up @@ -305,6 +312,30 @@ impl<'module> MonoidalPrinter<'module> {
}
_ => panic!("expected sequence, found {value:?}"),
},
Format::RepeatFallback(narrow, wide) => match value {
Value::Fallback(is_wide, v) => match v.deref() {
Value::Seq(values) => {
let format = if *is_wide { wide } else { narrow };
if self.flags.tables_for_record_sequences
&& self.is_record_with_atomic_fields(format).is_some()
{
self.compile_seq_records(values, format)
} else if self.flags.pretty_ascii_strings
&& format.is_ascii_char_format(self.module)
{
self.compile_ascii_seq(values)
} else if self.flags.pretty_utf8_strings
&& format.is_char_format(self.module)
{
self.compile_char_seq(values)
} else {
self.compile_seq(scope, values, Some(format))
}
}
_ => panic!("expected sequence, found {v:?}"),
},
_ => panic!("expected Fallback, found {value:?}"),
},
Format::Peek(format) => self.compile_decoded_value(scope, value, format),
Format::PeekNot(_format) => self.compile_value(scope, value),
Format::Slice(_, format) => self.compile_decoded_value(scope, value, format),
Expand Down Expand Up @@ -375,6 +406,7 @@ impl<'module> MonoidalPrinter<'module> {
Value::Seq(vals) => self.compile_seq(scope, vals, None),
Value::Record(fields) => self.compile_record(scope, fields, None),
Value::Variant(label, value) => self.compile_variant(scope, label, value, None),
Value::Fallback(_, value) => self.compile_value(scope, value),
Value::Mapped(orig, value) => {
if self.flags.collapse_mapped_values {
self.compile_value(scope, value)
Expand Down Expand Up @@ -1068,6 +1100,15 @@ impl<'module> MonoidalPrinter<'module> {
prec,
Precedence::FORMAT_COMPOUND,
),
Format::RepeatFallback(narrow, wide) => {
let wide_frag = self.compile_format(wide, Precedence::FORMAT_ATOM);

cond_paren(
self.compile_nested_format("repeat_fallback", Some(&[wide_frag]), narrow, prec),
prec,
Precedence::FORMAT_COMPOUND,
)
}
Format::Repeat(format) => cond_paren(
self.compile_nested_format("repeat", None, format, prec),
prec,
Expand Down
3 changes: 2 additions & 1 deletion tests/expected/decode/test.txt.stdout
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
├── data <- _ |...| _ := { text := { ascii := "GIF89a is a popular format\n" } }
├── data <- _ |...| _ :=
│ └── text <- text.string := "GIF89a is a popular format\n"
└── end <- end-of-input
3 changes: 1 addition & 2 deletions tests/expected/decode/test.utf8.stdout
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
├── data <- _ |...| _ :=
│ └── text <- text.string :=
│ └── utf8 <- text.string.utf8 := "この🦀は擂り身ではなく、本物のカニです。\n"
│ └── text <- text.string := "この🦀は擂り身ではなく、本物のカニです。\n"
└── end <- end-of-input