Skip to content

Commit 00b6992

Browse files
authored
Merge pull request #300 from kw217/add-docs
Add extra comments
2 parents aec7952 + 929ed81 commit 00b6992

File tree

5 files changed

+46
-16
lines changed

5 files changed

+46
-16
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
[Documentation](https://docs.rs/peg) | [Release Notes](https://github.com/kevinmehall/rust-peg/releases)
44

5-
`rust-peg` is a simple yet flexible parser generator that makes it easy to write robust parsers. Based on the [Parsing Expression Grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) formalism, it provides a Rust macro that builds a recursive descent parser from a concise definition of the grammar.
5+
`rust-peg` is a simple yet flexible parser generator that makes it easy to write robust parsers. Based on the [Parsing Expression Grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) formalism, it provides a Rust macro that builds a recursive descent parser from a concise definition of the grammar.
66

77
## Features
88

@@ -60,3 +60,11 @@ pub fn main() {
6060
[annotate-snippets]: https://crates.io/crates/annotate-snippets
6161
[codespan-reporting]: https://crates.io/crates/codespan-reporting
6262
[codemap-diagnostic]: https://crates.io/crates/codemap-diagnostic
63+
## Development
64+
65+
The `rust-peg` grammar is written in `rust-peg`: `peg-macros/grammar.rustpeg`. To avoid the circular dependency, a precompiled grammar is checked in as `peg-macros/grammar.rs`. To regenerate this, run the `./bootstrap.sh` script.
66+
67+
There is a large test suite which uses [`trybuild`](https://crates.io/crates/trybuild) to check that invalid grammars fail to compile with the expected errors.
68+
Use `cargo test` to run the entire suite,
69+
or `cargo test -- trybuild trybuild=lifetimes.rs` to test just the indicated file.
70+
Add `--features trace` to trace these tests.

peg-macros/translate.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ fn rule_params_list(context: &Context, rule: &Rule) -> Vec<TokenStream> {
196196
}).collect()
197197
}
198198

199+
/// Compile a rule to a function for use internal to the grammar.
200+
/// Returns `RuleResult<T>`.
199201
fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
200202
let span = rule.span.resolved_at(Span::mixed_site());
201203
let name = format_ident!("__parse_{}", rule.name, span=span);
@@ -305,6 +307,8 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
305307
}
306308
}
307309

310+
/// Compile a rule into the parsing function which will be exported.
311+
/// Returns `Result<T, ParseError>`.
308312
fn compile_rule_export(context: &Context, rule: &Rule) -> TokenStream {
309313
let span = rule.span.resolved_at(Span::mixed_site());
310314

@@ -329,6 +333,10 @@ fn compile_rule_export(context: &Context, rule: &Rule) -> TokenStream {
329333
quote_spanned!{ span => ::peg::Parse::is_eof(__input, __pos) }
330334
};
331335

336+
// Parse once. If it succeeds or throws an error, return that.
337+
// If it fails, parse again to determine the set of all tokens
338+
// that were expected at the failure position.
339+
332340
quote_spanned! { span =>
333341
#doc
334342
#visibility fn #name<'input #(, #grammar_lifetime_params)* #(, #ty_params)*>(__input: #input_ty #extra_args_def #(, #rule_params)*) -> ::std::result::Result<#ret_ty, ::peg::error::ParseError<PositionRepr<#(#grammar_lifetime_params),*>>> {

peg-runtime/error.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ impl Display for ExpectedSet {
3838
}
3939
}
4040

41-
/// An error from a parse failure
41+
/// A parse failure.
4242
#[derive(PartialEq, Eq, Debug, Clone)]
4343
pub struct ParseError<L> {
44-
/// The furthest position the parser reached in the input
44+
/// The furthest position the parser reached in the input before failing.
4545
pub location: L,
4646

47-
/// The set of literals that failed to match at that position
47+
/// The set of literals that failed to match at that position.
4848
pub expected: ExpectedSet,
4949
}
5050

@@ -66,14 +66,23 @@ impl<L: Display + Debug> ::std::error::Error for ParseError<L> {
6666

6767
#[doc(hidden)]
6868
pub struct ErrorState {
69+
/// Furthest failure we've hit so far.
6970
pub max_err_pos: usize,
71+
72+
/// Are we inside a lookahead/quiet block? If so, failure recording is suppressed.
73+
/// Non-zero => yes, to support nested blocks.
7074
pub suppress_fail: usize,
75+
76+
/// Are we reparsing after a failure? If so, compute and store the set of all alternatives
77+
/// that were expected at offset `max_err_pos`.
7178
pub reparsing_on_error: bool,
79+
80+
/// The set of tokens we expected to find when we hit the failure. Only updated while `reparsing_on_error` is set.
7281
pub expected: ExpectedSet,
7382
}
7483

7584
impl ErrorState {
76-
pub fn new(initial_pos: usize) -> ErrorState {
85+
pub fn new(initial_pos: usize) -> Self {
7786
ErrorState {
7887
max_err_pos: initial_pos,
7988
suppress_fail: 0,
@@ -84,6 +93,7 @@ impl ErrorState {
8493
}
8594
}
8695

96+
/// Set up for reparsing to record the details of the furthest failure.
8797
pub fn reparse_for_error(&mut self) {
8898
self.suppress_fail = 0;
8999
self.reparsing_on_error = true;
@@ -96,6 +106,7 @@ impl ErrorState {
96106
}
97107
}
98108

109+
/// Flag a failure.
99110
#[inline(always)]
100111
pub fn mark_failure(&mut self, pos: usize, expected: &'static str) -> RuleResult<()> {
101112
if self.suppress_fail == 0 {

peg-runtime/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ pub mod str;
1010
/// type. The public API of a parser adapts errors to `std::result::Result`.
1111
#[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
1212
pub enum RuleResult<T> {
13+
/// Success, with final location
1314
Matched(usize, T),
15+
16+
/// Failure (furthest failure location is not yet known)
1417
Failed,
1518
}
1619

src/lib.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@
9090
//! Rust block returns a `Result<T, &str>` instead of a value directly. On
9191
//! `Ok(v)`, it matches successfully and returns `v`. On `Err(e)`, the match
9292
//! of the entire expression fails and it tries alternatives or reports a
93-
//! parse error with the `&str` `e`.
93+
//! parse failure with the `&str` `e`.
9494
//! * `e1 / e2 / e3` - _Ordered choice:_ try to match `e1`. If the match succeeds, return its
9595
//! result, otherwise try `e2`, and so on.
9696
//!
@@ -141,7 +141,7 @@
141141
//!
142142
//! If your input type is a slice of an enum type, a pattern could match an enum variant like
143143
//! `[Token::Operator('+')]`.
144-
//!
144+
//!
145145
//! Variables captured by the pattern are accessible in a subsequent action
146146
//! block: `[Token::Integer(i)] { i }`
147147
//!
@@ -214,30 +214,30 @@
214214
//! To allow matching a prefix of the input, add the `#[no_eof]` attribute before `pub rule`.
215215
//! Take care not to miss a malformed `x` at the last position if the rule ends with a `x()*`
216216
//! repeat expression.
217-
//!
217+
//!
218218
//! ## Rule parameters
219-
//!
219+
//!
220220
//! Rules can be parameterized with types, lifetimes, and values, just like Rust functions.
221221
//!
222222
//! In addition to Rust values, rules can also accept PEG expression fragments as arguments by using
223223
//! `rule<R>` as a parameter type. When calling such a rule, use `<>` around a PEG expression in the
224224
//! argument list to capture the expression and pass it to the rule.
225-
//!
225+
//!
226226
//! For example:
227-
//!
227+
//!
228228
//! ```rust,no_run
229229
//! # peg::parser!{grammar doc() for str {
230230
//! rule num_radix(radix: u32) -> u32
231231
//! = n:$(['0'..='9']+) {? u32::from_str_radix(n, radix).or(Err("number")) }
232-
//!
232+
//!
233233
//! rule list<T>(x: rule<T>) -> Vec<T> = "[" v:(x() ** ",") ","? "]" {v}
234-
//!
234+
//!
235235
//! pub rule octal_list() -> Vec<u32> = list(<num_radix(8)>)
236236
//! # }}
237237
//! # fn main() {}
238238
//! ```
239239
//!
240-
//! ## Error reporting
240+
//! ## Failure reporting
241241
//!
242242
//! When a match fails, position information is automatically recorded to report a set of
243243
//! "expected" tokens that would have allowed the parser to advance further.
@@ -313,8 +313,8 @@
313313
//! like `expr() "x" / expr() "y" / expr() "z"`, but this could be rewritten to
314314
//! `expr() ("x" / "y" / "z")` which would be even faster.
315315
//!
316-
//! `#[cache_left_rec]` extends the `#[cache]` mechanism with the ability to resolve
317-
//! left-recursive rules, which are otherwise an error.
316+
//! `#[cache_left_rec]` extends the `#[cache]` mechanism with the ability to resolve
317+
//! left-recursive rules, which are otherwise an error.
318318
//!
319319
//! The `precedence!{}` syntax is another way to handle nested operators and avoid
320320
//! repeatedly matching an expression rule.

0 commit comments

Comments
 (0)