Auto merge of #124779 - workingjubilee:debug-formatting-my-beloved, r=compiler-errors

bors · bors · commit 5ce96b1d0f6b · 2024-05-08T05:11:18.000Z
Improve `rustc_parse::Parser`'s debuggability

The main event is the final commit where I add `Parser::debug_lookahead`. Everything else was basically cleaning up things that bugged me (debugging, as it were) until I felt comfortable enough to actually work on it.

The motivation is that it's annoying as hell to try to figure out how the debug infra works in rustc when basic queries like `debug!(?parser);` come up "empty". However, Parser has a lot of fields that are mostly irrelevant for most debugging, like the entire ParseSess. I think `Parser::debug_lookahead` with a capped lookahead might be fine as a general-purpose Debug impl, but this adapter version was suggested to allow more choice, and admittedly, it's a refined version of what I was already handrolling just to get some insight going.
diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs
@@ -706,7 +706,7 @@ impl<'t> Iterator for RefTokenTreeCursor<'t> {
 /// involve associated types) for getting individual elements, or
 /// `RefTokenTreeCursor` if you really want an `Iterator`, e.g. in a `for`
 /// loop.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct TokenTreeCursor {
     pub stream: TokenStream,
     index: usize,
diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs
@@ -5,6 +5,7 @@
 #![allow(rustc::untranslatable_diagnostic)]
 #![feature(array_windows)]
 #![feature(box_patterns)]
+#![feature(debug_closure_helpers)]
 #![feature(if_let_guard)]
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
@@ -19,6 +19,7 @@ pub(crate) use item::FnParseMode;
 pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma};
 pub use path::PathStyle;
 
+use core::fmt;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::{AttributesData, DelimSpacing, DelimSpan, Spacing};
@@ -60,7 +61,7 @@ mod mut_visit {
 }
 
 bitflags::bitflags! {
-    #[derive(Clone, Copy)]
+    #[derive(Clone, Copy, Debug)]
     struct Restrictions: u8 {
         const STMT_EXPR         = 1 << 0;
         const NO_STRUCT_LITERAL = 1 << 1;
@@ -86,7 +87,7 @@ enum BlockMode {
 
 /// Whether or not we should force collection of tokens for an AST node,
 /// regardless of whether or not it has attributes
-#[derive(Clone, Copy, PartialEq)]
+#[derive(Clone, Copy, Debug, PartialEq)]
 pub enum ForceCollect {
     Yes,
     No,
@@ -134,7 +135,7 @@ macro_rules! maybe_recover_from_interpolated_ty_qpath {
     };
 }
 
-#[derive(Clone, Copy)]
+#[derive(Clone, Copy, Debug)]
 pub enum Recovery {
     Allowed,
     Forbidden,
@@ -184,7 +185,7 @@ pub struct Parser<'a> {
     capture_state: CaptureState,
     /// This allows us to recover when the user forget to add braces around
     /// multiple statements in the closure body.
-    pub current_closure: Option<ClosureSpans>,
+    current_closure: Option<ClosureSpans>,
     /// Whether the parser is allowed to do recovery.
     /// This is disabled when parsing macro arguments, see #103534
     pub recovery: Recovery,
@@ -196,7 +197,7 @@ pub struct Parser<'a> {
 rustc_data_structures::static_assert_size!(Parser<'_>, 264);
 
 /// Stores span information about a closure.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct ClosureSpans {
     pub whole_closure: Span,
     pub closing_pipe: Span,
@@ -225,15 +226,15 @@ pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>);
 /// Controls how we capture tokens. Capturing can be expensive,
 /// so we try to avoid performing capturing in cases where
 /// we will never need an `AttrTokenStream`.
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, Debug)]
 pub enum Capturing {
     /// We aren't performing any capturing - this is the default mode.
     No,
     /// We are capturing tokens
     Yes,
 }
 
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 struct CaptureState {
     capturing: Capturing,
     replace_ranges: Vec<ReplaceRange>,
@@ -244,7 +245,7 @@ struct CaptureState {
 /// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
 /// use this type to emit them as a linear sequence. But a linear sequence is
 /// what the parser expects, for the most part.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 struct TokenCursor {
     // Cursor for the current (innermost) token stream. The delimiters for this
     // token stream are found in `self.stack.last()`; when that is `None` then
@@ -349,6 +350,7 @@ enum TokenExpectType {
 }
 
 /// A sequence separator.
+#[derive(Debug)]
 struct SeqSep {
     /// The separator token.
     sep: Option<TokenKind>,
@@ -366,6 +368,7 @@ impl SeqSep {
     }
 }
 
+#[derive(Debug)]
 pub enum FollowedByType {
     Yes,
     No,
@@ -390,7 +393,7 @@ pub enum Trailing {
     Yes,
 }
 
-#[derive(Clone, Copy, PartialEq, Eq)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum TokenDescription {
     ReservedIdentifier,
     Keyword,
@@ -1548,6 +1551,47 @@ impl<'a> Parser<'a> {
             })
     }
 
+    /// Returns a `Debug` view of `prev_token` and up to `lookahead` upcoming token kinds.
+    ///
+    /// The token list ends after the first EOF (which is included) and is empty when
+    /// `prev_token` is already EOF. The output is deliberately a partial view of the parser.
+    pub fn debug_lookahead(&self, lookahead: usize) -> impl fmt::Debug + '_ {
+        struct DebugParser<'dbg> {
+            parser: &'dbg Parser<'dbg>,
+            lookahead: usize,
+        }
+
+        impl fmt::Debug for DebugParser<'_> {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                let Self { parser, lookahead } = self;
+                let mut dbg_fmt = f.debug_struct("Parser"); // or at least, one view of
+                // we don't need N spans, but we want at least one, so print all of prev_token
+                dbg_fmt.field("prev_token", &parser.prev_token);
+                // make it easier to peek farther ahead by taking TokenKinds only until EOF
+                let tokens = (0..*lookahead)
+                    .map(|i| parser.look_ahead(i, |tok| tok.kind.clone()))
+                    .scan(parser.prev_token == TokenKind::Eof, |seen_eof, tok| {
+                        let current = (!*seen_eof).then_some(tok.clone()); // keeps a trailing EOF
+                        *seen_eof |= tok == TokenKind::Eof;
+                        current
+                    });
+                dbg_fmt.field_with("tokens", |field| field.debug_list().entries(tokens).finish());
+                dbg_fmt.field("approx_token_stream_pos", &parser.num_bump_calls);
+
+                // some fields are interesting for certain values, as they relate to macro parsing
+                if let Some(subparser) = parser.subparser_name {
+                    dbg_fmt.field("subparser_name", &subparser);
+                }
+                if let Recovery::Forbidden = parser.recovery {
+                    dbg_fmt.field("recovery", &parser.recovery);
+                }
+                // imply there's "more to know" than this view
+                dbg_fmt.finish_non_exhaustive()
+            }
+        }
+        DebugParser { parser: self, lookahead }
+    }
+
     pub fn clear_expected_tokens(&mut self) {
         self.expected_tokens.clear();
     }