diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs deleted file mode 100644 index 165262b82c75d..0000000000000 --- a/compiler/rustc_lexer/src/cursor.rs +++ /dev/null @@ -1,125 +0,0 @@ -use std::str::Chars; - -pub enum FrontmatterAllowed { - Yes, - No, -} - -/// Peekable iterator over a char sequence. -/// -/// Next characters can be peeked via `first` method, -/// and position can be shifted forward via `bump` method. -pub struct Cursor<'a> { - len_remaining: usize, - /// Iterator over chars. Slightly faster than a &str. - chars: Chars<'a>, - pub(crate) frontmatter_allowed: FrontmatterAllowed, - #[cfg(debug_assertions)] - prev: char, -} - -pub(crate) const EOF_CHAR: char = '\0'; - -impl<'a> Cursor<'a> { - pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> { - Cursor { - len_remaining: input.len(), - chars: input.chars(), - frontmatter_allowed, - #[cfg(debug_assertions)] - prev: EOF_CHAR, - } - } - - pub fn as_str(&self) -> &'a str { - self.chars.as_str() - } - - /// Returns the last eaten symbol (or `'\0'` in release builds). - /// (For debug assertions only.) - pub(crate) fn prev(&self) -> char { - #[cfg(debug_assertions)] - { - self.prev - } - - #[cfg(not(debug_assertions))] - { - EOF_CHAR - } - } - - /// Peeks the next symbol from the input stream without consuming it. - /// If requested position doesn't exist, `EOF_CHAR` is returned. - /// However, getting `EOF_CHAR` doesn't always mean actual end of file, - /// it should be checked with `is_eof` method. - pub fn first(&self) -> char { - // `.next()` optimizes better than `.nth(0)` - self.chars.clone().next().unwrap_or(EOF_CHAR) - } - - /// Peeks the second symbol from the input stream without consuming it. - pub(crate) fn second(&self) -> char { - // `.next()` optimizes better than `.nth(1)` - let mut iter = self.chars.clone(); - iter.next(); - iter.next().unwrap_or(EOF_CHAR) - } - - /// Peeks the third symbol from the input stream without consuming it. - pub fn third(&self) -> char { - // `.next()` optimizes better than `.nth(2)` - let mut iter = self.chars.clone(); - iter.next(); - iter.next(); - iter.next().unwrap_or(EOF_CHAR) - } - - /// Checks if there is nothing more to consume. - pub(crate) fn is_eof(&self) -> bool { - self.chars.as_str().is_empty() - } - - /// Returns amount of already consumed symbols. - pub(crate) fn pos_within_token(&self) -> u32 { - (self.len_remaining - self.chars.as_str().len()) as u32 - } - - /// Resets the number of bytes consumed to 0. - pub(crate) fn reset_pos_within_token(&mut self) { - self.len_remaining = self.chars.as_str().len(); - } - - /// Moves to the next character. - pub(crate) fn bump(&mut self) -> Option { - let c = self.chars.next()?; - - #[cfg(debug_assertions)] - { - self.prev = c; - } - - Some(c) - } - - /// Moves to a substring by a number of bytes. - pub(crate) fn bump_bytes(&mut self, n: usize) { - self.chars = self.as_str()[n..].chars(); - } - - /// Eats symbols while predicate returns true or until the end of file is reached. - pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { - // It was tried making optimized version of this for eg. line comments, but - // LLVM can inline all of this and compile it down to fast iteration over bytes. - while predicate(self.first()) && !self.is_eof() { - self.bump(); - } - } - - pub(crate) fn eat_until(&mut self, byte: u8) { - self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) { - Some(index) => self.as_str()[index..].chars(), - None => "".chars(), - } - } -} diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 27ffcbc943bde..9d3da6ef49302 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -25,15 +25,13 @@ #![deny(unstable_features)] // tidy-alphabetical-end -mod cursor; - #[cfg(test)] mod tests; +use std::str::Chars; + use LiteralKind::*; use TokenKind::*; -use cursor::EOF_CHAR; -pub use cursor::{Cursor, FrontmatterAllowed}; pub use unicode_ident::UNICODE_VERSION; use unicode_properties::UnicodeEmoji; @@ -407,7 +405,129 @@ pub fn is_ident(string: &str) -> bool { } } -impl Cursor<'_> { +pub enum FrontmatterAllowed { + Yes, + No, +} + +/// Peekable iterator over a char sequence. +/// +/// Next characters can be peeked via `first` method, +/// and position can be shifted forward via `bump` method. +pub struct Cursor<'a> { + len_remaining: usize, + /// Iterator over chars. Slightly faster than a &str. + chars: Chars<'a>, + pub(crate) frontmatter_allowed: FrontmatterAllowed, + #[cfg(debug_assertions)] + prev: char, +} + +const EOF_CHAR: char = '\0'; + +impl<'a> Cursor<'a> { + pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> { + Cursor { + len_remaining: input.len(), + chars: input.chars(), + frontmatter_allowed, + #[cfg(debug_assertions)] + prev: EOF_CHAR, + } + } + + pub fn as_str(&self) -> &'a str { + self.chars.as_str() + } + + /// Returns the last eaten symbol (or `'\0'` in release builds). + /// (For debug assertions only.) + pub(crate) fn prev(&self) -> char { + #[cfg(debug_assertions)] + { + self.prev + } + + #[cfg(not(debug_assertions))] + { + EOF_CHAR + } + } + + /// Peeks the next symbol from the input stream without consuming it. + /// If requested position doesn't exist, `EOF_CHAR` is returned. + /// However, getting `EOF_CHAR` doesn't always mean actual end of file, + /// it should be checked with `is_eof` method. + pub fn first(&self) -> char { + // `.next()` optimizes better than `.nth(0)` + self.chars.clone().next().unwrap_or(EOF_CHAR) + } + + /// Peeks the second symbol from the input stream without consuming it. + pub(crate) fn second(&self) -> char { + // `.next()` optimizes better than `.nth(1)` + let mut iter = self.chars.clone(); + iter.next(); + iter.next().unwrap_or(EOF_CHAR) + } + + /// Peeks the third symbol from the input stream without consuming it. + pub fn third(&self) -> char { + // `.next()` optimizes better than `.nth(2)` + let mut iter = self.chars.clone(); + iter.next(); + iter.next(); + iter.next().unwrap_or(EOF_CHAR) + } + + /// Checks if there is nothing more to consume. + pub(crate) fn is_eof(&self) -> bool { + self.chars.as_str().is_empty() + } + + /// Returns amount of already consumed symbols. + pub(crate) fn pos_within_token(&self) -> u32 { + (self.len_remaining - self.chars.as_str().len()) as u32 + } + + /// Resets the number of bytes consumed to 0. + pub(crate) fn reset_pos_within_token(&mut self) { + self.len_remaining = self.chars.as_str().len(); + } + + /// Moves to the next character. + pub(crate) fn bump(&mut self) -> Option { + let c = self.chars.next()?; + + #[cfg(debug_assertions)] + { + self.prev = c; + } + + Some(c) + } + + /// Moves to a substring by a number of bytes. + pub(crate) fn bump_bytes(&mut self, n: usize) { + self.chars = self.as_str()[n..].chars(); + } + + /// Eats symbols while predicate returns true or until the end of file is reached. + pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { + // It was tried making optimized version of this for eg. line comments, but + // LLVM can inline all of this and compile it down to fast iteration over bytes. + while predicate(self.first()) && !self.is_eof() { + self.bump(); + } + } + + pub(crate) fn eat_until(&mut self, byte: u8) { + self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) { + Some(index) => self.as_str()[index..].chars(), + None => "".chars(), + } + } + /// Parses a token from the input string. pub fn advance_token(&mut self) -> Token { let Some(first_char) = self.bump() else { diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 42327c7e343d1..7d0acc3337737 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -3,7 +3,7 @@ use std::borrow::Cow; use std::path::PathBuf; -use rustc_ast::token::Token; +use rustc_ast::token::{self, InvisibleOrigin, MetaVarKind, Token}; use rustc_ast::util::parser::ExprPrecedence; use rustc_ast::{Path, Visibility}; use rustc_errors::codes::*; @@ -17,7 +17,6 @@ use rustc_span::edition::{Edition, LATEST_STABLE_EDITION}; use rustc_span::{Ident, Span, Symbol}; use crate::fluent_generated as fluent; -use crate::parser::{ForbiddenLetReason, TokenDescription}; #[derive(Diagnostic)] #[diag(parse_maybe_report_ambiguous_plus)] @@ -3703,3 +3702,64 @@ pub(crate) struct StructLiteralWithoutPathLate { #[suggestion(applicability = "has-placeholders", code = "/* Type */ ", style = "verbose")] pub suggestion_span: Span, } + +/// Used to forbid `let` expressions in certain syntactic locations. +#[derive(Clone, Copy, Subdiagnostic)] +pub(crate) enum ForbiddenLetReason { + /// `let` is not valid and the source environment is not important + OtherForbidden, + /// A let chain with the `||` operator + #[note(parse_not_supported_or)] + NotSupportedOr(#[primary_span] Span), + /// A let chain with invalid parentheses + /// + /// For example, `let 1 = 1 && (expr && expr)` is allowed + /// but `(let 1 = 1 && (let 1 = 1 && (let 1 = 1))) && let a = 1` is not + #[note(parse_not_supported_parentheses)] + NotSupportedParentheses(#[primary_span] Span), +} + +#[derive(Debug, rustc_macros::Subdiagnostic)] +#[suggestion( + parse_misspelled_kw, + applicability = "machine-applicable", + code = "{similar_kw}", + style = "verbose" +)] +pub(crate) struct MisspelledKw { + // We use a String here because `Symbol::into_diag_arg` calls `Symbol::to_ident_string`, which + // prefix the keyword with a `r#` because it aims to print the symbol as an identifier. + pub similar_kw: String, + #[primary_span] + pub span: Span, + pub is_incorrect_case: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum TokenDescription { + ReservedIdentifier, + Keyword, + ReservedKeyword, + DocComment, + + // Expanded metavariables are wrapped in invisible delimiters which aren't + // pretty-printed. In error messages we must handle these specially + // otherwise we get confusing things in messages like "expected `(`, found + // ``". It's better to say e.g. "expected `(`, found type metavariable". + MetaVar(MetaVarKind), +} + +impl TokenDescription { + pub(super) fn from_token(token: &Token) -> Option { + match token.kind { + _ if token.is_special_ident() => Some(TokenDescription::ReservedIdentifier), + _ if token.is_used_keyword() => Some(TokenDescription::Keyword), + _ if token.is_unused_keyword() => Some(TokenDescription::ReservedKeyword), + token::DocComment(..) => Some(TokenDescription::DocComment), + token::OpenInvisible(InvisibleOrigin::MetaVar(kind)) => { + Some(TokenDescription::MetaVar(kind)) + } + _ => None, + } + } +} diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index 60e12fa05adfd..cb8a291c7fa05 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -35,10 +35,10 @@ use crate::errors::{ ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg, GenericParamsWithoutAngleBrackets, GenericParamsWithoutAngleBracketsSugg, HelpIdentifierStartsWithNumber, HelpUseLatestEdition, InInTypo, IncorrectAwait, IncorrectSemicolon, IncorrectUseOfAwait, IncorrectUseOfUse, - PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst, - StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, SuggAddMissingLetStmt, - SuggEscapeIdentifier, SuggRemoveComma, TernaryOperator, TernaryOperatorSuggestion, - UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration, + MisspelledKw, PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, + SelfParamNotFirst, StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, + SuggAddMissingLetStmt, SuggEscapeIdentifier, SuggRemoveComma, TernaryOperator, + TernaryOperatorSuggestion, UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration, UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead, WrapType, }; use crate::parser::FnContext; @@ -212,22 +212,6 @@ impl std::fmt::Display for UnaryFixity { } } -#[derive(Debug, rustc_macros::Subdiagnostic)] -#[suggestion( - parse_misspelled_kw, - applicability = "machine-applicable", - code = "{similar_kw}", - style = "verbose" -)] -struct MisspelledKw { - // We use a String here because `Symbol::into_diag_arg` calls `Symbol::to_ident_string`, which - // prefix the keyword with a `r#` because it aims to print the symbol as an identifier. - similar_kw: String, - #[primary_span] - span: Span, - is_incorrect_case: bool, -} - /// Checks if the given `lookup` identifier is similar to any keyword symbol in `candidates`. /// /// This is a specialized version of [`Symbol::find_similar`] that constructs an error when a diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index c31a4798b471e..8ba7493f6f115 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -21,7 +21,6 @@ use rustc_ast::{ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic}; use rustc_literal_escaper::unescape_char; -use rustc_macros::Subdiagnostic; use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP; @@ -2770,7 +2769,7 @@ impl<'a> Parser<'a> { let recovered = if !restrictions.contains(Restrictions::ALLOW_LET) { let err = errors::ExpectedExpressionFoundLet { span: self.token.span, - reason: ForbiddenLetReason::OtherForbidden, + reason: errors::ForbiddenLetReason::OtherForbidden, missing_let: None, comparison: None, }; @@ -4169,22 +4168,6 @@ pub(crate) fn could_be_unclosed_char_literal(ident: Ident) -> bool { && unescape_char(ident.without_first_quote().name.as_str()).is_ok() } -/// Used to forbid `let` expressions in certain syntactic locations. -#[derive(Clone, Copy, Subdiagnostic)] -pub(crate) enum ForbiddenLetReason { - /// `let` is not valid and the source environment is not important - OtherForbidden, - /// A let chain with the `||` operator - #[note(parse_not_supported_or)] - NotSupportedOr(#[primary_span] Span), - /// A let chain with invalid parentheses - /// - /// For example, `let 1 = 1 && (expr && expr)` is allowed - /// but `(let 1 = 1 && (let 1 = 1 && (let 1 = 1))) && let a = 1` is not - #[note(parse_not_supported_parentheses)] - NotSupportedParentheses(#[primary_span] Span), -} - /// Whether let chains are allowed on all editions, or it's edition dependent (allowed only on /// 2024 and later). In case of edition dependence, specify the currently present edition. pub enum LetChainsPolicy { @@ -4205,7 +4188,7 @@ struct CondChecker<'a> { parser: &'a Parser<'a>, let_chains_policy: LetChainsPolicy, depth: u32, - forbid_let_reason: Option, + forbid_let_reason: Option, missing_let: Option, comparison: Option, } @@ -4226,14 +4209,13 @@ impl<'a> CondChecker<'a> { impl MutVisitor for CondChecker<'_> { fn visit_expr(&mut self, e: &mut Expr) { self.depth += 1; - use ForbiddenLetReason::*; let span = e.span; match e.kind { ExprKind::Let(_, _, _, ref mut recovered @ Recovered::No) => { if let Some(reason) = self.forbid_let_reason { let error = match reason { - NotSupportedOr(or_span) => { + errors::ForbiddenLetReason::NotSupportedOr(or_span) => { self.parser.dcx().emit_err(errors::OrInLetChain { span: or_span }) } _ => self.parser.dcx().emit_err(errors::ExpectedExpressionFoundLet { @@ -4260,24 +4242,27 @@ impl MutVisitor for CondChecker<'_> { mut_visit::walk_expr(self, e); } ExprKind::Binary(Spanned { node: BinOpKind::Or, span: or_span }, _, _) - if let None | Some(NotSupportedOr(_)) = self.forbid_let_reason => + if let None | Some(errors::ForbiddenLetReason::NotSupportedOr(_)) = + self.forbid_let_reason => { let forbid_let_reason = self.forbid_let_reason; - self.forbid_let_reason = Some(NotSupportedOr(or_span)); + self.forbid_let_reason = Some(errors::ForbiddenLetReason::NotSupportedOr(or_span)); mut_visit::walk_expr(self, e); self.forbid_let_reason = forbid_let_reason; } ExprKind::Paren(ref inner) - if let None | Some(NotSupportedParentheses(_)) = self.forbid_let_reason => + if let None | Some(errors::ForbiddenLetReason::NotSupportedParentheses(_)) = + self.forbid_let_reason => { let forbid_let_reason = self.forbid_let_reason; - self.forbid_let_reason = Some(NotSupportedParentheses(inner.span)); + self.forbid_let_reason = + Some(errors::ForbiddenLetReason::NotSupportedParentheses(inner.span)); mut_visit::walk_expr(self, e); self.forbid_let_reason = forbid_let_reason; } ExprKind::Assign(ref lhs, _, span) => { let forbid_let_reason = self.forbid_let_reason; - self.forbid_let_reason = Some(OtherForbidden); + self.forbid_let_reason = Some(errors::ForbiddenLetReason::OtherForbidden); let missing_let = self.missing_let; if let ExprKind::Binary(_, _, rhs) = &lhs.kind && let ExprKind::Path(_, _) @@ -4310,7 +4295,7 @@ impl MutVisitor for CondChecker<'_> { | ExprKind::Tup(_) | ExprKind::Paren(_) => { let forbid_let_reason = self.forbid_let_reason; - self.forbid_let_reason = Some(OtherForbidden); + self.forbid_let_reason = Some(errors::ForbiddenLetReason::OtherForbidden); mut_visit::walk_expr(self, e); self.forbid_let_reason = forbid_let_reason; } @@ -4318,7 +4303,7 @@ impl MutVisitor for CondChecker<'_> { | ExprKind::Type(ref mut op, _) | ExprKind::UnsafeBinderCast(_, ref mut op, _) => { let forbid_let_reason = self.forbid_let_reason; - self.forbid_let_reason = Some(OtherForbidden); + self.forbid_let_reason = Some(errors::ForbiddenLetReason::OtherForbidden); self.visit_expr(op); self.forbid_let_reason = forbid_let_reason; } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 3ef73a55d47d7..0ff0b66229366 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -20,7 +20,6 @@ use std::{fmt, mem, slice}; use attr_wrapper::{AttrWrapper, UsePreAttrPos}; pub use diagnostics::AttemptLocalParseRecovery; -pub(crate) use expr::ForbiddenLetReason; // Public to use it for custom `if` expressions in rustfmt forks like https://github.com/tucant/rustfmt pub use expr::LetChainsPolicy; pub(crate) use item::{FnContext, FnParseMode}; @@ -50,7 +49,7 @@ use token_type::TokenTypeSet; pub use token_type::{ExpKeywordPair, ExpTokenPair, TokenType}; use tracing::debug; -use crate::errors::{self, IncorrectVisibilityRestriction, NonStringAbiLiteral}; +use crate::errors::{self, IncorrectVisibilityRestriction, NonStringAbiLiteral, TokenDescription}; use crate::exp; #[cfg(test)] @@ -308,35 +307,6 @@ impl From for Trailing { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(super) enum TokenDescription { - ReservedIdentifier, - Keyword, - ReservedKeyword, - DocComment, - - // Expanded metavariables are wrapped in invisible delimiters which aren't - // pretty-printed. In error messages we must handle these specially - // otherwise we get confusing things in messages like "expected `(`, found - // ``". It's better to say e.g. "expected `(`, found type metavariable". - MetaVar(MetaVarKind), -} - -impl TokenDescription { - pub(super) fn from_token(token: &Token) -> Option { - match token.kind { - _ if token.is_special_ident() => Some(TokenDescription::ReservedIdentifier), - _ if token.is_used_keyword() => Some(TokenDescription::Keyword), - _ if token.is_unused_keyword() => Some(TokenDescription::ReservedKeyword), - token::DocComment(..) => Some(TokenDescription::DocComment), - token::OpenInvisible(InvisibleOrigin::MetaVar(kind)) => { - Some(TokenDescription::MetaVar(kind)) - } - _ => None, - } - } -} - pub fn token_descr(token: &Token) -> String { let s = pprust::token_to_string(token).to_string();