Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 0 additions & 125 deletions compiler/rustc_lexer/src/cursor.rs

This file was deleted.

130 changes: 125 additions & 5 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@
#![deny(unstable_features)]
// tidy-alphabetical-end

mod cursor;

#[cfg(test)]
mod tests;

use std::str::Chars;

use LiteralKind::*;
use TokenKind::*;
use cursor::EOF_CHAR;
pub use cursor::{Cursor, FrontmatterAllowed};
pub use unicode_ident::UNICODE_VERSION;
use unicode_properties::UnicodeEmoji;

Expand Down Expand Up @@ -407,7 +405,129 @@ pub fn is_ident(string: &str) -> bool {
}
}

impl Cursor<'_> {
/// Flag passed to [`Cursor::new`] and stored on the cursor as `frontmatter_allowed`.
///
/// NOTE(review): presumably tells the lexer whether a frontmatter block may be
/// recognized in the input — confirm against the code that reads the field.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrontmatterAllowed {
    Yes,
    No,
}

/// Peekable iterator over a char sequence.
///
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
pub struct Cursor<'a> {
/// Byte length of the unread input, as recorded at construction or at the last
/// `reset_pos_within_token` call. `pos_within_token` subtracts the current
/// unread length from this value.
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
chars: Chars<'a>,
/// The value handed to `Cursor::new`, stored unchanged.
pub(crate) frontmatter_allowed: FrontmatterAllowed,
/// Last character returned by `bump`; starts out as `EOF_CHAR`.
/// Only present when `debug_assertions` is enabled.
#[cfg(debug_assertions)]
prev: char,
}

/// Sentinel character returned by the peeking methods (`first`, `second`, `third`)
/// when there is no character at the requested position.
const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
Cursor {
len_remaining: input.len(),
chars: input.chars(),
frontmatter_allowed,
#[cfg(debug_assertions)]
prev: EOF_CHAR,
}
}

pub fn as_str(&self) -> &'a str {
self.chars.as_str()
}

/// Returns the last character eaten by `bump`.
///
/// The value is only tracked when `debug_assertions` is enabled; otherwise this
/// always returns `EOF_CHAR` (`'\0'`). (For debug assertions only.)
pub(crate) fn prev(&self) -> char {
// Exactly one of the two blocks below survives conditional compilation and
// becomes the function's tail expression.
#[cfg(debug_assertions)]
{
self.prev
}

#[cfg(not(debug_assertions))]
{
EOF_CHAR
}
}

/// Looks at the next character of the input without consuming it.
///
/// Returns `EOF_CHAR` when nothing is left. Because the input itself may contain
/// that character, seeing `EOF_CHAR` is not proof of end of file; use `is_eof`
/// to check for that.
pub fn first(&self) -> char {
    // Peek through a clone so the cursor itself does not advance.
    // `.next()` optimizes better than `.nth(0)`.
    let mut lookahead = self.chars.clone();
    lookahead.next().unwrap_or(EOF_CHAR)
}

/// Looks at the character after the next one without consuming anything.
///
/// Returns `EOF_CHAR` when the input has fewer than two characters left.
pub(crate) fn second(&self) -> char {
    // `.next()` optimizes better than `.nth(1)`
    let mut lookahead = self.chars.clone();
    // Discard the first character; only the one after it is of interest.
    let _ = lookahead.next();
    lookahead.next().unwrap_or(EOF_CHAR)
}

/// Looks two characters past the next one without consuming anything.
///
/// Returns `EOF_CHAR` when the input has fewer than three characters left.
pub fn third(&self) -> char {
    // `.next()` optimizes better than `.nth(2)`
    let mut lookahead = self.chars.clone();
    // Step over the first two characters.
    for _ in 0..2 {
        lookahead.next();
    }
    lookahead.next().unwrap_or(EOF_CHAR)
}

/// Returns `true` once every character of the input has been consumed.
pub(crate) fn is_eof(&self) -> bool {
    let unread = self.chars.as_str();
    unread.is_empty()
}

/// Returns the number of bytes consumed since the cursor was created or since
/// the last `reset_pos_within_token` call.
pub(crate) fn pos_within_token(&self) -> u32 {
    let still_unread = self.chars.as_str().len();
    let consumed = self.len_remaining - still_unread;
    consumed as u32
}

/// Marks the current position as the new starting point, so that
/// `pos_within_token` reports 0 bytes consumed until the cursor advances again.
pub(crate) fn reset_pos_within_token(&mut self) {
    let unread_bytes = self.chars.as_str().len();
    self.len_remaining = unread_bytes;
}

/// Consumes and returns the next character, or `None` if the input is exhausted.
///
/// When `debug_assertions` is enabled, the consumed character is also remembered
/// so that `prev` can report it.
pub(crate) fn bump(&mut self) -> Option<char> {
    let eaten = self.chars.next();

    #[cfg(debug_assertions)]
    {
        // Nothing is recorded at end of input; the previous value stays in place.
        if let Some(c) = eaten {
            self.prev = c;
        }
    }

    eaten
}

/// Skips forward by `n` bytes of the remaining input.
///
/// The remaining input is re-sliced at byte offset `n`, so standard `str`
/// slicing rules apply: `n` must not exceed the remaining length and must fall
/// on a character boundary, otherwise the slice operation panics.
pub(crate) fn bump_bytes(&mut self, n: usize) {
    let unread = self.as_str();
    let tail = &unread[n..];
    self.chars = tail.chars();
}

/// Consumes characters for as long as `predicate` accepts the next one, stopping
/// at the first rejected character or at the end of the input.
///
/// The predicate is evaluated before the end-of-input check, so at the end of
/// the input it is still called once with `EOF_CHAR`.
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
    // It was tried making optimized version of this for eg. line comments, but
    // LLVM can inline all of this and compile it down to fast iteration over bytes.
    loop {
        if !predicate(self.first()) || self.is_eof() {
            break;
        }
        self.bump();
    }
}

/// Skips ahead to the first occurrence of `byte` in the remaining input.
///
/// The matching byte itself is not consumed. If `byte` does not occur, the
/// cursor moves to the end of the input.
pub(crate) fn eat_until(&mut self, byte: u8) {
    let unread = self.as_str();
    let tail = match memchr::memchr(byte, unread.as_bytes()) {
        Some(at) => &unread[at..],
        None => "",
    };
    self.chars = tail.chars();
}

/// Parses a token from the input string.
pub fn advance_token(&mut self) -> Token {
let Some(first_char) = self.bump() else {
Expand Down
64 changes: 62 additions & 2 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use std::borrow::Cow;
use std::path::PathBuf;

use rustc_ast::token::Token;
use rustc_ast::token::{self, InvisibleOrigin, MetaVarKind, Token};
use rustc_ast::util::parser::ExprPrecedence;
use rustc_ast::{Path, Visibility};
use rustc_errors::codes::*;
Expand All @@ -17,7 +17,6 @@ use rustc_span::edition::{Edition, LATEST_STABLE_EDITION};
use rustc_span::{Ident, Span, Symbol};

use crate::fluent_generated as fluent;
use crate::parser::{ForbiddenLetReason, TokenDescription};

#[derive(Diagnostic)]
#[diag(parse_maybe_report_ambiguous_plus)]
Expand Down Expand Up @@ -3703,3 +3702,64 @@ pub(crate) struct StructLiteralWithoutPathLate {
#[suggestion(applicability = "has-placeholders", code = "/* Type */ ", style = "verbose")]
pub suggestion_span: Span,
}

/// Used to forbid `let` expressions in certain syntactic locations.
///
/// Derives `Subdiagnostic`: the variants annotated with `#[note(...)]` carry the
/// span that serves as the note's primary span.
#[derive(Clone, Copy, Subdiagnostic)]
pub(crate) enum ForbiddenLetReason {
/// `let` is not valid and the source environment is not important.
/// This variant has no note attribute and carries no span.
OtherForbidden,
/// A let chain with the `||` operator.
#[note(parse_not_supported_or)]
NotSupportedOr(#[primary_span] Span),
/// A let chain with invalid parentheses.
///
/// For example, `let 1 = 1 && (expr && expr)` is allowed,
/// but `(let 1 = 1 && (let 1 = 1 && (let 1 = 1))) && let a = 1` is not.
#[note(parse_not_supported_parentheses)]
NotSupportedParentheses(#[primary_span] Span),
}

/// Suggestion subdiagnostic that proposes replacing the text at `span` with `similar_kw`.
#[derive(Debug, rustc_macros::Subdiagnostic)]
#[suggestion(
parse_misspelled_kw,
applicability = "machine-applicable",
code = "{similar_kw}",
style = "verbose"
)]
pub(crate) struct MisspelledKw {
// We use a String here because `Symbol::into_diag_arg` calls `Symbol::to_ident_string`, which
// prefixes the keyword with `r#` because it aims to print the symbol as an identifier.
pub similar_kw: String,
// The span the suggestion applies to.
#[primary_span]
pub span: Span,
// NOTE(review): not referenced by the attribute above; presumably read by the
// `parse_misspelled_kw` message to adjust its wording — confirm.
pub is_incorrect_case: bool,
}

/// Coarse classification of a token, as produced by `TokenDescription::from_token`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(super) enum TokenDescription {
ReservedIdentifier,
Keyword,
ReservedKeyword,
DocComment,

// Expanded metavariables are wrapped in invisible delimiters which aren't
// pretty-printed. In error messages we must handle these specially
// otherwise we get confusing things in messages like "expected `(`, found
// ``". It's better to say e.g. "expected `(`, found type metavariable".
MetaVar(MetaVarKind),
}

impl TokenDescription {
    /// Classifies `token`, returning `None` when it fits none of the categories.
    ///
    /// The identifier/keyword predicates are checked first, in this order:
    /// special identifier, used keyword, unused keyword. Only if none of them
    /// holds is the token's kind inspected for doc comments and for the opening
    /// invisible delimiter of an expanded metavariable.
    pub(super) fn from_token(token: &Token) -> Option<Self> {
        if token.is_special_ident() {
            return Some(Self::ReservedIdentifier);
        }
        if token.is_used_keyword() {
            return Some(Self::Keyword);
        }
        if token.is_unused_keyword() {
            return Some(Self::ReservedKeyword);
        }

        match token.kind {
            token::DocComment(..) => Some(Self::DocComment),
            token::OpenInvisible(InvisibleOrigin::MetaVar(kind)) => Some(Self::MetaVar(kind)),
            _ => None,
        }
    }
}
24 changes: 4 additions & 20 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ use crate::errors::{
ExpectedIdentifier, ExpectedSemi, ExpectedSemiSugg, GenericParamsWithoutAngleBrackets,
GenericParamsWithoutAngleBracketsSugg, HelpIdentifierStartsWithNumber, HelpUseLatestEdition,
InInTypo, IncorrectAwait, IncorrectSemicolon, IncorrectUseOfAwait, IncorrectUseOfUse,
PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg, SelfParamNotFirst,
StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg, SuggAddMissingLetStmt,
SuggEscapeIdentifier, SuggRemoveComma, TernaryOperator, TernaryOperatorSuggestion,
UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration,
MisspelledKw, PatternMethodParamWithoutBody, QuestionMarkInType, QuestionMarkInTypeSugg,
SelfParamNotFirst, StructLiteralBodyWithoutPath, StructLiteralBodyWithoutPathSugg,
SuggAddMissingLetStmt, SuggEscapeIdentifier, SuggRemoveComma, TernaryOperator,
TernaryOperatorSuggestion, UnexpectedConstInGenericParam, UnexpectedConstParamDeclaration,
UnexpectedConstParamDeclarationSugg, UnmatchedAngleBrackets, UseEqInstead, WrapType,
};
use crate::parser::FnContext;
Expand Down Expand Up @@ -212,22 +212,6 @@ impl std::fmt::Display for UnaryFixity {
}
}

/// Suggestion subdiagnostic that proposes replacing the text at `span` with `similar_kw`.
#[derive(Debug, rustc_macros::Subdiagnostic)]
#[suggestion(
parse_misspelled_kw,
applicability = "machine-applicable",
code = "{similar_kw}",
style = "verbose"
)]
struct MisspelledKw {
// We use a String here because `Symbol::into_diag_arg` calls `Symbol::to_ident_string`, which
// prefixes the keyword with `r#` because it aims to print the symbol as an identifier.
similar_kw: String,
// The span the suggestion applies to.
#[primary_span]
span: Span,
// NOTE(review): not referenced by the attribute above; presumably read by the
// `parse_misspelled_kw` message to adjust its wording — confirm.
is_incorrect_case: bool,
}

/// Checks if the given `lookup` identifier is similar to any keyword symbol in `candidates`.
///
/// This is a specialized version of [`Symbol::find_similar`] that constructs an error when a
Expand Down
Loading
Loading