From 4c363470b634ed452e0fc41ee97e189544c0e061 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 7 Dec 2023 09:53:08 +1100 Subject: [PATCH 1/3] Detect `NulInCStr` error earlier. By making it an `EscapeError` instead of a `LitError`. This makes it more like the other errors produced during unescaping. NOTE: this means these errors are issued earlier, before expansion, which changes behaviour. The next commit will delay issue of this error and others, reverting the behaviour change for this particular error. One nice thing about this: the old approach had some code in `report_lit_error` to calculate the span of the nul char from a range. This code used a hardwired `+2` to account for the `c"` at the start of a C string literal, but this should have changed to a `+3` for raw C string literals to account for the `cr"`, which meant that the caret in `cr"` nul error messages was one short of where it should have been. The new approach doesn't need any of this and avoids the off-by-one error. --- compiler/rustc_ast/src/util/literal.rs | 12 ++---------- compiler/rustc_lexer/src/unescape.rs | 17 +++++++++++++++-- compiler/rustc_parse/messages.ftl | 2 ++ compiler/rustc_parse/src/errors.rs | 5 +++++ .../src/lexer/unescape_error_reporting.rs | 3 +++ compiler/rustc_session/messages.ftl | 2 -- compiler/rustc_session/src/errors.rs | 15 +-------------- .../rfc-3348-c-string-literals/no-nuls.stderr | Bin 674 -> 675 bytes 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 92b9adf1db751..fbae496458813 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -8,7 +8,6 @@ use rustc_lexer::unescape::{ }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; -use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. 
Reuses the original symbol, @@ -39,7 +38,6 @@ pub enum LitError { InvalidFloatSuffix, NonDecimalFloat(u32), IntTooLarge(u32), - NulInCStr(Range), } impl LitKind { @@ -156,10 +154,7 @@ impl LitKind { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) @@ -179,10 +174,7 @@ impl LitKind { // can convert the symbol directly to a `Lrc` on success. let s = symbol.as_str(); let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c { - Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => { - error = Err(LitError::NulInCStr(span)); - } + unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { Ok(_) => {} Err(err) => { if err.is_fatal() { diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs index abec12f52a6e6..0a632c4d12ad5 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -59,6 +59,9 @@ pub enum EscapeError { /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. NonAsciiCharInByte, + // `\0` in a C string literal. + NulInCStr, + /// After a line ending with '\', the next line contains whitespace /// characters that are not skipped. 
UnskippedWhitespaceWarning, @@ -122,10 +125,20 @@ where { match mode { CStr => { - unescape_non_raw_common(src, mode, callback); + unescape_non_raw_common(src, mode, &mut |r, mut result| { + if let Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result) + }); } RawCStr => { - check_raw_common(src, mode, &mut |r, result| callback(r, result.map(CStrUnit::Char))); + check_raw_common(src, mode, &mut |r, mut result| { + if let Ok('\0') = result { + result = Err(EscapeError::NulInCStr); + } + callback(r, result.map(CStrUnit::Char)) + }); } Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(), } diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index 363b8f4bfb9cc..59bea69af68d3 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -612,6 +612,8 @@ parse_note_mut_pattern_usage = `mut` may be followed by `variable` and `variable parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns are separated with `|`, not `||` +parse_nul_in_c_str = null characters in C string literals are not supported + parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings parse_out_of_range_hex_escape = out of range hex escape diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 008adcc83d0ea..768d31ed7d438 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -2138,6 +2138,11 @@ pub enum UnescapeError { #[subdiagnostic] suggestion: MoreThanOneCharSugg, }, + #[diag(parse_nul_in_c_str)] + NulInCStr { + #[primary_span] + span: Span, + }, } #[derive(Subdiagnostic)] diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 
775082adbe81e..ab48da5cd156a 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -262,6 +262,9 @@ pub(crate) fn emit_unescape_error( EscapeError::LoneSlash => { dcx.emit_err(UnescapeError::LoneSlash(err_span)); } + EscapeError::NulInCStr => { + dcx.emit_err(UnescapeError::NulInCStr { span: err_span }); + } EscapeError::UnskippedWhitespaceWarning => { let (c, char_span) = last_char(); dcx.emit_warning(UnescapeError::UnskippedWhitespace { diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index f2e646c70f577..4f824f9f62e24 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -72,8 +72,6 @@ session_not_circumvent_feature = `-Zunleash-the-miri-inside-of-you` may not be u session_not_supported = not supported -session_nul_in_c_str = null characters in C string literals are not supported - session_octal_float_literal_not_supported = octal float literal is not supported session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg} diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index c3360815ac9f8..98de8a659dc44 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -5,7 +5,7 @@ use rustc_ast::token; use rustc_ast::util::literal::LitError; use rustc_errors::{error_code, DiagnosticMessage, ErrorGuaranteed, IntoDiagnostic, MultiSpan}; use rustc_macros::Diagnostic; -use rustc_span::{BytePos, Span, Symbol}; +use rustc_span::{Span, Symbol}; use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple}; pub struct FeatureGateError { @@ -329,13 +329,6 @@ pub(crate) struct BinaryFloatLiteralNotSupported { pub span: Span, } -#[derive(Diagnostic)] -#[diag(session_nul_in_c_str)] -pub(crate) struct NulInCStr { - #[primary_span] - pub span: Span, -} - pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: 
token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { @@ -414,12 +407,6 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: }; sess.emit_err(IntLiteralTooLarge { span, limit }); } - LitError::NulInCStr(range) => { - let lo = BytePos(span.lo().0 + range.start as u32 + 2); - let hi = BytePos(span.lo().0 + range.end as u32 + 2); - let span = span.with_lo(lo).with_hi(hi); - sess.emit_err(NulInCStr { span }); - } } } diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr index ff9006f6f97f1be7eed18d188e8c30ba2075743e..ee31c43fcc35b8a74cd286da4c6bb929839aea13 100644 GIT binary patch delta 17 YcmZ3)x|nrCFcYK2 Date: Thu, 7 Dec 2023 13:47:44 +1100 Subject: [PATCH 2/3] Move `report_lit_error` from `rustc_session` to `rustc_parse`. It's a more logical spot for it, and will be a big help for the next commit. Doing this creates a new dependency from `rustc_ast_lowering` on `rustc_parse`, but `rustc_ast_lowering` is clearly higher up the crate graph, so this isn't a big deal. One thing in favour of this change, is that two fluent labels were duplicated across `rustc_session` and `rustc_parse`: `invalid_literal_suffix` and `parse_not_supported`. This duplication is now gone, so that's nice evidence that this is a reasonable change. 
--- Cargo.lock | 1 + compiler/rustc_ast_lowering/Cargo.toml | 1 + compiler/rustc_ast_lowering/src/expr.rs | 2 +- compiler/rustc_builtin_macros/src/concat.rs | 2 +- .../rustc_builtin_macros/src/concat_bytes.rs | 2 +- compiler/rustc_expand/src/base.rs | 5 +- compiler/rustc_parse/messages.ftl | 29 +++ compiler/rustc_parse/src/errors.rs | 92 +++++++++ compiler/rustc_parse/src/parser/expr.rs | 88 ++++++++- compiler/rustc_parse/src/parser/mod.rs | 1 + compiler/rustc_session/messages.ftl | 31 --- compiler/rustc_session/src/errors.rs | 176 ------------------ 12 files changed, 216 insertions(+), 214 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbe5b2ec6b75c..2c08e9a8df096 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3428,6 +3428,7 @@ dependencies = [ "rustc_index", "rustc_macros", "rustc_middle", + "rustc_parse", "rustc_session", "rustc_span", "rustc_target", diff --git a/compiler/rustc_ast_lowering/Cargo.toml b/compiler/rustc_ast_lowering/Cargo.toml index 8cc4521e0a78d..285468bae1f84 100644 --- a/compiler/rustc_ast_lowering/Cargo.toml +++ b/compiler/rustc_ast_lowering/Cargo.toml @@ -17,6 +17,7 @@ rustc_hir = { path = "../rustc_hir" } rustc_index = { path = "../rustc_index" } rustc_macros = { path = "../rustc_macros" } rustc_middle = { path = "../rustc_middle" } +rustc_parse = { path = "../rustc_parse" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_target = { path = "../rustc_target" } diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 11b5131b8d788..38a325fd7110f 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_hir as hir; use rustc_hir::def::{DefKind, Res}; use rustc_middle::span_bug; -use rustc_session::errors::report_lit_error; +use rustc_parse::parser::report_lit_error; use rustc_span::source_map::{respan, Spanned}; use 
rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::DUMMY_SP; diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index 6b8330bfdaf92..6c83e8868bd31 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::tokenstream::TokenStream; use rustc_expand::base::{self, DummyResult}; -use rustc_session::errors::report_lit_error; +use rustc_parse::parser::report_lit_error; use rustc_span::symbol::Symbol; use crate::errors; diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 96e9584c20955..4ae328160f0c8 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::{ptr::P, tokenstream::TokenStream}; use rustc_expand::base::{self, DummyResult}; -use rustc_session::errors::report_lit_error; +use rustc_parse::parser::report_lit_error; use rustc_span::Span; use crate::errors; diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index b63609c48e912..f8f08a9e28a30 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -21,8 +21,7 @@ use rustc_errors::{ use rustc_feature::Features; use rustc_lint_defs::builtin::PROC_MACRO_BACK_COMPAT; use rustc_lint_defs::{BufferedEarlyLint, BuiltinLintDiagnostics, RegisteredTools}; -use rustc_parse::{parser, MACRO_ARGUMENTS}; -use rustc_session::errors::report_lit_error; +use rustc_parse::{self, parser, MACRO_ARGUMENTS}; use rustc_session::{parse::ParseSess, Limit, Session}; use rustc_span::def_id::{CrateNum, DefId, LocalDefId}; use rustc_span::edition::Edition; @@ -1251,7 +1250,7 @@ pub fn expr_to_spanned_string<'a>( } Ok(ast::LitKind::Err) => None, Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span); + 
parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span); None } _ => Some((cx.struct_span_err(expr.span, err_msg), false)), diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index 59bea69af68d3..b6b8f108bbb41 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -59,6 +59,8 @@ parse_bare_cr = {$double_quotes -> parse_bare_cr_in_raw_string = bare CR not allowed in raw string +parse_binary_float_literal_not_supported = binary float literal is not supported + parse_bounds_not_allowed_on_trait_aliases = bounds are not allowed on trait aliases parse_box_not_pat = expected pattern, found {$descr} @@ -292,7 +294,11 @@ parse_generic_parameters_without_angle_brackets = generic parameters without sur parse_generics_in_path = unexpected generic arguments in path parse_help_set_edition_cargo = set `edition = "{$edition}"` in `Cargo.toml` + parse_help_set_edition_standalone = pass `--edition {$edition}` to `rustc` + +parse_hexadecimal_float_literal_not_supported = hexadecimal float literal is not supported + parse_if_expression_missing_condition = missing condition for `if` expression .condition_label = expected condition here .block_label = if this block is the condition of the `if` expression, then it must be followed by another block @@ -364,6 +370,9 @@ parse_inner_doc_comment_not_permitted = expected outer doc comment .label_does_not_annotate_this = the inner doc comment doesn't annotate this {$item} .sugg_change_inner_to_outer = to annotate the {$item}, change the doc comment from inner to outer style +parse_int_literal_too_large = integer literal is too large + .note = value exceeds limit of `{$limit}` + parse_invalid_block_macro_segment = cannot use a `block` macro fragment here .label = the `block` fragment is within this context .suggestion = wrap this in another block @@ -388,8 +397,18 @@ parse_invalid_dyn_keyword = invalid `dyn` keyword .suggestion = remove this keyword 
parse_invalid_expression_in_let_else = a `{$operator}` expression cannot be directly assigned in `let...else` +parse_invalid_float_literal_suffix = invalid suffix `{$suffix}` for float literal + .label = invalid suffix `{$suffix}` + .help = valid suffixes are `f32` and `f64` + +parse_invalid_float_literal_width = invalid width `{$width}` for float literal + .help = valid widths are 32 and 64 + parse_invalid_identifier_with_leading_number = identifiers cannot start with a number +parse_invalid_int_literal_width = invalid width `{$width}` for integer literal + .help = valid widths are 8, 16, 32, 64 and 128 + parse_invalid_interpolated_expression = invalid interpolated expression parse_invalid_literal_suffix = suffixes on {$kind} literals are invalid @@ -408,6 +427,14 @@ parse_invalid_logical_operator = `{$incorrect}` is not a logical operator parse_invalid_meta_item = expected unsuffixed literal or identifier, found `{$token}` +parse_invalid_num_literal_base_prefix = invalid base prefix for number literal + .note = base prefixes (`0xff`, `0b1010`, `0o755`) are lowercase + .suggestion = try making the prefix lowercase + +parse_invalid_num_literal_suffix = invalid suffix `{$suffix}` for number literal + .label = invalid suffix `{$suffix}` + .help = the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.) 
+ parse_invalid_unicode_escape = invalid unicode character escape .label = invalid escape .help = unicode escape must {$surrogate -> @@ -614,6 +641,8 @@ parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns ar parse_nul_in_c_str = null characters in C string literals are not supported +parse_octal_float_literal_not_supported = octal float literal is not supported + parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings parse_out_of_range_hex_escape = out of range hex escape diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 768d31ed7d438..96cd5eee67ea3 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -2902,3 +2902,95 @@ pub(crate) struct TransposeDynOrImplSugg<'a> { pub insertion_span: Span, pub kw: &'a str, } + +#[derive(Diagnostic)] +#[diag(parse_invalid_literal_suffix)] +pub(crate) struct InvalidLiteralSuffix<'a> { + #[primary_span] + #[label] + pub span: Span, + // FIXME(#100717) + pub kind: &'a str, + pub suffix: Symbol, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_int_literal_width)] +#[help] +pub(crate) struct InvalidIntLiteralWidth { + #[primary_span] + pub span: Span, + pub width: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_num_literal_base_prefix)] +#[note] +pub(crate) struct InvalidNumLiteralBasePrefix { + #[primary_span] + #[suggestion(applicability = "maybe-incorrect", code = "{fixed}")] + pub span: Span, + pub fixed: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_num_literal_suffix)] +#[help] +pub(crate) struct InvalidNumLiteralSuffix { + #[primary_span] + #[label] + pub span: Span, + pub suffix: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_float_literal_width)] +#[help] +pub(crate) struct InvalidFloatLiteralWidth { + #[primary_span] + pub span: Span, 
+ pub width: String, +} + +#[derive(Diagnostic)] +#[diag(parse_invalid_float_literal_suffix)] +#[help] +pub(crate) struct InvalidFloatLiteralSuffix { + #[primary_span] + #[label] + pub span: Span, + pub suffix: String, +} + +#[derive(Diagnostic)] +#[diag(parse_hexadecimal_float_literal_not_supported)] +pub(crate) struct HexadecimalFloatLiteralNotSupported { + #[primary_span] + #[label(parse_not_supported)] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_octal_float_literal_not_supported)] +pub(crate) struct OctalFloatLiteralNotSupported { + #[primary_span] + #[label(parse_not_supported)] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_binary_float_literal_not_supported)] +pub(crate) struct BinaryFloatLiteralNotSupported { + #[primary_span] + #[label(parse_not_supported)] + pub span: Span, +} + +#[derive(Diagnostic)] +#[diag(parse_int_literal_too_large)] +#[note] +pub(crate) struct IntLiteralTooLarge { + #[primary_span] + pub span: Span, + pub limit: String, +} diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index cd3e8b92f2f9e..4d061fb0ad996 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -17,6 +17,7 @@ use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::Spacing; use rustc_ast::util::case::Case; use rustc_ast::util::classify; +use rustc_ast::util::literal::LitError; use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity}; use rustc_ast::visit::Visitor; use rustc_ast::{self as ast, AttrStyle, AttrVec, CaptureBy, ExprField, UnOp, DUMMY_NODE_ID}; @@ -30,9 +31,10 @@ use rustc_errors::{ PResult, StashKey, }; use rustc_macros::Subdiagnostic; -use rustc_session::errors::{report_lit_error, ExprParenthesesNeeded}; +use rustc_session::errors::ExprParenthesesNeeded; use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP; use rustc_session::lint::BuiltinLintDiagnostics; +use 
rustc_session::parse::ParseSess; use rustc_span::source_map::{self, Spanned}; use rustc_span::symbol::kw::PathRoot; use rustc_span::symbol::{kw, sym, Ident, Symbol}; @@ -3672,6 +3674,90 @@ impl<'a> Parser<'a> { } } +pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { + // Checks if `s` looks like i32 or u1234 etc. + fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) + } + + // Try to lowercase the prefix if the prefix and suffix are valid. + fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option { + let mut chars = suffix.chars(); + + let base_char = chars.next().unwrap(); + let base = match base_char { + 'B' => 2, + 'O' => 8, + 'X' => 16, + _ => return None, + }; + + // check that the suffix contains only base-appropriate characters + let valid = prefix == "0" + && chars + .filter(|c| *c != '_') + .take_while(|c| *c != 'i' && *c != 'u') + .all(|c| c.to_digit(base).is_some()); + + valid.then(|| format!("0{}{}", base_char.to_ascii_lowercase(), &suffix[1..])) + } + + let token::Lit { kind, symbol, suffix, .. } = lit; + match err { + // `LexerError` is an error, but it was already reported + // by lexer, so here we don't report it the second time. + LitError::LexerError => {} + LitError::InvalidSuffix => { + if let Some(suffix) = suffix { + sess.emit_err(errors::InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); + } + } + LitError::InvalidIntSuffix => { + let suf = suffix.expect("suffix error with no suffix"); + let suf = suf.as_str(); + if looks_like_width_suffix(&['i', 'u'], suf) { + // If it looks like a width, try to be helpful. 
+ sess.emit_err(errors::InvalidIntLiteralWidth { span, width: suf[1..].into() }); + } else if let Some(fixed) = fix_base_capitalisation(symbol.as_str(), suf) { + sess.emit_err(errors::InvalidNumLiteralBasePrefix { span, fixed }); + } else { + sess.emit_err(errors::InvalidNumLiteralSuffix { span, suffix: suf.to_string() }); + } + } + LitError::InvalidFloatSuffix => { + let suf = suffix.expect("suffix error with no suffix"); + let suf = suf.as_str(); + if looks_like_width_suffix(&['f'], suf) { + // If it looks like a width, try to be helpful. + sess.emit_err(errors::InvalidFloatLiteralWidth { + span, + width: suf[1..].to_string(), + }); + } else { + sess.emit_err(errors::InvalidFloatLiteralSuffix { span, suffix: suf.to_string() }); + } + } + LitError::NonDecimalFloat(base) => { + match base { + 16 => sess.emit_err(errors::HexadecimalFloatLiteralNotSupported { span }), + 8 => sess.emit_err(errors::OctalFloatLiteralNotSupported { span }), + 2 => sess.emit_err(errors::BinaryFloatLiteralNotSupported { span }), + _ => unreachable!(), + }; + } + LitError::IntTooLarge(base) => { + let max = u128::MAX; + let limit = match base { + 2 => format!("{max:#b}"), + 8 => format!("{max:#o}"), + 16 => format!("{max:#x}"), + _ => format!("{max}"), + }; + sess.emit_err(errors::IntLiteralTooLarge { span, limit }); + } + } +} + /// Used to forbid `let` expressions in certain syntactic locations. 
#[derive(Clone, Copy, Subdiagnostic)] pub(crate) enum ForbiddenLetReason { diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index b91432f10c8fa..051e1deec66fc 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -11,6 +11,7 @@ mod stmt; mod ty; use crate::lexer::UnmatchedDelim; +pub use crate::parser::expr::report_lit_error; pub use attr_wrapper::AttrWrapper; pub use diagnostics::AttemptLocalParseRecovery; pub(crate) use expr::ForbiddenLetReason; diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl index 4f824f9f62e24..712388775fc30 100644 --- a/compiler/rustc_session/messages.ftl +++ b/compiler/rustc_session/messages.ftl @@ -1,4 +1,3 @@ -session_binary_float_literal_not_supported = binary float literal is not supported session_branch_protection_requires_aarch64 = `-Zbranch-protection` is only supported on aarch64 session_cannot_enable_crt_static_linux = sanitizer is incompatible with statically linked libc, disable it using `-C target-feature=-crt-static` @@ -32,48 +31,18 @@ session_function_return_requires_x86_or_x86_64 = `-Zfunction-return` (except `ke session_function_return_thunk_extern_requires_non_large_code_model = `-Zfunction-return=thunk-extern` is only supported on non-large code models -session_hexadecimal_float_literal_not_supported = hexadecimal float literal is not supported - session_incompatible_linker_flavor = linker flavor `{$flavor}` is incompatible with the current target .note = compatible flavors are: {$compatible_list} session_instrumentation_not_supported = {$us} instrumentation is not supported for this target -session_int_literal_too_large = integer literal is too large - .note = value exceeds limit of `{$limit}` - session_invalid_character_in_create_name = invalid character `{$character}` in crate name: `{$crate_name}` session_invalid_character_in_create_name_help = you can either pass `--crate-name` on the command 
line or add `#![crate_name="…"]` to set the crate name -session_invalid_float_literal_suffix = invalid suffix `{$suffix}` for float literal - .label = invalid suffix `{$suffix}` - .help = valid suffixes are `f32` and `f64` - -session_invalid_float_literal_width = invalid width `{$width}` for float literal - .help = valid widths are 32 and 64 - -session_invalid_int_literal_width = invalid width `{$width}` for integer literal - .help = valid widths are 8, 16, 32, 64 and 128 - -session_invalid_literal_suffix = suffixes on {$kind} literals are invalid - .label = invalid suffix `{$suffix}` - -session_invalid_num_literal_base_prefix = invalid base prefix for number literal - .note = base prefixes (`0xff`, `0b1010`, `0o755`) are lowercase - .suggestion = try making the prefix lowercase - -session_invalid_num_literal_suffix = invalid suffix `{$suffix}` for number literal - .label = invalid suffix `{$suffix}` - .help = the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.) - session_linker_plugin_lto_windows_not_supported = linker plugin based LTO is not supported together with `-C prefer-dynamic` when targeting Windows-like targets session_not_circumvent_feature = `-Zunleash-the-miri-inside-of-you` may not be used to circumvent feature gates, except when testing error paths in the CTFE engine -session_not_supported = not supported - -session_octal_float_literal_not_supported = octal float literal is not supported - session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg} session_profile_sample_use_file_does_not_exist = file `{$path}` passed to `-C profile-sample-use` does not exist. 
diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs index 98de8a659dc44..f1d578a2c9176 100644 --- a/compiler/rustc_session/src/errors.rs +++ b/compiler/rustc_session/src/errors.rs @@ -1,8 +1,5 @@ use std::num::NonZeroU32; -use crate::parse::ParseSess; -use rustc_ast::token; -use rustc_ast::util::literal::LitError; use rustc_errors::{error_code, DiagnosticMessage, ErrorGuaranteed, IntoDiagnostic, MultiSpan}; use rustc_macros::Diagnostic; use rustc_span::{Span, Symbol}; @@ -237,179 +234,6 @@ pub enum UnleashedFeatureHelp { }, } -#[derive(Diagnostic)] -#[diag(session_invalid_literal_suffix)] -pub(crate) struct InvalidLiteralSuffix<'a> { - #[primary_span] - #[label] - pub span: Span, - // FIXME(#100717) - pub kind: &'a str, - pub suffix: Symbol, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_int_literal_width)] -#[help] -pub(crate) struct InvalidIntLiteralWidth { - #[primary_span] - pub span: Span, - pub width: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_num_literal_base_prefix)] -#[note] -pub(crate) struct InvalidNumLiteralBasePrefix { - #[primary_span] - #[suggestion(applicability = "maybe-incorrect", code = "{fixed}")] - pub span: Span, - pub fixed: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_num_literal_suffix)] -#[help] -pub(crate) struct InvalidNumLiteralSuffix { - #[primary_span] - #[label] - pub span: Span, - pub suffix: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_float_literal_width)] -#[help] -pub(crate) struct InvalidFloatLiteralWidth { - #[primary_span] - pub span: Span, - pub width: String, -} - -#[derive(Diagnostic)] -#[diag(session_invalid_float_literal_suffix)] -#[help] -pub(crate) struct InvalidFloatLiteralSuffix { - #[primary_span] - #[label] - pub span: Span, - pub suffix: String, -} - -#[derive(Diagnostic)] -#[diag(session_int_literal_too_large)] -#[note] -pub(crate) struct IntLiteralTooLarge { - #[primary_span] - pub span: Span, - pub limit: String, -} - 
-#[derive(Diagnostic)] -#[diag(session_hexadecimal_float_literal_not_supported)] -pub(crate) struct HexadecimalFloatLiteralNotSupported { - #[primary_span] - #[label(session_not_supported)] - pub span: Span, -} - -#[derive(Diagnostic)] -#[diag(session_octal_float_literal_not_supported)] -pub(crate) struct OctalFloatLiteralNotSupported { - #[primary_span] - #[label(session_not_supported)] - pub span: Span, -} - -#[derive(Diagnostic)] -#[diag(session_binary_float_literal_not_supported)] -pub(crate) struct BinaryFloatLiteralNotSupported { - #[primary_span] - #[label(session_not_supported)] - pub span: Span, -} - -pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { - // Checks if `s` looks like i32 or u1234 etc. - fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { - s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) - } - - // Try to lowercase the prefix if the prefix and suffix are valid. - fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option { - let mut chars = suffix.chars(); - - let base_char = chars.next().unwrap(); - let base = match base_char { - 'B' => 2, - 'O' => 8, - 'X' => 16, - _ => return None, - }; - - // check that the suffix contains only base-appropriate characters - let valid = prefix == "0" - && chars - .filter(|c| *c != '_') - .take_while(|c| *c != 'i' && *c != 'u') - .all(|c| c.to_digit(base).is_some()); - - valid.then(|| format!("0{}{}", base_char.to_ascii_lowercase(), &suffix[1..])) - } - - let token::Lit { kind, symbol, suffix, .. } = lit; - match err { - // `LexerError` is an error, but it was already reported - // by lexer, so here we don't report it the second time. 
- LitError::LexerError => {} - LitError::InvalidSuffix => { - if let Some(suffix) = suffix { - sess.emit_err(InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); - } - } - LitError::InvalidIntSuffix => { - let suf = suffix.expect("suffix error with no suffix"); - let suf = suf.as_str(); - if looks_like_width_suffix(&['i', 'u'], suf) { - // If it looks like a width, try to be helpful. - sess.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() }); - } else if let Some(fixed) = fix_base_capitalisation(symbol.as_str(), suf) { - sess.emit_err(InvalidNumLiteralBasePrefix { span, fixed }); - } else { - sess.emit_err(InvalidNumLiteralSuffix { span, suffix: suf.to_string() }); - } - } - LitError::InvalidFloatSuffix => { - let suf = suffix.expect("suffix error with no suffix"); - let suf = suf.as_str(); - if looks_like_width_suffix(&['f'], suf) { - // If it looks like a width, try to be helpful. - sess.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() }); - } else { - sess.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() }); - } - } - LitError::NonDecimalFloat(base) => { - match base { - 16 => sess.emit_err(HexadecimalFloatLiteralNotSupported { span }), - 8 => sess.emit_err(OctalFloatLiteralNotSupported { span }), - 2 => sess.emit_err(BinaryFloatLiteralNotSupported { span }), - _ => unreachable!(), - }; - } - LitError::IntTooLarge(base) => { - let max = u128::MAX; - let limit = match base { - 2 => format!("{max:#b}"), - 8 => format!("{max:#o}"), - 16 => format!("{max:#x}"), - _ => format!("{max}"), - }; - sess.emit_err(IntLiteralTooLarge { span, limit }); - } - } -} - #[derive(Diagnostic)] #[diag(session_optimization_fuel_exhausted)] pub struct OptimisationFuelExhausted { From ee19d5284d8ddaafdec3a0c75d455ddb5b12d9ad Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 12 Dec 2023 10:02:50 +1100 Subject: [PATCH 3/3] [xt but not xc] Delay string literal unescaping. Currently string literals are unescaped twice. 
- Once during lexing in `cook_quoted`/`cook_c_string`/`cook_common`. This one just checks for errors. - Again in `LitKind::from_token_lit`, which is called when lowering AST to HIR, and also in a few other places during expansion. This one actually constructs the unescaped string. It also has error checking code, but that part of the code is actually dead (and has several bugs) because the check during lexing catches all errors! This commit removes the error-check-only unescaping during lexing, and fixes up `LitKind::from_token_lit` so it properly does both checking and construction. This is a backwards-compatible language change: some programs now compile that previously did not. For example, it is now possible for macros to consume "invalid" string literals like "\a\b\c". This is a continuation of a trend of delaying semantic error checking of literals to after expansion: - #102944 did this for some cases for numeric literals - The detection of NUL chars in C string literals is already delayed in this way. Notes about test changes: - `ignore-block-help.rs`: this requires a parse error for the test to work. The error used was an unescaping error, which is now delayed to after parsing. So the commit changes it to an "unterminated character literal" error which still occurs during parsing. - `tests/ui/lexer/error-stage.rs`: this shows the newly allowed cases, due to delayed literal unescaping. - Several tests had unescaping errors combined with unterminated literal errors. The former are now delayed but the latter remain as lexing errors. So the unterminated literal part needed to be split into a separate test file otherwise compilation would end before the other errors were reported. - issue-62913.rs: The structure and output changed a bit. Issue #62913 was about an ICE due to an unterminated string literal, so the new version should be good enough. 
- literals-are-validated-before-expansion.rs: this tests exactly the behaviour that has been changed, and so was removed. - A couple of other tests produce the same errors, just in a different order. --- compiler/rustc_ast/src/util/literal.rs | 270 +++++++++++++----- compiler/rustc_ast_lowering/src/expr.rs | 15 +- compiler/rustc_builtin_macros/src/concat.rs | 63 ++-- .../rustc_builtin_macros/src/concat_bytes.rs | 8 +- compiler/rustc_expand/src/base.rs | 44 +-- compiler/rustc_lexer/src/unescape.rs | 2 +- compiler/rustc_parse/src/lexer/mod.rs | 91 +----- .../src/lexer/unescape_error_reporting.rs | 14 +- compiler/rustc_parse/src/parser/expr.rs | 62 +++- compiler/rustc_parse/src/parser/mod.rs | 4 +- compiler/rustc_parse/src/validate_attr.rs | 21 +- tests/rustdoc-ui/ignore-block-help.rs | 4 +- tests/rustdoc-ui/ignore-block-help.stderr | 4 +- tests/ui/fmt/format-string-error-2.stderr | 12 +- tests/ui/lexer/error-stage.rs | 46 ++- tests/ui/lexer/error-stage.stderr | 69 ++++- tests/ui/lexer/lex-bad-char-literals-7.rs | 3 - tests/ui/lexer/lex-bad-char-literals-7.stderr | 9 +- tests/ui/lexer/lex-bad-char-literals-8.rs | 4 + tests/ui/lexer/lex-bad-char-literals-8.stderr | 9 + tests/ui/parser/byte-literals-2.rs | 3 + tests/ui/parser/byte-literals-2.stderr | 9 + tests/ui/parser/byte-literals.rs | 1 - tests/ui/parser/byte-literals.stderr | 9 +- tests/ui/parser/byte-string-literals-2.rs | 3 + tests/ui/parser/byte-string-literals-2.stderr | 11 + tests/ui/parser/byte-string-literals.rs | 1 - tests/ui/parser/byte-string-literals.stderr | 11 +- tests/ui/parser/issues/issue-62913.rs | 9 +- tests/ui/parser/issues/issue-62913.stderr | 22 +- ...literals-are-validated-before-expansion.rs | 10 - ...rals-are-validated-before-expansion.stderr | 18 -- .../parser/raw/raw-byte-string-literals-2.rs | 3 + .../raw/raw-byte-string-literals-2.stderr | 8 + .../ui/parser/raw/raw-byte-string-literals.rs | 1 - .../raw/raw-byte-string-literals.stderr | 8 +- .../parser/unicode-control-codepoints.stderr |
168 +++++------ 37 files changed, 606 insertions(+), 443 deletions(-) create mode 100644 tests/ui/lexer/lex-bad-char-literals-8.rs create mode 100644 tests/ui/lexer/lex-bad-char-literals-8.stderr create mode 100644 tests/ui/parser/byte-literals-2.rs create mode 100644 tests/ui/parser/byte-literals-2.stderr create mode 100644 tests/ui/parser/byte-string-literals-2.rs create mode 100644 tests/ui/parser/byte-string-literals-2.stderr delete mode 100644 tests/ui/parser/macro/literals-are-validated-before-expansion.rs delete mode 100644 tests/ui/parser/macro/literals-are-validated-before-expansion.stderr create mode 100644 tests/ui/parser/raw/raw-byte-string-literals-2.rs create mode 100644 tests/ui/parser/raw/raw-byte-string-literals-2.stderr diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index fbae496458813..663f8bdbf644d 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -3,11 +3,11 @@ use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{ - byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, - Mode, + byte_from_char, unescape_c_string, unescape_literal, CStrUnit, EscapeError, Mode, }; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; +use std::ops::Range; use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, @@ -33,6 +33,14 @@ pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol { #[derive(Debug)] pub enum LitError { LexerError, + EscapeError { + mode: Mode, + // Length before the string content, e.g. 1 for "a", 5 for br##"a"## + prefix_len: u32, + // The range is the byte range of the bad character, using a zero index. 
+ range: Range, + err: EscapeError, + }, InvalidSuffix, InvalidIntSuffix, InvalidFloatSuffix, @@ -41,154 +49,252 @@ pub enum LitError { } impl LitKind { - /// Converts literal token into a semantic literal. - pub fn from_token_lit(lit: token::Lit) -> Result { + /// Converts literal token into a semantic literal. The return value has + /// two parts: + /// - The `Result` indicates success or failure. + /// - The `Vec` contains all found errors and warnings. + /// + /// If we only had to deal with errors, we could use the more obvious + /// `Result<LitKind, Vec<LitError>>`; on failure the caller would just + /// (optionally) print errors and take the error path and stop early. But + /// it's possible to succeed with zero errors and one or more warnings, and + /// in that case the caller should (optionally) print the warnings, but + /// also proceed with a valid `LitKind`. This return type facilitates that. + pub fn from_token_lit_with_errs(lit: token::Lit) -> (Result, Vec) { let token::Lit { kind, symbol, suffix } = lit; if suffix.is_some() && !kind.may_have_suffix() { - return Err(LitError::InvalidSuffix); + // Note: we return a single error here. We could instead continue + // processing, possibly returning multiple errors.
+ return (Err(()), vec![LitError::InvalidSuffix]); } - Ok(match kind { + let mut errs = vec![]; + let mut has_fatal = false; + + let res = match kind { token::Bool => { assert!(symbol.is_bool_lit()); - LitKind::Bool(symbol == kw::True) + Ok(LitKind::Bool(symbol == kw::True)) } token::Byte => { - return unescape_byte(symbol.as_str()) - .map(LitKind::Byte) - .map_err(|_| LitError::LexerError); + let mode = Mode::Byte; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Byte(byte_from_char(res.unwrap()))) } else { Err(()) } } token::Char => { - return unescape_char(symbol.as_str()) - .map(LitKind::Char) - .map_err(|_| LitError::LexerError); + let mode = Mode::Char; + let mut res = None; + unescape_literal(symbol.as_str(), mode, &mut |range, unescaped_char| { + match unescaped_char { + Ok(c) => res = Some(c), + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // ' + range, + err, + }); + } + } + }); + if !has_fatal { Ok(LitKind::Char(res.unwrap())) } else { Err(()) } } // There are some valid suffixes for integer and float literals, // so all the handling is done internally. - token::Integer => return integer_lit(symbol, suffix), - token::Float => return float_lit(symbol, suffix), + token::Integer => { + return match integer_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } + token::Float => { + return match float_lit(symbol, suffix) { + Ok(lit_kind) => (Ok(lit_kind), vec![]), + Err(err) => (Err(()), vec![err]), + }; + } token::Str => { // If there are no characters requiring special treatment we can // reuse the symbol from the token. 
Otherwise, we must generate a // new symbol because the string in the LitKind is different to the // string in the token. + let mode = Mode::Str; let s = symbol.as_str(); // Vanilla strings are so common we optimize for the common case where no chars // requiring special behaviour are present. - let symbol = if s.contains(['\\', '\r']) { + if s.contains(['\\', '\r']) { let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); // Force-inlining here is aggressive but the closure is // called on every char in the string, so it can be // hot in programs with many long strings. unescape_literal( s, - Mode::Str, + mode, &mut #[inline(always)] - |_, unescaped_char| match unescaped_char { + |range, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 1, // " + range, + err, + }); } }, ); - error?; - Symbol::intern(&buf) + if !has_fatal { + Ok(LitKind::Str(Symbol::intern(&buf), ast::StrStyle::Cooked)) + } else { + Err(()) + } } else { - symbol - }; - LitKind::Str(symbol, ast::StrStyle::Cooked) + Ok(LitKind::Str(symbol, ast::StrStyle::Cooked)) + } } token::StrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can reuse the symbol on success. - let mut error = Ok(()); - unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| { - match unescaped_char { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } + let mode = Mode::RawStr; + let s = symbol.as_str(); + unescape_literal(s, mode, &mut |range, unescaped_char| match unescaped_char { + Ok(_) => {} + Err(err) => { + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2 + n as u32, // r", r#", r##", etc. 
+ range, + err, + }); } }); - error?; - LitKind::Str(symbol, ast::StrStyle::Raw(n)) + if !has_fatal { Ok(LitKind::Str(symbol, ast::StrStyle::Raw(n))) } else { Err(()) } } token::ByteStr => { + let mode = Mode::ByteStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(c) => buf.push(byte_from_char(c)), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // b" + range, + err, + }); } }); - error?; - LitKind::ByteStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + Ok(LitKind::ByteStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::ByteStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we // can convert the symbol directly to a `Lrc` on success. + let mode = Mode::RawByteStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { + unescape_literal(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // br", br#", br##", etc. 
+ range, + err, + }); } }); - LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n)) + if !has_fatal { + Ok(LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))) + } else { + Err(()) + } } token::CStr => { + let mode = Mode::CStr; let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); - unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 2, // c" + range, + err, + }); } }); - error?; - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Cooked) + if !has_fatal { + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Cooked)) + } else { + Err(()) + } } token::CStrRaw(n) => { // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can convert the symbol directly to a `Lrc` on success. + // can convert the symbol directly to a `Lrc` (after appending a nul char) on + // success. + let mode = Mode::RawCStr; let s = symbol.as_str(); - let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { + unescape_c_string(s, mode, &mut |range, c| match c { Ok(_) => {} Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + has_fatal |= err.is_fatal(); + errs.push(LitError::EscapeError { + mode, + prefix_len: 3 + n as u32, // cr", cr#", cr##", etc. 
+ range, + err, + }); } }); - error?; - let mut buf = s.to_owned().into_bytes(); - buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Raw(n)) + if !has_fatal { + let mut buf = s.to_owned().into_bytes(); + buf.push(0); + Ok(LitKind::CStr(buf.into(), StrStyle::Raw(n))) + } else { + Err(()) + } } - token::Err => LitKind::Err, - }) + token::Err => Ok(LitKind::Err), + }; + (res, errs) + } + + // Use this one for call sites where we don't need to print error messages + // about invalid literals. + pub fn from_token_lit(lit: token::Lit) -> Result { + LitKind::from_token_lit_with_errs(lit).0 } } @@ -256,14 +362,26 @@ impl fmt::Display for LitKind { } impl MetaItemLit { - /// Converts a token literal into a meta item literal. - pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { - Ok(MetaItemLit { + /// Converts a token literal into a meta item literal. See + /// `LitKind::from_token_lit_with_errs` for an explanation of the return type. + pub fn from_token_lit_with_errs( + token_lit: token::Lit, + span: Span, + ) -> (Result, Vec) { + let (lit, errs) = LitKind::from_token_lit_with_errs(token_lit); + let lit = lit.map(|kind| MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, - kind: LitKind::from_token_lit(token_lit)?, + kind, span, - }) + }); + (lit, errs) + } + + // Use this one for call sites where we don't need to print error messages + // about invalid literals. + pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { + MetaItemLit::from_token_lit_with_errs(token_lit, span).0 } /// Cheaply converts a meta item literal into a token literal.
diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 38a325fd7110f..7b87a2cf676ba 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_hir as hir; use rustc_hir::def::{DefKind, Res}; use rustc_middle::span_bug; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::source_map::{respan, Spanned}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::DUMMY_SP; @@ -119,13 +119,12 @@ impl<'hir> LoweringContext<'_, 'hir> { hir::ExprKind::Unary(op, ohs) } ExprKind::Lit(token_lit) => { - let lit_kind = match LitKind::from_token_lit(*token_lit) { - Ok(lit_kind) => lit_kind, - Err(err) => { - report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span); - LitKind::Err - } - }; + let lit_kind = token_lit_to_lit_kind_and_report_errs( + &self.tcx.sess.parse_sess, + *token_lit, + e.span, + ) + .unwrap_or(LitKind::Err); let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind)); hir::ExprKind::Lit(lit) } diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs index 6c83e8868bd31..e926418b7d065 100644 --- a/compiler/rustc_builtin_macros/src/concat.rs +++ b/compiler/rustc_builtin_macros/src/concat.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::tokenstream::TokenStream; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::symbol::Symbol; use crate::errors; @@ -19,44 +19,43 @@ pub fn expand_concat( let mut has_errors = false; for e in es { match e.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { - accumulator.push_str(s.as_str()); - } 
- Ok(ast::LitKind::Char(c)) => { - accumulator.push(c); - } - Ok(ast::LitKind::Int(i, _)) => { - accumulator.push_str(&i.to_string()); - } - Ok(ast::LitKind::Bool(b)) => { - accumulator.push_str(&b.to_string()); - } - Ok(ast::LitKind::CStr(..)) => { - cx.emit_err(errors::ConcatCStrLit { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { - cx.emit_err(errors::ConcatBytestr { span: e.span }); - has_errors = true; - } - Ok(ast::LitKind::Err) => { - has_errors = true; - } - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); - has_errors = true; + ast::ExprKind::Lit(token_lit) => { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span) + { + Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => { + accumulator.push_str(s.as_str()); + } + Ok(ast::LitKind::Char(c)) => { + accumulator.push(c); + } + Ok(ast::LitKind::Int(i, _)) => { + accumulator.push_str(&i.to_string()); + } + Ok(ast::LitKind::Bool(b)) => { + accumulator.push_str(&b.to_string()); + } + Ok(ast::LitKind::CStr(..)) => { + cx.emit_err(errors::ConcatCStrLit { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => { + cx.emit_err(errors::ConcatBytestr { span: e.span }); + has_errors = true; + } + Ok(ast::LitKind::Err) | Err(()) => { + has_errors = true; + } } - }, + } // We also want to allow negative numeric literals. 
ast::ExprKind::Unary(ast::UnOp::Neg, ref expr) if let ast::ExprKind::Lit(token_lit) = expr.kind => { - match ast::LitKind::from_token_lit(token_lit) { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, e.span) + { Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")), Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span); + Err(()) => { has_errors = true; } _ => missing_literal.push(e.span), diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 4ae328160f0c8..f4d3bd458fc19 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -1,7 +1,7 @@ use rustc_ast as ast; use rustc_ast::{ptr::P, tokenstream::TokenStream}; use rustc_expand::base::{self, DummyResult}; -use rustc_parse::parser::report_lit_error; +use rustc_parse::parser::token_lit_to_lit_kind_and_report_errs; use rustc_span::Span; use crate::errors; @@ -17,7 +17,7 @@ fn invalid_type_err( ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob, }; let snippet = cx.sess.source_map().span_to_snippet(span).ok(); - match ast::LitKind::from_token_lit(token_lit) { + match token_lit_to_lit_kind_and_report_errs(&cx.sess.parse_sess, token_lit, span) { Ok(ast::LitKind::CStr(_, _)) => { // Avoid ambiguity in handling of terminal `NUL` by refusing to // concatenate C string literals as bytes. @@ -60,9 +60,7 @@ fn invalid_type_err( cx.emit_err(ConcatBytesNonU8 { span }); } Ok(ast::LitKind::ByteStr(..) 
| ast::LitKind::Byte(_)) => unreachable!(), - Err(err) => { - report_lit_error(&cx.sess.parse_sess, err, token_lit, span); - } + Err(()) => {} } } diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index f8f08a9e28a30..054908eed3a49 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -1235,26 +1235,30 @@ pub fn expr_to_spanned_string<'a>( let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); Err(match expr.kind { - ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { - Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), - Ok(ast::LitKind::ByteStr(..)) => { - let mut err = cx.struct_span_err(expr.span, err_msg); - let span = expr.span.shrink_to_lo(); - err.span_suggestion( - span.with_hi(span.lo() + BytePos(1)), - "consider removing the leading `b`", - "", - Applicability::MaybeIncorrect, - ); - Some((err, true)) - } - Ok(ast::LitKind::Err) => None, - Err(err) => { - parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span); - None - } - _ => Some((cx.struct_span_err(expr.span, err_msg), false)), - }, + ast::ExprKind::Lit(token_lit) => { + let res = match parser::token_lit_to_lit_kind_and_report_errs( + &cx.sess.parse_sess, + token_lit, + expr.span, + ) { + Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), + Ok(ast::LitKind::ByteStr(..)) => { + let mut err = cx.struct_span_err(expr.span, err_msg); + let span = expr.span.shrink_to_lo(); + err.span_suggestion( + span.with_hi(span.lo() + BytePos(1)), + "consider removing the leading `b`", + "", + Applicability::MaybeIncorrect, + ); + Some((err, true)) + } + Ok(ast::LitKind::Err) => None, + Err(()) => None, + _ => Some((cx.struct_span_err(expr.span, err_msg), false)), + }; + res + } ast::ExprKind::Err => None, _ => Some((cx.struct_span_err(expr.span, err_msg), false)), }) diff --git a/compiler/rustc_lexer/src/unescape.rs 
b/compiler/rustc_lexer/src/unescape.rs index 0a632c4d12ad5..06999dae447b4 100644 --- a/compiler/rustc_lexer/src/unescape.rs +++ b/compiler/rustc_lexer/src/unescape.rs @@ -348,7 +348,7 @@ where // them in the range computation. while let Some(c) = chars.next() { let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { + let res: Result = match c { '\\' => { match chars.clone().next() { Some('\n') => { diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c158edaac2554..96fae62e5d439 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -1,5 +1,3 @@ -use std::ops::Range; - use crate::errors; use crate::lexer::unicode_chars::UNICODE_ARRAY; use crate::make_unclosed_delims_error; @@ -8,7 +6,6 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ @@ -21,10 +18,10 @@ use rustc_span::{edition::Edition, BytePos, Pos, Span}; mod diagnostics; mod tokentrees; -mod unescape_error_reporting; +pub(crate) mod unescape_error_reporting; mod unicode_chars; -use unescape_error_reporting::{emit_unescape_error, escaped_char}; +use unescape_error_reporting::escaped_char; // This type is used a lot. Make sure it doesn't unintentionally get bigger. 
// @@ -409,7 +406,7 @@ impl<'a> StringReader<'a> { error_code!(E0762), ) } - self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' ' + self.cook_quoted(token::Char, start, end, 1, 1) // ' ' } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { @@ -419,7 +416,7 @@ impl<'a> StringReader<'a> { error_code!(E0763), ) } - self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' ' + self.cook_quoted(token::Byte, start, end, 2, 1) // b' ' } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { @@ -429,7 +426,7 @@ impl<'a> StringReader<'a> { error_code!(E0765), ) } - self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " " + self.cook_quoted(token::Str, start, end, 1, 1) // " " } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { @@ -439,7 +436,7 @@ impl<'a> StringReader<'a> { error_code!(E0766), ) } - self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" " + self.cook_quoted(token::ByteStr, start, end, 2, 1) // b" " } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { @@ -449,13 +446,13 @@ impl<'a> StringReader<'a> { error_code!(E0767), ) } - self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" " + self.cook_quoted(token::CStr, start, end, 2, 1) // c" " } rustc_lexer::LiteralKind::RawStr { n_hashes } => { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::StrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "## + self.cook_quoted(kind, start, end, 2 + n, 1 + n) // r##" "## } else { self.report_raw_str_error(start, 1); } @@ -464,7 +461,7 @@ impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::ByteStrRaw(n_hashes); - self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // br##" "## } else { self.report_raw_str_error(start, 2); } @@ -473,7 +470,7 @@ 
impl<'a> StringReader<'a> { if let Some(n_hashes) = n_hashes { let n = u32::from(n_hashes); let kind = token::CStrRaw(n_hashes); - self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "## + self.cook_quoted(kind, start, end, 3 + n, 1 + n) // cr##" "## } else { self.report_raw_str_error(start, 2); } @@ -693,82 +690,18 @@ impl<'a> StringReader<'a> { self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } - fn cook_common( + fn cook_quoted( &self, kind: token::LitKind, - mode: Mode, start: BytePos, end: BytePos, prefix_len: u32, postfix_len: u32, - unescape: fn(&str, Mode, &mut dyn FnMut(Range, Result<(), EscapeError>)), ) -> (token::LitKind, Symbol) { - let mut has_fatal_err = false; let content_start = start + BytePos(prefix_len); let content_end = end - BytePos(postfix_len); let lit_content = self.str_from_to(content_start, content_end); - unescape(lit_content, mode, &mut |range, result| { - // Here we only check for errors. The actual unescaping is done later. - if let Err(err) = result { - let span_with_quotes = self.mk_sp(start, end); - let (start, end) = (range.start as u32, range.end as u32); - let lo = content_start + BytePos(start); - let hi = lo + BytePos(end - start); - let span = self.mk_sp(lo, hi); - if err.is_fatal() { - has_fatal_err = true; - } - emit_unescape_error( - &self.sess.dcx, - lit_content, - span_with_quotes, - span, - mode, - range, - err, - ); - } - }); - - // We normally exclude the quotes for the symbol, but for errors we - // include it because it results in clearer error messages. 
- if !has_fatal_err { - (kind, Symbol::intern(lit_content)) - } else { - (token::Err, self.symbol_from_to(start, end)) - } - } - - fn cook_quoted( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_literal(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) - } - - fn cook_c_string( - &self, - kind: token::LitKind, - mode: Mode, - start: BytePos, - end: BytePos, - prefix_len: u32, - postfix_len: u32, - ) -> (token::LitKind, Symbol) { - self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_c_string(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) - }) + (kind, Symbol::intern(lit_content)) } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index ab48da5cd156a..3425fa16cfeb0 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -15,22 +15,28 @@ pub(crate) fn emit_unescape_error( lit: &str, // full span of the literal, including quotes and any prefix full_lit_span: Span, - // span of the error part of the literal - err_span: Span, mode: Mode, + prefix_len: u32, // range of the error inside `lit` range: Range, error: EscapeError, ) { + let (start, end) = (range.start as u32, range.end as u32); + let lo = full_lit_span.lo() + BytePos(prefix_len) + BytePos(start); + let hi = lo + BytePos(end - start); + let err_span = full_lit_span.with_lo(lo).with_hi(hi); + debug!( - "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", - lit, full_lit_span, mode, range, error + "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}", + lit, full_lit_span, err_span, mode, range, error ); + let 
last_char = || { let c = lit[range.clone()].chars().next_back().unwrap(); let span = err_span.with_lo(err_span.hi() - BytePos(c.len_utf8() as u32)); (c, span) }; + match error { EscapeError::LoneSurrogateUnicodeEscape => { dcx.emit_err(UnescapeError::InvalidUnicodeEscape { span: err_span, surrogate: true }); diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 4d061fb0ad996..f8cd207e85a25 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -8,6 +8,7 @@ use super::{ }; use crate::errors; +use crate::lexer::unescape_error_reporting::emit_unescape_error; use crate::maybe_recover_from_interpolated_ty_qpath; use ast::mut_visit::{noop_visit_expr, MutVisitor}; use ast::{CoroutineKind, GenBlockKind, Pat, Path, PathSegment}; @@ -2048,27 +2049,30 @@ impl<'a> Parser<'a> { let recovered = self.recover_after_dot(); let token = recovered.as_ref().unwrap_or(&self.token); match token::Lit::from_token(token) { - Some(lit) => { - match MetaItemLit::from_token_lit(lit, token.span) { + Some(token_lit) => { + let err_span = token.uninterpolated_span(); + let lit = token_lit_to_meta_item_lit_and_report_errs( + self.sess, token_lit, token.span, err_span, + ); + + let res = match lit { Ok(lit) => { self.bump(); - Some(lit) + lit } - Err(err) => { - let span = token.uninterpolated_span(); + Err(()) => { self.bump(); - report_lit_error(self.sess, err, lit, span); // Pack possible quotes and prefixes from the original literal into // the error literal's symbol so they can be pretty-printed faithfully. 
- let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); + let suffixless_lit = + token::Lit::new(token_lit.kind, token_lit.symbol, None); let symbol = Symbol::intern(&suffixless_lit.to_string()); - let lit = token::Lit::new(token::Err, symbol, lit.suffix); - Some( - MetaItemLit::from_token_lit(lit, span) - .unwrap_or_else(|_| unreachable!()), - ) + let lit = token::Lit::new(token::Err, symbol, token_lit.suffix); + MetaItemLit::from_token_lit(lit, err_span) + .unwrap_or_else(|_| unreachable!()) } - } + }; + Some(res) } None => None, } @@ -3674,7 +3678,34 @@ impl<'a> Parser<'a> { } } -pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { +// Use this for call sites where we need to print errors about invalid literals. +pub fn token_lit_to_lit_kind_and_report_errs( + sess: &ParseSess, + token_lit: token::Lit, + span: Span, +) -> Result { + let (lit, errs) = ast::LitKind::from_token_lit_with_errs(token_lit); + for err in errs { + report_lit_error(sess, err, token_lit, span); + } + lit +} + +// Use this for call sites where we need to print errors about invalid literals. +pub fn token_lit_to_meta_item_lit_and_report_errs( + sess: &ParseSess, + token_lit: token::Lit, + lit_span: Span, + err_span: Span, +) -> Result { + let (lit, errs) = ast::MetaItemLit::from_token_lit_with_errs(token_lit, lit_span); + for err in errs { + report_lit_error(sess, err, token_lit, err_span); + } + lit +} + +fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) { // Checks if `s` looks like i32 or u1234 etc. fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) @@ -3707,6 +3738,9 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: // `LexerError` is an error, but it was already reported // by lexer, so here we don't report it the second time. 
LitError::LexerError => {} + LitError::EscapeError { mode, prefix_len, range, err } => { + emit_unescape_error(&sess.dcx, symbol.as_str(), span, mode, prefix_len, range, err); + } LitError::InvalidSuffix => { if let Some(suffix) = suffix { sess.emit_err(errors::InvalidLiteralSuffix { span, kind: kind.descr(), suffix }); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 051e1deec66fc..bf39ac924ecc5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -11,7 +11,9 @@ mod stmt; mod ty; use crate::lexer::UnmatchedDelim; -pub use crate::parser::expr::report_lit_error; +pub use crate::parser::expr::{ + token_lit_to_lit_kind_and_report_errs, token_lit_to_meta_item_lit_and_report_errs, +}; pub use attr_wrapper::AttrWrapper; pub use diagnostics::AttemptLocalParseRecovery; pub(crate) use expr::ForbiddenLetReason; diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs index 9fea3826652c6..02c1e153afebb 100644 --- a/compiler/rustc_parse/src/validate_attr.rs +++ b/compiler/rustc_parse/src/validate_attr.rs @@ -1,14 +1,13 @@ //! Meta-syntax validation logic of attributes for post-expansion. 
-use crate::{errors, parse_in}; +use crate::{errors, parse_in, parser}; use rustc_ast::token::Delimiter; use rustc_ast::tokenstream::DelimSpan; -use rustc_ast::MetaItemKind; use rustc_ast::{self as ast, AttrArgs, AttrArgsEq, Attribute, DelimArgs, MetaItem}; +use rustc_ast::{LitKind, MetaItemKind, MetaItemLit}; use rustc_errors::{Applicability, FatalError, PResult}; use rustc_feature::{AttributeTemplate, BuiltinAttribute, BUILTIN_ATTRIBUTE_MAP}; -use rustc_session::errors::report_lit_error; use rustc_session::lint::builtin::ILL_FORMED_ATTRIBUTE_INPUT; use rustc_session::parse::ParseSess; use rustc_span::{sym, Span, Symbol}; @@ -52,8 +51,10 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta } AttrArgs::Eq(_, AttrArgsEq::Ast(expr)) => { if let ast::ExprKind::Lit(token_lit) = expr.kind { - let res = ast::MetaItemLit::from_token_lit(token_lit, expr.span); - let res = match res { + let lit = parser::token_lit_to_meta_item_lit_and_report_errs( + sess, token_lit, expr.span, expr.span, + ); + match lit { Ok(lit) => { if token_lit.suffix.is_some() { let mut err = sess.dcx.struct_span_err( @@ -69,18 +70,16 @@ pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, Meta MetaItemKind::NameValue(lit) } } - Err(err) => { - report_lit_error(sess, err, token_lit, expr.span); - let lit = ast::MetaItemLit { + Err(()) => { + let lit = MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, - kind: ast::LitKind::Err, + kind: LitKind::Err, span: expr.span, }; MetaItemKind::NameValue(lit) } - }; - res + } } else { // Example cases: // - `#[foo = 1+1]`: results in `ast::ExprKind::BinOp`. 
diff --git a/tests/rustdoc-ui/ignore-block-help.rs b/tests/rustdoc-ui/ignore-block-help.rs index 86f6a2868fb56..fb27d954f9a5a 100644 --- a/tests/rustdoc-ui/ignore-block-help.rs +++ b/tests/rustdoc-ui/ignore-block-help.rs @@ -1,10 +1,10 @@ // check-pass /// ```ignore (to-prevent-tidy-error) -/// let heart = '❤️'; +/// let unterminated = ' /// ``` //~^^^ WARNING could not parse code block //~| NOTE on by default -//~| NOTE character literal may only contain one codepoint +//~| NOTE unterminated character literal //~| HELP `ignore` code blocks require valid Rust code pub struct X; diff --git a/tests/rustdoc-ui/ignore-block-help.stderr b/tests/rustdoc-ui/ignore-block-help.stderr index a30ea51dd8a7f..f5ed287a99834 100644 --- a/tests/rustdoc-ui/ignore-block-help.stderr +++ b/tests/rustdoc-ui/ignore-block-help.stderr @@ -3,7 +3,7 @@ warning: could not parse code block as Rust code | LL | /// ```ignore (to-prevent-tidy-error) | _____^ -LL | | /// let heart = '❤️'; +LL | | /// let unterminated = ' LL | | /// ``` | |_______^ | @@ -12,7 +12,7 @@ help: `ignore` code blocks require valid Rust code for syntax highlighting; mark | LL | /// ```ignore (to-prevent-tidy-error) | ^^^ - = note: error from rustc: character literal may only contain one codepoint + = note: error from rustc: unterminated character literal = note: `#[warn(rustdoc::invalid_rust_codeblocks)]` on by default warning: 1 warning emitted diff --git a/tests/ui/fmt/format-string-error-2.stderr b/tests/ui/fmt/format-string-error-2.stderr index dfd24bf60ad52..50ead59e4e911 100644 --- a/tests/ui/fmt/format-string-error-2.stderr +++ b/tests/ui/fmt/format-string-error-2.stderr @@ -1,9 +1,3 @@ -error: incorrect unicode escape sequence - --> $DIR/format-string-error-2.rs:77:20 - | -LL | println!("\x7B}\u8 {", 1); - | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` - error: invalid format string: expected `'}'`, found `'a'` --> $DIR/format-string-error-2.rs:5:5 | @@ -155,6 +149,12 @@ LL | 
println!("\x7B}\u{8} {", 1); | = note: if you intended to print `{`, you can escape it using `{{` +error: incorrect unicode escape sequence + --> $DIR/format-string-error-2.rs:77:20 + | +LL | println!("\x7B}\u8 {", 1); + | ^^^ help: format of unicode escape sequences uses braces: `\u{8}` + error: invalid format string: unmatched `}` found --> $DIR/format-string-error-2.rs:81:21 | diff --git a/tests/ui/lexer/error-stage.rs b/tests/ui/lexer/error-stage.rs index c8d88f745a1f0..5edb334c109dd 100644 --- a/tests/ui/lexer/error-stage.rs +++ b/tests/ui/lexer/error-stage.rs @@ -1,3 +1,5 @@ +// edition:2021 + // This test is about the treatment of invalid literals. In particular, some // literals are only considered invalid if they survive to HIR lowering. // @@ -41,6 +43,11 @@ // https://doc.rust-lang.org/reference/tokens.html#integer-literals says that // literals like `128_i8` and `256_u8` "are too big for their type, but are // still valid tokens". +// +// String literals, etc. +// --------------------- +// There are various ways that char, byte, and string literals can be invalid, +// mostly involving invalid escape sequences. macro_rules! sink { ($($x:tt;)*) => {()} @@ -48,7 +55,7 @@ macro_rules! sink { // The invalid literals are ignored because the macro consumes them. Except for // `0b10.0f32` because it's a lexer error. -const _: () = sink! { +const c1: () = sink! { "string"any_suffix; // OK 10u123; // OK 10.0f123; // OK @@ -60,7 +67,7 @@ const _: () = sink! { // The invalid literals used to cause errors, but this was changed by #102944. // Except for `0b010.0f32`, because it's a lexer error. #[cfg(FALSE)] -fn configured_out() { +fn configured_out1() { "string"any_suffix; // OK 10u123; // OK 10.0f123; // OK @@ -70,7 +77,7 @@ fn configured_out() { } // All the invalid literals cause errors. 
-fn main() { +fn f1() { "string"any_suffix; //~ ERROR suffixes on string literals are invalid 10u123; //~ ERROR invalid width `123` for integer literal 10.0f123; //~ ERROR invalid width `123` for float literal @@ -78,3 +85,36 @@ fn main() { 0b10.0f32; //~ ERROR binary float literal is not supported 999340282366920938463463374607431768211455999; //~ ERROR integer literal is too large } + +// These invalid literals used to cause errors, but this was changed by #118699. +const c2: () = sink! { + ''; + b'ab'; + "\a"; + b"\xzz"; + "\u20"; + c"\u{999999}"; +}; + +// These invalid literals used to cause errors, but this was changed by #118699. +#[cfg(FALSE)] +fn configured_out2() { + ''; + b'ab'; + "\a"; + b"\xzz"; + "\u20"; + c"\u{999999}"; +} + +// These invalid literals cause errors. +fn f2() { + ''; //~ ERROR empty character literal + b'ab'; //~ ERROR character literal may only contain one codepoint + "\a"; //~ ERROR unknown character escape: `a` + b"\xzz"; //~ ERROR invalid character in numeric character escape + "\u20"; //~ ERROR incorrect unicode escape sequence + c"\u{999999}"; //~ ERROR invalid unicode character escape +} + +fn main() {} diff --git a/tests/ui/lexer/error-stage.stderr b/tests/ui/lexer/error-stage.stderr index ecbdb14dc868e..f3572600ac193 100644 --- a/tests/ui/lexer/error-stage.stderr +++ b/tests/ui/lexer/error-stage.stderr @@ -1,29 +1,29 @@ error: binary float literal is not supported - --> $DIR/error-stage.rs:56:5 + --> $DIR/error-stage.rs:63:5 | LL | 0b10.0f32; | ^^^^^^ error: binary float literal is not supported - --> $DIR/error-stage.rs:68:5 + --> $DIR/error-stage.rs:75:5 | LL | 0b10.0f32; | ^^^^^^ error: binary float literal is not supported - --> $DIR/error-stage.rs:78:5 + --> $DIR/error-stage.rs:85:5 | LL | 0b10.0f32; | ^^^^^^ error: suffixes on string literals are invalid - --> $DIR/error-stage.rs:74:5 + --> $DIR/error-stage.rs:81:5 | LL | "string"any_suffix; | ^^^^^^^^^^^^^^^^^^ invalid suffix `any_suffix` error: invalid width `123` for 
integer literal - --> $DIR/error-stage.rs:75:5 + --> $DIR/error-stage.rs:82:5 | LL | 10u123; | ^^^^^^ @@ -31,7 +31,7 @@ LL | 10u123; = help: valid widths are 8, 16, 32, 64 and 128 error: invalid width `123` for float literal - --> $DIR/error-stage.rs:76:5 + --> $DIR/error-stage.rs:83:5 | LL | 10.0f123; | ^^^^^^^^ @@ -39,18 +39,69 @@ LL | 10.0f123; = help: valid widths are 32 and 64 error: binary float literal is not supported - --> $DIR/error-stage.rs:77:5 + --> $DIR/error-stage.rs:84:5 | LL | 0b10f32; | ^^^^^^^ not supported error: integer literal is too large - --> $DIR/error-stage.rs:79:5 + --> $DIR/error-stage.rs:86:5 | LL | 999340282366920938463463374607431768211455999; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: value exceeds limit of `340282366920938463463374607431768211455` -error: aborting due to 8 previous errors +error: empty character literal + --> $DIR/error-stage.rs:112:6 + | +LL | ''; + | ^ empty character literal + +error: character literal may only contain one codepoint + --> $DIR/error-stage.rs:113:5 + | +LL | b'ab'; + | ^^^^^ + | +help: if you meant to write a byte string literal, use double quotes + | +LL | b"ab"; + | ~~~~~ + +error: unknown character escape: `a` + --> $DIR/error-stage.rs:114:7 + | +LL | "\a"; + | ^ unknown character escape + | + = help: for more information, visit +help: if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal + | +LL | r"\a"; + | ~~~~~ + +error: invalid character in numeric character escape: `z` + --> $DIR/error-stage.rs:115:9 + | +LL | b"\xzz"; + | ^ invalid character in numeric character escape + +error: incorrect unicode escape sequence + --> $DIR/error-stage.rs:116:6 + | +LL | "\u20"; + | ^^^- + | | + | help: format of unicode escape sequences uses braces: `\u{20}` + +error: invalid unicode character escape + --> $DIR/error-stage.rs:117:7 + | +LL | c"\u{999999}"; + | ^^^^^^^^^^ invalid escape + | + = help: unicode escape must be at 
most 10FFFF + +error: aborting due to 14 previous errors diff --git a/tests/ui/lexer/lex-bad-char-literals-7.rs b/tests/ui/lexer/lex-bad-char-literals-7.rs index c675df2f3ccd0..55484a610141b 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.rs +++ b/tests/ui/lexer/lex-bad-char-literals-7.rs @@ -7,7 +7,4 @@ fn main() { // Next two are OK, but may befool error recovery let _ = '/'; let _ = b'/'; - - let _ = ' hello // here's a comment - //~^ ERROR: unterminated character literal } diff --git a/tests/ui/lexer/lex-bad-char-literals-7.stderr b/tests/ui/lexer/lex-bad-char-literals-7.stderr index 255b9c6899999..16ba7676932fd 100644 --- a/tests/ui/lexer/lex-bad-char-literals-7.stderr +++ b/tests/ui/lexer/lex-bad-char-literals-7.stderr @@ -10,12 +10,5 @@ error: empty unicode escape LL | let _: char = '\u{}'; | ^^^^ this escape must have at least 1 hex digit -error[E0762]: unterminated character literal - --> $DIR/lex-bad-char-literals-7.rs:11:13 - | -LL | let _ = ' hello // here's a comment - | ^^^^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors -For more information about this error, try `rustc --explain E0762`. 
diff --git a/tests/ui/lexer/lex-bad-char-literals-8.rs b/tests/ui/lexer/lex-bad-char-literals-8.rs new file mode 100644 index 0000000000000..6c8cbd3a82a85 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.rs @@ -0,0 +1,4 @@ +fn main() { + let _ = ' hello // here's a comment + //~^ ERROR: unterminated character literal +} diff --git a/tests/ui/lexer/lex-bad-char-literals-8.stderr b/tests/ui/lexer/lex-bad-char-literals-8.stderr new file mode 100644 index 0000000000000..04c95df0d0601 --- /dev/null +++ b/tests/ui/lexer/lex-bad-char-literals-8.stderr @@ -0,0 +1,9 @@ +error[E0762]: unterminated character literal + --> $DIR/lex-bad-char-literals-8.rs:2:13 + | +LL | let _ = ' hello // here's a comment + | ^^^^^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0762`. diff --git a/tests/ui/parser/byte-literals-2.rs b/tests/ui/parser/byte-literals-2.rs new file mode 100644 index 0000000000000..fb9e2ac69944a --- /dev/null +++ b/tests/ui/parser/byte-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b'a //~ ERROR unterminated byte constant [E0763] +} diff --git a/tests/ui/parser/byte-literals-2.stderr b/tests/ui/parser/byte-literals-2.stderr new file mode 100644 index 0000000000000..f0e042ad605db --- /dev/null +++ b/tests/ui/parser/byte-literals-2.stderr @@ -0,0 +1,9 @@ +error[E0763]: unterminated byte constant + --> $DIR/byte-literals-2.rs:2:6 + | +LL | b'a + | ^^^^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0763`. 
diff --git a/tests/ui/parser/byte-literals.rs b/tests/ui/parser/byte-literals.rs index 896dc1a1a5fba..963a0bb608d84 100644 --- a/tests/ui/parser/byte-literals.rs +++ b/tests/ui/parser/byte-literals.rs @@ -8,5 +8,4 @@ pub fn main() { b' '; //~ ERROR byte constant must be escaped b'''; //~ ERROR byte constant must be escaped b'é'; //~ ERROR non-ASCII character in byte literal - b'a //~ ERROR unterminated byte constant [E0763] } diff --git a/tests/ui/parser/byte-literals.stderr b/tests/ui/parser/byte-literals.stderr index 5b414c8927e2c..97805e01db49f 100644 --- a/tests/ui/parser/byte-literals.stderr +++ b/tests/ui/parser/byte-literals.stderr @@ -43,12 +43,5 @@ help: if you meant to use the unicode code point for 'é', use a \xHH escape LL | b'\xE9'; | ~~~~ -error[E0763]: unterminated byte constant - --> $DIR/byte-literals.rs:11:6 - | -LL | b'a - | ^^^^ - -error: aborting due to 7 previous errors +error: aborting due to 6 previous errors -For more information about this error, try `rustc --explain E0763`. diff --git a/tests/ui/parser/byte-string-literals-2.rs b/tests/ui/parser/byte-string-literals-2.rs new file mode 100644 index 0000000000000..7eb52b854e358 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + b"a //~ ERROR unterminated double quote byte string +} diff --git a/tests/ui/parser/byte-string-literals-2.stderr b/tests/ui/parser/byte-string-literals-2.stderr new file mode 100644 index 0000000000000..6fdb3c64ba783 --- /dev/null +++ b/tests/ui/parser/byte-string-literals-2.stderr @@ -0,0 +1,11 @@ +error[E0766]: unterminated double quote byte string + --> $DIR/byte-string-literals-2.rs:2:6 + | +LL | b"a + | ______^ +LL | | } + | |__^ + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0766`. 
diff --git a/tests/ui/parser/byte-string-literals.rs b/tests/ui/parser/byte-string-literals.rs index 30a4f50c4e40b..c14488dcb6689 100644 --- a/tests/ui/parser/byte-string-literals.rs +++ b/tests/ui/parser/byte-string-literals.rs @@ -5,5 +5,4 @@ pub fn main() { b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z` b"é"; //~ ERROR non-ASCII character in byte string literal br##"é"##; //~ ERROR non-ASCII character in raw byte string literal - b"a //~ ERROR unterminated double quote byte string } diff --git a/tests/ui/parser/byte-string-literals.stderr b/tests/ui/parser/byte-string-literals.stderr index 655b6998e85ff..2a2830c346825 100644 --- a/tests/ui/parser/byte-string-literals.stderr +++ b/tests/ui/parser/byte-string-literals.stderr @@ -37,14 +37,5 @@ error: non-ASCII character in raw byte string literal LL | br##"é"##; | ^ must be ASCII -error[E0766]: unterminated double quote byte string - --> $DIR/byte-string-literals.rs:8:6 - | -LL | b"a - | ______^ -LL | | } - | |__^ - -error: aborting due to 6 previous errors +error: aborting due to 5 previous errors -For more information about this error, try `rustc --explain E0766`. 
diff --git a/tests/ui/parser/issues/issue-62913.rs b/tests/ui/parser/issues/issue-62913.rs index a55ef5ac71030..c77ef61a97b10 100644 --- a/tests/ui/parser/issues/issue-62913.rs +++ b/tests/ui/parser/issues/issue-62913.rs @@ -1,4 +1,5 @@ -"\u\\" -//~^ ERROR incorrect unicode escape sequence -//~| ERROR invalid trailing slash in literal -//~| ERROR expected item, found `"\u\"` +fn main() { + _ = "\u\\"; + //~^ ERROR incorrect unicode escape sequence + //~| ERROR invalid trailing slash in literal +} diff --git a/tests/ui/parser/issues/issue-62913.stderr b/tests/ui/parser/issues/issue-62913.stderr index c33e46837287f..bee6dd4580037 100644 --- a/tests/ui/parser/issues/issue-62913.stderr +++ b/tests/ui/parser/issues/issue-62913.stderr @@ -1,24 +1,16 @@ error: incorrect unicode escape sequence - --> $DIR/issue-62913.rs:1:2 + --> $DIR/issue-62913.rs:2:10 | -LL | "\u\" - | ^^^ incorrect unicode escape sequence +LL | _ = "\u\"; + | ^^^ incorrect unicode escape sequence | = help: format of unicode escape sequences is `\u{...}` error: invalid trailing slash in literal - --> $DIR/issue-62913.rs:1:5 + --> $DIR/issue-62913.rs:2:13 | -LL | "\u\" - | ^ invalid trailing slash in literal +LL | _ = "\u\"; + | ^ invalid trailing slash in literal -error: expected item, found `"\u\"` - --> $DIR/issue-62913.rs:1:1 - | -LL | "\u\" - | ^^^^^^ expected item - | - = note: for a full list of items that can appear in modules, see - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs b/tests/ui/parser/macro/literals-are-validated-before-expansion.rs deleted file mode 100644 index c3fc754b5567f..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.rs +++ /dev/null @@ -1,10 +0,0 @@ -macro_rules! black_hole { - ($($tt:tt)*) => {} -} - -fn main() { - black_hole! { '\u{FFFFFF}' } - //~^ ERROR: invalid unicode character escape - black_hole! 
{ "this is surrogate: \u{DAAA}" } - //~^ ERROR: invalid unicode character escape -} diff --git a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr b/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr deleted file mode 100644 index e874f62497ea8..0000000000000 --- a/tests/ui/parser/macro/literals-are-validated-before-expansion.stderr +++ /dev/null @@ -1,18 +0,0 @@ -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:6:20 - | -LL | black_hole! { '\u{FFFFFF}' } - | ^^^^^^^^^^ invalid escape - | - = help: unicode escape must be at most 10FFFF - -error: invalid unicode character escape - --> $DIR/literals-are-validated-before-expansion.rs:8:39 - | -LL | black_hole! { "this is surrogate: \u{DAAA}" } - | ^^^^^^^^ invalid escape - | - = help: unicode escape must not be a surrogate - -error: aborting due to 2 previous errors - diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.rs b/tests/ui/parser/raw/raw-byte-string-literals-2.rs new file mode 100644 index 0000000000000..8ffda513dbf6f --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.rs @@ -0,0 +1,3 @@ +pub fn main() { + br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation +} diff --git a/tests/ui/parser/raw/raw-byte-string-literals-2.stderr b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr new file mode 100644 index 0000000000000..b4151eeef7017 --- /dev/null +++ b/tests/ui/parser/raw/raw-byte-string-literals-2.stderr @@ -0,0 +1,8 @@ +error: found invalid character; only `#` is allowed in raw string delimitation: ~ + --> $DIR/raw-byte-string-literals-2.rs:2:5 + | +LL | br##~"a"~##; + | ^^^^^ + +error: aborting due to 1 previous error + diff --git a/tests/ui/parser/raw/raw-byte-string-literals.rs b/tests/ui/parser/raw/raw-byte-string-literals.rs index 1b859fee596ad..3f91c381a9039 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.rs +++ b/tests/ui/parser/raw/raw-byte-string-literals.rs @@ 
-3,5 +3,4 @@ pub fn main() { br"a "; //~ ERROR bare CR not allowed in raw string br"é"; //~ ERROR non-ASCII character in raw byte string literal - br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation } diff --git a/tests/ui/parser/raw/raw-byte-string-literals.stderr b/tests/ui/parser/raw/raw-byte-string-literals.stderr index a2f27d1ed70ae..2a4073243cbca 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.stderr +++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr @@ -10,11 +10,5 @@ error: non-ASCII character in raw byte string literal LL | br"é"; | ^ must be ASCII -error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-byte-string-literals.rs:6:5 - | -LL | br##~"a"~##; - | ^^^^^ - -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index fc071a9419142..806e222507f6e 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,87 +1,3 @@ -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 - | -LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); - | ^^^^^^^^ unicode escape in byte string - | - = help: unicode escape sequences cannot be used as a byte or in a byte string - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{202e}' - | -help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes - | -LL | println!("{:?}", b"/*\xE2\x80\xAE } if 
isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2069}' - | -help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 - | -LL | println!("{:?}", b"/* } if isAdmin begin admins only "); - | ^ must be ASCII but is '\u{2066}' - | -help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes - | -LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); - | ~~~~~~~~~~~~ - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{202e}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2066}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 - | -LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); - | ^ must be ASCII but is '\u{2069}' - -error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 - | -LL | println!("{:?}", br##"/* } if isAdmin begin 
admins only "##); - | ^ must be ASCII but is '\u{2066}' - error: unicode codepoint changing visible direction of text present in comment --> $DIR/unicode-control-codepoints.rs:2:5 | @@ -188,5 +104,89 @@ LL | | * ''); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:26 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:35 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:26 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{202e}' + | +help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes + | +LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:30 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:41 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2069}' + | +help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes + | +LL | 
println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte string literal + --> $DIR/unicode-control-codepoints.rs:16:43 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:29 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{202e}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:33 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:44 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2069}' + +error: non-ASCII character in raw byte string literal + --> $DIR/unicode-control-codepoints.rs:21:46 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + error: aborting due to 17 previous errors