diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs index eceef59802eb9..aba7f95487e9d 100644 --- a/compiler/rustc_lexer/src/cursor.rs +++ b/compiler/rustc_lexer/src/cursor.rs @@ -24,6 +24,10 @@ impl<'a> Cursor<'a> { } } + pub fn as_str(&self) -> &'a str { + self.chars.as_str() + } + /// Returns the last eaten symbol (or `'\0'` in release builds). /// (For debug assertions only.) pub(crate) fn prev(&self) -> char { diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 29335a8c0f4cd..d511d2b1280d9 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -367,6 +367,13 @@ impl Cursor<'_> { Some(|terminated| Byte { terminated }), ), + // c-string literal, raw c-string literal or identifier. + 'c' => self.c_or_byte_string( + |terminated| CStr { terminated }, + |n_hashes| RawCStr { n_hashes }, + None, + ), + // Identifier (this should be checked after other variant that can // start as identifier). c if is_id_start(c) => self.ident_or_unknown_prefix(), diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c6e6b46e4551c..1931ee5e528dd 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey}; use rustc_lexer::unescape::{self, EscapeError, Mode}; -use rustc_lexer::Cursor; use rustc_lexer::{Base, DocStyle, RawStrError}; +use rustc_lexer::{Cursor, LiteralKind}; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, }; @@ -118,6 +118,7 @@ impl<'a> StringReader<'a> { let mut swallow_next_invalid = 0; // Skip trivial (whitespace & comments) tokens loop { + let str_before = self.cursor.as_str(); let token = self.cursor.advance_token(); let start = self.pos; self.pos = self.pos + BytePos(token.len); @@ -165,10 +166,7 @@ impl<'a> StringReader<'a> { continue; } rustc_lexer::TokenKind::Ident => { - let sym = nfc_normalize(self.str_from(start)); - let span = self.mk_sp(start, self.pos); - self.sess.symbol_gallery.insert(sym, span); - token::Ident(sym, false) + self.ident(start) } rustc_lexer::TokenKind::RawIdent => { let sym = nfc_normalize(self.str_from(start + BytePos(2))); @@ -182,10 +180,7 @@ impl<'a> StringReader<'a> { } rustc_lexer::TokenKind::UnknownPrefix => { self.report_unknown_prefix(start); - let sym = nfc_normalize(self.str_from(start)); - let span = self.mk_sp(start, self.pos); - self.sess.symbol_gallery.insert(sym, span); - token::Ident(sym, false) + self.ident(start) } rustc_lexer::TokenKind::InvalidIdent // Do not recover an identifier with emoji if the codepoint is a confusable @@ -203,6 +198,27 @@ impl<'a> StringReader<'a> { .push(span); token::Ident(sym, false) } + // split up (raw) c string literals to an ident and a string literal when edition < 2021. + rustc_lexer::TokenKind::Literal { + kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }), + suffix_start: _, + } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => { + let prefix_len = match kind { + LiteralKind::CStr { .. } => 1, + LiteralKind::RawCStr { .. } => 2, + _ => unreachable!(), + }; + + // reset the state so that only the prefix ("c" or "cr") + // was consumed. + let lit_start = start + BytePos(prefix_len); + self.pos = lit_start; + self.cursor = Cursor::new(&str_before[prefix_len as usize..]); + + self.report_unknown_prefix(start); + let prefix_span = self.mk_sp(start, lit_start); + return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace); + } rustc_lexer::TokenKind::Literal { kind, suffix_start } => { let suffix_start = start + BytePos(suffix_start); let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); @@ -317,6 +333,13 @@ impl<'a> StringReader<'a> { } } + fn ident(&self, start: BytePos) -> TokenKind { + let sym = nfc_normalize(self.str_from(start)); + let span = self.mk_sp(start, self.pos); + self.sess.symbol_gallery.insert(sym, span); + token::Ident(sym, false) + } + fn struct_fatal_span_char( &self, from_pos: BytePos, diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/auxiliary/count.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/auxiliary/count.rs new file mode 100644 index 0000000000000..0907061d64a1b --- /dev/null +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/auxiliary/count.rs @@ -0,0 +1,14 @@ +// force-host +// edition: 2018 +// no-prefer-dynamic +#![crate_type = "proc-macro"] + +extern crate proc_macro; + +use proc_macro::TokenStream; +use std::str::FromStr; + +#[proc_macro] +pub fn number_of_tokens(_: TokenStream) -> TokenStream { + TokenStream::from_str("c\"\"").unwrap().into_iter().count().to_string().parse().unwrap() +} diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs index 3fc5fd481ea6d..5037396000bf0 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs @@ -1,5 +1,4 @@ -// FIXME(c_str_literals): This should be `run-pass` -// known-bug: #113333 +// run-pass // edition: 2021 #![feature(c_str_literals)] diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr deleted file mode 100644 index 571c319d8c533..0000000000000 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr +++ /dev/null @@ -1,25 +0,0 @@ -error: prefix `c` is unknown - --> $DIR/basic.rs:8:27 - | -LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul()); - | ^ unknown prefix - | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here - | -LL | assert_eq!(b"test\0", c "test".to_bytes_with_nul()); - | + - -error: no rules expected the token `"test"` - --> $DIR/basic.rs:8:28 - | -LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul()); - | -^^^^^ - | | - | no rules expected this token in macro call - | help: missing comma here - | - = note: while trying to match sequence start - -error: aborting due to 2 previous errors - diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/edition-spans.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/edition-spans.rs new file mode 100644 index 0000000000000..b3557c71b744e --- /dev/null +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/edition-spans.rs @@ -0,0 +1,16 @@ +// even if this crate is edition 2021, proc macros compiled using older +// editions should still be able to observe the pre-2021 token behavior +// +// adapted from tests/ui/rust-2021/reserved-prefixes-via-macro.rs + +// edition: 2021 +// check-pass + +// aux-build: count.rs +extern crate count; + +const _: () = { + assert!(count::number_of_tokens!() == 2); +}; + +fn main() {} diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr index 8de36ca4a6edf..ea666e4330830 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr @@ -1,32 +1,21 @@ -error: prefix `c` is unknown +error[E0658]: `c".."` literals are experimental --> $DIR/gate.rs:10:5 | LL | c"foo"; - | ^ unknown prefix + | ^^^^^^ | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here - | -LL | c "foo"; - | + + = note: see issue #105723 for more information + = help: add `#![feature(c_str_literals)]` to the crate attributes to enable -error: prefix `c` is unknown +error[E0658]: `c".."` literals are experimental --> $DIR/gate.rs:13:8 | LL | m!(c"test"); - | ^ unknown prefix - | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here + | ^^^^^^^ | -LL | m!(c "test"); - | + - -error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `"foo"` - --> $DIR/gate.rs:10:6 - | -LL | c"foo"; - | ^^^^^ expected one of 8 possible tokens + = note: see issue #105723 for more information + = help: add `#![feature(c_str_literals)]` to the crate attributes to enable -error: aborting due to 3 previous errors +error: aborting due to 2 previous errors +For more information about this error, try `rustc --explain E0658`. diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs index 96945f125da71..369173e23184e 100644 Binary files a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs and b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs differ diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr index 2226c7aa6a9ae..82d9f9cb32091 100644 Binary files a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr and b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr differ diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs index 066505c23dfc0..380445d7a7fb9 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs @@ -1,5 +1,4 @@ -// FIXME(c_str_literals): This should be `run-pass` -// known-bug: #113333 +// run-pass // edition: 2021 #![feature(c_str_literals)] diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr deleted file mode 100644 index 47361fb61d271..0000000000000 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr +++ /dev/null @@ -1,38 +0,0 @@ -error: prefix `c` is unknown - --> $DIR/non-ascii.rs:9:9 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | ^ unknown prefix - | - = note: prefixed identifiers and literals are reserved since Rust 2021 -help: consider inserting whitespace here - | -LL | c "\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | + - -error: out of range hex escape - --> $DIR/non-ascii.rs:9:11 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | ^^^^ must be a character in the range [\x00-\x7f] - -error: out of range hex escape - --> $DIR/non-ascii.rs:9:15 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | ^^^^ must be a character in the range [\x00-\x7f] - -error: no rules expected the token `"\xEF\x80🦀\u{1F980}"` - --> $DIR/non-ascii.rs:9:10 - | -LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(), - | -^^^^^^^^^^^^^^^^^^^^ - | | - | no rules expected this token in macro call - | help: missing comma here - | -note: while trying to match `,` - --> $SRC_DIR/core/src/macros/mod.rs:LL:COL - -error: aborting due to 4 previous errors -