|
1 | 1 | use core::convert::TryFrom;
|
2 |
| -use core::{char, fmt, iter, mem}; |
| 2 | +use core::{char, fmt, iter, mem, str}; |
3 | 3 |
|
4 | 4 | #[allow(unused_macros)]
|
5 | 5 | macro_rules! write {
|
@@ -287,6 +287,84 @@ impl<'s> HexNibbles<'s> {
|
287 | 287 | }
|
288 | 288 | Some(v)
|
289 | 289 | }
|
| 290 | + |
| 291 | + /// Decode a UTF-8 byte sequence (with each byte using a pair of nibbles) |
| 292 | + /// into individual `char`s, returning `None` for invalid UTF-8. |
| 293 | + fn try_parse_str_chars(&self) -> Option<impl Iterator<Item = char> + 's> { |
| 294 | + if self.nibbles.len() % 2 != 0 { |
| 295 | + return None; |
| 296 | + } |
| 297 | + |
| 298 | + // FIXME(eddyb) use `array_chunks` instead, when that becomes stable. |
| 299 | + let mut bytes = self |
| 300 | + .nibbles |
| 301 | + .as_bytes() |
| 302 | + .chunks_exact(2) |
| 303 | + .map(|slice| match slice { |
| 304 | + [a, b] => [a, b], |
| 305 | + _ => unreachable!(), |
| 306 | + }) |
| 307 | + .map(|[&hi, &lo]| { |
| 308 | + let half = |nibble: u8| (nibble as char).to_digit(16).unwrap() as u8; |
| 309 | + (half(hi) << 4) | half(lo) |
| 310 | + }); |
| 311 | + |
| 312 | + let chars = iter::from_fn(move || { |
| 313 | + // As long as there are any bytes left, there's at least one more |
| 314 | + // UTF-8-encoded `char` to decode (or the possibility of error). |
| 315 | + bytes.next().map(|first_byte| -> Result<char, ()> { |
| 316 | + // FIXME(eddyb) this `enum` and `fn` should be somewhere in `core`. |
| 317 | + enum Utf8FirstByteError { |
| 318 | + ContinuationByte, |
| 319 | + TooLong, |
| 320 | + } |
| 321 | + fn utf8_len_from_first_byte(byte: u8) -> Result<usize, Utf8FirstByteError> { |
| 322 | + match byte { |
| 323 | + 0x00..=0x7f => Ok(1), |
| 324 | + 0x80..=0xbf => Err(Utf8FirstByteError::ContinuationByte), |
| 325 | + 0xc0..=0xdf => Ok(2), |
| 326 | + 0xe0..=0xef => Ok(3), |
| 327 | + 0xf0..=0xf7 => Ok(4), |
| 328 | + 0xf8..=0xff => Err(Utf8FirstByteError::TooLong), |
| 329 | + } |
| 330 | + } |
| 331 | + |
| 332 | + // Collect the appropriate amount of bytes (up to 4), according |
| 333 | + // to the UTF-8 length implied by the first byte. |
| 334 | + let utf8_len = utf8_len_from_first_byte(first_byte).map_err(|_| ())?; |
| 335 | + let utf8 = &mut [first_byte, 0, 0, 0][..utf8_len]; |
| 336 | + for i in 1..utf8_len { |
| 337 | + utf8[i] = bytes.next().ok_or(())?; |
| 338 | + } |
| 339 | + |
| 340 | + // Fully validate the UTF-8 sequence. |
| 341 | + let s = str::from_utf8(utf8).map_err(|_| ())?; |
| 342 | + |
| 343 | + // Since we included exactly one UTF-8 sequence, and validation |
| 344 | + // succeeded, `str::chars` should return exactly one `char`. |
| 345 | + let mut chars = s.chars(); |
| 346 | + match (chars.next(), chars.next()) { |
| 347 | + (Some(c), None) => Ok(c), |
| 348 | + _ => unreachable!( |
| 349 | + "str::from_utf8({:?}) = {:?} was expected to have 1 char, \ |
| 350 | + but {} chars were found", |
| 351 | + utf8, |
| 352 | + s, |
| 353 | + s.chars().count() |
| 354 | + ), |
| 355 | + } |
| 356 | + }) |
| 357 | + }); |
| 358 | + |
| 359 | + // HACK(eddyb) doing a separate validation iteration like this might be |
| 360 | + // wasteful, but it's easier to avoid starting to print a string literal |
| 361 | + // in the first place, than to abort it mid-string. |
| 362 | + if chars.clone().any(|r| r.is_err()) { |
| 363 | + None |
| 364 | + } else { |
| 365 | + Some(chars.map(Result::unwrap)) |
| 366 | + } |
| 367 | + } |
290 | 368 | }
|
291 | 369 |
|
292 | 370 | fn basic_type(tag: u8) -> Option<&'static str> {
|
@@ -1006,6 +1084,18 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
|
1006 | 1084 |
|
1007 | 1085 | parse!(self, push_depth);
|
1008 | 1086 |
|
| 1087 | + // Only literals (and the names of `const` generic parameters, but they |
| 1088 | + // don't get mangled at all), can appear in generic argument position |
| 1089 | + // without any disambiguation, all other expressions require braces. |
| 1090 | + // To avoid duplicating the mapping between `tag` and what syntax gets |
| 1091 | + // used (especially any special-casing), every case that needs braces |
| 1092 | + // has to call `open_brace(self)?` (and the closing brace is automatic). |
| 1093 | + let mut opened_brace = false; |
| 1094 | + let mut open_brace = |this: &mut Self| { |
| 1095 | + opened_brace = true; |
| 1096 | + this.print("{") |
| 1097 | + }; |
| 1098 | + |
1009 | 1099 | match tag {
|
1010 | 1100 | b'p' => self.print("_")?,
|
1011 | 1101 |
|
@@ -1033,13 +1123,29 @@ impl<'a, 'b, 's> Printer<'a, 'b, 's> {
|
1033 | 1123 | None => invalid!(self),
|
1034 | 1124 | }
|
1035 | 1125 | }
|
| 1126 | + b'e' => { |
| 1127 | + // NOTE(eddyb) a string literal `"..."` has type `&str`, so |
| 1128 | + // to get back the type `str`, `*"..."` syntax is needed |
| 1129 | + // (even if that may not be valid in Rust itself). |
| 1130 | + open_brace(self)?; |
| 1131 | + self.print("*")?; |
| 1132 | + |
| 1133 | + match parse!(self, hex_nibbles).try_parse_str_chars() { |
| 1134 | + Some(chars) => self.print_quoted_escaped_chars('"', chars)?, |
| 1135 | + None => invalid!(self), |
| 1136 | + } |
| 1137 | + } |
1036 | 1138 |
|
1037 | 1139 | b'B' => {
|
1038 | 1140 | self.print_backref(Self::print_const)?;
|
1039 | 1141 | }
|
1040 | 1142 | _ => invalid!(self),
|
1041 | 1143 | }
|
1042 | 1144 |
|
| 1145 | + if opened_brace { |
| 1146 | + self.print("}")?; |
| 1147 | + } |
| 1148 | + |
1043 | 1149 | self.pop_depth();
|
1044 | 1150 | Ok(())
|
1045 | 1151 | }
|
@@ -1164,6 +1270,24 @@ mod tests {
|
1164 | 1270 | t_const!("c2202_", "'∂'");
|
1165 | 1271 | }
|
1166 | 1272 |
|
/// `str` constants (tag `e`) are hex-encoded UTF-8 and are expected to print
/// as `{*"..."}`, with the contents escaped like a Rust string literal.
#[test]
fn demangle_const_str() {
    // Plain ASCII.
    t_const!("e616263_", r#"{*"abc"}"#);
    // A single quote appears unescaped inside a double-quoted literal.
    t_const!("e27_", r#"{*"'"}"#);
    // Tab and newline are printed as the `\t` and `\n` escapes.
    t_const!("e090a_", r#"{*"\t\n"}"#);
    // Multi-byte sequences (3-byte `∂` followed by 2-byte `ü`).
    t_const!("ee28882c3bc_", r#"{*"∂ü"}"#);
    // A longer run of 3-byte sequences mixed with ASCII underscores.
    t_const!(
        "ee183a1e18390e183ade1839be18394e1839ae18390e183935fe18392e18394e1839b\
         e183a0e18398e18394e1839ae183985fe183a1e18390e18393e18398e1839ae18398_",
        r#"{*"საჭმელად_გემრიელი_სადილი"}"#
    );
    // 4-byte sequences (emoji) interleaved with shorter ones.
    t_const!(
        "ef09f908af09fa688f09fa686f09f90ae20c2a720f09f90b6f09f9192e298\
         95f09f94a520c2a720f09fa7a1f09f929bf09f929af09f9299f09f929c_",
        r#"{*"🐊🦈🦆🐮 § 🐶👒☕🔥 § 🧡💛💚💙💜"}"#
    );
}
| 1290 | + |
1167 | 1291 | #[test]
|
1168 | 1292 | fn demangle_exponential_explosion() {
|
1169 | 1293 | // NOTE(eddyb) because of the prefix added by `t_nohash_type!` is
|
|
0 commit comments