Skip to content

Commit 016ca6e

Browse files
authored Aug 13, 2019
Merge pull request rust-lang#29 from eddyb/legacy-unescape
legacy: unescape all $u...$ that encode non-control Unicode codepoints.
2 parents de656cd + c3c684e commit 016ca6e

File tree

1 file changed

+62
-46
lines changed

1 file changed

+62
-46
lines changed
 

Diff for: src/legacy.rs

+62 -46
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
use core::char;
12
use core::fmt;
23

34
/// Representation of a demangled symbol name.
@@ -133,7 +134,7 @@ impl<'a> fmt::Display for Demangle<'a> {
133134
if rest.starts_with("_$") {
134135
rest = &rest[1..];
135136
}
136-
while !rest.is_empty() {
137+
loop {
137138
if rest.starts_with('.') {
138139
if let Some('.') = rest[1..].chars().next() {
139140
try!(f.write_str("::"));
@@ -143,55 +144,54 @@ impl<'a> fmt::Display for Demangle<'a> {
143144
rest = &rest[1..];
144145
}
145146
} else if rest.starts_with('$') {
146-
macro_rules! demangle {
147-
($($pat:expr => $demangled:expr,)*) => ({
148-
$(if rest.starts_with($pat) {
149-
try!(f.write_str($demangled));
150-
rest = &rest[$pat.len()..];
151-
} else)*
152-
{
153-
try!(f.write_str(rest));
154-
break;
155-
}
156-
157-
})
158-
}
147+
let (escape, after_escape) = if let Some(end) = rest[1..].find('$') {
148+
(&rest[1..end + 1], &rest[end + 2..])
149+
} else {
150+
break;
151+
};
159152

160-
// see src/librustc/back/link.rs for these mappings
161-
demangle! {
162-
"$SP$" => "@",
163-
"$BP$" => "*",
164-
"$RF$" => "&",
165-
"$LT$" => "<",
166-
"$GT$" => ">",
167-
"$LP$" => "(",
168-
"$RP$" => ")",
169-
"$C$" => ",",
170-
171-
// in theory we can demangle any Unicode code point, but
172-
// for simplicity we just catch the common ones.
173-
"$u7e$" => "~",
174-
"$u20$" => " ",
175-
"$u27$" => "'",
176-
"$u3d$" => "=",
177-
"$u5b$" => "[",
178-
"$u5d$" => "]",
179-
"$u7b$" => "{",
180-
"$u7d$" => "}",
181-
"$u3b$" => ";",
182-
"$u2b$" => "+",
183-
"$u21$" => "!",
184-
"$u22$" => "\"",
185-
}
186-
} else {
187-
let idx = match rest.char_indices().find(|&(_, c)| c == '$' || c == '.') {
188-
None => rest.len(),
189-
Some((i, _)) => i,
153+
// see src/librustc_codegen_utils/symbol_names/legacy.rs for these mappings
154+
let unescaped = match escape {
155+
"SP" => "@",
156+
"BP" => "*",
157+
"RF" => "&",
158+
"LT" => "<",
159+
"GT" => ">",
160+
"LP" => "(",
161+
"RP" => ")",
162+
"C" => ",",
163+
164+
_ => {
165+
if escape.starts_with('u') {
166+
let digits = &escape[1..];
167+
let all_lower_hex = digits.chars().all(|c| match c {
168+
'0'...'9' | 'a'...'f' => true,
169+
_ => false,
170+
});
171+
let c = u32::from_str_radix(digits, 16).ok()
172+
.and_then(char::from_u32);
173+
if let (true, Some(c)) = (all_lower_hex, c) {
174+
// FIXME(eddyb) do we need to filter out control codepoints?
175+
if !c.is_control() {
176+
try!(c.fmt(f));
177+
rest = after_escape;
178+
continue;
179+
}
180+
}
181+
}
182+
break;
183+
}
190184
};
191-
try!(f.write_str(&rest[..idx]));
192-
rest = &rest[idx..];
185+
try!(f.write_str(unescaped));
186+
rest = after_escape;
187+
} else if let Some(i) = rest.find(|c| c == '$' || c == '.') {
188+
try!(f.write_str(&rest[..i]));
189+
rest = &rest[i..];
190+
} else {
191+
break;
193192
}
194193
}
194+
try!(f.write_str(rest));
195195
}
196196

197197
Ok(())
@@ -367,4 +367,20 @@ mod tests {
367367
"<core::result::Result<!, E> as std::process::Termination>::report::hfc41d0da4a40b3e8"
368368
);
369369
}
370+
371+
#[test]
372+
fn demangle_utf8_idents() {
373+
t_nohash!(
374+
"_ZN11utf8_idents157_$u10e1$$u10d0$$u10ed$$u10db$$u10d4$$u10da$$u10d0$$u10d3$_$u10d2$$u10d4$$u10db$$u10e0$$u10d8$$u10d4$$u10da$$u10d8$_$u10e1$$u10d0$$u10d3$$u10d8$$u10da$$u10d8$17h21634fd5714000aaE",
375+
"utf8_idents::საჭმელად_გემრიელი_სადილი"
376+
);
377+
}
378+
379+
#[test]
380+
fn demangle_issue_60925() {
381+
t_nohash!(
382+
"_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE",
383+
"issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo"
384+
);
385+
}
370386
}

0 commit comments

Comments
 (0)
Please sign in to comment.