Skip to content

Commit f13b4b4

Browse files
committed
c2rust-transpile: improve asm syntax variant heuristic
remove comments before searching for sigils to determine if asm is AT&T or Intel syntax
1 parent 52de819 commit f13b4b4

File tree

1 file changed

+36
-2
lines changed

1 file changed

+36
-2
lines changed

c2rust-transpile/src/translator/assembly.rs

+36-2
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,35 @@ fn rewrite_reserved_reg_operands(att_syntax: bool, arch: Arch,
368368
(prolog, epilog)
369369
}
370370

371+
/// Removes comments from an x86 assembly template.
///
/// Two kinds of comments are stripped, in this order:
///
/// 1. C-style `/* ... */` comments, which may span several lines. Both the
///    opening and the closing delimiter are removed along with the comment
///    body. A comment with no terminator extends to the end of the string.
/// 2. End-of-line comments: on each remaining line, everything from the
///    first `#` onwards is dropped.
///
/// Every line of the returned string is terminated with `\n`, whether or not
/// the input ended with a newline. An empty input yields an empty string.
fn remove_comments(mut asm: &str) -> String {
    const COMMENT_OPEN: &str = "/*";
    const COMMENT_CLOSE: &str = "*/";

    // Remove C-style comments
    let mut without_c_comments = String::with_capacity(asm.len());
    while let Some(comment_begin) = asm.find(COMMENT_OPEN) {
        without_c_comments.push_str(&asm[..comment_begin]);

        // Look for the terminator only *after* the opener, so that the `*`
        // of `/*` cannot double as the `*` of `*/` (as in `/*/ ... */`).
        let comment_body = &asm[comment_begin + COMMENT_OPEN.len()..];
        asm = match comment_body.find(COMMENT_CLOSE) {
            // Resume after the terminator so `*/` does not leak into the output.
            Some(close_idx) => &comment_body[close_idx + COMMENT_CLOSE.len()..],
            // Comments with no terminator extend to the end of the string
            None => "",
        };
    }
    // Push whatever is left after the final comment
    without_c_comments.push_str(asm);

    // Remove EOL comments from each line
    let mut without_comments = String::with_capacity(without_c_comments.len());
    for line in without_c_comments.lines() {
        let code = line.find('#').map_or(line, |comment_idx| &line[..comment_idx]);
        without_comments.push_str(code);
        without_comments.push('\n');
    }
    without_comments
}
399+
371400
fn asm_is_att_syntax(asm: &str) -> bool {
372401
// For GCC, AT&T syntax is default... unless -masm=intel is passed. This
373402
// means we can hope but not guarantee that x86 asm with no syntax directive
@@ -377,6 +406,11 @@ fn asm_is_att_syntax(asm: &str) -> bool {
377406
// As the rust x86 default is intel syntax, we need to emit the "att_syntax"
378407
// option if we get a hint that this asm uses AT&T syntax.
379408

409+
// First, remove comments, so we can look at only the semantically
410+
// significant parts of the asm template.
411+
let asm = &*remove_comments(asm);
412+
413+
// Look for syntax directives.
380414
let intel_directive = asm.find(".intel_syntax");
381415
let att_directive = asm.find(".att_syntax");
382416
if let (Some(intel_pos), Some(att_pos)) = (intel_directive, att_directive) {
@@ -391,8 +425,8 @@ fn asm_is_att_syntax(asm: &str) -> bool {
391425
} else if asm.contains("word ptr") {
392426
false
393427
} else {
394-
// Guess based on sigils used in AT&T assembly. This would be more
395-
// robust if it stripped comments first.
428+
// Guess based on sigils used in AT&T assembly:
429+
// $ for constants, % for registers, and ( for address calculations
396430
asm.contains('$') || asm.contains('%') || asm.contains('(')
397431
}
398432
}

0 commit comments

Comments
 (0)