diff --git a/src/lib.rs b/src/lib.rs index 70eee9a..4b3140b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,7 +116,7 @@ mod snippet; /// This is important for untrusted input, as it can contain /// invalid unicode sequences. pub fn normalize_untrusted_str(s: &str) -> String { - renderer::normalize_whitespace(s) + renderer::normalize_whitespace(s, false) } #[doc(inline)] diff --git a/src/renderer/mod.rs b/src/renderer/mod.rs index 2a2a3c2..e4fa3c0 100644 --- a/src/renderer/mod.rs +++ b/src/renderer/mod.rs @@ -111,6 +111,7 @@ pub struct Renderer { decor_style: DecorStyle, stylesheet: Stylesheet, short_message: bool, + force_ascii: bool, } impl Renderer { @@ -122,6 +123,7 @@ impl Renderer { decor_style: DecorStyle::Ascii, stylesheet: Stylesheet::plain(), short_message: false, + force_ascii: false, } } @@ -190,6 +192,12 @@ impl Renderer { self.anonymized_line_numbers = anonymized_line_numbers; self } + + /// Makes it so that *every* non-ASCII character in the terminal output is replaced. + pub const fn force_ascii(mut self) -> Self { + self.force_ascii = true; + self + } } impl Renderer { diff --git a/src/renderer/render.rs b/src/renderer/render.rs index 2bbadd7..43a0a09 100644 --- a/src/renderer/render.rs +++ b/src/renderer/render.rs @@ -30,7 +30,7 @@ pub(crate) fn render(renderer: &Renderer, groups: Report<'_>) -> String { if renderer.short_message { render_short_message(renderer, groups).unwrap() } else { - let (max_line_num, og_primary_path, groups) = pre_process(groups); + let (max_line_num, og_primary_path, groups) = pre_process(groups, renderer.force_ascii); let max_line_num_len = if renderer.anonymized_line_numbers { ANONYMIZED_LINE_NUM.len() } else { @@ -270,7 +270,7 @@ fn render_short_message(renderer: &Renderer, groups: &[Group<'_>]) -> Result margin.term_width * 2 && width > (MIN_PAD * 2 + margin_width) { // If the terminal is *too* small, we keep at least a tiny bit of the span for // display. @@ -1446,6 +1455,7 @@ fn emit_suggestion_default( is_cont: bool, ) { let buffer_offset = buffer.num_lines(); + let force_ascii = renderer.force_ascii; let mut row_num = buffer_offset + usize::from(!matches_previous_suggestion); let (complete, parts, highlights) = spliced_lines; let is_multiline = complete.lines().count() > 1; @@ -1513,7 +1523,7 @@ fn emit_suggestion_default( buffer.puts( row_num - 1 + line - line_start.line, max_line_num_len + 3, - &normalize_whitespace(sm.get_line(line).unwrap()), + &normalize_whitespace(sm.get_line(line).unwrap(), force_ascii), ElementStyle::Removal, ); } @@ -1574,7 +1584,7 @@ fn emit_suggestion_default( } let placeholder = renderer.decor_style.margin(); - let padding = str_width(placeholder); + let padding = str_width(placeholder, force_ascii); buffer.puts( row_num, max_line_num_len.saturating_sub(padding), @@ -1642,11 +1652,14 @@ fn emit_suggestion_default( }; // ...or trailing spaces. Account for substitutions containing unicode // characters. - let sub_len: usize = str_width(if is_whitespace_addition { - &part.replacement - } else { - part.replacement.trim() - }); + let sub_len: usize = str_width( + if is_whitespace_addition { + &part.replacement + } else { + part.replacement.trim() + }, + force_ascii, + ); let offset: isize = offsets .iter() @@ -1723,7 +1736,7 @@ fn emit_suggestion_default( // logic to show the whole prior snippet, but the current output is not // too bad to begin with, so we side-step that issue here. for (i, line) in snippet.lines().enumerate() { - let norm_line = normalize_whitespace(line); + let norm_line = normalize_whitespace(line, force_ascii); // Going lower than buffer_offset (+ 1) would mean // overwriting existing content in the buffer let min_row = buffer_offset + usize::from(!matches_previous_suggestion); @@ -1772,7 +1785,7 @@ fn emit_suggestion_default( } // length of the code after substitution - let full_sub_len = str_width(&part.replacement) as isize; + let full_sub_len = str_width(&part.replacement, force_ascii) as isize; // length of the code to be substituted let snippet_len = span_end_pos as isize - span_start_pos as isize; @@ -1787,7 +1800,7 @@ fn emit_suggestion_default( // if we elided some lines, add an ellipsis if lines.next().is_some() { let placeholder = renderer.decor_style.margin(); - let padding = str_width(placeholder); + let padding = str_width(placeholder, force_ascii); buffer.puts( row_num, max_line_num_len.saturating_sub(padding), @@ -1822,6 +1835,7 @@ fn draw_code_line( file_lines: &[&LineInfo<'_>], is_multiline: bool, ) { + let force_ascii = renderer.force_ascii; if let DisplaySuggestion::Diff = show_code_change { // We need to print more than one line if the span we need to remove is multiline. // For more info: https://github.com/rust-lang/rust/issues/92741 @@ -1839,7 +1853,7 @@ fn draw_code_line( "- ", ElementStyle::Removal, ); - let line = normalize_whitespace(line_to_remove.line); + let line = normalize_whitespace(line_to_remove.line, force_ascii); buffer.puts( *row_num - 1, max_line_num_len + 3, @@ -1873,7 +1887,7 @@ fn draw_code_line( buffer.puts( *row_num - 1, max_line_num_len + 3, - &normalize_whitespace(last_line.line), + &normalize_whitespace(last_line.line, force_ascii), ElementStyle::NoStyle, ); if line_to_add.trim().is_empty() { @@ -1901,7 +1915,7 @@ fn draw_code_line( buffer.puts(*row_num, max_line_num_len + 1, "+ ", ElementStyle::Addition); buffer.append( *row_num, - &normalize_whitespace(line_to_add), + &normalize_whitespace(line_to_add, force_ascii), ElementStyle::NoStyle, ); } @@ -1939,7 +1953,7 @@ fn draw_code_line( buffer.puts( *row_num, max_line_num_len + 3, - &normalize_whitespace(line_to_add), + &normalize_whitespace(line_to_add, force_ascii), ElementStyle::NoStyle, ); } else if let DisplaySuggestion::Add = show_code_change { @@ -1952,7 +1966,7 @@ fn draw_code_line( buffer.puts(*row_num, max_line_num_len + 1, "+ ", ElementStyle::Addition); buffer.append( *row_num, - &normalize_whitespace(line_to_add), + &normalize_whitespace(line_to_add, force_ascii), ElementStyle::NoStyle, ); } else { @@ -1965,7 +1979,7 @@ fn draw_code_line( draw_col_separator(renderer, buffer, *row_num, max_line_num_len + 1); buffer.append( *row_num, - &normalize_whitespace(line_to_add), + &normalize_whitespace(line_to_add, force_ascii), ElementStyle::NoStyle, ); } @@ -2002,7 +2016,8 @@ fn draw_line( ) -> usize { // Tabs are assumed to have been replaced by spaces in calling code. debug_assert!(!source_string.contains('\t')); - let line_len = str_width(source_string); + let force_ascii = renderer.force_ascii; + let line_len = str_width(source_string, force_ascii); // Create the source line we will highlight. let mut left = margin.left(line_len); let right = margin.right(line_len); @@ -2012,7 +2027,7 @@ fn draw_line( let code: String = source_string .chars() .skip_while(|ch| { - let w = char_width(*ch); + let w = char_width(*ch, force_ascii); // If `skipped` is less than `left`, always skip the next `ch`, // even if `ch` is a multi-width char that would make `skipped` // exceed `left`. This ensures that we do not exceed term width on @@ -2026,7 +2041,7 @@ fn draw_line( }) .take_while(|ch| { // Make sure that the trimming on the right will fall within the terminal width. - taken += char_width(*ch); + taken += char_width(*ch, force_ascii); taken <= (right - left) }) .collect(); @@ -2035,13 +2050,13 @@ fn draw_line( left += skipped - left; } let placeholder = renderer.decor_style.margin(); - let padding = str_width(placeholder); + let padding = str_width(placeholder, force_ascii); let (width_taken, bytes_taken) = if margin.was_cut_left() { // We have stripped some code/whitespace from the beginning, make it clear. let mut bytes_taken = 0; let mut width_taken = 0; for ch in code.chars() { - width_taken += char_width(ch); + width_taken += char_width(ch, force_ascii); bytes_taken += ch.len_utf8(); if width_taken >= padding { @@ -2072,7 +2087,7 @@ fn draw_line( let mut char_taken = 0; let mut width_taken_inner = 0; for ch in code.chars().rev() { - width_taken_inner += char_width(ch); + width_taken_inner += char_width(ch, force_ascii); char_taken += 1; if width_taken_inner >= padding { @@ -2317,11 +2332,11 @@ fn num_decimal_digits(num: usize) -> usize { MAX_DIGITS } -fn str_width(s: &str) -> usize { - s.chars().map(char_width).sum() +fn str_width(s: &str, force_ascii: bool) -> usize { + s.chars().map(|ch| char_width(ch, force_ascii)).sum() } -pub(crate) fn char_width(ch: char) -> usize { +pub(crate) fn char_width(ch: char, force_ascii: bool) -> usize { // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now, // just accept that sometimes the code line will be longer than desired. match ch { @@ -2335,7 +2350,20 @@ pub(crate) fn char_width(ch: char) -> usize { | '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}' | '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}' | '\u{007F}' | '\u{202A}' | '\u{202B}' | '\u{202D}' | '\u{202E}' | '\u{2066}' - | '\u{2067}' | '\u{2068}' | '\u{202C}' | '\u{2069}' => 1, + | '\u{2067}' | '\u{2068}' | '\u{202C}' | '\u{2069}' + if !force_ascii => + { + 1 + } + _ if force_ascii && !(32..=126).contains(&(ch as u32)) => { + if let Ok(i) = OUTPUT_REPLACEMENTS_ASCII.binary_search_by_key(&ch, |(k, _)| *k) + && let Some((_, replacement)) = OUTPUT_REPLACEMENTS_ASCII.get(i) + { + replacement.len() + } else { + 3 // + } + } _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1), } } @@ -2490,10 +2518,53 @@ impl DisplaySuggestion { // We replace some characters so the CLI output is always consistent and underlines aligned. // Keep the following list in sync with `rustc_span::char_width`. +const OUTPUT_REPLACEMENTS_ASCII: &[(char, &str)] = &[ + ('\0', ""), + ('\u{0001}', ""), + ('\u{0002}', ""), + ('\u{0003}', ""), + ('\u{0004}', ""), + ('\u{0005}', ""), + ('\u{0006}', ""), + ('\u{0007}', ""), + ('\u{0008}', ""), + ('\t', " "), // We do our own tab replacement + ('\u{000b}', ""), + ('\u{000c}', ""), + ('\u{000d}', ""), + ('\u{000e}', ""), + ('\u{000f}', ""), + ('\u{0010}', ""), + ('\u{0011}', ""), + ('\u{0012}', ""), + ('\u{0013}', ""), + ('\u{0014}', ""), + ('\u{0015}', ""), + ('\u{0016}', ""), + ('\u{0017}', ""), + ('\u{0018}', ""), + ('\u{0019}', ""), + ('\u{001a}', ""), + ('\u{001b}', ""), + ('\u{001c}', ""), + ('\u{001d}', ""), + ('\u{001e}', ""), + ('\u{001f}', ""), + ('\u{007f}', ""), + ('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters. + ('\u{202a}', ""), // The following unicode text flow control characters are inconsistently + ('\u{202b}', ""), // supported across CLIs and can cause confusion due to the bytes on disk + ('\u{202c}', ""), // not corresponding to the visible source code, so we replace them always. + ('\u{202d}', ""), + ('\u{202e}', ""), + ('\u{2066}', ""), + ('\u{2067}', ""), + ('\u{2068}', ""), + ('\u{2069}', ""), +]; const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ // In terminals without Unicode support the following will be garbled, but in *all* terminals - // the underlying codepoint will be as well. We could gate this replacement behind a "unicode - // support" gate. + // the underlying codepoint will be as well. In such terminals, use the `force_ascii` mode. ('\0', "␀"), ('\u{0001}', "␁"), ('\u{0002}', "␂"), @@ -2538,13 +2609,19 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ ('\u{2069}', "�"), ]; -pub(crate) fn normalize_whitespace(s: &str) -> String { +pub(crate) fn normalize_whitespace(s: &str, force_ascii: bool) -> String { + let replacements = if force_ascii { + OUTPUT_REPLACEMENTS_ASCII + } else { + OUTPUT_REPLACEMENTS + }; // Scan the input string for a character in the ordered table above. // If it's present, replace it with its alternative string (it can be more than 1 char!). // Otherwise, retain the input char. s.chars().fold(String::with_capacity(s.len()), |mut s, c| { - match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) { - Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1), + match replacements.binary_search_by_key(&c, |(k, _)| *k) { + Ok(i) => s.push_str(replacements[i].1), + _ if force_ascii && !(32..=126).contains(&(c as u32)) => s.push_str(""), _ => s.push(c), } s @@ -2641,6 +2718,7 @@ enum PreProcessedElement<'a> { fn pre_process<'a>( groups: &'a [Group<'a>], + force_ascii: bool, ) -> (usize, Option<&'a Cow<'a, str>>, Vec>) { let mut max_line_num = 0; let mut og_primary_path = None; @@ -2655,7 +2733,7 @@ fn pre_process<'a>( elements.push(PreProcessedElement::Message(message)); } Element::Cause(cause) => { - let sm = SourceMap::new(&cause.source, cause.line_start); + let sm = SourceMap::new(&cause.source, cause.line_start, force_ascii); let (depth, annotated_lines) = sm.annotated_lines(cause.markers.clone(), cause.fold); @@ -2686,7 +2764,7 @@ fn pre_process<'a>( elements.push(PreProcessedElement::Cause((cause, sm, annotated_lines))); } Element::Suggestion(suggestion) => { - let sm = SourceMap::new(&suggestion.source, suggestion.line_start); + let sm = SourceMap::new(&suggestion.source, suggestion.line_start, force_ascii); if let Some((complete, patches, highlights)) = sm.splice_lines(suggestion.markers.clone(), suggestion.fold) { diff --git a/src/renderer/source_map.rs b/src/renderer/source_map.rs index 8f602e1..01851fb 100644 --- a/src/renderer/source_map.rs +++ b/src/renderer/source_map.rs @@ -11,10 +11,11 @@ use crate::{Annotation, AnnotationKind, Patch}; pub(crate) struct SourceMap<'a> { lines: Vec>, pub(crate) source: &'a str, + force_ascii: bool, } impl<'a> SourceMap<'a> { - pub(crate) fn new(source: &'a str, line_start: usize) -> Self { + pub(crate) fn new(source: &'a str, line_start: usize, force_ascii: bool) -> Self { // Empty sources do have a "line", but it is empty, so we need to add // a line with an empty string to the source map. if source.is_empty() { @@ -27,6 +28,7 @@ impl<'a> SourceMap<'a> { end_line_size: 0, }], source, + force_ascii, }; } @@ -51,6 +53,7 @@ impl<'a> SourceMap<'a> { Self { lines: mapping, source, + force_ascii, } } @@ -71,7 +74,7 @@ impl<'a> SourceMap<'a> { [0..(span.start - start_info.start_byte).min(start_info.line.len())] .chars() .fold((0, 0), |(char_pos, byte_pos), c| { - let display = char_width(c); + let display = char_width(c, self.force_ascii); (char_pos + 1, byte_pos + display) }); // correct the char pos if we are highlighting the end of a line @@ -98,7 +101,7 @@ impl<'a> SourceMap<'a> { [0..(span.end - end_info.start_byte).min(end_info.line.len())] .chars() .fold((0, 0), |(char_pos, byte_pos), c| { - let display = char_width(c); + let display = char_width(c, self.force_ascii); (char_pos + 1, byte_pos + display) }); diff --git a/tests/color/highlight_source_multi_width_chars.forced_ascii.term.svg b/tests/color/highlight_source_multi_width_chars.forced_ascii.term.svg new file mode 100644 index 0000000..10a3cc9 --- /dev/null +++ b/tests/color/highlight_source_multi_width_chars.forced_ascii.term.svg @@ -0,0 +1,30 @@ + + + + + + + | + + 1 | [lorem ipsum](<?><?><?><?>) + + | ^^^^^^^^^^^^ + + + + diff --git a/tests/color/highlight_source_multi_width_chars.rs b/tests/color/highlight_source_multi_width_chars.rs index 84acd1d..cf4e3a4 100644 --- a/tests/color/highlight_source_multi_width_chars.rs +++ b/tests/color/highlight_source_multi_width_chars.rs @@ -10,6 +10,11 @@ fn case() { .annotation(AnnotationKind::Primary.span(14..26).highlight_source(true)), )]; + let expected_forced_ascii = + file!["highlight_source_multi_width_chars.forced_ascii.term.svg": TermSvg]; + let renderer = Renderer::styled().force_ascii(); + assert_data_eq!(renderer.render(report), expected_forced_ascii); + let expected_ascii = file!["highlight_source_multi_width_chars.ascii.term.svg": TermSvg]; let renderer = Renderer::styled(); assert_data_eq!(renderer.render(report), expected_ascii); diff --git a/tests/formatter.rs b/tests/formatter.rs index 8f2d474..fb9807c 100644 --- a/tests/formatter.rs +++ b/tests/formatter.rs @@ -8,15 +8,26 @@ use snapbox::{assert_data_eq, str}; #[test] fn test_i_29() { let input = &[Level::ERROR.primary_title("oops").element( - Snippet::source("First line\r\nSecond oops line") + Snippet::source("First line\r\nSecond oops \u{0001} line") .path("") .annotation(AnnotationKind::Primary.span(19..23).label("oops")), )]; + let expected_forced_ascii = str![[r#" +error: oops + --> :2:8 + | +2 | Second oops line + | ^^^^ oops +"#]]; + + let renderer = Renderer::plain().force_ascii(); + assert_data_eq!(renderer.render(input), expected_forced_ascii); + let expected_ascii = str![[r#" error: oops --> :2:8 | -2 | Second oops line +2 | Second oops ␁ line | ^^^^ oops "#]]; @@ -27,7 +38,7 @@ error: oops error: oops ╭▸ :2:8 │ -2 │ Second oops line +2 │ Second oops ␁ line ╰╴ ━━━━ oops "#]]; let renderer = renderer.decor_style(DecorStyle::Unicode); @@ -3324,6 +3335,24 @@ fn foo() { .element(Level::NOTE.message("this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info)")), ]; + let expected_forced_ascii = str![[r#" +error: couldn't read `$DIR/not-utf8.bin`: stream did not contain valid UTF-8 + --> $DIR/not-utf8.rs:6:5 + | +6 | include!("not-utf8.bin"); + | ^^^^^^^^^^^^^^^^^^^^^^^^ + | +note: byte `193` is not valid utf-8 + --> $DIR/not-utf8.bin:1:1 + | +1 | |!5cciWWj'}JWO@wVLO $DIR/not-utf8.rs:6:5