Skip to content

Commit 004f3ac

Browse files
committed
convert \r\n to \n when loading files
1 parent 60960a2 commit 004f3ac

File tree

2 files changed

+76
-0
lines changed

2 files changed

+76
-0
lines changed

src/libsyntax_pos/lib.rs

+56
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,7 @@ impl SourceFile {
10451045
mut src: String,
10461046
start_pos: BytePos) -> Result<SourceFile, OffsetOverflowError> {
10471047
remove_bom(&mut src);
1048+
normalize_newlines(&mut src);
10481049

10491050
let src_hash = {
10501051
let mut hasher: StableHasher<u128> = StableHasher::new();
@@ -1212,6 +1213,61 @@ fn remove_bom(src: &mut String) {
12121213
}
12131214
}
12141215

1216+
1217+
/// Replaces every `\r\n` in `src` with `\n`, in place.
///
/// A `\r` that is not immediately followed by `\n` is left untouched. The
/// function cannot fail and returns nothing.
fn normalize_newlines(src: &mut String) {
    // Fast path: a string without any `\r` has nothing to rewrite.
    if !src.as_bytes().contains(&b'\r') {
        return;
    }

    // Work on the raw bytes. Instead of mutating the live string through
    // `as_mut_vec`, take the contents out of `src` first: if anything below
    // panics, `src` is merely left empty rather than holding bytes that are
    // in the middle of being shuffled around.
    let mut buf = std::mem::replace(src, String::new()).into_bytes();

    // `gap_len` counts the `\r` bytes dropped so far. `tail` is the part of
    // the buffer that still has to be processed; its first `gap_len` bytes
    // are stale and get overwritten as later content is shifted left.
    let mut gap_len = 0;
    let mut tail = buf.as_mut_slice();
    loop {
        // Position (relative to `tail`) of the next `\r\n`, or the end of
        // `tail` when no further pair exists.
        let idx = match find_crlf(&tail[gap_len..]) {
            None => tail.len(),
            Some(idx) => idx + gap_len,
        };
        // Shift the text preceding the pair left over the accumulated gap.
        tail.copy_within(gap_len..idx, 0);
        tail = &mut tail[idx - gap_len..];
        if tail.len() == gap_len {
            // Only the stale gap bytes remain: everything has been moved.
            break;
        }
        // `tail[gap_len]` is the `\r` of the pair just found; widening the
        // gap by one drops it and keeps the `\n` that follows.
        gap_len += 1;
    }

    // Cut off the stale bytes left at the end, one per removed `\r`.
    let new_len = buf.len() - gap_len;
    buf.truncate(new_len);

    // SAFETY: `buf` started out as the bytes of a valid `String`. The only
    // change made is the removal of `\r` bytes that are directly followed by
    // `\n`. `\r` is a single-byte ASCII character, which never occurs inside
    // a multi-byte UTF-8 sequence, so removing it keeps the bytes valid UTF-8.
    *src = unsafe { String::from_utf8_unchecked(buf) };

    /// Returns the index of the first `\r\n` pair in `src`, if there is one.
    fn find_crlf(src: &[u8]) -> Option<usize> {
        src.windows(2).position(|pair| pair == b"\r\n")
    }
}
1270+
12151271
// _____________________________________________________________________________
12161272
// Pos, BytePos, CharPos
12171273
//

src/libsyntax_pos/tests.rs

+20
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,23 @@ fn test_lookup_line() {
1616
assert_eq!(lookup_line(lines, BytePos(28)), 2);
1717
assert_eq!(lookup_line(lines, BytePos(29)), 2);
1818
}
19+
20+
/// Runs `normalize_newlines` over a table of inputs and compares each result
/// with the expected text: `\r\n` pairs collapse to `\n`, while a `\r` that
/// is not followed by `\n` stays as it is.
#[test]
fn test_normalize_newlines() {
    // Each entry is (input, expected contents after normalization).
    let cases: &[(&str, &str)] = &[
        ("", ""),
        ("\n", "\n"),
        ("\r", "\r"),
        ("\r\r", "\r\r"),
        ("\r\n", "\n"),
        ("hello world", "hello world"),
        ("hello\nworld", "hello\nworld"),
        ("hello\r\nworld", "hello\nworld"),
        ("\r\nhello\r\nworld\r\n", "\nhello\nworld\n"),
        ("\r\r\n", "\r\n"),
        ("hello\rworld", "hello\rworld"),
    ];

    for &(input, expected) in cases.iter() {
        let mut normalized = String::from(input);
        normalize_newlines(&mut normalized);
        assert_eq!(normalized.as_str(), expected);
    }
}

0 commit comments

Comments
 (0)