@@ -1045,6 +1045,7 @@ impl SourceFile {
1045
1045
mut src : String ,
1046
1046
start_pos : BytePos ) -> Result < SourceFile , OffsetOverflowError > {
1047
1047
remove_bom ( & mut src) ;
1048
+ normalize_newlines ( & mut src) ;
1048
1049
1049
1050
let src_hash = {
1050
1051
let mut hasher: StableHasher < u128 > = StableHasher :: new ( ) ;
@@ -1212,6 +1213,61 @@ fn remove_bom(src: &mut String) {
1212
1213
}
1213
1214
}
1214
1215
1216
+
1217
/// Replaces every `\r\n` sequence with `\n` in-place in `src`.
///
/// A `\r` that is not immediately followed by `\n` is left untouched; this
/// function never reports an error. Only a single pass is made, so an input
/// such as `\r\r\n` becomes `\r\n`.
fn normalize_newlines(src: &mut String) {
    // Fast path: without any `\r` there is nothing to normalize.
    if !src.as_bytes().contains(&b'\r') {
        return;
    }

    // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
    // While we *can* call `as_mut_vec` and do surgery on the live string
    // directly, let's rather steal the contents of `src`. This makes the code
    // safe even if a panic occurs: `src` is merely left empty in that case.
    let mut buf = std::mem::replace(src, String::new()).into_bytes();

    // Loop invariant: the first `gap_len` bytes of `tail` are dead slots (one
    // per `\r` dropped so far); everything in front of `tail` is already
    // compacted, and the bytes after the gap still have to be shifted left by
    // `gap_len`.
    let mut gap_len = 0;
    let mut tail = buf.as_mut_slice();
    loop {
        // End of the next chunk to keep: either the `\r` of the next `\r\n`
        // pair, or the end of the buffer when no pair is left.
        let idx = match find_crlf(&tail[gap_len..]) {
            None => tail.len(),
            Some(idx) => idx + gap_len,
        };
        // Slide the chunk over the gap, then step past the compacted bytes.
        tail.copy_within(gap_len..idx, 0);
        tail = &mut tail[idx - gap_len..];
        if tail.len() == gap_len {
            // Only the gap remains: the whole buffer has been processed.
            break;
        }
        // The `\r` that ended the chunk becomes part of the gap.
        gap_len += 1;
    }

    // Account for the removed `\r` bytes, which now all sit at the end.
    let new_len = buf.len() - gap_len;
    buf.truncate(new_len);

    // SAFETY: `buf` held valid utf-8 when it was taken out of `src`. The only
    // change to its logical content is the removal of `\r` bytes, each of
    // which is a complete one-byte code point, and the stale bytes left behind
    // by the compaction have just been truncated away. Hence `buf` is valid
    // utf-8 again.
    *src = unsafe { String::from_utf8_unchecked(buf) };

    /// Returns the index of the `\r` of the first `\r\n` pair in `src`, if any.
    fn find_crlf(src: &[u8]) -> Option<usize> {
        src.windows(2).position(|pair| pair == b"\r\n")
    }
}
1270
+
1215
1271
// _____________________________________________________________________________
1216
1272
// Pos, BytePos, CharPos
1217
1273
//
0 commit comments