Skip to content

Commit 370d4ce

Browse files
rope: Micro optimize the creation of masks (#41132)
Using Compiler Explorer, I saw that the compiler wasn't clever enough to optimise away the branches in the masking code. I thought the compiler would have a better chance if we always branched, which [turned out to be the case](https://godbolt.org/z/PM594Pz18).

Running the benchmarks, the biggest benefit I saw was:

```
push/65536              time:   [2.9067 ms 2.9243 ms 2.9417 ms]
                        thrpt:  [21.246 MiB/s 21.373 MiB/s 21.502 MiB/s]
                 change:
                        time:   [-8.3452% -7.2617% -6.2009%] (p = 0.00 < 0.05)
                        thrpt:  [+6.6108% +7.8303% +9.1050%]
                        Performance has improved.
```

But I did also see some regressions:

```
slice/4096              time:   [66.195 µs 66.815 µs 67.448 µs]
                        thrpt:  [57.915 MiB/s 58.464 MiB/s 59.012 MiB/s]
                 change:
                        time:   [+3.7131% +5.1698% +6.6971%] (p = 0.00 < 0.05)
                        thrpt:  [-6.2768% -4.9157% -3.5802%]
                        Performance has regressed.
```

Release Notes:

- N/A
1 parent 2284131 commit 370d4ce

File tree

1 file changed

+15
-20
lines changed

1 file changed

+15
-20
lines changed

crates/rope/src/chunk.rs

Lines changed: 15 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,16 @@ pub struct Chunk {
3232
pub text: ArrayString<MAX_BASE>,
3333
}
3434

35+
#[inline(always)]
36+
const fn saturating_shl_mask(offset: u32) -> Bitmap {
37+
(1 as Bitmap).unbounded_shl(offset).wrapping_sub(1)
38+
}
39+
40+
#[inline(always)]
41+
const fn saturating_shr_mask(offset: u32) -> Bitmap {
42+
!Bitmap::MAX.unbounded_shr(offset)
43+
}
44+
3545
impl Chunk {
3646
pub const MASK_BITS: usize = Bitmap::BITS as usize;
3747

@@ -291,34 +301,19 @@ impl<'a> ChunkSlice<'a> {
291301
/// Get number of chars in first line
292302
#[inline(always)]
293303
pub fn first_line_chars(&self) -> u32 {
294-
if self.newlines == 0 {
295-
self.chars.count_ones()
296-
} else {
297-
let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
298-
(self.chars & mask).count_ones()
299-
}
304+
(self.chars & saturating_shl_mask(self.newlines.trailing_zeros())).count_ones()
300305
}
301306

302307
/// Get number of chars in last line
303308
#[inline(always)]
304309
pub fn last_line_chars(&self) -> u32 {
305-
if self.newlines == 0 {
306-
self.chars.count_ones()
307-
} else {
308-
let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
309-
(self.chars & mask).count_ones()
310-
}
310+
(self.chars & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
311311
}
312312

313313
/// Get number of UTF-16 code units in last line
314314
#[inline(always)]
315315
pub fn last_line_len_utf16(&self) -> u32 {
316-
if self.newlines == 0 {
317-
self.chars_utf16.count_ones()
318-
} else {
319-
let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
320-
(self.chars_utf16 & mask).count_ones()
321-
}
316+
(self.chars_utf16 & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
322317
}
323318

324319
/// Get the longest row in the chunk and its length in characters.
@@ -492,8 +487,8 @@ impl<'a> ChunkSlice<'a> {
492487

493488
#[inline(always)]
494489
pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
495-
let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
496-
let row = (self.newlines & mask).count_ones();
490+
let mask = saturating_shl_mask(offset as u32);
491+
let row = (self.newlines & saturating_shl_mask(offset as u32)).count_ones();
497492
let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
498493
let column = if newline_ix as usize == MAX_BASE {
499494
0

0 commit comments

Comments
 (0)