Skip to content

Commit 9cf6fba

Browse files
committed
Auto merge of #31253 - ranma42:improve-unicode-iter-offset, r=brson
Improve computation of offset in `EscapeUnicode`. Unify the computation of `offset` and use `leading_zeros` instead of manually scanning the bits. This PR removes some duplicated code and makes it a little simpler. The computation of `offset` is also faster, but it is unlikely to have an impact on actual code. (split from #31049)
2 parents c2aaad4 + 8984242 commit 9cf6fba

File tree

1 file changed

+32
-20
lines changed

1 file changed

+32
-20
lines changed

src/libcore/char.rs

+32-20
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,20 @@ impl CharExt for char {
299299

300300
#[inline]
301301
fn escape_unicode(self) -> EscapeUnicode {
302-
EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash }
302+
let c = self as u32;
303+
304+
// or-ing 1 ensures that for c == 0 the code computes that one
305+
// digit should be printed and, equivalently, avoids the
306+
// (31 - 32) underflow
307+
let msb = 31 - (c | 1).leading_zeros();
308+
309+
// the index of the most significant hex digit
310+
let ms_hex_digit = msb / 4;
311+
EscapeUnicode {
312+
c: self,
313+
state: EscapeUnicodeState::Backslash,
314+
hex_digit_idx: ms_hex_digit as usize,
315+
}
303316
}
304317

305318
#[inline]
@@ -392,15 +405,20 @@ impl CharExt for char {
392405
#[stable(feature = "rust1", since = "1.0.0")]
393406
pub struct EscapeUnicode {
394407
c: char,
395-
state: EscapeUnicodeState
408+
state: EscapeUnicodeState,
409+
410+
// The index of the next hex digit to be printed (0 if none),
411+
// i.e. the number of hex digits left to print after that one;
412+
// indices increase from the least significant digit: 0x543210
413+
hex_digit_idx: usize,
396414
}
397415

398416
#[derive(Clone, Debug)]
399417
enum EscapeUnicodeState {
400418
Backslash,
401419
Type,
402420
LeftBrace,
403-
Value(usize),
421+
Value,
404422
RightBrace,
405423
Done,
406424
}
@@ -420,19 +438,16 @@ impl Iterator for EscapeUnicode {
420438
Some('u')
421439
}
422440
EscapeUnicodeState::LeftBrace => {
423-
let mut n = 0;
424-
while (self.c as u32) >> (4 * (n + 1)) != 0 {
425-
n += 1;
426-
}
427-
self.state = EscapeUnicodeState::Value(n);
441+
self.state = EscapeUnicodeState::Value;
428442
Some('{')
429443
}
430-
EscapeUnicodeState::Value(offset) => {
431-
let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap();
432-
if offset == 0 {
444+
EscapeUnicodeState::Value => {
445+
let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
446+
let c = from_digit(hex_digit, 16).unwrap();
447+
if self.hex_digit_idx == 0 {
433448
self.state = EscapeUnicodeState::RightBrace;
434449
} else {
435-
self.state = EscapeUnicodeState::Value(offset - 1);
450+
self.hex_digit_idx -= 1;
436451
}
437452
Some(c)
438453
}
@@ -445,18 +460,15 @@ impl Iterator for EscapeUnicode {
445460
}
446461

447462
fn size_hint(&self) -> (usize, Option<usize>) {
448-
let mut n = 0;
449-
while (self.c as usize) >> (4 * (n + 1)) != 0 {
450-
n += 1;
451-
}
452463
let n = match self.state {
453-
EscapeUnicodeState::Backslash => n + 5,
454-
EscapeUnicodeState::Type => n + 4,
455-
EscapeUnicodeState::LeftBrace => n + 3,
456-
EscapeUnicodeState::Value(offset) => offset + 2,
464+
EscapeUnicodeState::Backslash => 5,
465+
EscapeUnicodeState::Type => 4,
466+
EscapeUnicodeState::LeftBrace => 3,
467+
EscapeUnicodeState::Value => 2,
457468
EscapeUnicodeState::RightBrace => 1,
458469
EscapeUnicodeState::Done => 0,
459470
};
471+
let n = n + self.hex_digit_idx;
460472
(n, Some(n))
461473
}
462474
}

0 commit comments

Comments
 (0)