Skip to content

Commit 33c076e

Browse files
committed
Inspect the first byte before skipping ASCII chunk
This reduces the chance of entering the ASCII bypass path without making any progress, and thus reduces branch flipping.
1 parent 0056ad2 commit 33c076e

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

library/core/src/str/validations.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,10 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
356356

357357
while i + MAIN_CHUNK_SIZE <= bytes.len() {
358358
// Fast path: if the current state is ACCEPT, we can skip to the next non-ASCII chunk.
359-
if st == ST_ACCEPT {
359+
// We also do a quick inspection of the first byte to avoid entering this path at all
360+
// when handling strings with almost no ASCII, e.g., Chinese scripts.
361+
// SAFETY: `i` is in bounds.
362+
if st == ST_ACCEPT && unsafe { *bytes.get_unchecked(i) } < 0x80 {
360363
// SAFETY: `i` is in bounds.
361364
let rest = unsafe { bytes.get_unchecked(i..) };
362365
let mut ascii_chunks = rest.array_chunks::<ASCII_CHUNK_SIZE>();

0 commit comments

Comments
 (0)