3
3
use core:: ascii:: EscapeDefault ;
4
4
5
5
use crate :: fmt:: { self , Write } ;
6
- use crate :: { ascii, iter, mem , ops} ;
6
+ use crate :: { ascii, iter, ops} ;
7
7
8
8
#[ cfg( not( test) ) ]
9
9
impl [ u8 ] {
@@ -297,14 +297,6 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
297
297
}
298
298
}
299
299
300
- /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
301
- /// from `../str/mod.rs`, which does something similar for utf8 validation.
302
- #[ inline]
303
- const fn contains_nonascii ( v : usize ) -> bool {
304
- const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ;
305
- ( NONASCII_MASK & v) != 0
306
- }
307
-
308
300
/// ASCII test *without* the chunk-at-a-time optimizations.
309
301
///
310
302
/// This is carefully structured to produce nice small code -- it's smaller in
@@ -323,100 +315,39 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
323
315
bytes. is_empty ( )
324
316
}
325
317
326
- /// Optimized ASCII test that will use usize-at-a-time operations instead of
327
- /// byte-at-a-time operations (when possible).
328
- ///
329
- /// The algorithm we use here is pretty simple. If `s` is too short, we just
330
- /// check each byte and be done with it. Otherwise:
331
- ///
332
- /// - Read the first word with an unaligned load.
333
- /// - Align the pointer, read subsequent words until end with aligned loads.
334
- /// - Read the last `usize` from `s` with an unaligned load.
335
- ///
336
- /// If any of these loads produces something for which `contains_nonascii`
337
- /// (above) returns true, then we know the answer is false.
338
318
#[ inline]
339
- const fn is_ascii ( s : & [ u8 ] ) -> bool {
340
- const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
341
-
342
- let len = s. len ( ) ;
343
- let align_offset = s. as_ptr ( ) . align_offset ( USIZE_SIZE ) ;
344
-
345
- // If we wouldn't gain anything from the word-at-a-time implementation, fall
346
- // back to a scalar loop.
347
- //
348
- // We also do this for architectures where `size_of::<usize>()` isn't
349
- // sufficient alignment for `usize`, because it's a weird edge case.
350
- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem:: align_of :: < usize > ( ) {
351
- return is_ascii_simple ( s) ;
352
- }
353
-
354
- // We always read the first word unaligned, which means that if `align_offset` is
355
- // 0, we'd read the same value again for the aligned read.
356
- let offset_to_aligned = if align_offset == 0 { USIZE_SIZE } else { align_offset } ;
357
-
358
- let start = s. as_ptr ( ) ;
359
- // SAFETY: We verified above that `len >= USIZE_SIZE` (shorter slices returned early).
360
- let first_word = unsafe { ( start as * const usize ) . read_unaligned ( ) } ;
361
-
362
- if contains_nonascii ( first_word) {
363
- return false ;
364
- }
365
- // We checked this above, somewhat implicitly. Note that `offset_to_aligned`
366
- // is either `align_offset` or `USIZE_SIZE`, both of which are explicitly checked
367
- // above.
368
- debug_assert ! ( offset_to_aligned <= len) ;
369
-
370
- // SAFETY: word_ptr is the (properly aligned) usize ptr we use to read the
371
- // middle chunk of the slice.
372
- let mut word_ptr = unsafe { start. add ( offset_to_aligned) as * const usize } ;
373
-
374
- // `byte_pos` is the byte index of `word_ptr`, used for loop end checks.
375
- let mut byte_pos = offset_to_aligned;
376
-
377
- // Paranoia check about alignment, since we're about to do a bunch of
378
- // aligned loads. In practice this should be impossible barring a bug in
379
- // `align_offset` though.
380
- // While this method is allowed to spuriously fail in CTFE, if it doesn't
381
- // have alignment information it should have given a `usize::MAX` for
382
- // `align_offset` earlier, sending things through the scalar path instead of
383
- // this one, so this check should pass if it's reachable.
384
- debug_assert ! ( word_ptr. is_aligned_to( mem:: align_of:: <usize >( ) ) ) ;
385
-
386
- // Read subsequent words until the last aligned word, excluding the last
387
- // aligned word by itself to be done in tail check later, to ensure that
388
- // tail is always one `usize` at most, avoiding an extra branch for `byte_pos == len`.
389
- while byte_pos < len - USIZE_SIZE {
390
- // Sanity check that the read is in bounds
391
- debug_assert ! ( byte_pos + USIZE_SIZE <= len) ;
392
- // And that our assumptions about `byte_pos` hold.
393
- debug_assert ! ( matches!(
394
- word_ptr. cast:: <u8 >( ) . guaranteed_eq( start. wrapping_add( byte_pos) ) ,
395
- // These are from the same allocation, so will hopefully always be
396
- // known to match even in CTFE, but if it refuses to compare them
397
- // that's ok since it's just a debug check anyway.
398
- None | Some ( true ) ,
399
- ) ) ;
319
+ const fn is_ascii ( bytes : & [ u8 ] ) -> bool {
320
+ // Constant chosen to enable `pmovmskb` instruction on x86-64
321
+ const N : usize = 32 ;
322
+
323
+ let mut i = 0 ;
324
+
325
+ while i + N <= bytes. len ( ) {
326
+ let chunk_end = i + N ;
327
+
328
+ // Get LLVM to produce a `pmovmskb` instruction on x86-64 which
329
+ // creates a mask from the most significant bit of each byte.
330
+ // ASCII bytes are less than 128 (0x80), so their most significant
331
+ // bit is unset. Thus, detecting non-ASCII bytes can be done in one
332
+ // instruction.
333
+ let mut count = 0 ;
334
+ while i < chunk_end {
335
+ count += ( bytes[ i] <= 127 ) as u8 ;
336
+ i += 1 ;
337
+ }
400
338
401
- // SAFETY: We know `word_ptr` is properly aligned (because of
402
- // `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
403
- let word = unsafe { word_ptr. read ( ) } ;
404
- if contains_nonascii ( word) {
339
+ // All bytes should be <= 127 so count is equal to chunk size.
340
+ if count != N as u8 {
405
341
return false ;
406
342
}
407
-
408
- byte_pos += USIZE_SIZE ;
409
- // SAFETY: We know that `byte_pos <= len - USIZE_SIZE`, which means that
410
- // after this `add`, `word_ptr` will be at most one-past-the-end.
411
- word_ptr = unsafe { word_ptr. add ( 1 ) } ;
412
343
}
413
344
414
- // Sanity check to ensure there really is only one `usize` left. This should
415
- // be guaranteed by our loop condition.
416
- debug_assert ! ( byte_pos <= len && len - byte_pos <= USIZE_SIZE ) ;
417
-
418
- // SAFETY: This relies on `len >= USIZE_SIZE`, which we check at the start.
419
- let last_word = unsafe { ( start . add ( len - USIZE_SIZE ) as * const usize ) . read_unaligned ( ) } ;
345
+ // Process the remaining `bytes.len() % N` bytes.
346
+ let mut is_ascii = true ;
347
+ while i < bytes . len ( ) {
348
+ is_ascii &= bytes [ i ] <= 127 ;
349
+ i += 1 ;
350
+ }
420
351
421
- ! contains_nonascii ( last_word )
352
+ is_ascii
422
353
}
0 commit comments