3
3
use core:: ascii:: EscapeDefault ;
4
4
5
5
use crate :: fmt:: { self , Write } ;
6
- use crate :: { ascii, iter, mem , ops} ;
6
+ use crate :: { ascii, iter, ops} ;
7
7
8
8
#[ cfg( not( test) ) ]
9
9
impl [ u8 ] {
@@ -297,14 +297,6 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
297
297
}
298
298
}
299
299
300
- /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
301
- /// from `../str/mod.rs`, which does something similar for utf8 validation.
302
- #[ inline]
303
- const fn contains_nonascii ( v : usize ) -> bool {
304
- const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ;
305
- ( NONASCII_MASK & v) != 0
306
- }
307
-
308
300
/// ASCII test *without* the chunk-at-a-time optimizations.
309
301
///
310
302
/// This is carefully structured to produce nice small code -- it's smaller in
@@ -335,9 +327,17 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
335
327
///
336
328
/// If any of these loads produces something for which `contains_nonascii`
337
329
/// (a helper nested inside this function) returns true, then we know the answer is false.
330
+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
338
331
#[ inline]
339
332
const fn is_ascii ( s : & [ u8 ] ) -> bool {
340
- const USIZE_SIZE : usize = mem:: size_of :: < usize > ( ) ;
333
+ /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
334
+ /// from `../str/mod.rs`, which does something similar for utf8 validation.
335
+ const fn contains_nonascii ( v : usize ) -> bool {
336
+ const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ;
337
+ ( NONASCII_MASK & v) != 0
338
+ }
339
+
340
+ const USIZE_SIZE : usize = size_of :: < usize > ( ) ;
341
341
342
342
let len = s. len ( ) ;
343
343
let align_offset = s. as_ptr ( ) . align_offset ( USIZE_SIZE ) ;
@@ -347,7 +347,7 @@ const fn is_ascii(s: &[u8]) -> bool {
347
347
//
348
348
// We also do this for architectures where `size_of::<usize>()` isn't
349
349
// sufficient alignment for `usize`, because it's a weird edge case.
350
- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem :: align_of :: < usize > ( ) {
350
+ if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < align_of :: < usize > ( ) {
351
351
return is_ascii_simple ( s) ;
352
352
}
353
353
@@ -381,7 +381,7 @@ const fn is_ascii(s: &[u8]) -> bool {
381
381
// have alignment information it should have given a `usize::MAX` for
382
382
// `align_offset` earlier, sending things through the scalar path instead of
383
383
// this one, so this check should pass if it's reachable.
384
- debug_assert ! ( word_ptr. is_aligned_to( mem :: align_of:: <usize >( ) ) ) ;
384
+ debug_assert ! ( word_ptr. is_aligned_to( align_of:: <usize >( ) ) ) ;
385
385
386
386
// Read subsequent words until the last aligned word, excluding the last
387
387
// aligned word by itself to be done in tail check later, to ensure that
@@ -420,3 +420,48 @@ const fn is_ascii(s: &[u8]) -> bool {
420
420
421
421
!contains_nonascii ( last_word)
422
422
}
423
+
424
+ /// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64`
425
+ /// platforms.
426
+ ///
427
+ /// Other platforms are not likely to benefit from this code structure, so they
428
+ /// use SWAR techniques to test for ASCII in `usize`-sized chunks.
429
+ #[ cfg( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ]
430
+ #[ inline]
431
+ const fn is_ascii ( bytes : & [ u8 ] ) -> bool {
432
+ // Process chunks of 32 bytes at a time in the fast path to enable
433
+ // auto-vectorization and use of `pmovmskb`. Two 128-bit vector registers
434
+ // can be OR'd together and then the resulting vector can be tested for
435
+ // non-ASCII bytes.
436
+ const CHUNK_SIZE : usize = 32 ;
437
+
438
+ let mut i = 0 ;
439
+
440
+ while i + CHUNK_SIZE <= bytes. len ( ) {
441
+ let chunk_end = i + CHUNK_SIZE ;
442
+
443
+ // Get LLVM to produce a `pmovmskb` instruction on x86-64 which
444
+ // creates a mask from the most significant bit of each byte.
445
+ // ASCII bytes are less than 128 (0x80), so their most significant
446
+ // bit is unset.
447
+ let mut count = 0 ;
448
+ while i < chunk_end {
449
+ count += bytes[ i] . is_ascii ( ) as u8 ;
450
+ i += 1 ;
451
+ }
452
+
453
+ // Every ASCII byte adds 1 to `count`, so the chunk is all-ASCII only if `count == CHUNK_SIZE`.
454
+ if count != CHUNK_SIZE as u8 {
455
+ return false ;
456
+ }
457
+ }
458
+
459
+ // Process the remaining `bytes.len() % CHUNK_SIZE` bytes one at a time.
460
+ let mut is_ascii = true ;
461
+ while i < bytes. len ( ) {
462
+ is_ascii &= bytes[ i] . is_ascii ( ) ;
463
+ i += 1 ;
464
+ }
465
+
466
+ is_ascii
467
+ }
0 commit comments