11#![ allow( unsafe_code) ]
2+ // May be unused if no architecture features are detected at compile time or runtime.
3+ #[ allow( unused_imports) ]
24use core:: mem:: transmute;
35
46// For when we want to make sure we have a texel at compile time based on bytemuck.
@@ -12,8 +14,12 @@ macro_rules! expect_texel {
1214 } ;
1315}
1416
17+ // May be unused if no architecture features are detected at compile time or runtime.
18+ #[ allow( dead_code) ]
1519#[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
1620mod x86_avx2;
21+ // May be unused if no architecture features are detected at compile time or runtime.
22+ #[ allow( dead_code) ]
1723#[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
1824mod x86_ssse3;
1925
@@ -30,29 +36,85 @@ pub(crate) struct ShuffleOps {
3036
3137impl ShuffleOps {
3238 /// FIXME(perf): implement and choose arch-specific shuffles.
39+ // May be unused if no architecture features are detected at compile time or runtime.
40+ #[ allow( unused_mut) ]
3341 pub fn with_arch ( mut self ) -> Self {
3442 #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
43+ {
44+ self = self . with_x86 ( ) ;
45+ }
46+
47+ self
48+ }
49+
50+ #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
51+ // May be unused if no architecture features are detected at compile time or runtime.
52+ #[ allow( unused_mut) ]
53+ fn with_x86 ( mut self ) -> Self {
54+ #[ cfg( target_feature = "ssse3" ) ]
55+ // SAFETY: `ssse3` detected at compile time
56+ unsafe {
57+ self = self . with_x86_ssse3 ( ) ;
58+ }
59+
60+ #[ cfg( not( target_feature = "ssse3" ) ) ]
61+ #[ cfg( feature = "runtime-features" ) ]
3562 if std:: is_x86_feature_detected!( "ssse3" ) {
36- self . shuffle_u8x4 = unsafe {
37- transmute :: < unsafe fn ( & mut [ [ u8 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_ssse3:: shuffle_u8x4)
38- } ;
39- self . shuffle_u16x4 = unsafe {
40- transmute :: < unsafe fn ( & mut [ [ u16 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_ssse3:: shuffle_u16x4)
41- } ;
63+ // SAFETY: `ssse3` detected at runtime
64+ unsafe {
65+ self = self . with_x86_ssse3 ( ) ;
66+ }
67+ }
68+
69+ #[ cfg( target_feature = "avx2" ) ]
70+ // SAFETY: `avx2` detected at compile time
71+ unsafe {
72+ self = self . with_x86_avx2 ( ) ;
73+ }
74+
75+ #[ cfg( not( target_feature = "avx2" ) ) ]
76+ #[ cfg( feature = "runtime-features" ) ]
77+ if std:: is_x86_feature_detected!( "avx2" ) {
78+ // SAFETY: `avx2` detected at runtime
79+ unsafe {
80+ self = self . with_x86_avx2 ( ) ;
81+ }
4282 }
4383
84+ self
85+ }
86+
87+ /// # Safety
88+ ///
89+ /// Must only be used when the `ssse3` feature is available.
90+ // May be unused if no architecture features are detected at compile time or runtime.
91+ #[ allow( dead_code) ]
92+ #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
93+ unsafe fn with_x86_ssse3 ( mut self ) -> Self {
94+ self . shuffle_u8x4 =
95+ unsafe { transmute :: < unsafe fn ( & mut [ [ u8 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_ssse3:: shuffle_u8x4) } ;
96+ self . shuffle_u16x4 = unsafe {
97+ transmute :: < unsafe fn ( & mut [ [ u16 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_ssse3:: shuffle_u16x4)
98+ } ;
99+
100+ self
101+ }
102+
103+ /// # Safety
104+ ///
105+ /// Must only be used when the `avx2` feature is available.
106+ // May be unused if no architecture features are detected at compile time or runtime.
107+ #[ allow( dead_code) ]
108+ #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
109+ unsafe fn with_x86_avx2 ( mut self ) -> Self {
44110 // Note: On Ivy Bridge these have the same *throughput* of 256bit-per-cycle as their SSSE3
45111 // equivalents until Icelake. With Icelake they are twice as fast at 512bit-per-cycle.
46112 // Therefore, we don't select them until we find a way to predict/select this.
47- #[ cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) ]
48- if std:: is_x86_feature_detected!( "avx2" ) {
49- self . shuffle_u8x4 = unsafe {
50- transmute :: < unsafe fn ( & mut [ [ u8 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_avx2:: shuffle_u8x4)
51- } ;
52- self . shuffle_u16x4 = unsafe {
53- transmute :: < unsafe fn ( & mut [ [ u16 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_avx2:: shuffle_u16x4)
54- } ;
55- }
113+
114+ self . shuffle_u8x4 =
115+ unsafe { transmute :: < unsafe fn ( & mut [ [ u8 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_avx2:: shuffle_u8x4) } ;
116+ self . shuffle_u16x4 =
117+ unsafe { transmute :: < unsafe fn ( & mut [ [ u16 ; 4 ] ] , [ u8 ; 4 ] ) , _ > ( x86_avx2:: shuffle_u16x4) } ;
56118
57119 self
58120 }
0 commit comments