@@ -287,6 +287,24 @@ pub unsafe fn _mm_cvtepi8_epi64(a: i8x16) -> i64x2 {
287
287
}
288
288
*/
289
289
290
+ /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
291
+ #[ inline( always) ]
292
+ #[ target_feature = "+sse4.1" ]
293
+ #[ cfg_attr( test, assert_instr( pmovsxwd) ) ]
294
+ pub unsafe fn _mm_cvtepi16_epi32 ( a : i16x8 ) -> i32x4 {
295
+ simd_cast :: < :: v64:: i16x4 , _ > ( simd_shuffle4 ( a, a, [ 0 , 1 , 2 , 3 ] ) )
296
+ }
297
+
298
+ /// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
299
+ /*
300
+ #[inline(always)]
301
+ #[target_feature = "+sse4.1"]
302
+ #[cfg_attr(test, assert_instr(pmovsxwq))]
303
+ pub unsafe fn _mm_cvtepi16_epi64(a: i16x8) -> i64x2 {
304
+ simd_cast::<::v32::i16x2, _>(simd_shuffle2(a, a, [0, 1]))
305
+ }
306
+ */
307
+
290
308
/// Returns the dot product of two f64x2 vectors.
291
309
///
292
310
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -825,6 +843,31 @@ mod tests {
825
843
}
826
844
*/
827
845
846
+ #[ simd_test = "sse4.1" ]
847
+ unsafe fn _mm_cvtepi16_epi32 ( ) {
848
+ let a = i16x8:: splat ( 10 ) ;
849
+ let r = sse41:: _mm_cvtepi16_epi32 ( a) ;
850
+ let e = i32x4:: splat ( 10 ) ;
851
+ assert_eq ! ( r, e) ;
852
+ let a = i16x8:: splat ( -10 ) ;
853
+ let r = sse41:: _mm_cvtepi16_epi32 ( a) ;
854
+ let e = i32x4:: splat ( -10 ) ;
855
+ assert_eq ! ( r, e) ;
856
+ }
857
+
858
+ /*
859
+ #[simd_test = "sse4.1"]
860
+ unsafe fn _mm_cvtepi16_epi64() {
861
+ let a = i16x8::splat(10);
862
+ let r = sse41::_mm_cvtepi16_epi64(a);
863
+ let e = i64x2::splat(10);
864
+ assert_eq!(r, e);
865
+ let a = i16x8::splat(-10);
866
+ let r = sse41::_mm_cvtepi16_epi64(a);
867
+ let e = i64x2::splat(-10);
868
+ assert_eq!(r, e);
869
+ }
870
+ */
828
871
#[ simd_test = "sse4.1" ]
829
872
unsafe fn _mm_dp_pd ( ) {
830
873
let a = f64x2:: new ( 2.0 , 3.0 ) ;
0 commit comments