Skip to content

Commit 8c35c4f

Browse files
p32blo authored and BurntSushi committed
Add _mm_cvtepi16_epi32 and _mm_cvtepi16_epi64 (commented)
1 parent 44bc687 commit 8c35c4f

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

src/x86/sse41.rs

+43
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,24 @@ pub unsafe fn _mm_cvtepi8_epi64(a: i8x16) -> i64x2 {
287287
}
288288
*/
289289

290+
/// Sign extend the first four packed 16-bit integers in `a` (lanes 0-3)
/// to packed 32-bit integers.
291+
#[inline(always)]
292+
#[target_feature = "+sse4.1"]
293+
#[cfg_attr(test, assert_instr(pmovsxwd))]
294+
pub unsafe fn _mm_cvtepi16_epi32(a: i16x8) -> i32x4 {
295+
// Pick lanes 0..=3 of `a` as an `i16x4`, then cast each lane to 32 bits
// to produce the `i32x4` result.
simd_cast::<::v64::i16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
296+
}
297+
298+
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
299+
/*
300+
#[inline(always)]
301+
#[target_feature = "+sse4.1"]
302+
#[cfg_attr(test, assert_instr(pmovsxwq))]
303+
pub unsafe fn _mm_cvtepi16_epi64(a: i16x8) -> i64x2 {
304+
simd_cast::<::v32::i16x2, _>(simd_shuffle2(a, a, [0, 1]))
305+
}
306+
*/
307+
290308
/// Returns the dot product of two f64x2 vectors.
291309
///
292310
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -825,6 +843,31 @@ mod tests {
825843
}
826844
*/
827845

846+
// Checks `sse41::_mm_cvtepi16_epi32` on an all-positive and an all-negative
// input vector.
// NOTE(review): both inputs are splats, so every lane holds the same value;
// this does not verify which lanes of `a` end up in the result.
#[simd_test = "sse4.1"]
847+
unsafe fn _mm_cvtepi16_epi32() {
848+
// Positive case: each 16-bit 10 should come out as a 32-bit 10.
let a = i16x8::splat(10);
849+
let r = sse41::_mm_cvtepi16_epi32(a);
850+
let e = i32x4::splat(10);
851+
assert_eq!(r, e);
852+
// Negative case: the sign must be preserved when widening (-10 stays -10).
let a = i16x8::splat(-10);
853+
let r = sse41::_mm_cvtepi16_epi32(a);
854+
let e = i32x4::splat(-10);
855+
assert_eq!(r, e);
856+
}
857+
858+
/*
859+
#[simd_test = "sse4.1"]
860+
unsafe fn _mm_cvtepi16_epi64() {
861+
let a = i16x8::splat(10);
862+
let r = sse41::_mm_cvtepi16_epi64(a);
863+
let e = i64x2::splat(10);
864+
assert_eq!(r, e);
865+
let a = i16x8::splat(-10);
866+
let r = sse41::_mm_cvtepi16_epi64(a);
867+
let e = i64x2::splat(-10);
868+
assert_eq!(r, e);
869+
}
870+
*/
828871
#[simd_test = "sse4.1"]
829872
unsafe fn _mm_dp_pd() {
830873
let a = f64x2::new(2.0, 3.0);

0 commit comments

Comments
 (0)