Skip to content

Commit 517534e

Browse files
committed
Auto merge of rust-lang#117116 - calebzulawski:repr-simd-packed, r=workingjubilee
Implement repr(packed) for repr(simd). This allows creating vectors with non-power-of-2 lengths that do not have padding. See rust-lang/portable-simd#319.
2 parents 1a3aa4a + c623489 commit 517534e

File tree

4 files changed

+115
-3
lines changed

4 files changed

+115
-3
lines changed

compiler/rustc_codegen_llvm/src/intrinsic.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::value::Value;
1010
use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
1111
use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
1212
use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization};
13-
use rustc_codegen_ssa::mir::operand::OperandRef;
13+
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
1414
use rustc_codegen_ssa::mir::place::PlaceRef;
1515
use rustc_codegen_ssa::traits::*;
1616
use rustc_hir as hir;
@@ -946,6 +946,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
946946
tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), callee_ty.fn_sig(tcx));
947947
let arg_tys = sig.inputs();
948948

949+
// SIMD arguments must be immediates; non-power-of-2 #[repr(packed)] vectors are not, so reject them
950+
for (ty, arg) in arg_tys.iter().zip(args) {
951+
if ty.is_simd() && !matches!(arg.val, OperandValue::Immediate(_)) {
952+
return_error!(InvalidMonomorphization::SimdArgument { span, name, ty: *ty });
953+
}
954+
}
955+
949956
if name == sym::simd_select_bitmask {
950957
let (len, _) = require_simd!(arg_tys[1], SimdArgument);
951958

compiler/rustc_ty_utils/src/layout.rs

+16-2
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,21 @@ fn layout_of_uncached<'tcx>(
435435
.size
436436
.checked_mul(e_len, dl)
437437
.ok_or_else(|| error(cx, LayoutError::SizeOverflow(ty)))?;
438-
let align = dl.vector_align(size);
438+
439+
let (abi, align) = if def.repr().packed() && !e_len.is_power_of_two() {
440+
// Non-power-of-two vectors have padding up to the next power-of-two.
441+
// If we're a packed repr, remove the padding while keeping the alignment as close
442+
// to a vector as possible.
443+
(
444+
Abi::Aggregate { sized: true },
445+
AbiAndPrefAlign {
446+
abi: Align::max_for_offset(size),
447+
pref: dl.vector_align(size).pref,
448+
},
449+
)
450+
} else {
451+
(Abi::Vector { element: e_abi, count: e_len }, dl.vector_align(size))
452+
};
439453
let size = size.align_to(align.abi);
440454

441455
// Compute the placement of the vector fields:
@@ -448,7 +462,7 @@ fn layout_of_uncached<'tcx>(
448462
tcx.mk_layout(LayoutS {
449463
variants: Variants::Single { index: FIRST_VARIANT },
450464
fields,
451-
abi: Abi::Vector { element: e_abi, count: e_len },
465+
abi,
452466
largest_niche: e_ly.largest_niche,
453467
size,
454468
align,

tests/codegen/simd/repr-packed.rs

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// compile-flags: -C no-prepopulate-passes
2+
3+
#![crate_type = "lib"]
4+
#![feature(repr_simd, platform_intrinsics)]
5+
6+
#[repr(simd, packed)]
7+
pub struct Simd<T, const N: usize>([T; N]);
8+
9+
#[repr(simd)]
10+
#[derive(Copy, Clone)]
11+
pub struct FullSimd<T, const N: usize>([T; N]);
12+
13+
extern "platform-intrinsic" {
14+
fn simd_mul<T>(a: T, b: T) -> T;
15+
}
16+
17+
// packed non-power-of-two vectors lack the padding that full vectors have, so expand them to full vectors
18+
fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
19+
unsafe {
20+
let mut tmp = core::mem::MaybeUninit::<FullSimd<T, N>>::uninit();
21+
std::ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
22+
tmp.assume_init()
23+
}
24+
}
25+
26+
// CHECK-LABEL: @square_packed
27+
#[no_mangle]
28+
pub fn square_packed(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
29+
// CHECK: align 4 dereferenceable(12) %x
30+
let x = load(x);
31+
unsafe { simd_mul(x, x) }
32+
}

tests/ui/simd/repr_packed.rs

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// run-pass
2+
3+
#![feature(repr_simd, platform_intrinsics)]
4+
#![allow(non_camel_case_types)]
5+
6+
#[repr(simd, packed)]
7+
struct Simd<T, const N: usize>([T; N]);
8+
9+
#[repr(simd)]
10+
struct FullSimd<T, const N: usize>([T; N]);
11+
12+
fn check_size_align<T, const N: usize>() {
13+
use std::mem;
14+
assert_eq!(mem::size_of::<Simd<T, N>>(), mem::size_of::<[T; N]>());
15+
assert_eq!(mem::size_of::<Simd<T, N>>() % mem::align_of::<Simd<T, N>>(), 0);
16+
}
17+
18+
fn check_ty<T>() {
19+
check_size_align::<T, 1>();
20+
check_size_align::<T, 2>();
21+
check_size_align::<T, 3>();
22+
check_size_align::<T, 4>();
23+
check_size_align::<T, 8>();
24+
check_size_align::<T, 9>();
25+
check_size_align::<T, 15>();
26+
}
27+
28+
extern "platform-intrinsic" {
29+
fn simd_add<T>(a: T, b: T) -> T;
30+
}
31+
32+
fn main() {
33+
check_ty::<u8>();
34+
check_ty::<i16>();
35+
check_ty::<u32>();
36+
check_ty::<i64>();
37+
check_ty::<usize>();
38+
check_ty::<f32>();
39+
check_ty::<f64>();
40+
41+
unsafe {
42+
// powers-of-two have no padding and work as usual
43+
let x: Simd<f64, 4> =
44+
simd_add(Simd::<f64, 4>([0., 1., 2., 3.]), Simd::<f64, 4>([2., 2., 2., 2.]));
45+
assert_eq!(std::mem::transmute::<_, [f64; 4]>(x), [2., 3., 4., 5.]);
46+
47+
// packed non-power-of-two vectors lack the padding that full vectors have, so expand them to full vectors
48+
fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
49+
unsafe {
50+
let mut tmp = core::mem::MaybeUninit::<FullSimd<T, N>>::uninit();
51+
std::ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
52+
tmp.assume_init()
53+
}
54+
}
55+
let x: FullSimd<f64, 3> =
56+
simd_add(load(Simd::<f64, 3>([0., 1., 2.])), load(Simd::<f64, 3>([2., 2., 2.])));
57+
assert_eq!(x.0, [2., 3., 4.]);
58+
}
59+
}

0 commit comments

Comments
 (0)