Skip to content

Commit 547c468

Browse files
committed
[WIP] KnownLayout::validate_size_align
TODO: Tests
1 parent a494e2b commit 547c468

File tree

2 files changed

+260
-0
lines changed

2 files changed

+260
-0
lines changed

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ optional = true
6969
zerocopy-derive = { version = "=0.7.3", path = "zerocopy-derive" }
7070

7171
[dev-dependencies]
72+
assert_matches = "1.5"
73+
itertools = "0.11"
7274
rand = { version = "0.8.5", features = ["small_rng"] }
7375
rustversion = "1.0"
7476
static_assertions = "1.1"

src/lib.rs

+258
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,21 @@ pub struct DstLayout {
219219
/// `size_of::<T>()`. For DSTs, the size represents the size of the type
220220
/// when the trailing slice field contains 0 elements.
221221
/// - For all types, the alignment represents the alignment of the type.
222+
// TODO: If we end up replacing this with separate size and alignment to
223+
// make Kani happy, file an issue to eventually adopt the stdlib's
224+
// `Alignment` type trick.
222225
_base_layout: Layout,
223226
/// For sized types, `None`. For DSTs, the size of the element type of the
224227
/// trailing slice.
225228
_trailing_slice_elem_size: Option<usize>,
226229
}
227230

231+
#[cfg_attr(test, derive(Copy, Clone, Debug))]
232+
enum _CastType {
233+
_Prefix,
234+
_Suffix,
235+
}
236+
228237
impl DstLayout {
229238
/// Constructs a `DstLayout` which describes `T`.
230239
///
@@ -251,6 +260,162 @@ impl DstLayout {
251260
_trailing_slice_elem_size: Some(mem::size_of::<T>()),
252261
}
253262
}
263+
264+
/// TODO
265+
///
266+
/// The caller is responsible for ensuring that `addr + bytes_len` does not
267+
/// overflow `usize`.
268+
///
269+
/// # Panics
270+
///
271+
/// If `addr + bytes_len` overflows `usize`, `validate_cast` may panic, or
272+
/// it may return incorrect results. No guarantees are made about when
273+
/// `validate_cast` will panic. The caller should not rely on
274+
/// `validate_cast` panicking in any particular condition, even if
275+
/// `debug_assertions` are enabled.
276+
const fn _validate_cast(
277+
&self,
278+
addr: usize,
279+
bytes_len: usize,
280+
cast_type: _CastType,
281+
) -> Option<(usize, usize)> {
282+
// `debug_assert!`, but with `#[allow(clippy::arithmetic_side_effects)]`.
283+
macro_rules! __debug_assert {
284+
($e:expr) => {
285+
debug_assert!({
286+
#[allow(clippy::arithmetic_side_effects)]
287+
let e = $e;
288+
e
289+
})
290+
};
291+
}
292+
293+
let base_size = self._base_layout.size();
294+
295+
// Precondition
296+
__debug_assert!(addr.checked_add(bytes_len).is_some());
297+
298+
// LEMMA 0: max_slice_bytes + base_size == bytes_len
299+
//
300+
// LEMMA 1: base_size <= bytes_len:
301+
// - If `base_size > bytes_len`, `bytes_len.checked_sub(base_size)`
302+
// returns `None`, and we return.
303+
//
304+
// TODO(#67): Once our MSRV is 1.65, use let-else:
305+
// https://blog.rust-lang.org/2022/11/03/Rust-1.65.0.html#let-else-statements
306+
let max_slice_bytes = if let Some(max_byte_slice) = bytes_len.checked_sub(base_size) {
307+
max_byte_slice
308+
} else {
309+
return None;
310+
};
311+
312+
// Lemma 0
313+
__debug_assert!(max_slice_bytes + base_size == bytes_len);
314+
315+
// Lemma 1
316+
__debug_assert!(base_size <= bytes_len);
317+
318+
let (elems, self_bytes) = if let Some(elem_size) = self._trailing_slice_elem_size {
319+
// TODO(#67): Once our MSRV is 1.65, use let-else:
320+
// https://blog.rust-lang.org/2022/11/03/Rust-1.65.0.html#let-else-statements
321+
let elem_size = if let Some(elem_size) = NonZeroUsize::new(elem_size) {
322+
elem_size
323+
} else {
324+
panic!("attempted to cast to slice type with zero-sized element");
325+
};
326+
327+
// Guaranteed not to divide by 0 because `elem_size` is a
328+
// `NonZeroUsize`.
329+
#[allow(clippy::arithmetic_side_effects)]
330+
let elems = max_slice_bytes / elem_size.get();
331+
332+
// NOTE: Another option for this step in the algorithm is to set
333+
// `slice_bytes = elems * elem_size`. However, using multiplication
334+
// causes Kani to choke. In practice, the compiler is likely to
335+
// generate identical machine code in both cases. Note that this
336+
// divide-then-mod approach is trivially optimizable into a single
337+
// operation that computes both the quotient and the remainder.
338+
339+
// First line is guaranteed not to mod by 0 because `elem_size` is a
340+
// `NonZeroUsize`. Second line is guaranteed not to underflow
341+
// because `rem <= max_slice_bytes` thanks to the mod operation.
342+
//
343+
// LEMMA 2: slice_bytes <= max_slice_bytes
344+
#[allow(clippy::arithmetic_side_effects)]
345+
let rem = max_slice_bytes % elem_size.get();
346+
#[allow(clippy::arithmetic_side_effects)]
347+
let slice_bytes = max_slice_bytes - rem;
348+
349+
// Lemma 2
350+
__debug_assert!(slice_bytes <= max_slice_bytes);
351+
352+
// Guaranteed not to overflow:
353+
// - max_slice_bytes + base_size == bytes_len (lemma 0)
354+
// - slice_bytes <= max_slice_bytes (lemma 2)
355+
// - slice_bytes + base_size <= bytes_len (substitution)
356+
// - bytes_len <= usize::MAX (bytes_len: usize)
357+
// - slice_bytes + base_size <= usize::MAX (substitution)
358+
//
359+
// LEMMA 3: self_bytes <= bytes_len: TODO
360+
#[allow(clippy::arithmetic_side_effects)]
361+
let self_bytes = base_size + slice_bytes;
362+
363+
// Lemma 3
364+
__debug_assert!(self_bytes <= bytes_len);
365+
366+
(elems, self_bytes)
367+
} else {
368+
(0, base_size)
369+
};
370+
371+
// LEMMA 4: self_bytes <= bytes_len:
372+
// - `if` branch returns `self_bytes`; lemma 3 guarantees `self_bytes <=
373+
// bytes_len`
374+
// - `else` branch returns `base_size`; lemma 1 guarantees `base_size <=
375+
// bytes_len`
376+
377+
// Lemma 5
378+
__debug_assert!(self_bytes <= bytes_len);
379+
380+
// `self_addr` indicates where in the given byte range the `Self` will
381+
// start. If we're doing a prefix cast, it starts at the beginning. If
382+
// we're doing a suffix cast, it starts after whatever bytes are
383+
// remaining.
384+
let (self_addr, split_at) = match cast_type {
385+
_CastType::_Prefix => (addr, self_bytes),
386+
_CastType::_Suffix => {
387+
// Guaranteed not to underflow because `self_bytes <= bytes_len`
388+
// (lemma 4).
389+
//
390+
// LEMMA 5: split_at == bytes_len - self_bytes
391+
#[allow(clippy::arithmetic_side_effects)]
392+
let split_at = bytes_len - self_bytes;
393+
394+
// Lemma 5
395+
__debug_assert!(split_at == bytes_len - self_bytes);
396+
397+
// Guaranteed not to overflow:
398+
// - addr + bytes_len <= usize::MAX (method precondition)
399+
// - split_at == bytes_len - self_bytes (lemma 5)
400+
// - addr + split_at == addr + bytes_len - self_bytes (substitution)
401+
// - addr + split_at <= addr + bytes_len
402+
// - addr + split_at <= usize::MAX (substitution)
403+
#[allow(clippy::arithmetic_side_effects)]
404+
let self_addr = addr + split_at;
405+
406+
(self_addr, split_at)
407+
}
408+
};
409+
410+
// Guaranteed not to divide by 0 because `.align()` guarantees that it
411+
// returns a non-zero value.
412+
#[allow(clippy::arithmetic_side_effects)]
413+
if self_addr % self._base_layout.align() != 0 {
414+
return None;
415+
}
416+
417+
Some((elems, split_at))
418+
}
254419
}
255420

256421
/// A trait which carries information about a type's layout that is used by the
@@ -2738,6 +2903,99 @@ mod tests {
27382903
}
27392904
}
27402905

2906+
// This test takes a long time when running under Miri, so we skip it in
2907+
// that case. This is acceptable because this is a logic test that doesn't
2908+
// attempt to expose UB.
2909+
#[test]
2910+
#[cfg_attr(miri, ignore)]
2911+
fn test_validate_cast() {
2912+
let layout = |base_size, align, _trailing_slice_elem_size| DstLayout {
2913+
_base_layout: Layout::from_size_align(base_size, align).unwrap(),
2914+
_trailing_slice_elem_size,
2915+
};
2916+
2917+
macro_rules! test {
2918+
(
2919+
layout($base_size:tt, $align:tt, $trailing_size:tt)
2920+
.validate_cast($addr:tt, $bytes_len:tt, $cast_type:tt), $expect:pat $(,)?
2921+
) => {
2922+
itertools::iproduct!(
2923+
test!(@generate_usize $base_size),
2924+
test!(@generate_align $align),
2925+
test!(@generate_opt_usize $trailing_size),
2926+
test!(@generate_usize $addr),
2927+
test!(@generate_usize $bytes_len),
2928+
test!(@generate_cast_type $cast_type)
2929+
).for_each(|(base_size, align, trailing_size, addr, bytes_len, cast_type)| {
2930+
assert_matches::assert_matches!(
2931+
layout(base_size, align, trailing_size)._validate_cast(addr, bytes_len, cast_type), $expect,
2932+
"layout({base_size}, {align}, {trailing_size:?}).validate_cast({addr}, {bytes_len}, {cast_type:?})",
2933+
);
2934+
});
2935+
};
2936+
(@generate_usize _) => { 0..8 };
2937+
(@generate_align _) => { [1, 2, 4, 8, 16] };
2938+
(@generate_opt_usize _) => { [None].into_iter().chain((0..8).map(Some).into_iter()) };
2939+
(@generate_cast_type _) => { [_CastType::_Prefix, _CastType::_Suffix] };
2940+
(@generate_cast_type $variant:ident) => { [_CastType::$variant] };
2941+
// Some expressions need to be wrapped in parentheses in order to be
2942+
// valid `tt`s (required by the top match pattern). See the comment
2943+
// below for more details. This arm removes these parentheses to
2944+
// avoid generating an `unused_parens` warning.
2945+
(@$_:ident ($vals:expr)) => { $vals };
2946+
(@$_:ident $vals:expr) => { $vals };
2947+
}
2948+
2949+
// The format of all of these test cases is:
2950+
//
2951+
// layout(_, _, _).validate_cast(_, _, _), Some((_, _))
2952+
// | | | | | | | |
2953+
// base_size ----+ | | | | | | |
2954+
// align -----------+ | | | | | |
2955+
// trailing_size ------+ | | | | |
2956+
// addr --------------------------------+ | | | |
2957+
// bytes_len ------------------------------+ | | |
2958+
// cast_type ---------------------------------+ | |
2959+
// elems -----------------------------------------------+ |
2960+
// split_at -----------------------------------------------+
2961+
//
2962+
// Each argument can either be an iterator or a wildcard. Each
2963+
// wildcarded variable is implicitly replaced by an iterator over a
2964+
// representative sample of values for that variable. Each `test!`
2965+
// invocation iterates over every combination of values provided by each
2966+
// variable's iterator (ie, the cartesian product) and validates that
2967+
// the results are expected.
2968+
//
2969+
// The final argument uses the same syntax, but it has a different
2970+
// meaning. The final argument is a pattern that will be supplied to
2971+
// `assert_matches!` to validate the computed result for each
2972+
// combination of input values.
2973+
//
2974+
// Note that the meta-variables that match these variables have the `tt`
2975+
// type, and some valid expressions are not valid `tt`s (such as
2976+
// `a..b`). In this case, wrap the expression in parentheses, and it
2977+
// will become valid `tt`.
2978+
2979+
// base_size is too big for the memory region.
2980+
test!(layout((1..8), _, _).validate_cast(_, [0], _), None);
2981+
test!(layout((2..8), _, _).validate_cast(_, [1], _), None);
2982+
2983+
// addr is unaligned
2984+
test!(layout(_, [2], [None]).validate_cast([1, 3, 5, 7, 9], _, _Prefix), None);
2985+
test!(layout(_, [2], ((1..8).map(Some))).validate_cast([1, 3, 5, 7, 9], _, _Prefix), None);
2986+
2987+
// TODO: Test Suffix cast failure cases, especially regarding alignment.
2988+
2989+
// TDOO: Success cases
2990+
}
2991+
2992+
#[test]
2993+
fn test_validate_cast_panics() {
2994+
// TODO: Test for these cases:
2995+
// - addr + bytes overflows usize
2996+
// - zero-sized trailing element type
2997+
}
2998+
27412999
#[test]
27423000
fn test_known_layout() {
27433001
// Test that `$ty` and `ManuallyDrop<$ty>` have the expected layout.

0 commit comments

Comments
 (0)