
Commit 20d0860 (1 parent: 571ce5f)

copy_misaligned_words: avoid out-of-bounds accesses

1 file changed: src/mem/impls.rs (+115 −26)
```diff
@@ -41,6 +41,65 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+
+    let mut i = 0;
+    let mut out = 0usize;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the large reads first, this must still be aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(i).cast::<$ty>() = *src.byte_add(i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for a WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u32 u16 u8);
+    debug_assert!(i == load_sz);
+    out
+}
+
+/// Load `load_sz` many bytes from `src.byte_add(WORD_SIZE - load_sz)`. `src` must be `usize`-aligned.
+/// The bytes are returned as the *last* bytes of the return value, i.e., this acts as if we had done
+/// a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    debug_assert!(load_sz < WORD_SIZE);
+
+    let mut i = 0;
+    let mut out = 0usize;
+    let start_shift = WORD_SIZE - load_sz;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the small reads first, `start_shift + i` has in the
+                // meantime become aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(start_shift + i).cast::<$ty>() = *src.byte_add(start_shift + i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for a WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u8 u16 u32);
+    debug_assert!(i == load_sz);
+    out
+}
+
 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
     #[inline(always)]
```
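Both helpers decompose the partial load into `u32`/`u16`/`u8` chunks selected by the bits of `load_sz`, so no byte outside `load_sz` is ever touched. A standalone sketch of the same semantics (not part of the commit; a safe byte loop stands in for the chunked, alignment-preserving loads):

```rust
// Reference model for what `load_aligned_partial` computes: read `load_sz`
// bytes from the start of a word and zero-fill the rest, never touching
// bytes beyond `load_sz`.
const WORD_SIZE: usize = core::mem::size_of::<usize>();

fn load_partial_reference(src: &[u8], load_sz: usize) -> usize {
    assert!(load_sz < WORD_SIZE && load_sz <= src.len());
    let mut out = [0u8; WORD_SIZE];
    out[..load_sz].copy_from_slice(&src[..load_sz]); // only in-bounds bytes
    usize::from_ne_bytes(out) // the out-of-bounds part stays 0
}

fn main() {
    // With load_sz = 3 only bytes 0..3 are read; bytes 3..WORD_SIZE of the
    // result are zero-filled, as if the rest of the word were out of bounds.
    let word = load_partial_reference(&[1, 2, 3], 3);
    assert_eq!(word.to_ne_bytes()[..3], [1, 2, 3]);
    assert!(word.to_ne_bytes()[3..].iter().all(|&b| b == 0));
}
```

The `u32 u16 u8` order (largest chunk first) in `load_aligned_partial` is what keeps each chunked access aligned; `load_aligned_end_partial` reads smallest-first for the mirrored reason.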
```diff
@@ -66,40 +125,55 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
         let mut dest_usize = dest as *mut usize;
         let dest_end = dest.wrapping_add(n) as *mut usize;
 
         // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
         let offset = src as usize & WORD_MASK;
         let shift = offset * 8;
 
         // Realign src
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
 
-        while dest_usize < dest_end {
+        while dest_usize.wrapping_add(1) < dest_end {
             src_aligned = src_aligned.wrapping_add(1);
             let cur_word = *src_aligned;
             #[cfg(target_endian = "little")]
-            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
             #[cfg(target_endian = "big")]
-            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
             prev_word = cur_word;
 
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
             dest_usize = dest_usize.wrapping_add(1);
         }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_add(1);
+        let cur_word = load_aligned_partial(src_aligned, offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+        // prev_word does not matter any more
+
+        *dest_usize = reassembled;
+        // dest_usize does not matter any more
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
```
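The loop now stops one word early, and the final word, whose tail is out of bounds on the `src` side, goes through `load_aligned_partial` instead. The shift arithmetic is unchanged; a sketch of one little-endian reassembly step, assuming `WORD_SIZE = 8` and an arbitrary `offset = 3`:

```rust
// One step of the `target_endian = "little"` reassembly: the low 5 bytes of
// the output come from the top of `prev_word`, the high 3 bytes from the
// bottom of `cur_word`.
fn main() {
    const WORD_SIZE: usize = 8; // the sketch fixes the word size at 8 bytes
    let offset = 3usize; // src % WORD_SIZE; guaranteed to be in 1..WORD_SIZE
    let shift = offset * 8;

    // Byte values encode their position in the source stream; the misaligned
    // window we want starts at source byte 3, in the middle of `prev_word`.
    let prev_word = u64::from_le_bytes([0, 1, 2, 3, 4, 5, 6, 7]);
    let cur_word = u64::from_le_bytes([8, 9, 10, 11, 12, 13, 14, 15]);

    // The little-endian branch of the loop body.
    let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);

    // Exactly source bytes 3..11: what a single misaligned read would return.
    assert_eq!(reassembled.to_le_bytes(), [3, 4, 5, 6, 7, 8, 9, 10]);
}
```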
```diff
@@ -164,40 +238,55 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
         }
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(not(feature = "mem-unaligned"))]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+        debug_assert!(src.addr() % WORD_SIZE != 0);
+
         let mut dest_usize = dest as *mut usize;
-        let dest_start = dest.wrapping_sub(n) as *mut usize;
+        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start
 
         // Calculate the misalignment offset and shift needed to reassemble value.
+        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
         let offset = src as usize & WORD_MASK;
         let shift = offset * 8;
 
-        // Realign src_aligned
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        // Realign src
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_partial(src_aligned, offset);
 
-        while dest_start < dest_usize {
+        while dest_start.wrapping_add(1) < dest_usize {
             src_aligned = src_aligned.wrapping_sub(1);
             let cur_word = *src_aligned;
             #[cfg(target_endian = "little")]
-            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+            let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
             #[cfg(target_endian = "big")]
-            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
             prev_word = cur_word;
 
             dest_usize = dest_usize.wrapping_sub(1);
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
         }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_sub(1);
+        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+        // prev_word does not matter any more
+
+        dest_usize = dest_usize.wrapping_sub(1);
+        *dest_usize = reassembled;
     }
 
+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+    /// `src` *must not* be `usize`-aligned.
     #[cfg(feature = "mem-unaligned")]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
```
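A hypothetical way to exercise this path (not from the commit): copy out of a word-aligned buffer at every misaligned offset and compare byte-for-byte. On targets where `copy_from_slice` lowers to this crate's `memcpy`, Miri or a sanitizer would flag the old code's reads past the buffer, while the patched code stays in bounds:

```rust
// Hypothetical regression test for misaligned-source copies.
fn main() {
    const WORD_SIZE: usize = core::mem::size_of::<usize>();

    // Word-aligned backing storage, so slicing at `offset` yields a source
    // pointer with `addr % WORD_SIZE == offset`, i.e. genuinely misaligned.
    let mut backing = [0usize; 8];
    let bytes = unsafe {
        core::slice::from_raw_parts_mut(backing.as_mut_ptr().cast::<u8>(), 8 * WORD_SIZE)
    };
    for (i, b) in bytes.iter_mut().enumerate() {
        *b = i as u8;
    }

    let n = 4 * WORD_SIZE; // a multiple of the word size, as the fast path requires
    for offset in 1..WORD_SIZE {
        let src = &bytes[offset..offset + n];
        let mut dest = vec![0u8; n];
        // copy_from_slice lowers to memcpy, which this file implements on
        // targets that use compiler-builtins' mem functions.
        dest.copy_from_slice(src);
        assert_eq!(dest, src);
    }
}
```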
