
Commit 6ee9f22

turn load_prefix macro into helper function
1 parent: 23207fd

File tree: 1 file changed (+37 -30 lines)

src/mem/impls.rs (+37 -30)
```diff
@@ -41,30 +41,43 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }
 
+/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
+/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
+/// chunk size if a load happened.
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_chunk_aligned<T: Copy>(
+    src: *const usize,
+    dst: *mut usize,
+    load_sz: usize,
+    offset: usize,
+) -> usize {
+    let chunk_sz = core::mem::size_of::<T>();
+    if (load_sz & chunk_sz) != 0 {
+        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
+        offset | chunk_sz
+    } else {
+        offset
+    }
+}
+
 /// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
 /// read with the out-of-bounds part filled with 0s.
 /// `load_sz` must be strictly less than `WORD_SIZE`.
 #[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
 unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
 
     let mut i = 0;
     let mut out = 0usize;
-    macro_rules! load_prefix {
-        ($($ty:ty)+) => {$(
-            let chunk_sz = core::mem::size_of::<$ty>();
-            if (load_sz & chunk_sz) != 0 {
-                // Since we are doing the large reads first, this must still be aligned to `chunk_sz`.
-                *(&raw mut out).wrapping_byte_add(i).cast::<$ty>() = *src.wrapping_byte_add(i).cast::<$ty>();
-                i |= chunk_sz;
-            }
-        )+};
-    }
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_size < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
-    load_prefix!(u32 u16 u8);
+    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
+    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
+    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
     debug_assert!(i == load_sz);
     out
 }
@@ -77,25 +90,19 @@ unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
 #[inline(always)]
 unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (since `load_sz < WORD_SIZE`).
+    const { assert!(WORD_SIZE <= 8) };
 
     let mut i = 0;
     let mut out = 0usize;
-    let start_shift = WORD_SIZE - load_sz;
-    macro_rules! load_prefix {
-        ($($ty:ty)+) => {$(
-            let chunk_sz = core::mem::size_of::<$ty>();
-            if (load_sz & chunk_sz) != 0 {
-                // Since we are doing the small reads first, `start_shift + i` has in the mean
-                // time become aligned to `chunk_sz`.
-                *(&raw mut out).wrapping_byte_add(start_shift + i).cast::<$ty>() = *src.wrapping_byte_add(start_shift + i).cast::<$ty>();
-                i |= chunk_sz;
-            }
-        )+};
-    }
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_size < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
-    load_prefix!(u8 u16 u32);
+    // Obtain pointers pointing to the beginning of the range we want to load.
+    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
+    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
+    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
     debug_assert!(i == load_sz);
     out
 }
```
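For intuition about the bit trick the new helper relies on: since `load_sz < WORD_SIZE <= 8`, `load_sz` fits in three bits, and each set bit says whether a 4-, 2-, or 1-byte chunk is needed. The following standalone sketch is not part of the commit; `chunk_offsets` is an illustrative name, not the crate's API. It computes the (offset, size) pairs that `load_aligned_partial` effectively loads:

```rust
// Sketch: decompose `load_sz` (< 8) into power-of-two chunks, largest first,
// mirroring the u32/u16/u8 call order in `load_aligned_partial`.
fn chunk_offsets(load_sz: usize) -> Vec<(usize, usize)> {
    assert!(load_sz < 8);
    let mut offset = 0;
    let mut chunks = Vec::new(); // (offset, chunk_sz) pairs
    for chunk_sz in [4, 2, 1] {
        // `load_sz & chunk_sz` tests one bit of `load_sz`, exactly as
        // `load_chunk_aligned` does.
        if load_sz & chunk_sz != 0 {
            // Larger chunks were consumed first, so `offset` has no bits set
            // below `chunk_sz`; hence `offset | chunk_sz == offset + chunk_sz`,
            // and each load's offset is `chunk_sz`-aligned.
            chunks.push((offset, chunk_sz));
            offset |= chunk_sz;
        }
    }
    // Matches the `debug_assert!(i == load_sz)` in the real code.
    assert_eq!(offset, load_sz);
    chunks
}

fn main() {
    // load_sz = 7 -> u32 at offset 0, u16 at 4, u8 at 6.
    assert_eq!(chunk_offsets(7), [(0, 4), (4, 2), (6, 1)]);
    // load_sz = 5 -> u32 at offset 0, u8 at 4.
    assert_eq!(chunk_offsets(5), [(0, 4), (4, 1)]);
}
```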

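The end-partial variant is the mirror image: it loads the *last* `load_sz` bytes of the word, so both pointers are shifted forward by `WORD_SIZE - load_sz` and the chunks go smallest-first, which keeps each absolute offset aligned by the time it is used. A companion sketch of that offset arithmetic, again with an illustrative name and assuming `WORD_SIZE == 8`:

```rust
// Sketch: offsets for `load_aligned_end_partial`, assuming WORD_SIZE == 8.
// The loaded range starts at byte `8 - load_sz`; chunks go smallest-first so
// each absolute offset `start + i` is `chunk_sz`-aligned when reached.
fn end_chunk_offsets(load_sz: usize) -> Vec<(usize, usize)> {
    assert!(load_sz < 8);
    let start = 8 - load_sz; // the `WORD_SIZE - load_sz` pointer shift
    let mut i = 0;
    let mut chunks = Vec::new();
    for chunk_sz in [1, 2, 4] {
        if load_sz & chunk_sz != 0 {
            // `i` holds exactly the bits of `load_sz` below `chunk_sz`, so
            // `start + i = 8 - (load_sz - i)` is a multiple of `chunk_sz`
            // (since `chunk_sz` divides both 8 and `load_sz - i`).
            assert_eq!((start + i) % chunk_sz, 0);
            chunks.push((start + i, chunk_sz));
            i |= chunk_sz;
        }
    }
    chunks
}

fn main() {
    // load_sz = 3 -> u8 at absolute offset 5, u16 at 6.
    assert_eq!(end_chunk_offsets(3), [(5, 1), (6, 2)]);
    // load_sz = 7 -> u8 at 1, u16 at 2, u32 at 4.
    assert_eq!(end_chunk_offsets(7), [(1, 1), (2, 2), (4, 4)]);
}
```

This ordering difference is why the commit's helper can be shared by both functions: the caller alone decides whether to walk the chunks largest-first (prefix) or smallest-first (suffix), and the helper only needs the invariant that the pointers it receives are `T`-aligned.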