Commit 461b5f8

Merge pull request #405 from nbdd0121/master
2 parents 826d9e9 + ce86d41

4 files changed (+480 -50 lines)

build.rs (+5)

@@ -33,6 +33,11 @@ fn main() {
         println!("cargo:rustc-cfg=feature=\"mem\"");
     }

+    // These targets have hardware unaligned access support.
+    if target.contains("x86_64") || target.contains("i686") || target.contains("aarch64") {
+        println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
+    }
+
     // NOTE we are going to assume that llvm-target, which determines our codegen option, matches the
     // target triple. This is usually correct for our built-in targets but can break in the presence of
     // custom targets, which can have arbitrary names.
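For background, a Cargo build script receives the target triple in the TARGET environment variable and can turn it into a compile-time cfg flag by printing a cargo:rustc-cfg line, which is the mechanism the hunk above relies on. A minimal sketch of the pattern (illustrative only, not the crate's actual build script beyond the check shown in the diff):

use std::env;

fn main() {
    // Cargo passes the target triple to build scripts in the TARGET env var.
    let target = env::var("TARGET").unwrap();

    // Targets known to support hardware unaligned access get the extra cfg flag,
    // mirroring the check added in this commit.
    if target.contains("x86_64") || target.contains("i686") || target.contains("aarch64") {
        println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
    }
}

Code gated with #[cfg(feature = "mem-unaligned")] in src/mem/impls.rs below is then compiled only for those targets.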

src/mem/impls.rs (+246 -16)

@@ -1,27 +1,257 @@
+use core::intrinsics::likely;
+
+const WORD_SIZE: usize = core::mem::size_of::<usize>();
+const WORD_MASK: usize = WORD_SIZE - 1;
+
+// If the number of bytes involved exceeds this threshold we will opt for word-wise copy.
+// The value here selected is max(2 * WORD_SIZE, 16):
+// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through
+//   word-wise copy.
+// * The word-wise copy logic needs to perform some checks so it has some small overhead. A 16-byte
+//   threshold ensures that even on 32-bit platforms we have copied at least 8 bytes through
+//   word-wise copy so the saving of word-wise copy outweighs the fixed overhead.
+const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 {
+    2 * WORD_SIZE
+} else {
+    16
+};
+
+#[cfg(feature = "mem-unaligned")]
+unsafe fn read_usize_unaligned(x: *const usize) -> usize {
+    // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
+    // is translated to memcpy in LLVM.
+    let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read();
+    core::mem::transmute(x_read)
+}
+
 #[inline(always)]
-pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, n: usize) {
-    let mut i = 0;
-    while i < n {
-        *dest.add(i) = *src.add(i);
-        i += 1;
+pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
+    #[inline(always)]
+    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+        let dest_end = dest.add(n);
+        while dest < dest_end {
+            *dest = *src;
+            dest = dest.add(1);
+            src = src.add(1);
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        let mut dest_usize = dest as *mut usize;
+        let mut src_usize = src as *mut usize;
+        let dest_end = dest.add(n) as *mut usize;
+
+        while dest_usize < dest_end {
+            *dest_usize = *src_usize;
+            dest_usize = dest_usize.add(1);
+            src_usize = src_usize.add(1);
+        }
+    }
+
+    #[cfg(not(feature = "mem-unaligned"))]
+    #[inline(always)]
+    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        let mut dest_usize = dest as *mut usize;
+        let dest_end = dest.add(n) as *mut usize;
+
+        // Calculate the misalignment offset and shift needed to reassemble value.
+        let offset = src as usize & WORD_MASK;
+        let shift = offset * 8;
+
+        // Realign src
+        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
+        // This will read (but won't use) bytes out of bounds.
+        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
+
+        while dest_usize < dest_end {
+            src_aligned = src_aligned.add(1);
+            let cur_word = *src_aligned;
+            #[cfg(target_endian = "little")]
+            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+            #[cfg(target_endian = "big")]
+            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            prev_word = cur_word;
+
+            *dest_usize = resembled;
+            dest_usize = dest_usize.add(1);
+        }
+    }
+
+    #[cfg(feature = "mem-unaligned")]
+    #[inline(always)]
+    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        let mut dest_usize = dest as *mut usize;
+        let mut src_usize = src as *mut usize;
+        let dest_end = dest.add(n) as *mut usize;
+
+        while dest_usize < dest_end {
+            *dest_usize = read_usize_unaligned(src_usize);
+            dest_usize = dest_usize.add(1);
+            src_usize = src_usize.add(1);
+        }
     }
+
+    if n >= WORD_COPY_THRESHOLD {
+        // Align dest
+        // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
+        let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
+        copy_forward_bytes(dest, src, dest_misalignment);
+        dest = dest.add(dest_misalignment);
+        src = src.add(dest_misalignment);
+        n -= dest_misalignment;
+
+        let n_words = n & !WORD_MASK;
+        let src_misalignment = src as usize & WORD_MASK;
+        if likely(src_misalignment == 0) {
+            copy_forward_aligned_words(dest, src, n_words);
+        } else {
+            copy_forward_misaligned_words(dest, src, n_words);
+        }
+        dest = dest.add(n_words);
+        src = src.add(n_words);
+        n -= n_words;
+    }
+    copy_forward_bytes(dest, src, n);
 }

 #[inline(always)]
-pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) {
-    // copy from end
-    let mut i = n;
-    while i != 0 {
-        i -= 1;
-        *dest.add(i) = *src.add(i);
+pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
+    // The following backward copy helper functions use the pointers past the end
+    // as their inputs instead of pointers to the start!
+    #[inline(always)]
+    unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+        let dest_start = dest.sub(n);
+        while dest_start < dest {
+            dest = dest.sub(1);
+            src = src.sub(1);
+            *dest = *src;
+        }
     }
+
+    #[inline(always)]
+    unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        let mut dest_usize = dest as *mut usize;
+        let mut src_usize = src as *mut usize;
+        let dest_start = dest.sub(n) as *mut usize;
+
+        while dest_start < dest_usize {
+            dest_usize = dest_usize.sub(1);
+            src_usize = src_usize.sub(1);
+            *dest_usize = *src_usize;
+        }
+    }
+
+    #[cfg(not(feature = "mem-unaligned"))]
+    #[inline(always)]
+    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        let mut dest_usize = dest as *mut usize;
+        let dest_start = dest.sub(n) as *mut usize;
+
+        // Calculate the misalignment offset and shift needed to reassemble value.
+        let offset = src as usize & WORD_MASK;
+        let shift = offset * 8;
+
+        // Realign src_aligned
+        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
+        // This will read (but won't use) bytes out of bounds.
+        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
+
+        while dest_start < dest_usize {
+            src_aligned = src_aligned.sub(1);
+            let cur_word = *src_aligned;
+            #[cfg(target_endian = "little")]
+            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+            #[cfg(target_endian = "big")]
+            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            prev_word = cur_word;
+
+            dest_usize = dest_usize.sub(1);
+            *dest_usize = resembled;
+        }
+    }
+
+    #[cfg(feature = "mem-unaligned")]
+    #[inline(always)]
+    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        let mut dest_usize = dest as *mut usize;
+        let mut src_usize = src as *mut usize;
+        let dest_start = dest.sub(n) as *mut usize;
+
+        while dest_start < dest_usize {
+            dest_usize = dest_usize.sub(1);
+            src_usize = src_usize.sub(1);
+            *dest_usize = read_usize_unaligned(src_usize);
+        }
+    }
+
+    let mut dest = dest.add(n);
+    let mut src = src.add(n);
+
+    if n >= WORD_COPY_THRESHOLD {
+        // Align dest
+        // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
+        let dest_misalignment = dest as usize & WORD_MASK;
+        copy_backward_bytes(dest, src, dest_misalignment);
+        dest = dest.sub(dest_misalignment);
+        src = src.sub(dest_misalignment);
+        n -= dest_misalignment;
+
+        let n_words = n & !WORD_MASK;
+        let src_misalignment = src as usize & WORD_MASK;
+        if likely(src_misalignment == 0) {
+            copy_backward_aligned_words(dest, src, n_words);
+        } else {
+            copy_backward_misaligned_words(dest, src, n_words);
+        }
+        dest = dest.sub(n_words);
+        src = src.sub(n_words);
+        n -= n_words;
+    }
+    copy_backward_bytes(dest, src, n);
 }

 #[inline(always)]
-pub unsafe fn set_bytes(s: *mut u8, c: u8, n: usize) {
-    let mut i = 0;
-    while i < n {
-        *s.add(i) = c;
-        i += 1;
+pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
+    #[inline(always)]
+    pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) {
+        let end = s.add(n);
+        while s < end {
+            *s = c;
+            s = s.add(1);
+        }
+    }
+
+    #[inline(always)]
+    pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) {
+        let mut broadcast = c as usize;
+        let mut bits = 8;
+        while bits < WORD_SIZE * 8 {
+            broadcast |= broadcast << bits;
+            bits *= 2;
+        }
+
+        let mut s_usize = s as *mut usize;
+        let end = s.add(n) as *mut usize;
+
+        while s_usize < end {
+            *s_usize = broadcast;
+            s_usize = s_usize.add(1);
+        }
+    }
+
+    if likely(n >= WORD_COPY_THRESHOLD) {
+        // Align s
+        // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
+        let misalignment = (s as usize).wrapping_neg() & WORD_MASK;
+        set_bytes_bytes(s, c, misalignment);
+        s = s.add(misalignment);
+        n -= misalignment;
+
+        let n_words = n & !WORD_MASK;
+        set_bytes_words(s, c, n_words);
+        s = s.add(n_words);
+        n -= n_words;
     }
+    set_bytes_bytes(s, c, n);
 }
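
The core trick above is in copy_forward_misaligned_words and copy_backward_misaligned_words: when the source cannot be word aligned together with the destination, the source is still read through aligned word loads, and each destination word is reassembled from two neighbouring source words with a pair of shifts. A small self-contained sketch of the little-endian forward case, using plain u64 values instead of raw pointer loads (the buffer contents and offset are made up for illustration):

fn main() {
    // A 16-byte buffer; we want the (unaligned) 8-byte word starting at offset 3.
    let bytes: [u8; 16] = [
        0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
        0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00,
    ];
    let offset = 3usize;
    let shift = (offset * 8) as u32;

    // The two aligned words that straddle the unaligned word we want.
    let mut lo = [0u8; 8];
    lo.copy_from_slice(&bytes[0..8]);
    let mut hi = [0u8; 8];
    hi.copy_from_slice(&bytes[8..16]);
    let prev_word = u64::from_le_bytes(lo);
    let cur_word = u64::from_le_bytes(hi);

    // Little-endian reassembly, as in copy_forward_misaligned_words: the low bytes
    // of the result come from the high part of prev_word, the high bytes from the
    // low part of cur_word.
    let reassembled = prev_word >> shift | cur_word << (64 - shift);

    // Check against a direct byte-wise unaligned read of bytes[3..11].
    let mut expected_bytes = [0u8; 8];
    expected_bytes.copy_from_slice(&bytes[3..11]);
    assert_eq!(reassembled, u64::from_le_bytes(expected_bytes));
    println!("{:#018x}", reassembled); // 0xbbaa998877665544
}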

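set_bytes_words widens the fill byte into a full word by OR-ing the pattern with a shifted copy of itself, doubling the filled width each round (8, 16, 32 bits on a 64-bit target). A standalone sketch of just that broadcast step (values are illustrative):

fn main() {
    // The fill byte, as in set_bytes(s, c, n).
    let c: u8 = 0xAB;
    let word_bits = usize::BITS as usize; // WORD_SIZE * 8

    // Doubling-shift broadcast, as in set_bytes_words: 0xAB -> 0xABAB -> 0xABABABAB -> ...
    let mut broadcast = c as usize;
    let mut bits = 8;
    while bits < word_bits {
        broadcast |= broadcast << bits;
        bits *= 2;
    }

    // Every byte of the word now equals the fill byte.
    assert!(broadcast.to_ne_bytes().iter().all(|&b| b == c));
    println!("{:#x}", broadcast); // 0xabababababababab on a 64-bit target
}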
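All three routines share the same head/body/tail split: copy single bytes until the destination pointer is word aligned, then whole words, then the remaining bytes. copy_forward and set_bytes compute the head length as (ptr as usize).wrapping_neg() & WORD_MASK, i.e. the distance to the next word boundary, and round the body down to whole words with n & !WORD_MASK; copy_backward, which walks from the end, uses dest as usize & WORD_MASK for its head instead. A worked sketch of that arithmetic with made-up numbers, assuming an 8-byte word:

fn main() {
    // Assume an 8-byte word for the example (WORD_SIZE on 64-bit targets).
    const WORD_SIZE: u64 = 8;
    const WORD_MASK: u64 = WORD_SIZE - 1;

    // Made-up destination address, 3 bytes past an 8-byte boundary, and length.
    let dest_addr: u64 = 0x1003;
    let n: u64 = 30;

    // Head: bytes copied one at a time until dest is word aligned.
    let dest_misalignment = dest_addr.wrapping_neg() & WORD_MASK;
    assert_eq!(dest_misalignment, 5); // 0x1003 + 5 = 0x1008, which is word aligned

    // Body: as many whole words as fit in what remains.
    let n_words = (n - dest_misalignment) & !WORD_MASK;
    assert_eq!(n_words, 24); // three 8-byte words

    // Tail: whatever is left is copied byte by byte again.
    let tail = n - dest_misalignment - n_words;
    assert_eq!(tail, 1);

    println!("head={} body={} tail={}", dest_misalignment, n_words, tail);
}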