@@ -41,6 +41,69 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
    core::mem::transmute(x_read)
}

+/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+/// read with the out-of-bounds part filled with 0s.
+/// `load_sz` must not exceed WORD_SIZE.
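+/// For example (illustrative, assuming `WORD_SIZE == 8`): with `load_sz == 3` this reads a `u16`
+/// from the first two bytes of `*src` and a `u8` from the third, so the result matches a full
+/// `usize` read of `src` in its first three memory-order bytes and is zero in the remaining five.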
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+    if load_sz == WORD_SIZE {
+        return *src;
+    }
+
+    let mut i = 0;
+    let mut out = 0usize;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the large reads first, this must still be aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(i).cast::<$ty>() = *src.byte_add(i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (as we handled the full-word case above).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u32 u16 u8);
+    debug_assert!(i == load_sz);
+    out
+}
+
+/// Load `load_sz` many bytes from `src.byte_add(WORD_SIZE - load_sz)`. `src` must be `usize`-aligned.
+/// The bytes are returned as the *last* bytes of the return value, i.e., acts as if we had done
+/// a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+/// `load_sz` must not exceed WORD_SIZE.
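+/// For example (illustrative, assuming `WORD_SIZE == 8`): with `load_sz == 3` this reads a `u8`
+/// and then a `u16` from the last three bytes of `*src`, so the result matches a full `usize`
+/// read of `src` in its last three memory-order bytes and is zero in the first five.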
+#[cfg(not(feature = "mem-unaligned"))]
+#[inline(always)]
+unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+    if load_sz == WORD_SIZE {
+        return *src;
+    }
+
+    let mut i = 0;
+    let mut out = 0usize;
+    let start_shift = WORD_SIZE - load_sz;
+    macro_rules! load_prefix {
+        ($($ty:ty)+) => {$(
+            let chunk_sz = core::mem::size_of::<$ty>();
+            if (load_sz & chunk_sz) != 0 {
+                // Since we are doing the small reads first, `start_shift + i` has in the
+                // meantime become aligned to `chunk_sz`.
+                *(&raw mut out).byte_add(start_shift + i).cast::<$ty>() = *src.byte_add(start_shift + i).cast::<$ty>();
+                i |= chunk_sz;
+            }
+        )+};
+    }
+    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
+    // (as we handled the full-word case above).
+    const { assert!(WORD_SIZE <= 8) };
+    load_prefix!(u8 u16 u32);
+    debug_assert!(i == load_sz);
+    out
+}
+
#[inline(always)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
    #[inline(always)]
@@ -66,9 +129,12 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
        }
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+
        let mut dest_usize = dest as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;
@@ -77,29 +143,37 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
        let shift = offset * 8;

        // Realign src
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
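+        // Illustration: with `offset == 2`, `prev_word` now holds the first `WORD_SIZE - 2` bytes of
+        // `src` at their usual byte positions within the word, while the two bytes that sit before
+        // `src` are zeroed instead of being read out of bounds.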

-        while dest_usize < dest_end {
+        while dest_usize.wrapping_add(1) < dest_end {
            src_aligned = src_aligned.wrapping_add(1);
            let cur_word = *src_aligned;
            #[cfg(target_endian = "little")]
-            let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
            #[cfg(target_endian = "big")]
-            let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+            let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
            prev_word = cur_word;

-            *dest_usize = resembled;
+            *dest_usize = reassembled;
            dest_usize = dest_usize.wrapping_add(1);
        }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_add(1);
+        let cur_word = load_aligned_partial(src_aligned, offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+        // prev_word does not matter any more
+
+        *dest_usize = reassembled;
+        // dest_usize does not matter any more
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
@@ -164,40 +238,51 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
        }
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+        debug_assert!(n > 0 && n % WORD_SIZE == 0);
+
        let mut dest_usize = dest as *mut usize;
-        let dest_start = dest.wrapping_sub(n) as *mut usize;
+        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start

        // Calculate the misalignment offset and shift needed to reassemble value.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

-        // Realign src_aligned
-        let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-        // This will read (but won't use) bytes out of bound.
-        // cfg needed because not all targets will have atomic loads that can be lowered
-        // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-        #[cfg(target_has_atomic_load_store = "ptr")]
-        let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-        #[cfg(not(target_has_atomic_load_store = "ptr"))]
-        let mut prev_word = core::ptr::read_volatile(src_aligned);
+        // Realign src
+        let mut src_aligned = src.byte_sub(offset) as *mut usize;
+        let mut prev_word = load_aligned_partial(src_aligned, offset);

-        while dest_start < dest_usize {
+        while dest_start.wrapping_add(1) < dest_usize {
            src_aligned = src_aligned.wrapping_sub(1);
            let cur_word = *src_aligned;
            #[cfg(target_endian = "little")]
-            let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+            let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
            #[cfg(target_endian = "big")]
-            let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+            let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
            prev_word = cur_word;

            dest_usize = dest_usize.wrapping_sub(1);
-            *dest_usize = resembled;
+            *dest_usize = reassembled;
        }
+
+        // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+        // it is partially out-of-bounds.
+        src_aligned = src_aligned.wrapping_sub(1);
+        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+        #[cfg(target_endian = "little")]
+        let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+        #[cfg(target_endian = "big")]
+        let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+        // prev_word does not matter any more
+
+        dest_usize = dest_usize.wrapping_sub(1);
+        *dest_usize = reassembled;
    }

+    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {