@@ -41,6 +41,65 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
    core::mem::transmute(x_read)
}

+ /// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
+ /// read with the out-of-bounds part filled with 0s.
+ /// `load_sz` must be strictly less than `WORD_SIZE`.
+ #[cfg(not(feature = "mem-unaligned"))]
+ #[inline(always)]
+ unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
+     debug_assert!(load_sz < WORD_SIZE);
+
+     let mut i = 0;
+     let mut out = 0usize;
+     macro_rules! load_prefix {
+         ($($ty:ty)+) => {$(
+             let chunk_sz = core::mem::size_of::<$ty>();
+             if (load_sz & chunk_sz) != 0 {
+                 // Since we are doing the large reads first, this must still be aligned to `chunk_sz`.
+                 *(&raw mut out).byte_add(i).cast::<$ty>() = *src.byte_add(i).cast::<$ty>();
+                 i |= chunk_sz;
+             }
+         )+};
+     }
+     // We can read up to 7 bytes here, which is enough for a WORD_SIZE of 8
+     // (since `load_sz < WORD_SIZE`).
+     const { assert!(WORD_SIZE <= 8) };
+     load_prefix!(u32 u16 u8);
+     debug_assert!(i == load_sz);
+     out
+ }
+
+ /// Load `load_sz` many bytes from `src.byte_add(WORD_SIZE - load_sz)`. `src` must be `usize`-aligned.
+ /// The bytes are returned as the *last* bytes of the return value, i.e., this acts as if we had done
+ /// a `usize` read from `src`, with the out-of-bounds part filled with 0s.
+ /// `load_sz` must be strictly less than `WORD_SIZE`.
+ #[cfg(not(feature = "mem-unaligned"))]
+ #[inline(always)]
+ unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
+     debug_assert!(load_sz < WORD_SIZE);
+
+     let mut i = 0;
+     let mut out = 0usize;
+     let start_shift = WORD_SIZE - load_sz;
+     macro_rules! load_prefix {
+         ($($ty:ty)+) => {$(
+             let chunk_sz = core::mem::size_of::<$ty>();
+             if (load_sz & chunk_sz) != 0 {
+                 // Since we are doing the small reads first, `start_shift + i` has in the
+                 // meantime become aligned to `chunk_sz`.
+                 *(&raw mut out).byte_add(start_shift + i).cast::<$ty>() = *src.byte_add(start_shift + i).cast::<$ty>();
+                 i |= chunk_sz;
+             }
+         )+};
+     }
+     // We can read up to 7 bytes here, which is enough for a WORD_SIZE of 8
+     // (since `load_sz < WORD_SIZE`).
+     const { assert!(WORD_SIZE <= 8) };
+     load_prefix!(u8 u16 u32);
+     debug_assert!(i == load_sz);
+     out
+ }
+
#[inline(always)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
    #[inline(always)]
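To make the semantics of these helpers concrete, here is a minimal stand-alone sketch (not part of the patch): `load_aligned_partial_ref` is a hypothetical reference model that produces the same result with a plain byte copy, whereas the real `load_aligned_partial` above assembles the value out of aligned power-of-two chunks so that no unaligned or out-of-bounds access ever happens.

```rust
use core::mem::size_of;

/// Hypothetical reference model: copy the first `load_sz` bytes of the word at
/// `src` into the low-address bytes of the result and leave the rest zeroed,
/// as if the out-of-bounds tail of a full `usize` read had been 0.
///
/// Safety: `src` must be `usize`-aligned and point to at least `load_sz`
/// readable bytes, with `load_sz < size_of::<usize>()`.
unsafe fn load_aligned_partial_ref(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < size_of::<usize>());
    let mut out = 0usize;
    unsafe {
        core::ptr::copy_nonoverlapping(src.cast::<u8>(), (&mut out as *mut usize).cast::<u8>(), load_sz);
    }
    out
}

fn main() {
    // One aligned word of 0xFF bytes, followed by padding we never touch.
    let words: [usize; 2] = [usize::MAX, 0];
    // Load only 3 of its bytes; the remaining bytes of the result stay 0.
    let v = unsafe { load_aligned_partial_ref(words.as_ptr(), 3) };
    assert_eq!(v.to_ne_bytes()[..3], [0xFF, 0xFF, 0xFF]);
    assert!(v.to_ne_bytes()[3..].iter().all(|&b| b == 0));
}
```

`load_aligned_end_partial` is the mirror image: the loaded bytes land in the *last* `load_sz` bytes of the result instead of the first.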
@@ -66,40 +125,55 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
        }
    }

+     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+     /// `src` *must not* be `usize`-aligned.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+         debug_assert!(n > 0 && n % WORD_SIZE == 0);
+         debug_assert!(src.addr() % WORD_SIZE != 0);
+
        let mut dest_usize = dest as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;

        // Calculate the misalignment offset and shift needed to reassemble value.
+         // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

        // Realign src
-         let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-         // This will read (but won't use) bytes out of bound.
-         // cfg needed because not all targets will have atomic loads that can be lowered
-         // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-         #[cfg(target_has_atomic_load_store = "ptr")]
-         let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-         #[cfg(not(target_has_atomic_load_store = "ptr"))]
-         let mut prev_word = core::ptr::read_volatile(src_aligned);
+         let mut src_aligned = src.byte_sub(offset) as *mut usize;
+         let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);

-         while dest_usize < dest_end {
+         while dest_usize.wrapping_add(1) < dest_end {
            src_aligned = src_aligned.wrapping_add(1);
            let cur_word = *src_aligned;
            #[cfg(target_endian = "little")]
-             let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+             let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
            #[cfg(target_endian = "big")]
-             let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+             let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
            prev_word = cur_word;

-             *dest_usize = resembled;
+             *dest_usize = reassembled;
            dest_usize = dest_usize.wrapping_add(1);
        }
+
+         // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+         // it is partially out-of-bounds.
+         src_aligned = src_aligned.wrapping_add(1);
+         let cur_word = load_aligned_partial(src_aligned, offset);
+         #[cfg(target_endian = "little")]
+         let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+         #[cfg(target_endian = "big")]
+         let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+         // prev_word does not matter any more
+
+         *dest_usize = reassembled;
+         // dest_usize does not matter any more
    }

+     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+     /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
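The shift-and-OR `reassembled` expression is the heart of the misaligned copy: two adjacent aligned words are combined into the one misaligned word that straddles them. The following stand-alone sketch (hypothetical setup, little-endian formula only; because it builds its words with `from_le_bytes`, the arithmetic check itself runs the same on any target) verifies that the expression matches a plain read starting `offset` bytes past the aligned boundary.

```rust
fn main() {
    const WORD_SIZE: usize = core::mem::size_of::<usize>();
    // Two adjacent aligned words' worth of recognizable bytes.
    let bytes: Vec<u8> = (0..2 * WORD_SIZE as u8).collect();

    let offset = 3; // pretend `src` sits 3 bytes past an aligned boundary
    let shift = offset * 8;

    // The two aligned words that straddle the misaligned word.
    let prev_word = usize::from_le_bytes(bytes[..WORD_SIZE].try_into().unwrap());
    let cur_word = usize::from_le_bytes(bytes[WORD_SIZE..].try_into().unwrap());

    // The little-endian reassembly used in `copy_forward_misaligned_words`.
    let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);

    // It equals a plain (conceptually unaligned) read starting at `offset`.
    let expected = usize::from_le_bytes(bytes[offset..offset + WORD_SIZE].try_into().unwrap());
    assert_eq!(reassembled, expected);
}
```

The big-endian variant simply swaps the shift directions, since there the first byte in memory is the most significant one.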
@@ -164,40 +238,55 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
        }
    }

+     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+     /// `src` *must not* be `usize`-aligned.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+         debug_assert!(n > 0 && n % WORD_SIZE == 0);
+         debug_assert!(src.addr() % WORD_SIZE != 0);
+
        let mut dest_usize = dest as *mut usize;
-         let dest_start = dest.wrapping_sub(n) as *mut usize;
+         let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start

        // Calculate the misalignment offset and shift needed to reassemble value.
+         // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

-         // Realign src_aligned
-         let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
-         // This will read (but won't use) bytes out of bound.
-         // cfg needed because not all targets will have atomic loads that can be lowered
-         // (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
-         #[cfg(target_has_atomic_load_store = "ptr")]
-         let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
-         #[cfg(not(target_has_atomic_load_store = "ptr"))]
-         let mut prev_word = core::ptr::read_volatile(src_aligned);
+         // Realign src
+         let mut src_aligned = src.byte_sub(offset) as *mut usize;
+         let mut prev_word = load_aligned_partial(src_aligned, offset);

-         while dest_start < dest_usize {
+         while dest_start.wrapping_add(1) < dest_usize {
            src_aligned = src_aligned.wrapping_sub(1);
            let cur_word = *src_aligned;
            #[cfg(target_endian = "little")]
-             let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+             let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
            #[cfg(target_endian = "big")]
-             let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+             let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
            prev_word = cur_word;

            dest_usize = dest_usize.wrapping_sub(1);
-             *dest_usize = resembled;
+             *dest_usize = reassembled;
        }
+
+         // There's one more element left to go, and we can't use the loop for that as on the `src` side,
+         // it is partially out-of-bounds.
+         src_aligned = src_aligned.wrapping_sub(1);
+         let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
+         #[cfg(target_endian = "little")]
+         let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+         #[cfg(target_endian = "big")]
+         let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+         // prev_word does not matter any more
+
+         dest_usize = dest_usize.wrapping_sub(1);
+         *dest_usize = reassembled;
    }

+     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
+     /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
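For the backward direction the same identity holds with the shifts mirrored, and the first and last words again need the partial loads because they are only partly covered by the source. A hypothetical, fully bounds-checked model (invented names and setup, little-endian formula only; the real code peels the partially out-of-bounds words out of its loop rather than zero-filling inside it) checks that the backward word-at-a-time copy reproduces a plain byte copy.

```rust
fn main() {
    const WORD_SIZE: usize = core::mem::size_of::<usize>();
    let offset = 3; // `src` is 3 bytes past an aligned boundary
    let n = 4 * WORD_SIZE; // a multiple of the word size, as the real function requires

    // An aligned region; the misaligned source is the `n` bytes starting at `offset`.
    let region: Vec<u8> = (0..offset + n).map(|b| (b as u8).wrapping_mul(31)).collect();
    let shift = offset * 8;

    // Model of an aligned word load: bytes outside the region read as 0. This is
    // where the real code uses load_aligned_partial / load_aligned_end_partial.
    let load = |word_idx: usize| -> usize {
        let mut buf = [0u8; WORD_SIZE];
        for (i, b) in buf.iter_mut().enumerate() {
            if let Some(&v) = region.get(word_idx * WORD_SIZE + i) {
                *b = v;
            }
        }
        usize::from_le_bytes(buf)
    };

    // Backward pass: walk from the last destination word towards the first, as
    // copy_backward_misaligned_words does.
    let mut dest = vec![0u8; n];
    let mut prev_word = load(n / WORD_SIZE); // only its first `offset` bytes belong to `src`
    for k in (0..n / WORD_SIZE).rev() {
        let cur_word = load(k); // for k == 0, only its last WORD_SIZE - offset bytes are in `src`
        let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
        dest[k * WORD_SIZE..][..WORD_SIZE].copy_from_slice(&reassembled.to_le_bytes());
        prev_word = cur_word;
    }

    // The word-at-a-time backward copy matches a plain byte copy of the source.
    assert_eq!(dest, &region[offset..offset + n]);
}
```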