@@ -398,6 +398,7 @@ use crate::cmp::Ordering;
398
398
use crate :: intrinsics:: const_eval_select;
399
399
use crate :: marker:: FnPtr ;
400
400
use crate :: mem:: { self , MaybeUninit , SizedTypeProperties } ;
401
+ use crate :: num:: NonZero ;
401
402
use crate :: { fmt, hash, intrinsics, ub_checks} ;
402
403
403
404
mod alignment;
@@ -1094,49 +1095,26 @@ pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
1094
1095
// are pointers inside `T` we will copy them in one go rather than trying to copy a part
1095
1096
// of a pointer (which would not work).
1096
1097
// SAFETY: Same preconditions as this function
1097
- unsafe { swap_nonoverlapping_simple_untyped ( x, y, count) }
1098
+ unsafe { swap_nonoverlapping_const ( x, y, count) }
1098
1099
} else {
1099
- macro_rules! attempt_swap_as_chunks {
1100
- ( $ChunkTy : ty) => {
1101
- if mem:: align_of:: <T >( ) >= mem:: align_of:: <$ChunkTy >( )
1102
- && mem:: size_of:: <T >( ) % mem:: size_of:: <$ChunkTy >( ) == 0
1103
- {
1104
- let x: * mut $ChunkTy = x. cast( ) ;
1105
- let y: * mut $ChunkTy = y. cast( ) ;
1106
- let count = count * ( mem:: size_of:: <T >( ) / mem:: size_of:: <$ChunkTy >( ) ) ;
1107
- // SAFETY: these are the same bytes that the caller promised were
1108
- // ok, just typed as `MaybeUninit<ChunkTy>`s instead of as `T`s.
1109
- // The `if` condition above ensures that we're not violating
1110
- // alignment requirements, and that the division is exact so
1111
- // that we don't lose any bytes off the end.
1112
- return unsafe { swap_nonoverlapping_simple_untyped( x, y, count) } ;
1113
- }
1114
- } ;
1100
+ // Going through a slice here helps codegen know the size fits in `isize`
1101
+ let slice = slice_from_raw_parts_mut( x, count) ;
1102
+ // SAFETY: We have two non-overlapping ranges in memory and they're both
1103
+ // readable so don't include the null address, thus they're at most
1104
+ // `(usize::MAX - 1)/2 = isize::MAX` bytes long.
1105
+ let bytes = unsafe { mem:: size_of_val_raw:: <[ T ] >( slice) } ;
1106
+ if let Some ( bytes) = NonZero :: new( bytes) {
1107
+ // SAFETY: These are the same ranges, just expressed in a different
1108
+ // type, so they're still non-overlapping.
1109
+ unsafe { swap_nonoverlapping_bytes( x. cast( ) , y. cast( ) , bytes) } ;
1115
1110
}
1116
-
1117
- // Split up the slice into small power-of-two-sized chunks that LLVM is able
1118
- // to vectorize (unless it's a special type with more-than-pointer alignment,
1119
- // because we don't want to pessimize things like slices of SIMD vectors.)
1120
- if mem:: align_of:: <T >( ) <= mem:: size_of:: <usize >( )
1121
- && ( !mem:: size_of:: <T >( ) . is_power_of_two( )
1122
- || mem:: size_of:: <T >( ) > mem:: size_of:: <usize >( ) * 2 )
1123
- {
1124
- attempt_swap_as_chunks!( usize ) ;
1125
- attempt_swap_as_chunks!( u8 ) ;
1126
- }
1127
-
1128
- // SAFETY: Same preconditions as this function
1129
- unsafe { swap_nonoverlapping_simple_untyped( x, y, count) }
1130
1111
}
1131
1112
)
1132
1113
}
1133
1114
1134
1115
/// Same behavior and safety conditions as [`swap_nonoverlapping`]
1135
- ///
1136
- /// LLVM can vectorize this (at least it can for the power-of-two-sized types
1137
- /// `swap_nonoverlapping` tries to use) so no need to manually SIMD it.
1138
1116
#[ inline]
1139
- const unsafe fn swap_nonoverlapping_simple_untyped < T > ( x : * mut T , y : * mut T , count : usize ) {
1117
+ const unsafe fn swap_nonoverlapping_const < T > ( x : * mut T , y : * mut T , count : usize ) {
1140
1118
let x = x. cast :: < MaybeUninit < T > > ( ) ;
1141
1119
let y = y. cast :: < MaybeUninit < T > > ( ) ;
1142
1120
let mut i = 0 ;
@@ -1147,13 +1125,6 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
1147
1125
// and it's distinct from `x` since the ranges are non-overlapping
1148
1126
let y = unsafe { y. add ( i) } ;
1149
1127
1150
- // If we end up here, it's because we're using a simple type -- like
1151
- // a small power-of-two-sized thing -- or a special type with particularly
1152
- // large alignment, particularly SIMD types.
1153
- // Thus, we're fine just reading-and-writing it, as either it's small
1154
- // and that works well anyway or it's special and the type's author
1155
- // presumably wanted things to be done in the larger chunk.
1156
-
1157
1128
// SAFETY: we're only ever given pointers that are valid to read/write,
1158
1129
// including being aligned, and nothing here panics so it's drop-safe.
1159
1130
unsafe {
@@ -1167,6 +1138,72 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
1167
1138
}
1168
1139
}
1169
1140
1141
+ // Swaps the `N`-byte chunks behind `x` and `y`, loading both values before storing either. Don't let MIR inline this, because we really want it to keep its noalias metadata
1142
+ #[ rustc_no_mir_inline]
1143
+ #[ inline]
1144
+ fn swap_chunk < const N : usize > ( x : & mut MaybeUninit < [ u8 ; N ] > , y : & mut MaybeUninit < [ u8 ; N ] > ) {
1145
+ let a = * x; // copy out the current contents of `x`
1146
+ let b = * y; // copy out the current contents of `y` before anything is overwritten
1147
+ * x = b;
1148
+ * y = a;
1149
+ }
1150
+
1151
+ #[ inline]
1152
+ unsafe fn swap_nonoverlapping_bytes ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) { // Swaps `bytes` bytes between `x` and `y`: whole `CHUNK_SIZE` chunks first, then a shorter tail.
1153
+ // Swaps `chunks` consecutive `N`-byte chunks between `x` and `y`, one `swap_chunk` call per index. Same as `swap_nonoverlapping::<[u8; N]>`.
1154
+ #[ inline]
1155
+ unsafe fn swap_nonoverlapping_chunks < const N : usize > (
1156
+ x : * mut MaybeUninit < [ u8 ; N ] > ,
1157
+ y : * mut MaybeUninit < [ u8 ; N ] > ,
1158
+ chunks : NonZero < usize > ,
1159
+ ) {
1160
+ let chunks = chunks. get ( ) ;
1161
+ for i in 0 ..chunks {
1162
+ // SAFETY: `i` is in `[0, chunks)` so the `add`s and dereferences are in-bounds.
1163
+ unsafe { swap_chunk ( & mut * x. add ( i) , & mut * y. add ( i) ) } ;
1164
+ }
1165
+ }
1166
+
1167
+ // Same as `swap_nonoverlapping_bytes`, but accepts at most 1+2+4=7 bytes: for each of the bits 4, 2, 1 set in `bytes`, one chunk of that size is swapped at offset `i`.
1168
+ #[ inline]
1169
+ unsafe fn swap_nonoverlapping_short ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) {
1170
+ let bytes = bytes. get ( ) ;
1171
+ let mut i = 0 ;
1172
+ macro_rules! swap_prefix {
1173
+ ( $( $n: literal) +) => { $(
1174
+ if ( bytes & $n) != 0 {
1175
+ // SAFETY: `i` can only have the same bits set as those in `bytes`,
1176
+ // so these `add`s are in-bounds of `bytes`. But the bit for
1177
+ // `$n` hasn't been set in `i` yet, so the `$n` bytes that `swap_chunk`
1178
+ // will read and write are within the usable range.
1179
+ unsafe { swap_chunk:: <$n>( & mut * x. add( i) . cast( ) , & mut * y. add( i) . cast( ) ) } ;
1180
+ i |= $n;
1181
+ }
1182
+ ) +} ;
1183
+ }
1184
+ swap_prefix ! ( 4 2 1 ) ;
1185
+ debug_assert_eq ! ( i, bytes) ;
1186
+ }
1187
+
1188
+ const CHUNK_SIZE : usize = size_of :: < * const ( ) > ( ) ; // chunk width is the size of a `*const ()` pointer
1189
+ let bytes = bytes. get ( ) ;
1190
+
1191
+ let chunks = bytes / CHUNK_SIZE ;
1192
+ let tail = bytes % CHUNK_SIZE ;
1193
+ if let Some ( chunks) = NonZero :: new ( chunks) {
1194
+ // SAFETY: this is `bytes / CHUNK_SIZE * CHUNK_SIZE` bytes, which is <= `bytes`,
1195
+ // so it's within the range of our non-overlapping bytes.
1196
+ unsafe { swap_nonoverlapping_chunks :: < CHUNK_SIZE > ( x. cast ( ) , y. cast ( ) , chunks) } ;
1197
+ }
1198
+ if let Some ( tail) = NonZero :: new ( tail) {
1199
+ const { assert ! ( CHUNK_SIZE <= 8 ) } ;
1200
+ let delta = chunks * CHUNK_SIZE ; // byte offset just past the whole chunks, where the tail begins
1201
+ // SAFETY: the tail length is below `CHUNK_SIZE` because of the remainder,
1202
+ // and `CHUNK_SIZE` is at most 8 by the const assert, so `tail <= 7`.
1203
+ unsafe { swap_nonoverlapping_short ( x. add ( delta) , y. add ( delta) , tail) } ;
1204
+ }
1205
+ }
1206
+
1170
1207
/// Moves `src` into the pointed `dst`, returning the previous `dst` value.
1171
1208
///
1172
1209
/// Neither value is dropped.
0 commit comments