@@ -19,10 +19,12 @@ use core::cmp::Ordering::{self, Less};
 use core::mem::{self, SizedTypeProperties};
 #[cfg(not(no_global_oom_handling))]
 use core::ptr;
+#[cfg(not(no_global_oom_handling))]
+use core::slice::sort;

 use crate::alloc::Allocator;
 #[cfg(not(no_global_oom_handling))]
-use crate::alloc::Global;
+use crate::alloc::{self, Global};
 #[cfg(not(no_global_oom_handling))]
 use crate::borrow::ToOwned;
 use crate::boxed::Box;
@@ -203,7 +205,7 @@ impl<T> [T] {
     where
         T: Ord,
     {
-        merge_sort(self, T::lt);
+        stable_sort(self, T::lt);
     }

     /// Sorts the slice with a comparator function.
@@ -259,7 +261,7 @@ impl<T> [T] {
     where
         F: FnMut(&T, &T) -> Ordering,
    {
-        merge_sort(self, |a, b| compare(a, b) == Less);
+        stable_sort(self, |a, b| compare(a, b) == Less);
    }

     /// Sorts the slice with a key extraction function.
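All three entry points reduce their comparison to the boolean "strictly less" predicate the sort core consumes: `sort` passes `T::lt`, `sort_by` wraps the user comparator as `compare(a, b) == Less`, and `sort_by_key` compares extracted keys. A quick check, using nothing beyond the public API, that the delegation keeps the documented behavior:

```rust
fn main() {
    let mut v = [5, 4, 1, 3, 2];
    // This closure reaches stable_sort as the predicate |a, b| compare(a, b) == Less.
    v.sort_by(|a, b| a.cmp(b));
    assert_eq!(v, [1, 2, 3, 4, 5]);
}
```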
@@ -302,7 +304,7 @@ impl<T> [T] {
         F: FnMut(&T) -> K,
         K: Ord,
     {
-        merge_sort(self, |a, b| f(a).lt(&f(b)));
+        stable_sort(self, |a, b| f(a).lt(&f(b)));
     }

     /// Sorts the slice with a key extraction function.
@@ -809,324 +811,52 @@ impl<T: Clone> ToOwned for [T] {
 // Sorting
 ////////////////////////////////////////////////////////////////////////////////

-/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
-///
-/// This is the integral subroutine of insertion sort.
-#[cfg(not(no_global_oom_handling))]
-fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    if v.len() >= 2 && is_less(&v[1], &v[0]) {
-        unsafe {
-            // There are three ways to implement insertion here:
-            //
-            // 1. Swap adjacent elements until the first one gets to its final destination.
-            //    However, this way we copy data around more than is necessary. If elements are big
-            //    structures (costly to copy), this method will be slow.
-            //
-            // 2. Iterate until the right place for the first element is found. Then shift the
-            //    elements succeeding it to make room for it and finally place it into the
-            //    remaining hole. This is a good method.
-            //
-            // 3. Copy the first element into a temporary variable. Iterate until the right place
-            //    for it is found. As we go along, copy every traversed element into the slot
-            //    preceding it. Finally, copy data from the temporary variable into the remaining
-            //    hole. This method is very good. Benchmarks demonstrated slightly better
-            //    performance than with the 2nd method.
-            //
-            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
-            let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
-
-            // Intermediate state of the insertion process is always tracked by `hole`, which
-            // serves two purposes:
-            // 1. Protects integrity of `v` from panics in `is_less`.
-            // 2. Fills the remaining hole in `v` in the end.
-            //
-            // Panic safety:
-            //
-            // If `is_less` panics at any point during the process, `hole` will get dropped and
-            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
-            // initially held exactly once.
-            let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
-            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
-
-            for i in 2..v.len() {
-                if !is_less(&v[i], &*tmp) {
-                    break;
-                }
-                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
-                hole.dest = &mut v[i];
-            }
-            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
-        }
-    }
-
-    // When dropped, copies from `src` into `dest`.
-    struct InsertionHole<T> {
-        src: *const T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for InsertionHole<T> {
-        fn drop(&mut self) {
-            unsafe {
-                ptr::copy_nonoverlapping(self.src, self.dest, 1);
-            }
-        }
-    }
-}
-
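The removed `insert_head` implements method 3 above with raw pointers and a drop guard. A rough standalone sketch of the same idea, restricted to `Copy` types so plain reads and writes stand in for `ptr::copy_nonoverlapping` and no panic-safety guard is needed (`insert_head_demo` is an illustrative name, not a library function):

```rust
/// Sketch: insert `v[0]` into the already-sorted tail `v[1..]` (Copy types only).
fn insert_head_demo<T: Copy + Ord>(v: &mut [T]) {
    if v.len() >= 2 && v[1] < v[0] {
        let tmp = v[0]; // method 3: stash the head in a temporary
        let mut hole = 0; // index of the slot waiting to be filled
        // Shift each smaller element one slot left, moving the hole right.
        for i in 1..v.len() {
            if v[i] >= tmp {
                break;
            }
            v[hole] = v[i];
            hole = i;
        }
        v[hole] = tmp; // fill the remaining hole from the temporary
    }
}

fn main() {
    let mut v = [5, 1, 2, 3, 4];
    insert_head_demo(&mut v);
    assert_eq!(v, [1, 2, 3, 4, 5]);
}
```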
-/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
-/// stores the result into `v[..]`.
-///
-/// # Safety
-///
-/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
-/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
-#[cfg(not(no_global_oom_handling))]
-unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    let len = v.len();
-    let v = v.as_mut_ptr();
-    let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };
-
-    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
-    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
-    // copying the lesser (or greater) one into `v`.
-    //
-    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
-    // consumed first, then we must copy whatever is left of the shorter run into the remaining
-    // hole in `v`.
-    //
-    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
-    // 1. Protects integrity of `v` from panics in `is_less`.
-    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
-    //
-    // Panic safety:
-    //
-    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
-    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
-    // object it initially held exactly once.
-    let mut hole;
-
-    if mid <= len - mid {
-        // The left run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v, buf, mid);
-            hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
-        }
-
-        // Initially, these pointers point to the beginnings of their arrays.
-        let left = &mut hole.start;
-        let mut right = v_mid;
-        let out = &mut hole.dest;
-
-        while *left < hole.end && right < v_end {
-            // Consume the lesser side.
-            // If equal, prefer the left run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right, &**left) {
-                    get_and_increment(&mut right)
-                } else {
-                    get_and_increment(left)
-                };
-                ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
-            }
-        }
-    } else {
-        // The right run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v_mid, buf, len - mid);
-            hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
-        }
-
-        // Initially, these pointers point past the ends of their arrays.
-        let left = &mut hole.dest;
-        let right = &mut hole.end;
-        let mut out = v_end;
-
-        while v < *left && buf < *right {
-            // Consume the greater side.
-            // If equal, prefer the right run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
-                    decrement_and_get(left)
-                } else {
-                    decrement_and_get(right)
-                };
-                ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
-            }
-        }
-    }
-    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
-    // it will now be copied into the hole in `v`.
-
-    unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
-        let old = *ptr;
-        *ptr = unsafe { ptr.add(1) };
-        old
-    }
-
-    unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
-        *ptr = unsafe { ptr.sub(1) };
-        *ptr
-    }
-
-    // When dropped, copies the range `start..end` into `dest..`.
-    struct MergeHole<T> {
-        start: *mut T,
-        end: *mut T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for MergeHole<T> {
-        fn drop(&mut self) {
-            // `T` is not a zero-sized type, and these are pointers into a slice's elements.
-            unsafe {
-                let len = self.end.sub_ptr(self.start);
-                ptr::copy_nonoverlapping(self.start, self.dest, len);
-            }
-        }
-    }
-}
-
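Both removed helpers lean on the same panic-safety pattern: a guard struct whose `Drop` impl repairs the slice if the user-supplied comparator panics mid-operation, because `Drop` runs during unwinding. A minimal illustration of the pattern, reduced to a single element (the guard name `Hole` is hypothetical; it mirrors the contract of the removed `InsertionHole`/`MergeHole`):

```rust
use std::ptr;

/// When dropped, copies from `src` into `dest` — the contract shared by the
/// removed `InsertionHole` and `MergeHole` guards, reduced to one element.
struct Hole<T> {
    src: *const T,
    dest: *mut T,
}

impl<T> Drop for Hole<T> {
    fn drop(&mut self) {
        // SAFETY: whoever creates the guard promises `src` is valid for reads
        // and `dest` for writes; this runs even while a panic unwinds.
        unsafe { ptr::copy_nonoverlapping(self.src, self.dest, 1) }
    }
}

fn main() {
    let mut v = vec![String::from("a"), String::from("b")];
    // Logically move v[0] into a temporary, leaving a "hole" at v[0].
    let tmp = unsafe { ptr::read(&v[0]) };
    {
        // While `tmp` is detached, the guard keeps `v` well-formed: if code
        // in this scope panicked, Drop would write `tmp` back into the hole.
        let _hole = Hole { src: &tmp, dest: &mut v[0] };
        // ... comparator calls that might panic would run here ...
    } // guard drops here and fills the hole from `tmp`
    // `v[0]` owns the value again; forget `tmp` to avoid a double drop.
    std::mem::forget(tmp);
    assert_eq!(v, ["a", "b"]);
}
```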
-/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
-/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
-///
-/// The algorithm identifies strictly descending and non-descending subsequences, which are called
-/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
-/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
-/// satisfied:
-///
-/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
-/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
-///
-/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
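Concretely, invariant 2 forces run lengths (top of the stack downward) to grow at least as fast as the Fibonacci numbers, so only O(log n) runs are ever pending. For example, a stack of lengths `[18, 10, 8]` satisfies invariant 1 (18 > 10 > 8) but violates invariant 2 (18 ≤ 10 + 8), so adjacent runs must be merged before a new run is pushed. A small sketch of the check, as a hypothetical helper rather than the `collapse` function removed below (which additionally handles the top-four edge cases it documents):

```rust
/// Sketch: do run lengths (bottom of the stack first) satisfy both invariants?
fn invariants_hold(lens: &[usize]) -> bool {
    let inv1 = (1..lens.len()).all(|i| lens[i - 1] > lens[i]);
    let inv2 = (2..lens.len()).all(|i| lens[i - 2] > lens[i - 1] + lens[i]);
    inv1 && inv2
}

fn main() {
    assert!(invariants_hold(&[30, 18, 8])); // 30 > 18 + 8: nothing to merge
    assert!(!invariants_hold(&[18, 10, 8])); // 18 <= 10 + 8: merge needed
}
```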
+#[inline]
 #[cfg(not(no_global_oom_handling))]
-fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
+fn stable_sort<T, F>(v: &mut [T], mut is_less: F)
 where
     F: FnMut(&T, &T) -> bool,
 {
-    // Slices of up to this length get sorted using insertion sort.
-    const MAX_INSERTION: usize = 20;
-    // Very short runs are extended using insertion sort to span at least this many elements.
-    const MIN_RUN: usize = 10;
-
-    // Sorting has no meaningful behavior on zero-sized types.
     if T::IS_ZST {
+        // Sorting has no meaningful behavior on zero-sized types. Do nothing.
         return;
     }
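Zero-sized types carry no data to reorder, so the early return is observationally a no-op:

```rust
fn main() {
    // Every `()` equals every other `()` and occupies no memory; sort()
    // returns immediately without comparing or allocating anything.
    let mut v = [(), (), ()];
    v.sort();
}
```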

-    let len = v.len();
-
-    // Short arrays get sorted in-place via insertion sort to avoid allocations.
-    if len <= MAX_INSERTION {
-        if len >= 2 {
-            for i in (0..len - 1).rev() {
-                insert_head(&mut v[i..], &mut is_less);
-            }
-        }
-        return;
-    }
-
-    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
-    // shallow copies of the contents of `v` without risking the dtors running on copies if
-    // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
-    // which will always have length at most `len / 2`.
-    let mut buf = Vec::with_capacity(len / 2);
+    let elem_alloc_fn = |len: usize| -> *mut T {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). Alloc in general will only be used as 'shadow-region' to store temporary swap
+        // elements.
+        unsafe { alloc::alloc(alloc::Layout::array::<T>(len).unwrap_unchecked()) as *mut T }
+    };
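The new code hands scratch memory to `core` through the raw global-allocator API instead of a `Vec`, since `core` cannot allocate on its own. A standalone sketch of the same alloc/dealloc pairing, with the checks a user program would want (the closures above can use `unwrap_unchecked` because the requested buffer never exceeds `v.len()` elements, whose layout is already known to be valid):

```rust
use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};

fn main() {
    // A buffer of 16 u64s, allocated and freed with the same Layout, as the
    // elem_alloc_fn/elem_dealloc_fn pair does for `T`.
    let layout = Layout::array::<u64>(16).unwrap();
    unsafe {
        let buf = alloc(layout) as *mut u64;
        if buf.is_null() {
            handle_alloc_error(layout); // the allocator signalled failure
        }
        buf.write(41); // the region is writable scratch space
        assert_eq!(buf.read() + 1, 42);
        dealloc(buf as *mut u8, layout);
    }
}
```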

-    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
-    // strange decision, but consider the fact that merges more often go in the opposite direction
-    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
-    // backwards. To conclude, identifying runs by traversing backwards improves performance.
-    let mut runs = vec![];
-    let mut end = len;
-    while end > 0 {
-        // Find the next natural run, and reverse it if it's strictly descending.
-        let mut start = end - 1;
-        if start > 0 {
-            start -= 1;
-            unsafe {
-                if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
-                    while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
-                        start -= 1;
-                    }
-                    v[start..end].reverse();
-                } else {
-                    while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
-                    {
-                        start -= 1;
-                    }
-                }
-            }
-        }
-
-        // Insert some more elements into the run if it's too short. Insertion sort is faster than
-        // merge sort on short sequences, so this significantly improves performance.
-        while start > 0 && end - start < MIN_RUN {
-            start -= 1;
-            insert_head(&mut v[start..end], &mut is_less);
+    let elem_dealloc_fn = |buf_ptr: *mut T, len: usize| {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(buf_ptr as *mut u8, alloc::Layout::array::<T>(len).unwrap_unchecked());
         }
+    };

-        // Push this run onto the stack.
-        runs.push(Run { start, len: end - start });
-        end = start;
-
-        // Merge some pairs of adjacent runs to satisfy the invariants.
-        while let Some(r) = collapse(&runs) {
-            let left = runs[r + 1];
-            let right = runs[r];
-            unsafe {
-                merge(
-                    &mut v[left.start..right.start + right.len],
-                    left.len,
-                    buf.as_mut_ptr(),
-                    &mut is_less,
-                );
-            }
-            runs[r] = Run { start: left.start, len: left.len + right.len };
-            runs.remove(r + 1);
+    let run_alloc_fn = |len: usize| -> *mut sort::TimSortRun {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with an
+        // obscene length or 0.
+        unsafe {
+            alloc::alloc(alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked())
+                as *mut sort::TimSortRun
         }
-    }
-
-    // Finally, exactly one run must remain in the stack.
-    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+    };

-    // Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
-    // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
-    // algorithm should continue building a new run instead, `None` is returned.
-    //
-    // TimSort is infamous for its buggy implementations, as described here:
-    // http://envisage-project.eu/timsort-specification-and-verification/
-    //
-    // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
-    // Enforcing them on just top three is not sufficient to ensure that the invariants will still
-    // hold for *all* runs in the stack.
-    //
-    // This function correctly checks invariants for the top four runs. Additionally, if the top
-    // run starts at index 0, it will always demand a merge operation until the stack is fully
-    // collapsed, in order to complete the sort.
-    #[inline]
-    fn collapse(runs: &[Run]) -> Option<usize> {
-        let n = runs.len();
-        if n >= 2
-            && (runs[n - 1].start == 0
-                || runs[n - 2].len <= runs[n - 1].len
-                || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
-                || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
-        {
-            if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
-        } else {
-            None
+    let run_dealloc_fn = |buf_ptr: *mut sort::TimSortRun, len: usize| {
+        // SAFETY: The caller must ensure that buf_ptr was created by run_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(
+                buf_ptr as *mut u8,
+                alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked(),
+            );
         }
-    }
+    };

-    #[derive(Clone, Copy)]
-    struct Run {
-        start: usize,
-        len: usize,
-    }
+    sort::merge_sort(v, &mut is_less, elem_alloc_fn, elem_dealloc_fn, run_alloc_fn, run_dealloc_fn);
 }
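The net effect of this hunk: the 324 lines of TimSort machinery in this file shrink to a 52-line shim, with the sort itself living in `core::slice::sort` and `alloc` supplying only the allocation plumbing that `core` cannot provide. Observable behavior of the public API is unchanged, including stability for equal keys:

```rust
fn main() {
    // Equal keys keep their original relative order (stable sort), whichever
    // of the three public entry points ends up calling stable_sort.
    let mut v = [(1, 'b'), (0, 'x'), (1, 'a')];
    v.sort_by_key(|&(k, _)| k);
    assert_eq!(v, [(0, 'x'), (1, 'b'), (1, 'a')]);
}
```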