|
17 | 17 | namespace LIBC_NAMESPACE_DECL {
|
18 | 18 | namespace internal {
|
19 | 19 |
|
20 |
| -class ArrayGenericSize { |
21 |
| - cpp::byte *array_base; |
22 |
| - size_t array_len; |
23 |
| - size_t elem_size; |
24 |
| - |
25 |
| - LIBC_INLINE cpp::byte *get_internal(size_t i) const { |
26 |
| - return array_base + (i * elem_size); |
27 |
| - } |
28 |
| - |
29 |
| -public: |
30 |
| - LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e) |
31 |
| - : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s), |
32 |
| - elem_size(e) {} |
33 |
| - |
34 |
| - static constexpr bool has_fixed_size() { return false; } |
35 |
| - |
36 |
| - LIBC_INLINE void *get(size_t i) const { return get_internal(i); } |
37 |
| - |
38 |
| - LIBC_INLINE void swap(size_t i, size_t j) const { |
39 |
| - // It's possible to use 8 byte blocks with `uint64_t`, but that |
40 |
| - // generates more machine code as the remainder loop gets |
41 |
| - // unrolled, plus 4 byte operations are more likely to be |
42 |
| - // efficient on a wider variety of hardware. On x86 LLVM tends |
43 |
| - // to unroll the block loop again into 2 16 byte swaps per |
44 |
| - // iteration which is another reason that 4 byte blocks yields |
45 |
| - // good performance even for big types. |
46 |
| - using block_t = uint32_t; |
47 |
| - constexpr size_t BLOCK_SIZE = sizeof(block_t); |
48 |
| - |
49 |
| - alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE]; |
50 |
| - |
51 |
| - cpp::byte *elem_i = get_internal(i); |
52 |
| - cpp::byte *elem_j = get_internal(j); |
53 |
| - |
54 |
| - const size_t elem_size_rem = elem_size % BLOCK_SIZE; |
55 |
| - const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem); |
56 |
| - |
57 |
| - while (elem_i != elem_i_block_end) { |
58 |
| - __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE); |
59 |
| - __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE); |
60 |
| - __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE); |
61 |
| - |
62 |
| - elem_i += BLOCK_SIZE; |
63 |
| - elem_j += BLOCK_SIZE; |
64 |
| - } |
65 |
| - |
66 |
| - for (size_t n = 0; n < elem_size_rem; ++n) { |
67 |
| - cpp::byte tmp = elem_i[n]; |
68 |
| - elem_i[n] = elem_j[n]; |
69 |
| - elem_j[n] = tmp; |
| 20 | +using Compare = int(const void *, const void *); |
| 21 | +using CompareWithState = int(const void *, const void *, void *); |
| 22 | + |
| 23 | +enum class CompType { COMPARE, COMPARE_WITH_STATE }; |
| 24 | + |
| 25 | +struct Comparator { |
| 26 | + union { |
| 27 | + Compare *comp_func; |
| 28 | + CompareWithState *comp_func_r; |
| 29 | + }; |
| 30 | + const CompType comp_type; |
| 31 | + |
| 32 | + void *arg; |
| 33 | + |
| 34 | + Comparator(Compare *func) |
| 35 | + : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {} |
| 36 | + |
| 37 | + Comparator(CompareWithState *func, void *arg_val) |
| 38 | + : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE), |
| 39 | + arg(arg_val) {} |
| 40 | + |
| 41 | +#if defined(__clang__) |
| 42 | + // Recent upstream changes to -fsanitize=function find more instances of |
| 43 | + // function type mismatches. One case is with the comparator passed to this |
| 44 | + // class. Libraries will tend to pass comparators that take pointers to |
| 45 | + // varying types while this comparator expects to accept const void pointers. |
| 46 | + // Ideally those tools would pass a function that strictly accepts const |
| 47 | + // void*s to avoid UB, or would use qsort_r to pass their own comparator. |
| 48 | + [[clang::no_sanitize("function")]] |
| 49 | +#endif |
| 50 | + int comp_vals(const void *a, const void *b) const { |
| 51 | + if (comp_type == CompType::COMPARE) { |
| 52 | + return comp_func(a, b); |
| 53 | + } else { |
| 54 | + return comp_func_r(a, b, arg); |
70 | 55 | }
|
71 | 56 | }
|
72 |
| - |
73 |
| - LIBC_INLINE size_t len() const { return array_len; } |
74 |
| - |
75 |
| - // Make an Array starting at index |i| and length |s|. |
76 |
| - LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const { |
77 |
| - return ArrayGenericSize(get_internal(i), s, elem_size); |
78 |
| - } |
79 |
| - |
80 |
| - // Reset this Array to point at a different interval of the same |
81 |
| - // items starting at index |i|. |
82 |
| - LIBC_INLINE void reset_bounds(size_t i, size_t s) { |
83 |
| - array_base = get_internal(i); |
84 |
| - array_len = s; |
85 |
| - } |
86 | 57 | };
|
87 | 58 |
|
88 |
| -// Having a specialized Array type for sorting that knows at |
89 |
| -// compile-time what the size of the element is, allows for much more |
90 |
| -// efficient swapping and for cheaper offset calculations. |
91 |
| -template <size_t ELEM_SIZE> class ArrayFixedSize { |
92 |
| - cpp::byte *array_base; |
93 |
| - size_t array_len; |
94 |
| - |
95 |
| - LIBC_INLINE cpp::byte *get_internal(size_t i) const { |
96 |
| - return array_base + (i * ELEM_SIZE); |
97 |
| - } |
| 59 | +class Array { |
| 60 | + uint8_t *array; |
| 61 | + size_t array_size; |
| 62 | + size_t elem_size; |
| 63 | + Comparator compare; |
98 | 64 |
|
99 | 65 | public:
|
100 |
| - LIBC_INLINE ArrayFixedSize(void *a, size_t s) |
101 |
| - : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {} |
102 |
| - |
103 |
| - // Beware this function is used a heuristic for cheap to swap types, so |
104 |
| - // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad |
105 |
| - // idea perf wise. |
106 |
| - static constexpr bool has_fixed_size() { return true; } |
107 |
| - |
108 |
| - LIBC_INLINE void *get(size_t i) const { return get_internal(i); } |
109 |
| - |
110 |
| - LIBC_INLINE void swap(size_t i, size_t j) const { |
111 |
| - alignas(32) cpp::byte tmp[ELEM_SIZE]; |
112 |
| - |
113 |
| - cpp::byte *elem_i = get_internal(i); |
114 |
| - cpp::byte *elem_j = get_internal(j); |
| 66 | + Array(uint8_t *a, size_t s, size_t e, Comparator c) |
| 67 | + : array(a), array_size(s), elem_size(e), compare(c) {} |
| 68 | + |
| 69 | + uint8_t *get(size_t i) const { return array + i * elem_size; } |
| 70 | + |
| 71 | + void swap(size_t i, size_t j) const { |
| 72 | + uint8_t *elem_i = get(i); |
| 73 | + uint8_t *elem_j = get(j); |
| 74 | + for (size_t b = 0; b < elem_size; ++b) { |
| 75 | + uint8_t temp = elem_i[b]; |
| 76 | + elem_i[b] = elem_j[b]; |
| 77 | + elem_j[b] = temp; |
| 78 | + } |
| 79 | + } |
115 | 80 |
|
116 |
| - __builtin_memcpy(tmp, elem_i, ELEM_SIZE); |
117 |
| - __builtin_memmove(elem_i, elem_j, ELEM_SIZE); |
118 |
| - __builtin_memcpy(elem_j, tmp, ELEM_SIZE); |
| 81 | + int elem_compare(size_t i, const uint8_t *other) const { |
| 82 | + // An element must compare equal to itself so we don't need to consult the |
| 83 | + // user provided comparator. |
| 84 | + if (get(i) == other) |
| 85 | + return 0; |
| 86 | + return compare.comp_vals(get(i), other); |
119 | 87 | }
|
120 | 88 |
|
121 |
| - LIBC_INLINE size_t len() const { return array_len; } |
| 89 | + size_t size() const { return array_size; } |
122 | 90 |
|
123 |
| - // Make an Array starting at index |i| and length |s|. |
124 |
| - LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const { |
125 |
| - return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s); |
| 91 | + // Make an Array starting at index |i| and size |s|. |
| 92 | + LIBC_INLINE Array make_array(size_t i, size_t s) const { |
| 93 | + return Array(get(i), s, elem_size, compare); |
126 | 94 | }
|
127 | 95 |
|
128 |
| - // Reset this Array to point at a different interval of the same |
129 |
| - // items starting at index |i|. |
130 |
| - LIBC_INLINE void reset_bounds(size_t i, size_t s) { |
131 |
| - array_base = get_internal(i); |
132 |
| - array_len = s; |
| 96 | + // Reset this Array to point at a different interval of the same items. |
| 97 | + LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) { |
| 98 | + array = a; |
| 99 | + array_size = s; |
133 | 100 | }
|
134 | 101 | };
|
135 | 102 |
|
| 103 | +using SortingRoutine = void(const Array &); |
| 104 | + |
136 | 105 | } // namespace internal
|
137 | 106 | } // namespace LIBC_NAMESPACE_DECL
|
138 | 107 |
|
|
0 commit comments