Skip to content

Commit d634e51

Browse files
committed
SIMD: Resolve Highway QSort symbol linking error on aarch32/ASIMD
The fix in numpy/meson#12 for ASIMD* (32-bit) compile-time feature detection revealed a new build error on aarch32 platforms: "ImportError: /numpy/build-install/usr/lib/python3/dist-packages/numpy/_core/_multiarray_umath.cpython-310-arm-linux-gnueabihf.so: undefined symbol: _ZN2np7highway10qsort_simd11QSort_ASIMDIjEEvPT_i". This patch prevents Highway's platform-detection constants from being exposed across translation units that are compiled with different compiler flags (baseline). This eliminates the detection mismatches that were causing symbol-resolution failures in the Highway QSort implementation.
1 parent 295e2d5 commit d634e51

File tree

4 files changed

+46
-49
lines changed

4 files changed

+46
-49
lines changed

numpy/_core/src/npysort/highway_qsort.dispatch.cpp

+21-16
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,27 @@
1+
#define VQSORT_ONLY_STATIC 1
2+
#include "hwy/highway.h"
3+
#include "hwy/contrib/sort/vqsort-inl.h"
4+
15
#include "highway_qsort.hpp"
6+
#include "quicksort.hpp"
27

8+
namespace np::highway::qsort_simd {
9+
template <typename T>
10+
void NPY_CPU_DISPATCH_CURFX(QSort)(T *arr, npy_intp size)
11+
{
312
#if VQSORT_ENABLED
13+
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
14+
#else
15+
sort::Quick(arr, size);
16+
#endif
17+
}
418

5-
#define DISPATCH_VQSORT(TYPE) \
6-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(TYPE *arr, intptr_t size) \
7-
{ \
8-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending()); \
9-
} \
10-
11-
namespace np { namespace highway { namespace qsort_simd {
12-
13-
DISPATCH_VQSORT(int32_t)
14-
DISPATCH_VQSORT(uint32_t)
15-
DISPATCH_VQSORT(int64_t)
16-
DISPATCH_VQSORT(uint64_t)
17-
DISPATCH_VQSORT(double)
18-
DISPATCH_VQSORT(float)
19+
template void NPY_CPU_DISPATCH_CURFX(QSort)<int32_t>(int32_t*, npy_intp);
20+
template void NPY_CPU_DISPATCH_CURFX(QSort)<uint32_t>(uint32_t*, npy_intp);
21+
template void NPY_CPU_DISPATCH_CURFX(QSort)<int64_t>(int64_t*, npy_intp);
22+
template void NPY_CPU_DISPATCH_CURFX(QSort)<uint64_t>(uint64_t*, npy_intp);
23+
template void NPY_CPU_DISPATCH_CURFX(QSort)<float>(float*, npy_intp);
24+
template void NPY_CPU_DISPATCH_CURFX(QSort)<double>(double*, npy_intp);
1925

20-
} } } // np::highway::qsort_simd
26+
} // np::highway::qsort_simd
2127

22-
#endif // VQSORT_ENABLED
+2-15
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,20 @@
11
#ifndef NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP
22
#define NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP
33

4-
#define VQSORT_ONLY_STATIC 1
5-
#include "hwy/highway.h"
6-
#include "hwy/contrib/sort/vqsort-inl.h"
7-
84
#include "common.hpp"
95

10-
#if !VQSORT_COMPILER_COMPATIBLE
11-
#define NPY_DISABLE_HIGHWAY_SORT
12-
#endif
13-
14-
#ifndef NPY_DISABLE_HIGHWAY_SORT
15-
namespace np { namespace highway { namespace qsort_simd {
6+
namespace np::highway::qsort_simd {
167

178
#ifndef NPY_DISABLE_OPTIMIZATION
189
#include "highway_qsort.dispatch.h"
1910
#endif
2011
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSort, (T *arr, npy_intp size))
21-
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp num, npy_intp kth))
22-
2312

2413
#ifndef NPY_DISABLE_OPTIMIZATION
2514
#include "highway_qsort_16bit.dispatch.h"
2615
#endif
2716
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSort, (T *arr, npy_intp size))
28-
NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp num, npy_intp kth))
2917

30-
} } } // np::highway::qsort_simd
18+
} // np::highway::qsort_simd
3119

3220
#endif // NUMPY_SRC_COMMON_NPYSORT_HWY_SIMD_QSORT_HPP
33-
#endif // NPY_DISABLE_HIGHWAY_SORT
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,33 @@
1-
#include "highway_qsort.hpp"
1+
#define VQSORT_ONLY_STATIC 1
2+
#include "hwy/highway.h"
3+
#include "hwy/contrib/sort/vqsort-inl.h"
24

5+
#include "highway_qsort.hpp"
36
#include "quicksort.hpp"
47

5-
#if VQSORT_ENABLED
6-
7-
namespace np { namespace highway { namespace qsort_simd {
8-
9-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, intptr_t size)
8+
namespace np::highway::qsort_simd {
9+
template <typename T>
10+
void NPY_CPU_DISPATCH_CURFX(QSort)(T *arr, npy_intp size)
1011
{
11-
#if HWY_HAVE_FLOAT16
12-
hwy::HWY_NAMESPACE::VQSortStatic(reinterpret_cast<hwy::float16_t*>(arr), size, hwy::SortAscending());
12+
#if VQSORT_ENABLED
13+
using THwy = std::conditional_t<std::is_same_v<T, Half>, hwy::float16_t, T>;
14+
hwy::HWY_NAMESPACE::VQSortStatic(reinterpret_cast<THwy*>(arr), size, hwy::SortAscending());
1315
#else
1416
sort::Quick(arr, size);
1517
#endif
1618
}
17-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, intptr_t size)
18-
{
19-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
20-
}
21-
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, intptr_t size)
19+
#if !HWY_HAVE_FLOAT16
20+
template <>
21+
void NPY_CPU_DISPATCH_CURFX(QSort)<Half>(Half *arr, npy_intp size)
2222
{
23-
hwy::HWY_NAMESPACE::VQSortStatic(arr, size, hwy::SortAscending());
23+
sort::Quick(arr, size);
2424
}
25+
#endif // !HWY_HAVE_FLOAT16
2526

26-
} } } // np::highway::qsort_simd
27+
template void NPY_CPU_DISPATCH_CURFX(QSort)<int16_t>(int16_t*, npy_intp);
28+
template void NPY_CPU_DISPATCH_CURFX(QSort)<uint16_t>(uint16_t*, npy_intp);
29+
#if HWY_HAVE_FLOAT16
30+
template void NPY_CPU_DISPATCH_CURFX(QSort)<Half>(Half*, npy_intp);
31+
#endif
2732

28-
#endif // VQSORT_ENABLED
33+
} // np::highway::qsort_simd

numpy/_core/src/npysort/quicksort.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ inline bool quicksort_dispatch(T *start, npy_intp num)
8484
#if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit
8585
#include "x86_simd_qsort_16bit.dispatch.h"
8686
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
87-
#elif !defined(NPY_DISABLE_HIGHWAY_SORT)
87+
#else
8888
#include "highway_qsort_16bit.dispatch.h"
8989
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, <TF>);
9090
#endif
@@ -95,7 +95,7 @@ inline bool quicksort_dispatch(T *start, npy_intp num)
9595
#if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86) // x86 32-bit and 64-bit
9696
#include "x86_simd_qsort.dispatch.h"
9797
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::qsort_simd::template QSort, <TF>);
98-
#elif !defined(NPY_DISABLE_HIGHWAY_SORT)
98+
#else
9999
#include "highway_qsort.dispatch.h"
100100
NPY_CPU_DISPATCH_CALL_XB(dispfunc = np::highway::qsort_simd::template QSort, <TF>);
101101
#endif

0 commit comments

Comments
 (0)