Skip to content

Commit 4c8569c

Browse files
authored
[SYCL] Fix handling of unsupported alignment by aligned_alloc_xxx<T> (#12569)
Implementation is supposed to return `nullptr` when the requested alignment is not supported. Since our runtime performs all allocations through Unified Runtime, that means for us that any alignment which is not a power of two is unsupported. Note that the resulting alignment may not be the same as the requested one (per the SYCL 2020 specification) and therefore we can't just rely on the return value of the underlying non-templated version of the alloc function and have to perform the check explicitly. There is an issue with some backends not properly returning an error on an unsupported alignment; it was reported in #11642 --------- Signed-off-by: Hu, Peisen <[email protected]>
1 parent b1d81d7 commit 4c8569c

File tree

2 files changed

+133
-0
lines changed

2 files changed

+133
-0
lines changed

sycl/include/sycl/usm.hpp

+31
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,13 @@ __SYCL_EXPORT void *aligned_alloc(
154154
const property_list &propList,
155155
const detail::code_location &CodeLoc = detail::code_location::current());
156156

157+
///
// Helper function used to reject unsupported alignments: returns a non-zero
// value when the Alignment argument is NOT a power of 2 and zero otherwise.
// An Alignment of 0 also yields zero, i.e. it is not flagged as unsupported.
///
inline constexpr size_t is_not_power_of_two(size_t Alignment) noexcept {
  // Alignment & (Alignment - 1) clears the lowest set bit, so the result is
  // zero only when at most one bit was set in Alignment.
  return Alignment & (Alignment - 1);
}
163+
157164
///
158165
// Template forms
159166
///
@@ -179,6 +186,9 @@ T *aligned_alloc_device(
179186
size_t Alignment, size_t Count, const device &Dev, const context &Ctxt,
180187
const property_list &PropList = {},
181188
const detail::code_location &CodeLoc = detail::code_location::current()) {
189+
if (is_not_power_of_two(Alignment)) {
190+
return nullptr;
191+
}
182192
return static_cast<T *>(aligned_alloc_device(max(Alignment, alignof(T)),
183193
Count * sizeof(T), Dev, Ctxt,
184194
PropList, CodeLoc));
@@ -189,6 +199,9 @@ T *aligned_alloc_device(
189199
size_t Alignment, size_t Count, const queue &Q,
190200
const property_list &PropList = {},
191201
const detail::code_location &CodeLoc = detail::code_location::current()) {
202+
if (is_not_power_of_two(Alignment)) {
203+
return nullptr;
204+
}
192205
return aligned_alloc_device<T>(Alignment, Count, Q.get_device(),
193206
Q.get_context(), PropList, CodeLoc);
194207
}
@@ -230,6 +243,9 @@ T *aligned_alloc_host(
230243
size_t Alignment, size_t Count, const context &Ctxt,
231244
const property_list &PropList = {},
232245
const detail::code_location &CodeLoc = detail::code_location::current()) {
246+
if (is_not_power_of_two(Alignment)) {
247+
return nullptr;
248+
}
233249
return static_cast<T *>(aligned_alloc_host(std ::max(Alignment, alignof(T)),
234250
Count * sizeof(T), Ctxt, PropList,
235251
CodeLoc));
@@ -240,6 +256,9 @@ T *aligned_alloc_host(
240256
size_t Alignment, size_t Count, const queue &Q,
241257
const property_list &PropList = {},
242258
const detail::code_location &CodeLoc = detail::code_location::current()) {
259+
if (is_not_power_of_two(Alignment)) {
260+
return nullptr;
261+
}
243262
return aligned_alloc_host<T>(Alignment, Count, Q.get_context(), PropList,
244263
CodeLoc);
245264
}
@@ -249,6 +268,9 @@ T *aligned_alloc_shared(
249268
size_t Alignment, size_t Count, const device &Dev, const context &Ctxt,
250269
const property_list &PropList = {},
251270
const detail::code_location &CodeLoc = detail::code_location::current()) {
271+
if (is_not_power_of_two(Alignment)) {
272+
return nullptr;
273+
}
252274
return static_cast<T *>(aligned_alloc_shared(max(Alignment, alignof(T)),
253275
Count * sizeof(T), Dev, Ctxt,
254276
PropList, CodeLoc));
@@ -259,6 +281,9 @@ T *aligned_alloc_shared(
259281
size_t Alignment, size_t Count, const queue &Q,
260282
const property_list &PropList = {},
261283
const detail::code_location &CodeLoc = detail::code_location::current()) {
284+
if (is_not_power_of_two(Alignment)) {
285+
return nullptr;
286+
}
262287
return aligned_alloc_shared<T>(Alignment, Count, Q.get_device(),
263288
Q.get_context(), PropList, CodeLoc);
264289
}
@@ -286,6 +311,9 @@ T *aligned_alloc(
286311
size_t Alignment, size_t Count, const device &Dev, const context &Ctxt,
287312
usm::alloc Kind, const property_list &PropList = {},
288313
const detail::code_location &CodeLoc = detail::code_location::current()) {
314+
if (is_not_power_of_two(Alignment)) {
315+
return nullptr;
316+
}
289317
return static_cast<T *>(aligned_alloc(max(Alignment, alignof(T)),
290318
Count * sizeof(T), Dev, Ctxt, Kind,
291319
PropList, CodeLoc));
@@ -296,6 +324,9 @@ T *aligned_alloc(
296324
size_t Alignment, size_t Count, const queue &Q, usm::alloc Kind,
297325
const property_list &PropList = {},
298326
const detail::code_location &CodeLoc = detail::code_location::current()) {
327+
if (is_not_power_of_two(Alignment)) {
328+
return nullptr;
329+
}
299330
return aligned_alloc<T>(Alignment, Count, Q.get_device(), Q.get_context(),
300331
Kind, PropList, CodeLoc);
301332
}

sycl/test-e2e/USM/align.cpp

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
// UNSUPPORTED: gpu
5+
6+
// E2E tests for annotated USM allocation functions with alignment arguments
7+
// that are not powers of 2. Note this test does not work on gpu because some
8+
// tests expect non-templated aligned_alloc_xxx functions to return nullptr,
9+
// e.g. when the alignment argument is not a power of 2, while they fail to do
10+
// so when run on gpu. This may be because the gpu runtime has different
11+
// behavior. Therefore, GPU is unsupported until issue #12638 gets resolved.
12+
13+
#include <sycl/sycl.hpp>
14+
15+
#include <complex>
16+
#include <numeric>
17+
18+
using namespace sycl;
19+
using namespace ext::oneapi::experimental;
20+
using namespace ext::intel::experimental;
21+
using alloc = usm::alloc;
22+
23+
// Checks that every aligned_alloc_xxx entry point, both the non-templated
// forms and the aligned_alloc_xxx<T> forms, returns nullptr when it is given
// an alignment that is not a power of 2.
//
// T     - element type used for the templated allocation functions.
// q     - queue supplying the device and context for the allocations; it is
//         also used to free any pointer that was unexpectedly returned.
// align - must be a non-zero power of 2; it is only validated by the assert
//         below (the lambdas take their own `align` parameter).
template <typename T> void testAlign(sycl::queue &q, unsigned align) {
  const sycl::context &Ctx = q.get_context();
  auto dev = q.get_device();

  // Forwarded as the second argument of every allocation call below.
  constexpr int N = 10;
  // Both conditions must hold. With `||` the check passed for every non-zero
  // value and therefore verified nothing.
  assert(align > 0 && (align & (align - 1)) == 0);

  // Wrappers around the non-templated allocation functions.
  auto ADevice = [&](size_t align, auto... args) {
    return aligned_alloc_device(align, N, args...);
  };
  auto AHost = [&](size_t align, auto... args) {
    return aligned_alloc_host(align, N, args...);
  };
  auto AShared = [&](size_t align, auto... args) {
    return aligned_alloc_shared(align, N, args...);
  };
  auto AAnnotated = [&](size_t align, auto... args) {
    return aligned_alloc(align, N, args...);
  };

  // Wrappers around the templated allocation functions.
  auto ATDevice = [&](size_t align, auto... args) {
    return aligned_alloc_device<T>(align, N, args...);
  };
  auto ATHost = [&](size_t align, auto... args) {
    return aligned_alloc_host<T>(align, N, args...);
  };
  auto ATShared = [&](size_t align, auto... args) {
    return aligned_alloc_shared<T>(align, N, args...);
  };
  auto ATAnnotated = [&](size_t align, auto... args) {
    return aligned_alloc<T>(align, N, args...);
  };

  // Test cases that are expected to return null
  auto check_null = [&q](auto AllocFn, int Line, int Case) {
    decltype(AllocFn()) Ptr = AllocFn();
    if (Ptr != nullptr) {
      // Release the unexpected allocation before reporting the failure.
      free(Ptr, q);
      std::cout << "Failed at line " << Line << ", case " << Case << std::endl;
      assert(false && "The return is not null!");
    }
  };

  // Runs check_null on every callable in the tuple, numbering the cases in
  // order. Line defaults to __builtin_LINE() so a failure message can point at
  // the CheckNullAll call.
  auto CheckNullAll = [&](auto Funcs, int Line = __builtin_LINE()) {
    std::apply(
        [&](auto... Fs) {
          int Case = 0;
          // A braced initializer list evaluates its elements left to right,
          // so the cases are checked in the order they were listed.
          (void)std::initializer_list<int>{
              (check_null(Fs, Line, Case++), 0)...};
        },
        Funcs);
  };

  CheckNullAll(std::tuple{
      // Case: aligned_alloc_xxx with no alignment property, and the alignment
      // argument is not a power of 2, the result is nullptr
      [&]() { return ADevice(3, q); }, [&]() { return ADevice(5, dev, Ctx); },
      [&]() { return AHost(7, q); }, [&]() { return AHost(9, Ctx); },
      [&]() { return AShared(114, q); },
      [&]() { return AShared(1023, dev, Ctx); },
      [&]() { return AAnnotated(15, q, alloc::device); },
      [&]() { return AAnnotated(17, dev, Ctx, alloc::host); }
      // Case: aligned_alloc_xxx<T> with no alignment property, and the
      // alignment argument is not a power of 2, the result is nullptr
      ,
      [&]() { return ATDevice(3, q); }, [&]() { return ATDevice(5, dev, Ctx); },
      [&]() { return ATHost(7, q); }, [&]() { return ATHost(9, Ctx); },
      [&]() { return ATShared(1919, q); },
      [&]() { return ATShared(11, dev, Ctx); },
      [&]() { return ATAnnotated(15, q, alloc::device); },
      [&]() { return ATAnnotated(17, dev, Ctx, alloc::host); }});
}
95+
96+
// Runs the alignment checks on a default-constructed queue for three element
// types, passing a power-of-2 alignment each time.
int main() {
  sycl::queue Queue;

  testAlign<char>(Queue, 4);
  testAlign<int>(Queue, 128);
  testAlign<std::complex<double>>(Queue, 4);

  return 0;
}

0 commit comments

Comments
 (0)