Skip to content

Commit 8e05119

Browse files
fthaler authored and havogt committed
Introduce ldg_ptr to Enable __ldg in Data Stores and simple_ptr_holder (#1802)
Enables LDG loads (`__ldg`) for data stores with a const data type and for neighbor tables. With NVCC, this yields performance improvements of up to 12% when using SID composites or dim2tuple on smaller domains. With Clang, performance improvements of up to 70% can be observed in the same cases.
1 parent b5fceda commit 8e05119

File tree

18 files changed

+87518
-87260
lines changed

18 files changed

+87518
-87260
lines changed
include/gridtools/common/ldg_ptr.hpp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* GridTools
3+
*
4+
* Copyright (c) 2014-2023, ETH Zurich
5+
* All rights reserved.
6+
*
7+
* Please, refer to the LICENSE file in the root directory.
8+
* SPDX-License-Identifier: BSD-3-Clause
9+
*/
10+
#pragma once
11+
12+
#include <cstddef>
13+
#include <type_traits>
14+
#include <utility>
15+
16+
#include "defs.hpp"
17+
#include "host_device.hpp"
18+
19+
#ifdef GT_CUDACC
20+
#include "cuda_type_traits.hpp"
21+
#endif
22+
23+
namespace gridtools {
24+
25+
#ifdef GT_CUDACC
26+
namespace impl_ {
27+
28+
template <class T>
29+
struct ldg_ptr {
30+
T const *m_ptr;
31+
32+
static_assert(is_texture_type<T>::value);
33+
34+
GT_FUNCTION constexpr T operator*() const {
35+
#ifdef GT_CUDA_ARCH
36+
return __ldg(m_ptr);
37+
#else
38+
return *m_ptr;
39+
#endif
40+
}
41+
42+
GT_FUNCTION constexpr ldg_ptr &operator+=(std::ptrdiff_t diff) {
43+
m_ptr += diff;
44+
return *this;
45+
}
46+
47+
GT_FUNCTION constexpr ldg_ptr &operator-=(std::ptrdiff_t diff) {
48+
m_ptr -= diff;
49+
return *this;
50+
}
51+
52+
friend GT_FUNCTION constexpr bool operator==(ldg_ptr const &a, ldg_ptr const &b) {
53+
return a.m_ptr == b.m_ptr;
54+
}
55+
friend GT_FUNCTION constexpr bool operator==(ldg_ptr const &a, T const *b) { return a.m_ptr == b; }
56+
friend GT_FUNCTION constexpr bool operator==(T const *a, ldg_ptr const &b) { return a == b.m_ptr; }
57+
58+
friend GT_FUNCTION constexpr bool operator!=(ldg_ptr const &a, ldg_ptr const &b) {
59+
return a.m_ptr != b.m_ptr;
60+
}
61+
friend GT_FUNCTION constexpr bool operator!=(ldg_ptr const &a, T const *b) { return a.m_ptr != b; }
62+
friend GT_FUNCTION constexpr bool operator!=(T const *a, ldg_ptr const &b) { return a != b.m_ptr; }
63+
64+
friend GT_FUNCTION constexpr ldg_ptr &operator++(ldg_ptr &ptr) {
65+
++ptr.m_ptr;
66+
return ptr;
67+
}
68+
69+
friend GT_FUNCTION constexpr ldg_ptr &operator--(ldg_ptr &ptr) {
70+
--ptr.m_ptr;
71+
return ptr;
72+
}
73+
74+
friend GT_FUNCTION constexpr ldg_ptr operator++(ldg_ptr &ptr, int) {
75+
ldg_ptr p = ptr;
76+
++ptr.m_ptr;
77+
return p;
78+
}
79+
80+
friend GT_FUNCTION constexpr ldg_ptr operator--(ldg_ptr &ptr, int) {
81+
ldg_ptr p = ptr;
82+
--ptr.m_ptr;
83+
return p;
84+
}
85+
86+
friend GT_FUNCTION constexpr ldg_ptr operator+(ldg_ptr const &ptr, std::ptrdiff_t diff) {
87+
return {ptr.m_ptr + diff};
88+
}
89+
90+
friend GT_FUNCTION constexpr ldg_ptr operator-(ldg_ptr const &ptr, std::ptrdiff_t diff) {
91+
return {ptr.m_ptr - diff};
92+
}
93+
94+
friend GT_FUNCTION constexpr std::ptrdiff_t operator-(ldg_ptr const &ptr, ldg_ptr const &other) {
95+
return ptr.m_ptr - other.m_ptr;
96+
}
97+
};
98+
} // namespace impl_
99+
100+
template <class T>
101+
GT_FUNCTION constexpr std::enable_if_t<is_texture_type<T>::value, impl_::ldg_ptr<T>> as_ldg_ptr(T const *ptr) {
102+
return {ptr};
103+
}
104+
105+
#endif
106+
107+
template <class T>
108+
GT_FUNCTION constexpr T &&as_ldg_ptr(T &&value) {
109+
return std::forward<T>(value);
110+
}
111+
112+
} // namespace gridtools

include/gridtools/fn/cartesian.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <functional>
1313

14+
#include "../common/ldg_ptr.hpp"
1415
#include "../common/tuple_util.hpp"
1516
#include "../sid/concept.hpp"
1617
#include "./common_interface.hpp"
@@ -44,7 +45,7 @@ namespace gridtools::fn {
4445

4546
template <class Tag, class Ptr, class Strides>
4647
GT_FUNCTION auto deref(iterator<Tag, Ptr, Strides> const &it) {
47-
return *it.m_ptr;
48+
return *as_ldg_ptr(it.m_ptr);
4849
}
4950

5051
template <class Tag, class Ptr, class Strides, class Dim, class Offset, class... Offsets>

include/gridtools/fn/neighbor_table.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <type_traits>
1313

14+
#include "../common/ldg_ptr.hpp"
1415
#include "../common/tuple_util.hpp"
1516
#include "../meta/logical.hpp"
1617

@@ -56,7 +57,7 @@ namespace gridtools::fn::neighbor_table {
5657

5758
template <class T, std::enable_if_t<is_neighbor_list<T>::value, int> = 0>
5859
GT_FUNCTION T const &neighbor_table_neighbors(T const *table, int index) {
59-
return table[index];
60+
return *as_ldg_ptr(&table[index]);
6061
}
6162

6263
template <class NeighborTable>

include/gridtools/fn/sid_neighbor_table.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <type_traits>
1414

1515
#include "../common/array.hpp"
16+
#include "../common/ldg_ptr.hpp"
1617
#include "../fn/unstructured.hpp"
1718
#include "../sid/concept.hpp"
1819

@@ -46,7 +47,7 @@ namespace gridtools::fn::sid_neighbor_table {
4647

4748
sid::shift(ptr, sid::get_stride<IndexDimension>(table.strides), index);
4849
for (std::size_t element_idx = 0; element_idx < MaxNumNeighbors; ++element_idx) {
49-
neighbors[element_idx] = *ptr;
50+
neighbors[element_idx] = *as_ldg_ptr(ptr);
5051
sid::shift(ptr, sid::get_stride<NeighborDimension>(table.strides), 1_c);
5152
}
5253
return neighbors;

include/gridtools/fn/unstructured.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "../common/defs.hpp"
1515
#include "../common/hymap.hpp"
16+
#include "../common/ldg_ptr.hpp"
1617
#include "../meta/logical.hpp"
1718
#include "../sid/concept.hpp"
1819
#include "../stencil/positional.hpp"
@@ -80,7 +81,7 @@ namespace gridtools::fn {
8081
GT_FUNCTION constexpr auto deref(iterator<Tag, Ptr, Strides, Domain> const &it) {
8182
GT_PROMISE(can_deref(it));
8283
decltype(auto) stride = host_device::at_key<Tag>(sid::get_stride<dim::horizontal>(it.m_strides));
83-
return *sid::shifted(it.m_ptr, stride, it.m_index);
84+
return *as_ldg_ptr(sid::shifted(it.m_ptr, stride, it.m_index));
8485
}
8586

8687
template <class Tag, class Ptr, class Strides, class Domain, class Conn, class Offset>

include/gridtools/sid/simple_ptr_holder.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "../common/defs.hpp"
1818
#include "../common/host_device.hpp"
19+
#include "../common/ldg_ptr.hpp"
1920

2021
#define GT_FILENAME <gridtools/sid/simple_ptr_holder.hpp>
2122
#include GT_ITERATE_ON_TARGETS()
@@ -38,7 +39,7 @@ namespace gridtools {
3839
simple_ptr_holder() = default;
3940
GT_TARGET GT_FORCE_INLINE constexpr simple_ptr_holder(T const &ptr) : m_val{ptr} {}
4041
#endif
41-
GT_TARGET GT_FORCE_INLINE constexpr T const &operator()() const { return m_val; }
42+
GT_TARGET GT_FORCE_INLINE constexpr decltype(auto) operator()() const { return as_ldg_ptr(m_val); }
4243
};
4344

4445
template <class T>

include/gridtools/stencil/gpu/entry_point.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "../../common/defs.hpp"
1919
#include "../../common/hymap.hpp"
2020
#include "../../common/integral_constant.hpp"
21+
#include "../../common/ldg_ptr.hpp"
2122
#include "../../common/tuple_util.hpp"
2223
#include "../../meta.hpp"
2324
#include "../../sid/allocator.hpp"
@@ -132,13 +133,11 @@ namespace gridtools {
132133

133134
template <class Keys>
134135
struct deref_f {
135-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
136136
template <class Key, class T>
137-
GT_FUNCTION std::enable_if_t<is_texture_type<T>::value && meta::st_contains<Keys, Key>::value, T>
138-
operator()(Key, T const *ptr) const {
139-
return __ldg(ptr);
137+
GT_FUNCTION std::enable_if_t<meta::st_contains<Keys, Key>::value, T> operator()(
138+
Key, T const *ptr) const {
139+
return *as_ldg_ptr(ptr);
140140
}
141-
#endif
142141
template <class Key, class Ptr>
143142
GT_FUNCTION decltype(auto) operator()(Key, Ptr ptr) const {
144143
return *ptr;

include/gridtools/stencil/gpu_horizontal/entry_point.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "../../common/host_device.hpp"
2121
#include "../../common/hymap.hpp"
2222
#include "../../common/integral_constant.hpp"
23+
#include "../../common/ldg_ptr.hpp"
2324
#include "../../common/tuple_util.hpp"
2425
#include "../../meta.hpp"
2526
#include "../../sid/as_const.hpp"
@@ -41,13 +42,11 @@ namespace gridtools {
4142
namespace gpu_horizontal_backend {
4243
template <class Keys>
4344
struct deref_f {
44-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
4545
template <class Key, class T>
46-
GT_FUNCTION std::enable_if_t<is_texture_type<T>::value && meta::st_contains<Keys, Key>::value, T>
47-
operator()(Key, T const *ptr) const {
48-
return __ldg(ptr);
46+
GT_FUNCTION std::enable_if_t<meta::st_contains<Keys, Key>::value, T> operator()(
47+
Key, T const *ptr) const {
48+
return *as_ldg_ptr(ptr);
4949
}
50-
#endif
5150
template <class Key, class Ptr>
5251
GT_FUNCTION decltype(auto) operator()(Key, Ptr ptr) const {
5352
return *ptr;

include/gridtools/storage/sid.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "../common/hymap.hpp"
1919
#include "../common/integral_constant.hpp"
2020
#include "../common/layout_map.hpp"
21+
#include "../common/ldg_ptr.hpp"
2122
#include "../common/tuple.hpp"
2223
#include "../common/tuple_util.hpp"
2324
#include "../meta.hpp"
@@ -36,7 +37,7 @@ namespace gridtools {
3637
template <class T>
3738
struct ptr_holder {
3839
T *m_val;
39-
GT_FUNCTION constexpr T *operator()() const { return m_val; }
40+
GT_FUNCTION constexpr auto operator()() const { return as_ldg_ptr(m_val); }
4041

4142
friend GT_FORCE_INLINE constexpr ptr_holder operator+(ptr_holder obj, int_t arg) {
4243
return {obj.m_val + arg};

0 commit comments

Comments
 (0)