Skip to content
Open
114 changes: 51 additions & 63 deletions libcudacxx/include/cuda/__mdspan/host_device_accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@

#include <cuda/__driver/driver_api.h>
#include <cuda/__memory/address_space.h>
#include <cuda/__memory/is_pointer_accessible.h>
#include <cuda/std/__concepts/concept_macros.h>
#include <cuda/std/__cstddef/types.h>
#include <cuda/std/__iterator/concepts.h>
#include <cuda/std/__memory/pointer_traits.h>
#include <cuda/std/__type_traits/always_false.h>
#include <cuda/std/__type_traits/is_constructible.h>
#include <cuda/std/__type_traits/is_convertible.h>
#include <cuda/std/__type_traits/is_default_constructible.h>
Expand All @@ -35,8 +36,6 @@
#include <cuda/std/__type_traits/is_nothrow_default_constructible.h>
#include <cuda/std/__utility/declval.h>
#include <cuda/std/__utility/move.h>
#include <cuda/std/cassert>
#include <cuda/std/cstddef>

#include <cuda/std/__cccl/prologue.h>

Expand Down Expand Up @@ -105,21 +104,13 @@ class __host_accessor : public _Accessor
noexcept(::cuda::std::declval<_Accessor>().offset(::cuda::std::declval<__data_handle_type>(), 0));

#if !_CCCL_COMPILER(NVRTC)
[[nodiscard]] _CCCL_HOST_API static constexpr bool
__is_host_accessible_pointer([[maybe_unused]] __data_handle_type __p) noexcept
[[nodiscard]]
_CCCL_HOST_API static constexpr bool __is_host_accessible_pointer([[maybe_unused]] __data_handle_type __p) noexcept
{
# if _CCCL_HAS_CTK()
if constexpr (::cuda::std::contiguous_iterator<__data_handle_type>)
{
_CCCL_IF_NOT_CONSTEVAL_DEFAULT
{
auto __p1 = ::cuda::std::to_address(__p);
::CUmemorytype __type{};
const auto __status =
::cuda::__driver::__pointerGetAttributeNoThrow<::CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(__type, __p1);
return (__status != ::cudaSuccess) || __type == ::CU_MEMORYTYPE_HOST;
}
return true;
return ::cuda::is_host_accessible(::cuda::std::to_address(__p));
}
else
# endif // _CCCL_HAS_CTK()
Expand Down Expand Up @@ -202,25 +193,27 @@ class __host_accessor : public _Accessor
: _Accessor{__acc}
{}

_CCCL_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept(__is_access_noexcept)
_CCCL_API constexpr reference access(data_handle_type __p, ::cuda::std::size_t __i) const
noexcept(__is_access_noexcept)
{
NV_IF_ELSE_TARGET(
NV_IS_DEVICE,
(_CCCL_VERIFY(false, "cuda::__host_accessor cannot be used in DEVICE code");),
(_CCCL_ASSERT(__is_host_accessible_pointer(__p), "cuda::__host_accessor data handle is not a HOST pointer");))
NV_IF_TARGET(NV_IS_DEVICE, (_CCCL_VERIFY(false, "cuda::__host_accessor cannot be used in DEVICE code");))
return _Accessor::access(__p, __i);
Comment on lines +199 to 200
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we dropping the check here? That seems like a regression.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is on purpose. We don't want to call a driver API on every access of an mdspan; that would be overkill.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: there are different ways to store that information during construction. We could either make it a boolean that is set during construction, or turn the pointer into a tagged pointer, although the latter would be much more involved.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to store any extra information. Checking the pointer validity during mdspan (not accessor) construction is enough.

}

[[nodiscard]] _CCCL_API constexpr data_handle_type offset(data_handle_type __p, size_t __i) const
[[nodiscard]] _CCCL_API constexpr data_handle_type offset(data_handle_type __p, ::cuda::std::size_t __i) const
noexcept(__is_offset_noexcept)
{
return _Accessor::offset(__p, __i);
}

[[nodiscard]] _CCCL_API constexpr bool
__detectably_invalid([[maybe_unused]] data_handle_type __p, size_t) const noexcept
__detectably_invalid([[maybe_unused]] data_handle_type __p, ::cuda::std::size_t) const noexcept
{
NV_IF_ELSE_TARGET(NV_IS_HOST, (return __is_host_accessible_pointer(__p);), (return false;))
_CCCL_IF_NOT_CONSTEVAL_DEFAULT
{
NV_IF_ELSE_TARGET(NV_IS_HOST, (return __is_host_accessible_pointer(__p);), (return false;))
}
return true;
}
};

Expand All @@ -242,29 +235,26 @@ class __device_accessor : public _Accessor
static constexpr bool __is_offset_noexcept =
noexcept(::cuda::std::declval<_Accessor>().offset(::cuda::std::declval<__data_handle_type>(), 0));

[[nodiscard]] _CCCL_API static constexpr bool
[[nodiscard]] _CCCL_API static bool
__is_device_accessible_pointer_from_host([[maybe_unused]] __data_handle_type __p) noexcept
{
#if _CCCL_HAS_CTK()
#if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)
if constexpr (::cuda::std::contiguous_iterator<__data_handle_type>)
{
auto __p1 = ::cuda::std::to_address(__p);
::CUmemorytype __type{};
const auto __status =
::cuda::__driver::__pointerGetAttributeNoThrow<::CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(__type, __p1);
return (__status != ::cudaSuccess) || __type == ::CU_MEMORYTYPE_DEVICE;
static const auto __dev_id = static_cast<int>(::cuda::__driver::__ctxGetDevice());
return ::cuda::is_device_accessible(::cuda::std::to_address(__p), ::cuda::device_ref{__dev_id});
Comment on lines +244 to +245
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not correct: we don't want to check for the current device. We should just check whether the pointer is accessible from any device here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You suggested adding `device_ref` at the time. It makes sense, but it also makes sense to have a relaxed version without it, as I initially proposed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

created #6918

}
else
#endif // _CCCL_HAS_CTK()
#endif // _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)
{
return true; // cannot be verified
}
}

#if _CCCL_DEVICE_COMPILATION()

[[nodiscard]] _CCCL_HIDE_FROM_ABI _CCCL_DEVICE static constexpr bool
__is_device_accessible_pointer_from_device(__data_handle_type __p) noexcept
[[nodiscard]]
_CCCL_DEVICE_API static constexpr bool __is_device_accessible_pointer_from_device(__data_handle_type __p) noexcept
{
return ::cuda::device::is_address_from(__p, ::cuda::device::address_space::global)
|| ::cuda::device::is_address_from(__p, ::cuda::device::address_space::shared)
Expand All @@ -276,12 +266,6 @@ class __device_accessor : public _Accessor

#endif // _CCCL_DEVICE_COMPILATION()

_CCCL_API static constexpr void __check_device_pointer([[maybe_unused]] __data_handle_type __p) noexcept
{
NV_IF_TARGET(NV_IS_HOST,
(_CCCL_ASSERT(__is_device_accessible_pointer_from_host(__p), "The pointer is not device accessible");))
}

public:
using offset_policy = __device_accessor<typename _Accessor::offset_policy>;
using data_handle_type = __data_handle_type;
Expand Down Expand Up @@ -355,24 +339,34 @@ class __device_accessor : public _Accessor
: _Accessor{__acc}
{}

_CCCL_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept(__is_access_noexcept)
_CCCL_API constexpr reference access(data_handle_type __p, ::cuda::std::size_t __i) const
noexcept(__is_access_noexcept)
{
NV_IF_ELSE_TARGET(
NV_IS_DEVICE,
(_CCCL_ASSERT(__is_device_accessible_pointer_from_device(__p), "The pointer is not device accessible");),
(_CCCL_VERIFY(false, "cuda::device_accessor cannot be used in HOST code");))
_CCCL_IF_NOT_CONSTEVAL_DEFAULT
{
NV_IF_ELSE_TARGET(
NV_IS_DEVICE,
(_CCCL_ASSERT(__is_device_accessible_pointer_from_device(__p), "The pointer is not device accessible");),
(_CCCL_VERIFY(false, "cuda::device_accessor cannot be used in HOST code");))
}
return _Accessor::access(__p, __i);
}

[[nodiscard]] _CCCL_API constexpr data_handle_type offset(data_handle_type __p, size_t __i) const
[[nodiscard]] _CCCL_API constexpr data_handle_type offset(data_handle_type __p, ::cuda::std::size_t __i) const
noexcept(__is_offset_noexcept)
{
return _Accessor::offset(__p, __i);
}

[[nodiscard]] _CCCL_API constexpr bool __detectably_invalid(data_handle_type __p, size_t) const noexcept
[[nodiscard]] _CCCL_API constexpr bool __detectably_invalid(data_handle_type __p, ::cuda::std::size_t) const noexcept
{
NV_IF_ELSE_TARGET(NV_IS_HOST, (return __is_device_accessible_pointer_from_host(__p);), (return false;))
_CCCL_IF_NOT_CONSTEVAL_DEFAULT
{
NV_IF_ELSE_TARGET(NV_IS_HOST,
(return __is_device_accessible_pointer_from_host(__p);),
(return __is_device_accessible_pointer_from_device(__p);))
}
return true;
}
};

Expand All @@ -396,27 +390,18 @@ class __managed_accessor : public _Accessor

[[nodiscard]] _CCCL_API static constexpr bool __is_managed_pointer([[maybe_unused]] __data_handle_type __p) noexcept
{
#if _CCCL_HAS_CTK()
#if _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)
if constexpr (::cuda::std::contiguous_iterator<__data_handle_type>)
{
const auto __p1 = ::cuda::std::to_address(__p);
bool __is_managed{};
const auto __status =
::cuda::__driver::__pointerGetAttributeNoThrow<::CU_POINTER_ATTRIBUTE_IS_MANAGED>(__is_managed, __p1);
return (__status != ::cudaSuccess) || __is_managed;
return ::cuda::is_managed(::cuda::std::to_address(__p));
}
else
#endif // _CCCL_HAS_CTK()
#endif // _CCCL_HAS_CTK() && !_CCCL_COMPILER(NVRTC)
{
return true; // cannot be verified
}
}

_CCCL_API static constexpr void __check_managed_pointer([[maybe_unused]] __data_handle_type __p) noexcept
{
_CCCL_ASSERT(__is_managed_pointer(__p), "cuda::__managed_accessor data handle is not a MANAGED pointer");
}

public:
using offset_policy = __managed_accessor<typename _Accessor::offset_policy>;
using data_handle_type = __data_handle_type;
Expand Down Expand Up @@ -477,22 +462,25 @@ class __managed_accessor : public _Accessor
: _Accessor{::cuda::std::move(__acc)}
{}

_CCCL_API constexpr reference access(data_handle_type __p, size_t __i) const noexcept(__is_access_noexcept)
_CCCL_API constexpr reference access(data_handle_type __p, ::cuda::std::size_t __i) const
noexcept(__is_access_noexcept)
{
NV_IF_TARGET(NV_IS_HOST, (__check_managed_pointer(__p);))
return _Accessor::access(__p, __i);
}

[[nodiscard]] _CCCL_API constexpr data_handle_type offset(data_handle_type __p, size_t __i) const
[[nodiscard]] _CCCL_API constexpr data_handle_type offset(data_handle_type __p, ::cuda::std::size_t __i) const
noexcept(__is_offset_noexcept)
{
return _Accessor::offset(__p, __i);
}

[[nodiscard]] _CCCL_API constexpr bool
__detectably_invalid([[maybe_unused]] data_handle_type __p, size_t) const noexcept
__detectably_invalid([[maybe_unused]] data_handle_type __p, ::cuda::std::size_t) const noexcept
{
NV_IF_ELSE_TARGET(NV_IS_HOST, (return __is_managed_pointer(__p);), (return false;))
_CCCL_IF_NOT_CONSTEVAL_DEFAULT
{
NV_IF_ELSE_TARGET(NV_IS_HOST, (return __is_managed_pointer(__p);), (return true;))
}
}
};

Expand Down
34 changes: 30 additions & 4 deletions libcudacxx/include/cuda/std/__mdspan/concepts.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,15 @@
#include <cuda/std/__tuple_dir/tuple_like.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__type_traits/is_convertible.h>
#include <cuda/std/__type_traits/is_integral.h>
#include <cuda/std/__type_traits/is_move_assignable.h>
#include <cuda/std/__type_traits/is_nothrow_constructible.h>
#include <cuda/std/__type_traits/is_nothrow_move_assignable.h>
#include <cuda/std/__type_traits/is_nothrow_move_constructible.h>
#include <cuda/std/__type_traits/is_same.h>
#include <cuda/std/__type_traits/is_signed.h>
#include <cuda/std/__type_traits/is_swappable.h>
#include <cuda/std/__type_traits/is_unsigned.h>
#include <cuda/std/__type_traits/remove_const.h>
#include <cuda/std/__type_traits/remove_cvref.h>
#include <cuda/std/__type_traits/void_t.h>
#include <cuda/std/__utility/declval.h>
#include <cuda/std/span>

#include <cuda/std/__cccl/prologue.h>
Expand Down Expand Up @@ -130,6 +129,33 @@ _CCCL_CONCEPT __index_pair_like = _CCCL_REQUIRES_EXPR((_Tp, _IndexType))(
template <class _Tp>
_CCCL_CONCEPT __index_like = is_signed_v<_Tp> || is_unsigned_v<_Tp> || __integral_constant_like<_Tp>;

#if _CCCL_HAS_CONCEPTS()

//! Detects whether an accessor policy provides the member function
//! `__detectably_invalid(data_handle_type, size_t)`.
//!
//! The probed member is `__detectably_invalid`, the name implemented by the accessors in
//! `cuda/__mdspan/host_device_accessor.h`. Probing for a member named after this trait itself would never match.
//!
//! @tparam _AccessorPolicy accessor policy type; must expose a nested `data_handle_type`
template <class _AccessorPolicy>
_CCCL_CONCEPT __has_detect_invalidity = requires(_AccessorPolicy __ap) {
  __ap.__detectably_invalid(
    ::cuda::std::declval<typename _AccessorPolicy::data_handle_type>(), ::cuda::std::declval<size_t>());
};

#else // ^^^ _CCCL_HAS_CONCEPTS() ^^^ / vvv !_CCCL_HAS_CONCEPTS() vvv

//! Primary template: selected when the `__detectably_invalid` call below is ill-formed.
template <typename, typename = void>
struct __has_detect_invalidity_s : ::cuda::std::false_type
{};

//! Specialization: selected when `__detectably_invalid(data_handle_type, size_t)` is callable on the policy.
template <typename _AccessorPolicy>
struct __has_detect_invalidity_s<
  _AccessorPolicy,
  ::cuda::std::void_t<decltype(::cuda::std::declval<_AccessorPolicy>().__detectably_invalid(
    ::cuda::std::declval<typename _AccessorPolicy::data_handle_type>(), ::cuda::std::declval<size_t>()))>>
    : ::cuda::std::true_type
{};

//! Variable-template spelling of the detection above, so both configurations expose the same name.
template <typename _AccessorPolicy>
inline constexpr bool __has_detect_invalidity = __has_detect_invalidity_s<_AccessorPolicy>::value;

#endif // ^^^ !_CCCL_HAS_CONCEPTS() ^^^

_CCCL_END_NAMESPACE_CUDA_STD

#include <cuda/std/__cccl/epilogue.h>
Expand Down
Loading
Loading