Skip to content

Commit 89e10d2

Browse files
authored
Merge branch 'branch-25.06' into cagra-fix-nnd-init
2 parents 3399927 + 23ba17b commit 89e10d2

11 files changed

Lines changed: 238 additions & 24 deletions

File tree

ci/build_wheel.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ if [[ "${package_dir}" != "python/libcuvs" ]]; then
3535
)
3636
fi
3737

38+
RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
39+
if [[ ${RAPIDS_CUDA_MAJOR} != "11" ]]; then
40+
EXCLUDE_ARGS+=(
41+
--exclude "libnccl.so.*"
42+
)
43+
export SKBUILD_CMAKE_ARGS="-DUSE_NCCL_RUNTIME_WHEEL=ON"
44+
fi
45+
3846
rapids-logger "Building '${package_name}' wheel"
3947

4048
sccache --zero-stats

ci/test_wheel_cuvs.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33

44
set -euo pipefail
55

6+
# Delete system libnccl.so to ensure the wheel is used
7+
RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
8+
if [[ ${RAPIDS_CUDA_MAJOR} != "11" ]]; then
9+
rm -rf /usr/lib64/libnccl*
10+
fi
11+
612
mkdir -p ./dist
713
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
814
RAPIDS_PY_WHEEL_NAME="libcuvs_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libcuvs-dep

cpp/include/cuvs/core/detail/interop.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,4 +141,30 @@ inline bool is_c_contiguous(DLManagedTensor* managed_tensor)
141141
return true;
142142
}
143143

144+
#pragma GCC diagnostic push
145+
#pragma GCC diagnostic ignored "-Wunused-function"
146+
static void free_dlmanaged_tensor_shape(DLManagedTensor* tensor)
147+
{
148+
delete[] tensor->dl_tensor.shape;
149+
}
150+
#pragma GCC diagnostic pop
151+
152+
template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
153+
static void to_dlpack(MdspanType src, DLManagedTensor* dst)
154+
{
155+
auto tensor = &dst->dl_tensor;
156+
157+
tensor->dtype = data_type_to_DLDataType<typename MdspanType::value_type>();
158+
tensor->device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
159+
tensor->ndim = MdspanType::extents_type::rank();
160+
tensor->data = src.data_handle();
161+
162+
tensor->shape = new int64_t[tensor->ndim];
163+
dst->deleter = free_dlmanaged_tensor_shape;
164+
165+
for (int64_t i = 0; i < tensor->ndim; ++i) {
166+
tensor->shape[i] = src.extent(i);
167+
}
168+
}
169+
144170
} // namespace cuvs::core::detail

cpp/include/cuvs/core/interop.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_conti
6868
inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); }
6969

7070
/**
71-
* @brief Convert a DLManagedTensor to an mdspan
71+
* @brief Convert a DLManagedTensor to a mdspan
7272
* NOTE: This function only supports compact row-major and col-major layouts.
7373
*
7474
* @code {.cpp}
@@ -93,6 +93,19 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
9393
return detail::from_dlpack<MdspanType>(managed_tensor);
9494
}
9595

96+
/**
97+
* @brief Convert a mdspan to a DLManagedTensor
98+
*
99+
* Converts a mdspan to a DLManagedTensor object. This lets us pass non-owning
100+
* views from C++ to C code without copying. Note that returned DLManagedTensor
101+
* is a non-owning view, and doesn't ensure that the underlying memory stays valid.
102+
*/
103+
template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
104+
void to_dlpack(MdspanType src, DLManagedTensor* dst)
105+
{
106+
return detail::to_dlpack(src, dst);
107+
}
108+
96109
/**
97110
* @}
98111
*/

cpp/include/cuvs/neighbors/nn_descent.h

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,29 @@ cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
171171
/**
172172
* @brief Get the KNN graph from a built NN-Descent index
173173
*
174+
* @param[in] res cuvsResources_t opaque C handle
174175
* @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
175-
* @param[inout] graph Optional preallocated graph on host memory to store output
176+
* @param[out] graph Preallocated graph on host memory to store output
177+
* @return cuvsError_t
178+
*/
179+
cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
180+
cuvsNNDescentIndex_t index,
181+
DLManagedTensor* graph);
182+
183+
/**
184+
* @brief Get the distances from a build NN_Descent index
185+
*
186+
* This requires that the `return_distances` parameter was set when building the
187+
* graph
188+
*
189+
* @param[in] res cuvsResources_t opaque C handle
190+
* @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
191+
* @param[out] distances Preallocated memory to store the output distances tensor
176192
* @return cuvsError_t
177193
*/
178-
cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index, DLManagedTensor* graph);
194+
cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
195+
cuvsNNDescentIndex_t index,
196+
DLManagedTensor* distances);
179197
#ifdef __cplusplus
180198
}
181199
#endif

cpp/src/neighbors/nn_descent_c.cpp

Lines changed: 71 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,57 @@ void* _build(cuvsResources_t res,
7171
RAFT_FAIL("dataset must be accessible on host or device memory");
7272
}
7373
}
74+
75+
template <typename output_mdspan_type>
76+
void _get_graph(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTensor* graph)
77+
{
78+
auto dtype = index->dtype;
79+
if ((dtype.code == kDLUInt) && (dtype.bits == 32)) {
80+
auto index_ptr = reinterpret_cast<cuvs::neighbors::nn_descent::index<uint32_t>*>(index->addr);
81+
auto dst = cuvs::core::from_dlpack<output_mdspan_type>(graph);
82+
auto src = index_ptr->graph();
83+
auto res_ptr = reinterpret_cast<raft::resources*>(res);
84+
85+
RAFT_EXPECTS(src.extent(0) == dst.extent(0), "Output graph has incorrect number of rows");
86+
RAFT_EXPECTS(src.extent(1) == dst.extent(1), "Output graph has incorrect number of cols");
87+
88+
cudaMemcpyAsync(dst.data_handle(),
89+
src.data_handle(),
90+
dst.extent(0) * dst.extent(1) * sizeof(uint32_t),
91+
cudaMemcpyDefault,
92+
raft::resource::get_cuda_stream(*res_ptr));
93+
} else {
94+
RAFT_FAIL("Unsupported nn-descent index dtype: %d and bits: %d", dtype.code, dtype.bits);
95+
}
96+
}
97+
98+
template <typename output_mdspan_type>
99+
void _get_distances(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTensor* distances)
100+
{
101+
auto dtype = index->dtype;
102+
if ((dtype.code == kDLUInt) && (dtype.bits == 32)) {
103+
auto index_ptr = reinterpret_cast<cuvs::neighbors::nn_descent::index<uint32_t>*>(index->addr);
104+
auto src = index_ptr->distances();
105+
if (!src.has_value()) {
106+
RAFT_FAIL("nn-descent index doesn't contain distances - set return_distances when building");
107+
}
108+
109+
auto res_ptr = reinterpret_cast<raft::resources*>(res);
110+
auto dst = cuvs::core::from_dlpack<output_mdspan_type>(distances);
111+
112+
RAFT_EXPECTS(src->extent(0) == dst.extent(0), "Output distances has incorrect number of rows");
113+
RAFT_EXPECTS(src->extent(1) == dst.extent(1), "Output distances has incorrect number of cols");
114+
115+
cudaMemcpyAsync(dst.data_handle(),
116+
src->data_handle(),
117+
dst.extent(0) * dst.extent(1) * sizeof(float),
118+
cudaMemcpyDefault,
119+
raft::resource::get_cuda_stream(*res_ptr));
120+
121+
} else {
122+
RAFT_FAIL("Unsupported nn-descent index dtype: %d and bits: %d", dtype.code, dtype.bits);
123+
}
124+
}
74125
} // namespace
75126

76127
extern "C" cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index)
@@ -146,22 +197,32 @@ extern "C" cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_
146197
return cuvs::core::translate_exceptions([=] { delete params; });
147198
}
148199

149-
extern "C" cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index,
200+
extern "C" cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
201+
cuvsNNDescentIndex_t index,
150202
DLManagedTensor* graph)
151203
{
152204
return cuvs::core::translate_exceptions([=] {
153-
auto dtype = index->dtype;
154-
if ((dtype.code == kDLUInt) && (dtype.bits == 32)) {
155-
auto index_ptr = reinterpret_cast<cuvs::neighbors::nn_descent::index<uint32_t>*>(index->addr);
205+
if (cuvs::core::is_dlpack_device_compatible(graph->dl_tensor)) {
206+
using output_mdspan_type = raft::device_matrix_view<uint32_t, int64_t, raft::row_major>;
207+
_get_graph<output_mdspan_type>(res, index, graph);
208+
} else {
156209
using output_mdspan_type = raft::host_matrix_view<uint32_t, int64_t, raft::row_major>;
157-
auto dst = cuvs::core::from_dlpack<output_mdspan_type>(graph);
158-
auto src = index_ptr->graph();
210+
_get_graph<output_mdspan_type>(res, index, graph);
211+
}
212+
});
213+
}
159214

160-
RAFT_EXPECTS(src.extent(0) == dst.extent(0), "Output graph has incorrect number of rows");
161-
RAFT_EXPECTS(src.extent(1) == dst.extent(1), "Output graph has incorrect number of cols");
162-
std::copy(src.data_handle(), src.data_handle() + dst.size(), dst.data_handle());
215+
extern "C" cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
216+
cuvsNNDescentIndex_t index,
217+
DLManagedTensor* distances)
218+
{
219+
return cuvs::core::translate_exceptions([=] {
220+
if (cuvs::core::is_dlpack_device_compatible(distances->dl_tensor)) {
221+
using output_mdspan_type = raft::device_matrix_view<float, int64_t, raft::row_major>;
222+
_get_distances<output_mdspan_type>(res, index, distances);
163223
} else {
164-
RAFT_FAIL("Unsupported nn-descent index dtype: %d and bits: %d", dtype.code, dtype.bits);
224+
using output_mdspan_type = raft::host_matrix_view<float, int64_t, raft::row_major>;
225+
_get_distances<output_mdspan_type>(res, index, distances);
165226
}
166227
});
167228
}

dependencies.yaml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ files:
1717
- depends_on_cupy
1818
- depends_on_librmm
1919
- depends_on_pylibraft
20+
- depends_on_nccl
2021
- docs
2122
- rapids_build
2223
- run_py_cuvs
@@ -42,6 +43,7 @@ files:
4243
- depends_on_pylibraft
4344
- depends_on_libcuvs
4445
- depends_on_librmm
46+
- depends_on_nccl
4547
- rapids_build
4648
- rapids_build_setuptools
4749
test_cpp:
@@ -94,6 +96,7 @@ files:
9496
- rust
9597
- depends_on_libcuvs
9698
- depends_on_libraft
99+
- depends_on_nccl
97100
go:
98101
output: conda
99102
matrix:
@@ -107,6 +110,7 @@ files:
107110
- go
108111
- depends_on_libcuvs
109112
- depends_on_libraft
113+
- depends_on_nccl
110114
py_build_libcuvs:
111115
output: pyproject
112116
pyproject_dir: python/libcuvs
@@ -123,6 +127,7 @@ files:
123127
includes:
124128
- depends_on_libraft
125129
- depends_on_librmm
130+
- depends_on_nccl
126131
- rapids_build
127132
py_run_libcuvs:
128133
output: pyproject
@@ -133,6 +138,7 @@ files:
133138
- cuda_wheels
134139
- depends_on_libraft
135140
- depends_on_librmm
141+
- depends_on_nccl
136142
py_build_cuvs:
137143
output: pyproject
138144
pyproject_dir: python/cuvs
@@ -226,7 +232,6 @@ dependencies:
226232
packages:
227233
- c-compiler
228234
- cxx-compiler
229-
- nccl>=2.19
230235
specific:
231236
- output_types: conda
232237
matrices:
@@ -706,3 +711,18 @@ dependencies:
706711
packages:
707712
- pylibraft-cu11==25.6.*,>=0.0.0a0
708713
- {matrix: null, packages: [*pylibraft_unsuffixed]}
714+
depends_on_nccl:
715+
common:
716+
- output_types: conda
717+
packages:
718+
- &nccl_unsuffixed nccl>=2.19
719+
specific:
720+
- output_types: [pyproject, requirements]
721+
matrices:
722+
- matrix:
723+
cuda: "12.*"
724+
cuda_suffixed: "true"
725+
packages:
726+
- nvidia-nccl-cu12>=2.19
727+
- matrix:
728+
packages:

python/cuvs/cuvs/neighbors/nn_descent/nn_descent.pxd

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,14 @@ cdef extern from "cuvs/neighbors/nn_descent.h" nogil:
5353

5454
cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index)
5555

56-
cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index,
56+
cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
57+
cuvsNNDescentIndex_t index,
5758
DLManagedTensor * output)
5859

60+
cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
61+
cuvsNNDescentIndex_t index,
62+
DLManagedTensor * output)
63+
5964
cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
6065
cuvsNNDescentIndexParams* params,
6166
DLManagedTensor* dataset,

python/cuvs/cuvs/neighbors/nn_descent/nn_descent.pyx

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ cdef class IndexParams:
8888
intermediate_graph_degree=None,
8989
max_iterations=None,
9090
termination_threshold=None,
91-
n_clusters=None
91+
n_clusters=None,
92+
return_distances=None
9293
):
9394
if metric is not None:
9495
self.params.metric = <cuvsDistanceType>DISTANCE_TYPES[metric]
@@ -102,11 +103,8 @@ cdef class IndexParams:
102103
self.params.termination_threshold = termination_threshold
103104
if n_clusters is not None:
104105
self.params.n_clusters = n_clusters
105-
106-
# setting this parameter to true will cause an exception in the c++
107-
# api (`Using return_distances set to true requires distance view to
108-
# be allocated.`) - so instead force to be false here
109-
self.params.return_distances = False
106+
if return_distances is not None:
107+
self.params.return_distances = return_distances
110108

111109
@property
112110
def metric(self):
@@ -163,13 +161,39 @@ cdef class Index:
163161

164162
@property
165163
def graph(self):
164+
return self._get_graph()
165+
166+
@property
167+
def distances(self):
168+
return self._get_distances()
169+
170+
@auto_sync_resources
171+
def _get_graph(self, resources=None):
166172
if not self.trained:
167173
raise ValueError("Index needs to be built before getting graph")
168174

175+
cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
176+
169177
output = np.empty((self.num_rows, self.graph_degree), dtype='uint32')
170178
ai = wrap_array(output)
171179
cdef cydlpack.DLManagedTensor* output_dlpack = cydlpack.dlpack_c(ai)
172-
check_cuvs(cuvsNNDescentIndexGetGraph(self.index, output_dlpack))
180+
check_cuvs(cuvsNNDescentIndexGetGraph(res, self.index, output_dlpack))
181+
return output
182+
183+
@auto_sync_resources
184+
def _get_distances(self, resources=None):
185+
if not self.trained:
186+
raise ValueError("Index needs to be built before getting"
187+
" distances")
188+
189+
cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()
190+
191+
output = np.empty((self.num_rows, self.graph_degree), dtype='float32')
192+
ai = wrap_array(output)
193+
cdef cydlpack.DLManagedTensor* output_dlpack = cydlpack.dlpack_c(ai)
194+
check_cuvs(cuvsNNDescentIndexGetDistances(res,
195+
self.index,
196+
output_dlpack))
173197
return output
174198

175199
def __repr__(self):
@@ -221,6 +245,16 @@ def build(IndexParams index_params, dataset, graph=None, resources=None):
221245

222246
cdef cydlpack.DLManagedTensor* graph_dlpack = NULL
223247
if graph is not None:
248+
if params.return_distances:
249+
# When using a pre-existing graph - having return_distances set to
250+
# true will cause an exception in the C++ api
251+
# (`Using return_distances set to true requires distance view to
252+
# be allocated.`). Raise a more informative error here instead of
253+
# the C++ exception
254+
raise ValueError("Can't use return_distances with an existing"
255+
" graph. Either set params.return_distances to"
256+
" False, or set graph to None")
257+
224258
graph_ai = wrap_array(graph)
225259
graph_dlpack = cydlpack.dlpack_c(graph_ai)
226260

0 commit comments

Comments
 (0)