Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
666a688
keeping input data mem location
jinsolp Mar 18, 2026
f4cd395
Merge branch 'rapidsai:main' into nnd-keep-input-data-mem
jinsolp Mar 18, 2026
2855e58
compute norms in fp32 if data is fp32
jinsolp Mar 18, 2026
fbb9023
change padding for uint8 and int8
jinsolp Mar 19, 2026
0c1ff7d
Merge branch 'main' into nnd-keep-input-data-mem
jinsolp Mar 20, 2026
157fe30
rm print and revert test
jinsolp Mar 20, 2026
ace6cae
Merge branch 'main' into nnd-keep-input-data-mem
jinsolp Apr 2, 2026
686eb4b
Merge branch 'main' into nnd-keep-input-data-mem
jinsolp Apr 6, 2026
e59e6c7
fix c abi breakages
jinsolp Apr 8, 2026
632c24b
Merge branch 'main' into nnd-keep-input-data-mem
jinsolp Apr 8, 2026
836ef94
Merge branch 'main' into nnd-keep-input-data-mem
cjnolet Apr 9, 2026
298943e
Merge branch 'main' into nnd-keep-input-data-mem
jinsolp Apr 28, 2026
81fdb69
Merge branch 'main' into nnd-keep-input-data-mem
jinsolp May 13, 2026
be9f30b
rename to use_fp16_dist_comp
jinsolp May 13, 2026
1c7589f
change to match ivfpq style with internal_distance_dtype
jinsolp May 13, 2026
080a32c
raft::ceildiv
jinsolp May 13, 2026
27c918e
merge commit
jinsolp May 13, 2026
9a04366
deprecation logs and CUVS_EXPORT
jinsolp May 13, 2026
1a04d6e
rm comments
jinsolp May 13, 2026
4903f8a
Merge branch 'main' into nnd-keep-input-data-mem
jinsolp May 13, 2026
8d9844c
comment with issues
jinsolp May 14, 2026
931ca58
revert to focusing only on mem allocations
jinsolp May 14, 2026
2add75a
constexpr half
jinsolp May 20, 2026
da392c1
unified struct for bytes
jinsolp May 20, 2026
60e9780
Merge branch 'release/26.06' into nnd-keep-input-data-mem
jinsolp May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 133 additions & 3 deletions c/include/cuvs/neighbors/nn_descent.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,14 @@ extern "C" {

/**
* @brief Dtype to use for distance computation
* - `NND_DIST_COMP_AUTO`: Automatically determine the best dtype for distance computation based on the dataset dimensions.
* - `NND_DIST_COMP_FP32`: Use fp32 distance computation for better precision at the cost of performance and memory usage.
* - `NND_DIST_COMP_AUTO`: Automatically determine the best dtype for distance computation based on
* the dataset dimensions.
* - `NND_DIST_COMP_FP32`: Use fp32 distance computation for better precision at the cost of
* performance and memory usage.
* - `NND_DIST_COMP_FP16`: Use fp16 distance computation.
*
* @deprecated Deprecated in 26.06; to be removed in 26.08. Use cuvsNNDescentIndexParams_v6 with
* internal_distance_dtype instead.
*/
typedef enum {
NND_DIST_COMP_AUTO = 0,
Expand Down Expand Up @@ -49,7 +54,12 @@ typedef enum {
* the graph for. More iterations produce a better quality graph at cost of performance
* `termination_threshold`: The delta at which nn-descent will terminate its iterations
* `return_distances`: Boolean to decide whether to return distances array
* `dist_comp_dtype`: dtype to use for distance computation. Defaults to `NND_DIST_COMP_AUTO` which automatically determines the best dtype for distance computation based on the dataset dimensions. Use `NND_DIST_COMP_FP32` for better precision at the cost of performance and memory usage. This option is only valid when data type is fp32. Use `NND_DIST_COMP_FP16` for better performance and memory usage at the cost of precision.
* `dist_comp_dtype`: dtype to use for distance computation. Note: as of 26.06, both
* `NND_DIST_COMP_AUTO` and `NND_DIST_COMP_FP32` map to fp32 distance computation (the new default).
* Use `NND_DIST_COMP_FP16` to opt into fp16 distance computation. This behavior differs from
* earlier releases where `NND_DIST_COMP_AUTO` selected fp16 for higher-dimensional fp32 inputs.
*
* @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by cuvsNNDescentIndexParams_v6.
*/
struct cuvsNNDescentIndexParams {
cuvsDistanceType metric;
Expand All @@ -64,21 +74,87 @@ struct cuvsNNDescentIndexParams {

typedef struct cuvsNNDescentIndexParams* cuvsNNDescentIndexParams_t;

/**
* @brief Parameters used to build an nn-descent index (v6)
*
* `metric`: The distance metric to use
* `metric_arg`: The argument used by distance metrics like Minkowski distance
* `graph_degree`: For an input dataset of dimensions (N, D),
* determines the final dimensions of the all-neighbors knn graph
* which turns out to be of dimensions (N, graph_degree)
* `intermediate_graph_degree`: Internally, nn-descent builds an
* all-neighbors knn graph of dimensions (N, intermediate_graph_degree)
* before selecting the final `graph_degree` neighbors. It's recommended
* that `intermediate_graph_degree` >= 1.5 * graph_degree
* `max_iterations`: The number of iterations that nn-descent will refine
* the graph for. More iterations produce a better quality graph at cost of performance
* `termination_threshold`: The delta at which nn-descent will terminate its iterations
* `return_distances`: Boolean to decide whether to return distances array
* `internal_distance_dtype`: Only applicable for fp32 input. Controls the precision used to
* compute distances. Possible values: [CUDA_R_32F, CUDA_R_16F]. Defaults to CUDA_R_32F. Set to
* CUDA_R_16F to compute distances in fp16 (faster, uses less device memory; not recommended for
* dim <= 16 due to precision loss). Has no effect on non-fp32 input types (fp16, int8, uint8)
* which always compute distances in fp16.
*
* @since 26.06
*/
struct cuvsNNDescentIndexParams_v6 {
cuvsDistanceType metric;
float metric_arg;
size_t graph_degree;
size_t intermediate_graph_degree;
size_t max_iterations;
float termination_threshold;
bool return_distances;
cudaDataType_t internal_distance_dtype; /* CUDA_R_32F or CUDA_R_16F; see the field notes above */
};

typedef struct cuvsNNDescentIndexParams_v6* cuvsNNDescentIndexParams_v6_t;

/**
* @brief Allocate NN-Descent Index params, and populate with default values
*
* @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by
* cuvsNNDescentIndexParamsCreate_v6.
*
* @param[in] index_params cuvsNNDescentIndexParams_t to allocate
* @return cuvsError_t
*/
CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t* index_params);

/**
* @brief Allocate NN-Descent Index params (v6), and populate with default values
*
* @since 26.06
*
* @param[out] index_params receives the newly allocated cuvsNNDescentIndexParams_v6_t; release it
* with cuvsNNDescentIndexParamsDestroy_v6
* @return cuvsError_t
*/
CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsCreate_v6(
cuvsNNDescentIndexParams_v6_t* index_params);

/**
* @brief De-allocate NN-Descent Index params
*
* @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by
* cuvsNNDescentIndexParamsDestroy_v6.
*
* @param[in] index_params
* @return cuvsError_t
*/
CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_t index_params);

/**
* @brief De-allocate NN-Descent Index params (v6)
*
* @since 26.06
*
* @param[in] index_params
* @return cuvsError_t
*/
CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsDestroy_v6(
cuvsNNDescentIndexParams_v6_t index_params);

/**
* @}
*/
Expand Down Expand Up @@ -157,6 +233,8 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index);
* cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
* @endcode
*
* @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by cuvsNNDescentBuild_v6.
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] index_params cuvsNNDescentIndexParams_t used to build NN-Descent index
* @param[in] dataset DLManagedTensor* training dataset on host or device memory
Expand All @@ -169,6 +247,58 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
DLManagedTensor* dataset,
DLManagedTensor* graph,
cuvsNNDescentIndex_t index);

/**
* @brief Build a NN-Descent index (v6) with a `DLManagedTensor` which has underlying
* `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
* or `kDLCPU`. Also, acceptable underlying types are:
* 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
* 2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
* 3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
* 4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
*
* @code {.c}
* #include <cuvs/core/c_api.h>
* #include <cuvs/neighbors/nn_descent.h>
*
* // Create cuvsResources_t
* cuvsResources_t res;
* cuvsError_t res_create_status = cuvsResourcesCreate(&res);
*
* // Assume a populated `DLManagedTensor` type here
* DLManagedTensor dataset;
*
* // Create default index params
* cuvsNNDescentIndexParams_v6_t index_params;
* cuvsError_t params_create_status = cuvsNNDescentIndexParamsCreate_v6(&index_params);
*
* // Create NN-Descent index
* cuvsNNDescentIndex_t index;
* cuvsError_t index_create_status = cuvsNNDescentIndexCreate(&index);
*
* // Build the NN-Descent Index
* cuvsError_t build_status = cuvsNNDescentBuild_v6(res, index_params, &dataset, NULL, index);
*
* // de-allocate `index_params`, `index` and `res`
* cuvsError_t params_destroy_status = cuvsNNDescentIndexParamsDestroy_v6(index_params);
* cuvsError_t index_destroy_status = cuvsNNDescentIndexDestroy(index);
* cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
* @endcode
*
* @since 26.06
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] index_params cuvsNNDescentIndexParams_v6_t used to build NN-Descent index
* @param[in] dataset DLManagedTensor* training dataset on host or device memory
* @param[inout] graph Optional preallocated graph on host memory to store output
* @param[out] index cuvsNNDescentIndex_t Newly built NN-Descent index
* @return cuvsError_t
*/
CUVS_EXPORT cuvsError_t cuvsNNDescentBuild_v6(cuvsResources_t res,
cuvsNNDescentIndexParams_v6_t index_params,
DLManagedTensor* dataset,
DLManagedTensor* graph,
cuvsNNDescentIndex_t index);
/**
* @}
*/
Expand Down
134 changes: 100 additions & 34 deletions c/src/neighbors/nn_descent.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -28,23 +28,13 @@ namespace {

template <typename T, typename IdxT = uint32_t>
void* _build(cuvsResources_t res,
cuvsNNDescentIndexParams params,
cuvs::neighbors::nn_descent::index_params build_params,
DLManagedTensor* dataset_tensor,
DLManagedTensor* graph_tensor)
{
auto res_ptr = reinterpret_cast<raft::resources*>(res);
auto dataset = dataset_tensor->dl_tensor;

auto build_params = cuvs::neighbors::nn_descent::index_params();
build_params.metric = static_cast<cuvs::distance::DistanceType>((int)params.metric),
build_params.metric_arg = params.metric_arg;
build_params.graph_degree = params.graph_degree;
build_params.intermediate_graph_degree = params.intermediate_graph_degree;
build_params.max_iterations = params.max_iterations;
build_params.termination_threshold = params.termination_threshold;
build_params.return_distances = params.return_distances;
build_params.dist_comp_dtype = static_cast<cuvs::neighbors::nn_descent::DIST_COMP_DTYPE>(static_cast<int>(params.dist_comp_dtype));

using graph_type = raft::host_matrix_view<IdxT, int64_t, raft::row_major>;
std::optional<graph_type> graph;
if (graph_tensor != NULL) { graph = cuvs::core::from_dlpack<graph_type>(graph_tensor); }
Expand All @@ -64,6 +54,36 @@ void* _build(cuvsResources_t res,
}
}

/**
 * @brief Translate the deprecated C parameter struct into the C++ nn-descent build parameters.
 *
 * Scalar fields are copied across unchanged. The legacy `dist_comp_dtype` enum is folded into
 * `internal_distance_dtype`: only `NND_DIST_COMP_FP16` selects fp16, every other value
 * (including `NND_DIST_COMP_AUTO` and `NND_DIST_COMP_FP32`) selects fp32.
 *
 * @param[in] params deprecated C-API build parameters
 * @return C++ index_params populated from `params`
 */
cuvs::neighbors::nn_descent::index_params convert_params(cuvsNNDescentIndexParams const& params)
{
  namespace nnd = cuvs::neighbors::nn_descent;

  auto converted = nnd::index_params();

  // The legacy enum only distinguishes "fp16" from "anything else".
  if (params.dist_comp_dtype == NND_DIST_COMP_FP16) {
    converted.internal_distance_dtype = CUDA_R_16F;
  } else {
    converted.internal_distance_dtype = CUDA_R_32F;
  }

  // The C and C++ metric enums are bridged through their integer value.
  converted.metric = static_cast<cuvs::distance::DistanceType>(static_cast<int>(params.metric));
  converted.metric_arg                = params.metric_arg;
  converted.graph_degree              = params.graph_degree;
  converted.intermediate_graph_degree = params.intermediate_graph_degree;
  converted.max_iterations            = params.max_iterations;
  converted.termination_threshold     = params.termination_threshold;
  converted.return_distances          = params.return_distances;

  return converted;
}

/**
 * @brief Translate the v6 C parameter struct into the C++ nn-descent build parameters.
 *
 * All fields, including `internal_distance_dtype`, are copied across one-to-one; no value is
 * validated or remapped here.
 *
 * @param[in] params v6 C-API build parameters
 * @return C++ index_params populated from `params`
 */
cuvs::neighbors::nn_descent::index_params convert_params_v6(
  cuvsNNDescentIndexParams_v6 const& params)
{
  namespace nnd = cuvs::neighbors::nn_descent;

  auto converted = nnd::index_params();

  // The C and C++ metric enums are bridged through their integer value.
  converted.metric = static_cast<cuvs::distance::DistanceType>(static_cast<int>(params.metric));
  converted.metric_arg                = params.metric_arg;
  converted.graph_degree              = params.graph_degree;
  converted.intermediate_graph_degree = params.intermediate_graph_degree;
  converted.max_iterations            = params.max_iterations;
  converted.termination_threshold     = params.termination_threshold;
  converted.return_distances          = params.return_distances;
  converted.internal_distance_dtype   = params.internal_distance_dtype;

  return converted;
}

template <typename output_mdspan_type>
void _get_graph(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTensor* graph)
{
Expand Down Expand Up @@ -113,6 +133,37 @@ void _get_distances(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTe
RAFT_FAIL("Unsupported nn-descent index dtype: %d and bits: %d", dtype.code, dtype.bits);
}
}

/**
 * @brief Shared build path behind the C build entry points.
 *
 * Marks the index as holding uint32 graph indices, then dispatches on the dataset's DLPack dtype
 * to the matching `_build<T, uint32_t>` instantiation and stores the resulting pointer in
 * `index->addr`. The whole body runs inside `translate_exceptions`, so failures raised with
 * RAFT_FAIL are reported through the returned cuvsError_t.
 *
 * @param[in]    res            cuvsResources_t opaque C handle
 * @param[in]    dataset_tensor dataset to build from; must not be NULL
 * @param[inout] graph_tensor   optional output graph, forwarded unchanged to `_build` (may be NULL)
 * @param[out]   index          index handle that receives the built index; must not be NULL
 * @param[in]    build_params   already-converted C++ build parameters
 * @return cuvsError_t
 */
cuvsError_t _nn_descent_build(cuvsResources_t res,
                              DLManagedTensor* dataset_tensor,
                              DLManagedTensor* graph_tensor,
                              cuvsNNDescentIndex_t index,
                              cuvs::neighbors::nn_descent::index_params build_params)
{
  return cuvs::core::translate_exceptions([=] {
    // Reject NULL handles up front: both pointers are dereferenced below, and a C caller should
    // get an error code back instead of a crash.
    if (dataset_tensor == nullptr) {
      RAFT_FAIL("nn-descent build: dataset tensor must not be NULL");
    }
    if (index == nullptr) { RAFT_FAIL("nn-descent build: index handle must not be NULL"); }

    // Every instantiation below uses uint32_t as the graph index type.
    index->dtype.code = kDLUInt;
    index->dtype.bits = 32;

    auto dtype = dataset_tensor->dl_tensor.dtype;

    if ((dtype.code == kDLFloat) && (dtype.bits == 32)) {
      index->addr = reinterpret_cast<uintptr_t>(
        _build<float, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
    } else if ((dtype.code == kDLFloat) && (dtype.bits == 16)) {
      index->addr = reinterpret_cast<uintptr_t>(
        _build<half, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
    } else if ((dtype.code == kDLInt) && (dtype.bits == 8)) {
      index->addr = reinterpret_cast<uintptr_t>(
        _build<int8_t, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
    } else if ((dtype.code == kDLUInt) && (dtype.bits == 8)) {
      index->addr = reinterpret_cast<uintptr_t>(
        _build<uint8_t, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
    } else {
      RAFT_FAIL("Unsupported nn-descent dataset dtype: %d and bits: %d", dtype.code, dtype.bits);
    }
  });
}

} // namespace

extern "C" cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index)
Expand Down Expand Up @@ -141,28 +192,18 @@ extern "C" cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
DLManagedTensor* graph_tensor,
cuvsNNDescentIndex_t index)
{
return cuvs::core::translate_exceptions([=] {
index->dtype.code = kDLUInt;
index->dtype.bits = 32;

auto dtype = dataset_tensor->dl_tensor.dtype;
auto build_params = convert_params(*params);
return _nn_descent_build(res, dataset_tensor, graph_tensor, index, build_params);
}

if ((dtype.code == kDLFloat) && (dtype.bits == 32)) {
index->addr = reinterpret_cast<uintptr_t>(
_build<float, uint32_t>(res, *params, dataset_tensor, graph_tensor));
} else if ((dtype.code == kDLFloat) && (dtype.bits == 16)) {
index->addr = reinterpret_cast<uintptr_t>(
_build<half, uint32_t>(res, *params, dataset_tensor, graph_tensor));
} else if ((dtype.code == kDLInt) && (dtype.bits == 8)) {
index->addr = reinterpret_cast<uintptr_t>(
_build<int8_t, uint32_t>(res, *params, dataset_tensor, graph_tensor));
} else if ((dtype.code == kDLUInt) && (dtype.bits == 8)) {
index->addr = reinterpret_cast<uintptr_t>(
_build<uint8_t, uint32_t>(res, *params, dataset_tensor, graph_tensor));
} else {
RAFT_FAIL("Unsupported nn-descent dataset dtype: %d and bits: %d", dtype.code, dtype.bits);
}
});
/**
 * @brief C entry point: build an NN-Descent index from v6 parameters.
 *
 * Converts the v6 C parameter struct to the C++ build parameters and forwards to the shared
 * build helper. A NULL `params` is reported as an error rather than dereferenced.
 *
 * @param[in]    res            cuvsResources_t opaque C handle
 * @param[in]    params         v6 build parameters; must not be NULL
 * @param[in]    dataset_tensor dataset to build from
 * @param[inout] graph_tensor   optional output graph
 * @param[out]   index          index handle that receives the built index
 * @return cuvsError_t
 */
extern "C" cuvsError_t cuvsNNDescentBuild_v6(cuvsResources_t res,
                                             cuvsNNDescentIndexParams_v6_t params,
                                             DLManagedTensor* dataset_tensor,
                                             DLManagedTensor* graph_tensor,
                                             cuvsNNDescentIndex_t index)
{
  // `*params` is read before the helper's own error translation starts, so guard it here and
  // route the failure through the same exception-to-error-code path.
  if (params == nullptr) {
    return cuvs::core::translate_exceptions(
      [] { RAFT_FAIL("cuvsNNDescentBuild_v6: index_params must not be NULL"); });
  }

  auto build_params = convert_params_v6(*params);
  return _nn_descent_build(res, dataset_tensor, graph_tensor, index, build_params);
}

extern "C" cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t* params)
Expand All @@ -179,7 +220,27 @@ extern "C" cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t
.max_iterations = cpp_params.max_iterations,
.termination_threshold = cpp_params.termination_threshold,
.return_distances = cpp_params.return_distances,
.dist_comp_dtype = static_cast<cuvsNNDescentDistCompDtype>(static_cast<int>(cpp_params.dist_comp_dtype))};
.dist_comp_dtype = cpp_params.internal_distance_dtype == CUDA_R_16F
? NND_DIST_COMP_FP16
: NND_DIST_COMP_AUTO};
});
}

/**
 * @brief C entry point: allocate a v6 parameter struct filled with the C++ defaults.
 *
 * A default-constructed C++ `index_params` supplies every field value. The struct is allocated
 * with `new`; cuvsNNDescentIndexParamsDestroy_v6 releases it with `delete`.
 *
 * @param[out] params receives the newly allocated parameter struct; must not be NULL
 * @return cuvsError_t
 */
extern "C" cuvsError_t cuvsNNDescentIndexParamsCreate_v6(cuvsNNDescentIndexParams_v6_t* params)
{
  return cuvs::core::translate_exceptions([=] {
    // The out-pointer is written below; report a NULL as an error instead of crashing.
    if (params == nullptr) {
      RAFT_FAIL("cuvsNNDescentIndexParamsCreate_v6: output pointer must not be NULL");
    }

    // get defaults from cpp parameters struct
    cuvs::neighbors::nn_descent::index_params cpp_params;

    *params = new cuvsNNDescentIndexParams_v6{
      .metric                    = static_cast<cuvsDistanceType>(static_cast<int>(cpp_params.metric)),
      .metric_arg                = cpp_params.metric_arg,
      .graph_degree              = cpp_params.graph_degree,
      .intermediate_graph_degree = cpp_params.intermediate_graph_degree,
      .max_iterations            = cpp_params.max_iterations,
      .termination_threshold     = cpp_params.termination_threshold,
      .return_distances          = cpp_params.return_distances,
      .internal_distance_dtype   = cpp_params.internal_distance_dtype};
  });
}

Expand All @@ -188,6 +249,11 @@ extern "C" cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_
return cuvs::core::translate_exceptions([=] { delete params; });
}

/**
 * @brief C entry point: release a v6 parameter struct.
 *
 * @param[in] params parameter struct to delete
 * @return cuvsError_t
 */
extern "C" cuvsError_t cuvsNNDescentIndexParamsDestroy_v6(cuvsNNDescentIndexParams_v6_t params)
{
  auto release_params = [=] { delete params; };
  return cuvs::core::translate_exceptions(release_params);
}

extern "C" cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
cuvsNNDescentIndex_t index,
DLManagedTensor* graph)
Expand Down
Loading
Loading