NVIDIA · rapids-bot · May 20, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/c/include/cuvs/neighbors/nn_descent.h b/c/include/cuvs/neighbors/nn_descent.h
@@ -19,9 +19,14 @@ extern "C" {
 
 /**
  * @brief Dtype to use for distance computation
- * - `NND_DIST_COMP_AUTO`: Automatically determine the best dtype for distance computation based on the dataset dimensions.
- * - `NND_DIST_COMP_FP32`: Use fp32 distance computation for better precision at the cost of performance and memory usage.
+ * - `NND_DIST_COMP_AUTO`: Automatically determine the best dtype for distance computation based on
+ * the dataset dimensions.
+ * - `NND_DIST_COMP_FP32`: Use fp32 distance computation for better precision at the cost of
+ * performance and memory usage.
  * - `NND_DIST_COMP_FP16`: Use fp16 distance computation.
+ *
+ * @deprecated Deprecated in 26.06; to be removed in 26.08. Use cuvsNNDescentIndexParams_v6 with
+ * internal_distance_dtype instead.
  */
 typedef enum {
   NND_DIST_COMP_AUTO = 0,
@@ -49,7 +54,12 @@ typedef enum {
  * the graph for. More iterations produce a better quality graph at cost of performance
  * `termination_threshold`: The delta at which nn-descent will terminate its iterations
  * `return_distances`: Boolean to decide whether to return distances array
- * `dist_comp_dtype`: dtype to use for distance computation. Defaults to `NND_DIST_COMP_AUTO` which automatically determines the best dtype for distance computation based on the dataset dimensions. Use `NND_DIST_COMP_FP32` for better precision at the cost of performance and memory usage. This option is only valid when data type is fp32. Use `NND_DIST_COMP_FP16` for better performance and memory usage at the cost of precision.
+ * `dist_comp_dtype`: dtype to use for distance computation. Note: as of 26.06, both
+ * `NND_DIST_COMP_AUTO` and `NND_DIST_COMP_FP32` map to fp32 distance computation (the new default).
+ * Use `NND_DIST_COMP_FP16` to opt into fp16 distance computation. This behavior differs from
+ * earlier releases where `NND_DIST_COMP_AUTO` selected fp16 for higher-dimensional fp32 inputs.
+ *
+ * @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by cuvsNNDescentIndexParams_v6.
  */
 struct cuvsNNDescentIndexParams {
   cuvsDistanceType metric;
@@ -64,21 +74,87 @@ struct cuvsNNDescentIndexParams {
 
 typedef struct cuvsNNDescentIndexParams* cuvsNNDescentIndexParams_t;
 
+/**
+ * @brief Parameters used to build an nn-descent index (v6)
+ *
+ * `metric`: The distance metric to use
+ * `metric_arg`: The argument used by distance metrics like Minkowski distance
+ * `graph_degree`: For an input dataset of dimensions (N, D),
+ * determines the final dimensions of the all-neighbors knn graph
+ * which turns out to be of dimensions (N, graph_degree)
+ * `intermediate_graph_degree`: Internally, nn-descent builds an
+ * all-neighbors knn graph of dimensions (N, intermediate_graph_degree)
+ * before selecting the final `graph_degree` neighbors. It's recommended
+ * that `intermediate_graph_degree` >= 1.5 * graph_degree
+ * `max_iterations`: The number of iterations that nn-descent will refine
+ * the graph for. More iterations produce a better quality graph at cost of performance
+ * `termination_threshold`: The delta at which nn-descent will terminate its iterations
+ * `return_distances`: Boolean to decide whether to return distances array
+ * `internal_distance_dtype`: Only applicable for fp32 input. Controls the precision used to
+ * compute distances. Possible values: [CUDA_R_32F, CUDA_R_16F]. Defaults to CUDA_R_32F. Set to
+ * CUDA_R_16F to compute distances in fp16 (faster, uses less device memory; not recommended for
+ * dim <= 16 due to precision loss). Has no effect on non-fp32 input types (fp16, int8, uint8),
+ * which always compute distances in fp16.
+ *
+ * @since 26.06
+ */
+struct cuvsNNDescentIndexParams_v6 {
+  cuvsDistanceType metric;
+  float metric_arg;
+  size_t graph_degree;
+  size_t intermediate_graph_degree;
+  size_t max_iterations;
+  float termination_threshold;
+  bool return_distances;
+  cudaDataType_t internal_distance_dtype;
+};
+
+typedef struct cuvsNNDescentIndexParams_v6* cuvsNNDescentIndexParams_v6_t;
+
 /**
  * @brief Allocate NN-Descent Index params, and populate with default values
  *
+ * @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by
+ * cuvsNNDescentIndexParamsCreate_v6.
+ *
  * @param[in] index_params cuvsNNDescentIndexParams_t to allocate
  * @return cuvsError_t
  */
 CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t* index_params);
 
+/**
+ * @brief Allocate NN-Descent Index params (v6), and populate with default values
+ *
+ * @since 26.06
+ *
+ * @param[out] index_params pointer to the cuvsNNDescentIndexParams_v6_t handle to allocate
+ * @return cuvsError_t
+ */
+CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsCreate_v6(
+  cuvsNNDescentIndexParams_v6_t* index_params);
+
 /**
  * @brief De-allocate NN-Descent Index params
  *
+ * @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by
+ * cuvsNNDescentIndexParamsDestroy_v6.
+ *
  * @param[in] index_params
  * @return cuvsError_t
  */
 CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_t index_params);
+
+/**
+ * @brief De-allocate NN-Descent Index params (v6)
+ *
+ * @since 26.06
+ *
+ * @param[in] index_params cuvsNNDescentIndexParams_v6_t to de-allocate
+ * @return cuvsError_t
+ */
+CUVS_EXPORT cuvsError_t cuvsNNDescentIndexParamsDestroy_v6(
+  cuvsNNDescentIndexParams_v6_t index_params);
+
 /**
  * @}
  */
@@ -157,6 +233,8 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index);
  * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
  * @endcode
  *
+ * @deprecated Deprecated in 26.06; to be removed in 26.08. Replaced by cuvsNNDescentBuild_v6.
+ *
  * @param[in] res cuvsResources_t opaque C handle
  * @param[in] index_params cuvsNNDescentIndexParams_t used to build NN-Descent index
  * @param[in] dataset DLManagedTensor* training dataset on host or device memory
@@ -169,6 +247,58 @@ CUVS_EXPORT cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
                                DLManagedTensor* dataset,
                                DLManagedTensor* graph,
                                cuvsNNDescentIndex_t index);
+
+/**
+ * @brief Build a NN-Descent index (v6) with a `DLManagedTensor` which has underlying
+ *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
+ *        or `kDLCPU`. Also, acceptable underlying types are:
+ *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits == 32`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits == 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits == 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits == 8`
+ *
+ * @code {.c}
+ * #include <cuvs/core/c_api.h>
+ * #include <cuvs/neighbors/nn_descent.h>
+ *
+ * // Create cuvsResources_t
+ * cuvsResources_t res;
+ * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+ *
+ * // Assume a populated `DLManagedTensor` type here
+ * DLManagedTensor dataset;
+ *
+ * // Create default index params
+ * cuvsNNDescentIndexParams_v6_t index_params;
+ * cuvsError_t params_create_status = cuvsNNDescentIndexParamsCreate_v6(&index_params);
+ *
+ * // Create NN-Descent index
+ * cuvsNNDescentIndex_t index;
+ * cuvsError_t index_create_status = cuvsNNDescentIndexCreate(&index);
+ *
+ * // Build the NN-Descent Index
+ * cuvsError_t build_status = cuvsNNDescentBuild_v6(res, index_params, &dataset, NULL, index);
+ *
+ * // de-allocate `index_params`, `index` and `res`
+ * cuvsError_t params_destroy_status = cuvsNNDescentIndexParamsDestroy_v6(index_params);
+ * cuvsError_t index_destroy_status = cuvsNNDescentIndexDestroy(index);
+ * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
+ * @endcode
+ *
+ * @since 26.06
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] index_params cuvsNNDescentIndexParams_v6_t used to build NN-Descent index
+ * @param[in] dataset DLManagedTensor* training dataset on host or device memory
+ * @param[inout] graph Optional preallocated graph on host memory to store output
+ * @param[out] index cuvsNNDescentIndex_t Newly built NN-Descent index
+ * @return cuvsError_t
+ */
+CUVS_EXPORT cuvsError_t cuvsNNDescentBuild_v6(cuvsResources_t res,
+                                              cuvsNNDescentIndexParams_v6_t index_params,
+                                              DLManagedTensor* dataset,
+                                              DLManagedTensor* graph,
+                                              cuvsNNDescentIndex_t index);
 /**
  * @}
  */

diff --git a/c/src/neighbors/nn_descent.cpp b/c/src/neighbors/nn_descent.cpp
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -28,23 +28,13 @@ namespace {
 
 template <typename T, typename IdxT = uint32_t>
 void* _build(cuvsResources_t res,
-             cuvsNNDescentIndexParams params,
+             cuvs::neighbors::nn_descent::index_params build_params,
              DLManagedTensor* dataset_tensor,
              DLManagedTensor* graph_tensor)
 {
   auto res_ptr = reinterpret_cast<raft::resources*>(res);
   auto dataset = dataset_tensor->dl_tensor;
 
-  auto build_params         = cuvs::neighbors::nn_descent::index_params();
-  build_params.metric       = static_cast<cuvs::distance::DistanceType>((int)params.metric),
-  build_params.metric_arg   = params.metric_arg;
-  build_params.graph_degree = params.graph_degree;
-  build_params.intermediate_graph_degree = params.intermediate_graph_degree;
-  build_params.max_iterations            = params.max_iterations;
-  build_params.termination_threshold     = params.termination_threshold;
-  build_params.return_distances          = params.return_distances;
-  build_params.dist_comp_dtype           = static_cast<cuvs::neighbors::nn_descent::DIST_COMP_DTYPE>(static_cast<int>(params.dist_comp_dtype));
-
   using graph_type = raft::host_matrix_view<IdxT, int64_t, raft::row_major>;
   std::optional<graph_type> graph;
   if (graph_tensor != NULL) { graph = cuvs::core::from_dlpack<graph_type>(graph_tensor); }
@@ -64,6 +54,36 @@ void* _build(cuvsResources_t res,
   }
 }
 
+cuvs::neighbors::nn_descent::index_params convert_params(cuvsNNDescentIndexParams const& params)
+{
+  auto build_params                      = cuvs::neighbors::nn_descent::index_params();
+  build_params.metric                    = static_cast<cuvs::distance::DistanceType>((int)params.metric);
+  build_params.metric_arg                = params.metric_arg;
+  build_params.graph_degree              = params.graph_degree;
+  build_params.intermediate_graph_degree = params.intermediate_graph_degree;
+  build_params.max_iterations            = params.max_iterations;
+  build_params.termination_threshold     = params.termination_threshold;
+  build_params.return_distances          = params.return_distances;
+  build_params.internal_distance_dtype =
+    (params.dist_comp_dtype == NND_DIST_COMP_FP16) ? CUDA_R_16F : CUDA_R_32F;
+  return build_params;
+}
+
+cuvs::neighbors::nn_descent::index_params convert_params_v6(
+  cuvsNNDescentIndexParams_v6 const& params)
+{
+  auto build_params                      = cuvs::neighbors::nn_descent::index_params();
+  build_params.metric                    = static_cast<cuvs::distance::DistanceType>((int)params.metric);
+  build_params.metric_arg                = params.metric_arg;
+  build_params.graph_degree              = params.graph_degree;
+  build_params.intermediate_graph_degree = params.intermediate_graph_degree;
+  build_params.max_iterations            = params.max_iterations;
+  build_params.termination_threshold     = params.termination_threshold;
+  build_params.return_distances          = params.return_distances;
+  build_params.internal_distance_dtype   = params.internal_distance_dtype;
+  return build_params;
+}
+
 template <typename output_mdspan_type>
 void _get_graph(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTensor* graph)
 {
@@ -113,6 +133,37 @@ void _get_distances(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTe
     RAFT_FAIL("Unsupported nn-descent index dtype: %d and bits: %d", dtype.code, dtype.bits);
   }
 }
+
+cuvsError_t _nn_descent_build(cuvsResources_t res,
+                              DLManagedTensor* dataset_tensor,
+                              DLManagedTensor* graph_tensor,
+                              cuvsNNDescentIndex_t index,
+                              cuvs::neighbors::nn_descent::index_params build_params)
+{
+  return cuvs::core::translate_exceptions([=] {
+    index->dtype.code = kDLUInt;
+    index->dtype.bits = 32;
+
+    auto dtype = dataset_tensor->dl_tensor.dtype;
+
+    if ((dtype.code == kDLFloat) && (dtype.bits == 32)) {
+      index->addr = reinterpret_cast<uintptr_t>(
+        _build<float, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
+    } else if ((dtype.code == kDLFloat) && (dtype.bits == 16)) {
+      index->addr = reinterpret_cast<uintptr_t>(
+        _build<half, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
+    } else if ((dtype.code == kDLInt) && (dtype.bits == 8)) {
+      index->addr = reinterpret_cast<uintptr_t>(
+        _build<int8_t, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
+    } else if ((dtype.code == kDLUInt) && (dtype.bits == 8)) {
+      index->addr = reinterpret_cast<uintptr_t>(
+        _build<uint8_t, uint32_t>(res, build_params, dataset_tensor, graph_tensor));
+    } else {
+      RAFT_FAIL("Unsupported nn-descent dataset dtype: %d and bits: %d", dtype.code, dtype.bits);
+    }
+  });
+}
+
 }  // namespace
 
 extern "C" cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index)
@@ -141,28 +192,18 @@ extern "C" cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
                                           DLManagedTensor* graph_tensor,
                                           cuvsNNDescentIndex_t index)
 {
-  return cuvs::core::translate_exceptions([=] {
-    index->dtype.code = kDLUInt;
-    index->dtype.bits = 32;
-
-    auto dtype = dataset_tensor->dl_tensor.dtype;
+  auto build_params = convert_params(*params);
+  return _nn_descent_build(res, dataset_tensor, graph_tensor, index, build_params);
+}
 
-    if ((dtype.code == kDLFloat) && (dtype.bits == 32)) {
-      index->addr = reinterpret_cast<uintptr_t>(
-        _build<float, uint32_t>(res, *params, dataset_tensor, graph_tensor));
-    } else if ((dtype.code == kDLFloat) && (dtype.bits == 16)) {
-      index->addr = reinterpret_cast<uintptr_t>(
-        _build<half, uint32_t>(res, *params, dataset_tensor, graph_tensor));
-    } else if ((dtype.code == kDLInt) && (dtype.bits == 8)) {
-      index->addr = reinterpret_cast<uintptr_t>(
-        _build<int8_t, uint32_t>(res, *params, dataset_tensor, graph_tensor));
-    } else if ((dtype.code == kDLUInt) && (dtype.bits == 8)) {
-      index->addr = reinterpret_cast<uintptr_t>(
-        _build<uint8_t, uint32_t>(res, *params, dataset_tensor, graph_tensor));
-    } else {
-      RAFT_FAIL("Unsupported nn-descent dataset dtype: %d and bits: %d", dtype.code, dtype.bits);
-    }
-  });
+extern "C" cuvsError_t cuvsNNDescentBuild_v6(cuvsResources_t res,
+                                             cuvsNNDescentIndexParams_v6_t params,
+                                             DLManagedTensor* dataset_tensor,
+                                             DLManagedTensor* graph_tensor,
+                                             cuvsNNDescentIndex_t index)
+{
+  auto build_params = convert_params_v6(*params);
+  return _nn_descent_build(res, dataset_tensor, graph_tensor, index, build_params);
 }
 
 extern "C" cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t* params)
@@ -179,7 +220,27 @@ extern "C" cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t
       .max_iterations            = cpp_params.max_iterations,
       .termination_threshold     = cpp_params.termination_threshold,
       .return_distances          = cpp_params.return_distances,
-      .dist_comp_dtype           = static_cast<cuvsNNDescentDistCompDtype>(static_cast<int>(cpp_params.dist_comp_dtype))};
+      .dist_comp_dtype           = cpp_params.internal_distance_dtype == CUDA_R_16F
+                                     ? NND_DIST_COMP_FP16
+                                     : NND_DIST_COMP_AUTO};
+  });
+}
+
+extern "C" cuvsError_t cuvsNNDescentIndexParamsCreate_v6(cuvsNNDescentIndexParams_v6_t* params)
+{
+  return cuvs::core::translate_exceptions([=] {
+    // get defaults from cpp parameters struct
+    cuvs::neighbors::nn_descent::index_params cpp_params;
+
+    *params = new cuvsNNDescentIndexParams_v6{
+      .metric                    = static_cast<cuvsDistanceType>((int)cpp_params.metric),
+      .metric_arg                = cpp_params.metric_arg,
+      .graph_degree              = cpp_params.graph_degree,
+      .intermediate_graph_degree = cpp_params.intermediate_graph_degree,
+      .max_iterations            = cpp_params.max_iterations,
+      .termination_threshold     = cpp_params.termination_threshold,
+      .return_distances          = cpp_params.return_distances,
+      .internal_distance_dtype   = cpp_params.internal_distance_dtype};
   });
 }
 
@@ -188,6 +249,11 @@ extern "C" cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_
   return cuvs::core::translate_exceptions([=] { delete params; });
 }
 
+extern "C" cuvsError_t cuvsNNDescentIndexParamsDestroy_v6(cuvsNNDescentIndexParams_v6_t params)
+{
+  return cuvs::core::translate_exceptions([=] { delete params; });
+}
+
 extern "C" cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
                                                   cuvsNNDescentIndex_t index,
                                                   DLManagedTensor* graph)