From f84cf78ad028952269c28a1e4d8c501185cdb2c4 Mon Sep 17 00:00:00 2001 From: jinsolp Date: Thu, 19 Feb 2026 23:32:23 +0000 Subject: [PATCH 1/3] expose brute force metrics --- .../neighbors/all_neighbors/all_neighbors.cuh | 20 ++++++-- .../neighbors/all_neighbors/all_neighbors.pyx | 16 ++++--- python/cuvs/cuvs/tests/test_all_neighbors.py | 48 +++++++++++++++++-- 3 files changed, 68 insertions(+), 16 deletions(-) diff --git a/cpp/src/neighbors/all_neighbors/all_neighbors.cuh b/cpp/src/neighbors/all_neighbors/all_neighbors.cuh index e89440a222..bcd0f4ca1b 100644 --- a/cpp/src/neighbors/all_neighbors/all_neighbors.cuh +++ b/cpp/src/neighbors/all_neighbors/all_neighbors.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -32,10 +32,20 @@ GRAPH_BUILD_ALGO check_params_validity(const all_neighbors_params& params, auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || params.metric == cuvs::distance::DistanceType::CosineExpanded || - params.metric == cuvs::distance::DistanceType::InnerProduct; - RAFT_EXPECTS(allowed_metrics, - "Distance metric for all-neighbors build with brute force should be L2Expanded, " - "L2SqrtExpanded, CosineExpanded, or InnerProduct."); + params.metric == cuvs::distance::DistanceType::L1 || + params.metric == cuvs::distance::DistanceType::L2Unexpanded || + params.metric == cuvs::distance::DistanceType::L2SqrtUnexpanded || + params.metric == cuvs::distance::DistanceType::InnerProduct || + params.metric == cuvs::distance::DistanceType::Linf || + params.metric == cuvs::distance::DistanceType::Canberra || + params.metric == cuvs::distance::DistanceType::LpUnexpanded || + params.metric == cuvs::distance::DistanceType::CorrelationExpanded || + params.metric == cuvs::distance::DistanceType::JensenShannon; + 
RAFT_EXPECTS( + allowed_metrics, + "Distance metric for all-neighbors build with brute force should be L2Expanded, " + "L2SqrtExpanded, CosineExpanded, L1, L2Unexpanded, L2SqrtUnexpanded, InnerProduct, Linf, " + "Canberra, LpUnexpanded, CorrelationExpanded, or JensenShannon."); } return GRAPH_BUILD_ALGO::BRUTE_FORCE; } else if (std::holds_alternative( diff --git a/python/cuvs/cuvs/neighbors/all_neighbors/all_neighbors.pyx b/python/cuvs/cuvs/neighbors/all_neighbors/all_neighbors.pyx index fb4750d90d..ce920b47c1 100644 --- a/python/cuvs/cuvs/neighbors/all_neighbors/all_neighbors.pyx +++ b/python/cuvs/cuvs/neighbors/all_neighbors/all_neighbors.pyx @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # # cython: language_level=3 @@ -111,11 +111,12 @@ cdef class AllNeighborsParams: ) # Check metric consistency - ivf_pq_metric = ivf_pq_params.metric - if ivf_pq_metric != metric: + metric_type = DISTANCE_TYPES[metric] + ivf_pq_metric_type = DISTANCE_TYPES[ivf_pq_params.metric] + if ivf_pq_metric_type != metric_type: raise ValueError( f"Metric conflict: AllNeighborsParams metric '{metric}' " - f"does not match IVF-PQ metric '{ivf_pq_metric}'. Please " + f"does not match IVF-PQ metric '{ivf_pq_params.metric}'. Please " f"ensure both use the same metric." ) @@ -127,11 +128,12 @@ cdef class AllNeighborsParams: ) # Check metric consistency - nn_descent_metric = nn_descent_params.metric - if nn_descent_metric != metric: + metric_type = DISTANCE_TYPES[metric] + nn_descent_metric_type = DISTANCE_TYPES[nn_descent_params.metric] + if nn_descent_metric_type != metric_type: raise ValueError( f"Metric conflict: AllNeighborsParams metric '{metric}' " - f"does not match NN-Descent metric '{nn_descent_metric}'. " + f"does not match NN-Descent metric '{nn_descent_params.metric}'. " f"Please ensure both use the same metric." 
) diff --git a/python/cuvs/cuvs/tests/test_all_neighbors.py b/python/cuvs/cuvs/tests/test_all_neighbors.py index a232a58af8..f2be537ff7 100644 --- a/python/cuvs/cuvs/tests/test_all_neighbors.py +++ b/python/cuvs/cuvs/tests/test_all_neighbors.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # @@ -36,15 +36,40 @@ def make_cosine( @pytest.mark.parametrize("algo", ["nn_descent", "brute_force", "ivf_pq"]) @pytest.mark.parametrize("cluster", ["single_cluster", "multi_cluster"]) -@pytest.mark.parametrize("metric", ["sqeuclidean", "cosine"]) +@pytest.mark.parametrize( + "metric", + [ + "sqeuclidean", + "l2", + "cosine", + "l1", + "inner_product", + "chebyshev", + "canberra", + "minkowski", + "correlation", + "jensenshannon", + ], +) def test_all_neighbors_device_build_quality(algo, cluster, metric): """Test device build with quality validation against brute force ground truth. 
""" n_rows, n_cols, k = 7151, 64, 16 - if algo == "ivf_pq" and metric == "cosine": - pytest.skip("Skipping IVF-PQ with cosine distance") + if algo == "ivf_pq" and metric != "sqeuclidean": + pytest.skip( + "Skipping IVF-PQ for distance metrics other than sqeuclidean" + ) + elif algo == "nn_descent" and metric not in [ + "sqeuclidean", + "l2", + "cosine", + "inner_product", + ]: + pytest.skip( + "Skipping NN-Descent for distance metrics other than sqeuclidean, l2, cosine, or inner_product" + ) if cluster == "single_cluster": overlap_factor = 0 @@ -57,6 +82,21 @@ def test_all_neighbors_device_build_quality(algo, cluster, metric): X, _ = make_cosine( n_samples=n_rows, n_features=n_cols, random_state=42 ) + elif metric == "jensenshannon": + # Jensen-Shannon requires non-negative values representing probability distributions + X, _ = make_blobs( + n_samples=n_rows, + n_features=n_cols, + centers=10, + cluster_std=1.0, + center_box=(0.0, 10.0), # Non-negative values only + random_state=42, + ) + # Normalize each row to sum to 1 (probability distribution) + X = np.abs(X) # Ensure non-negative + row_sums = X.sum(axis=1, keepdims=True) + row_sums[row_sums == 0] = 1 # Avoid division by zero + X = X / row_sums else: X, _ = make_blobs( n_samples=n_rows, From 99cf2db8da79529ec2ab35e44f1bf1d16a59922b Mon Sep 17 00:00:00 2001 From: jinsolp Date: Thu, 5 Mar 2026 23:58:09 +0000 Subject: [PATCH 2/3] make as set --- .../neighbors/all_neighbors/all_neighbors.cuh | 58 +++++++++---------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/cpp/src/neighbors/all_neighbors/all_neighbors.cuh b/cpp/src/neighbors/all_neighbors/all_neighbors.cuh index 8d4f816d26..02de3ea799 100644 --- a/cpp/src/neighbors/all_neighbors/all_neighbors.cuh +++ b/cpp/src/neighbors/all_neighbors/all_neighbors.cuh @@ -9,6 +9,7 @@ #include #include #include +#include namespace cuvs::neighbors::all_neighbors::detail { using namespace cuvs::neighbors; @@ -16,32 +17,38 @@ using namespace cuvs::neighbors; 
GRAPH_BUILD_ALGO check_params_validity(const all_neighbors_params& params, bool do_mutual_reachability_dist) { + using DT = cuvs::distance::DistanceType; + + // InnerProduct is not supported for mutual reachability distance, because mutual reachability + // distance takes "max" of core distances and pairwise distance. + static const std::unordered_set<DT>
mrd_allowed_metrics = { + DT::L2Expanded, DT::L2SqrtExpanded, DT::CosineExpanded}; + + static const std::unordered_set<DT>
bf_allowed_metrics = {DT::L2Expanded, + DT::L2SqrtExpanded, + DT::CosineExpanded, + DT::L1, + DT::L2Unexpanded, + DT::L2SqrtUnexpanded, + DT::InnerProduct, + DT::Linf, + DT::Canberra, + DT::LpUnexpanded, + DT::CorrelationExpanded, + DT::JensenShannon}; + + static const std::unordered_set<DT>
nnd_allowed_metrics = { + DT::L2Expanded, DT::L2SqrtExpanded, DT::CosineExpanded, DT::InnerProduct}; + if (std::holds_alternative(params.graph_build_params)) { if (do_mutual_reachability_dist) { - // InnerProduct is not supported for mutual reachability distance, because mutual reachability - // distance takes "max" of core distances and pairwise distance. - auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || - params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || - params.metric == cuvs::distance::DistanceType::CosineExpanded; RAFT_EXPECTS( - allowed_metrics, + mrd_allowed_metrics.count(params.metric), "Distance metric for all-neighbors build with brute force for computing mutual " "reachability distance should be L2Expanded, L2SqrtExpanded, or CosineExpanded."); } else { - auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || - params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || - params.metric == cuvs::distance::DistanceType::CosineExpanded || - params.metric == cuvs::distance::DistanceType::L1 || - params.metric == cuvs::distance::DistanceType::L2Unexpanded || - params.metric == cuvs::distance::DistanceType::L2SqrtUnexpanded || - params.metric == cuvs::distance::DistanceType::InnerProduct || - params.metric == cuvs::distance::DistanceType::Linf || - params.metric == cuvs::distance::DistanceType::Canberra || - params.metric == cuvs::distance::DistanceType::LpUnexpanded || - params.metric == cuvs::distance::DistanceType::CorrelationExpanded || - params.metric == cuvs::distance::DistanceType::JensenShannon; RAFT_EXPECTS( - allowed_metrics, + bf_allowed_metrics.count(params.metric), "Distance metric for all-neighbors build with brute force should be L2Expanded, " "L2SqrtExpanded, CosineExpanded, L1, L2Unexpanded, L2SqrtUnexpanded, InnerProduct, Linf, " "Canberra, LpUnexpanded, CorrelationExpanded, or JensenShannon."); @@ -50,21 +57,12 @@ GRAPH_BUILD_ALGO 
check_params_validity(const all_neighbors_params& params, } else if (std::holds_alternative( params.graph_build_params)) { if (do_mutual_reachability_dist) { - // InnerProduct is not supported for mutual reachability distance, because mutual reachability - // distance takes "max" of core distances and pairwise distance. - auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || - params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || - params.metric == cuvs::distance::DistanceType::CosineExpanded; RAFT_EXPECTS( - allowed_metrics, + mrd_allowed_metrics.count(params.metric), "Distance metric for all-neighbors build with NN Descent for computing mutual reachability " "distance should be L2Expanded, L2SqrtExpanded, or CosineExpanded."); } else { - auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || - params.metric == cuvs::distance::DistanceType::L2SqrtExpanded || - params.metric == cuvs::distance::DistanceType::CosineExpanded || - params.metric == cuvs::distance::DistanceType::InnerProduct; - RAFT_EXPECTS(allowed_metrics, + RAFT_EXPECTS(nnd_allowed_metrics.count(params.metric), "Distance metric for all-neighbors build with NN Descent should be L2Expanded, " "L2SqrtExpanded, CosineExpanded, or InnerProduct."); } From 4cfa71d5b49aa15ec8f88cba6f3533db70c2ecc8 Mon Sep 17 00:00:00 2001 From: jinsolp Date: Fri, 3 Apr 2026 22:14:21 +0000 Subject: [PATCH 3/3] pytest expect fail --- python/cuvs/cuvs/tests/test_all_neighbors.py | 30 +++++++++++--------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/python/cuvs/cuvs/tests/test_all_neighbors.py b/python/cuvs/cuvs/tests/test_all_neighbors.py index f2be537ff7..2de5846054 100644 --- a/python/cuvs/cuvs/tests/test_all_neighbors.py +++ b/python/cuvs/cuvs/tests/test_all_neighbors.py @@ -57,19 +57,11 @@ def test_all_neighbors_device_build_quality(algo, cluster, metric): """ n_rows, n_cols, k = 7151, 64, 16 - if algo == "ivf_pq" and metric != 
"sqeuclidean": - pytest.skip( - "Skipping IVF-PQ for distance metrics other than sqeuclidean" - ) - elif algo == "nn_descent" and metric not in [ - "sqeuclidean", - "l2", - "cosine", - "inner_product", - ]: - pytest.skip( - "Skipping NN-Descent for distance metrics other than sqeuclidean, l2, cosine, or inner_product" - ) + ivf_pq_valid_metrics = {"sqeuclidean"} + nnd_valid_metrics = {"sqeuclidean", "l2", "cosine", "inner_product"} + is_invalid = (algo == "ivf_pq" and metric not in ivf_pq_valid_metrics) or ( + algo == "nn_descent" and metric not in nnd_valid_metrics + ) if cluster == "single_cluster": overlap_factor = 0 @@ -138,6 +130,18 @@ def test_all_neighbors_device_build_quality(algo, cluster, metric): ) res = Resources() + + if is_invalid: + with pytest.raises(Exception, match="Distance metric"): + all_neighbors.build( + X_device, + k, + params, + distances=cupy.empty((n_rows, k), dtype=cupy.float32), + resources=res, + ) + return + indices, distances = all_neighbors.build( X_device, k,