diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ba3c70237..1af49aab62 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,7 +48,7 @@ repos: additional_dependencies: [toml] args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v16.0.6 + rev: v20.1.4 hooks: - id: clang-format types_or: [c, c++, cuda] diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index beaa38c66a..754238e484 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 @@ -26,7 +26,7 @@ dependencies: - go - graphviz - ipython -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 68814a1dc4..a4eb320701 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 @@ -26,7 +26,7 @@ dependencies: - go - graphviz - ipython -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/all_cuda-128_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml index d0bfb07aec..c1adb7062f 100644 --- a/conda/environments/all_cuda-128_arch-aarch64.yaml +++ b/conda/environments/all_cuda-128_arch-aarch64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc @@ -27,7 +27,7 @@ dependencies: - go - graphviz - ipython -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml index aceef8546d..ec6a42d8b4 100644 --- a/conda/environments/all_cuda-128_arch-x86_64.yaml +++ b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe>=4.35.0 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc @@ -27,7 +27,7 @@ dependencies: - go - graphviz - ipython -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 0fa891d0c7..4c3aa33dfc 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - click - cmake>=3.30.4 - cuda-nvtx=11.8 @@ -26,7 +26,7 @@ dependencies: - gcc_linux-aarch64=11.* - glog>=0.6.0 - h5py>=3.8.0 -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index a710083237..49e6443eb9 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - click - cmake>=3.30.4 - cuda-nvtx=11.8 @@ -28,7 +28,7 @@ dependencies: - h5py>=3.8.0 - libaio - libboost-devel=1.87 -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml index 3de02e0525..c25d2b3aec 100644 --- a/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-128_arch-aarch64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - click - cmake>=3.30.4 - cuda-cudart-dev @@ -27,7 +27,7 @@ dependencies: - gcc_linux-aarch64=13.* - glog>=0.6.0 - h5py>=3.8.0 -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml index 09dc89b359..27926c20be 100644 --- a/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-128_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - click - cmake>=3.30.4 - cuda-cudart-dev @@ -29,7 +29,7 @@ dependencies: - h5py>=3.8.0 - libaio - libboost-devel=1.87 -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/go_cuda-118_arch-aarch64.yaml b/conda/environments/go_cuda-118_arch-aarch64.yaml index e79504c24a..86a0d5b1a2 100644 --- a/conda/environments/go_cuda-118_arch-aarch64.yaml +++ b/conda/environments/go_cuda-118_arch-aarch64.yaml @@ -8,8 +8,8 @@ channels: - nvidia dependencies: - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 @@ -19,7 +19,7 @@ dependencies: - dlpack>=0.8,<1.0 - gcc_linux-aarch64=11.* - go -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/go_cuda-118_arch-x86_64.yaml b/conda/environments/go_cuda-118_arch-x86_64.yaml index ba0dbfd4e0..16130813b5 100644 --- a/conda/environments/go_cuda-118_arch-x86_64.yaml +++ b/conda/environments/go_cuda-118_arch-x86_64.yaml @@ -8,8 +8,8 @@ channels: - nvidia dependencies: - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-nvtx=11.8 - cuda-profiler-api=11.8.86 @@ -19,7 +19,7 @@ dependencies: - dlpack>=0.8,<1.0 - gcc_linux-64=11.* - go -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev=11.11.3.6 - libcublas=11.11.3.6 - libcurand-dev=10.3.0.86 diff --git a/conda/environments/go_cuda-128_arch-aarch64.yaml b/conda/environments/go_cuda-128_arch-aarch64.yaml index a408a53220..aabee554a7 100644 --- a/conda/environments/go_cuda-128_arch-aarch64.yaml +++ b/conda/environments/go_cuda-128_arch-aarch64.yaml @@ -8,8 +8,8 @@ channels: - nvidia dependencies: - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc @@ -20,7 +20,7 @@ dependencies: - dlpack>=0.8,<1.0 - gcc_linux-aarch64=13.* - go -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/go_cuda-128_arch-x86_64.yaml b/conda/environments/go_cuda-128_arch-x86_64.yaml index 6bf8c3c282..4d02971fe1 100644 --- a/conda/environments/go_cuda-128_arch-x86_64.yaml +++ b/conda/environments/go_cuda-128_arch-x86_64.yaml @@ -8,8 +8,8 @@ channels: - nvidia dependencies: - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc @@ -20,7 +20,7 @@ dependencies: - dlpack>=0.8,<1.0 - gcc_linux-64=13.* - go -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev diff --git a/conda/environments/rust_cuda-128_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml similarity index 83% rename from conda/environments/rust_cuda-128_arch-aarch64.yaml rename to conda/environments/rust_cuda-129_arch-aarch64.yaml index a9da34c8a6..f636fe0a4f 100644 --- a/conda/environments/rust_cuda-128_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -8,17 +8,17 @@ channels: - nvidia dependencies: - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-version=12.8 +- cuda-version=12.9 - cxx-compiler - gcc_linux-aarch64=13.* -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -30,4 +30,4 @@ dependencies: - ninja - rust - sysroot_linux-aarch64==2.28 -name: rust_cuda-128_arch-aarch64 +name: rust_cuda-129_arch-aarch64 diff --git a/conda/environments/rust_cuda-128_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml similarity index 83% rename from conda/environments/rust_cuda-128_arch-x86_64.yaml rename to conda/environments/rust_cuda-129_arch-x86_64.yaml index 214ce10930..d75824ae4b 100644 --- a/conda/environments/rust_cuda-128_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -8,17 +8,17 @@ channels: - nvidia dependencies: - c-compiler -- clang-tools==16.0.6 -- clang==16.0.6 +- clang-tools==20.1.4 +- clang==20.1.4 - cmake>=3.30.4 - cuda-cudart-dev - cuda-nvcc - cuda-nvtx-dev - cuda-profiler-api -- cuda-version=12.8 +- cuda-version=12.9 - cxx-compiler - gcc_linux-64=13.* -- libclang==16.0.6 +- libclang==20.1.4 - libcublas-dev - libcurand-dev - libcusolver-dev @@ -30,4 +30,4 @@ dependencies: - ninja - rust - sysroot_linux-64==2.28 -name: rust_cuda-128_arch-x86_64 +name: rust_cuda-129_arch-x86_64 diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index a2bc5645fc..7fc49c9e80 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -21,11 +21,15 @@ option(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorit option(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA "Include faiss' cagra algorithm in benchmark" ON) +option(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA_HNSW + "Include faiss' cagra algorithm for build and hnsw for search in benchmark" ON +) option(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark" ON ) option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON) +option(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT "Include faiss' hnsw algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT "Include cuVS ivf flat algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ "Include cuVS ivf pq algorithm in benchmark" ON) option(CUVS_ANN_BENCH_USE_CUVS_CAGRA "Include cuVS CAGRA in benchmark" ON) @@ -276,6 +280,12 @@ if(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ) endif() +if(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT) + ConfigureAnnBench( + NAME FAISS_CPU_HNSW_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS} + ) +endif() + if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT AND CUVS_FAISS_ENABLE_GPU) ConfigureAnnBench( NAME FAISS_GPU_IVF_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} @@ -304,6 +314,13 @@ if(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA AND CUVS_FAISS_ENABLE_GPU) ) endif() +if(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA_HNSW AND CUVS_FAISS_ENABLE_GPU) + ConfigureAnnBench( + NAME FAISS_GPU_CAGRA_HNSW PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} + raft::raft + ) +endif() + if(CUVS_ANN_BENCH_USE_GGNN) include(cmake/thirdparty/get_glog) ConfigureAnnBench( diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index eed18272cd..464932f5a1 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -168,7 +168,7 @@ class algo : public algo_base { // and set_search_dataset() should save the passed-in pointer somewhere. // The client code should call set_search_dataset() before searching, // and should not release dataset before searching is finished. - virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/){}; + virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/) {}; /** * Make a shallow copy of the algo wrapper that shares the resources and ensures thread-safe @@ -181,6 +181,6 @@ class algo : public algo_base { #define REGISTER_ALGO_INSTANCE(DataT) \ template auto cuvs::bench::create_algo( \ const std::string&, const std::string&, int, const nlohmann::json&) \ - ->std::unique_ptr>; \ + -> std::unique_ptr>; \ template auto cuvs::bench::create_search_param(const std::string&, const nlohmann::json&) \ - ->std::unique_ptr::search_param>; + -> std::unique_ptr::search_param>; diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index dbde74ccc6..e3d55cb83d 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -388,7 +388,7 @@ inline auto host_info() uint64_t cpu_freq_max = 0; int host_processors_used = 0; int host_cores_used = 0; - std::set host_cores_selected{}; + std::set> host_cores_selected{}; // pairs of (socket_id, core_id) for (int cpu_id = 0; cpu_id < host_processors_configured; cpu_id++) { if (CPU_ISSET_S(cpu_id, affinity_mask_buf.size(), affinity_mask) == 0) { continue; } host_processors_used++; @@ -396,12 +396,14 @@ inline auto host_info() if (!std::filesystem::exists(cpu_fpath)) { continue; } int this_cpu_core = 0; + int this_cpu_package = 0; uint64_t this_cpu_freq_min = 0; uint64_t this_cpu_freq_max = 0; std::ifstream(cpu_fpath + "/topology/core_id") >> this_cpu_core; + std::ifstream(cpu_fpath + "/topology/physical_package_id") >> this_cpu_package; std::ifstream(cpu_fpath + "/cpufreq/scaling_min_freq") >> this_cpu_freq_min; std::ifstream(cpu_fpath + "/cpufreq/scaling_max_freq") >> this_cpu_freq_max; - host_cores_selected.insert(this_cpu_core); + host_cores_selected.insert(std::make_pair(this_cpu_package, this_cpu_core)); cpu_freq_min = cpu_freq_min == 0 ? (this_cpu_freq_min * 1000ull) : std::min(this_cpu_freq_min * 1000ull, cpu_freq_min); diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h index 441a1eafac..37d1190e66 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h @@ -81,9 +81,11 @@ class shared_raft_resources { using large_mr_type = rmm::mr::managed_memory_resource; shared_raft_resources() - try : orig_resource_{rmm::mr::get_current_device_resource()}, - pool_resource_(orig_resource_, 1024 * 1024 * 1024ull), - resource_(&pool_resource_, rmm_oom_callback, nullptr), large_mr_() { + try + : orig_resource_{rmm::mr::get_current_device_resource()}, + pool_resource_(orig_resource_, 1024 * 1024 * 1024ull), + resource_(&pool_resource_, rmm_oom_callback, nullptr), + large_mr_() { rmm::mr::set_current_device_resource(&resource_); } catch (const std::exception& e) { auto cuda_status = cudaGetLastError(); diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp index e03abd7255..c34d840132 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -68,6 +68,18 @@ void parse_build_param(const nlohmann::json& conf, param.quantizer_type = conf.at("quantizer_type"); } +template +void parse_build_param(const nlohmann::json& conf, + typename cuvs::bench::faiss_cpu_hnsw_flat::build_param& param) +{ + param.M = conf.at("M"); + if (conf.contains("efConstruction")) { + param.efConstruction = conf.at("efConstruction"); + } else { + param.efConstruction = 40; + } +} + template void parse_search_param(const nlohmann::json& conf, typename cuvs::bench::faiss_cpu::search_param& param) @@ -78,6 +90,19 @@ void parse_search_param(const nlohmann::json& conf, if (conf.contains("parallel_mode")) { param.parallel_mode = conf.at("parallel_mode"); } } +template +void parse_search_param(const nlohmann::json& conf, + typename cuvs::bench::faiss_cpu_hnsw_flat::search_param& param) +{ + faiss::SearchParametersHNSW p; + if (conf.contains("efSearch")) { p.efSearch = conf.at("efSearch"); } + if (conf.contains("check_relative_distance")) { + p.check_relative_distance = conf.at("check_relative_distance"); + } + if (conf.contains("bounded_queue")) { p.bounded_queue = conf.at("bounded_queue"); } + param.p = p; +} + template class Algo> auto make_algo(cuvs::bench::Metric metric, int dim, const nlohmann::json& conf) -> std::unique_ptr> @@ -105,6 +130,8 @@ auto create_algo(const std::string& algo_name, a = make_algo(metric, dim, conf); } else if (algo_name == "faiss_cpu_flat") { a = std::make_unique>(metric, dim); + } else if (algo_name == "faiss_cpu_hnsw_flat") { + a = make_algo(metric, dim, conf); } } @@ -127,6 +154,10 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con } else if (algo_name == "faiss_cpu_flat") { auto param = std::make_unique::search_param>(); return param; + } else if (algo_name == "faiss_cpu_hnsw_flat") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + return param; } // else throw std::runtime_error("invalid algo: '" + algo_name + "'"); diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index bda579be31..649a24c8d5 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -20,6 +20,7 @@ #include "../common/util.hpp" #include +#include #include #include #include @@ -98,11 +99,11 @@ class faiss_cpu : public algo { // TODO(snanditale): if the number of results is less than k, the remaining elements of // 'neighbors' will be filled with (size_t)-1 - void search(const T* queries, - int batch_size, - int k, - algo_base::index_type* neighbors, - float* distances) const final; + virtual void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const; [[nodiscard]] auto get_preference() const -> algo_property override { @@ -156,6 +157,7 @@ void faiss_cpu::build(const T* dataset, size_t nrow) index_ivf->cp.max_points_per_centroid = max_ppc; index_ivf->cp.min_points_per_centroid = min_ppc; } + faiss::IndexHNSWFlat* hnsw_index = dynamic_cast(index_.get()); index_->train(nrow, dataset); // faiss::IndexFlat::train() will do nothing assert(index_->is_trained); index_->add(nrow, dataset); @@ -190,12 +192,7 @@ void faiss_cpu::search( static_assert(sizeof(size_t) == sizeof(faiss::idx_t), "sizes of size_t and faiss::idx_t are different"); - thread_pool_->submit( - [&](int i) { - // Use thread pool for batch size = 1. FAISS multi-threads internally for batch size > 1. - index_->search(batch_size, queries, k, distances, reinterpret_cast(neighbors)); - }, - 1); + index_->search(batch_size, queries, k, distances, reinterpret_cast(neighbors)); } template @@ -341,4 +338,62 @@ class faiss_cpu_flat : public faiss_cpu { } }; +template +class faiss_cpu_hnsw_flat : public faiss_cpu { + public: + struct build_param : public faiss_cpu::build_param { + int M; + int efConstruction; + }; + struct search_param : public faiss_cpu::search_param { + faiss::SearchParametersHNSW p; + }; + faiss_cpu_hnsw_flat(Metric metric, int dim, const build_param& param) + : faiss_cpu(metric, dim, param) + { + this->index_ = std::make_shared(dim, param.M, this->metric_type_); + faiss::IndexHNSWFlat* hnsw_index = static_cast(this->index_.get()); + hnsw_index->hnsw.efConstruction = param.efConstruction; + } + + void set_search_param(const typename algo::search_param& param, + const void* filter_bitset) override + { + if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); } + auto sp = static_cast::search_param&>(param); + this->search_params_ = std::make_shared(sp.p); + }; + + void save(const std::string& file) const override + { + this->template save_(file); + } + void load(const std::string& file) override { this->template load_(file); } + + std::unique_ptr> copy() + { + return std::make_unique>(*this); // use copy constructor + } + + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override + { + static_assert(sizeof(size_t) == sizeof(faiss::idx_t), + "sizes of size_t and faiss::idx_t are different"); + + this->index_->search(batch_size, + queries, + k, + distances, + reinterpret_cast(neighbors), + search_params_.get()); + } + + private: + std::shared_ptr search_params_; +}; + } // namespace cuvs::bench diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu index 8483e52d81..f9ad76dc59 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu @@ -27,6 +27,22 @@ #include #include +namespace { +nlohmann::json collect_conf_with_prefix(const nlohmann::json& conf, + const std::string& prefix, + bool remove_prefix = true) +{ + nlohmann::json out; + for (auto& i : conf.items()) { + if (i.key().compare(0, prefix.size(), prefix) == 0) { + auto new_key = remove_prefix ? i.key().substr(prefix.size()) : i.key(); + out[new_key] = i.value(); + } + } + return out; +} +} // namespace + namespace cuvs::bench { template @@ -100,6 +116,82 @@ void parse_build_param(const nlohmann::json& conf, param.intermediate_graph_degree = 128; } if (conf.contains("cagra_build_algo")) { param.cagra_build_algo = conf.at("cagra_build_algo"); } + if (conf.contains("nn_descent_niter")) { + param.nn_descent_niter = conf.at("nn_descent_niter"); + } else { + param.nn_descent_niter = 20; + } + nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "b_"); + if (!ivf_pq_build_conf.empty()) { + faiss::gpu::IVFPQBuildCagraConfig ivf_pq_build_p; + + if (ivf_pq_build_conf.contains("nlist")) { + ivf_pq_build_p.n_lists = ivf_pq_build_conf.at("nlist"); + } + if (ivf_pq_build_conf.contains("niter")) { + ivf_pq_build_p.kmeans_n_iters = ivf_pq_build_conf.at("niter"); + } + if (ivf_pq_build_conf.contains("ratio")) { + ivf_pq_build_p.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); + } + if (ivf_pq_build_conf.contains("pq_bits")) { + ivf_pq_build_p.pq_bits = ivf_pq_build_conf.at("pq_bits"); + } + if (ivf_pq_build_conf.contains("pq_dim")) { + ivf_pq_build_p.pq_dim = ivf_pq_build_conf.at("pq_dim"); + } + param.ivf_pq_build_params = std::make_shared(ivf_pq_build_p); + } + nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "s_"); + if (!ivf_pq_search_conf.empty()) { + faiss::gpu::IVFPQSearchCagraConfig ivf_pq_search_p; + if (ivf_pq_search_conf.contains("nprobe")) { + ivf_pq_search_p.n_probes = ivf_pq_search_conf.at("nprobe"); + } + if (ivf_pq_search_conf.contains("internalDistanceDtype")) { + std::string type = ivf_pq_search_conf.at("internalDistanceDtype"); + if (type == "float") { + ivf_pq_search_p.internal_distance_dtype = CUDA_R_32F; + } else if (type == "half") { + ivf_pq_search_p.internal_distance_dtype = CUDA_R_16F; + } else { + throw std::runtime_error("internalDistanceDtype: '" + type + + "', should be either 'float' or 'half'"); + } + } else { + // set half as default type + ivf_pq_search_p.internal_distance_dtype = CUDA_R_16F; + } + + if (ivf_pq_search_conf.contains("smemLutDtype")) { + std::string type = ivf_pq_search_conf.at("smemLutDtype"); + if (type == "float") { + ivf_pq_search_p.lut_dtype = CUDA_R_32F; + } else if (type == "half") { + ivf_pq_search_p.lut_dtype = CUDA_R_16F; + } else if (type == "fp8") { + ivf_pq_search_p.lut_dtype = CUDA_R_8U; + } else { + throw std::runtime_error("smemLutDtype: '" + type + + "', should be either 'float', 'half' or 'fp8'"); + } + } else { + // set half as default + ivf_pq_search_p.lut_dtype = CUDA_R_16F; + } + param.ivf_pq_search_params = + std::make_shared(ivf_pq_search_p); + } +} + +template +void parse_build_param(const nlohmann::json& conf, + typename cuvs::bench::faiss_gpu_cagra_hnsw::build_param& param) +{ + typename cuvs::bench::faiss_gpu_cagra::build_param p; + parse_build_param(conf, p); + param.p = p; + if (conf.contains("base_level_only")) { param.base_level_only = conf.at("base_level_only"); } } template @@ -131,7 +223,13 @@ void parse_search_param(const nlohmann::json& conf, THROW("Invalid value for algo: %s", tmp.c_str()); } } - if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename cuvs::bench::faiss_gpu_cagra_hnsw::search_param& param) +{ + if (conf.contains("efSearch")) { param.p.efSearch = conf.at("efSearch"); } } template class Algo> @@ -163,6 +261,8 @@ auto create_algo(const std::string& algo_name, a = std::make_unique>(metric, dim); } else if (algo_name == "faiss_gpu_cagra") { a = make_algo(metric, dim, conf); + } else if (algo_name == "faiss_gpu_cagra_hnsw") { + a = make_algo(metric, dim, conf); } } @@ -187,6 +287,10 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con auto param = std::make_unique::search_param>(); parse_search_param(conf, *param); return param; + } else if (algo_name == "faiss_gpu_cagra_hnsw") { + auto param = std::make_unique::search_param>(); + parse_search_param(conf, *param); + return param; } // else throw std::runtime_error("invalid algo: '" + algo_name + "'"); diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h index ed80917e85..54f685fe79 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h @@ -107,9 +107,12 @@ class faiss_gpu : public algo, public algo_gpu { { static_assert(std::is_same_v, "faiss support only float type"); cudaGetDevice(&device_); + // Disable Faiss' generic temporary memory reservation. All such allocations happen through the + // pool memory resource. + gpu_resource_->noTempMemory(); } - void build(const T* dataset, size_t nrow) final; + virtual void build(const T* dataset, size_t nrow); virtual void set_search_param(const search_param_base& param, const void* filter_bitset) {} @@ -117,11 +120,11 @@ class faiss_gpu : public algo, public algo_gpu { // TODO(snanditale): if the number of results is less than k, the remaining elements of // 'neighbors' will be filled with (size_t)-1 - void search(const T* queries, - int batch_size, - int k, - algo_base::index_type* neighbors, - float* distances) const final; + virtual void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const; [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override { @@ -168,7 +171,7 @@ class faiss_gpu : public algo, public algo_gpu { std::shared_ptr index_refine_{nullptr}; faiss::MetricType metric_type_; int nlist_; - int device_; + int device_ = 0; double training_sample_fraction_; std::shared_ptr search_params_; std::shared_ptr refine_search_params_{nullptr}; @@ -289,8 +292,10 @@ void faiss_gpu::save_(const std::string& file) const { omp_single_thread_scope omp_single_thread; - auto cpu_index = std::make_unique(); - dynamic_cast(index_.get())->copyTo(cpu_index.get()); + auto cpu_index = std::make_unique(); + auto hnsw_index = dynamic_cast(cpu_index.get()); + if (hnsw_index) { hnsw_index->base_level_only = true; } + static_cast(index_.get())->copyTo(cpu_index.get()); faiss::write_index(cpu_index.get(), file.c_str()); } @@ -353,7 +358,7 @@ class faiss_gpu_ivf_flat : public faiss_gpu { std::unique_ptr> copy() override { return std::make_unique>(*this); - }; + } }; template @@ -528,6 +533,10 @@ class faiss_gpu_cagra : public faiss_gpu { std::string cagra_build_algo; /// Number of Iterations to run if building with NN_DESCENT size_t nn_descent_niter; + + std::shared_ptr ivf_pq_build_params = nullptr; + + std::shared_ptr ivf_pq_search_params = nullptr; }; using typename faiss_gpu::search_param_base; struct search_param : public faiss_gpu::search_param { @@ -541,8 +550,14 @@ class faiss_gpu_cagra : public faiss_gpu { config.graph_degree = param.graph_degree; config.intermediate_graph_degree = param.intermediate_graph_degree; config.device = this->device_; + config.store_dataset = false; if (param.cagra_build_algo == "IVF_PQ") { - config.build_algo = faiss::gpu::graph_build_algo::IVF_PQ; + config.build_algo = faiss::gpu::graph_build_algo::IVF_PQ; + this->ivf_pq_build_params_ = param.ivf_pq_build_params; + config.ivf_pq_params = this->ivf_pq_build_params_; + this->ivf_pq_search_params_ = param.ivf_pq_search_params; + config.ivf_pq_search_params = this->ivf_pq_search_params_; + config.refine_rate = 1.0; } else { config.build_algo = faiss::gpu::graph_build_algo::NN_DESCENT; } @@ -561,13 +576,92 @@ class faiss_gpu_cagra : public faiss_gpu { void save(const std::string& file) const override { - this->template save_(file); + omp_single_thread_scope omp_single_thread; + + auto cpu_hnsw_index = std::make_unique(); + // Only add the base HNSW layer to serialize the CAGRA index. + cpu_hnsw_index->base_level_only = true; + static_cast(this->index_.get())->copyTo(cpu_hnsw_index.get()); + faiss::write_index(cpu_hnsw_index.get(), file.c_str()); } void load(const std::string& file) override { this->template load_(file); } std::unique_ptr> copy() override { return std::make_unique>(*this); }; + + std::shared_ptr faiss_index() { return this->index_; } + + private: + std::shared_ptr ivf_pq_build_params_; + std::shared_ptr ivf_pq_search_params_; }; +template +class faiss_gpu_cagra_hnsw : public faiss_gpu { + public: + struct build_param : public faiss_gpu::build_param { + typename faiss_gpu_cagra::build_param p; + bool base_level_only = true; + }; + using typename faiss_gpu::search_param_base; + struct search_param : public faiss_gpu::search_param { + faiss::SearchParametersHNSW p; + }; + + faiss_gpu_cagra_hnsw(Metric metric, int dim, const build_param& param) + : faiss_gpu(metric, dim, param) + { + this->build_index_ = std::make_shared>(metric, dim, param.p); + this->search_index_ = std::make_shared( + dim, int(param.p.graph_degree / 2), this->metric_type_); + this->search_index_->base_level_only = param.base_level_only; + } + + void build(const T* dataset, size_t nrow) override + { + this->build_index_->build(dataset, nrow); + static_cast((build_index_->faiss_index()).get()) + ->copyTo(search_index_.get()); + } + + void set_search_param(const search_param_base& param, const void* filter_bitset) override + { + if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); } + auto sp = static_cast::search_param&>(param); + this->search_params_ = std::make_shared(sp.p); + } + + void search(const T* queries, + int batch_size, + int k, + algo_base::index_type* neighbors, + float* distances) const override + { + search_index_->search(batch_size, + queries, + k, + distances, + reinterpret_cast(neighbors), + this->search_params_.get()); + } + + void save(const std::string& file) const override + { + faiss::write_index(search_index_.get(), file.c_str()); + } + void load(const std::string& file) override + { + omp_single_thread_scope omp_single_thread; + this->search_index_.reset(static_cast(faiss::read_index(file.c_str()))); + } + std::unique_ptr> copy() override + { + return std::make_unique>(*this); + }; + + private: + std::shared_ptr> build_index_; + std::shared_ptr search_index_; +}; } // namespace cuvs::bench diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index e99a530497..0b6ebbaad2 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -49,6 +49,12 @@ list(APPEND CUVS_CXX_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") list(APPEND CUVS_CUDA_FLAGS "-DCUDA_API_PER_THREAD_DEFAULT_STREAM") # make sure we produce smallest binary size list(APPEND CUVS_CUDA_FLAGS -Xfatbin=-compress-all) +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" + AND (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION + VERSION_LESS 13.0) +) + list(APPEND CUVS_CUDA_FLAGS -Xfatbin=--compress-level=3) +endif() # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking diff --git a/cpp/include/cuvs/distance/grammian.hpp b/cpp/include/cuvs/distance/grammian.hpp index 0c904d493c..4d0a90e6cd 100644 --- a/cpp/include/cuvs/distance/grammian.hpp +++ b/cpp/include/cuvs/distance/grammian.hpp @@ -51,11 +51,11 @@ class GramMatrixBase { bool legacy_interface; public: - GramMatrixBase() : legacy_interface(false){}; + GramMatrixBase() : legacy_interface(false) {}; [[deprecated]] GramMatrixBase(cublasHandle_t cublas_handle) - : cublas_handle(cublas_handle), legacy_interface(true){}; + : cublas_handle(cublas_handle), legacy_interface(true) {}; - virtual ~GramMatrixBase(){}; + virtual ~GramMatrixBase() {}; /** Convenience function to evaluate the Gram matrix for two vector sets. * Vector sets are provided in Matrix format @@ -331,10 +331,10 @@ class PolynomialKernel : public GramMatrixBase { * @param offset */ PolynomialKernel(exp_t exponent, math_t gain, math_t offset) - : GramMatrixBase(), exponent(exponent), gain(gain), offset(offset){}; + : GramMatrixBase(), exponent(exponent), gain(gain), offset(offset) {}; [[deprecated]] PolynomialKernel(exp_t exponent, math_t gain, math_t offset, cublasHandle_t handle) - : GramMatrixBase(handle), exponent(exponent), gain(gain), offset(offset){}; + : GramMatrixBase(handle), exponent(exponent), gain(gain), offset(offset) {}; /** Evaluate kernel matrix using polynomial kernel. * @@ -447,7 +447,7 @@ class TanhKernel : public GramMatrixBase { TanhKernel(math_t gain, math_t offset) : GramMatrixBase(), gain(gain), offset(offset) {} [[deprecated]] TanhKernel(math_t gain, math_t offset, cublasHandle_t handle) - : GramMatrixBase(handle), gain(gain), offset(offset){}; + : GramMatrixBase(handle), gain(gain), offset(offset) {}; /** Evaluate kernel matrix using tanh kernel. * @@ -562,10 +562,10 @@ class RBFKernel : public GramMatrixBase { * @tparam math_t floating point type * @param gain */ - RBFKernel(math_t gain) : GramMatrixBase(), gain(gain){}; + RBFKernel(math_t gain) : GramMatrixBase(), gain(gain) {}; [[deprecated]] RBFKernel(math_t gain, cublasHandle_t handle) - : GramMatrixBase(handle), gain(gain){}; + : GramMatrixBase(handle), gain(gain) {}; void matrixRowNormL2(raft::resources const& handle, dense_input_matrix_view_t matrix, diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h index f43435e971..5959124870 100644 --- a/cpp/include/cuvs/neighbors/cagra.h +++ b/cpp/include/cuvs/neighbors/cagra.h @@ -358,6 +358,33 @@ cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index); */ cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim); +/** + * @} + */ + +/** + * @defgroup cagra_c_merge_params C API for CUDA ANN Graph-based nearest neighbor search + * @{ + */ + +/** + * @brief Supplemental parameters to merge CAGRA index + * + */ + +struct cuvsCagraMergeParams { + cuvsCagraIndexParams_t output_index_params; + cuvsMergeStrategy strategy; +}; + +typedef struct cuvsCagraMergeParams* cuvsCagraMergeParams_t; + +/** Allocate CAGRA merge params with default values */ +cuvsError_t cuvsCagraMergeParamsCreate(cuvsCagraMergeParams_t* params); + +/** De-allocate CAGRA merge params */ +cuvsError_t cuvsCagraMergeParamsDestroy(cuvsCagraMergeParams_t params); + /** * @} */ @@ -585,6 +612,64 @@ cuvsError_t cuvsCagraSerializeToHnswlib(cuvsResources_t res, * @param[out] index CAGRA index loaded disk */ cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index); + +/** + * @brief Merge multiple CAGRA indices into a single CAGRA index. + * + * All input indices must have been built with the same data type (`index.dtype`) and + * have the same dimensionality (`index.dims`). The merged index uses the output + * parameters specified in `cuvsCagraMergeParams`. + * + * Input indices must have: + * - `index.dtype.code` and `index.dtype.bits` matching across all indices. + * - Supported data types for indices: + * a. `kDLFloat` with `bits = 32` + * b. `kDLFloat` with `bits = 16` + * c. `kDLInt` with `bits = 8` + * d. `kDLUInt` with `bits = 8` + * + * The resulting output index will have the same data type as the input indices. + * + * Example: + * @code{.c} + * #include + * #include + * + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * cuvsCagraIndex_t index1, index2, merged_index; + * cuvsCagraIndexCreate(&index1); + * cuvsCagraIndexCreate(&index2); + * cuvsCagraIndexCreate(&merged_index); + * + * // Assume index1 and index2 have been built using cuvsCagraBuild + * + * cuvsCagraMergeParams_t merge_params; + * cuvsError_t params_create_status = cuvsCagraMergeParamsCreate(&merge_params); + * + * cuvsError_t merge_status = cuvsCagraMerge(res, merge_params, (cuvsCagraIndex_t[]){index1, + * index2}, 2, merged_index); + * + * // Use merged_index for search operations + * + * cuvsError_t params_destroy_status = cuvsCagraMergeParamsDestroy(merge_params); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cuvsCagraMergeParams_t parameters controlling merge behavior + * @param[in] indices Array of input cuvsCagraIndex_t handles to merge + * @param[in] num_indices Number of input indices + * @param[out] output_index Output handle that will store the merged index. + * Must be initialized using `cuvsCagraIndexCreate` before use. + */ +cuvsError_t cuvsCagraMerge(cuvsResources_t res, + cuvsCagraMergeParams_t params, + cuvsCagraIndex_t* indices, + size_t num_indices, + cuvsCagraIndex_t output_index); + /** * @} */ diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index e70cf6c2ec..036dbba9c8 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -18,6 +18,7 @@ #include "common.hpp" #include +#include #include #include #include @@ -120,6 +121,7 @@ struct index_params : cuvs::neighbors::index_params { * Whether to use MST optimization to guarantee graph connectivity. */ bool guarantee_connectivity = false; + /** * Whether to add the dataset content to the index, i.e.: * @@ -276,20 +278,9 @@ struct extend_params { /** * @brief Determines the strategy for merging CAGRA graphs. * - * @note Currently, only the PHYSICAL strategy is supported. + * @note Currently, only the MERGE_STRATEGY_PHYSICAL strategy is supported. */ -enum MergeStrategy { - /** - * @brief Physical merge: Builds a new CAGRA graph from the union of dataset points - * in existing CAGRA graphs. - * - * This is expensive to build but does not impact search latency or quality. - * Preferred for many smaller CAGRA graphs. - * - * @note Currently, this is the only supported strategy. - */ - PHYSICAL -}; +using MergeStrategy = cuvsMergeStrategy; /** * @brief Parameters for merging CAGRA indexes. @@ -306,8 +297,8 @@ struct merge_params { /// Parameters for creating the output index. cagra::index_params output_index_params; - /// Strategy for merging. Defaults to `MergeStrategy::PHYSICAL`. - MergeStrategy strategy = MergeStrategy::PHYSICAL; + /// Strategy for merging. Defaults to `MergeStrategy::MERGE_STRATEGY_PHYSICAL`. + MergeStrategy strategy = MergeStrategy::MERGE_STRATEGY_PHYSICAL; }; /** diff --git a/cpp/include/cuvs/neighbors/common.h b/cpp/include/cuvs/neighbors/common.h index d7ca878b99..6bcc4db6b0 100644 --- a/cpp/include/cuvs/neighbors/common.h +++ b/cpp/include/cuvs/neighbors/common.h @@ -16,9 +16,7 @@ #pragma once -#include #include -#include #include #ifdef __cplusplus @@ -56,6 +54,23 @@ typedef struct { * @} */ +/** + * @defgroup index_merge Index Merge + * @brief Common definitions related to index merging. + * @{ + */ + +/** + * @brief Strategy for merging indices. + */ +typedef enum { + MERGE_STRATEGY_PHYSICAL = 0, ///< Merge indices physically + MERGE_STRATEGY_LOGICAL = 1 ///< Merge indices logically +} cuvsMergeStrategy; + +/** + * @} + */ #ifdef __cplusplus } #endif diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 4c31987989..cb56e9f8c2 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -691,8 +691,7 @@ template struct enable_if_valid_list {}; template - typename SpecT, + template typename SpecT, typename SizeT, typename... SpecExtraArgs> struct enable_if_valid_list, T> { diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index cad08ca551..ebfaa74515 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -28,7 +28,7 @@ import subprocess -EXPECTED_VERSIONS = ("16.0.6",) +EXPECTED_VERSIONS = ("20.1.4",) VERSION_REGEX = re.compile(r"clang version ([0-9.]+)") CMAKE_COMPILER_REGEX = re.compile( r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE) diff --git a/cpp/src/distance/detail/fused_distance_nn/persistent_gemm.h b/cpp/src/distance/detail/fused_distance_nn/persistent_gemm.h index c506da4ff2..61d65d85dc 100644 --- a/cpp/src/distance/detail/fused_distance_nn/persistent_gemm.h +++ b/cpp/src/distance/detail/fused_distance_nn/persistent_gemm.h @@ -147,8 +147,8 @@ struct FusedDistanceNNPersistent { struct temp_problem_visitor { int problem_count; - CUTLASS_HOST_DEVICE temp_problem_visitor() : problem_count(0){}; - CUTLASS_HOST_DEVICE temp_problem_visitor(int problem_count_) : problem_count(problem_count_){}; + CUTLASS_HOST_DEVICE temp_problem_visitor() : problem_count(0) {}; + CUTLASS_HOST_DEVICE temp_problem_visitor(int problem_count_) : problem_count(problem_count_) {}; }; /// Argument structure diff --git a/cpp/src/distance/detail/masked_distance_base.cuh b/cpp/src/distance/detail/masked_distance_base.cuh index ec7270baa2..34a1382e2d 100644 --- a/cpp/src/distance/detail/masked_distance_base.cuh +++ b/cpp/src/distance/detail/masked_distance_base.cuh @@ -215,7 +215,7 @@ struct MaskedDistances : public BaseClass { } } } // tile_idx_n - } // idx_g + } // idx_g rowEpilog_op(tile_idx_m); } // tile_idx_m } diff --git a/cpp/src/distance/detail/masked_nn.cuh b/cpp/src/distance/detail/masked_nn.cuh index e10d2b7c69..1ef10a504a 100644 --- a/cpp/src/distance/detail/masked_nn.cuh +++ b/cpp/src/distance/detail/masked_nn.cuh @@ -291,13 +291,13 @@ void masked_l2_nn_impl(raft::resources const& handle, auto fin_op = raft::identity_op{}; auto kernel = masked_l2_nn_kernel; + OutT, + IdxT, + P, + ReduceOpT, + KVPReduceOpT, + decltype(core_lambda), + decltype(fin_op)>; constexpr size_t smemSize = P::SmemSize + ((P::Mblk + P::Nblk) * sizeof(DataT)); dim3 block(P::Nthreads); dim3 grid = launchConfigGenerator

(m, n, smemSize, kernel); diff --git a/cpp/src/neighbors/brute_force.cu b/cpp/src/neighbors/brute_force.cu index d54a758791..332940b0b1 100644 --- a/cpp/src/neighbors/brute_force.cu +++ b/cpp/src/neighbors/brute_force.cu @@ -164,37 +164,35 @@ void index::update_dataset( auto build(raft::resources const& res, \ const cuvs::neighbors::brute_force::index_params& index_params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::brute_force::index \ + -> cuvs::neighbors::brute_force::index \ { \ return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ } \ auto build(raft::resources const& res, \ const cuvs::neighbors::brute_force::index_params& index_params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::brute_force::index \ + -> cuvs::neighbors::brute_force::index \ { \ return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ } \ auto build(raft::resources const& res, \ raft::device_matrix_view dataset, \ cuvs::distance::DistanceType metric, \ - DistT metric_arg) \ - ->cuvs::neighbors::brute_force::index \ + DistT metric_arg) -> cuvs::neighbors::brute_force::index \ { \ return detail::build(res, dataset, metric, metric_arg); \ } \ auto build(raft::resources const& res, \ const cuvs::neighbors::brute_force::index_params& index_params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::brute_force::index \ + -> cuvs::neighbors::brute_force::index \ { \ return detail::build(res, dataset, index_params.metric, index_params.metric_arg); \ } \ auto build(raft::resources const& res, \ raft::device_matrix_view dataset, \ cuvs::distance::DistanceType metric, \ - DistT metric_arg) \ - ->cuvs::neighbors::brute_force::index \ + DistT metric_arg) -> cuvs::neighbors::brute_force::index \ { \ return detail::build(res, dataset, metric, metric_arg); \ } \ diff --git a/cpp/src/neighbors/cagra_build_float.cu b/cpp/src/neighbors/cagra_build_float.cu index b990d1b329..3f51b0d2aa 100644 --- a/cpp/src/neighbors/cagra_build_float.cu +++ b/cpp/src/neighbors/cagra_build_float.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::cagra { auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::build(handle, params, dataset); \ } \ @@ -31,7 +31,7 @@ namespace cuvs::neighbors::cagra { auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::build(handle, params, dataset); \ } diff --git a/cpp/src/neighbors/cagra_build_int8.cu b/cpp/src/neighbors/cagra_build_int8.cu index 624ebd060a..046ee31312 100644 --- a/cpp/src/neighbors/cagra_build_int8.cu +++ b/cpp/src/neighbors/cagra_build_int8.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::cagra { auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::build(handle, params, dataset); \ } \ @@ -31,7 +31,7 @@ namespace cuvs::neighbors::cagra { auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::build(handle, params, dataset); \ } diff --git a/cpp/src/neighbors/cagra_build_uint8.cu b/cpp/src/neighbors/cagra_build_uint8.cu index 8fd806c64e..7a8c710e6e 100644 --- a/cpp/src/neighbors/cagra_build_uint8.cu +++ b/cpp/src/neighbors/cagra_build_uint8.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::cagra { auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::build(handle, params, dataset); \ } \ @@ -31,7 +31,7 @@ namespace cuvs::neighbors::cagra { auto build(raft::resources const& handle, \ const cuvs::neighbors::cagra::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::build(handle, params, dataset); \ } diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index a9fdd01d1f..e921216bf9 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -33,70 +33,84 @@ namespace { -template -void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* dataset_tensor) -{ - auto dataset = dataset_tensor->dl_tensor; +static void _set_graph_build_params( + std::variant& out_params, + cuvsCagraIndexParams& params, + cuvsCagraGraphBuildAlgo algo, + int64_t n_rows, + int64_t dim) - auto res_ptr = reinterpret_cast(res); - auto index = new cuvs::neighbors::cagra::index(*res_ptr); - - auto index_params = cuvs::neighbors::cagra::index_params(); - index_params.metric = static_cast((int)params.metric); - index_params.intermediate_graph_degree = params.intermediate_graph_degree; - index_params.graph_degree = params.graph_degree; - - switch (params.build_algo) { +{ + auto metric = static_cast((int)params.metric); + switch (algo) { case cuvsCagraGraphBuildAlgo::AUTO_SELECT: break; case cuvsCagraGraphBuildAlgo::IVF_PQ: { - auto dataset_extent = raft::matrix_extent(dataset.shape[0], dataset.shape[1]); - auto pq_params = cuvs::neighbors::cagra::graph_build_params::ivf_pq_params( - dataset_extent, index_params.metric); - auto ivf_pq_build_params = params.graph_build_params->ivf_pq_build_params; - auto ivf_pq_search_params = params.graph_build_params->ivf_pq_search_params; - if (ivf_pq_build_params) { - pq_params.build_params.add_data_on_build = ivf_pq_build_params->add_data_on_build; - pq_params.build_params.n_lists = ivf_pq_build_params->n_lists; - pq_params.build_params.kmeans_n_iters = ivf_pq_build_params->kmeans_n_iters; - pq_params.build_params.kmeans_trainset_fraction = - ivf_pq_build_params->kmeans_trainset_fraction; - pq_params.build_params.pq_bits = ivf_pq_build_params->pq_bits; - pq_params.build_params.pq_dim = ivf_pq_build_params->pq_dim; - pq_params.build_params.codebook_kind = - static_cast(ivf_pq_build_params->codebook_kind); - pq_params.build_params.force_random_rotation = ivf_pq_build_params->force_random_rotation; - pq_params.build_params.conservative_memory_allocation = - ivf_pq_build_params->conservative_memory_allocation; - pq_params.build_params.max_train_points_per_pq_code = - ivf_pq_build_params->max_train_points_per_pq_code; + auto pq_params = cuvs::neighbors::cagra::graph_build_params::ivf_pq_params( + raft::matrix_extent(n_rows, dim), metric); + if (params.graph_build_params) { + auto ivf_params = static_cast(params.graph_build_params); + if (ivf_params->ivf_pq_build_params) { + auto bp = ivf_params->ivf_pq_build_params; + pq_params.build_params.add_data_on_build = bp->add_data_on_build; + pq_params.build_params.n_lists = bp->n_lists; + pq_params.build_params.kmeans_n_iters = bp->kmeans_n_iters; + pq_params.build_params.kmeans_trainset_fraction = bp->kmeans_trainset_fraction; + pq_params.build_params.pq_bits = bp->pq_bits; + pq_params.build_params.pq_dim = bp->pq_dim; + pq_params.build_params.codebook_kind = + static_cast(bp->codebook_kind); + pq_params.build_params.force_random_rotation = bp->force_random_rotation; + pq_params.build_params.conservative_memory_allocation = + bp->conservative_memory_allocation; + pq_params.build_params.max_train_points_per_pq_code = bp->max_train_points_per_pq_code; + } + if (ivf_params->ivf_pq_search_params) { + auto sp = ivf_params->ivf_pq_search_params; + pq_params.search_params.n_probes = sp->n_probes; + pq_params.search_params.lut_dtype = sp->lut_dtype; + pq_params.search_params.internal_distance_dtype = sp->internal_distance_dtype; + pq_params.search_params.preferred_shmem_carveout = sp->preferred_shmem_carveout; + } + if (ivf_params->refinement_rate > 1.0f) { + pq_params.refinement_rate = ivf_params->refinement_rate; + } } - if (ivf_pq_search_params) { - pq_params.search_params.n_probes = ivf_pq_search_params->n_probes; - pq_params.search_params.lut_dtype = ivf_pq_search_params->lut_dtype; - pq_params.search_params.internal_distance_dtype = - ivf_pq_search_params->internal_distance_dtype; - pq_params.search_params.preferred_shmem_carveout = - ivf_pq_search_params->preferred_shmem_carveout; - } - if (params.graph_build_params->refinement_rate > 1) { - pq_params.refinement_rate = params.graph_build_params->refinement_rate; - } - index_params.graph_build_params = pq_params; + out_params = pq_params; break; } case cuvsCagraGraphBuildAlgo::NN_DESCENT: { - auto nn_descent_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params{ - index_params.intermediate_graph_degree, index_params.metric}; - nn_descent_params.max_iterations = params.nn_descent_niter; - index_params.graph_build_params = nn_descent_params; + auto nn_params = + cuvs::neighbors::nn_descent::index_params(params.intermediate_graph_degree, metric); + nn_params.max_iterations = params.nn_descent_niter; + out_params = nn_params; break; } case cuvsCagraGraphBuildAlgo::ITERATIVE_CAGRA_SEARCH: { cuvs::neighbors::cagra::graph_build_params::iterative_search_params p; - index_params.graph_build_params = p; + out_params = p; break; } - }; + } +} + +template +void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor* dataset_tensor) +{ + auto dataset = dataset_tensor->dl_tensor; + + auto res_ptr = reinterpret_cast(res); + auto index = new cuvs::neighbors::cagra::index(*res_ptr); + + auto index_params = cuvs::neighbors::cagra::index_params(); + index_params.metric = static_cast((int)params.metric), + index_params.intermediate_graph_degree = params.intermediate_graph_degree; + index_params.graph_degree = params.graph_degree; + + _set_graph_build_params( + index_params.graph_build_params, params, params.build_algo, dataset.shape[0], dataset.shape[1]); if (auto* cparams = params.compression; cparams != nullptr) { auto compression_params = cuvs::neighbors::vpq_params(); @@ -266,6 +280,54 @@ void* _deserialize(cuvsResources_t res, const char* filename) return index; } +template +void* _merge(cuvsResources_t res, + cuvsCagraMergeParams params, + cuvsCagraIndex_t* indices, + size_t num_indices) +{ + auto res_ptr = reinterpret_cast(res); + cuvs::neighbors::cagra::merge_params merge_params_cpp; + auto& out_idx_params = *params.output_index_params; + + merge_params_cpp.output_index_params.metric = + static_cast((int)out_idx_params.metric); + merge_params_cpp.output_index_params.intermediate_graph_degree = + out_idx_params.intermediate_graph_degree; + merge_params_cpp.output_index_params.graph_degree = out_idx_params.graph_degree; + + int64_t total_size = 0; + int64_t dim = 0; + if (out_idx_params.build_algo == cuvsCagraGraphBuildAlgo::IVF_PQ) { + auto first_idx_ptr = + reinterpret_cast*>(indices[0]->addr); + dim = first_idx_ptr->dim(); + for (size_t i = 0; i < num_indices; ++i) { + auto idx_ptr = + reinterpret_cast*>(indices[i]->addr); + total_size += idx_ptr->size(); + } + } + + _set_graph_build_params(merge_params_cpp.output_index_params.graph_build_params, + out_idx_params, + out_idx_params.build_algo, + total_size, + dim); + + std::vector*> index_ptrs; + index_ptrs.reserve(num_indices); + for (size_t i = 0; i < num_indices; ++i) { + auto idx_ptr = reinterpret_cast*>(indices[i]->addr); + index_ptrs.push_back(idx_ptr); + } + + auto merged_index = new cuvs::neighbors::cagra::index( + cuvs::neighbors::cagra::merge(*res_ptr, merge_params_cpp, index_ptrs)); + + return merged_index; +} + } // namespace extern "C" cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index) @@ -401,6 +463,43 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res, }); } +extern "C" cuvsError_t cuvsCagraMerge(cuvsResources_t res, + cuvsCagraMergeParams_t params, + cuvsCagraIndex_t* indices, + size_t num_indices, + cuvsCagraIndex_t output_index) +{ + return cuvs::core::translate_exceptions([=] { + // Basic checks on inputs + RAFT_EXPECTS(indices != nullptr && num_indices > 0, "indices array cannot be null or empty"); + // Use first index dtype as reference + auto dtype = (*indices[0]).dtype; + for (size_t i = 1; i < num_indices; ++i) { + RAFT_EXPECTS((*indices[i]).dtype.code == dtype.code && (*indices[i]).dtype.bits == dtype.bits, + "All input indices must have the same data type"); + RAFT_EXPECTS((*indices[i]).addr != 0, "All input indices must be built (non-empty)"); + } + RAFT_EXPECTS(output_index != nullptr, "Output index pointer must not be null"); + output_index->dtype = dtype; // output index type matches inputs + // Dispatch based on data type + if (dtype.code == kDLFloat && dtype.bits == 32) { + output_index->addr = + reinterpret_cast(_merge(res, *params, indices, num_indices)); + } else if (dtype.code == kDLFloat && dtype.bits == 16) { + output_index->addr = + reinterpret_cast(_merge(res, *params, indices, num_indices)); + } else if (dtype.code == kDLInt && dtype.bits == 8) { + output_index->addr = + reinterpret_cast(_merge(res, *params, indices, num_indices)); + } else if (dtype.code == kDLUInt && dtype.bits == 8) { + output_index->addr = + reinterpret_cast(_merge(res, *params, indices, num_indices)); + } else { + RAFT_FAIL("Unsupported index data type: code=%d, bits=%d", dtype.code, dtype.bits); + } + }); +} + extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params) { return cuvs::core::translate_exceptions([=] { @@ -472,6 +571,24 @@ extern "C" cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t para return cuvs::core::translate_exceptions([=] { delete params; }); } +extern "C" cuvsError_t cuvsCagraMergeParamsCreate(cuvsCagraMergeParams_t* params) +{ + return cuvs::core::translate_exceptions([=] { + cuvsCagraIndexParams_t idx_params; + cuvsCagraIndexParamsCreate(&idx_params); + *params = new cuvsCagraMergeParams{.output_index_params = idx_params, + .strategy = MERGE_STRATEGY_PHYSICAL}; + }); +} + +extern "C" cuvsError_t cuvsCagraMergeParamsDestroy(cuvsCagraMergeParams_t params) +{ + return cuvs::core::translate_exceptions([=] { + cuvsCagraIndexParamsDestroy(params->output_index_params); + delete params; + }); +} + extern "C" cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, const char* filename, cuvsCagraIndex_t index) diff --git a/cpp/src/neighbors/cagra_merge_float.cu b/cpp/src/neighbors/cagra_merge_float.cu index 951c0c5fe9..01526d7b3c 100644 --- a/cpp/src/neighbors/cagra_merge_float.cu +++ b/cpp/src/neighbors/cagra_merge_float.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::cagra { auto merge(raft::resources const& handle, \ const cuvs::neighbors::cagra::merge_params& params, \ std::vector*>& indices) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::merge(handle, params, indices); \ } diff --git a/cpp/src/neighbors/cagra_merge_half.cu b/cpp/src/neighbors/cagra_merge_half.cu index 704a00f747..90bdafb255 100644 --- a/cpp/src/neighbors/cagra_merge_half.cu +++ b/cpp/src/neighbors/cagra_merge_half.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::cagra { auto merge(raft::resources const& handle, \ const cuvs::neighbors::cagra::merge_params& params, \ std::vector*>& indices) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::merge(handle, params, indices); \ } diff --git a/cpp/src/neighbors/cagra_merge_int8.cu b/cpp/src/neighbors/cagra_merge_int8.cu index a7e9035626..854261336e 100644 --- a/cpp/src/neighbors/cagra_merge_int8.cu +++ b/cpp/src/neighbors/cagra_merge_int8.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::cagra { auto merge(raft::resources const& handle, \ const cuvs::neighbors::cagra::merge_params& params, \ std::vector*>& indices) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::merge(handle, params, indices); \ } diff --git a/cpp/src/neighbors/cagra_merge_uint8.cu b/cpp/src/neighbors/cagra_merge_uint8.cu index a4fc7149c1..d536029cc1 100644 --- a/cpp/src/neighbors/cagra_merge_uint8.cu +++ b/cpp/src/neighbors/cagra_merge_uint8.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::cagra { auto merge(raft::resources const& handle, \ const cuvs::neighbors::cagra::merge_params& params, \ std::vector*>& indices) \ - ->cuvs::neighbors::cagra::index \ + -> cuvs::neighbors::cagra::index \ { \ return cuvs::neighbors::cagra::merge(handle, params, indices); \ } diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh index ab2396cc21..c15006257b 100644 --- a/cpp/src/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh @@ -192,7 +192,7 @@ struct search_plan_impl : public search_plan_impl_base { const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] uint32_t topk, - SAMPLE_FILTER_T sample_filter){}; + SAMPLE_FILTER_T sample_filter) {}; void adjust_search_params() { diff --git a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh index 989c4adce4..72b3532dfc 100644 --- a/cpp/src/neighbors/detail/knn_brute_force.cuh +++ b/cpp/src/neighbors/detail/knn_brute_force.cuh @@ -265,7 +265,7 @@ void tiled_brute_force_knn(const raft::resources& handle, IndexType col = j + (idx % current_centroid_size); IndexType g_idx = row * n_cols + col; IndexType item_idx = (g_idx) >> 5; - uint32_t bit_idx = (g_idx)&31; + uint32_t bit_idx = (g_idx) & 31; uint32_t filter = filter_bits[item_idx]; if ((filter & (uint32_t(1) << bit_idx)) == 0) { distances_ptr[idx] = masked_distance; @@ -800,7 +800,7 @@ cuvs::neighbors::brute_force::index build( auto dataset_storage = std::optional>{}; auto dataset_view = [&res, &dataset_storage, dataset]() { if constexpr (std::is_same_v>) { + raft::device_matrix_view>) { return dataset; } else { dataset_storage = diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 9beea05646..03002569d8 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -67,9 +67,9 @@ class ResultItem { public: __host__ __device__ ResultItem() - : id_(std::numeric_limits::max()), dist_(std::numeric_limits::max()){}; + : id_(std::numeric_limits::max()), dist_(std::numeric_limits::max()) {}; __host__ __device__ ResultItem(const Index_t id_with_flag, const DistData_t dist) - : id_(id_with_flag), dist_(dist){}; + : id_(id_with_flag), dist_(dist) {}; __host__ __device__ bool is_new() const { return id_ >= 0; } __host__ __device__ Index_t& id_with_flag() { return id_; } __host__ __device__ Index_t id() const diff --git a/cpp/src/neighbors/detail/vamana/priority_queue.cuh b/cpp/src/neighbors/detail/vamana/priority_queue.cuh index 6dc1dc94a1..883dc8d440 100644 --- a/cpp/src/neighbors/detail/vamana/priority_queue.cuh +++ b/cpp/src/neighbors/detail/vamana/priority_queue.cuh @@ -202,8 +202,7 @@ class PriorityQueue { * Used for other operations like checking for duplicates, etc. ****************************************************************************************/ template -class __align__(16) Node -{ +class __align__(16) Node { public: SUMTYPE distance; int nodeid; diff --git a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh index 22678c1962..0cf29a5b42 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_structs.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_structs.cuh @@ -47,8 +47,7 @@ static const int DEGREE_SIZES[4] = {32, 64, 128, 256}; // Object used to store id,distance combination graph construction operations template -struct __align__(16) DistPair -{ +struct __align__(16) DistPair { accT dist; IdxT idx; diff --git a/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu index b7ad428ad8..b256dac5a3 100644 --- a/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu +++ b/cpp/src/neighbors/iface/iface_cagra_float_uint32_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_CAGRA(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu index 86e0633bb1..a650cf95c3 100644 --- a/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu +++ b/cpp/src/neighbors/iface/iface_cagra_half_uint32_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_CAGRA(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu index 64f174184b..746ffeb961 100644 --- a/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu +++ b/cpp/src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_CAGRA(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu index 9f6db32df8..948e0993b3 100644 --- a/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu +++ b/cpp/src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_CAGRA(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu index 0afffe0ad3..9d7e40db15 100644 --- a/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu +++ b/cpp/src/neighbors/iface/iface_flat_float_int64_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_FLAT(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu index 5afd77053e..8daa6b435d 100644 --- a/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu +++ b/cpp/src/neighbors/iface/iface_flat_int8_t_int64_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_FLAT(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu index 4f2f85700c..0fdaef195d 100644 --- a/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/iface/iface_flat_uint8_t_int64_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_FLAT(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu index 90759d5f1a..c36d92d859 100644 --- a/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu +++ b/cpp/src/neighbors/iface/iface_pq_float_int64_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_PQ(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu index c92d6fd651..3f8ec9ac42 100644 --- a/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu +++ b/cpp/src/neighbors/iface/iface_pq_half_int64_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_PQ(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu index 59269e9da1..e58132b5b0 100644 --- a/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu +++ b/cpp/src/neighbors/iface/iface_pq_int8_t_int64_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_PQ(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu index c407e64cac..b6ce641b54 100644 --- a/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/iface/iface_pq_uint8_t_int64_t.cu @@ -29,9 +29,9 @@ namespace cuvs::neighbors { #define CUVS_INST_MG_PQ(T, IdxT) \ using T_ha = raft::host_device_accessor, \ - raft::memory_type::device>; \ + raft::memory_type::device>; \ using T_da = raft::host_device_accessor, \ - raft::memory_type::host>; \ + raft::memory_type::host>; \ using IdxT_ha = raft::host_device_accessor, \ raft::memory_type::device>; \ using IdxT_da = raft::host_device_accessor, \ diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh index 0c6f7c5ba9..2169f72998 100644 --- a/cpp/src/neighbors/ivf_common.cuh +++ b/cpp/src/neighbors/ivf_common.cuh @@ -37,7 +37,7 @@ struct dummy_block_sort_t { using queue_t = raft::matrix::detail::select::warpsort:: warp_sort_distributed; template - __device__ dummy_block_sort_t(int k, Args...){}; + __device__ dummy_block_sort_t(int k, Args...) {}; }; /** diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu index 52026172c7..485612ab7e 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu @@ -33,7 +33,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -49,7 +49,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -66,7 +66,7 @@ namespace cuvs::neighbors::ivf_flat { raft::device_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ @@ -83,7 +83,7 @@ namespace cuvs::neighbors::ivf_flat { raft::host_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_half_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_half_int64_t.cu index 8f4b253d7b..71916587e6 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_half_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_half_int64_t.cu @@ -33,7 +33,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -49,7 +49,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -66,7 +66,7 @@ namespace cuvs::neighbors::ivf_flat { raft::device_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ @@ -83,7 +83,7 @@ namespace cuvs::neighbors::ivf_flat { raft::host_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu index ef326f2415..f36d3c3bb2 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu @@ -33,7 +33,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -49,7 +49,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -66,7 +66,7 @@ namespace cuvs::neighbors::ivf_flat { raft::device_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ @@ -83,7 +83,7 @@ namespace cuvs::neighbors::ivf_flat { raft::host_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu index 8a1f3e42fa..899c1049f3 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu @@ -33,7 +33,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -49,7 +49,7 @@ namespace cuvs::neighbors::ivf_flat { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_flat::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index( \ std::move(cuvs::neighbors::ivf_flat::detail::build(handle, params, dataset))); \ @@ -66,7 +66,7 @@ namespace cuvs::neighbors::ivf_flat { raft::device_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ @@ -83,7 +83,7 @@ namespace cuvs::neighbors::ivf_flat { raft::host_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_flat::index& orig_index) \ - ->cuvs::neighbors::ivf_flat::index \ + -> cuvs::neighbors::ivf_flat::index \ { \ return cuvs::neighbors::ivf_flat::index(std::move( \ cuvs::neighbors::ivf_flat::detail::extend(handle, new_vectors, new_indices, orig_index))); \ diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh index 79b4f1a188..9bf4ae6784 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_interleaved_scan.cuh @@ -1023,15 +1023,15 @@ void launch_kernel(Lambda lambda, RAFT_EXPECTS(Veclen == index.veclen(), "Configured Veclen does not match the index interleaving pattern."); constexpr auto kKernel = interleaved_scan_kernel; + Veclen, + Ascending, + ComputeNorm, + T, + AccT, + IdxT, + IvfSampleFilterT, + Lambda, + PostLambda>; const int max_query_smem = 16384; int query_smem_elems = std::min(max_query_smem / sizeof(T), raft::Pow2::roundUp(index.dim())); diff --git a/cpp/src/neighbors/ivf_list.cuh b/cpp/src/neighbors/ivf_list.cuh index 9d8aef5033..2cfe02a903 100644 --- a/cpp/src/neighbors/ivf_list.cuh +++ b/cpp/src/neighbors/ivf_list.cuh @@ -101,10 +101,10 @@ void resize_list(raft::resources const& res, if (old_used_size > 0) { auto copied_data_extents = spec.make_list_extents(old_used_size); auto copied_view = raft::make_mdspan(new_list->data.data_handle(), copied_data_extents); + typename ListT::size_type, + raft::row_major, + false, + true>(new_list->data.data_handle(), copied_data_extents); raft::copy(copied_view.data_handle(), orig_list->data.data_handle(), copied_view.size(), diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh index 4b963b0b37..6060d2b7a1 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh @@ -29,7 +29,7 @@ namespace cuvs::neighbors::ivf_pq { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_pq::index \ + -> cuvs::neighbors::ivf_pq::index \ { \ return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \ } \ @@ -45,7 +45,7 @@ namespace cuvs::neighbors::ivf_pq { auto build(raft::resources const& handle, \ const cuvs::neighbors::ivf_pq::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::ivf_pq::index \ + -> cuvs::neighbors::ivf_pq::index \ { \ return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \ } \ @@ -62,7 +62,7 @@ namespace cuvs::neighbors::ivf_pq { raft::device_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_pq::index& orig_index) \ - ->cuvs::neighbors::ivf_pq::index \ + -> cuvs::neighbors::ivf_pq::index \ { \ return cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, orig_index); \ } \ @@ -77,7 +77,7 @@ namespace cuvs::neighbors::ivf_pq { raft::host_matrix_view new_vectors, \ std::optional> new_indices, \ const cuvs::neighbors::ivf_pq::index& orig_index) \ - ->cuvs::neighbors::ivf_pq::index \ + -> cuvs::neighbors::ivf_pq::index \ { \ return cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, orig_index); \ } \ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu index bc73ff5a36..33e7067a70 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu index 6e9e0bcf77..8c28e2c5b9 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu index 2aa0bacf4c..4cb3424cb5 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu index 4a57add148..e3f330dccb 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu index d4e3fdf5cc..75f8f2cf8a 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu index 1ad240b5bb..3d0c610dd2 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu index 02e118158d..9a288a1c7c 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu index fcc883c92f..07af7815e2 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu index cde961c72c..8886b01bef 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu index 5b34cb3536..a921545f80 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu index f1efe79f99..05241135b5 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu index dee55e22b7..2ebd3d5270 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu index bb56fd08d5..95a6f0bedd 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu index 4340956157..a0be0b392f 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu @@ -24,46 +24,45 @@ #include "../ivf_pq_compute_similarity_impl.cuh" #include "../ivf_pq_fp_8bit.cuh" -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity.cuh index 37612402c4..235f0aacb3 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_compute_similarity.cuh @@ -130,46 +130,45 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props, } // namespace cuvs::neighbors::ivf_pq::detail -#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - extern template auto \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->cuvs::neighbors::ivf_pq::detail::selected; \ - \ - extern template void \ - cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ - cuvs::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - cuvs::distance::DistanceType metric, \ - cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ +#define instantiate_cuvs_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + extern template auto \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) -> cuvs::neighbors::ivf_pq::detail::selected; \ + \ + extern template void \ + cuvs::neighbors::ivf_pq::detail::compute_similarity_run( \ + cuvs::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + cuvs::distance::DistanceType metric, \ + cuvs::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ uint32_t* _out_indices); #define COMMA , diff --git a/cpp/src/neighbors/mg/snmg.cuh b/cpp/src/neighbors/mg/snmg.cuh index 3d9e795adc..119c94fe14 100644 --- a/cpp/src/neighbors/mg/snmg.cuh +++ b/cpp/src/neighbors/mg/snmg.cuh @@ -17,7 +17,8 @@ #pragma once #include "../detail/knn_merge_parts.cuh" -#include +#include +#include #include #include #include @@ -75,10 +76,10 @@ void deserialize(const raft::resources& clique, index.mode_ = (cuvs::neighbors::distribution_mode)deserialize_scalar(handle, is); index.num_ranks_ = deserialize_scalar(handle, is); - if (index.num_ranks_ != raft::resource::get_nccl_num_ranks(clique)) { + if (index.num_ranks_ != raft::resource::get_num_ranks(clique)) { RAFT_FAIL("Serialized index has %d ranks whereas NCCL clique has %d ranks", index.num_ranks_, - raft::resource::get_nccl_num_ranks(clique)); + raft::resource::get_num_ranks(clique)); } for (int rank = 0; rank < index.num_ranks_; rank++) { @@ -215,8 +216,8 @@ void sharded_search_with_direct_merge(const raft::resources& clique, const raft::resources& dev_res = raft::resource::set_current_device_to_rank(clique, rank); auto& ann_if = index.ann_interfaces_[rank]; - if (rank == raft::resource::get_nccl_clique_root_rank(clique)) { // root rank - uint64_t batch_offset = raft::resource::get_nccl_clique_root_rank(clique) * part_size; + if (rank == raft::resource::get_root_rank(clique)) { // root rank + uint64_t batch_offset = raft::resource::get_root_rank(clique) * part_size; auto d_neighbors = raft::make_device_matrix_view( in_neighbors.data_handle() + batch_offset, n_rows_of_current_batch, n_neighbors); auto d_distances = raft::make_device_matrix_view( @@ -227,20 +228,20 @@ void sharded_search_with_direct_merge(const raft::resources& clique, // wait for other ranks ncclGroupStart(); for (int from_rank = 0; from_rank < index.num_ranks_; from_rank++) { - if (from_rank == raft::resource::get_nccl_clique_root_rank(clique)) continue; + if (from_rank == raft::resource::get_root_rank(clique)) continue; batch_offset = from_rank * part_size; ncclRecv(in_neighbors.data_handle() + batch_offset, part_size * sizeof(IdxT), ncclUint8, from_rank, - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); ncclRecv(in_distances.data_handle() + batch_offset, part_size * sizeof(float), ncclUint8, from_rank, - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); } ncclGroupEnd(); @@ -258,14 +259,14 @@ void sharded_search_with_direct_merge(const raft::resources& clique, ncclSend(d_neighbors.data_handle(), part_size * sizeof(IdxT), ncclUint8, - raft::resource::get_nccl_clique_root_rank(clique), - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_root_rank(clique), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); ncclSend(d_distances.data_handle(), part_size * sizeof(float), ncclUint8, - raft::resource::get_nccl_clique_root_rank(clique), - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_root_rank(clique), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); ncclGroupEnd(); resource::sync_stream(dev_res); @@ -379,13 +380,13 @@ void sharded_search_with_tree_merge(const raft::resources& clique, part_size * sizeof(IdxT), ncclUint8, other_id, - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); ncclRecv(tmp_distances.data_handle() + part_size, part_size * sizeof(float), ncclUint8, other_id, - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); received_something = true; } @@ -396,13 +397,13 @@ void sharded_search_with_tree_merge(const raft::resources& clique, part_size * sizeof(IdxT), ncclUint8, other_id, - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); ncclSend(tmp_distances.data_handle(), part_size * sizeof(float), ncclUint8, other_id, - raft::resource::get_nccl_comm(dev_res), + raft::resource::get_nccl_comm_for_rank(clique, rank), raft::resource::get_cuda_stream(dev_res)); } ncclGroupEnd(); @@ -655,7 +656,7 @@ template mg_index::mg_index(const raft::resources& clique, distribution_mode mode) : mode_(mode), round_robin_counter_(std::make_shared>(0)) { - num_ranks_ = raft::resource::get_nccl_num_ranks(clique); + num_ranks_ = raft::resource::get_num_ranks(clique); } template diff --git a/cpp/src/neighbors/nn_descent_float.cu b/cpp/src/neighbors/nn_descent_float.cu index 3942775570..b1e58b2330 100644 --- a/cpp/src/neighbors/nn_descent_float.cu +++ b/cpp/src/neighbors/nn_descent_float.cu @@ -24,7 +24,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::device_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ @@ -41,7 +41,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::host_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ diff --git a/cpp/src/neighbors/nn_descent_half.cu b/cpp/src/neighbors/nn_descent_half.cu index 0b6ba74b18..06d3a8c612 100644 --- a/cpp/src/neighbors/nn_descent_half.cu +++ b/cpp/src/neighbors/nn_descent_half.cu @@ -24,7 +24,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::device_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ @@ -42,7 +42,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::host_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ diff --git a/cpp/src/neighbors/nn_descent_int8.cu b/cpp/src/neighbors/nn_descent_int8.cu index a43e68d4ed..be20b00a10 100644 --- a/cpp/src/neighbors/nn_descent_int8.cu +++ b/cpp/src/neighbors/nn_descent_int8.cu @@ -24,7 +24,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::device_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ @@ -42,7 +42,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::host_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ diff --git a/cpp/src/neighbors/nn_descent_uint8.cu b/cpp/src/neighbors/nn_descent_uint8.cu index 6b565c8342..9c9ffedaf1 100644 --- a/cpp/src/neighbors/nn_descent_uint8.cu +++ b/cpp/src/neighbors/nn_descent_uint8.cu @@ -24,7 +24,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::device_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ @@ -42,7 +42,7 @@ namespace cuvs::neighbors::nn_descent { const cuvs::neighbors::nn_descent::index_params& params, \ raft::host_matrix_view dataset, \ std::optional> graph) \ - ->cuvs::neighbors::nn_descent::index \ + -> cuvs::neighbors::nn_descent::index \ { \ if (!graph.has_value()) { \ return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ diff --git a/cpp/src/neighbors/vamana_build_float.cu b/cpp/src/neighbors/vamana_build_float.cu index 0e09d63994..37b917acb2 100644 --- a/cpp/src/neighbors/vamana_build_float.cu +++ b/cpp/src/neighbors/vamana_build_float.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::vamana { auto build(raft::resources const& handle, \ const cuvs::neighbors::vamana::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::vamana::index \ + -> cuvs::neighbors::vamana::index \ { \ return cuvs::neighbors::vamana::build(handle, params, dataset); \ } \ @@ -31,7 +31,7 @@ namespace cuvs::neighbors::vamana { auto build(raft::resources const& handle, \ const cuvs::neighbors::vamana::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::vamana::index \ + -> cuvs::neighbors::vamana::index \ { \ return cuvs::neighbors::vamana::build(handle, params, dataset); \ } diff --git a/cpp/src/neighbors/vamana_build_int8.cu b/cpp/src/neighbors/vamana_build_int8.cu index f70b9ea276..d06954ab17 100644 --- a/cpp/src/neighbors/vamana_build_int8.cu +++ b/cpp/src/neighbors/vamana_build_int8.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::vamana { auto build(raft::resources const& handle, \ const cuvs::neighbors::vamana::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::vamana::index \ + -> cuvs::neighbors::vamana::index \ { \ return cuvs::neighbors::vamana::build(handle, params, dataset); \ } \ @@ -31,7 +31,7 @@ namespace cuvs::neighbors::vamana { auto build(raft::resources const& handle, \ const cuvs::neighbors::vamana::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::vamana::index \ + -> cuvs::neighbors::vamana::index \ { \ return cuvs::neighbors::vamana::build(handle, params, dataset); \ } diff --git a/cpp/src/neighbors/vamana_build_uint8.cu b/cpp/src/neighbors/vamana_build_uint8.cu index 8daf0c065c..e614c69685 100644 --- a/cpp/src/neighbors/vamana_build_uint8.cu +++ b/cpp/src/neighbors/vamana_build_uint8.cu @@ -23,7 +23,7 @@ namespace cuvs::neighbors::vamana { auto build(raft::resources const& handle, \ const cuvs::neighbors::vamana::index_params& params, \ raft::device_matrix_view dataset) \ - ->cuvs::neighbors::vamana::index \ + -> cuvs::neighbors::vamana::index \ { \ return cuvs::neighbors::vamana::build(handle, params, dataset); \ } \ @@ -31,7 +31,7 @@ namespace cuvs::neighbors::vamana { auto build(raft::resources const& handle, \ const cuvs::neighbors::vamana::index_params& params, \ raft::host_matrix_view dataset) \ - ->cuvs::neighbors::vamana::index \ + -> cuvs::neighbors::vamana::index \ { \ return cuvs::neighbors::vamana::build(handle, params, dataset); \ } diff --git a/cpp/src/preprocessing/quantize/scalar.cu b/cpp/src/preprocessing/quantize/scalar.cu index 68cba4cc8d..b4d8737b7f 100644 --- a/cpp/src/preprocessing/quantize/scalar.cu +++ b/cpp/src/preprocessing/quantize/scalar.cu @@ -23,15 +23,13 @@ namespace cuvs::preprocessing::quantize::scalar { #define CUVS_INST_QUANTIZATION(T, QuantI) \ auto train(raft::resources const& res, \ const params params, \ - raft::device_matrix_view dataset) \ - ->quantizer \ + raft::device_matrix_view dataset) -> quantizer \ { \ return detail::train(res, params, dataset); \ } \ auto train(raft::resources const& res, \ const params params, \ - raft::host_matrix_view dataset) \ - ->quantizer \ + raft::host_matrix_view dataset) -> quantizer \ { \ return detail::train(res, params, dataset); \ } \ diff --git a/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh b/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh index fe5a5d2963..c5cf52a449 100644 --- a/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh +++ b/cpp/src/sparse/neighbors/detail/cross_component_nn.cuh @@ -67,7 +67,7 @@ struct FixConnectivitiesRedOp { // default constructor for cutlass DI FixConnectivitiesRedOp() : m(0) {} - FixConnectivitiesRedOp(value_idx m_) : m(m_){}; + FixConnectivitiesRedOp(value_idx m_) : m(m_) {}; typedef typename raft::KeyValuePair KVP; DI void operator()(value_idx rit, KVP* out, const KVP& other) const diff --git a/cpp/tests/distance/distance_base.cuh b/cpp/tests/distance/distance_base.cuh index 8a431f49a1..e9a7bd286a 100644 --- a/cpp/tests/distance/distance_base.cuh +++ b/cpp/tests/distance/distance_base.cuh @@ -707,7 +707,7 @@ class BigMatrixDistanceTest : public ::testing::Test { public: BigMatrixDistanceTest() : x(m * k, raft::resource::get_cuda_stream(handle)), - dist(std::size_t(m) * m, raft::resource::get_cuda_stream(handle)){}; + dist(std::size_t(m) * m, raft::resource::get_cuda_stream(handle)) {}; void SetUp() override { auto testInfo = testing::UnitTest::GetInstance()->current_test_info(); diff --git a/cpp/tests/neighbors/ann_cagra_c.cu b/cpp/tests/neighbors/ann_cagra_c.cu index 9e0890c34d..ae80cc8986 100644 --- a/cpp/tests/neighbors/ann_cagra_c.cu +++ b/cpp/tests/neighbors/ann_cagra_c.cu @@ -476,3 +476,127 @@ TEST(CagraC, BuildSearchFiltered) cuvsCagraIndexDestroy(index); cuvsResourcesDestroy(res); } + +TEST(CagraC, BuildMergeSearch) +{ + cuvsResources_t res; + cuvsResourcesCreate(&res); + cudaStream_t stream; + cuvsStreamGet(res, &stream); + + float dataset[7][2] = {{0.74021935f, 0.92099380f}, + {0.03902049f, 0.96896291f}, + {0.92514056f, 0.44635010f}, + {0.12345678f, 0.87654321f}, + {0.50112233f, 0.33221100f}, + {0.66731918f, 0.10993068f}, + {0.77777777f, 0.88888888f}}; + + float* main_data_ptr = &dataset[0][0]; + float* additional_data_ptr = &dataset[4][0]; + float* query_data_ptr = &dataset[6][0]; + + rmm::device_uvector main_d(8, stream); + rmm::device_uvector additional_d(6, stream); + rmm::device_uvector queries_d(2, stream); + raft::copy(main_d.data(), main_data_ptr, 8, stream); + raft::copy(additional_d.data(), additional_data_ptr, 6, stream); + raft::copy(queries_d.data(), query_data_ptr, 2, stream); + + DLManagedTensor main_dataset_tensor; + int64_t main_shape[2] = {4, 2}; + main_dataset_tensor.dl_tensor.data = main_d.data(); + main_dataset_tensor.dl_tensor.device.device_type = kDLCUDA; + main_dataset_tensor.dl_tensor.device.device_id = 0; + main_dataset_tensor.dl_tensor.ndim = 2; + main_dataset_tensor.dl_tensor.dtype.code = kDLFloat; + main_dataset_tensor.dl_tensor.dtype.bits = 32; + main_dataset_tensor.dl_tensor.dtype.lanes = 1; + main_dataset_tensor.dl_tensor.shape = main_shape; + main_dataset_tensor.dl_tensor.strides = nullptr; + + DLManagedTensor additional_dataset_tensor = main_dataset_tensor; + int64_t additional_shape[2] = {3, 2}; + additional_dataset_tensor.dl_tensor.data = additional_d.data(); + additional_dataset_tensor.dl_tensor.shape = additional_shape; + + DLManagedTensor query_tensor = main_dataset_tensor; + int64_t query_shape[2] = {1, 2}; + query_tensor.dl_tensor.data = queries_d.data(); + query_tensor.dl_tensor.shape = query_shape; + + cuvsCagraIndexParams_t build_params; + cuvsCagraIndexParamsCreate(&build_params); + cuvsCagraIndex_t index_main, index_add; + cuvsCagraIndexCreate(&index_main); + cuvsCagraIndexCreate(&index_add); + ASSERT_EQ(cuvsCagraBuild(res, build_params, &main_dataset_tensor, index_main), CUVS_SUCCESS); + ASSERT_EQ(cuvsCagraBuild(res, build_params, &additional_dataset_tensor, index_add), CUVS_SUCCESS); + + cuvsCagraMergeParams_t merge_params; + cuvsCagraMergeParamsCreate(&merge_params); + cuvsCagraIndex_t index_merged; + cuvsCagraIndexCreate(&index_merged); + + cuvsCagraIndex_t index_array[2] = {index_main, index_add}; + ASSERT_EQ(cuvsCagraMerge(res, merge_params, index_array, 2, index_merged), CUVS_SUCCESS); + + int merged_dim = -1; + ASSERT_EQ(cuvsCagraIndexGetDims(index_merged, &merged_dim), CUVS_SUCCESS); + EXPECT_EQ(merged_dim, 2); + + DLManagedTensor neighbors_tensor, distances_tensor; + rmm::device_uvector neighbors_d(1, stream); + rmm::device_uvector distances_d(1, stream); + int64_t neighbors_shape[2] = {1, 1}; + int64_t distances_shape[2] = {1, 1}; + neighbors_tensor.dl_tensor.data = neighbors_d.data(); + neighbors_tensor.dl_tensor.device = main_dataset_tensor.dl_tensor.device; + neighbors_tensor.dl_tensor.ndim = 2; + neighbors_tensor.dl_tensor.dtype.code = kDLInt; + neighbors_tensor.dl_tensor.dtype.bits = 64; + neighbors_tensor.dl_tensor.dtype.lanes = 1; + neighbors_tensor.dl_tensor.shape = neighbors_shape; + neighbors_tensor.dl_tensor.strides = nullptr; + distances_tensor.dl_tensor.data = distances_d.data(); + distances_tensor.dl_tensor.device = main_dataset_tensor.dl_tensor.device; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = nullptr; + + cuvsCagraSearchParams_t search_params; + cuvsCagraSearchParamsCreate(&search_params); + (*search_params).itopk_size = 1; + + cuvsFilter filter; + filter.type = NO_FILTER; + filter.addr = 0; + ASSERT_EQ(cuvsCagraSearch(res, + search_params, + index_merged, + &query_tensor, + &neighbors_tensor, + &distances_tensor, + filter), + CUVS_SUCCESS); + + int64_t neighbor_host = -1; + float distance_host = 1.0f; + raft::copy(&neighbor_host, neighbors_d.data(), 1, stream); + raft::copy(&distance_host, distances_d.data(), 1, stream); + cudaStreamSynchronize(stream); + + EXPECT_EQ(neighbor_host, 6); + EXPECT_NEAR(distance_host, 0.0f, 1e-6); + + cuvsCagraSearchParamsDestroy(search_params); + cuvsCagraMergeParamsDestroy(merge_params); + cuvsCagraIndexParamsDestroy(build_params); + cuvsCagraIndexDestroy(index_merged); + cuvsCagraIndexDestroy(index_add); + cuvsCagraIndexDestroy(index_main); + cuvsResourcesDestroy(res); +} diff --git a/cpp/tests/neighbors/brute_force_prefiltered.cu b/cpp/tests/neighbors/brute_force_prefiltered.cu index bf7dce7eec..7f2c1f195b 100644 --- a/cpp/tests/neighbors/brute_force_prefiltered.cu +++ b/cpp/tests/neighbors/brute_force_prefiltered.cu @@ -127,7 +127,7 @@ RAFT_KERNEL set_bitmap_kernel( index_t col = dst[idx]; index_t g_idx = row * n_cols + col; index_t item_idx = (g_idx) >> 5; - uint32_t bit_idx = (g_idx)&31; + uint32_t bit_idx = (g_idx) & 31; atomicOr(bitmap + item_idx, (uint32_t(1) << bit_idx)); } } diff --git a/cpp/tests/sparse/neighbors/cross_component_nn.cu b/cpp/tests/sparse/neighbors/cross_component_nn.cu index d931fedc46..374e273d68 100644 --- a/cpp/tests/sparse/neighbors/cross_component_nn.cu +++ b/cpp/tests/sparse/neighbors/cross_component_nn.cu @@ -392,7 +392,7 @@ struct MutualReachabilityFixConnectivitiesRedOp { DI MutualReachabilityFixConnectivitiesRedOp() : m(0) {} MutualReachabilityFixConnectivitiesRedOp(value_t* core_dists_, value_idx m_) - : core_dists(core_dists_), m(m_){}; + : core_dists(core_dists_), m(m_) {}; typedef typename raft::KeyValuePair KVP; DI void operator()(value_idx rit, KVP* out, const KVP& other) const diff --git a/dependencies.yaml b/dependencies.yaml index b4b3e9ea3b..55d7bf149b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -85,7 +85,7 @@ files: rust: output: conda matrix: - cuda: ["12.8"] + cuda: ["12.9"] arch: [x86_64, aarch64] includes: # clang/libclang only needed for bindgen support @@ -326,9 +326,9 @@ dependencies: common: - output_types: conda packages: - - clang==16.0.6 - - clang-tools==16.0.6 - - libclang==16.0.6 + - clang==20.1.4 + - clang-tools==20.1.4 + - libclang==20.1.4 cuda_version: specific: - output_types: conda @@ -365,6 +365,10 @@ dependencies: cuda: "12.8" packages: - cuda-version=12.8 + - matrix: + cuda: "12.9" + packages: + - cuda-version=12.9 cuda: specific: - output_types: conda diff --git a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml index 16d449117a..191c640c00 100644 --- a/python/cuvs_bench/cuvs_bench/config/algorithms.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algorithms.yaml @@ -13,6 +13,9 @@ faiss_gpu_ivf_sq: faiss_gpu_cagra: executable: FAISS_GPU_CAGRA_ANN_BENCH requires_gpu: true +faiss_gpu_cagra_hnsw: + executable: FAISS_GPU_CAGRA_HNSW_ANN_BENCH + requires_gpu: true faiss_cpu_flat: executable: FAISS_CPU_FLAT_ANN_BENCH requires_gpu: false @@ -22,6 +25,9 @@ faiss_cpu_ivf_flat: faiss_cpu_ivf_pq: executable: FAISS_CPU_IVF_PQ_ANN_BENCH requires_gpu: false +faiss_cpu_hnsw_flat: + executable: FAISS_CPU_HNSW_FLAT_ANN_BENCH + requires_gpu: false cuvs_ivf_flat: executable: CUVS_IVF_FLAT_ANN_BENCH requires_gpu: true diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_hnsw_flat.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_hnsw_flat.yaml new file mode 100644 index 0000000000..403cd29e91 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_cpu_hnsw_flat.yaml @@ -0,0 +1,10 @@ +name: faiss_cpu_hnsw_flat +groups: + base: + build: + M: [16, 32] + efConstruction: [32, 64, 128, 256, 512] + search: + bounded_queue: [False, True] + check_relative_distance: [False, True] + efSearch: [8, 16, 32, 64, 128, 256, 512, 768, 1024] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml index 3ec60c7b8e..47a92e3d20 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra.yaml @@ -5,17 +5,18 @@ constraints: groups: base: build: - graph_degree: [32, 64, 96, 128] - intermediate_graph_degree: [32, 64, 96, 128] - graph_build_algo: ["NN_DESCENT"] + graph_degree: [32] + intermediate_graph_degree: [64] + cagra_build_algo: ["NN_DESCENT"] search: - itopk: [32, 64, 128, 256, 512] - search_width: [1, 2, 4, 8, 16, 32, 64] + itopk: [32, 64, 128, 256, 512, 768, 1024] + search_width: [1 ,2 ,4 ,8, 16, 32] + max_iterations: [16, 20, 24, 28, 32, 64, 128] test: build: graph_degree: [32] intermediate_graph_degree: [32] - graph_build_algo: ["NN_DESCENT"] + cagra_build_algo: ["IVF_PQ", "NN_DESCENT"] search: itopk: [32] search_width: [1, 2] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra_hnsw.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra_hnsw.yaml new file mode 100644 index 0000000000..097a20aa32 --- /dev/null +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_cagra_hnsw.yaml @@ -0,0 +1,12 @@ +name: faiss_gpu_cagra_hnsw +constraints: + build: cuvs_bench.config.algos.constraints.cuvs_cagra_build +groups: + base: + build: + graph_degree: [32, 64] + intermediate_graph_degree: [64, 128] + cagra_build_algo: ["NN_DESCENT"] + base_level_only: [False, True] + search: + efSearch: [16, 32, 64, 128, 256, 512, 768, 1024] diff --git a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml index 158246a030..a533ff6d8c 100644 --- a/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml +++ b/python/cuvs_bench/cuvs_bench/config/algos/faiss_gpu_ivf_pq.yaml @@ -53,9 +53,9 @@ groups: refine_ratio: [1, 2, 4] 100M: build: - nlist: [100000] - M: [96, 64] - ratio: [4] + nlist: [8192, 16384, 32768] + M: [96, 48] + ratio: [10] usePrecomputed: [False, True] useFloat16: [True] use_cuvs: [False] @@ -65,9 +65,9 @@ groups: refine_ratio: [1, 2, 4] 100Mcuvs: build: - nlist: [100000] - M: [96, 64] - ratio: [4] + nlist: [8192, 16384, 50000, 100000] + M: [96, 48] + ratio: [10] useFloat16: [False, True] use_cuvs: [True] bitsPerCode: [8, 6, 5, 4]