From f9aebaf2baf85d928fea8adcd3ebef6719913e2a Mon Sep 17 00:00:00 2001 From: blaise-muhirwa Date: Tue, 21 Nov 2023 05:47:22 +0000 Subject: [PATCH] add python tests --- .gitignore | 1 + CMakeLists.txt | 3 +- bin/run_anns.sh | 20 +- flatnav_python/bindings.cpp | 251 ---------------------- flatnav_python/pyproject.toml | 3 + flatnav_python/python_bindings.cpp | 332 +++++++++++++++++------------ flatnav_python/setup.py | 28 +-- flatnav_python/test_index.py | 210 ++++++++++++++++-- tools/query.cpp | 224 ------------------- tools/query_npy.cpp | 2 +- 10 files changed, 410 insertions(+), 664 deletions(-) delete mode 100644 flatnav_python/bindings.cpp delete mode 100644 tools/query.cpp diff --git a/.gitignore b/.gitignore index 2b50cb0..6ceb4e4 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ build flatnav_python/flatnav.egg-info/ flatnav_python/poetry.lock flatnav_python/dist +flatnav_python/__pycache__ # other files diff --git a/CMakeLists.txt b/CMakeLists.txt index b713529..f501cc0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -218,7 +218,6 @@ set(HEADERS ${PROJECT_SOURCE_DIR}/flatnav/distances/inner_products_from_hnswlib.h ${PROJECT_SOURCE_DIR}/flatnav/distances/SquaredL2Distance.h ${PROJECT_SOURCE_DIR}/flatnav/distances/SquaredL2DistanceSpecializations.h - ${PROJECT_SOURCE_DIR}/flatnav/distances/SQDistance.h ${PROJECT_SOURCE_DIR}/flatnav/util/ExplicitSet.h ${PROJECT_SOURCE_DIR}/flatnav/util/GorderPriorityQueue.h ${PROJECT_SOURCE_DIR}/flatnav/util/reordering.h @@ -238,7 +237,7 @@ set_target_properties(FLAT_NAV_LIB PROPERTIES LINKER_LANGUAGE CXX) if(BUILD_EXAMPLES) message(STATUS "Building examples for Flatnav") - foreach(CONSTRUCT_EXEC construct_npy query query_npy cereal_tests) + foreach(CONSTRUCT_EXEC construct_npy query_npy cereal_tests) add_executable(${CONSTRUCT_EXEC} ${PROJECT_SOURCE_DIR}/tools/${CONSTRUCT_EXEC}.cpp ${HEADERS}) add_dependencies(${CONSTRUCT_EXEC} FLAT_NAV_LIB) diff --git a/bin/run_anns.sh b/bin/run_anns.sh index 9678674..0ae3719 100755 --- a/bin/run_anns.sh +++ b/bin/run_anns.sh @@ -5,13 +5,13 @@ if [ -f mnist_784.index ]; then rm mnist_784.index fi -if [ -f sift_128.index ]; then - rm sift_128.index -fi +# if [ -f sift_128.index ]; then +# rm sift_128.index +# fi -if [ -f glove_25.index ]; then - rm glove_25.index -fi +# if [ -f glove_25.index ]; then +# rm glove_25.index +# fi # if [ -f gist_960.index ]; then # rm gist_960.index @@ -22,10 +22,10 @@ fi # fi # Build the index for MNIST -# build/construct_npy 1 0 data/mnist-784-euclidean/mnist-784-euclidean.train.npy 16 128 mnist_784.index +build/construct_npy 0 0 data/mnist-784-euclidean/mnist-784-euclidean.train.npy 16 128 mnist_784.index # # Query MNIST -# build/query_npy 0 mnist_784.index data/mnist-784-euclidean/mnist-784-euclidean.test.npy data/mnist-784-euclidean/mnist-784-euclidean.gtruth.npy 256 100 0 1 +build/query_npy 0 mnist_784.index data/mnist-784-euclidean/mnist-784-euclidean.test.npy data/mnist-784-euclidean/mnist-784-euclidean.gtruth.npy 256 100 0 0 # # Query MNIST with reordering # build/query_npy 0 mnist_784.index data/mnist/mnist-784-euclidean.test.npy data/mnist/mnist-784-euclidean.gtruth.npy 256,512 100 1 @@ -37,10 +37,10 @@ fi # build/query_npy 0 sift_128.index data/sift/sift-128-euclidean.test.npy data/sift/sift-128-euclidean.gtruth.npy 256,512 100 0 # Build the index for GloVe -build/construct_npy 1 1 data/glove/glove-25-angular.train.npy 16 128 glove_25.index +# build/construct_npy 1 1 data/glove/glove-25-angular.train.npy 16 128 glove_25.index # Query GloVe without reordering 
-build/query_npy 1 glove_25.index data/glove/glove-25-angular.test.npy data/glove/glove-25-angular.gtruth.npy 128,256 100 0 1 +# build/query_npy 1 glove_25.index data/glove/glove-25-angular.test.npy data/glove/glove-25-angular.gtruth.npy 128,256 100 0 1 # # Query GloVe with reordering # build/query_npy 1 glove_25.index data/glove/glove-25-angular.test.npy data/glove/glove-25-angular.gtruth.npy 256,512 100 1 diff --git a/flatnav_python/bindings.cpp b/flatnav_python/bindings.cpp deleted file mode 100644 index 0338fba..0000000 --- a/flatnav_python/bindings.cpp +++ /dev/null @@ -1,251 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -using flatnav::DistanceInterface; -using flatnav::Index; -using flatnav::InnerProductDistance; -using flatnav::SquaredL2Distance; - -namespace py = pybind11; - -template class PythonIndex { - const uint32_t NUM_LOG_STEPS = 1000; -private: - int _dim, label_id; - bool _verbose; - Index *_index; - -public: - typedef std::pair, py::array_t> - DistancesLabelsPair; - - explicit PythonIndex(std::unique_ptr> index) - : _dim(index->dataDimension()), label_id(0), _verbose(false), - _index(index.get()) {} - - PythonIndex(std::shared_ptr> distance, int dim, - int dataset_size, int max_edges_per_node, bool verbose = false) - : _dim(dim), label_id(0), _verbose(verbose), - _index(new Index( - /* dist = */ std::move(distance), - /* dataset_size = */ dataset_size, - /* max_edges_per_node = */ max_edges_per_node)) {} - - Index *getIndex() { return _index; } - - ~PythonIndex() { delete _index; } - - static std::unique_ptr> - loadIndex(const std::string &filename) { - auto index = Index::loadIndex(/* filename = */ filename); - return std::make_unique>(std::move(index)); - } - - void - add(const py::array_t &data, - int ef_construction, py::object labels = py::none()) { - // py::array_t means that - // the functions expects either a Numpy array of floats or a castable type - // to that type. If the given type can't be casted, pybind11 will throw an - // error. - - auto num_vectors = data.shape(0); - auto data_dim = data.shape(1); - if (data.ndim() != 2 || data_dim != _dim) { - throw std::invalid_argument("Data has incorrect dimensions."); - } - - std::clog << "[num-vectors] = " << num_vectors << std::flush; - std::clog << "[data_dim] = " << data_dim << std::flush; - if (labels.is_none()) { - for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) { - this->_index->add(/* data = */ (void *)data.data(vec_index), - /* label = */ label_id, - /* ef_construction = */ ef_construction); - if (_verbose && vec_index % NUM_LOG_STEPS == 0) { - std::clog << "." << std::flush; - } - label_id++; - } - std::clog << std::endl; - return; - } - - // Use the provided labels now - py::array_t node_labels( - labels); - if (node_labels.ndim() != 1 || node_labels.shape(0) != num_vectors) { - throw std::invalid_argument("Labels have incorrect dimensions."); - } - - for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) { - label_t label_id = *node_labels.data(vec_index); - this->_index->add(/* data = */ (void *)data.data(vec_index), - /* label = */ label_id, - /* ef_construction = */ ef_construction); - - if (_verbose && vec_index % NUM_LOG_STEPS == 0) { - std::clog << "." 
<< std::flush; - } - } - std::clog << std::endl; - } - - DistancesLabelsPair - search(const py::array_t - queries, - int K, int ef_search) { - size_t num_queries = queries.shape(0); - size_t queries_dim = queries.shape(1); - - if (queries.ndim() != 2 || queries_dim != _dim) { - throw std::invalid_argument("Queries have incorrect dimensions."); - } - - label_t *results = new label_t[num_queries * K]; - float *distances = new float[num_queries * K]; - - for (size_t query_index = 0; query_index < num_queries; query_index++) { - std::vector> top_k = this->_index->search( - /* query = */ (const void *)queries.data(query_index), /* K = */ K, - /* ef_search = */ ef_search); - - for (size_t i = 0; i < top_k.size(); i++) { - distances[query_index * K + i] = top_k[i].first; - results[query_index * K + i] = top_k[i].second; - } - } - - // Allows to transfer ownership to Python - py::capsule free_results_when_done( - results, [](void *ptr) { delete (label_t *)ptr; }); - py::capsule free_distances_when_done( - distances, [](void *ptr) { delete (float *)ptr; }); - - py::array_t labels = - py::array_t({num_queries, (size_t)K}, // shape of the array - {K * sizeof(label_t), sizeof(label_t)}, // strides - results, // data pointer - free_results_when_done // capsule - ); - - py::array_t dists = py::array_t( - {num_queries, (size_t)K}, {K * sizeof(float), sizeof(float)}, distances, - free_distances_when_done); - - return {dists, labels}; - } -}; - -using L2FlatNavIndex = PythonIndex; -using InnerProductFlatNavIndex = PythonIndex; - -template -void bindIndexMethods(py::class_ &index_class) { - index_class - .def( - "save", - [](IndexType &index_type, const std::string &filename) { - auto index = index_type.getIndex(); - index->saveIndex(/* filename = */ filename); - }, - py::arg("filename"), - "Save a FlatNav index at the given file location.") - .def_static("load", &IndexType::loadIndex, py::arg("filename"), - "Load a FlatNav index from a given file location") - .def("add", &IndexType::add, py::arg("data"), py::arg("ef_construction"), - py::arg("labels") = py::none(), - "Add vectors(data) to the index with the given `ef_construction` " - "parameter and optional labels. `ef_construction` determines how " - "many " - "vertices are visited while inserting every vector in the " - "underlying graph structure.") - .def("search", &IndexType::search, py::arg("queries"), py::arg("K"), - py::arg("ef_search"), - "Return top `K` closest data points for every query in the " - "provided `queries`. The results are returned as a Tuple of " - "distances and label ID's. 
The `ef_search` parameter determines how " - "many neighbors are visited while finding the closest neighbors " - "for every query.") - .def( - "reorder", - [](IndexType &index_type, const std::string &algorithm) { - auto index = index_type.getIndex(); - auto alg = algorithm; - std::transform(alg.begin(), alg.end(), alg.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (alg == "gorder") { - index->reorderGOrder(); - } else if (alg == "rcm") { - index->reorderRCM(); - } else { - throw std::invalid_argument( - "`" + algorithm + - "` is not a supported graph re-ordering algorithm."); - } - }, - py::arg("algorithm"), - "Perform graph re-ordering based on the given re-ordering strategy.") - .def_property_readonly( - "max_edges_per_node", - [](IndexType &index_type) { - return index_type.getIndex()->maxEdgesPerNode(); - }, - "Maximum number of edges(links) per node in the underlying NSW graph " - "data structure."); -} - -py::object createIndex(const std::string &distance_type, int dim, - int dataset_size, int max_edges_per_node, - bool verbose = false) { - auto dist_type = distance_type; - std::transform(dist_type.begin(), dist_type.end(), dist_type.begin(), - [](unsigned char c) { return std::tolower(c); }); - - if (dist_type == "l2") { - auto distance = std::make_shared(/* dim = */ dim); - return py::cast(new L2FlatNavIndex(std::move(distance), dim, dataset_size, - max_edges_per_node, verbose)); - } else if (dist_type == "angular") { - auto distance = std::make_shared(/* dim = */ dim); - return py::cast(new InnerProductFlatNavIndex( - std::move(distance), dim, dataset_size, max_edges_per_node, verbose)); - } - throw std::invalid_argument("Invalid distance type: `" + dist_type + - "` during index construction. Valid options " - "include `l2` and `angular`."); -} - -void defineIndexSubmodule(py::module_ &index_submodule) { - index_submodule.def("index_factory", &createIndex, py::arg("distance_type"), - py::arg("dim"), py::arg("dataset_size"), - py::arg("max_edges_per_node"), py::arg("verbose") = false, - "Creates a FlatNav index given the corresponding " - "parameters. 
The `distance_type` argument determines the " - "kind of index created (either L2Index or IPIndex)"); - - py::class_ l2_index_class(index_submodule, "L2Index"); - bindIndexMethods(l2_index_class); - - py::class_ ip_index_class(index_submodule, - "IPIndex"); - bindIndexMethods(ip_index_class); -} - -PYBIND11_MODULE(flatnav, module) { - auto index_submodule = module.def_submodule("index"); - - defineIndexSubmodule(index_submodule); -} \ No newline at end of file diff --git a/flatnav_python/pyproject.toml b/flatnav_python/pyproject.toml index e1cb0fb..9539524 100644 --- a/flatnav_python/pyproject.toml +++ b/flatnav_python/pyproject.toml @@ -21,6 +21,9 @@ setuptools = "68.2.2" black = "^23.11.0" pytest = "^7.4.3" numpy = "^1.26.2" +h5py = "^3.10.0" +requests = "^2.31.0" + [build-system] diff --git a/flatnav_python/python_bindings.cpp b/flatnav_python/python_bindings.cpp index 793bcb9..2476305 100644 --- a/flatnav_python/python_bindings.cpp +++ b/flatnav_python/python_bindings.cpp @@ -1,192 +1,250 @@ #include -#include -#include -#include - +#include +#include +#include #include #include - -#include +#include +#include +#include #include #include +#include #include -using namespace flatnav; +using flatnav::DistanceInterface; using flatnav::Index; using flatnav::InnerProductDistance; using flatnav::SquaredL2Distance; -using flatnav::quantization::ProductQuantizer; namespace py = pybind11; template class PyIndex { + const uint32_t NUM_LOG_STEPS = 10000; + private: + int _dim, label_id; + bool _verbose; Index *_index; - std::unique_ptr> _distance; - size_t _dim; - int _added; +public: + typedef std::pair, py::array_t> + DistancesLabelsPair; - void setIndexMetric(std::string &metric) { - std::transform(metric.begin(), metric.end(), metric.begin(), - [](unsigned char c) { return std::tolower(c); }); + explicit PyIndex(std::unique_ptr> index) + : _dim(index->dataDimension()), label_id(0), _verbose(false), + _index(index.get()) {} - if (metric == "l2") { - _distance = std::make_unique(/* dim = */ _dim); - } else if (metric == "angular") { - _distance = std::make_unique(/* dim = */ _dim); - } - throw std::invalid_argument("Invalid metric `" + metric + - "` used during index construction."); - } + PyIndex(std::shared_ptr> distance, int dim, + int dataset_size, int max_edges_per_node, bool verbose = false) + : _dim(dim), label_id(0), _verbose(verbose), + _index(new Index( + /* dist = */ std::move(distance), + /* dataset_size = */ dataset_size, + /* max_edges_per_node = */ max_edges_per_node)) {} -public: - PyIndex(std::string metric_type, size_t dim, int N, int M) - : _dim(dim), _added(0) { - setIndexMetric(metric_type); - _index = new Index( - /* dist = */ std::move(_distance), /* dataset_size = */ N, - /* max_edges_per_node = */ M); - } + Index *getIndex() { return _index; } - PyIndex(std::string filename) { - _index = new Index(/* in = */ filename); + ~PyIndex() { delete _index; } + + static std::unique_ptr> + loadIndex(const std::string &filename) { + auto index = Index::loadIndex(/* filename = */ filename); + return std::make_unique>(std::move(index)); } - void add(py::array_t data, - int ef_construction, py::object labels_obj = py::none()) { + void + add(const py::array_t &data, + int ef_construction, py::object labels = py::none()) { + // py::array_t means that + // the functions expects either a Numpy array of floats or a castable type + // to that type. If the given type can't be casted, pybind11 will throw an + // error. 
+ + auto num_vectors = data.shape(0); + auto data_dim = data.shape(1); + if (data.ndim() != 2 || data_dim != _dim) { + throw std::invalid_argument("Data has incorrect dimensions."); + } + if (labels.is_none()) { + for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) { + this->_index->add(/* data = */ (void *)data.data(vec_index), + /* label = */ label_id, + /* ef_construction = */ ef_construction); + if (_verbose && vec_index % NUM_LOG_STEPS == 0) { + std::clog << "." << std::flush; + } + label_id++; + } + std::clog << std::endl; + return; + } - if (data.n_dim() != 2 || data.shape(1) != _dim) { - throw std::invalid_argument("Data has incorrect _dimensions"); + // Use the provided labels now + py::array_t node_labels( + labels); + if (node_labels.ndim() != 1 || node_labels.shape(0) != num_vectors) { + throw std::invalid_argument("Labels have incorrect dimensions."); } - if (labels_obj.is_none()) { - for (size_t n = 0; n < data.shape(0); n++) { - this->index->add((void *)data.data(n), _added, ef_construction); - _added++; - } - } else { - py::array_t labels( - labels_obj); - if (labels.n_dim() != 1 || labels.shape(0) != data.shape(0)) { - throw std::invalid_argument("Labels have incorrect _dimensions"); - } + for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) { + label_t label_id = *node_labels.data(vec_index); + this->_index->add(/* data = */ (void *)data.data(vec_index), + /* label = */ label_id, + /* ef_construction = */ ef_construction); - for (size_t n = 0; n < data.shape(0); n++) { - label_t l = *labels.data(n); - this->index->add((void *)data.data(n), l, ef_construction); - _added++; + if (_verbose && vec_index % NUM_LOG_STEPS == 0) { + std::clog << "." << std::flush; } } + std::clog << std::endl; } - py::array_t - search(py::array_t queries, + DistancesLabelsPair + search(const py::array_t + queries, int K, int ef_search) { - if (queries.n_dim() != 2 || queries.shape(1) != _dim) { - throw std::invalid_argument("Queries have incorrect _dimensions"); - } size_t num_queries = queries.shape(0); + size_t queries_dim = queries.shape(1); + + if (queries.ndim() != 2 || queries_dim != _dim) { + throw std::invalid_argument("Queries have incorrect dimensions."); + } label_t *results = new label_t[num_queries * K]; + float *distances = new float[num_queries * K]; + + for (size_t query_index = 0; query_index < num_queries; query_index++) { + std::vector> top_k = this->_index->search( + /* query = */ (const void *)queries.data(query_index), /* K = */ K, + /* ef_search = */ ef_search); - for (size_t q = 0; q < num_queries; q++) { - std::vector> topK = - this->index->search(queries.data(q), K, ef_search); - for (size_t i = 0; i < topK.size(); i++) { - results[q * K + i] = topK[i].second; + for (size_t i = 0; i < top_k.size(); i++) { + distances[query_index * K + i] = top_k[i].first; + results[query_index * K + i] = top_k[i].second; } } - py::capsule free_when_done(results, [](void *ptr) { delete ptr; }); + // Allows to transfer ownership to Python + py::capsule free_results_when_done( + results, [](void *ptr) { delete (label_t *)ptr; }); + py::capsule free_distances_when_done( + distances, [](void *ptr) { delete (float *)ptr; }); - return py::array_t({num_queries, (size_t)K}, - {K * sizeof(label_t), sizeof(label_t)}, results, - free_when_done); - } - - void reorder(std::string alg) { - std::transform(alg.begin(), alg.end(), std::tolower); - - if (alg == "gorder") { - this->index->reorder_gorder(); - } else if (alg == "rcm") { - this->index->reorder_rcm(); - } else { - throw 
std::invalid_argument( - "'" + alg + "' is not a supported graph re-ordering algorithm."); - } - } + py::array_t labels = + py::array_t({num_queries, (size_t)K}, // shape of the array + {K * sizeof(label_t), sizeof(label_t)}, // strides + results, // data pointer + free_results_when_done // capsule + ); - void save(std::string filename) { this->index->save(filename); } + py::array_t dists = py::array_t( + {num_queries, (size_t)K}, {K * sizeof(float), sizeof(float)}, distances, + free_distances_when_done); - ~PyIndex() { - delete index; - delete space; + return {dists, labels}; } }; -template -double ComputeRecall(py::array_t results, - py::array_t gtruths) { - double avg_recall = 0.0; - for (size_t q = 0; q < results.shape(0); q++) { - double recall = 0.0; - const label_t *result = results.data(q); - const label_t *topk = gtruths.data(q); - for (size_t i = 0; i < results.shape(1); i++) { - for (size_t j = 0; j < results.shape(1); j++) { - if (result[i] == topk[j]) { - recall += 1.0; - break; - } - } - } - avg_recall += recall; - } - - return avg_recall /= (results.shape(0) * results.shape(1)); +using L2FlatNavIndex = PyIndex; +using InnerProductFlatNavIndex = PyIndex; + +template +void bindIndexMethods(py::class_ &index_class) { + index_class + .def( + "save", + [](IndexType &index_type, const std::string &filename) { + auto index = index_type.getIndex(); + index->saveIndex(/* filename = */ filename); + }, + py::arg("filename"), + "Save a FlatNav index at the given file location.") + .def_static("load", &IndexType::loadIndex, py::arg("filename"), + "Load a FlatNav index from a given file location") + .def("add", &IndexType::add, py::arg("data"), py::arg("ef_construction"), + py::arg("labels") = py::none(), + "Add vectors(data) to the index with the given `ef_construction` " + "parameter and optional labels. `ef_construction` determines how " + "many " + "vertices are visited while inserting every vector in the " + "underlying graph structure.") + .def("search", &IndexType::search, py::arg("queries"), py::arg("K"), + py::arg("ef_search"), + "Return top `K` closest data points for every query in the " + "provided `queries`. The results are returned as a Tuple of " + "distances and label ID's. The `ef_search` parameter determines how " + "many neighbors are visited while finding the closest neighbors " + "for every query.") + .def( + "reorder", + [](IndexType &index_type, const std::string &algorithm) { + auto index = index_type.getIndex(); + auto alg = algorithm; + std::transform(alg.begin(), alg.end(), alg.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (alg == "gorder") { + index->reorderGOrder(); + } else if (alg == "rcm") { + index->reorderRCM(); + } else { + throw std::invalid_argument( + "`" + algorithm + + "` is not a supported graph re-ordering algorithm."); + } + }, + py::arg("algorithm"), + "Perform graph re-ordering based on the given re-ordering strategy. 
" + "Supported re-ordering algorithms include `gorder` and `rcm`.") + .def_property_readonly( + "max_edges_per_node", + [](IndexType &index_type) { + return index_type.getIndex()->maxEdgesPerNode(); + }, + "Maximum number of edges(links) per node in the underlying NSW graph " + "data structure."); } -using L2FloatPyIndex = PyIndex; - -PYBIND11_MODULE(flatnav, m) { - py::class_(m, "Index") - .def(py::init(), py::arg("metric"), - py::arg("_dim"), py::arg("N"), py::arg("M")) - .def(py::init(), py::arg("save_loc")) - .def("add", &L2FloatPyIndex::add, py::arg("data"), - py::arg("ef_construction"), py::arg("labels") = py::none()) - .def("search", &L2FloatPyIndex::search, py::arg("queries"), py::arg("K"), - py::arg("ef_search")) - .def("reorder", &L2FloatPyIndex::reorder, py::arg("alg")) - .def("save", &L2FloatPyIndex::save, py::arg("filename")); +py::object createIndex(const std::string &distance_type, int dim, + int dataset_size, int max_edges_per_node, + bool verbose = false) { + auto dist_type = distance_type; + std::transform(dist_type.begin(), dist_type.end(), dist_type.begin(), + [](unsigned char c) { return std::tolower(c); }); + + if (dist_type == "l2") { + auto distance = std::make_shared(/* dim = */ dim); + return py::cast(new L2FlatNavIndex(std::move(distance), dim, dataset_size, + max_edges_per_node, verbose)); + } else if (dist_type == "angular") { + auto distance = std::make_shared(/* dim = */ dim); + return py::cast(new InnerProductFlatNavIndex( + std::move(distance), dim, dataset_size, max_edges_per_node, verbose)); + } + throw std::invalid_argument("Invalid distance type: `" + dist_type + + "` during index construction. Valid options " + "include `l2` and `angular`."); } -#include -#include - -namespace py = pybind11; - -class Index { -public: - int _m; - explicit Index(int num) : _m(num) {} - - int add(int j) { return _m + j; } -}; - void defineIndexSubmodule(py::module_ &index_submodule) { - py::class_(index_submodule, "Index") - .def(py::init(), py::arg("num"), - "Initializes a naive quantizer (int8) object.") - .def("add", &Index::add, py::arg("j"), - "Quantizes input vectors based by clipping the bit width."); + index_submodule.def("index_factory", &createIndex, py::arg("distance_type"), + py::arg("dim"), py::arg("dataset_size"), + py::arg("max_edges_per_node"), py::arg("verbose") = false, + "Creates a FlatNav index given the corresponding " + "parameters. 
The `distance_type` argument determines the " + "kind of index created (either L2Index or IPIndex)"); + + py::class_ l2_index_class(index_submodule, "L2Index"); + bindIndexMethods(l2_index_class); + + py::class_ ip_index_class(index_submodule, + "IPIndex"); + bindIndexMethods(ip_index_class); } -PYBIND11_MODULE(flatnav, module_) { +PYBIND11_MODULE(flatnav, module) { + auto index_submodule = module.def_submodule("index"); - auto index_submodule = module_.def_submodule("index"); defineIndexSubmodule(index_submodule); -} +} \ No newline at end of file diff --git a/flatnav_python/setup.py b/flatnav_python/setup.py index 85bc91c..c93e53e 100644 --- a/flatnav_python/setup.py +++ b/flatnav_python/setup.py @@ -1,21 +1,11 @@ -# import toml import os - -# Available at setup time due to pyproject.toml from pybind11.setup_helpers import Pybind11Extension, build_ext from setuptools import setup -# def parse_version_from_pyproject() -> str: -# with open("pyproject.toml") as f: -# pyproject = toml.load(f) -# return pyproject["tool"]["poetry"]["version"] - -# raise RuntimeError("Unable to find version string.") - __version__ = "0.0.1" CURRENT_DIR = os.getcwd() -SOURCE_PATH = os.path.join(CURRENT_DIR, "bindings.cpp") +SOURCE_PATH = os.path.join(CURRENT_DIR, "python_bindings.cpp") ext_modules = [ @@ -28,9 +18,16 @@ os.path.join(CURRENT_DIR, ".."), os.path.join(CURRENT_DIR, "..", "external", "cereal", "include"), ], - # Ignoring the `Wno-sign-compare` which warns you when you compare int with something like - # uint64_t. - extra_compile_args=["-Wno-sign-compare", "-fopenmp"], + extra_compile_args=[ + "-fopenmp", # Enable OpenMP + "-Ofast", # Use the fastest optimization + "-fpic", # Position-independent code + "-w", # Suppress all warnings (note: this overrides -Wall) + "-ffast-math", # Enable fast math optimizations + "-funroll-loops", # Unroll loops + "-ftree-vectorize", # Vectorize where possible + ], + extra_link_args=["-fopenmp"], # Link OpenMP when linking the extension ) ] @@ -44,9 +41,6 @@ description="Graph kNN with reordering.", long_description="", ext_modules=ext_modules, - # extras_require={"test": "pytest"}, - # Currently, build_ext only provides an optional "highest supported C++ - # level" feature, but in the future it may provide more features. 
cmdclass={"build_ext": build_ext}, zip_safe=False, python_requires=">=3.7", diff --git a/flatnav_python/test_index.py b/flatnav_python/test_index.py index 76f0d4f..77f7542 100644 --- a/flatnav_python/test_index.py +++ b/flatnav_python/test_index.py @@ -1,16 +1,81 @@ import flatnav from flatnav.index import index_factory from flatnav.index import L2Index, IPIndex -from typing import Union -import pytest +from typing import Union, Optional import numpy as np -import time +import time +import tempfile +import h5py +import requests +import os def generate_random_data(dataset_length: int, dim: int) -> np.ndarray: return np.random.rand(dataset_length, dim) +def get_ann_benchmark_dataset(dataset_name): + base_uri = "http://ann-benchmarks.com" + dataset_uri = f"{base_uri}/{dataset_name}.hdf5" + + with tempfile.TemporaryDirectory() as tmp: + response = requests.get(dataset_uri) + loc = os.path.join(tmp, dataset_name) + + with open(loc, "wb") as f: + f.write(response.content) + data = h5py.File(loc, "r") + + training_set = data["train"] + queries = data["test"] + true_neighbors = data["neighbors"] + distances = data["distances"] + + return ( + np.array(training_set), + np.array(queries), + np.array(true_neighbors), + np.array(distances), + ) + + +def compute_recall( + index, queries: np.ndarray, ground_truth: np.ndarray, ef_search: int, k: int = 100 +): + """ + Compute recall for given queries, ground truth, and a FlatNav index. + + Args: + - index: The Faiss index to search. + - queries: The query vectors. + - ground_truth: The ground truth indices for each query. + - k: Number of neighbors to search. + + Returns: + Mean recall over all queries. + """ + start = time.time() + _, top_k_indices = index.search(queries=queries, ef_search=ef_search, K=k) + end = time.time() + + duration = (end - start) / len(queries) + print(f"Querying time: {duration * 1000} milliseconds") + + # Convert each ground truth list to a set for faster lookup + ground_truth_sets = [set(gt) for gt in ground_truth] + + mean_recall = 0 + + for idx, k_neighbors in enumerate(top_k_indices): + query_recall = sum( + 1 for neighbor in k_neighbors if neighbor in ground_truth_sets[idx] + ) + mean_recall += query_recall / k + + recall = mean_recall / len(queries) + return recall + + def create_index( distance_type: str, dim: int, dataset_size: int, max_edges_per_node: int ) -> Union[L2Index, IPIndex]: @@ -19,20 +84,18 @@ def create_index( dim=dim, dataset_size=dataset_size, max_edges_per_node=max_edges_per_node, - verbose=True + verbose=True, ) - if not ( - isinstance(index, flatnav.index.L2Index) - or isinstance(index, flatnav.index.IPIndex) - ): + if not (isinstance(index, L2Index) or isinstance(index, IPIndex)): raise RuntimeError("Invalid index.") return index -def test_flatnav_l2_index(): - dataset_to_index = generate_random_data(dataset_length=60_000, dim=784) - queries = generate_random_data(dataset_length=10_000, dim=784) +def test_flatnav_l2_index_random_dataset(): + dataset_to_index = generate_random_data(dataset_length=30_000, dim=784) + queries = generate_random_data(dataset_length=5_000, dim=784) + ground_truth = np.random.randint(low=0, high=50, size=(5_000, 100)) index = create_index( distance_type="l2", dim=dataset_to_index.shape[1], @@ -43,22 +106,125 @@ def test_flatnav_l2_index(): assert hasattr(index, "max_edges_per_node") assert index.max_edges_per_node == 32 - start = time.time() - index.add(data=dataset_to_index, ef_construction=64) - end = time.time() + run_test( + index=index, + ef_construction=64, + 
ef_search=32, + training_set=dataset_to_index, + queries=queries, + ground_truth=ground_truth, + ) + + +def test_flatnav_l2_index_mnist_dataset(): + training_set, queries, ground_truth, _ = get_ann_benchmark_dataset( + dataset_name="mnist-784-euclidean" + ) + + index = create_index( + distance_type="l2", + dim=training_set.shape[1], + dataset_size=training_set.shape[0], + max_edges_per_node=16, + ) + + assert hasattr(index, "max_edges_per_node") + assert index.max_edges_per_node == 16 + + run_test( + index=index, + ef_construction=128, + ef_search=256, + training_set=training_set, + queries=queries, + ground_truth=ground_truth, + assert_recall_threshold=True, + recall_threshold=0.97, + ) + + +def test_flatnav_ip_index_random_dataset(): + dataset_to_index = generate_random_data(dataset_length=30_000, dim=225) + queries = generate_random_data(dataset_length=5_000, dim=225) + ground_truth = np.random.randint(low=0, high=50, size=(5_000, 100)) - print(f"Indexing time = {end - start}") + index = create_index( + distance_type="angular", + dim=dataset_to_index.shape[1], + dataset_size=len(dataset_to_index), + max_edges_per_node=16, + ) + assert hasattr(index, "max_edges_per_node") + assert index.max_edges_per_node == 16 + + run_test( + index=index, + ef_construction=64, + ef_search=32, + training_set=dataset_to_index, + queries=queries, + ground_truth=ground_truth, + ) + +def test_flatnav_index_with_reordering(): + training_set, queries, ground_truth, _ = get_ann_benchmark_dataset( + dataset_name="mnist-784-euclidean" + ) + + index = create_index( + distance_type="l2", + dim=training_set.shape[1], + dataset_size=training_set.shape[0], + max_edges_per_node=16, + ) + + assert hasattr(index, "max_edges_per_node") + assert index.max_edges_per_node == 16 + + run_test( + index=index, + ef_construction=128, + ef_search=256, + training_set=training_set, + queries=queries, + ground_truth=ground_truth, + assert_recall_threshold=True, + recall_threshold=0.97, + use_reordering=True, + reordering_algorithm="gorder" + ) + + +def run_test( + index: Union[L2Index, IPIndex], + ef_construction: int, + ef_search: int, + training_set: np.ndarray, + queries: np.ndarray, + ground_truth: np.ndarray, + use_reordering: bool = False, + reordering_algorithm: Optional[str] = None, + assert_recall_threshold: bool = False, + recall_threshold: Optional[float] = None, +): start = time.time() - distances, node_ids = index.search(queries=queries, ef_search=64, K=100) + index.add(data=training_set, ef_construction=ef_construction) end = time.time() - print(f"Querying time = {end - start}") - assert distances.shape == node_ids.shape + print(f"Indexing time = {end - start} seconds") + + if use_reordering: + if not reordering_algorithm: + raise RuntimeError("Re-ordering algorithm must be provided.") + index.reorder(algorithm=reordering_algorithm) + recall = compute_recall( + index=index, queries=queries, ground_truth=ground_truth, ef_search=ef_search + ) -""" -Indexing time = 693.3694415092468 -Querying time = 48.112215518951416 -""" \ No newline at end of file + if assert_recall_threshold: + if not recall_threshold: + raise RuntimeError("Recall threshold must be provided.") + assert recall >= recall_threshold diff --git a/tools/query.cpp b/tools/query.cpp deleted file mode 100644 index 1d52ba0..0000000 --- a/tools/query.cpp +++ /dev/null @@ -1,224 +0,0 @@ -#include "cnpy.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using flatnav::Index; -using 
flatnav::SquaredL2Distance; - -std::shared_ptr> -buildIndex(float *data, uint32_t dim, uint64_t N, uint32_t max_edges, - uint32_t ef_construction) { - auto distance = std::make_unique(dim); - auto index = std::make_shared>( - /* dist = */ std::move(distance), /* dataset_size = */ N, - /* max_edges = */ max_edges); - - auto start = std::chrono::high_resolution_clock::now(); - - for (int label = 0; label < N; label++) { - float *element = data + (dim * label); - index->add(/* data = */ (void *)element, /* label = */ label, - /* ef_construction */ ef_construction); - if (label % 100000 == 0) - std::clog << "." << std::flush; - } - std::clog << std::endl; - - auto stop = std::chrono::high_resolution_clock::now(); - auto duration = - std::chrono::duration_cast(stop - start); - std::clog << "Build time: " << (float)duration.count() << " milliseconds" - << std::endl; - return index; -} - -int main(int argc, char **argv) { - - if (argc < 6) { - std::clog << "Usage: " << std::endl; - std::clog << "query "; - std::clog << " [--nq num_queries] [--reorder_id reorder_id] [--ef_profile " - "ef_profile] [--num_profile num_profile]" - << std::endl; - std::clog << "Positional arguments:" << std::endl; - std::clog << "\t index: Filename for the training data (float32 index)." - << std::endl; - std::clog << "\t space: Integer distance ID: 0 for L2 distance, 1 for " - "inner product (angular distance)." - << std::endl; - std::clog << "\t queries: Filename for queries (float32 file)." - << std::endl; - std::clog << "\t gtruth: Filename for ground truth (int32 file)." - << std::endl; - - std::clog << "\t k: Number of neighbors to return." << std::endl; - - std::clog << "Optional arguments:" << std::endl; - std::clog << "\t [--nq num_queries]: (Optional, default 0) Number of " - "queries to use. If 0, uses all queries." - << std::endl; - std::clog << "\t [--reorder_id reorder_id]: (Optional, default 0) Which " - "reordering algorithm to use? 0:none 1:gorder 2:indegsort " - "3:outdegsort 4:RCM 5:hubsort 6:hubcluster 7:DBG 8:corder " - "91:profiled_gorder 94:profiled_rcm 41:RCM+gorder" - << std::endl; - std::clog << "\t [--ef_profile ef_profile]: (Optional, default 100) " - "ef_search parameter to use for profiling." - << std::endl; - std::clog << "\t [--num_profile num_profile]: (Optional, default 1000) " - "Number of queries to use for profiling." - << std::endl; - return -1; - } - - // Optional arguments. 
- int num_queries = 10000; - bool reorder = false; - int reorder_ID = 0; - int ef_profile = 100; - int num_profile = 1000; - - std::string train_file = - "data/mnist-784-euclidean/mnist-784-euclidean.train.npy"; - std::string queries_file = - "data/mnist-784-euclidean/mnist-784-euclidean.test.npy"; - std::string groundtruth_file = - "data/mnist-784-euclidean/mnist-784-euclidean.gtruth.npy"; - - for (int i = 0; i < argc; ++i) { - if (std::strcmp("--nq", argv[i]) == 0) { - if ((i + 1) < argc) { - num_queries = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --nq" - << std::endl; - return -1; - } - } - if (std::strcmp("--reorder_id", argv[i]) == 0) { - if ((i + 1) < argc) { - reorder_ID = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --reorder_id" - << std::endl; - return -1; - } - } - if (std::strcmp("--ef_profile", argv[i]) == 0) { - if ((i + 1) < argc) { - ef_profile = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --ef_profile" - << std::endl; - return -1; - } - } - if (std::strcmp("--num_profile", argv[i]) == 0) { - if ((i + 1) < argc) { - num_profile = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --num_profile" - << std::endl; - return -1; - } - } - } - // Positional arguments. - std::string indexfilename(train_file); // Index filename. - int space_ID = 0; // Space ID for querying. - - // Load queries. - std::clog << "[INFO] Loading queries." << std::endl; - cnpy::NpyArray queries_array = cnpy::npy_load(queries_file); - float *queries = queries_array.data(); - - // Load ground truth. - std::clog << "[INFO] Loading ground truth." << std::endl; - cnpy::NpyArray gtruth_array = cnpy::npy_load(groundtruth_file); - uint32_t *gtruth = gtruth_array.data(); - - // EF search vector. - std::vector ef_searches{100}; - - // Number of search results. - int k = 100; - - std::clog << "[INFO] Loading training data." << std::endl; - cnpy::NpyArray train_data_array = cnpy::npy_load(train_file); - float *data = train_data_array.data(); - - std::clog << "[INFO] Building index from " << indexfilename << std::endl; - - uint32_t dim = 784; - auto index = buildIndex(/* data = */ data, /* dim = */ dim, /* N = */ 60000, - /* max_edges = */ 16, /* ef_construction = */ 200); - - // Do reordering, if necessary. - if (num_profile > num_queries) { - std::clog << "Warning: Number of profiling queries (" << num_profile - << ") is greater than number of queries (" << num_queries << ")!" - << std::endl; - num_profile = num_queries; - } - if (reorder) { - std::clog << "Using GORDER" << std::endl; - std::clog << "Reordering: " << std::endl; - auto start_r = std::chrono::high_resolution_clock::now(); - index->reorder_gorder(); - auto stop_r = std::chrono::high_resolution_clock::now(); - auto duration_r = - std::chrono::duration_cast(stop_r - start_r); - std::clog << "Reorder time: " << (float)(duration_r.count()) / (1000.0) - << " seconds" << std::endl; - } else { - std::clog << "No reordering" << std::endl; - } - - int num_gtruth_entries = 100; - - // Now, finally, do the actual search. 
- std::cout << "recall, mean_latency_ms" << std::endl; - for (int &ef_search : ef_searches) { - double mean_recall = 0; - - auto start_q = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < num_queries; i++) { - float *query = queries + dim * i; - uint32_t *g = gtruth + num_gtruth_entries * i; - - std::vector> result = index->search( - /* query = */ query, /* K = */ k, /* ef_search = */ ef_search); - - double recall = 0; - for (int j = 0; j < k; j++) { - for (int l = 0; l < k; l++) { - if (static_cast(result[j].second) == g[l]) { - recall += 1; - break; - } - } - } - recall /= k; - mean_recall = mean_recall + recall; - } - auto stop_q = std::chrono::high_resolution_clock::now(); - auto duration_q = - std::chrono::duration_cast(stop_q - start_q); - std::cout << "[INFO] recall: " << mean_recall / num_queries << std::endl; - std::cout << "[INFO] mean_latency_ms: " - << (float)(duration_q.count()) / num_queries << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/tools/query_npy.cpp b/tools/query_npy.cpp index fb4a4d7..fad19f8 100644 --- a/tools/query_npy.cpp +++ b/tools/query_npy.cpp @@ -34,7 +34,7 @@ void run(float *queries, int *gtruth, const std::string &index_filename, if (reorder) { std::clog << "[INFO] Gorder Reordering: " << std::endl; auto start_r = std::chrono::high_resolution_clock::now(); - index->reorder_gorder(); + index->reorderGOrder(); auto stop_r = std::chrono::high_resolution_clock::now(); auto duration_r = std::chrono::duration_cast(stop_r - start_r);