diff --git a/.github/workflows/wheels.yaml b/.github/workflows/wheels.yaml new file mode 100644 index 0000000..e69de29 diff --git a/.gitignore b/.gitignore index c850efb..6ceb4e4 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,11 @@ build .env # Python wheel related folders/files -flatnav_pyton/flatnav.egg-info +flatnav_python/flatnav.egg-info/ +flatnav_python/poetry.lock +flatnav_python/dist +flatnav_python/__pycache__ + # other files data/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 4beee7b..3370110 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,34 +21,32 @@ set(CMAKE_CXX_STANDARD 17) # I added compiler flags for ASan (address sanitizer). It is supposed to be very # fast, but if we find it slow, we can remove it for good or use compiler # directives to skip analyzing functions __attribute__((no_sanitize_address)) - +# https://clang.llvm.org/docs/AddressSanitizer.html Compiler flags set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ - -Xclang -std=c++17 \ - -Wall -Ofast \ + -std=c++17 \ + -Ofast \ -DHAVE_CXX0X \ -DNDEBUG \ - -openmp \ - -L/opt/homebrew/opt/libomp/lib \ - -I/opt/homebrew/opt/libomp/include \ - -lomp \ + -fopenmp \ -fpic \ -w \ -ffast-math \ -funroll-loops \ - -ftree-vectorize \ - -g \ - -fsanitize=address") + -ftree-vectorize") -# set(OpenMP_CXX_FLAGS "-fopenmp") set(OpenMP_CXX_LIB_NAMES "omp") -# link_libraries(omp) +option(CMAKE_BUILD_TYPE "Build type" Release) +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + # Add debug compile flags + message(STATUS "Building in Debug mode") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -fsanitize=address") +endif() include(ExternalProject) include(FeatureSummary) include(FetchContent) find_package(Git REQUIRED) -# find_package(OpenMP REQUIRED) option(USE_GIT_PROTOCOL "If behind a firewall turn this off to use HTTPS instead." 
OFF) @@ -127,12 +125,13 @@ include_directories(${PROJECT_BINARY_DIR}/ep/include) set(CNPY_LIB ${PROJECT_BINARY_DIR}/ep/lib/libcnpy.a) -find_package(OpenMP) +find_package(OpenMP REQUIRED) if(OpenMP_FOUND) message(STATUS "OpenMP Found. Building the Package using the system OpenMP.") else() message( - "OpenMP Not Found. Building the Package using LLVM's OpenMP. This is slower than the system OpenMP." + FATAL_ERROR + "OpenMP Not Found. Building the Package using LLVM's OpenMP. This is slower than the system OpenMP." ) endif(OpenMP_FOUND) @@ -217,7 +216,6 @@ set(HEADERS ${PROJECT_SOURCE_DIR}/flatnav/distances/inner_products_from_hnswlib.h ${PROJECT_SOURCE_DIR}/flatnav/distances/SquaredL2Distance.h ${PROJECT_SOURCE_DIR}/flatnav/distances/SquaredL2DistanceSpecializations.h - ${PROJECT_SOURCE_DIR}/flatnav/distances/SQDistance.h ${PROJECT_SOURCE_DIR}/flatnav/util/ExplicitSet.h ${PROJECT_SOURCE_DIR}/flatnav/util/GorderPriorityQueue.h ${PROJECT_SOURCE_DIR}/flatnav/util/reordering.h @@ -237,8 +235,7 @@ set_target_properties(FLAT_NAV_LIB PROPERTIES LINKER_LANGUAGE CXX) if(BUILD_EXAMPLES) message(STATUS "Building examples for Flatnav") - foreach(CONSTRUCT_EXEC construct_npy query query_npy - cereal_tests) + foreach(CONSTRUCT_EXEC construct_npy query_npy cereal_tests) add_executable(${CONSTRUCT_EXEC} ${PROJECT_SOURCE_DIR}/tools/${CONSTRUCT_EXEC}.cpp ${HEADERS}) add_dependencies(${CONSTRUCT_EXEC} FLAT_NAV_LIB) diff --git a/README.md b/README.md index 7c23386..c5ffa5d 100644 --- a/README.md +++ b/README.md @@ -70,51 +70,13 @@ correct distance is computed. The most straightforward way to include a new dataset for this evaluation is to put it into either the ANN-Benchmarks (NPY) format or to put it into the Big ANN-Benchmarks format. The NPY format requires a float32 2-D Numpy array for the train and test sets and an integer array for the ground truth. The Big ANN-Benchmarks format uses the following binary representation. 
For the train and test data, there is a 4-byte little-endian unsigned integer number of points followed by a 4-byte little-endian unsigned integer number of dimensions. This is followed by a flat list of `num_points * num_dimensions` values, where each value is a 32-bit float or an 8-bit integer (depending on the dataset type). The ground truth files consist of a 32-bit integer number of queries, followed by a 32-bit integer number of ground truth results for each query. This is followed by a flat list of ground truth results. -## Python Binding Instructions -We also provide python bindings for a subset of index types. This is very much a work in progress - the default build may or may not work with a given Pyton configuration. While we've successfully built the bindings on Windows, Linux and MacOS, this will still probably require some customization of the build system. To begin with, follow these instructions: +## Python Binding Instructions +We also provide python bindings for a subset of index types. We've successfully built the bindings on Linux and MacOS, and if there is interest, +we can also support Windows. To generate the python bindings you will need a stable installation of [poetry](https://python-poetry.org/). -1. `$ cd python_bindings` -2. `$ make python-bindings` -3. `$ export PYTHONPATH=$(pwd)/build:$PYTHONPATH` -4. `$ python3 python_bindings/test.py` +Then, follow instructions [here](/flatnav_python/README.md) on how to build the library. There are also examples for how to use the library +to build an index and run queries on top of it [here](/flatnav_python/test_index.py). -You are likely to encounter compilation issues depending on your Python configuration. See below for notes and instructions on how to get this working. - -### Note on python bindings: -The python bindings require pybind11 to compile. This can be installed with `pip3 install pybind11`. 
The command `python3 -m pybind11 --includes` which is included in the Makefile gets the correct include flags for the `pybind11/pybind11.h` header file, as well as the include flags for the `Python.h` header file. On most Linux platforms, the paths in the Makefile should point to the correct include directories for this to work (for the system Python). If the `Python.h` file is not located at the specified include paths (e.g. for a non-system Python installation), then another include path may need to be added (specified by the PYTHON_INC_FLAGS variable in the Makefile). The headers may also need to be installed with `$ sudo apt-get install python3-dev`. - -If you encounter the following error: - -`ld: can't open output file for writing: ../build/flatnav.so, errno=2 for architecture x86_64` - -The reason is likely that you forgot to make the build directory. Run `mkdir build` in the top-level flatnav directory and re-build the Python bindings. - -### Special Instructions for MacOS - -On MacOS, the default installation directory (`/usr/lib`) is where the global, system Python libraries are located, but this is often not where we want to perform the installation. If the user has installed their own (non-system) version of Python via Homebrew or a similar tool, the actual Python libraries will be located somewhere else. This will result in many errors similar to the following: - -``` -Undefined symbols for architecture x86_64: - "_PyBaseObject_Type... -``` - -This happens because homebrew does not install into the global installation directory, and we need to explicitly link the libpython object files on MacOS. To fix it, you will need the location of `libpython*.dylib` (where `*` stands in for the Python version). To find them, run - -`sudo find / -iname "libpython*"` - -And pick the one corresponding to the version of Python you use. 
Once you've located the library, add the following to the Makefile: - -`PYTHON_LINK_FLAGS := -L /path/to/directory/containing/dylib/ -lpythonX.Y` - -For example, on an Intel MacBook, I installed Python 3.9 using Homebrew and found: - -`/usr/local/Cellar/python@3.9/3.9.4/Frameworks/Python.framework/Versions/3.9/lib/libpython3.9.dylib` - -This means that my link flags are: - -`PYTHON_LINK_FLAGS := -L /usr/local/Cellar/python@3.9/3.9.4/Frameworks/Python.framework/Versions/3.9/lib/python3.9/config-3.9-darwin/ -lpython3.9` - -If you installed Python in some other place (or if you use the system Python on MacOS), you will probably have a different, non-standard location for `libpython.dylib`. Note that building python bindings on M1 Macs is a work-in-progress, given the switch from x86 to arm64. diff --git a/bin/build.sh b/bin/build.sh index 4076481..27fed72 100755 --- a/bin/build.sh +++ b/bin/build.sh @@ -1,9 +1,13 @@ #!/bin/bash +# Make sure we are at the root directory +cd "$(dirname "$0")/.." + BUILD_TESTS=OFF BUILD_EXAMPLES=OFF BUILD_BENCHMARKS=OFF MAKE_VERBOSE=0 +CMAKE_BUILD_TYPE=Release function print_usage() { echo "Usage ./build.sh [OPTIONS]" @@ -13,6 +17,7 @@ function print_usage() { echo " -e, --examples: Build examples" echo " -v, --verbose: Make verbose" echo " -b, --benchmark: Build benchmarks" + echo " -bt, --build_type: Build type (Debug, Release, RelWithDebInfo, MinSizeRel)" echo " -h, --help: Print this help message" echo "" echo "Example Usage:" @@ -22,18 +27,19 @@ function print_usage() { function check_clang_installed() { if [[ ! -x "$(command -v clang)" ]]; then - echo "clang is not installed. You should have clang installed first.Exiting..." - exit 1 + echo "clang is not installed. Installing it..." 
+ ./bin/install_clang.sh fi } -# Process the options and arguments +# Process the options and arguments while [[ "$#" -gt 0 ]]; do case $1 in -t|--tests) BUILD_TESTS=ON; shift ;; -e|--examples) BUILD_EXAMPLES=ON; shift ;; -v|--verbose) MAKE_VERBOSE=1; shift ;; -b|--benchmark) BUILD_BENCHMARKS=ON; shift ;; + -bt|--build_type) CMAKE_BUILD_TYPE=$2; shift; shift ;; *) print_usage ;; esac done @@ -49,6 +55,8 @@ if [[ "$(uname)" == "Darwin" ]]; then echo "Using LLVM clang" export CC=/opt/homebrew/opt/llvm/bin/clang export CXX=/opt/homebrew/opt/llvm/bin/clang++ + export LDFLAGS="-L/opt/homebrew/opt/libomp/lib" + export CPPFLAGS="-I/opt/homebrew/opt/libomp/include" elif [[ "$(uname)" == "Linux" ]]; then echo "Using system clang" else @@ -60,7 +68,10 @@ echo "Using CC=${CC} and CXX=${CXX} compilers for building." mkdir -p build cd build && cmake \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_EXAMPLES=${BUILD_EXAMPLES} \ - -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} .. + -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} .. make -j VERBOSE=${MAKE_VERBOSE} \ No newline at end of file diff --git a/bin/install_clang.sh b/bin/install_clang.sh index 380cc69..5921414 100755 --- a/bin/install_clang.sh +++ b/bin/install_clang.sh @@ -5,24 +5,44 @@ command_exists() { type "$1" &> /dev/null ; } +function install_clang_mac() { + # Install clang and clang-format on Darwin + if ! command_exists brew; then + echo "Homebrew not found. Homebrew should be installed first." + exit 1 + fi + brew install llvm +} + +function install_clang_linux() { + # Install clang and clang-format on Linux + if ! command_exists apt; then + echo "apt not found. apt should be installed first." + exit 1 + fi + echo "Installing clang and clang-format..." + sudo apt update + sudo apt install -y clang clang-format +} + + # Check for clang if ! command_exists clang++; then echo "clang++ not found. Installing..." 
- sudo apt update - sudo apt install -y clang -else - echo "clang++ already installed." -fi -if ! command_exists clang-format; then - echo "clang-format not found. Installing..." - sudo apt update - sudo apt install -y clang-format + if [[ "$(uname)" == "Darwin" ]]; then + install_clang_mac + elif [[ "$(uname)" == "Linux" ]]; then + install_clang_linux + else + echo "Unsupported OS." + exit 1 + fi else - echo "clang-format already installed." -fi + echo "clang/clang++ already installed." +fi -# Check for libomp-dev +# Check for libomp-dev. This is required for OpenMP support. PKG_STATUS=$(dpkg-query -W --showformat='${Status}\n' libomp-dev | grep "install ok installed") if [ "" == "$PKG_STATUS" ]; then echo "libomp-dev not found. Installing..." diff --git a/bin/run_anns.sh b/bin/run_anns.sh deleted file mode 100755 index 9678674..0000000 --- a/bin/run_anns.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -# If filename called `sift_128.index` exists, delete it first -if [ -f mnist_784.index ]; then - rm mnist_784.index -fi - -if [ -f sift_128.index ]; then - rm sift_128.index -fi - -if [ -f glove_25.index ]; then - rm glove_25.index -fi - -# if [ -f gist_960.index ]; then -# rm gist_960.index -# fi - -# if [ -f deep1b_96.index ]; then -# rm deep1b_96.index -# fi - -# Build the index for MNIST -# build/construct_npy 1 0 data/mnist-784-euclidean/mnist-784-euclidean.train.npy 16 128 mnist_784.index - -# # Query MNIST -# build/query_npy 0 mnist_784.index data/mnist-784-euclidean/mnist-784-euclidean.test.npy data/mnist-784-euclidean/mnist-784-euclidean.gtruth.npy 256 100 0 1 - -# # Query MNIST with reordering -# build/query_npy 0 mnist_784.index data/mnist/mnist-784-euclidean.test.npy data/mnist/mnist-784-euclidean.gtruth.npy 256,512 100 1 - -# Build the index -# build/construct_npy 1 0 data/sift/sift-128-euclidean.train.npy 16 128 sift_128.index - -# # Query -# build/query_npy 0 sift_128.index data/sift/sift-128-euclidean.test.npy 
data/sift/sift-128-euclidean.gtruth.npy 256,512 100 0 - -# Build the index for GloVe -build/construct_npy 1 1 data/glove/glove-25-angular.train.npy 16 128 glove_25.index - -# Query GloVe without reordering -build/query_npy 1 glove_25.index data/glove/glove-25-angular.test.npy data/glove/glove-25-angular.gtruth.npy 128,256 100 0 1 - -# # Query GloVe with reordering -# build/query_npy 1 glove_25.index data/glove/glove-25-angular.test.npy data/glove/glove-25-angular.gtruth.npy 256,512 100 1 - -# Build the index for GIST -# build/construct_npy 0 0 data/gist/gist-960-euclidean.train.npy 32 128 gist_960.index - -# # Query GIST without reordering -# build/query_npy 0 gist_960.index data/gist/gist-960-euclidean.test.npy data/gist/gist-960-euclidean.gtruth.npy 128,256 100 0 - -# # Query GIST with reordering -# echo "querying with re-ordering \n" -# build/query_npy 0 gist_960.index data/gist/gist-960-euclidean.test.npy data/gist/gist-960-euclidean.gtruth.npy 128,256 100 1 - - -# Build the index for DEEP1B -# build/construct_npy 0 1 data/deep1b/deep-image-96-angular.train.npy 32 128 deep1b_96.index - -# # Query DEEP1B without reordering -# build/query_npy 1 deep1b_96.index data/deep1b/deep-image-96-angular.test.npy data/deep1b/deep-image-96-angular.gtruth.npy 128,256 100 0 - -# # Query DEEP1B with reordering -# build/query_npy 1 deep1b_96.index data/deep1b/deep-image-96-angular.test.npy data/deep1b/deep-image-96-angular.gtruth.npy 128,256 100 1 \ No newline at end of file diff --git a/flatnav/Index.h b/flatnav/Index.h index 5a6e079..6f9e766 100644 --- a/flatnav/Index.h +++ b/flatnav/Index.h @@ -115,7 +115,7 @@ template class Index { return results; } - void reorder_gorder(const int window_size = 5) { + void reorderGOrder(const int window_size = 5) { std::vector> outdegree_table(_cur_num_nodes); for (node_id_t node = 0; node < _cur_num_nodes; node++) { node_id_t *links = getNodeLinks(node); @@ -125,12 +125,12 @@ template class Index { } } } - std::vector P = 
g_order(outdegree_table, window_size); + std::vector P = gOrder(outdegree_table, window_size); relabel(P); } - void reorder_rcm() { + void reorderRCM() { // TODO: Remove code duplication for outdegree_table. std::vector> outdegree_table(_cur_num_nodes); for (node_id_t node = 0; node < _cur_num_nodes; node++) { @@ -141,7 +141,7 @@ template class Index { } } } - std::vector P = rcm_order(outdegree_table); + std::vector P = rcmOrder(outdegree_table); relabel(P); } @@ -198,6 +198,7 @@ template class Index { inline char *indexMemory() const { return _index_memory; } inline size_t currentNumNodes() const { return _cur_num_nodes; } + inline size_t dataDimension() const { return _distance->dimension(); } void printIndexParams() const { std::cout << "\nIndex Parameters" << std::endl; @@ -442,7 +443,7 @@ template class Index { // now do the back-connections (a little tricky) node_id_t *neighbor_node_links = getNodeLinks(neighbor_node_id); bool is_inserted = false; - for (int j = 0; j < _M; j++) { + for (size_t j = 0; j < _M; j++) { if (neighbor_node_links[j] == neighbor_node_id) { // If there is a self-loop, replace the self-loop with // the desired link. 
@@ -464,7 +465,7 @@ template class Index { PriorityQueue candidates; candidates.emplace(max_dist, new_node_id); - for (int j = 0; j < _M; j++) { + for (size_t j = 0; j < _M; j++) { if (neighbor_node_links[j] != neighbor_node_id) { auto label = neighbor_node_links[j]; auto distance = @@ -475,7 +476,7 @@ template class Index { } selectNeighbors(candidates); // connect the pruned set of candidates, including self-loops: - int j = 0; + size_t j = 0; while (candidates.size() > 0) { // candidates neighbor_node_links[j] = candidates.top().second; candidates.pop(); diff --git a/flatnav/distances/SQDistance.h b/flatnav/distances/SQDistance.h deleted file mode 100644 index b63a188..0000000 --- a/flatnav/distances/SQDistance.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once -#include "../DistanceInterface.h" -#include // for size_t - -// This implements the quantized distance functions from: -// "Low-Precision Quantization for Efficient Nearest Neighbor Search" by -// Ko, Lakshman, Keivanloo and Schkufza (https://arxiv.org/abs/2110.08919). -namespace flatnav { - -class SquaredL2Distance : public DistanceInterface { - friend class DistanceInterface; - static const int distance_id = 0; - -public: - SquaredL2Distance(size_t dim) { - _dimension = dim; - _data_size_bytes = dim * sizeof(float); - } - -private: - size_t _dimension; - size_t _data_size_bytes; - - float distanceImpl(const void *x, const void *y) { - // Default implementation of squared-L2 distance, in case we cannot - // support the SIMD specializations for special input _dimension sizes. 
- float *p_x = (float *)x; - float *p_y = (float *)y; - float squared_distance = 0; - - for (size_t i = 0; i < _dimension; i++) { - float difference = *p_x - *p_y; - p_x++; - p_y++; - squared_distance += difference * difference; - } - return squared_distance; - } - - size_t dataSizeImpl() { return _data_size_bytes; } - - void transformDataImpl(void *destination, const void *src) { - std::memcpy(destination, src, _data_size_bytes); - } - - void serializeImpl(std::ofstream &out) { - // TODO: Make this safe across machines and compilers. - out.write(reinterpret_cast(&distance_id), sizeof(int)); - out.write(reinterpret_cast(&_dimension), sizeof(size_t)); - } - - void deserializeImpl(std::ifstream &in) { - // TODO: Make this safe across machines and compilers. - int distance_id_check; - in.read(reinterpret_cast(&distance_id_check), sizeof(int)); - if (distance_id_check != distance_id) { - throw std::invalid_argument( - "Error reading distance metric: Distance ID does not match " - "the ID of the deserialized distance instance."); - } - in.read(reinterpret_cast(&_dimension), sizeof(size_t)); - _data_size_bytes = _dimension * sizeof(float); - } -}; - -} // namespace flatnav \ No newline at end of file diff --git a/flatnav/util/reordering.h b/flatnav/util/reordering.h index b8bfa64..a5a3979 100644 --- a/flatnav/util/reordering.h +++ b/flatnav/util/reordering.h @@ -25,7 +25,7 @@ namespace flatnav { template std::vector -g_order(std::vector> &outdegree_table, const int w) { +gOrder(std::vector> &outdegree_table, const int w) { /* Simple explanation of the Gorder Algorithm: insert all v into Q each with priority 0 select a start node into P @@ -119,7 +119,7 @@ g_order(std::vector> &outdegree_table, const int w) { template std::vector -rcm_order(std::vector> &outdegree_table) { +rcmOrder(std::vector> &outdegree_table) { int cur_num_nodes = outdegree_table.size(); std::vector> sorted_nodes; diff --git a/flatnav_python/README.md b/flatnav_python/README.md new file mode 100644 
index 0000000..d253544 --- /dev/null +++ b/flatnav_python/README.md @@ -0,0 +1,19 @@ + + +## Building the FlatNav Python Library + + +First, if you are on a Linux machine (e.g. Ubuntu), ensure that you have the header files and static libraries +for python dev. To install them on Ubuntu, run + +```shell +> sudo apt-get install python3-dev +``` + +To build the wheel file and pip-install it, run + +```shell +> cd flatnav_python +> poetry install --no-dev +> ./install_flatnav.sh +``` \ No newline at end of file diff --git a/flatnav_python/flatnav.egg-info/PKG-INFO b/flatnav_python/flatnav.egg-info/PKG-INFO deleted file mode 100644 index c95a358..0000000 --- a/flatnav_python/flatnav.egg-info/PKG-INFO +++ /dev/null @@ -1,8 +0,0 @@ -Metadata-Version: 2.1 -Name: flatnav -Version: 0.0.1 -Summary: Graph kNN with reordering. -Home-page: https://randorithms.com -Author: Benjamin Coleman -Author-email: benjamin.ray.coleman@gmail.com -Requires-Python: >=3.7 diff --git a/flatnav_python/flatnav.egg-info/SOURCES.txt b/flatnav_python/flatnav.egg-info/SOURCES.txt deleted file mode 100644 index 2dd80af..0000000 --- a/flatnav_python/flatnav.egg-info/SOURCES.txt +++ /dev/null @@ -1,8 +0,0 @@ -pyproject.toml -setup.py -/Users/blaisemunyampirwa/Downloads/flatnav-experimental/flatnav_python/python_bindings.cpp -flatnav.egg-info/PKG-INFO -flatnav.egg-info/SOURCES.txt -flatnav.egg-info/dependency_links.txt -flatnav.egg-info/not-zip-safe -flatnav.egg-info/top_level.txt \ No newline at end of file diff --git a/flatnav_python/flatnav.egg-info/dependency_links.txt b/flatnav_python/flatnav.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/flatnav_python/flatnav.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/flatnav_python/flatnav.egg-info/not-zip-safe b/flatnav_python/flatnav.egg-info/not-zip-safe deleted file mode 100644 index 8b13789..0000000 --- a/flatnav_python/flatnav.egg-info/not-zip-safe +++ /dev/null @@ -1 +0,0 @@ - diff --git 
a/flatnav_python/flatnav.egg-info/top_level.txt b/flatnav_python/flatnav.egg-info/top_level.txt deleted file mode 100644 index 8c4f559..0000000 --- a/flatnav_python/flatnav.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -flatnav diff --git a/flatnav_python/install_flatnav.sh b/flatnav_python/install_flatnav.sh new file mode 100755 index 0000000..95adef0 --- /dev/null +++ b/flatnav_python/install_flatnav.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -ex + +# Activate the poetry environment +POETRY_ENV=$(poetry env info --path) + +# Generate wheel file +$POETRY_ENV/bin/python setup.py bdist_wheel + +# Assuming the build only produces one wheel file in the dist directory +WHEEL_FILE=$(ls dist/*.whl) + + +# Install the wheel using pip +$POETRY_ENV/bin/pip install $WHEEL_FILE --force-reinstall + +echo "Installation of wheel completed" + +#Testing the wheel +$POETRY_ENV/bin/python -c "import flatnav" + diff --git a/flatnav_python/pyproject.toml b/flatnav_python/pyproject.toml index 79869ea..3986881 100644 --- a/flatnav_python/pyproject.toml +++ b/flatnav_python/pyproject.toml @@ -1,3 +1,31 @@ +[tool.poetry] +name = "flatnav" +version = "0.0.1" +description = "Graph kNN with reordering. 
" +homepage="https://randorithms.com" +authors = [ + "Benjamin Coleman ", + "Blaise Munyampirwa ", + "Nicholas Meisburger", + "Joshua Engels", + "Vihan Lakshman " +] +license = "Apache 2.0" + +[tool.poetry.dependencies] +python = ">=3.9" +pybind11 = "2.10.4" +setuptools = "68.2.2" + +[tool.poetry.dev-dependencies] +black = "^23.11.0" +pytest = "^7.4.3" +numpy = "^1.26.2" +h5py = "^3.10.0" +requests = "^2.31.0" + + + [build-system] -requires = ["setuptools>=42", "wheel", "pybind11~=2.6.1"] +requires = ["setuptools>=68", "wheel", "pybind11=2.10.4", "python>=3.9"] build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/flatnav_python/python_bindings.cpp b/flatnav_python/python_bindings.cpp index 098a2e8..2476305 100644 --- a/flatnav_python/python_bindings.cpp +++ b/flatnav_python/python_bindings.cpp @@ -1,162 +1,250 @@ #include -#include -#include -#include - +#include +#include +#include #include #include +#include +#include +#include -#include "../flatnav/Index.h" -#include "../flatnav/distances/InnerProductDistance.h" -#include "../flatnav/distances/SquaredL2Distance.h" -#include "../flatnav/distances/SquaredL2DistanceSpecializations.h" +#include +#include +#include +#include + +using flatnav::DistanceInterface; +using flatnav::Index; +using flatnav::InnerProductDistance; +using flatnav::SquaredL2Distance; -using namespace flatnav; namespace py = pybind11; template class PyIndex { + const uint32_t NUM_LOG_STEPS = 10000; + private: + int _dim, label_id; + bool _verbose; Index *_index; - std::unique_ptr> _distance; - size_t _dim; - int _added; +public: + typedef std::pair, py::array_t> + DistancesLabelsPair; - void setIndexMetric(std::string &metric) { - std::transform(metric.begin(), metric.end(), metric.begin(), - [](unsigned char c) { return std::tolower(c); }); + explicit PyIndex(std::unique_ptr> index) + : _dim(index->dataDimension()), label_id(0), _verbose(false), + _index(index.get()) {} - if (metric == "l2") { - _distance = 
std::make_unique(/* dim = */ _dim); - } else if (metric == "angular") { - _distance = std::make_unique(/* dim = */ _dim); - } - throw std::invalid_argument("Invalid metric `" + metric + - "` used during index construction."); - } + PyIndex(std::shared_ptr> distance, int dim, + int dataset_size, int max_edges_per_node, bool verbose = false) + : _dim(dim), label_id(0), _verbose(verbose), + _index(new Index( + /* dist = */ std::move(distance), + /* dataset_size = */ dataset_size, + /* max_edges_per_node = */ max_edges_per_node)) {} -public: - PyIndex(std::string metric_type, size_t dim, int N, int M) - : _dim(dim), _added(0) { - setIndexMetric(metric_type); - _index = new Index( - /* dist = */ std::move(_distance), /* dataset_size = */ N, - /* max_edges_per_node = */ M); - } + Index *getIndex() { return _index; } + + ~PyIndex() { delete _index; } - PyIndex(std::string filename) { - _index = new Index(/* in = */ filename); + static std::unique_ptr> + loadIndex(const std::string &filename) { + auto index = Index::loadIndex(/* filename = */ filename); + return std::make_unique>(std::move(index)); } - void add(py::array_t data, - int ef_construction, py::object labels_obj = py::none()) { + void + add(const py::array_t &data, + int ef_construction, py::object labels = py::none()) { + // py::array_t means that + // the functions expects either a Numpy array of floats or a castable type + // to that type. If the given type can't be casted, pybind11 will throw an + // error. + + auto num_vectors = data.shape(0); + auto data_dim = data.shape(1); + if (data.ndim() != 2 || data_dim != _dim) { + throw std::invalid_argument("Data has incorrect dimensions."); + } + if (labels.is_none()) { + for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) { + this->_index->add(/* data = */ (void *)data.data(vec_index), + /* label = */ label_id, + /* ef_construction = */ ef_construction); + if (_verbose && vec_index % NUM_LOG_STEPS == 0) { + std::clog << "." 
<< std::flush; + } + label_id++; + } + std::clog << std::endl; + return; + } - if (data.n_dim() != 2 || data.shape(1) != _dim) { - throw std::invalid_argument("Data has incorrect _dimensions"); + // Use the provided labels now + py::array_t node_labels( + labels); + if (node_labels.ndim() != 1 || node_labels.shape(0) != num_vectors) { + throw std::invalid_argument("Labels have incorrect dimensions."); } - if (labels_obj.is_none()) { - for (size_t n = 0; n < data.shape(0); n++) { - this->index->add((void *)data.data(n), _added, ef_construction); - _added++; - } - } else { - py::array_t labels( - labels_obj); - if (labels.n_dim() != 1 || labels.shape(0) != data.shape(0)) { - throw std::invalid_argument("Labels have incorrect _dimensions"); - } + for (size_t vec_index = 0; vec_index < num_vectors; vec_index++) { + label_t label_id = *node_labels.data(vec_index); + this->_index->add(/* data = */ (void *)data.data(vec_index), + /* label = */ label_id, + /* ef_construction = */ ef_construction); - for (size_t n = 0; n < data.shape(0); n++) { - label_t l = *labels.data(n); - this->index->add((void *)data.data(n), l, ef_construction); - _added++; + if (_verbose && vec_index % NUM_LOG_STEPS == 0) { + std::clog << "." 
<< std::flush; } } + std::clog << std::endl; } - py::array_t - search(py::array_t queries, + DistancesLabelsPair + search(const py::array_t + queries, int K, int ef_search) { - if (queries.n_dim() != 2 || queries.shape(1) != _dim) { - throw std::invalid_argument("Queries have incorrect _dimensions"); - } size_t num_queries = queries.shape(0); + size_t queries_dim = queries.shape(1); + + if (queries.ndim() != 2 || queries_dim != _dim) { + throw std::invalid_argument("Queries have incorrect dimensions."); + } label_t *results = new label_t[num_queries * K]; + float *distances = new float[num_queries * K]; - for (size_t q = 0; q < num_queries; q++) { - std::vector> topK = - this->index->search(queries.data(q), K, ef_search); - for (size_t i = 0; i < topK.size(); i++) { - results[q * K + i] = topK[i].second; + for (size_t query_index = 0; query_index < num_queries; query_index++) { + std::vector> top_k = this->_index->search( + /* query = */ (const void *)queries.data(query_index), /* K = */ K, + /* ef_search = */ ef_search); + + for (size_t i = 0; i < top_k.size(); i++) { + distances[query_index * K + i] = top_k[i].first; + results[query_index * K + i] = top_k[i].second; } } - py::capsule free_when_done(results, [](void *ptr) { delete ptr; }); + // Allows to transfer ownership to Python + py::capsule free_results_when_done( + results, [](void *ptr) { delete (label_t *)ptr; }); + py::capsule free_distances_when_done( + distances, [](void *ptr) { delete (float *)ptr; }); - return py::array_t({num_queries, (size_t)K}, - {K * sizeof(label_t), sizeof(label_t)}, results, - free_when_done); - } + py::array_t labels = + py::array_t({num_queries, (size_t)K}, // shape of the array + {K * sizeof(label_t), sizeof(label_t)}, // strides + results, // data pointer + free_results_when_done // capsule + ); - void reorder(std::string alg) { - std::transform(alg.begin(), alg.end(), std::tolower); + py::array_t dists = py::array_t( + {num_queries, (size_t)K}, {K * sizeof(float), 
sizeof(float)}, distances, + free_distances_when_done); - if (alg == "gorder") { - this->index->reorder_gorder(); - } else if (alg == "rcm") { - this->index->reorder_rcm(); - } else { - throw std::invalid_argument( - "'" + alg + "' is not a supported graph re-ordering algorithm."); - } + return {dists, labels}; } +}; - void save(std::string filename) { this->index->save(filename); } +using L2FlatNavIndex = PyIndex; +using InnerProductFlatNavIndex = PyIndex; + +template +void bindIndexMethods(py::class_ &index_class) { + index_class + .def( + "save", + [](IndexType &index_type, const std::string &filename) { + auto index = index_type.getIndex(); + index->saveIndex(/* filename = */ filename); + }, + py::arg("filename"), + "Save a FlatNav index at the given file location.") + .def_static("load", &IndexType::loadIndex, py::arg("filename"), + "Load a FlatNav index from a given file location") + .def("add", &IndexType::add, py::arg("data"), py::arg("ef_construction"), + py::arg("labels") = py::none(), + "Add vectors(data) to the index with the given `ef_construction` " + "parameter and optional labels. `ef_construction` determines how " + "many " + "vertices are visited while inserting every vector in the " + "underlying graph structure.") + .def("search", &IndexType::search, py::arg("queries"), py::arg("K"), + py::arg("ef_search"), + "Return top `K` closest data points for every query in the " + "provided `queries`. The results are returned as a Tuple of " + "distances and label ID's. 
The `ef_search` parameter determines how " + "many neighbors are visited while finding the closest neighbors " + "for every query.") + .def( + "reorder", + [](IndexType &index_type, const std::string &algorithm) { + auto index = index_type.getIndex(); + auto alg = algorithm; + std::transform(alg.begin(), alg.end(), alg.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (alg == "gorder") { + index->reorderGOrder(); + } else if (alg == "rcm") { + index->reorderRCM(); + } else { + throw std::invalid_argument( + "`" + algorithm + + "` is not a supported graph re-ordering algorithm."); + } + }, + py::arg("algorithm"), + "Perform graph re-ordering based on the given re-ordering strategy. " + "Supported re-ordering algorithms include `gorder` and `rcm`.") + .def_property_readonly( + "max_edges_per_node", + [](IndexType &index_type) { + return index_type.getIndex()->maxEdgesPerNode(); + }, + "Maximum number of edges(links) per node in the underlying NSW graph " + "data structure."); +} - ~PyIndex() { - delete index; - delete space; +py::object createIndex(const std::string &distance_type, int dim, + int dataset_size, int max_edges_per_node, + bool verbose = false) { + auto dist_type = distance_type; + std::transform(dist_type.begin(), dist_type.end(), dist_type.begin(), + [](unsigned char c) { return std::tolower(c); }); + + if (dist_type == "l2") { + auto distance = std::make_shared(/* dim = */ dim); + return py::cast(new L2FlatNavIndex(std::move(distance), dim, dataset_size, + max_edges_per_node, verbose)); + } else if (dist_type == "angular") { + auto distance = std::make_shared(/* dim = */ dim); + return py::cast(new InnerProductFlatNavIndex( + std::move(distance), dim, dataset_size, max_edges_per_node, verbose)); } -}; + throw std::invalid_argument("Invalid distance type: `" + dist_type + + "` during index construction. 
Valid options " + "include `l2` and `angular`."); +} -template -double ComputeRecall(py::array_t results, - py::array_t gtruths) { - double avg_recall = 0.0; - for (size_t q = 0; q < results.shape(0); q++) { - double recall = 0.0; - const label_t *result = results.data(q); - const label_t *topk = gtruths.data(q); - for (size_t i = 0; i < results.shape(1); i++) { - for (size_t j = 0; j < results.shape(1); j++) { - if (result[i] == topk[j]) { - recall += 1.0; - break; - } - } - } - avg_recall += recall; - } +void defineIndexSubmodule(py::module_ &index_submodule) { + index_submodule.def("index_factory", &createIndex, py::arg("distance_type"), + py::arg("dim"), py::arg("dataset_size"), + py::arg("max_edges_per_node"), py::arg("verbose") = false, + "Creates a FlatNav index given the corresponding " + "parameters. The `distance_type` argument determines the " + "kind of index created (either L2Index or IPIndex)"); - return avg_recall /= (results.shape(0) * results.shape(1)); -} + py::class_ l2_index_class(index_submodule, "L2Index"); + bindIndexMethods(l2_index_class); -using L2FloatPyIndex = PyIndex; - -PYBIND11_MODULE(flatnav, m) { - py::class_(m, "Index") - .def(py::init(), py::arg("metric"), - py::arg("_dim"), py::arg("N"), py::arg("M")) - .def(py::init(), py::arg("save_loc")) - .def("add", &L2FloatPyIndex::add, py::arg("data"), - py::arg("ef_construction"), py::arg("labels") = py::none()) - .def("search", &L2FloatPyIndex::search, py::arg("queries"), py::arg("K"), - py::arg("ef_search")) - .def("reorder", &L2FloatPyIndex::reorder, py::arg("alg")) - .def("save", &L2FloatPyIndex::save, py::arg("filename")); - - // m.def("ComputeRecall", &ComputeRecall, py::arg("results"), - // py::arg("gtruths")); + py::class_ ip_index_class(index_submodule, + "IPIndex"); + bindIndexMethods(ip_index_class); } + +PYBIND11_MODULE(flatnav, module) { + auto index_submodule = module.def_submodule("index"); + + defineIndexSubmodule(index_submodule); +} \ No newline at end of file diff --git 
a/flatnav_python/setup.py b/flatnav_python/setup.py index 4b95df8..66e0d42 100644 --- a/flatnav_python/setup.py +++ b/flatnav_python/setup.py @@ -1,34 +1,50 @@ -import sys -import os - -# Available at setup time due to pyproject.toml -from pybind11 import get_cmake_dir +import os from pybind11.setup_helpers import Pybind11Extension, build_ext from setuptools import setup +import sys __version__ = "0.0.1" -# The main interface is through Pybind11Extension. -# * You can add cxx_std=11/14/17, and then build_ext can be removed. -# * You can set include_pybind11=false to add the include directory yourself, -# say from a submodule. -# -# Note: -# Sort input source files if you glob sources to ensure bit-for-bit -# reproducible builds (https://github.com/pybind/python_example/pull/53) +CURRENT_DIR = os.getcwd() +SOURCE_PATH = os.path.join(CURRENT_DIR, "python_bindings.cpp") + + +INCLUDE_DIRS = [ + os.path.join(CURRENT_DIR, ".."), + os.path.join(CURRENT_DIR, "..", "external", "cereal", "include"), +] +EXTRA_LINK_ARGS = [] + +if sys.platform == "darwin": + omp_flag = "-Xclang -fopenmp" + INCLUDE_DIRS.extend(["/opt/homebrew/opt/libomp/include"]) + EXTRA_LINK_ARGS.extend(["-lomp", "-L/opt/homebrew/opt/libomp/lib"]) +elif sys.platform == "linux": + omp_flag = "-fopenmp" + EXTRA_LINK_ARGS.extend(["-fopenmp"]) -binding_file = os.getcwd() + "/python_bindings.cpp" ext_modules = [ Pybind11Extension( "flatnav", - [binding_file], - # Example: passing in the version to the compiled code + [SOURCE_PATH], define_macros=[("VERSION_INFO", __version__)], - cxx_std=11, - ), + cxx_std=17, + include_dirs=INCLUDE_DIRS, + extra_compile_args=[ + omp_flag, # Enable OpenMP + "-Ofast", # Use the fastest optimization + "-fpic", # Position-independent code + "-w", # Suppress all warnings (note: this overrides -Wall) + "-ffast-math", # Enable fast math optimizations + "-funroll-loops", # Unroll loops + "-ftree-vectorize", # Vectorize where possible + ], + extra_link_args=EXTRA_LINK_ARGS, # Link 
OpenMP when linking the extension + ) ] + setup( name="flatnav", version=__version__, @@ -38,10 +54,7 @@ description="Graph kNN with reordering.", long_description="", ext_modules=ext_modules, - # extras_require={"test": "pytest"}, - # Currently, build_ext only provides an optional "highest supported C++ - # level" feature, but in the future it may provide more features. - # cmdclass={"build_ext": build_ext}, + cmdclass={"build_ext": build_ext}, zip_safe=False, python_requires=">=3.7", ) diff --git a/flatnav_python/test_index.py b/flatnav_python/test_index.py new file mode 100644 index 0000000..c2f32d5 --- /dev/null +++ b/flatnav_python/test_index.py @@ -0,0 +1,230 @@ +import flatnav +from flatnav.index import index_factory +from flatnav.index import L2Index, IPIndex +from typing import Union, Optional +import numpy as np +import time +import tempfile +import h5py +import requests +import os + + +def generate_random_data(dataset_length: int, dim: int) -> np.ndarray: + return np.random.rand(dataset_length, dim) + + +def get_ann_benchmark_dataset(dataset_name): + base_uri = "http://ann-benchmarks.com" + dataset_uri = f"{base_uri}/{dataset_name}.hdf5" + + with tempfile.TemporaryDirectory() as tmp: + response = requests.get(dataset_uri) + loc = os.path.join(tmp, dataset_name) + + with open(loc, "wb") as f: + f.write(response.content) + data = h5py.File(loc, "r") + + training_set = data["train"] + queries = data["test"] + true_neighbors = data["neighbors"] + distances = data["distances"] + + return ( + np.array(training_set), + np.array(queries), + np.array(true_neighbors), + np.array(distances), + ) + + +def compute_recall( + index, queries: np.ndarray, ground_truth: np.ndarray, ef_search: int, k: int = 100 +): + """ + Compute recall for given queries, ground truth, and a FlatNav index. + + Args: + - index: The FlatNav index to search. + - queries: The query vectors. + - ground_truth: The ground truth indices for each query. + - k: Number of neighbors to search. 
+ + Returns: + Mean recall over all queries. + """ + start = time.time() + _, top_k_indices = index.search(queries=queries, ef_search=ef_search, K=k) + end = time.time() + + duration = (end - start) / len(queries) + print(f"Querying time: {duration * 1000} milliseconds") + + # Convert each ground truth list to a set for faster lookup + ground_truth_sets = [set(gt) for gt in ground_truth] + + mean_recall = 0 + + for idx, k_neighbors in enumerate(top_k_indices): + query_recall = sum( + 1 for neighbor in k_neighbors if neighbor in ground_truth_sets[idx] + ) + mean_recall += query_recall / k + + recall = mean_recall / len(queries) + return recall + + +def create_index( + distance_type: str, dim: int, dataset_size: int, max_edges_per_node: int +) -> Union[L2Index, IPIndex]: + index = index_factory( + distance_type=distance_type, + dim=dim, + dataset_size=dataset_size, + max_edges_per_node=max_edges_per_node, + verbose=True, + ) + if not (isinstance(index, L2Index) or isinstance(index, IPIndex)): + raise RuntimeError("Invalid index.") + + return index + + +def test_flatnav_l2_index_random_dataset(): + dataset_to_index = generate_random_data(dataset_length=30_000, dim=784) + queries = generate_random_data(dataset_length=5_000, dim=784) + ground_truth = np.random.randint(low=0, high=50, size=(5_000, 100)) + index = create_index( + distance_type="l2", + dim=dataset_to_index.shape[1], + dataset_size=len(dataset_to_index), + max_edges_per_node=32, + ) + + assert hasattr(index, "max_edges_per_node") + assert index.max_edges_per_node == 32 + + run_test( + index=index, + ef_construction=64, + ef_search=32, + training_set=dataset_to_index, + queries=queries, + ground_truth=ground_truth, + ) + + +def test_flatnav_l2_index_mnist_dataset(): + training_set, queries, ground_truth, _ = get_ann_benchmark_dataset( + dataset_name="mnist-784-euclidean" + ) + + index = create_index( + distance_type="l2", + dim=training_set.shape[1], + dataset_size=training_set.shape[0], + 
max_edges_per_node=16, + ) + + assert hasattr(index, "max_edges_per_node") + assert index.max_edges_per_node == 16 + + run_test( + index=index, + ef_construction=128, + ef_search=256, + training_set=training_set, + queries=queries, + ground_truth=ground_truth, + assert_recall_threshold=True, + recall_threshold=0.97, + ) + + +def test_flatnav_ip_index_random_dataset(): + dataset_to_index = generate_random_data(dataset_length=30_000, dim=225) + queries = generate_random_data(dataset_length=5_000, dim=225) + ground_truth = np.random.randint(low=0, high=50, size=(5_000, 100)) + + index = create_index( + distance_type="angular", + dim=dataset_to_index.shape[1], + dataset_size=len(dataset_to_index), + max_edges_per_node=16, + ) + + assert hasattr(index, "max_edges_per_node") + assert index.max_edges_per_node == 16 + + run_test( + index=index, + ef_construction=64, + ef_search=32, + training_set=dataset_to_index, + queries=queries, + ground_truth=ground_truth, + ) + + +def test_flatnav_index_with_reordering(): + training_set, queries, ground_truth, _ = get_ann_benchmark_dataset( + dataset_name="mnist-784-euclidean" + ) + + index = create_index( + distance_type="l2", + dim=training_set.shape[1], + dataset_size=training_set.shape[0], + max_edges_per_node=16, + ) + + assert hasattr(index, "max_edges_per_node") + assert index.max_edges_per_node == 16 + + run_test( + index=index, + ef_construction=128, + ef_search=256, + training_set=training_set, + queries=queries, + ground_truth=ground_truth, + assert_recall_threshold=True, + recall_threshold=0.97, + use_reordering=True, + reordering_algorithm="gorder", + ) + + +def run_test( + index: Union[L2Index, IPIndex], + ef_construction: int, + ef_search: int, + training_set: np.ndarray, + queries: np.ndarray, + ground_truth: np.ndarray, + use_reordering: bool = False, + reordering_algorithm: Optional[str] = None, + assert_recall_threshold: bool = False, + recall_threshold: Optional[float] = None, +): + start = time.time() + 
index.add(data=training_set, ef_construction=ef_construction) + end = time.time() + + print(f"Indexing time = {end - start} seconds") + + if use_reordering: + if not reordering_algorithm: + raise RuntimeError("Re-ordering algorithm must be provided.") + index.reorder(algorithm=reordering_algorithm) + + recall = compute_recall( + index=index, queries=queries, ground_truth=ground_truth, ef_search=ef_search + ) + + if assert_recall_threshold: + if not recall_threshold: + raise RuntimeError("Recall threshold must be provided.") + assert recall >= recall_threshold diff --git a/quantization/ProductQuantization.h b/quantization/ProductQuantization.h index 958f1d2..2e45507 100644 --- a/quantization/ProductQuantization.h +++ b/quantization/ProductQuantization.h @@ -14,7 +14,11 @@ #include #include #include + +#ifdef _OPENMP #include +#endif + #include #include #include diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c5dad89..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -black>=23.3.0 -numpy -h5py \ No newline at end of file diff --git a/tools/query.cpp b/tools/query.cpp deleted file mode 100644 index 1d52ba0..0000000 --- a/tools/query.cpp +++ /dev/null @@ -1,224 +0,0 @@ -#include "cnpy.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using flatnav::Index; -using flatnav::SquaredL2Distance; - -std::shared_ptr> -buildIndex(float *data, uint32_t dim, uint64_t N, uint32_t max_edges, - uint32_t ef_construction) { - auto distance = std::make_unique(dim); - auto index = std::make_shared>( - /* dist = */ std::move(distance), /* dataset_size = */ N, - /* max_edges = */ max_edges); - - auto start = std::chrono::high_resolution_clock::now(); - - for (int label = 0; label < N; label++) { - float *element = data + (dim * label); - index->add(/* data = */ (void *)element, /* label = */ label, - /* ef_construction */ ef_construction); - if (label % 100000 
== 0) - std::clog << "." << std::flush; - } - std::clog << std::endl; - - auto stop = std::chrono::high_resolution_clock::now(); - auto duration = - std::chrono::duration_cast(stop - start); - std::clog << "Build time: " << (float)duration.count() << " milliseconds" - << std::endl; - return index; -} - -int main(int argc, char **argv) { - - if (argc < 6) { - std::clog << "Usage: " << std::endl; - std::clog << "query "; - std::clog << " [--nq num_queries] [--reorder_id reorder_id] [--ef_profile " - "ef_profile] [--num_profile num_profile]" - << std::endl; - std::clog << "Positional arguments:" << std::endl; - std::clog << "\t index: Filename for the training data (float32 index)." - << std::endl; - std::clog << "\t space: Integer distance ID: 0 for L2 distance, 1 for " - "inner product (angular distance)." - << std::endl; - std::clog << "\t queries: Filename for queries (float32 file)." - << std::endl; - std::clog << "\t gtruth: Filename for ground truth (int32 file)." - << std::endl; - - std::clog << "\t k: Number of neighbors to return." << std::endl; - - std::clog << "Optional arguments:" << std::endl; - std::clog << "\t [--nq num_queries]: (Optional, default 0) Number of " - "queries to use. If 0, uses all queries." - << std::endl; - std::clog << "\t [--reorder_id reorder_id]: (Optional, default 0) Which " - "reordering algorithm to use? 0:none 1:gorder 2:indegsort " - "3:outdegsort 4:RCM 5:hubsort 6:hubcluster 7:DBG 8:corder " - "91:profiled_gorder 94:profiled_rcm 41:RCM+gorder" - << std::endl; - std::clog << "\t [--ef_profile ef_profile]: (Optional, default 100) " - "ef_search parameter to use for profiling." - << std::endl; - std::clog << "\t [--num_profile num_profile]: (Optional, default 1000) " - "Number of queries to use for profiling." - << std::endl; - return -1; - } - - // Optional arguments. 
- int num_queries = 10000; - bool reorder = false; - int reorder_ID = 0; - int ef_profile = 100; - int num_profile = 1000; - - std::string train_file = - "data/mnist-784-euclidean/mnist-784-euclidean.train.npy"; - std::string queries_file = - "data/mnist-784-euclidean/mnist-784-euclidean.test.npy"; - std::string groundtruth_file = - "data/mnist-784-euclidean/mnist-784-euclidean.gtruth.npy"; - - for (int i = 0; i < argc; ++i) { - if (std::strcmp("--nq", argv[i]) == 0) { - if ((i + 1) < argc) { - num_queries = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --nq" - << std::endl; - return -1; - } - } - if (std::strcmp("--reorder_id", argv[i]) == 0) { - if ((i + 1) < argc) { - reorder_ID = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --reorder_id" - << std::endl; - return -1; - } - } - if (std::strcmp("--ef_profile", argv[i]) == 0) { - if ((i + 1) < argc) { - ef_profile = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --ef_profile" - << std::endl; - return -1; - } - } - if (std::strcmp("--num_profile", argv[i]) == 0) { - if ((i + 1) < argc) { - num_profile = std::stoi(argv[i + 1]); - } else { - std::cerr << "Invalid argument for optional parameter --num_profile" - << std::endl; - return -1; - } - } - } - // Positional arguments. - std::string indexfilename(train_file); // Index filename. - int space_ID = 0; // Space ID for querying. - - // Load queries. - std::clog << "[INFO] Loading queries." << std::endl; - cnpy::NpyArray queries_array = cnpy::npy_load(queries_file); - float *queries = queries_array.data(); - - // Load ground truth. - std::clog << "[INFO] Loading ground truth." << std::endl; - cnpy::NpyArray gtruth_array = cnpy::npy_load(groundtruth_file); - uint32_t *gtruth = gtruth_array.data(); - - // EF search vector. - std::vector ef_searches{100}; - - // Number of search results. 
- int k = 100; - - std::clog << "[INFO] Loading training data." << std::endl; - cnpy::NpyArray train_data_array = cnpy::npy_load(train_file); - float *data = train_data_array.data(); - - std::clog << "[INFO] Building index from " << indexfilename << std::endl; - - uint32_t dim = 784; - auto index = buildIndex(/* data = */ data, /* dim = */ dim, /* N = */ 60000, - /* max_edges = */ 16, /* ef_construction = */ 200); - - // Do reordering, if necessary. - if (num_profile > num_queries) { - std::clog << "Warning: Number of profiling queries (" << num_profile - << ") is greater than number of queries (" << num_queries << ")!" - << std::endl; - num_profile = num_queries; - } - if (reorder) { - std::clog << "Using GORDER" << std::endl; - std::clog << "Reordering: " << std::endl; - auto start_r = std::chrono::high_resolution_clock::now(); - index->reorder_gorder(); - auto stop_r = std::chrono::high_resolution_clock::now(); - auto duration_r = - std::chrono::duration_cast(stop_r - start_r); - std::clog << "Reorder time: " << (float)(duration_r.count()) / (1000.0) - << " seconds" << std::endl; - } else { - std::clog << "No reordering" << std::endl; - } - - int num_gtruth_entries = 100; - - // Now, finally, do the actual search. 
- std::cout << "recall, mean_latency_ms" << std::endl; - for (int &ef_search : ef_searches) { - double mean_recall = 0; - - auto start_q = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < num_queries; i++) { - float *query = queries + dim * i; - uint32_t *g = gtruth + num_gtruth_entries * i; - - std::vector> result = index->search( - /* query = */ query, /* K = */ k, /* ef_search = */ ef_search); - - double recall = 0; - for (int j = 0; j < k; j++) { - for (int l = 0; l < k; l++) { - if (static_cast(result[j].second) == g[l]) { - recall += 1; - break; - } - } - } - recall /= k; - mean_recall = mean_recall + recall; - } - auto stop_q = std::chrono::high_resolution_clock::now(); - auto duration_q = - std::chrono::duration_cast(stop_q - start_q); - std::cout << "[INFO] recall: " << mean_recall / num_queries << std::endl; - std::cout << "[INFO] mean_latency_ms: " - << (float)(duration_q.count()) / num_queries << std::endl; - } - - return 0; -} \ No newline at end of file diff --git a/tools/query_npy.cpp b/tools/query_npy.cpp index fb4a4d7..fad19f8 100644 --- a/tools/query_npy.cpp +++ b/tools/query_npy.cpp @@ -34,7 +34,7 @@ void run(float *queries, int *gtruth, const std::string &index_filename, if (reorder) { std::clog << "[INFO] Gorder Reordering: " << std::endl; auto start_r = std::chrono::high_resolution_clock::now(); - index->reorder_gorder(); + index->reorderGOrder(); auto stop_r = std::chrono::high_resolution_clock::now(); auto duration_r = std::chrono::duration_cast(stop_r - start_r);