Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate data type enums & Add SIMD support for uint8_t and int8_t Indices #52

Merged
merged 32 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
37dc40b
making progress
blaise-muhirwa Apr 3, 2024
52e3c70
making simd more readable
blaise-muhirwa Apr 4, 2024
32eaa2a
further simd refactoring
blaise-muhirwa Apr 6, 2024
8022090
attempting to fix tests
blaise-muhirwa Apr 6, 2024
b3a00b8
second attempt
blaise-muhirwa Apr 6, 2024
6429b63
make sure we're compiling with sse3
blaise-muhirwa Apr 6, 2024
f99c037
include an sse3 preprocessor directive
blaise-muhirwa Apr 6, 2024
c4ec9d1
typo
blaise-muhirwa Apr 6, 2024
c71172a
fix preprocessor directives bug
blaise-muhirwa Apr 8, 2024
09109c4
remove avx2 instruction
blaise-muhirwa Apr 8, 2024
9287251
Update cereal_tests.cpp
BlaiseMuhirwa Apr 8, 2024
0e672ec
clean up
blaise-muhirwa Apr 8, 2024
4937726
Merge branch 'refactor-simd-code' of github.com-personal:BlaiseMuhirw…
blaise-muhirwa Apr 8, 2024
7b42ca4
fix small code inclusion error
blaise-muhirwa Apr 8, 2024
995fea8
merge from main
blaise-muhirwa Apr 11, 2024
170c2c7
initial commit
blaise-muhirwa Apr 11, 2024
159b95a
making progress
blaise-muhirwa Apr 14, 2024
563e5af
templating on data types
blaise-muhirwa Apr 23, 2024
954f847
saving current progress
blaise-muhirwa May 3, 2024
de5e6bf
improve simd for int8
blaise-muhirwa May 5, 2024
73cbbf4
merge from main
blaise-muhirwa Jul 26, 2024
b052a0a
work in progress
blaise-muhirwa Jul 27, 2024
adc18f8
things looking good -- still have a memory leak
blaise-muhirwa Jul 28, 2024
0e4f180
wip
blaise-muhirwa Jul 28, 2024
51d67dc
cleaned up the python bindings
blaise-muhirwa Jul 30, 2024
4174bc4
fix compiler errors
blaise-muhirwa Jul 30, 2024
72401ef
fix issues in product quantizer
blaise-muhirwa Jul 30, 2024
9dd84a3
fix gcc complaints regarding SSE4.1 instructions
blaise-muhirwa Jul 30, 2024
509827a
fix more errors
blaise-muhirwa Jul 30, 2024
582b3dd
forgot maximum edges per node
blaise-muhirwa Jul 30, 2024
43eb6a3
fix serialization tests
blaise-muhirwa Jul 31, 2024
66271d7
remove unnecessary comment
blaise-muhirwa Jul 31, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
- name: Build flatnav
run: |
cd flatnav_python
export NO_MANUAL_VECTORIZATION=1
export NO_SIMD_VECTORIZATION=1
./install_flatnav.sh

- name: Run Unit Tests
Expand Down
16 changes: 10 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ endif(OpenMP_FOUND)
option(BUILD_TESTS "Build all tests")
option(BUILD_EXAMPLES "Build examples")
option(BUILD_BENCHMARKS "Build ANNS benchmarks")
option(NO_MANUAL_VECTORIZATION "Disable manual vectorization (SIMD)")
option(NO_SIMD_VECTORIZATION "Disable using SIMD instructions")
message(STATUS "Building tests: ${BUILD_TESTS}")
message(STATUS "Building examples: ${BUILD_EXAMPLES}")
message(STATUS "Building benchmarks: ${BUILD_BENCHMARKS}")

# Enable auto-vectorization if we are not using SIMD.
if(NO_MANUAL_VECTORIZATION)
message(STATUS "Disabling manual vectorization (SIMD)")
add_definitions(-DNO_MANUAL_VECTORIZATION)
if(NO_SIMD_VECTORIZATION)
message(STATUS "Disabling using SIMD instructions")
add_definitions(-DNO_SIMD_VECTORIZATION)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftree-vectorize")
endif()

Expand All @@ -74,14 +74,18 @@ endif()
set(HEADERS
${PROJECT_SOURCE_DIR}/flatnav/distances/InnerProductDistance.h
${PROJECT_SOURCE_DIR}/flatnav/distances/SquaredL2Distance.h
${PROJECT_SOURCE_DIR}/flatnav/util/SquaredL2SimdExtensions.h
${PROJECT_SOURCE_DIR}/flatnav/util/InnerProductSimdExtensions.h
${PROJECT_SOURCE_DIR}/flatnav/util/VisitedSetPool.h
${PROJECT_SOURCE_DIR}/flatnav/util/GorderPriorityQueue.h
${PROJECT_SOURCE_DIR}/flatnav/util/Reordering.h
${PROJECT_SOURCE_DIR}/flatnav/util/SIMDDistanceSpecializations.h
${PROJECT_SOURCE_DIR}/flatnav/util/ParallelConstructs.h
${PROJECT_SOURCE_DIR}/flatnav/util/Macros.h
${PROJECT_SOURCE_DIR}/flatnav/util/Datatype.h
${PROJECT_SOURCE_DIR}/flatnav/util/SimdBaseTypes.h
${PROJECT_SOURCE_DIR}/flatnav/DistanceInterface.h
${PROJECT_SOURCE_DIR}/flatnav/Index.h
${PROJECT_SOURCE_DIR}/quantization/ProductQuantization.h
# ${PROJECT_SOURCE_DIR}/quantization/ProductQuantization.h
${PROJECT_SOURCE_DIR}/quantization/CentroidsGenerator.h
${PROJECT_SOURCE_DIR}/quantization/Utils.h)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Available Options:
-v, --verbose: Make verbose
-b, --benchmark: Build benchmarks
-bt, --build_type: Build type (Debug, Release, RelWithDebInfo, MinSizeRel)
-nmv, --no_manual_vectorization:Disable manual vectorization (SIMD)
-nsv, --no_simd_vectorization: Disable SIMD instructions
-h, --help: Print this help message

Example Usage:
Expand Down
8 changes: 4 additions & 4 deletions bin/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cd "$(dirname "$0")/.."
BUILD_TESTS=OFF
BUILD_EXAMPLES=OFF
BUILD_BENCHMARKS=OFF
NO_MANUAL_VECTORIZATION=OFF
NO_SIMD_VECTORIZATION=OFF
MAKE_VERBOSE=0
CMAKE_BUILD_TYPE=Release

Expand All @@ -19,7 +19,7 @@ function print_usage() {
echo " -v, --verbose: Make verbose"
echo " -b, --benchmark: Build benchmarks"
echo " -bt, --build_type: Build type (Debug, Release, RelWithDebInfo, MinSizeRel)"
echo " -nmv, --no_manual_vectorization:Disable manual vectorization (SIMD)"
echo " -nsv, --no_simd_vectorization:Disable SIMD vectorization"
echo " -h, --help: Print this help message"
echo ""
echo "Example Usage:"
Expand All @@ -41,7 +41,7 @@ while [[ "$#" -gt 0 ]]; do
-e|--examples) BUILD_EXAMPLES=ON; shift ;;
-v|--verbose) MAKE_VERBOSE=1; shift ;;
-b|--benchmark) BUILD_BENCHMARKS=ON; shift ;;
-nmv|--no_manual_vectorization) NO_MANUAL_VECTORIZATION=ON; shift ;;
-nsv|--no_simd_vectorization) NO_SIMD_VECTORIZATION=ON; shift ;;
-bt|--build_type) CMAKE_BUILD_TYPE=$2; shift; shift ;;
*) print_usage ;;
esac
Expand Down Expand Up @@ -73,7 +73,7 @@ mkdir -p build
cd build && cmake \
-DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX} \
-DNO_MANUAL_VECTORIZATION=${NO_MANUAL_VECTORIZATION} \
-DNO_SIMD_VECTORIZATION=${NO_SIMD_VECTORIZATION} \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DBUILD_EXAMPLES=${BUILD_EXAMPLES} \
Expand Down
3 changes: 2 additions & 1 deletion cmake/FindAVX.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ function(check_compiler_and_hardware_support FLAG CODE_VAR EXTENSION_NAME)
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} ${FLAG}"
PARENT_SCOPE)
message(STATUS "Building with ${EXTENSION_NAME}")
message(STATUS "Building with ${FLAG}")
else()
message(
STATUS "Compiler supports ${FLAG} flag but the target machine does not "
Expand All @@ -54,6 +54,7 @@ endfunction()
# Build SSE/AVX/AVX512 code only on x86-64 processors.
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
check_compiler_and_hardware_support("-mavx512f" "AVX512_CODE" "AVX512")
check_compiler_and_hardware_support("-mavx512bw" "AVX512_CODE" "AVX512")
check_compiler_and_hardware_support("-mavx" "AVX_CODE" "AVX")

check_cxx_compiler_flag("-msse" CXX_SSE)
Expand Down
5 changes: 2 additions & 3 deletions experiments/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,9 @@ sift-bench-flatnav:
--queries /root/data/sift-128-euclidean/sift-128-euclidean.test.npy \
--gtruth /root/data/sift-128-euclidean/sift-128-euclidean.gtruth.npy \
--index-type flatnav \
--use-hnsw-base-layer \
--hnsw-base-layer-filename sift.mtx \
--data-type float32 \
--num-node-links 32 \
--ef-construction 30 40 50 100 200 300 \
--ef-construction 100 \
--ef-search 100 200 300 500 1000 \
--metric l2 \
--num-build-threads 16 \
Expand Down
12 changes: 5 additions & 7 deletions experiments/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,11 @@ def load_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
start_index, end_index = self.range
train_dataset = np.load(self.train_dataset_path)[
start_index:end_index
].astype(np.float32, copy=False)
]
else:
train_dataset = np.load(self.train_dataset_path).astype(
np.float32, copy=False
)
queries = np.load(self.queries_path).astype(np.float32, copy=False)
ground_truth = np.load(self.ground_truth_path).astype(np.int32, copy=False)
train_dataset = np.load(self.train_dataset_path)
queries = np.load(self.queries_path)
ground_truth = np.load(self.ground_truth_path)
return train_dataset, queries, ground_truth


Expand Down Expand Up @@ -114,7 +112,7 @@ def _read_bvecs_file(self, filename: str) -> np.ndarray:
return v.reshape((end - start + 1, dimension + 4))[:, 4:]

def load_data(self) -> Tuple[np.ndarray]:
ground_truth = self._read_ivecs_file(self.gtruth_path)
ground_truth = self._read_ivecs_file(self.ground_truth_path)
# Ground truth has shape (10000, 1000) but we only need the first 100 queries
ground_truth = ground_truth[:, 0:100]

Expand Down
1 change: 1 addition & 0 deletions experiments/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ numpy = "^1.26.1"
matplotlib = "^3.8.2"
psutil = "^5.9.8"
pydantic = "^2.6.4"
flatnav = {path = "../flatnav_python/dist/flatnav-0.0.1-cp310-cp310-linux_x86_64.whl"}
BlaiseMuhirwa marked this conversation as resolved.
Show resolved Hide resolved

[build-system]
requires = ["poetry-core"]
Expand Down
130 changes: 75 additions & 55 deletions experiments/run-benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def compute_metrics(
:return: Dictionary of metrics.

"""
is_flatnav_index = type(index) in (flatnav.index.L2Index, flatnav.index.IPIndex)
is_flatnav_index = not type(index) == hnswlib.Index
latencies = []
top_k_indices = []
distance_computations = []
Expand Down Expand Up @@ -158,6 +158,7 @@ def train_index(
max_edges_per_node: int,
ef_construction: int,
index_type: str = "flatnav",
data_type: str = "float32",
use_hnsw_base_layer: bool = False,
hnsw_base_layer_filename: Optional[str] = None,
num_build_threads: int = 1,
Expand Down Expand Up @@ -214,6 +215,7 @@ def train_index(

index = flatnav.index.index_factory(
distance_type=distance_type,
index_data_type=data_type,
dim=dim,
dataset_size=dataset_size,
max_edges_per_node=max_edges_per_node,
Expand All @@ -232,6 +234,7 @@ def train_index(
else:
index = flatnav.index.index_factory(
distance_type=distance_type,
index_data_type=data_type,
dim=dim,
dataset_size=dataset_size,
max_edges_per_node=max_edges_per_node,
Expand Down Expand Up @@ -264,13 +267,75 @@ def main(
dataset_name: str,
requested_metrics: List[str],
index_type: str = "flatnav",
data_type: str = "float32",
use_hnsw_base_layer: bool = False,
hnsw_base_layer_filename: Optional[str] = None,
reordering_strategies: List[str] | None = None,
num_initializations: Optional[List[int]] = None,
num_build_threads: int = 1,
num_search_threads: int = 1,
):

def build_and_run_knn_search(ef_cons: int, node_links: int):
"""
Build the index and run the KNN search.
This part is here to ensure that two indices are not in memory at the same time.
With large datasets, we might get an OOM error.
"""

index = train_index(
index_type=index_type,
data_type=data_type,
train_dataset=train_dataset,
max_edges_per_node=node_links,
ef_construction=ef_cons,
dataset_size=dataset_size,
dim=dim,
distance_type=distance_type,
use_hnsw_base_layer=use_hnsw_base_layer,
hnsw_base_layer_filename=hnsw_base_layer_filename,
num_build_threads=num_build_threads,
)

if reordering_strategies is not None:
if index_type != "flatnav":
raise ValueError("Reordering only applies to the FlatNav index.")
index.reorder(strategies=reordering_strategies)

index.set_num_threads(num_search_threads)
for ef_search in ef_search_params:
# Extend metrics with computed metrics
metrics.update(
compute_metrics(
requested_metrics=requested_metrics,
index=index,
queries=queries,
ground_truth=gtruth,
ef_search=ef_search,
)
)
logging.info(f"Metrics: {metrics}")

# Add parameters to the metrics dictionary.
metrics["distance_type"] = distance_type
metrics["ef_search"] = ef_search
all_metrics = {experiment_key: []}

if os.path.exists(metrics_file) and os.path.getsize(metrics_file) > 0:
with open(metrics_file, "r") as file:
try:
all_metrics = json.load(file)
except json.JSONDecodeError:
logging.error(f"Error reading {metrics_file=}")

if experiment_key not in all_metrics:
all_metrics[experiment_key] = []

all_metrics[experiment_key].append(metrics)
with open(metrics_file, "w") as file:
json.dump(all_metrics, file, indent=4)


dataset_size = train_dataset.shape[0]
dim = train_dataset.shape[1]

Expand All @@ -284,59 +349,7 @@ def main(
metrics["ef_construction"] = ef_cons

logging.info(f"Building {index_type=}")
index = train_index(
index_type=index_type,
train_dataset=train_dataset,
max_edges_per_node=node_links,
ef_construction=ef_cons,
dataset_size=dataset_size,
dim=dim,
distance_type=distance_type,
use_hnsw_base_layer=use_hnsw_base_layer,
hnsw_base_layer_filename=hnsw_base_layer_filename,
num_build_threads=num_build_threads,
)

if reordering_strategies is not None:
if type(index) not in (
flatnav.index.L2Index,
flatnav.index.IPIndex,
):
raise ValueError("Reordering only applies to the FlatNav index.")
index.reorder(strategies=reordering_strategies)

index.set_num_threads(num_search_threads)
for ef_search in ef_search_params:
# Extend metrics with computed metrics
metrics.update(
compute_metrics(
requested_metrics=requested_metrics,
index=index,
queries=queries,
ground_truth=gtruth,
ef_search=ef_search,
)
)
logging.info(f"Metrics: {metrics}")

# Add parameters to the metrics dictionary.
metrics["distance_type"] = distance_type
metrics["ef_search"] = ef_search
all_metrics = {experiment_key: []}

if os.path.exists(metrics_file) and os.path.getsize(metrics_file) > 0:
with open(metrics_file, "r") as file:
try:
all_metrics = json.load(file)
except json.JSONDecodeError:
logging.error(f"Error reading {metrics_file=}")

if experiment_key not in all_metrics:
all_metrics[experiment_key] = []

all_metrics[experiment_key].append(metrics)
with open(metrics_file, "w") as file:
json.dump(all_metrics, file, indent=4)
build_and_run_knn_search(ef_cons=ef_cons, node_links=node_links)


def parse_arguments() -> argparse.Namespace:
Expand All @@ -349,6 +362,12 @@ def parse_arguments() -> argparse.Namespace:
default="flatnav",
help="Type of index to benchmark. Options include `flatnav` and `hnsw`.",
)

parser.add_argument(
"--data-type",
default="float32",
help="Data type of the index. Options include `float32`, `uint8` and `int8`.",
)

parser.add_argument(
"--use-hnsw-base-layer",
Expand Down Expand Up @@ -542,7 +561,7 @@ def run_experiment():
raise ValueError("HNSW does not support num_initializations.")

metrics_file_path = os.path.join(ROOT_DIR, "metrics", args.metrics_file)

main(
train_dataset=train_data,
queries=queries,
Expand All @@ -553,6 +572,7 @@ def run_experiment():
distance_type=args.metric.lower(),
dataset_name=args.dataset_name,
index_type=args.index_type.lower(),
data_type=args.data_type,
use_hnsw_base_layer=args.use_hnsw_base_layer,
hnsw_base_layer_filename=args.hnsw_base_layer_filename,
reordering_strategies=args.reordering_strategies,
Expand Down
12 changes: 12 additions & 0 deletions flatnav/DistanceInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@

#include <cereal/access.hpp>
#include <cstddef> // for size_t
#include <flatnav/util/Datatype.h>
#include <fstream> // for ifstream, ofstream
#include <functional>
#include <iostream>

namespace flatnav {

using util::DataType;
typedef std::function<float(const void *, const void *, const size_t &)>
DistanceFunction;

typedef std::unique_ptr<DistanceFunction> DistanceFunctionPtr;

enum class METRIC_TYPE { EUCLIDEAN, INNER_PRODUCT };

// We use the CRTP to implement static polymorphism on the distance. This is
Expand Down Expand Up @@ -35,6 +43,10 @@ template <typename T> class DistanceInterface {
// Prints the parameters of the distance function.
void getSummary() { static_cast<T *>(this)->getSummaryImpl(); }

inline constexpr DataType dataType() {
return static_cast<T *>(this)->dataTypeImpl();
}

// This transforms the data located at src into a form that is writeable
// to disk / storable in RAM. For distance functions that don't
// compress the input, this just passses through a copy from src to
Expand Down
Loading
Loading