diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..a4ceef3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include src/knncolle/include * diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 40762c9..58d9ae1 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -20,6 +20,7 @@ pybind11_add_module(knncolle_py ) target_include_directories(knncolle_py PRIVATE "${ASSORTHEAD_INCLUDE_DIR}") +target_include_directories(knncolle_py PRIVATE "../src/knncolle/include") set_property(TARGET knncolle_py PROPERTY CXX_STANDARD 17) diff --git a/lib/src/annoy.cpp b/lib/src/annoy.cpp index a54c070..6cede57 100644 --- a/lib/src/annoy.cpp +++ b/lib/src/annoy.cpp @@ -1,29 +1,33 @@ -#include "def.h" +#include "knncolle_py.h" #include "pybind11/pybind11.h" +#include +#include + // Turn off manual vectorization always, to avoid small inconsistencies in // distance calculations across otherwise-compliant machines. #define NO_MANUAL_VECTORIZATION 1 #include "knncolle_annoy/knncolle_annoy.hpp" -BuilderPointer create_annoy_builder(int num_trees, double search_mult, std::string distance) { +uintptr_t create_annoy_builder(int num_trees, double search_mult, std::string distance) { knncolle_annoy::AnnoyOptions opt; opt.num_trees = num_trees; opt.search_mult = search_mult; + auto tmp = std::make_unique(); if (distance == "Manhattan") { - return BuilderPointer(new knncolle_annoy::AnnoyBuilder(opt)); + tmp->ptr.reset(new knncolle_annoy::AnnoyBuilder(opt)); } else if (distance == "Euclidean") { - return BuilderPointer(new knncolle_annoy::AnnoyBuilder(opt)); + tmp->ptr.reset(new knncolle_annoy::AnnoyBuilder(opt)); } else if (distance == "Cosine") { - return BuilderPointer( - new knncolle::L2NormalizedBuilder( + tmp->ptr.reset( + new knncolle::L2NormalizedBuilder( new knncolle_annoy::AnnoyBuilder< Annoy::Euclidean, - knncolle::L2NormalizedMatrix, + knncolle::L2NormalizedMatrix, double >(opt) ) @@ -31,8 +35,9 @@ BuilderPointer create_annoy_builder(int num_trees, double search_mult, std::stri } else { throw std::runtime_error("unknown distance type '" + distance + "'"); - return BuilderPointer(); } + + return reinterpret_cast(static_cast(tmp.release())); } void init_annoy(pybind11::module& m) { diff --git a/lib/src/def.h b/lib/src/def.h deleted file mode 100644 index c759bf2..0000000 --- a/lib/src/def.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef DEF_H -#define DEF_H - -#include -#include - -#include "knncolle/knncolle.hpp" - -typedef knncolle::SimpleMatrix SimpleMatrix; - -typedef knncolle::Builder Builder; - -typedef std::shared_ptr BuilderPointer; - -typedef knncolle::Prebuilt Prebuilt; - -typedef std::shared_ptr PrebuiltPointer; - -#endif diff --git a/lib/src/exhaustive.cpp b/lib/src/exhaustive.cpp index cf770ab..b4656ae 100644 --- a/lib/src/exhaustive.cpp +++ b/lib/src/exhaustive.cpp @@ -1,19 +1,24 @@ -#include "def.h" +#include "knncolle_py.h" #include "pybind11/pybind11.h" -BuilderPointer create_exhaustive_builder(std::string distance) { +#include +#include + +uintptr_t create_exhaustive_builder(std::string distance) { + auto tmp = std::make_unique(); + if (distance == "Manhattan") { - return BuilderPointer(new knncolle::BruteforceBuilder); + tmp->ptr.reset(new knncolle::BruteforceBuilder); } else if (distance == "Euclidean") { - return BuilderPointer(new knncolle::BruteforceBuilder); + tmp->ptr.reset(new knncolle::BruteforceBuilder); } else if (distance == "Cosine") { - return BuilderPointer( + tmp->ptr.reset( new knncolle::L2NormalizedBuilder( new knncolle::BruteforceBuilder< knncolle::EuclideanDistance, - knncolle::L2NormalizedMatrix, + knncolle::L2NormalizedMatrix, double > ) @@ -21,8 +26,9 @@ BuilderPointer create_exhaustive_builder(std::string distance) { } else { throw std::runtime_error("unknown distance type '" + distance + "'"); - return BuilderPointer(); } + + return reinterpret_cast(static_cast(tmp.release())); } void init_exhaustive(pybind11::module& m) { diff --git a/lib/src/generics.cpp b/lib/src/generics.cpp index 8bde097..2e2144b 100644 --- a/lib/src/generics.cpp +++ b/lib/src/generics.cpp @@ -1,4 +1,5 @@ -#include "def.h" +#include "knncolle_py.h" + #include "pybind11/pybind11.h" #include "pybind11/numpy.h" #include "pybind11/stl.h" @@ -6,20 +7,38 @@ #include #include #include +#include +#include +#include typedef pybind11::array_t DataMatrix; -PrebuiltPointer generic_build(const BuilderPointer& builder, const DataMatrix& data) { +void free_builder(uintptr_t builder_ptr) { + delete knncolle_py::cast_builder(builder_ptr); +} + +uintptr_t generic_build(uintptr_t builder_ptr, const DataMatrix& data) { auto buffer = data.request(); uint32_t NR = buffer.shape[0], NC = buffer.shape[1]; - return PrebuiltPointer(builder->build_raw(SimpleMatrix(NR, NC, static_cast(buffer.ptr)))); + + auto builder = knncolle_py::cast_builder(builder_ptr); + auto tmp = std::make_unique(); + tmp->ptr.reset(builder->ptr->build_raw(knncolle_py::SimpleMatrix(NR, NC, static_cast(buffer.ptr)))); + + return reinterpret_cast(static_cast(tmp.release())); } -uint32_t generic_num_obs(const PrebuiltPointer& prebuilt) { +void free_prebuilt(uintptr_t prebuilt_ptr) { + delete knncolle_py::cast_prebuilt(prebuilt_ptr); +} + +uint32_t generic_num_obs(uintptr_t prebuilt_ptr) { + const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr; return prebuilt->num_observations(); } -uint32_t generic_num_dims(const PrebuiltPointer& prebuilt) { +uint32_t generic_num_dims(uintptr_t prebuilt_ptr) { + const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr; return prebuilt->num_dimensions(); } @@ -54,7 +73,7 @@ typedef pybind11::array_t ChosenVector; pybind11::object generic_find_knn( - const PrebuiltPointer& prebuilt, + uintptr_t prebuilt_ptr, const NeighborVector& num_neighbors, bool force_variable_neighbors, std::optional chosen, @@ -63,6 +82,7 @@ pybind11::object generic_find_knn( bool report_index, bool report_distance) { + const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr; uint32_t nobs = prebuilt->num_observations(); // Checking if we have to handle subsets. @@ -206,7 +226,7 @@ pybind11::object generic_find_knn( } pybind11::object generic_query_knn( - const PrebuiltPointer& prebuilt, + uintptr_t prebuilt_ptr, const DataMatrix& query, const NeighborVector& num_neighbors, bool force_variable_neighbors, @@ -215,18 +235,19 @@ pybind11::object generic_query_knn( bool report_index, bool report_distance) { - int nobs = prebuilt->num_observations(); - size_t ndim = prebuilt->num_dimensions(); + const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr; + uint32_t nobs = prebuilt->num_observations(); + uint32_t ndim = prebuilt->num_dimensions(); auto buf_info = query.request(); uint32_t nquery = buf_info.shape[1]; const double* query_ptr = static_cast(buf_info.ptr); - if (static_cast(buf_info.shape[0]) != ndim) { + if (static_cast(buf_info.shape[0]) != ndim) { throw std::runtime_error("mismatch in dimensionality between index and 'query'"); } // Checking that 'k' is valid. - auto sanitize_k = [&](int k) -> int { + auto sanitize_k = [&](uint32_t k) -> int { if (k <= nobs) { return k; } @@ -354,13 +375,14 @@ pybind11::object generic_query_knn( typedef pybind11::array_t ThresholdVector; pybind11::object generic_find_all( - const PrebuiltPointer& prebuilt, + uintptr_t prebuilt_ptr, std::optional chosen, const ThresholdVector& thresholds, int num_threads, bool report_index, bool report_distance) { + const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr; uint32_t nobs = prebuilt->num_observations(); uint32_t num_output = nobs; @@ -438,13 +460,14 @@ pybind11::object generic_find_all( } pybind11::object generic_query_all( - const PrebuiltPointer& prebuilt, + uintptr_t prebuilt_ptr, const DataMatrix& query, const ThresholdVector& thresholds, int num_threads, bool report_index, bool report_distance) { + const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr; size_t ndim = prebuilt->num_dimensions(); auto buf_info = query.request(); @@ -522,7 +545,9 @@ pybind11::object generic_query_all( *********************************/ void init_generics(pybind11::module& m) { + m.def("free_builder", &free_builder); m.def("generic_build", &generic_build); + m.def("free_prebuilt", &free_prebuilt); m.def("generic_num_obs", &generic_num_obs); m.def("generic_num_dims", &generic_num_dims); m.def("generic_find_knn", &generic_find_knn); diff --git a/lib/src/hnsw.cpp b/lib/src/hnsw.cpp index 51492d2..235cb62 100644 --- a/lib/src/hnsw.cpp +++ b/lib/src/hnsw.cpp @@ -1,4 +1,4 @@ -#include "def.h" +#include "knncolle_py.h" #include "pybind11/pybind11.h" // Turn off manual vectorization always, to avoid small inconsistencies in @@ -7,26 +7,27 @@ #include "knncolle_hnsw/knncolle_hnsw.hpp" -BuilderPointer create_hnsw_builder(int nlinks, int ef_construct, int ef_search, std::string distance) { +uintptr_t create_hnsw_builder(int nlinks, int ef_construct, int ef_search, std::string distance) { knncolle_hnsw::HnswOptions opt; opt.num_links = nlinks; opt.ef_construction = ef_construct; opt.ef_search = ef_search; + auto tmp = std::make_unique(); if (distance == "Manhattan") { opt.distance_options.create = [&](int dim) -> hnswlib::SpaceInterface* { return new knncolle_hnsw::ManhattanDistance(dim); }; - return BuilderPointer(new knncolle_hnsw::HnswBuilder(opt)); + tmp->ptr.reset(new knncolle_hnsw::HnswBuilder(opt)); } else if (distance == "Euclidean") { - return BuilderPointer(new knncolle_hnsw::HnswBuilder(opt)); + tmp->ptr.reset(new knncolle_hnsw::HnswBuilder(opt)); } else if (distance == "Cosine") { - return BuilderPointer( - new knncolle::L2NormalizedBuilder( + tmp->ptr.reset( + new knncolle::L2NormalizedBuilder( new knncolle_hnsw::HnswBuilder< - knncolle::L2NormalizedMatrix, + knncolle::L2NormalizedMatrix, double >(opt) ) @@ -34,8 +35,9 @@ BuilderPointer create_hnsw_builder(int nlinks, int ef_construct, int ef_search, } else { throw std::runtime_error("unknown distance type '" + distance + "'"); - return BuilderPointer(); } + + return reinterpret_cast(static_cast(tmp.release())); } void init_hnsw(pybind11::module& m) { diff --git a/lib/src/init.cpp b/lib/src/init.cpp index a500c56..b8e7e1f 100644 --- a/lib/src/init.cpp +++ b/lib/src/init.cpp @@ -1,4 +1,3 @@ -#include "def.h" #include "pybind11/pybind11.h" #include "pybind11/numpy.h" #include "pybind11/stl.h" @@ -17,7 +16,4 @@ PYBIND11_MODULE(lib_knncolle, m) { init_hnsw(m); init_kmknn(m); init_vptree(m); - - pybind11::class_(m, "Builder"); - pybind11::class_(m, "Prebuilt"); } diff --git a/lib/src/kmknn.cpp b/lib/src/kmknn.cpp index ab74ced..66da0bd 100644 --- a/lib/src/kmknn.cpp +++ b/lib/src/kmknn.cpp @@ -1,19 +1,24 @@ -#include "def.h" +#include "knncolle_py.h" #include "pybind11/pybind11.h" -BuilderPointer create_kmknn_builder(std::string distance) { +#include +#include + +uintptr_t create_kmknn_builder(std::string distance) { + auto tmp = std::make_unique(); + if (distance == "Manhattan") { - return BuilderPointer(new knncolle::KmknnBuilder); + tmp->ptr.reset(new knncolle::KmknnBuilder); } else if (distance == "Euclidean") { - return BuilderPointer(new knncolle::KmknnBuilder); + tmp->ptr.reset(new knncolle::KmknnBuilder); } else if (distance == "Cosine") { - return BuilderPointer( - new knncolle::L2NormalizedBuilder( + tmp->ptr.reset( + new knncolle::L2NormalizedBuilder( new knncolle::KmknnBuilder< knncolle::EuclideanDistance, - knncolle::L2NormalizedMatrix, + knncolle::L2NormalizedMatrix, double > ) @@ -21,8 +26,9 @@ BuilderPointer create_kmknn_builder(std::string distance) { } else { throw std::runtime_error("unknown distance type '" + distance + "'"); - return BuilderPointer(); } + + return reinterpret_cast(static_cast(tmp.release())); } void init_kmknn(pybind11::module& m) { diff --git a/lib/src/vptree.cpp b/lib/src/vptree.cpp index a9ed0a8..6ba6504 100644 --- a/lib/src/vptree.cpp +++ b/lib/src/vptree.cpp @@ -1,19 +1,24 @@ -#include "def.h" +#include "knncolle_py.h" #include "pybind11/pybind11.h" -BuilderPointer create_vptree_builder(std::string distance) { +#include +#include + +uintptr_t create_vptree_builder(std::string distance) { + auto tmp = std::make_unique(); + if (distance == "Manhattan") { - return BuilderPointer(new knncolle::VptreeBuilder); + tmp->ptr.reset(new knncolle::VptreeBuilder); } else if (distance == "Euclidean") { - return BuilderPointer(new knncolle::VptreeBuilder); + tmp->ptr.reset(new knncolle::VptreeBuilder); } else if (distance == "Cosine") { - return BuilderPointer( + tmp->ptr.reset( new knncolle::L2NormalizedBuilder( new knncolle::VptreeBuilder< knncolle::EuclideanDistance, - knncolle::L2NormalizedMatrix, + knncolle::L2NormalizedMatrix, double > ) @@ -21,8 +26,9 @@ BuilderPointer create_vptree_builder(std::string distance) { } else { throw std::runtime_error("unknown distance type '" + distance + "'"); - return BuilderPointer(); } + + return reinterpret_cast(static_cast(tmp.release())); } void init_vptree(pybind11::module& m) { diff --git a/src/knncolle/__init__.py b/src/knncolle/__init__.py index 345870b..4744295 100644 --- a/src/knncolle/__init__.py +++ b/src/knncolle/__init__.py @@ -29,3 +29,15 @@ from .query_knn import query_knn from .query_neighbors import query_neighbors from .vptree import VptreeParameters, VptreeIndex + + +def includes() -> str: + """Provides access to ``knncolle_py.h`` C++ header. + + Returns: + Path to a directory containing the header. + """ + import os + import inspect + dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) + return os.path.join(dirname, "include") diff --git a/src/knncolle/annoy.py b/src/knncolle/annoy.py index 924c417..b2731a9 100644 --- a/src/knncolle/annoy.py +++ b/src/knncolle/annoy.py @@ -3,7 +3,7 @@ from . import lib_knncolle as lib from .define_builder import define_builder -from .classes import Index, GenericIndex, Parameters +from .classes import Index, Builder, GenericIndex, Parameters class AnnoyParameters(Parameters): @@ -96,21 +96,16 @@ class AnnoyIndex(GenericIndex): with a :py:class:`~knncolle.annoy.AnnoyParameters` object. """ - def __init__(self, ptr): + def __init__(self, ptr: int): """ Args: ptr: - Shared pointer to a ``knncolle::Prebuilt``, created and wrapped by pybind11. + Address of a ``knncolle_py::WrappedPrebuilt`` containing an + Annoy search index, allocated in C++. """ - self._ptr = ptr - - @property - def ptr(self): - """Pointer to the prebuilt index, :py:meth:`~__init__`.""" - return self._ptr + super().__init__(ptr) @define_builder.register def _define_builder_annoy(x: AnnoyParameters) -> Tuple: - return (lib.create_annoy_builder(x.num_trees, x.search_mult, x.distance), AnnoyIndex) + return (Builder(lib.create_annoy_builder(x.num_trees, x.search_mult, x.distance)), AnnoyIndex) diff --git a/src/knncolle/build_index.py b/src/knncolle/build_index.py index e08fd8b..2c80e62 100644 --- a/src/knncolle/build_index.py +++ b/src/knncolle/build_index.py @@ -29,5 +29,5 @@ def build_index(param: Parameters, x: numpy.ndarray, **kwargs) -> Index: Instance of a :py:class:`~knncolle.classes.Index` subclass. """ builder, cls = define_builder(param) - prebuilt = lib.generic_build(builder, x) + prebuilt = lib.generic_build(builder.ptr, x) return cls(prebuilt) diff --git a/src/knncolle/classes.py b/src/knncolle/classes.py index 0dbc02a..ecbfc08 100644 --- a/src/knncolle/classes.py +++ b/src/knncolle/classes.py @@ -11,6 +11,33 @@ class Parameters(ABC): pass +class Builder: + """ + Pointer to a search index builder, i.e., ``knncolle_py::WrappedBuilder``, + for use in C++ to build new neighbor search indices. The associated memory + is automatically freed upon garbage collection. + """ + + def __init__(self, ptr: int): + """ + Args: + ptr: + Address of a ``knncolle_py::WrappedBuilder``. + """ + self._ptr = ptr + + def __del__(self): + """Frees the builder in C++.""" + lib.free_builder(self._ptr) + + @property + def ptr(self): + """Address of a ``knncolle_py::WrappedBuilder``, to be passed into + C++ as a ``uintptr_t``; see ``knncolle_py.h`` for details.""" + return self._ptr + + + class Index(ABC): """ Abstract base class for a prebuilt nearest neighbor-search index. Each @@ -22,21 +49,39 @@ class Index(ABC): class GenericIndex(Index): """ - Abstract base class for a prebuilt nearest neighbor-search index that is - represented as a ``std::shared_ptr >``. Compatible algorithms should implement their own subclasses. + Abstract base class for a prebuilt nearest neighbor-search index that holds + an address to a ``knncolle_py::WrappedPrebuilt`` instance in C++. The + associated memory is automatically freed upon garbage collection. """ + def __init__(self, ptr: int): + """ + Args: + ptr: + Address of a ``knncolle_py::WrappedPrebuilt``. + """ + self._ptr = ptr + + @property + def ptr(self) -> int: + """Address of a ``knncolle_py::WrappedPrebuilt``, to be passed into + C++ as a ``uintptr_t``; see ``knncolle_py.h`` for details.""" + return self._ptr + + def __del__(self): + """Frees the index in C++.""" + lib.free_prebuilt(self._ptr) + def num_observations(self) -> int: """ Returns: Number of observations in this index. """ - return lib.generic_num_obs(self.ptr) + return lib.generic_num_obs(self._ptr) def num_dimensions(self) -> int: """ Returns: Number of dimensions in this index. """ - return lib.generic_num_dims(self.ptr) + return lib.generic_num_dims(self._ptr) diff --git a/src/knncolle/define_builder.py b/src/knncolle/define_builder.py index f6a902d..64717d4 100644 --- a/src/knncolle/define_builder.py +++ b/src/knncolle/define_builder.py @@ -16,9 +16,7 @@ def define_builder(param: Parameters) -> Tuple: Parameters for a particular search algorithm. Returns: - Tuple where the first element is a shared pointer to a - ``knncolle::Builder, - double>`` instance, and the second element is a - :py:class:`~knncolle.classes.GenericIndex` subclass. + Tuple where the first element is a :py:class:`~knncolle.classes.Builder` + and the second element is a :py:class:`~knncolle.classes.GenericIndex`. """ raise NotImplementedError("no available method for '" + str(type(x)) + "'") diff --git a/src/knncolle/exhaustive.py b/src/knncolle/exhaustive.py index e953721..d961536 100644 --- a/src/knncolle/exhaustive.py +++ b/src/knncolle/exhaustive.py @@ -2,7 +2,7 @@ from typing import Literal, Tuple from . import lib_knncolle as lib -from .classes import Parameters, GenericIndex +from .classes import Parameters, GenericIndex, Builder from .define_builder import define_builder @@ -48,17 +48,12 @@ def __init__(self, ptr): """ Args: ptr: - Shared pointer to a ``knncolle::Prebuilt``, created and wrapped by pybind11. + Address of a ``knncolle_py::WrappedPrebuilt`` containing an + exhaustive search index, allocated in C++. """ - self._ptr = ptr - - @property - def ptr(self): - """Pointer to the prebuilt index, see :py:meth:`~__init__`.""" - return self._ptr + super().__init__(ptr) @define_builder.register def _define_builder_exhaustive(x: ExhaustiveParameters) -> Tuple: - return (lib.create_exhaustive_builder(x.distance), ExhaustiveIndex) + return (Builder(lib.create_exhaustive_builder(x.distance)), ExhaustiveIndex) diff --git a/src/knncolle/hnsw.py b/src/knncolle/hnsw.py index d90697b..c9ed266 100644 --- a/src/knncolle/hnsw.py +++ b/src/knncolle/hnsw.py @@ -2,7 +2,7 @@ from typing import Literal, Optional, Tuple from . import lib_knncolle as lib -from .classes import Parameters, GenericIndex +from .classes import Parameters, GenericIndex, Builder from .define_builder import define_builder @@ -111,7 +111,7 @@ def ef_search(self, ef_search: int): class HnswIndex(GenericIndex): - """A prebuilt index for the hierarchical navigable small worlds (Hnsw) + """A prebuilt index for the hierarchical navigable small worlds (HNSW) algorithm, created by :py:func:`~knncolle.define_builder.define_builder` with a :py:class:`~knncolle.hnsw.HnswParameters` object. """ @@ -120,17 +120,12 @@ def __init__(self, ptr): """ Args: ptr: - Shared pointer to a ``knncolle::Prebuilt``, created and wrapped by pybind11. + Address of a ``knncolle_py::WrappedPrebuilt`` containing a + HNSW search index, allocated in C++. """ - self._ptr = ptr - - @property - def ptr(self): - """Pointer to a prebuilt index, see :py:meth:`~__init__`.""" - return self._ptr + super().__init__(ptr) @define_builder.register def _define_builder_hnsw(x: HnswParameters) -> Tuple: - return (lib.create_hnsw_builder(x.num_links, x.ef_construction, x.ef_search, x.distance), HnswIndex) + return (Builder(lib.create_hnsw_builder(x.num_links, x.ef_construction, x.ef_search, x.distance)), HnswIndex) diff --git a/src/knncolle/include/knncolle_py.h b/src/knncolle/include/knncolle_py.h new file mode 100644 index 0000000..385c6b1 --- /dev/null +++ b/src/knncolle/include/knncolle_py.h @@ -0,0 +1,69 @@ +#ifndef KNNCOLLE_PY_H +#define KNNCOLLE_PY_H + +#include +#include +#include "knncolle/knncolle.hpp" + +namespace knncolle_py { + +/** + * Type of the indices. + */ +typedef uint32_t Index; + +/** + * Type of the distances. + */ +typedef double Distance; + +/** + * Type of the input matrix data. + */ +typedef double MatrixValue; + +/** + * Type for the matrix inputs into the **knncolle** interface. + * Indices are unsigned 32-bit points while values are double-precision. + */ +typedef knncolle::SimpleMatrix SimpleMatrix; + +/** + * @brief Wrapper for the builder factory. + */ +struct WrappedBuilder { + /** + * Pointer to an algorithm-specific `knncolle::Builder`. + */ + std::shared_ptr > ptr; +}; + +/** + * @param ptr Stored pointer to a `WrappedBuilder`. + * @return Pointer to a `WrappedBuilder`. + */ +inline const WrappedBuilder* cast_builder(uintptr_t ptr) { + return static_cast(reinterpret_cast(ptr)); +} + +/** + * @brief Wrapper for a prebuilt search index. + */ +struct WrappedPrebuilt { + /** + * Pointer to a `knncolle::Prebuilt` containing a prebuilt search index. + */ + std::shared_ptr > ptr; +}; + +/** + * @param ptr Stored pointer to a `WrappedPrebuilt`. + * @return Pointer to a `WrappedPrebuilt`. + */ +inline const WrappedPrebuilt* cast_prebuilt(uintptr_t ptr) { + return static_cast(reinterpret_cast(ptr)); +} + +} + +#endif diff --git a/src/knncolle/kmknn.py b/src/knncolle/kmknn.py index 6ab02c1..b7a3d99 100644 --- a/src/knncolle/kmknn.py +++ b/src/knncolle/kmknn.py @@ -2,7 +2,7 @@ from typing import Literal, Tuple from . import lib_knncolle as lib -from .classes import Parameters, GenericIndex +from .classes import Parameters, GenericIndex, Builder from .define_builder import define_builder @@ -48,17 +48,12 @@ def __init__(self, ptr): """ Args: ptr: - Shared pointer to a ``knncolle::Prebuilt``, created and wrapped by pybind11. + Address of a ``knncolle_py::WrappedPrebuilt`` containing a + KMKNN search index, allocated in C++. """ - self._ptr = ptr - - @property - def ptr(self): - """Pointer to a prebuilt index, see :py:meth:`~__init__`.""" - return self._ptr + super().__init__(ptr) @define_builder.register def _define_builder_kmknn(x: KmknnParameters) -> Tuple: - return (lib.create_kmknn_builder(x.distance), KmknnIndex) + return (Builder(lib.create_kmknn_builder(x.distance)), KmknnIndex) diff --git a/src/knncolle/vptree.py b/src/knncolle/vptree.py index 6ce8fc0..b4c14bb 100644 --- a/src/knncolle/vptree.py +++ b/src/knncolle/vptree.py @@ -2,7 +2,7 @@ from typing import Literal, Tuple from . import lib_knncolle as lib -from .classes import Parameters, GenericIndex +from .classes import Parameters, GenericIndex, Builder from .define_builder import define_builder @@ -48,17 +48,12 @@ def __init__(self, ptr): """ Args: ptr: - Shared pointer to a ``knncolle::Prebuilt``, created and wrapped by pybind11. + Address of a ``knncolle_py::WrappedPrebuilt`` containing a + VP tree search index, allocated in C++. """ - self._ptr = ptr - - @property - def ptr(self): - """Pointer to a prebuilt index, see :py:meth:`~__init__`.""" - return self._ptr + super().__init__(ptr) @define_builder.register def _define_builder_vptree(x: VptreeParameters) -> Tuple: - return (lib.create_vptree_builder(x.distance), VptreeIndex) + return (Builder(lib.create_vptree_builder(x.distance)), VptreeIndex) diff --git a/tests/conftest.py b/tests/conftest.py index 223e30e..c2465eb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -49,6 +49,14 @@ def compare_lists(x, y): for i, val in enumerate(x): assert (val == y[i]).all() + @staticmethod + def compare_lists_close(x, y): + import numpy + assert len(x) == len(y) + for i, val in enumerate(x): + assert numpy.isclose(val, y[i]).all() + + @pytest.fixture def helpers(): diff --git a/tests/test_find_knn.py b/tests/test_find_knn.py index 25eb92b..cf7fc08 100644 --- a/tests/test_find_knn.py +++ b/tests/test_find_knn.py @@ -34,20 +34,20 @@ def test_find_knn_basic(): out = knncolle.find_knn(idx, num_neighbors=8) ref_i, ref_d = ref_find_knn(Y, k=8) assert (ref_i == out.index).all() - assert (ref_d == out.distance).all() + assert numpy.isclose(ref_d, out.distance).all() idx = knncolle.build_index(knncolle.VptreeParameters(distance="Manhattan"), Y) out = knncolle.find_knn(idx, num_neighbors=8) ref_i, ref_d = ref_find_knn(Y, k=8, distance="manhattan") assert (ref_i == out.index).all() - assert (ref_d == out.distance).all() + assert numpy.isclose(ref_d, out.distance).all() idx = knncolle.build_index(knncolle.VptreeParameters(distance="Cosine"), Y) out = knncolle.find_knn(idx, num_neighbors=8) normed = Y / numpy.sqrt((Y**2).sum(axis=0)) ref_i, ref_d = ref_find_knn(normed, k=8) assert (ref_i == out.index).all() - assert (ref_d == out.distance).all() + assert numpy.isclose(ref_d, out.distance).all() def test_find_knn_parallel(): diff --git a/tests/test_find_neighbors.py b/tests/test_find_neighbors.py index 9e1bcc4..01e71f0 100644 --- a/tests/test_find_neighbors.py +++ b/tests/test_find_neighbors.py @@ -35,14 +35,14 @@ def test_find_neighbors_basic(helpers): out = knncolle.find_neighbors(idx, threshold=d) ref_i, ref_d = ref_find_all(Y, d) helpers.compare_lists(ref_i, out.index) - helpers.compare_lists(ref_d, out.distance) + helpers.compare_lists_close(ref_d, out.distance) idx = knncolle.build_index(knncolle.VptreeParameters(distance="Manhattan"), Y) d = numpy.median(knncolle.find_distance(idx, num_neighbors=8)) out = knncolle.find_neighbors(idx, threshold=d) ref_i, ref_d = ref_find_all(Y, d, distance="manhattan") helpers.compare_lists(ref_i, out.index) - helpers.compare_lists(ref_d, out.distance) + helpers.compare_lists_close(ref_d, out.distance) idx = knncolle.build_index(knncolle.VptreeParameters(distance="Cosine"), Y) d = numpy.median(knncolle.find_distance(idx, num_neighbors=8)) @@ -50,7 +50,7 @@ def test_find_neighbors_basic(helpers): normed = Y / numpy.sqrt((Y**2).sum(axis=0)) ref_i, ref_d = ref_find_all(normed, d) helpers.compare_lists(ref_i, out.index) - helpers.compare_lists(ref_d, out.distance) + helpers.compare_lists_close(ref_d, out.distance) def test_find_neighbors_parallel(helpers): diff --git a/tests/test_includes.py b/tests/test_includes.py new file mode 100644 index 0000000..7f50b9c --- /dev/null +++ b/tests/test_includes.py @@ -0,0 +1,8 @@ +import knncolle + + +def test_includes(): + import os + path = knncolle.includes() + assert isinstance(path, str) + assert os.path.exists(os.path.join(path, "knncolle_py.h")) diff --git a/tests/test_query_knn.py b/tests/test_query_knn.py index 72b8c9b..622ea9a 100644 --- a/tests/test_query_knn.py +++ b/tests/test_query_knn.py @@ -34,13 +34,13 @@ def test_query_knn_basic(): out = knncolle.query_knn(idx, q, num_neighbors=8) ref_i, ref_d = ref_query_knn(Y, q, k=8) assert (ref_i == out.index).all() - assert (ref_d == out.distance).all() + assert numpy.isclose(ref_d, out.distance).all() idx = knncolle.build_index(knncolle.VptreeParameters(distance="Manhattan"), Y) out = knncolle.query_knn(idx, q, num_neighbors=8) ref_i, ref_d = ref_query_knn(Y, q, k=8, distance="manhattan") assert (ref_i == out.index).all() - assert (ref_d == out.distance).all() + assert numpy.isclose(ref_d, out.distance).all() idx = knncolle.build_index(knncolle.VptreeParameters(distance="Cosine"), Y) out = knncolle.query_knn(idx, q, num_neighbors=8) @@ -48,7 +48,7 @@ def test_query_knn_basic(): qnormed = q / numpy.sqrt((q**2).sum(axis=0)) ref_i, ref_d = ref_query_knn(normed, qnormed, k=8) assert (ref_i == out.index).all() - assert (ref_d == out.distance).all() + assert numpy.isclose(ref_d, out.distance).all() def test_query_knn_parallel(): diff --git a/tests/test_query_neighbors.py b/tests/test_query_neighbors.py index 558f4ad..2260014 100644 --- a/tests/test_query_neighbors.py +++ b/tests/test_query_neighbors.py @@ -35,14 +35,14 @@ def test_query_neighbors_basic(helpers): out = knncolle.query_neighbors(idx, q, threshold=d) ref_i, ref_d = ref_query_all(Y, q, d) helpers.compare_lists(ref_i, out.index) - helpers.compare_lists(ref_d, out.distance) + helpers.compare_lists_close(ref_d, out.distance) idx = knncolle.build_index(knncolle.VptreeParameters(distance="Manhattan"), Y) d = numpy.median(knncolle.query_distance(idx, q, num_neighbors=8)) out = knncolle.query_neighbors(idx, q, threshold=d) ref_i, ref_d = ref_query_all(Y, q, d, distance="manhattan") helpers.compare_lists(ref_i, out.index) - helpers.compare_lists(ref_d, out.distance) + helpers.compare_lists_close(ref_d, out.distance) idx = knncolle.build_index(knncolle.VptreeParameters(distance="Cosine"), Y) d = numpy.median(knncolle.query_distance(idx, q, num_neighbors=8)) @@ -51,7 +51,7 @@ def test_query_neighbors_basic(helpers): qnormed = q / numpy.sqrt((q**2).sum(axis=0)) ref_i, ref_d = ref_query_all(normed, qnormed, d) helpers.compare_lists(ref_i, out.index) - helpers.compare_lists(ref_d, out.distance) + helpers.compare_lists_close(ref_d, out.distance) def test_query_neighbors_parallel(helpers):