diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cd6dd03ab..c3b3ed432 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -28,10 +28,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall \ # Don't omit frame pointer in RelWithDebInfo, for additional callchain debug. set(CMAKE_CXX_FLAGS_RELWITHDEBINFO - "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-omit-frame-pointer") + "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O2 -funsafe-math-optimizations -fvect-cost-model=dynamic -fno-omit-frame-pointer") # Release flags. -set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") +set(CMAKE_CXX_FLAGS_RELEASE "-O2 -funsafe-math-optimizations -fvect-cost-model=dynamic -DNDEBUG") set(CMAKE_SHARED_LIBRARY_PREFIX "") find_package(Threads REQUIRED) @@ -149,5 +149,6 @@ add_subdirectory(algo_module) add_subdirectory(set_property_module) add_subdirectory(leiden_community_detection_module) add_subdirectory(math_module) +add_subdirectory(knn_module) add_cugraph_subdirectory(cugraph_module) diff --git a/cpp/knn_module/CMakeLists.txt b/cpp/knn_module/CMakeLists.txt new file mode 100644 index 000000000..6049ffcd7 --- /dev/null +++ b/cpp/knn_module/CMakeLists.txt @@ -0,0 +1,12 @@ +set(knn_module_src + knn_module.cpp + algorithms/knn.hpp) + +add_query_module(knn 1 "${knn_module_src}") + +# Find OpenMP +find_package(OpenMP REQUIRED) + +# Link external libraries +target_link_libraries(knn PRIVATE mg_utility fmt::fmt OpenMP::OpenMP_CXX) +target_include_directories(knn PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/cpp/knn_module/algorithms/knn.hpp b/cpp/knn_module/algorithms/knn.hpp new file mode 100644 index 000000000..2fba169fa --- /dev/null +++ b/cpp/knn_module/algorithms/knn.hpp @@ -0,0 +1,293 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace knn_util { + +// Configuration for KNN algorithm +struct KNNConfig { + int top_k = 1; + double similarity_cutoff = 0.0; + double delta_threshold = 0.001; + int max_iterations = 100; + int random_seed = 
42; // the value is being set from the knn_module.cpp file + double sample_rate = 0.5; + int concurrency = 1; + std::vector node_properties; +}; + +// Result structure for KNN +struct KNNResult { + mgp::Id node1_id; + mgp::Id node2_id; + double similarity; + + // Default constructor for std::vector compatibility + KNNResult() : similarity(0.0) { + // Initialize with default constructed Ids + node1_id = mgp::Id(); + node2_id = mgp::Id(); + } + + KNNResult(const mgp::Node &n1, const mgp::Node &n2, double sim) + : node1_id(n1.Id()), node2_id(n2.Id()), similarity(sim) {} + + KNNResult(mgp::Id id1, mgp::Id id2, double sim) : node1_id(id1), node2_id(id2), similarity(sim) {} +}; + +} // namespace knn_util + +namespace knn_algs { + +inline double CosineSimilarity(const std::vector &vec1, const std::vector &vec2, const double norm1, + const double norm2) { + const double dot = + std::transform_reduce(vec1.begin(), vec1.end(), vec2.begin(), 0.0, std::plus<>(), std::multiplies<>()); + + const double denom = norm1 * norm2; + if (denom < 1e-9) return 0.0; + return dot / denom; +} + +// Structure to hold pre-loaded node data for efficient comparison +struct NodeData { + mgp::Id node_id; + std::vector> property_values; // One vector per property + std::vector norms; // Norms for each property + + NodeData(const mgp::Node &n, const std::vector> &prop_values) + : node_id(n.Id()), property_values(prop_values) {} +}; + +// Pre-load node properties into memory for efficient comparison +std::vector PreloadNodeData(const std::vector &nodes, const knn_util::KNNConfig &config) { + std::vector node_data; + node_data.reserve(nodes.size()); + + for (const auto &node : nodes) { + // Collect all property values first + std::vector> property_values(config.node_properties.size()); + + // Load all properties into temporary vectors + for (size_t prop_idx = 0; prop_idx < config.node_properties.size(); ++prop_idx) { + const std::string &prop_name = config.node_properties[prop_idx]; + mgp::Value 
prop_value = node.GetProperty(prop_name); + std::vector values; + + if (!prop_value.IsList()) { + throw mgp::ValueException( + fmt::format("Property {} must be a list of doubles for similarity calculation", prop_name)); + } + + const auto &list = prop_value.ValueList(); + const auto size = list.Size(); + values.reserve(size); + + for (size_t i = 0; i < size; ++i) { + if (!list[i].IsDouble()) { + throw mgp::ValueException( + fmt::format("Property {} must be a list of doubles for similarity calculation", prop_name)); + } + values.push_back(list[i].ValueDouble()); + } + + if (values.empty()) { + throw mgp::ValueException(fmt::format("Invalid property values: empty lists for property {}", prop_name)); + } + + property_values[prop_idx] = values; + } + + // Create node_info at the end with the final property_values + node_data.emplace_back(node, std::move(property_values)); + } + + // Validate vector sizes + if (node_data.size() > 1) { + // Validate that all property vectors have the same size + for (size_t prop_idx = 0; prop_idx < node_data[0].property_values.size(); ++prop_idx) { + size_t expected_size = node_data[0].property_values[prop_idx].size(); + for (size_t i = 1; i < node_data.size(); ++i) { + if (node_data[i].property_values[prop_idx].size() != expected_size) { + throw mgp::ValueException("Property vectors must have the same size for similarity calculation"); + } + } + } + } + + return node_data; +} + +void PreloadNorms(std::vector &node_data, const knn_util::KNNConfig &config) { +#pragma omp parallel for + for (size_t ni = 0; ni < node_data.size(); ++ni) { + auto &node = node_data[ni]; + + // Calculate norms for each property vector + node.norms.resize(node.property_values.size(), 0.0); + for (size_t i = 0; i < node.property_values.size(); ++i) { + const auto &v = node.property_values[i]; + node.norms[i] = std::sqrt(std::inner_product(v.begin(), v.end(), v.begin(), 0.0)); + } + } +} + +// Calculate similarity between pre-loaded node data +double 
CalculateNodeSimilarity(const NodeData &node1_data, const NodeData &node2_data, + const knn_util::KNNConfig &config) { + double total_similarity = 0.0; + const size_t num_properties = node1_data.property_values.size(); + + for (size_t prop_idx = 0; prop_idx < num_properties; ++prop_idx) { + const auto &values1 = node1_data.property_values[prop_idx]; + const auto &values2 = node2_data.property_values[prop_idx]; + + // Use cosine similarity for each property + double property_similarity = + CosineSimilarity(values1, values2, node1_data.norms[prop_idx], node2_data.norms[prop_idx]); + + total_similarity += property_similarity; + } + + // Return the mean of all property similarities + return total_similarity / num_properties; +} + +// Get candidate indices for comparison, excluding self +std::vector GetCandidateIndices(const size_t node_idx, std::vector &all_indices, + const knn_util::KNNConfig &config) { + // Safe: std::mt19937 is used for reproducible simulations, not cryptography + std::mt19937 rng(config.random_seed); // NOSONAR + std::shuffle(all_indices.begin(), all_indices.end(), rng); // NOSONAR + + const size_t sample_size = static_cast(all_indices.size() * config.sample_rate); + + std::vector comparison_indices; + comparison_indices.reserve(sample_size); + for (size_t i = 0; i < sample_size; ++i) { + if (all_indices[i] != node_idx) { + comparison_indices.push_back(all_indices[i]); + } + } + + return comparison_indices; +} + +// Calculate similarity for one node against all candidates (parallel implementation) +std::vector CalculateSimilarityForNode(const size_t node_idx, + const std::vector &node_data, + const std::vector &comparison_indices, + const knn_util::KNNConfig &config) { + const auto &node1_data = node_data[node_idx]; + const auto num_of_similarities = comparison_indices.size(); + + // Pre-allocate results vector + std::vector results; + results.reserve(num_of_similarities); + + // Pre-allocate parallel results vector + std::vector 
parallel_results(num_of_similarities); + + // Set OpenMP parameters + omp_set_dynamic(0); + omp_set_num_threads(config.concurrency); + + // Parallel similarity calculation using OpenMP +#pragma omp parallel for + for (size_t i = 0; i < num_of_similarities; ++i) { + const size_t idx = comparison_indices[i]; + const auto &node2_data = node_data[idx]; + + // Calculate similarity directly + const double similarity = CalculateNodeSimilarity(node1_data, node2_data, config); + + // Store result + parallel_results[i] = knn_util::KNNResult(node1_data.node_id, node2_data.node_id, similarity); + } + + // Filter results based on similarity cutoff and add to final results + for (const auto &result : parallel_results) { + if (result.similarity >= config.similarity_cutoff) { + results.push_back(result); + } + } + + const size_t k = std::min(results.size(), static_cast(config.top_k)); + auto cmp = [](const knn_util::KNNResult &a, const knn_util::KNNResult &b) { + return a.similarity > b.similarity; // descending + }; + + if (k > 0 && results.size() > k) { + std::nth_element(results.begin(), results.begin() + k, results.end(), cmp); + results.resize(k); + } + std::sort(results.begin(), results.end(), cmp); + + return results; +} + +// Insert top-k results into final results +void InsertTopKResults(const std::vector &top_k_results, const mgp::Graph &graph, + std::vector> &final_results) { + // Convert to final results with actual nodes (results are already sorted) + for (const auto &result : top_k_results) { + const auto node1 = graph.GetNodeById(result.node1_id); + const auto node2 = graph.GetNodeById(result.node2_id); + final_results.emplace_back(node1, node2, result.similarity); + } +} + +// Main KNN algorithm implementation +std::vector> CalculateKNN(const mgp::Graph &graph, + const knn_util::KNNConfig &config) { + std::vector> results; + + // we can't reserve here because it's an iterator + std::vector nodes; + + // Collect all nodes + for (const auto &node : graph.Nodes()) { + 
nodes.push_back(node); + } + + if (nodes.size() < 2) { + // Need at least 2 nodes for similarity + return results; + } + + // Pre-load node properties into memory for efficient comparison + std::vector node_data = PreloadNodeData(nodes, config); + PreloadNorms(node_data, config); + + const auto num_nodes = nodes.size(); + + std::vector all_indices; + all_indices.reserve(num_nodes); + for (size_t i = 0; i < num_nodes; ++i) { + all_indices.push_back(i); + } + + // For each node, find its top-k most similar nodes + for (size_t i = 0; i < num_nodes; ++i) { + // Get candidate indices for comparison + const std::vector comparison_indices = GetCandidateIndices(i, all_indices, config); + + // 2. Calculate similarity for one node + const std::vector top_k_results = + CalculateSimilarityForNode(i, node_data, comparison_indices, config); + + // 3. Insert sorted top-k results + InsertTopKResults(top_k_results, graph, results); + } + + return results; +} + +} // namespace knn_algs diff --git a/cpp/knn_module/knn_module.cpp b/cpp/knn_module/knn_module.cpp new file mode 100644 index 000000000..3b2d516f8 --- /dev/null +++ b/cpp/knn_module/knn_module.cpp @@ -0,0 +1,206 @@ +#include +#include +#include +#include +#include + +#include "algorithms/knn.hpp" + +// Procedure names +constexpr std::string_view kProcedureGet = "get"; + +// Argument names +constexpr std::string_view kArgumentConfig = "config"; +constexpr std::string_view kConfigNodeProperties = "nodeProperties"; +constexpr std::string_view kConfigTopK = "topK"; +constexpr std::string_view kConfigSimilarityCutoff = "similarityCutoff"; +constexpr std::string_view kConfigDeltaThreshold = "deltaThreshold"; +constexpr std::string_view kConfigMaxIterations = "maxIterations"; +constexpr std::string_view kConfigRandomSeed = "randomSeed"; +constexpr std::string_view kConfigSampleRate = "sampleRate"; +constexpr std::string_view kConfigConcurrency = "concurrency"; + +// Return field names +constexpr std::string_view kFieldNode = 
"node"; +constexpr std::string_view kFieldNeighbour = "neighbour"; +constexpr std::string_view kFieldSimilarity = "similarity"; + +// Default parameter values +constexpr int kDefaultTopK = 1; +constexpr double kDefaultSimilarityCutoff = 0.0; +constexpr double kDefaultDeltaThreshold = 0.001; +constexpr int kDefaultMaxIterations = 100; +constexpr int kDefaultConcurrency = 1; +constexpr double kDefaultSampleRate = 0.5; + +// Helper function to validate parameter ranges +void ValidateParameterRanges(const knn_util::KNNConfig &config) { + // Validate range [0, 1] parameters + if (config.sample_rate < 0.0 || config.sample_rate > 1.0) { + throw mgp::ValueException(fmt::format("sampleRate must be between 0 and 1, got {}", config.sample_rate)); + } + + if (config.delta_threshold < 0.0 || config.delta_threshold > 1.0) { + throw mgp::ValueException(fmt::format("deltaThreshold must be between 0 and 1, got {}", config.delta_threshold)); + } + + if (config.similarity_cutoff < 0.0 || config.similarity_cutoff > 1.0) { + throw mgp::ValueException( + fmt::format("similarityCutoff must be between 0 and 1, got {}", config.similarity_cutoff)); + } + + // Validate positive integer parameters + if (config.top_k <= 0) { + throw mgp::ValueException(fmt::format("topK must be a positive integer, got {}", config.top_k)); + } + + if (config.concurrency <= 0) { + throw mgp::ValueException(fmt::format("concurrency must be a positive integer, got {}", config.concurrency)); + } + + if (config.max_iterations <= 0) { + throw mgp::ValueException(fmt::format("maxIterations must be a positive integer, got {}", config.max_iterations)); + } + + // randomSeed can be negative, so we only check it's not zero + if (config.random_seed == 0) { + throw mgp::ValueException("randomSeed cannot be 0"); + } +} + +// Helper function to parse nodeProperties configuration +std::vector ParseNodeProperties(const mgp::Value &node_props_value) { + std::vector properties; + + if (node_props_value.IsString()) { + // Single 
property name + const std::string prop_name = std::string(node_props_value.ValueString()); + if (prop_name.empty()) { + throw mgp::ValueException("Property name cannot be empty"); + } + properties.push_back(prop_name); + } else if (node_props_value.IsList()) { + // List of property names + mgp::List prop_list = node_props_value.ValueList(); + if (prop_list.Size() == 0) { + throw mgp::ValueException("Property list cannot be empty"); + } + + for (size_t i = 0; i < prop_list.Size(); ++i) { + if (prop_list[i].IsString()) { + const std::string prop_name = std::string(prop_list[i].ValueString()); + if (prop_name.empty()) { + throw mgp::ValueException(fmt::format("Property name at index {} cannot be empty", i)); + } + properties.push_back(prop_name); + } else { + throw mgp::ValueException(fmt::format("Property list element at index {} must be a string", i)); + } + } + } else { + throw mgp::ValueException( + "nodeProperties must be a string or list of strings defining properties to be used for similarity calculation. 
" + "Each property must be a list of numbers."); + } + + if (properties.empty()) { + throw mgp::ValueException("No valid properties found in nodeProperties configuration"); + } + + return properties; +} + +// Helper function to insert results into record factory +void InsertResults(const std::vector> &results, + const mgp::RecordFactory &record_factory) { + for (const auto &result : results) { + auto new_record = record_factory.NewRecord(); + new_record.Insert(kFieldNode.data(), std::get<0>(result)); + new_record.Insert(kFieldNeighbour.data(), std::get<1>(result)); + new_record.Insert(kFieldSimilarity.data(), std::get<2>(result)); + } +} + +// Get procedure - returns similarity pairs +void Get(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { + mgp::MemoryDispatcherGuard guard{memory}; + const auto record_factory = mgp::RecordFactory(result); + const auto &arguments = mgp::List(args); + const auto &config_map = arguments[0].ValueMap(); + + try { + knn_util::KNNConfig config; + + // Parse node properties - required parameter + if (!config_map.KeyExists(kConfigNodeProperties)) { + throw mgp::ValueException("Required parameter 'nodeProperties' is missing from config"); + } + + config.node_properties = ParseNodeProperties(config_map[kConfigNodeProperties]); + + // Parse other parameters with defaults + config.top_k = + config_map.KeyExists(kConfigTopK) ? static_cast(config_map[kConfigTopK].ValueInt()) : kDefaultTopK; + config.similarity_cutoff = config_map.KeyExists(kConfigSimilarityCutoff) + ? config_map[kConfigSimilarityCutoff].ValueDouble() + : kDefaultSimilarityCutoff; + config.delta_threshold = config_map.KeyExists(kConfigDeltaThreshold) + ? config_map[kConfigDeltaThreshold].ValueDouble() + : kDefaultDeltaThreshold; + config.max_iterations = config_map.KeyExists(kConfigMaxIterations) + ? 
static_cast(config_map[kConfigMaxIterations].ValueInt()) + : kDefaultMaxIterations; + // Parse concurrency first (needed for validation) + config.concurrency = config_map.KeyExists(kConfigConcurrency) + ? static_cast(config_map[kConfigConcurrency].ValueInt()) + : kDefaultConcurrency; + + // Parse random seed with validation + if (config_map.KeyExists(kConfigRandomSeed)) { + if (!config_map[kConfigRandomSeed].IsInt()) { + throw mgp::ValueException("randomSeed must be an integer"); + } + config.random_seed = static_cast(config_map[kConfigRandomSeed].ValueInt()); + } else { + // Generate completely random seed + std::random_device rd; + config.random_seed = static_cast(rd()); + } + + config.sample_rate = + config_map.KeyExists(kConfigSampleRate) ? config_map[kConfigSampleRate].ValueDouble() : kDefaultSampleRate; + + // Validate all parameter ranges + ValidateParameterRanges(config); + + const auto results = knn_algs::CalculateKNN(mgp::Graph(memgraph_graph), config); + InsertResults(results, record_factory); + } catch (const mgp::ValueException &e) { + record_factory.SetErrorMessage(e.what()); + } catch (const std::exception &e) { + record_factory.SetErrorMessage(fmt::format("Unexpected error: {}", e.what())); + } +} + +extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) { + try { + mgp::MemoryDispatcherGuard guard{memory}; + + // Return types for get procedure + std::vector returns = {mgp::Return(kFieldNode, mgp::Type::Node), + mgp::Return(kFieldNeighbour, mgp::Type::Node), + mgp::Return(kFieldSimilarity, mgp::Type::Double)}; + + // Single config parameter + std::vector parameters = {mgp::Parameter(kArgumentConfig, mgp::Type::Map)}; + + // Add the single get procedure + mgp::AddProcedure(Get, kProcedureGet, mgp::ProcedureType::Read, parameters, returns, module, memory); + + } catch (const std::exception &e) { + return 1; + } + return 0; +} + +extern "C" int mgp_shutdown_module() { return 0; } diff --git 
a/e2e/knn_test/test_knn_cosine_distance/input.cyp b/e2e/knn_test/test_knn_cosine_distance/input.cyp new file mode 100644 index 000000000..4989fee19 --- /dev/null +++ b/e2e/knn_test/test_knn_cosine_distance/input.cyp @@ -0,0 +1,2 @@ +CREATE (:Node {id:1, embedding: [1.0, 0.0, 0.5]}); +CREATE (:Node {id:2, embedding: [1.0, 0.0, -0.5]}); diff --git a/e2e/knn_test/test_knn_cosine_distance/test.yml b/e2e/knn_test/test_knn_cosine_distance/test.yml new file mode 100644 index 000000000..d9db80e5c --- /dev/null +++ b/e2e/knn_test/test_knn_cosine_distance/test.yml @@ -0,0 +1,11 @@ +query: > + CALL knn.get({nodeProperties: ["embedding"], sampleRate: 1.0}) YIELD node, neighbour, similarity + RETURN node.id as node_id, neighbour.id as neighbour_id, similarity + +output: + - node_id: 1 + neighbour_id: 2 + similarity: 0.6 + - node_id: 2 + neighbour_id: 1 + similarity: 0.6 diff --git a/e2e/knn_test/test_knn_empty/input.cyp b/e2e/knn_test/test_knn_empty/input.cyp new file mode 100644 index 000000000..e69de29bb diff --git a/e2e/knn_test/test_knn_empty/test.yml b/e2e/knn_test/test_knn_empty/test.yml new file mode 100644 index 000000000..8276187b8 --- /dev/null +++ b/e2e/knn_test/test_knn_empty/test.yml @@ -0,0 +1,5 @@ +query: > + CALL knn.get({nodeProperties: ["embedding"]}) YIELD node, neighbour, similarity + RETURN node.id as node_id, neighbour.id as neighbour_id, similarity + +output: [] diff --git a/e2e/knn_test/test_knn_error_int_passed/input.cyp b/e2e/knn_test/test_knn_error_int_passed/input.cyp new file mode 100644 index 000000000..dc41b7a15 --- /dev/null +++ b/e2e/knn_test/test_knn_error_int_passed/input.cyp @@ -0,0 +1,2 @@ +CREATE (:Node {id:1, embedding: [1, 0.0, 0.5]}); +CREATE (:Node {id:2, embedding: [1, 0.0, 0.5]}); diff --git a/e2e/knn_test/test_knn_error_int_passed/test.yml b/e2e/knn_test/test_knn_error_int_passed/test.yml new file mode 100644 index 000000000..0af204ff5 --- /dev/null +++ b/e2e/knn_test/test_knn_error_int_passed/test.yml
@@ -0,0 +1,5 @@ +query: > + CALL knn.get({nodeProperties: ["embedding"]}) YIELD node, neighbour, similarity + RETURN node.id as node_id, neighbour.id as neighbour_id, similarity + +exception: "Property embedding must be a list of doubles for similarity calculation" \ No newline at end of file diff --git a/e2e/knn_test/test_knn_neighbours/input.cyp b/e2e/knn_test/test_knn_neighbours/input.cyp new file mode 100644 index 000000000..e098cbdd8 --- /dev/null +++ b/e2e/knn_test/test_knn_neighbours/input.cyp @@ -0,0 +1,6 @@ +CREATE (:Node {id:1, embedding: [1.0, 0.0, 0.5]}); +CREATE (:Node {id:2, embedding: [1.0, 0.0, 0.5]}); +CREATE (:Node {id:3, embedding: [1.0, 0.0, 0.5]}); +CREATE (:Node {id:4, embedding: [1.0, 0.0, -0.5]}); +CREATE (:Node {id:5, embedding: [1.0, 0.0, -0.5]}); +CREATE (:Node {id:6, embedding: [1.0, 0.0, -0.5]}); diff --git a/e2e/knn_test/test_knn_neighbours/test.yml b/e2e/knn_test/test_knn_neighbours/test.yml new file mode 100644 index 000000000..689574861 --- /dev/null +++ b/e2e/knn_test/test_knn_neighbours/test.yml @@ -0,0 +1,41 @@ +query: > + CALL knn.get({nodeProperties: ["embedding"], sampleRate: 1.0, topK: 2}) YIELD node, neighbour, similarity + RETURN node.id as node_id, neighbour.id as neighbour_id, similarity ORDER BY node_id, similarity DESC, neighbour_id + +output: + - node_id: 1 + neighbour_id: 2 + similarity: 1 + - node_id: 1 + neighbour_id: 3 + similarity: 1 + - node_id: 2 + neighbour_id: 1 + similarity: 1 + - node_id: 2 + neighbour_id: 3 + similarity: 1 + - node_id: 3 + neighbour_id: 1 + similarity: 1 + - node_id: 3 + neighbour_id: 2 + similarity: 1 + - node_id: 4 + neighbour_id: 5 + similarity: 1 + - node_id: 4 + neighbour_id: 6 + similarity: 1 + - node_id: 5 + neighbour_id: 4 + similarity: 1 + - node_id: 5 + neighbour_id: 6 + similarity: 1 + - node_id: 6 + neighbour_id: 4 + similarity: 1 + - node_id: 6 + neighbour_id: 5 + similarity: 1 \ No newline at end of file diff --git a/e2e/knn_test/test_knn_simple/input.cyp
b/e2e/knn_test/test_knn_simple/input.cyp new file mode 100644 index 000000000..884801de0 --- /dev/null +++ b/e2e/knn_test/test_knn_simple/input.cyp @@ -0,0 +1,2 @@ +CREATE (:Node {id:1, embedding: [1.0, 0.0, 0.5]}); +CREATE (:Node {id:2, embedding: [1.0, 0.0, 0.5]}); diff --git a/e2e/knn_test/test_knn_simple/test.yml b/e2e/knn_test/test_knn_simple/test.yml new file mode 100644 index 000000000..52d1bd6bb --- /dev/null +++ b/e2e/knn_test/test_knn_simple/test.yml @@ -0,0 +1,11 @@ +query: > + CALL knn.get({nodeProperties: ["embedding"], sampleRate: 1.0}) YIELD node, neighbour, similarity + RETURN node.id as node_id, neighbour.id as neighbour_id, similarity + +output: + - node_id: 1 + neighbour_id: 2 + similarity: 1.0 + - node_id: 2 + neighbour_id: 1 + similarity: 1.0