-
Notifications
You must be signed in to change notification settings - Fork 202
Add cuvs-bench-elastic: HTTP backend for Elasticsearch GPU vector search #1907
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
336432d
80c104f
7b25521
c224a8c
e499469
087289d
e8f8a64
b787396
94a9199
4e35484
e00c288
1264a98
cd93845
4b0b1f0
ad40caa
1d2a1a9
680c2ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,10 +12,15 @@ | |
| from typing import Dict, Type, Optional | ||
| from pathlib import Path | ||
| import importlib | ||
| import importlib.metadata | ||
| import yaml | ||
|
|
||
| from .base import BenchmarkBackend | ||
|
|
||
| # Entry point group names for plugin discovery | ||
| _BACKENDS_GROUP = "cuvs_bench.backends" | ||
| _CONFIG_LOADERS_GROUP = "cuvs_bench.config_loaders" | ||
|
|
||
|
|
||
| class BackendRegistry: | ||
| """ | ||
|
|
@@ -375,10 +380,43 @@ def get_backend(name: str, config: Dict) -> BenchmarkBackend: | |
| return registry.get_backend(name, config) | ||
|
|
||
|
|
||
| def _try_load_plugin(name: str) -> None: | ||
| """ | ||
| Try to load backend and config loader from entry points for the given name. | ||
|
|
||
| Plugins register themselves when their entry point is loaded. | ||
| Raises ImportError with install instructions if the plugin requires | ||
| an optional dependency that is not installed. | ||
| """ | ||
| for group in (_BACKENDS_GROUP, _CONFIG_LOADERS_GROUP): | ||
| try: | ||
| eps = importlib.metadata.entry_points(group=group) | ||
| except TypeError: | ||
| eps = importlib.metadata.entry_points().get(group, []) | ||
| if hasattr(eps, "select"): # Python 3.10+ | ||
| eps = list(eps.select(name=name)) | ||
| else: | ||
| eps = [e for e in eps if e.name == name] | ||
| for ep in eps: | ||
| try: | ||
| ep.load()() | ||
| except ImportError as e: | ||
| if "elasticsearch" in str(e).lower() or "elasticsearch" in str(e): | ||
| raise ImportError( | ||
|
afourniernv marked this conversation as resolved.
Outdated
|
||
| f"Elasticsearch backend requires the 'elastic' extra. " | ||
| f"Install with: pip install cuvs-bench[elastic]" | ||
| ) from e | ||
| raise | ||
| return # Plugin loaded successfully | ||
|
Comment on lines
+415
to
+440
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. HIGH: At Line 440, the function returns after the first successful load in Suggested fix-def _try_load_plugin(name: str) -> None:
+def _try_load_plugin(
+ name: str,
+ groups: tuple[str, ...] = (_BACKENDS_GROUP, _CONFIG_LOADERS_GROUP),
+) -> None:
@@
- for group in (_BACKENDS_GROUP, _CONFIG_LOADERS_GROUP):
+ for group in groups:
@@
- return # Plugin loaded successfully
+ return # Plugin loaded successfully
@@
def get_config_loader(name: str) -> Type:
@@
if name not in _CONFIG_LOADER_REGISTRY:
- _try_load_plugin(name)
+ _try_load_plugin(
+ name,
+ groups=(_CONFIG_LOADERS_GROUP, _BACKENDS_GROUP),
+ )As per coding guidelines, integration errors are HIGH-priority review targets. Also applies to: 532-534 🤖 Prompt for AI AgentsSource: Coding guidelines |
||
|
|
||
|
|
||
| def get_backend_class(name: str) -> Type[BenchmarkBackend]: | ||
| """ | ||
| Get the backend class (not instance) from the global registry. | ||
|
|
||
| If the backend is not registered, attempts to load it from entry points | ||
| (e.g., optional plugins like elastic). | ||
|
|
||
| Parameters | ||
| ---------- | ||
| name : str | ||
|
|
@@ -390,10 +428,15 @@ def get_backend_class(name: str) -> Type[BenchmarkBackend]: | |
| Backend class | ||
| """ | ||
| registry = get_registry() | ||
| if name not in registry._backends: | ||
| _try_load_plugin(name) | ||
| if name not in registry._backends: | ||
| available = ", ".join(registry._backends.keys()) | ||
| hint = "" | ||
| if name == "elastic": | ||
| hint = " Install with: pip install cuvs-bench[elastic]" | ||
| raise ValueError( | ||
| f"Backend '{name}' not found. Available backends: {available or '(none)'}" | ||
| f"Backend '{name}' not found. Available backends: {available or '(none)'}.{hint}" | ||
| ) | ||
| return registry._backends[name] | ||
|
|
||
|
|
@@ -440,6 +483,9 @@ def get_config_loader(name: str) -> Type: | |
| """ | ||
| Get a registered config loader class by name. | ||
|
|
||
| If the config loader is not registered, attempts to load it from entry points | ||
| (e.g., optional plugins like elastic). | ||
|
|
||
| Parameters | ||
| ---------- | ||
| name : str | ||
|
|
@@ -455,15 +501,32 @@ def get_config_loader(name: str) -> Type: | |
| ValueError | ||
| If config loader is not registered | ||
| """ | ||
| # _CONFIG_LOADER_REGISTRY is a dictionary that maps backend names to config loader classes | ||
| if name not in _CONFIG_LOADER_REGISTRY: | ||
| _try_load_plugin(name) | ||
| if name not in _CONFIG_LOADER_REGISTRY: | ||
| available = ", ".join(_CONFIG_LOADER_REGISTRY.keys()) or "none" | ||
| hint = "" | ||
| if name == "elastic": | ||
| hint = " Install with: pip install cuvs-bench[elastic]" | ||
| raise ValueError( | ||
| f"Unknown config loader for backend: '{name}'. Available: {available}" | ||
| f"Unknown config loader for backend: '{name}'. Available: {available}.{hint}" | ||
| ) | ||
| return _CONFIG_LOADER_REGISTRY[name] | ||
|
|
||
|
|
||
| def list_config_loaders() -> Dict[str, Type]: | ||
| """Return all registered config loaders.""" | ||
| return dict(_CONFIG_LOADER_REGISTRY) | ||
|
|
||
|
|
||
| def unregister_config_loader(name: str) -> None: | ||
| """ | ||
| Unregister a config loader by name (primarily for testing). | ||
|
|
||
| Parameters | ||
| ---------- | ||
| name : str | ||
| Backend name to unregister | ||
| """ | ||
| if name in _CONFIG_LOADER_REGISTRY: | ||
| del _CONFIG_LOADER_REGISTRY[name] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| """Pytest configuration for cuvs_bench tests.""" | ||
|
|
||
|
|
||
| def pytest_configure(config): | ||
| """Register elastic plugin when elasticsearch is available. | ||
|
|
||
| Ensures elastic tests run when elasticsearch is installed, even if | ||
| cuvs-bench-elastic was not installed via pip (e.g. using PYTHONPATH). | ||
| """ | ||
| try: | ||
| import elasticsearch # noqa: F401 | ||
| except ImportError: | ||
| return | ||
|
|
||
| try: | ||
| from cuvs_bench_elastic import register | ||
| register() | ||
| except ImportError: | ||
| pass | ||
|
Comment on lines
+19
to
+23
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
echo "=== conftest registration block ==="
sed -n '8,24p' python/cuvs_bench/cuvs_bench/tests/conftest.py
echo
echo "=== in-tree elastic register symbol ==="
rg -n "def register\\(" python/cuvs_bench/cuvs_bench/backends/elasticsearch.py -C2 || true
echo
echo "=== legacy cuvs_bench_elastic module paths in repo ==="
fd -HI "cuvs_bench_elastic" python || trueRepository: rapidsai/cuvs Length of output: 809 conftest elastic plugin registration ignores the in-tree backend and silently drops failures
Suggested fix- try:
- from cuvs_bench_elastic import register
- register()
- except ImportError:
- pass
+ try:
+ from cuvs_bench.backends.elasticsearch import register
+ except ImportError:
+ try:
+ from cuvs_bench_elastic import register # legacy fallback
+ except ImportError:
+ return
+ register()🤖 Prompt for AI AgentsSource: Coding guidelines |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| # Integration Tests | ||
|
|
||
| Integration tests run against real services (e.g. Elasticsearch in Docker). | ||
|
|
||
| **Note:** The Elasticsearch integration test is currently disabled. It targets the | ||
| Elasticsearch GPU backend (cuVS-accelerated), which requires an ES GPU image, | ||
| cuVS libs from this repo, and a GPU-enabled runner. See `test_elastic_integration.py` | ||
| module docstring for details. Can be re-enabled when CI has these dependencies. | ||
|
|
||
| ## Requirements | ||
|
|
||
| - **Docker** running locally | ||
| - Optional dependencies: | ||
|
|
||
| ```bash | ||
| pip install cuvs-bench[elastic,integration] | ||
| ``` | ||
|
|
||
| Or install separately: | ||
|
|
||
| ```bash | ||
| pip install cuvs-bench[elastic] | ||
| pip install testcontainers[elasticsearch] | ||
| ``` | ||
|
|
||
| ## Running | ||
|
|
||
| ```bash | ||
| # From repo root | ||
| PYTHONPATH="python/cuvs_bench:python/cuvs_bench_elastic:$PYTHONPATH" \ | ||
| pytest python/cuvs_bench/cuvs_bench/tests/integration/ -v | ||
| ``` | ||
|
|
||
| Integration tests are skipped automatically if `testcontainers` or `elasticsearch` is not installed. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| """Integration tests for cuvs-bench backends. | ||
|
|
||
| These tests require Docker and optional dependencies: | ||
| pip install cuvs-bench[elastic,integration] | ||
| """ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| # | ||
| # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| """Pytest fixtures for integration tests. | ||
|
|
||
| Requires: pip install cuvs-bench[elastic,integration] | ||
| Requires: Docker running locally | ||
| """ | ||
|
|
||
| import pytest | ||
|
|
||
|
|
||
| def _testcontainers_available(): | ||
| try: | ||
| from testcontainers.elasticsearch import ElasticSearchContainer | ||
| return True | ||
| except ImportError: | ||
| return False | ||
|
|
||
|
|
||
| def _elasticsearch_installed(): | ||
| try: | ||
| import elasticsearch # noqa: F401 | ||
| return True | ||
| except ImportError: | ||
| return False | ||
|
|
||
|
|
||
| @pytest.fixture(scope="module") | ||
| def elasticsearch_container(): | ||
|
afourniernv marked this conversation as resolved.
Outdated
|
||
| """Start an Elasticsearch container for the duration of the test module. | ||
|
|
||
| Yields a dict with host, port, and get_url() for connecting. | ||
| Skips the test module if testcontainers or elasticsearch is not installed. | ||
| """ | ||
| if not _testcontainers_available(): | ||
| pytest.skip( | ||
| "Requires testcontainers[elasticsearch]. " | ||
| "Install with: pip install cuvs-bench[integration]" | ||
| ) | ||
| if not _elasticsearch_installed(): | ||
| pytest.skip( | ||
| "Requires elasticsearch. Install with: pip install cuvs-bench[elastic]" | ||
| ) | ||
|
|
||
| from testcontainers.elasticsearch import ElasticSearchContainer | ||
|
|
||
| # Use standard ES OSS image (no GPU in OSS; use use_gpu=False in tests) | ||
| with ElasticSearchContainer( | ||
| image="elasticsearch:8.15.0", | ||
| mem_limit="2g", | ||
| ) as container: | ||
| url = container.get_url() | ||
| # Parse host:port from URL (e.g. http://localhost:32768) | ||
| if url.startswith("http://"): | ||
| rest = url[7:] | ||
| elif url.startswith("https://"): | ||
| rest = url[8:] | ||
| else: | ||
| rest = url | ||
| if "/" in rest: | ||
| host_port = rest.split("/")[0] | ||
| else: | ||
| host_port = rest | ||
| if ":" in host_port: | ||
| host, port_str = host_port.rsplit(":", 1) | ||
| port = int(port_str) | ||
| else: | ||
| host = host_port | ||
| port = 9200 | ||
|
|
||
| yield {"host": host, "port": port, "url": url} | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: rapidsai/cuvs
Length of output: 1500
Fix
elasticextra source-of-truth drift betweendependencies.yamland generatedpyproject.tomldependencies.yamlmapsproject.optional-dependencies.elastic(viapy_elastic_cuvs_bench→bench_elastic) tocuvs-bench-elastic>=26.4.0, butpython/cuvs_bench/pyproject.tomlcurrently declareselastic = ["elasticsearch>=8.0"]—regenerating fromdependencies.yamlwill overwrite the committedpyprojectextra and create install-hint/metadata drift.Suggested fix
bench_elastic: common: - output_types: [conda, pyproject, requirements] packages: - - cuvs-bench-elastic>=26.4.0 + - elasticsearch>=8.0🤖 Prompt for AI Agents
Source: Coding guidelines