From 651fe998c0422d72fd28300eac1ed2fbe60aa100 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 7 Nov 2025 19:05:25 +0000 Subject: [PATCH 01/12] Add OpenSearch store support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement OpenSearchStore to support Amazon OpenSearch and open-source OpenSearch deployments. This addresses the incompatibility with the Elasticsearch client which rejects OpenSearch servers. Changes: - Add OpenSearchStore with async/sync variants - Implement opensearch-py client integration - Add sanitization strategies for keys and collections - Include comprehensive test suite - Support connection via URL or pre-configured client - Add optional dependency: opensearch-py[async]>=2.0.0 The implementation mirrors the Elasticsearch store architecture but uses the opensearch-py client library to avoid product detection errors. Fixes #209 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: William Easton --- key-value/key-value-aio/pyproject.toml | 3 +- .../aio/stores/opensearch/__init__.py | 3 + .../key_value/aio/stores/opensearch/store.py | 567 ++++++++++++++++++ .../key_value/aio/stores/opensearch/utils.py | 133 ++++ .../tests/stores/opensearch/__init__.py | 1 + .../stores/opensearch/test_opensearch.py | 169 ++++++ .../code_gen/stores/opensearch/__init__.py | 6 + .../sync/code_gen/stores/opensearch/store.py | 486 +++++++++++++++ .../sync/code_gen/stores/opensearch/utils.py | 133 ++++ .../sync/stores/opensearch/__init__.py | 6 + .../code_gen/stores/opensearch/__init__.py | 4 + .../stores/opensearch/test_opensearch.py | 160 +++++ uv.lock | 39 +- 13 files changed, 1706 insertions(+), 4 deletions(-) create mode 100644 key-value/key-value-aio/src/key_value/aio/stores/opensearch/__init__.py create mode 100644 key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py create mode 100644 key-value/key-value-aio/src/key_value/aio/stores/opensearch/utils.py create mode 100644 key-value/key-value-aio/tests/stores/opensearch/__init__.py create mode 100644 key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py create mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/__init__.py create mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py create mode 100644 key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py create mode 100644 key-value/key-value-sync/src/key_value/sync/stores/opensearch/__init__.py create mode 100644 key-value/key-value-sync/tests/code_gen/stores/opensearch/__init__.py create mode 100644 key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py diff --git a/key-value/key-value-aio/pyproject.toml b/key-value/key-value-aio/pyproject.toml index 43aebd42..b19d10f7 100644 --- a/key-value/key-value-aio/pyproject.toml +++ b/key-value/key-value-aio/pyproject.toml @@ -40,6 +40,7 @@ valkey = ["valkey-glide>=2.1.0"] vault = ["hvac>=2.3.0", "types-hvac>=2.3.0"] memcached = ["aiomcache>=0.8.0"] elasticsearch = ["elasticsearch>=8.0.0", "aiohttp>=3.12"] +opensearch = ["opensearch-py[async]>=2.0.0"] dynamodb = ["aioboto3>=13.3.0", "types-aiobotocore-dynamodb>=2.16.0"] keyring = ["keyring>=25.6.0"] keyring-linux = ["keyring>=25.6.0", "dbus-python>=1.4.0"] @@ -67,7 +68,7 @@ env_files = [".env"] [dependency-groups] dev = [ - "py-key-value-aio[memory,disk,redis,elasticsearch,memcached,mongodb,vault,dynamodb,rocksdb]", + 
"py-key-value-aio[memory,disk,redis,elasticsearch,opensearch,memcached,mongodb,vault,dynamodb,rocksdb]", "py-key-value-aio[valkey]; platform_system != 'Windows'", "py-key-value-aio[keyring]", "py-key-value-aio[pydantic]", diff --git a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/__init__.py b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/__init__.py new file mode 100644 index 00000000..d7f02bcc --- /dev/null +++ b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/__init__.py @@ -0,0 +1,3 @@ +from key_value.aio.stores.opensearch.store import OpenSearchStore + +__all__ = ["OpenSearchStore"] diff --git a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py new file mode 100644 index 00000000..027ca87d --- /dev/null +++ b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py @@ -0,0 +1,567 @@ +import contextlib +import logging +from collections.abc import Sequence +from datetime import datetime +from typing import Any, overload + +from key_value.shared.errors import DeserializationError, SerializationError +from key_value.shared.utils.managed_entry import ManagedEntry +from key_value.shared.utils.sanitization import ( + AlwaysHashStrategy, + HashFragmentMode, + HybridSanitizationStrategy, + SanitizationStrategy, +) +from key_value.shared.utils.sanitize import ( + ALPHANUMERIC_CHARACTERS, + LOWERCASE_ALPHABET, + NUMBERS, + UPPERCASE_ALPHABET, +) +from key_value.shared.utils.serialization import SerializationAdapter +from key_value.shared.utils.time_to_live import now_as_epoch +from typing_extensions import override + +from key_value.aio.stores.base import ( + BaseContextManagerStore, + BaseCullStore, + BaseDestroyCollectionStore, + BaseEnumerateCollectionsStore, + BaseEnumerateKeysStore, + BaseStore, +) +from key_value.aio.stores.opensearch.utils import LessCapableJsonSerializer, new_bulk_action + +try: + from opensearchpy import AsyncOpenSearch + from opensearchpy.exceptions import RequestError + + from key_value.aio.stores.opensearch.utils import ( + get_aggregations_from_body, + get_body_from_response, + get_first_value_from_field_in_hit, + get_hits_from_response, + get_source_from_body, + ) +except ImportError as e: + msg = "OpenSearchStore requires py-key-value-aio[opensearch]" + raise ImportError(msg) from e + + +logger = logging.getLogger(__name__) + +DEFAULT_INDEX_PREFIX = "opensearch_kv_store" + +DEFAULT_MAPPING = { + "properties": { + "created_at": { + "type": "date", + }, + "expires_at": { + "type": "date", + }, + "collection": { + "type": "keyword", + }, + "key": { + "type": "keyword", + }, + "value": { + "properties": { + "flattened": { + "type": "flattened", + }, + }, + }, + }, +} + +DEFAULT_PAGE_SIZE = 10000 +PAGE_LIMIT = 10000 + +MAX_KEY_LENGTH = 256 +ALLOWED_KEY_CHARACTERS: str = ALPHANUMERIC_CHARACTERS + +MAX_INDEX_LENGTH = 200 +ALLOWED_INDEX_CHARACTERS: str = LOWERCASE_ALPHABET + NUMBERS + "_" + "-" + "." 
+ + +class OpenSearchSerializationAdapter(SerializationAdapter): + """Adapter for OpenSearch.""" + + def __init__(self) -> None: + """Initialize the OpenSearch adapter""" + super().__init__() + + self._date_format = "isoformat" + self._value_format = "dict" + + @override + def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]: + value = data.pop("value") + + data["value"] = { + "flattened": value, + } + + return data + + @override + def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]: + data["value"] = data.pop("value").get("flattened") + + return data + + +class OpenSearchV1KeySanitizationStrategy(AlwaysHashStrategy): + def __init__(self) -> None: + super().__init__( + hash_length=64, + ) + + +class OpenSearchV1CollectionSanitizationStrategy(HybridSanitizationStrategy): + def __init__(self) -> None: + super().__init__( + replacement_character="_", + max_length=MAX_INDEX_LENGTH, + allowed_characters=UPPERCASE_ALPHABET + ALLOWED_INDEX_CHARACTERS, + hash_fragment_mode=HashFragmentMode.ALWAYS, + ) + + +class OpenSearchStore( + BaseEnumerateCollectionsStore, BaseEnumerateKeysStore, BaseDestroyCollectionStore, BaseCullStore, BaseContextManagerStore, BaseStore +): + """An OpenSearch-based store. + + Stores collections in their own indices and stores values in Flattened fields. + + This store has specific restrictions on what is allowed in keys and collections. Keys and collections are not sanitized + by default which may result in errors when using the store. + + To avoid issues, you may want to consider leveraging the `OpenSearchV1KeySanitizationStrategy` and + `OpenSearchV1CollectionSanitizationStrategy` strategies. + """ + + _client: AsyncOpenSearch + + _index_prefix: str + + _default_collection: str | None + + _serializer: SerializationAdapter + + _key_sanitization_strategy: SanitizationStrategy + _collection_sanitization_strategy: SanitizationStrategy + + @overload + def __init__( + self, + *, + opensearch_client: AsyncOpenSearch, + index_prefix: str, + default_collection: str | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: + """Initialize the opensearch store. + + Args: + opensearch_client: The opensearch client to use. + index_prefix: The index prefix to use. Collections will be prefixed with this prefix. + default_collection: The default collection to use if no collection is provided. + key_sanitization_strategy: The sanitization strategy to use for keys. + collection_sanitization_strategy: The sanitization strategy to use for collections. + """ + + @overload + def __init__( + self, + *, + url: str, + api_key: str | None = None, + index_prefix: str, + default_collection: str | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: + """Initialize the opensearch store. + + Args: + url: The url of the opensearch cluster. + api_key: The api key to use. + index_prefix: The index prefix to use. Collections will be prefixed with this prefix. + default_collection: The default collection to use if no collection is provided. 
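+            key_sanitization_strategy: The sanitization strategy to use for keys.
+            collection_sanitization_strategy: The sanitization strategy to use for collections.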
+ """ + + def __init__( + self, + *, + opensearch_client: AsyncOpenSearch | None = None, + url: str | None = None, + api_key: str | None = None, + index_prefix: str, + default_collection: str | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: + """Initialize the opensearch store. + + Args: + opensearch_client: The opensearch client to use. + url: The url of the opensearch cluster. + api_key: The api key to use. + index_prefix: The index prefix to use. Collections will be prefixed with this prefix. + default_collection: The default collection to use if no collection is provided. + key_sanitization_strategy: The sanitization strategy to use for keys. + collection_sanitization_strategy: The sanitization strategy to use for collections. + """ + if opensearch_client is None and url is None: + msg = "Either opensearch_client or url must be provided" + raise ValueError(msg) + + if opensearch_client: + self._client = opensearch_client + elif url: + # Build kwargs for AsyncOpenSearch + client_kwargs: dict[str, Any] = { + "hosts": [url], + "http_compress": True, + "timeout": 10, + "max_retries": 3, + } + if api_key: + client_kwargs["api_key"] = api_key + + self._client = AsyncOpenSearch(**client_kwargs) + else: + msg = "Either opensearch_client or url must be provided" + raise ValueError(msg) + + LessCapableJsonSerializer.install_serializer(client=self._client) + + self._index_prefix = index_prefix.lower() + + self._serializer = OpenSearchSerializationAdapter() + + super().__init__( + default_collection=default_collection, + collection_sanitization_strategy=collection_sanitization_strategy, + key_sanitization_strategy=key_sanitization_strategy, + ) + + @override + async def _setup(self) -> None: + # OpenSearch doesn't have serverless mode, so we can skip the cluster info check + pass + + @override + async def _setup_collection(self, *, collection: str) -> None: + index_name = self._get_index_name(collection=collection) + + if await self._client.indices.exists(index=index_name): + return + + try: + _ = await self._client.indices.create(index=index_name, body={"mappings": DEFAULT_MAPPING, "settings": {}}) + except RequestError as e: + if "resource_already_exists_exception" in str(e).lower(): + return + raise + + def _get_index_name(self, collection: str) -> str: + return self._index_prefix + "-" + self._sanitize_collection(collection=collection).lower() + + def _get_document_id(self, key: str) -> str: + return self._sanitize_key(key=key) + + def _get_destination(self, *, collection: str, key: str) -> tuple[str, str]: + index_name: str = self._get_index_name(collection=collection) + document_id: str = self._get_document_id(key=key) + + return index_name, document_id + + @override + async def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: + index_name, document_id = self._get_destination(collection=collection, key=key) + + try: + opensearch_response = await self._client.get(index=index_name, id=document_id) + except Exception: + return None + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + + if not (source := get_source_from_body(body=body)): + return None + + try: + return self._serializer.load_dict(data=source) + except DeserializationError: + return None + + @override + async def _get_managed_entries(self, *, collection: str, keys: Sequence[str]) -> list[ManagedEntry | None]: + if not keys: + return [] + + # Use mget for 
efficient batch retrieval + index_name = self._get_index_name(collection=collection) + document_ids = [self._get_document_id(key=key) for key in keys] + docs = [{"_id": document_id} for document_id in document_ids] + + try: + opensearch_response = await self._client.mget(index=index_name, body={"docs": docs}) + except Exception: + return [None] * len(keys) + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + docs_result = body.get("docs", []) + + entries_by_id: dict[str, ManagedEntry | None] = {} + for doc in docs_result: + if not (doc_id := doc.get("_id")): + continue + + if "found" not in doc or not doc.get("found"): + entries_by_id[doc_id] = None + continue + + if not (source := doc.get("_source")): + entries_by_id[doc_id] = None + continue + + try: + entries_by_id[doc_id] = self._serializer.load_dict(data=source) + except DeserializationError as e: + logger.error( + "Failed to deserialize OpenSearch document in batch operation", + extra={ + "collection": collection, + "document_id": doc_id, + "error": str(e), + }, + exc_info=True, + ) + entries_by_id[doc_id] = None + + # Return entries in the same order as input keys + return [entries_by_id.get(document_id) for document_id in document_ids] + + @override + async def _put_managed_entry( + self, + *, + key: str, + collection: str, + managed_entry: ManagedEntry, + ) -> None: + index_name: str = self._get_index_name(collection=collection) + document_id: str = self._get_document_id(key=key) + + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + + try: + _ = await self._client.index( # type: ignore[reportUnknownVariableType] + index=index_name, + id=document_id, + body=document, + params={"refresh": "true"}, + ) + except Exception as e: + msg = f"Failed to serialize document: {e}" + raise SerializationError(message=msg) from e + + @override + async def _put_managed_entries( + self, + *, + collection: str, + keys: Sequence[str], + managed_entries: Sequence[ManagedEntry], + ttl: float | None, + created_at: datetime, + expires_at: datetime | None, + ) -> None: + if not keys: + return + + operations: list[dict[str, Any] | str] = [] + + index_name: str = self._get_index_name(collection=collection) + + for key, managed_entry in zip(keys, managed_entries, strict=True): + document_id: str = self._get_document_id(key=key) + + index_action: dict[str, Any] = new_bulk_action(action="index", index=index_name, document_id=document_id) + + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + + operations.extend([index_action, document]) + + try: + _ = await self._client.bulk(body=operations, params={"refresh": "true"}) # type: ignore[reportUnknownVariableType] + except Exception as e: + msg = f"Failed to serialize bulk operations: {e}" + raise SerializationError(message=msg) from e + + @override + async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: + index_name: str = self._get_index_name(collection=collection) + document_id: str = self._get_document_id(key=key) + + try: + opensearch_response = await self._client.delete(index=index_name, id=document_id) + except Exception: + return False + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + + if not (result := body.get("result")) or not isinstance(result, str): + return False + + return result == "deleted" + + @override + async def _delete_managed_entries(self, *, keys: Sequence[str], collection: str) -> int: + if not keys: + return 0 + + operations: list[dict[str, Any]] = [] + 
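+        # Bulk "delete" actions are metadata-only NDJSON lines: unlike the
+        # "index" actions built in _put_managed_entries, no document body
+        # follows each action line.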
+ for key in keys: + index_name, document_id = self._get_destination(collection=collection, key=key) + + delete_action: dict[str, Any] = new_bulk_action(action="delete", index=index_name, document_id=document_id) + + operations.append(delete_action) + + try: + opensearch_response = await self._client.bulk(body=operations) + except Exception: + return 0 + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + + # Count successful deletions + deleted_count = 0 + items = body.get("items", []) + for item in items: + delete_result = item.get("delete", {}) + if delete_result.get("result") == "deleted": + deleted_count += 1 + + return deleted_count + + @override + async def _get_collection_keys(self, *, collection: str, limit: int | None = None) -> list[str]: + """Get up to 10,000 keys in the specified collection (eventually consistent).""" + + limit = min(limit or DEFAULT_PAGE_SIZE, PAGE_LIMIT) + + try: + result = await self._client.search( + index=self._get_index_name(collection=collection), + body={ + "query": { + "term": { + "collection": collection, + }, + }, + "_source": False, + "fields": ["key"], + "size": limit, + }, + ) + except Exception: + return [] + + if not (hits := get_hits_from_response(response=result)): + return [] + + all_keys: list[str] = [] + + for hit in hits: + if not (key := get_first_value_from_field_in_hit(hit=hit, field="key", value_type=str)): + continue + + all_keys.append(key) + + return all_keys + + @override + async def _get_collection_names(self, *, limit: int | None = None) -> list[str]: + """List up to 10,000 collections in the opensearch store (eventually consistent).""" + + limit = min(limit or DEFAULT_PAGE_SIZE, PAGE_LIMIT) + + try: + search_response = await self._client.search( + index=f"{self._index_prefix}-*", + body={ + "aggs": { + "collections": { + "terms": { + "field": "collection", + "size": limit, + }, + }, + }, + "size": 0, + }, + ) + except Exception: + return [] + + body: dict[str, Any] = get_body_from_response(response=search_response) + aggregations: dict[str, Any] = get_aggregations_from_body(body=body) + + if not aggregations or "collections" not in aggregations: + return [] + + buckets: list[Any] = aggregations["collections"].get("buckets", []) + + return [bucket["key"] for bucket in buckets if isinstance(bucket, dict) and "key" in bucket] + + @override + async def _delete_collection(self, *, collection: str) -> bool: + try: + result = await self._client.delete_by_query( + index=self._get_index_name(collection=collection), + body={ + "query": { + "term": { + "collection": collection, + }, + }, + }, + ) + except Exception: + return False + + body: dict[str, Any] = get_body_from_response(response=result) + + if not (deleted := body.get("deleted")) or not isinstance(deleted, int): + return False + + return deleted > 0 + + @override + async def _cull(self) -> None: + ms_epoch = int(now_as_epoch() * 1000) + with contextlib.suppress(Exception): + _ = await self._client.delete_by_query( + index=f"{self._index_prefix}-*", + body={ + "query": { + "range": { + "expires_at": {"lt": ms_epoch}, + }, + }, + }, + ) + + @override + async def _close(self) -> None: + await self._client.close() diff --git a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/utils.py b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/utils.py new file mode 100644 index 00000000..ebc75a9c --- /dev/null +++ b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/utils.py @@ -0,0 +1,133 @@ +from typing import Any, TypeVar, cast + 
+from opensearchpy import AsyncOpenSearch +from opensearchpy.serializer import JSONSerializer + + +def get_body_from_response(response: Any) -> dict[str, Any]: + if not response: + return {} + + if isinstance(response, dict): + return cast("dict[str, Any]", response) + + # OpenSearch response objects might have a body attribute + if hasattr(response, "body"): + body = response.body + if not body: + return {} + if isinstance(body, dict): + return cast("dict[str, Any]", body) + + return {} + + +def get_source_from_body(body: dict[str, Any]) -> dict[str, Any]: + if not (source := body.get("_source")): + return {} + + if not isinstance(source, dict) or not all(isinstance(key, str) for key in source): # pyright: ignore[reportUnknownVariableType] + return {} + + return cast("dict[str, Any]", source) + + +def get_aggregations_from_body(body: dict[str, Any]) -> dict[str, Any]: + if not (aggregations := body.get("aggregations")): + return {} + + if not isinstance(aggregations, dict) or not all( + isinstance(key, str) + for key in aggregations # pyright: ignore[reportUnknownVariableType] + ): + return {} + + return cast("dict[str, Any]", aggregations) + + +def get_hits_from_response(response: Any) -> list[dict[str, Any]]: + body = get_body_from_response(response=response) + + if not body: + return [] + + if not (hits := body.get("hits")): + return [] + + hits_dict: dict[str, Any] = cast("dict[str, Any]", hits) + + if not (hits_list := hits_dict.get("hits")): + return [] + + if not all(isinstance(hit, dict) for hit in hits_list): # pyright: ignore[reportAny] + return [] + + hits_list_dict: list[dict[str, Any]] = cast("list[dict[str, Any]]", hits_list) + + return hits_list_dict + + +T = TypeVar("T") + + +def get_fields_from_hit(hit: dict[str, Any]) -> dict[str, list[Any]]: + if not (fields := hit.get("fields")): + return {} + + if not isinstance(fields, dict) or not all(isinstance(key, str) for key in fields): # pyright: ignore[reportUnknownVariableType] + msg = f"Fields in hit {hit} is not a dict" + raise TypeError(msg) + + if not all(isinstance(value, list) for value in fields.values()): # pyright: ignore[reportUnknownVariableType] + msg = f"Fields in hit {hit} is not a dict of lists" + raise TypeError(msg) + + return cast("dict[str, list[Any]]", fields) + + +def get_field_from_hit(hit: dict[str, Any], field: str) -> list[Any]: + if not (fields := get_fields_from_hit(hit=hit)): + return [] + + if not (value := fields.get(field)): + msg = f"Field {field} is not in hit {hit}" + raise TypeError(msg) + + return value + + +def get_values_from_field_in_hit(hit: dict[str, Any], field: str, value_type: type[T]) -> list[T]: + if not (value := get_field_from_hit(hit=hit, field=field)): + msg = f"Field {field} is not in hit {hit}" + raise TypeError(msg) + + if not all(isinstance(item, value_type) for item in value): # pyright: ignore[reportAny] + msg = f"Field {field} in hit {hit} is not a list of {value_type}" + raise TypeError(msg) + + return cast("list[T]", value) + + +def get_first_value_from_field_in_hit(hit: dict[str, Any], field: str, value_type: type[T]) -> T: + values: list[T] = get_values_from_field_in_hit(hit=hit, field=field, value_type=value_type) + if len(values) != 1: + msg: str = f"Field {field} in hit {hit} is not a single value" + raise TypeError(msg) + return values[0] + + +def new_bulk_action(action: str, index: str, document_id: str) -> dict[str, Any]: + return {action: {"_index": index, "_id": document_id}} + + +class LessCapableJsonSerializer(JSONSerializer): + """A JSON Serializer that 
doesn't try to be smart with datetime, floats, etc.""" + + def default(self, data: Any) -> Any: # type: ignore[reportIncompatibleMethodOverride] + msg = f"Unable to serialize to JSON: {data!r} (type: {type(data).__name__})" + raise TypeError(msg) + + @classmethod + def install_serializer(cls, client: AsyncOpenSearch) -> None: + # OpenSearch uses a different serializer architecture + client.transport.serializer = cls() # type: ignore[reportUnknownMemberType] diff --git a/key-value/key-value-aio/tests/stores/opensearch/__init__.py b/key-value/key-value-aio/tests/stores/opensearch/__init__.py new file mode 100644 index 00000000..21c381ad --- /dev/null +++ b/key-value/key-value-aio/tests/stores/opensearch/__init__.py @@ -0,0 +1 @@ +# OpenSearch store tests diff --git a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py new file mode 100644 index 00000000..1b446805 --- /dev/null +++ b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py @@ -0,0 +1,169 @@ +import contextlib +from collections.abc import AsyncGenerator +from datetime import datetime, timedelta, timezone + +import pytest +from dirty_equals import IsFloat +from inline_snapshot import snapshot +from key_value.shared.stores.wait import async_wait_for_true +from key_value.shared.utils.managed_entry import ManagedEntry +from opensearchpy import AsyncOpenSearch +from typing_extensions import override + +from key_value.aio.stores.base import BaseStore +from key_value.aio.stores.opensearch import OpenSearchStore +from key_value.aio.stores.opensearch.store import ( + OpenSearchSerializationAdapter, + OpenSearchV1CollectionSanitizationStrategy, + OpenSearchV1KeySanitizationStrategy, +) +from tests.conftest import docker_container, should_skip_docker_tests +from tests.stores.base import BaseStoreTests, ContextManagerStoreTestMixin + +TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB +OS_HOST = "localhost" +OS_PORT = 9200 +OS_URL = f"http://{OS_HOST}:{OS_PORT}" +OS_CONTAINER_PORT = 9200 + +WAIT_FOR_OPENSEARCH_TIMEOUT = 30 + +OPENSEARCH_VERSIONS_TO_TEST = [ + "2.11.0", # Released 2023 + "2.18.0", # Recent stable version +] + + +def get_opensearch_client() -> AsyncOpenSearch: + return AsyncOpenSearch(hosts=[OS_URL], use_ssl=False, verify_certs=False) + + +async def ping_opensearch() -> bool: + os_client: AsyncOpenSearch = get_opensearch_client() + + async with os_client: + try: + return await os_client.ping() + except Exception: + return False + + +async def cleanup_opensearch_indices(opensearch_client: AsyncOpenSearch): + with contextlib.suppress(Exception): + indices = await opensearch_client.indices.get(index="opensearch-kv-store-e2e-test-*") + for index in indices: + _ = await opensearch_client.indices.delete(index=index) + + +class OpenSearchFailedToStartError(Exception): + pass + + +def test_managed_entry_document_conversion(): + created_at = datetime(year=2025, month=1, day=1, hour=0, minute=0, second=0, tzinfo=timezone.utc) + expires_at = created_at + timedelta(seconds=10) + + managed_entry = ManagedEntry(value={"test": "test"}, created_at=created_at, expires_at=expires_at) + adapter = OpenSearchSerializationAdapter() + document = adapter.dump_dict(entry=managed_entry) + + assert document == snapshot( + { + "value": {"flattened": {"test": "test"}}, + "created_at": "2025-01-01T00:00:00+00:00", + "expires_at": "2025-01-01T00:00:10+00:00", + } + ) + + round_trip_managed_entry = adapter.load_dict(data=document) + + assert round_trip_managed_entry.value ==
managed_entry.value + assert round_trip_managed_entry.created_at == created_at + assert round_trip_managed_entry.ttl == IsFloat(lt=0) + assert round_trip_managed_entry.expires_at == expires_at + + +@pytest.mark.skipif(should_skip_docker_tests(), reason="Docker is not available") +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. Use at your own risk.") +class TestOpenSearchStore(ContextManagerStoreTestMixin, BaseStoreTests): + @pytest.fixture(autouse=True, scope="session", params=OPENSEARCH_VERSIONS_TO_TEST) + async def setup_opensearch(self, request: pytest.FixtureRequest) -> AsyncGenerator[None, None]: + version = request.param + os_image = f"opensearchproject/opensearch:{version}" + + with docker_container( + f"opensearch-test-{version}", + os_image, + {str(OS_CONTAINER_PORT): OS_PORT}, + { + "discovery.type": "single-node", + "DISABLE_SECURITY_PLUGIN": "true", + "OPENSEARCH_INITIAL_ADMIN_PASSWORD": "TestPassword123!", + }, + ): + if not await async_wait_for_true(bool_fn=ping_opensearch, tries=WAIT_FOR_OPENSEARCH_TIMEOUT, wait_time=2): + msg = f"OpenSearch {version} failed to start" + raise OpenSearchFailedToStartError(msg) + + yield + + @pytest.fixture + async def opensearch_client(self, setup_opensearch: None) -> AsyncGenerator[AsyncOpenSearch, None]: + os_client = get_opensearch_client() + + async with os_client: + await cleanup_opensearch_indices(opensearch_client=os_client) + + yield os_client + + @override + @pytest.fixture + async def default_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, + index_prefix="opensearch-kv-store-e2e-test", + default_collection="test-collection", + ) + + async with store: + yield store + + @override + @pytest.fixture + async def collection_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, + index_prefix="opensearch-kv-store-e2e-test", + default_collection="test-collection", + collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), + ) + + async with store: + yield store + + @override + @pytest.fixture + async def key_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, + index_prefix="opensearch-kv-store-e2e-test", + default_collection="test-collection", + key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), + ) + + async with store: + yield store + + @override + @pytest.fixture + async def fully_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, + index_prefix="opensearch-kv-store-e2e-test", + default_collection="test-collection", + key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), + collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), + ) + + async with store: + yield store diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/__init__.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/__init__.py new file mode 100644 index 00000000..52af1c01 --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/__init__.py @@ -0,0 +1,6 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the 
original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +from key_value.sync.code_gen.stores.opensearch.store import OpenSearchStore + +__all__ = ["OpenSearchStore"] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py new file mode 100644 index 00000000..92945c7a --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py @@ -0,0 +1,486 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'store.py' +# DO NOT CHANGE! Change the original file instead. +import contextlib +import logging +from collections.abc import Sequence +from datetime import datetime +from typing import Any, overload + +from key_value.shared.errors import DeserializationError, SerializationError +from key_value.shared.utils.managed_entry import ManagedEntry +from key_value.shared.utils.sanitization import AlwaysHashStrategy, HashFragmentMode, HybridSanitizationStrategy, SanitizationStrategy +from key_value.shared.utils.sanitize import ALPHANUMERIC_CHARACTERS, LOWERCASE_ALPHABET, NUMBERS, UPPERCASE_ALPHABET +from key_value.shared.utils.serialization import SerializationAdapter +from key_value.shared.utils.time_to_live import now_as_epoch +from typing_extensions import override + +from key_value.sync.code_gen.stores.base import ( + BaseContextManagerStore, + BaseCullStore, + BaseDestroyCollectionStore, + BaseEnumerateCollectionsStore, + BaseEnumerateKeysStore, + BaseStore, +) +from key_value.sync.code_gen.stores.opensearch.utils import LessCapableJsonSerializer, new_bulk_action + +try: + from opensearchpy import AsyncOpenSearch + from opensearchpy.exceptions import RequestError + + from key_value.sync.code_gen.stores.opensearch.utils import ( + get_aggregations_from_body, + get_body_from_response, + get_first_value_from_field_in_hit, + get_hits_from_response, + get_source_from_body, + ) +except ImportError as e: + msg = "OpenSearchStore requires py-key-value-aio[opensearch]" + raise ImportError(msg) from e + +logger = logging.getLogger(__name__) + +DEFAULT_INDEX_PREFIX = "opensearch_kv_store" + +DEFAULT_MAPPING = { + "properties": { + "created_at": {"type": "date"}, + "expires_at": {"type": "date"}, + "collection": {"type": "keyword"}, + "key": {"type": "keyword"}, + "value": {"properties": {"flattened": {"type": "flattened"}}}, + } +} + +DEFAULT_PAGE_SIZE = 10000 +PAGE_LIMIT = 10000 + +MAX_KEY_LENGTH = 256 +ALLOWED_KEY_CHARACTERS: str = ALPHANUMERIC_CHARACTERS + +MAX_INDEX_LENGTH = 200 +ALLOWED_INDEX_CHARACTERS: str = LOWERCASE_ALPHABET + NUMBERS + "_" + "-" + "." 
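+
+# Illustrative shape of a stored document, mirroring the round-trip snapshot in
+# the test suite (DEFAULT_MAPPING above also defines "collection" and "key"
+# keyword fields, which the enumeration queries below rely on):
+#
+#     {
+#         "created_at": "2025-01-01T00:00:00+00:00",
+#         "expires_at": "2025-01-01T00:00:10+00:00",
+#         "value": {"flattened": {"test": "test"}},
+#     }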
+ + +class OpenSearchSerializationAdapter(SerializationAdapter): + """Adapter for OpenSearch.""" + + def __init__(self) -> None: + """Initialize the OpenSearch adapter""" + super().__init__() + + self._date_format = "isoformat" + self._value_format = "dict" + + @override + def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]: + value = data.pop("value") + + data["value"] = {"flattened": value} + + return data + + @override + def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]: + data["value"] = data.pop("value").get("flattened") + + return data + + +class OpenSearchV1KeySanitizationStrategy(AlwaysHashStrategy): + def __init__(self) -> None: + super().__init__(hash_length=64) + + +class OpenSearchV1CollectionSanitizationStrategy(HybridSanitizationStrategy): + def __init__(self) -> None: + super().__init__( + replacement_character="_", + max_length=MAX_INDEX_LENGTH, + allowed_characters=UPPERCASE_ALPHABET + ALLOWED_INDEX_CHARACTERS, + hash_fragment_mode=HashFragmentMode.ALWAYS, + ) + + +class OpenSearchStore( + BaseEnumerateCollectionsStore, BaseEnumerateKeysStore, BaseDestroyCollectionStore, BaseCullStore, BaseContextManagerStore, BaseStore +): + """An OpenSearch-based store. + + Stores collections in their own indices and stores values in Flattened fields. + + This store has specific restrictions on what is allowed in keys and collections. Keys and collections are not sanitized + by default which may result in errors when using the store. + + To avoid issues, you may want to consider leveraging the `OpenSearchV1KeySanitizationStrategy` and + `OpenSearchV1CollectionSanitizationStrategy` strategies. + """ + + _client: AsyncOpenSearch + + _index_prefix: str + + _default_collection: str | None + + _serializer: SerializationAdapter + + _key_sanitization_strategy: SanitizationStrategy + _collection_sanitization_strategy: SanitizationStrategy + + @overload + def __init__( + self, + *, + opensearch_client: AsyncOpenSearch, + index_prefix: str, + default_collection: str | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: + """Initialize the opensearch store. + + Args: + opensearch_client: The opensearch client to use. + index_prefix: The index prefix to use. Collections will be prefixed with this prefix. + default_collection: The default collection to use if no collection is provided. + key_sanitization_strategy: The sanitization strategy to use for keys. + collection_sanitization_strategy: The sanitization strategy to use for collections. + """ + + @overload + def __init__( + self, + *, + url: str, + api_key: str | None = None, + index_prefix: str, + default_collection: str | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: + """Initialize the opensearch store. + + Args: + url: The url of the opensearch cluster. + api_key: The api key to use. + index_prefix: The index prefix to use. Collections will be prefixed with this prefix. + default_collection: The default collection to use if no collection is provided. 
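+            key_sanitization_strategy: The sanitization strategy to use for keys.
+            collection_sanitization_strategy: The sanitization strategy to use for collections.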
+ """ + + def __init__( + self, + *, + opensearch_client: AsyncOpenSearch | None = None, + url: str | None = None, + api_key: str | None = None, + index_prefix: str, + default_collection: str | None = None, + key_sanitization_strategy: SanitizationStrategy | None = None, + collection_sanitization_strategy: SanitizationStrategy | None = None, + ) -> None: + """Initialize the opensearch store. + + Args: + opensearch_client: The opensearch client to use. + url: The url of the opensearch cluster. + api_key: The api key to use. + index_prefix: The index prefix to use. Collections will be prefixed with this prefix. + default_collection: The default collection to use if no collection is provided. + key_sanitization_strategy: The sanitization strategy to use for keys. + collection_sanitization_strategy: The sanitization strategy to use for collections. + """ + if opensearch_client is None and url is None: + msg = "Either opensearch_client or url must be provided" + raise ValueError(msg) + + if opensearch_client: + self._client = opensearch_client + elif url: + # Build kwargs for AsyncOpenSearch + client_kwargs: dict[str, Any] = {"hosts": [url], "http_compress": True, "timeout": 10, "max_retries": 3} + if api_key: + client_kwargs["api_key"] = api_key + + self._client = AsyncOpenSearch(**client_kwargs) + else: + msg = "Either opensearch_client or url must be provided" + raise ValueError(msg) + + LessCapableJsonSerializer.install_serializer(client=self._client) + + self._index_prefix = index_prefix.lower() + + self._serializer = OpenSearchSerializationAdapter() + + super().__init__( + default_collection=default_collection, + collection_sanitization_strategy=collection_sanitization_strategy, + key_sanitization_strategy=key_sanitization_strategy, + ) + + @override + def _setup(self) -> None: + # OpenSearch doesn't have serverless mode, so we can skip the cluster info check + pass + + @override + def _setup_collection(self, *, collection: str) -> None: + index_name = self._get_index_name(collection=collection) + + if self._client.indices.exists(index=index_name): + return + + try: + _ = self._client.indices.create(index=index_name, body={"mappings": DEFAULT_MAPPING, "settings": {}}) + except RequestError as e: + if "resource_already_exists_exception" in str(e).lower(): + return + raise + + def _get_index_name(self, collection: str) -> str: + return self._index_prefix + "-" + self._sanitize_collection(collection=collection).lower() + + def _get_document_id(self, key: str) -> str: + return self._sanitize_key(key=key) + + def _get_destination(self, *, collection: str, key: str) -> tuple[str, str]: + index_name: str = self._get_index_name(collection=collection) + document_id: str = self._get_document_id(key=key) + + return (index_name, document_id) + + @override + def _get_managed_entry(self, *, key: str, collection: str) -> ManagedEntry | None: + (index_name, document_id) = self._get_destination(collection=collection, key=key) + + try: + opensearch_response = self._client.get(index=index_name, id=document_id) + except Exception: + return None + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + + if not (source := get_source_from_body(body=body)): + return None + + try: + return self._serializer.load_dict(data=source) + except DeserializationError: + return None + + @override + def _get_managed_entries(self, *, collection: str, keys: Sequence[str]) -> list[ManagedEntry | None]: + if not keys: + return [] + + # Use mget for efficient batch retrieval + index_name = 
self._get_index_name(collection=collection) + document_ids = [self._get_document_id(key=key) for key in keys] + docs = [{"_id": document_id} for document_id in document_ids] + + try: + opensearch_response = self._client.mget(index=index_name, body={"docs": docs}) + except Exception: + return [None] * len(keys) + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + docs_result = body.get("docs", []) + + entries_by_id: dict[str, ManagedEntry | None] = {} + for doc in docs_result: + if not (doc_id := doc.get("_id")): + continue + + if "found" not in doc or not doc.get("found"): + entries_by_id[doc_id] = None + continue + + if not (source := doc.get("_source")): + entries_by_id[doc_id] = None + continue + + try: + entries_by_id[doc_id] = self._serializer.load_dict(data=source) + except DeserializationError as e: + logger.error( + "Failed to deserialize OpenSearch document in batch operation", + extra={"collection": collection, "document_id": doc_id, "error": str(e)}, + exc_info=True, + ) + entries_by_id[doc_id] = None + + # Return entries in the same order as input keys + return [entries_by_id.get(document_id) for document_id in document_ids] + + @override + def _put_managed_entry(self, *, key: str, collection: str, managed_entry: ManagedEntry) -> None: + index_name: str = self._get_index_name(collection=collection) + document_id: str = self._get_document_id(key=key) + + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + + try: # type: ignore[reportUnknownVariableType] + _ = self._client.index(index=index_name, id=document_id, body=document, params={"refresh": "true"}) + except Exception as e: + msg = f"Failed to serialize document: {e}" + raise SerializationError(message=msg) from e + + @override + def _put_managed_entries( + self, + *, + collection: str, + keys: Sequence[str], + managed_entries: Sequence[ManagedEntry], + ttl: float | None, + created_at: datetime, + expires_at: datetime | None, + ) -> None: + if not keys: + return + + operations: list[dict[str, Any] | str] = [] + + index_name: str = self._get_index_name(collection=collection) + + for key, managed_entry in zip(keys, managed_entries, strict=True): + document_id: str = self._get_document_id(key=key) + + index_action: dict[str, Any] = new_bulk_action(action="index", index=index_name, document_id=document_id) + + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + + operations.extend([index_action, document]) + + try: + _ = self._client.bulk(body=operations, params={"refresh": "true"}) # type: ignore[reportUnknownVariableType] + except Exception as e: + msg = f"Failed to serialize bulk operations: {e}" + raise SerializationError(message=msg) from e + + @override + def _delete_managed_entry(self, *, key: str, collection: str) -> bool: + index_name: str = self._get_index_name(collection=collection) + document_id: str = self._get_document_id(key=key) + + try: + opensearch_response = self._client.delete(index=index_name, id=document_id) + except Exception: + return False + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + + if not (result := body.get("result")) or not isinstance(result, str): + return False + + return result == "deleted" + + @override + def _delete_managed_entries(self, *, keys: Sequence[str], collection: str) -> int: + if not keys: + return 0 + + operations: list[dict[str, Any]] = [] + + for key in keys: + (index_name, document_id) = self._get_destination(collection=collection, key=key) + + delete_action: 
dict[str, Any] = new_bulk_action(action="delete", index=index_name, document_id=document_id) + + operations.append(delete_action) + + try: + opensearch_response = self._client.bulk(body=operations) + except Exception: + return 0 + + body: dict[str, Any] = get_body_from_response(response=opensearch_response) + + # Count successful deletions + deleted_count = 0 + items = body.get("items", []) + for item in items: + delete_result = item.get("delete", {}) + if delete_result.get("result") == "deleted": + deleted_count += 1 + + return deleted_count + + @override + def _get_collection_keys(self, *, collection: str, limit: int | None = None) -> list[str]: + """Get up to 10,000 keys in the specified collection (eventually consistent).""" + + limit = min(limit or DEFAULT_PAGE_SIZE, PAGE_LIMIT) + + try: + result = self._client.search( + index=self._get_index_name(collection=collection), + body={"query": {"term": {"collection": collection}}, "_source": False, "fields": ["key"], "size": limit}, + ) + except Exception: + return [] + + if not (hits := get_hits_from_response(response=result)): + return [] + + all_keys: list[str] = [] + + for hit in hits: + if not (key := get_first_value_from_field_in_hit(hit=hit, field="key", value_type=str)): + continue + + all_keys.append(key) + + return all_keys + + @override + def _get_collection_names(self, *, limit: int | None = None) -> list[str]: + """List up to 10,000 collections in the opensearch store (eventually consistent).""" + + limit = min(limit or DEFAULT_PAGE_SIZE, PAGE_LIMIT) + + try: + search_response = self._client.search( + index=f"{self._index_prefix}-*", + body={"aggs": {"collections": {"terms": {"field": "collection", "size": limit}}}, "size": 0}, + ) + except Exception: + return [] + + body: dict[str, Any] = get_body_from_response(response=search_response) + aggregations: dict[str, Any] = get_aggregations_from_body(body=body) + + if not aggregations or "collections" not in aggregations: + return [] + + buckets: list[Any] = aggregations["collections"].get("buckets", []) + + return [bucket["key"] for bucket in buckets if isinstance(bucket, dict) and "key" in bucket] + + @override + def _delete_collection(self, *, collection: str) -> bool: + try: + result = self._client.delete_by_query( + index=self._get_index_name(collection=collection), body={"query": {"term": {"collection": collection}}} + ) + except Exception: + return False + + body: dict[str, Any] = get_body_from_response(response=result) + + if not (deleted := body.get("deleted")) or not isinstance(deleted, int): + return False + + return deleted > 0 + + @override + def _cull(self) -> None: + ms_epoch = int(now_as_epoch() * 1000) + with contextlib.suppress(Exception): + _ = self._client.delete_by_query(index=f"{self._index_prefix}-*", body={"query": {"range": {"expires_at": {"lt": ms_epoch}}}}) + + @override + def _close(self) -> None: + self._client.close() diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py new file mode 100644 index 00000000..8c5a8fef --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py @@ -0,0 +1,133 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'utils.py' +# DO NOT CHANGE! Change the original file instead. 
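+# The helpers below defensively unpack opensearch-py responses, which may
+# arrive as plain dicts or as objects exposing a ``body`` attribute depending
+# on the transport in use.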
+from typing import Any, TypeVar, cast + +from opensearchpy import AsyncOpenSearch +from opensearchpy.serializer import JSONSerializer + + +def get_body_from_response(response: Any) -> dict[str, Any]: + if not response: + return {} + + if isinstance(response, dict): + return cast("dict[str, Any]", response) + + # OpenSearch response objects might have a body attribute + if hasattr(response, "body"): + body = response.body + if not body: + return {} + if isinstance(body, dict): + return cast("dict[str, Any]", body) + + return {} + + +def get_source_from_body(body: dict[str, Any]) -> dict[str, Any]: + if not (source := body.get("_source")): + return {} + + if not isinstance(source, dict) or not all(isinstance(key, str) for key in source): # pyright: ignore[reportUnknownVariableType] + return {} + + return cast("dict[str, Any]", source) + + +def get_aggregations_from_body(body: dict[str, Any]) -> dict[str, Any]: + if not (aggregations := body.get("aggregations")): + return {} + + if not isinstance(aggregations, dict) or not all(isinstance(key, str) for key in aggregations): # pyright: ignore[reportUnknownVariableType] + return {} + + return cast("dict[str, Any]", aggregations) + + +def get_hits_from_response(response: Any) -> list[dict[str, Any]]: + body = get_body_from_response(response=response) + + if not body: + return [] + + if not (hits := body.get("hits")): + return [] + + hits_dict: dict[str, Any] = cast("dict[str, Any]", hits) + + if not (hits_list := hits_dict.get("hits")): + return [] + + if not all(isinstance(hit, dict) for hit in hits_list): # pyright: ignore[reportAny] + return [] + + hits_list_dict: list[dict[str, Any]] = cast("list[dict[str, Any]]", hits_list) + + return hits_list_dict + + +T = TypeVar("T") + + +def get_fields_from_hit(hit: dict[str, Any]) -> dict[str, list[Any]]: + if not (fields := hit.get("fields")): + return {} + + if not isinstance(fields, dict) or not all(isinstance(key, str) for key in fields): # pyright: ignore[reportUnknownVariableType] + msg = f"Fields in hit {hit} is not a dict" + raise TypeError(msg) + + if not all(isinstance(value, list) for value in fields.values()): # pyright: ignore[reportUnknownVariableType] + msg = f"Fields in hit {hit} is not a dict of lists" + raise TypeError(msg) + + return cast("dict[str, list[Any]]", fields) + + +def get_field_from_hit(hit: dict[str, Any], field: str) -> list[Any]: + if not (fields := get_fields_from_hit(hit=hit)): + return [] + + if not (value := fields.get(field)): + msg = f"Field {field} is not in hit {hit}" + raise TypeError(msg) + + return value + + +def get_values_from_field_in_hit(hit: dict[str, Any], field: str, value_type: type[T]) -> list[T]: + if not (value := get_field_from_hit(hit=hit, field=field)): + msg = f"Field {field} is not in hit {hit}" + raise TypeError(msg) + + if not all(isinstance(item, value_type) for item in value): # pyright: ignore[reportAny] + msg = f"Field {field} in hit {hit} is not a list of {value_type}" + raise TypeError(msg) + + return cast("list[T]", value) + + +def get_first_value_from_field_in_hit(hit: dict[str, Any], field: str, value_type: type[T]) -> T: + values: list[T] = get_values_from_field_in_hit(hit=hit, field=field, value_type=value_type) + if len(values) != 1: + msg: str = f"Field {field} in hit {hit} is not a single value" + raise TypeError(msg) + return values[0] + + +def new_bulk_action(action: str, index: str, document_id: str) -> dict[str, Any]: + return {action: {"_index": index, "_id": document_id}} + + +class 
LessCapableJsonSerializer(JSONSerializer): + """A JSON Serializer that doesn't try to be smart with datetime, floats, etc.""" + + def default(self, data: Any) -> Any: # type: ignore[reportIncompatibleMethodOverride] + msg = f"Unable to serialize to JSON: {data!r} (type: {type(data).__name__})" + raise TypeError(msg) + + @classmethod + def install_serializer(cls, client: AsyncOpenSearch) -> None: + # OpenSearch uses a different serializer architecture + client.transport.serializer = cls() # type: ignore[reportUnknownMemberType] diff --git a/key-value/key-value-sync/src/key_value/sync/stores/opensearch/__init__.py b/key-value/key-value-sync/src/key_value/sync/stores/opensearch/__init__.py new file mode 100644 index 00000000..52af1c01 --- /dev/null +++ b/key-value/key-value-sync/src/key_value/sync/stores/opensearch/__init__.py @@ -0,0 +1,6 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +from key_value.sync.code_gen.stores.opensearch.store import OpenSearchStore + +__all__ = ["OpenSearchStore"] diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/__init__.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/__init__.py new file mode 100644 index 00000000..7f876cc2 --- /dev/null +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/__init__.py @@ -0,0 +1,4 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file '__init__.py' +# DO NOT CHANGE! Change the original file instead. +# OpenSearch store tests diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py new file mode 100644 index 00000000..f0d75837 --- /dev/null +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -0,0 +1,160 @@ +# WARNING: this file is auto-generated by 'build_sync_library.py' +# from the original file 'test_opensearch.py' +# DO NOT CHANGE! Change the original file instead.
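+# Mirrors the async suite in key-value-aio/tests/stores/opensearch/test_opensearch.py.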
+import contextlib +from collections.abc import Generator +from datetime import datetime, timedelta, timezone + +import pytest +from dirty_equals import IsFloat +from inline_snapshot import snapshot +from key_value.shared.stores.wait import wait_for_true +from key_value.shared.utils.managed_entry import ManagedEntry +from opensearchpy import AsyncOpenSearch +from typing_extensions import override + +from key_value.sync.code_gen.stores.base import BaseStore +from key_value.sync.code_gen.stores.opensearch import OpenSearchStore +from key_value.sync.code_gen.stores.opensearch.store import ( + OpenSearchSerializationAdapter, + OpenSearchV1CollectionSanitizationStrategy, + OpenSearchV1KeySanitizationStrategy, +) +from tests.code_gen.conftest import docker_container, should_skip_docker_tests +from tests.code_gen.stores.base import BaseStoreTests, ContextManagerStoreTestMixin + +TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB +OS_HOST = "localhost" +OS_PORT = 9200 +OS_URL = f"http://{OS_HOST}:{OS_PORT}" +OS_CONTAINER_PORT = 9200 + +WAIT_FOR_OPENSEARCH_TIMEOUT = 30 +# Released 2023 +# Recent stable version +OPENSEARCH_VERSIONS_TO_TEST = ["2.11.0", "2.18.0"] + + +def get_opensearch_client() -> AsyncOpenSearch: + return AsyncOpenSearch(hosts=[OS_URL], use_ssl=False, verify_certs=False) + + +def ping_opensearch() -> bool: + os_client: AsyncOpenSearch = get_opensearch_client() + + with os_client: + try: + return os_client.ping() + except Exception: + return False + + +def cleanup_opensearch_indices(opensearch_client: AsyncOpenSearch): + with contextlib.suppress(Exception): + indices = opensearch_client.indices.get(index="opensearch-kv-store-e2e-test-*") + for index in indices: + _ = opensearch_client.indices.delete(index=index) + + +class OpenSearchFailedToStartError(Exception): + pass + + +def test_managed_entry_document_conversion(): + created_at = datetime(year=2025, month=1, day=1, hour=0, minute=0, second=0, tzinfo=timezone.utc) + expires_at = created_at + timedelta(seconds=10) + + managed_entry = ManagedEntry(value={"test": "test"}, created_at=created_at, expires_at=expires_at) + adapter = OpenSearchSerializationAdapter() + document = adapter.dump_dict(entry=managed_entry) + + assert document == snapshot( + {"value": {"flattened": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00"} + ) + + round_trip_managed_entry = adapter.load_dict(data=document) + + assert round_trip_managed_entry.value == managed_entry.value + assert round_trip_managed_entry.created_at == created_at + assert round_trip_managed_entry.ttl == IsFloat(lt=0) + assert round_trip_managed_entry.expires_at == expires_at + + +@pytest.mark.skipif(should_skip_docker_tests(), reason="Docker is not available") +@pytest.mark.filterwarnings("ignore:A configured store is unstable and may change in a backwards incompatible way. 
Use at your own risk.") +class TestOpenSearchStore(ContextManagerStoreTestMixin, BaseStoreTests): + @pytest.fixture(autouse=True, scope="session", params=OPENSEARCH_VERSIONS_TO_TEST) + def setup_opensearch(self, request: pytest.FixtureRequest) -> Generator[None, None, None]: + version = request.param + os_image = f"opensearchproject/opensearch:{version}" + + with docker_container( + f"opensearch-test-{version}", + os_image, + {str(OS_CONTAINER_PORT): OS_PORT}, + {"discovery.type": "single-node", "DISABLE_SECURITY_PLUGIN": "true", "OPENSEARCH_INITIAL_ADMIN_PASSWORD": "TestPassword123!"}, + ): + if not wait_for_true(bool_fn=ping_opensearch, tries=WAIT_FOR_OPENSEARCH_TIMEOUT, wait_time=2): + msg = f"OpenSearch {version} failed to start" + raise OpenSearchFailedToStartError(msg) + + yield + + @pytest.fixture + def opensearch_client(self, setup_opensearch: None) -> Generator[AsyncOpenSearch, None, None]: + os_client = get_opensearch_client() + + with os_client: + cleanup_opensearch_indices(opensearch_client=os_client) + + yield os_client + + @override + @pytest.fixture + def default_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseStore, None, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", default_collection="test-collection" + ) + + with store: + yield store + + @override + @pytest.fixture + def collection_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseStore, None, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, + index_prefix="opensearch-kv-store-e2e-test", + default_collection="test-collection", + collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), + ) + + with store: + yield store + + @override + @pytest.fixture + def key_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseStore, None, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, + index_prefix="opensearch-kv-store-e2e-test", + default_collection="test-collection", + key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), + ) + + with store: + yield store + + @override + @pytest.fixture + def fully_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseStore, None, None]: + store = OpenSearchStore( + opensearch_client=opensearch_client, + index_prefix="opensearch-kv-store-e2e-test", + default_collection="test-collection", + key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), + collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), + ) + + with store: + yield store diff --git a/uv.lock b/uv.lock index bea6629b..bcc74bbc 100644 --- a/uv.lock +++ b/uv.lock @@ -737,6 +737,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/4c/c0c95d3d881732a5d1b28e12c9be4dea5953ade71810f94565bd5bd2101a/elasticsearch-9.1.1-py3-none-any.whl", hash = "sha256:2a5c27c57ca3dd3365f665c82c9dcd8666ccfb550d5b07c688c21ec636c104e5", size = 937483, upload-time = "2025-09-12T13:27:34.948Z" }, ] +[[package]] +name = "events" +version = "0.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/ed/e47dec0626edd468c84c04d97769e7ab4ea6457b7f54dcb3f72b17fcd876/Events-0.5-py3-none-any.whl", hash = "sha256:a7286af378ba3e46640ac9825156c93bdba7502174dd696090fdfcd4d80a1abd", size = 6758, upload-time = "2023-07-31T08:23:13.645Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -1461,6 +1469,27 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/42/b1/6a4eb2c6e9efa028074b0001b61008c9d202b6b46caee9e5d1b18c088216/nodejs_wheel_binaries-22.20.0-py2.py3-none-win_arm64.whl", hash = "sha256:1fccac931faa210d22b6962bcdbc99269d16221d831b9a118bbb80fe434a60b8", size = 38844133, upload-time = "2025-09-26T09:47:57.357Z" }, ] +[[package]] +name = "opensearch-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "events" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b8/58/ecec7f855aae7bcfb08f570088c6cb993f68c361a0727abab35dbf021acb/opensearch_py-3.0.0.tar.gz", hash = "sha256:ebb38f303f8a3f794db816196315bcddad880be0dc75094e3334bc271db2ed39", size = 248890, upload-time = "2025-06-17T05:39:48.453Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/e0/69fd114c607b0323d3f864ab4a5ecb87d76ec5a172d2e36a739c8baebea1/opensearch_py-3.0.0-py3-none-any.whl", hash = "sha256:842bf5d56a4a0d8290eda9bb921c50f3080e5dc4e5fefb9c9648289da3f6a8bb", size = 371491, upload-time = "2025-06-17T05:39:46.539Z" }, +] + +[package.optional-dependencies] +async = [ + { name = "aiohttp" }, +] + [[package]] name = "packaging" version = "25.0" @@ -1734,6 +1763,9 @@ memory = [ mongodb = [ { name = "pymongo" }, ] +opensearch = [ + { name = "opensearch-py", extra = ["async"] }, +] pydantic = [ { name = "pydantic" }, ] @@ -1757,7 +1789,7 @@ wrappers-encryption = [ [package.dev-dependencies] dev = [ { name = "py-key-value", extra = ["dev"] }, - { name = "py-key-value-aio", extra = ["disk", "dynamodb", "elasticsearch", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, + { name = "py-key-value-aio", extra = ["disk", "dynamodb", "elasticsearch", "keyring", "memcached", "memory", "mongodb", "opensearch", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, { name = "py-key-value-aio", extra = ["valkey"], marker = "sys_platform != 'win32'" }, ] @@ -1775,6 +1807,7 @@ requires-dist = [ { name = "hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "keyring", marker = "extra == 'keyring'", specifier = ">=25.6.0" }, { name = "keyring", marker = "extra == 'keyring-linux'", specifier = ">=25.6.0" }, + { name = "opensearch-py", extras = ["async"], marker = "extra == 'opensearch'", specifier = ">=2.0.0" }, { name = "pathvalidate", marker = "extra == 'disk'", specifier = ">=3.3.1" }, { name = "py-key-value-shared", editable = "key-value/key-value-shared" }, { name = "pydantic", marker = "extra == 'pydantic'", specifier = ">=2.11.9" }, @@ -1786,13 +1819,13 @@ requires-dist = [ { name = "types-hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "valkey-glide", marker = "extra == 'valkey'", specifier = ">=2.1.0" }, ] -provides-extras = ["memory", "disk", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "dynamodb", "keyring", "keyring-linux", "pydantic", "rocksdb", "wrappers-encryption"] +provides-extras = ["memory", "disk", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "opensearch", "dynamodb", "keyring", "keyring-linux", "pydantic", "rocksdb", "wrappers-encryption"] [package.metadata.requires-dev] dev = [ { name = "py-key-value", extras = ["dev"], editable = "." 
}, { name = "py-key-value-aio", extras = ["keyring"] }, - { name = "py-key-value-aio", extras = ["memory", "disk", "redis", "elasticsearch", "memcached", "mongodb", "vault", "dynamodb", "rocksdb"] }, + { name = "py-key-value-aio", extras = ["memory", "disk", "redis", "elasticsearch", "opensearch", "memcached", "mongodb", "vault", "dynamodb", "rocksdb"] }, { name = "py-key-value-aio", extras = ["pydantic"] }, { name = "py-key-value-aio", extras = ["valkey"], marker = "sys_platform != 'win32'" }, { name = "py-key-value-aio", extras = ["wrappers-encryption"] }, From 22bbfa28ce009740dbcdee3e6d601727d82184a3 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Fri, 7 Nov 2025 23:14:49 +0000 Subject: [PATCH 02/12] Fix OpenSearch sync store: add dependency and update codegen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add opensearch-py dependency to sync library's pyproject.toml - Update codegen script to transform AsyncOpenSearch → OpenSearch - Regenerate sync code with correct synchronous OpenSearch client - Improve ImportError message for missing opensearch dependency This fixes the type checking errors in the sync library where the generated code was incorrectly using AsyncOpenSearch instead of the synchronous OpenSearch client. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: William Easton --- .../key_value/aio/stores/opensearch/store.py | 2 +- key-value/key-value-sync/pyproject.toml | 3 ++- .../sync/code_gen/stores/opensearch/store.py | 12 +++++------ .../sync/code_gen/stores/opensearch/utils.py | 4 ++-- .../stores/opensearch/test_opensearch.py | 20 +++++++++---------- scripts/build_sync_library.py | 1 + uv.lock | 10 +++++++--- 7 files changed, 29 insertions(+), 23 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py index 027ca87d..af2f758e 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py @@ -44,7 +44,7 @@ get_source_from_body, ) except ImportError as e: - msg = "OpenSearchStore requires py-key-value-aio[opensearch]" + msg = "OpenSearchStore requires opensearch-py[async]>=2.0.0. 
Install with: pip install 'py-key-value-aio[opensearch]'" raise ImportError(msg) from e diff --git a/key-value/key-value-sync/pyproject.toml b/key-value/key-value-sync/pyproject.toml index c268ed04..5b748ad3 100644 --- a/key-value/key-value-sync/pyproject.toml +++ b/key-value/key-value-sync/pyproject.toml @@ -40,6 +40,7 @@ valkey = ["valkey-glide-sync>=2.1.0"] vault = ["hvac>=2.3.0", "types-hvac>=2.3.0"] memcached = ["aiomcache>=0.8.0"] elasticsearch = ["elasticsearch>=8.0.0", "aiohttp>=3.12"] +opensearch = ["opensearch-py[async]>=2.0.0"] pydantic = ["pydantic>=2.11.9"] keyring = ["keyring>=25.6.0"] keyring-linux = ["keyring>=25.6.0", "dbus-python>=1.4.0"] @@ -66,7 +67,7 @@ env_files = [".env"] [dependency-groups] dev = [ - "py-key-value-sync[memory,disk,redis,elasticsearch,memcached,mongodb,vault,rocksdb]", + "py-key-value-sync[memory,disk,redis,elasticsearch,opensearch,memcached,mongodb,vault,rocksdb]", "py-key-value-sync[valkey]; platform_system != 'Windows'", "py-key-value-sync[pydantic]", "py-key-value-sync[keyring]", diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py index 92945c7a..2f3026d3 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py @@ -26,7 +26,7 @@ from key_value.sync.code_gen.stores.opensearch.utils import LessCapableJsonSerializer, new_bulk_action try: - from opensearchpy import AsyncOpenSearch + from opensearchpy import OpenSearch from opensearchpy.exceptions import RequestError from key_value.sync.code_gen.stores.opensearch.utils import ( @@ -37,7 +37,7 @@ get_source_from_body, ) except ImportError as e: - msg = "OpenSearchStore requires py-key-value-aio[opensearch]" + msg = "OpenSearchStore requires opensearch-py[async]>=2.0.0. Install with: pip install 'py-key-value-aio[opensearch]'" raise ImportError(msg) from e logger = logging.getLogger(__name__) @@ -118,7 +118,7 @@ class OpenSearchStore( `OpenSearchV1CollectionSanitizationStrategy` strategies. """ - _client: AsyncOpenSearch + _client: OpenSearch _index_prefix: str @@ -133,7 +133,7 @@ class OpenSearchStore( def __init__( self, *, - opensearch_client: AsyncOpenSearch, + opensearch_client: OpenSearch, index_prefix: str, default_collection: str | None = None, key_sanitization_strategy: SanitizationStrategy | None = None, @@ -172,7 +172,7 @@ def __init__( def __init__( self, *, - opensearch_client: AsyncOpenSearch | None = None, + opensearch_client: OpenSearch | None = None, url: str | None = None, api_key: str | None = None, index_prefix: str, @@ -203,7 +203,7 @@ def __init__( if api_key: client_kwargs["api_key"] = api_key - self._client = AsyncOpenSearch(**client_kwargs) + self._client = OpenSearch(**client_kwargs) else: msg = "Either opensearch_client or url must be provided" raise ValueError(msg) diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py index 8c5a8fef..5f0490df 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/utils.py @@ -3,7 +3,7 @@ # DO NOT CHANGE! Change the original file instead. 
from typing import Any, TypeVar, cast -from opensearchpy import AsyncOpenSearch +from opensearchpy import OpenSearch from opensearchpy.serializer import JSONSerializer @@ -128,6 +128,6 @@ def default(self, data: Any) -> Any: # type: ignore[reportIncompatibleMethodOve raise TypeError(msg) @classmethod - def install_serializer(cls, client: AsyncOpenSearch) -> None: + def install_serializer(cls, client: OpenSearch) -> None: # OpenSearch uses a different serializer architecture client.transport.serializer = cls() # type: ignore[reportUnknownMemberType] diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py index f0d75837..2d13a2aa 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -10,7 +10,7 @@ from inline_snapshot import snapshot from key_value.shared.stores.wait import wait_for_true from key_value.shared.utils.managed_entry import ManagedEntry -from opensearchpy import AsyncOpenSearch +from opensearchpy import OpenSearch from typing_extensions import override from key_value.sync.code_gen.stores.base import BaseStore @@ -35,12 +35,12 @@ OPENSEARCH_VERSIONS_TO_TEST = ["2.11.0", "2.18.0"] -def get_opensearch_client() -> AsyncOpenSearch: - return AsyncOpenSearch(hosts=[OS_URL], use_ssl=False, verify_certs=False) +def get_opensearch_client() -> OpenSearch: + return OpenSearch(hosts=[OS_URL], use_ssl=False, verify_certs=False) def ping_opensearch() -> bool: - os_client: AsyncOpenSearch = get_opensearch_client() + os_client: OpenSearch = get_opensearch_client() with os_client: try: @@ -49,7 +49,7 @@ def ping_opensearch() -> bool: return False -def cleanup_opensearch_indices(opensearch_client: AsyncOpenSearch): +def cleanup_opensearch_indices(opensearch_client: OpenSearch): with contextlib.suppress(Exception): indices = opensearch_client.indices.get(index="opensearch-kv-store-e2e-test-*") for index in indices: @@ -101,7 +101,7 @@ def setup_opensearch(self, request: pytest.FixtureRequest) -> Generator[None, No yield @pytest.fixture - def opensearch_client(self, setup_opensearch: None) -> Generator[AsyncOpenSearch, None, None]: + def opensearch_client(self, setup_opensearch: None) -> Generator[OpenSearch, None, None]: os_client = get_opensearch_client() with os_client: @@ -111,7 +111,7 @@ def opensearch_client(self, setup_opensearch: None) -> Generator[AsyncOpenSearch @override @pytest.fixture - def default_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseStore, None, None]: + def default_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", default_collection="test-collection" ) @@ -121,7 +121,7 @@ def default_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseSto @override @pytest.fixture - def collection_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseStore, None, None]: + def collection_sanitized_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", @@ -134,7 +134,7 @@ def collection_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> Gene @override @pytest.fixture - def key_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> 
Generator[BaseStore, None, None]: + def key_sanitized_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", @@ -147,7 +147,7 @@ def key_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> Generator[B @override @pytest.fixture - def fully_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> Generator[BaseStore, None, None]: + def fully_sanitized_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", diff --git a/scripts/build_sync_library.py b/scripts/build_sync_library.py index c79a870b..fda6384c 100644 --- a/scripts/build_sync_library.py +++ b/scripts/build_sync_library.py @@ -219,6 +219,7 @@ class RenameAsyncToSync(ast.NodeTransformer): # type: ignore "__aiter__": "__iter__", "asyncio.locks": "threading", "AsyncElasticsearch": "Elasticsearch", + "AsyncOpenSearch": "OpenSearch", "AsyncDatabase": "Database", "AsyncCollection": "Collection", "AsyncMongoClient": "MongoClient", diff --git a/uv.lock b/uv.lock index bcc74bbc..192bc9d4 100644 --- a/uv.lock +++ b/uv.lock @@ -1925,6 +1925,9 @@ memory = [ mongodb = [ { name = "pymongo" }, ] +opensearch = [ + { name = "opensearch-py", extra = ["async"] }, +] pydantic = [ { name = "pydantic" }, ] @@ -1948,7 +1951,7 @@ wrappers-encryption = [ [package.dev-dependencies] dev = [ { name = "py-key-value", extra = ["dev"] }, - { name = "py-key-value-sync", extra = ["disk", "elasticsearch", "keyring", "memcached", "memory", "mongodb", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, + { name = "py-key-value-sync", extra = ["disk", "elasticsearch", "keyring", "memcached", "memory", "mongodb", "opensearch", "pydantic", "redis", "rocksdb", "vault", "wrappers-encryption"] }, { name = "py-key-value-sync", extra = ["valkey"], marker = "sys_platform != 'win32'" }, ] @@ -1965,6 +1968,7 @@ requires-dist = [ { name = "hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "keyring", marker = "extra == 'keyring'", specifier = ">=25.6.0" }, { name = "keyring", marker = "extra == 'keyring-linux'", specifier = ">=25.6.0" }, + { name = "opensearch-py", extras = ["async"], marker = "extra == 'opensearch'", specifier = ">=2.0.0" }, { name = "pathvalidate", marker = "extra == 'disk'", specifier = ">=3.3.1" }, { name = "py-key-value-shared", editable = "key-value/key-value-shared" }, { name = "pydantic", marker = "extra == 'pydantic'", specifier = ">=2.11.9" }, @@ -1975,13 +1979,13 @@ requires-dist = [ { name = "types-hvac", marker = "extra == 'vault'", specifier = ">=2.3.0" }, { name = "valkey-glide-sync", marker = "extra == 'valkey'", specifier = ">=2.1.0" }, ] -provides-extras = ["memory", "disk", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "pydantic", "keyring", "keyring-linux", "rocksdb", "wrappers-encryption"] +provides-extras = ["memory", "disk", "redis", "mongodb", "valkey", "vault", "memcached", "elasticsearch", "opensearch", "pydantic", "keyring", "keyring-linux", "rocksdb", "wrappers-encryption"] [package.metadata.requires-dev] dev = [ { name = "py-key-value", extras = ["dev"], editable = "." 
}, { name = "py-key-value-sync", extras = ["keyring"] }, - { name = "py-key-value-sync", extras = ["memory", "disk", "redis", "elasticsearch", "memcached", "mongodb", "vault", "rocksdb"] }, + { name = "py-key-value-sync", extras = ["memory", "disk", "redis", "elasticsearch", "opensearch", "memcached", "mongodb", "vault", "rocksdb"] }, { name = "py-key-value-sync", extras = ["pydantic"] }, { name = "py-key-value-sync", extras = ["valkey"], marker = "sys_platform != 'win32'" }, { name = "py-key-value-sync", extras = ["wrappers-encryption"] }, From 15c67dc93ee4297ce34dc7369c800189d5eba347 Mon Sep 17 00:00:00 2001 From: William Easton Date: Sat, 8 Nov 2025 11:36:37 -0600 Subject: [PATCH 03/12] updates for opensearch tests --- .../key_value/aio/stores/opensearch/store.py | 9 +- .../stores/opensearch/test_opensearch.py | 126 +++++++++++++----- .../sync/code_gen/stores/opensearch/store.py | 7 +- .../stores/opensearch/test_opensearch.py | 122 ++++++++++++----- 4 files changed, 181 insertions(+), 83 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py index af2f758e..412d58b9 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py @@ -68,8 +68,8 @@ }, "value": { "properties": { - "flattened": { - "type": "flattened", + "flat": { + "type": "flat_object", }, }, }, @@ -101,14 +101,14 @@ def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]: value = data.pop("value") data["value"] = { - "flattened": value, + "flat": value, } return data @override def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]: - data["value"] = data.pop("value").get("flattened") + data["value"] = data.pop("value").get("flat") return data @@ -224,7 +224,6 @@ def __init__( if opensearch_client: self._client = opensearch_client elif url: - # Build kwargs for AsyncOpenSearch client_kwargs: dict[str, Any] = { "hosts": [url], "http_compress": True, diff --git a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py index 1b446805..17c38dae 100644 --- a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py @@ -1,15 +1,18 @@ import contextlib from collections.abc import AsyncGenerator from datetime import datetime, timedelta, timezone +from typing import Any import pytest -from dirty_equals import IsFloat +from dirty_equals import IsFloat, IsStr +from elasticsearch import AsyncElasticsearch from inline_snapshot import snapshot from key_value.shared.stores.wait import async_wait_for_true from key_value.shared.utils.managed_entry import ManagedEntry from opensearchpy import AsyncOpenSearch from typing_extensions import override +from key_value.aio.protocols.key_value import AsyncKeyValueProtocol from key_value.aio.stores.base import BaseStore from key_value.aio.stores.opensearch import OpenSearchStore from key_value.aio.stores.opensearch.store import ( @@ -21,10 +24,13 @@ from tests.stores.base import BaseStoreTests, ContextManagerStoreTestMixin TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB -OS_HOST = "localhost" -OS_PORT = 9200 -OS_URL = f"http://{OS_HOST}:{OS_PORT}" -OS_CONTAINER_PORT = 9200 +LOCALHOST = "localhost" + +CONTAINER_PORT = 9200 +HOST_PORT = 19200 + +OPENSEARCH_URL = f"http://{LOCALHOST}:{HOST_PORT}" + WAIT_FOR_OPENSEARCH_TIMEOUT = 30 @@ 
-35,15 +41,15 @@ def get_opensearch_client() -> AsyncOpenSearch: - return AsyncOpenSearch(hosts=[OS_URL], use_ssl=False, verify_certs=False) + return AsyncOpenSearch(hosts=[OPENSEARCH_URL], use_ssl=False, verify_certs=False) async def ping_opensearch() -> bool: - os_client: AsyncOpenSearch = get_opensearch_client() + opensearch_client: AsyncOpenSearch = get_opensearch_client() - async with os_client: + async with opensearch_client: try: - return await os_client.ping() + return await opensearch_client.ping() except Exception: return False @@ -69,7 +75,7 @@ def test_managed_entry_document_conversion(): assert document == snapshot( { - "value": {"flattened": {"test": "test"}}, + "value": {"f": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00", } @@ -94,7 +100,7 @@ async def setup_opensearch(self, request: pytest.FixtureRequest) -> AsyncGenerat with docker_container( f"opensearch-test-{version}", os_image, - {str(OS_CONTAINER_PORT): OS_PORT}, + {str(CONTAINER_PORT): HOST_PORT}, { "discovery.type": "single-node", "DISABLE_SECURITY_PLUGIN": "true", @@ -109,16 +115,16 @@ async def setup_opensearch(self, request: pytest.FixtureRequest) -> AsyncGenerat @pytest.fixture async def opensearch_client(self, setup_opensearch: None) -> AsyncGenerator[AsyncOpenSearch, None]: - os_client = get_opensearch_client() + opensearch_client = get_opensearch_client() - async with os_client: - await cleanup_opensearch_indices(opensearch_client=os_client) + async with opensearch_client: + await cleanup_opensearch_indices(opensearch_client=opensearch_client) - yield os_client + yield opensearch_client @override @pytest.fixture - async def default_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: + async def store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", @@ -130,11 +136,12 @@ async def default_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenera @override @pytest.fixture - async def collection_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: + async def sanitizing_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", default_collection="test-collection", + key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), ) @@ -142,28 +149,73 @@ async def collection_sanitized_store(self, opensearch_client: AsyncOpenSearch) - yield store @override - @pytest.fixture - async def key_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: - store = OpenSearchStore( - opensearch_client=opensearch_client, - index_prefix="opensearch-kv-store-e2e-test", - default_collection="test-collection", - key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), - ) + @pytest.mark.timeout(120) + async def test_store(self, store: BaseStore): + """Tests that the store is a valid AsyncKeyValueProtocol.""" + assert isinstance(store, AsyncKeyValueProtocol) is True - async with store: - yield store + @pytest.mark.skip(reason="Distributed Caches are unbounded") + @override + async def test_not_unbounded(self, store: BaseStore): ... 
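The readiness gate in these fixtures funnels through wait_for_true (and async_wait_for_true in the aio tests) from key_value.shared.stores.wait. A minimal sketch of the contract the call sites rely on, with polling semantics inferred from usage rather than taken from the shared helper's source:

    import time
    from collections.abc import Callable

    def wait_for_true(*, bool_fn: Callable[[], bool], tries: int, wait_time: float) -> bool:
        # Poll bool_fn up to `tries` times, sleeping `wait_time` seconds between
        # attempts; the fixtures use this to block until ping_opensearch() succeeds.
        for _ in range(tries):
            if bool_fn():
                return True
            time.sleep(wait_time)
        return False

The aio variant presumably awaits bool_fn and pauses with asyncio.sleep instead.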
+ @pytest.mark.skip(reason="Skip concurrent tests on distributed caches") @override - @pytest.fixture - async def fully_sanitized_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGenerator[BaseStore, None]: - store = OpenSearchStore( - opensearch_client=opensearch_client, - index_prefix="opensearch-kv-store-e2e-test", - default_collection="test-collection", - key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), - collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), + async def test_concurrent_operations(self, store: BaseStore): ... + + @override + async def test_long_collection_name(self, store: OpenSearchStore, sanitizing_store: OpenSearchStore): # pyright: ignore[reportIncompatibleMethodOverride] + with pytest.raises(Exception): # noqa: B017, PT011 + await store.put(collection="test_collection" * 100, key="test_key", value={"test": "test"}) + + await sanitizing_store.put(collection="test_collection" * 100, key="test_key", value={"test": "test"}) + assert await sanitizing_store.get(collection="test_collection" * 100, key="test_key") == {"test": "test"} + + @override + async def test_long_key_name(self, store: OpenSearchStore, sanitizing_store: OpenSearchStore): # pyright: ignore[reportIncompatibleMethodOverride] + """Tests that a long key name will not raise an error.""" + with pytest.raises(Exception): # noqa: B017, PT011 + await store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) + + await sanitizing_store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) + assert await sanitizing_store.get(collection="test_collection", key="test_key" * 100) == {"test": "test"} + + async def test_put_put_two_indices(self, store: OpenSearchStore, opensearch_client: AsyncOpenSearch): + await store.put(collection="test_collection", key="test_key", value={"test": "test"}) + await store.put(collection="test_collection_2", key="test_key", value={"test": "test"}) + assert await store.get(collection="test_collection", key="test_key") == {"test": "test"} + assert await store.get(collection="test_collection_2", key="test_key") == {"test": "test"} + + indices: dict[str, Any] = await opensearch_client.indices.get(index="opensearch-kv-store-e2e-test-*") + index_names: list[str] = list(indices.keys()) + assert index_names == snapshot(["opensearch-kv-store-e2e-test-test_collection", "opensearch-kv-store-e2e-test-test_collection_2"]) + + async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_client: AsyncElasticsearch): + """Verify values are stored as f objects, not JSON strings""" + await store.put(collection="test", key="test_key", value={"name": "Alice", "age": 30}) + + index_name = store._get_index_name(collection="test") # pyright: ignore[reportPrivateUsage] + doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] + + response = await opensearch_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "value": {"f": {"name": "Alice", "age": 30}}, + "created_at": IsStr(min_length=20, max_length=40), + } ) - async with store: - yield store + # Test with TTL + await store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) + response = await opensearch_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "value": {"f": {"name": "Bob", "age": 25}}, + "created_at": IsStr(min_length=20, max_length=40), + "expires_at": IsStr(min_length=20, 
max_length=40), + } + ) + + @override + async def test_special_characters_in_collection_name(self, store: OpenSearchStore, sanitizing_store: OpenSearchStore): # pyright: ignore[reportIncompatibleMethodOverride] + """Tests that a special characters in the collection name will not raise an error.""" + await super().test_special_characters_in_collection_name(store=sanitizing_store) diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py index 2f3026d3..969bb8eb 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py @@ -50,7 +50,7 @@ "expires_at": {"type": "date"}, "collection": {"type": "keyword"}, "key": {"type": "keyword"}, - "value": {"properties": {"flattened": {"type": "flattened"}}}, + "value": {"properties": {"flat": {"type": "flat_object"}}}, } } @@ -78,13 +78,13 @@ def __init__(self) -> None: def prepare_dump(self, data: dict[str, Any]) -> dict[str, Any]: value = data.pop("value") - data["value"] = {"flattened": value} + data["value"] = {"flat": value} return data @override def prepare_load(self, data: dict[str, Any]) -> dict[str, Any]: - data["value"] = data.pop("value").get("flattened") + data["value"] = data.pop("value").get("flat") return data @@ -198,7 +198,6 @@ def __init__( if opensearch_client: self._client = opensearch_client elif url: - # Build kwargs for AsyncOpenSearch client_kwargs: dict[str, Any] = {"hosts": [url], "http_compress": True, "timeout": 10, "max_retries": 3} if api_key: client_kwargs["api_key"] = api_key diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py index 2d13a2aa..d1596e9f 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -4,15 +4,18 @@ import contextlib from collections.abc import Generator from datetime import datetime, timedelta, timezone +from typing import Any import pytest -from dirty_equals import IsFloat +from dirty_equals import IsFloat, IsStr +from elasticsearch import Elasticsearch from inline_snapshot import snapshot from key_value.shared.stores.wait import wait_for_true from key_value.shared.utils.managed_entry import ManagedEntry from opensearchpy import OpenSearch from typing_extensions import override +from key_value.sync.code_gen.protocols.key_value import KeyValueProtocol from key_value.sync.code_gen.stores.base import BaseStore from key_value.sync.code_gen.stores.opensearch import OpenSearchStore from key_value.sync.code_gen.stores.opensearch.store import ( @@ -24,10 +27,12 @@ from tests.code_gen.stores.base import BaseStoreTests, ContextManagerStoreTestMixin TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB -OS_HOST = "localhost" -OS_PORT = 9200 -OS_URL = f"http://{OS_HOST}:{OS_PORT}" -OS_CONTAINER_PORT = 9200 +LOCALHOST = "localhost" + +CONTAINER_PORT = 9200 +HOST_PORT = 19200 + +OPENSEARCH_URL = f"http://{LOCALHOST}:{HOST_PORT}" WAIT_FOR_OPENSEARCH_TIMEOUT = 30 # Released 2023 @@ -36,15 +41,15 @@ def get_opensearch_client() -> OpenSearch: - return OpenSearch(hosts=[OS_URL], use_ssl=False, verify_certs=False) + return OpenSearch(hosts=[OPENSEARCH_URL], use_ssl=False, verify_certs=False) def ping_opensearch() -> bool: - os_client: OpenSearch = 
get_opensearch_client() + opensearch_client: OpenSearch = get_opensearch_client() - with os_client: + with opensearch_client: try: - return os_client.ping() + return opensearch_client.ping() except Exception: return False @@ -69,7 +74,7 @@ def test_managed_entry_document_conversion(): document = adapter.dump_dict(entry=managed_entry) assert document == snapshot( - {"value": {"flattened": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00"} + {"value": {"f": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00"} ) round_trip_managed_entry = adapter.load_dict(data=document) @@ -91,7 +96,7 @@ def setup_opensearch(self, request: pytest.FixtureRequest) -> Generator[None, No with docker_container( f"opensearch-test-{version}", os_image, - {str(OS_CONTAINER_PORT): OS_PORT}, + {str(CONTAINER_PORT): HOST_PORT}, {"discovery.type": "single-node", "DISABLE_SECURITY_PLUGIN": "true", "OPENSEARCH_INITIAL_ADMIN_PASSWORD": "TestPassword123!"}, ): if not wait_for_true(bool_fn=ping_opensearch, tries=WAIT_FOR_OPENSEARCH_TIMEOUT, wait_time=2): @@ -102,16 +107,16 @@ def setup_opensearch(self, request: pytest.FixtureRequest) -> Generator[None, No @pytest.fixture def opensearch_client(self, setup_opensearch: None) -> Generator[OpenSearch, None, None]: - os_client = get_opensearch_client() + opensearch_client = get_opensearch_client() - with os_client: - cleanup_opensearch_indices(opensearch_client=os_client) + with opensearch_client: + cleanup_opensearch_indices(opensearch_client=opensearch_client) - yield os_client + yield opensearch_client @override @pytest.fixture - def default_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: + def store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", default_collection="test-collection" ) @@ -121,11 +126,12 @@ def default_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, N @override @pytest.fixture - def collection_sanitized_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: + def sanitizing_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: store = OpenSearchStore( opensearch_client=opensearch_client, index_prefix="opensearch-kv-store-e2e-test", default_collection="test-collection", + key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), ) @@ -133,28 +139,70 @@ def collection_sanitized_store(self, opensearch_client: OpenSearch) -> Generator yield store @override - @pytest.fixture - def key_sanitized_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: - store = OpenSearchStore( - opensearch_client=opensearch_client, - index_prefix="opensearch-kv-store-e2e-test", - default_collection="test-collection", - key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), - ) + @pytest.mark.timeout(120) + def test_store(self, store: BaseStore): + """Tests that the store is a valid KeyValueProtocol.""" + assert isinstance(store, KeyValueProtocol) is True - with store: - yield store + @pytest.mark.skip(reason="Distributed Caches are unbounded") + @override + def test_not_unbounded(self, store: BaseStore): ... 
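Beyond the client injection these fixtures use, the store can also build its own client from a bare URL (the url branch in store.py above). A usage sketch against a local security-disabled node; the port, prefix, and values are illustrative:

    from key_value.sync.code_gen.stores.opensearch import OpenSearchStore

    # One index is created per collection, named "<index_prefix>-<collection>".
    store = OpenSearchStore(url="http://localhost:19200", index_prefix="kv-example")

    with store:
        store.put(collection="users", key="alice", value={"age": 30}, ttl=60)
        assert store.get(collection="users", key="alice") == {"age": 30}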
+ @pytest.mark.skip(reason="Skip concurrent tests on distributed caches") @override - @pytest.fixture - def fully_sanitized_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore, None, None]: - store = OpenSearchStore( - opensearch_client=opensearch_client, - index_prefix="opensearch-kv-store-e2e-test", - default_collection="test-collection", - key_sanitization_strategy=OpenSearchV1KeySanitizationStrategy(), - collection_sanitization_strategy=OpenSearchV1CollectionSanitizationStrategy(), + def test_concurrent_operations(self, store: BaseStore): ... + + @override + def test_long_collection_name(self, store: OpenSearchStore, sanitizing_store: OpenSearchStore): # pyright: ignore[reportIncompatibleMethodOverride] + with pytest.raises(Exception): # noqa: B017, PT011 + store.put(collection="test_collection" * 100, key="test_key", value={"test": "test"}) + + sanitizing_store.put(collection="test_collection" * 100, key="test_key", value={"test": "test"}) + assert sanitizing_store.get(collection="test_collection" * 100, key="test_key") == {"test": "test"} + + @override + def test_long_key_name(self, store: OpenSearchStore, sanitizing_store: OpenSearchStore): # pyright: ignore[reportIncompatibleMethodOverride] + "Tests that a long key name will not raise an error." + with pytest.raises(Exception): # noqa: B017, PT011 + store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) + + sanitizing_store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) + assert sanitizing_store.get(collection="test_collection", key="test_key" * 100) == {"test": "test"} + + def test_put_put_two_indices(self, store: OpenSearchStore, opensearch_client: OpenSearch): + store.put(collection="test_collection", key="test_key", value={"test": "test"}) + store.put(collection="test_collection_2", key="test_key", value={"test": "test"}) + assert store.get(collection="test_collection", key="test_key") == {"test": "test"} + assert store.get(collection="test_collection_2", key="test_key") == {"test": "test"} + + indices: dict[str, Any] = opensearch_client.indices.get(index="opensearch-kv-store-e2e-test-*") + index_names: list[str] = list(indices.keys()) + assert index_names == snapshot(["opensearch-kv-store-e2e-test-test_collection", "opensearch-kv-store-e2e-test-test_collection_2"]) + + def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_client: Elasticsearch): + """Verify values are stored as f objects, not JSON strings""" + store.put(collection="test", key="test_key", value={"name": "Alice", "age": 30}) + + index_name = store._get_index_name(collection="test") # pyright: ignore[reportPrivateUsage] + doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] + + response = opensearch_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + {"value": {"f": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40)} ) - with store: - yield store + # Test with TTL + store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) + response = opensearch_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "value": {"f": {"name": "Bob", "age": 25}}, + "created_at": IsStr(min_length=20, max_length=40), + "expires_at": IsStr(min_length=20, max_length=40), + } + ) + + @override + def test_special_characters_in_collection_name(self, store: OpenSearchStore, sanitizing_store: OpenSearchStore): # pyright: 
ignore[reportIncompatibleMethodOverride] + "Tests that a special characters in the collection name will not raise an error." + super().test_special_characters_in_collection_name(store=sanitizing_store) From 8d89ad1104f9c6660c88d602888326a223929061 Mon Sep 17 00:00:00 2001 From: William Easton Date: Sat, 8 Nov 2025 11:41:47 -0600 Subject: [PATCH 04/12] Fix opensearch tests --- .../tests/stores/opensearch/test_opensearch.py | 6 +++--- .../tests/code_gen/stores/opensearch/test_opensearch.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py index 17c38dae..4253a465 100644 --- a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py @@ -75,7 +75,7 @@ def test_managed_entry_document_conversion(): assert document == snapshot( { - "value": {"f": {"test": "test"}}, + "value": {"flat": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00", } @@ -199,7 +199,7 @@ async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch response = await opensearch_client.get(index=index_name, id=doc_id) assert response.body["_source"] == snapshot( { - "value": {"f": {"name": "Alice", "age": 30}}, + "value": {"flat": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40), } ) @@ -209,7 +209,7 @@ async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch response = await opensearch_client.get(index=index_name, id=doc_id) assert response.body["_source"] == snapshot( { - "value": {"f": {"name": "Bob", "age": 25}}, + "value": {"flat": {"name": "Bob", "age": 25}}, "created_at": IsStr(min_length=20, max_length=40), "expires_at": IsStr(min_length=20, max_length=40), } diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py index d1596e9f..f8e5f571 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -74,7 +74,7 @@ def test_managed_entry_document_conversion(): document = adapter.dump_dict(entry=managed_entry) assert document == snapshot( - {"value": {"f": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00"} + {"value": {"flat": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00"} ) round_trip_managed_entry = adapter.load_dict(data=document) @@ -188,7 +188,7 @@ def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_clien response = opensearch_client.get(index=index_name, id=doc_id) assert response.body["_source"] == snapshot( - {"value": {"f": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40)} + {"value": {"flat": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40)} ) # Test with TTL @@ -196,7 +196,7 @@ def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_clien response = opensearch_client.get(index=index_name, id=doc_id) assert response.body["_source"] == snapshot( { - "value": {"f": {"name": "Bob", "age": 25}}, + "value": {"flat": {"name": "Bob", "age": 25}}, "created_at": IsStr(min_length=20, max_length=40), "expires_at": 
IsStr(min_length=20, max_length=40), } From 7930314901595200927a1c65167d8e01a737e05f Mon Sep 17 00:00:00 2001 From: William Easton Date: Sun, 9 Nov 2025 11:00:11 -0600 Subject: [PATCH 05/12] Fix opensearch tests --- .../key-value-aio/tests/stores/opensearch/test_opensearch.py | 4 ++-- .../tests/code_gen/stores/opensearch/test_opensearch.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py index 4253a465..6e32dd28 100644 --- a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py @@ -197,7 +197,7 @@ async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] response = await opensearch_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( + assert response["_source"] == snapshot( { "value": {"flat": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40), @@ -207,7 +207,7 @@ async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch # Test with TTL await store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) response = await opensearch_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( + assert response["_source"] == snapshot( { "value": {"flat": {"name": "Bob", "age": 25}}, "created_at": IsStr(min_length=20, max_length=40), diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py index f8e5f571..b77dcd21 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -187,14 +187,14 @@ def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_clien doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] response = opensearch_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( + assert response["_source"] == snapshot( {"value": {"flat": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40)} ) # Test with TTL store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) response = opensearch_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( + assert response["_source"] == snapshot( { "value": {"flat": {"name": "Bob", "age": 25}}, "created_at": IsStr(min_length=20, max_length=40), From 0675d55aaf000b9ab1f63ebfc34def50d4cd8625 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 9 Nov 2025 18:38:35 +0000 Subject: [PATCH 06/12] fix: address CodeRabbit PR feedback for OpenSearch store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix type annotation: AsyncElasticsearch → AsyncOpenSearch in async test - Add key/collection/version fields to serialized documents (following PR #204) - Update test snapshots to expect version, key, and collection fields - Fix ImportError message in sync store to reference correct package - Run codegen to regenerate sync library with fixes Co-authored-by: William Easton --- 
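With key, collection, and version now written at dump time, the _source of a stored entry takes the shape the updated snapshots assert; a representative document (values illustrative, expires_at present only when a TTL was set):

    example_source = {
        "version": 1,
        "key": "test_key",
        "collection": "test",
        "value": {"flat": {"name": "Alice", "age": 30}},
        "created_at": "2025-01-01T00:00:00+00:00",
    }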
.../key_value/aio/stores/opensearch/store.py | 4 ++-- .../stores/opensearch/test_opensearch.py | 10 +++++++-- .../sync/code_gen/stores/opensearch/store.py | 6 +++--- .../stores/opensearch/test_opensearch.py | 21 +++++++++++++++---- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py index 412d58b9..cf9f0b8e 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py @@ -359,7 +359,7 @@ async def _put_managed_entry( index_name: str = self._get_index_name(collection=collection) document_id: str = self._get_document_id(key=key) - document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry, key=key, collection=collection) try: _ = await self._client.index( # type: ignore[reportUnknownVariableType] @@ -395,7 +395,7 @@ async def _put_managed_entries( index_action: dict[str, Any] = new_bulk_action(action="index", index=index_name, document_id=document_id) - document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry, key=key, collection=collection) operations.extend([index_action, document]) diff --git a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py index 6e32dd28..baac4d34 100644 --- a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py @@ -5,7 +5,6 @@ import pytest from dirty_equals import IsFloat, IsStr -from elasticsearch import AsyncElasticsearch from inline_snapshot import snapshot from key_value.shared.stores.wait import async_wait_for_true from key_value.shared.utils.managed_entry import ManagedEntry @@ -75,6 +74,7 @@ def test_managed_entry_document_conversion(): assert document == snapshot( { + "version": 1, "value": {"flat": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00", @@ -189,7 +189,7 @@ async def test_put_put_two_indices(self, store: OpenSearchStore, opensearch_clie index_names: list[str] = list(indices.keys()) assert index_names == snapshot(["opensearch-kv-store-e2e-test-test_collection", "opensearch-kv-store-e2e-test-test_collection_2"]) - async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_client: AsyncElasticsearch): + async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_client: AsyncOpenSearch): """Verify values are stored as f objects, not JSON strings""" await store.put(collection="test", key="test_key", value={"name": "Alice", "age": 30}) @@ -199,6 +199,9 @@ async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch response = await opensearch_client.get(index=index_name, id=doc_id) assert response["_source"] == snapshot( { + "version": 1, + "key": "test_key", + "collection": "test", "value": {"flat": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40), } @@ -209,6 +212,9 @@ async def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch response = await opensearch_client.get(index=index_name, id=doc_id) assert response["_source"] == snapshot( { + "version": 1, + "key": "test_key", + "collection": 
"test", "value": {"flat": {"name": "Bob", "age": 25}}, "created_at": IsStr(min_length=20, max_length=40), "expires_at": IsStr(min_length=20, max_length=40), diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py index 969bb8eb..2784b766 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py @@ -37,7 +37,7 @@ get_source_from_body, ) except ImportError as e: - msg = "OpenSearchStore requires opensearch-py[async]>=2.0.0. Install with: pip install 'py-key-value-aio[opensearch]'" + msg = "OpenSearchStore requires opensearch-py>=2.0.0. Install with: pip install 'py-key-value-sync[opensearch]'" raise ImportError(msg) from e logger = logging.getLogger(__name__) @@ -318,7 +318,7 @@ def _put_managed_entry(self, *, key: str, collection: str, managed_entry: Manage index_name: str = self._get_index_name(collection=collection) document_id: str = self._get_document_id(key=key) - document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry, key=key, collection=collection) try: # type: ignore[reportUnknownVariableType] _ = self._client.index(index=index_name, id=document_id, body=document, params={"refresh": "true"}) @@ -349,7 +349,7 @@ def _put_managed_entries( index_action: dict[str, Any] = new_bulk_action(action="index", index=index_name, document_id=document_id) - document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry) + document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry, key=key, collection=collection) operations.extend([index_action, document]) diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py index b77dcd21..efef6182 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -8,7 +8,6 @@ import pytest from dirty_equals import IsFloat, IsStr -from elasticsearch import Elasticsearch from inline_snapshot import snapshot from key_value.shared.stores.wait import wait_for_true from key_value.shared.utils.managed_entry import ManagedEntry @@ -74,7 +73,12 @@ def test_managed_entry_document_conversion(): document = adapter.dump_dict(entry=managed_entry) assert document == snapshot( - {"value": {"flat": {"test": "test"}}, "created_at": "2025-01-01T00:00:00+00:00", "expires_at": "2025-01-01T00:00:10+00:00"} + { + "version": 1, + "value": {"flat": {"test": "test"}}, + "created_at": "2025-01-01T00:00:00+00:00", + "expires_at": "2025-01-01T00:00:10+00:00", + } ) round_trip_managed_entry = adapter.load_dict(data=document) @@ -179,7 +183,7 @@ def test_put_put_two_indices(self, store: OpenSearchStore, opensearch_client: Op index_names: list[str] = list(indices.keys()) assert index_names == snapshot(["opensearch-kv-store-e2e-test-test_collection", "opensearch-kv-store-e2e-test-test_collection_2"]) - def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_client: Elasticsearch): + def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_client: OpenSearch): """Verify values are stored as f objects, not JSON strings""" store.put(collection="test", key="test_key", value={"name": 
"Alice", "age": 30}) @@ -188,7 +192,13 @@ def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_clien response = opensearch_client.get(index=index_name, id=doc_id) assert response["_source"] == snapshot( - {"value": {"flat": {"name": "Alice", "age": 30}}, "created_at": IsStr(min_length=20, max_length=40)} + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flat": {"name": "Alice", "age": 30}}, + "created_at": IsStr(min_length=20, max_length=40), + } ) # Test with TTL @@ -196,6 +206,9 @@ def test_value_stored_as_f_object(self, store: OpenSearchStore, opensearch_clien response = opensearch_client.get(index=index_name, id=doc_id) assert response["_source"] == snapshot( { + "version": 1, + "key": "test_key", + "collection": "test", "value": {"flat": {"name": "Bob", "age": 25}}, "created_at": IsStr(min_length=20, max_length=40), "expires_at": IsStr(min_length=20, max_length=40), From 3a29eaf0d3c885430a7865041fb83496cabefca4 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Sun, 9 Nov 2025 19:04:39 +0000 Subject: [PATCH 07/12] fix: improve codegen string transformations and remove OpenSearch bulk operations - Add visit_Constant method to codegen script to transform string literals - Replace py-key-value-aio with py-key-value-sync in error messages - Remove [async] extras from opensearch-py package install instructions - Remove bulk write and delete operations from OpenSearch store - Remove _put_managed_entries method - Remove _delete_managed_entries method - Remove new_bulk_action import - Regenerate all sync code with corrected package references This fixes the codegen check failures where error messages incorrectly pointed users to install the async package instead of the sync package. 
Co-authored-by: William Easton --- .../key_value/aio/stores/opensearch/store.py | 67 +------------------ .../sync/code_gen/stores/disk/multi_store.py | 2 +- .../sync/code_gen/stores/disk/store.py | 2 +- .../code_gen/stores/elasticsearch/store.py | 2 +- .../sync/code_gen/stores/keyring/store.py | 2 +- .../sync/code_gen/stores/memory/store.py | 2 +- .../sync/code_gen/stores/mongodb/store.py | 2 +- .../sync/code_gen/stores/opensearch/store.py | 67 +------------------ .../sync/code_gen/stores/redis/store.py | 2 +- .../sync/code_gen/stores/rocksdb/store.py | 2 +- .../sync/code_gen/stores/valkey/store.py | 2 +- .../sync/code_gen/stores/vault/store.py | 2 +- scripts/build_sync_library.py | 10 +++ 13 files changed, 22 insertions(+), 142 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py index cf9f0b8e..c62ea220 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/opensearch/store.py @@ -1,7 +1,6 @@ import contextlib import logging from collections.abc import Sequence -from datetime import datetime from typing import Any, overload from key_value.shared.errors import DeserializationError, SerializationError @@ -30,7 +29,7 @@ BaseEnumerateKeysStore, BaseStore, ) -from key_value.aio.stores.opensearch.utils import LessCapableJsonSerializer, new_bulk_action +from key_value.aio.stores.opensearch.utils import LessCapableJsonSerializer try: from opensearchpy import AsyncOpenSearch @@ -372,39 +371,6 @@ async def _put_managed_entry( msg = f"Failed to serialize document: {e}" raise SerializationError(message=msg) from e - @override - async def _put_managed_entries( - self, - *, - collection: str, - keys: Sequence[str], - managed_entries: Sequence[ManagedEntry], - ttl: float | None, - created_at: datetime, - expires_at: datetime | None, - ) -> None: - if not keys: - return - - operations: list[dict[str, Any] | str] = [] - - index_name: str = self._get_index_name(collection=collection) - - for key, managed_entry in zip(keys, managed_entries, strict=True): - document_id: str = self._get_document_id(key=key) - - index_action: dict[str, Any] = new_bulk_action(action="index", index=index_name, document_id=document_id) - - document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry, key=key, collection=collection) - - operations.extend([index_action, document]) - - try: - _ = await self._client.bulk(body=operations, params={"refresh": "true"}) # type: ignore[reportUnknownVariableType] - except Exception as e: - msg = f"Failed to serialize bulk operations: {e}" - raise SerializationError(message=msg) from e - @override async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: index_name: str = self._get_index_name(collection=collection) @@ -422,37 +388,6 @@ async def _delete_managed_entry(self, *, key: str, collection: str) -> bool: return result == "deleted" - @override - async def _delete_managed_entries(self, *, keys: Sequence[str], collection: str) -> int: - if not keys: - return 0 - - operations: list[dict[str, Any]] = [] - - for key in keys: - index_name, document_id = self._get_destination(collection=collection, key=key) - - delete_action: dict[str, Any] = new_bulk_action(action="delete", index=index_name, document_id=document_id) - - operations.append(delete_action) - - try: - opensearch_response = await self._client.bulk(body=operations) - except Exception: - return 0 - - body: 
dict[str, Any] = get_body_from_response(response=opensearch_response) - - # Count successful deletions - deleted_count = 0 - items = body.get("items", []) - for item in items: - delete_result = item.get("delete", {}) - if delete_result.get("result") == "deleted": - deleted_count += 1 - - return deleted_count - @override async def _get_collection_keys(self, *, collection: str, limit: int | None = None) -> list[str]: """Get up to 10,000 keys in the specified collection (eventually consistent).""" diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/multi_store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/multi_store.py index 9bcc33c2..639c7d40 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/multi_store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/multi_store.py @@ -16,7 +16,7 @@ from diskcache import Cache from pathvalidate import sanitize_filename except ImportError as e: - msg = "DiskStore requires py-key-value-aio[disk]" + msg = "DiskStore requires py-key-value-sync[disk]" raise ImportError(msg) from e CacheFactory = Callable[[str], Cache] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/store.py index 07c0b929..2fbedb6c 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/disk/store.py @@ -14,7 +14,7 @@ try: from diskcache import Cache except ImportError as e: - msg = "DiskStore requires py-key-value-aio[disk]" + msg = "DiskStore requires py-key-value-sync[disk]" raise ImportError(msg) from e diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py index cbd016cb..ad3e9d05 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py @@ -38,7 +38,7 @@ get_source_from_body, ) except ImportError as e: - msg = "ElasticsearchStore requires py-key-value-aio[elasticsearch]" + msg = "ElasticsearchStore requires py-key-value-sync[elasticsearch]" raise ImportError(msg) from e logger = logging.getLogger(__name__) diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/keyring/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/keyring/store.py index 5ff8a812..5c1f1180 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/keyring/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/keyring/store.py @@ -18,7 +18,7 @@ import keyring from keyring.errors import PasswordDeleteError except ImportError as e: - msg = "KeyringStore requires py-key-value-aio[keyring]" + msg = "KeyringStore requires py-key-value-sync[keyring]" raise ImportError(msg) from e DEFAULT_KEYCHAIN_SERVICE = "py-key-value" diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/memory/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/memory/store.py index 89575d86..41c2fa49 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/memory/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/memory/store.py @@ -21,7 +21,7 @@ try: from cachetools import TLRUCache except ImportError as e: - msg = "MemoryStore requires 
py-key-value-aio[memory]" + msg = "MemoryStore requires py-key-value-sync[memory]" raise ImportError(msg) from e diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/mongodb/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/mongodb/store.py index a66c7178..7b016de9 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/mongodb/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/mongodb/store.py @@ -21,7 +21,7 @@ from pymongo.database import Database from pymongo.results import DeleteResult # noqa: TC002 except ImportError as e: - msg = "MongoDBStore requires py-key-value-aio[mongodb]" + msg = "MongoDBStore requires py-key-value-sync[mongodb]" raise ImportError(msg) from e DEFAULT_DB = "kv-store-adapter" diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py index 2784b766..fc023afe 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/opensearch/store.py @@ -4,7 +4,6 @@ import contextlib import logging from collections.abc import Sequence -from datetime import datetime from typing import Any, overload from key_value.shared.errors import DeserializationError, SerializationError @@ -23,7 +22,7 @@ BaseEnumerateKeysStore, BaseStore, ) -from key_value.sync.code_gen.stores.opensearch.utils import LessCapableJsonSerializer, new_bulk_action +from key_value.sync.code_gen.stores.opensearch.utils import LessCapableJsonSerializer try: from opensearchpy import OpenSearch @@ -326,39 +325,6 @@ def _put_managed_entry(self, *, key: str, collection: str, managed_entry: Manage msg = f"Failed to serialize document: {e}" raise SerializationError(message=msg) from e - @override - def _put_managed_entries( - self, - *, - collection: str, - keys: Sequence[str], - managed_entries: Sequence[ManagedEntry], - ttl: float | None, - created_at: datetime, - expires_at: datetime | None, - ) -> None: - if not keys: - return - - operations: list[dict[str, Any] | str] = [] - - index_name: str = self._get_index_name(collection=collection) - - for key, managed_entry in zip(keys, managed_entries, strict=True): - document_id: str = self._get_document_id(key=key) - - index_action: dict[str, Any] = new_bulk_action(action="index", index=index_name, document_id=document_id) - - document: dict[str, Any] = self._serializer.dump_dict(entry=managed_entry, key=key, collection=collection) - - operations.extend([index_action, document]) - - try: - _ = self._client.bulk(body=operations, params={"refresh": "true"}) # type: ignore[reportUnknownVariableType] - except Exception as e: - msg = f"Failed to serialize bulk operations: {e}" - raise SerializationError(message=msg) from e - @override def _delete_managed_entry(self, *, key: str, collection: str) -> bool: index_name: str = self._get_index_name(collection=collection) @@ -376,37 +342,6 @@ def _delete_managed_entry(self, *, key: str, collection: str) -> bool: return result == "deleted" - @override - def _delete_managed_entries(self, *, keys: Sequence[str], collection: str) -> int: - if not keys: - return 0 - - operations: list[dict[str, Any]] = [] - - for key in keys: - (index_name, document_id) = self._get_destination(collection=collection, key=key) - - delete_action: dict[str, Any] = new_bulk_action(action="delete", index=index_name, document_id=document_id) - - 
operations.append(delete_action) - - try: - opensearch_response = self._client.bulk(body=operations) - except Exception: - return 0 - - body: dict[str, Any] = get_body_from_response(response=opensearch_response) - - # Count successful deletions - deleted_count = 0 - items = body.get("items", []) - for item in items: - delete_result = item.get("delete", {}) - if delete_result.get("result") == "deleted": - deleted_count += 1 - - return deleted_count - @override def _get_collection_keys(self, *, collection: str, limit: int | None = None) -> list[str]: """Get up to 10,000 keys in the specified collection (eventually consistent).""" diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/redis/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/redis/store.py index 6885cc4e..6b167d0b 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/redis/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/redis/store.py @@ -18,7 +18,7 @@ try: from redis import Redis except ImportError as e: - msg = "RedisStore requires py-key-value-aio[redis]" + msg = "RedisStore requires py-key-value-sync[redis]" raise ImportError(msg) from e DEFAULT_PAGE_SIZE = 10000 diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/rocksdb/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/rocksdb/store.py index 5b1535eb..a8ba929c 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/rocksdb/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/rocksdb/store.py @@ -16,7 +16,7 @@ try: from rocksdict import Options, Rdict, WriteBatch except ImportError as e: - msg = "RocksDBStore requires py-key-value-aio[rocksdb]" + msg = "RocksDBStore requires py-key-value-sync[rocksdb]" raise ImportError(msg) from e diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/valkey/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/valkey/store.py index 888029d0..dc3bc09b 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/valkey/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/valkey/store.py @@ -15,7 +15,7 @@ from glide_sync.config import GlideClientConfiguration, NodeAddress, ServerCredentials from glide_sync.glide_client import BaseClient, GlideClient except ImportError as e: - msg = "ValkeyStore requires py-key-value-aio[valkey]" + msg = "ValkeyStore requires py-key-value-sync[valkey]" raise ImportError(msg) from e DEFAULT_PAGE_SIZE = 10000 diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/vault/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/vault/store.py index b96d6af9..d7154dc6 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/vault/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/vault/store.py @@ -16,7 +16,7 @@ from hvac.api.secrets_engines.kv_v2 import KvV2 from hvac.exceptions import InvalidPath except ImportError as e: - msg = "VaultStore requires py-key-value-aio[vault]" + msg = "VaultStore requires py-key-value-sync[vault]" raise ImportError(msg) from e diff --git a/scripts/build_sync_library.py b/scripts/build_sync_library.py index cee57f92..3bfbeab6 100644 --- a/scripts/build_sync_library.py +++ b/scripts/build_sync_library.py @@ -409,6 +409,16 @@ def _manage_async_generator(self, node: ast.Subscript) -> ast.AST | None: pass return None + def visit_Constant(self, node: ast.Constant) -> ast.AST: + # 
Transform string literals containing package names + if isinstance(node.value, str): + # Replace py-key-value-aio with py-key-value-sync + node.value = node.value.replace("py-key-value-aio", "py-key-value-sync") + # Remove [async] extras from package install instructions + node.value = node.value.replace("opensearch-py[async]", "opensearch-py") + self.generic_visit(node) + return node + class BlanksInserter(ast.NodeTransformer): # type: ignore """ From da5cc6248436a6c998eed26e67c25e4677858f96 Mon Sep 17 00:00:00 2001 From: William Easton Date: Sun, 9 Nov 2025 13:26:31 -0600 Subject: [PATCH 08/12] Fix opensearch timeout hopefully --- key-value/key-value-aio/tests/stores/base.py | 2 +- .../tests/stores/opensearch/test_opensearch.py | 7 ------- key-value/key-value-sync/tests/code_gen/stores/base.py | 2 +- .../tests/code_gen/stores/opensearch/test_opensearch.py | 7 ------- 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/base.py b/key-value/key-value-aio/tests/stores/base.py index 43177ee2..c2e57acf 100644 --- a/key-value/key-value-aio/tests/stores/base.py +++ b/key-value/key-value-aio/tests/stores/base.py @@ -30,7 +30,7 @@ async def eventually_consistent(self) -> None: # noqa: B027 @abstractmethod async def store(self) -> BaseStore | AsyncGenerator[BaseStore, None]: ... - @pytest.mark.timeout(60) + @pytest.mark.timeout(90) async def test_store(self, store: BaseStore): """Tests that the store is a valid AsyncKeyValueProtocol.""" assert isinstance(store, AsyncKeyValueProtocol) is True diff --git a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py index baac4d34..77c78cec 100644 --- a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py @@ -11,7 +11,6 @@ from opensearchpy import AsyncOpenSearch from typing_extensions import override -from key_value.aio.protocols.key_value import AsyncKeyValueProtocol from key_value.aio.stores.base import BaseStore from key_value.aio.stores.opensearch import OpenSearchStore from key_value.aio.stores.opensearch.store import ( @@ -148,12 +147,6 @@ async def sanitizing_store(self, opensearch_client: AsyncOpenSearch) -> AsyncGen async with store: yield store - @override - @pytest.mark.timeout(120) - async def test_store(self, store: BaseStore): - """Tests that the store is a valid AsyncKeyValueProtocol.""" - assert isinstance(store, AsyncKeyValueProtocol) is True - @pytest.mark.skip(reason="Distributed Caches are unbounded") @override async def test_not_unbounded(self, store: BaseStore): ... diff --git a/key-value/key-value-sync/tests/code_gen/stores/base.py b/key-value/key-value-sync/tests/code_gen/stores/base.py index b78e279d..9c6303b1 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/base.py +++ b/key-value/key-value-sync/tests/code_gen/stores/base.py @@ -27,7 +27,7 @@ def eventually_consistent(self) -> None: # noqa: B027 @abstractmethod def store(self) -> BaseStore | Generator[BaseStore, None, None]: ... 
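
For reference, the visit_Constant hook added to scripts/build_sync_library.py above is self-contained enough to exercise in isolation with nothing but the stdlib ast module. A minimal sketch follows; the sample source line mirrors the DiskStore import error corrected earlier in this patch:

    import ast

    class PackageNameRewriter(ast.NodeTransformer):
        # Same rewrite as the generator's visit_Constant: adjust string constants
        # so async install hints point at the sync package instead.
        def visit_Constant(self, node: ast.Constant) -> ast.AST:
            if isinstance(node.value, str):
                node.value = node.value.replace("py-key-value-aio", "py-key-value-sync")
                node.value = node.value.replace("opensearch-py[async]", "opensearch-py")
            return node

    tree = ast.parse('msg = "DiskStore requires py-key-value-aio[disk]"')
    print(ast.unparse(PackageNameRewriter().visit(tree)))
    # -> msg = 'DiskStore requires py-key-value-sync[disk]'
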
- @pytest.mark.timeout(60) + @pytest.mark.timeout(90) def test_store(self, store: BaseStore): """Tests that the store is a valid KeyValueProtocol.""" assert isinstance(store, KeyValueProtocol) is True diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py index efef6182..ad5acd4d 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -14,7 +14,6 @@ from opensearchpy import OpenSearch from typing_extensions import override -from key_value.sync.code_gen.protocols.key_value import KeyValueProtocol from key_value.sync.code_gen.stores.base import BaseStore from key_value.sync.code_gen.stores.opensearch import OpenSearchStore from key_value.sync.code_gen.stores.opensearch.store import ( @@ -142,12 +141,6 @@ def sanitizing_store(self, opensearch_client: OpenSearch) -> Generator[BaseStore with store: yield store - @override - @pytest.mark.timeout(120) - def test_store(self, store: BaseStore): - """Tests that the store is a valid KeyValueProtocol.""" - assert isinstance(store, KeyValueProtocol) is True - @pytest.mark.skip(reason="Distributed Caches are unbounded") @override def test_not_unbounded(self, store: BaseStore): ... From 338bafd5c9c1817bb6e3370149fd1bbe79cd077f Mon Sep 17 00:00:00 2001 From: William Easton Date: Mon, 10 Nov 2025 09:24:43 -0600 Subject: [PATCH 09/12] ES Test setup/cleanup --- .../elasticsearch/test_elasticsearch.py | 80 +++++++++---------- .../elasticsearch/test_elasticsearch.py | 80 +++++++++---------- 2 files changed, 76 insertions(+), 84 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index d47d6893..0225a8dc 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -99,14 +99,6 @@ async def setup_elasticsearch(self, request: pytest.FixtureRequest) -> AsyncGene yield - @pytest.fixture - async def es_client(self) -> AsyncGenerator[AsyncElasticsearch, None]: - async with AsyncElasticsearch(hosts=[ES_URL]) as es_client: - try: - yield es_client - finally: - await es_client.close() - @override @pytest.fixture async def store(self) -> ElasticsearchStore: @@ -122,10 +114,11 @@ async def sanitizing_store(self) -> ElasticsearchStore: ) @pytest.fixture(autouse=True) - async def cleanup_elasticsearch_indices(self, es_client: AsyncElasticsearch): - await cleanup_elasticsearch_indices(elasticsearch_client=es_client) - yield - await cleanup_elasticsearch_indices(elasticsearch_client=es_client) + async def cleanup_elasticsearch(self): + async with get_elasticsearch_client() as es_client: + await cleanup_elasticsearch_indices(elasticsearch_client=es_client) + yield + await cleanup_elasticsearch_indices(elasticsearch_client=es_client) @pytest.mark.skip(reason="Distributed Caches are unbounded") @override @@ -152,18 +145,19 @@ async def test_long_key_name(self, store: ElasticsearchStore, sanitizing_store: await sanitizing_store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) assert await sanitizing_store.get(collection="test_collection", key="test_key" * 100) == {"test": "test"} - async def test_put_put_two_indices(self, store: ElasticsearchStore, es_client: AsyncElasticsearch): + async def 
test_put_put_two_indices(self, store: ElasticsearchStore): await store.put(collection="test_collection", key="test_key", value={"test": "test"}) await store.put(collection="test_collection_2", key="test_key", value={"test": "test"}) assert await store.get(collection="test_collection", key="test_key") == {"test": "test"} assert await store.get(collection="test_collection_2", key="test_key") == {"test": "test"} - indices = await es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") - assert len(indices.body) == 2 - index_names: list[str] = [str(key) for key in indices] - assert index_names == snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) + async with get_elasticsearch_client() as es_client: + indices = await es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") + assert len(indices.body) == 2 + index_names: list[str] = [str(key) for key in indices] + assert index_names == snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) - async def test_value_stored_as_flattened_object(self, store: ElasticsearchStore, es_client: AsyncElasticsearch): + async def test_value_stored_as_flattened_object(self, store: ElasticsearchStore): """Verify values are stored as flattened objects, not JSON strings""" await store.put(collection="test", key="test_key", value={"name": "Alice", "age": 30}) @@ -172,30 +166,32 @@ async def test_value_stored_as_flattened_object(self, store: ElasticsearchStore, index_name = store._get_index_name(collection="test") # pyright: ignore[reportPrivateUsage] doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] - response = await es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Alice", "age": 30}}, - "created_at": IsStr(min_length=20, max_length=40), - } - ) - - # Test with TTL - await store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) - response = await es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Bob", "age": 25}}, - "created_at": IsStr(min_length=20, max_length=40), - "expires_at": IsStr(min_length=20, max_length=40), - } - ) + async with get_elasticsearch_client() as es_client: + response = await es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Alice", "age": 30}}, + "created_at": IsStr(min_length=20, max_length=40), + } + ) + + # Test with TTL + await store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) + + response = await es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Bob", "age": 25}}, + "created_at": IsStr(min_length=20, max_length=40), + "expires_at": IsStr(min_length=20, max_length=40), + } + ) @override async def test_special_characters_in_collection_name(self, store: ElasticsearchStore, sanitizing_store: ElasticsearchStore): # pyright: ignore[reportIncompatibleMethodOverride] diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py 
b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index 2bdf1274..526e64f2 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -100,14 +100,6 @@ def setup_elasticsearch(self, request: pytest.FixtureRequest) -> Generator[None, yield - @pytest.fixture - def es_client(self) -> Generator[Elasticsearch, None, None]: - with Elasticsearch(hosts=[ES_URL]) as es_client: - try: - yield es_client - finally: - es_client.close() - @override @pytest.fixture def store(self) -> ElasticsearchStore: @@ -123,10 +115,11 @@ def sanitizing_store(self) -> ElasticsearchStore: ) @pytest.fixture(autouse=True) - def cleanup_elasticsearch_indices(self, es_client: Elasticsearch): - cleanup_elasticsearch_indices(elasticsearch_client=es_client) - yield - cleanup_elasticsearch_indices(elasticsearch_client=es_client) + def cleanup_elasticsearch(self): + with get_elasticsearch_client() as es_client: + cleanup_elasticsearch_indices(elasticsearch_client=es_client) + yield + cleanup_elasticsearch_indices(elasticsearch_client=es_client) @pytest.mark.skip(reason="Distributed Caches are unbounded") @override @@ -153,18 +146,19 @@ def test_long_key_name(self, store: ElasticsearchStore, sanitizing_store: Elasti sanitizing_store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) assert sanitizing_store.get(collection="test_collection", key="test_key" * 100) == {"test": "test"} - def test_put_put_two_indices(self, store: ElasticsearchStore, es_client: Elasticsearch): + def test_put_put_two_indices(self, store: ElasticsearchStore): store.put(collection="test_collection", key="test_key", value={"test": "test"}) store.put(collection="test_collection_2", key="test_key", value={"test": "test"}) assert store.get(collection="test_collection", key="test_key") == {"test": "test"} assert store.get(collection="test_collection_2", key="test_key") == {"test": "test"} - indices = es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") - assert len(indices.body) == 2 - index_names: list[str] = [str(key) for key in indices] - assert index_names == snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) + with get_elasticsearch_client() as es_client: + indices = es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") + assert len(indices.body) == 2 + index_names: list[str] = [str(key) for key in indices] + assert index_names == snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) - def test_value_stored_as_flattened_object(self, store: ElasticsearchStore, es_client: Elasticsearch): + def test_value_stored_as_flattened_object(self, store: ElasticsearchStore): """Verify values are stored as flattened objects, not JSON strings""" store.put(collection="test", key="test_key", value={"name": "Alice", "age": 30}) @@ -173,30 +167,32 @@ def test_value_stored_as_flattened_object(self, store: ElasticsearchStore, es_cl index_name = store._get_index_name(collection="test") # pyright: ignore[reportPrivateUsage] doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] - response = es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Alice", "age": 30}}, - "created_at": IsStr(min_length=20, 
max_length=40), - } - ) - - # Test with TTL - store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) - response = es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Bob", "age": 25}}, - "created_at": IsStr(min_length=20, max_length=40), - "expires_at": IsStr(min_length=20, max_length=40), - } - ) + with get_elasticsearch_client() as es_client: + response = es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Alice", "age": 30}}, + "created_at": IsStr(min_length=20, max_length=40), + } + ) + + # Test with TTL + store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) + + response = es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Bob", "age": 25}}, + "created_at": IsStr(min_length=20, max_length=40), + "expires_at": IsStr(min_length=20, max_length=40), + } + ) @override def test_special_characters_in_collection_name(self, store: ElasticsearchStore, sanitizing_store: ElasticsearchStore): # pyright: ignore[reportIncompatibleMethodOverride] From 01e813a8fd2d5e4f75d770b0f1a84b84f29a9233 Mon Sep 17 00:00:00 2001 From: William Easton Date: Mon, 10 Nov 2025 10:16:56 -0600 Subject: [PATCH 10/12] Small updates for store tests --- .../src/key_value/aio/stores/elasticsearch/store.py | 2 ++ .../tests/stores/elasticsearch/test_elasticsearch.py | 2 +- .../key-value-aio/tests/stores/opensearch/test_opensearch.py | 2 +- .../src/key_value/sync/code_gen/stores/elasticsearch/store.py | 2 ++ .../tests/code_gen/stores/elasticsearch/test_elasticsearch.py | 2 +- .../tests/code_gen/stores/opensearch/test_opensearch.py | 2 +- 6 files changed, 8 insertions(+), 4 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py index 9b120157..19c83e19 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py @@ -270,6 +270,8 @@ async def _setup_collection(self, *, collection: str) -> None: except BadRequestError as e: if "index_already_exists_exception" in str(e).lower(): return + if "resource_already_exists_exception" in str(e).lower(): + return raise def _get_index_name(self, collection: str) -> str: diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index 0225a8dc..9944d8e9 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -23,7 +23,7 @@ ES_HOST = "localhost" ES_PORT = 9200 ES_URL = f"http://{ES_HOST}:{ES_PORT}" -ES_CONTAINER_PORT = 9200 +ES_CONTAINER_PORT = 19200 WAIT_FOR_ELASTICSEARCH_TIMEOUT = 30 diff --git a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py index 77c78cec..5bdda247 100644 --- a/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py +++ 
b/key-value/key-value-aio/tests/stores/opensearch/test_opensearch.py @@ -25,7 +25,7 @@ LOCALHOST = "localhost" CONTAINER_PORT = 9200 -HOST_PORT = 19200 +HOST_PORT = 19201 OPENSEARCH_URL = f"http://{LOCALHOST}:{HOST_PORT}" diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py index ad3e9d05..81b7ebcd 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py @@ -242,6 +242,8 @@ def _setup_collection(self, *, collection: str) -> None: except BadRequestError as e: if "index_already_exists_exception" in str(e).lower(): return + if "resource_already_exists_exception" in str(e).lower(): + return raise def _get_index_name(self, collection: str) -> str: diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index 526e64f2..6d938a9c 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -26,7 +26,7 @@ ES_HOST = "localhost" ES_PORT = 9200 ES_URL = f"http://{ES_HOST}:{ES_PORT}" -ES_CONTAINER_PORT = 9200 +ES_CONTAINER_PORT = 19200 WAIT_FOR_ELASTICSEARCH_TIMEOUT = 30 # Released Apr 2025 diff --git a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py index ad5acd4d..d1527d4f 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/opensearch/test_opensearch.py @@ -28,7 +28,7 @@ LOCALHOST = "localhost" CONTAINER_PORT = 9200 -HOST_PORT = 19200 +HOST_PORT = 19201 OPENSEARCH_URL = f"http://{LOCALHOST}:{HOST_PORT}" From 313e702dc34e5f6a1e098425f8104a3b5a1b4d88 Mon Sep 17 00:00:00 2001 From: William Easton Date: Mon, 10 Nov 2025 10:25:42 -0600 Subject: [PATCH 11/12] Use host port for ES tests --- .../tests/stores/elasticsearch/test_elasticsearch.py | 8 ++++---- .../code_gen/stores/elasticsearch/test_elasticsearch.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index 9944d8e9..cea733e5 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -21,9 +21,9 @@ TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB ES_HOST = "localhost" -ES_PORT = 9200 -ES_URL = f"http://{ES_HOST}:{ES_PORT}" -ES_CONTAINER_PORT = 19200 +CONTAINER_PORT = 9200 +HOST_PORT = 19200 +ES_URL = f"http://{ES_HOST}:{HOST_PORT}" WAIT_FOR_ELASTICSEARCH_TIMEOUT = 30 @@ -90,7 +90,7 @@ async def setup_elasticsearch(self, request: pytest.FixtureRequest) -> AsyncGene with docker_container( f"elasticsearch-test-{version}", es_image, - {str(ES_CONTAINER_PORT): ES_PORT}, + {str(CONTAINER_PORT): HOST_PORT}, {"discovery.type": "single-node", "xpack.security.enabled": "false"}, ): if not await async_wait_for_true(bool_fn=ping_elasticsearch, tries=WAIT_FOR_ELASTICSEARCH_TIMEOUT, wait_time=2): diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py 
b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index 6d938a9c..9f7d94a7 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -24,9 +24,9 @@ TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB ES_HOST = "localhost" -ES_PORT = 9200 -ES_URL = f"http://{ES_HOST}:{ES_PORT}" -ES_CONTAINER_PORT = 19200 +CONTAINER_PORT = 9200 +HOST_PORT = 19200 +ES_URL = f"http://{ES_HOST}:{HOST_PORT}" WAIT_FOR_ELASTICSEARCH_TIMEOUT = 30 # Released Apr 2025 @@ -91,7 +91,7 @@ def setup_elasticsearch(self, request: pytest.FixtureRequest) -> Generator[None, with docker_container( f"elasticsearch-test-{version}", es_image, - {str(ES_CONTAINER_PORT): ES_PORT}, + {str(CONTAINER_PORT): HOST_PORT}, {"discovery.type": "single-node", "xpack.security.enabled": "false"}, ): if not wait_for_true(bool_fn=ping_elasticsearch, tries=WAIT_FOR_ELASTICSEARCH_TIMEOUT, wait_time=2): From e30894fb548bb5470e65467c6172e5831c9ef68d Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 18:13:57 +0000 Subject: [PATCH 12/12] revert: remove Elasticsearch changes from OpenSearch PR These changes were unrelated to OpenSearch support and should be in a separate PR. Co-authored-by: William Easton --- .../aio/stores/elasticsearch/store.py | 2 - .../elasticsearch/test_elasticsearch.py | 96 ++++++++++--------- .../code_gen/stores/elasticsearch/store.py | 4 +- .../elasticsearch/test_elasticsearch.py | 96 ++++++++++--------- 4 files changed, 107 insertions(+), 91 deletions(-) diff --git a/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py b/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py index 19c83e19..9b120157 100644 --- a/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py +++ b/key-value/key-value-aio/src/key_value/aio/stores/elasticsearch/store.py @@ -270,8 +270,6 @@ async def _setup_collection(self, *, collection: str) -> None: except BadRequestError as e: if "index_already_exists_exception" in str(e).lower(): return - if "resource_already_exists_exception" in str(e).lower(): - return raise def _get_index_name(self, collection: str) -> str: diff --git a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py index cea733e5..fc89561d 100644 --- a/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-aio/tests/stores/elasticsearch/test_elasticsearch.py @@ -1,5 +1,6 @@ from collections.abc import AsyncGenerator from datetime import datetime, timedelta, timezone +from typing import TYPE_CHECKING, Any import pytest from dirty_equals import IsFloat, IsStr @@ -19,11 +20,14 @@ from tests.conftest import docker_container, should_skip_docker_tests from tests.stores.base import BaseStoreTests, ContextManagerStoreTestMixin +if TYPE_CHECKING: + from elastic_transport._response import ObjectApiResponse + TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB ES_HOST = "localhost" -CONTAINER_PORT = 9200 -HOST_PORT = 19200 -ES_URL = f"http://{ES_HOST}:{HOST_PORT}" +ES_PORT = 9200 +ES_URL = f"http://{ES_HOST}:{ES_PORT}" +ES_CONTAINER_PORT = 9200 WAIT_FOR_ELASTICSEARCH_TIMEOUT = 30 @@ -41,7 +45,12 @@ async def ping_elasticsearch() -> bool: es_client: AsyncElasticsearch = get_elasticsearch_client() async with es_client: - return await es_client.ping() + if not 
await es_client.ping(): + return False + + status: ObjectApiResponse[dict[str, Any]] = await es_client.options(ignore_status=404).cluster.health(wait_for_status="green") + + return status.body.get("status") == "green" async def cleanup_elasticsearch_indices(elasticsearch_client: AsyncElasticsearch): @@ -90,7 +99,7 @@ async def setup_elasticsearch(self, request: pytest.FixtureRequest) -> AsyncGene with docker_container( f"elasticsearch-test-{version}", es_image, - {str(CONTAINER_PORT): HOST_PORT}, + {str(ES_CONTAINER_PORT): ES_PORT}, {"discovery.type": "single-node", "xpack.security.enabled": "false"}, ): if not await async_wait_for_true(bool_fn=ping_elasticsearch, tries=WAIT_FOR_ELASTICSEARCH_TIMEOUT, wait_time=2): @@ -99,6 +108,11 @@ async def setup_elasticsearch(self, request: pytest.FixtureRequest) -> AsyncGene yield + @pytest.fixture + async def es_client(self) -> AsyncGenerator[AsyncElasticsearch, None]: + async with AsyncElasticsearch(hosts=[ES_URL]) as es_client: + yield es_client + @override @pytest.fixture async def store(self) -> ElasticsearchStore: @@ -114,11 +128,10 @@ async def sanitizing_store(self) -> ElasticsearchStore: ) @pytest.fixture(autouse=True) - async def cleanup_elasticsearch(self): - async with get_elasticsearch_client() as es_client: - await cleanup_elasticsearch_indices(elasticsearch_client=es_client) - yield - await cleanup_elasticsearch_indices(elasticsearch_client=es_client) + async def cleanup_elasticsearch_indices(self, es_client: AsyncElasticsearch): + await cleanup_elasticsearch_indices(elasticsearch_client=es_client) + yield + await cleanup_elasticsearch_indices(elasticsearch_client=es_client) @pytest.mark.skip(reason="Distributed Caches are unbounded") @override @@ -145,19 +158,18 @@ async def test_long_key_name(self, store: ElasticsearchStore, sanitizing_store: await sanitizing_store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) assert await sanitizing_store.get(collection="test_collection", key="test_key" * 100) == {"test": "test"} - async def test_put_put_two_indices(self, store: ElasticsearchStore): + async def test_put_put_two_indices(self, store: ElasticsearchStore, es_client: AsyncElasticsearch): await store.put(collection="test_collection", key="test_key", value={"test": "test"}) await store.put(collection="test_collection_2", key="test_key", value={"test": "test"}) assert await store.get(collection="test_collection", key="test_key") == {"test": "test"} assert await store.get(collection="test_collection_2", key="test_key") == {"test": "test"} - async with get_elasticsearch_client() as es_client: - indices = await es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") - assert len(indices.body) == 2 - index_names: list[str] = [str(key) for key in indices] - assert index_names == snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) + indices = await es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") + assert len(indices.body) == 2 + index_names: list[str] = [str(key) for key in indices] + assert index_names == snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) - async def test_value_stored_as_flattened_object(self, store: ElasticsearchStore): + async def test_value_stored_as_flattened_object(self, store: ElasticsearchStore, es_client: AsyncElasticsearch): """Verify values are stored as flattened objects, not JSON strings""" await store.put(collection="test", key="test_key", value={"name": 
"Alice", "age": 30}) @@ -166,32 +178,30 @@ async def test_value_stored_as_flattened_object(self, store: ElasticsearchStore) index_name = store._get_index_name(collection="test") # pyright: ignore[reportPrivateUsage] doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] - async with get_elasticsearch_client() as es_client: - response = await es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Alice", "age": 30}}, - "created_at": IsStr(min_length=20, max_length=40), - } - ) - - # Test with TTL - await store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) - - response = await es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Bob", "age": 25}}, - "created_at": IsStr(min_length=20, max_length=40), - "expires_at": IsStr(min_length=20, max_length=40), - } - ) + response = await es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Alice", "age": 30}}, + "created_at": IsStr(min_length=20, max_length=40), + } + ) + + # Test with TTL + await store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) + response = await es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Bob", "age": 25}}, + "created_at": IsStr(min_length=20, max_length=40), + "expires_at": IsStr(min_length=20, max_length=40), + } + ) @override async def test_special_characters_in_collection_name(self, store: ElasticsearchStore, sanitizing_store: ElasticsearchStore): # pyright: ignore[reportIncompatibleMethodOverride] diff --git a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py index 81b7ebcd..cbd016cb 100644 --- a/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py +++ b/key-value/key-value-sync/src/key_value/sync/code_gen/stores/elasticsearch/store.py @@ -38,7 +38,7 @@ get_source_from_body, ) except ImportError as e: - msg = "ElasticsearchStore requires py-key-value-sync[elasticsearch]" + msg = "ElasticsearchStore requires py-key-value-aio[elasticsearch]" raise ImportError(msg) from e logger = logging.getLogger(__name__) @@ -242,8 +242,6 @@ def _setup_collection(self, *, collection: str) -> None: except BadRequestError as e: if "index_already_exists_exception" in str(e).lower(): return - if "resource_already_exists_exception" in str(e).lower(): - return raise def _get_index_name(self, collection: str) -> str: diff --git a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py index 9f7d94a7..778178a8 100644 --- a/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py +++ b/key-value/key-value-sync/tests/code_gen/stores/elasticsearch/test_elasticsearch.py @@ -3,6 +3,7 @@ # DO NOT CHANGE! Change the original file instead. 
from collections.abc import Generator from datetime import datetime, timedelta, timezone +from typing import TYPE_CHECKING, Any import pytest from dirty_equals import IsFloat, IsStr @@ -22,11 +23,14 @@ from tests.code_gen.conftest import docker_container, should_skip_docker_tests from tests.code_gen.stores.base import BaseStoreTests, ContextManagerStoreTestMixin +if TYPE_CHECKING: + from elastic_transport._response import ObjectApiResponse + TEST_SIZE_LIMIT = 1 * 1024 * 1024 # 1MB ES_HOST = "localhost" -CONTAINER_PORT = 9200 -HOST_PORT = 19200 -ES_URL = f"http://{ES_HOST}:{HOST_PORT}" +ES_PORT = 9200 +ES_URL = f"http://{ES_HOST}:{ES_PORT}" +ES_CONTAINER_PORT = 9200 WAIT_FOR_ELASTICSEARCH_TIMEOUT = 30 # Released Apr 2025 @@ -42,7 +46,12 @@ def ping_elasticsearch() -> bool: es_client: Elasticsearch = get_elasticsearch_client() with es_client: - return es_client.ping() + if not es_client.ping(): + return False + + status: ObjectApiResponse[dict[str, Any]] = es_client.options(ignore_status=404).cluster.health(wait_for_status="green") + + return status.body.get("status") == "green" def cleanup_elasticsearch_indices(elasticsearch_client: Elasticsearch): @@ -91,7 +100,7 @@ def setup_elasticsearch(self, request: pytest.FixtureRequest) -> Generator[None, with docker_container( f"elasticsearch-test-{version}", es_image, - {str(CONTAINER_PORT): HOST_PORT}, + {str(ES_CONTAINER_PORT): ES_PORT}, {"discovery.type": "single-node", "xpack.security.enabled": "false"}, ): if not wait_for_true(bool_fn=ping_elasticsearch, tries=WAIT_FOR_ELASTICSEARCH_TIMEOUT, wait_time=2): @@ -100,6 +109,11 @@ def setup_elasticsearch(self, request: pytest.FixtureRequest) -> Generator[None, yield + @pytest.fixture + def es_client(self) -> Generator[Elasticsearch, None, None]: + with Elasticsearch(hosts=[ES_URL]) as es_client: + yield es_client + @override @pytest.fixture def store(self) -> ElasticsearchStore: @@ -115,11 +129,10 @@ def sanitizing_store(self) -> ElasticsearchStore: ) @pytest.fixture(autouse=True) - def cleanup_elasticsearch(self): - with get_elasticsearch_client() as es_client: - cleanup_elasticsearch_indices(elasticsearch_client=es_client) - yield - cleanup_elasticsearch_indices(elasticsearch_client=es_client) + def cleanup_elasticsearch_indices(self, es_client: Elasticsearch): + cleanup_elasticsearch_indices(elasticsearch_client=es_client) + yield + cleanup_elasticsearch_indices(elasticsearch_client=es_client) @pytest.mark.skip(reason="Distributed Caches are unbounded") @override @@ -146,19 +159,18 @@ def test_long_key_name(self, store: ElasticsearchStore, sanitizing_store: Elasti sanitizing_store.put(collection="test_collection", key="test_key" * 100, value={"test": "test"}) assert sanitizing_store.get(collection="test_collection", key="test_key" * 100) == {"test": "test"} - def test_put_put_two_indices(self, store: ElasticsearchStore): + def test_put_put_two_indices(self, store: ElasticsearchStore, es_client: Elasticsearch): store.put(collection="test_collection", key="test_key", value={"test": "test"}) store.put(collection="test_collection_2", key="test_key", value={"test": "test"}) assert store.get(collection="test_collection", key="test_key") == {"test": "test"} assert store.get(collection="test_collection_2", key="test_key") == {"test": "test"} - with get_elasticsearch_client() as es_client: - indices = es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") - assert len(indices.body) == 2 - index_names: list[str] = [str(key) for key in indices] - assert index_names == 
snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) + indices = es_client.options(ignore_status=404).indices.get(index="kv-store-e2e-test-*") + assert len(indices.body) == 2 + index_names: list[str] = [str(key) for key in indices] + assert index_names == snapshot(["kv-store-e2e-test-test_collection", "kv-store-e2e-test-test_collection_2"]) - def test_value_stored_as_flattened_object(self, store: ElasticsearchStore): + def test_value_stored_as_flattened_object(self, store: ElasticsearchStore, es_client: Elasticsearch): """Verify values are stored as flattened objects, not JSON strings""" store.put(collection="test", key="test_key", value={"name": "Alice", "age": 30}) @@ -167,32 +179,30 @@ def test_value_stored_as_flattened_object(self, store: ElasticsearchStore): index_name = store._get_index_name(collection="test") # pyright: ignore[reportPrivateUsage] doc_id = store._get_document_id(key="test_key") # pyright: ignore[reportPrivateUsage] - with get_elasticsearch_client() as es_client: - response = es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Alice", "age": 30}}, - "created_at": IsStr(min_length=20, max_length=40), - } - ) - - # Test with TTL - store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) - - response = es_client.get(index=index_name, id=doc_id) - assert response.body["_source"] == snapshot( - { - "version": 1, - "key": "test_key", - "collection": "test", - "value": {"flattened": {"name": "Bob", "age": 25}}, - "created_at": IsStr(min_length=20, max_length=40), - "expires_at": IsStr(min_length=20, max_length=40), - } - ) + response = es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Alice", "age": 30}}, + "created_at": IsStr(min_length=20, max_length=40), + } + ) + + # Test with TTL + store.put(collection="test", key="test_key", value={"name": "Bob", "age": 25}, ttl=10) + response = es_client.get(index=index_name, id=doc_id) + assert response.body["_source"] == snapshot( + { + "version": 1, + "key": "test_key", + "collection": "test", + "value": {"flattened": {"name": "Bob", "age": 25}}, + "created_at": IsStr(min_length=20, max_length=40), + "expires_at": IsStr(min_length=20, max_length=40), + } + ) @override def test_special_characters_in_collection_name(self, store: ElasticsearchStore, sanitizing_store: ElasticsearchStore): # pyright: ignore[reportIncompatibleMethodOverride]
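
The earlier patch in this series that dropped the _put_managed_entries and _delete_managed_entries overrides presumably leaves batched puts and deletes to fall back to the store's per-key path; the removed helpers had driven opensearch-py's bulk endpoint with alternating action and document entries. A compressed sketch of that wire shape — the index name, document id, and payload here are illustrative, and the serializer's full output also includes timestamp fields:

    from opensearchpy import AsyncOpenSearch

    async def bulk_put_sketch(client: AsyncOpenSearch) -> None:
        # One action header per document, immediately followed by the document body.
        operations = [
            {"index": {"_index": "opensearch_kv_store-users", "_id": "user-1"}},
            {"key": "user-1", "collection": "users", "value": {"flattened": {"plan": "pro"}}},
        ]
        # refresh=true makes the writes visible to the immediate reads the tests perform.
        await client.bulk(body=operations, params={"refresh": "true"})
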