|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import asyncio |
| 4 | +from logging import getLogger |
| 5 | +from typing import TYPE_CHECKING, Any |
| 6 | + |
| 7 | +from apify._crypto import create_hmac_signature |
| 8 | +from apify_client import ApifyClientAsync |
| 9 | +from typing_extensions import override |
| 10 | +from yarl import URL |
| 11 | + |
| 12 | +from crawlee.storage_clients._base import KeyValueStoreClient |
| 13 | +from crawlee.storage_clients.models import ( |
| 14 | + KeyValueStoreListKeysPage, |
| 15 | + KeyValueStoreMetadata, |
| 16 | + KeyValueStoreRecord, |
| 17 | + KeyValueStoreRecordMetadata, |
| 18 | +) |
| 19 | + |
| 20 | +if TYPE_CHECKING: |
| 21 | + from collections.abc import AsyncIterator |
| 22 | + from datetime import datetime |
| 23 | + |
| 24 | + from apify import Configuration |
| 25 | + from apify_client.clients import KeyValueStoreClientAsync |
| 26 | + |
| 27 | +logger = getLogger(__name__) |
| 28 | + |
| 29 | + |
| 30 | +class ApifyKeyValueStoreClient(KeyValueStoreClient): |
| 31 | + """An Apify platform implementation of the key-value store client.""" |
| 32 | + |
| 33 | + def __init__( |
| 34 | + self, |
| 35 | + *, |
| 36 | + id: str, |
| 37 | + name: str | None, |
| 38 | + created_at: datetime, |
| 39 | + accessed_at: datetime, |
| 40 | + modified_at: datetime, |
| 41 | + api_client: KeyValueStoreClientAsync, |
| 42 | + ) -> None: |
| 43 | + """Initialize a new instance. |
| 44 | +
|
| 45 | + Preferably use the `ApifyKeyValueStoreClient.open` class method to create a new instance. |
| 46 | + """ |
| 47 | + self._metadata = KeyValueStoreMetadata( |
| 48 | + id=id, |
| 49 | + name=name, |
| 50 | + created_at=created_at, |
| 51 | + accessed_at=accessed_at, |
| 52 | + modified_at=modified_at, |
| 53 | + ) |
| 54 | + |
| 55 | + self._api_client = api_client |
| 56 | + """The Apify key-value store client for API operations.""" |
| 57 | + |
| 58 | + self._lock = asyncio.Lock() |
| 59 | + """A lock to ensure that only one operation is performed at a time.""" |
| 60 | + |
| 61 | + @override |
| 62 | + @property |
| 63 | + def metadata(self) -> KeyValueStoreMetadata: |
| 64 | + return self._metadata |
| 65 | + |
| 66 | + @override |
| 67 | + @classmethod |
| 68 | + async def open( |
| 69 | + cls, |
| 70 | + *, |
| 71 | + id: str | None, |
| 72 | + name: str | None, |
| 73 | + configuration: Configuration, |
| 74 | + ) -> ApifyKeyValueStoreClient: |
| 75 | + token = configuration.token |
| 76 | + api_url = configuration.api_base_url |
| 77 | + |
| 78 | + # Otherwise, create a new one. |
| 79 | + apify_client_async = ApifyClientAsync( |
| 80 | + token=token, |
| 81 | + api_url=api_url, |
| 82 | + max_retries=8, |
| 83 | + min_delay_between_retries_millis=500, |
| 84 | + timeout_secs=360, |
| 85 | + ) |
| 86 | + |
| 87 | + apify_kvss_client = apify_client_async.key_value_stores() |
| 88 | + |
| 89 | + metadata = KeyValueStoreMetadata.model_validate( |
| 90 | + await apify_kvss_client.get_or_create(name=id if id is not None else name), |
| 91 | + ) |
| 92 | + |
| 93 | + apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=metadata.id) |
| 94 | + |
| 95 | + return cls( |
| 96 | + id=metadata.id, |
| 97 | + name=metadata.name, |
| 98 | + created_at=metadata.created_at, |
| 99 | + accessed_at=metadata.accessed_at, |
| 100 | + modified_at=metadata.modified_at, |
| 101 | + api_client=apify_kvs_client, |
| 102 | + ) |
| 103 | + |
| 104 | + @override |
| 105 | + async def purge(self) -> None: |
| 106 | + async with self._lock: |
| 107 | + await self._api_client.delete() |
| 108 | + |
| 109 | + @override |
| 110 | + async def drop(self) -> None: |
| 111 | + async with self._lock: |
| 112 | + await self._api_client.delete() |
| 113 | + |
| 114 | + @override |
| 115 | + async def get_value(self, key: str) -> KeyValueStoreRecord | None: |
| 116 | + response = await self._api_client.get_record(key) |
| 117 | + record = KeyValueStoreRecord.model_validate(response) if response else None |
| 118 | + await self._update_metadata() |
| 119 | + return record |
| 120 | + |
| 121 | + @override |
| 122 | + async def set_value(self, key: str, value: Any, content_type: str | None = None) -> None: |
| 123 | + async with self._lock: |
| 124 | + await self._api_client.set_record( |
| 125 | + key=key, |
| 126 | + value=value, |
| 127 | + content_type=content_type, |
| 128 | + ) |
| 129 | + await self._update_metadata() |
| 130 | + |
| 131 | + @override |
| 132 | + async def delete_value(self, key: str) -> None: |
| 133 | + async with self._lock: |
| 134 | + await self._api_client.delete_record(key=key) |
| 135 | + await self._update_metadata() |
| 136 | + |
| 137 | + @override |
| 138 | + async def iterate_keys( |
| 139 | + self, |
| 140 | + *, |
| 141 | + exclusive_start_key: str | None = None, |
| 142 | + limit: int | None = None, |
| 143 | + ) -> AsyncIterator[KeyValueStoreRecordMetadata]: |
| 144 | + count = 0 |
| 145 | + |
| 146 | + while True: |
| 147 | + response = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key) |
| 148 | + list_key_page = KeyValueStoreListKeysPage.model_validate(response) |
| 149 | + |
| 150 | + for item in list_key_page.items: |
| 151 | + yield item |
| 152 | + count += 1 |
| 153 | + |
| 154 | + # If we've reached the limit, stop yielding |
| 155 | + if limit and count >= limit: |
| 156 | + break |
| 157 | + |
| 158 | + # If we've reached the limit or there are no more pages, exit the loop |
| 159 | + if (limit and count >= limit) or not list_key_page.is_truncated: |
| 160 | + break |
| 161 | + |
| 162 | + exclusive_start_key = list_key_page.next_exclusive_start_key |
| 163 | + |
| 164 | + await self._update_metadata() |
| 165 | + |
| 166 | + async def get_public_url(self, key: str) -> str: |
| 167 | + """Get a URL for the given key that may be used to publicly access the value in the remote key-value store. |
| 168 | +
|
| 169 | + Args: |
| 170 | + key: The key for which the URL should be generated. |
| 171 | + """ |
| 172 | + if self._api_client.resource_id is None: |
| 173 | + raise ValueError('resource_id cannot be None when generating a public URL') |
| 174 | + |
| 175 | + public_url = ( |
| 176 | + URL(self._api_client.base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key |
| 177 | + ) |
| 178 | + |
| 179 | + key_value_store = self.metadata |
| 180 | + |
| 181 | + if key_value_store and key_value_store.model_extra: |
| 182 | + url_signing_secret_key = key_value_store.model_extra.get('urlSigningSecretKey') |
| 183 | + if url_signing_secret_key: |
| 184 | + public_url = public_url.with_query(signature=create_hmac_signature(url_signing_secret_key, key)) |
| 185 | + |
| 186 | + return str(public_url) |
| 187 | + |
| 188 | + async def _update_metadata(self) -> None: |
| 189 | + """Update the key-value store metadata with current information.""" |
| 190 | + metadata = await self._api_client.get() |
| 191 | + self._metadata = KeyValueStoreMetadata.model_validate(metadata) |
0 commit comments