Skip to content

Commit 685db9f

Browse files
committed
rm code duplication for open methods
1 parent 7f2e6b0 commit 685db9f

File tree

4 files changed

+65
-83
lines changed

4 files changed

+65
-83
lines changed

src/crawlee/storages/_dataset.py

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from ._base import Storage
1414
from ._key_value_store import KeyValueStore
15+
from ._utils import open_storage_instance
1516

1617
if TYPE_CHECKING:
1718
from collections.abc import AsyncIterator
@@ -109,39 +110,18 @@ async def open(
109110
configuration: Configuration | None = None,
110111
storage_client: StorageClient | None = None,
111112
) -> Dataset:
112-
if id and name:
113-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
114-
115-
# Check for default instance if no id or name provided
116-
if id is None and name is None and cls._default_instance is not None:
117-
return cls._default_instance
118-
119-
# Check if the dataset is already cached
120-
if id is not None and id in cls._cache_by_id:
121-
return cls._cache_by_id[id]
122-
if name is not None and name in cls._cache_by_name:
123-
return cls._cache_by_name[name]
124-
125113
configuration = service_locator.get_configuration() if configuration is None else configuration
126114
storage_client = service_locator.get_storage_client() if storage_client is None else storage_client
127-
128-
client = await storage_client.open_dataset_client(
115+
return await open_storage_instance(
116+
cls,
129117
id=id,
130118
name=name,
131119
configuration=configuration,
120+
cache_by_id=cls._cache_by_id,
121+
cache_by_name=cls._cache_by_name,
122+
default_instance_attr='_default_instance',
123+
client_opener=storage_client.open_dataset_client,
132124
)
133-
dataset = cls(client)
134-
135-
# Cache the dataset instance by ID and name
136-
cls._cache_by_id[dataset.id] = dataset
137-
if dataset.name is not None:
138-
cls._cache_by_name[dataset.name] = dataset
139-
140-
# Store as default instance if neither id nor name was provided
141-
if id is None and name is None:
142-
cls._default_instance = dataset
143-
144-
return dataset
145125

146126
@override
147127
async def drop(self) -> None:

src/crawlee/storages/_key_value_store.py

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from crawlee.storage_clients.models import KeyValueStoreMetadata
1515

1616
from ._base import Storage
17+
from ._utils import open_storage_instance
1718

1819
if TYPE_CHECKING:
1920
from collections.abc import AsyncIterator
@@ -117,41 +118,19 @@ async def open(
117118
configuration: Configuration | None = None,
118119
storage_client: StorageClient | None = None,
119120
) -> KeyValueStore:
120-
if id and name:
121-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
122-
123-
# Check for default instance if no id or name provided
124-
if id is None and name is None and cls._default_instance is not None:
125-
return cls._default_instance
126-
127-
# Check if the key-value store is already cached
128-
if id is not None and id in cls._cache_by_id:
129-
return cls._cache_by_id[id]
130-
if name is not None and name in cls._cache_by_name:
131-
return cls._cache_by_name[name]
132-
133121
configuration = service_locator.get_configuration() if configuration is None else configuration
134122
storage_client = service_locator.get_storage_client() if storage_client is None else storage_client
135-
136-
client = await storage_client.open_key_value_store_client(
123+
return await open_storage_instance(
124+
cls,
137125
id=id,
138126
name=name,
139127
configuration=configuration,
128+
cache_by_id=cls._cache_by_id,
129+
cache_by_name=cls._cache_by_name,
130+
default_instance_attr='_default_instance',
131+
client_opener=storage_client.open_key_value_store_client,
140132
)
141133

142-
kvs = cls(client)
143-
144-
# Cache the key-value store instance by ID and name
145-
cls._cache_by_id[kvs.id] = kvs
146-
if kvs.name is not None:
147-
cls._cache_by_name[kvs.name] = kvs
148-
149-
# Store as default instance if neither id nor name was provided
150-
if id is None and name is None:
151-
cls._default_instance = kvs
152-
153-
return kvs
154-
155134
@override
156135
async def drop(self) -> None:
157136
if self.id in self._cache_by_id:

src/crawlee/storages/_request_queue.py

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from crawlee.request_loaders import RequestManager
1414

1515
from ._base import Storage
16+
from ._utils import open_storage_instance
1617

1718
if TYPE_CHECKING:
1819
from collections.abc import Sequence
@@ -127,41 +128,19 @@ async def open(
127128
configuration: Configuration | None = None,
128129
storage_client: StorageClient | None = None,
129130
) -> RequestQueue:
130-
if id and name:
131-
raise ValueError('Only one of "id" or "name" can be specified, not both.')
132-
133-
# Check for default instance if no id or name provided
134-
if id is None and name is None and cls._default_instance is not None:
135-
return cls._default_instance
136-
137-
# Check if the request queue is already cached
138-
if id is not None and id in cls._cache_by_id:
139-
return cls._cache_by_id[id]
140-
if name is not None and name in cls._cache_by_name:
141-
return cls._cache_by_name[name]
142-
143131
configuration = service_locator.get_configuration() if configuration is None else configuration
144132
storage_client = service_locator.get_storage_client() if storage_client is None else storage_client
145-
146-
client = await storage_client.open_request_queue_client(
133+
return await open_storage_instance(
134+
cls,
147135
id=id,
148136
name=name,
149137
configuration=configuration,
138+
cache_by_id=cls._cache_by_id,
139+
cache_by_name=cls._cache_by_name,
140+
default_instance_attr='_default_instance',
141+
client_opener=storage_client.open_request_queue_client,
150142
)
151143

152-
request_queue = cls(client)
153-
154-
# Cache the request queue instance by ID and name
155-
cls._cache_by_id[request_queue.id] = request_queue
156-
if request_queue.name is not None:
157-
cls._cache_by_name[request_queue.name] = request_queue
158-
159-
# Store as default instance if neither id nor name was provided
160-
if id is None and name is None:
161-
cls._default_instance = request_queue
162-
163-
return request_queue
164-
165144
@override
166145
async def drop(self) -> None:
167146
# Remove from cache before dropping

src/crawlee/storages/_utils.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from __future__ import annotations
2+
3+
from typing import Any, Callable, TypeVar, cast
4+
5+
from ._base import Storage
6+
7+
# Type variable bound to `Storage`; it links the class passed to `open_storage_instance`
# with the value type of its caches and with its return type.
T = TypeVar('T', bound=Storage)
8+
9+
10+
async def open_storage_instance(
    cls: type[T],
    *,
    id: str | None,
    name: str | None,
    configuration: Any,
    cache_by_id: dict[str, T],
    cache_by_name: dict[str, T],
    default_instance_attr: str,
    client_opener: Callable[..., Any],
) -> T:
    """Open a storage instance of `cls`, reusing an already opened one when available.

    Lookup order: the default instance (only when neither `id` nor `name` is given),
    then `cache_by_id`, then `cache_by_name`. On a miss, a client is obtained from
    `client_opener`, wrapped in `cls`, and stored in the caches.

    Args:
        cls: The storage class to instantiate; called as `cls(client)`.
        id: The storage ID, or `None` if not specified.
        name: The storage name, or `None` if not specified.
        configuration: Forwarded unchanged to `client_opener`.
        cache_by_id: Mapping of storage ID to instance; read and updated in place.
        cache_by_name: Mapping of storage name to instance; read and updated in place.
        default_instance_attr: Name of the attribute on `cls` that holds the default
            instance (or `None`).
        client_opener: Awaitable factory called with the keyword arguments `id`, `name`
            and `configuration`; its result is passed to `cls`.

    Returns:
        The cached or newly created storage instance.

    Raises:
        ValueError: If both `id` and `name` are specified.
    """
    # `None` is what every branch below treats as "not specified", so the guard uses
    # the same test. A truthiness check would let a pair such as `id=''`, `name='x'`
    # slip through and forward both values to the client opener.
    if id is not None and name is not None:
        raise ValueError('Only one of "id" or "name" can be specified, not both.')

    is_default_request = id is None and name is None

    default_instance = getattr(cls, default_instance_attr)
    if is_default_request and default_instance is not None:
        return cast('T', default_instance)

    if id is not None and id in cache_by_id:
        return cache_by_id[id]
    if name is not None and name in cache_by_name:
        return cache_by_name[name]

    # NOTE(review): the caches are only written after this await, so two overlapping
    # calls for the same storage can each create an instance; the later one overwrites
    # the cache entry. Confirm whether callers rely on a single instance.
    client = await client_opener(id=id, name=name, configuration=configuration)
    instance = cls(client)  # type: ignore[call-arg]

    cache_by_id[instance.id] = instance

    # An instance without a name is cached by ID only.
    instance_name = getattr(instance, 'name', None)
    if instance_name is not None:
        cache_by_name[instance_name] = instance

    # Remember the instance opened without id/name so later default opens reuse it.
    if is_default_request:
        setattr(cls, default_instance_attr, instance)

    return instance

0 commit comments

Comments
 (0)