Skip to content

Commit f499df7

Browse files
committed
Dataset and KVS tests
1 parent 119a108 commit f499df7

File tree

4 files changed

+679 -346 lines changed

src/crawlee/configuration.py

Lines changed: 2 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -118,21 +118,7 @@ class Configuration(BaseSettings):
118118
)
119119
),
120120
] = True
121-
"""Whether to purge the storage on the start. This option is utilized by the `MemoryStorageClient`."""
122-
123-
write_metadata: Annotated[bool, Field(alias='crawlee_write_metadata')] = True
124-
"""Whether to write the storage metadata. This option is utilized by the `MemoryStorageClient`."""
125-
126-
persist_storage: Annotated[
127-
bool,
128-
Field(
129-
validation_alias=AliasChoices(
130-
'apify_persist_storage',
131-
'crawlee_persist_storage',
132-
)
133-
),
134-
] = True
135-
"""Whether to persist the storage. This option is utilized by the `MemoryStorageClient`."""
121+
"""Whether to purge the storage on the start. This option is utilized by the storage clients."""
136122

137123
persist_state_interval: Annotated[
138124
timedelta_ms,
@@ -239,7 +225,7 @@ class Configuration(BaseSettings):
239225
),
240226
),
241227
] = './storage'
242-
"""The path to the storage directory. This option is utilized by the `MemoryStorageClient`."""
228+
"""The path to the storage directory. This option is utilized by the storage clients."""
243229

244230
headless: Annotated[
245231
bool,

src/crawlee/storages/_dataset.py

Lines changed: 31 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import logging
44
from io import StringIO
55
from pathlib import Path
6-
from typing import TYPE_CHECKING, ClassVar, Literal
6+
from typing import TYPE_CHECKING, overload
77

88
from typing_extensions import override
99

@@ -17,7 +17,7 @@
1717

1818
if TYPE_CHECKING:
1919
from collections.abc import AsyncIterator
20-
from typing import Any
20+
from typing import Any, ClassVar, Literal
2121

2222
from typing_extensions import Unpack
2323

@@ -267,12 +267,33 @@ async def iterate_items(
267267
):
268268
yield item
269269

270+
@overload
271+
async def export_to(
272+
self,
273+
key: str,
274+
content_type: Literal['json'],
275+
to_key_value_store_id: str | None = None,
276+
to_key_value_store_name: str | None = None,
277+
**kwargs: Unpack[ExportDataJsonKwargs],
278+
) -> None: ...
279+
280+
@overload
281+
async def export_to(
282+
self,
283+
key: str,
284+
content_type: Literal['csv'],
285+
to_key_value_store_id: str | None = None,
286+
to_key_value_store_name: str | None = None,
287+
**kwargs: Unpack[ExportDataCsvKwargs],
288+
) -> None: ...
289+
270290
async def export_to(
271291
self,
272292
key: str,
273293
content_type: Literal['json', 'csv'] = 'json',
274294
to_key_value_store_id: str | None = None,
275295
to_key_value_store_name: str | None = None,
296+
**kwargs: Any,
276297
) -> None:
277298
"""Export the entire dataset into a specified file stored under a key in a key-value store.
278299
@@ -288,42 +309,16 @@ async def export_to(
288309
Specify only one of ID or name.
289310
to_key_value_store_name: Name of the key-value store to save the exported file.
290311
Specify only one of ID or name.
312+
kwargs: Additional parameters for the export operation, specific to the chosen content type.
291313
"""
314+
kvs = await KeyValueStore.open(id=to_key_value_store_id, name=to_key_value_store_name)
315+
dst = StringIO()
316+
292317
if content_type == 'csv':
293-
await self.export_to_csv(
294-
key,
295-
to_key_value_store_id,
296-
to_key_value_store_name,
297-
)
318+
await export_csv_to_stream(self.iterate_items(), dst, **kwargs)
319+
await kvs.set_value(key, dst.getvalue(), 'text/csv')
298320
elif content_type == 'json':
299-
await self.export_to_json(
300-
key,
301-
to_key_value_store_id,
302-
to_key_value_store_name,
303-
)
321+
await export_json_to_stream(self.iterate_items(), dst, **kwargs)
322+
await kvs.set_value(key, dst.getvalue(), 'application/json')
304323
else:
305324
raise ValueError('Unsupported content type, expecting CSV or JSON')
306-
307-
async def export_to_json(
308-
self,
309-
key: str,
310-
to_key_value_store_id: str | None = None,
311-
to_key_value_store_name: str | None = None,
312-
**kwargs: Unpack[ExportDataJsonKwargs],
313-
) -> None:
314-
kvs = await KeyValueStore.open(id=to_key_value_store_id, name=to_key_value_store_name)
315-
dst = StringIO()
316-
await export_json_to_stream(self.iterate_items(), dst, **kwargs)
317-
await kvs.set_value(key, dst.getvalue(), 'application/json')
318-
319-
async def export_to_csv(
320-
self,
321-
key: str,
322-
to_key_value_store_id: str | None = None,
323-
to_key_value_store_name: str | None = None,
324-
**kwargs: Unpack[ExportDataCsvKwargs],
325-
) -> None:
326-
kvs = await KeyValueStore.open(id=to_key_value_store_id, name=to_key_value_store_name)
327-
dst = StringIO()
328-
await export_csv_to_stream(self.iterate_items(), dst, **kwargs)
329-
await kvs.set_value(key, dst.getvalue(), 'text/csv')

0 commit comments

Comments
 (0)