Skip to content

Commit d665ca0

Browse files
lazebnyi and octavia-squidington-iii authored
fix: yield partitions for unique stream slices in StreamSlicerPartitionGenerator (#508)
Co-authored-by: octavia-squidington-iii <[email protected]>
1 parent 4bd6048 commit d665ca0

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
22

3-
from typing import Any, Iterable, Mapping, Optional
3+
from typing import Any, Hashable, Iterable, Mapping, Optional
44

55
from airbyte_cdk.sources.declarative.retrievers import Retriever
66
from airbyte_cdk.sources.message import MessageRepository
@@ -89,5 +89,21 @@ def __init__(
8989
self._stream_slicer = stream_slicer
9090

9191
def generate(self) -> Iterable[Partition]:
    """Yield one partition per distinct stream slice.

    Slices come from ``self._stream_slicer.stream_slices()`` and are
    compared through the key returned by ``self._make_hashable``. The first
    slice seen for a given key is turned into a partition with
    ``self._partition_factory.create``; later slices with the same key are
    skipped. Partitions are yielded lazily, in the order the slicer
    produces them.
    """
    emitted_keys: set[Hashable] = set()
    for candidate in self._stream_slicer.stream_slices():
        key = self._make_hashable(candidate)
        if key not in emitted_keys:
            # Remember the key before yielding so repeats are dropped.
            emitted_keys.add(key)
            yield self._partition_factory.create(candidate)
100+
101+
@staticmethod
def _make_hashable(obj: Any) -> Any:
    """Return a hashable stand-in for ``obj`` suitable as a set member.

    Containers are converted recursively:

    * ``dict`` -> ``frozenset`` of ``(key, frozen_value)`` pairs, so key
      order does not affect the result;
    * ``list`` / ``tuple`` -> ``tuple`` of frozen items;
    * ``set`` / ``frozenset`` -> ``frozenset`` of frozen items.

    Any other value is returned unchanged and must already be hashable.

    Tuples and sets are handled as well as dicts and lists because a tuple
    holding a dict or list, or a plain ``set``, is not hashable and would
    make the membership check on the resulting key raise ``TypeError``.
    """

    def freeze(value: Any) -> Any:
        # Recurse through a local helper so the conversion does not depend
        # on the name of the enclosing class.
        if isinstance(value, dict):
            return frozenset((key, freeze(item)) for key, item in value.items())
        if isinstance(value, (list, tuple)):
            return tuple(freeze(item) for item in value)
        if isinstance(value, (set, frozenset)):
            return frozenset(freeze(item) for item in value)
        return value

    return freeze(obj)

0 commit comments

Comments
 (0)