|
1 | 1 | """Wrapper of Reader with S3 support."""
|
| 2 | +from __future__ import annotations |
2 | 3 |
|
3 | 4 | import logging
|
4 |
| -from typing import overload, Union, Optional |
| 5 | +from typing import overload, Union, Optional, Any |
5 | 6 | from collections.abc import Iterator
|
6 | 7 | from urllib.parse import urlparse
|
7 | 8 |
|
8 | 9 | from .protocols import ReaderProtocol
|
9 | 10 | from .libcachesim_python import (
|
10 | 11 | TraceType,
|
11 |
| - SamplerType, |
| 12 | + TraceFormat, |
12 | 13 | Request,
|
13 | 14 | ReaderInitParam,
|
14 | 15 | Reader,
|
|
21 | 22 | logger = logging.getLogger(__name__)
|
22 | 23 |
|
23 | 24 |
|
class TraceReaderSliceIterator:
    """Iterator over the requests selected by a forward slice of a TraceReader.

    The iterator works on a clone of the reader it is given, so advancing it
    does not move the position of the original reader.

    Args:
        reader: The reader to slice; only its ``clone()`` is consumed.
        start: Index of the first request to yield.
        stop: Exclusive upper bound on the request index.
        step: Distance between yielded requests; must be positive.

    Raises:
        ValueError: If ``step`` is zero or negative. The reader is only read
            forward, so such a slice cannot be served; previously it silently
            produced an empty (or wrong) iteration.
    """

    def __init__(self, reader: "TraceReader", start: int, stop: int, step: int):
        # Validate before cloning so an unusable slice does not allocate a reader.
        if step <= 0:
            raise ValueError(
                f"TraceReaderSliceIterator requires a positive step, got {step}"
            )

        # Clone the reader to avoid side effects on the original
        self.reader = reader.clone()
        self.start = start
        self.stop = stop
        self.step = step
        # Must be set before skipping: the skip helpers set it to ``stop``
        # when they hit the end of the trace.
        self.current = start

        # Initialize position: reset and skip to start position once
        self.reader.reset()
        if start > 0:
            self._skip_to_start_position(start)

    def __iter__(self) -> Iterator[Request]:
        """Return the iterator itself."""
        return self

    def __next__(self) -> Request:
        """Return the request at the current index and advance by ``step``.

        Raises:
            StopIteration: When the index reaches ``stop`` or the reader
                raises ``RuntimeError`` (treated as end of trace).
        """
        if self.current >= self.stop:
            raise StopIteration

        try:
            req = self.reader.read_one_req()
        except RuntimeError:
            # Pin the iterator as exhausted so later next() calls do not
            # touch the reader again.
            self.current = self.stop
            raise StopIteration from None

        self.current += self.step
        # Only position the reader for the next element if there is one;
        # skipping after the final element would be wasted reads.
        if self.step > 1 and self.current < self.stop:
            self._skip_requests(self.step - 1)
        return req

    def _skip_to_start_position(self, position: int) -> None:
        """Advance the freshly reset reader by ``position`` requests."""
        if self.reader._reader.is_zstd_file:
            # For zstd files skip_n_req is not used; read one by one instead.
            self._simulate_skip(position)
            return

        # skip_n_req is expected to return how many requests it skipped.
        skipped = self.reader.skip_n_req(position)
        if skipped != position:
            # Make up the difference by reading the remaining requests.
            self._simulate_skip(position - skipped)

    def _skip_requests(self, n: int) -> None:
        """Skip ``n`` requests between two yielded elements."""
        if self.reader._reader.is_zstd_file:
            self._simulate_skip(n)
            return

        if self.reader.skip_n_req(n) != n:
            # A short skip is taken to mean the trace ended: mark as done.
            self.current = self.stop

    def _simulate_skip(self, n: int) -> None:
        """Skip ``n`` requests by reading and discarding them one by one."""
        for _ in range(n):
            try:
                self.reader.read_one_req()
            except RuntimeError:
                # Nothing more to read: mark the iterator as done.
                self.current = self.stop
                break
| 95 | + |
| 96 | + |
24 | 97 | class TraceReader(ReaderProtocol):
|
25 | 98 | _reader: Reader
|
26 | 99 |
|
@@ -302,10 +375,51 @@ def __next__(self) -> Request:
|
302 | 375 | raise StopIteration
|
303 | 376 | return req
|
304 | 377 |
|
305 |
| - def __getitem__(self, index: int) -> Request: |
306 |
| - if index < 0 or index >= self._reader.get_num_of_req(): |
307 |
| - raise IndexError("Index out of range") |
308 |
| - self._reader.reset() |
309 |
| - self._reader.skip_n_req(index) |
310 |
| - req = Request() |
311 |
| - return self._reader.read_one_req(req) |
| 378 | + def __getitem__(self, key: Union[int, slice]) -> Union[Request, TraceReaderSliceIterator]: |
| 379 | + if isinstance(key, slice): |
| 380 | + # Handle slice |
| 381 | + total_len = self._reader.get_num_of_req() |
| 382 | + start, stop, step = key.indices(total_len) |
| 383 | + return TraceReaderSliceIterator(self, start, stop, step) |
| 384 | + elif isinstance(key, int): |
| 385 | + # Handle single index |
| 386 | + total_len = self._reader.get_num_of_req() |
| 387 | + if key < 0: |
| 388 | + key += total_len |
| 389 | + if key < 0 or key >= total_len: |
| 390 | + raise IndexError("Index out of range") |
| 391 | + |
| 392 | + self._reader.reset() |
| 393 | + |
| 394 | + # Try to skip to the target position |
| 395 | + if key > 0: |
| 396 | + if not self._reader.is_zstd_file: |
| 397 | + # For non-zstd files, try skip_n_req and check return value |
| 398 | + skipped = self._reader.skip_n_req(key) |
| 399 | + if skipped != key: |
| 400 | + # If we couldn't skip the expected number, simulate the rest |
| 401 | + remaining = key - skipped |
| 402 | + self._simulate_skip_single(remaining) |
| 403 | + else: |
| 404 | + # For zstd files, always simulate |
| 405 | + self._simulate_skip_single(key) |
| 406 | + |
| 407 | + # Read the target request |
| 408 | + req = Request() |
| 409 | + ret = self._reader.read_one_req(req) |
| 410 | + if ret != 0: |
| 411 | + raise IndexError(f"Cannot read request at index {key}") |
| 412 | + return req |
| 413 | + else: |
| 414 | + raise TypeError("TraceReader indices must be integers or slices") |
| 415 | + |
| 416 | + def _simulate_skip_single(self, n: int) -> None: |
| 417 | + """Simulate skip by reading requests one by one for single index access.""" |
| 418 | + for i in range(n): |
| 419 | + req = Request() |
| 420 | + ret = self._reader.read_one_req(req) |
| 421 | + if ret != 0: |
| 422 | + raise IndexError(f"Cannot skip to position, reached EOF at {i}") |
| 423 | + |
| 424 | + # Note: Removed old inefficient methods _can_use_skip_n_req and _simulate_skip_and_read_single |
| 425 | + # The new implementation is more efficient and handles skip_n_req return values properly |
0 commit comments