Skip to content

Commit 684726f

Browse files
committed
Support slice for reader
1 parent a24ad03 commit 684726f

File tree

6 files changed

+177
-25
lines changed

6 files changed

+177
-25
lines changed

examples/slice_reader.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Example: read parts of a remote trace through slicing and plain iteration."""
import logging

import libcachesim as lcs

# DEBUG level surfaces the reader's own log messages while it seeks.
logging.basicConfig(level=logging.DEBUG)

URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
MAX_REQUESTS = 4

reader = lcs.TraceReader(
    trace=URI,
    trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
    reader_init_params=lcs.ReaderInitParam(ignore_obj_size=False),
)

# Slice from the beginning of the trace.
for req in reader[:3]:
    print(req.obj_id, req.obj_size)

# Slice with a non-zero start.
for req in reader[1:4]:
    print(req.obj_id, req.obj_size)

# Plain iteration: rewind first, then stop after MAX_REQUESTS requests.
reader.reset()
for seen, req in enumerate(reader):
    if seen >= MAX_REQUESTS:
        break
    print(req.obj_id, req.obj_size)

libcachesim/cache.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def get_occupied_byte(self) -> int:
8181

8282
def get_n_obj(self) -> int:
    """Return the object count reported by the wrapped cache's ``get_n_obj()``."""
    backend = self._cache
    return backend.get_n_obj()
84+
85+
def set_cache_size(self, new_size: int) -> None:
    """Forward ``new_size`` to the wrapped cache's ``set_cache_size()``."""
    backend = self._cache
    backend.set_cache_size(new_size)
8487

8588
def print_cache(self) -> str:
8689
return self._cache.print_cache()

libcachesim/synthetic_reader.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,28 @@
1313
from .protocols import ReaderProtocol
1414

1515

16+
class SyntheticReaderSliceIterator:
    """Lazy iterator over a slice of a SyntheticReader.

    ``start``, ``stop`` and ``step`` are expected to be already normalised
    (as produced by ``slice.indices``). Each item is fetched on demand with
    ``reader[index]``, so nothing is materialised up front.

    Raises:
        ValueError: if ``step`` is zero (the iterator would never advance).
    """

    def __init__(self, reader: "SyntheticReader", start: int, stop: int, step: int):
        if step == 0:
            raise ValueError("slice step cannot be zero")
        self.reader = reader
        self.start = start
        self.stop = stop
        self.step = step
        self.current = start

    def __iter__(self) -> "SyntheticReaderSliceIterator":
        return self

    def __next__(self) -> "Request":
        # The end test depends on the direction of travel: for a negative
        # step ``slice.indices`` yields start > stop (stop may be -1), so a
        # plain ``current >= stop`` check would end the iteration at once.
        if self.step > 0:
            exhausted = self.current >= self.stop
        else:
            exhausted = self.current <= self.stop
        if exhausted:
            raise StopIteration

        req = self.reader[self.current]
        self.current += self.step
        return req
36+
37+
1638
class SyntheticReader(ReaderProtocol):
1739
"""Efficient synthetic request generator supporting multiple distributions"""
1840

@@ -206,19 +228,29 @@ def __next__(self) -> Request:
206228

207229
return self.read_one_req()
208230

209-
def __getitem__(self, index: int) -> Request:
210-
"""Support index access"""
211-
if index < 0 or index >= self.num_of_req:
212-
raise IndexError("Index out of range")
231+
def __getitem__(self, key: Union[int, slice]) -> Union[Request, SyntheticReaderSliceIterator]:
    """Return the request at ``key`` or a lazy iterator over a slice.

    Args:
        key: An integer position (negative values count from the end), any
            object implementing ``__index__``, or a ``slice``.

    Returns:
        A freshly built ``Request`` for an integer key, or a
        ``SyntheticReaderSliceIterator`` for a slice.

    Raises:
        IndexError: if the integer position is outside ``[0, num_of_req)``.
        TypeError: if ``key`` is neither integer-like nor a slice.
    """
    # Local import keeps this method self-contained.
    import operator

    if isinstance(key, slice):
        start, stop, step = key.indices(self.num_of_req)
        return SyntheticReaderSliceIterator(self, start, stop, step)

    # operator.index accepts every integer-like object (via __index__), not
    # only built-in int, and rejects floats and strings.
    try:
        index = operator.index(key)
    except TypeError:
        raise TypeError("SyntheticReader indices must be integers or slices") from None

    if index < 0:
        index += self.num_of_req
    if not 0 <= index < self.num_of_req:
        raise IndexError("Index out of range")

    req = Request()
    req.obj_id = self.obj_ids[index]
    req.obj_size = self.obj_size
    # Timestamps are spread evenly over time_span according to position.
    req.clock_time = index * self.time_span // self.num_of_req
    req.op = ReqOp.OP_READ
    req.valid = True
    return req
222254

223255

224256
def _gen_zipf(m: int, alpha: float, n: int, start: int = 0) -> np.ndarray:

libcachesim/trace_reader.py

Lines changed: 91 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
"""Wrapper of Reader with S3 support."""
2+
from __future__ import annotations
23

34
import logging
4-
from typing import overload, Union, Optional
5+
from typing import overload, Union, Optional, Any
56
from collections.abc import Iterator
67
from urllib.parse import urlparse
78

89
from .protocols import ReaderProtocol
910
from .libcachesim_python import (
1011
TraceType,
11-
SamplerType,
12+
TraceFormat,
1213
Request,
1314
ReaderInitParam,
1415
Reader,
@@ -21,6 +22,46 @@
2122
logger = logging.getLogger(__name__)
2223

2324

25+
class TraceReaderSliceIterator:
    """Lazy iterator over a slice of a TraceReader.

    ``start``, ``stop`` and ``step`` are expected to be already normalised
    (as produced by ``slice.indices``). Every step resets the underlying
    reader and re-positions it from the beginning of the trace, so each item
    costs a reset plus a skip of ``current`` requests.

    Raises:
        ValueError: if ``step`` is zero (the iterator would never advance).
    """

    def __init__(self, reader: "TraceReader", start: int, stop: int, step: int):
        if step == 0:
            raise ValueError("slice step cannot be zero")
        self.reader = reader
        self.start = start
        self.stop = stop
        self.step = step
        self.current = start

    def __iter__(self) -> "TraceReaderSliceIterator":
        return self

    def __next__(self) -> "Request":
        # Direction-aware end test: with a negative step start > stop (stop
        # may be -1), so ``current >= stop`` alone would yield nothing.
        if self.step > 0:
            exhausted = self.current >= self.stop
        else:
            exhausted = self.current <= self.stop
        if exhausted:
            raise StopIteration

        # Always position from the start of the trace.
        self.reader.reset()

        # zstd files cannot use skip_n_req; they are positioned by reading
        # and discarding requests one by one.
        if not self.reader._reader.is_zstd_file:
            logger.debug("Skipping %s requests using skip_n_req", self.current)
            try:
                self.reader.skip_n_req(self.current)
                req = self.reader.read_one_req()
            except RuntimeError:
                logger.warning(
                    "Failed to skip %s requests, falling back to simulation",
                    self.current,
                )
                # The failed skip may have moved the reader; rewind so the
                # simulated skip counts from the first request.
                self.reader.reset()
                req = self.reader._simulate_skip_and_read_single(self.current)
        else:
            logger.debug("Simulating skip by reading %s requests one by one", self.current)
            req = self.reader._simulate_skip_and_read_single(self.current)

        self.current += self.step
        return req
63+
64+
2465
class TraceReader(ReaderProtocol):
2566
_reader: Reader
2667

@@ -302,10 +343,52 @@ def __next__(self) -> Request:
302343
raise StopIteration
303344
return req
304345

305-
def __getitem__(self, index: int) -> Request:
306-
if index < 0 or index >= self._reader.get_num_of_req():
307-
raise IndexError("Index out of range")
308-
self._reader.reset()
309-
self._reader.skip_n_req(index)
346+
def __getitem__(self, key: Union[int, slice]) -> Union[Request, TraceReaderSliceIterator]:
    """Return the request at ``key`` or a lazy iterator over a slice.

    Args:
        key: An integer position (negative values count from the end) or a
            ``slice``.

    Returns:
        The ``Request`` at that position, or a ``TraceReaderSliceIterator``
        for a slice.

    Raises:
        IndexError: if the position is out of range or cannot be reached.
        TypeError: if ``key`` is neither an int nor a slice.
    """
    if isinstance(key, slice):
        start, stop, step = key.indices(self._reader.get_num_of_req())
        return TraceReaderSliceIterator(self, start, stop, step)
    if not isinstance(key, int):
        raise TypeError("TraceReader indices must be integers or slices")

    total_len = self._reader.get_num_of_req()
    if key < 0:
        key += total_len
    if not 0 <= key < total_len:
        raise IndexError("Index out of range")

    self._reader.reset()

    if self._can_use_skip_n_req():
        try:
            # skip_n_req reports how many requests it actually skipped; a
            # short skip would otherwise hand back the wrong request.
            skipped = self._reader.skip_n_req(key)
            if skipped == key:
                req = Request()
                if self._reader.read_one_req(req) == 0:
                    return req
        except RuntimeError:
            # Bindings that signal a failed skip by raising land here.
            pass
        logger.debug("skip_n_req could not reach index %s; reading sequentially", key)
        # The fast path may have moved the reader; start over.
        self._reader.reset()

    # Position by reading and discarding requests one by one.
    return self._simulate_skip_and_read_single(key)
380+
381+
def _simulate_skip_and_read_single(self, index: int) -> Request:
    """Read and discard ``index`` requests, then return the following one.

    The reader is consumed from its current position, so callers wanting an
    absolute index must reset it first.

    Raises:
        IndexError: if a read reports a non-zero status before or at
            ``index``.
    """
    read_into = self._reader.read_one_req

    # Discard phase: each skipped request goes into a throwaway object.
    for _ in range(index):
        if read_into(Request()) != 0:
            raise IndexError(f"Cannot reach index {index}")

    # The request now under the cursor is the one asked for.
    target = Request()
    if read_into(target) != 0:
        raise IndexError(f"Cannot read request at index {index}")
    return target

src/export_cache.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,10 @@ void export_cache(py::module& m) {
352352
.def("get_occupied_byte",
353353
[](cache_t& self) { return self.get_occupied_byte(&self); })
354354
.def("get_n_obj", [](cache_t& self) { return self.get_n_obj(&self); })
355+
.def(
356+
"set_cache_size",
357+
[](cache_t& self, uint64_t new_size) { self.cache_size = new_size; },
358+
"new_size"_a)
355359
.def("print_cache", [](cache_t& self) {
356360
// Capture stdout to return as string
357361
std::ostringstream captured_output;

src/export_reader.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,13 @@ void export_reader(py::module& m) {
9898
.value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE)
9999
.export_values();
100100

101+
// Trace format enumeration
102+
py::enum_<trace_format_e>(m, "TraceFormat")
103+
.value("BINARY_TRACE_FORMAT", trace_format_e::BINARY_TRACE_FORMAT)
104+
.value("TXT_TRACE_FORMAT", trace_format_e::TXT_TRACE_FORMAT)
105+
.value("INVALID_TRACE_FORMAT", trace_format_e::INVALID_TRACE_FORMAT)
106+
.export_values();
107+
101108
py::enum_<read_direction>(m, "ReadDirection")
102109
.value("READ_FORWARD", read_direction::READ_FORWARD)
103110
.value("READ_BACKWARD", read_direction::READ_BACKWARD)
@@ -302,11 +309,9 @@ void export_reader(py::module& m) {
302309
.def(
303310
"skip_n_req",
304311
[](reader_t& self, int n) {
305-
int ret = skip_n_req(&self, n);
306-
if (ret != 0) {
307-
throw std::runtime_error("Failed to skip requests");
308-
}
309-
return ret;
312+
int count = skip_n_req(&self, n);
313+
// Return the actual number of requests skipped
314+
return count;
310315
},
311316
"n"_a)
312317
.def("read_one_req_above",

0 commit comments

Comments
 (0)