diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3e63c5c..297b526 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -252,6 +252,7 @@ include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin)
 set(PYTHON_MODULE_SOURCES
     src/export.cpp
     src/export_cache.cpp
+    src/export_admissioner.cpp
    src/export_reader.cpp
     src/export_analyzer.cpp
     src/export_misc.cpp
diff --git a/docs/src/en/examples/plugins.md b/docs/src/en/examples/plugins.md
index d27b805..0671706 100644
--- a/docs/src/en/examples/plugins.md
+++ b/docs/src/en/examples/plugins.md
@@ -1,8 +1,10 @@
 # Plugin System
 
-We enable user add any customized cache via libCacheSim's plugin system.
+## PluginCache
 
-With user-defined sive python hook functions,
+We enable users to add any customized cache via libCacheSim's plugin system.
+
+With user-defined Python hook functions,
 
 ```c++
   py::function cache_init_hook;
@@ -15,8 +17,7 @@ With user-defined sive python hook functions,
 
 We can simulate and determine the cache eviction behavior from the python side.
 
-Here is the signature requirement for these hook functions.
-
+Here are the signature requirements for these hook functions.
 ```python
 def cache_init_hook(ccparams: CommonCacheParams) -> CustomizedCacheData: ...
 def cache_hit_hook(data: CustomizedCacheData, req: Request) -> None: ...
@@ -25,3 +26,32 @@ def cache_eviction_hook(data: CustomizedCacheData, req: Request) -> int | str: .
 def cache_remove_hook(data: CustomizedCacheData, obj_id: int | str) ->: ...
 def cache_free_hook(data: CustomizedCacheData) ->: ...
 ```
+
+- **Note:** `CustomizedCacheData` is not a type provided by the library. It simply represents whatever object the user decides to return from `cache_init_hook` and pass to the other hook functions as `data`.
+
+## PluginAdmissioner
+
+We enable users to define their own admission policies via libCacheSim's plugin system; these can be used in conjunction with existing cache implementations (e.g., `LRU`, `S3FIFO`).
+
+With user-defined Python hook functions:
+
+```c++
+  py::function admissioner_init_hook;
+  py::function admissioner_admit_hook;
+  py::function admissioner_update_hook;
+  py::function admissioner_clone_hook;
+  py::function admissioner_free_hook;
+```
+
+This gives us complete control from Python over which objects are admitted into the underlying cache.
+
+Here are the signature requirements for these hook functions.
+```python
+def admissioner_init_hook() -> CustomizedAdmissionerData: ...
+def admissioner_admit_hook(data: CustomizedAdmissionerData, req: Request) -> bool: ...
+def admissioner_update_hook(data: CustomizedAdmissionerData, req: Request, cache_size: int) -> None: ...
+def admissioner_clone_hook(data: CustomizedAdmissionerData) -> AdmissionerBase: ...
+def admissioner_free_hook(data: CustomizedAdmissionerData) -> None: ...
+```
+
+- **Note:** `CustomizedAdmissionerData` is not a type provided by the library. It simply represents whatever object the user decides to return from `admissioner_init_hook` and pass to the other hook functions as `data`.
diff --git a/docs/src/en/examples/simulation.md b/docs/src/en/examples/simulation.md
index 03d5e76..1378339 100644
--- a/docs/src/en/examples/simulation.md
+++ b/docs/src/en/examples/simulation.md
@@ -1,3 +1,176 @@
 # Cache Simulation
 
-[TBD]
\ No newline at end of file
+## Basic Usage
+
+The cache classes are the core of cache simulation. When a cache instance is created (e.g., `LRU`, `S3FIFO`), we can configure the cache size as well as any cache-specific parameters, such as promotion thresholds.
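+
+For instance, a minimal sketch that issues a single request against a small `LRU` cache looks like this (the object id and size are arbitrary, and the `Request` is constructed directly via the `libcachesim_python` bindings):
+
+```py
+import libcachesim as lcs
+from libcachesim.libcachesim_python import Request, ReqOp
+
+# Only the cache size is required; here, a 1 MiB LRU cache
+cache = lcs.LRU(cache_size=1024 * 1024)
+
+# Build a single request and check whether it hits the cache
+req = Request()
+req.obj_id = 1
+req.obj_size = 100
+req.op = ReqOp.OP_GET
+print(cache.get(req))  # miss (False) on the first access; a repeated request would then hit
+```
+
+Algorithm-specific parameters are passed as keyword arguments when the cache is constructed, as in the `S3FIFO` example below.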
+
+```py
+import libcachesim as lcs
+
+# Initialize cache
+cache = lcs.S3FIFO(
+    cache_size=1024 * 1024,
+    # Cache-specific parameters
+    small_size_ratio=0.2,
+    ghost_size_ratio=0.8,
+    move_to_main_threshold=2,
+)
+```
+
+Admission policies are optional: if none is provided, the cache simply admits every object, and evictions are governed solely by the replacement policy. An admissioner (e.g., `BloomFilterAdmissioner`) can be placed in front of the cache by specifying the `admissioner` argument.
+
+```py
+import libcachesim as lcs
+
+# Initialize admissioner
+admissioner = lcs.BloomFilterAdmissioner()
+
+# Initialize cache with the admissioner
+cache = lcs.S3FIFO(
+    cache_size=1024 * 1024,
+    # Cache-specific parameters
+    small_size_ratio=0.2,
+    ghost_size_ratio=0.8,
+    move_to_main_threshold=2,
+    # Optionally provide an admissioner
+    admissioner=admissioner,
+)
+```
+
+We can then run cache simulations on real-world workloads by leveraging trace readers (see [Trace Reader](reader.md) for more on using `TraceReader`):
+
+```py
+# Process the entire trace efficiently (C++ backend)
+req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+```
+
+## Caches
+The following cache classes all inherit from `CacheBase` and share a common interface. Unless otherwise specified, every cache class accepts the following arguments:
+
+- `cache_size: int`
+- `default_ttl: int` (optional)
+- `hashpower: int` (optional)
+- `consider_obj_metadata: bool` (optional)
+- `admissioner: AdmissionerBase` (optional)
+
+### LHD
+**Least Hit Density** evicts objects based on each object's expected hits per unit of space consumed (hit density).
+
+- *No additional parameters beyond the common arguments*
+
+### LRU
+**Least Recently Used** evicts the object that has not been accessed for the longest time.
+
+- *No additional parameters beyond the common arguments*
+
+### FIFO
+**First-In, First-Out** evicts objects in insertion order, regardless of frequency or recency.
+
+- *No additional parameters beyond the common arguments*
+
+### LFU
+**Least Frequently Used** evicts the object with the lowest access frequency.
+
+- *No additional parameters beyond the common arguments*
+
+### ARC
+**Adaptive Replacement Cache** is a hybrid algorithm that balances recency and frequency.
+
+- *No additional parameters beyond the common arguments*
+
+### Clock
+**Clock** is a low-complexity approximation of `LRU`.
+
+- `init_freq: int` - Initial frequency counter value assigned to newly inserted objects (default: `0`)
+- `n_bit_counter: int` - Number of bits used for the frequency counter (default: `1`)
+
+### Random
+**Random** evicts objects at random.
+
+- *No additional parameters beyond the common arguments*
+
+### S3FIFO
+[TBD]
+
+### Sieve
+[TBD]
+
+### LIRS
+[TBD]
+
+### TwoQ
+[TBD]
+
+### SLRU
+[TBD]
+
+### WTinyLFU
+[TBD]
+
+### LeCaR
+[TBD]
+
+### LFUDA
+[TBD]
+
+### ClockPro
+[TBD]
+
+### Cacheus
+[TBD]
+
+### Belady
+[TBD]
+
+### BeladySize
+[TBD]
+
+### LRUProb
+[TBD]
+
+### FlashProb
+[TBD]
+
+### GDSF
+[TBD]
+
+### Hyperbolic
+[TBD]
+
+### ThreeLCache
+[TBD]
+
+### GLCache
+[TBD]
+
+### LRB
+[TBD]
+
+## Admission Policies
+
+### BloomFilterAdmissioner
+Uses a Bloom filter to make admission decisions based on how many times an object has been seen.
+
+- *No parameters*
+
+### ProbAdmissioner
+Admits objects with a fixed probability.
+
+- `prob: float` (optional) - Probability of admitting an object (default: `0.5`)
+
+### SizeAdmissioner
+Admits objects only if they are below a specified size threshold.
+
+- `size_threshold: int` (optional) - Maximum allowed object size (in bytes) for admission (default: `9_223_372_036_854_775_807`, i.e., `INT64_MAX`)
+
+### SizeProbabilisticAdmissioner
+Admits objects with a probability that decreases with object size, favoring smaller objects over larger ones.
+
+- `exponent: float` (optional) - Exponent controlling how aggressively larger objects are filtered out (default: `1e-6`)
+
+### AdaptSizeAdmissioner
+Implements **AdaptSize**, a feedback-driven policy that periodically adjusts its size threshold.
+
+- `max_iteration: int` (optional) - Maximum number of iterations for parameter tuning (default: `15`)
+- `reconf_interval: int` (optional) - Interval (in number of requests) at which the threshold is re-evaluated (default: `30_000`)
diff --git a/examples/admission/bloomfilter.py b/examples/admission/bloomfilter.py
new file mode 100755
index 0000000..0d1ad08
--- /dev/null
+++ b/examples/admission/bloomfilter.py
@@ -0,0 +1,31 @@
+from libcachesim import BloomFilterAdmissioner, SyntheticReader, LRU
+
+bloom_filter = BloomFilterAdmissioner()
+lru_without_admission = LRU(
+    cache_size=1024,
+    # admissioner=bloom_filter
+)
+lru_with_admission = LRU(
+    cache_size=1024,
+    admissioner=bloom_filter
+)
+
+reader = SyntheticReader(
+    num_of_req=100_000,
+    num_objects=10_000,
+    obj_size=100,
+    alpha=0.8,
+    dist="zipf",
+)
+
+without_admission_hits = 0
+with_admission_hits = 0
+
+for req in reader:
+    if lru_without_admission.get(req):
+        without_admission_hits += 1
+    if lru_with_admission.get(req):
+        with_admission_hits += 1
+
+print(f'Obtained {without_admission_hits} hits without cache admission')
+print(f'Obtained {with_admission_hits} hits with cache admission')
diff --git a/examples/admission/plugin_admissioner.py b/examples/admission/plugin_admissioner.py
new file mode 100755
index 0000000..dc158f4
--- /dev/null
+++ b/examples/admission/plugin_admissioner.py
@@ -0,0 +1,65 @@
+from libcachesim import PluginAdmissioner, SyntheticReader, LRU
+import random
+
+'''
+A toy example that admits roughly ten percent of all requests
+at random. The admit rate is tracked and printed in the
+free hook to serve as a final sanity check.
+''' + + +class AdmissionerStats: + admitted_requests: int = 0 + total_requests: int = 0 + + +def init_hook(): + return AdmissionerStats() + + +def admit_hook(data, request): + admit = random.randint(1, 10) == 5 + if admit: + data.admitted_requests += 1 + data.total_requests += 1 + return admit + + +def clone_hook(): + raise NotImplementedError("Cloning for this plugin admissioner is not supported.") + + +def update_hook(data, request, cs): + pass + + +def free_hook(data): + print(f'Admit rate: {100 * data.admitted_requests / data.total_requests}%') + + +custom_admissioner = PluginAdmissioner( + "AdmitTenPercent", + init_hook, + admit_hook, + clone_hook, + update_hook, + free_hook, +) +lru_cache = LRU( + cache_size=1024, + admissioner=custom_admissioner +) + +reader = SyntheticReader( + num_of_req=100_000, + num_objects=10_000, + obj_size=100, + alpha=0.8, + dist="zipf", +) + +for req in reader: + lru_cache.get(req) + +# Invokes free_hook, percentage should be ~10% +del lru_cache diff --git a/libcachesim/__init__.py b/libcachesim/__init__.py index 38a6fa6..e2e1c20 100644 --- a/libcachesim/__init__.py +++ b/libcachesim/__init__.py @@ -56,6 +56,16 @@ PluginCache, ) +from .admissioner import ( + BloomFilterAdmissioner, + ProbAdmissioner, + SizeAdmissioner, + SizeProbabilisticAdmissioner, + AdaptSizeAdmissioner, + PluginAdmissioner, + AdmissionerBase, +) + from .trace_reader import TraceReader from .trace_analyzer import TraceAnalyzer from .synthetic_reader import SyntheticReader, create_zipf_requests, create_uniform_requests @@ -110,6 +120,15 @@ "LRB", # Plugin cache "PluginCache", + # Admission algorithms + "BloomFilterAdmissioner", + "ProbAdmissioner", + "SizeAdmissioner", + "SizeProbabilisticAdmissioner", + "AdaptSizeAdmissioner", + "PluginAdmissioner", + # Admissioner base class + "AdmissionerBase", # Readers and analyzers "TraceReader", "TraceAnalyzer", diff --git a/libcachesim/admissioner.py b/libcachesim/admissioner.py new file mode 100644 index 0000000..c239763 --- /dev/null +++ b/libcachesim/admissioner.py @@ -0,0 +1,88 @@ +from abc import ABC +from .libcachesim_python import ( + Admissioner, + Request, + create_bloomfilter_admissioner, + create_prob_admissioner, + create_size_admissioner, + create_size_probabilistic_admissioner, + create_adaptsize_admissioner, + create_plugin_admissioner, +) + + +class AdmissionerBase(ABC): + _admissioner: Admissioner # Internal C++ admissioner object + + def __init__(self, _admissioner: Admissioner): + self._admissioner = _admissioner + + def clone(self): + return self._admissioner.clone() + + def update(self, req: Request, cache_size: int): + return self._admissioner.update(req, cache_size) + + def admit(self, req: Request): + return self._admissioner.admit(req) + + def free(self): + return self._admissioner.free() + + +class BloomFilterAdmissioner(AdmissionerBase): + def __init__(self): + admissioner = create_bloomfilter_admissioner(None) + super().__init__(admissioner) + + +class ProbAdmissioner(AdmissionerBase): + def __init__(self, prob: float = None): + params = f"prob={prob}" if prob is not None else None + admissioner = create_prob_admissioner(params) + super().__init__(admissioner) + + +class SizeAdmissioner(AdmissionerBase): + def __init__(self, size_threshold: int = None): + params = f"size={size_threshold}" if size_threshold is not None else None + admissioner = create_size_admissioner(params) + super().__init__(admissioner) + + +class SizeProbabilisticAdmissioner(AdmissionerBase): + def __init__(self, exponent: float = None): + params 
= f"exponent={exponent}" if exponent is not None else None + admissioner = create_size_probabilistic_admissioner(params) + super().__init__(admissioner) + + +class AdaptSizeAdmissioner(AdmissionerBase): + def __init__(self, max_iteration: int = None, reconf_interval: int = None): + params = ",".join( + f'{arg}={val}' for arg, val in { + 'max-iteration': max_iteration, + 'reconf-interval': reconf_interval, + }.items() if val is not None + ) or None + + admissioner = create_adaptsize_admissioner(params) + super().__init__(admissioner) + + +class PluginAdmissioner(AdmissionerBase): + def __init__(self, + admissioner_name, + admissioner_init_hook, + admissioner_admit_hook, + admissioner_clone_hook, + admissioner_update_hook, + admissioner_free_hook): + admissioner = create_plugin_admissioner( + admissioner_name, + admissioner_init_hook, + admissioner_admit_hook, + admissioner_clone_hook, + admissioner_update_hook, + admissioner_free_hook) + super().__init__(admissioner) diff --git a/libcachesim/cache.py b/libcachesim/cache.py index 7102c4f..506257e 100644 --- a/libcachesim/cache.py +++ b/libcachesim/cache.py @@ -41,6 +41,7 @@ c_process_trace, ) +from .admissioner import AdmissionerBase from .protocols import ReaderProtocol @@ -49,7 +50,9 @@ class CacheBase(ABC): _cache: Cache # Internal C++ cache object - def __init__(self, _cache: Cache): + def __init__(self, _cache: Cache, admissioner: AdmissionerBase = None): + if admissioner is not None: + _cache.admissioner = admissioner._admissioner self._cache = _cache def get(self, req: Request) -> bool: @@ -81,7 +84,7 @@ def get_occupied_byte(self) -> int: def get_n_obj(self) -> int: return self._cache.get_n_obj() - + def set_cache_size(self, new_size: int) -> None: self._cache.set_cache_size(new_size) @@ -160,10 +163,16 @@ class LHD(CacheBase): """Least Hit Density cache (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=LHD_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=LHD_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -171,10 +180,16 @@ class LRU(CacheBase): """Least Recently Used cache (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=LRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=LRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -182,10 +197,16 @@ class FIFO(CacheBase): """First In First Out cache (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, 
consider_obj_metadata)) + _cache=FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -193,10 +214,16 @@ class LFU(CacheBase): """Least Frequently Used cache (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=LFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=LFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -204,10 +231,16 @@ class ARC(CacheBase): """Adaptive Replacement Cache (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=ARC_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=ARC_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -227,12 +260,15 @@ def __init__( consider_obj_metadata: bool = False, init_freq: int = 0, n_bit_counter: int = 1, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"init-freq={init_freq}, n-bit-counter={n_bit_counter}" super().__init__( _cache=Clock_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -240,10 +276,16 @@ class Random(CacheBase): """Random replacement cache (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=Random_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=Random_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -266,12 +308,12 @@ def __init__( small_size_ratio: float = 0.1, ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"small-size-ratio={small_size_ratio}, ghost-size-ratio={ghost_size_ratio}, move-to-main-threshold={move_to_main_threshold}" super().__init__( - _cache=S3FIFO_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _cache=S3FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params), + admissioner=admissioner ) @@ -279,10 +321,16 @@ class Sieve(CacheBase): """Sieve cache algorithm (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + 
consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=Sieve_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=Sieve_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -290,10 +338,16 @@ class LIRS(CacheBase): """Low Inter-reference Recency Set (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) def insert(self, req: Request) -> Optional[CacheObject]: @@ -316,12 +370,15 @@ def __init__( consider_obj_metadata: bool = False, a_in_size_ratio: float = 0.25, a_out_size_ratio: float = 0.5, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"Ain-size-ratio={a_in_size_ratio}, Aout-size-ratio={a_out_size_ratio}" super().__init__( _cache=TwoQ_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -329,10 +386,16 @@ class SLRU(CacheBase): """Segmented LRU (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=SLRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=SLRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -352,12 +415,15 @@ def __init__( consider_obj_metadata: bool = False, main_cache: str = "SLRU", window_size: float = 0.01, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"main-cache={main_cache}, window-size={window_size}" super().__init__( _cache=WTinyLFU_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -377,12 +443,15 @@ def __init__( consider_obj_metadata: bool = False, update_weight: bool = True, lru_weight: float = 0.5, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"update-weight={int(update_weight)}, lru-weight={lru_weight}" super().__init__( _cache=LeCaR_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -390,10 +459,16 @@ class LFUDA(CacheBase): """LFU with Dynamic Aging (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + 
cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=LFUDA_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=LFUDA_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -413,12 +488,15 @@ def __init__( consider_obj_metadata: bool = False, init_ref: int = 0, init_ratio_cold: float = 0.5, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"init-ref={init_ref}, init-ratio-cold={init_ratio_cold}" super().__init__( _cache=ClockPro_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -426,10 +504,16 @@ class Cacheus(CacheBase): """Cacheus algorithm (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=Cacheus_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=Cacheus_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -438,10 +522,16 @@ class Belady(CacheBase): """Belady's optimal algorithm (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=Belady_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=Belady_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -459,12 +549,15 @@ def __init__( hashpower: int = 24, consider_obj_metadata: bool = False, n_samples: int = 128, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"n-samples={n_samples}" super().__init__( _cache=BeladySize_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -482,12 +575,15 @@ def __init__( hashpower: int = 24, consider_obj_metadata: bool = False, prob: float = 0.5, + admissioner: AdmissionerBase = None, ): cache_specific_params = f"prob={prob}" super().__init__( _cache=LRU_Prob_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -511,12 +607,15 @@ def __init__( disk_admit_prob: float = 0.2, ram_cache: str = "LRU", disk_cache: str = "FIFO", + admissioner: AdmissionerBase = None, ): cache_specific_params = f"ram-size-ratio={ram_size_ratio}, disk-admit-prob={disk_admit_prob}, ram-cache={ram_cache}, disk-cache={disk_cache}" super().__init__( 
_cache=flashProb_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -524,10 +623,16 @@ class Size(CacheBase): """Size-based replacement algorithm (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=Size_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=Size_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -535,10 +640,16 @@ class GDSF(CacheBase): """GDSF replacement algorithm (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=GDSF_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=GDSF_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -546,10 +657,16 @@ class Hyperbolic(CacheBase): """Hyperbolic replacement algorithm (no special parameters)""" def __init__( - self, cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): super().__init__( - _cache=Hyperbolic_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)) + _cache=Hyperbolic_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + admissioner=admissioner ) @@ -568,6 +685,7 @@ def __init__( hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", + admissioner: AdmissionerBase = None, ): # Try to import ThreeLCache_init try: @@ -580,8 +698,10 @@ def __init__( cache_specific_params = f"objective={objective}" super().__init__( _cache=ThreeLCache_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -611,6 +731,7 @@ def __init__( merge_consecutive_segs: bool = True, train_source_y: str = "online", retrain_intvl: int = 86400, + admissioner: AdmissionerBase = None, ): # Try to import GLCache_init try: @@ -623,8 +744,10 @@ def __init__( cache_specific_params = f"segment-size={segment_size}, n-merge={n_merge}, type={type}, rank-intvl={rank_intvl}, merge-consecutive-segs={merge_consecutive_segs}, train-source-y={train_source_y}, retrain-intvl={retrain_intvl}" super().__init__( _cache=GLCache_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), 
+ admissioner=admissioner ) @@ -642,6 +765,7 @@ def __init__( hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", + admissioner: AdmissionerBase = None, ): # Try to import LRB_init try: @@ -654,8 +778,10 @@ def __init__( cache_specific_params = f"objective={objective}" super().__init__( _cache=LRB_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params - ) + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + cache_specific_params, + ), + admissioner=admissioner ) @@ -676,6 +802,7 @@ def __init__( default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, + admissioner: AdmissionerBase = None, ): self.common_cache_params = _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata) @@ -689,5 +816,6 @@ def __init__( cache_eviction_hook, cache_remove_hook, cache_free_hook, - ) + ), + admissioner=admissioner ) diff --git a/src/export.cpp b/src/export.cpp index 0ef8d83..ab55fe0 100644 --- a/src/export.cpp +++ b/src/export.cpp @@ -21,6 +21,7 @@ PYBIND11_MODULE(libcachesim_python, m) { // methods if the codebase is large enough export_cache(m); + export_admissioner(m); export_reader(m); export_analyzer(m); export_misc(m); diff --git a/src/export.h b/src/export.h index 121ff97..75424d1 100644 --- a/src/export.h +++ b/src/export.h @@ -20,6 +20,7 @@ using py::literals::operator""_a; void export_cache(py::module &m); void export_pyplugin_cache(py::module &m); +void export_admissioner(py::module &m); void export_reader(py::module &m); void export_analyzer(py::module &m); void export_misc(py::module &m); diff --git a/src/export_admissioner.cpp b/src/export_admissioner.cpp new file mode 100644 index 0000000..26e5f94 --- /dev/null +++ b/src/export_admissioner.cpp @@ -0,0 +1,228 @@ +// libcachesim_python - libCacheSim Python bindings +// Copyright 2025 The libcachesim Authors. All rights reserved. +// +// Use of this source code is governed by a GPL-3.0 +// license that can be found in the LICENSE file or at +// https://github.com/1a1a11a/libcachesim/blob/develop/LICENSE + +#include +#include +#include + +#include "../libCacheSim/include/libCacheSim/admissionAlgo.h" +#include "export.h" + +namespace libcachesim { + +namespace py = pybind11; + +typedef struct __attribute__((visibility("hidden"))) +pypluginAdmissioner_params { + py::object data; ///< Plugin's internal data structure (python object) + py::function admissioner_init_hook; + py::function admissioner_admit_hook; + py::function admissioner_clone_hook; + py::function admissioner_update_hook; + py::function admissioner_free_hook; + std::string admissioner_name; +} pypluginAdmissioner_params_t; + +static bool pypluginAdmissioner_admit(admissioner_t *, const request_t *); +static admissioner_t *pypluginAdmissioner_clone(admissioner_t *); +static void pypluginAdmissioner_free(admissioner_t *); +static void pypluginAdmissioner_update(admissioner_t *, const request_t *, + const uint64_t); + +struct PypluginAdmissionerParamsDeleter { + void operator()(pypluginAdmissioner_params_t *ptr) const { + if (ptr != nullptr) { + if (!ptr->admissioner_free_hook.is_none()) { + try { + ptr->admissioner_free_hook(ptr->data); + } catch (...) 
{ } + } + delete ptr; + } + } +}; + +admissioner_t *create_plugin_admissioner(std::string admissioner_name, + py::function admissioner_init_hook, + py::function admissioner_admit_hook, + py::function admissioner_clone_hook, + py::function admissioner_update_hook, + py::function admissioner_free_hook) { + std::unique_ptr + params; + admissioner_t *admissioner = nullptr; + try { + admissioner = (admissioner_t *)malloc(sizeof(admissioner_t)); + if (!admissioner) { + throw std::runtime_error("Failed to initialize admissioner structure"); + } + memset(admissioner, 0, sizeof(admissioner_t)); + + // We will pass a raw pointer for C++ to take ownership of + admissioner->admit = pypluginAdmissioner_admit; + admissioner->clone = pypluginAdmissioner_clone; + admissioner->free = pypluginAdmissioner_free; + admissioner->update = pypluginAdmissioner_update; + + // Initialize pointers to python hook functions + params = std::unique_ptr( + new pypluginAdmissioner_params_t(), PypluginAdmissionerParamsDeleter()); + params->data = admissioner_init_hook(); + params->admissioner_admit_hook = admissioner_admit_hook; + params->admissioner_clone_hook = admissioner_clone_hook; + params->admissioner_update_hook = admissioner_update_hook; + params->admissioner_free_hook = admissioner_free_hook; + params->admissioner_name = admissioner_name; + + // Transfer ownership of params to admissioner + admissioner->params = params.release(); + return admissioner; + } catch (...) { + if (admissioner) free(admissioner); + throw; + } +} + +static bool pypluginAdmissioner_admit(admissioner_t *admissioner, + const request_t *req) { + pypluginAdmissioner_params_t *params = + (pypluginAdmissioner_params_t *)admissioner->params; + return params->admissioner_admit_hook(params->data, req).cast(); +} + +static admissioner_t *pypluginAdmissioner_clone(admissioner_t *admissioner) { + pypluginAdmissioner_params_t *params = + (pypluginAdmissioner_params_t *)admissioner->params; + return params->admissioner_clone_hook(params->data).cast(); +} + +static void pypluginAdmissioner_free(admissioner_t *admissioner) { + pypluginAdmissioner_params_t *params = + (pypluginAdmissioner_params_t *)admissioner->params; + params->admissioner_free_hook(params->data); +} + +static void pypluginAdmissioner_update(admissioner_t *admissioner, + const request_t *req, + const uint64_t cache_size) { + pypluginAdmissioner_params_t *params = + (pypluginAdmissioner_params_t *)admissioner->params; + params->admissioner_update_hook(params->data, req, cache_size); +} + +template +void export_admissioner_creator(py::module &m, const std::string &name) { + m.def( + name.c_str(), + [=](py::object params_obj) { + const char *params = nullptr; + std::string s; + + // Here, by allowing the passing of None to resolve to NULL, we can + // allow the default arguments specified in C++ to be used when no + // arguments are specified through the Python wrapper classes. 
+ if (!params_obj.is_none()) { + s = params_obj.cast(); + params = s.c_str(); + } + + // Admissioner is exported lower down + admissioner_t *admissioner = fn(params); + if (!admissioner) + throw std::runtime_error("Creator for " + name + " returned NULL"); + return admissioner; + }, + py::return_value_policy::reference); +} + +void export_admissioner(py::module &m) { + // *********************************************************************** + // **** **** + // **** Admissioner struct bindings **** + // **** **** + // *********************************************************************** + + py::class_(m, "Admissioner") + .def(py::init<>()) + .def_readwrite("params", &admissioner_t::params) + + .def_property( + "admissioner_name", + [](const admissioner_t &self) { + return std::string(self.admissioner_name); + }, + [](admissioner_t &self, const std::string &val) { + strncpy(self.admissioner_name, val.c_str(), CACHE_NAME_LEN); + self.admissioner_name[CACHE_NAME_LEN - 1] = '\0'; + }) + + .def_property( + "init_params", + [](const admissioner_t &self) { + return self.init_params ? std::string(self.init_params) + : std::string{}; + }, + [](admissioner_t &self, const std::string &val) { + if (self.init_params) free(self.init_params); + self.init_params = strdup(val.c_str()); + }) + + .def("admit", + [](admissioner_t &self, uintptr_t req_ptr) { + if (!self.admit) + throw std::runtime_error("admit function pointer is NULL"); + request_t *req = reinterpret_cast(req_ptr); + return self.admit(&self, req); + }) + + .def("clone", + [](admissioner_t &self) { + if (!self.clone) + throw std::runtime_error("clone function pointer is NULL"); + return self.clone(&self); + }) + + .def("update", + [](admissioner_t &self, uintptr_t req_ptr, uint64_t cache_size) { + if (!self.update) + throw std::runtime_error("update function pointer is NULL"); + request_t *req = reinterpret_cast(req_ptr); + self.update(&self, req, cache_size); + }) + + .def("free", [](admissioner_t &self) { + if (!self.free) + throw std::runtime_error("free function pointer is NULL"); + self.free(&self); + }); + // *********************************************************************** + // **** **** + // **** end of admissioner struct bindings **** + // **** **** + // *********************************************************************** + + // Exposing existing implementations of admission algorithms + export_admissioner_creator( + m, "create_bloomfilter_admissioner"); + export_admissioner_creator( + m, "create_prob_admissioner"); + export_admissioner_creator( + m, "create_size_admissioner"); + export_admissioner_creator( + m, "create_size_probabilistic_admissioner"); + export_admissioner_creator( + m, "create_adaptsize_admissioner"); + m.def("create_plugin_admissioner", &create_plugin_admissioner, + "admissioner_name"_a, "admissioner_init_hook"_a, + "admissioner_admit_hook"_a, "admissioner_clone_hook"_a, + "admissioner_update_hook"_a, "admissioner_free_hook"_a, + py::return_value_policy::take_ownership); +} + +} // namespace libcachesim diff --git a/src/export_cache.cpp b/src/export_cache.cpp index e34ddf5..c13f14a 100644 --- a/src/export_cache.cpp +++ b/src/export_cache.cpp @@ -273,6 +273,7 @@ void export_cache(py::module& m) { .def_readonly("n_req", &cache_t::n_req) .def_readonly("cache_name", &cache_t::cache_name) .def_readonly("init_params", &cache_t::init_params) + .def_readwrite("admissioner", &cache_t::admissioner) .def( "get", [](cache_t& self, const request_t& req) { diff --git a/tests/test_admission.py 
b/tests/test_admission.py new file mode 100644 index 0000000..1099877 --- /dev/null +++ b/tests/test_admission.py @@ -0,0 +1,255 @@ +""" +Test cases for cache admission in libCacheSim Python bindings. + +This module tests the PluginAdmissioner and existing admission policies +""" + +import pytest +from libcachesim import ( + SizeAdmissioner, + ProbAdmissioner, + SizeProbabilisticAdmissioner, + BloomFilterAdmissioner, + PluginAdmissioner, + LRU +) +from libcachesim.libcachesim_python import ( + Request, + ReqOp +) + + +class TestSizeAdmissioner: + """test existing size admissioner policy""" + + def test_default_configuration(self): + int64_max = (2 ** 63) - 1 + cache = LRU( + # Cache size must be large enough to fit the object + cache_size=int64_max, + admissioner=SizeAdmissioner() + ) + + # We should be able to admit an item which lies underneath + # the default threshold of INT64_MAX + req = Request() + req.obj_id = 0 + req.obj_size = int64_max - 1 + req.op = ReqOp.OP_GET + assert cache.can_insert(req) + + # Anything equating to the default threshold should fail + req = Request() + req.obj_id = 0 + req.obj_size = int64_max + req.op = ReqOp.OP_GET + assert not cache.can_insert(req) + + @pytest.mark.parametrize("thresh", [0, 100, 250, 500, 750, 1000]) + def test_custom_configuration(self, thresh): + cache = LRU( + cache_size=1000, + admissioner=SizeAdmissioner(size_threshold=thresh) + ) + admits = 0 + + # Create 1000 requests of unique sizes and test to see and + # use `cache_can_insert_default` to run the admissioner + for i in range(1000): + req = Request() + req.obj_id = i + req.obj_size = i + req.op = ReqOp.OP_GET + if cache.can_insert(req): + admits += 1 + + # All items admitted should lie within the size threshold + assert admits == thresh + + +class TestProbAdmissioner: + """test existing probabilistic admissioner policy""" + + # Note: The `ProbAdmissioner` does not accept zero as a valid + # probability, hence we do not test a `admit_nothing` scenario + def test_admit_all(self): + cache = LRU( + # Cache size must be large enough to fit the object + cache_size=1000, + admissioner=ProbAdmissioner(prob=1.0) + ) + + # Probability threshold is one, so everything should be + # admitted unconditionally + for obj_id in range(1000): + req = Request() + req.obj_id = obj_id + req.obj_size = 1 + req.op = ReqOp.OP_GET + assert cache.can_insert(req) + + @pytest.mark.parametrize("prob", [0.0001, 0.1, 0.5, 0.9, 0.9999]) + def test_admit_amount(self, prob): + cache = LRU( + # Cache size must be large enough to fit the object + cache_size=1000, + admissioner=ProbAdmissioner(prob=prob) + ) + total_requests, admits = 1000, 0 + + # Probability threshold is one, so everything should be + # admitted unconditionally + for obj_id in range(total_requests): + req = Request() + req.obj_id = obj_id + req.obj_size = 1 + req.op = ReqOp.OP_GET + if cache.can_insert(req): + admits += 1 + + # This value is not deterministic, hence just perform a + # basic sanity check to make sure it lies between 0 and 1 + admit_rate = admits / total_requests + assert 0 <= admit_rate and admit_rate <= 1 + + +class TestSizeProbabilisticAdmissioner: + + @pytest.mark.parametrize("exponent", [0.0001, 0.1, 0.5, 0.9, 0.9999]) + def test_admit_amount(self, exponent): + cache = LRU( + # Cache size must be large enough to fit the object + cache_size=1000, + admissioner=SizeProbabilisticAdmissioner(exponent=exponent) + ) + total_requests, admits = 1000, 0 + + # Probability threshold is one, so everything should be + # admitted unconditionally 
+ for obj_id in range(total_requests): + req = Request() + req.obj_id = obj_id + req.obj_size = 1 + req.op = ReqOp.OP_GET + if cache.can_insert(req): + admits += 1 + + # This value is not deterministic, hence just perform a + # basic sanity check to make sure it lies between 0 and 1 + admit_rate = admits / total_requests + assert 0 <= admit_rate and admit_rate <= 1 + + +class TestBloomFilter: + """test existing bloomfilter admissioner policy""" + + @pytest.mark.parametrize("visits", [0, 1, 2, 3]) + def test_multi_pass(self, visits): + cache = LRU( + cache_size=1000, + admissioner=BloomFilterAdmissioner() + ) + admits = 0 + + # Here, we try to "see" each item a certain number of times + # to increment it's "seen_times" count in the bloom filter + # hash table. + for _ in range(visits): + for obj_id in range(1000): + req = Request() + req.obj_id = obj_id + req.obj_size = 1 + req.op = ReqOp.OP_GET + if cache.can_insert(req): + cache.insert(req) + + # Next, we check to see if the items were admitted to cache + for obj_id in range(1000): + req = Request() + req.obj_id = obj_id + req.obj_size = 1 + req.op = ReqOp.OP_GET + if cache.get(req): + admits += 1 + + # Only if each item is visited more than once should we see + # that it was admitted to the cache + expected = 1000 if visits > 1 else 0 + assert admits == expected + + +# TODO: Tests crash if we do not explicitly delete the cache object +class TestPluginAdmissioner: + """test PluginAdmissioner using custom simplistic policies""" + + def test_admit_all(self): + pa = PluginAdmissioner( + "testAdmissioner", + lambda: None, + # Accept all items + lambda data, req: True, + lambda: None, + lambda data, req: None, + lambda data: None, + ) + cache = LRU(cache_size=1000, admissioner=pa) + + # Here, we test a basic custom admission policy which + # should simply accept every single request + for size in range(1000): + req = Request() + req.obj_id = 0 + req.obj_size = size + req.op = ReqOp.OP_GET + assert cache.can_insert(req) + del cache + + def test_admit_nothing(self): + pa = PluginAdmissioner( + "testAdmissioner", + lambda: None, + # Reject all items + lambda data, req: False, + lambda: None, + lambda data, req: None, + lambda data: None, + ) + cache = LRU(cache_size=1000, admissioner=pa) + + # Here, we test a basic custom admission policy which + # should simply reject every single request + for size in range(1000): + req = Request() + req.obj_id = 0 + req.obj_size = size + req.op = ReqOp.OP_GET + assert not cache.can_insert(req) + del cache + + @pytest.mark.parametrize("thresh", [0, 100, 250, 500, 750, 1000]) + def test_custom_size(self, thresh): + pa = PluginAdmissioner( + "testAdmissioner", + lambda: None, + # Equivalent to the size admissioner + lambda data, req: req.obj_size < thresh, + lambda: None, + lambda data, req: None, + lambda data: None, + ) + cache, admits = LRU(cache_size=1000, admissioner=pa), 0 + + # Here, we test a custom implementation of the existing + # size policy which admits everything under a static size + # threshold + for size in range(1000): + req = Request() + req.obj_id = 0 + req.obj_size = size + req.op = ReqOp.OP_GET + if cache.can_insert(req): + admits += 1 + + # Same correctness criteria as `TestSizeAdmissioner` + assert admits == thresh + del cache