Skip to content

Commit 8217062

Browse files
Support semantic cache TTL overrides on writes (#217)
1 parent df41114 commit 8217062

File tree

2 files changed

+47
-4
lines changed

2 files changed

+47
-4
lines changed

redisvl/extensions/llmcache/semantic.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,15 @@ def index(self) -> SearchIndex:
185185
"""
186186
return self._index
187187

188+
@property
189+
def aindex(self) -> Optional[AsyncSearchIndex]:
190+
"""The underlying AsyncSearchIndex for the cache.
191+
192+
Returns:
193+
Optional[AsyncSearchIndex]: The async search index, or None if one has not been created.
194+
"""
195+
return self._aindex
196+
188197
@property
189198
def distance_threshold(self) -> float:
190199
"""The semantic distance threshold for the cache.
@@ -481,6 +490,7 @@ def store(
481490
vector: Optional[List[float]] = None,
482491
metadata: Optional[Dict[str, Any]] = None,
483492
filters: Optional[Dict[str, Any]] = None,
493+
ttl: Optional[int] = None,
484494
) -> str:
485495
"""Stores the specified key-value pair in the cache along with metadata.
486496
@@ -494,6 +504,8 @@ def store(
494504
alongside the prompt and response. Defaults to None.
495505
filters (Optional[Dict[str, Any]]): The optional tag to assign to the cache entry.
496506
Defaults to None.
507+
ttl (Optional[int]): The optional TTL override to use on this individual cache
508+
entry. Defaults to the global TTL setting.
497509
498510
Returns:
499511
str: The Redis key for the entries added to the semantic cache.
@@ -513,7 +525,6 @@ def store(
513525
"""
514526
# Vectorize prompt if necessary and create cache payload
515527
vector = vector or self._vectorize_prompt(prompt)
516-
517528
self._check_vector_dims(vector)
518529

519530
# Build cache entry for the cache
@@ -526,9 +537,10 @@ def store(
526537
)
527538

528539
# Load cache entry with TTL
540+
ttl = ttl or self._ttl
529541
keys = self._index.load(
530542
data=[cache_entry.to_dict()],
531-
ttl=self._ttl,
543+
ttl=ttl,
532544
id_field=self.entry_id_field_name,
533545
)
534546
return keys[0]
@@ -540,6 +552,7 @@ async def astore(
540552
vector: Optional[List[float]] = None,
541553
metadata: Optional[Dict[str, Any]] = None,
542554
filters: Optional[Dict[str, Any]] = None,
555+
ttl: Optional[int] = None,
543556
) -> str:
544557
"""Async stores the specified key-value pair in the cache along with metadata.
545558
@@ -553,6 +566,8 @@ async def astore(
553566
alongside the prompt and response. Defaults to None.
554567
filters (Optional[Dict[str, Any]]): The optional tag to assign to the cache entry.
555568
Defaults to None.
569+
ttl (Optional[int]): The optional TTL override to use on this individual cache
570+
entry. Defaults to the global TTL setting.
556571
557572
Returns:
558573
str: The Redis key for the entries added to the semantic cache.
@@ -574,7 +589,6 @@ async def astore(
574589

575590
# Vectorize prompt if necessary and create cache payload
576591
vector = vector or self._vectorize_prompt(prompt)
577-
578592
self._check_vector_dims(vector)
579593

580594
# Build cache entry for the cache
@@ -587,9 +601,10 @@ async def astore(
587601
)
588602

589603
# Load cache entry with TTL
604+
ttl = ttl or self._ttl
590605
keys = await aindex.load(
591606
data=[cache_entry.to_dict()],
592-
ttl=self._ttl,
607+
ttl=ttl,
593608
id_field=self.entry_id_field_name,
594609
)
595610
return keys[0]

tests/integration/test_llmcache.py

+28
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ async def test_get_async_index(cache):
106106
async def test_get_async_index_from_provided_client(cache_with_redis_client):
107107
aindex = await cache_with_redis_client._get_async_index()
108108
assert isinstance(aindex, AsyncSearchIndex)
109+
assert aindex == cache_with_redis_client.aindex
109110

110111

111112
def test_delete(cache_no_cleanup):
@@ -275,6 +276,33 @@ async def test_async_ttl_expiration(cache_with_ttl, vectorizer):
275276
assert len(check_result) == 0
276277

277278

279+
def test_custom_ttl(cache_with_ttl, vectorizer):
280+
prompt = "This is a test prompt."
281+
response = "This is a test response."
282+
vector = vectorizer.embed(prompt)
283+
284+
cache_with_ttl.store(prompt, response, vector=vector, ttl=5)
285+
sleep(3)
286+
287+
check_result = cache_with_ttl.check(vector=vector)
288+
assert len(check_result) != 0
289+
assert cache_with_ttl.ttl == 2
290+
291+
292+
@pytest.mark.asyncio
293+
async def test_async_custom_ttl(cache_with_ttl, vectorizer):
294+
prompt = "This is a test prompt."
295+
response = "This is a test response."
296+
vector = vectorizer.embed(prompt)
297+
298+
await cache_with_ttl.astore(prompt, response, vector=vector, ttl=5)
299+
await asyncio.sleep(3)
300+
301+
check_result = await cache_with_ttl.acheck(vector=vector)
302+
assert len(check_result) != 0
303+
assert cache_with_ttl.ttl == 2
304+
305+
278306
def test_ttl_refresh(cache_with_ttl, vectorizer):
279307
prompt = "This is a test prompt."
280308
response = "This is a test response."

0 commit comments

Comments (0)