Skip to content

Commit b965332

Browse files
authored
feat(flags): add a flag_fallback_cache that tracks feature flag evaluation results and uses them as fallback values whenever the /flags API isn't available (#275)
1 parent 4739945 commit b965332

File tree

8 files changed

+559
-16
lines changed

8 files changed

+559
-16
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# 6.0.3 - 2025-07-07
2+
3+
- feat: add a feature flag evaluation fallback cache (in-memory or Redis) that returns previously evaluated flag values when the service is down
4+
15
# 6.0.2 - 2025-07-02
26

37
- fix: send_feature_flags changed to default to false in `Client::capture_exception`

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ We recommend using [uv](https://docs.astral.sh/uv/). It's super fast.
3232
```bash
3333
uv python install 3.9.19
3434
uv python pin 3.9.19
35-
uv venv env
35+
uv venv
3636
source .venv/bin/activate
3737
uv sync --extra dev --extra test
3838
pre-commit install

mypy-baseline.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,5 @@ posthog/ai/utils.py:0: error: Function "builtins.any" is not valid as a type [v
4242
posthog/ai/utils.py:0: note: Perhaps you meant "typing.Any" instead of "any"?
4343
posthog/ai/utils.py:0: error: Function "builtins.any" is not valid as a type [valid-type]
4444
posthog/ai/utils.py:0: note: Perhaps you meant "typing.Any" instead of "any"?
45+
posthog/client.py:0: error: Name "urlparse" already defined (possibly by an import) [no-redef]
46+
posthog/client.py:0: error: Name "parse_qs" already defined (possibly by an import) [no-redef]

posthog/client.py

Lines changed: 161 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
to_values,
5151
)
5252
from posthog.utils import (
53+
FlagCache,
54+
RedisFlagCache,
5355
SizeLimitedDict,
5456
clean,
5557
guess_timezone,
@@ -95,7 +97,30 @@ def add_context_tags(properties):
9597

9698

9799
class Client(object):
98-
"""Create a new PostHog client."""
100+
"""Create a new PostHog client.
101+
102+
Examples:
103+
Basic usage:
104+
>>> client = Client("your-api-key")
105+
106+
With memory-based feature flag fallback cache:
107+
>>> client = Client(
108+
... "your-api-key",
109+
... flag_fallback_cache_url="memory://local/?ttl=300&size=10000"
110+
... )
111+
112+
With Redis fallback cache for high-scale applications:
113+
>>> client = Client(
114+
... "your-api-key",
115+
... flag_fallback_cache_url="redis://localhost:6379/0/?ttl=300"
116+
... )
117+
118+
With Redis authentication:
119+
>>> client = Client(
120+
... "your-api-key",
121+
... flag_fallback_cache_url="redis://username:password@localhost:6379/0/?ttl=300"
122+
... )
123+
"""
99124

100125
log = logging.getLogger("posthog")
101126

@@ -126,6 +151,7 @@ def __init__(
126151
project_root=None,
127152
privacy_mode=False,
128153
before_send=None,
154+
flag_fallback_cache_url=None,
129155
):
130156
self.queue = queue.Queue(max_queue_size)
131157

@@ -151,6 +177,8 @@ def __init__(
151177
)
152178
self.poller = None
153179
self.distinct_ids_feature_flags_reported = SizeLimitedDict(MAX_DICT_SIZE, set)
180+
self.flag_cache = self._initialize_flag_cache(flag_fallback_cache_url)
181+
self.flag_definition_version = 0
154182
self.disabled = disabled
155183
self.disable_geoip = disable_geoip
156184
self.historical_migration = historical_migration
@@ -707,6 +735,9 @@ def shutdown(self):
707735

708736
def _load_feature_flags(self):
709737
try:
738+
# Store old flags to detect changes
739+
old_flags_by_key: dict[str, dict] = self.feature_flags_by_key or {}
740+
710741
response = get(
711742
self.personal_api_key,
712743
f"/api/feature_flag/local_evaluation/?token={self.api_key}&send_cohorts",
@@ -718,6 +749,14 @@ def _load_feature_flags(self):
718749
self.group_type_mapping = response["group_type_mapping"] or {}
719750
self.cohorts = response["cohorts"] or {}
720751

752+
# Check if flag definitions changed and update version
753+
if self.flag_cache and old_flags_by_key != (
754+
self.feature_flags_by_key or {}
755+
):
756+
old_version = self.flag_definition_version
757+
self.flag_definition_version += 1
758+
self.flag_cache.invalidate_version(old_version)
759+
721760
except APIError as e:
722761
if e.status == 401:
723762
self.log.error(
@@ -739,6 +778,10 @@ def _load_feature_flags(self):
739778
self.group_type_mapping = {}
740779
self.cohorts = {}
741780

781+
# Clear flag cache when quota limited
782+
if self.flag_cache:
783+
self.flag_cache.clear()
784+
742785
if self.debug:
743786
raise APIError(
744787
status=402,
@@ -889,6 +932,12 @@ def _get_feature_flag_result(
889932
flag_result = FeatureFlagResult.from_value_and_payload(
890933
key, lookup_match_value, payload
891934
)
935+
936+
# Cache successful local evaluation
937+
if self.flag_cache and flag_result:
938+
self.flag_cache.set_cached_flag(
939+
distinct_id, key, flag_result, self.flag_definition_version
940+
)
892941
elif not only_evaluate_locally:
893942
try:
894943
flag_details, request_id = self._get_feature_flag_details_from_decide(
@@ -902,12 +951,30 @@ def _get_feature_flag_result(
902951
flag_result = FeatureFlagResult.from_flag_details(
903952
flag_details, override_match_value
904953
)
954+
955+
# Cache successful remote evaluation
956+
if self.flag_cache and flag_result:
957+
self.flag_cache.set_cached_flag(
958+
distinct_id, key, flag_result, self.flag_definition_version
959+
)
960+
905961
self.log.debug(
906962
f"Successfully computed flag remotely: #{key} -> #{flag_result}"
907963
)
908964
except Exception as e:
909965
self.log.exception(f"[FEATURE FLAGS] Unable to get flag remotely: {e}")
910966

967+
# Fallback to cached value if remote evaluation fails
968+
if self.flag_cache:
969+
stale_result = self.flag_cache.get_stale_cached_flag(
970+
distinct_id, key
971+
)
972+
if stale_result:
973+
self.log.info(
974+
f"[FEATURE FLAGS] Using stale cached value for flag {key}"
975+
)
976+
flag_result = stale_result
977+
911978
if send_feature_flag_events:
912979
self._capture_feature_flag_called(
913980
distinct_id,
@@ -1278,6 +1345,99 @@ def _get_all_flags_and_payloads_locally(
12781345
"featureFlagPayloads": payloads,
12791346
}, fallback_to_decide
12801347

1348+
def _initialize_flag_cache(self, cache_url):
    """Build the feature flag fallback cache described by ``cache_url``.

    The cache stores flag evaluation results so they can be served as fallback
    values when the PostHog API is unavailable. Initialization is best-effort:
    every failure is logged as a warning and disables caching instead of
    raising, so a bad cache configuration never prevents client construction.

    Args:
        cache_url: Cache configuration URL, or a falsy value to disable
            caching. Supported forms:
            - "memory://local/?ttl=300&size=10000": in-memory cache; ``ttl``
              defaults to 300 and ``size`` to 10000 when omitted.
            - "redis://localhost:6379/0/?ttl=300": Redis cache; the host
              defaults to localhost and the port to 6379 when omitted.
            - "redis://username:password@host:port/?ttl=300": Redis with auth.

    Returns:
        A ``FlagCache`` for ``memory://`` URLs, a ``RedisFlagCache`` for
        ``redis://`` URLs whose server answers a ping, or ``None`` when
        caching is disabled, the URL cannot be parsed, the scheme is unknown,
        the ``redis`` package is not installed, or the connection fails.

    Example usage:
        # Memory cache
        client = Client(
            "your-api-key",
            flag_fallback_cache_url="memory://local/?ttl=300&size=10000"
        )

        # Redis cache
        client = Client(
            "your-api-key",
            flag_fallback_cache_url="redis://localhost:6379/0/?ttl=300"
        )

        # Normal evaluation - cache is populated
        flag_value = client.get_feature_flag("my-flag", "user123")

        # During API outage - returns cached value instead of None
        flag_value = client.get_feature_flag("my-flag", "user123")  # Uses cache
    """
    if not cache_url:
        return None

    # This module is Python 3 only (it uses f-strings), so the former
    # Python 2 ``urlparse`` fallback import was unreachable and only
    # produced "already defined" type-checker errors.
    from urllib.parse import parse_qs, urlparse

    # Placeholder shown in log output if the URL fails before it is redacted.
    safe_url = "<unparseable>"

    try:
        parsed = urlparse(cache_url)

        # Never log the raw URL: it may embed a username/password. Everything
        # before the last "@" of the network location is the userinfo part.
        _userinfo, separator, host_part = parsed.netloc.rpartition("@")
        if separator:
            safe_url = parsed._replace(netloc=f"***@{host_part}").geturl()
        else:
            safe_url = cache_url

        scheme = parsed.scheme.lower()
        query_params = parse_qs(parsed.query)
        ttl = int(query_params.get("ttl", [300])[0])

        if scheme == "memory":
            size = int(query_params.get("size", [10000])[0])
            return FlagCache(size, ttl)

        elif scheme == "redis":
            try:
                # Not worth importing redis if we're not using it
                import redis

                # Rebuild the URL from its parts: the query string (ttl) is
                # consumed above and is left out of what is handed to redis.
                redis_url = f"{parsed.scheme}://"
                if parsed.username or parsed.password:
                    redis_url += f"{parsed.username or ''}:{parsed.password or ''}@"
                redis_url += (
                    f"{parsed.hostname or 'localhost'}:{parsed.port or 6379}"
                )
                if parsed.path:
                    redis_url += parsed.path

                client = redis.from_url(redis_url)

                # Test connection before using it
                client.ping()

                return RedisFlagCache(client, default_ttl=ttl)

            except ImportError:
                self.log.warning(
                    "[FEATURE FLAGS] Redis not available, flag caching disabled"
                )
                return None
            except Exception as e:
                self.log.warning(
                    f"[FEATURE FLAGS] Redis connection failed: {e}, flag caching disabled"
                )
                return None
        else:
            raise ValueError(
                f"Unknown cache URL scheme: {scheme}. Supported schemes: memory, redis"
            )

    except Exception as e:
        # Deliberately broad: any configuration problem degrades to "no cache".
        self.log.warning(
            f"[FEATURE FLAGS] Failed to parse cache URL '{safe_url}': {e}"
        )
        return None
1440+
12811441
def feature_flag_definitions(self):
12821442
return self.feature_flags
12831443

posthog/test/test_utils.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import time
12
import unittest
23
from dataclasses import dataclass
34
from datetime import date, datetime, timedelta
@@ -12,6 +13,7 @@
1213
from pydantic.v1 import BaseModel as BaseModelV1
1314

1415
from posthog import utils
16+
from posthog.types import FeatureFlagResult
1517

1618
TEST_API_KEY = "kOOlRy2QlMY9jHZQv0bKz0FZyazBUoY8Arj0lFVNjs4"
1719
FAKE_TEST_API_KEY = "random_key"
@@ -173,3 +175,124 @@ class TestDataClass:
173175
"inner_optional": None,
174176
},
175177
}
178+
179+
180+
class TestFlagCache(unittest.TestCase):
    """Checks for ``utils.FlagCache``: hits and misses, TTL expiry, version
    invalidation, stale reads and eviction when the cache is full."""

    def setUp(self):
        # Tiny cache (3 entries, 1 second TTL) so expiry and eviction are
        # reachable within a single test.
        self.cache = utils.FlagCache(max_size=3, default_ttl=1)
        self.flag_result = FeatureFlagResult.from_value_and_payload(
            "test-flag", True, None
        )

    def _store(self, distinct_id, version, flag_key="test-flag"):
        # Cache the shared flag result for one user at the given version.
        self.cache.set_cached_flag(distinct_id, flag_key, self.flag_result, version)

    def _fetch(self, distinct_id, version, flag_key="test-flag"):
        # Fresh (non-stale) lookup for one user at the given version.
        return self.cache.get_cached_flag(distinct_id, flag_key, version)

    def test_cache_basic_operations(self):
        version = 1

        # Nothing stored yet: miss.
        assert self._fetch("user123", version) is None

        # After a set, the same key/version is a hit carrying a truthy value.
        self._store("user123", version)
        cached = self._fetch("user123", version)
        assert cached is not None
        assert cached.get_value()

    def test_cache_ttl_expiry_marker(self):
        # Placeholder-free helper kept private to this class; not a test on
        # its own behaviour beyond delegating to the TTL test below.
        self.test_cache_ttl_expiration()

    def test_cache_ttl_expiration(self):
        version = 1
        self._store("user123", version)

        # Available immediately after the write.
        assert self._fetch("user123", version) is not None

        # Sleep past the 1 second TTL (plus a small buffer).
        time.sleep(1.1)

        # The fresh lookup no longer returns the entry.
        assert self._fetch("user123", version) is None

    def test_cache_version_invalidation(self):
        previous, current = 1, 2
        self._store("user123", previous)

        # Hit under the version it was written with, miss under a newer one.
        assert self._fetch("user123", previous) is not None
        assert self._fetch("user123", current) is None

        # Once the old version is invalidated, even that version misses.
        self.cache.invalidate_version(previous)
        assert self._fetch("user123", previous) is None

    def test_stale_cache_functionality(self):
        version = 1
        self._store("user123", version)

        # Let the TTL lapse.
        time.sleep(1.1)

        # The fresh lookup misses...
        assert self._fetch("user123", version) is None

        # ...but the stale lookup still returns the entry (within 1 hour default).
        stale = self.cache.get_stale_cached_flag("user123", "test-flag")
        assert stale is not None
        assert stale.get_value()

    def test_lru_eviction(self):
        # max_size is 3, so a fourth user forces an eviction.
        version = 1

        # Fill the cache with user0..user2.
        for index in range(3):
            self._store(f"user{index}", version)

        # Touch user0 so it counts as recently used.
        self._fetch("user0", version)

        # The fourth user is expected to push out user1 (least recently used);
        # this test only verifies the entries that must survive.
        self._store("user3", version)

        # user0 (recently accessed), user2 (recently added) and user3 (just
        # added) must all still be present.
        for survivor in ("user0", "user2", "user3"):
            assert self._fetch(survivor, version) is not None

0 commit comments

Comments
 (0)