Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,18 @@ def _collect_loop(self):
logger.info(" Cache enabled: %s", self.cache_enabled)
logger.info(" Cache TTL: %d seconds", self.cache_ttl)

def describe(self):
    """
    Report no metric descriptors for this collector.

    prometheus_client consults describe() when a collector is registered
    so it can learn the metric names up front and reject duplicates. When
    a collector has no describe() of its own, the default registry falls
    back to calling collect() for that purpose, which here would run a
    full (potentially very slow) DB collection inside REGISTRY.register()
    and block startup for the duration of every query.

    Handing back an empty list avoids that registration-time collection
    so the server can start immediately. The trade-off is that no names
    are declared, so the registry's duplicate-name check is skipped for
    this collector.

    Returns:
        list: A new, empty list on every call.
    """
    # Deliberately empty: declaring names would require knowing the
    # metrics, i.e. running the expensive collection we want to avoid.
    descriptors = []
    return descriptors

def collect(self):
"""
Collect metrics and yield Prometheus metrics.
Expand All @@ -133,9 +145,19 @@ def collect(self):
yield from self._metrics_cache
return
else:
logger.warning("Cache enabled but no cached metrics available, collecting fresh metrics")
# Cache is still warming up (background thread hasn't finished
# its first collection yet). Return empty rather than falling
# through to a synchronous full collection, which would:
# 1. Block the scrape for the full query duration.
# 2. Run concurrently with the background thread, doubling
# DB load on every scrape until the cache is warm.
logger.warning(
"Cache warm-up in progress; returning empty metrics until "
"background collection completes"
)
return

# Fall back to fresh collection if cache disabled or unavailable
# Cache disabled: collect fresh on every scrape.
logger.debug("Collecting fresh metrics")
yield from self._collect_all_metrics()

Expand Down Expand Up @@ -1715,13 +1737,16 @@ def collect_active_tool_users(self):

port = 8000

# Start the Prometheus exporter
collector = LibreChatMetricsCollector(mongodb_uri, cache_ttl=cache_ttl)
REGISTRY.register(collector)

# Start background collection thread if caching is enabled
# Start the background collection thread BEFORE registering with the
# Prometheus registry. This lets the cache begin warming immediately so
# the first scrape is more likely to be served from cache rather than
# returning empty metrics during the warm-up window.
collector._start_background_collection()

REGISTRY.register(collector)

logger.info("Starting server on port %i", port)

root = Resource()
Expand Down