Skip to content

Commit 8702cab

Browse files
authored
fix(llmobs): move listener hooks to enable instead of on init (#11889)
Follow up on #11781 to fix a weird duplicate span writing issue with the new listener hook logic. Since we were registering these hooks on `LLMObs.__init__()` which also happens at startup (as we create a default LLMObs() instance) as well as on `LLMObs.enable()`, we were double registering these hooks, and the default LLMObsSpanWriter was still saved and called each time the tracer finished a span. A symptom of this issue is that if a user was to manually enable agentless mode, they would see noisy logs indicating a failure to send spans to the agent proxy endpoint (which is the default writer mode) even though they also submitted spans to the agentless endpoint succesfully. This fix resolves the issue by moving the hook registering to `LLMObs.enable()`, and adding corresponding logic to deregister the hooks on `_stop_service()`. This way we should only ever have one set of hooks registered per process. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent 4beeccc commit 8702cab

File tree

3 files changed

+43
-3
lines changed

3 files changed

+43
-3
lines changed

ddtrace/llmobs/_llmobs.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,9 @@ def __init__(self, tracer=None):
111111
self._annotations = []
112112
self._annotation_context_lock = forksafe.RLock()
113113

114-
# Register hooks for span events
115-
core.on("trace.span_start", self._do_annotations)
116-
core.on("trace.span_finish", self._on_span_finish)
114+
def _on_span_start(self, span):
115+
if self.enabled and span.span_type == SpanTypes.LLM:
116+
self._do_annotations(span)
117117

118118
def _on_span_finish(self, span):
119119
if self.enabled and span.span_type == SpanTypes.LLM:
@@ -272,6 +272,10 @@ def _start_service(self) -> None:
272272
log.debug("Error starting evaluator runner")
273273

274274
def _stop_service(self) -> None:
275+
# Remove listener hooks for span events
276+
core.reset_listeners("trace.span_start", self._on_span_start)
277+
core.reset_listeners("trace.span_finish", self._on_span_finish)
278+
275279
try:
276280
self._evaluator_runner.stop()
277281
# flush remaining evaluation spans & evaluations
@@ -366,6 +370,10 @@ def enable(
366370
cls.enabled = True
367371
cls._instance.start()
368372

373+
# Register hooks for span events
374+
core.on("trace.span_start", cls._instance._on_span_start)
375+
core.on("trace.span_finish", cls._instance._on_span_finish)
376+
369377
atexit.register(cls.disable)
370378
telemetry_writer.product_activated(TELEMETRY_APM_PRODUCT.LLMOBS, True)
371379

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
LLM Observability: Resolves an issue where enabling LLM Observability in agentless mode would result in traces also being sent to the agent proxy endpoint.

tests/llmobs/test_llmobs_service.py

+28
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,34 @@ def test_activate_distributed_headers_activates_context(llmobs, mock_llmobs_logs
13101310
mock_activate.assert_called_once_with(dummy_context)
13111311

13121312

1313+
def test_listener_hooks_enqueue_correct_writer(run_python_code_in_subprocess):
1314+
"""
1315+
Regression test that ensures that listener hooks enqueue span events to the correct writer,
1316+
not the default writer created at startup.
1317+
"""
1318+
env = os.environ.copy()
1319+
pypath = [os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))]
1320+
if "PYTHONPATH" in env:
1321+
pypath.append(env["PYTHONPATH"])
1322+
env.update({"PYTHONPATH": ":".join(pypath), "DD_TRACE_ENABLED": "0"})
1323+
out, err, status, pid = run_python_code_in_subprocess(
1324+
"""
1325+
from ddtrace.llmobs import LLMObs
1326+
1327+
LLMObs.enable(ml_app="repro-issue", agentless_enabled=True, api_key="foobar.baz", site="datad0g.com")
1328+
with LLMObs.agent("dummy"):
1329+
pass
1330+
""",
1331+
env=env,
1332+
)
1333+
assert status == 0, err
1334+
assert out == b""
1335+
agentless_writer_log = b"failed to send traces to intake at https://llmobs-intake.datad0g.com/api/v2/llmobs: HTTP error status 403, reason Forbidden\n" # noqa: E501
1336+
agent_proxy_log = b"failed to send, dropping 1 traces to intake at http://localhost:8126/evp_proxy/v2/api/v2/llmobs after 5 retries" # noqa: E501
1337+
assert err == agentless_writer_log
1338+
assert agent_proxy_log not in err
1339+
1340+
13131341
def test_llmobs_fork_recreates_and_restarts_span_writer():
13141342
"""Test that forking a process correctly recreates and restarts the LLMObsSpanWriter."""
13151343
with mock.patch("ddtrace.internal.writer.HTTPWriter._send_payload"):

0 commit comments

Comments
 (0)