Skip to content

Commit d676233

Browse files
fix(llmobs): replace trace processor with event listener (#11781)
The LLMObs service formerly depended on the tracer's TraceProcessor interface. This was problematic because that interface is shared with the public API: users could configure a trace filter (which is a trace processor under the hood) and thereby overwrite the LLMObs TraceProcessor. Instead, the tracer now emits span start and finish events, which the LLMObs service listens to and acts on, as proposed here. The gotcha is that the LLMObs service no longer has a way to drop traces when run in agentless mode, which only LLMObs supports. We therefore encourage users to explicitly turn off APM, which carries the benefit of clarity since this behavior was implicit before. Co-authored-by: Yun Kim <[email protected]>
1 parent bfa3b82 commit d676233

15 files changed

+798
-1079
lines changed

.riot/requirements/16562eb.txt

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#
2+
# This file is autogenerated by pip-compile with Python 3.7
3+
# by the following command:
4+
#
5+
# pip-compile --allow-unsafe --config=pyproject.toml --no-annotate --resolver=backtracking .riot/requirements/16562eb.in
6+
#
7+
attrs==24.2.0
8+
coverage[toml]==7.2.7
9+
exceptiongroup==1.2.2
10+
hypothesis==6.45.0
11+
idna==3.10
12+
importlib-metadata==6.7.0
13+
iniconfig==2.0.0
14+
mock==5.1.0
15+
multidict==6.0.5
16+
opentracing==2.4.0
17+
packaging==24.0
18+
pluggy==1.2.0
19+
pytest==7.4.4
20+
pytest-asyncio==0.21.1
21+
pytest-cov==4.1.0
22+
pytest-mock==3.11.1
23+
pyyaml==6.0.1
24+
six==1.17.0
25+
sortedcontainers==2.4.0
26+
tomli==2.0.1
27+
typing-extensions==4.7.1
28+
urllib3==1.26.20
29+
vcrpy==4.4.0
30+
wrapt==1.16.0
31+
yarl==1.9.4
32+
zipp==3.15.0

ddtrace/_trace/tracer.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from ddtrace.internal.atexit import register_on_exit_signal
4242
from ddtrace.internal.constants import SAMPLING_DECISION_TRACE_TAG_KEY
4343
from ddtrace.internal.constants import SPAN_API_DATADOG
44+
from ddtrace.internal.core import dispatch
4445
from ddtrace.internal.dogstatsd import get_dogstatsd_client
4546
from ddtrace.internal.logger import get_logger
4647
from ddtrace.internal.peer_service.processor import PeerServiceProcessor
@@ -849,7 +850,7 @@ def _start_span(
849850
for p in chain(self._span_processors, SpanProcessor.__processors__, self._deferred_processors):
850851
p.on_span_start(span)
851852
self._hooks.emit(self.__class__.start_span, span)
852-
853+
dispatch("trace.span_start", (span,))
853854
return span
854855

855856
start_span = _start_span
@@ -866,6 +867,8 @@ def _on_span_finish(self, span: Span) -> None:
866867
for p in chain(self._span_processors, SpanProcessor.__processors__, self._deferred_processors):
867868
p.on_span_finish(span)
868869

870+
dispatch("trace.span_finish", (span,))
871+
869872
if log.isEnabledFor(logging.DEBUG):
870873
log.debug("finishing span %s (enabled:%s)", span._pprint(), self.enabled)
871874

ddtrace/llmobs/_llmobs.py

+140-21
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,22 @@
33
import time
44
from typing import Any
55
from typing import Dict
6+
from typing import List
67
from typing import Optional
8+
from typing import Tuple
79
from typing import Union
810

911
import ddtrace
1012
from ddtrace import Span
1113
from ddtrace import config
1214
from ddtrace import patch
1315
from ddtrace._trace.context import Context
16+
from ddtrace.constants import ERROR_MSG
17+
from ddtrace.constants import ERROR_STACK
18+
from ddtrace.constants import ERROR_TYPE
1419
from ddtrace.ext import SpanTypes
1520
from ddtrace.internal import atexit
21+
from ddtrace.internal import core
1622
from ddtrace.internal import forksafe
1723
from ddtrace.internal._rand import rand64bits
1824
from ddtrace.internal.compat import ensure_text
@@ -24,6 +30,7 @@
2430
from ddtrace.internal.telemetry.constants import TELEMETRY_APM_PRODUCT
2531
from ddtrace.internal.utils.formats import asbool
2632
from ddtrace.internal.utils.formats import parse_tags_str
33+
from ddtrace.llmobs import _constants as constants
2734
from ddtrace.llmobs._constants import ANNOTATIONS_CONTEXT_ID
2835
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
2936
from ddtrace.llmobs._constants import INPUT_MESSAGES
@@ -45,11 +52,11 @@
4552
from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
4653
from ddtrace.llmobs._constants import TAGS
4754
from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
48-
from ddtrace.llmobs._trace_processor import LLMObsTraceProcessor
4955
from ddtrace.llmobs._utils import AnnotationContext
5056
from ddtrace.llmobs._utils import _get_llmobs_parent_id
5157
from ddtrace.llmobs._utils import _get_ml_app
5258
from ddtrace.llmobs._utils import _get_session_id
59+
from ddtrace.llmobs._utils import _get_span_name
5360
from ddtrace.llmobs._utils import _inject_llmobs_parent_id
5461
from ddtrace.llmobs._utils import safe_json
5562
from ddtrace.llmobs._utils import validate_prompt
@@ -81,34 +88,157 @@ class LLMObs(Service):
8188
def __init__(self, tracer=None):
8289
super(LLMObs, self).__init__()
8390
self.tracer = tracer or ddtrace.tracer
84-
self._llmobs_span_writer = None
85-
8691
self._llmobs_span_writer = LLMObsSpanWriter(
8792
is_agentless=config._llmobs_agentless_enabled,
8893
interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)),
8994
timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)),
9095
)
91-
9296
self._llmobs_eval_metric_writer = LLMObsEvalMetricWriter(
9397
site=config._dd_site,
9498
api_key=config._dd_api_key,
9599
interval=float(os.getenv("_DD_LLMOBS_WRITER_INTERVAL", 1.0)),
96100
timeout=float(os.getenv("_DD_LLMOBS_WRITER_TIMEOUT", 5.0)),
97101
)
98-
99102
self._evaluator_runner = EvaluatorRunner(
100103
interval=float(os.getenv("_DD_LLMOBS_EVALUATOR_INTERVAL", 1.0)),
101104
llmobs_service=self,
102105
)
103106

104-
self._trace_processor = LLMObsTraceProcessor(self._llmobs_span_writer, self._evaluator_runner)
105107
forksafe.register(self._child_after_fork)
106108

107109
self._annotations = []
108110
self._annotation_context_lock = forksafe.RLock()
109-
self.tracer.on_start_span(self._do_annotations)
110111

111-
def _do_annotations(self, span):
112+
# Register hooks for span events
113+
core.on("trace.span_start", self._do_annotations)
114+
core.on("trace.span_finish", self._on_span_finish)
115+
116+
def _on_span_finish(self, span):
    """Handle a finished span delivered by the ``trace.span_finish`` event.

    The span is passed on to ``_submit_llmobs_span`` only when the service
    is enabled and the span's type is ``SpanTypes.LLM``; every other span
    is ignored.
    """
    if not self.enabled:
        return
    if span.span_type != SpanTypes.LLM:
        return
    self._submit_llmobs_span(span)
119+
120+
def _submit_llmobs_span(self, span: Span) -> None:
    """Generate and submit an LLMObs span event to be sent to LLMObs.

    The event is built by ``_llmobs_span_event`` and enqueued on the span
    writer. A ``KeyError`` or ``TypeError`` raised while doing so is logged
    and treated as a malformed span. Afterwards, if an event was built, the
    span kind is ``"llm"`` and the span is not a ragas integration span, the
    event is also enqueued on the evaluator runner (when one is set).

    Any exception other than ``KeyError``/``TypeError`` propagates to the
    caller unchanged.
    """
    span_event = None
    is_ragas_integration_span = False
    is_llm_span = span._get_ctx_item(SPAN_KIND) == "llm"
    try:
        span_event, is_ragas_integration_span = self._llmobs_span_event(span)
        self._llmobs_span_writer.enqueue(span_event)
    except (KeyError, TypeError):
        log.error(
            "Error generating LLMObs span event for span %s, likely due to malformed span", span, exc_info=True
        )

    # The evaluator hand-off intentionally lives after the try statement
    # rather than in a ``finally`` clause: a ``return`` inside ``finally``
    # silently discards whatever exception is in flight, which would hide
    # unexpected errors for some spans while surfacing them for others.
    if not span_event or not is_llm_span or is_ragas_integration_span:
        return
    if self._evaluator_runner:
        self._evaluator_runner.enqueue(span_event, span)
137+
138+
@classmethod
def _llmobs_span_event(cls, span: Span) -> Tuple[Dict[str, Any], bool]:
    """Build the LLMObs span event dictionary for ``span``.

    Returns a ``(event, is_ragas_integration_span)`` tuple. The flag is true
    when the span's ml_app starts with ``constants.RAGAS_ML_APP_PREFIX``.

    Raises ``KeyError`` when the span context carries no span kind.

    Side effects: the resolved ml_app and, when one is found, the session id
    are written back to the span's context.
    """
    ctx_item = span._get_ctx_item

    span_kind = ctx_item(SPAN_KIND)
    if not span_kind:
        raise KeyError("Span kind not found in span context")
    is_llm_kind = span_kind == "llm"

    inputs: Dict[str, Any] = {}
    outputs: Dict[str, Any] = {}
    meta: Dict[str, Any] = {"span.kind": span_kind, "input": inputs, "output": outputs}

    # Model details are only reported for llm/embedding spans that name a model.
    if span_kind in ("llm", "embedding"):
        model_name = ctx_item(MODEL_NAME)
        if model_name is not None:
            meta["model_name"] = model_name
            meta["model_provider"] = (ctx_item(MODEL_PROVIDER) or "custom").lower()
    meta["metadata"] = ctx_item(METADATA) or {}

    input_parameters = ctx_item(INPUT_PARAMETERS)
    if input_parameters:
        inputs["parameters"] = input_parameters
    if is_llm_kind:
        input_messages = ctx_item(INPUT_MESSAGES)
        if input_messages is not None:
            inputs["messages"] = input_messages
    input_value = ctx_item(INPUT_VALUE)
    if input_value is not None:
        inputs["value"] = safe_json(input_value)
    if is_llm_kind:
        output_messages = ctx_item(OUTPUT_MESSAGES)
        if output_messages is not None:
            outputs["messages"] = output_messages
    if span_kind == "embedding":
        input_documents = ctx_item(INPUT_DOCUMENTS)
        if input_documents is not None:
            inputs["documents"] = input_documents
    output_value = ctx_item(OUTPUT_VALUE)
    if output_value is not None:
        outputs["value"] = safe_json(output_value)
    if span_kind == "retrieval":
        output_documents = ctx_item(OUTPUT_DOCUMENTS)
        if output_documents is not None:
            outputs["documents"] = output_documents

    prompt_json_str = ctx_item(INPUT_PROMPT)
    if prompt_json_str is not None:
        if is_llm_kind:
            inputs["prompt"] = prompt_json_str
        else:
            log.warning(
                "Dropping prompt on non-LLM span kind, annotating prompts is only supported for LLM span kinds."
            )

    if span.error:
        meta[ERROR_MSG] = span.get_tag(ERROR_MSG)
        meta[ERROR_STACK] = span.get_tag(ERROR_STACK)
        meta[ERROR_TYPE] = span.get_tag(ERROR_TYPE)

    # Empty input/output sections are left out of the event entirely.
    for section in ("input", "output"):
        if not meta[section]:
            del meta[section]

    metrics = ctx_item(METRICS) or {}
    ml_app = _get_ml_app(span)
    is_ragas_integration_span = ml_app.startswith(constants.RAGAS_ML_APP_PREFIX)

    span._set_ctx_item(ML_APP, ml_app)
    parent_id = str(_get_llmobs_parent_id(span) or "undefined")

    llmobs_span_event = {
        "trace_id": format(span.trace_id, "x"),
        "span_id": str(span.span_id),
        "parent_id": parent_id,
        "name": _get_span_name(span),
        "start_ns": span.start_ns,
        "duration": span.duration_ns,
        "status": "error" if span.error else "ok",
        "meta": meta,
        "metrics": metrics,
    }

    session_id = _get_session_id(span)
    if session_id is not None:
        span._set_ctx_item(SESSION_ID, session_id)
        llmobs_span_event["session_id"] = session_id

    llmobs_span_event["tags"] = cls._llmobs_tags(
        span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span
    )
    return llmobs_span_event, is_ragas_integration_span
214+
215+
@staticmethod
def _llmobs_tags(
    span: Span, ml_app: str, session_id: Optional[str] = None, is_ragas_integration_span: bool = False
) -> List[str]:
    """Return the LLMObs tags for ``span`` as ``"key:value"`` strings.

    A default tag set is built from the global config, the span and the
    given ``ml_app``. An error type, a session id and the ragas runner tag
    are added when they apply. Tags stored on the span's context are merged
    last, so they replace defaults that share a key.
    """
    tags: Dict[str, Any] = {}
    tags["version"] = config.version or ""
    tags["env"] = config.env or ""
    tags["service"] = span.service or ""
    tags["source"] = "integration"
    tags["ml_app"] = ml_app
    tags["ddtrace.version"] = ddtrace.__version__
    tags["language"] = "python"
    tags["error"] = span.error

    error_type = span.get_tag(ERROR_TYPE)
    if error_type:
        tags["error_type"] = error_type
    if session_id:
        tags["session_id"] = session_id
    if is_ragas_integration_span:
        tags[constants.RUNNER_IS_INTEGRATION_SPAN_TAG] = "ragas"

    span_tags = span._get_ctx_item(TAGS)
    if span_tags is not None:
        tags.update(span_tags)

    return [f"{key}:{value}" for key, value in tags.items()]
240+
241+
def _do_annotations(self, span: Span) -> None:
112242
# get the current span context
113243
# only do the annotations if it matches the context
114244
if span.span_type != SpanTypes.LLM: # do this check to avoid the warning log in `annotate`
@@ -120,20 +250,14 @@ def _do_annotations(self, span):
120250
if current_context_id == context_id:
121251
self.annotate(span, **annotation_kwargs)
122252

123-
def _child_after_fork(self):
253+
def _child_after_fork(self) -> None:
124254
self._llmobs_span_writer = self._llmobs_span_writer.recreate()
125255
self._llmobs_eval_metric_writer = self._llmobs_eval_metric_writer.recreate()
126256
self._evaluator_runner = self._evaluator_runner.recreate()
127-
self._trace_processor._span_writer = self._llmobs_span_writer
128-
self._trace_processor._evaluator_runner = self._evaluator_runner
129257
if self.enabled:
130258
self._start_service()
131259

132260
def _start_service(self) -> None:
133-
tracer_filters = self.tracer._filters
134-
if not any(isinstance(tracer_filter, LLMObsTraceProcessor) for tracer_filter in tracer_filters):
135-
tracer_filters += [self._trace_processor]
136-
self.tracer.configure(settings={"FILTERS": tracer_filters})
137261
try:
138262
self._llmobs_span_writer.start()
139263
self._llmobs_eval_metric_writer.start()
@@ -160,11 +284,7 @@ def _stop_service(self) -> None:
160284
except ServiceStatusError:
161285
log.debug("Error stopping LLMObs writers")
162286

163-
try:
164-
forksafe.unregister(self._child_after_fork)
165-
self.tracer.shutdown()
166-
except Exception:
167-
log.warning("Failed to shutdown tracer", exc_info=True)
287+
forksafe.unregister(self._child_after_fork)
168288

169289
@classmethod
170290
def enable(
@@ -265,7 +385,6 @@ def disable(cls) -> None:
265385

266386
cls._instance.stop()
267387
cls.enabled = False
268-
cls._instance.tracer.deregister_on_start_span(cls._instance._do_annotations)
269388
telemetry_writer.product_activated(TELEMETRY_APM_PRODUCT.LLMOBS, False)
270389

271390
log.debug("%s disabled", cls.__name__)

0 commit comments

Comments
 (0)