fix(tracing): activate distributed headers once per request (#12250)

ZStriker19 · web-flow · commit 9a5fd14ea696 · 2025-04-04T13:31:38.000-04:00
Currently, core calls `_start_span` for every span generated by Django, Flask, and certain other integrations — basically any integration that uses `core.context_with_data` to generate spans. Each `_start_span` call calls `activate_distributed_headers`, which in turn calls `HTTP_PROPAGATOR.extract`. This leads to around 7-10 unneeded calls of that method per request on average. The logic in [activate_distributed_headers](https://github.com/DataDog/dd-trace-py/blob/main/ddtrace/contrib/internal/trace_utils.py#L586-L594) is what currently saves us from re-activating the same context over and over again each time a new span is generated. By adding an `activate_distributed_headers` param we can make sure to only activate the headers when necessary, avoiding code that runs for no reason and improving performance. Something I learned while making this PR is that when `core.context_with_data()` is called, if we do `ctx.get_item` in, say, `_start_span` and the item doesn't exist on the local context, we look at the parent context, essentially going all the way up the tree of contexts until we either find a value or return `None`. With that being the case, we needed to use `ctx.get_local_item("activate_distributed_headers")` instead of the usual `ctx.get_item`, since the parent context will almost always have `activate_distributed_headers` set to `True`.
## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
diff --git a/ddtrace/_trace/trace_handlers.py b/ddtrace/_trace/trace_handlers.py
@@ -106,14 +106,15 @@ def _get_parameters_for_new_span_directly_from_context(ctx: core.ExecutionContex
 
 
 def _start_span(ctx: core.ExecutionContext, call_trace: bool = True, **kwargs) -> "Span":
+    activate_distributed_headers = ctx.get_local_item("activate_distributed_headers")
     span_kwargs = _get_parameters_for_new_span_directly_from_context(ctx)
     call_trace = ctx.get_item("call_trace", call_trace)
     tracer = ctx.get_item("tracer") or (ctx.get_item("middleware") or ctx["pin"]).tracer
-    distributed_headers_config = ctx.get_item("distributed_headers_config")
-    if distributed_headers_config:
+    integration_config = ctx.get_item("integration_config")
+    if integration_config and activate_distributed_headers:
         trace_utils.activate_distributed_headers(
             tracer,
-            int_config=distributed_headers_config,
+            int_config=integration_config,
             request_headers=ctx["distributed_headers"],
             override=ctx.get_item("distributed_headers_config_override"),
         )
@@ -123,7 +124,7 @@ def _start_span(ctx: core.ExecutionContext, call_trace: bool = True, **kwargs) -
 
     if config._inferred_proxy_services_enabled:
         # dispatch event for checking headers and possibly making an inferred proxy span
-        core.dispatch("inferred_proxy.start", (ctx, tracer, span_kwargs, call_trace, distributed_headers_config))
+        core.dispatch("inferred_proxy.start", (ctx, tracer, span_kwargs, call_trace, integration_config))
         # re-get span_kwargs in case an inferred span was created and we have a new span_kwargs.child_of field
         span_kwargs = ctx.get_item("span_kwargs", span_kwargs)
 
@@ -197,7 +198,7 @@ def _set_inferred_proxy_tags(span, status_code):
                 inferred_span.set_tag(ERROR_STACK, span.get_tag(ERROR_STACK))
 
 
-def _on_inferred_proxy_start(ctx, tracer, span_kwargs, call_trace, distributed_headers_config):
+def _on_inferred_proxy_start(ctx, tracer, span_kwargs, call_trace, integration_config):
     # Skip creating another inferred span if one has already been created for this request
     if ctx.get_item("inferred_proxy_span"):
         return
@@ -207,7 +208,7 @@ def _on_inferred_proxy_start(ctx, tracer, span_kwargs, call_trace, distributed_h
     headers = ctx.get_item("headers", ctx.get_item("distributed_headers", None))
 
     # Inferred Proxy Spans
-    if distributed_headers_config and headers is not None:
+    if integration_config and headers is not None:
         create_inferred_proxy_span_if_headers_exist(
             ctx,
             headers=headers,
@@ -499,7 +500,7 @@ def _on_django_finalize_response_pre(ctx, after_request_tags, request, response)
     span = ctx.span
     after_request_tags(ctx["pin"], span, request, response)
 
-    trace_utils.set_http_meta(span, ctx["distributed_headers_config"], route=span.get_tag("http.route"))
+    trace_utils.set_http_meta(span, ctx["integration_config"], route=span.get_tag("http.route"))
     _set_inferred_proxy_tags(span, None)
 
 
@@ -512,7 +513,7 @@ def _on_django_start_response(
 
     trace_utils.set_http_meta(
         ctx.span,
-        ctx["distributed_headers_config"],
+        ctx["integration_config"],
         method=request.method,
         query=query,
         raw_uri=uri,
diff --git a/ddtrace/contrib/internal/aiohttp/middlewares.py b/ddtrace/contrib/internal/aiohttp/middlewares.py
@@ -41,7 +41,8 @@ async def attach_context(request):
             tags={},
             tracer=tracer,
             distributed_headers=request.headers,
-            distributed_headers_config=config.aiohttp,
+            integration_config=config.aiohttp,
+            activate_distributed_headers=True,
             distributed_headers_config_override=app[CONFIG_KEY]["distributed_tracing_enabled"],
             headers_case_sensitive=True,
             analytics_enabled=analytics_enabled,
diff --git a/ddtrace/contrib/internal/asgi/middleware.py b/ddtrace/contrib/internal/asgi/middleware.py
@@ -164,8 +164,9 @@ async def __call__(self, scope, receive, send):
             resource=resource,
             span_type=SpanTypes.WEB,
             service=trace_utils.int_service(None, self.integration_config),
-            distributed_headers_config=config.asgi,
             distributed_headers=headers,
+            integration_config=config.asgi,
+            activate_distributed_headers=True,
             pin=pin,
         ) as ctx, ctx.span as span:
             span.set_tag_str(COMPONENT, self.integration_config.integration_name)
diff --git a/ddtrace/contrib/internal/bottle/trace.py b/ddtrace/contrib/internal/bottle/trace.py
@@ -46,9 +46,10 @@ def wrapped(*args, **kwargs):
                 tags={},
                 tracer=self.tracer,
                 distributed_headers=request.headers,
-                distributed_headers_config=config.bottle,
+                integration_config=config.bottle,
                 headers_case_sensitive=True,
                 analytics_sample_rate=config.bottle.get_analytics_sample_rate(use_global_config=True),
+                activate_distributed_headers=True,
             ) as ctx, ctx.span as req_span:
                 ctx.set_item("req_span", req_span)
                 core.dispatch("web.request.start", (ctx, config.bottle))
diff --git a/ddtrace/contrib/internal/cherrypy/patch.py b/ddtrace/contrib/internal/cherrypy/patch.py
@@ -83,7 +83,8 @@ def _on_start_resource(self):
             tags={},
             tracer=self._tracer,
             distributed_headers=cherrypy.request.headers,
-            distributed_headers_config=config.cherrypy,
+            integration_config=config.cherrypy,
+            activate_distributed_headers=True,
             headers_case_sensitive=True,
         ) as ctx:
             req_span = ctx.span
diff --git a/ddtrace/contrib/internal/django/patch.py b/ddtrace/contrib/internal/django/patch.py
@@ -482,8 +482,9 @@ def traced_get_response(django, pin, func, instance, args, kwargs):
         service=trace_utils.int_service(pin, config.django),
         span_type=SpanTypes.WEB,
         tags={COMPONENT: config.django.integration_name, SPAN_KIND: SpanKind.SERVER},
-        distributed_headers_config=config.django,
+        integration_config=config.django,
         distributed_headers=request_headers,
+        activate_distributed_headers=True,
         pin=pin,
     ) as ctx, ctx.span:
         core.dispatch(
diff --git a/ddtrace/contrib/internal/falcon/middleware.py b/ddtrace/contrib/internal/falcon/middleware.py
@@ -30,7 +30,8 @@ def process_request(self, req, resp):
             tags={},
             tracer=self.tracer,
             distributed_headers=headers,
-            distributed_headers_config=config.falcon,
+            integration_config=config.falcon,
+            activate_distributed_headers=True,
             headers_case_sensitive=True,
             analytics_sample_rate=config.falcon.get_analytics_sample_rate(use_global_config=True),
         ) as ctx:
diff --git a/ddtrace/contrib/internal/molten/patch.py b/ddtrace/contrib/internal/molten/patch.py
@@ -94,7 +94,8 @@ def patch_app_call(wrapped, instance, args, kwargs):
         tags={},
         tracer=pin.tracer,
         distributed_headers=dict(request.headers),  # request.headers is type Iterable[Tuple[str, str]]
-        distributed_headers_config=config.molten,
+        integration_config=config.molten,
+        activate_distributed_headers=True,
         headers_case_sensitive=True,
         analytics_sample_rate=config.molten.get_analytics_sample_rate(use_global_config=True),
     ) as ctx, ctx.span as req_span:
diff --git a/ddtrace/contrib/internal/pyramid/trace.py b/ddtrace/contrib/internal/pyramid/trace.py
@@ -79,7 +79,8 @@ def trace_tween(request):
                 tags={},
                 tracer=tracer,
                 distributed_headers=request.headers,
-                distributed_headers_config=config.pyramid,
+                integration_config=config.pyramid,
+                activate_distributed_headers=True,
                 headers_case_sensitive=True,
                 # DEV: pyramid is special case maintains separate configuration from config api
                 analytics_enabled=settings.get(SETTINGS_ANALYTICS_ENABLED),
diff --git a/ddtrace/contrib/internal/rq/patch.py b/ddtrace/contrib/internal/rq/patch.py
@@ -110,7 +110,7 @@ def traced_perform_job(rq, pin, func, instance, args, kwargs):
             pin=pin,
             span_type=SpanTypes.WORKER,
             resource=job.func_name,
-            distributed_headers_config=config.rq_worker,
+            integration_config=config.rq_worker,
             distributed_headers=job.meta,
             tags={COMPONENT: config.rq.integration_name, SPAN_KIND: SpanKind.CONSUMER, JOB_ID: job.get_id()},
         ) as ctx, ctx.span:
diff --git a/ddtrace/contrib/internal/sanic/patch.py b/ddtrace/contrib/internal/sanic/patch.py
@@ -208,7 +208,8 @@ def _create_sanic_request_span(request):
         tags={},
         pin=pin,
         distributed_headers=headers,
-        distributed_headers_config=config.sanic,
+        integration_config=config.sanic,
+        activate_distributed_headers=True,
         headers_case_sensitive=True,
         analytics_sample_rate=config.sanic.get_analytics_sample_rate(use_global_config=True),
     ) as ctx:
diff --git a/ddtrace/contrib/internal/tornado/handlers.py b/ddtrace/contrib/internal/tornado/handlers.py
@@ -36,7 +36,8 @@ def execute(func, handler, args, kwargs):
             tags={},
             tracer=tracer,
             distributed_headers=handler.request.headers,
-            distributed_headers_config=config.tornado,
+            integration_config=config.tornado,
+            activate_distributed_headers=True,
             distributed_headers_config_override=distributed_tracing,
             headers_case_sensitive=True,
             # DEV: tornado is special case maintains separate configuration from config api
diff --git a/ddtrace/contrib/internal/wsgi/wsgi.py b/ddtrace/contrib/internal/wsgi/wsgi.py
@@ -104,11 +104,12 @@ def __call__(self, environ: Iterable, start_response: Callable) -> wrapt.ObjectP
             span_type=SpanTypes.WEB,
             span_name=(self._request_call_name if hasattr(self, "_request_call_name") else self._request_span_name),
             middleware_config=self._config,
-            distributed_headers_config=self._config,
+            integration_config=self._config,
             distributed_headers=environ,
             environ=environ,
             middleware=self,
             span_key="req_span",
+            activate_distributed_headers=True,
         ) as ctx:
             ctx.set_item("wsgi.construct_url", construct_url)
 
diff --git a/releasenotes/notes/call_extract_once_per_request-620e1680c6fa6a85.yaml b/releasenotes/notes/call_extract_once_per_request-620e1680c6fa6a85.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    tracing: This performance fix resolves an issue where Django, Flask, Cherrypy, wsgi, 
+    asgi, pyramid, molten, falcon, tornado, aiohttp, bottle, rq, and sanic integrations
+    were unnecessarily running code to activate distributed tracing headers multiple times per request.
+    This is fixed by only activating distributed tracing headers once per request.