Skip to content

Commit e7ebd87

Browse files
authored
feat(apm): add referrer hostname as span tag (#12778)
Parses the referrer hostname from the HTTP `Referer` header when present and adds it to the span tags as `http.referrer_hostname`. This will be used to identify frontend clients without leaking the PII that the full referrer can contain. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent 7955b92 commit e7ebd87

File tree

4 files changed

+73
-0
lines changed

4 files changed

+73
-0
lines changed

ddtrace/contrib/internal/trace_utils.py

+29
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,30 @@ def _get_request_header_user_agent(headers, headers_are_case_sensitive=False):
172172
return ""
173173

174174

175+
def _get_request_header_referrer_host(headers, headers_are_case_sensitive=False):
    # type: (Mapping[str, str], bool) -> str
    """Return the hostname part of the request's ``Referer`` header.

    Only the hostname is returned; the rest of the referrer URL (port, path,
    query string) is discarded.

    :param headers: Mapping of HTTP request header names to their values
    :type headers: Mapping[str, str]
    :param headers_are_case_sensitive: Whether the headers are case sensitive. When True the header
        is looked up through ``_get_header_value_case_insensitive`` instead of a direct ``get``.
    :type headers_are_case_sensitive: bool
    :return: The referrer hostname, or an empty string when the header is missing or empty,
        or when no hostname can be parsed from it
    :rtype: str
    """
    referrer = (
        _get_header_value_case_insensitive(headers, http.REFERER_HEADER)
        if headers_are_case_sensitive
        else headers.get(http.REFERER_HEADER)
    )
    if not referrer:
        return ""

    try:
        hostname = parse.urlparse(referrer).hostname
    except (ValueError, AttributeError):
        # Unparseable referrer values are treated the same as a missing header.
        return ""
    return hostname or ""
197+
198+
175199
def _get_request_header_client_ip(headers, peer_ip=None, headers_are_case_sensitive=False):
176200
# type: (Optional[Mapping[str, str]], Optional[str], bool) -> str
177201

@@ -501,6 +525,11 @@ def set_http_meta(
501525
if user_agent:
502526
span.set_tag_str(http.USER_AGENT, user_agent)
503527

528+
# Extract referrer host if referer header is present
529+
referrer_host = _get_request_header_referrer_host(request_headers, headers_are_case_sensitive)
530+
if referrer_host:
531+
span.set_tag_str(http.REFERRER_HOSTNAME, referrer_host)
532+
504533
# We always collect the IP if appsec is enabled to report it on potential vulnerabilities.
505534
# https://datadoghq.atlassian.net/wiki/spaces/APS/pages/2118779066/Client+IP+addresses+resolution
506535
if asm_config._asm_enabled or config._retrieve_client_ip:

ddtrace/ext/http.py

+4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
VERSION = "http.version"
1818
CLIENT_IP = "http.client_ip"
1919
ROUTE = "http.route"
20+
REFERRER_HOSTNAME = "http.referrer_hostname"
21+
22+
# HTTP headers
23+
REFERER_HEADER = "referer"
2024

2125
# template render span type
2226
TEMPLATE = "template"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
2+
features:
3+
- tracer: extracts the referrer hostname from HTTP requests and stores it as the `http.referrer_hostname` tag.

tests/tracer/test_trace_utils.py

+37
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,43 @@ def test_whitelist_case_insensitive(self, span, integration_config):
252252
)
253253
assert span.get_tag("http.response.headers.content-type") == "some;value"
254254

255+
@pytest.mark.parametrize(
    "headers,expected_hostname,case_sensitive",
    [
        # Plain https referrer: only the hostname is expected in the tag.
        ({"referer": "https://example.com/path?query=1"}, "example.com", False),
        # An explicit port is not part of the expected hostname.
        ({"referer": "https://example.com:8080/path?query=1"}, "example.com", False),
        # Capitalised header name, looked up with headers_are_case_sensitive=True.
        ({"Referer": "https://example.com/path?query=1"}, "example.com", True),
        # No referer header at all: the tag is expected to be absent.
        ({"other-header": "value"}, None, False),
        # A value with no parseable hostname: the tag is expected to be absent.
        ({"referer": "not-a-valid-url"}, None, False),
    ],
)
def test_referrer_hostname_extraction(self, span, integration_config, headers, expected_hostname, case_sensitive):
    """Check the ``http.referrer_hostname`` tag produced by ``set_http_meta``.

    The tag must equal the hostname of the referer header, and must be unset
    (``None``) when the header is missing or holds no hostname.
    """
    trace_utils.set_http_meta(
        span,
        integration_config,
        request_headers=headers,
        headers_are_case_sensitive=case_sensitive,
    )
    referrer_tag = span.get_tag("http.referrer_hostname")
    assert referrer_tag == expected_hostname
291+
255292

256293
@pytest.mark.parametrize(
257294
"pin,config_val,default,global_service,expected",

0 commit comments

Comments
 (0)