-
Notifications
You must be signed in to change notification settings - Fork 457
feat(debugger): add agent state check to uploader #14653
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
P403n1x87
wants to merge
6
commits into
main
Choose a base branch
from
chore/debugger-agent-check-uploader
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
8bc5007
chore(debugger): add agent state check to uploader
P403n1x87 333422c
add release note
P403n1x87 eb3e678
Update ddtrace/debugging/_uploader.py
P403n1x87 054a50e
fix warning
P403n1x87 771c395
chore(debugger): rate-limit agent warning logs to once an hour (#14655)
tylfin be76628
Merge branch 'main' into chore/debugger-agent-check-uploader
tylfin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
from dataclasses import dataclass | ||
from enum import Enum | ||
from typing import Any | ||
from typing import Dict | ||
from typing import Optional | ||
from typing import Set | ||
from urllib.parse import quote | ||
|
@@ -14,14 +13,17 @@ | |
from ddtrace.debugging._signal.collector import SignalCollector | ||
from ddtrace.debugging._signal.model import SignalTrack | ||
from ddtrace.internal import agent | ||
from ddtrace.internal import logger | ||
from ddtrace.internal.logger import get_logger | ||
from ddtrace.internal.periodic import ForksafeAwakeablePeriodicService | ||
from ddtrace.internal.utils.http import connector | ||
from ddtrace.internal.utils.retry import fibonacci_backoff_with_jitter | ||
from ddtrace.internal.utils.time import HourGlass | ||
|
||
|
||
log = get_logger(__name__) | ||
UNSUPPORTED_AGENT = "unsupported_agent" | ||
logger.set_tag_rate_limit(UNSUPPORTED_AGENT, logger.HOUR) | ||
|
||
|
||
meter = metrics.get_meter("uploader") | ||
|
||
|
||
|
@@ -37,16 +39,23 @@ class UploaderProduct(str, Enum): | |
class UploaderTrack: | ||
endpoint: str | ||
queue: SignalQueue | ||
enabled: bool = True | ||
|
||
|
||
class SignalUploaderError(Exception): | ||
"""Signal uploader error.""" | ||
|
||
pass | ||
|
||
|
||
class LogsIntakeUploaderV1(ForksafeAwakeablePeriodicService): | ||
"""Logs intake uploader. | ||
class SignalUploader(agent.AgentCheckPeriodicService): | ||
"""Signal uploader. | ||
|
||
This class implements an interface with the debugger logs intake for both | ||
This class implements an interface with the debugger signal intake for both | ||
the debugger and the events platform. | ||
""" | ||
|
||
_instance: Optional["LogsIntakeUploaderV1"] = None | ||
_instance: Optional["SignalUploader"] = None | ||
_products: Set[UploaderProduct] = set() | ||
_agent_endpoints: Set[str] = set() | ||
|
||
|
@@ -58,10 +67,23 @@ class LogsIntakeUploaderV1(ForksafeAwakeablePeriodicService): | |
def __init__(self, interval: Optional[float] = None) -> None: | ||
super().__init__(interval if interval is not None else di_config.upload_interval_seconds) | ||
|
||
self._agent_endpoints_cache: HourGlass = HourGlass(duration=60.0) | ||
self._endpoint_suffix = endpoint_suffix = ( | ||
f"?ddtags={quote(di_config.tags)}" if di_config._tags_in_qs and di_config.tags else "" | ||
) | ||
|
||
self._tracks: Dict[SignalTrack, UploaderTrack] = {} | ||
self.set_track_endpoints() | ||
self._tracks = { | ||
SignalTrack.LOGS: UploaderTrack( | ||
endpoint=f"/debugger/v1/input{endpoint_suffix}", | ||
queue=self.__queue__( | ||
encoder=LogSignalJsonEncoder(di_config.service_name), on_full=self._on_buffer_full | ||
), | ||
), | ||
SignalTrack.SNAPSHOT: UploaderTrack( | ||
endpoint=f"/debugger/v2/input{endpoint_suffix}", # start optimistically | ||
queue=self.__queue__(encoder=SnapshotJsonEncoder(di_config.service_name), on_full=self._on_buffer_full), | ||
), | ||
} | ||
self._collector = self.__collector__({t: ut.queue for t, ut in self._tracks.items()}) | ||
self._headers = { | ||
"Content-type": "application/json; charset=utf-8", | ||
"Accept": "text/plain", | ||
|
@@ -76,64 +98,67 @@ def __init__(self, interval: Optional[float] = None) -> None: | |
)(self._write) | ||
|
||
log.debug( | ||
"Logs intake uploader initialized (url: %s, endpoints: %s, interval: %f)", | ||
"Signal uploader initialized (url: %s, endpoints: %s, interval: %f)", | ||
di_config._intake_url, | ||
{t: ut.endpoint for t, ut in self._tracks.items()}, | ||
self.interval, | ||
) | ||
|
||
self._flush_full = False | ||
|
||
def set_track_endpoints(self) -> None: | ||
if self._agent_endpoints_cache.trickling(): | ||
return | ||
|
||
try: | ||
agent_info = agent.info() | ||
self._agent_endpoints = set(agent_info.get("endpoints", [])) if agent_info is not None else set() | ||
except Exception: | ||
pass # nosec B110 | ||
finally: | ||
self._agent_endpoints_cache.turn() | ||
|
||
snapshot_track = "/debugger/v1/input" | ||
if "/debugger/v2/input" in self._agent_endpoints: | ||
snapshot_track = "/debugger/v2/input" | ||
elif "/debugger/v1/diagnostics" in self._agent_endpoints: | ||
snapshot_track = "/debugger/v1/diagnostics" | ||
|
||
endpoint_suffix = f"?ddtags={quote(di_config.tags)}" if di_config._tags_in_qs and di_config.tags else "" | ||
|
||
# Only create the tracks if they don't exist to preserve the track queue metadata. | ||
if not self._tracks: | ||
self._tracks = { | ||
SignalTrack.LOGS: UploaderTrack( | ||
endpoint=f"/debugger/v1/input{endpoint_suffix}", | ||
queue=self.__queue__( | ||
encoder=LogSignalJsonEncoder(di_config.service_name), on_full=self._on_buffer_full | ||
), | ||
), | ||
SignalTrack.SNAPSHOT: UploaderTrack( | ||
endpoint=f"{snapshot_track}{endpoint_suffix}", | ||
queue=self.__queue__( | ||
encoder=SnapshotJsonEncoder(di_config.service_name), on_full=self._on_buffer_full | ||
), | ||
), | ||
} | ||
def info_check(self, agent_info: Optional[dict]) -> bool: | ||
if agent_info is None: | ||
# Agent is unreachable | ||
return False | ||
|
||
if "endpoints" not in agent_info: | ||
# Agent not supported | ||
log.debug("Unsupported Datadog agent detected. Please upgrade to 7.49.") | ||
return False | ||
|
||
endpoints = set(agent_info.get("endpoints", [])) | ||
snapshot_track = self._tracks[SignalTrack.SNAPSHOT] | ||
snapshot_track.enabled = True | ||
|
||
if "/debugger/v2/input" in endpoints: | ||
log.debug("Detected /debugger/v2/input endpoint") | ||
snapshot_track.endpoint = f"/debugger/v2/input{self._endpoint_suffix}" | ||
elif "/debugger/v1/diagnostics" in endpoints: | ||
log.debug("Detected /debugger/v1/diagnostics endpoint fallback") | ||
snapshot_track.endpoint = f"/debugger/v1/diagnostics{self._endpoint_suffix}" | ||
else: | ||
self._tracks[SignalTrack.SNAPSHOT].endpoint = f"{snapshot_track}{endpoint_suffix}" | ||
snapshot_track.enabled = False | ||
log.warning( | ||
UNSUPPORTED_AGENT, | ||
extra={ | ||
"product": "debugger", | ||
"more_info": ( | ||
": Unsupported Datadog agent detected. Snapshots from Dynamic Instrumentation/" | ||
"Exception Replay/Code Origin for Spans will not be uploaded. " | ||
"Please upgrade to version 7.49.0 or later" | ||
), | ||
}, | ||
) | ||
|
||
self._collector = self.__collector__({t: ut.queue for t, ut in self._tracks.items()}) | ||
return True | ||
|
||
def _write(self, payload: bytes, endpoint: str) -> None: | ||
self.set_track_endpoints() | ||
try: | ||
with self._connect() as conn: | ||
conn.request("POST", endpoint, payload, headers=self._headers) | ||
resp = conn.getresponse() | ||
if not (200 <= resp.status < 300): | ||
log.error("Failed to upload payload to endpoint %s: [%d] %r", endpoint, resp.status, resp.read()) | ||
meter.increment("upload.error", tags={"status": str(resp.status)}) | ||
if 400 <= resp.status < 500: | ||
log.error( | ||
"Downgrading debugger endpoint after failed upload attempt to %s: [%d] %r", | ||
endpoint, | ||
resp.status, | ||
resp.read(), | ||
) | ||
msg = "Failed to upload payload" | ||
raise SignalUploaderError(msg) | ||
else: | ||
meter.increment("upload.success") | ||
meter.distribution("upload.size", len(payload)) | ||
|
@@ -157,28 +182,37 @@ def reset(self) -> None: | |
|
||
def _flush_track(self, track: UploaderTrack) -> None: | ||
queue = track.queue | ||
payload = queue.flush() | ||
if payload is not None: | ||
if (payload := queue.flush()) is not None and track.enabled: | ||
try: | ||
self._write_with_backoff(payload, track.endpoint) | ||
meter.distribution("batch.cardinality", queue.count) | ||
except SignalUploaderError: | ||
raise # Propagate error to transition to agent check state | ||
except Exception: | ||
log.debug("Cannot upload logs payload", exc_info=True) | ||
|
||
def periodic(self) -> None: | ||
"""Upload the buffer content to the logs intake.""" | ||
def online(self) -> None: | ||
"""Upload the buffer content to the agent.""" | ||
if self._flush_full: | ||
# We received the signal to flush a full buffer | ||
self._flush_full = False | ||
for track in self._tracks.values(): | ||
if track.queue.is_full(): | ||
self._flush_track(track) | ||
for signal_track, uploader_track in self._tracks.items(): | ||
if uploader_track.queue.is_full(): | ||
try: | ||
self._flush_track(uploader_track) | ||
except SignalUploaderError: | ||
if signal_track is SignalTrack.SNAPSHOT: | ||
uploader_track.endpoint = f"/debugger/v1/diagnostics{self._endpoint_suffix}" | ||
log.debug("Downgrading snapshot endpoint to %s", uploader_track.endpoint) | ||
# Try again immediately. If this fails for the same | ||
# reason we transition to agent check state | ||
self._flush_track(uploader_track) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Won't the track be empty after the call to |
||
|
||
for track in self._tracks.values(): | ||
if track.queue.count: | ||
self._flush_track(track) | ||
|
||
on_shutdown = periodic | ||
on_shutdown = online | ||
|
||
@classmethod | ||
def get_collector(cls) -> SignalCollector: | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
6 changes: 6 additions & 0 deletions
6
releasenotes/notes/chore-debugger-agent-check-uploader-5d644d20cf9b4af5.yaml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
--- | ||
features: | ||
- | | ||
dynamic instrumentation/exception replay/code origin for spans: added | ||
support for the latest Datadog agent intake for snapshots. This requires a | ||
minimum agent version of 7.49.0. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.