Skip to content

Commit b6d6cf5

Browse files
add support for profiling threading.Semaphore objects in Python Lock profiler
1 parent a14402e commit b6d6cf5

File tree

6 files changed

+259
-74
lines changed

6 files changed

+259
-74
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,5 +203,6 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt
203203
*.debug
204204
*.dSYM/
205205

206-
# Rust build artifacts
206+
# Rust build artifacts and dependencies
207207
src/native/target*
208+
src/native/Cargo.lock

ddtrace/profiling/collector/_lock.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from __future__ import annotations
33

44
import _thread
5-
import abc
65
import os.path
76
import sys
87
import time
@@ -52,6 +51,7 @@ class _ProfiledLock:
5251
"init_location",
5352
"acquired_time",
5453
"name",
54+
"is_internal",
5555
)
5656

5757
def __init__(
@@ -60,6 +60,7 @@ def __init__(
6060
tracer: Optional[Tracer],
6161
max_nframes: int,
6262
capture_sampler: collector.CaptureSampler,
63+
is_internal: bool = False,
6364
) -> None:
6465
self.__wrapped__: Any = wrapped
6566
self.tracer: Optional[Tracer] = tracer
@@ -71,6 +72,9 @@ def __init__(
7172
self.init_location: str = f"{os.path.basename(code.co_filename)}:{frame.f_lineno}"
7273
self.acquired_time: int = 0
7374
self.name: Optional[str] = None
75+
# If True, this lock is internal to another sync primitive (e.g., Lock inside Semaphore)
76+
# and should not generate profile samples to avoid double-counting
77+
self.is_internal: bool = is_internal
7478

7579
### DUNDER methods ###
7680

@@ -161,6 +165,11 @@ def _flush_sample(self, start: int, end: int, is_acquire: bool) -> None:
161165
end: End timestamp in nanoseconds
162166
is_acquire: True for acquire operations, False for release operations
163167
"""
168+
# Skip profiling for internal locks (e.g., Lock inside Semaphore/Condition)
169+
# to avoid double-counting when multiple collectors are active
170+
if self.is_internal:
171+
return
172+
164173
handle: ddup.SampleHandle = ddup.SampleHandle()
165174

166175
handle.push_monotonic_ns(end)
@@ -262,6 +271,8 @@ class LockCollector(collector.CaptureSamplerCollector):
262271
"""Record lock usage."""
263272

264273
PROFILED_LOCK_CLASS: Type[Any]
274+
PATCH_MODULE: Any # e.g., threading module
275+
PATCH_ATTR_NAME: str # e.g., "Lock", "RLock", "Semaphore"
265276

266277
def __init__(
267278
self,
@@ -275,11 +286,11 @@ def __init__(
275286
self.tracer: Optional[Tracer] = tracer
276287
self._original_lock: Any = None
277288

278-
@abc.abstractmethod
279-
def _get_patch_target(self) -> Callable[..., Any]: ...
289+
def _get_patch_target(self) -> Callable[..., Any]:
290+
return getattr(self.PATCH_MODULE, self.PATCH_ATTR_NAME)
280291

281-
@abc.abstractmethod
282-
def _set_patch_target(self, value: Any) -> None: ...
292+
def _set_patch_target(self, value: Any) -> None:
293+
setattr(self.PATCH_MODULE, self.PATCH_ATTR_NAME, value)
283294

284295
def _start_service(self) -> None:
285296
"""Start collecting lock usage."""
@@ -297,12 +308,35 @@ def patch(self) -> None:
297308
original_lock: Any = self._original_lock # Capture non-None value
298309

299310
def _profiled_allocate_lock(*args: Any, **kwargs: Any) -> _ProfiledLock:
300-
"""Simple wrapper that returns profiled locks."""
311+
"""Simple wrapper that returns profiled locks.
312+
313+
Detects if the lock is being created from within threading.py stdlib
314+
(i.e., internal to Semaphore/Condition) to avoid double-counting.
315+
"""
316+
import threading as threading_module
317+
318+
# Check if caller is from threading.py (internal lock)
319+
is_internal: bool = False
320+
try:
321+
# Frame 0: _profiled_allocate_lock
322+
# Frame 1: _LockAllocatorWrapper.__call__
323+
# Frame 2: actual caller (threading.Lock() call site)
324+
caller_filename = sys._getframe(2).f_code.co_filename
325+
threading_module_file = threading_module.__file__
326+
if threading_module_file and caller_filename:
327+
# Normalize paths to handle symlinks and different path formats
328+
caller_filename_normalized = os.path.normpath(os.path.realpath(caller_filename))
329+
threading_file_normalized = os.path.normpath(os.path.realpath(threading_module_file))
330+
is_internal = caller_filename_normalized == threading_file_normalized
331+
except (ValueError, AttributeError, OSError):
332+
pass
333+
301334
return self.PROFILED_LOCK_CLASS(
302335
wrapped=original_lock(*args, **kwargs),
303336
tracer=self.tracer,
304337
max_nframes=self.nframes,
305338
capture_sampler=self._capture_sampler,
339+
is_internal=is_internal,
306340
)
307341

308342
self._set_patch_target(_LockAllocatorWrapper(_profiled_allocate_lock))

ddtrace/profiling/collector/threading.py

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import absolute_import
22

33
import threading
4-
import typing
54

65
from ddtrace.internal._unpatched import _threading as ddtrace_threading
76
from ddtrace.internal.datadog.profiling import stack_v2
@@ -18,34 +17,32 @@ class _ProfiledThreadingRLock(_lock._ProfiledLock):
1817
pass
1918

2019

20+
class _ProfiledThreadingSemaphore(_lock._ProfiledLock):
21+
pass
22+
23+
2124
class ThreadingLockCollector(_lock.LockCollector):
2225
"""Record threading.Lock usage."""
2326

2427
PROFILED_LOCK_CLASS = _ProfiledThreadingLock
25-
26-
def _get_patch_target(self) -> typing.Type[threading.Lock]:
27-
return threading.Lock
28-
29-
def _set_patch_target(
30-
self,
31-
value: typing.Any,
32-
) -> None:
33-
threading.Lock = value
28+
PATCH_MODULE = threading
29+
PATCH_ATTR_NAME = "Lock"
3430

3531

3632
class ThreadingRLockCollector(_lock.LockCollector):
3733
"""Record threading.RLock usage."""
3834

3935
PROFILED_LOCK_CLASS = _ProfiledThreadingRLock
36+
PATCH_MODULE = threading
37+
PATCH_ATTR_NAME = "RLock"
38+
4039

41-
def _get_patch_target(self) -> typing.Type[threading.RLock]:
42-
return threading.RLock
40+
class ThreadingSemaphoreCollector(_lock.LockCollector):
41+
"""Record threading.Semaphore usage."""
4342

44-
def _set_patch_target(
45-
self,
46-
value: typing.Any,
47-
) -> None:
48-
threading.RLock = value
43+
PROFILED_LOCK_CLASS = _ProfiledThreadingSemaphore
44+
PATCH_MODULE = threading
45+
PATCH_ATTR_NAME = "Semaphore"
4946

5047

5148
# Also patch threading.Thread so echion can track thread lifetimes

ddtrace/profiling/profiler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ def start_collector(collector_class: Type) -> None:
220220
self._collectors_on_import = [
221221
("threading", lambda _: start_collector(threading.ThreadingLockCollector)),
222222
("threading", lambda _: start_collector(threading.ThreadingRLockCollector)),
223+
("threading", lambda _: start_collector(threading.ThreadingSemaphoreCollector)),
223224
("asyncio", lambda _: start_collector(asyncio.AsyncioLockCollector)),
224225
]
225226

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
features:
3+
- |
4+
profiling: Add support for ``threading.Semaphore`` locking type profiling in Python.
5+
The Lock profiler now detects and marks "internal" Lock objects, i.e. those that are part of implementation of higher-level locking types.
6+
One example of such higher-level primitive is ``threading.Semaphore``, which is implemented with ``threading.Condition``, which itself uses ``threading.Lock`` internally.
7+
Marking a locks as internal will prevent it from being logged, which means the sample will only be counted once.

0 commit comments

Comments
 (0)