|
| 1 | +import os |
| 2 | +import sys |
1 | 3 | import time |
2 | 4 | from unittest import mock |
3 | 5 |
|
| 6 | +import pytest |
| 7 | + |
4 | 8 | import sentry_sdk |
5 | 9 | from sentry_sdk._span_batcher import SpanBatcher |
6 | 10 |
|
@@ -425,3 +429,67 @@ def test_transport_format(sentry_init, capture_envelopes): |
425 | 429 | assert "value" in value |
426 | 430 | assert "type" in value |
427 | 431 | assert value["type"] in ("string", "boolean", "integer", "double", "array") |
| 432 | + |
| 433 | + |
@pytest.mark.skipif(
    sys.platform == "win32"
    or not hasattr(os, "fork")
    or not hasattr(os, "register_at_fork"),
    reason="requires POSIX fork and os.register_at_fork (Python 3.7+)",
)
def test_span_batcher_lock_reset_in_child_after_fork(sentry_init):
    """Regression test for the SpanBatcher fork-deadlock fix.

    If os.fork() runs while another thread holds SpanBatcher._lock, the
    child inherits the lock locked. The holding thread does not exist in
    the child, so the lock can never be released and _ensure_thread
    deadlocks forever. The after-fork hook must replace the lock with a
    fresh one in the child and reset
    _flusher / _flusher_pid / _span_buffer / _running_size / _active /
    _flush_event / _running.

    The child reports its result through its exit status: 0 when every
    check passes, the 1-based position of the first failing check
    otherwise, and 255 when the child raised an unexpected exception.
    """
    sentry_init(
        traces_sample_rate=1.0,
        _experiments={"trace_lifecycle": "stream"},
    )
    batcher = sentry_sdk.get_client().span_batcher
    assert batcher is not None

    # Human-readable names of the child-side checks, in the same order as
    # the ``checks`` tuple built in the child, so that the parent can turn
    # the child's exit status into a useful assertion message.
    check_names = (
        "lock replaced",
        "lock not held",
        "flusher reset",
        "span buffer reset",
        "running size reset",
        "active flag reset",
        "flush event reset",
        "running reset",
    )
    # Exit status reserved for "the child raised before finishing its checks".
    child_crashed = 255
    trace_id = "test-trace-id"
    unset = object()

    original_lock = batcher._lock
    original_lock.acquire()
    try:
        # Remember the parent's state so that the dirty values written
        # below do not leak into fixture teardown or later tests.
        prior_running = batcher._running
        prior_event_set = batcher._flush_event.is_set()
        prior_flag = getattr(batcher._active, "flag", unset)
        try:
            # Dirty every piece of state that the after-fork hook is
            # expected to reset in the child.
            batcher._span_buffer[trace_id].append(object())
            batcher._running_size[trace_id] = 42
            batcher._active.flag = True
            batcher._flush_event.set()
            batcher._running = False

            pid = os.fork()
            if pid == 0:
                # Child process. It must never return into the pytest
                # runner it inherited from the parent, so every path out
                # of this branch (including an unexpected exception) ends
                # in os._exit().
                exit_code = child_crashed
                try:
                    checks = (
                        batcher._lock is not original_lock,
                        batcher._lock.acquire(blocking=False),
                        batcher._flusher is None and batcher._flusher_pid is None,
                        len(batcher._span_buffer) == 0,
                        len(batcher._running_size) == 0,
                        not getattr(batcher._active, "flag", False),
                        not batcher._flush_event.is_set(),
                        batcher._running is True,
                    )
                    exit_code = next(
                        (
                            position
                            for position, passed in enumerate(checks, start=1)
                            if not passed
                        ),
                        0,
                    )
                finally:
                    os._exit(exit_code)
        finally:
            # Parent only (the child has already called os._exit()).
            # Undo the mutations while the lock is still held.
            batcher._span_buffer.pop(trace_id, None)
            batcher._running_size.pop(trace_id, None)
            batcher._running = prior_running
            if not prior_event_set:
                batcher._flush_event.clear()
            if prior_flag is unset:
                if hasattr(batcher._active, "flag"):
                    del batcher._active.flag
            else:
                batcher._active.flag = prior_flag
    finally:
        # Always hand the lock back, even if setup or os.fork() failed.
        original_lock.release()

    _, status = os.waitpid(pid, 0)
    assert os.WIFEXITED(status), "child did not exit normally (wait status %r)" % (
        status,
    )
    exit_code = os.WEXITSTATUS(status)
    assert exit_code != child_crashed, "child raised an unexpected exception"
    failed_check = (
        check_names[exit_code - 1]
        if 0 < exit_code <= len(check_names)
        else "unknown (exit status %d)" % exit_code
    )
    assert exit_code == 0, "child-side check failed: %s" % failed_check
0 commit comments