Skip to content

Commit c90dc55

Browse files
committed
Read the request body just before the response is sent to the user, rather than when the request starts
1 parent 2df2cd3 commit c90dc55

2 files changed

Lines changed: 64 additions & 7 deletions

File tree

sentry_sdk/integrations/flask.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from flask.signals import (
4343
before_render_template,
4444
got_request_exception,
45+
request_finished,
4546
request_started,
4647
)
4748
from markupsafe import Markup
@@ -94,6 +95,7 @@ def setup_once() -> None:
9495

9596
before_render_template.connect(_add_sentry_trace)
9697
request_started.connect(_request_started)
98+
request_finished.connect(_request_finished)
9799
got_request_exception.connect(_capture_exception)
98100

99101
old_app = Flask.__call__
@@ -164,8 +166,15 @@ def _request_started(app: "Flask", **kwargs: "Any") -> None:
164166
evt_processor = _make_request_event_processor(app, request, integration)
165167
scope.add_event_processor(evt_processor)
166168

169+
170+
def _request_finished(sender: "Flask", response: "Any", **kwargs: "Any") -> None:
171+
integration = sentry_sdk.get_client().get_integration(FlaskIntegration)
172+
if integration is None:
173+
return
174+
167175
client = sentry_sdk.get_client()
168176
if has_span_streaming_enabled(client.options):
177+
request = flask_request._get_current_object()
169178
_set_request_body_data_on_streaming_segment(request, client)
170179

171180

@@ -178,18 +187,26 @@ def _set_request_body_data_on_streaming_segment(
178187

179188
with capture_internal_exceptions():
180189
content_length = int(request.content_length or 0)
181-
extractor = FlaskRequestExtractor(request)
182190

183191
if not request_body_within_bounds(client, content_length):
184192
data = AnnotatedValue.substituted_because_over_size_limit()
185193
else:
186-
raw_data = None
187-
try:
188-
raw_data = extractor.raw_data()
189-
except _RAW_DATA_EXCEPTIONS:
190-
pass
194+
# Only use data that Werkzeug has already cached — never consume
195+
# wsgi.input ourselves, as that would break user code that reads
196+
# the stream directly.
197+
# Werkzeug sets `_cached_data` here:
198+
# https://github.com/pallets/werkzeug/blob/1b00618e787f40dfb21eba29caf8f8be7c8e1d93/src/werkzeug/wrappers/request.py#L444
199+
raw_data = getattr(request, "_cached_data", None)
200+
201+
parsed_body = None
202+
if "form" in request.__dict__:
203+
extractor = FlaskRequestExtractor(request)
204+
parsed_body = extractor.parsed_body()
205+
elif raw_data is not None:
206+
extractor = FlaskRequestExtractor(request)
207+
if extractor.is_json():
208+
parsed_body = extractor.json()
191209

192-
parsed_body = extractor.parsed_body()
193210
if parsed_body is not None:
194211
data = parsed_body
195212
elif raw_data:

tests/integrations/flask/test_flask.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,3 +1355,43 @@ def test_sensitive_header_scrubbing_span_streaming(sentry_init, capture_items, a
13551355
== SENSITIVE_DATA_SUBSTITUTE
13561356
)
13571357
assert span["attributes"]["http.request.header.x-custom-header"] == "passthrough"
1358+
1359+
1360+
def test_wsgi_input_direct_read_does_not_hang_span_streaming(
1361+
sentry_init, capture_items, app
1362+
):
1363+
"""
1364+
Regression test: reading wsgi.input directly must not hang when span streaming is enabled.
1365+
The SDK must not consume wsgi.input before user code runs.
1366+
"""
1367+
sentry_init(
1368+
integrations=[flask_sentry.FlaskIntegration()],
1369+
traces_sample_rate=1.0,
1370+
_experiments={"trace_lifecycle": "stream"},
1371+
max_request_body_size="always",
1372+
)
1373+
1374+
@app.route("/raw-wsgi", methods=["POST"])
1375+
def raw_wsgi_endpoint():
1376+
content_length = int(request.environ.get("CONTENT_LENGTH", 0))
1377+
body = request.environ["wsgi.input"].read(content_length)
1378+
return {"size": len(body), "body": body.decode("utf-8", errors="replace")}
1379+
1380+
items = capture_items("span")
1381+
1382+
client = app.test_client()
1383+
response = client.post(
1384+
"/raw-wsgi", data=b"hello from test", content_type="text/plain"
1385+
)
1386+
assert response.status_code == 200
1387+
assert response.get_json()["body"] == "hello from test"
1388+
1389+
sentry_sdk.flush()
1390+
1391+
assert len(items) == 1
1392+
span = items[0].payload
1393+
1394+
# The SDK should not have captured the body since the user read wsgi.input
1395+
# directly (bypassing Werkzeug's cache). This is an acceptable trade-off
1396+
# vs. consuming the stream and causing user applications to hang.
1397+
assert "http.request.body.data" not in span.get("attributes", {})

0 commit comments

Comments
 (0)