From 8dc068e4ed3c4f44d79828d5871bd92fe8341bbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Bo=C4=8Dek?= Date: Fri, 13 Dec 2024 09:19:18 +0100 Subject: [PATCH] fix: response leak when using caching adapter and streaming --- cachecontrol/adapter.py | 19 +++++++++++++------ cachecontrol/controller.py | 11 ++++++++++- tests/test_adapter.py | 24 +++++++++++++++++++++++- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/cachecontrol/adapter.py b/cachecontrol/adapter.py index e739a48..9a32634 100644 --- a/cachecontrol/adapter.py +++ b/cachecontrol/adapter.py @@ -5,6 +5,7 @@ import functools import types +import weakref import zlib from typing import TYPE_CHECKING, Any, Collection, Mapping @@ -128,19 +129,25 @@ def build_response( # type: ignore[override] response._fp = CallbackFileWrapper( # type: ignore[assignment] response._fp, # type: ignore[arg-type] functools.partial( - self.controller.cache_response, request, response + self.controller.cache_response, request, weakref.ref(response) ), ) if response.chunked: - super_update_chunk_length = response._update_chunk_length + super_update_chunk_length = response.__class__._update_chunk_length - def _update_chunk_length(self: HTTPResponse) -> None: - super_update_chunk_length() + def _update_chunk_length( + weak_self: weakref.ReferenceType[HTTPResponse], + ) -> None: + self = weak_self() + if self is None: + return + + super_update_chunk_length(self) if self.chunk_left == 0: self._fp._close() # type: ignore[union-attr] - response._update_chunk_length = types.MethodType( # type: ignore[method-assign] - _update_chunk_length, response + response._update_chunk_length = functools.partial( # type: ignore[method-assign] + _update_chunk_length, weakref.ref(response) ) resp: Response = super().build_response(request, response) diff --git a/cachecontrol/controller.py b/cachecontrol/controller.py index f826aec..f1a8010 100644 --- a/cachecontrol/controller.py +++ b/cachecontrol/controller.py @@ -12,6 +12,7 @@ import logging import re import time +import weakref from email.utils import parsedate_tz from typing import TYPE_CHECKING, Collection, Mapping @@ -323,7 +324,7 @@ def _cache_set( def cache_response( self, request: PreparedRequest, - response: HTTPResponse, + response_or_ref: HTTPResponse | weakref.ReferenceType[HTTPResponse], body: bytes | None = None, status_codes: Collection[int] | None = None, ) -> None: @@ -334,6 +335,14 @@ def cache_response( """ # From httplib2: Don't cache 206's since we aren't going to # handle byte range requests + + if isinstance(response_or_ref, weakref.ReferenceType): + response = response_or_ref() + if response is None: + return + else: + response = response_or_ref + cacheable_status_codes = status_codes or self.cacheable_status_codes if response.status not in cacheable_status_codes: logger.debug( diff --git a/tests/test_adapter.py b/tests/test_adapter.py index 2614e09..7fd0c59 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -2,10 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 +import gc +import weakref from unittest import mock -import pytest +import pytest from requests import Session + from cachecontrol.adapter import CacheControlAdapter from cachecontrol.cache import DictCache from cachecontrol.wrapper import CacheControl @@ -65,3 +68,22 @@ def test_close(self): sess.close() assert cache.close.called + + def test_do_not_leak_response(self, url, sess): + resp = sess.get(url + "stream", stream=True) + resp.raise_for_status() + r1_weak = weakref.ref(resp.raw) + + # This is a mis-use of requests, becase we should either consume + # the body, or call .close(). + # But requests without cachecontrol handle this anyway, because + # urllib3.response.HTTPResponse has a __del__ finalizer on it that closes it + # once there are no more references to it. + # We should not break this. + + resp = None + # Below this point, it should be closed because there are no more references + # to the session response. + + r1 = r1_weak() + assert r1 is None or r1.closed