Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: response leak when using caching adapter and streaming #344

Merged
merged 1 commit into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions cachecontrol/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import functools
import types
import weakref
import zlib
from typing import TYPE_CHECKING, Any, Collection, Mapping

Expand Down Expand Up @@ -128,19 +129,25 @@ def build_response( # type: ignore[override]
response._fp = CallbackFileWrapper( # type: ignore[assignment]
response._fp, # type: ignore[arg-type]
functools.partial(
self.controller.cache_response, request, response
self.controller.cache_response, request, weakref.ref(response)
),
)
if response.chunked:
super_update_chunk_length = response._update_chunk_length
super_update_chunk_length = response.__class__._update_chunk_length

def _update_chunk_length(self: HTTPResponse) -> None:
super_update_chunk_length()
def _update_chunk_length(
weak_self: weakref.ReferenceType[HTTPResponse],
) -> None:
self = weak_self()
if self is None:
return

super_update_chunk_length(self)
if self.chunk_left == 0:
self._fp._close() # type: ignore[union-attr]

response._update_chunk_length = types.MethodType( # type: ignore[method-assign]
_update_chunk_length, response
Comment on lines -142 to -143
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you assign a method like this, you're creating a circular reference to self, because when you normally call response._update_chunk_length, it actually creates the bound method on-the-fly, it's not set there all the time.

This SO response helped me understand it: https://stackoverflow.com/a/26158130

response._update_chunk_length = functools.partial( # type: ignore[method-assign]
_update_chunk_length, weakref.ref(response)
)

resp: Response = super().build_response(request, response)
Expand Down
13 changes: 12 additions & 1 deletion cachecontrol/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import logging
import re
import time
import weakref
from email.utils import parsedate_tz
from typing import TYPE_CHECKING, Collection, Mapping

Expand Down Expand Up @@ -323,7 +324,7 @@ def _cache_set(
def cache_response(
self,
request: PreparedRequest,
response: HTTPResponse,
response_or_ref: HTTPResponse | weakref.ReferenceType[HTTPResponse],
frostming marked this conversation as resolved.
Show resolved Hide resolved
body: bytes | None = None,
status_codes: Collection[int] | None = None,
) -> None:
Expand All @@ -332,6 +333,16 @@ def cache_response(

This assumes a requests Response object.
"""
if isinstance(response_or_ref, weakref.ReferenceType):
response = response_or_ref()
if response is None:
# The weakref can be None only in case the user used streamed request
# and did not consume or close it, and holds no reference to requests.Response.
# In such case, we don't want to cache the response.
return
else:
response = response_or_ref

# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
cacheable_status_codes = status_codes or self.cacheable_status_codes
Expand Down
24 changes: 23 additions & 1 deletion tests/test_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
#
# SPDX-License-Identifier: Apache-2.0

import gc
import weakref
from unittest import mock
import pytest

import pytest
from requests import Session

from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.cache import DictCache
from cachecontrol.wrapper import CacheControl
Expand Down Expand Up @@ -65,3 +68,22 @@ def test_close(self):

sess.close()
assert cache.close.called

def test_do_not_leak_response(self, url, sess):
resp = sess.get(url + "stream", stream=True)
resp.raise_for_status()
r1_weak = weakref.ref(resp.raw)

# This is a mis-use of requests, becase we should either consume
# the body, or call .close().
# But requests without cachecontrol handle this anyway, because
# urllib3.response.HTTPResponse has a __del__ finalizer on it that closes it
# once there are no more references to it.
# We should not break this.

resp = None
# Below this point, it should be closed because there are no more references
# to the session response.

r1 = r1_weak()
assert r1 is None or r1.closed
Loading