Skip to content

Commit f27c827

Browse files
authored
Merge pull request #184 from scrapinghub/weak-cache
create a weak_cache in Injector
2 parents a695488 + e402a04 commit f27c827

File tree

3 files changed

+29
-0
lines changed

3 files changed

+29
-0
lines changed

docs/providers.rst

+8
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,14 @@ which Scrapy has. Although they are quite similar in its intended purpose,
127127
could be anything that could stretch beyond Scrapy's ``Responses`` `(e.g. Network
128128
Database queries, API Calls, AWS S3 files, etc)`.
129129

130+
.. note::
131+
132+
The :class:`scrapy_poet.injection.Injector` maintains a ``.weak_cache`` which
133+
stores the instances created by the providers as long as the corresponding
134+
:class:`scrapy.Request <scrapy.http.Request>` instance exists. This means that
135+
the instances created by earlier providers can be accessed and reused by later
136+
providers. This is turned on by default and the instances are stored in memory.
137+
130138

131139
Configuring providers
132140
=====================

scrapy_poet/injection.py

+11
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pprint
66
import warnings
77
from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Type, cast
8+
from weakref import WeakKeyDictionary
89

910
import andi
1011
from andi.typeutils import issubclass_safe
@@ -95,6 +96,11 @@ def init_cache(self): # noqa: D102
9596
f"Cache enabled. Folder: {cache_path!r}. Caching errors: {self.caching_errors}"
9697
)
9798

99+
# This is different from the cache above as it only stores instances as long
100+
# as the request exists. This is useful for later providers to re-use the
101+
# already built instances by earlier providers.
102+
self.weak_cache: WeakKeyDictionary[Request, Dict] = WeakKeyDictionary()
103+
98104
def available_dependencies_for_providers(
99105
self, request: Request, response: Response
100106
): # noqa: D102
@@ -294,6 +300,11 @@ def build_instances_from_providers(
294300
)
295301
instances.update(objs_by_type)
296302

303+
if self.weak_cache.get(request):
304+
self.weak_cache[request].update(objs_by_type)
305+
else:
306+
self.weak_cache[request] = objs_by_type
307+
297308
if self.cache and not cache_hit:
298309
# Save the results in the cache
299310
self.cache[fingerprint] = serialize(objs)

tests/test_injection.py

+10
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def callback(
203203
ClsReqResponse: ClsReqResponse(),
204204
ClsNoProviderRequired: ClsNoProviderRequired(),
205205
}
206+
assert injector.weak_cache.get(request).keys() == {ClsReqResponse, Cls1, Cls2}
206207

207208
instances = yield from injector.build_instances_from_providers(
208209
request, response, plan
@@ -212,6 +213,7 @@ def callback(
212213
Cls2: Cls2(),
213214
ClsReqResponse: ClsReqResponse(),
214215
}
216+
assert injector.weak_cache.get(request).keys() == {ClsReqResponse, Cls1, Cls2}
215217

216218
@inlineCallbacks
217219
def test_build_instances_from_providers_unexpected_return(self):
@@ -230,6 +232,7 @@ def callback(response: DummyResponse, a: Cls1):
230232
yield from injector.build_instances_from_providers(
231233
response.request, response, plan
232234
)
235+
assert injector.weak_cache.get(response.request) is None
233236

234237
assert "Provider" in str(exinf.value)
235238
assert "Cls2" in str(exinf.value)
@@ -256,6 +259,7 @@ def callback(response: DummyResponse, arg: str):
256259
instances = yield from injector.build_instances_from_providers(
257260
response.request, response, plan
258261
)
262+
assert injector.weak_cache.get(response.request).keys() == {str}
259263

260264
assert instances[str] == min(str_list)
261265

@@ -628,6 +632,7 @@ def callback_factory():
628632
if name.startswith(prefix)
629633
}
630634
assert set(poet_stats) == expected
635+
assert injector.weak_cache.get(response.request) is None
631636

632637
@inlineCallbacks
633638
def test_po_provided_via_item(self):
@@ -642,6 +647,7 @@ def callback(response: DummyResponse, item: TestItem):
642647
_ = yield from injector.build_callback_dependencies(response.request, response)
643648
key = "poet/injector/tests.test_injection.TestItemPage"
644649
assert key in set(injector.crawler.stats.get_stats())
650+
assert injector.weak_cache.get(response.request) is None
645651

646652

647653
class TestInjectorOverrides:
@@ -787,6 +793,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
787793
response.request, response, plan
788794
)
789795
assert cache.exists()
796+
assert injector.weak_cache.get(response.request).keys() == {Price, Name}
790797

791798
validate_instances(instances)
792799

@@ -799,6 +806,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
799806
instances = yield from injector.build_instances_from_providers(
800807
response.request, response, plan
801808
)
809+
assert injector.weak_cache.get(response.request) is None
802810

803811
# Different providers. They return a different result, but the cache data should prevail.
804812
providers = {
@@ -812,6 +820,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
812820
instances = yield from injector.build_instances_from_providers(
813821
response.request, response, plan
814822
)
823+
assert injector.weak_cache.get(response.request).keys() == {Price, Name}
815824

816825
validate_instances(instances)
817826

@@ -823,3 +832,4 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
823832
instances = yield from injector.build_instances_from_providers(
824833
response.request, response, plan
825834
)
835+
assert injector.weak_cache.get(response.request) is None

0 commit comments

Comments
 (0)