From a35d5de51dc40a40bb7e74330ea5ee4af0a26c65 Mon Sep 17 00:00:00 2001 From: Shazoo Date: Mon, 20 Nov 2017 17:22:16 +0800 Subject: [PATCH] Add a meta key named "request_need_deltafetch". Users can set this key to force deltafetch to store the current request's footprint. This is a workable way to fix the redirected-request issue. --- README.rst | 5 +++++ scrapy_deltafetch/middleware.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/README.rst b/README.rst index 65045b3..95a586a 100644 --- a/README.rst +++ b/README.rst @@ -88,3 +88,8 @@ Supported Scrapy request meta keys more efficient for sites that many URLs for the same item. +* ``request_need_deltafetch`` — force current request to be stored by deltafetch + +Example:: + + yield scrapy.Request(detail_url, self.parse_level2_pages, meta={'request_need_deltafetch':True}) \ No newline at end of file diff --git a/scrapy_deltafetch/middleware.py b/scrapy_deltafetch/middleware.py index ded0843..defdf1c 100644 --- a/scrapy_deltafetch/middleware.py +++ b/scrapy_deltafetch/middleware.py @@ -81,6 +81,10 @@ def process_spider_output(self, response, result, spider): if self.stats: self.stats.inc_value('deltafetch/skipped', spider=spider) continue + elif r.meta.get('request_need_deltafetch'): + self.db[key] = str(time.time()) + if self.stats: + self.stats.inc_value('deltafetch/force_stored', spider=spider) elif isinstance(r, (BaseItem, dict)): key = self._get_key(response.request) self.db[key] = str(time.time())