
Do not count DummyResponse into downloader stats (#187)
Gallaecio authored Jan 29, 2024
1 parent 25ee699 commit a695488
Showing 14 changed files with 84 additions and 103 deletions.
2 changes: 2 additions & 0 deletions README.rst
@@ -59,6 +59,8 @@ Add the following inside Scrapy's ``settings.py`` file:
DOWNLOADER_MIDDLEWARES = {
"scrapy_poet.InjectionMiddleware": 543,
"scrapy.downloadermiddlewares.stats.DownloaderStats": None,
"scrapy_poet.DownloaderStatsMiddleware": 850,
}
SPIDER_MIDDLEWARES = {
"scrapy_poet.RetryMiddleware": 275,
4 changes: 4 additions & 0 deletions docs/intro/advanced-tutorial.rst
@@ -80,6 +80,8 @@ It can be directly used inside the spider as:
custom_settings = {
"DOWNLOADER_MIDDLEWARES": {
"scrapy_poet.InjectionMiddleware": 543,
"scrapy.downloadermiddlewares.stats.DownloaderStats": None,
"scrapy_poet.DownloaderStatsMiddleware": 850,
}
}
@@ -153,6 +155,8 @@ Let's see it in action:
custom_settings = {
"DOWNLOADER_MIDDLEWARES": {
"scrapy_poet.InjectionMiddleware": 543,
"scrapy.downloadermiddlewares.stats.DownloaderStats": None,
"scrapy_poet.DownloaderStatsMiddleware": 850,
}
}
2 changes: 2 additions & 0 deletions docs/intro/install.rst
@@ -28,6 +28,8 @@ of your Scrapy project:
DOWNLOADER_MIDDLEWARES = {
"scrapy_poet.InjectionMiddleware": 543,
"scrapy.downloadermiddlewares.stats.DownloaderStats": None,
"scrapy_poet.DownloaderStatsMiddleware": 850,
}
SPIDER_MIDDLEWARES = {
"scrapy_poet.RetryMiddleware": 275,
11 changes: 11 additions & 0 deletions example/example/settings.py
@@ -6,8 +6,11 @@
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

from example.autoextract import AutoextractProductProvider

from scrapy_poet import ScrapyPoetRequestFingerprinter

BOT_NAME = "example"

SPIDER_MODULES = ["example.spiders"]
@@ -20,4 +23,12 @@

DOWNLOADER_MIDDLEWARES = {
"scrapy_poet.InjectionMiddleware": 543,
"scrapy.downloadermiddlewares.stats.DownloaderStats": None,
"scrapy_poet.DownloaderStatsMiddleware": 850,
}

REQUEST_FINGERPRINTER_CLASS = ScrapyPoetRequestFingerprinter

SPIDER_MIDDLEWARES = {
"scrapy_poet.RetryMiddleware": 275,
}
2 changes: 1 addition & 1 deletion scrapy_poet/__init__.py
@@ -1,5 +1,5 @@
from .api import AnnotatedResult, DummyResponse, callback_for
from .downloadermiddlewares import InjectionMiddleware
from .downloadermiddlewares import DownloaderStatsMiddleware, InjectionMiddleware
from .page_input_providers import HttpResponseProvider, PageObjectInputProvider
from .spidermiddlewares import RetryMiddleware
from ._request_fingerprinter import ScrapyPoetRequestFingerprinter
12 changes: 11 additions & 1 deletion scrapy_poet/downloadermiddlewares.py
@@ -5,10 +5,11 @@
import inspect
import logging
import warnings
from typing import Generator, Optional, Type, TypeVar
from typing import Generator, Optional, Type, TypeVar, Union

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.downloadermiddlewares.stats import DownloaderStats
from scrapy.http import Request, Response
from twisted.internet.defer import Deferred, inlineCallbacks
from web_poet import RulesRegistry
@@ -28,6 +29,15 @@
logger = logging.getLogger(__name__)


class DownloaderStatsMiddleware(DownloaderStats):
def process_response(
self, request: Request, response: Response, spider: Spider
) -> Union[Request, Response]:
if isinstance(response, DummyResponse):
return response
return super().process_response(request, response, spider)


DEFAULT_PROVIDERS = {
HttpResponseProvider: 500,
HttpClientProvider: 600,
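The new ``DownloaderStatsMiddleware`` is a thin subclass of Scrapy's ``DownloaderStats`` that leaves ``DummyResponse`` objects out of the downloader stats; it is registered at priority 850 (the slot the stock ``DownloaderStats`` normally occupies) while the stock middleware is disabled with ``None`` in the settings snippets above. A ``DummyResponse`` reaches this middleware when a callback declares that it does not need the downloaded response and none of its injected dependencies require it either, so scrapy-poet skips the download and passes a placeholder back instead. A minimal sketch of such a spider, assuming scrapy-poet's default ``RequestUrl`` provider is enabled; ``LinkPage``, ``LinksSpider`` and the URL are made up for illustration:

import attrs
import scrapy
from web_poet import ItemPage, RequestUrl, field

from scrapy_poet import DummyResponse


@attrs.define
class LinkPage(ItemPage):
    # Depends only on the request URL, so the page body is never needed.
    url: RequestUrl

    @field
    def link(self):
        return str(self.url)


class LinksSpider(scrapy.Spider):
    name = "links"
    start_urls = ["https://example.com"]  # placeholder URL

    # The DummyResponse annotation tells scrapy-poet this callback does not
    # use the downloaded response; since LinkPage only needs RequestUrl, the
    # download can be skipped and a DummyResponse placeholder is passed in.
    # Without the new middleware, that placeholder would still be counted in
    # downloader/response_count and related stats.
    def parse(self, response: DummyResponse, page: LinkPage):
        yield {"link": page.link}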
12 changes: 10 additions & 2 deletions scrapy_poet/utils/testing.py
@@ -157,7 +157,10 @@ def get_download_handler(crawler, schema):
return crawler.engine.downloader.handlers._get_handler(schema)


def make_crawler(spider_cls, settings):
def make_crawler(spider_cls, settings=None):
settings = settings or {}
settings = {**create_scrapy_settings(), **settings}

if not getattr(spider_cls, "name", None):

class Spider(spider_cls):
@@ -220,7 +223,7 @@ def process_response(self, request, response, spider):
return response


def create_scrapy_settings(request):
def create_scrapy_settings():
"""Default scrapy-poet settings"""
s = dict(
# collect scraped items to crawler.spider.collected_items
@@ -231,8 +234,13 @@
# collect injected dependencies to crawler.spider.collected_response_deps
InjectedDependenciesCollectorMiddleware: 542,
"scrapy_poet.InjectionMiddleware": 543,
"scrapy.downloadermiddlewares.stats.DownloaderStats": None,
"scrapy_poet.DownloaderStatsMiddleware": 850,
},
REQUEST_FINGERPRINTER_CLASS=ScrapyPoetRequestFingerprinter,
SPIDER_MIDDLEWARES={
"scrapy_poet.RetryMiddleware": 275,
},
)
return Settings(s)

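Since ``create_scrapy_settings()`` no longer takes the pytest ``request`` fixture and ``make_crawler()`` now merges those defaults with whatever a test passes in, tests can build crawlers without wiring the middleware stack by hand. A rough usage sketch, assuming the existing ``MockServer``/``EchoResource`` test helpers; the spider and the final assertion are illustrative, not part of this patch:

from scrapy import Spider
from twisted.internet.defer import inlineCallbacks

from scrapy_poet.utils.mockserver import MockServer
from scrapy_poet.utils.testing import EchoResource, make_crawler


@inlineCallbacks
def test_stats_defaults():
    with MockServer(EchoResource) as server:

        class SampleSpider(Spider):
            name = "sample"
            start_urls = [server.root_url]

            def parse(self, response):
                yield {"status": response.status}

        # No settings argument: make_crawler() falls back to
        # create_scrapy_settings(), which already registers
        # InjectionMiddleware, DownloaderStatsMiddleware and the
        # scrapy-poet request fingerprinter.
        crawler = make_crawler(SampleSpider)
        yield crawler.crawl()
        assert crawler.stats.get_value("downloader/response_count") == 1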
4 changes: 2 additions & 2 deletions tests/conftest.py
@@ -4,5 +4,5 @@


@pytest.fixture()
def settings(request):
return create_scrapy_settings(request)
def settings():
return create_scrapy_settings()
7 changes: 2 additions & 5 deletions tests/test_cache.py
@@ -5,7 +5,7 @@
from web_poet import WebPage, field

from scrapy_poet.utils.mockserver import MockServer
from scrapy_poet.utils.testing import EchoResource, make_crawler
from scrapy_poet.utils.testing import EchoResource, create_scrapy_settings, make_crawler


@inlineCallbacks
@@ -22,10 +22,7 @@ class CacheSpider(Spider):
name = "cache"

custom_settings = {
"DOWNLOADER_MIDDLEWARES": {
"scrapy_poet.InjectionMiddleware": 543,
},
"REQUEST_FINGERPRINTER_IMPLEMENTATION": "2.7",
**create_scrapy_settings(),
"SCRAPY_POET_CACHE": cache_dir,
}
