
Commit f2556ea

Merge pull request #225 from scrapinghub/scrapy-2.13
Adapt tests to Scrapy 2.13, improve coverage reporting
2 parents b68c90e + 0b1fd5a commit f2556ea

8 files changed: 80 additions & 31 deletions

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ jobs:
         - python-version: "3.12"
         - python-version: "3.13"
         - python-version: "3.13"
-          toxenv: "asyncio"
+          toxenv: "default-reactor"

     steps:
     - uses: actions/checkout@v4

pyproject.toml

Lines changed: 15 additions & 0 deletions
@@ -16,6 +16,21 @@ regex = true
 [[tool.bumpversion.files]]
 filename = "scrapy_poet/VERSION"

+[tool.coverage.run]
+branch = true
+
+[tool.coverage.paths]
+source = [
+    "scrapy_poet",
+    ".tox/**/site-packages/scrapy_poet"
+]
+
+[tool.coverage.report]
+exclude_also = [
+    "if TYPE_CHECKING:",
+    "@(abc\\.)?abstractmethod",
+]
+
 [tool.isort]
 profile = "black"
 multi_line_output = 3
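
The added coverage configuration enables branch measurement, remaps files measured inside tox virtualenvs back onto the source tree, and excludes lines that never run under tests (TYPE_CHECKING blocks, abstract methods). A hedged sketch of how the [tool.coverage.paths] aliasing plays out when combining data, using the standard coverage.py API (the .tox path in the comment is illustrative):

# Minimal sketch, assuming coverage.py with TOML config support.
# With the aliases above, a file measured at e.g.
# .tox/py313/lib/python3.13/site-packages/scrapy_poet/injection.py
# is reported as scrapy_poet/injection.py.
import coverage

cov = coverage.Coverage(config_file="pyproject.toml")
cov.combine()  # merge .coverage.* data files, applying the path aliases
cov.load()     # read the combined data file back
cov.report()   # includes branch coverage, per [tool.coverage.run]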

scrapy_poet/utils/mockserver.py

Lines changed: 2 additions & 1 deletion
@@ -5,7 +5,6 @@
 from importlib import import_module
 from subprocess import PIPE, Popen

-from twisted.internet import reactor
 from twisted.web.server import Site


@@ -48,6 +47,8 @@ def __exit__(self, exc_type, exc_value, traceback):


 def main():
+    from twisted.internet import reactor
+
     parser = argparse.ArgumentParser()
     parser.add_argument("resource")
     parser.add_argument("--port", type=int)
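
Moving the reactor import into main() matters because importing twisted.internet.reactor installs the default reactor as a side effect, after which no other reactor can be installed in that process. A minimal sketch of the failure mode this change avoids:

# Minimal sketch: module-level reactor imports pin the default reactor.
from twisted.internet import reactor  # noqa: F401  (installs the default reactor)

from scrapy.utils.reactor import install_reactor

# Raises twisted.internet.error.ReactorAlreadyInstalledError, because the
# import above already installed a reactor.
install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")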

scrapy_poet/utils/testing.py

Lines changed: 9 additions & 1 deletion
@@ -9,7 +9,6 @@
 from scrapy.settings import Settings
 from scrapy.utils.python import to_bytes
 from scrapy.utils.test import get_crawler as _get_crawler
-from twisted.internet import reactor
 from twisted.internet.defer import inlineCallbacks
 from twisted.internet.task import deferLater
 from twisted.web.resource import Resource
@@ -38,6 +37,8 @@ class LeafResource(Resource):
     isLeaf = True

     def deferRequest(self, request, delay, f, *a, **kw):
+        from twisted.internet import reactor
+
         def _cancelrequest(_):
             # silence CancelledError
             d.addErrback(lambda _: None)
@@ -264,6 +265,13 @@ def _get_test_settings():
     settings["ADDONS"] = {
         "scrapy_poet.Addon": 300,
     }
+    try:
+        from scrapy.utils.test import get_reactor_settings
+
+        settings.update(get_reactor_settings())
+    except ImportError:
+        # Scrapy < 2.13.0, no need to change the reactor settings
+        pass
     return settings
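
The try/except keeps _get_test_settings() working on both sides of the 2.13 boundary: scrapy.utils.test.get_reactor_settings() only exists from Scrapy 2.13 on, where it returns reactor-related settings (notably TWISTED_REACTOR) that match the reactor installed in the current process. A hedged sketch of the same fallback in isolation:

# Hedged sketch: on Scrapy < 2.13 the import fails, and an empty dict is
# a safe stand-in because those versions need no reactor settings here.
try:
    from scrapy.utils.test import get_reactor_settings
except ImportError:  # Scrapy < 2.13.0

    def get_reactor_settings():
        return {}

settings = {"ADDONS": {"scrapy_poet.Addon": 300}}
settings.update(get_reactor_settings())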

tests/__init__.py

Lines changed: 0 additions & 8 deletions
@@ -1,8 +0,0 @@
-import os
-
-# Note that tox.ini should only set the REACTOR env variable when running
-# pytest with "--reactor=asyncio".
-if os.environ.get("REACTOR", "") == "asyncio":
-    from scrapy.utils.reactor import install_reactor
-
-    install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
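
This hook can go because reactor selection now happens through pytest-twisted: tox passes --reactor=asyncio (or --reactor=default) to pytest, which installs the chosen reactor before any test module imports Twisted code. Roughly what the deleted lines and pytest-twisted's --reactor=asyncio both boil down to (a sketch; the exact mechanics differ):

# Hedged sketch: install the asyncio reactor before twisted.internet.reactor
# is first imported anywhere in the process.
import asyncio

from twisted.internet import asyncioreactor

asyncioreactor.install(asyncio.new_event_loop())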

tests/test_middleware.py

Lines changed: 29 additions & 9 deletions
@@ -1,5 +1,7 @@
+import os
 import socket
-from pathlib import Path
+import subprocess
+import sys
 from textwrap import dedent
 from typing import Optional, Type, Union

@@ -11,7 +13,6 @@
 from scrapy import Request
 from scrapy.http import Response
 from scrapy.utils.log import configure_logging
-from scrapy.utils.testproc import ProcessTest
 from twisted.internet.threads import deferToThread
 from url_matcher.util import get_domain
 from web_poet import ApplyRule, HttpResponse, ItemPage, RequestUrl, ResponseUrl, WebPage
@@ -541,7 +542,6 @@ def test_skip_download_request_url_page(settings):
     assert crawler.stats.get_stats().get("downloader/response_count", 0) == 0


-@inlineCallbacks
 def test_scrapy_shell(tmp_path):
     try:
         import scrapy.addons  # noqa: F401
@@ -564,13 +564,33 @@ def test_scrapy_shell(tmp_path):
         }
     """
     settings = dedent(settings)
-    Path(tmp_path, "settings.py").write_text(settings)
-    pt = ProcessTest()
-    pt.command = "shell"
-    pt.cwd = tmp_path
+    (tmp_path / "settings.py").write_text(settings)
+
+    env = os.environ.copy()
+    env["SCRAPY_SETTINGS_MODULE"] = "settings"
     with MockServer(EchoResource) as server:
-        _, out, err = yield pt.execute(
-            [server.root_url, "-c", "item"], settings="settings"
+        args = (
+            sys.executable,
+            "-m",
+            "scrapy.cmdline",
+            "shell",
+            server.root_url,
+            "-c",
+            "item",
         )
+        p = subprocess.Popen(
+            args,
+            cwd=tmp_path,
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        try:
+            out, err = p.communicate(timeout=15)
+        except subprocess.TimeoutExpired:
+            p.kill()
+            p.communicate()
+            pytest.fail("Command took too much time to complete")
+
     assert b"Using DummyResponse instead of downloading" not in err
     assert b"{}" in out  # noqa: P103
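
The rewrite replaces scrapy.utils.testproc.ProcessTest, which the Scrapy 2.13 adaptation drops, with a plain subprocess, and removes @inlineCallbacks since the test no longer yields Deferreds. Running python -m scrapy.cmdline is the module form of the scrapy console script; a hedged sketch of the same invocation with an illustrative command:

# Hedged sketch: run a Scrapy command with the interpreter running the
# tests; "version" is only an example command.
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "-m", "scrapy.cmdline", "version"],
    capture_output=True,
    text=True,
)
print(result.stdout)  # e.g. "Scrapy 2.13.0"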

tests/test_web_poet_rules.py

Lines changed: 10 additions & 3 deletions
@@ -15,7 +15,9 @@
 import attrs
 import pytest
 import scrapy
+from packaging.version import Version
 from pytest_twisted import inlineCallbacks
+from scrapy import __version__ as SCRAPY_VERSION
 from url_matcher import Patterns
 from url_matcher.util import get_domain
 from web_poet import (
@@ -916,10 +918,15 @@ def test_item_return_from_injectable() -> None:
     assert item == ProductFromInjectable(name="product from injectable")
     assert_deps(deps, {"item": ProductFromInjectable})

-    # calling the actual page object should not work since it's not inheriting
-    # from ``web_poet.ItemPage``.
+    # calling the actual page object should not work in the same way since it's
+    # not inheriting from ``web_poet.ItemPage``.
     item, deps = yield crawl_item_and_deps(ProductFromInjectablePage)
-    assert item is None
+    if Version(SCRAPY_VERSION) < Version("2.13"):
+        # older Scrapy refuses to return a ProductFromInjectablePage instance
+        assert item is None
+    else:
+        # newer Scrapy returns it
+        assert isinstance(item, ProductFromInjectablePage)

     # However, the page object should still be injected into the callback method.
     assert_deps(deps, {"item": ProductFromInjectablePage})
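
The version gate uses packaging.version.Version because comparing raw version strings is lexicographic and gives wrong answers. For instance:

# Why packaging is used instead of plain string comparison:
from packaging.version import Version

assert Version("2.13") > Version("2.9")  # correct numeric ordering
assert "2.13" < "2.9"  # lexicographic comparison, semantically wrong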

tox.ini

Lines changed: 14 additions & 8 deletions
@@ -7,11 +7,13 @@ deps =
     pytest-cov
     pytest-twisted
     Twisted
-    Scrapy < 2.13.0
+setenv =
+    REACTOR=asyncio
 commands =
     py.test \
-        --cov-report=term --cov-report=html --cov-report= --cov-report=xml --cov=scrapy_poet \
+        --cov-config=pyproject.toml --cov-report=term --cov-report=html --cov-report= --cov-report=xml --cov=scrapy_poet \
         --doctest-modules \
+        --reactor=asyncio \
         {posargs:scrapy_poet tests}

 [pinned]
@@ -32,6 +34,8 @@ deps =

 [testenv:min]
 basepython = python3.9
+setenv =
+    REACTOR=
 deps =
     {[pinned]deps}
     scrapy==2.6.0
@@ -46,6 +50,8 @@ deps =
 # See: https://github.com/scrapinghub/scrapy-poet/issues/48
 [testenv:pinned-scrapy-2x7]
 basepython=python3.9
+setenv =
+    REACTOR=
 deps =
     {[pinned]deps}
     scrapy==2.7.0
@@ -55,25 +61,25 @@ deps =
 # See: https://github.com/scrapinghub/scrapy-poet/issues/118
 [testenv:pinned-scrapy-2x8]
 basepython=python3.9
+setenv =
+    REACTOR=
 deps =
     {[pinned]deps}
     scrapy==2.8.0
     Twisted<23.8.0

-[testenv:asyncio]
+[testenv:default-reactor]
 setenv =
-    REACTOR=asyncio
+    REACTOR=
 commands =
-    {[testenv]commands} --reactor=asyncio
+    {[testenv]commands} --reactor=default
 deps =
     {[testenv]deps}

 [testenv:asyncio-min]
 basepython = python3.9
-setenv =
-    {[testenv:asyncio]setenv}
 commands =
-    {[testenv:asyncio]commands}
+    {[testenv]commands}
 deps =
     {[testenv:min]deps}
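
Net effect: the main env now tests against the asyncio reactor, which Scrapy 2.13 makes the default, while the renamed default-reactor env covers the classic Twisted reactor, and the pinned/min envs clear REACTOR for older Scrapy versions. A hedged sketch, not from the repo, of a quick test to confirm which reactor a given env actually installed:

# Under the default env (--reactor=asyncio) this prints
# AsyncioSelectorReactor; under default-reactor it prints the platform's
# select/epoll/kqueue reactor class instead.
def test_which_reactor():
    from twisted.internet import reactor

    print(type(reactor).__name__)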
