Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ TBR
* Using the said additional requests needs ``async/await`` support in
``asyncio``. This raises the minimum scrapy requirement to ``scrapy>=2.6.0``.

* Add ``async`` support for ``callback_for``.


0.3.0 (2022-01-28)
------------------
Expand Down
1 change: 1 addition & 0 deletions docs/intro/advanced-tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ These are mainly achieved by **scrapy-poet** implementing **providers** for them
* :class:`scrapy_poet.page_input_providers.HttpClientProvider`
* :class:`scrapy_poet.page_input_providers.MetaProvider`

.. _`intro-additional-requests`:

Additional Requests
===================
Expand Down
24 changes: 24 additions & 0 deletions docs/intro/basic-tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,30 @@ returning the result of the ``to_item`` method call. We could use
``response.follow_all(links, callback_for(BookPage))``, without creating
an attribute, but currently it won't work with Scrapy disk queues.

.. tip::

:func:`~.callback_for` also supports ``async generators``. So having the
following:

.. code-block:: python

class BookPage(web_poet.ItemWebPage):
async def to_item(self):
return await do_something_async()

callback_for(BookPage)

would result in:

.. code-block:: python

async def parse_book(self, response: DummyResponse, page: BookPage):
yield await page.to_item()

This is useful when the Page Object uses additional requests, which rely
heavily on the ``async/await`` syntax. More info on this is available in this
tutorial section: :ref:`intro-additional-requests`.

Final result
============

Expand Down
27 changes: 27 additions & 0 deletions scrapy_poet/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Callable, Optional, Type
from inspect import iscoroutinefunction

from scrapy.http import Request, Response

Expand Down Expand Up @@ -67,6 +68,25 @@ def parse(self, response):

parse_book = callback_for(BookPage)

This also produces an async generator callable if the Page Object's
``to_item()`` method is a coroutine which uses the ``async/await`` syntax.
So having the following:

.. code-block:: python

class BookPage(web_poet.ItemWebPage):
async def to_item(self):
return await do_something_async()

callback_for(BookPage)

would result in:

.. code-block:: python

async def parse_book(self, response: DummyResponse, page: BookPage):
yield await page.to_item()

The generated callback could be used as a spider instance method or passed
as an inline/anonymous argument. Make sure to define it as a spider
attribute (as shown in the example above) if you're planning to use
Expand All @@ -90,5 +110,12 @@ def parse(self, response):
def parse(*args, page: page_cls, **kwargs): # type: ignore
yield page.to_item() # type: ignore

async def async_parse(*args, page: page_cls, **kwargs): # type: ignore
yield await page.to_item() # type: ignore

if iscoroutinefunction(page_cls.to_item):
setattr(async_parse, _CALLBACK_FOR_MARKER, True)
return async_parse

setattr(parse, _CALLBACK_FOR_MARKER, True)
return parse
1 change: 0 additions & 1 deletion scrapy_poet/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from scrapy_poet.utils import scrapy_response_to_http_response


logger = logging.getLogger(__name__)


Expand Down
51 changes: 46 additions & 5 deletions tests/test_callback_for.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ class FakeItemPage(ItemPage):
def to_item(self):
return 'fake item page'

class FakeItemPageAsync(ItemPage):
    """Fake page object whose ``to_item`` is a coroutine.

    Used to exercise the async-generator branch of ``callback_for``.
    """

    async def to_item(self):
        return 'fake item page'


class FakeItemWebPage(ItemWebPage):

Expand All @@ -28,6 +33,12 @@ class MySpider(scrapy.Spider):
parse_web = callback_for(FakeItemWebPage)


class MySpiderAsync(scrapy.Spider):
    # Spider whose parse_item is generated from an async page object,
    # so callback_for() should return an async generator callable here.

    name = 'my_spider_async'
    parse_item = callback_for(FakeItemPageAsync)


def test_callback_for():
"""Simple test case to ensure it works as expected."""
cb = callback_for(FakeItemPage)
Expand All @@ -39,6 +50,20 @@ def test_callback_for():
assert list(result) == ['fake item page']


@pytest.mark.asyncio
async def test_callback_for_async():
    """callback_for() on an async ``to_item`` yields exactly one item."""
    callback = callback_for(FakeItemPageAsync)
    assert callable(callback)

    page = FakeItemPageAsync()
    fake_response = DummyResponse('http://example.com/')
    generator = callback(response=fake_response, page=page)

    # Exhausting the async generator must produce the single expected item.
    items = [item async for item in generator]
    assert items == ['fake item page']


def test_callback_for_instance_method():
spider = MySpider()
response = DummyResponse('http://example.com/')
Expand All @@ -47,12 +72,16 @@ def test_callback_for_instance_method():
assert list(result) == ['fake item page']


def test_callback_for_inline():
callback = callback_for(FakeItemPage)
@pytest.mark.asyncio
async def test_callback_for_instance_method_async():
    """A spider's async callback works when invoked as an instance method.

    The diff scrape interleaved leftover lines from the removed sync
    ``test_callback_for_inline`` test (referencing an undefined ``callback``
    and the sync ``FakeItemPage``); this is the coherent async test.
    """
    spider = MySpiderAsync()
    response = DummyResponse('http://example.com/')
    fake_page = FakeItemPageAsync()
    result = spider.parse_item(response, page=fake_page)

    # The async generator yields exactly one item, then terminates.
    assert await result.__anext__() == 'fake item page'
    with pytest.raises(StopAsyncIteration):
        await result.__anext__()


def test_default_callback():
Expand Down Expand Up @@ -93,6 +122,18 @@ def test_inline_callback():
assert str(exc.value) == msg


def test_inline_callback_async():
    """An inline (non-attribute) async callback cannot be serialized."""
    spider = MySpiderAsync()
    callback = callback_for(FakeItemPageAsync)
    req = scrapy.Request('http://example.com/', callback=callback)

    # Serializing a request whose callback is not a spider attribute
    # must be rejected with a descriptive error.
    with pytest.raises(ValueError) as excinfo:
        request_to_dict(req, spider)

    expected = f'Function {callback} is not an instance method in: {spider}'
    assert str(excinfo.value) == expected


def test_invalid_subclass():
"""Classes should inherit from ItemPage."""

Expand Down