Commit 74ab3a8

Merge pull request #173 from Gallaecio/request-providers
Add HttpRequestProvider
2 parents aba2b74 + 644e5be

4 files changed (+67 -3 lines)
scrapy_poet/page_input_providers.py (+25)

@@ -22,6 +22,8 @@
 from scrapy.utils.defer import maybe_deferred_to_future
 from web_poet import (
     HttpClient,
+    HttpRequest,
+    HttpRequestHeaders,
     HttpResponse,
     HttpResponseHeaders,
     PageParams,
@@ -144,6 +146,29 @@ def __init__(self, injector):
     # injection breaks the method overriding rules and mypy then complains.
 
 
+class HttpRequestProvider(PageObjectInputProvider):
+    """This class provides :class:`web_poet.HttpRequest
+    <web_poet.page_inputs.http.HttpRequest>` instances.
+    """
+
+    provided_classes = {HttpRequest}
+    name = "request_data"
+
+    def __call__(self, to_provide: Set[Callable], request: Request):
+        """Builds a :class:`web_poet.HttpRequest
+        <web_poet.page_inputs.http.HttpRequest>` instance using a
+        :class:`scrapy.http.Request` instance.
+        """
+        return [
+            HttpRequest(
+                url=RequestUrl(request.url),
+                method=request.method,
+                headers=HttpRequestHeaders.from_bytes_dict(request.headers),
+                body=request.body,
+            )
+        ]
+
+
 class HttpResponseProvider(PageObjectInputProvider):
     """This class provides :class:`web_poet.HttpResponse
     <web_poet.page_inputs.http.HttpResponse>` instances.
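
With this provider available, a page object can declare web_poet.HttpRequest as a dependency and scrapy-poet will build it from the Scrapy request being processed. Below is a minimal sketch of such a page object; the class name and returned fields are illustrative and not part of this commit, and since this diff does not appear to register the provider among the defaults, it may need to be enabled via the SCRAPY_POET_PROVIDERS setting.

import attrs
from web_poet import HttpRequest, ItemPage


@attrs.define
class RequestInfoPage(ItemPage):
    # Filled in by HttpRequestProvider from the originating scrapy.Request.
    request: HttpRequest

    async def to_item(self) -> dict:
        # Expose a few request attributes as the extracted item.
        return {
            "url": str(self.request.url),
            "method": self.request.method,
        }
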

setup.py (+1 -1)

@@ -29,7 +29,7 @@
         "time_machine >= 2.2.0",
         "twisted >= 18.9.0",
         "url-matcher >= 0.2.0",
-        "web-poet >= 0.15",
+        "web-poet >= 0.15.1",
     ],
     classifiers=[
         "Development Status :: 3 - Alpha",

tests/test_providers.py (+40 -1)

@@ -8,13 +8,21 @@
 from scrapy.settings import Settings
 from scrapy.utils.test import get_crawler
 from twisted.python.failure import Failure
-from web_poet import HttpClient, HttpResponse
+from web_poet import (
+    HttpClient,
+    HttpRequest,
+    HttpRequestBody,
+    HttpRequestHeaders,
+    HttpResponse,
+    RequestUrl,
+)
 from web_poet.serialization import SerializedLeafData, register_serialization
 
 from scrapy_poet import HttpResponseProvider
 from scrapy_poet.injection import Injector
 from scrapy_poet.page_input_providers import (
     HttpClientProvider,
+    HttpRequestProvider,
     ItemProvider,
     PageObjectInputProvider,
     PageParamsProvider,
@@ -204,6 +212,37 @@ async def test_http_client_provider(settings):
     assert results[0]._request_downloader == mock_factory.return_value
 
 
+@ensureDeferred
+async def test_http_request_provider(settings):
+    crawler = get_crawler(Spider, settings)
+    injector = Injector(crawler)
+    provider = HttpRequestProvider(injector)
+
+    empty_scrapy_request = scrapy.http.Request("https://example.com")
+    (empty_request,) = provider(set(), empty_scrapy_request)
+    assert isinstance(empty_request, HttpRequest)
+    assert isinstance(empty_request.url, RequestUrl)
+    assert str(empty_request.url) == "https://example.com"
+    assert empty_request.method == "GET"
+    assert isinstance(empty_request.headers, HttpRequestHeaders)
+    assert empty_request.headers == HttpRequestHeaders()
+    assert isinstance(empty_request.body, HttpRequestBody)
+    assert empty_request.body == HttpRequestBody()
+
+    full_scrapy_request = scrapy.http.Request(
+        "https://example.com", method="POST", body=b"a", headers={"a": "b"}
+    )
+    (full_request,) = provider(set(), full_scrapy_request)
+    assert isinstance(full_request, HttpRequest)
+    assert isinstance(full_request.url, RequestUrl)
+    assert str(full_request.url) == "https://example.com"
+    assert full_request.method == "POST"
+    assert isinstance(full_request.headers, HttpRequestHeaders)
+    assert full_request.headers == HttpRequestHeaders([("a", "b")])
+    assert isinstance(full_request.body, HttpRequestBody)
+    assert full_request.body == HttpRequestBody(b"a")
+
+
 def test_page_params_provider(settings):
     crawler = get_crawler(Spider, settings)
     injector = Injector(crawler)
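
As the test above exercises, the header conversion goes through HttpRequestHeaders.from_bytes_dict(), which turns Scrapy's bytes-keyed, multi-valued header mapping into web-poet's header container. A small standalone sketch of that step, reusing the test's data:

from scrapy.http import Request
from web_poet import HttpRequestHeaders

# Scrapy stores headers as bytes keys mapped to lists of bytes values.
scrapy_request = Request("https://example.com", headers={"a": "b"})

# from_bytes_dict() decodes them into web-poet's HttpRequestHeaders.
headers = HttpRequestHeaders.from_bytes_dict(scrapy_request.headers)
assert headers == HttpRequestHeaders([("a", "b")])
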

tox.ini (+1 -1)

@@ -23,7 +23,7 @@ deps =
     sqlitedict==1.5.0
     time_machine==2.2.0
     url-matcher==0.2.0
-    web-poet==0.15.0
+    web-poet==0.15.1
 
     # https://github.com/john-kurkowski/tldextract/issues/305
     tldextract<3.6
