From 2fd79d06987dea641cf88f9dec1519345d5e92fd Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Fri, 3 Jun 2022 00:03:43 +0500 Subject: [PATCH] basic w3lib-based URL implementation --- tests/test_page_inputs.py | 27 ----------------------- tests/test_url.py | 45 +++++++++++++++++++++++++++++++++++++++ web_poet/_base.py | 18 +++++++++++++++- 3 files changed, 62 insertions(+), 28 deletions(-) create mode 100644 tests/test_url.py diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py index 57be5b64..ab346161 100644 --- a/tests/test_page_inputs.py +++ b/tests/test_page_inputs.py @@ -446,30 +446,3 @@ def test_browser_html(): assert html.css("p::text").getall() == ["Hello, ", "world!"] assert isinstance(html.selector, parsel.Selector) - -def test_url_base_class(): - url_str = "http://example.com" - url = _Url(url_str) - assert str(url) == url_str - assert repr(url) == "_Url('http://example.com')" - - with pytest.raises(TypeError): - _Url(123) - - -def test_url_subclass(): - url_str = "http://example.com" - - class MyUrl(_Url): - pass - - class MyUrl2(_Url): - pass - - url = MyUrl(url_str) - assert str(url) == url_str - assert url._url == url_str - assert repr(url) == "MyUrl('http://example.com')" - - url2 = MyUrl2(url) - assert str(url2) == str(url) diff --git a/tests/test_url.py b/tests/test_url.py new file mode 100644 index 00000000..7b69fa8b --- /dev/null +++ b/tests/test_url.py @@ -0,0 +1,45 @@ +import pytest + +from web_poet._base import _Url + + +def test_url_base_class(): + url_str = "http://example.com" + url = _Url(url_str) + assert str(url) == url_str + assert repr(url) == "_Url('http://example.com')" + + +def test_url_init_validation(): + with pytest.raises(TypeError): + _Url(123) + + +def test_url_subclasses(): + url_str = "http://example.com" + + class MyUrl(_Url): + pass + + class MyUrl2(_Url): + pass + + url = MyUrl(url_str) + assert str(url) == url_str + assert url._url == url_str + assert repr(url) == "MyUrl('http://example.com')" + + url2 = MyUrl2(url) + assert str(url2) == str(url) + + +def test_urljoin(): + url = _Url("http://example.com/foo/bar?x=y#fragment") + assert str(url.join("baz")) == "http://example.com/foo/baz" + assert str(url / "baz") == "http://example.com/foo/baz" + + +def test_update_query(): + url = _Url("http://example.com/foo/bar?x=y#fragment") + assert str(url % {"foo": "bar"}) == "http://example.com/foo/bar?x=y&foo=bar#fragment" + assert str(url % {"x": "z"}) == "http://example.com/foo/bar?x=z#fragment" \ No newline at end of file diff --git a/web_poet/_base.py b/web_poet/_base.py index 53382896..d88fab80 100644 --- a/web_poet/_base.py +++ b/web_poet/_base.py @@ -3,12 +3,14 @@ In general, users shouldn't import and use the contents of this module. """ - +from urllib.parse import urljoin from typing import Type, TypeVar, List, Dict, Union from multidict import CIMultiDict +from w3lib.url import add_or_replace_parameters T_headers = TypeVar("T_headers", bound="_HttpHeaders") +T_url = TypeVar("T_url", bound="_Url") class _HttpHeaders(CIMultiDict): @@ -43,8 +45,22 @@ def __init__(self, url: Union[str, '_Url']): f"got {url.__class__} instance instead") self._url = str(url) + def join(self: T_url, other: Union[str, '_Url']) -> T_url: + return self.__class__(urljoin(self._url, str(other))) + + def update_query(self: T_url, + new_parameters: Dict[str, str]) -> T_url: + new_url = add_or_replace_parameters(self._url, + new_parameters=new_parameters) + return self.__class__(new_url) + def __str__(self) -> str: return self._url def __repr__(self) -> str: return f"{self.__class__.__name__}({self._url!r})" + + def __mod__(self: T_url, other: Dict[str, str]) -> T_url: + return self.update_query(other) + + __truediv__ = join