Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
'url-matcher',
'multidict',
'w3lib >= 1.22.0',
'yarl',
],
classifiers=[
'Development Status :: 2 - Pre-Alpha',
Expand Down
18 changes: 18 additions & 0 deletions tests/test_page_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import parsel
from web_poet.page_inputs import (
RequestUrl,
ResponseUrl,
HttpRequest,
HttpResponse,
HttpRequestBody,
Expand All @@ -16,6 +18,22 @@
)


@pytest.mark.parametrize("cls", [RequestUrl, ResponseUrl])
def test_url(cls):
    """Both URL classes expose the URL components and support being
    constructed from an existing URL object as well as from a string."""
    url_value = "https://example.com/category/product?query=123&id=xyz#frag1"

    url = cls(url_value)

    assert str(url) == url_value
    assert url.scheme == "https"
    assert url.host == "example.com"
    assert url.path == "/category/product"
    assert url.query_string == "query=123&id=xyz"
    assert url.fragment == "frag1"

    # Fix: the original assigned new_url but asserted nothing, so the
    # URL-from-URL construction path was never actually checked.
    new_url = cls(url)
    assert str(new_url) == url_value

@pytest.mark.parametrize("body_cls", [HttpRequestBody, HttpResponseBody])
def test_http_body_hashable(body_cls):
http_body = body_cls(b"content")
Expand Down
4 changes: 2 additions & 2 deletions web_poet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
HttpRequestBody,
HttpResponseBody,
Meta,
RequestURL,
ResponseURL,
RequestUrl,
ResponseUrl,
)
from .overrides import PageObjectRegistry, consume_modules, OverrideRule

Expand Down
2 changes: 1 addition & 1 deletion web_poet/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def base_url(self) -> str:
# FIXME: move it to HttpResponse
if self._cached_base_url is None:
text = self.html[:4096]
self._cached_base_url = get_base_url(text, self.url)
self._cached_base_url = get_base_url(text, str(self.url))
return self._cached_base_url

def urljoin(self, url: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions web_poet/page_inputs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
HttpResponseHeaders,
HttpRequestBody,
HttpResponseBody,
RequestURL,
ResponseURL
RequestUrl,
ResponseUrl
)
from .browser import BrowserHtml
43 changes: 39 additions & 4 deletions web_poet/page_inputs/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
http_content_type_encoding
)

import yarl
from web_poet._base import _HttpHeaders
from web_poet.utils import memoizemethod_noargs
from web_poet.mixins import SelectableMixin
Expand All @@ -18,12 +19,46 @@
_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]


class ResponseURL(str):
class _Url:
    """Base wrapper for URL page inputs, backed by :class:`yarl.URL`."""

    def __init__(self, url: Union[str, yarl.URL]):
        """Normalize *url* (string or ``yarl.URL``) into a ``yarl.URL``.

        Any input is first rendered to text, so URL-like objects that
        define ``__str__`` (including other ``_Url`` instances) work too.
        """
        raw_text = str(url)
        self._url = yarl.URL(raw_text)

def __str__(self) -> str:
    """Return the complete URL as plain text."""
    wrapped = self._url
    return str(wrapped)

def __repr__(self) -> str:
    """Debug representation; intentionally the same text as ``str``."""
    wrapped = self._url
    return str(wrapped)

def __eq__(self, other) -> bool:
    """Compare by textual form, so an instance equals an equivalent
    string or URL object with the exact same text (trailing-slash
    differences are significant)."""
    return str(self._url) == str(other)

def __hash__(self) -> int:
    # Fix: defining __eq__ without __hash__ makes instances unhashable
    # (Python sets __hash__ to None). Hash on the same string form used
    # by __eq__ so equal URLs hash equally and can be dict keys / set
    # members, consistent with the hashability expected of page inputs.
    return hash(str(self._url))
Copy link
Member

@Gallaecio Gallaecio Jun 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably make sure we implement this in line with our API expectations.

Mind the following:

>>> example_url_1 = yarl.URL("https://example.com")
>>> example_url_2 = yarl.URL("https://example.com/")
>>> example_url_1 == example_url_2
True
>>> str(example_url_1) == str(example_url_2)
False

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. Although I'm not quite sure whether the presence of a trailing / should dictate equality, though.

For example, some sites redirect URLs like "https://example.com/" into "https://example.com". For efficiency, the trailing "/" needs to be stripped to prevent an extra request from being wasted due to redirections.

Because of the different behaviors/expectations, such URLs cannot really be equal as they don't point at the same resource (without the redirections).

Copy link
Member

@Gallaecio Gallaecio Jun 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While that is true for https://example.com/foo and https://example.com/foo/, the case of https://example.com and https://example.com/ is special, since the path of a URL, when not specified, is /.

>>> URL('https://example.com').path
'/'
>>> URL('https://example.com/').path
'/'
>>> URL('https://example.com/foo').path
'/foo'
>>> URL('https://example.com/foo/').path
'/foo/'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahhh thanks for making the distinction clear! Handled this on 292a3b4.


Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The property methods below could be mapped dynamically to yarl's. However, we would lose the benefit of defining docstrings within them.

Copy link
Member

@Gallaecio Gallaecio Jun 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would also lose API governance, so +1 to manual definition.

However, I wonder if we should define them at all for the initial implementation. We want to make sure we get the API right encoding-wise, and if we expose a part of the Yarl interface already as is, I imagine we are introducing the encoding issue in our implementation, with the caveat of not supporting encoded=True in __init__ to at least prevent Yarl from messing things up.

Maybe the initial implementation should use a string internally instead, and we can convert it into Yarl later.

Copy link
Contributor Author

@BurnzZ BurnzZ Jun 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A good point about handling the encoding. What do you think about setting encoded=False by default to prevent yarl from messing things up due to incorrect encoding? be37f39. This would be equivalent to having a str internally, aside from the "smart" helper methods.

Copy link
Member

@Gallaecio Gallaecio Jun 1, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thing is, Yarl exposes encoded in __init__ as a workaround for proper encoding handling, which they set off not to implement. But I believe what @kmike has in mind is for us to have a URL class that does proper encoding handling, in which case we should probably not expose encoded at all (maybe encoding instead, defaulting to "utf8").

I would wait for feedback from @kmike before making more API decisions. I am personally not sure of the best approach here, what parts of w3lib.url we want to apply and how.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the goal shouldn't be to implement a general URL class; the goal is to implement URL class useful for web scraping.

If that's hard to use yarl or other library's URL class directly, and we're defining the API anyways, we probably should think about it from the API point of view: what's the API we want, what are the features commonly used in web scraping? After figuring out how we'd like API to look like, we can see what's the best way to implement it - wrap yarl, wrap w3lib, do something else.

Based on our previous discussions, I think a scraping-ready URL class should have:

  • a way to manipulate query string: add/remove/get/update query parameters
  • some kind of urljoin method, probably via / operation
  • probably - a way to extract the domain name?
  • anything else?

In addition to this, there is whole bunch of questions about encoding, normalization, converting URLs to ascii-only encoded strings suitable for downloading, etc. The best API to handle all that might require some thought. I wonder if we can side-step it for now somehow.

At the same time, I'm not sure properties like .scheme are that essential. They're essential for a general-purpose URL class, but do people who write scraping code commonly parse URLs to get their scheme? We can add such methods and properties for sure, but we can do it later. These methods are probably useful for authors of web scraping frameworks / http clients, but less so for people who write web scraping code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The property methods below can be added mapped dynamically with yarl's. However, we lose the benefit of defining docstrings within them.

It's also about return types - some yarl.URL methods are going to return yarl.URL objects, while here it would make more sense to return _Url objects.

@property
def scheme(self) -> str:
    """Scheme component of the URL (e.g. ``"https"``)."""
    parsed = self._url
    return parsed.scheme

@property
def host(self) -> Optional[str]:
    """Host component of the URL, or ``None`` when it has no host."""
    parsed = self._url
    return parsed.host

@property
def path(self) -> str:
    """Path component of the URL."""
    parsed = self._url
    return parsed.path

@property
def query_string(self) -> str:
    """Raw query string of the URL (text after ``?``, before ``#``)."""
    parsed = self._url
    return parsed.query_string

@property
def fragment(self) -> str:
    """Fragment component of the URL (text after ``#``)."""
    parsed = self._url
    return parsed.fragment


class ResponseUrl(_Url):
    """URL of an HTTP response."""


class RequestURL(str):
class RequestUrl(_Url):
    """URL of an HTTP request."""

Expand Down Expand Up @@ -162,7 +197,7 @@ class HttpRequest:
**web-poet** like :class:`~.HttpClient`.
"""

url: RequestURL = attrs.field(converter=RequestURL)
url: RequestUrl = attrs.field(converter=RequestUrl)
method: str = attrs.field(default="GET", kw_only=True)
headers: HttpRequestHeaders = attrs.field(
factory=HttpRequestHeaders, converter=HttpRequestHeaders, kw_only=True
Expand Down Expand Up @@ -195,7 +230,7 @@ class HttpResponse(SelectableMixin):
is auto-detected from headers and body content.
"""

url: ResponseURL = attrs.field(converter=ResponseURL)
url: ResponseUrl = attrs.field(converter=ResponseUrl)
body: HttpResponseBody = attrs.field(converter=HttpResponseBody)
status: Optional[int] = attrs.field(default=None, kw_only=True)
headers: HttpResponseHeaders = attrs.field(factory=HttpResponseHeaders,
Expand Down