From 2dc1f0cd44ac7ca3fa05a97d35f35b0bd5ca3269 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 16:49:03 -0400 Subject: [PATCH 01/29] Add a few annotations. - Add a typing configuration file with pyright set to strict. - Add annotations for normalizers.py and validators.py. --- pyproject.toml | 10 ++++++ src/rfc3986/normalizers.py | 34 ++++++++++++-------- src/rfc3986/validators.py | 64 ++++++++++++++++++++++---------------- 3 files changed, 68 insertions(+), 40 deletions(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7cb2895 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[tool.pyright] +include = ["src/rfc3986"] +ignore = ["tests"] +pythonVersion = "3.8" +typeCheckingMode = "strict" + +reportPrivateUsage = "none" +reportImportCycles = "warning" +reportPropertyTypeMismatch = "warning" +reportUnnecessaryTypeIgnoreComment = "warning" diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index c989201..827eba4 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -13,18 +13,21 @@ # limitations under the License. """Module with functions to normalize components.""" import re +import typing as t from urllib.parse import quote as urlquote from . import compat from . 
import misc -def normalize_scheme(scheme): +def normalize_scheme(scheme: str) -> str: """Normalize the scheme component.""" return scheme.lower() -def normalize_authority(authority): +def normalize_authority( + authority: t.Tuple[t.Optional[str], t.Optional[str], t.Optional[str]], +) -> str: """Normalize an authority tuple to a string.""" userinfo, host, port = authority result = "" @@ -37,17 +40,17 @@ def normalize_authority(authority): return result -def normalize_username(username): +def normalize_username(username: str) -> str: """Normalize a username to make it safe to include in userinfo.""" return urlquote(username) -def normalize_password(password): +def normalize_password(password: str) -> str: """Normalize a password to make safe for userinfo.""" return urlquote(password) -def normalize_host(host): +def normalize_host(host: str) -> str: """Normalize a host string.""" if misc.IPv6_MATCHER.match(host): percent = host.find("%") @@ -70,7 +73,7 @@ def normalize_host(host): return host.lower() -def normalize_path(path): +def normalize_path(path: str) -> str: """Normalize the path string.""" if not path: return path @@ -79,14 +82,14 @@ def normalize_path(path): return remove_dot_segments(path) -def normalize_query(query): +def normalize_query(query: str) -> str: """Normalize the query string.""" if not query: return query return normalize_percent_characters(query) -def normalize_fragment(fragment): +def normalize_fragment(fragment: str) -> str: """Normalize the fragment string.""" if not fragment: return fragment @@ -96,7 +99,7 @@ def normalize_fragment(fragment): PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}") -def normalize_percent_characters(s): +def normalize_percent_characters(s: str) -> str: """All percent characters should be upper-cased. For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``. 
@@ -108,14 +111,14 @@ def normalize_percent_characters(s): return s -def remove_dot_segments(s): +def remove_dot_segments(s: str) -> str: """Remove dot segments from the string. See also Section 5.2.4 of :rfc:`3986`. """ # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code segments = s.split("/") # Turn the path into a list of segments - output = [] # Initialize the variable to use to store output + output: list[str] = [] # Initialize the variable to use to store output for segment in segments: # '.' is the current directory, so ignore it, it is superfluous @@ -141,8 +144,13 @@ def remove_dot_segments(s): return "/".join(output) - -def encode_component(uri_component, encoding): +@t.overload +def encode_component(uri_component: None, encoding: str) -> None: + ... +@t.overload +def encode_component(uri_component: str, encoding: str) -> str: + ... +def encode_component(uri_component: t.Optional[str], encoding: str) -> t.Optional[str]: """Encode the specific component in the provided encoding.""" if uri_component is None: return uri_component diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 21e6eb9..454cc7a 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -12,9 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. """Module containing the validation logic for rfc3986.""" +import typing as t + from . import exceptions from . import misc from . import normalizers +from . 
import uri class Validator: @@ -50,9 +53,9 @@ class Validator: def __init__(self): """Initialize our default validations.""" - self.allowed_schemes = set() - self.allowed_hosts = set() - self.allowed_ports = set() + self.allowed_schemes: set[str] = set() + self.allowed_hosts: set[str] = set() + self.allowed_ports: set[str] = set() self.allow_password = True self.required_components = { "scheme": False, @@ -65,7 +68,7 @@ def __init__(self): } self.validated_components = self.required_components.copy() - def allow_schemes(self, *schemes): + def allow_schemes(self, *schemes: str): """Require the scheme to be one of the provided schemes. .. versionadded:: 1.0 @@ -81,7 +84,7 @@ def allow_schemes(self, *schemes): self.allowed_schemes.add(normalizers.normalize_scheme(scheme)) return self - def allow_hosts(self, *hosts): + def allow_hosts(self, *hosts: str): """Require the host to be one of the provided hosts. .. versionadded:: 1.0 @@ -97,7 +100,7 @@ def allow_hosts(self, *hosts): self.allowed_hosts.add(normalizers.normalize_host(host)) return self - def allow_ports(self, *ports): + def allow_ports(self, *ports: str): """Require the port to be one of the provided ports. .. versionadded:: 1.0 @@ -141,7 +144,7 @@ def forbid_use_of_password(self): self.allow_password = False return self - def check_validity_of(self, *components): + def check_validity_of(self, *components: str): """Check the validity of the components provided. This can be specified repeatedly. @@ -155,7 +158,7 @@ def check_validity_of(self, *components): :rtype: Validator """ - components = [c.lower() for c in components] + components = tuple(c.lower() for c in components) for component in components: if component not in self.COMPONENT_NAMES: raise ValueError(f'"{component}" is not a valid component') @@ -164,7 +167,7 @@ def check_validity_of(self, *components): ) return self - def require_presence_of(self, *components): + def require_presence_of(self, *components: str): """Require the components provided. 
This can be specified repeatedly. @@ -178,7 +181,7 @@ def require_presence_of(self, *components): :rtype: Validator """ - components = [c.lower() for c in components] + components = tuple(c.lower() for c in components) for component in components: if component not in self.COMPONENT_NAMES: raise ValueError(f'"{component}" is not a valid component') @@ -187,7 +190,7 @@ def require_presence_of(self, *components): ) return self - def validate(self, uri): + def validate(self, uri: "uri.URIReference"): """Check a URI for conditions specified on this validator. .. versionadded:: 1.0 @@ -229,7 +232,7 @@ def validate(self, uri): ensure_one_of(self.allowed_ports, uri, "port") -def check_password(uri): +def check_password(uri: "uri.URIReference") -> None: """Assert that there is no password present in the uri.""" userinfo = uri.userinfo if not userinfo: @@ -240,7 +243,11 @@ def check_password(uri): raise exceptions.PasswordForbidden(uri) -def ensure_one_of(allowed_values, uri, attribute): +def ensure_one_of( + allowed_values: t.Container[object], + uri: "uri.URIReference", + attribute: str, +) -> None: """Assert that the uri's attribute is one of the allowed values.""" value = getattr(uri, attribute) if value is not None and allowed_values and value not in allowed_values: @@ -251,7 +258,10 @@ def ensure_one_of(allowed_values, uri, attribute): ) -def ensure_required_components_exist(uri, required_components): +def ensure_required_components_exist( + uri: "uri.URIReference", + required_components: t.Iterable[str], +): """Assert that all required components are present in the URI.""" missing_components = sorted( component @@ -262,7 +272,7 @@ def ensure_required_components_exist(uri, required_components): raise exceptions.MissingComponentError(uri, *missing_components) -def is_valid(value, matcher, require): +def is_valid(value: t.Optional[str], matcher: t.Pattern[str], require: bool) -> bool: """Determine if a value is valid based on the provided matcher. 
:param str value: @@ -273,13 +283,13 @@ def is_valid(value, matcher, require): Whether or not the value is required. """ if require: - return value is not None and matcher.match(value) + return value is not None and bool(matcher.match(value)) # require is False and value is not None - return value is None or matcher.match(value) + return value is None or bool(matcher.match(value)) -def authority_is_valid(authority, host=None, require=False): +def authority_is_valid(authority: str, host: t.Optional[str] = None, require: bool = False) -> bool: """Determine if the authority string is valid. :param str authority: @@ -299,7 +309,7 @@ def authority_is_valid(authority, host=None, require=False): return validated -def host_is_valid(host, require=False): +def host_is_valid(host: t.Optional[str], require: bool = False) -> bool: """Determine if the host string is valid. :param str host: @@ -319,7 +329,7 @@ def host_is_valid(host, require=False): return validated -def scheme_is_valid(scheme, require=False): +def scheme_is_valid(scheme: t.Optional[str], require: bool = False) -> bool: """Determine if the scheme is valid. :param str scheme: @@ -334,7 +344,7 @@ def scheme_is_valid(scheme, require=False): return is_valid(scheme, misc.SCHEME_MATCHER, require) -def path_is_valid(path, require=False): +def path_is_valid(path: t.Optional[str], require: bool = False) -> bool: """Determine if the path component is valid. :param str path: @@ -349,7 +359,7 @@ def path_is_valid(path, require=False): return is_valid(path, misc.PATH_MATCHER, require) -def query_is_valid(query, require=False): +def query_is_valid(query: t.Optional[str], require: bool = False) -> bool: """Determine if the query component is valid. 
:param str query: @@ -364,7 +374,7 @@ def query_is_valid(query, require=False): return is_valid(query, misc.QUERY_MATCHER, require) -def fragment_is_valid(fragment, require=False): +def fragment_is_valid(fragment: t.Optional[str], require: bool = False) -> bool: """Determine if the fragment component is valid. :param str fragment: @@ -379,7 +389,7 @@ def fragment_is_valid(fragment, require=False): return is_valid(fragment, misc.FRAGMENT_MATCHER, require) -def valid_ipv4_host_address(host): +def valid_ipv4_host_address(host: str) -> bool: """Determine if the given host is a valid IPv4 address.""" # If the host exists, and it might be IPv4, check each byte in the # address. @@ -396,7 +406,7 @@ def valid_ipv4_host_address(host): _SUBAUTHORITY_VALIDATORS = {"userinfo", "host", "port"} -def subauthority_component_is_valid(uri, component): +def subauthority_component_is_valid(uri: "uri.URIReference", component: str) -> bool: """Determine if the userinfo, host, and port are valid.""" try: subauthority_dict = uri.authority_info() @@ -420,9 +430,9 @@ def subauthority_component_is_valid(uri, component): return 0 <= port <= 65535 -def ensure_components_are_valid(uri, validated_components): +def ensure_components_are_valid(uri: "uri.URIReference", validated_components: t.List[str]) -> None: """Assert that all components are valid in the URI.""" - invalid_components = set() + invalid_components: set[str] = set() for component in validated_components: if component in _SUBAUTHORITY_VALIDATORS: if not subauthority_component_is_valid(uri, component): From ae63851b23cd4e379d1552c8fc1a60939b9e4469 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 21:02:04 +0000 Subject: [PATCH 02/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/rfc3986/normalizers.py | 9 ++++++++- src/rfc3986/validators.py | 20 +++++++++++++++----- 2 files changed, 23 
insertions(+), 6 deletions(-) diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index 827eba4..8f45ec3 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -144,13 +144,20 @@ def remove_dot_segments(s: str) -> str: return "/".join(output) + @t.overload def encode_component(uri_component: None, encoding: str) -> None: ... + + @t.overload def encode_component(uri_component: str, encoding: str) -> str: ... -def encode_component(uri_component: t.Optional[str], encoding: str) -> t.Optional[str]: + + +def encode_component( + uri_component: t.Optional[str], encoding: str +) -> t.Optional[str]: """Encode the specific component in the provided encoding.""" if uri_component is None: return uri_component diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 454cc7a..13031e3 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -272,7 +272,9 @@ def ensure_required_components_exist( raise exceptions.MissingComponentError(uri, *missing_components) -def is_valid(value: t.Optional[str], matcher: t.Pattern[str], require: bool) -> bool: +def is_valid( + value: t.Optional[str], matcher: t.Pattern[str], require: bool +) -> bool: """Determine if a value is valid based on the provided matcher. :param str value: @@ -289,7 +291,9 @@ def is_valid(value: t.Optional[str], matcher: t.Pattern[str], require: bool) -> return value is None or bool(matcher.match(value)) -def authority_is_valid(authority: str, host: t.Optional[str] = None, require: bool = False) -> bool: +def authority_is_valid( + authority: str, host: t.Optional[str] = None, require: bool = False +) -> bool: """Determine if the authority string is valid. 
:param str authority: @@ -374,7 +378,9 @@ def query_is_valid(query: t.Optional[str], require: bool = False) -> bool: return is_valid(query, misc.QUERY_MATCHER, require) -def fragment_is_valid(fragment: t.Optional[str], require: bool = False) -> bool: +def fragment_is_valid( + fragment: t.Optional[str], require: bool = False +) -> bool: """Determine if the fragment component is valid. :param str fragment: @@ -406,7 +412,9 @@ def valid_ipv4_host_address(host: str) -> bool: _SUBAUTHORITY_VALIDATORS = {"userinfo", "host", "port"} -def subauthority_component_is_valid(uri: "uri.URIReference", component: str) -> bool: +def subauthority_component_is_valid( + uri: "uri.URIReference", component: str +) -> bool: """Determine if the userinfo, host, and port are valid.""" try: subauthority_dict = uri.authority_info() @@ -430,7 +438,9 @@ def subauthority_component_is_valid(uri: "uri.URIReference", component: str) -> return 0 <= port <= 65535 -def ensure_components_are_valid(uri: "uri.URIReference", validated_components: t.List[str]) -> None: +def ensure_components_are_valid( + uri: "uri.URIReference", validated_components: t.List[str] +) -> None: """Assert that all components are valid in the URI.""" invalid_components: set[str] = set() for component in validated_components: From c28dc0a4f85f3b2eab29037a7a3665634241d624 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 17:20:46 -0400 Subject: [PATCH 03/29] Add annotations to api.py. --- src/rfc3986/api.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/rfc3986/api.py b/src/rfc3986/api.py index e8a54dd..08db239 100644 --- a/src/rfc3986/api.py +++ b/src/rfc3986/api.py @@ -22,7 +22,7 @@ from .uri import URIReference -def uri_reference(uri, encoding="utf-8"): +def uri_reference(uri: str, encoding: str = "utf-8") -> URIReference: """Parse a URI string into a URIReference. This is a convenience function. 
You could achieve the same end by using @@ -36,7 +36,7 @@ def uri_reference(uri, encoding="utf-8"): return URIReference.from_string(uri, encoding) -def iri_reference(iri, encoding="utf-8"): +def iri_reference(iri: str, encoding: str= "utf-8") -> IRIReference: """Parse a IRI string into an IRIReference. This is a convenience function. You could achieve the same end by using @@ -50,7 +50,7 @@ def iri_reference(iri, encoding="utf-8"): return IRIReference.from_string(iri, encoding) -def is_valid_uri(uri, encoding="utf-8", **kwargs): +def is_valid_uri(uri: str, encoding: str = "utf-8", **kwargs: bool) -> bool: """Determine if the URI given is valid. This is a convenience function. You could use either @@ -75,7 +75,7 @@ def is_valid_uri(uri, encoding="utf-8", **kwargs): return URIReference.from_string(uri, encoding).is_valid(**kwargs) -def normalize_uri(uri, encoding="utf-8"): +def normalize_uri(uri: str, encoding: str = "utf-8") -> str: """Normalize the given URI. This is a convenience function. You could use either @@ -91,7 +91,7 @@ def normalize_uri(uri, encoding="utf-8"): return normalized_reference.unsplit() -def urlparse(uri, encoding="utf-8"): +def urlparse(uri: str, encoding: str = "utf-8") -> ParseResult: """Parse a given URI and return a ParseResult. This is a partial replacement of the standard library's urlparse function. 
From 602934cae711c31fb5f4327fc346b11dbd36f8aa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 21:21:15 +0000 Subject: [PATCH 04/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/rfc3986/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rfc3986/api.py b/src/rfc3986/api.py index 08db239..0c4ca18 100644 --- a/src/rfc3986/api.py +++ b/src/rfc3986/api.py @@ -36,7 +36,7 @@ def uri_reference(uri: str, encoding: str = "utf-8") -> URIReference: return URIReference.from_string(uri, encoding) -def iri_reference(iri: str, encoding: str= "utf-8") -> IRIReference: +def iri_reference(iri: str, encoding: str = "utf-8") -> IRIReference: """Parse a IRI string into an IRIReference. This is a convenience function. You could achieve the same end by using From be2828347deb29e75d7bcaf3c93d7cc0ef09ec0f Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 18:23:36 -0400 Subject: [PATCH 05/29] Preliminary, but added minimalistic typing check to tox and CI, as well as a py.typed file. - The py.typed file is very early, but it's necessary to verify type completeness via `pyright --verifytypes`. Seems like the right kind of typing check, but that's assuming pyright and not mypy is ultimately used. - Reference: https://microsoft.github.io/pyright/#/typed-libraries?id=verifying-type-completeness - Add typing testenv to tox.ini, which will run `pyright --verifytypes rfc3986`. - Add one more matrix slot to the GitHub workflow to run the above typing check in CI on the lowest supported version of python on Ubuntu. - I only added it for Ubuntu because this package and its dependencies are pure python, so the types shouldn't change between operating systems. 
--- .github/workflows/main.yml | 4 ++++ src/rfc3986/py.typed | 0 tox.ini | 7 ++++++- 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 src/rfc3986/py.typed diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 20d21a3..29631e9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,6 +19,10 @@ jobs: - os: windows-latest python: '3.12' toxenv: py + # typing + - os: ubuntu-latest + python: '3.8' + toxenv: typing # misc - os: ubuntu-latest python: '3.12' diff --git a/src/rfc3986/py.typed b/src/rfc3986/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tox.ini b/tox.ini index 688c016..539879a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{37,38,39,310,311,312},lint +envlist = py{37,38,39,310,311,312},lint,typing [testenv] pip_pre = False @@ -39,6 +39,11 @@ deps = flake8-import-order commands = flake8 {posargs} src/rfc3986 +[testenv:typing] +deps = + pyright +commands = pyright {posargs:--verifytypes rfc3986} + [testenv:venv] commands = {posargs} From 9df0355670ec6783cc65bef0ba658e03ccabb8a8 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 21:24:41 -0400 Subject: [PATCH 06/29] Add parameter annotations to builder.py. - TODO: Consider changing the insides of the URIBuilder.add_* methods to use `type(self)(...)` instead of `URIBuilder(...)`. That way, subclassing is supported. Would result in the return annotations being Self as well. - Added trailing commas to function parameters in normalizers.py and validators.py for functions with multi-line signatures. 
--- src/rfc3986/builder.py | 35 ++++++++++++++++++----------------- src/rfc3986/normalizers.py | 3 ++- src/rfc3986/validators.py | 19 +++++++++++++------ 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 2826b74..0af3880 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Module containing the logic for the URIBuilder object.""" +import typing as t from urllib.parse import parse_qsl from urllib.parse import urlencode @@ -33,13 +34,13 @@ class URIBuilder: def __init__( self, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, + scheme: t.Optional[str] = None, + userinfo: t.Optional[str] = None, + host: t.Optional[str] = None, + port: t.Optional[str] = None, + path: t.Optional[str] = None, + query: t.Optional[str] = None, + fragment: t.Optional[str] = None, ): """Initialize our URI builder. @@ -76,7 +77,7 @@ def __repr__(self): return formatstr.format(b=self) @classmethod - def from_uri(cls, reference): + def from_uri(cls, reference: t.Union["uri.URIReference", str]): """Initialize the URI builder from another URI. Takes the given URI reference and creates a new URI builder instance @@ -95,7 +96,7 @@ def from_uri(cls, reference): fragment=reference.fragment, ) - def add_scheme(self, scheme): + def add_scheme(self, scheme: str): """Add a scheme to our builder object. After normalizing, this will generate a new URIBuilder instance with @@ -119,7 +120,7 @@ def add_scheme(self, scheme): fragment=self.fragment, ) - def add_credentials(self, username, password): + def add_credentials(self, username: str, password: t.Optional[str]): """Add credentials as the userinfo portion of the URI. .. 
code-block:: python @@ -152,7 +153,7 @@ def add_credentials(self, username, password): fragment=self.fragment, ) - def add_host(self, host): + def add_host(self, host: str): """Add hostname to the URI. .. code-block:: python @@ -172,7 +173,7 @@ def add_host(self, host): fragment=self.fragment, ) - def add_port(self, port): + def add_port(self, port: t.Union[str, int]): """Add port to the URI. .. code-block:: python @@ -211,7 +212,7 @@ def add_port(self, port): fragment=self.fragment, ) - def add_path(self, path): + def add_path(self, path: str): """Add a path to the URI. .. code-block:: python @@ -238,7 +239,7 @@ def add_path(self, path): fragment=self.fragment, ) - def extend_path(self, path): + def extend_path(self, path: str): """Extend the existing path value with the provided value. .. versionadded:: 1.5.0 @@ -314,7 +315,7 @@ def extend_query_with(self, query_items): return self.add_query_from(original_query_items + query_items) - def add_query(self, query): + def add_query(self, query: str): """Add a pre-formated query string to the URI. .. code-block:: python @@ -334,7 +335,7 @@ def add_query(self, query): fragment=self.fragment, ) - def add_fragment(self, fragment): + def add_fragment(self, fragment: str): """Add a fragment to the URI. .. code-block:: python @@ -354,7 +355,7 @@ def add_fragment(self, fragment): fragment=normalizers.normalize_fragment(fragment), ) - def finalize(self): + def finalize(self) -> "uri.URIReference": """Create a URIReference from our builder. .. 
code-block:: python diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index 8f45ec3..4e3c91e 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -156,7 +156,8 @@ def encode_component(uri_component: str, encoding: str) -> str: def encode_component( - uri_component: t.Optional[str], encoding: str + uri_component: t.Optional[str], + encoding: str, ) -> t.Optional[str]: """Encode the specific component in the provided encoding.""" if uri_component is None: diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 13031e3..d999ad2 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -273,7 +273,9 @@ def ensure_required_components_exist( def is_valid( - value: t.Optional[str], matcher: t.Pattern[str], require: bool + value: t.Optional[str], + matcher: t.Pattern[str], + require: bool, ) -> bool: """Determine if a value is valid based on the provided matcher. @@ -292,7 +294,9 @@ def is_valid( def authority_is_valid( - authority: str, host: t.Optional[str] = None, require: bool = False + authority: str, + host: t.Optional[str] = None, + require: bool = False, ) -> bool: """Determine if the authority string is valid. @@ -379,7 +383,8 @@ def query_is_valid(query: t.Optional[str], require: bool = False) -> bool: def fragment_is_valid( - fragment: t.Optional[str], require: bool = False + fragment: t.Optional[str], + require: bool = False, ) -> bool: """Determine if the fragment component is valid. @@ -413,7 +418,8 @@ def valid_ipv4_host_address(host: str) -> bool: def subauthority_component_is_valid( - uri: "uri.URIReference", component: str + uri: "uri.URIReference", + component: str, ) -> bool: """Determine if the userinfo, host, and port are valid.""" try: @@ -429,7 +435,7 @@ def subauthority_component_is_valid( return True try: - port = int(subauthority_dict["port"]) + port = int(subauthority_dict["port"]) # pyright: ignore[reportArgumentType] # Guarded by "except TypeError". 
except TypeError: # If the port wasn't provided it'll be None and int(None) raises a # TypeError @@ -439,7 +445,8 @@ def subauthority_component_is_valid( def ensure_components_are_valid( - uri: "uri.URIReference", validated_components: t.List[str] + uri: "uri.URIReference", + validated_components: t.List[str], ) -> None: """Assert that all components are valid in the URI.""" invalid_components: set[str] = set() From 6c3d10907282f016bcc1ece45e5a178b2e8f4d68 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 21:40:30 -0400 Subject: [PATCH 07/29] Add type annotations to `compat.to_str()` and `compat.to_bytes()`. - I'm not sure if these functions are truly necessary if the library no longer supports Python 2, but the use of it with `encoding` is prevalent enough around the library that it seems worth typing. - The overloads are necessary to account for None being a passthrough value. - Two cases of this are `ParseResult.copy_with()` and `ParseResultBytes.copy_with()`. The `attrs_dict` dictionary in those methods is allowed to have None, and None is allowed for all of the component parameters (from what I can tell). Thus, whether intentional or not, `compat.to_(bytes|str)()`'s ability to let values pass through without modification, so long as they aren't bytes, is depended upon. --- src/rfc3986/compat.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index 9c1dca9..5235ccf 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Compatibility module for Python 2 and 3 support.""" +import typing as t __all__ = ( "to_bytes", @@ -19,14 +20,40 @@ ) -def to_str(b, encoding="utf-8"): +@t.overload +def to_str(b: t.Union[str, bytes], encoding: str = "utf-8") -> str: + ...
+ + +@t.overload +def to_str(b: None, encoding: str = "utf-8") -> None: + ... + + +def to_str( + b: t.Optional[t.Union[str, bytes]], + encoding: str = "utf-8", +) -> t.Optional[str]: """Ensure that b is text in the specified encoding.""" if hasattr(b, "decode") and not isinstance(b, str): b = b.decode(encoding) return b -def to_bytes(s, encoding="utf-8"): +@t.overload +def to_bytes(s: t.Union[str, bytes], encoding: str = "utf-8") -> bytes: + ... + + +@t.overload +def to_bytes(s: None, encoding: str = "utf-8") -> None: + ... + + +def to_bytes( + s: t.Optional[t.Union[str, bytes]], + encoding: str = "utf-8", +) -> t.Optional[bytes]: """Ensure that s is converted to bytes from the encoding.""" if hasattr(s, "encode") and not isinstance(s, bytes): s = s.encode(encoding) From a7622de9c2f123af272098b5780c2da79703f969 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 21:50:21 -0400 Subject: [PATCH 08/29] Finish annotating `query_items` parameters in builder.py. - These are passed into `urllib.parse.urlencode()`, so the annotation for that was copied from typeshed and modified slightly. - `_QueryType` in typeshed uses `typing.Sequence` to account for different types of sequence values being passed in, but `URIBuilder.extend_query_with()` in builder.py uses `isinstance(query_items, list)` as flow control, so Sequence is too wide to account for what that method allows. Thus, the type was modified to use `typing.List` in place of `typing.Sequence` where necessary. - Arguably, that isinstance check should be changed to check against `Sequence` instead, but I'd prefer having that double-checked, and this PR's scope is currently mostly limited to annotations anyway. This can be revisited later if necessary. - TODO: Ask if `username is None` check is still needed in `URIBuilder.add_credentials()` if the `username` parameter is annotated with `str`.
--- src/rfc3986/builder.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 0af3880..a5aa107 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -20,6 +20,14 @@ from . import uri from . import uri_reference +# Copied from urllib.parse in typeshed. +_QueryType = t.Union[ + t.Mapping[t.Any, t.Any], + t.Mapping[t.Any, t.Sequence[t.Any]], + t.List[t.Tuple[t.Any, t.Any]], + t.List[t.Tuple[t.Any, t.Sequence[t.Any]]], +] + class URIBuilder: """Object to aid in building up a URI Reference from parts. @@ -268,7 +276,7 @@ def extend_path(self, path: str): return self.add_path(path) - def add_query_from(self, query_items): + def add_query_from(self, query_items: _QueryType): """Generate and add a query a dictionary or list of tuples. .. code-block:: python @@ -294,7 +302,7 @@ def add_query_from(self, query_items): fragment=self.fragment, ) - def extend_query_with(self, query_items): + def extend_query_with(self, query_items: _QueryType): """Extend the existing query string with the new query items. .. versionadded:: 1.5.0 From 8cff806beb78a0b3704b22c9c369b557d2dad835 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Jun 2024 01:50:49 +0000 Subject: [PATCH 09/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/rfc3986/validators.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index d999ad2..499b9a7 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -435,7 +435,9 @@ def subauthority_component_is_valid( return True try: - port = int(subauthority_dict["port"]) # pyright: ignore[reportArgumentType] # Guarded by "except TypeError". + port = int( + subauthority_dict["port"] + ) # pyright: ignore[reportArgumentType] # Guarded by "except TypeError". 
except TypeError: # If the port wasn't provided it'll be None and int(None) raises a # TypeError From 57f562149167569f7bec738903f275611596b5f0 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 22:10:56 -0400 Subject: [PATCH 10/29] Account for current flake8 errors with noqa. - Ignore need for docstrings in function overloads. - Ignore a line being too long due to a pyright: ignore + explanation. - Moving the explanation away from that line doesn't make the line short enough to pass. Removing the error code as well would be enough, but removing both has 2 downsides: a) The lack of explanation on the same line makes it harder to immediately see why the pyright: ignore was added when grepping. b) Having an unrestricted pyright: ignore (or type: ignore) can cover up other typechecking errors, which could cause a problem down the line. - Having a pyright: ignore, a noqa, and fmt: off/on for this one line isn't clean, but I'd like a second opinion on how to handle it. --- src/rfc3986/compat.py | 16 ++++++++++------ src/rfc3986/normalizers.py | 4 ++-- src/rfc3986/validators.py | 6 +++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index 5235ccf..b66e1db 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -21,12 +21,14 @@ @t.overload -def to_str(b: t.Union[str, bytes], encoding: str = "utf-8") -> str: - ... +def to_str( # noqa: D103 + b: t.Union[str, bytes], + encoding: str = "utf-8", +) -> str: ... @t.overload -def to_str(b: None, encoding: str = "utf-8") -> None: +def to_str(b: None, encoding: str = "utf-8") -> None: # noqa: D103 ... @@ -41,12 +43,14 @@ def to_str( @t.overload -def to_bytes(s: t.Union[str, bytes], encoding: str = "utf-8") -> bytes: - ... +def to_bytes( # noqa: D103 + s: t.Union[str, bytes], + encoding: str = "utf-8", +) -> bytes: ... 
@t.overload -def to_bytes(s: None, encoding: str = "utf-8") -> None: +def to_bytes(s: None, encoding: str = "utf-8") -> None: # noqa: D103 ... diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index 4e3c91e..902c23c 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -146,12 +146,12 @@ def remove_dot_segments(s: str) -> str: @t.overload -def encode_component(uri_component: None, encoding: str) -> None: +def encode_component(uri_component: None, encoding: str) -> None: # noqa: D103 ... @t.overload -def encode_component(uri_component: str, encoding: str) -> str: +def encode_component(uri_component: str, encoding: str) -> str: # noqa: D103 ... diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 499b9a7..69c3a04 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -435,9 +435,9 @@ def subauthority_component_is_valid( return True try: - port = int( - subauthority_dict["port"] - ) # pyright: ignore[reportArgumentType] # Guarded by "except TypeError". + # fmt: off + port = int(subauthority_dict["port"]) # pyright: ignore[reportArgumentType] # noqa: E501 # Guarded by "except TypeError". + # fmt: on except TypeError: # If the port wasn't provided it'll be None and int(None) raises a # TypeError From f305405472253608257aff89f765b40eaabf904e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Jun 2024 02:12:08 +0000 Subject: [PATCH 11/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/rfc3986/compat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index b66e1db..5d5146c 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -24,7 +24,8 @@ def to_str( # noqa: D103 b: t.Union[str, bytes], encoding: str = "utf-8", -) -> str: ... +) -> str: + ... 
@t.overload @@ -46,7 +47,8 @@ def to_str( def to_bytes( # noqa: D103 s: t.Union[str, bytes], encoding: str = "utf-8", -) -> bytes: ... +) -> bytes: + ... @t.overload From c7ab1db0ee00a64a75bbecbfea9b754593a4f14c Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Fri, 14 Jun 2024 23:02:49 -0400 Subject: [PATCH 12/29] Annotate `misc.merge_paths()` while breaking an import cycle. - `uri` can't be eagerly imported within `misc` without causing a circular import, but is necessary for getting the most currently correct parameter type. --- src/rfc3986/misc.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 7cbbbec..5e0d925 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -18,9 +18,14 @@ expressions for parsing and validating URIs and their components. """ import re +import typing as t from . import abnf_regexp +if t.TYPE_CHECKING: + # Break an import loop. + from . import uri + # These are enumerated for the named tuple used as a superclass of # URIReference URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"] @@ -118,7 +123,7 @@ # Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 -def merge_paths(base_uri, relative_path): +def merge_paths(base_uri: "uri.URIReference", relative_path: str) -> str: """Merge a base URI's path with a relative URI's path.""" if base_uri.path is None and base_uri.authority is not None: return "/" + relative_path From c3b49f28d96263ae6398f6c1705131db5a816210 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 15 Jun 2024 08:32:04 -0400 Subject: [PATCH 13/29] Add a comment explaining the reason `_QueryType` in builder.py is modified from typeshed.
--- src/rfc3986/builder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index a5aa107..9fc2ef8 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -20,10 +20,12 @@ from . import uri from . import uri_reference -# Copied from urllib.parse in typeshed. +# Modified from urllib.parse in typeshed. _QueryType = t.Union[ t.Mapping[t.Any, t.Any], t.Mapping[t.Any, t.Sequence[t.Any]], + # Substituting List for Sequence since one of the add/extend methods + # below has a runtime isinstance check for list. t.List[t.Tuple[t.Any, t.Any]], t.List[t.Tuple[t.Any, t.Sequence[t.Any]]], ] From dd96a347b673be92ba9cf1ca9c6b9ac5984182ee Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 15 Jun 2024 11:20:44 -0400 Subject: [PATCH 14/29] Fix compat.py and api.py parameter annotations to accept bytearray as well. - This provides parity with the way urllib.parse is typed and the runtime implementation, since bytearray isn't a str and also has a `decode(encoding)` method. - Adjusted compat functions to also accept bytearray for the same reasons. - Adjusted the parameter types of the other functions called in api.py that are part of the `(U|I)RIReference` interfaces to make sure api.py fully type-checks. - TODO: Consider making a type alias for `t.Union[str, bytes, bytearray]` and using that everywhere to avoid verbosity? - TODO: For the **kwargs functions in api.py and `URIMixin.is_valid()`, consider enumerating the possible keyword-only parameters?
--- src/rfc3986/_mixin.py | 2 +- src/rfc3986/api.py | 28 +++++++++++++++++++++++----- src/rfc3986/compat.py | 8 ++++---- src/rfc3986/iri.py | 7 ++++++- src/rfc3986/parseresult.py | 7 ++++++- src/rfc3986/uri.py | 7 ++++++- 6 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index d7d3589..a6278c5 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -103,7 +103,7 @@ def is_absolute(self): """ return bool(misc.ABSOLUTE_URI_MATCHER.match(self.unsplit())) - def is_valid(self, **kwargs): + def is_valid(self, **kwargs: bool) -> bool: """Determine if the URI is valid. .. deprecated:: 1.1.0 diff --git a/src/rfc3986/api.py b/src/rfc3986/api.py index 0c4ca18..bd7bde7 100644 --- a/src/rfc3986/api.py +++ b/src/rfc3986/api.py @@ -17,12 +17,17 @@ This module defines functions and provides access to the public attributes and classes of rfc3986. """ +import typing as t + from .iri import IRIReference from .parseresult import ParseResult from .uri import URIReference -def uri_reference(uri: str, encoding: str = "utf-8") -> URIReference: +def uri_reference( + uri: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", +) -> URIReference: """Parse a URI string into a URIReference. This is a convenience function. You could achieve the same end by using @@ -36,7 +41,10 @@ def uri_reference(uri: str, encoding: str = "utf-8") -> URIReference: return URIReference.from_string(uri, encoding) -def iri_reference(iri: str, encoding: str = "utf-8") -> IRIReference: +def iri_reference( + iri: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", +) -> IRIReference: """Parse a IRI string into an IRIReference. This is a convenience function. 
You could achieve the same end by using @@ -50,7 +58,11 @@ def iri_reference(iri: str, encoding: str = "utf-8") -> IRIReference: return IRIReference.from_string(iri, encoding) -def is_valid_uri(uri: str, encoding: str = "utf-8", **kwargs: bool) -> bool: +def is_valid_uri( + uri: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", + **kwargs: bool, +) -> bool: """Determine if the URI given is valid. This is a convenience function. You could use either @@ -75,7 +87,10 @@ def is_valid_uri(uri: str, encoding: str = "utf-8", **kwargs: bool) -> bool: return URIReference.from_string(uri, encoding).is_valid(**kwargs) -def normalize_uri(uri: str, encoding: str = "utf-8") -> str: +def normalize_uri( + uri: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", +) -> str: """Normalize the given URI. This is a convenience function. You could use either @@ -91,7 +106,10 @@ def normalize_uri(uri: str, encoding: str = "utf-8") -> str: return normalized_reference.unsplit() -def urlparse(uri: str, encoding: str = "utf-8") -> ParseResult: +def urlparse( + uri: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", +) -> ParseResult: """Parse a given URI and return a ParseResult. This is a partial replacement of the standard library's urlparse function. diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index 5d5146c..d509d64 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -22,7 +22,7 @@ @t.overload def to_str( # noqa: D103 - b: t.Union[str, bytes], + b: t.Union[str, bytes, bytearray], encoding: str = "utf-8", ) -> str: ... 
@@ -34,7 +34,7 @@ def to_str(b: None, encoding: str = "utf-8") -> None: # noqa: D103 def to_str( - b: t.Optional[t.Union[str, bytes]], + b: t.Optional[t.Union[str, bytes, bytearray]], encoding: str = "utf-8", ) -> t.Optional[str]: """Ensure that b is text in the specified encoding.""" @@ -45,7 +45,7 @@ def to_str( @t.overload def to_bytes( # noqa: D103 - s: t.Union[str, bytes], + s: t.Union[str, bytes, bytearray], encoding: str = "utf-8", ) -> bytes: ... @@ -57,7 +57,7 @@ def to_bytes(s: None, encoding: str = "utf-8") -> None: # noqa: D103 def to_bytes( - s: t.Optional[t.Union[str, bytes]], + s: t.Optional[t.Union[str, bytes, bytearray]], encoding: str = "utf-8", ) -> t.Optional[bytes]: """Ensure that s is converted to bytes from the encoding.""" diff --git a/src/rfc3986/iri.py b/src/rfc3986/iri.py index 363d6e6..01f51ef 100644 --- a/src/rfc3986/iri.py +++ b/src/rfc3986/iri.py @@ -13,6 +13,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +import typing as t from collections import namedtuple from . import compat @@ -80,7 +81,11 @@ def _match_subauthority(self): return misc.ISUBAUTHORITY_MATCHER.match(self.authority) @classmethod - def from_string(cls, iri_string, encoding="utf-8"): + def from_string( + cls, + iri_string: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", + ): """Parse a IRI reference from the given unicode IRI string. :param str iri_string: Unicode IRI to be parsed into a reference. diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index b69432d..b672548 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Module containing the urlparse compatibility logic.""" +import typing as t from collections import namedtuple from . 
import compat @@ -155,7 +156,11 @@ def from_parts( @classmethod def from_string( - cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True + cls, + uri_string: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", + strict: bool = True, + lazy_normalize: bool = True, ): """Parse a URI from the given unicode URI string. diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 9fff75e..f5448d3 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -13,6 +13,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +import typing as t from collections import namedtuple from . import compat @@ -140,7 +141,11 @@ def normalize(self): ) @classmethod - def from_string(cls, uri_string, encoding="utf-8"): + def from_string( + cls, + uri_string: t.Union[str, bytes, bytearray], + encoding: str = "utf-8", + ): """Parse a URI reference from the given unicode URI string. :param str uri_string: Unicode URI to be parsed into a reference. From d99f274320c006324ea0ac16955947b96d36f0f6 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 15 Jun 2024 12:44:26 -0400 Subject: [PATCH 15/29] Started annotating IRIReference and URIReference. - Substituted namedtuple inheritance with common base `typing.NamedTuple` subclass in misc.py, since these classes share almost the exact same interface. - Added a _typing_compat.py module to be able to import typing.Self, or a placeholder for it, in multiple other modules without bloating their code. - Added basic method annotations to the two reference classes. - Not annotations-related: - Move the __hash__ implementation over to IRIReference from URIMixin to be congruent with URIReference. - Made the __eq__ implementations more similar to avoid different behavior in cases of inheritance (rare as that might be). 
- Added overloads to `normalizers.normalize_query` and `normalizers.normalize_fragment` to clearly indicate that None will get passed through. This behavior is relied upon by the library currently. - Note: The runtime-related changes can be reverted and reattempted later if need be. Still passing all the tests currently. --- src/rfc3986/_mixin.py | 2 -- src/rfc3986/_typing_compat.py | 19 +++++++++++++++++ src/rfc3986/builder.py | 6 +++--- src/rfc3986/iri.py | 39 +++++++++++++++++++++++------------ src/rfc3986/misc.py | 13 +++++++++--- src/rfc3986/normalizers.py | 24 +++++++++++++++++++-- src/rfc3986/uri.py | 28 +++++++++++++++---------- 7 files changed, 97 insertions(+), 34 deletions(-) create mode 100644 src/rfc3986/_typing_compat.py diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index a6278c5..f55d13f 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -10,8 +10,6 @@ class URIMixin: """Mixin with all shared methods for URIs and IRIs.""" - __hash__ = tuple.__hash__ - def authority_info(self): """Return a dictionary with the ``userinfo``, ``host``, and ``port``. diff --git a/src/rfc3986/_typing_compat.py b/src/rfc3986/_typing_compat.py new file mode 100644 index 0000000..4822ecc --- /dev/null +++ b/src/rfc3986/_typing_compat.py @@ -0,0 +1,19 @@ +import sys +import typing as t + +__all__ = ("Self",) + +if sys.version_info >= (3, 11): + from typing import Self +elif t.TYPE_CHECKING: + from typing_extensions import Self +else: + + class _PlaceholderMeta(type): + # This is meant to make it easier to debug the presence of placeholder + # classes. 
+ def __repr__(self): + return f"placeholder for typing.{self.__name__}" + + class Self(metaclass=_PlaceholderMeta): + """Placeholder for "typing.Self".""" diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 9fc2ef8..42d4763 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -47,7 +47,7 @@ def __init__( scheme: t.Optional[str] = None, userinfo: t.Optional[str] = None, host: t.Optional[str] = None, - port: t.Optional[str] = None, + port: t.Optional[t.Union[int, str]] = None, path: t.Optional[str] = None, query: t.Optional[str] = None, fragment: t.Optional[str] = None, @@ -60,7 +60,7 @@ def __init__( (optional) :param str host: (optional) - :param int port: + :param int | str port: (optional) :param str path: (optional) @@ -72,7 +72,7 @@ def __init__( self.scheme = scheme self.userinfo = userinfo self.host = host - self.port = port + self.port = str(port) if port is not None else port self.path = path self.query = query self.fragment = fragment diff --git a/src/rfc3986/iri.py b/src/rfc3986/iri.py index 01f51ef..6578bc0 100644 --- a/src/rfc3986/iri.py +++ b/src/rfc3986/iri.py @@ -14,13 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. import typing as t -from collections import namedtuple from . import compat from . import exceptions from . import misc from . import normalizers from . import uri +from ._typing_compat import Self try: @@ -29,9 +29,7 @@ idna = None -class IRIReference( - namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin -): +class IRIReference(misc.URIReferenceBase, uri.URIMixin): """Immutable object representing a parsed IRI Reference. Can be encoded into an URIReference object via the procedure @@ -42,10 +40,16 @@ class IRIReference( the future. Check for changes to the interface when upgrading. 
""" - slots = () + encoding: str def __new__( - cls, scheme, authority, path, query, fragment, encoding="utf-8" + cls, + scheme: t.Optional[str], + authority: t.Optional[str], + path: t.Optional[str], + query: t.Optional[str], + fragment: t.Optional[str], + encoding: str = "utf-8", ): """Create a new IRIReference.""" ref = super().__new__( @@ -59,14 +63,16 @@ def __new__( ref.encoding = encoding return ref - def __eq__(self, other): + __hash__ = tuple.__hash__ + + def __eq__(self, other: object): """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): - other_ref = self.__class__(*other) + other_ref = type(self)(*other) elif not isinstance(other, IRIReference): try: - other_ref = self.__class__.from_string(other) + other_ref = self.from_string(other) except TypeError: raise TypeError( "Unable to compare {}() to {}()".format( @@ -77,7 +83,7 @@ def __eq__(self, other): # See http://tools.ietf.org/html/rfc3986#section-6.2 return tuple(self) == tuple(other_ref) - def _match_subauthority(self): + def _match_subauthority(self) -> t.Optional[t.Match[str]]: return misc.ISUBAUTHORITY_MATCHER.match(self.authority) @classmethod @@ -85,7 +91,7 @@ def from_string( cls, iri_string: t.Union[str, bytes, bytearray], encoding: str = "utf-8", - ): + ) -> Self: """Parse a IRI reference from the given unicode IRI string. :param str iri_string: Unicode IRI to be parsed into a reference. @@ -104,7 +110,12 @@ def from_string( encoding, ) - def encode(self, idna_encoder=None): # noqa: C901 + def encode( # noqa: C901 + self, + idna_encoder: t.Optional[ # pyright: ignore[reportRedeclaration] + t.Callable[[str], t.Union[str, bytes]] + ] = None, + ) -> "uri.URIReference": """Encode an IRIReference into a URIReference instance. 
If the ``idna`` module is installed or the ``rfc3986[idna]`` @@ -127,7 +138,9 @@ def encode(self, idna_encoder=None): # noqa: C901 "and the IRI hostname requires encoding" ) - def idna_encoder(name): + def idna_encoder(name: str) -> t.Union[str, bytes]: + assert idna # Known to not be None at this point. + if any(ord(c) > 128 for c in name): try: return idna.encode( diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 5e0d925..7489d8f 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -26,9 +26,16 @@ # Break an import loop. from . import uri -# These are enumerated for the named tuple used as a superclass of -# URIReference -URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"] + +class URIReferenceBase(t.NamedTuple): + """The namedtuple used as a superclass of URIReference and IRIReference.""" + + scheme: t.Optional[str] + authority: t.Optional[str] + path: t.Optional[str] + query: t.Optional[str] + fragment: t.Optional[str] + important_characters = { "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS, diff --git a/src/rfc3986/normalizers.py b/src/rfc3986/normalizers.py index 902c23c..532bfaf 100644 --- a/src/rfc3986/normalizers.py +++ b/src/rfc3986/normalizers.py @@ -82,14 +82,34 @@ def normalize_path(path: str) -> str: return remove_dot_segments(path) -def normalize_query(query: str) -> str: +@t.overload +def normalize_query(query: str) -> str: # noqa: D103 + ... + + +@t.overload +def normalize_query(query: None) -> None: # noqa: D103 + ... + + +def normalize_query(query: t.Optional[str]) -> t.Optional[str]: """Normalize the query string.""" if not query: return query return normalize_percent_characters(query) -def normalize_fragment(fragment: str) -> str: +@t.overload +def normalize_fragment(fragment: str) -> str: # noqa: D103 + ... + + +@t.overload +def normalize_fragment(fragment: None) -> None: # noqa: D103 + ... 
+ + +def normalize_fragment(fragment: t.Optional[str]) -> t.Optional[str]: """Normalize the fragment string.""" if not fragment: return fragment diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index f5448d3..6747447 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -14,15 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. import typing as t -from collections import namedtuple from . import compat from . import misc from . import normalizers from ._mixin import URIMixin +from ._typing_compat import Self -class URIReference(namedtuple("URIReference", misc.URI_COMPONENTS), URIMixin): +class URIReference(misc.URIReferenceBase, URIMixin): """Immutable object representing a parsed URI Reference. .. note:: @@ -80,10 +80,16 @@ class URIReference(namedtuple("URIReference", misc.URI_COMPONENTS), URIMixin): The port parsed from the authority. """ - slots = () + encoding: str def __new__( - cls, scheme, authority, path, query, fragment, encoding="utf-8" + cls, + scheme: t.Optional[str], + authority: t.Optional[str], + path: t.Optional[str], + query: t.Optional[str], + fragment: t.Optional[str], + encoding: str = "utf-8", ): """Create a new URIReference.""" ref = super().__new__( @@ -99,18 +105,18 @@ def __new__( __hash__ = tuple.__hash__ - def __eq__(self, other): + def __eq__(self, other: object): """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): - other_ref = URIReference(*other) + other_ref = type(self)(*other) elif not isinstance(other, URIReference): try: - other_ref = URIReference.from_string(other) + other_ref = self.from_string(other) except TypeError: raise TypeError( - "Unable to compare URIReference() to {}()".format( - type(other).__name__ + "Unable to compare {}() to {}()".format( + type(self).__name__, type(other).__name__ ) ) @@ -118,7 +124,7 @@ def __eq__(self, other): naive_equality = tuple(self) == tuple(other_ref) return naive_equality or 
self.normalized_equality(other_ref) - def normalize(self): + def normalize(self) -> "URIReference": """Normalize this reference as described in Section 6.2.2. This is not an in-place normalization. Instead this creates a new @@ -145,7 +151,7 @@ def from_string( cls, uri_string: t.Union[str, bytes, bytearray], encoding: str = "utf-8", - ): + ) -> Self: """Parse a URI reference from the given unicode URI string. :param str uri_string: Unicode URI to be parsed into a reference. From f4d150ff1c9cecad58de47a51f2bcf80c771f022 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 15 Jun 2024 14:51:57 -0400 Subject: [PATCH 16/29] Fix: Removing the bytearray hints. Sticking to bytes and str is simpler and close enough to urllib.parse's interfaces. - bytearray would end up pervading everything, I think, to the point where it's not worth it. I was hasty in adding those initially. --- src/rfc3986/api.py | 16 +++++----------- src/rfc3986/compat.py | 8 ++++---- src/rfc3986/iri.py | 2 +- src/rfc3986/parseresult.py | 2 +- src/rfc3986/uri.py | 2 +- 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/rfc3986/api.py b/src/rfc3986/api.py index bd7bde7..64bd957 100644 --- a/src/rfc3986/api.py +++ b/src/rfc3986/api.py @@ -25,7 +25,7 @@ def uri_reference( - uri: t.Union[str, bytes, bytearray], + uri: t.Union[str, bytes], encoding: str = "utf-8", ) -> URIReference: """Parse a URI string into a URIReference. @@ -42,7 +42,7 @@ def uri_reference( def iri_reference( - iri: t.Union[str, bytes, bytearray], + iri: t.Union[str, bytes], encoding: str = "utf-8", ) -> IRIReference: """Parse a IRI string into an IRIReference.
@@ -59,7 +59,7 @@ def iri_reference( def is_valid_uri( - uri: t.Union[str, bytes, bytearray], + uri: t.Union[str, bytes], encoding: str = "utf-8", **kwargs: bool, ) -> bool: @@ -87,10 +87,7 @@ def is_valid_uri( return URIReference.from_string(uri, encoding).is_valid(**kwargs) -def normalize_uri( - uri: t.Union[str, bytes, bytearray], - encoding: str = "utf-8", -) -> str: +def normalize_uri(uri: t.Union[str, bytes], encoding: str = "utf-8") -> str: """Normalize the given URI. This is a convenience function. You could use either @@ -106,10 +103,7 @@ def normalize_uri( return normalized_reference.unsplit() -def urlparse( - uri: t.Union[str, bytes, bytearray], - encoding: str = "utf-8", -) -> ParseResult: +def urlparse(uri: t.Union[str, bytes], encoding: str = "utf-8") -> ParseResult: """Parse a given URI and return a ParseResult. This is a partial replacement of the standard library's urlparse function. diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index d509d64..5d5146c 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -22,7 +22,7 @@ @t.overload def to_str( # noqa: D103 - b: t.Union[str, bytes, bytearray], + b: t.Union[str, bytes], encoding: str = "utf-8", ) -> str: ... @@ -34,7 +34,7 @@ def to_str(b: None, encoding: str = "utf-8") -> None: # noqa: D103 def to_str( - b: t.Optional[t.Union[str, bytes, bytearray]], + b: t.Optional[t.Union[str, bytes]], encoding: str = "utf-8", ) -> t.Optional[str]: """Ensure that b is text in the specified encoding.""" @@ -45,7 +45,7 @@ def to_str( @t.overload def to_bytes( # noqa: D103 - s: t.Union[str, bytes, bytearray], + s: t.Union[str, bytes], encoding: str = "utf-8", ) -> bytes: ... 
@@ -57,7 +57,7 @@ def to_bytes(s: None, encoding: str = "utf-8") -> None: # noqa: D103 def to_bytes( - s: t.Optional[t.Union[str, bytes, bytearray]], + s: t.Optional[t.Union[str, bytes]], encoding: str = "utf-8", ) -> t.Optional[bytes]: """Ensure that s is converted to bytes from the encoding.""" diff --git a/src/rfc3986/iri.py b/src/rfc3986/iri.py index 6578bc0..dab94d3 100644 --- a/src/rfc3986/iri.py +++ b/src/rfc3986/iri.py @@ -89,7 +89,7 @@ def _match_subauthority(self) -> t.Optional[t.Match[str]]: @classmethod def from_string( cls, - iri_string: t.Union[str, bytes, bytearray], + iri_string: t.Union[str, bytes], encoding: str = "utf-8", ) -> Self: """Parse a IRI reference from the given unicode IRI string. diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index b672548..139d1cb 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -157,7 +157,7 @@ def from_parts( @classmethod def from_string( cls, - uri_string: t.Union[str, bytes, bytearray], + uri_string: t.Union[str, bytes], encoding: str = "utf-8", strict: bool = True, lazy_normalize: bool = True, diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 6747447..8f7b592 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -149,7 +149,7 @@ def normalize(self) -> "URIReference": @classmethod def from_string( cls, - uri_string: t.Union[str, bytes, bytearray], + uri_string: t.Union[str, bytes], encoding: str = "utf-8", ) -> Self: """Parse a URI reference from the given unicode URI string. From 0179352da97a42d9cfb1dadd18113a164817cf9c Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 15 Jun 2024 20:44:00 -0400 Subject: [PATCH 17/29] Partially annotated parseresult.py and _mixin.py. - After review, we don't actually need a generic namedtuple to make this work in a type stub. Inline annotations seemingly work fine so far and don't significantly change the runtime. 
This might be option 4, which can hold until 3.11 is the lowest supported version. - However, in the meantime, `ParseResultMixin` can be made generic in a helpful way with `typing.AnyStr`. - `port` is a strange case and might be annotated too narrowly or incorrectly; based on some of the ways that it's populated, it might sometimes be an int. - Prefixed `_typing_compat.Self` imports with underscore to avoid polluting namespace with public variables. --- src/rfc3986/_mixin.py | 52 ++++++----- src/rfc3986/iri.py | 4 +- src/rfc3986/misc.py | 2 +- src/rfc3986/parseresult.py | 183 ++++++++++++++++++++++--------------- src/rfc3986/uri.py | 4 +- 5 files changed, 143 insertions(+), 102 deletions(-) diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index f55d13f..2885726 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -1,16 +1,24 @@ """Module containing the implementation of the URIMixin class.""" +import typing as t import warnings from . import exceptions as exc from . import misc from . import normalizers from . import validators +from ._typing_compat import Self as _Self + + +class _AuthorityInfo(t.TypedDict): + userinfo: t.Optional[str] + host: t.Optional[str] + port: t.Optional[str] class URIMixin: """Mixin with all shared methods for URIs and IRIs.""" - def authority_info(self): + def authority_info(self) -> _AuthorityInfo: """Return a dictionary with the ``userinfo``, ``host``, and ``port``.
If the authority is not valid, it will raise a @@ -51,11 +59,11 @@ def authority_info(self): return matches - def _match_subauthority(self): + def _match_subauthority(self) -> t.Optional[t.Match[str]]: return misc.SUBAUTHORITY_MATCHER.match(self.authority) @property - def _validator(self): + def _validator(self) -> validators.Validator: v = getattr(self, "_cached_validator", None) if v is not None: return v @@ -65,7 +73,7 @@ def _validator(self): return self._cached_validator @property - def host(self): + def host(self) -> t.Optional[str]: """If present, a string representing the host.""" try: authority = self.authority_info() @@ -74,7 +82,7 @@ def host(self): return authority["host"] @property - def port(self): + def port(self) -> t.Optional[str]: """If present, the port extracted from the authority.""" try: authority = self.authority_info() @@ -83,7 +91,7 @@ def port(self): return authority["port"] @property - def userinfo(self): + def userinfo(self) -> t.Optional[str]: """If present, the userinfo extracted from the authority.""" try: authority = self.authority_info() @@ -91,7 +99,7 @@ def userinfo(self): return None return authority["userinfo"] - def is_absolute(self): + def is_absolute(self) -> bool: """Determine if this URI Reference is an absolute URI. See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation. @@ -135,7 +143,7 @@ def is_valid(self, **kwargs: bool) -> bool: ] return all(v(r) for v, r in validators) - def authority_is_valid(self, require=False): + def authority_is_valid(self, require: bool = False) -> bool: """Determine if the authority component is valid. .. deprecated:: 1.1.0 @@ -165,7 +173,7 @@ def authority_is_valid(self, require=False): require=require, ) - def scheme_is_valid(self, require=False): + def scheme_is_valid(self, require: bool = False) -> bool: """Determine if the scheme component is valid. .. 
deprecated:: 1.1.0 @@ -184,7 +192,7 @@ def scheme_is_valid(self, require=False): ) return validators.scheme_is_valid(self.scheme, require) - def path_is_valid(self, require=False): + def path_is_valid(self, require: bool = False) -> bool: """Determine if the path component is valid. .. deprecated:: 1.1.0 @@ -203,7 +211,7 @@ def path_is_valid(self, require=False): ) return validators.path_is_valid(self.path, require) - def query_is_valid(self, require=False): + def query_is_valid(self, require: bool = False) -> bool: """Determine if the query component is valid. .. deprecated:: 1.1.0 @@ -222,7 +230,7 @@ def query_is_valid(self, require=False): ) return validators.query_is_valid(self.query, require) - def fragment_is_valid(self, require=False): + def fragment_is_valid(self, require: bool = False) -> bool: """Determine if the fragment component is valid. .. deprecated:: 1.1.0 @@ -241,7 +249,7 @@ def fragment_is_valid(self, require=False): ) return validators.fragment_is_valid(self.fragment, require) - def normalized_equality(self, other_ref): + def normalized_equality(self, other_ref) -> bool: """Compare this URIReference to another URIReference. :param URIReference other_ref: (required), The reference with which @@ -251,7 +259,7 @@ def normalized_equality(self, other_ref): """ return tuple(self.normalize()) == tuple(other_ref.normalize()) - def resolve_with(self, base_uri, strict=False): + def resolve_with(self, base_uri, strict: bool = False) -> _Self: """Use an absolute URI Reference to resolve this relative reference. Assuming this is a relative reference that you would like to resolve, @@ -323,14 +331,14 @@ def resolve_with(self, base_uri, strict=False): ) return target - def unsplit(self): + def unsplit(self) -> str: """Create a URI string from the components. :returns: The URI Reference reconstituted as a string. 
:rtype: str """ # See http://tools.ietf.org/html/rfc3986#section-5.3 - result_list = [] + result_list: list[str] = [] if self.scheme: result_list.extend([self.scheme, ":"]) if self.authority: @@ -345,12 +353,12 @@ def unsplit(self): def copy_with( self, - scheme=misc.UseExisting, - authority=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - ): + scheme: t.Optional[str] = misc.UseExisting, + authority: t.Optional[str] = misc.UseExisting, + path: t.Optional[str] = misc.UseExisting, + query: t.Optional[str] = misc.UseExisting, + fragment: t.Optional[str] = misc.UseExisting, + ) -> _Self: """Create a copy of this reference with the new components. :param str scheme: diff --git a/src/rfc3986/iri.py b/src/rfc3986/iri.py index dab94d3..a4da194 100644 --- a/src/rfc3986/iri.py +++ b/src/rfc3986/iri.py @@ -20,7 +20,7 @@ from . import misc from . import normalizers from . import uri -from ._typing_compat import Self +from ._typing_compat import Self as _Self try: @@ -91,7 +91,7 @@ def from_string( cls, iri_string: t.Union[str, bytes], encoding: str = "utf-8", - ) -> Self: + ) -> _Self: """Parse a IRI reference from the given unicode IRI string. :param str iri_string: Unicode IRI to be parsed into a reference. diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 7489d8f..0c40502 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -140,4 +140,4 @@ def merge_paths(base_uri: "uri.URIReference", relative_path: str) -> str: return path[:index] + "/" + relative_path -UseExisting = object() +UseExisting: t.Any = object() diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index 139d1cb..ca5b922 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -20,6 +20,7 @@ from . import misc from . import normalizers from . 
import uri +from ._typing_compat import Self as _Self __all__ = ("ParseResult", "ParseResultBytes") @@ -34,8 +35,11 @@ ) -class ParseResultMixin: - def _generate_authority(self, attributes): +class ParseResultMixin(t.Generic[t.AnyStr]): + def _generate_authority( + self, + attributes: t.Dict[str, t.Optional[t.AnyStr]], + ) -> str: # I swear I did not align the comparisons below. That's just how they # happened to align based on pep8 and attribute lengths. userinfo, host, port = ( @@ -55,28 +59,28 @@ def _generate_authority(self, attributes): return self.authority.decode("utf-8") return self.authority - def geturl(self): + def geturl(self) -> t.AnyStr: """Shim to match the standard library method.""" return self.unsplit() @property - def hostname(self): + def hostname(self) -> t.AnyStr: """Shim to match the standard library.""" return self.host @property - def netloc(self): + def netloc(self) -> t.AnyStr: """Shim to match the standard library.""" return self.authority @property - def params(self): + def params(self) -> t.AnyStr: """Shim to match the standard library.""" return self.query class ParseResult( - namedtuple("ParseResult", PARSED_COMPONENTS), ParseResultMixin + namedtuple("ParseResult", PARSED_COMPONENTS), ParseResultMixin[str] ): """Implementation of urlparse compatibility class. @@ -84,19 +88,27 @@ class ParseResult( urlparse.ParseResult class. 
""" - slots = () + scheme: t.Optional[str] + userinfo: t.Optional[str] + host: t.Optional[str] + port: t.Optional[str] + path: t.Optional[str] + query: t.Optional[str] + fragment: t.Optional[str] + encoding: str + reference: "uri.URIReference" def __new__( cls, - scheme, - userinfo, - host, - port, - path, - query, - fragment, - uri_ref, - encoding="utf-8", + scheme: t.Optional[str], + userinfo: t.Optional[str], + host: t.Optional[str], + port: t.Optional[str], + path: t.Optional[str], + query: t.Optional[str], + fragment: t.Optional[str], + uri_ref: "uri.URIReference", + encoding: str = "utf-8", ): """Create a new ParseResult.""" parse_result = super().__new__( @@ -116,15 +128,15 @@ def __new__( @classmethod def from_parts( cls, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - encoding="utf-8", - ): + scheme: t.Optional[str] = None, + userinfo: t.Optional[str] = None, + host: t.Optional[str] = None, + port: t.Optional[t.Union[int, str]] = None, + path: t.Optional[str] = None, + query: t.Optional[str] = None, + fragment: t.Optional[str] = None, + encoding: str = "utf-8", + ) -> _Self: """Create a ParseResult instance from its parts.""" authority = "" if userinfo is not None: @@ -161,7 +173,7 @@ def from_string( encoding: str = "utf-8", strict: bool = True, lazy_normalize: bool = True, - ): + ) -> _Self: """Parse a URI from the given unicode URI string. :param str uri_string: Unicode URI to be parsed into a reference. 
@@ -195,20 +207,20 @@ def authority(self): def copy_with( self, - scheme=misc.UseExisting, - userinfo=misc.UseExisting, - host=misc.UseExisting, - port=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, + scheme: t.Optional[str] = misc.UseExisting, + userinfo: t.Optional[str] = misc.UseExisting, + host: t.Optional[str] = misc.UseExisting, + port: t.Optional[str] = misc.UseExisting, + path: t.Optional[str] = misc.UseExisting, + query: t.Optional[str] = misc.UseExisting, + fragment: t.Optional[str] = misc.UseExisting, ): """Create a copy of this instance replacing with specified parts.""" attributes = zip( PARSED_COMPONENTS, (scheme, userinfo, host, port, path, query, fragment), ) - attrs_dict = {} + attrs_dict: t.Dict[str, t.Optional[str]] = {} for name, value in attributes: if value is misc.UseExisting: value = getattr(self, name) @@ -223,7 +235,7 @@ def copy_with( ) return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict) - def encode(self, encoding=None): + def encode(self, encoding: t.Optional[str] = None) -> "ParseResultBytes": """Convert to an instance of ParseResultBytes.""" encoding = encoding or self.encoding attrs = dict( @@ -239,7 +251,7 @@ def encode(self, encoding=None): uri_ref=self.reference, encoding=encoding, **attrs ) - def unsplit(self, use_idna=False): + def unsplit(self, use_idna: bool = False) -> str: """Create a URI string from the components. :returns: The parsed URI reconstituted as a string. 
@@ -254,22 +266,33 @@ def unsplit(self, use_idna=False): class ParseResultBytes( - namedtuple("ParseResultBytes", PARSED_COMPONENTS), ParseResultMixin + namedtuple("ParseResultBytes", PARSED_COMPONENTS), ParseResultMixin[bytes] ): """Compatibility shim for the urlparse.ParseResultBytes object.""" + scheme: t.Optional[bytes] + userinfo: t.Optional[bytes] + host: t.Optional[bytes] + port: t.Optional[bytes] + path: t.Optional[bytes] + query: t.Optional[bytes] + fragment: t.Optional[bytes] + encoding: str + reference: "uri.URIReference" + lazy_normalize: bool + def __new__( cls, - scheme, - userinfo, - host, - port, - path, - query, - fragment, - uri_ref, - encoding="utf-8", - lazy_normalize=True, + scheme: t.Optional[bytes], + userinfo: t.Optional[bytes], + host: t.Optional[bytes], + port: t.Optional[bytes], + path: t.Optional[bytes], + query: t.Optional[bytes], + fragment: t.Optional[bytes], + uri_ref: "uri.URIReference", + encoding: str = "utf-8", + lazy_normalize: bool = True, ): """Create a new ParseResultBytes instance.""" parse_result = super().__new__( @@ -290,15 +313,15 @@ def __new__( @classmethod def from_parts( cls, - scheme=None, - userinfo=None, - host=None, - port=None, - path=None, - query=None, - fragment=None, - encoding="utf-8", - lazy_normalize=True, + scheme: t.Optional[str] = None, + userinfo: t.Optional[str] = None, + host: t.Optional[str] = None, + port: t.Optional[t.Union[str, int]] = None, + path: t.Optional[str] = None, + query: t.Optional[str] = None, + fragment: t.Optional[str] = None, + encoding: str = "utf-8", + lazy_normalize: bool = True, ): """Create a ParseResult instance from its parts.""" authority = "" @@ -335,8 +358,12 @@ def from_parts( @classmethod def from_string( - cls, uri_string, encoding="utf-8", strict=True, lazy_normalize=True - ): + cls, + uri_string: t.Union[str, bytes], + encoding: str = "utf-8", + strict: bool = True, + lazy_normalize: bool = True, + ) -> _Self: """Parse a URI from the given unicode URI string. 
:param str uri_string: Unicode URI to be parsed into a reference. @@ -366,21 +393,21 @@ def from_string( ) @property - def authority(self): + def authority(self) -> bytes: """Return the normalized authority.""" return self.reference.authority.encode(self.encoding) def copy_with( self, - scheme=misc.UseExisting, - userinfo=misc.UseExisting, - host=misc.UseExisting, - port=misc.UseExisting, - path=misc.UseExisting, - query=misc.UseExisting, - fragment=misc.UseExisting, - lazy_normalize=True, - ): + scheme: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + userinfo: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + host: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + port: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + path: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + query: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + fragment: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + lazy_normalize: bool = True, + ) -> "ParseResultBytes": """Create a copy of this instance replacing with specified parts.""" attributes = zip( PARSED_COMPONENTS, @@ -393,6 +420,10 @@ def copy_with( if not isinstance(value, bytes) and hasattr(value, "encode"): value = value.encode(self.encoding) attrs_dict[name] = value + + if t.TYPE_CHECKING: + attrs_dict = t.cast(t.Dict[str, t.Optional[bytes]], attrs_dict) + authority = self._generate_authority(attrs_dict) to_str = compat.to_str ref = self.reference.copy_with( @@ -411,7 +442,7 @@ def copy_with( **attrs_dict, ) - def unsplit(self, use_idna=False): + def unsplit(self, use_idna: bool = False) -> bytes: """Create a URI bytes object from the components. :returns: The parsed URI reconstituted as a string. 
@@ -430,7 +461,9 @@ def unsplit(self, use_idna=False): return uri.encode(self.encoding) -def split_authority(authority): +def split_authority( + authority: str, +) -> t.Tuple[t.Optional[str], t.Optional[str], t.Optional[str]]: # Initialize our expected return values userinfo = host = port = None # Initialize an extra var we may need to use @@ -457,7 +490,7 @@ def split_authority(authority): return userinfo, host, port -def authority_from(reference, strict): +def authority_from(reference: "uri.URIReference", strict: bool): try: subauthority = reference.authority_info() except exceptions.InvalidAuthority: @@ -467,9 +500,9 @@ def authority_from(reference, strict): else: # Thanks to Richard Barrell for this idea: # https://twitter.com/0x2ba22e11/status/617338811975139328 - userinfo, host, port = ( - subauthority.get(p) for p in ("userinfo", "host", "port") - ) + userinfo = subauthority.get("userinfo") + host = subauthority.get("host") + port = subauthority.get("port") if port: if port.isascii() and port.isdigit(): diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 8f7b592..99b860c 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -19,7 +19,7 @@ from . import misc from . import normalizers from ._mixin import URIMixin -from ._typing_compat import Self +from ._typing_compat import Self as _Self class URIReference(misc.URIReferenceBase, URIMixin): @@ -151,7 +151,7 @@ def from_string( cls, uri_string: t.Union[str, bytes], encoding: str = "utf-8", - ) -> Self: + ) -> _Self: """Parse a URI reference from the given unicode URI string. :param str uri_string: Unicode URI to be parsed into a reference. From 3969c1dfdd72b9fd53a7ad097087132f27ec4527 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sun, 16 Jun 2024 10:32:12 -0400 Subject: [PATCH 18/29] Finish annotating return types in builder.py. 
--- src/rfc3986/builder.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 42d4763..1c224f2 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -19,6 +19,7 @@ from . import normalizers from . import uri from . import uri_reference +from ._typing_compat import Self as _Self # Modified from urllib.parse in typeshed. _QueryType = t.Union[ @@ -87,7 +88,7 @@ def __repr__(self): return formatstr.format(b=self) @classmethod - def from_uri(cls, reference: t.Union["uri.URIReference", str]): + def from_uri(cls, reference: t.Union["uri.URIReference", str]) -> _Self: """Initialize the URI builder from another URI. Takes the given URI reference and creates a new URI builder instance @@ -106,7 +107,7 @@ def from_uri(cls, reference: t.Union["uri.URIReference", str]): fragment=reference.fragment, ) - def add_scheme(self, scheme: str): + def add_scheme(self, scheme: str) -> "URIBuilder": """Add a scheme to our builder object. After normalizing, this will generate a new URIBuilder instance with @@ -130,7 +131,11 @@ def add_scheme(self, scheme: str): fragment=self.fragment, ) - def add_credentials(self, username: str, password: t.Optional[str]): + def add_credentials( + self, + username: str, + password: t.Optional[str], + ) -> "URIBuilder": """Add credentials as the userinfo portion of the URI. .. code-block:: python @@ -163,7 +168,7 @@ def add_credentials(self, username: str, password: t.Optional[str]): fragment=self.fragment, ) - def add_host(self, host: str): + def add_host(self, host: str) -> "URIBuilder": """Add hostname to the URI. .. code-block:: python @@ -183,7 +188,7 @@ def add_host(self, host: str): fragment=self.fragment, ) - def add_port(self, port: t.Union[str, int]): + def add_port(self, port: t.Union[str, int]) -> "URIBuilder": """Add port to the URI. .. 
code-block:: python @@ -222,7 +227,7 @@ def add_port(self, port: t.Union[str, int]): fragment=self.fragment, ) - def add_path(self, path: str): + def add_path(self, path: str) -> "URIBuilder": """Add a path to the URI. .. code-block:: python @@ -249,7 +254,7 @@ def add_path(self, path: str): fragment=self.fragment, ) - def extend_path(self, path: str): + def extend_path(self, path: str) -> "URIBuilder": """Extend the existing path value with the provided value. .. versionadded:: 1.5.0 @@ -278,7 +283,7 @@ def extend_path(self, path: str): return self.add_path(path) - def add_query_from(self, query_items: _QueryType): + def add_query_from(self, query_items: _QueryType) -> "URIBuilder": """Generate and add a query a dictionary or list of tuples. .. code-block:: python @@ -304,7 +309,7 @@ def add_query_from(self, query_items: _QueryType): fragment=self.fragment, ) - def extend_query_with(self, query_items: _QueryType): + def extend_query_with(self, query_items: _QueryType) -> "URIBuilder": """Extend the existing query string with the new query items. .. versionadded:: 1.5.0 @@ -325,7 +330,7 @@ def extend_query_with(self, query_items: _QueryType): return self.add_query_from(original_query_items + query_items) - def add_query(self, query: str): + def add_query(self, query: str) -> "URIBuilder": """Add a pre-formated query string to the URI. .. code-block:: python @@ -345,7 +350,7 @@ def add_query(self, query: str): fragment=self.fragment, ) - def add_fragment(self, fragment: str): + def add_fragment(self, fragment: str) -> "URIBuilder": """Add a fragment to the URI. .. code-block:: python @@ -390,7 +395,7 @@ def finalize(self) -> "uri.URIReference": self.fragment, ) - def geturl(self): + def geturl(self) -> str: """Generate the URL from this builder. .. 
versionadded:: 1.5.0 From 9e812f3032c8680055cf1703ceecbb241d28d9fd Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sun, 16 Jun 2024 12:43:24 -0400 Subject: [PATCH 19/29] Made minor adjustments to a few annotations. - Made int | str order consistent. - The ParseResultMixin shim properties are now marked with t.Optional, since what they wrap can be None. --- src/rfc3986/builder.py | 2 +- src/rfc3986/parseresult.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rfc3986/builder.py b/src/rfc3986/builder.py index 1c224f2..68789db 100644 --- a/src/rfc3986/builder.py +++ b/src/rfc3986/builder.py @@ -188,7 +188,7 @@ def add_host(self, host: str) -> "URIBuilder": fragment=self.fragment, ) - def add_port(self, port: t.Union[str, int]) -> "URIBuilder": + def add_port(self, port: t.Union[int, str]) -> "URIBuilder": """Add port to the URI. .. code-block:: python diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index ca5b922..b15cdf2 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -64,17 +64,17 @@ def geturl(self) -> t.AnyStr: return self.unsplit() @property - def hostname(self) -> t.AnyStr: + def hostname(self) -> t.Optional[t.AnyStr]: """Shim to match the standard library.""" return self.host @property - def netloc(self) -> t.AnyStr: + def netloc(self) -> t.Optional[t.AnyStr]: """Shim to match the standard library.""" return self.authority @property - def params(self) -> t.AnyStr: + def params(self) -> t.Optional[t.AnyStr]: """Shim to match the standard library.""" return self.query @@ -210,7 +210,7 @@ def copy_with( scheme: t.Optional[str] = misc.UseExisting, userinfo: t.Optional[str] = misc.UseExisting, host: t.Optional[str] = misc.UseExisting, - port: t.Optional[str] = misc.UseExisting, + port: t.Optional[t.Union[int, str]] = misc.UseExisting, path: t.Optional[str] = misc.UseExisting, query: t.Optional[str] = misc.UseExisting, fragment: 
t.Optional[str] = misc.UseExisting, @@ -316,7 +316,7 @@ def from_parts( scheme: t.Optional[str] = None, userinfo: t.Optional[str] = None, host: t.Optional[str] = None, - port: t.Optional[t.Union[str, int]] = None, + port: t.Optional[t.Union[int, str]] = None, path: t.Optional[str] = None, query: t.Optional[str] = None, fragment: t.Optional[str] = None, From 9862d650941fb0a93d7b84daa15288135da5c69a Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Mon, 17 Jun 2024 07:53:12 -0400 Subject: [PATCH 20/29] Fix port not being marked as `int` in several places. - I was confused on what the interface for port was initially, but a close look at how it's tested gave me a better idea. - Also, add a few explicit return annotations. --- src/rfc3986/_mixin.py | 2 ++ src/rfc3986/iri.py | 4 ++-- src/rfc3986/parseresult.py | 18 +++++++++--------- src/rfc3986/uri.py | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index 2885726..c4c52bd 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -10,6 +10,8 @@ class _AuthorityInfo(t.TypedDict): + """A typed dict for the authority info triple: userinfo, host, and port.""" + userinfo: t.Optional[str] host: t.Optional[str] port: t.Optional[str] diff --git a/src/rfc3986/iri.py b/src/rfc3986/iri.py index a4da194..e2a292e 100644 --- a/src/rfc3986/iri.py +++ b/src/rfc3986/iri.py @@ -50,7 +50,7 @@ def __new__( query: t.Optional[str], fragment: t.Optional[str], encoding: str = "utf-8", - ): + ) -> _Self: """Create a new IRIReference.""" ref = super().__new__( cls, @@ -65,7 +65,7 @@ def __new__( __hash__ = tuple.__hash__ - def __eq__(self, other: object): + def __eq__(self, other: object) -> bool: """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index b15cdf2..a65de5e 100644 --- a/src/rfc3986/parseresult.py +++ 
b/src/rfc3986/parseresult.py @@ -91,7 +91,7 @@ class ParseResult( scheme: t.Optional[str] userinfo: t.Optional[str] host: t.Optional[str] - port: t.Optional[str] + port: t.Optional[int] path: t.Optional[str] query: t.Optional[str] fragment: t.Optional[str] @@ -103,13 +103,13 @@ def __new__( scheme: t.Optional[str], userinfo: t.Optional[str], host: t.Optional[str], - port: t.Optional[str], + port: t.Optional[int], path: t.Optional[str], query: t.Optional[str], fragment: t.Optional[str], uri_ref: "uri.URIReference", encoding: str = "utf-8", - ): + ) -> _Self: """Create a new ParseResult.""" parse_result = super().__new__( cls, @@ -201,7 +201,7 @@ def from_string( ) @property - def authority(self): + def authority(self) -> t.Optional[str]: """Return the normalized authority.""" return self.reference.authority @@ -214,7 +214,7 @@ def copy_with( path: t.Optional[str] = misc.UseExisting, query: t.Optional[str] = misc.UseExisting, fragment: t.Optional[str] = misc.UseExisting, - ): + ) -> "ParseResult": """Create a copy of this instance replacing with specified parts.""" attributes = zip( PARSED_COMPONENTS, @@ -273,7 +273,7 @@ class ParseResultBytes( scheme: t.Optional[bytes] userinfo: t.Optional[bytes] host: t.Optional[bytes] - port: t.Optional[bytes] + port: t.Optional[int] path: t.Optional[bytes] query: t.Optional[bytes] fragment: t.Optional[bytes] @@ -286,14 +286,14 @@ def __new__( scheme: t.Optional[bytes], userinfo: t.Optional[bytes], host: t.Optional[bytes], - port: t.Optional[bytes], + port: t.Optional[int], path: t.Optional[bytes], query: t.Optional[bytes], fragment: t.Optional[bytes], uri_ref: "uri.URIReference", encoding: str = "utf-8", lazy_normalize: bool = True, - ): + ) -> _Self: """Create a new ParseResultBytes instance.""" parse_result = super().__new__( cls, @@ -322,7 +322,7 @@ def from_parts( fragment: t.Optional[str] = None, encoding: str = "utf-8", lazy_normalize: bool = True, - ): + ) -> _Self: """Create a ParseResult instance from its parts.""" 
authority = "" if userinfo is not None: diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 99b860c..235a89b 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -90,7 +90,7 @@ def __new__( query: t.Optional[str], fragment: t.Optional[str], encoding: str = "utf-8", - ): + ) -> _Self: """Create a new URIReference.""" ref = super().__new__( cls, @@ -105,7 +105,7 @@ def __new__( __hash__ = tuple.__hash__ - def __eq__(self, other: object): + def __eq__(self, other: object) -> bool: """Compare this reference to another.""" other_ref = other if isinstance(other, tuple): From 3c22353378454ae918bc8e0361700715dc6e86e8 Mon Sep 17 00:00:00 2001 From: Thanos <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Mon, 17 Jun 2024 17:37:59 -0400 Subject: [PATCH 21/29] More annotations that I forgot to break up into multiple commits. - validators.py - Finish annotating return types. - Change `ensure_one_of` to take a `Collection`, not a `Container`, since it needs to be iterable within `UnpermittedComponentError.__init__`. - Change `authority_is_valid` to permit None as an input; continuation of making sure is_valid allowing None propagates. Also, this behavior is depended on elsewhere in the library (just one spot, I think). - parseresult.py - Add variable annotations to `ParseResultMixin`, and make sure _generate_authority is allowed to return `None`. - Fix `ParseResultBytes.copy_with` not accepting an int for port. - Annotate return type for `authority_from`. - misc.py - Use common base for `URIReference` and `IRIReference` as annotation for `merge_paths` and remove circular import. - exceptions.py - Annotate everything. - _mixin.py - Add variable annotations to `URIMixin`; they're under a TYPE_CHECKING block so that only the subclasses' annotations can be found in cases of introspection. Might be overkill. - Use `uri.URIReference` to annotate parameters for various functions.
- TODO: Check if these are potentially too wide, since `IRIReference` also exists and inherits from `URIMixin`? - Use hacky "typing.cast within an elided if block" trick to improve typing within `URIMixin.resolve_with`. --- src/rfc3986/_mixin.py | 22 ++++++++++++++++++--- src/rfc3986/exceptions.py | 22 ++++++++++++++------- src/rfc3986/misc.py | 6 +----- src/rfc3986/parseresult.py | 20 ++++++++++++++++--- src/rfc3986/validators.py | 39 ++++++++++++++++++++------------------ 5 files changed, 73 insertions(+), 36 deletions(-) diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index c4c52bd..da102b7 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -5,6 +5,7 @@ from . import exceptions as exc from . import misc from . import normalizers +from . import uri from . import validators from ._typing_compat import Self as _Self @@ -20,6 +21,14 @@ class _AuthorityInfo(t.TypedDict): class URIMixin: """Mixin with all shared methods for URIs and IRIs.""" + if t.TYPE_CHECKING: + scheme: t.Optional[str] + authority: t.Optional[str] + path: t.Optional[str] + query: t.Optional[str] + fragment: t.Optional[str] + encoding: str + def authority_info(self) -> _AuthorityInfo: """Return a dictionary with the ``userinfo``, ``host``, and ``port``. @@ -251,7 +260,7 @@ def fragment_is_valid(self, require: bool = False) -> bool: ) return validators.fragment_is_valid(self.fragment, require) - def normalized_equality(self, other_ref) -> bool: + def normalized_equality(self, other_ref: "uri.URIReference") -> bool: """Compare this URIReference to another URIReference. 
:param URIReference other_ref: (required), The reference with which @@ -261,7 +270,11 @@ def normalized_equality(self, other_ref) -> bool: """ return tuple(self.normalize()) == tuple(other_ref.normalize()) - def resolve_with(self, base_uri, strict: bool = False) -> _Self: + def resolve_with( + self, + base_uri: t.Union[str, "uri.URIReference"], + strict: bool = False, + ) -> _Self: """Use an absolute URI Reference to resolve this relative reference. Assuming this is a relative reference that you would like to resolve, @@ -280,6 +293,9 @@ def resolve_with(self, base_uri, strict: bool = False) -> _Self: if not isinstance(base_uri, URIMixin): base_uri = type(self).from_string(base_uri) + if t.TYPE_CHECKING: + base_uri = t.cast(uri.URIReference, base_uri) + try: self._validator.validate(base_uri) except exc.ValidationError: @@ -388,6 +404,6 @@ def copy_with( for key, value in list(attributes.items()): if value is misc.UseExisting: del attributes[key] - uri = self._replace(**attributes) + uri: "uri.URIReference" = self._replace(**attributes) uri.encoding = self.encoding return uri diff --git a/src/rfc3986/exceptions.py b/src/rfc3986/exceptions.py index d513ddc..ecdd666 100644 --- a/src/rfc3986/exceptions.py +++ b/src/rfc3986/exceptions.py @@ -1,5 +1,8 @@ """Exceptions module for rfc3986.""" +import typing as t + from . import compat +from . import uri class RFC3986Exception(Exception): @@ -11,7 +14,7 @@ class RFC3986Exception(Exception): class InvalidAuthority(RFC3986Exception): """Exception when the authority string is invalid.""" - def __init__(self, authority): + def __init__(self, authority: t.Union[str, bytes]) -> None: """Initialize the exception with the invalid authority.""" super().__init__( f"The authority ({compat.to_str(authority)}) is not valid." 
@@ -21,7 +24,7 @@ def __init__(self, authority): class InvalidPort(RFC3986Exception): """Exception when the port is invalid.""" - def __init__(self, port): + def __init__(self, port: str) -> None: """Initialize the exception with the invalid port.""" super().__init__(f'The port ("{port}") is not valid.') @@ -29,7 +32,7 @@ def __init__(self, port): class ResolutionError(RFC3986Exception): """Exception to indicate a failure to resolve a URI.""" - def __init__(self, uri): + def __init__(self, uri: "uri.URIReference") -> None: """Initialize the error with the failed URI.""" super().__init__( "{} does not meet the requirements for resolution.".format( @@ -47,7 +50,7 @@ class ValidationError(RFC3986Exception): class MissingComponentError(ValidationError): """Exception raised when a required component is missing.""" - def __init__(self, uri, *component_names): + def __init__(self, uri: "uri.URIReference", *component_names: str) -> None: """Initialize the error with the missing component name.""" verb = "was" if len(component_names) > 1: @@ -66,7 +69,12 @@ def __init__(self, uri, *component_names): class UnpermittedComponentError(ValidationError): """Exception raised when a component has an unpermitted value.""" - def __init__(self, component_name, component_value, allowed_values): + def __init__( + self, + component_name: str, + component_value: t.Any, + allowed_values: t.Collection[t.Any], + ) -> None: """Initialize the error with the unpermitted component.""" super().__init__( "{} was required to be one of {!r} but was {!r}".format( @@ -86,7 +94,7 @@ def __init__(self, component_name, component_value, allowed_values): class PasswordForbidden(ValidationError): """Exception raised when a URL has a password in the userinfo section.""" - def __init__(self, uri): + def __init__(self, uri: t.Union[str, "uri.URIReference"]) -> None: """Initialize the error with the URI that failed validation.""" unsplit = getattr(uri, "unsplit", lambda: uri) super().__init__( @@ -100,7 +108,7 
@@ def __init__(self, uri): class InvalidComponentsError(ValidationError): """Exception raised when one or more components are invalid.""" - def __init__(self, uri, *component_names): + def __init__(self, uri: "uri.URIReference", *component_names: str) -> None: """Initialize the error with the invalid component name(s).""" verb = "was" if len(component_names) > 1: diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 0c40502..0892143 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -22,10 +22,6 @@ from . import abnf_regexp -if t.TYPE_CHECKING: - # Break an import loop. - from . import uri - class URIReferenceBase(t.NamedTuple): """The namedtuple used as a superclass of URIReference and IRIReference.""" @@ -130,7 +126,7 @@ class URIReferenceBase(t.NamedTuple): # Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3 -def merge_paths(base_uri: "uri.URIReference", relative_path: str) -> str: +def merge_paths(base_uri: URIReferenceBase, relative_path: str) -> str: """Merge a base URI's path with a relative URI's path.""" if base_uri.path is None and base_uri.authority is not None: return "/" + relative_path diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index a65de5e..1241d64 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -36,10 +36,21 @@ class ParseResultMixin(t.Generic[t.AnyStr]): + if t.TYPE_CHECKING: + userinfo: t.Optional[t.AnyStr] + host: t.Optional[t.AnyStr] + port: t.Optional[int] + query: t.Optional[t.AnyStr] + encoding: str + + @property + def authority(self) -> t.Optional[t.AnyStr]: + ... + def _generate_authority( self, attributes: t.Dict[str, t.Optional[t.AnyStr]], - ) -> str: + ) -> t.Optional[str]: # I swear I did not align the comparisons below. That's just how they # happened to align based on pep8 and attribute lengths. 
userinfo, host, port = ( @@ -402,7 +413,7 @@ def copy_with( scheme: t.Optional[t.Union[str, bytes]] = misc.UseExisting, userinfo: t.Optional[t.Union[str, bytes]] = misc.UseExisting, host: t.Optional[t.Union[str, bytes]] = misc.UseExisting, - port: t.Optional[t.Union[str, bytes]] = misc.UseExisting, + port: t.Optional[t.Union[int, str, bytes]] = misc.UseExisting, path: t.Optional[t.Union[str, bytes]] = misc.UseExisting, query: t.Optional[t.Union[str, bytes]] = misc.UseExisting, fragment: t.Optional[t.Union[str, bytes]] = misc.UseExisting, @@ -490,7 +501,10 @@ def split_authority( return userinfo, host, port -def authority_from(reference: "uri.URIReference", strict: bool): +def authority_from( + reference: "uri.URIReference", + strict: bool, +) -> t.Tuple[t.Optional[str], t.Optional[str], t.Optional[int]]: try: subauthority = reference.authority_info() except exceptions.InvalidAuthority: diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 69c3a04..27031f7 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -18,6 +18,7 @@ from . import misc from . import normalizers from . 
import uri +from ._typing_compat import Self as _Self class Validator: @@ -51,13 +52,13 @@ class Validator: ["scheme", "userinfo", "host", "port", "path", "query", "fragment"] ) - def __init__(self): + def __init__(self) -> None: """Initialize our default validations.""" - self.allowed_schemes: set[str] = set() - self.allowed_hosts: set[str] = set() - self.allowed_ports: set[str] = set() - self.allow_password = True - self.required_components = { + self.allowed_schemes: t.Set[str] = set() + self.allowed_hosts: t.Set[str] = set() + self.allowed_ports: t.Set[str] = set() + self.allow_password: bool = True + self.required_components: t.Dict[str, bool] = { "scheme": False, "userinfo": False, "host": False, @@ -66,9 +67,11 @@ def __init__(self): "query": False, "fragment": False, } - self.validated_components = self.required_components.copy() + self.validated_components: t.Dict[ + str, bool + ] = self.required_components.copy() - def allow_schemes(self, *schemes: str): + def allow_schemes(self, *schemes: str) -> _Self: """Require the scheme to be one of the provided schemes. .. versionadded:: 1.0 @@ -84,7 +87,7 @@ def allow_schemes(self, *schemes: str): self.allowed_schemes.add(normalizers.normalize_scheme(scheme)) return self - def allow_hosts(self, *hosts: str): + def allow_hosts(self, *hosts: str) -> _Self: """Require the host to be one of the provided hosts. .. versionadded:: 1.0 @@ -100,7 +103,7 @@ def allow_hosts(self, *hosts: str): self.allowed_hosts.add(normalizers.normalize_host(host)) return self - def allow_ports(self, *ports: str): + def allow_ports(self, *ports: str) -> _Self: """Require the port to be one of the provided ports. .. versionadded:: 1.0 @@ -118,7 +121,7 @@ def allow_ports(self, *ports: str): self.allowed_ports.add(port) return self - def allow_use_of_password(self): + def allow_use_of_password(self) -> _Self: """Allow passwords to be present in the URI. .. 
versionadded:: 1.0 @@ -131,7 +134,7 @@ def allow_use_of_password(self): self.allow_password = True return self - def forbid_use_of_password(self): + def forbid_use_of_password(self) -> _Self: """Prevent passwords from being included in the URI. .. versionadded:: 1.0 @@ -144,7 +147,7 @@ def forbid_use_of_password(self): self.allow_password = False return self - def check_validity_of(self, *components: str): + def check_validity_of(self, *components: str) -> _Self: """Check the validity of the components provided. This can be specified repeatedly. @@ -167,7 +170,7 @@ def check_validity_of(self, *components: str): ) return self - def require_presence_of(self, *components: str): + def require_presence_of(self, *components: str) -> _Self: """Require the components provided. This can be specified repeatedly. @@ -190,7 +193,7 @@ def require_presence_of(self, *components: str): ) return self - def validate(self, uri: "uri.URIReference"): + def validate(self, uri: "uri.URIReference") -> None: """Check a URI for conditions specified on this validator. .. 
versionadded:: 1.0 @@ -244,7 +247,7 @@ def check_password(uri: "uri.URIReference") -> None: def ensure_one_of( - allowed_values: t.Container[object], + allowed_values: t.Collection[object], uri: "uri.URIReference", attribute: str, ) -> None: @@ -261,7 +264,7 @@ def ensure_one_of( def ensure_required_components_exist( uri: "uri.URIReference", required_components: t.Iterable[str], -): +) -> None: """Assert that all required components are present in the URI.""" missing_components = sorted( component @@ -294,7 +297,7 @@ def is_valid( def authority_is_valid( - authority: str, + authority: t.Optional[str], host: t.Optional[str] = None, require: bool = False, ) -> bool: From ee237082a393b8fb1aeb2ec9e3b079302df8b1ca Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Thu, 20 Jun 2024 06:17:29 -0400 Subject: [PATCH 22/29] Fix variable annotation for `uri` in `URIMixin`.copy_with. Co-authored-by: sigmavirus24 <240830+sigmavirus24@users.noreply.github.com> --- src/rfc3986/_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index da102b7..6f95eca 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -404,6 +404,6 @@ def copy_with( for key, value in list(attributes.items()): if value is misc.UseExisting: del attributes[key] - uri: "uri.URIReference" = self._replace(**attributes) + uri: _Self = self._replace(**attributes) uri.encoding = self.encoding return uri From fadc96289cc78db08b41f469c0af999f16a71c0d Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Thu, 20 Jun 2024 08:43:24 -0400 Subject: [PATCH 23/29] Fix annotation for `misc.UseExisting` to be `Final` to avoid reassignment without warnings from a type checker. - Also add a noqa to _mixin.URIMixin.resolve_with, since the extra `if TYPE_CHECKING` statement pushed it over the complexity limit.
Co-authored-by: Ian Stapleton Cordasco --- src/rfc3986/_mixin.py | 2 +- src/rfc3986/misc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index 6f95eca..1c4d067 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -270,7 +270,7 @@ def normalized_equality(self, other_ref: "uri.URIReference") -> bool: """ return tuple(self.normalize()) == tuple(other_ref.normalize()) - def resolve_with( + def resolve_with( # noqa: C901 self, base_uri: t.Union[str, "uri.URIReference"], strict: bool = False, diff --git a/src/rfc3986/misc.py b/src/rfc3986/misc.py index 0892143..10d0ecb 100644 --- a/src/rfc3986/misc.py +++ b/src/rfc3986/misc.py @@ -136,4 +136,4 @@ def merge_paths(base_uri: URIReferenceBase, relative_path: str) -> str: return path[:index] + "/" + relative_path -UseExisting: t.Any = object() +UseExisting: t.Final[t.Any] = object() From c923049cef41160bf42ee0c78cc523a56b9e1f57 Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 22 Jun 2024 15:33:34 -0400 Subject: [PATCH 24/29] Change how port is determined/validated in `validators.subauthority_component_is_valid` to avoid testing via `int(...)`. - Also makes the linters and type-checker happier. --- src/rfc3986/validators.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 27031f7..fe22ba4 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -437,16 +437,16 @@ def subauthority_component_is_valid( elif component != "port": return True - try: - # fmt: off - port = int(subauthority_dict["port"]) # pyright: ignore[reportArgumentType] # noqa: E501 # Guarded by "except TypeError". 
- # fmt: on - except TypeError: - # If the port wasn't provided it'll be None and int(None) raises a - # TypeError + port = subauthority_dict["port"] + + if port is None: return True - return 0 <= port <= 65535 + # We know it has to have fewer than 6 digits if it exists. + if not (port.isdigit() and len(port) < 6): + return False + + return 0 <= int(port) <= 65535 def ensure_components_are_valid( From 7fc9af07b14f98c5270908c86a4cfe6715ae78c4 Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Tue, 2 Jul 2024 22:22:10 -0400 Subject: [PATCH 25/29] Replace reorder-python-imports and flake8-import-order with isort, along with some other minor changes. - Update .pre-commit-config.yaml and tox.ini in accordance to the above. - Run `pre-commit autoupdate` while I'm at it. Can be reverted if need be. - Somewhat centralize black and isort config in pyproject.toml to avoid having to keep it in sync in multiple places. --- .pre-commit-config.yaml | 19 ++++++++----------- pyproject.toml | 9 +++++++++ src/rfc3986/__init__.py | 4 ++-- src/rfc3986/_mixin.py | 1 + src/rfc3986/compat.py | 6 ++---- src/rfc3986/exceptions.py | 1 + src/rfc3986/iri.py | 2 +- src/rfc3986/parseresult.py | 3 +-- src/rfc3986/uri.py | 1 + src/rfc3986/validators.py | 6 +++--- tests/test_api.py | 2 +- tests/test_builder.py | 3 ++- tests/test_parseresult.py | 3 ++- tests/test_unicode_support.py | 1 - tests/test_uri.py | 3 ++- tests/test_validators.py | 1 + tox.ini | 6 +++--- 17 files changed, 40 insertions(+), 31 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 098a010..405e529 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,34 +1,31 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-yaml - id: debug-statements - id: end-of-file-fixer - id: trailing-whitespace -- repo: https://github.com/asottile/reorder-python-imports - rev: v3.12.0 +- 
repo: https://github.com/pycqa/isort + rev: 5.13.2 hooks: - - id: reorder-python-imports - args: [--application-directories, '.:src', --py37-plus] + - id: isort - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.4.2 hooks: - id: black - args: [--line-length=79] - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 + rev: v3.16.0 hooks: - id: pyupgrade - args: [--py37-plus] + args: [--py38-plus] - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 7.1.0 hooks: - id: flake8 exclude: ^(tests/|docs/|setup.py) additional_dependencies: - flake8-docstrings - - flake8-import-order - repo: https://github.com/asottile/setup-cfg-fmt rev: v2.5.0 hooks: diff --git a/pyproject.toml b/pyproject.toml index 7cb2895..f2ffbab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,12 @@ +[tool.black] +line-length = 79 +target-version = ["py38"] + +[tool.isort] +profile = "black" +line_length = 79 +force_single_line = true + [tool.pyright] include = ["src/rfc3986"] ignore = ["tests"] diff --git a/src/rfc3986/__init__.py b/src/rfc3986/__init__.py index bc2da11..e71de1f 100644 --- a/src/rfc3986/__init__.py +++ b/src/rfc3986/__init__.py @@ -19,12 +19,12 @@ :copyright: (c) 2014 Rackspace :license: Apache v2.0, see LICENSE for details """ -from .api import iri_reference from .api import IRIReference +from .api import URIReference +from .api import iri_reference from .api import is_valid_uri from .api import normalize_uri from .api import uri_reference -from .api import URIReference from .api import urlparse from .parseresult import ParseResult diff --git a/src/rfc3986/_mixin.py b/src/rfc3986/_mixin.py index 1c4d067..b03b772 100644 --- a/src/rfc3986/_mixin.py +++ b/src/rfc3986/_mixin.py @@ -1,4 +1,5 @@ """Module containing the implementation of the URIMixin class.""" + import typing as t import warnings diff --git a/src/rfc3986/compat.py b/src/rfc3986/compat.py index 5d5146c..b66e1db 100644 --- a/src/rfc3986/compat.py +++ b/src/rfc3986/compat.py @@ -24,8 +24,7 
@@ def to_str( # noqa: D103 b: t.Union[str, bytes], encoding: str = "utf-8", -) -> str: - ... +) -> str: ... @t.overload @@ -47,8 +46,7 @@ def to_str( def to_bytes( # noqa: D103 s: t.Union[str, bytes], encoding: str = "utf-8", -) -> bytes: - ... +) -> bytes: ... @t.overload diff --git a/src/rfc3986/exceptions.py b/src/rfc3986/exceptions.py index ecdd666..d0e853b 100644 --- a/src/rfc3986/exceptions.py +++ b/src/rfc3986/exceptions.py @@ -1,4 +1,5 @@ """Exceptions module for rfc3986.""" + import typing as t from . import compat diff --git a/src/rfc3986/iri.py b/src/rfc3986/iri.py index e2a292e..205221e 100644 --- a/src/rfc3986/iri.py +++ b/src/rfc3986/iri.py @@ -1,4 +1,5 @@ """Module containing the implementation of the IRIReference class.""" + # Copyright (c) 2014 Rackspace # Copyright (c) 2015 Ian Stapleton Cordasco # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,7 +23,6 @@ from . import uri from ._typing_compat import Self as _Self - try: import idna except ImportError: # pragma: no cover diff --git a/src/rfc3986/parseresult.py b/src/rfc3986/parseresult.py index 1241d64..80cc300 100644 --- a/src/rfc3986/parseresult.py +++ b/src/rfc3986/parseresult.py @@ -44,8 +44,7 @@ class ParseResultMixin(t.Generic[t.AnyStr]): encoding: str @property - def authority(self) -> t.Optional[t.AnyStr]: - ... + def authority(self) -> t.Optional[t.AnyStr]: ... 
def _generate_authority( self, diff --git a/src/rfc3986/uri.py b/src/rfc3986/uri.py index 235a89b..e382498 100644 --- a/src/rfc3986/uri.py +++ b/src/rfc3986/uri.py @@ -1,4 +1,5 @@ """Module containing the implementation of the URIReference class.""" + # Copyright (c) 2014 Rackspace # Copyright (c) 2015 Ian Stapleton Cordasco # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index fe22ba4..862663a 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -67,9 +67,9 @@ def __init__(self) -> None: "query": False, "fragment": False, } - self.validated_components: t.Dict[ - str, bool - ] = self.required_components.copy() + self.validated_components: t.Dict[str, bool] = ( + self.required_components.copy() + ) def allow_schemes(self, *schemes: str) -> _Self: """Require the scheme to be one of the provided schemes. diff --git a/tests/test_api.py b/tests/test_api.py index 3b310ed..dacae72 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -1,7 +1,7 @@ +from rfc3986.api import URIReference from rfc3986.api import is_valid_uri from rfc3986.api import normalize_uri from rfc3986.api import uri_reference -from rfc3986.api import URIReference def test_uri_reference(): diff --git a/tests/test_builder.py b/tests/test_builder.py index 338b3a2..fb6ec3c 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -19,7 +19,8 @@ import pytest -from rfc3986 import builder, uri_reference +from rfc3986 import builder +from rfc3986 import uri_reference def test_builder_default(): diff --git a/tests/test_parseresult.py b/tests/test_parseresult.py index 3e94138..5788448 100644 --- a/tests/test_parseresult.py +++ b/tests/test_parseresult.py @@ -14,10 +14,11 @@ import pytest import rfc3986 -from . import base from rfc3986 import exceptions from rfc3986 import parseresult as pr +from . 
import base + INVALID_PORTS = [ "443:80", "443:80:443", diff --git a/tests/test_unicode_support.py b/tests/test_unicode_support.py index 6a35244..8c388fb 100644 --- a/tests/test_unicode_support.py +++ b/tests/test_unicode_support.py @@ -5,7 +5,6 @@ from rfc3986 import uri_reference from rfc3986 import urlparse - SNOWMAN = b"\xe2\x98\x83" SNOWMAN_PARAMS = b"http://example.com?utf8=" + SNOWMAN SNOWMAN_HOST = b"http://" + SNOWMAN + b".com" diff --git a/tests/test_uri.py b/tests/test_uri.py index 1c5b0ff..06ac2a8 100644 --- a/tests/test_uri.py +++ b/tests/test_uri.py @@ -1,11 +1,12 @@ import pytest -from . import base from rfc3986.exceptions import InvalidAuthority from rfc3986.exceptions import ResolutionError from rfc3986.misc import URI_MATCHER from rfc3986.uri import URIReference +from . import base + @pytest.fixture def scheme_and_path_uri(): diff --git a/tests/test_validators.py b/tests/test_validators.py index 0ec7449..2bab3a1 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -1,4 +1,5 @@ """Tests for the validators module.""" + import pytest import rfc3986 diff --git a/tox.ini b/tox.ini index 539879a..7099d8b 100644 --- a/tox.ini +++ b/tox.ini @@ -25,9 +25,11 @@ basepython = python3 skip_install = true deps = {[testenv:flake8]deps} + isort black commands = - black -l 79 {env:BLACK_ARGS:} -t py37 src/rfc3986 tests/ + isort src/rfc3986 tests/ + black {env:BLACK_ARGS:} src/rfc3986 tests/ {[testenv:flake8]commands} [testenv:flake8] @@ -36,7 +38,6 @@ skip_install = true deps = flake8 flake8-docstrings - flake8-import-order commands = flake8 {posargs} src/rfc3986 [testenv:typing] @@ -92,4 +93,3 @@ exclude = .cache, .eggs max-complexity = 10 -import-order-style = google From bf1a44d10047f49fc4507f4db490a80d98cdc306 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jul 2024 02:22:43 +0000 Subject: [PATCH 26/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more 
information, see https://pre-commit.ci --- docs/source/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 5ee2603..163800d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,7 +19,6 @@ # sys.path.insert(0, os.path.abspath('.')) import rfc3986 - # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. From 283f910fff531f0cc386767de1372c690929372b Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Wed, 3 Jul 2024 08:44:50 -0400 Subject: [PATCH 27/29] Add exclude lines to .coveragerc to account for a) `if t.TYPE_CHECKING` blocks and b) lines that are only ellipses, as well as some temporary pragma comments in _typing_compat.py. This seems to account for most of the missing coverage with the current configuration, excluding line 447 in validators.py. --- .coveragerc | 2 ++ src/rfc3986/_typing_compat.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 15791b3..f74bf58 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,4 +3,6 @@ exclude_lines = .* # Python \d.* .* # nocov: Python \d.* .* # pragma: no cover.* + ^\s*(?:el)?if t\.TYPE_CHECKING:$ + ^ +\.\.\.$ fail_under = 100 diff --git a/src/rfc3986/_typing_compat.py b/src/rfc3986/_typing_compat.py index 4822ecc..5e4e36c 100644 --- a/src/rfc3986/_typing_compat.py +++ b/src/rfc3986/_typing_compat.py @@ -3,11 +3,11 @@ __all__ = ("Self",) -if sys.version_info >= (3, 11): +if sys.version_info >= (3, 11): # pragma: no cover from typing import Self elif t.TYPE_CHECKING: from typing_extensions import Self -else: +else: # pragma: no cover class _PlaceholderMeta(type): # This is meant to make it easier to debug the presence of placeholder From 1443cd1cd1502fd22b663cedc44b9e8b1b7fc835 Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius 
<111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Wed, 3 Jul 2024 09:17:52 -0400 Subject: [PATCH 28/29] Add `#pragma: no cover` to final line missing coverage, as well as a comment for justification. --- src/rfc3986/validators.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rfc3986/validators.py b/src/rfc3986/validators.py index 862663a..0727586 100644 --- a/src/rfc3986/validators.py +++ b/src/rfc3986/validators.py @@ -443,7 +443,11 @@ def subauthority_component_is_valid( return True # We know it has to have fewer than 6 digits if it exists. - if not (port.isdigit() and len(port) < 6): + if not (port.isdigit() and len(port) < 6): # pragma: no cover + # This branch can only execute when this function is called directly + # with a URI reference manually constructed with an invalid port. + # Such a use case is unsupported, since this function isn't part of + # the public API. return False return 0 <= int(port) <= 65535 From be1a4e82144f278862635e02db9e8862ac2e1e6c Mon Sep 17 00:00:00 2001 From: Sachaa-Thanasius <111999343+Sachaa-Thanasius@users.noreply.github.com> Date: Sat, 6 Jul 2024 01:17:32 -0400 Subject: [PATCH 29/29] Adjust typing check to use wrapper/shim script for now. --- tests/verify_types.py | 77 +++++++++++++++++++++++++++++++++++++++++++ tox.ini | 2 +- 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 tests/verify_types.py diff --git a/tests/verify_types.py b/tests/verify_types.py new file mode 100644 index 0000000..b0b2350 --- /dev/null +++ b/tests/verify_types.py @@ -0,0 +1,77 @@ +"""This script is a shim around `pyright --verifytypes` to determine if the +current typing coverage meets the expected coverage. The previous command by +itself won't suffice, since its expected coverage can't be modified from 100%. +Useful while still adding annotations to the library. 
+""" + +import argparse +import json +import subprocess +from decimal import Decimal + +PYRIGHT_CMD = ("pyright", "--verifytypes", "rfc3986", "--outputjson") + + +def validate_coverage(inp: str) -> Decimal: + """Ensure the given coverage score is between 0 and 100 (inclusive).""" + + coverage = Decimal(inp) + if not (0 <= coverage <= 100): + raise ValueError + return coverage + + +def main() -> int: + """Determine if rfc3986's typing coverage meets our expected coverage.""" + + parser = argparse.ArgumentParser() + parser.add_argument( + "--fail-under", + default=Decimal("75"), + type=validate_coverage, + help="The target typing coverage to not fall below (default: 75).", + ) + parser.add_argument( + "--quiet", + action="store_true", + help="Whether to hide the full output from `pyright --verifytypes`.", + ) + + args = parser.parse_args() + + expected_coverage: Decimal = args.fail_under / 100 + quiet: bool = args.quiet + + try: + output = subprocess.check_output( + PYRIGHT_CMD, + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as exc: + output = exc.output + + verifytypes_output = json.loads(output) + raw_score = verifytypes_output["typeCompleteness"]["completenessScore"] + actual_coverage = Decimal(raw_score) + + if not quiet: + # Switch back to normal output instead of json, for readability. + subprocess.run(PYRIGHT_CMD[:-1]) + + if actual_coverage >= expected_coverage: + print( + f"OK - Required typing coverage of {expected_coverage:.2%} " + f"reached. Total typing coverage: {actual_coverage:.2%}." + ) + return 0 + else: + print( + f"FAIL - Required typing coverage of {expected_coverage:.2%} not " + f"reached. Total typing coverage: {actual_coverage:.2%}." 
+ ) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tox.ini b/tox.ini index 7099d8b..bd4b757 100644 --- a/tox.ini +++ b/tox.ini @@ -43,7 +43,7 @@ commands = flake8 {posargs} src/rfc3986 [testenv:typing] deps = pyright -commands = pyright {posargs:--verifytypes rfc3986} +commands = python tests/verify_types.py [testenv:venv] commands = {posargs}