Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC for hyperscan usage in UrlDispatcher #9907

Draft
wants to merge 39 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
2e7c5a7
hyperscan
asvetlov Nov 15, 2024
1a5a10d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 15, 2024
def5f07
Fix mypy
asvetlov Nov 15, 2024
bea95da
Reforman
asvetlov Nov 15, 2024
bd1f61f
Update setup.cfg
asvetlov Nov 15, 2024
934f755
step
asvetlov Nov 16, 2024
2940323
step
asvetlov Nov 16, 2024
b41363b
merge
asvetlov Nov 16, 2024
87056a2
fix
asvetlov Nov 16, 2024
ac2f8e3
fix
asvetlov Nov 16, 2024
91e890a
Drop debug info
asvetlov Nov 16, 2024
e0eb966
Tune deps
asvetlov Nov 16, 2024
d6c019b
fix
asvetlov Nov 16, 2024
5db0bfd
fix
asvetlov Nov 16, 2024
7ce5180
fix
asvetlov Nov 16, 2024
d8c5da1
fix
asvetlov Nov 16, 2024
d27eea9
tune
asvetlov Nov 16, 2024
03719e9
tune
asvetlov Nov 16, 2024
45f947f
Merge branch 'master' into hyperscan
asvetlov Nov 16, 2024
89bac7a
inline
asvetlov Nov 16, 2024
505e7f4
Merge branch 'master' into hyperscan
asvetlov Nov 16, 2024
4a29903
Use dict lookup for plain resources
asvetlov Nov 16, 2024
ed931e2
Merge branch 'master' into hyperscan
asvetlov Nov 16, 2024
51c26c0
Add separate dict for prefix resources
asvetlov Nov 16, 2024
e6e9e48
Tune
asvetlov Nov 16, 2024
f452675
Refactor prefixed resources routing
asvetlov Nov 17, 2024
f56a12a
address review
asvetlov Nov 17, 2024
58a605e
Merge branch 'master' into hyperscan
asvetlov Nov 17, 2024
e14bad9
tune
asvetlov Nov 17, 2024
a622920
Merge branch 'master' into hyperscan
asvetlov Nov 17, 2024
3563173
comment
asvetlov Nov 17, 2024
00e6331
Fix prefix resource lookup
asvetlov Nov 18, 2024
55a0c9d
Alternative strategy for prefix resource matching
asvetlov Nov 18, 2024
a8bed53
relax deps
asvetlov Nov 18, 2024
2ac1eca
fix
asvetlov Nov 18, 2024
a718f6e
fix
asvetlov Nov 18, 2024
c82a824
Merge branch 'master' into hyperscan
asvetlov Nov 18, 2024
e1cbdf0
Merge branch 'master' into hyperscan
asvetlov Nov 19, 2024
a87dc32
Merge branch 'master' into hyperscan
asvetlov Nov 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ ignore_missing_imports = True

[mypy-gunicorn.*]
ignore_missing_imports = True

[mypy-hyperscan]
ignore_missing_imports = True
asvetlov marked this conversation as resolved.
Show resolved Hide resolved
154 changes: 145 additions & 9 deletions aiohttp/web_urldispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from .abc import AbstractMatchInfo, AbstractRouter, AbstractView
from .helpers import DEBUG
from .http import HttpVersion11
from .log import web_logger
from .typedefs import Handler, PathLike
from .web_exceptions import (
HTTPException,
Expand Down Expand Up @@ -68,6 +69,14 @@
)


try:
import hyperscan

HAS_HYPERSCAN = True
except ImportError:
HAS_HYPERSCAN = False


if TYPE_CHECKING:
from .web_app import Application

Expand Down Expand Up @@ -735,6 +744,7 @@
def add_prefix(self, prefix: str) -> None:
    """Apply *prefix* to this resource and refresh the wrapped app's router.

    The prefix is first handled by the base class, then pushed down through
    ``_add_prefix_to_resources()``; finally the router of the wrapped
    application rebuilds its lookup tables.
    """
    super().add_prefix(prefix)
    self._add_prefix_to_resources(prefix)
    # The router's lookup tables are keyed by resource path/prefix, so they
    # have to be regenerated once the prefix has been applied.
    sub_router = self._app.router
    sub_router._rebuild()

def _add_prefix_to_resources(self, prefix: str) -> None:
router = self._app.router
Expand Down Expand Up @@ -998,28 +1008,98 @@
self._named_resources: Dict[str, AbstractResource] = {}
self._resource_index: dict[str, list[AbstractResource]] = {}
self._matched_sub_app_resources: List[MatchedSubAppResource] = []
self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported]
self._plain_resources: dict[str, PlainResource] = {}
self._prefix_resources: dict[tuple[str, ...], list[PrefixResource]] = {}
self._max_prefix_cardinality = 0
self._has_variable_resources = True

def _on_match(
    self, id_: int, from_: int, to: int, flags: int, found: list[int]
) -> Optional[bool]:
    """Collect the id of a matched expression during a hyperscan scan.

    This method is handed to ``Database.scan()`` as ``match_event_handler``;
    *found* is the list passed to the same call as ``context``.

    :param id_: id of the expression that matched.
    :param from_: unused here.
    :param to: unused here.
    :param flags: unused here.
    :param found: accumulator that receives *id_*.
    :returns: always ``None``.
    """
    # NOTE(review): presumably a falsy return value lets the scan continue --
    # confirm against the hyperscan binding documentation.
    found.append(id_)
    return None

async def resolve(self, request: Request) -> UrlMappingMatchInfo:
resource_index = self._resource_index
allowed_methods: Set[str] = set()
allowed_methods: set[str] = set()
path = request.rel_url.path_safe

# plain resource lookup
if (plain_resource := self._plain_resources.get(path)) is not None:
match_dict, allowed = await plain_resource.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed

# prefix resource lookup
parts = tuple(
path[1:].split("/", self._max_prefix_cardinality)[
: self._max_prefix_cardinality
]
)
prefix_resources = self._prefix_resources
# Walk the url parts looking for candidates. We walk the url backwards
# to ensure the most explicit match is found first. If there are multiple
# candidates for a given url part because there are multiple resources
# registered for the same canonical path, we resolve them in a linear
# fashion to ensure registration order is respected.
url_part = request.rel_url.path_safe
while url_part:
for candidate in resource_index.get(url_part, ()):
match_dict, allowed = await candidate.resolve(request)
while True:
for prefix_resource in prefix_resources.get(parts, ()):
match_dict, allowed = await prefix_resource.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed
if url_part == "/":
if len(parts) <= 1:
break
url_part = url_part.rpartition("/")[0] or "/"

parts = parts[:-1]

Check warning on line 1056 in aiohttp/web_urldispatcher.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/web_urldispatcher.py#L1056

Added line #L1056 was not covered by tests

# variable resource lookup
if self._has_variable_resources:
if self._hyperdb is not None:
found: list[int] = []
resources = self._resources

self._hyperdb.scan(
path.encode("utf8"),
match_event_handler=self._on_match,
context=found,
)
if len(found) > 1:
# Multiple matches are found,
# use the FIRST match.
# Match ids are basically indexes in self._resources.
found.sort()

Check warning on line 1073 in aiohttp/web_urldispatcher.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/web_urldispatcher.py#L1073

Added line #L1073 was not covered by tests

for idx in found:
resource = resources[idx]
match_dict, allowed = await resource.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed

Check warning on line 1081 in aiohttp/web_urldispatcher.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/web_urldispatcher.py#L1081

Added line #L1081 was not covered by tests
else:
url_part = path
resource_index = self._resource_index

# Walk the url parts looking for candidates. We walk the url backwards
# to ensure the most explicit match is found first. If there are multiple
# candidates for a given url part because there are multiple resources
# registered for the same canonical path, we resolve them in a linear
# fashion to ensure registration order is respected.
while url_part:
for candidate in resource_index.get(url_part, ()):
match_dict, allowed = await candidate.resolve(request)
if match_dict is not None:
return match_dict
else:
allowed_methods |= allowed
if url_part == "/":
break
url_part = url_part.rpartition("/")[0] or "/"

# domain resource lookup
#
# We didn't find any candidates, so we'll try the matched sub-app
# resources which we have to walk in a linear fashion because they
Expand Down Expand Up @@ -1239,6 +1319,62 @@
super().freeze()
for resource in self._resources:
resource.freeze()
self._rebuild()

def _rebuild(self) -> None:
    """Rebuild the lookup tables derived from ``self._resources``.

    Every registered resource is sorted into one of these groups:

    * ``PlainResource`` -- stored in ``self._plain_resources`` under its
      exact path.
    * ``DynamicResource`` -- its regex is anchored with ``^``/``$`` and
      queued for the hyperscan database; the expression id is the
      resource's index in ``self._resources``.
    * ``PrefixResource`` -- stored in ``self._prefix_resources`` under the
      tuple of its prefix segments. ``MatchedSubAppResource`` instances
      are skipped.

    If at least one dynamic pattern was collected and hyperscan is
    importable, a database is compiled and published as ``self._hyperdb``.
    When compilation fails with ``hyperscan.error`` a warning is logged and
    ``self._hyperdb`` stays ``None``.

    :raises RuntimeError: if a resource is none of the supported types.
    """
    self._hyperdb = None
    self._plain_resources.clear()
    self._prefix_resources.clear()
    self._max_prefix_cardinality = 0
    patterns: list[bytes] = []
    ids: list[int] = []
    for id_, resource in enumerate(self._resources):
        if isinstance(resource, PlainResource):
            self._plain_resources[resource.get_info()["path"]] = resource
        elif isinstance(resource, DynamicResource):
            pattern = resource.get_info()["pattern"].pattern
            # Anchor both ends so the expression has to cover the whole
            # scanned path.
            patterns.append(f"^{pattern}$".encode())
            ids.append(id_)
        elif isinstance(resource, PrefixResource):
            if isinstance(resource, MatchedSubAppResource):
                # Wildcard resources don't fit the hyperscan table.
                continue
            prefix = resource.get_info()["prefix"]
            # There may be multiple resources for a prefix, so we keep
            # them in a list to ensure that registration order is
            # respected.
            parts = tuple(prefix.split("/")[1:])
            self._prefix_resources.setdefault(parts, []).append(resource)
            self._max_prefix_cardinality = max(
                self._max_prefix_cardinality, len(parts)
            )
        else:
            raise RuntimeError(f"Unsupported resource type {type(resource)}")

    count = len(patterns)
    self._has_variable_resources = count > 0
    if not (self._has_variable_resources and HAS_HYPERSCAN):
        # Nothing to compile, or hyperscan is unavailable: ``_hyperdb``
        # stays ``None``.
        return

    pattern_flags = (
        hyperscan.HS_FLAG_UTF8
        | hyperscan.HS_FLAG_UCP
        | hyperscan.HS_FLAG_SINGLEMATCH
    )
    # Compile into a local object and publish it only on success, so that
    # ``self._hyperdb`` never refers to an uncompiled database, whatever
    # exception ``compile()`` raises.
    database = hyperscan.Database()
    try:
        database.compile(
            expressions=patterns,
            ids=ids,
            elements=count,
            flags=[pattern_flags] * count,
        )
    except hyperscan.error as exc:
        web_logger.warning(
            "Cannot compile hyperscan database: %s, switching to fallback url resolver",
            repr(exc),
        )
    else:
        self._hyperdb = database

Check warning on line 1377 in aiohttp/web_urldispatcher.py

View check run for this annotation

Codecov / codecov/patch

aiohttp/web_urldispatcher.py#L1377

Added line #L1377 was not covered by tests

def add_routes(self, routes: Iterable[AbstractRouteDef]) -> List[AbstractRoute]:
"""Append routes to route table.
Expand Down
1 change: 1 addition & 0 deletions requirements/runtime-deps.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ async-timeout >= 4.0, < 6.0 ; python_version < "3.11"
Brotli; platform_python_implementation == 'CPython'
brotlicffi; platform_python_implementation != 'CPython'
frozenlist >= 1.1.1
hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin")
multidict >=4.5, < 7.0
propcache >= 0.2.0
yarl >= 1.17.0, < 2.0
2 changes: 2 additions & 0 deletions requirements/runtime-deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ frozenlist==1.5.0
# via
# -r requirements/runtime-deps.in
# aiosignal
hyperscan == 0.7.8 ; platform_python_implementation == "CPython" and (sys_platform=="linux" or sys_platform=="darwin")
# via -r requirements/runtime-deps.in
idna==3.6
# via yarl
multidict==6.1.0
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ speedups =
aiodns >= 3.2.0; sys_platform=="linux" or sys_platform=="darwin"
Brotli; platform_python_implementation == 'CPython'
brotlicffi; platform_python_implementation != 'CPython'
hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin")

[options.packages.find]
exclude =
Expand Down
Loading