Skip to content

Commit 2dc2a9e

Browse files
authored
feat: cache IANA TLDs for faster lookups (#390)
- Cache the full TLD list in memory when the environment variable `PYVLD_CACHE_TLD` is set to the exact string `True`
1 parent 5489605 commit 2dc2a9e

File tree

1 file changed

+27
-8
lines changed

1 file changed

+27
-8
lines changed

src/validators/domain.py

+27-8
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,39 @@
11
"""Domain."""
22

33
# standard
4+
from os import environ
45
from pathlib import Path
56
import re
7+
from typing import Optional, Set
68

79
# local
810
from .utils import validator
911

1012

11-
def _iana_tld():
    """Load IANA TLDs as a Generator.

    Reads ``_tld.txt`` located next to this module, ignores its first line,
    and yields every remaining non-blank line with surrounding whitespace
    removed.

    Yields:
        str: One TLD per non-blank line of the file.
    """
    # source: https://data.iana.org/TLD/tlds-alpha-by-domain.txt
    with Path(__file__).parent.joinpath("_tld.txt").open(encoding="utf-8") as tld_f:
        # Ignore the first line. The default argument keeps an empty file from
        # raising StopIteration inside this generator, which Python would
        # re-raise as RuntimeError (PEP 479).
        next(tld_f, None)
        for line in tld_f:
            tld = line.strip()
            if tld:  # a blank line must not be reported as an (empty) TLD
                yield tld
13+
class _IanaTLD:
    """Read IANA TLDs, and optionally cache them.

    TLDs are read from ``_tld.txt``, located next to this module. A small
    built-in set of popular TLDs is consulted first, so lookups that hit it
    never open the file. The full list is kept in memory only when the
    environment variable ``PYVLD_CACHE_TLD`` equals the string ``"True"``.
    """

    # Full TLD set; stays ``None`` until caching is enabled and a lookup
    # misses the popular set.
    _full_cache: Optional[Set[str]] = None
    # source: https://www.statista.com/statistics/265677
    _popular_cache = {"COM", "ORG", "RU", "DE", "NET", "BR", "UK", "JP", "FR", "IT"}

    @classmethod
    def _retrieve(cls):
        """Yield each TLD in ``_tld.txt``, ignoring the first line and blank lines."""
        with Path(__file__).parent.joinpath("_tld.txt").open(encoding="utf-8") as tld_f:
            # Ignore the first line. The default argument keeps an empty file
            # from raising StopIteration inside this generator, which Python
            # would re-raise as RuntimeError (PEP 479).
            next(tld_f, None)
            for line in tld_f:
                entry = line.strip()
                if entry:  # a blank line must not be reported as an (empty) TLD
                    yield entry

    @classmethod
    def check(cls, tld: str) -> bool:
        """Return whether ``tld`` is a known TLD.

        Args:
            tld: Candidate TLD. It is compared as-is, without case folding.

        Returns:
            ``True`` if ``tld`` is in the popular set or in ``_tld.txt``,
            ``False`` otherwise.
        """
        if tld in cls._popular_cache:
            return True
        if cls._full_cache is not None:
            return tld in cls._full_cache
        if environ.get("PYVLD_CACHE_TLD") == "True":
            # Caching enabled: read the file once and answer from memory after.
            cls._full_cache = set(cls._retrieve())
            return tld in cls._full_cache
        # Caching disabled: scan the file on every call.
        tlds = cls._retrieve()
        try:
            return tld in tlds
        finally:
            # A successful match stops the scan early and leaves the generator
            # suspended inside its ``with`` block; closing it releases the
            # file immediately instead of waiting for garbage collection.
            tlds.close()
1837

1938

2039
@validator
@@ -56,7 +75,7 @@ def domain(
5675
if not value:
5776
return False
5877

59-
if consider_tld and value.rstrip(".").rsplit(".", 1)[-1].upper() not in _iana_tld():
78+
if consider_tld and not _IanaTLD.check(value.rstrip(".").rsplit(".", 1)[-1].upper()):
6079
return False
6180

6281
try:

0 commit comments

Comments
 (0)