From 96cc3222b758bce81bef39d78454ed327f8b04fc Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Mon, 23 Jan 2023 14:12:57 -0500 Subject: [PATCH 1/5] Change Public Suffix List helper packages Switch from using the deprecated (and very out-of-date) publicsuffix package to using the currently maintained publicsuffixlist package. The publicsuffixlist package has a compatibility layer specifically for code that is already using the publicsuffix package so changes are minimal. --- setup.py | 2 +- src/pshtt/pshtt.py | 64 +++++++++++++++++++++++++--------------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/setup.py b/setup.py index af748c7b..ce25344f 100644 --- a/setup.py +++ b/setup.py @@ -94,7 +94,7 @@ def get_version(version_file): py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")], install_requires=[ "docopt>=0.6.2", - "publicsuffix>=1.1.0", + "publicsuffixlist[update]>=0.9.2 ", "pyopenssl>=17.5.0", "pytablereader>=0.15.0", "pytablewriter>=0.27.2", diff --git a/src/pshtt/pshtt.py b/src/pshtt/pshtt.py index d50647d5..51c0f780 100644 --- a/src/pshtt/pshtt.py +++ b/src/pshtt/pshtt.py @@ -9,13 +9,11 @@ import re import sys from urllib import parse as urlparse -from urllib.error import URLError # Third-Party Libraries import OpenSSL - -# Unable to find type stubs for the publicsuffix package. -from publicsuffix import PublicSuffixList, fetch # type: ignore +from publicsuffixlist.compat import PublicSuffixList # type: ignore +from publicsuffixlist.update import updatePSL # type: ignore import requests # Unable to find type stubs for the sslyze package. @@ -1594,21 +1592,33 @@ def load_preload_list(): return fully_preloaded -# Returns an instantiated PublicSuffixList object, and the -# list of lines read from the file. -def load_suffix_list(): +# Returns an instantiated PublicSuffixList object. +def load_suffix_list(cache_suffix_list=None, update_list=False): """Download and load the public suffix list.""" - # File does not exist, download current list and cache it at given location. - utils.debug("Downloading the Public Suffix List...", divider=True) - try: - cache_file = fetch() - except URLError as err: - logging.exception("Unable to download the Public Suffix List...") - utils.debug(err) - return [] - content = cache_file.readlines() - suffixes = PublicSuffixList(content) - return suffixes, content + if update_list: + utils.debug("Downloading the Public Suffix List...", divider=True) + try: + # Update the local copy + if cache_suffix_list: + updatePSL(cache_suffix_list) + # Update the built-in copy + else: + updatePSL() + except Exception as err: + logging.exception("Unable to download the Public Suffix List...") + utils.debug(err) + return None + + # Use the local copy + if cache_suffix_list: + utils.debug("Using cached Chrome preload list.", divider=True) + with codecs.open(cache_suffix_list, encoding="utf-8") as cache_file: + suffixes = PublicSuffixList(cache_file) + # Use the built-in copy + else: + suffixes = PublicSuffixList() + + return suffixes def initialize_external_data( @@ -1696,18 +1706,14 @@ def initialize_external_data( # Load Mozilla's current Public Suffix list. if SUFFIX_LIST is None: - if cache_suffix_list and os.path.exists(cache_suffix_list): - utils.debug("Using cached suffix list.", divider=True) - with codecs.open(cache_suffix_list, encoding="utf-8") as cache_file: - SUFFIX_LIST = PublicSuffixList(cache_file) + if cache_suffix_list: + # Retrieve the list if the path does not exist otherwise use the cached copy + SUFFIX_LIST = load_suffix_list( + cache_suffix_list, not os.path.exists(cache_suffix_list) + ) else: - SUFFIX_LIST, raw_content = load_suffix_list() - - if cache_suffix_list: - utils.debug( - "Caching suffix list at %s", cache_suffix_list, divider=True - ) - utils.write("".join(raw_content), cache_suffix_list) + # Load the built-in PSL + SUFFIX_LIST = load_suffix_list() def inspect_domains(domains, options): From 466064f105e29ac781fed7d6f1653eeeb3c44212 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Mon, 23 Jan 2023 14:54:32 -0500 Subject: [PATCH 2/5] Bump version from 0.6.9 to 0.6.10 --- src/pshtt/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pshtt/_version.py b/src/pshtt/_version.py index 0cb57609..fae5dfbf 100644 --- a/src/pshtt/_version.py +++ b/src/pshtt/_version.py @@ -1,2 +1,2 @@ """This file defines the version of this module.""" -__version__ = "0.6.9" +__version__ = "0.6.10" From 02dae09ae0ce291127420177ed0b14413698c7bc Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Mon, 23 Jan 2023 14:54:47 -0500 Subject: [PATCH 3/5] Bump version from 0.6.10 to 0.6.10-rc.1 --- src/pshtt/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pshtt/_version.py b/src/pshtt/_version.py index fae5dfbf..267ed0d3 100644 --- a/src/pshtt/_version.py +++ b/src/pshtt/_version.py @@ -1,2 +1,2 @@ """This file defines the version of this module.""" -__version__ = "0.6.10" +__version__ = "0.6.10-rc.1" From f697aa601364ccb1ba8b6beb57717db1ab8752a2 Mon Sep 17 00:00:00 2001 From: Shane Frasier Date: Mon, 30 Jan 2023 10:52:23 -0500 Subject: [PATCH 4/5] Correct log message Co-authored-by: dav3r --- src/pshtt/pshtt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pshtt/pshtt.py b/src/pshtt/pshtt.py index 51c0f780..4cc799d6 100644 --- a/src/pshtt/pshtt.py +++ b/src/pshtt/pshtt.py @@ -1611,7 +1611,7 @@ def load_suffix_list(cache_suffix_list=None, update_list=False): # Use the local copy if cache_suffix_list: - utils.debug("Using cached Chrome preload list.", divider=True) + utils.debug("Using cached Public Suffix List.", divider=True) with codecs.open(cache_suffix_list, encoding="utf-8") as cache_file: suffixes = PublicSuffixList(cache_file) # Use the built-in copy From fb108ad298153d4e442c9d83e6e6f8c7c0fd2cd0 Mon Sep 17 00:00:00 2001 From: Jeremy Frasier Date: Mon, 30 Jan 2023 13:33:19 -0500 Subject: [PATCH 5/5] Finalize version from 0.6.10-rc.1 to 0.6.10 --- src/pshtt/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pshtt/_version.py b/src/pshtt/_version.py index 267ed0d3..fae5dfbf 100644 --- a/src/pshtt/_version.py +++ b/src/pshtt/_version.py @@ -1,2 +1,2 @@ """This file defines the version of this module.""" -__version__ = "0.6.10-rc.1" +__version__ = "0.6.10"