diff --git a/noisy.py b/noisy.py index 341389c..5b56096 100644 --- a/noisy.py +++ b/noisy.py @@ -101,7 +101,10 @@ def _is_blacklisted(self, url): :param url: full URL :return: boolean indicating whether a URL is blacklisted or not """ - return any(blacklisted_url in url for blacklisted_url in self._config["blacklisted_urls"]) + try: + return any(blacklisted_url in url for blacklisted_url in self._config["blacklisted_urls"]) + except UnicodeDecodeError: + return True def _should_accept_url(self, url): """ @@ -171,7 +174,7 @@ def _browse_from_links(self, depth=0): # remove the dead-end link from our list self._remove_and_blacklist(random_link) - except requests.exceptions.RequestException: + except (requests.exceptions.RequestException, UnicodeDecodeError): logging.debug("Exception on URL: %s, removing from list and trying again!" % random_link) self._remove_and_blacklist(random_link)