Skip to content

Commit

Permalink
handle error if local subpage has error
Browse files Browse the repository at this point in the history
  • Loading branch information
ScholliYT committed Jul 22, 2020
1 parent 3e8d0e5 commit 6d4bf35
Showing 1 changed file with 25 additions and 8 deletions.
33 changes: 25 additions & 8 deletions deadseeker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'''
deadseeker.py
Seeking out your 404s in around 50 lines of vanilla Python.
Seeking out your 404s in around 100 lines of Python.
'''

import sys
Expand Down Expand Up @@ -41,11 +41,22 @@ def scanner(self):
while self.pages_to_check:
page = self.pages_to_check.pop()
req = Request(page, headers={'User-Agent': agent})
res = request.urlopen(req)
if 'html' in res.headers['content-type']:
with res as f:
body = f.read().decode('utf-8', errors='ignore')
self.feed(body)
try:
res = self.make_request(req)
if 'html' in res.headers['content-type']:
with res as f:
body = f.read().decode('utf-8', errors='ignore')
self.feed(body)
except urllib.error.HTTPError as e:
print(f'::error ::HTTPError: {e.code} - {page}') # (e.g. 404, 501, etc)
self.error_occured = True
except urllib.error.URLError as e:
print(f'::error ::URLError: {e.reason} - {page}') # (e.g. conn. refused)
self.error_occured = True
except ValueError as e:
print(f'::error ::ValueError {e} - {page}') # (e.g. missing protocol http)
self.error_occured = True


def handle_starttag(self, tag, attrs):
'''Override parent method and check tag for our attributes'''
Expand Down Expand Up @@ -80,16 +91,22 @@ def handle_link(self, link):
if self.home in link:
self.pages_to_check.appendleft(link)

@backoff.on_exception(
    backoff.expo,
    (urllib.error.HTTPError, urllib.error.URLError),
    max_time=int(os.environ['INPUT_MAX_RETRY_TIME']),
    max_tries=int(os.environ['INPUT_MAX_RETRIES']),
)  # retry on error
def make_request(self, req):
    '''Open ``req`` and return the still-open response object.

    The call is retried with exponential backoff whenever it raises
    ``urllib.error.HTTPError`` or ``urllib.error.URLError``; the retry
    budget (``max_time`` / ``max_tries``) is read from the
    ``INPUT_MAX_RETRY_TIME`` and ``INPUT_MAX_RETRIES`` environment
    variables when this method is defined.

    The response is handed back unread, so the caller is responsible
    for reading and closing it.
    '''
    return request.urlopen(req)

@backoff.on_exception(
    backoff.expo,
    (urllib.error.HTTPError, urllib.error.URLError),
    max_time=int(os.environ['INPUT_MAX_RETRY_TIME']),
    max_tries=int(os.environ['INPUT_MAX_RETRIES']),
)  # retry on error
def make_statuscode_request(self, req):
    '''Open ``req`` and return only the HTTP status code of the response.

    The call is retried with exponential backoff whenever it raises
    ``urllib.error.HTTPError`` or ``urllib.error.URLError``; the retry
    budget (``max_time`` / ``max_tries``) is read from the
    ``INPUT_MAX_RETRY_TIME`` and ``INPUT_MAX_RETRIES`` environment
    variables when this method is defined.

    Returns:
        The integer status code reported by the response.
    '''
    # Only the status code is needed, so close the response right away
    # instead of leaving the connection open until garbage collection.
    with request.urlopen(req) as res:
        return res.getcode()

# read env variables
website_url = os.environ['INPUT_WEBSITE_URL']
verbose = os.environ['INPUT_VERBOSE']
# Environment values are strings, so compare against the literal 'True'
# to obtain a real boolean for the verbosity flag.
verbose = os.environ['INPUT_VERBOSE'] == 'True'
print("Checking website: " + str(website_url))
print("Verbose mode on: " + str(verbose))

logging.getLogger('backoff').addHandler(logging.StreamHandler())
# Attach a stream handler to the 'backoff' logger when verbose mode is on.
if verbose:
logging.getLogger('backoff').addHandler(logging.StreamHandler())
# NOTE(review): presumably constructing LinkParser starts the scan - confirm.
LinkParser(website_url, verbose)

0 comments on commit 6d4bf35

Please sign in to comment.