Skip to content

Commit

Permalink
Merge pull request #57 from ScholliYT/fix-http-redirects
Browse files Browse the repository at this point in the history
Fix http redirects
  • Loading branch information
ScholliYT authored May 29, 2023
2 parents b4827fb + a074e6a commit 56e9486
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 3 deletions.
4 changes: 4 additions & 0 deletions deadseeker/responsefetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ async def _do_get(
async with session.get(url) as response:
timer.stop()
resp.status = response.status
# Because of redirects the url might have changed. Update it here.
# This fixes https://github.com/ScholliYT/Broken-Links-Crawler-Action/issues/39
urltarget.url = str(response.real_url)

if has_html(response) and is_onsite(urltarget):
resp.html = await response.text()

Expand Down
4 changes: 4 additions & 0 deletions test/mock_server/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,9 @@ <h1>Index</h1>
<a href="/page1.html">To Page 1</a>
<a href="/page4.html">To Page 4</a>
<a href="/subpages/subpage1.html">To Subpage 1</a>
<!-- This link redirects to /subpages/subsubpages/; see https://github.com/ScholliYT/Broken-Links-Crawler-Action/issues/39 -->
<a href="/subpages/subsubpages">To Subpage index</a>
<!-- This link has no redirect; see https://github.com/ScholliYT/Broken-Links-Crawler-Action/issues/39 -->
<a href="/subpages/subsubpages/">To Subpage index /</a>
</body>
</html>
12 changes: 12 additions & 0 deletions test/mock_server/subpages/redirect/redirect.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!doctype html>
<html lang="">
<head>
<meta charset="utf-8">
<title>Redirect</title>
</head>
<body>
<h1>Redirect</h1>
<a href="/page3.html">To Page 3</a>
<a href="/page1.html">To Page 1</a>
</body>
</html>
13 changes: 13 additions & 0 deletions test/mock_server/subpages/subsubpages/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!doctype html>
<html lang="">
<head>
<meta charset="utf-8">
<title>Subpage index</title>
</head>
<body>
<h1>Subpage index</h1>
<a href="/page2.html">To Page 2</a>
<a href="/subpages/redirect/redirect.html">To redirected page via absolute link</a>
<a href="../redirect/redirect.html">To redirected page via relative link</a>
</body>
</html>
19 changes: 16 additions & 3 deletions test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def do_HEAD(self):
self.send_response(HTTPStatus.METHOD_NOT_ALLOWED)
self.end_headers()
return
if not self.check_error():
if not self.check_error() and not self.check_redirect():
super().do_HEAD()

def do_GET(self):
if not self.check_error():
if not self.check_error() and not self.check_redirect():
super().do_GET()

def check_error(self):
Expand All @@ -39,6 +39,15 @@ def check_error(self):
return True
return False

def check_redirect(self):
    """Redirect a request whose path ends in ``/subpages`` to the same path plus ``/``.

    Sends a 301 (Moved Permanently) response with a ``Location`` header
    when the request path ends with ``/subpages``.

    Returns:
        True if the redirect response was sent, so the caller must not
        serve the request any further; False if nothing was sent.
    """
    if not self.path.endswith('/subpages'):
        return False

    self.send_response(HTTPStatus.MOVED_PERMANENTLY)
    # Append the slash rather than using str.replace(): replace() rewrites
    # *every* '/subpages' in the path, so '/subpages/x/subpages' would be
    # redirected to the malformed '/subpages//x/subpages/'.
    self.send_header('Location', self.path + '/')
    self.end_headers()
    return True


def get_free_port():
s = socket.socket(socket.AF_INET, type=socket.SOCK_STREAM)
Expand Down Expand Up @@ -121,7 +130,11 @@ def test_messagesLogged(self):
f'200 - {self.url}/subpages/subpage1.html - ',
f'200 - {self.url}/page2.html - ',
f'200 - {self.url}/subpages/subpage2.html - ',
f'200 - {self.url}/index.html - '
f'200 - {self.url}/index.html - ',
f'200 - {self.url}/subpages/redirect/redirect.html - ',
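# Expected twice: index.html links to both /subpages/subsubpages (which redirects) and
# /subpages/subsubpages/, and since the fix for
# https://github.com/ScholliYT/Broken-Links-Crawler-Action/issues/39 a redirected
# request is reported under its final URL.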
f'200 - {self.url}/subpages/subsubpages/ - ',
f'200 - {self.url}/subpages/subsubpages/ - ',
]
actual_infos: List[str] = []
for call in info_mock.call_args_list:
Expand Down

0 comments on commit 56e9486

Please sign in to comment.