 3 |  3 | import shutil
 4 |  4 | import subprocess
 5 |  5 | import sys
 6 |    | -from urllib import request
 7 |    | -import urllib.error
   |  6 | +import requests
 8 |  7 | import webbrowser
 9 |  8 | from itertools import repeat
10 |  9 | from pathlib import Path

15 | 14 |     Tuple,
16 | 15 | )
17 | 16 |
18 |    | -
   | 17 | +import re
19 | 18 | import nox
20 | 19 | from nox import Session
21 | 20 |
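Note for reviewers: requests is a third-party dependency, unlike the urllib modules it replaces, and it behaves differently on HTTP error statuses. A rough side-by-side sketch of the two call styles (not part of the commit; the url value is illustrative):

import urllib.request
import requests

url = "https://example.com"  # illustrative

# before: stdlib urllib; urlopen raises urllib.error.HTTPError on 4xx/5xx
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/10.0"})
with urllib.request.urlopen(req) as result:
    status, reason = result.code, result.msg

# after: requests; get() returns normally on 4xx/5xx, so the status must be inspected
result = requests.get(url, headers={"User-Agent": "Mozilla/10.0"}, timeout=5)
status, reason = result.status_code, result.reason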
@@ -67,27 +66,18 @@ def should_filter(url: str) -> bool:
 67 | 66 |         return url.startswith("mailto") or url in _filtered
 68 | 67 |
 69 | 68 |     for file in files:
 70 |    | -        cmd = ["python", "-m", "urlscan", "-n", f"{file}"]
 71 |    | -        result = subprocess.run(cmd, capture_output=True)
 72 |    | -        if result.returncode != 0:
 73 |    | -            stderr = result.stderr.decode("utf8")
 74 |    | -            msg = f"Could not retrieve url's from file: {file}, details: {stderr}"
 75 |    | -            raise Exception(msg)
 76 |    | -        stdout = result.stdout.decode("utf8").strip()
 77 |    | -        _urls = (url.strip() for url in stdout.split("\n"))
 78 |    | -        _urls = (url for url in _urls if url)  # filter empty strings and none
 79 |    | -        yield from zip(repeat(file), filter(lambda url: not should_filter(url), _urls))
    | 69 | +        urls = re.findall(r"http[s]?://[^\s<>'\"\,\)\]]+[^\s<>'\"\,\.\)\]]", file.read_text())
    | 70 | +        yield from zip(repeat(file), filter(lambda url: not should_filter(url), urls))
 80 | 71 |
 81 | 72 |
 82 | 73 | def _doc_links_check(url: str) -> Tuple[Optional[int], str]:
 83 | 74 |     """Checks if an url is still working (can be accessed)"""
 84 | 75 |     try:
 85 | 76 |         # User-Agent needs to be faked otherwise some webpages will deny access with a 403
 86 |    | -        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/10.0"})
 87 |    | -        result = request.urlopen(req)
 88 |    | -        return result.code, f"{result.msg}"
 89 |    | -    except urllib.error.HTTPError as ex:
 90 |    | -        return ex.code, f"{ex}"
    | 77 | +        result = requests.get(url, headers={"User-Agent": "Mozilla/10.0"}, timeout=5)
    | 78 | +        return result.status_code, f"{result.reason}"
    | 79 | +    except requests.exceptions.RequestException as ex:
    | 80 | +        return None, f"{ex}"
 91 | 81 |
 92 | 82 |
 93 | 83 | def _git_diff_changes_main() -> int:
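For context, a minimal standalone sketch of the new extract-then-check flow (the sample text and URLs are illustrative, not from the repository):

import re
import requests

URL_PATTERN = r"http[s]?://[^\s<>'\"\,\)\]]+[^\s<>'\"\,\.\)\]]"

text = "See https://example.com/docs, or http://example.org."
# findall with a group-free pattern returns each match as a plain string
urls = re.findall(URL_PATTERN, text)

for url in urls:
    try:
        # fake the User-Agent so servers that reject default clients do not 403
        response = requests.get(url, headers={"User-Agent": "Mozilla/10.0"}, timeout=5)
        print(url, response.status_code, response.reason)
    except requests.exceptions.RequestException as ex:
        print(url, None, ex)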
@@ -150,10 +140,15 @@ def docs_list_links(session: Session) -> None:
150 | 140 | def docs_links_check(session: Session) -> None:
151 | 141 |     """Checks whether all links in the documentation are accessible."""
152 | 142 |     errors = []
153 |     | -    for path, url in _doc_urls(_doc_files(PROJECT_CONFIG.root)):
    | 143 | +    urls = list(_doc_urls(_doc_files(PROJECT_CONFIG.root)))
    | 144 | +    urls_count = len(urls)
    | 145 | +    count = 1
    | 146 | +    for path, url in urls:
    | 147 | +        print(f"({count}/{urls_count}): {url}")
154 | 148 |         status, details = _doc_links_check(url)
155 | 149 |         if status != 200:
156 | 150 |             errors.append((path, url, status, details))
    | 151 | +        count += 1
157 | 152 |
158 | 153 |     if errors:
159 | 154 |         session.error(
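The manual counter could also be folded into the loop with enumerate; a sketch of that variant, reusing the names from the hunk above (not part of the commit):

for count, (path, url) in enumerate(urls, start=1):
    print(f"({count}/{urls_count}): {url}")
    status, details = _doc_links_check(url)
    if status != 200:
        errors.append((path, url, status, details))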