Skip to content

Commit aa7cf9e

Browse files
add docs:links and docs:links:check
1 parent c55cef1 commit aa7cf9e

File tree

2 files changed

+18
-19
lines changed

2 files changed

+18
-19
lines changed

doc/conf.py

+4
Original file line number | Diff line number | Diff line change
@@ -78,3 +78,7 @@
7878
"github_url": "https://github.com/exasol/python-toolbox",
7979
"accent_color": "grass",
8080
}
81+
# -- Configure link checking behavior ----------------------------------------
82+
linkcheck_ignore = [
83+
84+
]

exasol/toolbox/nox/_documentation.py

+14-19
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,7 @@
33
import shutil
44
import subprocess
55
import sys
6-
from urllib import request
7-
import urllib.error
6+
import requests
87
import webbrowser
98
from itertools import repeat
109
from pathlib import Path
@@ -15,7 +14,7 @@
1514
Tuple,
1615
)
1716

18-
17+
import re
1918
import nox
2019
from nox import Session
2120

@@ -67,27 +66,18 @@ def should_filter(url: str) -> bool:
6766
return url.startswith("mailto") or url in _filtered
6867

6968
for file in files:
70-
cmd = ["python", "-m", "urlscan", "-n", f"{file}"]
71-
result = subprocess.run(cmd, capture_output=True)
72-
if result.returncode != 0:
73-
stderr = result.stderr.decode("utf8")
74-
msg = f"Could not retrieve url's from file: {file}, details: {stderr}"
75-
raise Exception(msg)
76-
stdout = result.stdout.decode("utf8").strip()
77-
_urls = (url.strip() for url in stdout.split("\n"))
78-
_urls = (url for url in _urls if url) # filter empty strings and none
79-
yield from zip(repeat(file), filter(lambda url: not should_filter(url), _urls))
69+
urls = re.findall( r"http[s]?://[^\s<>'\"\,\)\]]+[^\s<>'\"\,\.\)\]]" , file.open().read())
70+
yield from zip(repeat(file), filter(lambda url: not should_filter(url), urls))
8071

8172

8273
def _doc_links_check(url: str) -> Tuple[Optional[int], str]:
8374
"""Checks if an url is still working (can be accessed)"""
8475
try:
8576
# User-Agent needs to be faked otherwise some webpages will deny access with a 403
86-
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/10.0"})
87-
result = request.urlopen(req)
88-
return result.code, f"{result.msg}"
89-
except urllib.error.HTTPError as ex:
90-
return ex.code, f"{ex}"
77+
result = requests.get(url, timeout=5)
78+
return result.status_code, f"{result.reason}"
79+
except requests.exceptions.RequestException as ex:
80+
print("error:", ex)
9181

9282

9383
def _git_diff_changes_main() -> int:
@@ -150,10 +140,15 @@ def docs_list_links(session: Session) -> None:
150140
def docs_links_check(session: Session) -> None:
151141
"""Checks whether all links in the documentation are accessible."""
152142
errors = []
153-
for path, url in _doc_urls(_doc_files(PROJECT_CONFIG.root)):
143+
urls = list(_doc_urls(_doc_files(PROJECT_CONFIG.root)))
144+
urls_count = len(urls)
145+
count = 1
146+
for path, url in urls:
147+
print(f"({count}/{urls_count}): {url}")
154148
status, details = _doc_links_check(url)
155149
if status != 200:
156150
errors.append((path, url, status, details))
151+
count += 1
157152

158153
if errors:
159154
session.error(

0 commit comments

Comments (0)