Skip to content

Commit b82df7e

Browse files
authored
Merge branch 'main' into v6
2 parents d156b0d + e3d9715 commit b82df7e

File tree

5 files changed

+61
-46
lines changed

5 files changed

+61
-46
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ concurrency:
1212
cancel-in-progress: true
1313

1414
env:
15-
NODE_OPTIONS: "--max_old_space_size=16384"
15+
NODE_OPTIONS: "--max_old_space_size=8192"
1616

1717
jobs:
1818
astrocheck:

scripts/lib/linkcheck/base/page.ts

Lines changed: 56 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,14 @@ export class HtmlPage {
1818
* Example: `/en/getting-started/`
1919
*/
2020
readonly pathname: string;
21-
22-
readonly dom: Document;
23-
24-
readonly anchors: Element[];
21+
/**
22+
* A list of all anchor elements on the page.
23+
*/
24+
readonly anchors: Array<{
25+
label: string;
26+
name: string;
27+
href: string;
28+
}>;
2529
/**
2630
* A list of unique link hrefs on the page.
2731
*/
@@ -46,12 +50,11 @@ export class HtmlPage {
4650
*/
4751
readonly isRedirect: boolean;
4852
/**
49-
* The element containing the page's main content.
53+
* Indicates whether the page contains content.
5054
*
51-
* Prefers the first `<article>` element, with a fallback to `<body>` if no article was found,
52-
* and finally `null` if the page even doesn't have a body.
55+
* Set to `false` if the page doesn't even have a `<body>` element (a partial or invalid page).
5356
*/
54-
readonly mainContent: Element | null;
57+
readonly hasContent: boolean;
5558
/**
5659
* The language of the page's main content.
5760
*
@@ -71,25 +74,34 @@ export class HtmlPage {
7174

7275
constructor({ html, href, pathname }: { html: string; href: string; pathname: string }) {
7376
// Attempt to read the HTML file and parse its DOM
74-
this.dom = parseDocument(html);
77+
const parser = new DocumentParser(parseDocument(html));
7578
this.href = href;
7679
this.pathname = pathname;
7780

7881
// Provide commonly used data as properties
79-
this.anchors = DomUtils.getElementsByTagName('a', this.dom, true);
82+
this.anchors = DomUtils.getElementsByTagName('a', parser.dom, true).map((el) => ({
83+
// Pass the strings through Buffer to allow Node to reallocate them into independent memory
84+
// instead of using slices of the original large string containing the full HTML document.
85+
//
86+
// This reduces memory usage significantly: at the time of writing, from 2.1GiB to 300MiB.
87+
label: Buffer.from(DomUtils.innerText(el)).toString(),
88+
name: el.attribs.name && Buffer.from(el.attribs.name).toString(),
89+
href: el.attribs.href && Buffer.from(el.attribs.href).toString(),
90+
}));
8091

8192
// Build a list of unique link hrefs on the page
82-
this.uniqueLinkHrefs = [...new Set(this.anchors.map((el) => decodeURI(el.attribs.href)))];
93+
this.uniqueLinkHrefs = [...new Set(this.anchors.map((el) => decodeURI(el.href)))];
8394

8495
// Build a list of hashes that can be used as URL fragments to jump to parts of the page
85-
const anchorNames = this.anchors
86-
.map((el) => el.attribs.name)
87-
.filter((name) => name !== undefined);
88-
const ids = this.findAll((el) => Boolean(el.attribs.id)).map((el) => el.attribs.id);
96+
const anchorNames = this.anchors.map((el) => el.name).filter((name) => name !== undefined);
97+
const ids = parser
98+
.findAll((el) => Boolean(el.attribs.id))
99+
// Same reason as above.
100+
.map((el) => Buffer.from(el.attribs.id).toString());
89101
this.hashes = [...anchorNames, ...ids].map((name) => `#${name}`);
90102

91103
// Check if the page redirects somewhere else using meta refresh
92-
const metaRefreshElement = this.findFirst(
104+
const metaRefreshElement = parser.findFirst(
93105
(el) =>
94106
el.tagName.toLowerCase() === 'meta' && el.attribs['http-equiv']?.toLowerCase() === 'refresh'
95107
);
@@ -99,22 +111,24 @@ export class HtmlPage {
99111
this.isRedirect = Boolean(this.redirectTargetUrl);
100112

101113
// Get the page's canonical URL (if any)
102-
const linkCanonicalElement = this.findFirst(
114+
const linkCanonicalElement = parser.findFirst(
103115
(el) =>
104116
el.tagName.toLowerCase() === 'link' && el.attribs['rel']?.toLowerCase() === 'canonical'
105117
);
106118
this.canonicalUrl =
107119
(linkCanonicalElement && new URL(linkCanonicalElement.attribs['href'])) || null;
108120

109121
// Attempt to find the page's main content element
110-
this.mainContent =
111-
this.findFirst((el) => el.tagName.toLowerCase() === 'main') ||
112-
this.findFirst((el) => el.tagName.toLowerCase() === 'body');
122+
const mainContent =
123+
parser.findFirst((el) => el.tagName.toLowerCase() === 'main') ||
124+
parser.findFirst((el) => el.tagName.toLowerCase() === 'body');
125+
126+
this.hasContent = Boolean(mainContent);
113127

114128
// Attempt to determine the main content language by traversing the tree upwards
115129
// until we find an element with a `lang` attribute
116130
const mainContentParentWithLang =
117-
this.mainContent && this.findParent(this.mainContent, (el) => Boolean(el.attribs?.lang));
131+
mainContent && parser.findParent(mainContent, (el) => Boolean(el.attribs?.lang));
118132
this.mainContentLang = mainContentParentWithLang?.attribs.lang || null;
119133

120134
// Attempt to determine the page's pathname-based language
@@ -125,23 +139,6 @@ export class HtmlPage {
125139
Boolean(this.pathnameLang) && this.pathnameLang !== 'en' && this.mainContentLang === 'en';
126140
}
127141

128-
findFirst(test: (elem: Element) => boolean) {
129-
return DomUtils.findOne(test, this.dom.children);
130-
}
131-
132-
findAll(test: (elem: Element) => boolean) {
133-
return DomUtils.findAll(test, this.dom.children);
134-
}
135-
136-
findParent(start: Element, test: (elem: Element) => boolean) {
137-
let el: Element | null = start;
138-
while (el) {
139-
if (test(el)) return el;
140-
el = DomUtils.getParent(el) as Element;
141-
}
142-
return null;
143-
}
144-
145142
/**
146143
* Determines the URL pathname that should be used to link to this page
147144
* from a page with the given source language.
@@ -163,3 +160,24 @@ export class HtmlPage {
163160
if (firstPathPart.match(/^[a-z]{2}(-[a-zA-Z]{2})?$/)) return firstPathPart;
164161
}
165162
}
163+
164+
class DocumentParser {
165+
constructor(public readonly dom: Document) {}
166+
167+
findFirst(test: (elem: Element) => boolean) {
168+
return DomUtils.findOne(test, this.dom.children);
169+
}
170+
171+
findAll(test: (elem: Element) => boolean) {
172+
return DomUtils.findAll(test, this.dom.children);
173+
}
174+
175+
findParent(start: Element, test: (elem: Element) => boolean) {
176+
let el: Element | null = start;
177+
while (el) {
178+
if (test(el)) return el;
179+
el = DomUtils.getParent(el) as Element;
180+
}
181+
return null;
182+
}
183+
}

scripts/lib/linkcheck/checks/good-link-label.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import { DomUtils } from 'htmlparser2';
21
import kleur from 'kleur';
32
import { dedentMd } from '../../output.mjs';
43
import { CheckBase, type CheckHtmlPageContext } from '../base/check';
@@ -20,15 +19,13 @@ export class GoodLabels extends CheckBase {
2019
if (context.page.isLanguageFallback) return;
2120

2221
context.page.anchors.forEach((anchor) => {
23-
const linkLabel = DomUtils.innerText(anchor)
24-
.replace(/[\n\s\t]+/g, ' ')
25-
.trim();
22+
const linkLabel = anchor.label.replace(/[\n\s\t]+/g, ' ').trim();
2623

2724
if (!blocklist.has(linkLabel.toLowerCase())) return;
2825

2926
context.report({
3027
type: GoodLabels.BadLabel,
31-
linkHref: anchor.attribs.href,
28+
linkHref: anchor.href,
3229
annotationText: dedentMd`Found link label “${linkLabel}”.
3330
Please use descriptive accessible text for labels instead
3431
of short undescriptive labels like “here” or “read more”.`,

scripts/lib/linkcheck/steps/build-index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ function parsePage(pathname: string, options: LinkCheckerOptions): HtmlPage {
5151
const htmlPage = new HtmlPage({ html, href, pathname });
5252

5353
// Do not allow pages without main content unless they are a redirect
54-
if (!htmlPage.isRedirect && !htmlPage.mainContent)
54+
if (!htmlPage.isRedirect && !htmlPage.hasContent)
5555
throw new Error('Failed to find main content - page has no <article> or <body>');
5656

5757
// Do not allow pages without a main content "lang" attribute unless they are a redirect

src/content/nav/de.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { navDictionary } from '../../util/navDictionary';
22

33
export default navDictionary({
4-
start: "Los geht's",
4+
start: 'Los geht’s',
55
'start.welcome': 'Willkommen, Welt!',
66
'start.newProject': 'Ein neues Projekt starten',
77
'start.config': 'Konfiguration',

0 commit comments

Comments
 (0)