Skip to content
This repository was archived by the owner on May 20, 2025. It is now read-only.

Commit 8bada07

Browse files
committed
Improves broken link checker resilience
Enhances the broken link checker by adding retry logic for rate limiting (429) and timeout (408) errors, using exponential backoff. Introduces a mechanism to accept specific rate-limited URLs. Processes links in batches to avoid overwhelming the system. Skips checking empty or already visited URLs.
1 parent 18518d8 commit 8bada07

1 file changed

Lines changed: 155 additions & 52 deletions

File tree

cypress/e2e/broken-links.cy.ts

Lines changed: 155 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,12 @@ const REDIRECT_CODES = [301, 302, 304, 307, 308]
77
// other non standard codes, like 999 from linkedin
88
const OTHER_CODES = [999]
99

10+
// URLs that we accept 429s for
11+
const ACCEPTED_RATE_LIMITED_URLS = [
12+
'https://github.com/nitrictech/nitric',
13+
// Add more URLs here as needed
14+
]
15+
1016
const IGNORED_URLS = [
1117
'googleads.g.doubleclick.net',
1218
'youtube.com/api',
@@ -36,16 +42,47 @@ const IGNORED_URLS = [
3642
const rootBaseUrl = Cypress.config('baseUrl')
3743

3844
const isInternalUrl = (url: string) => {
39-
// check against the base url
40-
// and check if the url does not contain a file extension
41-
return url.startsWith(rootBaseUrl) && !url.includes('.')
45+
return (
46+
url.startsWith(rootBaseUrl) || url.startsWith('./') || url.startsWith('../')
47+
)
48+
}
49+
50+
const getCleanInternalUrl = (url: string, currentPage: string) => {
51+
if (url.startsWith(rootBaseUrl)) {
52+
return url.replace(rootBaseUrl, '')
53+
}
54+
55+
// Handle relative paths
56+
if (url.startsWith('./') || url.startsWith('../')) {
57+
// Get the directory of the current page
58+
const currentDir = currentPage.substring(
59+
0,
60+
currentPage.lastIndexOf('/') + 1,
61+
)
62+
// Resolve the relative path
63+
const fullPath = new URL(url, `${rootBaseUrl}${currentDir}`).pathname
64+
return fullPath.replace(rootBaseUrl, '')
65+
}
66+
67+
return url
4268
}
4369

4470
const isExternalUrl = (url: string) => {
4571
return !url.includes('localhost')
4672
}
4773

48-
const req = (url: string, retryCount = 0, followRedirect = false): any => {
74+
const isAcceptedRateLimitedUrl = (url: string) => {
75+
return ACCEPTED_RATE_LIMITED_URLS.some((acceptedUrl) =>
76+
url.startsWith(acceptedUrl),
77+
)
78+
}
79+
80+
const req = (
81+
url: string,
82+
retryCount = 0,
83+
followRedirect = false,
84+
visitedLinks: Record<string, boolean> = {},
85+
): any => {
4986
return cy
5087
.request({
5188
url,
@@ -54,11 +91,34 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
5491
gzip: false,
5592
})
5693
.then((resp) => {
57-
// retry on timeout and too many requests
58-
if ([408, 429].includes(resp.status) && retryCount < 3) {
59-
cy.log(`request ${url} timed out, retrying again...`)
60-
cy.wait(500)
61-
return req(url, retryCount + 1)
94+
// Handle rate limiting (429) with exponential backoff
95+
if (resp.status === 429 && retryCount < 3) {
96+
const retryAfter = resp.headers['retry-after']
97+
? parseInt(
98+
Array.isArray(resp.headers['retry-after'])
99+
? resp.headers['retry-after'][0]
100+
: resp.headers['retry-after'],
101+
)
102+
: null
103+
const waitTime = retryAfter
104+
? retryAfter * 1000
105+
: Math.min(500 * Math.pow(2, retryCount), 5000)
106+
107+
cy.log(
108+
`Rate limited for ${url}, waiting ${waitTime}ms before retry ${retryCount + 1}/3`,
109+
)
110+
cy.wait(waitTime)
111+
return req(url, retryCount + 1, followRedirect, visitedLinks)
112+
}
113+
114+
// Handle timeouts with exponential backoff
115+
if (resp.status === 408 && retryCount < 3) {
116+
const waitTime = Math.min(200 * Math.pow(2, retryCount), 2000)
117+
cy.log(
118+
`Request timeout for ${url}, waiting ${waitTime}ms before retry ${retryCount + 1}/3`,
119+
)
120+
cy.wait(waitTime)
121+
return req(url, retryCount + 1, followRedirect, visitedLinks)
62122
}
63123

64124
return resp
@@ -67,6 +127,7 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
67127

68128
describe('Broken links test suite', () => {
69129
const VISITED_SUCCESSFUL_LINKS = {}
130+
const BATCH_SIZE = 10 // Process links in batches of 10
70131

71132
pages.forEach((page) => {
72133
it(`Should visit page ${page} and check all links`, () => {
@@ -84,61 +145,103 @@ describe('Broken links test suite', () => {
84145
(l) => href?.includes(l) || src?.includes(l),
85146
)
86147
})
87-
.each((link) => {
88-
cy.log(`link: ${link[0].textContent}`)
89-
const baseUrl = link.prop('href') || link.prop('src')
90-
91-
const url = baseUrl.split('#')[0]
92-
93-
if (VISITED_SUCCESSFUL_LINKS[url]) {
94-
cy.log(`link already checked`)
95-
expect(VISITED_SUCCESSFUL_LINKS[url]).to.be.true
96-
} else {
97-
// if the link is internal then check the link against the pages fixture (sitemap)
98-
if (isInternalUrl(url)) {
99-
// clean the url by removing the base url and query params
100-
const rootBaseUrlRegex = new RegExp(`^${rootBaseUrl}`)
101-
let cleanUrl = url.replace(rootBaseUrlRegex, '')
102-
const queryIndex = cleanUrl.indexOf('?')
103-
cleanUrl =
104-
queryIndex !== -1 ? cleanUrl.slice(0, queryIndex) : cleanUrl
105-
106-
cy.log(`checking internal link: ${cleanUrl}`)
107-
if (!pages.includes(cleanUrl)) {
108-
assert.fail(`${cleanUrl} is not part of the pages fixture`)
109-
} else {
110-
VISITED_SUCCESSFUL_LINKS[url] = true
111-
}
148+
.then(($links) => {
149+
const linkPromises = []
150+
const linksToCheck = []
112151

152+
$links.each((_i, link) => {
153+
const baseUrl =
154+
link.getAttribute('href') || link.getAttribute('src')
155+
if (!baseUrl) {
156+
cy.log('Skipping link with no href/src:', link)
113157
return
114158
}
115159

116-
cy.wait(25)
117-
118-
req(url).then((res: Cypress.Response<any>) => {
119-
let acceptableCodes = CORRECT_CODES
120-
if (REDIRECT_CODES.includes(res.status) && !isExternalUrl(url)) {
121-
assert.fail(
122-
`${url} returned ${res.status} to ${res.headers['location']}`,
123-
)
124-
} else {
125-
acceptableCodes = [
126-
...CORRECT_CODES,
127-
...REDIRECT_CODES,
128-
...OTHER_CODES,
129-
]
160+
// Skip if the URL is just a hash fragment
161+
if (baseUrl.startsWith('#')) {
162+
cy.log('Skipping hash fragment:', baseUrl)
163+
return
164+
}
165+
166+
const url = baseUrl.split('#')[0]
167+
if (!url) {
168+
cy.log('Skipping empty URL from:', baseUrl)
169+
return
170+
}
171+
172+
if (VISITED_SUCCESSFUL_LINKS[url]) {
173+
cy.log(`Skipping already checked link: ${url}`)
174+
return
175+
}
176+
177+
linksToCheck.push(url)
178+
})
179+
180+
// Process links in batches
181+
for (let i = 0; i < linksToCheck.length; i += BATCH_SIZE) {
182+
const batch = linksToCheck.slice(i, i + BATCH_SIZE)
183+
const batchPromises = batch.map((url) => {
184+
if (!url) {
185+
cy.log('Skipping empty URL in batch')
186+
return Promise.resolve()
130187
}
131188

132-
if (acceptableCodes.includes(res.status)) {
189+
if (isInternalUrl(url)) {
190+
const cleanUrl = getCleanInternalUrl(url, page)
191+
if (!pages.includes(cleanUrl)) {
192+
assert.fail(`${cleanUrl} is not part of the pages fixture`)
193+
}
133194
VISITED_SUCCESSFUL_LINKS[url] = true
195+
return Promise.resolve()
134196
}
135197

136-
expect(res.status).oneOf(
137-
acceptableCodes,
138-
`${url} returned ${res.status}`,
198+
return req(url, 0, false, VISITED_SUCCESSFUL_LINKS).then(
199+
(res: Cypress.Response<any>) => {
200+
let acceptableCodes = CORRECT_CODES
201+
if (
202+
REDIRECT_CODES.includes(res.status) &&
203+
!isExternalUrl(url)
204+
) {
205+
assert.fail(
206+
`${url} returned ${res.status} to ${res.headers['location']}`,
207+
)
208+
} else if (res.status === 429) {
209+
// After all retries, if we still get a 429, only mark as successful for accepted URLs
210+
if (isAcceptedRateLimitedUrl(url)) {
211+
cy.log(
212+
`Rate limited for accepted URL ${url} after all retries, marking as successful`,
213+
)
214+
VISITED_SUCCESSFUL_LINKS[url] = true
215+
return
216+
} else {
217+
assert.fail(
218+
`${url} returned 429 (Rate Limited) and is not in the accepted list`,
219+
)
220+
}
221+
} else {
222+
acceptableCodes = [
223+
...CORRECT_CODES,
224+
...REDIRECT_CODES,
225+
...OTHER_CODES,
226+
]
227+
}
228+
229+
if (acceptableCodes.includes(res.status)) {
230+
VISITED_SUCCESSFUL_LINKS[url] = true
231+
}
232+
233+
expect(res.status).oneOf(
234+
acceptableCodes,
235+
`${url} returned ${res.status}`,
236+
)
237+
},
139238
)
140239
})
240+
241+
linkPromises.push(Promise.all(batchPromises))
141242
}
243+
244+
return Promise.all(linkPromises)
142245
})
143246
})
144247
})

0 commit comments

Comments
 (0)