Skip to content
This repository was archived by the owner on Aug 15, 2025. It is now read-only.

Commit da84957

Browse files
committed
refactor(web-page): group content extractor script arguments into a context object
1 parent bc163f5 commit da84957

File tree

5 files changed

+34
-28
lines changed

5 files changed

+34
-28
lines changed

src/api/web_page/content/get.test.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { mock, test } from 'node:test';
44
import type { Browser } from 'playwright/index.js';
55

66
import { registerWebPageContentGetRoutes } from './get.js';
7+
import type { WebPageContext } from './web_page_context.js';
78
import {
89
createBrowserContextMock,
910
createBrowserMock,
@@ -119,8 +120,8 @@ await test('[/api/web_page/content] can proxy requests', async () => {
119120
await test('[/api/web_page/content] can inject content extractor', async (t) => {
120121
t.mock.method(Date, 'now', () => 123000);
121122

122-
const extractContentMock = mock.fn((previousContent: unknown) => {
123-
return Promise.resolve({ message: (previousContent as { message: string }).message.toUpperCase() });
123+
const extractContentMock = mock.fn((context: WebPageContext) => {
124+
return Promise.resolve({ message: (context.previous as { message: string }).message.toUpperCase() });
124125
});
125126

126127
const windowMock = createWindowMock({ __secutils: { extractContent: extractContentMock } });
@@ -170,7 +171,9 @@ await test('[/api/web_page/content] can inject content extractor', async (t) =>
170171

171172
// Make sure we called the content extractor with the expected context.
172173
assert.strictEqual(extractContentMock.mock.callCount(), 1);
173-
assert.deepEqual(extractContentMock.mock.calls[0].arguments, [{ message: 'hello' }, [], {}]);
174+
assert.deepEqual(extractContentMock.mock.calls[0].arguments, [
175+
{ previous: { message: 'hello' }, externalResources: [], responseHeaders: {} },
176+
]);
174177
});
175178

176179
await test('[/api/web_page/content] reports errors in content extractor', async (t) => {
@@ -211,5 +214,7 @@ await test('[/api/web_page/content] reports errors in content extractor', async
211214

212215
// Make sure we called the content extractor with the expected context.
213216
assert.strictEqual(extractContentMapMock.mock.callCount(), 1);
214-
assert.deepEqual(extractContentMapMock.mock.calls[0].arguments, ['previous', [], {}]);
217+
assert.deepEqual(extractContentMapMock.mock.calls[0].arguments, [
218+
{ previous: 'previous', externalResources: [], responseHeaders: {} },
219+
]);
215220
});

src/api/web_page/content/get.ts

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ import type { FastifyBaseLogger } from 'fastify';
66
import jsBeautify from 'js-beautify';
77
import type { Browser, JSHandle, Page, Response } from 'playwright';
88

9+
import type { WebPageContext } from './web_page_context.js';
910
import { createObjectHash } from '../../../utilities/index.js';
1011
import type { ApiResult } from '../../api_result.js';
1112
import type { ApiRouteParams } from '../../api_route_params.js';
1213
import { Diagnostics } from '../../diagnostics.js';
1314
import { DEFAULT_DELAY_MS, DEFAULT_TIMEOUT_MS } from '../constants.js';
14-
import { type FetchedResource, FetchInterceptor } from '../fetch_interceptor.js';
15+
import { FetchInterceptor } from '../fetch_interceptor.js';
1516
import type { SecutilsWindow } from '../index.js';
1617

1718
// Maximum size of the content in bytes (200KB).
@@ -169,9 +170,7 @@ async function getContent(
169170
if (scripts?.extractContent) {
170171
log.debug(`[${url}] Adding "extractContent" function: ${scripts.extractContent}.`);
171172
await page.addInitScript({
172-
content: `self.__secutils = { async extractContent(previousContent, externalResources, responseHeaders) {
173-
${scripts.extractContent} }
174-
}`,
173+
content: `self.__secutils = { async extractContent(context) { ${scripts.extractContent} } };`,
175174
});
176175
}
177176

@@ -221,7 +220,11 @@ async function getContent(
221220
const externalResources = await fetchInterceptor.stop();
222221
extractedContent = jsonStableStringify(
223222
scripts?.extractContent
224-
? await extractContent(page, previousContent, externalResources, (await response?.allHeaders()) ?? {})
223+
? await extractContent(page, {
224+
previous: previousContent,
225+
externalResources,
226+
responseHeaders: (await response?.allHeaders()) ?? {},
227+
})
225228
: jsBeautify.html_beautify(await page.content()),
226229
);
227230
} catch (err) {
@@ -256,15 +259,10 @@ async function getContent(
256259
return { type: 'success', data: { timestamp, content: extractedContent } };
257260
}
258261

259-
async function extractContent(
260-
page: Page,
261-
previousContent: string | undefined,
262-
externalResources: FetchedResource[],
263-
responseHeaders: Record<string, string>,
264-
): Promise<unknown> {
262+
async function extractContent(page: Page, context: WebPageContext<string>): Promise<unknown> {
265263
const targetWindow = await page.evaluateHandle<Window>('window');
266264
return await page.evaluate(
267-
async ([targetWindow, previousContent, externalResources, responseHeaders]) => {
265+
async ([targetWindow, context]) => {
268266
const extractContent = targetWindow.__secutils?.extractContent;
269267
if (extractContent && typeof extractContent !== 'function') {
270268
console.error(`[browser] Invalid "extractContent" function: ${typeof extractContent}`);
@@ -274,11 +272,10 @@ async function extractContent(
274272

275273
try {
276274
return typeof extractContent === 'function'
277-
? (await extractContent(
278-
previousContent !== undefined ? JSON.parse(previousContent) : previousContent,
279-
externalResources,
280-
responseHeaders,
281-
)) ?? null
275+
? (await extractContent({
276+
...context,
277+
previous: context.previous !== undefined ? JSON.parse(context.previous) : context.previous,
278+
})) ?? null
282279
: null;
283280
} catch (err: unknown) {
284281
console.error(`[browser] Content extractor script has thrown an exception: ${(err as Error)?.message ?? err}.`);
@@ -287,6 +284,6 @@ async function extractContent(
287284
throw new Error(`Content extractor script has thrown an exception: ${(err as Error)?.message ?? err}.`);
288285
}
289286
},
290-
[targetWindow as JSHandle<SecutilsWindow>, previousContent, externalResources, responseHeaders] as const,
287+
[targetWindow as JSHandle<SecutilsWindow>, context] as const,
291288
);
292289
}

src/api/web_page/content/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
export { registerWebPageContentGetRoutes } from './get.js';
2+
export type { WebPageContext } from './web_page_context.js';

src/api/web_page/content/web_page_context.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import type { FetchedResource } from '../fetch_interceptor.js';
2+
3+
export interface WebPageContext<T = unknown> {
4+
previous?: T;
5+
responseHeaders: Record<string, string>;
6+
externalResources: FetchedResource[];
7+
}

src/api/web_page/index.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
import { registerWebPageContentGetRoutes } from './content/index.js';
2-
import type { FetchedResource } from './fetch_interceptor.js';
2+
import type { WebPageContext } from './content/index.js';
33
import { registerWebPageResourcesListRoutes } from './resources/index.js';
44
import type { WebPageResourceWithRawData } from './resources/list.js';
55
import type { ApiRouteParams } from '../api_route_params.js';
66

77
export interface SecutilsWindow extends Window {
88
__secutils?: {
99
resourceFilterMap?: (resource: WebPageResourceWithRawData) => WebPageResourceWithRawData | null;
10-
extractContent?: (
11-
previousContent: unknown,
12-
externalResources: FetchedResource[],
13-
responseHeaders: Record<string, string>,
14-
) => Promise<unknown>;
10+
extractContent?: (context: WebPageContext) => Promise<unknown>;
1511
};
1612
}
1713

0 commit comments

Comments
 (0)