-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
feat(core): Add data collection filtering utilities #20989
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
73bf3a2
38d6140
963b031
9e0d7ae
459d124
f84df0d
de81668
9a146c7
86b9de4
94e722f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| import type { CollectBehavior } from '../../types/datacollection'; | ||
| import { parseCookie } from '../cookie'; | ||
| import { FILTERED_VALUE as FILTERED } from './filtering-snippets'; | ||
| import { filterKeyValueData } from './filterKeyValueData'; | ||
|
|
||
/**
 * Applies a `CollectBehavior` to a raw cookie string.
 *
 * The string is parsed with `parseCookie`; the resulting name/value pairs are
 * then handed to `filterKeyValueData`, which decides per cookie whether the
 * value is kept or replaced.
 *
 * @param cookieString - Raw cookie string, e.g. `theme=dark; locale=en`.
 * @param behavior - Collection behavior; `false` turns collection off.
 * @returns
 *  - `{}` when `behavior` is `false` or when parsing yields no cookies,
 *  - the filtered record when at least one cookie was parsed,
 *  - the `[Filtered]` placeholder string when parsing or filtering throws.
 */
export function filterCookies(cookieString: string, behavior: CollectBehavior): Record<string, string> | string {
  // Off mode: nothing is collected, not even cookie names.
  if (behavior === false) {
    return {};
  }

  try {
    const cookies = parseCookie(cookieString);

    if (Object.keys(cookies).length > 0) {
      return filterKeyValueData(cookies, behavior);
    }

    // Nothing could be extracted from the string, so there is nothing to filter.
    return {};
  } catch {
    // Safety net: if anything above throws, never leak the raw cookie string.
    return FILTERED;
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| import type { CollectBehavior } from '../../types/datacollection'; | ||
| import { FILTERED_VALUE as FILTERED, SENSITIVE_KEY_SNIPPETS } from './filtering-snippets'; | ||
|
|
||
/**
 * Reports whether `key` contains at least one entry of `SENSITIVE_KEY_SNIPPETS`.
 *
 * Only the key is lower-cased before the substring check; the snippets are
 * compared as-is (presumably they are stored in lower case — confirm in
 * `filtering-snippets`).
 */
function isSensitiveKey(key: string): boolean {
  const normalizedKey = key.toLowerCase();

  for (const snippet of SENSITIVE_KEY_SNIPPETS) {
    if (normalizedKey.includes(snippet)) {
      return true;
    }
  }

  return false;
}
|
|
||
/**
 * Applies a `CollectBehavior` to a key-value record.
 *
 * Unless collection is off, every key of `data` appears in the result; only
 * the values differ. A value is either passed through unchanged or replaced
 * with the `[Filtered]` placeholder:
 *
 * - `false`: off mode, an empty record is returned.
 * - `true`: values whose key matches the built-in sensitive snippets are replaced.
 * - `{ deny }`: like `true`, and additionally keys containing any deny term
 *   (case-insensitive substring match) are replaced.
 * - `{ allow }`: only keys containing an allow term (case-insensitive substring
 *   match) are kept; keys matching the built-in sensitive snippets are replaced
 *   even when they are also allowed.
 *
 * @param data - Record to filter. It is not modified.
 * @param behavior - Collection behavior to apply.
 * @returns A new record with the same keys and filtered values, or `{}` in off mode.
 */
export function filterKeyValueData(data: Record<string, string>, behavior: CollectBehavior): Record<string, string> {
  if (behavior === false) {
    return {};
  }

  // The modes are mutually exclusive: exactly one predicate is selected per
  // call, so `data` is walked a single time below.
  let mustFilter: (key: string) => boolean;

  if (behavior === true) {
    mustFilter = isSensitiveKey;
  } else if ('deny' in behavior) {
    const denyTerms = behavior.deny.map(term => term.toLowerCase());
    mustFilter = key => {
      const lowerKey = key.toLowerCase();
      return isSensitiveKey(key) || denyTerms.some(term => lowerKey.includes(term));
    };
  } else {
    // allowList mode
    const allowTerms = behavior.allow.map(term => term.toLowerCase());
    mustFilter = key => {
      // The built-in sensitive list always wins over the allow list.
      if (isSensitiveKey(key)) {
        return true;
      }
      const lowerKey = key.toLowerCase();
      return !allowTerms.some(term => lowerKey.includes(term));
    };
  }

  const filtered: Record<string, string> = {};
  for (const [key, value] of Object.entries(data)) {
    filtered[key] = mustFilter(key) ? FILTERED : value;
  }
  return filtered;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| import type { CollectBehavior } from '../../types/datacollection'; | ||
| import { FILTERED_VALUE as FILTERED } from './filtering-snippets'; | ||
| import { filterKeyValueData } from './filterKeyValueData'; | ||
|
|
||
/**
 * Parses a query string into a plain record using `URLSearchParams`.
 *
 * When a parameter name occurs more than once, the last value wins because
 * each occurrence overwrites the previous entry.
 *
 * @param queryString - Query string to parse.
 * @returns The parsed parameters, or `undefined` when no parameter was found
 *   or when parsing throws.
 */
function parseQueryParams(queryString: string): Record<string, string> | undefined {
  const collected: Record<string, string> = {};

  try {
    new URLSearchParams(queryString).forEach((paramValue, paramName) => {
      collected[paramName] = paramValue;
    });
  } catch {
    return undefined;
  }

  // An empty result is reported the same way as a failed parse.
  if (Object.keys(collected).length === 0) {
    return undefined;
  }
  return collected;
}
|
|
||
/**
 * Applies a `CollectBehavior` to a query parameter string.
 *
 * The string is parsed with `parseQueryParams` and the resulting pairs are
 * filtered individually by `filterKeyValueData`.
 *
 * @param queryString - Query string to filter.
 * @param behavior - Collection behavior; `false` turns collection off.
 * @returns
 *  - `{}` when `behavior` is `false`,
 *  - the `[Filtered]` placeholder string when `parseQueryParams` returns
 *    `undefined` (parsing failed or no parameter was found),
 *  - the filtered record otherwise.
 *
 * NOTE(review): a query string without any parameter therefore yields
 * `[Filtered]`, whereas `filterCookies` returns `{}` for a cookie string
 * without cookies — confirm this difference is intended.
 */
export function filterQueryParams(queryString: string, behavior: CollectBehavior): Record<string, string> | string {
  // Off mode: nothing is collected, not even parameter names.
  if (behavior === false) {
    return {};
  }

  const params = parseQueryParams(queryString);

  return params != null ? filterKeyValueData(params, behavior) : FILTERED;
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| import type { DataCollection, ResolvedDataCollection } from '../../types/datacollection'; | ||
| import { defaultPiiToCollectionOptions } from './defaultPiiToCollectionOptions'; | ||
|
|
||
/**
 * Fallback values used for every field the user did not set when a
 * `dataCollection` object is provided.
 */
const DEFAULTS: ResolvedDataCollection = {
  userInfo: false,
  cookies: true,
  httpHeaders: { request: true, response: true },
  httpBodies: [],
  queryParams: true,
  genAI: { inputs: true, outputs: true },
  stackFrameVariables: true,
  frameContextLines: 5,
};

/**
 * Computes the effective data collection configuration from client options.
 *
 * Resolution order for each field:
 * 1. The value explicitly set in `options.dataCollection`, if any.
 * 2. Otherwise the fallback, which is
 *    - `DEFAULTS` when `options.dataCollection` is provided, or
 *    - the result of `defaultPiiToCollectionOptions(options.sendDefaultPii)`
 *      when `options.dataCollection` is `null`/`undefined`.
 *
 * `httpHeaders` and `genAI` are resolved per nested field, so a partially
 * specified object only overrides the fields it contains.
 *
 * NOTE(review): when `httpBodies` is not set, the fallback's array is returned
 * by reference (for `DEFAULTS` that is the module-level array) — callers
 * presumably must not mutate it; confirm.
 *
 * @param options - Client options carrying `dataCollection` and/or `sendDefaultPii`.
 * @returns A configuration object with every field populated.
 */
export function resolveDataCollectionOptions(options: {
  dataCollection?: DataCollection;
  sendDefaultPii?: boolean;
}): ResolvedDataCollection {
  const { dataCollection, sendDefaultPii } = options;

  // Without a `dataCollection` object the legacy `sendDefaultPii` flag decides the fallback.
  const fallback = dataCollection == null ? defaultPiiToCollectionOptions(sendDefaultPii) : DEFAULTS;

  const explicit: DataCollection = dataCollection ?? {};
  const explicitHeaders = explicit.httpHeaders;
  const explicitGenAI = explicit.genAI;

  return {
    userInfo: explicit.userInfo ?? fallback.userInfo,
    cookies: explicit.cookies ?? fallback.cookies,
    httpHeaders: {
      request: explicitHeaders?.request ?? fallback.httpHeaders.request,
      response: explicitHeaders?.response ?? fallback.httpHeaders.response,
    },
    httpBodies: explicit.httpBodies ?? fallback.httpBodies,
    queryParams: explicit.queryParams ?? fallback.queryParams,
    genAI: {
      inputs: explicitGenAI?.inputs ?? fallback.genAI.inputs,
      outputs: explicitGenAI?.outputs ?? fallback.genAI.outputs,
    },
    stackFrameVariables: explicit.stackFrameVariables ?? fallback.stackFrameVariables,
    frameContextLines: explicit.frameContextLines ?? fallback.frameContextLines,
  };
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| import { describe, expect, it } from 'vitest'; | ||
| import { filterCookies } from '../../../../src/utils/data-collection/filterCookies'; | ||
|
|
||
// Unit tests for `filterCookies`: one group per `CollectBehavior` mode, plus
// empty/unparseable input and value edge cases.
describe('filterCookies', () => {
  describe('off mode (false)', () => {
    it('returns empty record', () => {
      expect(filterCookies('theme=dark; user_session=abc123', false)).toEqual({});
    });
  });

  describe('denyList mode (true)', () => {
    it('filters sensitive cookie names and preserves safe ones', () => {
      const result = filterCookies('theme=dark; user_session=abc123; locale=en', true);

      expect(result).toEqual({
        theme: 'dark',
        user_session: '[Filtered]', // matches "session"
        locale: 'en',
      });
    });

    it('filters auth-related cookies', () => {
      const result = filterCookies('auth_token=xyz; color=blue', true);

      expect(result).toEqual({
        auth_token: '[Filtered]', // matches "auth" and "token"
        color: 'blue',
      });
    });
  });

  describe('denyList mode ({ deny: [...] })', () => {
    it('applies extra deny terms on top of built-in denylist', () => {
      const result = filterCookies('theme=dark; tracking_id=abc', { deny: ['tracking'] });

      expect(result).toEqual({
        theme: 'dark',
        tracking_id: '[Filtered]',
      });
    });
  });

  describe('allowList mode ({ allow: [...] })', () => {
    it('only allows specified cookie names to pass through', () => {
      const result = filterCookies('theme=dark; user_session=abc; locale=en', {
        allow: ['theme', 'locale'],
      });

      expect(result).toEqual({
        theme: 'dark',
        user_session: '[Filtered]', // not in the allow list
        locale: 'en',
      });
    });

    it('sensitive denylist overrides allowlist', () => {
      const result = filterCookies('auth_token=secret', { allow: ['auth_token'] });

      expect(result).toEqual({
        auth_token: '[Filtered]', // "auth" and "token" match sensitive denylist
      });
    });
  });

  describe('empty and unparseable input', () => {
    it('returns empty record for empty string', () => {
      expect(filterCookies('', true)).toEqual({});
    });

    it('returns empty record for string with no key-value pairs', () => {
      expect(filterCookies(';;;', true)).toEqual({});
    });

    // parseCookie doesn't throw for malformed strings, so the catch block in
    // filterCookies is a safety net that cannot be reached with plain input.
    // Marked as todo instead of an empty test body, which would always pass
    // without verifying anything.
    it.todo('returns [Filtered] when parsing throws');
  });

  describe('edge cases', () => {
    it('handles cookies with = in the value', () => {
      const result = filterCookies('data=base64==; theme=light', true);

      expect(result).toEqual({
        data: 'base64==',
        theme: 'light',
      });
    });

    it('handles quoted cookie values', () => {
      const result = filterCookies('theme="dark mode"', true);

      expect(result).toEqual({
        theme: 'dark mode',
      });
    });
  });
});
Uh oh!
There was an error while loading. Please reload this page.