Skip to content

Commit 7edc9ae

Browse files
committed
fix: data transformer should handle more edge cases with more tests
1 parent 9ac534b commit 7edc9ae

13 files changed

+2479
-283
lines changed

.secrets.baseline

Lines changed: 114 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"files": null,
44
"lines": null
55
},
6-
"generated_at": "2025-10-28T23:54:36Z",
6+
"generated_at": "2025-10-30T18:42:05Z",
77
"plugins_used": [
88
{
99
"name": "AWSKeyDetector"
@@ -245,21 +245,21 @@
245245
{
246246
"hashed_secret": "6eff76dbb551ea7769517f3a7188862f85a7354e",
247247
"is_verified": false,
248-
"line_number": 69,
248+
"line_number": 79,
249249
"type": "Base64 High Entropy String",
250250
"verified_result": null
251251
},
252252
{
253253
"hashed_secret": "4f043b03a11e8b760986f07490ee76f48c6c7342",
254254
"is_verified": false,
255-
"line_number": 71,
255+
"line_number": 81,
256256
"type": "Base64 High Entropy String",
257257
"verified_result": null
258258
},
259259
{
260260
"hashed_secret": "618eb19f52dfe817f292f54554346cad3e3a88b3",
261261
"is_verified": false,
262-
"line_number": 71,
262+
"line_number": 81,
263263
"type": "Base64 High Entropy String",
264264
"verified_result": null
265265
}
@@ -268,7 +268,116 @@
268268
{
269269
"hashed_secret": "6eff76dbb551ea7769517f3a7188862f85a7354e",
270270
"is_verified": false,
271-
"line_number": 236,
271+
"line_number": 234,
272+
"type": "Base64 High Entropy String",
273+
"verified_result": null
274+
}
275+
],
276+
"apps/web/app/Helpers/data-transformer.edge-cases.test.ts": [
277+
{
278+
"hashed_secret": "9716c4510d038dc8f853fc40dc96f8a2a87bb51c",
279+
"is_verified": false,
280+
"line_number": 162,
281+
"type": "Base64 High Entropy String",
282+
"verified_result": null
283+
},
284+
{
285+
"hashed_secret": "4f043b03a11e8b760986f07490ee76f48c6c7342",
286+
"is_verified": false,
287+
"line_number": 165,
288+
"type": "Base64 High Entropy String",
289+
"verified_result": null
290+
},
291+
{
292+
"hashed_secret": "9aa7cde469abc42954177388692fa3c14663338d",
293+
"is_verified": false,
294+
"line_number": 166,
295+
"type": "Base64 High Entropy String",
296+
"verified_result": null
297+
}
298+
],
299+
"apps/web/app/Helpers/data-transformer.realworld.test.ts": [
300+
{
301+
"hashed_secret": "c5b49f145071d1dfaa7b852fbac2cc2d452c033d",
302+
"is_verified": false,
303+
"line_number": 15,
304+
"type": "Base64 High Entropy String",
305+
"verified_result": null
306+
},
307+
{
308+
"hashed_secret": "565d3a24396607e26491162b13a69050b37c7be7",
309+
"is_verified": false,
310+
"line_number": 17,
311+
"type": "Base64 High Entropy String",
312+
"verified_result": null
313+
},
314+
{
315+
"hashed_secret": "9690ab4554c9150bb66f9019049b7ed18998d6ab",
316+
"is_verified": false,
317+
"line_number": 22,
318+
"type": "Base64 High Entropy String",
319+
"verified_result": null
320+
},
321+
{
322+
"hashed_secret": "4f043b03a11e8b760986f07490ee76f48c6c7342",
323+
"is_verified": false,
324+
"line_number": 25,
325+
"type": "Base64 High Entropy String",
326+
"verified_result": null
327+
},
328+
{
329+
"hashed_secret": "9aa7cde469abc42954177388692fa3c14663338d",
330+
"is_verified": false,
331+
"line_number": 26,
332+
"type": "Base64 High Entropy String",
333+
"verified_result": null
334+
},
335+
{
336+
"hashed_secret": "b7e41a1408b0de53b6a18b0383983df52151bffd",
337+
"is_verified": false,
338+
"line_number": 60,
339+
"type": "Base64 High Entropy String",
340+
"verified_result": null
341+
},
342+
{
343+
"hashed_secret": "7f8a4c8efb7a9d741a4131e6382406d04920a55c",
344+
"is_verified": false,
345+
"line_number": 75,
346+
"type": "Base64 High Entropy String",
347+
"verified_result": null
348+
},
349+
{
350+
"hashed_secret": "ef584f952dbcdb807b1f2a5b688a6b2ac7beaf76",
351+
"is_verified": false,
352+
"line_number": 158,
353+
"type": "Base64 High Entropy String",
354+
"verified_result": null
355+
},
356+
{
357+
"hashed_secret": "fca9f0a000cc0d99a6b89eff22b280d43b3dc23a",
358+
"is_verified": false,
359+
"line_number": 168,
360+
"type": "Base64 High Entropy String",
361+
"verified_result": null
362+
},
363+
{
364+
"hashed_secret": "ad5a4eb98aace66b683002aa7038bb800f8b0a65",
365+
"is_verified": false,
366+
"line_number": 174,
367+
"type": "Base64 High Entropy String",
368+
"verified_result": null
369+
},
370+
{
371+
"hashed_secret": "28681ff8a70bc722645c0ebe5c21fbd8a2ee904a",
372+
"is_verified": false,
373+
"line_number": 183,
374+
"type": "Base64 High Entropy String",
375+
"verified_result": null
376+
},
377+
{
378+
"hashed_secret": "dd65cdacb216bbeca512abfb1ecd15d1c53ac7bd",
379+
"is_verified": false,
380+
"line_number": 431,
272381
"type": "Base64 High Entropy String",
273382
"verified_result": null
274383
}

apps/api/src/common/interceptors/data-transform.interceptor.ts

Lines changed: 38 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ export interface TransformOptions {
1717
deep?: boolean;
1818
}
1919

20-
function padBase64(value: string): string {
21-
const remainder = value.length % 4;
22-
if (remainder === 0) return value;
23-
return value + "=".repeat(4 - remainder);
24-
}
25-
20+
/**
21+
* Check if a string contains mostly printable text
22+
* Used to validate that decoded base64 produces readable content
23+
*/
2624
function isPrintableText(value: string): boolean {
2725
if (!value) return true;
2826

@@ -43,14 +41,33 @@ function isPrintableText(value: string): boolean {
4341
return printableCount / value.length >= 0.85;
4442
}
4543

46-
function decodeBase64String(value: string): string | null {
44+
/**
45+
* Strictly validate and decode a base64 string
46+
* Returns decoded string only if:
47+
* 1. Input is valid base64 format
48+
* 2. Decoded content is printable text
49+
* 3. Re-encoding produces the same result (round-trip validation)
50+
*/
51+
function tryDecodeBase64(value: string): string | null {
52+
if (!value || typeof value !== "string") return null;
53+
54+
const trimmed = value.trim();
55+
56+
if (trimmed.length < 4) return null;
57+
58+
if (!BASE64_REGEX.test(trimmed)) return null;
59+
60+
const paddingNeeded = (4 - (trimmed.length % 4)) % 4;
61+
const padded = trimmed + "=".repeat(paddingNeeded);
62+
4763
try {
48-
const decoded = Buffer.from(value, "base64").toString("utf8");
64+
const decoded = Buffer.from(padded, "base64").toString("utf8");
65+
4966
if (!isPrintableText(decoded)) {
5067
return null;
5168
}
5269

53-
const normalizedInput = value.replaceAll(/=+$/g, "");
70+
const normalizedInput = trimmed.replaceAll(/=+$/g, "");
5471
const reencoded = Buffer.from(decoded, "utf8")
5572
.toString("base64")
5673
.replaceAll(/=+$/g, "");
@@ -61,46 +78,22 @@ function decodeBase64String(value: string): string | null {
6178
}
6279
}
6380

64-
function findBase64Payload(rawValue: string): Base64Payload | null {
65-
if (!rawValue) return null;
66-
67-
const trimmed = rawValue.trim();
68-
if (!trimmed) return null;
69-
70-
const primaryCandidate =
71-
trimmed.length >= 8 &&
72-
BASE64_FULL_REGEX.test(trimmed) &&
73-
decodeBase64String(trimmed);
74-
75-
if (typeof primaryCandidate === "string") {
76-
return { candidate: trimmed, decoded: primaryCandidate };
77-
}
78-
79-
const matches = trimmed.match(BASE64_SEGMENT_REGEX);
80-
if (!matches) return null;
81-
82-
for (const match of matches) {
83-
if (!match) continue;
84-
const padded = padBase64(match);
85-
const decoded = decodeBase64String(padded);
86-
if (decoded !== null) {
87-
return { candidate: padded, decoded };
88-
}
89-
}
90-
91-
return null;
92-
}
93-
81+
/**
82+
* Decode multiple layers of base64 encoding
83+
* Handles cases where data was encoded multiple times
84+
*/
9485
function decodeBase64Layers(value: string): string {
86+
if (!value || typeof value !== "string") return value;
87+
9588
let current = value;
9689
let depth = 0;
9790

9891
while (depth < MAX_BASE64_DEPTH) {
99-
const payload = findBase64Payload(current);
100-
if (!payload) break;
92+
const decoded = tryDecodeBase64(current);
10193

102-
const decoded = payload.decoded;
103-
if (decoded === current) break;
94+
if (decoded === null || decoded === current) {
95+
break;
96+
}
10497

10598
current = decoded;
10699
depth += 1;
@@ -146,16 +139,9 @@ function matchesConfiguredField(
146139

147140
export const TRANSFORM_METADATA_KEY = "data-transform";
148141

149-
const HTML_TAG_REGEX = /<\/?[a-z][\S\s]*>/i;
150-
const BASE64_FULL_REGEX = /^[\d+/A-Za-z]+={0,2}$/;
151-
const BASE64_SEGMENT_REGEX = /[\d+/=A-Za-z]{4,}/g;
142+
const BASE64_REGEX = /^[\d+/A-Za-z]+=*$/;
152143
const MAX_BASE64_DEPTH = 5;
153144

154-
interface Base64Payload {
155-
candidate: string;
156-
decoded: string;
157-
}
158-
159145
/**
160146
* Decorator to configure data transformation for endpoints
161147
*/
@@ -326,7 +312,6 @@ export class DataTransformInterceptor implements NestInterceptor {
326312
fieldPath: string,
327313
operation: "encode" | "decode",
328314
): boolean {
329-
// Only transform explicitly configured fields
330315
if (!fields || fields.length === 0) {
331316
return false;
332317
}
@@ -336,7 +321,6 @@ export class DataTransformInterceptor implements NestInterceptor {
336321
return false;
337322
}
338323

339-
// Skip encoding short numeric strings (like "45", "2027")
340324
if (typeof value === "string") {
341325
const trimmedValue = value.trim();
342326
if (
@@ -366,10 +350,9 @@ export class DataTransformInterceptor implements NestInterceptor {
366350
private decodeValue(value: unknown): unknown {
367351
if (typeof value !== "string") return value;
368352

369-
// Handle compressed data with 'comp:' prefix
370353
if (value.startsWith("comp:")) {
371354
try {
372-
const base64Data = value.slice(5); // Remove 'comp:' prefix
355+
const base64Data = value.slice(5);
373356
const decoded = Buffer.from(base64Data, "base64").toString("utf8");
374357
const fullyDecoded = decodeBase64Layers(decoded);
375358
try {

0 commit comments

Comments
 (0)