@@ -80,7 +80,7 @@ export function evaluate(trace: AgentTrace, expect: Expectations): AssertionResu
8080
8181 // output_matches
8282 if ( expect . output_matches ) {
83- let re : RegExp ;
83+ let re : RegExp | null = null ;
8484 try {
8585 re = new RegExp ( expect . output_matches ) ;
8686 } catch ( err : any ) {
@@ -90,14 +90,15 @@ export function evaluate(trace: AgentTrace, expect: Expectations): AssertionResu
9090 expected : expect . output_matches ,
9191 message : `Invalid regex: /${ expect . output_matches } / — ${ err . message } ` ,
9292 } ) ;
93- return results ;
9493 }
95- results . push ( {
96- name : `output_matches: /${ expect . output_matches } /` ,
97- passed : re . test ( outputs ) ,
98- expected : expect . output_matches ,
99- actual : outputs . slice ( 0 , 200 ) ,
100- } ) ;
94+ if ( re ) {
95+ results . push ( {
96+ name : `output_matches: /${ expect . output_matches } /` ,
97+ passed : re . test ( outputs ) ,
98+ expected : expect . output_matches ,
99+ actual : outputs . slice ( 0 , 200 ) ,
100+ } ) ;
101+ }
101102 }
102103
103104 // max_steps
@@ -278,17 +279,10 @@ export function evaluate(trace: AgentTrace, expect: Expectations): AssertionResu
278279 }
279280 }
280281
281- // custom
282+ // custom — safe property-access evaluator (no arbitrary code execution)
282283 if ( expect . custom ) {
283284 try {
284- const fn = new Function (
285- 'trace' ,
286- 'steps' ,
287- 'toolCalls' ,
288- 'outputs' ,
289- `return (${ expect . custom } )` ,
290- ) ;
291- const result = fn ( trace , trace . steps , toolCalls , outputs ) ;
285+ const result = evaluateSafeExpression ( expect . custom , { trace, steps : trace . steps , toolCalls, outputs } ) ;
292286 results . push ( {
293287 name : `custom: ${ expect . custom . slice ( 0 , 60 ) } ` ,
294288 passed : ! ! result ,
@@ -459,6 +453,77 @@ function deepPartialMatch(actual: Record<string, any>, expected: Record<string,
459453 return true ;
460454}
461455
/**
 * Source patterns that cause a custom assertion expression to be rejected.
 *
 * Each pattern is tested against the raw expression text, so a match inside a
 * string literal is rejected too (for example `outputs.includes("for")`).
 * None of the patterns carries the `g`/`y` flag, so sharing them across calls
 * is stateless.
 */
const UNSAFE_EXPRESSION_PATTERNS: readonly RegExp[] = [
  /\bnew\s+/, // new Function(), new (anything)
  /\bimport\s*\(/, // dynamic import()
  /\beval\s*\(/, // eval()
  /\brequire\s*\(/, // require()
  /\bFunction\s*\(/, // Function()
  /\b__proto__\b/, // prototype pollution
  /\bconstructor\b/, // constructor access
  /\bprototype\b/, // prototype access
  /\bprocess\b/, // process.exit, process.env
  /\bglobal(This)?\b/, // global, globalThis
  /\bwindow\b/, // browser global
  /\bself\b/, // worker global
  /`/, // template literals
  /\$\{/, // template literal interpolation
  /;\s*\w/, // multiple statements
  /\bwhile\b/, // loops
  /\bfor\b/, // loops
  /\bdelete\b/, // delete operator
  /\bvoid\b/, // void operator
  /\btypeof\b/, // typeof (unnecessary for assertions)
  /(?<![=!<>])=(?!=)/, // assignment and `=>` (but not ==, ===, !=, !==, <=, >=)
  /\bthis\b/, // this reference
];

/**
 * Character whitelist applied after the blocklist: word characters, whitespace,
 * dots, brackets, parentheses, quotes, and comparison / boolean / arithmetic /
 * ternary operator characters. Braces, backslashes and semicolons are excluded.
 */
const ALLOWED_EXPRESSION_CHARS = /^[\w\s.[\]()'"<>=!&|+\-*\/%,?:]+$/;

/**
 * Evaluates a custom assertion expression against the given context.
 *
 * The expression text is first checked against a blocklist of patterns and a
 * character whitelist, then compiled with `new Function` in strict mode, with
 * the context keys as parameter names, and invoked with the context values.
 *
 * Typical expressions: `trace.steps.length > 0`, `toolCalls.length === 3`,
 * `outputs.includes("hello")`. Method calls ARE permitted, because parentheses
 * are part of the whitelist.
 *
 * SECURITY NOTE(review): this is a textual filter in front of `new Function`,
 * not a sandbox. The blocklist inspects source text only, so keys computed at
 * runtime (for example `toolCalls['const' + 'ructor']`) and globals that are
 * not listed still reach the function constructor. Do not evaluate expressions
 * from untrusted sources with this helper; a real fix needs a dedicated
 * expression parser/interpreter instead of `new Function`.
 *
 * Side effect: every top-level context value that is a non-null object is
 * frozen IN PLACE with `Object.freeze` (shallow) and stays frozen after this
 * function returns. Nested objects are not frozen.
 *
 * @param expr - Expression source text; must not be empty or whitespace-only.
 * @param context - Names made visible to the expression. Keys must be valid
 *   identifiers because they become parameter names of the compiled function.
 * @returns Whatever the expression evaluates to.
 * @throws Error if the expression is blank, matches a blocked pattern, or
 *   contains a character outside the whitelist. Errors raised while compiling
 *   or running the expression (e.g. `SyntaxError`, `TypeError`) propagate as-is.
 */
export function evaluateSafeExpression(
  expr: string,
  context: Record<string, any>,
): any {
  // A blank expression would otherwise compile to `return ()` and surface as a
  // cryptic SyntaxError (or, when empty, as a misleading "disallowed characters").
  if (expr.trim() === '') {
    throw new Error(
      `Unsafe expression blocked: expression is empty. ` +
        `Custom assertions only support property access and comparisons.`,
    );
  }

  for (const pattern of UNSAFE_EXPRESSION_PATTERNS) {
    if (pattern.test(expr)) {
      throw new Error(
        `Unsafe expression blocked: pattern "${pattern.source}" matched. ` +
          `Custom assertions only support property access and comparisons.`,
      );
    }
  }

  // Whitelist on top of the blocklist: reject anything outside the known charset.
  if (!ALLOWED_EXPRESSION_CHARS.test(expr)) {
    throw new Error(
      `Unsafe expression blocked: contains disallowed characters. ` +
        `Custom assertions only support property access and comparisons.`,
    );
  }

  // Freeze top-level object values so the expression cannot add, remove or
  // reassign their direct properties (strict mode turns such attempts into
  // TypeErrors). This freezes the caller's own objects, not copies.
  const keys = Object.keys(context);
  const values = keys.map((key) => {
    const value = context[key];
    return typeof value === 'object' && value !== null ? Object.freeze(value) : value;
  });

  // Compile with the context keys as the only explicitly bound names.
  const fn = new Function(...keys, `"use strict"; return (${expr})`);
  return fn(...values);
}
526+
462527/**
463528 * Standalone assertion: compare a trace against a named golden snapshot.
464529 * Returns an AssertionResult compatible with AgentProbe's result format.
0 commit comments