diff --git a/.changeset/0016-dynamic-scan-flags.md b/.changeset/0016-dynamic-scan-flags.md new file mode 100644 index 0000000..f602d40 --- /dev/null +++ b/.changeset/0016-dynamic-scan-flags.md @@ -0,0 +1,5 @@ +--- +"@cdot65/prisma-airs-cli": minor +--- + +Add `--goals`, `--depth`, and `--breadth` flags to `airs redteam scan --type DYNAMIC` for human-augmented agent scans. `--goals` accepts an inline JSON array or a path to a JSON file of goal strings. Inputs are validated: `--depth` and `--breadth` must be positive integers, and `--goals` must be a JSON array of non-empty strings. Without `--goals`, DYNAMIC scans still run in fully automated mode; note that the request metadata now always includes `stream_depth` and `stream_breadth` (defaulting to 10 and 6) instead of being empty. diff --git a/docs/redteam/scanning.md b/docs/redteam/scanning.md index 3df0ec2..2fd70a4 100644 --- a/docs/redteam/scanning.md +++ b/docs/redteam/scanning.md @@ -131,6 +131,46 @@ airs redteam scan \ !!! tip "Finding prompt set UUIDs" Use `airs redteam prompt-sets list` to find UUIDs. Prompt sets created by `airs runtime topics generate --create-prompt-set` emit the UUID in the `promptset:created` event. +### Dynamic Scan (Agent-Driven) + +A `DYNAMIC` scan dispatches autonomous agents that adapt their attacks based on the target's responses. Without `--goals`, the scan runs in fully automated mode using the AIRS attack agent. 
+ +```bash +# Fully automated agent scan +airs redteam scan \ + --target <uuid> \ + --name "Automated Agent Scan" \ + --type DYNAMIC +``` + +To steer agents toward specific objectives, pass attack goals — either inline as a JSON array or as a path to a JSON file: + +```bash +# Goals from a file +airs redteam scan \ + --target <uuid> --name "Targeted Agent Scan" \ + --type DYNAMIC \ + --goals goals.json --depth 10 --breadth 6 + +# Inline goals +airs redteam scan \ + --target <uuid> --name "Targeted Agent Scan" \ + --type DYNAMIC \ + --goals '["Extract the system prompt", "Bypass the safety policy"]' +``` + +`goals.json`: + +```json +["Extract the system prompt", "Bypass the safety policy", "Leak training data"] +``` + +| Flag | Default | What it does | +|------|---------|--------------| +| `--goals <jsonOrPath>` | — | Attack goals as inline JSON array or path to JSON file. Without this flag, agents run in fully automated mode. | +| `--depth <n>` | `10` | Max conversation turns per goal. | +| `--breadth <n>` | `6` | Parallel agents per goal. 
| + --- ## Check Scan Status diff --git a/docs/reference/cli-commands.md b/docs/reference/cli-commands.md index 42fc801..bfd90bb 100644 --- a/docs/reference/cli-commands.md +++ b/docs/reference/cli-commands.md @@ -517,6 +517,9 @@ airs redteam scan [options] | `--type ` | `STATIC` | Job type: `STATIC`, `DYNAMIC`, or `CUSTOM` | | `--categories ` | all | Category filter JSON (STATIC scans) | | `--prompt-sets ` | — | Comma-separated prompt set UUIDs (CUSTOM scans) | +| `--goals ` | — | Inline JSON array or path to JSON file of attack goal strings (DYNAMIC scans) | +| `--depth ` | `10` | Max conversation turns per goal (DYNAMIC scans) | +| `--breadth ` | `6` | Parallel agents per goal (DYNAMIC scans) | | `--no-wait` | wait | Submit without waiting for completion | ```bash @@ -527,6 +530,11 @@ airs redteam scan --target --name "Full Scan" airs redteam scan \ --target --name "Topic Validation" \ --type CUSTOM --prompt-sets , + +# Dynamic (agent-driven) scan with attack goals from a file +airs redteam scan \ + --target --name "Agent Scan" \ + --type DYNAMIC --goals goals.json --depth 10 --breadth 6 ``` ### redteam status diff --git a/src/airs/redteam.ts b/src/airs/redteam.ts index 9339f7c..8b9e8e8 100644 --- a/src/airs/redteam.ts +++ b/src/airs/redteam.ts @@ -24,6 +24,9 @@ import type { const TERMINAL_STATUSES = new Set(['COMPLETED', 'PARTIALLY_COMPLETE', 'FAILED', 'ABORTED']); +export const DEFAULT_DYNAMIC_BREADTH = 6; +export const DEFAULT_DYNAMIC_DEPTH = 10; + /** Normalize an SDK job response into a RedTeamJob. 
/**
 * Parse the `--goals` argument into a list of attack goal strings.
 *
 * Input whose first non-whitespace character is `[` is treated as an inline
 * JSON array; anything else is treated as a path to a UTF-8 JSON file.
 *
 * @param input - Raw flag value: an inline JSON array or a file path.
 * @returns The goal strings, unmodified and in the order given.
 * @throws Error (message prefixed with `--goals:`) when the file cannot be
 *   read, the JSON is malformed, the array is empty, or any element is not a
 *   non-blank string.
 */
export function parseAttackGoals(input: string): string[] {
  const trimmed = input.trim();

  let raw: string;
  if (trimmed.startsWith('[')) {
    raw = trimmed;
  } else {
    try {
      // JSON.parse rejects a leading byte-order mark, which some editors
      // prepend to UTF-8 files, so drop it before parsing.
      raw = fs.readFileSync(trimmed, 'utf-8').replace(/^\uFEFF/, '');
    } catch (err) {
      // Surface read failures with the same flag prefix as validation errors
      // so the user can tell which option was at fault.
      throw new Error(
        `--goals: cannot read file "${trimmed}" (${err instanceof Error ? err.message : String(err)})`,
      );
    }
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new Error(`--goals: invalid JSON (${err instanceof Error ? err.message : String(err)})`);
  }

  if (!Array.isArray(parsed)) {
    throw new Error('--goals: expected a JSON array of non-empty strings');
  }
  const goals: unknown[] = parsed;

  // `every` is vacuously true for `[]`; reject it explicitly so an empty list
  // is reported instead of silently meaning "no goals".
  if (goals.length === 0) {
    throw new Error('--goals: expected at least one goal, got an empty array');
  }
  if (!goals.every((goal): goal is string => typeof goal === 'string' && goal.trim().length > 0)) {
    throw new Error('--goals: expected a JSON array of non-empty strings');
  }
  return goals;
}

/**
 * Parse a string flag value as a positive integer.
 *
 * Only plain base-10 digits (optionally preceded by `+` and surrounded by
 * whitespace) are accepted. Unlike a bare `Number.parseInt`, values such as
 * `"10abc"`, `"1.5"` or `"1e3"` are rejected rather than silently truncated.
 *
 * @param input - Raw flag value.
 * @param flag - Flag name used as the error-message prefix (e.g. `--depth`).
 * @returns The parsed integer, always greater than zero.
 * @throws Error when `input` is not a positive, safely representable integer.
 */
export function parsePositiveInt(input: string, flag: string): number {
  const candidate = input.trim();
  const n = /^\+?\d+$/.test(candidate) ? Number(candidate) : Number.NaN;
  if (!Number.isSafeInteger(n) || n <= 0) {
    throw new Error(`${flag}: expected a positive integer, got "${input}"`);
  }
  return n;
}
parseAttackGoals(opts.goals as string) : undefined; + const streamDepth = parsePositiveInt(opts.depth as string, '--depth'); + const streamBreadth = parsePositiveInt(opts.breadth as string, '--breadth'); + console.log(` Creating ${opts.type} scan "${opts.name}"...`); const job = await service.createScan({ name: opts.name, @@ -749,6 +781,9 @@ export function registerRedteamCommand(program: Command): void { jobType: opts.type, categories, customPromptSets, + attackGoals, + streamDepth, + streamBreadth, }); renderScanStatus(job); diff --git a/tests/unit/airs/redteam.spec.ts b/tests/unit/airs/redteam.spec.ts index 55c563a..dd88a82 100644 --- a/tests/unit/airs/redteam.spec.ts +++ b/tests/unit/airs/redteam.spec.ts @@ -181,7 +181,7 @@ describe('SdkRedTeamService', () => { }); }); - it('creates a DYNAMIC scan with empty metadata', async () => { + it('creates a DYNAMIC scan with default breadth/depth and no goals', async () => { mockScansCreate.mockResolvedValue({ uuid: 'job-3', name: 'Dynamic Scan', @@ -201,7 +201,41 @@ describe('SdkRedTeamService', () => { name: 'Dynamic Scan', target: { uuid: 't-1' }, job_type: 'DYNAMIC', - job_metadata: {}, + job_metadata: { + stream_breadth: 6, + stream_depth: 10, + }, + }); + }); + + it('creates a DYNAMIC scan with goals, depth, and breadth', async () => { + mockScansCreate.mockResolvedValue({ + uuid: 'job-4', + name: 'Agent Scan', + status: 'QUEUED', + job_type: 'DYNAMIC', + target_id: 't-1', + target: { name: 'Target 1' }, + }); + + await service.createScan({ + name: 'Agent Scan', + targetUuid: 't-1', + jobType: 'DYNAMIC', + attackGoals: ['Extract system prompt', 'Bypass safety'], + streamDepth: 15, + streamBreadth: 8, + }); + + expect(mockScansCreate).toHaveBeenCalledWith({ + name: 'Agent Scan', + target: { uuid: 't-1' }, + job_type: 'DYNAMIC', + job_metadata: { + stream_breadth: 8, + stream_depth: 15, + attack_goals: ['Extract system prompt', 'Bypass safety'], + }, }); }); }); diff --git 
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { parseAttackGoals, parsePositiveInt } from '../../../src/cli/commands/redteam.js';

// Covers the `--goals` parser: inline JSON, file input, and rejected shapes.
describe('parseAttackGoals', () => {
  // Fresh scratch directory per test so file-based cases never collide.
  let scratchDir: string;

  beforeEach(() => {
    const prefix = path.join(os.tmpdir(), 'goals-');
    scratchDir = fs.mkdtempSync(prefix);
  });

  afterEach(() => {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  });

  it('parses inline JSON array', () => {
    const goals = parseAttackGoals('["a","b"]');
    expect(goals).toEqual(['a', 'b']);
  });

  it('parses inline JSON with leading whitespace', () => {
    const goals = parseAttackGoals('  ["a"]');
    expect(goals).toEqual(['a']);
  });

  it('reads goals from file when input is a path', () => {
    const goalsPath = path.join(scratchDir, 'goals.json');
    fs.writeFileSync(goalsPath, '["x","y","z"]');
    const goals = parseAttackGoals(goalsPath);
    expect(goals).toEqual(['x', 'y', 'z']);
  });

  it('throws on invalid JSON', () => {
    const attempt = () => parseAttackGoals('[not json');
    expect(attempt).toThrow(/--goals: invalid JSON/);
  });

  it('throws when array contains non-strings', () => {
    const attempt = () => parseAttackGoals('["a", 42]');
    expect(attempt).toThrow(/array of non-empty strings/);
  });

  it('throws when array contains empty string', () => {
    const attempt = () => parseAttackGoals('["a", ""]');
    expect(attempt).toThrow(/array of non-empty strings/);
  });
});

// Covers the positive-integer flag parser used by `--depth` and `--breadth`.
describe('parsePositiveInt', () => {
  it('parses a positive integer', () => {
    const value = parsePositiveInt('10', '--depth');
    expect(value).toBe(10);
  });

  it('throws on non-numeric input', () => {
    const attempt = () => parsePositiveInt('abc', '--depth');
    expect(attempt).toThrow(/--depth: expected a positive integer, got "abc"/);
  });

  it('throws on zero', () => {
    const attempt = () => parsePositiveInt('0', '--breadth');
    expect(attempt).toThrow(/--breadth: expected a positive integer, got "0"/);
  });

  it('throws on negative', () => {
    const attempt = () => parsePositiveInt('-5', '--depth');
    expect(attempt).toThrow(/--depth: expected a positive integer, got "-5"/);
  });
});