Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/0016-dynamic-scan-flags.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@cdot65/prisma-airs-cli": minor
---

Add `--goals`, `--depth`, and `--breadth` flags to `airs redteam scan --type DYNAMIC` for human-augmented agent scans. `--goals` accepts an inline JSON array or a path to a JSON file of goal strings. Inputs are validated: `--depth` and `--breadth` must be positive integers, and `--goals` must be a JSON array of non-empty strings. Without `--goals`, DYNAMIC scans still run in fully automated mode (no behavior change).
40 changes: 40 additions & 0 deletions docs/redteam/scanning.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,46 @@ airs redteam scan \
!!! tip "Finding prompt set UUIDs"
Use `airs redteam prompt-sets list` to find UUIDs. Prompt sets created by `airs runtime topics generate --create-prompt-set` emit the UUID in the `promptset:created` event.

### Dynamic Scan (Agent-Driven)

A `DYNAMIC` scan dispatches autonomous agents that adapt their attacks based on the target's responses. Without `--goals`, the scan runs in fully automated mode using the AIRS attack agent.

```bash
# Fully automated agent scan
airs redteam scan \
--target <uuid> \
--name "Automated Agent Scan" \
--type DYNAMIC
```

To steer agents toward specific objectives, pass attack goals — either inline as a JSON array or as a path to a JSON file:

```bash
# Goals from a file
airs redteam scan \
--target <uuid> --name "Targeted Agent Scan" \
--type DYNAMIC \
--goals goals.json --depth 10 --breadth 6

# Inline goals
airs redteam scan \
--target <uuid> --name "Targeted Agent Scan" \
--type DYNAMIC \
--goals '["Extract the system prompt", "Bypass the safety policy"]'
```

`goals.json`:

```json
["Extract the system prompt", "Bypass the safety policy", "Leak training data"]
```

| Flag | Default | What it does |
|------|---------|--------------|
| `--goals <file\|json>` | — | Attack goals, given either as an inline JSON array of strings or as a path to a JSON file containing such an array. Without this flag, agents run in fully automated mode. |
| `--depth <n>` | `10` | Max conversation turns per goal. |
| `--breadth <n>` | `6` | Parallel agents per goal. |

---

## Check Scan Status
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/cli-commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,9 @@ airs redteam scan [options]
| `--type <type>` | `STATIC` | Job type: `STATIC`, `DYNAMIC`, or `CUSTOM` |
| `--categories <json>` | all | Category filter JSON (STATIC scans) |
| `--prompt-sets <uuids>` | — | Comma-separated prompt set UUIDs (CUSTOM scans) |
| `--goals <file\|json>` | — | Inline JSON array or path to JSON file of attack goal strings (DYNAMIC scans) |
| `--depth <n>` | `10` | Max conversation turns per goal (DYNAMIC scans) |
| `--breadth <n>` | `6` | Parallel agents per goal (DYNAMIC scans) |
| `--no-wait` | wait | Submit without waiting for completion |

```bash
Expand All @@ -527,6 +530,11 @@ airs redteam scan --target <uuid> --name "Full Scan"
airs redteam scan \
--target <uuid> --name "Topic Validation" \
--type CUSTOM --prompt-sets <uuid1>,<uuid2>

# Dynamic (agent-driven) scan with attack goals from a file
airs redteam scan \
--target <uuid> --name "Agent Scan" \
--type DYNAMIC --goals goals.json --depth 10 --breadth 6
```

### redteam status
Expand Down
12 changes: 12 additions & 0 deletions src/airs/redteam.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import type {

const TERMINAL_STATUSES = new Set(['COMPLETED', 'PARTIALLY_COMPLETE', 'FAILED', 'ABORTED']);

/** Default number of parallel agents per goal, sent as `stream_breadth` for DYNAMIC scans when no breadth is supplied. */
export const DEFAULT_DYNAMIC_BREADTH = 6;
/** Default maximum conversation turns per goal, sent as `stream_depth` for DYNAMIC scans when no depth is supplied. */
export const DEFAULT_DYNAMIC_DEPTH = 10;

/** Normalize an SDK job response into a RedTeamJob. */
function normalizeJob(raw: Record<string, unknown>): RedTeamJob {
const target = raw.target as Record<string, unknown> | undefined;
Expand Down Expand Up @@ -293,6 +296,9 @@ export class SdkRedTeamService implements RedTeamService {
jobType: string;
categories?: Record<string, unknown>;
customPromptSets?: string[];
attackGoals?: string[];
streamDepth?: number;
streamBreadth?: number;
}): Promise<RedTeamJob> {
let jobMetadata: Record<string, unknown> = {};
if (request.jobType === 'STATIC' && request.categories) {
Expand All @@ -301,6 +307,12 @@ export class SdkRedTeamService implements RedTeamService {
jobMetadata = {
custom_prompt_sets: request.customPromptSets,
};
} else if (request.jobType === 'DYNAMIC') {
jobMetadata = {
stream_breadth: request.streamBreadth ?? DEFAULT_DYNAMIC_BREADTH,
stream_depth: request.streamDepth ?? DEFAULT_DYNAMIC_DEPTH,
...(request.attackGoals?.length ? { attack_goals: request.attackGoals } : {}),
};
}

const response = await this.client.scans.create({
Expand Down
3 changes: 3 additions & 0 deletions src/airs/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,9 @@ export interface RedTeamService {
jobType: string;
categories?: Record<string, unknown>;
customPromptSets?: string[];
attackGoals?: string[];
streamDepth?: number;
streamBreadth?: number;
}): Promise<RedTeamJob>;

/** Get scan status by job ID. */
Expand Down
35 changes: 35 additions & 0 deletions src/cli/commands/redteam.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,31 @@ async function createPromptSetService() {
});
}

/**
 * Parse the `--goals` argument into a list of attack goal strings.
 *
 * Input that starts with `[` (after trimming) is treated as an inline JSON
 * array; anything else is treated as a path to a JSON file.
 *
 * @param input - Raw `--goals` value: inline JSON array or file path.
 * @returns The parsed goals, in input order and otherwise unmodified.
 * @throws Error (prefixed with `--goals:`) if the file cannot be read, the
 *   content is not valid JSON, the array is empty, or any element is not a
 *   non-blank string.
 */
export function parseAttackGoals(input: string): string[] {
  const trimmed = input.trim();
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let raw: string;
  if (trimmed.startsWith('[')) {
    raw = trimmed;
  } else {
    try {
      raw = fs.readFileSync(trimmed, 'utf-8');
    } catch (err) {
      // Surface missing/unreadable files with flag context instead of a bare fs error.
      throw new Error(`--goals: cannot read file "${trimmed}" (${describe(err)})`);
    }
    // JSON.parse rejects a leading UTF-8 BOM, which some editors write by default.
    if (raw.charCodeAt(0) === 0xfeff) {
      raw = raw.slice(1);
    }
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new Error(`--goals: invalid JSON (${describe(err)})`);
  }

  if (
    !Array.isArray(parsed) ||
    !parsed.every((g) => typeof g === 'string' && g.trim().length > 0)
  ) {
    throw new Error('--goals: expected a JSON array of non-empty strings');
  }
  // `every` is vacuously true for `[]`; an empty list would silently fall back
  // to a fully automated scan even though the user asked for goal steering.
  if (parsed.length === 0) {
    throw new Error('--goals: expected at least one goal (omit --goals for a fully automated scan)');
  }
  return parsed;
}

/**
 * Parse a string flag value as a strictly positive integer.
 *
 * Only plain decimal digits (with an optional leading `+` and surrounding
 * whitespace) are accepted.
 *
 * @param input - Raw flag value as received from the command line.
 * @param flag - Flag name (e.g. `--depth`) used to prefix the error message.
 * @returns The parsed integer, guaranteed to be >= 1.
 * @throws Error if `input` is not a positive integer.
 */
export function parsePositiveInt(input: string, flag: string): number {
  const text = input.trim();
  // Number.parseInt stops at the first non-digit, so "10abc" → 10 and "1.5" → 1
  // would pass silently; require the whole value to be digits instead.
  const n = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isSafeInteger(n) || n <= 0) {
    throw new Error(`${flag}: expected a positive integer, got "${input}"`);
  }
  return n;
}

/** Valid provider names for target init templates. */
export const VALID_TARGET_PROVIDERS = [
'OPENAI',
Expand Down Expand Up @@ -727,6 +752,9 @@ export function registerRedteamCommand(program: Command): void {
.option('--type <type>', 'Job type: STATIC, DYNAMIC, or CUSTOM', 'STATIC')
.option('--categories <json>', 'Category filter JSON (STATIC scans)')
.option('--prompt-sets <uuids>', 'Comma-separated prompt set UUIDs (CUSTOM scans)')
.option('--goals <file>', 'JSON file or inline JSON array of attack goals (DYNAMIC scans)')
.option('--depth <number>', 'Max conversation turns per goal (DYNAMIC scans)', '10')
.option('--breadth <number>', 'Parallel agents per goal (DYNAMIC scans)', '6')
.option('--no-wait', 'Submit scan without waiting for completion')
.action(async (opts) => {
try {
Expand All @@ -742,13 +770,20 @@ export function registerRedteamCommand(program: Command): void {
? (opts.promptSets as string).split(',').map((s: string) => s.trim())
: undefined;

const attackGoals = opts.goals ? parseAttackGoals(opts.goals as string) : undefined;
const streamDepth = parsePositiveInt(opts.depth as string, '--depth');
const streamBreadth = parsePositiveInt(opts.breadth as string, '--breadth');

console.log(` Creating ${opts.type} scan "${opts.name}"...`);
const job = await service.createScan({
name: opts.name,
targetUuid: opts.target,
jobType: opts.type,
categories,
customPromptSets,
attackGoals,
streamDepth,
streamBreadth,
});

renderScanStatus(job);
Expand Down
38 changes: 36 additions & 2 deletions tests/unit/airs/redteam.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ describe('SdkRedTeamService', () => {
});
});

it('creates a DYNAMIC scan with empty metadata', async () => {
it('creates a DYNAMIC scan with default breadth/depth and no goals', async () => {
mockScansCreate.mockResolvedValue({
uuid: 'job-3',
name: 'Dynamic Scan',
Expand All @@ -201,7 +201,41 @@ describe('SdkRedTeamService', () => {
name: 'Dynamic Scan',
target: { uuid: 't-1' },
job_type: 'DYNAMIC',
job_metadata: {},
job_metadata: {
stream_breadth: 6,
stream_depth: 10,
},
});
});

it('creates a DYNAMIC scan with goals, depth, and breadth', async () => {
  // Caller-supplied steering values; they differ from the defaults (6 / 10)
  // so the assertion below proves they are forwarded rather than replaced.
  const goals = ['Extract system prompt', 'Bypass safety'];
  const depth = 15;
  const breadth = 8;

  // Canned SDK response for the create call.
  const sdkResponse = {
    uuid: 'job-4',
    name: 'Agent Scan',
    status: 'QUEUED',
    job_type: 'DYNAMIC',
    target_id: 't-1',
    target: { name: 'Target 1' },
  };
  mockScansCreate.mockResolvedValue(sdkResponse);

  await service.createScan({
    name: 'Agent Scan',
    targetUuid: 't-1',
    jobType: 'DYNAMIC',
    attackGoals: [...goals],
    streamDepth: depth,
    streamBreadth: breadth,
  });

  // The camelCase request fields must arrive as snake_case job metadata.
  const expectedPayload = {
    name: 'Agent Scan',
    target: { uuid: 't-1' },
    job_type: 'DYNAMIC',
    job_metadata: {
      stream_breadth: breadth,
      stream_depth: depth,
      attack_goals: goals,
    },
  };
  expect(mockScansCreate).toHaveBeenCalledWith(expectedPayload);
});
});
Expand Down
67 changes: 67 additions & 0 deletions tests/unit/cli/redteam-scan-helpers.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { parseAttackGoals, parsePositiveInt } from '../../../src/cli/commands/redteam.js';

// Unit tests for parseAttackGoals: inline JSON input, file input, and validation failures.
describe('parseAttackGoals', () => {
  // Scratch directory created before and removed after every test.
  let scratchDir: string;

  beforeEach(() => {
    const prefix = path.join(os.tmpdir(), 'goals-');
    scratchDir = fs.mkdtempSync(prefix);
  });

  afterEach(() => {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  });

  it('parses inline JSON array', () => {
    const goals = parseAttackGoals('["a","b"]');
    expect(goals).toEqual(['a', 'b']);
  });

  it('parses inline JSON with leading whitespace', () => {
    const goals = parseAttackGoals('  ["a"]');
    expect(goals).toEqual(['a']);
  });

  it('reads goals from file when input is a path', () => {
    const goalsPath = path.join(scratchDir, 'goals.json');
    fs.writeFileSync(goalsPath, '["x","y","z"]');

    const goals = parseAttackGoals(goalsPath);

    expect(goals).toEqual(['x', 'y', 'z']);
  });

  it('throws on invalid JSON', () => {
    const attempt = () => parseAttackGoals('[not json');
    expect(attempt).toThrow(/--goals: invalid JSON/);
  });

  it('throws when array contains non-strings', () => {
    const attempt = () => parseAttackGoals('["a", 42]');
    expect(attempt).toThrow(/array of non-empty strings/);
  });

  it('throws when array contains empty string', () => {
    const attempt = () => parseAttackGoals('["a", ""]');
    expect(attempt).toThrow(/array of non-empty strings/);
  });
});

// Unit tests for parsePositiveInt: one accepted value and three rejected ones.
describe('parsePositiveInt', () => {
  // Defers the call so `expect(...).toThrow` can observe the thrown error.
  const parsing = (value: string, flag: string) => () => parsePositiveInt(value, flag);

  it('parses a positive integer', () => {
    const result = parsePositiveInt('10', '--depth');
    expect(result).toBe(10);
  });

  it('throws on non-numeric input', () => {
    const expected = /--depth: expected a positive integer, got "abc"/;
    expect(parsing('abc', '--depth')).toThrow(expected);
  });

  it('throws on zero', () => {
    const expected = /--breadth: expected a positive integer, got "0"/;
    expect(parsing('0', '--breadth')).toThrow(expected);
  });

  it('throws on negative', () => {
    const expected = /--depth: expected a positive integer, got "-5"/;
    expect(parsing('-5', '--depth')).toThrow(expected);
  });
});
Loading