Commit 91f76d1

Authored by Aryansharma28, claude, github-code-quality[bot], and rogeriochaves
fix: judge off-by-one, auto-run on script exhaustion, assertion criteria, marathon_script cleanup (#289)
* fix: run judge on early scenario failures for structured criteria feedback

  When a scenario failed before the judge step (via assertion, executor.fail(), or an API error), the result had empty criteria (0/0) because the judge never ran. The platform showed no structured feedback — just an error string.

  Now the executor always tries to run the judge before returning a failed result. A `_final_judge_invoked` flag prevents double-invocation when the judge already ran. If the judge itself fails (e.g. same network error), the result is returned unchanged — strictly better, never worse.

  Handles all failure paths:
  - assert / expect (check function throws)
  - executor.fail() (check function sets result directly)
  - API / network errors (outer exception handler)

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: auto-run judge when script exhausts without conclusion

  When RedTeamAgent backtracks heavily (strong defenses), the script can exhaust all steps before reaching the final judge(). The executor treated this as "no conclusion" and failed with a misleading message telling users to add scenario.judge() — when they already had one.

  Now when a script ends without conclusion and a JudgeAgent is registered, the executor auto-runs the judge to evaluate the trace. Exhausting a red team script without a breach is a defense success, not a harness error.

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: prevent double judge invocation on checkpoint failures

  When a checkpoint judge failed, finalJudgeInvoked was not set, causing runJudgeIfNeeded to re-run the judge without criteria — flipping the result from fail to pass. Now checkpoint failures also set the flag. Also adds defensive explicit flag set in the runJudgeIfNeeded helper.
  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* test: update script-exhaustion test for auto-judge behavior

  The test expected script exhaustion to always fail, but now when a JudgeAgent is registered the executor auto-runs it. Split into two tests: one verifying auto-judge fallback, one verifying the error when no judge exists.

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: only auto-run judge on script exhaustion, not on assertion/error failures

  Assertion failures and errors should fail immediately without calling the judge — they represent hard stops. The judge auto-run is only for script exhaustion (all steps consumed without conclusion), where the judge decides if the defense held. Removes _run_judge_if_needed helper since it's no longer needed.

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* refactor: remove turns param from RedTeamAgent.marathon_script()

  total_turns on the RedTeamAgent instance is now the single source of truth for script length. marathon_script() reads self.total_turns instead of requiring a separate turns argument that had to match. Also removes the inflated max_turns=160 global config from bank-demo tests since the script drives duration, not max_turns.
  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* refactor: default total_turns=30, enforce max_turns in scripts, remove standalone marathon_script

  - Change RedTeamAgent default total_turns from 50 to 30
  - Enforce max_turns during script execution (not just proceed loop), so max_turns is always respected regardless of execution mode
  - Remove standalone scenario.marathon_script() from public API — users should use red_team.marathon_script() for red team tests and proceed() for normal long-running tests
  - Inline the marathon_script fallback logic into RedTeamAgent

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: address review items — judge auto-run, TS parity, tests, docs

  - max_turns enforcement auto-runs judge when hit (not hard-fail)
  - Extract _run_judge_or_fail helper to DRY up judge auto-run logic
  - Optimize max_turns check to only fire on turn boundary changes
  - Add tests for max_turns enforcement with/without judge
  - TypeScript parity: remove turns from marathonScript, remove standalone marathonScript export, add maxTurns enforcement with judge auto-run in TS executor
  - Fix stale docstring referencing removed scenario.script.marathon_script
  - Update redteaming.md design doc for new marathon_script signature

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* docs: update red-teaming docs for marathon_script API changes

  - Remove turns parameter from all marathon_script/marathonScript examples
  - Remove standalone scenario.marathon_script from exports section
  - Update default total_turns from 50 to 30 in parameter table
  - Update CI examples to use default turns (no turns= needed)
  - Update callout and recommendation text

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* refactor: total_turns is a hard cap, remove backtrack padding and max_turns from scripts

  - total_turns generates exactly that many iterations — backtracked turns count toward the budget, no padding added
  - Remove max_turns enforcement from script execution — scripts define their own flow, total_turns is the only control for red team
  - Update docs to make it clear: total_turns is the single parameter that controls red team test duration
  - Both Python and TypeScript

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: update stale assertions, docs, and docstrings after padding removal

  - Fix 8 TS test assertions that still expected backtrack padding
  - Remove "pads extra iterations" from docs marathon_script section
  - Fix _run_judge_or_fail docstring referencing max_turns

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* chore: remove stale backtrack padding references from comments

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: escape apostrophe in docs MDX to pass ESLint

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: address PR review comments

  - Update script-exhaustion fallback to suggest adding JudgeAgent (rogeriochaves feedback)
  - Replace empty except with warnings.warn for judge failures (code-quality bot feedback)

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: judge is_last_message off-by-one in proceed() flows

  The judge never saw is_last_message=True because _step() incremented current_turn to max_turns then bailed before the judge ran. Use >= max_turns - 1 so the judge forces a verdict on the actual last turn. Also handle max_turns=None by defaulting to 10.
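The boundary fix described above can be seen in a toy loop. This is a sketch, not the executor's actual code; the helper name is illustrative:

```python
# Sketch of the is_last_message boundary: with the old ">= max_turns" check,
# the loop ends before the condition can ever be true, so the judge never
# sees the last-turn flag. The fixed ">= max_turns - 1" fires on the final turn.
def last_message_flags(max_turns, fixed=True):
    flags = []
    for current_turn in range(max_turns):
        threshold = max_turns - 1 if fixed else max_turns
        flags.append(current_turn >= threshold)
    return flags
```

With `fixed=False` no turn is ever flagged as last, which is exactly why the judge never forced a verdict.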
  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: apply same isLastMessage off-by-one fix to TypeScript judge

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* fix: log judge errors in TS, tighten phase test, remove extra blank line

  - TS catch block now console.warn's the judge error (parity with Python)
  - test_phase_calculation asserts specific phases per turn instead of any
  - Remove stray extra blank line in scenario-execution.ts

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* refactor: simplify _check_failure flow — raise directly instead of falling through

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* test: add comprehensive tests for all coverage gaps

  - TS judge isLastMessage: 5 parametrized boundary tests (parity with Python)
  - AssertionError → structured criteria: Python + TS tests for assertion catch, non-assertion passthrough, and checkpoint accumulation
  - Judge auto-run on script exhaustion: TS tests for auto-run + no double invocation
  - Judge throws during auto-run: Python test verifying warnings.warn fallback
  - _final_judge_invoked flag: Python + TS tests ensuring judge runs exactly once
  - marathon_script success_score=None path: structure verification test

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* feat: single-turn attack injection for RedTeamAgent (#291)

  * feat: add single-turn attack injection for RedTeamAgent

    Add random per-turn encoding augmentation to the red-teaming system. With configurable probability, the attacker's message is transformed using a deterministic encoding technique (Base64, ROT13, leetspeak, char-split, code-block) before being sent to the target. Default OFF (0.0) for backward compatibility.

    Closes langwatch/langwatch#2482

    Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

  * fix: store original text in H_attacker, send encoded to target only

    The encoded (Base64/ROT13/etc.) message was being stored in the attacker's private history (H_attacker), which would corrupt the attacker LLM's reasoning on subsequent turns. Both DeepTeam and Promptfoo keep the attacker history encoding-free — encoding is a transport-level transform for the target only.

    Also adds:
    - Input validation for injection_probability (must be 0.0-1.0)
    - Tests verifying H_attacker stores original text when injection fires
    - Deduplicates makeInput helper in TS tests

    Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

  * fix: resolve pyright type errors in attack technique tests

    Use MagicMock(spec=AgentInput) instead of bare type() for test inputs, use .get("content") for optional TypedDict keys, and change List[AttackTechnique] to Sequence[AttackTechnique] for covariance.

    Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

  --------

  Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>

* Potential fix for pull request finding 'Unused import'

  Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>

* fix(js): use shift() instead of pop() in consumeUntilRole

  consumeUntilRole checks pendingRolesOnTurn[0] (front) but was removing from the back with pop(), draining the array incorrectly. Now uses shift() to match the Python implementation's pop(0). Cherry-picked from #279.

  Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

* review: remove auto-judge-on-exhaustion and duplicated max_turns default

  Scripts must be explicit about judgment — no hidden auto-running of the judge when a script ends without conclusion. The finalJudgeInvoked flag and _run_judge_or_fail helper only existed to manage complexity that shouldn't be in the executor. Red team marathon_script already appends judge() at the end, so auto-judge was unnecessary.
  Also removes the duplicated `|| 10` / `or 10` fallback from the judge's is_last_message computation — max_turns is already defaulted in the config layer (ScenarioConfig in Python, ScenarioConfigFinal in TS).

* test: verify judge runs exactly once at end of marathon with backtracks

  Integration test that proves the full red team flow: 5 turns with scoring, 1 backtrack, and then asserts the judge was called exactly once at the end with the full conversation history visible.

* test(js): verify judge runs exactly once at end of marathon with backtracks

  TypeScript parity for the Python integration test. Runs a 5-turn marathon with mocked attacker LLM, scorer, 1 backtrack on hard refusal, and asserts the judge was called exactly once with full conversation history visible.

* fix: resolve type errors in judge is_last_message computation

  Python: max_turns is Optional[int], so `or 10` fallback is needed for pyright. TS: use the existing DEFAULT_MAX_TURNS constant instead of a magic number. Also fix static imports in TS red-team integration test.

* fix: add extra turn to hungry user test to reduce flakiness

  The agent often uses its first turn to ask a follow-up question, leaving only 1 turn to deliver a complete recipe. Adding a third exchange gives the agent room to ask, respond, and complete.

--------

Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>
Co-authored-by: Rogério Chaves <[email protected]>
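The single-turn attack injection described in the commit message might look roughly like this. This is a hedged sketch with hypothetical names (`send_attack`, `TECHNIQUES`), showing only two of the five encoding techniques:

```python
import base64
import codecs
import random

# Two of the deterministic encoding techniques; the real set also includes
# leetspeak, char-split, and code-block variants.
TECHNIQUES = {
    "base64": lambda text: base64.b64encode(text.encode()).decode(),
    "rot13": lambda text: codecs.encode(text, "rot13"),
}

def send_attack(message, attacker_history, injection_probability=0.0, rng=random):
    if not 0.0 <= injection_probability <= 1.0:
        raise ValueError("injection_probability must be between 0.0 and 1.0")
    # The attacker's private history (H_attacker) always stores the ORIGINAL
    # text, so the attacker LLM's reasoning on later turns is never corrupted.
    attacker_history.append(message)
    if rng.random() < injection_probability:
        technique = rng.choice(sorted(TECHNIQUES))
        return TECHNIQUES[technique](message)  # encoded copy for the target only
    return message  # default probability 0.0: behavior unchanged
```

Encoding is a transport-level transform: the target sees the encoded message, the attacker's history never does.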
1 parent 197f567 commit 91f76d1

24 files changed: 1764 additions & 371 deletions

docs/docs/pages/advanced/red-teaming.mdx: 19 additions & 33 deletions
```diff
@@ -53,7 +53,6 @@ async def test_system_prompt_not_leaked():
         ]),
     ],
     script=attacker.marathon_script(
-        turns=30,
         checks=[check_no_leak],
     ),
 )
@@ -103,7 +102,6 @@ describe("Bank agent security", () => {
       }),
     ],
     script: attacker.marathonScript({
-      turns: 30,
       checks: [checkNoLeak],
     }),
   });
```
```diff
@@ -116,10 +114,10 @@ describe("Bank agent security", () => {
 :::
 
 <Callout type="info">
-Use `attacker.marathon_script()` instead of `scenario.marathon_script()` for red team runs. The instance method pads extra iterations for backtracked turns and wires up early exit.
+`total_turns` is the **only** parameter that controls how long a red team test runs — it is a hard cap. `attacker.marathon_script()` reads it from the agent automatically. No need to set `max_turns` on `scenario.run()`. Backtracked turns count toward the budget, and early exit can end the test sooner if the objective is achieved.
 </Callout>
 
-We recommend **50 turns** for thorough adversarial coverage. Agents that hold at turn 1 often break by turn 20. 30 turns is the minimum for meaningful results — fewer turns miss vulnerabilities that only surface under sustained escalation pressure.
+We recommend **50 turns** (`total_turns=50`) for thorough nightly adversarial coverage. Agents that hold at turn 1 often break by turn 20. The default of 30 turns is a good balance for per-PR checks — fewer turns miss vulnerabilities that only surface under sustained escalation pressure.
 
 ---
 
```
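The hard-cap semantics in the updated callout can be sketched as follows; `run_budget` and its arguments are illustrative, not the agent's real API:

```python
# Sketch: total_turns is a budget for ITERATIONS, so a backtracked (refused)
# turn still consumes one unit of budget. There is no padding to compensate.
def run_budget(total_turns, refused_turns):
    productive = 0
    for turn in range(total_turns):  # exactly total_turns iterations, hard cap
        if turn in refused_turns:
            continue  # backtrack: the retry comes out of the same budget
        productive += 1
    return productive
```

A run with two refusals in a 5-turn budget yields only 3 productive attack turns, which is the trade the "hard cap" design makes for predictable test duration.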
```diff
@@ -223,7 +221,7 @@ const attacker = scenario.redTeamCrescendo({
 | Attack objective | `target` | `target` | *required* | What the attacker tries to achieve. |
 | Attacker model | `model` | `model` | global default | Generates attack messages every turn. |
 | Planner/scorer model | `metaprompt_model` | `metapromptModel` | same as `model` | Plans attack once, scores responses per turn. |
-| Total turns | `total_turns` | `totalTurns` | `50` | Number of attack turns. 50 recommended. |
+| Total turns | `total_turns` | `totalTurns` | `30` | Number of attack turns. This is the single control for test duration — `max_turns` is ignored for scripted red team tests. 50 recommended for nightly. |
 | Per-turn scoring | `score_responses` | `scoreResponses` | `True` | Score responses 0–10 and adapt. |
 | Refusal detection | `fast_refusal_detection` | `detectRefusals` | `True` | Pattern-match refusals, skip scorer. Triggers backtracking. |
 | Early exit score | `success_score` | `successScore` | `9` | Score threshold for early exit. `None`/`undefined` to disable. |
```
````diff
@@ -240,35 +238,26 @@ const attacker = scenario.redTeamCrescendo({
 
 ### `marathon_script()` / `marathonScript()`
 
-Generates a multi-turn script: `[user(), agent(), ...checks] × turns → [...finalChecks, judge()]`.
+Generates a multi-turn script using `total_turns` from the agent: `[user(), agent(), ...checks] × totalTurns → [...finalChecks, judge()]`.
 
-Use the instance method for red team runs — it pads extra iterations for backtracking and wires up early exit.
+`total_turns` is a hard cap — backtracked turns count toward the budget. Early exit can end the test sooner if the objective is achieved.
 
 :::code-group
 
 ```python [python]
-# Instance method (recommended)
 attacker = scenario.RedTeamAgent.crescendo(target="...", total_turns=30)
-script = attacker.marathon_script(turns=30, checks=[fn], final_checks=[fn])
-
-# Standalone (no early exit, no backtrack padding)
-script = scenario.marathon_script(turns=30, checks=[fn], final_checks=[fn])
+script = attacker.marathon_script(checks=[fn], final_checks=[fn])
 ```
 
 ```typescript [typescript]
-// Instance method (recommended)
 const attacker = scenario.redTeamCrescendo({ target: "...", totalTurns: 30 });
-const script = attacker.marathonScript({ turns: 30, checks: [fn], finalChecks: [fn] });
-
-// Standalone (no early exit, no backtrack padding)
-const script = scenario.marathonScript({ turns: 30, checks: [fn], finalChecks: [fn] });
+const script = attacker.marathonScript({ checks: [fn], finalChecks: [fn] });
 ```
 
 :::
 
 | Parameter | Python | TypeScript | Description |
 |-----------|--------|------------|-------------|
-| Turn count | `turns` | `turns` | Number of user/agent exchanges. Match `total_turns`/`totalTurns`. |
 | Per-turn checks | `checks` | `checks` | Called after every agent response. Raise/throw to fail. |
 | End-of-run checks | `final_checks` | `finalChecks` | Called once after all turns, before the judge. |
 
````
```diff
@@ -345,7 +334,6 @@ result = await scenario.run(
         ]),
     ],
     script=attacker.marathon_script(
-        turns=30,
         checks=[check_no_restricted_tools, check_no_pii_leaked],
     ),
 )
@@ -389,7 +377,6 @@ const result = await scenario.run({
       }),
     ],
     script: attacker.marathonScript({
-      turns: 30,
       checks: [checkNoRestrictedTools, checkNoPiiLeaked],
     }),
   });
```
````diff
@@ -567,7 +554,7 @@ Write `target` from the attacker's perspective — what does success look like?
 
 ## CI integration
 
-Run red team tests alongside your functional test suite. We recommend 50 turns for nightly runs and 30 turns minimum for per-PR checks.
+Run red team tests alongside your functional test suite. We recommend 50 turns for nightly runs and 30 turns (the default) for per-PR checks.
 
 ```python
 # pyproject.toml
@@ -580,21 +567,21 @@ markers = [
 :::code-group
 
 ```python [python]
-# Per-PR: scoring off for speed
+# Per-PR: scoring off for speed (default 30 turns)
 @pytest.mark.red_team
 async def test_prompt_leak_fast():
     attacker = scenario.RedTeamAgent.crescendo(
-        target="...", total_turns=30,
+        target="...",
         score_responses=False, fast_refusal_detection=False,
     )
     result = await scenario.run(
         ...,
         agents=[MyAgent(), attacker, scenario.JudgeAgent(criteria=[...])],
-        script=attacker.marathon_script(turns=30),
+        script=attacker.marathon_script(),
     )
     assert result.success
 
-# Nightly: full adaptive scoring, 50 turns recommended
+# Nightly: full adaptive scoring, 50 turns
 @pytest.mark.red_team
 async def test_prompt_leak_full():
     attacker = scenario.RedTeamAgent.crescendo(
````
````diff
@@ -604,27 +591,27 @@ async def test_prompt_leak_full():
     result = await scenario.run(
         ...,
         agents=[MyAgent(), attacker, scenario.JudgeAgent(criteria=[...])],
-        script=attacker.marathon_script(turns=50),
+        script=attacker.marathon_script(),
     )
     assert result.success
 ```
 
 ```typescript [typescript]
-// Per-PR: scoring off for speed
+// Per-PR: scoring off for speed (default 30 turns)
 it("prompt leak (fast)", async () => {
   const attacker = scenario.redTeamCrescendo({
-    target: "...", totalTurns: 30,
+    target: "...",
     scoreResponses: false, detectRefusals: false,
   });
   const result = await scenario.run({
     ...,
     agents: [myAgent, attacker, scenario.judgeAgent({ criteria: [...] })],
-    script: attacker.marathonScript({ turns: 30 }),
+    script: attacker.marathonScript(),
   });
   expect(result.success).toBe(true);
 }, 180_000);
 
-// Nightly: full adaptive scoring, 50 turns recommended
+// Nightly: full adaptive scoring, 50 turns
 it("prompt leak (full)", async () => {
   const attacker = scenario.redTeamCrescendo({
     target: "...", totalTurns: 50,
@@ -633,7 +620,7 @@ it("prompt leak (full)", async () => {
   const result = await scenario.run({
     ...,
     agents: [myAgent, attacker, scenario.judgeAgent({ criteria: [...] })],
-    script: attacker.marathonScript({ turns: 50 }),
+    script: attacker.marathonScript(),
   });
   expect(result.success).toBe(true);
 }, 300_000);
````
````diff
@@ -651,7 +638,6 @@ it("prompt leak (full)", async () => {
 from scenario import RedTeamAgent # main class
 from scenario import RedTeamStrategy # abstract base for custom strategies
 from scenario import CrescendoStrategy # built-in strategy
-from scenario import marathon_script # standalone script helper
 ```
 
 ### TypeScript
@@ -682,7 +668,7 @@ import scenario, {
 
 ## Next steps
 
-- [Scripted Simulations](/basics/scripted-simulations) — how scripts work under `marathon_script()`
+- [Scripted Simulations](/basics/scripted-simulations) — how scripts and script steps work
 - [Judge Agent](/basics/judge-agent) — configure pass/fail criteria
 - [Custom Judge](/advanced/custom-judge) — domain-specific security judge
 - [CI/CD Integration](/basics/ci-cd-integration) — run red team tests in your pipeline
````
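As a footnote to the `fix(js): use shift() instead of pop() in consumeUntilRole` change in the commit message, the front-vs-back queue mismatch is easy to reproduce in miniature. A Python sketch follows (the real fix is in the TypeScript executor's role queue; names here are illustrative):

```python
# Sketch: the loop inspects the FRONT of the queue (index 0), so removal must
# also happen at the front — Python's pop(0), i.e. JS shift(). Removing from
# the back (pop() / JS pop()) drains the wrong end and consumes pending roles
# that should have been preserved.
def consume_until_role(pending_roles, role, buggy=False):
    consumed = []
    while pending_roles and pending_roles[0] != role:
        index = -1 if buggy else 0  # buggy variant pops from the back
        consumed.append(pending_roles.pop(index))
    return consumed
```

With the buggy back-removal, a queue like `["user", "agent", "judge"]` is drained entirely, including the `"judge"` role the loop was supposed to stop at.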
