1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

Documents changes that affect configuration usage patterns (added/moved/removed/renamed fields, notable logic changes).

- **`trainer.model.lora.init_adapter_path`**: Added optional RL LoRA warm-start path. Loads a saved LoRA adapter into the created adapter slot before training updates. (2026-04-01)
- **`log.file` and `log.env_worker_logs` removed**: Removed `log.file` (from `LogConfig` and `SharedLogConfig`) and `log.env_worker_logs` (from `LogConfig`). Python file logging is replaced by deployment-level capture. Existing configs using these fields must delete them. Log paths unified: `.stdout` files renamed to `.log`, SLURM logs moved from `slurm/` to `logs/`. (2026-03-31)
- **`trainer.log.ranks_filter` (NEW)**: Added `ranks_filter: list[int]` to `TrainerLogConfig` (default: `[0]`). Controls which ranks appear in trainer console output via torchrun's `--local-ranks-filter`. (2026-03-31)
- **`wandb.log_extras.sample_ratio` / monitor sample logging defaults**: `wandb.log_extras.sample_ratio` is now actually applied to W&B sample-table logging via the shared monitor sampler (it was previously a no-op for WandB). Separately, the orchestrator no longer hard-caps sample logging to 8 rollouts before monitor-level sampling runs, so when monitor `sample_ratio` is `None`, monitors now receive and may log the full rollout batch for a step instead of at most 8 rollouts. This affects both W&B and Prime monitor sample logging behavior. (2026-03-27)
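The `init_adapter_path` entry above can be exercised with a config fragment like the following (a minimal sketch; the adapter path is illustrative, not taken from this PR):

```toml
# Hypothetical example: warm-start RL LoRA training from a previously
# saved adapter before any RL updates are applied.
[trainer.model.lora]
rank = 8
init_adapter_path = "outputs/sft/weights/adapter"  # illustrative path
```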
34 changes: 34 additions & 0 deletions configs/ci/integration/rl_lora_init/resume.toml
@@ -0,0 +1,34 @@
max_steps = 25
seq_len = 2048

[ckpt]
resume_step = 20

[model]
name = "PrimeIntellect/Qwen3-0.6B"

[trainer.optim]
lr = 2e-5

[trainer.model.lora]
rank = 8

[trainer.ckpt.weights]
save_adapter_separately = true

[orchestrator]
batch_size = 64
rollouts_per_example = 16

[orchestrator.model.lora]
name = "r8-init-cont"

[orchestrator.sampling]
max_tokens = 128

[[orchestrator.env]]
id = "reverse-text"

[inference]
enable_lora = true
gpu_memory_utilization = 0.7
29 changes: 29 additions & 0 deletions configs/ci/integration/rl_lora_init/sft.toml
@@ -0,0 +1,29 @@
max_steps = 100

[ckpt]

[ckpt.weights]
save_adapter_separately = true

[model]
name = "PrimeIntellect/Qwen3-0.6B"

[model.lora]
rank = 8
target_modules = [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj",
]

[data]
name = "PrimeIntellect/Reverse-Text-SFT"
batch_size = 16
seq_len = 1024

[optim]
lr = 2e-4
33 changes: 33 additions & 0 deletions configs/ci/integration/rl_lora_init/start.toml
@@ -0,0 +1,33 @@
max_steps = 20
seq_len = 2048

[ckpt]

[model]
name = "PrimeIntellect/Qwen3-0.6B"

[trainer.optim]
lr = 2e-5

[trainer.model.lora]
rank = 8

[trainer.ckpt.weights]
save_adapter_separately = true

[orchestrator]
batch_size = 64
rollouts_per_example = 16

[orchestrator.model.lora]
name = "r8-init-cont"

[orchestrator.sampling]
max_tokens = 128

[[orchestrator.env]]
id = "reverse-text"

[inference]
enable_lora = true
gpu_memory_utilization = 0.7
7 changes: 7 additions & 0 deletions src/prime_rl/configs/trainer.py
@@ -158,6 +158,13 @@ class LoRAConfig(BaseConfig):
),
] = []

init_adapter_path: Annotated[
Path | None,
Field(
description="Optional adapter path to warm-start training. Loaded into the created adapter slot before RL updates.",
),
] = None


class DebugModelConfig(BaseConfig):
"""Debugging feature around model and distributed training."""