BloopAI · benmarten · Jan 8, 2026
diff --git a/crates/db/.sqlx/query-0307b8705a516585e115eff1fbf85c27a9fb391453b98267fbda24a14118a8d2.json b/crates/db/.sqlx/query-0307b8705a516585e115eff1fbf85c27a9fb391453b98267fbda24a14118a8d2.json
diff --git a/crates/db/src/models/execution_process.rs b/crates/db/src/models/execution_process.rs
@@ -446,6 +446,25 @@ impl ExecutionProcess {
         .await
     }
 
+    /// Count the session's non-dropped `cleanupscript` processes whose status is `failed`.
+    /// Callers use the total to cap automatic retries after cleanup failures.
+    pub async fn count_failed_cleanup_scripts_in_session(
+        pool: &SqlitePool,
+        session_id: Uuid,
+    ) -> Result<i64, sqlx::Error> {
+        sqlx::query_scalar!(
+            r#"SELECT COUNT(*) as "count!: i64"
+               FROM execution_processes ep
+               WHERE ep.session_id = $1
+                 AND ep.run_reason = 'cleanupscript'
+                 AND ep.status = 'failed'
+                 AND ep.dropped = FALSE"#,
+            session_id
+        )
+        .fetch_one(pool)
+        .await
+    }
+
     /// Create a new execution process
     ///
     /// Note: We intentionally avoid using a transaction here. SQLite update

diff --git a/crates/local-deployment/src/container.rs b/crates/local-deployment/src/container.rs
@@ -65,6 +65,11 @@ use uuid::Uuid;
 
 use crate::{command, copy};
 
+/// Cap on failed cleanup-script runs per session, guarding against endless fix/retry loops.
+/// While the session's failed count is below this value the agent is triggered to fix the
+/// failure; once the count reaches it the task is finalized without further retries.
+const MAX_CLEANUP_RETRY_ATTEMPTS: i64 = 3;
+
 #[derive(Clone)]
 pub struct LocalContainerService {
     db: DBService,
@@ -431,7 +436,46 @@ impl LocalContainerService {
                     ExecutionProcessStatus::Running
                 );
 
-                if success || cleanup_done {
+                let cleanup_failed = matches!(
+                    ctx.execution_process.run_reason,
+                    ExecutionProcessRunReason::CleanupScript
+                ) && matches!(
+                    ctx.execution_process.status,
+                    ExecutionProcessStatus::Failed
+                );
+
+                if cleanup_failed {
+                    // Check how many times we've already retried
+                    let failed_count = ExecutionProcess::count_failed_cleanup_scripts_in_session(
+                        &db.pool,
+                        ctx.session.id,
+                    )
+                    .await
+                    .unwrap_or(0);
+
+                    if failed_count < MAX_CLEANUP_RETRY_ATTEMPTS {
+                        // Cleanup script failed - trigger agent to fix the issue
+                        tracing::info!(
+                            "Cleanup script failed for workspace {} (attempt {}/{}) - triggering agent to fix",
+                            ctx.workspace.id,
+                            failed_count,
+                            MAX_CLEANUP_RETRY_ATTEMPTS
+                        );
+                        if let Err(e) = container.start_agent_on_cleanup_failure(&ctx).await {
+                            tracing::error!("Failed to start agent on cleanup failure: {}", e);
+                            // Fall back to finalization if we can't start the agent
+                            container.finalize_task(publisher.as_ref().ok(), &ctx).await;
+                        }
+                    } else {
+                        // Max retries exceeded - finalize and let the user know
+                        tracing::warn!(
+                            "Cleanup script failed {} times for workspace {} - max retries exceeded, finalizing task",
+                            failed_count,
+                            ctx.workspace.id
+                        );
+                        container.finalize_task(publisher.as_ref().ok(), &ctx).await;
+                    }
+                } else if success || cleanup_done {
                     // Commit changes (if any) and get feedback about whether changes were made
                     let changes_committed = match container.try_commit_changes(&ctx).await {
                         Ok(committed) => committed,
@@ -787,6 +831,68 @@ impl LocalContainerService {
         Ok(())
     }
 
+    /// Start a coding-agent run that asks the agent to fix a failed cleanup script.
+    ///
+    /// Sends a follow-up when the session has an earlier coding-agent turn, otherwise an initial
+    /// request. Errors if no executor profile is found, a lookup fails, or the start fails.
+    async fn start_agent_on_cleanup_failure(
+        &self,
+        ctx: &ExecutionContext,
+    ) -> Result<ExecutionProcess, ContainerError> {
+        let pool = &self.db.pool;
+        // Get executor profile from the latest CodingAgent process in this session
+        let executor_profile_id =
+            ExecutionProcess::latest_executor_profile_for_session(pool, ctx.session.id)
+                .await
+                .map_err(|e| {
+                    ContainerError::Other(anyhow!("Failed to get executor profile: {e}"))
+                })?
+                .ok_or_else(|| {
+                    ContainerError::Other(anyhow!("No executor profile found for cleanup agent"))
+                })?;
+
+        // Agent session ID of the latest coding-agent turn, if any (for session continuity)
+        let latest_agent_session_id =
+            ExecutionProcess::find_latest_coding_agent_turn_session_id(pool, ctx.session.id)
+                .await?;
+
+        let project_repos =
+            ProjectRepo::find_by_project_id_with_names(pool, ctx.project.id).await?;
+        let cleanup_action = self.cleanup_actions_for_repos(&project_repos);
+
+        let working_dir = ctx
+            .workspace
+            .agent_working_dir
+            .as_ref()
+            .filter(|dir| !dir.is_empty())
+            .cloned();
+        let prompt = "The cleanup script failed. Please fix the issues and try again.".to_string();
+        // Only one branch runs, so each takes ownership of the shared values without cloning.
+        let action_type = if let Some(session_id) = latest_agent_session_id {
+            ExecutorActionType::CodingAgentFollowUpRequest(CodingAgentFollowUpRequest {
+                prompt,
+                session_id,
+                executor_profile_id,
+                working_dir,
+            })
+        } else {
+            ExecutorActionType::CodingAgentInitialRequest(CodingAgentInitialRequest {
+                prompt,
+                executor_profile_id,
+                working_dir,
+            })
+        };
+
+        let action = ExecutorAction::new(action_type, cleanup_action.map(Box::new));
+        self.start_execution(
+            &ctx.workspace,
+            &ctx.session,
+            &action,
+            &ExecutionProcessRunReason::CodingAgent,
+        )
+        .await
+    }
+
     /// Start a follow-up execution from a queued message
     async fn start_queued_follow_up(
         &self,