Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions crates/db/src/models/execution_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,25 @@ impl ExecutionProcess {
.await
}

/// Returns how many cleanup-script runs recorded for `session_id` ended in failure.
///
/// Only rows of `execution_processes` that belong to the session, have
/// `run_reason = 'cleanupscript'`, `status = 'failed'`, and are not marked
/// `dropped` are counted. The result is intended for retry limiting.
///
/// # Errors
///
/// Propagates any [`sqlx::Error`] raised while running the query.
pub async fn count_failed_cleanup_scripts_in_session(
    pool: &SqlitePool,
    session_id: Uuid,
) -> Result<i64, sqlx::Error> {
    // The `"count!: i64"` column override makes the macro yield a non-null
    // `i64`, so the query result can be returned as-is.
    sqlx::query_scalar!(
        r#"SELECT COUNT(*) as "count!: i64"
FROM execution_processes ep
WHERE ep.session_id = $1
AND ep.run_reason = 'cleanupscript'
AND ep.status = 'failed'
AND ep.dropped = FALSE"#,
        session_id
    )
    .fetch_one(pool)
    .await
}

/// Create a new execution process
///
/// Note: We intentionally avoid using a transaction here. SQLite update
Expand Down
108 changes: 107 additions & 1 deletion crates/local-deployment/src/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ use uuid::Uuid;

use crate::{command, copy};

/// Cap on failed cleanup-script runs per session before automatic agent retries stop.
///
/// When a cleanup script fails, the number of failed cleanup scripts recorded for the
/// session is compared against this value with `<`: while the count is below it, the
/// agent is started to fix the failure; once the count reaches it, the task is
/// finalized without further retries. This prevents infinite loops when the cleanup
/// script has a persistent issue.
///
/// Typed as `i64` so it can be compared directly with the count returned by the
/// database query.
///
/// NOTE(review): if that count already includes the run that just failed, the agent is
/// triggered at most `MAX_CLEANUP_RETRY_ATTEMPTS - 1` times — confirm this is intended.
const MAX_CLEANUP_RETRY_ATTEMPTS: i64 = 3;

#[derive(Clone)]
pub struct LocalContainerService {
db: DBService,
Expand Down Expand Up @@ -431,7 +436,46 @@ impl LocalContainerService {
ExecutionProcessStatus::Running
);

if success || cleanup_done {
let cleanup_failed = matches!(
ctx.execution_process.run_reason,
ExecutionProcessRunReason::CleanupScript
) && matches!(
ctx.execution_process.status,
ExecutionProcessStatus::Failed
);

if cleanup_failed {
// Check how many times we've already retried
let failed_count = ExecutionProcess::count_failed_cleanup_scripts_in_session(
&db.pool,
ctx.session.id,
)
.await
.unwrap_or(0);

if failed_count < MAX_CLEANUP_RETRY_ATTEMPTS {
// Cleanup script failed - trigger agent to fix the issue
tracing::info!(
"Cleanup script failed for workspace {} (attempt {}/{}) - triggering agent to fix",
ctx.workspace.id,
failed_count,
MAX_CLEANUP_RETRY_ATTEMPTS
);
if let Err(e) = container.start_agent_on_cleanup_failure(&ctx).await {
tracing::error!("Failed to start agent on cleanup failure: {}", e);
// Fall back to finalization if we can't start the agent
container.finalize_task(publisher.as_ref().ok(), &ctx).await;
}
} else {
// Max retries exceeded - finalize and let the user know
tracing::warn!(
"Cleanup script failed {} times for workspace {} - max retries exceeded, finalizing task",
failed_count,
ctx.workspace.id
);
container.finalize_task(publisher.as_ref().ok(), &ctx).await;
}
} else if success || cleanup_done {
// Commit changes (if any) and get feedback about whether changes were made
let changes_committed = match container.try_commit_changes(&ctx).await {
Ok(committed) => committed,
Expand Down Expand Up @@ -787,6 +831,68 @@ impl LocalContainerService {
Ok(())
}

/// Start a coding-agent run that asks the agent to fix a cleanup script failure.
///
/// The request reuses the executor profile of the session. If a previous coding-agent
/// turn left an agent session id, a follow-up request is sent on that agent session;
/// otherwise a fresh initial request is issued. The cleanup action built from the
/// project's repos is attached to the executor action (presumably so the cleanup script
/// runs again once the agent finishes — confirm against `ExecutorAction::new`).
///
/// # Errors
///
/// Returns [`ContainerError`] when:
/// - the executor profile lookup fails or yields no profile for the session,
/// - the agent session id or project repos cannot be loaded,
/// - starting the execution itself fails.
async fn start_agent_on_cleanup_failure(
    &self,
    ctx: &ExecutionContext,
) -> Result<ExecutionProcess, ContainerError> {
    let pool = &self.db.pool;

    // Executor profile used for this session; without one we cannot launch an agent.
    let executor_profile_id =
        ExecutionProcess::latest_executor_profile_for_session(pool, ctx.session.id)
            .await
            .map_err(|e| ContainerError::Other(anyhow!("Failed to get executor profile: {e}")))?
            .ok_or_else(|| {
                ContainerError::Other(anyhow!("No executor profile found for cleanup agent"))
            })?;

    // Agent session id of the latest coding-agent turn, if any, for session continuity.
    let latest_agent_session_id =
        ExecutionProcess::find_latest_coding_agent_turn_session_id(pool, ctx.session.id)
            .await?;

    let project_repos = ProjectRepo::find_by_project_id_with_names(pool, ctx.project.id).await?;
    let cleanup_action = self.cleanup_actions_for_repos(&project_repos);

    // An empty working-dir string is treated the same as "not configured".
    let working_dir = ctx
        .workspace
        .agent_working_dir
        .as_ref()
        .filter(|dir| !dir.is_empty())
        .cloned();

    let prompt = "The cleanup script failed. Please fix the issues and try again.".to_string();

    // The arms are mutually exclusive, so each owned value is moved exactly once
    // and no clones are needed.
    let action_type = match latest_agent_session_id {
        Some(agent_session_id) => {
            ExecutorActionType::CodingAgentFollowUpRequest(CodingAgentFollowUpRequest {
                prompt,
                session_id: agent_session_id,
                executor_profile_id,
                working_dir,
            })
        }
        None => ExecutorActionType::CodingAgentInitialRequest(CodingAgentInitialRequest {
            prompt,
            executor_profile_id,
            working_dir,
        }),
    };

    let action = ExecutorAction::new(action_type, cleanup_action.map(Box::new));

    // Recorded as a regular coding-agent run, not as a cleanup script.
    self.start_execution(
        &ctx.workspace,
        &ctx.session,
        &action,
        &ExecutionProcessRunReason::CodingAgent,
    )
    .await
}

/// Start a follow-up execution from a queued message
async fn start_queued_follow_up(
&self,
Expand Down
Loading