Hmbown · Hmbown · Jun 6, 2026 · Jun 6, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -66,6 +66,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   visibility trail, and @BigBenLabs, @lzx1545642258, @yangdaowan,
   @mangdehuang, @VerrPower, @hejia-v, @nasus9527, and @ygzhang-cn for the
   GUI/VS Code demand and validation trail.
+- Added inline live-output refresh for background shell Exec cards keyed by the
+  exact shell task id, so long-running commands can show bounded stdout/stderr
+  tails without consuming deltas or matching by command text. Thanks
+  @donglovejava for the live shell-output direction in #2048.
 - Added a static prompt composer override for embedders that need to replace
   the byte-stable base/personality prompt segment while leaving mode metadata,
   approval policy, tool taxonomy, Context Management, and the Compaction Relay

diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md
@@ -66,6 +66,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   visibility trail, and @BigBenLabs, @lzx1545642258, @yangdaowan,
   @mangdehuang, @VerrPower, @hejia-v, @nasus9527, and @ygzhang-cn for the
   GUI/VS Code demand and validation trail.
+- Added inline live-output refresh for background shell Exec cards keyed by the
+  exact shell task id, so long-running commands can show bounded stdout/stderr
+  tails without consuming deltas or matching by command text. Thanks
+  @donglovejava for the live shell-output direction in #2048.
 - Added a static prompt composer override for embedders that need to replace
   the byte-stable base/personality prompt segment while leaving mode metadata,
   approval policy, tool taxonomy, Context Management, and the Compaction Relay

diff --git a/crates/tui/src/tui/active_cell.rs b/crates/tui/src/tui/active_cell.rs
@@ -331,6 +331,8 @@ mod tests {
             command: command.to_string(),
             status: ToolStatus::Running,
             output: None,
+            live_output: None,
+            shell_task_id: None,
             started_at: Some(Instant::now()),
             duration_ms: None,
             source: ExecSource::Assistant,

diff --git a/crates/tui/src/tui/history.rs b/crates/tui/src/tui/history.rs
@@ -890,6 +890,8 @@ pub struct ExecCell {
     pub command: String,
     pub status: ToolStatus,
     pub output: Option<String>,
+    pub live_output: Option<String>,
+    pub shell_task_id: Option<String>,
     pub started_at: Option<Instant>,
     pub duration_ms: Option<u64>,
     pub source: ExecSource,
@@ -946,7 +948,7 @@ impl ExecCell {
         }
 
         if self.interaction.is_none() {
-            if let Some(output) = self.output.as_ref() {
+            if let Some(output) = self.output.as_ref().or(self.live_output.as_ref()) {
                 lines.extend(render_exec_output_mode(
                     output,
                     width,
@@ -4306,6 +4308,8 @@ mod tests {
             command: "echo hi".to_string(),
             status: ToolStatus::Running,
             output: None,
+            live_output: None,
+            shell_task_id: None,
             started_at,
             duration_ms: None,
             source: ExecSource::Assistant,
@@ -4645,6 +4649,8 @@ mod tests {
             command: "ls".to_string(),
             status: ToolStatus::Success,
             output: Some("a\nb\n".to_string()),
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: Some(10),
             source: ExecSource::Assistant,
@@ -4675,6 +4681,8 @@ mod tests {
             command: "cargo test --workspace --all-features".to_string(),
             status: ToolStatus::Running,
             output: None,
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: None,
             source: ExecSource::Assistant,
@@ -4991,6 +4999,8 @@ mod tests {
             command: "false".to_string(),
             status: ToolStatus::Failed,
             output: Some("boom".to_string()),
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: Some(42),
             source: ExecSource::Assistant,
@@ -5046,6 +5056,49 @@ mod tests {
         lines.iter().map(line_text).collect::<Vec<_>>().join("\n")
     }
 
+    /// With no final output recorded, a running Exec card renders its live tail lines.
+    #[test]
+    fn exec_cell_renders_live_shell_output_before_final_output() {
+        let running = ExecCell {
+            command: String::from("cargo test"),
+            status: ToolStatus::Running,
+            output: None,
+            live_output: Some(String::from("running line 1\nrunning line 2")),
+            shell_task_id: Some(String::from("shell_live")),
+            started_at: None,
+            duration_ms: None,
+            source: ExecSource::Assistant,
+            interaction: None,
+            output_summary: None,
+        };
+        let rendered = lines_text(&running.lines_with_motion(80, true));
+        for expected in ["running line 1", "running line 2"] {
+            assert!(rendered.contains(expected));
+        }
+        assert!(!rendered.contains("Ctrl+B opens shell controls"));
+    }
+
+    /// Once final output is recorded, a leftover live tail must not appear in the card.
+    #[test]
+    fn exec_cell_prefers_final_output_over_live_shell_tail() {
+        let finished = ExecCell {
+            command: String::from("cargo test"),
+            status: ToolStatus::Success,
+            output: Some(String::from("final output")),
+            live_output: Some(String::from("stale live tail")),
+            shell_task_id: Some(String::from("shell_live")),
+            started_at: None,
+            duration_ms: None,
+            source: ExecSource::Assistant,
+            interaction: None,
+            output_summary: None,
+        };
+        let rendered = lines_text(&finished.lines_with_motion(80, true));
+
+        assert!(rendered.contains("final output"));
+        assert!(!rendered.contains("stale live tail"));
+    }
+
     #[test]
     fn long_thinking_display_is_shorter_than_transcript() {
         // Build a multi-paragraph thinking body so the live view has
@@ -5162,6 +5215,8 @@ mod tests {
             command: "noisy_script.sh".to_string(),
             status: ToolStatus::Success,
             output: Some(output),
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: Some(120),
             source: ExecSource::Assistant,
@@ -5613,6 +5668,8 @@ mod tests {
             command: command.to_string(),
             status: ToolStatus::Success,
             output: Some("ok".to_string()),
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: None,
             source: ExecSource::Assistant,

diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs
@@ -2874,6 +2874,8 @@ mod tests {
                     command: command.to_string(),
                     status: ToolStatus::Running,
                     output: None,
+                    live_output: None,
+                    shell_task_id: None,
                     started_at: None,
                     duration_ms: Some(ACTIVE_TOOL_STALE_RUNNING_ROW_TTL.as_millis() as u64 + 1),
                     source: ExecSource::Assistant,
@@ -2906,6 +2908,8 @@ mod tests {
                 command: "cargo test --workspace".to_string(),
                 status: ToolStatus::Running,
                 output: None,
+                live_output: None,
+                shell_task_id: None,
                 started_at: Some(std::time::Instant::now()),
                 duration_ms: None,
                 source: ExecSource::Assistant,
@@ -3040,6 +3044,8 @@ mod tests {
                     .to_string(),
                 status: ToolStatus::Failed,
                 output: Some("Lint pending\nTest pending".to_string()),
+                live_output: None,
+                shell_task_id: None,
                 started_at: None,
                 duration_ms: Some(15_000),
                 source: ExecSource::Assistant,
@@ -3080,6 +3086,8 @@ mod tests {
             command: "cargo test -p codewhale-tui".to_string(),
             status: ToolStatus::Failed,
             output: Some("test failed".to_string()),
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: Some(1_250),
             source: ExecSource::Assistant,
@@ -3109,6 +3117,8 @@ mod tests {
             command: "cargo check".to_string(),
             status: ToolStatus::Success,
             output: Some("Finished".to_string()),
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: Some(1_250),
             source: ExecSource::Assistant,

diff --git a/crates/tui/src/tui/tool_routing.rs b/crates/tui/src/tui/tool_routing.rs
@@ -101,6 +101,8 @@ pub(super) fn handle_tool_call_started(
                     command,
                     status: ToolStatus::Running,
                     output: None,
+                    live_output: None,
+                    shell_task_id: None,
                     started_at: Some(Instant::now()),
                     duration_ms: None,
                     source,
@@ -133,6 +135,8 @@ pub(super) fn handle_tool_call_started(
                 command,
                 status: ToolStatus::Running,
                 output: None,
+                live_output: None,
+                shell_task_id: None,
                 started_at: Some(Instant::now()),
                 duration_ms: None,
                 source,
@@ -506,6 +510,16 @@ pub(super) fn handle_tool_call_complete(
             HistoryCell::Tool(ToolCell::Exec(exec)) => {
                 exec.status = status;
                 if let Ok(tool_result) = result.as_ref() {
+                    let shell_task_id = tool_result
+                        .metadata
+                        .as_ref()
+                        .and_then(|m| m.get("task_id"))
+                        .and_then(serde_json::Value::as_str)
+                        .filter(|task_id| !task_id.trim().is_empty())
+                        .map(str::to_string);
+                    if shell_task_id.is_some() {
+                        exec.shell_task_id = shell_task_id;
+                    }
                     if let Some(meta_command) = tool_result
                         .metadata
                         .as_ref()
@@ -538,6 +552,12 @@ pub(super) fn handle_tool_call_complete(
                         exec.output = Some(tool_result.content.clone());
                         exec.output_summary =
                             Some(super::history::summarize_tool_output(&tool_result.content));
+                        exec.live_output = None;
+                    } else if status == ToolStatus::Running
+                        && exec.interaction.is_none()
+                        && !tool_result.content.is_empty()
+                    {
+                        exec.live_output = Some(tool_result.content.clone());
                     }
                 } else if let Err(err) = result.as_ref()
                     && exec.interaction.is_none()

diff --git a/crates/tui/src/tui/transcript.rs b/crates/tui/src/tui/transcript.rs
@@ -616,6 +616,8 @@ mod tests {
             command: command.to_string(),
             status: ToolStatus::Running,
             output: None,
+            live_output: None,
+            shell_task_id: None,
             started_at: None,
             duration_ms: None,
             source: ExecSource::Assistant,

diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs
@@ -65,6 +65,7 @@ use crate::settings::Settings;
 use crate::task_manager::{
     NewTaskRequest, SharedTaskManager, TaskManager, TaskManagerConfig, TaskStatus, TaskSummary,
 };
+use crate::tools::shell::{ShellJobSnapshot, ShellStatus};
 use crate::tools::spec::{RuntimeToolServices, ToolResult};
 use crate::tools::subagent::SubAgentStatus;
 use crate::tui::app::HuntVerdict;
@@ -1014,6 +1015,89 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag
     app.task_panel = entries;
 }
 
+/// Polls the shell manager and mirrors each job's status, output tail and elapsed time onto
+/// the Exec card carrying the same `shell_task_id`. Returns `true` only if a card changed.
+fn refresh_shell_exec_live_output(app: &mut App) -> bool {
+    let Some(manager) = app.runtime_services.shell_manager.as_ref().cloned() else {
+        return false;
+    };
+    // Index the snapshots by job id; the lock is held only while this statement runs.
+    let jobs: std::collections::HashMap<_, _> = match manager.lock() {
+        Ok(mut guard) => guard
+            .list_jobs()
+            .into_iter()
+            .map(|job| (job.id.clone(), job))
+            .collect(),
+        Err(_) => return false,
+    };
+    if jobs.is_empty() {
+        return false;
+    }
+
+    let mut changed = false;
+    for index in 0..app.virtual_cell_count() {
+        let Some(update) = shell_exec_live_update(app, index, &jobs) else {
+            continue;
+        };
+        let (task_id, status, live, elapsed_ms) = update;
+        // Re-check on the mutable borrow so only the matching, unfinished card is written.
+        if let Some(HistoryCell::Tool(ToolCell::Exec(exec))) = app.cell_at_virtual_index_mut(index)
+            && exec.output.is_none()
+            && exec.shell_task_id.as_deref() == Some(task_id.as_str())
+        {
+            exec.status = status;
+            exec.live_output = live;
+            exec.duration_ms = Some(elapsed_ms);
+            changed = true;
+        }
+    }
+    changed
+}
+
+/// Computes the pending live update for the Exec card at `index`, or `None` when the cell is
+/// not an unfinished shell card with a known job or already matches that job's snapshot.
+fn shell_exec_live_update(
+    app: &App,
+    index: usize,
+    jobs: &std::collections::HashMap<String, ShellJobSnapshot>,
+) -> Option<(String, ToolStatus, Option<String>, u64)> {
+    let HistoryCell::Tool(ToolCell::Exec(exec)) = app.cell_at_virtual_index(index)? else {
+        return None;
+    };
+    if exec.output.is_some() {
+        return None;
+    }
+    let task_id = exec.shell_task_id.as_deref()?;
+    let job = jobs.get(task_id)?;
+    let status = shell_job_tool_status(&job.status);
+    // When the snapshot has no tail, keep whatever live output the card already holds.
+    let live = shell_job_live_output(job).or_else(|| exec.live_output.clone());
+    let unchanged = exec.status == status
+        && exec.live_output == live
+        && exec.duration_ms == Some(job.elapsed_ms);
+    (!unchanged).then(|| (task_id.to_string(), status, live, job.elapsed_ms))
+}
+
+fn shell_job_tool_status(status: &ShellStatus) -> ToolStatus {
+    match status {
+        ShellStatus::Failed | ShellStatus::Killed | ShellStatus::TimedOut => ToolStatus::Failed,
+        ShellStatus::Completed => ToolStatus::Success, // only a clean completion is a success
+        ShellStatus::Running => ToolStatus::Running, // a running job keeps the card running
+    }
+}
+
+/// Builds the live tail for a job: stdout first, then stderr under a `STDERR:` header.
+/// Returns `None` when both tails are empty.
+fn shell_job_live_output(job: &ShellJobSnapshot) -> Option<String> {
+    let (out, err) = (&job.stdout_tail, &job.stderr_tail);
+    match (out.is_empty(), err.is_empty()) {
+        (true, true) => None,
+        (false, true) => Some(out.clone()),
+        (true, false) => Some(format!("STDERR:\n{}", err)),
+        (false, false) => Some(format!("{}\n\nSTDERR:\n{}", out, err)),
+    }
+}
+
 fn active_reasoning_task_entries(app: &App) -> Vec<TaskPanelEntry> {
     let Some(active) = app.active_cell.as_ref() else {
         return Vec::new();
@@ -1301,6 +1385,9 @@ async fn run_event_loop(
 
         if last_task_refresh.elapsed() >= Duration::from_millis(2500) {
             refresh_active_task_panel(app, &task_manager).await;
+            if refresh_shell_exec_live_output(app) {
+                app.needs_redraw = true;
+            }
             last_task_refresh = Instant::now();
             app.needs_redraw = true;
         }