Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions crates/tui/src/tui/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,9 @@ pub struct App {
pub next_history_revision: u64,
pub api_messages: Vec<Message>,
pub is_loading: bool,
/// Ghost-text follow-up suggestion shown in the composer when empty.
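/// Generated asynchronously after each completed turn. The composer only
/// renders it while the input is empty; the value itself is reset to `None`
/// when the next turn starts loading, or consumed when it is accepted into
/// the input.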
pub prompt_suggestion: Option<String>,
Comment on lines +1185 to +1187

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The comment and PR description state that the suggestion is "cleared on new input" (typing dismisses it). However, the current implementation only hides the ghost text when input_text is non-empty, but does not actually clear prompt_suggestion (set it to None). If a user types a character and then deletes it, the same suggestion will reappear. To match the described behavior, app.prompt_suggestion should be explicitly set to None when new input is inserted (e.g., inside insert_char and insert_str).

/// Degraded connectivity mode; new user inputs are queued for later retry.
pub offline_mode: bool,
/// Whether an `EngineEvent::Error` has already been posted for the
Expand Down Expand Up @@ -1521,6 +1524,8 @@ pub struct App {
/// DeepSeek account balance, refreshed once per turn completion.
/// Shared cell updated by background fetch tasks; read lock in the UI thread.
pub balance_cell: std::sync::Arc<std::sync::Mutex<Option<crate::pricing::BalanceInfo>>>,
/// Shared cell for async prompt suggestion delivery from background task.
pub prompt_suggestion_cell: std::sync::Arc<std::sync::Mutex<Option<String>>>,
/// Tracks whether the initial balance fetch has been attempted for this session.
pub balance_initiated: bool,
/// Timestamp of the last balance fetch, used to debounce rapid requests.
Expand Down Expand Up @@ -1991,6 +1996,7 @@ impl App {
next_history_revision: 1,
api_messages: Vec::new(),
is_loading: false,
prompt_suggestion: None,
offline_mode: false,
turn_error_posted: false,
status_message: None,
Expand Down Expand Up @@ -2145,6 +2151,7 @@ impl App {
turn_last_activity_at: None,
cumulative_turn_duration: std::time::Duration::ZERO,
balance_cell: std::sync::Arc::new(std::sync::Mutex::new(None)),
prompt_suggestion_cell: std::sync::Arc::new(std::sync::Mutex::new(None)),
balance_initiated: false,
last_balance_fetch: None,
runtime_turn_id: None,
Expand Down
1 change: 1 addition & 0 deletions crates/tui/src/tui/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub mod paste;
pub mod paste_burst;
pub mod persistence_actor;
pub mod plan_prompt;
pub mod prompt_suggestion;
pub mod provider_picker;
pub mod scrolling;
pub mod selection;
Expand Down
117 changes: 117 additions & 0 deletions crates/tui/src/tui/prompt_suggestion.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
//! Ghost-text follow-up prompt suggestion.
//!
//! After each completed turn, a lightweight API call generates ONE short
//! follow-up question the user might want to ask next. The suggestion is
//! rendered as dimmed ghost text in the composer when the input is empty.

use reqwest::header::{AUTHORIZATION, CONTENT_TYPE};
use serde_json::Value;
use tracing::debug;

/// Generate a follow-up prompt suggestion based on recent messages.
///
/// Sends the conversation summary to the API with a system prompt that
/// asks for a single short follow-up question. Returns `None` when the
/// request fails, the response body is not the expected JSON, or the reply
/// is empty or longer than 200 bytes — callers treat this as best-effort.
pub async fn generate_suggestion(
api_key: &str,
base_url: &str,
model: &str,
recent_messages: &str,
) -> Option<String> {
let client = reqwest::Client::new();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Creating a new reqwest::Client on every function call prevents connection reuse (keep-alive) and forces a new TCP/TLS handshake for every suggestion request. Since this is called after every completed turn, reusing a static client via std::sync::LazyLock is much more efficient.

    static CLIENT: std::sync::LazyLock<reqwest::Client> = std::sync::LazyLock::new(reqwest::Client::new);
    let client = &*CLIENT;

Comment thread
greptile-apps[bot] marked this conversation as resolved.
Outdated
let body = serde_json::json!({
"model": model,
"messages": [
{
"role": "system",
"content": "\
You are a helpful assistant. Based on the recent conversation context, generate \
ONE short follow-up question (under 60 characters) the user might want to ask \
next. Reply with ONLY the question text, nothing else — no quotes, no explanations, \
no prefixes."
},
{
"role": "user",
"content": format!(
"Recent conversation:\n{recent_messages}\n\n\
Generate ONE short follow-up question the user might ask next:"
)
}
],
"max_tokens": 64,
"temperature": 0.3,
"stream": false
});

let url = format!("{}/chat/completions", base_url.trim_end_matches('/'));
debug!(%url, %model, "generating prompt suggestion");
let response = match client
.post(&url)
.header(AUTHORIZATION, format!("Bearer {api_key}"))
.header(CONTENT_TYPE, "application/json")
.timeout(std::time::Duration::from_secs(10))
.json(&body)
.send()
.await
{
Ok(r) => r,
Err(_) => return None,
};

let value: Value = match response.json().await {
Ok(v) => v,
Err(_) => return None,
};

let suggestion = value["choices"][0]["message"]["content"]
.as_str()
.map(|s| s.trim().trim_matches('"').to_string())
.filter(|s| !s.is_empty() && s.len() <= 200)?;
Comment thread
greptile-apps[bot] marked this conversation as resolved.

debug!(text = %suggestion, "prompt suggestion generated");
Some(suggestion)
}

/// Summarize a single message as `"<Role>: <first non-blank line>"`.
///
/// Only messages whose role is `user` or `assistant` are summarized; any
/// other role yields `None`. Text content blocks are joined with a single
/// space and non-text blocks are ignored. The first line that is non-empty
/// after trimming is used, so text that begins with blank lines is still
/// summarized instead of being dropped. Lines longer than 120 characters are
/// cut at 120 characters and suffixed with `…`.
///
/// Returns `None` when the message has no non-blank text.
fn message_summary(m: &crate::models::Message) -> Option<String> {
    /// Maximum number of characters kept from the summarized line.
    const MAX_CHARS: usize = 120;

    let role = match m.role.as_str() {
        "user" => "User",
        "assistant" => "Assistant",
        _ => return None,
    };
    let text = m
        .content
        .iter()
        .filter_map(|block| match block {
            crate::models::ContentBlock::Text { text, .. } => Some(text.as_str()),
            _ => None,
        })
        .collect::<Vec<_>>()
        .join(" ");
    // Skip leading blank lines rather than giving up on the whole message.
    let first_line = text
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())?;
    // `nth(MAX_CHARS)` is the first character past the limit; its byte offset
    // is always a valid char boundary to cut at.
    let truncated = match first_line.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}…", &first_line[..cut]),
        None => first_line.to_string(),
    };
    Some(format!("{role}: {truncated}"))
}

/// Build a one-line-per-message summary of recent conversation context.
/// Takes the last N messages, skipping tool-only messages.
pub fn summarize_recent_messages(messages: &[crate::models::Message], limit: usize) -> String {
let start = messages.len().saturating_sub(limit);
messages[start..]
.iter()
.filter_map(message_summary)
.collect::<Vec<_>>()
.join("\n")
}
48 changes: 48 additions & 0 deletions crates/tui/src/tui/ui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,13 @@ async fn run_event_loop(
app.needs_redraw = true;
}

// Poll prompt suggestion cell from background generation task.
if let Ok(mut guard) = app.prompt_suggestion_cell.lock() {
if let Some(suggestion) = guard.take() {
app.prompt_suggestion = Some(suggestion);
}
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Using lock() on a std::sync::Mutex inside the main UI event loop can block the UI thread and cause frame drops or stuttering if the background thread is holding the lock. Since this is a polling operation on every tick, it is highly recommended to use try_lock() instead to ensure it remains completely non-blocking.

        if let Ok(mut guard) = app.prompt_suggestion_cell.try_lock() {
            if let Some(suggestion) = guard.take() {
                app.prompt_suggestion = Some(suggestion);
            }
        }

Comment thread
greptile-apps[bot] marked this conversation as resolved.

// First, poll for engine events (non-blocking)
let mut received_engine_event = false;
let mut transcript_batch_updated = false;
Expand Down Expand Up @@ -1618,6 +1625,7 @@ async fn run_event_loop(
app.is_loading = true;
app.offline_mode = false;
app.turn_error_posted = false;
app.prompt_suggestion = None;
Comment thread
greptile-apps[bot] marked this conversation as resolved.
app.dispatch_started_at = None;
current_streaming_text.clear();
app.streaming_state.reset();
Expand Down Expand Up @@ -1819,6 +1827,38 @@ async fn run_event_loop(
}
}

// Generate ghost-text follow-up suggestion asynchronously.
if status == crate::core::events::TurnOutcomeStatus::Completed
&& app.api_messages.len() >= 2
{
let suggestion_cell = app.prompt_suggestion_cell.clone();
let api_key = config.deepseek_api_key().unwrap_or_default();
let base_url = config.deepseek_base_url();
let model = config.default_model();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The PR description states that a lightweight API call (e.g., v4-flash) is used to generate the follow-up suggestion. However, config.default_model() typically resolves to the user's primary/pro model (e.g., deepseek-v4-pro), which is significantly more expensive and slower. Consider explicitly resolving or defaulting to a cheaper/faster model (like deepseek-v4-flash) for this background task to avoid unexpected API costs.

Comment thread
greptile-apps[bot] marked this conversation as resolved.
let messages: Vec<crate::models::Message> =
app.api_messages.clone();
if !api_key.is_empty() {
tokio::spawn(async move {
let summary =
crate::tui::prompt_suggestion::summarize_recent_messages(
&messages, 8,
);
if let Some(suggestion) =
crate::tui::prompt_suggestion::generate_suggestion(
&api_key,
&base_url,
&model,
&summary,
)
.await
&& let Ok(mut guard) = suggestion_cell.lock()
{
*guard = Some(suggestion);
}
});
}
}

// Generate post-turn receipt for completed turns.
// Also push a persistent status toast so users always
// see the outcome in the footer (not just the 8-second
Expand Down Expand Up @@ -3591,6 +3631,14 @@ async fn run_event_loop(
if app.is_loading && queue_current_draft_for_next_turn(app) {
continue;
}
if app.input.is_empty()
&& let Some(suggestion) = app.prompt_suggestion.take()
{
app.input = suggestion;
app.cursor_position = app.input.chars().count();
app.needs_redraw = true;
continue;
}
let prior_model = app.model.clone();
let prior_mode = app.mode;
app.cycle_mode();
Expand Down
39 changes: 27 additions & 12 deletions crates/tui/src/tui/widgets/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -659,17 +659,24 @@ impl Renderable for ComposerWidget<'_> {

let mut input_lines = Vec::new();
if input_text.is_empty() {
let placeholder = if self.app.is_history_search_active() {
self.app
.tr(crate::localization::MessageId::HistorySearchPlaceholder)
if let Some(ref suggestion) = self.app.prompt_suggestion {
input_lines.push(Line::from(Span::styled(
suggestion.as_str(),
Style::default().fg(palette::TEXT_HINT),
)));
} else {
self.app
.tr(crate::localization::MessageId::ComposerPlaceholder)
};
input_lines.push(Line::from(Span::styled(
placeholder,
Style::default().fg(palette::TEXT_MUTED).italic(),
)));
let placeholder = if self.app.is_history_search_active() {
self.app
.tr(crate::localization::MessageId::HistorySearchPlaceholder)
} else {
self.app
.tr(crate::localization::MessageId::ComposerPlaceholder)
};
input_lines.push(Line::from(Span::styled(
placeholder,
Style::default().fg(palette::TEXT_MUTED).italic(),
)));
}
} else if let Some((sel_start, sel_end)) = self.app.selection_range() {
let line_ranges: Vec<(usize, usize)> =
wrap_input_lines_for_mouse(&self.app.input, content_width)
Expand Down Expand Up @@ -704,12 +711,16 @@ impl Renderable for ComposerWidget<'_> {
// wrap the single Line at render time, so we must estimate the wrapped
// row count ourselves to keep padding accurate on narrow widths.
let visual_rows = if input_text.is_empty() {
let placeholder = if self.app.is_history_search_active() {
let placeholder = if let Some(ref suggestion) = self.app.prompt_suggestion {
suggestion.as_str()
} else if self.app.is_history_search_active() {
self.app
.tr(crate::localization::MessageId::HistorySearchPlaceholder)
.as_ref()
} else {
self.app
.tr(crate::localization::MessageId::ComposerPlaceholder)
.as_ref()
};
placeholder_visual_lines_for(placeholder, content_width)
} else {
Expand Down Expand Up @@ -1009,12 +1020,16 @@ impl Renderable for ComposerWidget<'_> {
let (visible_lines, cursor_row, cursor_col) =
layout_input(input_text, input_cursor, content_width, input_rows_budget);
let visual_rows = if input_text.is_empty() {
let placeholder = if self.app.is_history_search_active() {
let placeholder = if let Some(ref suggestion) = self.app.prompt_suggestion {
suggestion.as_str()
} else if self.app.is_history_search_active() {
self.app
.tr(crate::localization::MessageId::HistorySearchPlaceholder)
.as_ref()
} else {
self.app
.tr(crate::localization::MessageId::ComposerPlaceholder)
.as_ref()
};
placeholder_visual_lines_for(placeholder, content_width)
} else {
Expand Down
Loading