-
-
Notifications
You must be signed in to change notification settings - Fork 995
Added 3 more variables for LLM post-processing of transcripts #704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,21 +3,26 @@ use crate::apple_intelligence; | |
| use crate::audio_feedback::{play_feedback_sound, play_feedback_sound_blocking, SoundType}; | ||
| use crate::managers::audio::AudioRecordingManager; | ||
| use crate::managers::history::HistoryManager; | ||
| use crate::managers::model::{EngineType, ModelManager}; | ||
| use crate::managers::transcription::TranscriptionManager; | ||
| use crate::settings::{get_settings, AppSettings, APPLE_INTELLIGENCE_PROVIDER_ID}; | ||
| use crate::shortcut; | ||
| use crate::tray::{change_tray_icon, TrayIconState}; | ||
| use crate::utils::{self, show_recording_overlay, show_transcribing_overlay}; | ||
| use crate::ManagedToggleState; | ||
| use crate::{active_app, transcript_context, ManagedToggleState}; | ||
| use ferrous_opencc::{config::BuiltinConfig, OpenCC}; | ||
| use log::{debug, error}; | ||
| use once_cell::sync::Lazy; | ||
| use std::collections::HashMap; | ||
| use std::sync::Arc; | ||
| use std::sync::{Arc, Mutex}; | ||
| use std::time::Instant; | ||
| use tauri::AppHandle; | ||
| use tauri::Manager; | ||
|
|
||
| /// Tracks the frontmost application captured at recording start, keyed by binding_id | ||
| static RECORDING_APP_CONTEXT: Lazy<Mutex<HashMap<String, String>>> = | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: entries are inserted in |
||
| Lazy::new(|| Mutex::new(HashMap::new())); | ||
|
|
||
| // Shortcut Action Trait | ||
| pub trait ShortcutAction: Send + Sync { | ||
| fn start(&self, app: &AppHandle, binding_id: &str, shortcut_str: &str); | ||
|
|
@@ -27,9 +32,24 @@ pub trait ShortcutAction: Send + Sync { | |
// Transcribe Action
/// Shortcut action for transcription. Its `ShortcutAction` implementation
/// supplies the `start`/`stop` handlers invoked for a recording binding.
struct TranscribeAction;
|
|
||
/// Extra values handed to the LLM post-processing step.
///
/// Each field is substituted into the prompt template through the placeholder
/// of the same name: `${current_app}`, `${short_prev_transcript}`,
/// `${time_local}` and `${language}`.
#[derive(Clone, Debug, Default)]
pub struct PostProcessContext {
    /// Name of the frontmost application captured when recording started.
    pub current_app: String,
    /// Excerpt of the previous transcript from the same app
    /// (last 200 words, expires after 5 minutes).
    pub short_prev_transcript: String,
    /// Local time, formatted like "Tuesday, February 3, 2026 10:33:39 AM".
    pub time_local: String,
    /// Transcription language code (e.g. "en", "zh-Hans"), or "auto" when
    /// no specific language applies.
    pub language: String,
}
|
|
||
| async fn maybe_post_process_transcription( | ||
| settings: &AppSettings, | ||
| transcription: &str, | ||
| context: &PostProcessContext, | ||
| ) -> Option<String> { | ||
| if !settings.post_process_enabled { | ||
| return None; | ||
|
|
@@ -90,9 +110,20 @@ async fn maybe_post_process_transcription( | |
| provider.id, model | ||
| ); | ||
|
|
||
| // Replace ${output} variable in the prompt with the actual text | ||
| let processed_prompt = prompt.replace("${output}", transcription); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The change logs the entire prompt, including the transcription text AND the app name. This is a privacy concern since the log file could contain sensitive speech content; recommend reverting |
||
| debug!("Processed prompt length: {} chars", processed_prompt.len()); | ||
| // Replace variables in the prompt with actual values | ||
| // Available variables: | ||
| // ${output} - The transcription text | ||
| // ${current_app} - Name of the frontmost application | ||
| // ${short_prev_transcript} - Recent transcript from same app (last 200 words, 5 min expiry) | ||
| // ${time_local} - Current local time (e.g., "Tuesday, February 3, 2026 10:33:39 AM") | ||
| // ${language} - Selected transcription language (e.g., "en", "zh-Hans") or "auto" | ||
| let processed_prompt = prompt | ||
| .replace("${output}", transcription) | ||
| .replace("${current_app}", &context.current_app) | ||
| .replace("${short_prev_transcript}", &context.short_prev_transcript) | ||
| .replace("${time_local}", &context.time_local) | ||
| .replace("${language}", &context.language); | ||
| debug!("Processed prompt : {}", processed_prompt); | ||
|
|
||
| if provider.id == APPLE_INTELLIGENCE_PROVIDER_ID { | ||
| #[cfg(all(target_os = "macos", target_arch = "aarch64"))] | ||
|
|
@@ -218,6 +249,16 @@ impl ShortcutAction for TranscribeAction { | |
| let start_time = Instant::now(); | ||
| debug!("TranscribeAction::start called for binding: {}", binding_id); | ||
|
|
||
| // Capture the frontmost application name for LLM context | ||
| // This is done early before any UI changes that might affect focus | ||
| let frontmost_app = active_app::get_frontmost_app_name().unwrap_or_default(); | ||
| debug!("Captured frontmost app: '{}'", frontmost_app); | ||
|
|
||
| // Store the captured app name for use when transcription completes | ||
| if let Ok(mut context) = RECORDING_APP_CONTEXT.lock() { | ||
| context.insert(binding_id.to_string(), frontmost_app); | ||
| } | ||
|
|
||
| // Load model in the background | ||
| let tm = app.state::<Arc<TranscriptionManager>>(); | ||
| tm.initiate_model_load(); | ||
|
|
@@ -290,6 +331,17 @@ impl ShortcutAction for TranscribeAction { | |
| let stop_time = Instant::now(); | ||
| debug!("TranscribeAction::stop called for binding: {}", binding_id); | ||
|
|
||
| // Retrieve the captured frontmost app name from recording start | ||
| let current_app = RECORDING_APP_CONTEXT | ||
| .lock() | ||
| .ok() | ||
| .and_then(|mut ctx| ctx.remove(binding_id)) | ||
| .unwrap_or_default(); | ||
| debug!( | ||
| "Retrieved frontmost app for binding '{}': '{}'", | ||
| binding_id, current_app | ||
| ); | ||
|
|
||
| let ah = app.clone(); | ||
| let rm = Arc::clone(&app.state::<Arc<AudioRecordingManager>>()); | ||
| let tm = Arc::clone(&app.state::<Arc<TranscriptionManager>>()); | ||
|
|
@@ -343,10 +395,51 @@ impl ShortcutAction for TranscribeAction { | |
| final_text = converted_text; | ||
| } | ||
|
|
||
| // Build context for LLM post-processing | ||
| // Get previous transcript from same app (last 200 words, 5 min expiry) | ||
| let short_prev_transcript = | ||
| transcript_context::get_short_prev_transcript(&current_app); | ||
|
|
||
| // Generate formatted local time: "Tuesday, February 3, 2026 10:33:39 AM" | ||
| let time_local = chrono::Local::now() | ||
| .format("%A, %B %-d, %Y %-I:%M:%S %p") | ||
| .to_string(); | ||
|
|
||
| let pp_context = PostProcessContext { | ||
| current_app: current_app.clone(), | ||
| short_prev_transcript, | ||
| time_local, | ||
| language: { | ||
| // Only use selected_language for Whisper models, 'auto' for other models (Parakeet, Moonshine) | ||
| let mm = ah.state::<Arc<ModelManager>>(); | ||
| let is_whisper = mm | ||
| .get_model_info(&settings.selected_model) | ||
| .map(|m| matches!(m.engine_type, EngineType::Whisper)) | ||
| .unwrap_or(false); | ||
|
|
||
| if is_whisper && !settings.selected_language.is_empty() { | ||
| settings.selected_language.clone() | ||
| } else { | ||
| "auto".to_string() | ||
| } | ||
| }, | ||
| }; | ||
| debug!( | ||
| "Post-process context: app='{}', prev_transcript_len={}, time='{}', language='{}'", | ||
| pp_context.current_app, | ||
| pp_context.short_prev_transcript.len(), | ||
| pp_context.time_local, | ||
| pp_context.language | ||
| ); | ||
|
|
||
| // Then apply regular post-processing if enabled | ||
| // Uses final_text which may already have Chinese conversion applied | ||
| if let Some(processed_text) = | ||
| maybe_post_process_transcription(&settings, &final_text).await | ||
| if let Some(processed_text) = maybe_post_process_transcription( | ||
| &settings, | ||
| &final_text, | ||
| &pp_context, | ||
| ) | ||
| .await | ||
| { | ||
| post_processed_text = Some(processed_text.clone()); | ||
| final_text = processed_text; | ||
|
|
@@ -366,6 +459,13 @@ impl ShortcutAction for TranscribeAction { | |
| post_processed_text = Some(final_text.clone()); | ||
| } | ||
|
|
||
| // Update the transcript context for this app | ||
| // Use the original transcription (before post-processing) for context | ||
| transcript_context::update_transcript_context( | ||
| &current_app, | ||
| &transcription, | ||
| ); | ||
|
|
||
| // Save to history with post-processed text and prompt | ||
| let hm_clone = Arc::clone(&hm); | ||
| let transcription_for_history = transcription.clone(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| //! Module for getting the frontmost/active application name. | ||
| //! This is platform-specific and returns the name of the application | ||
| //! that has keyboard focus when the user starts transcribing. | ||
|
|
||
| #[cfg(target_os = "macos")] | ||
| pub fn get_frontmost_app_name() -> Option<String> { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hmm so macOS impl returns app name via |
||
| use objc::{msg_send, sel, sel_impl}; | ||
| use std::ffi::CStr; | ||
|
|
||
| unsafe { | ||
| // Get NSWorkspace shared instance | ||
| let workspace: *mut objc::runtime::Object = | ||
| msg_send![objc::class!(NSWorkspace), sharedWorkspace]; | ||
| if workspace.is_null() { | ||
| return None; | ||
| } | ||
|
|
||
| // Get frontmost application (NSRunningApplication) | ||
| let frontmost_app: *mut objc::runtime::Object = | ||
| msg_send![workspace, frontmostApplication]; | ||
| if frontmost_app.is_null() { | ||
| return None; | ||
| } | ||
|
|
||
| // Get localized name of the application | ||
| let name: *mut objc::runtime::Object = msg_send![frontmost_app, localizedName]; | ||
| if name.is_null() { | ||
| return None; | ||
| } | ||
|
|
||
| // Convert NSString to Rust String | ||
| let utf8_ptr: *const i8 = msg_send![name, UTF8String]; | ||
| if utf8_ptr.is_null() { | ||
| return None; | ||
| } | ||
|
|
||
| let c_str = CStr::from_ptr(utf8_ptr); | ||
| match c_str.to_str() { | ||
| Ok(s) if !s.is_empty() => Some(s.to_string()), | ||
| _ => None, | ||
| } | ||
| } | ||
| } | ||
|
|
||
/// Windows implementation: reports the text of the current foreground window.
///
/// Note that this is the window's title text (as returned by
/// `GetWindowTextW`), not an application or process name.
///
/// Returns `None` when there is no foreground window, when the window has no
/// title text, or when the text could not be copied.
#[cfg(target_os = "windows")]
pub fn get_frontmost_app_name() -> Option<String> {
    use std::ffi::OsString;
    use std::os::windows::ffi::OsStringExt;
    use windows::Win32::Foundation::HWND;
    use windows::Win32::UI::WindowsAndMessaging::{
        GetForegroundWindow, GetWindowTextLengthW, GetWindowTextW,
    };

    // SAFETY: the handle comes straight from `GetForegroundWindow` and is
    // null-checked before use; the text buffer is owned by this function and
    // sized from the reported length plus one slot for the terminating NUL.
    unsafe {
        let foreground: HWND = GetForegroundWindow();
        if foreground.0.is_null() {
            return None;
        }

        let title_len = GetWindowTextLengthW(foreground);
        if title_len == 0 {
            return None;
        }

        let mut wide: Vec<u16> = vec![0; (title_len + 1) as usize];
        let copied = GetWindowTextW(foreground, &mut wide);
        if copied <= 0 {
            return None;
        }

        // Keep only the UTF-16 units that were actually written.
        wide.truncate(copied as usize);
        let title = OsString::from_wide(&wide).to_string_lossy().into_owned();
        if title.is_empty() {
            None
        } else {
            Some(title)
        }
    }
}
|
|
||
/// Linux implementation: asks `xdotool` for the name of the active window.
///
/// Runs `xdotool getactivewindow getwindowname` and returns its trimmed
/// output. Note that this is a window name, not an application name.
///
/// Returns `None` when `xdotool` cannot be launched, exits unsuccessfully, or
/// prints only whitespace. There is no separate Wayland code path: when the
/// `xdotool` query does not produce a name the result is simply `None`, and
/// callers are expected to handle that.
#[cfg(target_os = "linux")]
pub fn get_frontmost_app_name() -> Option<String> {
    use std::process::Command;

    let output = Command::new("xdotool")
        .args(["getactivewindow", "getwindowname"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    // Lossy decoding keeps the lookup best-effort for non-UTF-8 window names.
    let name = String::from_utf8_lossy(&output.stdout);
    let name = name.trim();
    (!name.is_empty()).then(|| name.to_owned())
}
|
|
||
/// Fallback for targets other than macOS, Windows and Linux: no active-app
/// lookup is implemented there, so the result is always `None`.
#[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))]
pub fn get_frontmost_app_name() -> Option<String> {
    None
}
|
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: the lookup may yield `Some` or `None` depending on the
    /// host, but it must complete without panicking.
    #[test]
    fn test_get_frontmost_app_returns_something_or_none() {
        let _ = get_frontmost_app_name();
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `objc` crate is deprecated in favor of `objc2`, which is already a transitive dep in the project. Maybe we look into `objc2-app-kit` or a simpler approach for the macOS active app detection.