Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 126 additions & 18 deletions src-tauri/src/actions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ use crate::managers::transcription::TranscriptionManager;
use crate::settings::{get_settings, AppSettings, APPLE_INTELLIGENCE_PROVIDER_ID};
use crate::shortcut;
use crate::tray::{change_tray_icon, TrayIconState};
use crate::utils::{self, show_recording_overlay, show_transcribing_overlay};
use crate::utils::{
self, show_recording_overlay, show_transcribing_overlay, show_translating_overlay,
};
use crate::ManagedToggleState;
use ferrous_opencc::{config::BuiltinConfig, OpenCC};
use log::{debug, error};
Expand All @@ -25,10 +27,20 @@ pub trait ShortcutAction: Send + Sync {
}

// Transcribe Action
/// Selects which optional step runs on the transcribed text before it is output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TranscribePostAction {
    /// No extra step; the transcription is used as-is.
    None,
    /// Run the text through `post_process_transcription`.
    PostProcess,
    /// Run the text through `translate_transcription`.
    Translate,
}

struct TranscribeAction {
post_process: bool,
post_action: TranscribePostAction,
}

/// Provider id that `translate_transcription` uses to look up the OpenRouter provider entry,
/// its API key, and its configured model in the settings.
const OPENROUTER_PROVIDER_ID: &str = "openrouter";
/// Model that `translate_transcription` falls back to when no (non-blank) OpenRouter model is
/// configured.
const DEFAULT_OPENROUTER_TRANSLATION_MODEL: &str = "openai/gpt-4o-mini";

async fn post_process_transcription(settings: &AppSettings, transcription: &str) -> Option<String> {
let provider = match settings.active_post_process_provider().cloned() {
Some(provider) => provider,
Expand Down Expand Up @@ -85,8 +97,11 @@ async fn post_process_transcription(settings: &AppSettings, transcription: &str)
provider.id, model
);

// Replace ${output} variable in the prompt with the actual text
let processed_prompt = prompt.replace("${output}", transcription);
// Replace template variables in the prompt
let processed_prompt = prompt.replace("${output}", transcription).replace(
"${translate_target_language}",
settings.translate_target_language.as_str(),
);
debug!("Processed prompt length: {} chars", processed_prompt.len());

if provider.id == APPLE_INTELLIGENCE_PROVIDER_ID {
Expand Down Expand Up @@ -164,6 +179,82 @@ async fn post_process_transcription(settings: &AppSettings, transcription: &str)
}
}

/// Translates `transcription` into `settings.translate_target_language` using the OpenRouter
/// provider.
///
/// Returns `Some(translated_text)` on success. Returns `None` when translation is skipped or
/// fails, namely when:
/// - the target language is blank or `"auto"`,
/// - no provider with id `OPENROUTER_PROVIDER_ID` is configured,
/// - the OpenRouter API key is missing or blank,
/// - the LLM call errors, yields no content, or yields only whitespace / zero-width characters.
///
/// When no model is configured for OpenRouter, `DEFAULT_OPENROUTER_TRANSLATION_MODEL` is used.
async fn translate_transcription(settings: &AppSettings, transcription: &str) -> Option<String> {
    let target_language = settings.translate_target_language.trim();
    if matches!(target_language, "" | "auto") {
        debug!("Translation skipped because translate_target_language is empty or 'auto'");
        return None;
    }

    let Some(provider) = settings
        .post_process_provider(OPENROUTER_PROVIDER_ID)
        .cloned()
    else {
        debug!("Translation skipped because OpenRouter provider is missing");
        return None;
    };

    // A missing entry and a blank entry are treated the same: no usable key.
    let api_key = match settings.post_process_api_keys.get(OPENROUTER_PROVIDER_ID) {
        Some(key) if !key.trim().is_empty() => key.clone(),
        _ => {
            debug!("Translation skipped because OpenRouter API key is not configured");
            return None;
        }
    };

    // A missing or blank model setting falls back to the built-in default model.
    let model = settings
        .post_process_models
        .get(OPENROUTER_PROVIDER_ID)
        .filter(|configured| !configured.trim().is_empty())
        .cloned()
        .unwrap_or_else(|| DEFAULT_OPENROUTER_TRANSLATION_MODEL.to_string());

    let prompt = format!(
        "Translate the text below into the language identified by this BCP-47 code: {target_language}.\n\nRules:\n- Preserve meaning and formatting (including line breaks)\n- Return only the translated text\n- If the text is already in the target language, return it unchanged\n\nText:\n{transcription}"
    );

    debug!(
        "Starting LLM translation via OpenRouter (model: {}, target: {})",
        model, target_language
    );

    let response =
        crate::llm_client::send_chat_completion(&provider, api_key, &model, prompt).await;

    match response {
        Ok(Some(raw)) => {
            // Drop zero-width characters (ZWSP, ZWNJ, ZWJ, BOM) in a single pass.
            let cleaned: String = raw
                .chars()
                .filter(|c| !matches!(c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}'))
                .collect();

            if cleaned.trim().is_empty() {
                debug!("Translation returned empty response");
                return None;
            }
            Some(cleaned)
        }
        Ok(None) => {
            error!("LLM API response has no content");
            None
        }
        Err(e) => {
            error!(
                "LLM translation failed for provider '{}': {}. Falling back to original transcription.",
                provider.id, e
            );
            None
        }
    }
}

async fn maybe_convert_chinese_variant(
settings: &AppSettings,
transcription: &str,
Expand Down Expand Up @@ -300,7 +391,7 @@ impl ShortcutAction for TranscribeAction {
play_feedback_sound(app, SoundType::Stop);

let binding_id = binding_id.to_string(); // Clone binding_id for the async task
let post_process = self.post_process;
let post_action = self.post_action;

tauri::async_runtime::spawn(async move {
let binding_id = binding_id.clone(); // Clone for the inner async task
Expand Down Expand Up @@ -341,23 +432,32 @@ impl ShortcutAction for TranscribeAction {

// Then apply LLM post-processing if this is the post-process hotkey
// Uses final_text which may already have Chinese conversion applied
let processed = if post_process {
post_process_transcription(&settings, &final_text).await
} else {
None
let processed = match post_action {
TranscribePostAction::PostProcess => {
post_process_transcription(&settings, &final_text).await
}
TranscribePostAction::Translate => {
show_translating_overlay(&ah);
translate_transcription(&settings, &final_text).await
}
TranscribePostAction::None => None,
};
if let Some(processed_text) = processed {
post_processed_text = Some(processed_text.clone());
final_text = processed_text;

// Get the prompt that was used
if let Some(prompt_id) = &settings.post_process_selected_prompt_id {
if let Some(prompt) = settings
.post_process_prompts
.iter()
.find(|p| &p.id == prompt_id)
if post_action == TranscribePostAction::PostProcess {
// Get the prompt that was used
if let Some(prompt_id) =
&settings.post_process_selected_prompt_id
{
post_process_prompt = Some(prompt.prompt.clone());
if let Some(prompt) = settings
.post_process_prompts
.iter()
.find(|p| &p.id == prompt_id)
{
post_process_prompt = Some(prompt.prompt.clone());
}
}
}
} else if final_text != transcription {
Expand Down Expand Up @@ -474,12 +574,20 @@ pub static ACTION_MAP: Lazy<HashMap<String, Arc<dyn ShortcutAction>>> = Lazy::ne
map.insert(
"transcribe".to_string(),
Arc::new(TranscribeAction {
post_process: false,
post_action: TranscribePostAction::None,
}) as Arc<dyn ShortcutAction>,
);
map.insert(
"transcribe_with_post_process".to_string(),
Arc::new(TranscribeAction { post_process: true }) as Arc<dyn ShortcutAction>,
Arc::new(TranscribeAction {
post_action: TranscribePostAction::PostProcess,
}) as Arc<dyn ShortcutAction>,
);
map.insert(
"transcribe_with_translation".to_string(),
Arc::new(TranscribeAction {
post_action: TranscribePostAction::Translate,
}) as Arc<dyn ShortcutAction>,
);
map.insert(
"cancel".to_string(),
Expand Down
1 change: 1 addition & 0 deletions src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ pub fn run() {
shortcut::change_autostart_setting,
shortcut::change_translate_to_english_setting,
shortcut::change_selected_language_setting,
shortcut::change_translate_target_language_setting,
shortcut::change_overlay_position_setting,
shortcut::change_debug_mode_setting,
shortcut::change_word_correction_threshold_setting,
Expand Down
22 changes: 22 additions & 0 deletions src-tauri/src/overlay.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,28 @@ pub fn show_transcribing_overlay(app_handle: &AppHandle) {
}
}

/// Shows the recording overlay window and switches it to the "translating" state.
///
/// Returns without doing anything when the overlay position setting is
/// `OverlayPosition::None`. After repositioning the overlay, it also returns early if no
/// `recording_overlay` webview window exists. Errors from showing the window or emitting the
/// event are ignored.
pub fn show_translating_overlay(app_handle: &AppHandle) {
    // Overlay is disabled through the position setting.
    if settings::get_settings(app_handle).overlay_position == OverlayPosition::None {
        return;
    }

    update_overlay_position(app_handle);

    let Some(window) = app_handle.get_webview_window("recording_overlay") else {
        return;
    };

    let _ = window.show();

    // On Windows, aggressively re-assert "topmost" in the native Z-order after showing.
    #[cfg(target_os = "windows")]
    force_overlay_topmost(&window);

    // Emit the event that switches the overlay to its translating state.
    let _ = window.emit("show-overlay", "translating");
}

/// Updates the overlay window position based on current settings
pub fn update_overlay_position(app_handle: &AppHandle) {
if let Some(overlay_window) = app_handle.get_webview_window("recording_overlay") {
Expand Down
29 changes: 29 additions & 0 deletions src-tauri/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ pub struct AppSettings {
pub translate_to_english: bool,
#[serde(default = "default_selected_language")]
pub selected_language: String,
#[serde(default = "default_translate_target_language")]
pub translate_target_language: String,
#[serde(default = "default_overlay_position")]
pub overlay_position: OverlayPosition,
#[serde(default = "default_debug_mode")]
Expand Down Expand Up @@ -331,6 +333,10 @@ fn default_translate_to_english() -> bool {
false
}

/// Default value for `AppSettings::translate_target_language`: the language code `"en"`.
fn default_translate_target_language() -> String {
    String::from("en")
}

fn default_start_hidden() -> bool {
false
}
Expand Down Expand Up @@ -582,6 +588,28 @@ pub fn get_default_settings() -> AppSettings {
current_binding: default_post_process_shortcut.to_string(),
},
);

#[cfg(target_os = "windows")]
let default_translate_shortcut = "ctrl+alt+space";
#[cfg(target_os = "macos")]
let default_translate_shortcut = "option+cmd+space";
#[cfg(target_os = "linux")]
let default_translate_shortcut = "ctrl+alt+space";
#[cfg(not(any(target_os = "windows", target_os = "macos", target_os = "linux")))]
let default_translate_shortcut = "alt+ctrl+space";

bindings.insert(
"transcribe_with_translation".to_string(),
ShortcutBinding {
id: "transcribe_with_translation".to_string(),
name: "Transcribe and Translate".to_string(),
description:
"Converts your speech into text and translates it to your selected language."
.to_string(),
default_binding: default_translate_shortcut.to_string(),
current_binding: default_translate_shortcut.to_string(),
},
);
bindings.insert(
"cancel".to_string(),
ShortcutBinding {
Expand Down Expand Up @@ -609,6 +637,7 @@ pub fn get_default_settings() -> AppSettings {
selected_output_device: None,
translate_to_english: false,
selected_language: "auto".to_string(),
translate_target_language: default_translate_target_language(),
overlay_position: default_overlay_position(),
debug_mode: false,
log_level: default_log_level(),
Expand Down
12 changes: 12 additions & 0 deletions src-tauri/src/shortcut/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,18 @@ pub fn change_selected_language_setting(app: AppHandle, language: String) -> Res
Ok(())
}

/// Tauri command that stores `language` as the `translate_target_language` setting.
///
/// Loads the current settings, overwrites the field, and writes the settings back.
/// The value is stored exactly as given (no validation or trimming happens here), and the
/// command always returns `Ok(())`.
#[tauri::command]
#[specta::specta]
pub fn change_translate_target_language_setting(
    app: AppHandle,
    language: String,
) -> Result<(), String> {
    let mut updated = settings::get_settings(&app);
    updated.translate_target_language = language;
    settings::write_settings(&app, updated);
    Ok(())
}

#[tauri::command]
#[specta::specta]
pub fn change_overlay_position_setting(app: AppHandle, position: String) -> Result<(), String> {
Expand Down
10 changes: 9 additions & 1 deletion src/bindings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ async changeSelectedLanguageSetting(language: string) : Promise<Result<null, str
else return { status: "error", error: e as any };
}
},
/**
 * Invokes the `change_translate_target_language_setting` backend command with `language`.
 *
 * Resolves to `{ status: "ok", data }` when the invoke succeeds. If the invoke rejects with
 * an `Error` instance it is rethrown; any other rejection value is returned as
 * `{ status: "error", error }`.
 */
async changeTranslateTargetLanguageSetting(language: string) : Promise<Result<null, string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("change_translate_target_language_setting", { language }) };
} catch (e) {
if(e instanceof Error) throw e;
else return { status: "error", error: e as any };
}
},
async changeOverlayPositionSetting(position: string) : Promise<Result<null, string>> {
try {
return { status: "ok", data: await TAURI_INVOKE("change_overlay_position_setting", { position }) };
Expand Down Expand Up @@ -703,7 +711,7 @@ async isLaptop() : Promise<Result<boolean, string>> {

/** user-defined types **/

export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; paste_delay_ms?: number }
export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; translate_target_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; paste_delay_ms?: number }
export type AudioDevice = { index: string; name: string; is_default: boolean }
export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null }
export type ClipboardHandling = "dont_modify" | "copy_to_clipboard"
Expand Down
Loading