diff --git a/src-tauri/src/actions.rs b/src-tauri/src/actions.rs index a26f060a3..5dada8182 100644 --- a/src-tauri/src/actions.rs +++ b/src-tauri/src/actions.rs @@ -7,7 +7,9 @@ use crate::managers::transcription::TranscriptionManager; use crate::settings::{get_settings, AppSettings, APPLE_INTELLIGENCE_PROVIDER_ID}; use crate::shortcut; use crate::tray::{change_tray_icon, TrayIconState}; -use crate::utils::{self, show_recording_overlay, show_transcribing_overlay}; +use crate::utils::{ + self, show_recording_overlay, show_transcribing_overlay, show_translating_overlay, +}; use crate::ManagedToggleState; use ferrous_opencc::{config::BuiltinConfig, OpenCC}; use log::{debug, error}; @@ -25,10 +27,20 @@ pub trait ShortcutAction: Send + Sync { } // Transcribe Action +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum TranscribePostAction { + None, + PostProcess, + Translate, +} + struct TranscribeAction { - post_process: bool, + post_action: TranscribePostAction, } +const OPENROUTER_PROVIDER_ID: &str = "openrouter"; +const DEFAULT_OPENROUTER_TRANSLATION_MODEL: &str = "openai/gpt-4o-mini"; + async fn post_process_transcription(settings: &AppSettings, transcription: &str) -> Option { let provider = match settings.active_post_process_provider().cloned() { Some(provider) => provider, @@ -85,8 +97,11 @@ async fn post_process_transcription(settings: &AppSettings, transcription: &str) provider.id, model ); - // Replace ${output} variable in the prompt with the actual text - let processed_prompt = prompt.replace("${output}", transcription); + // Replace template variables in the prompt + let processed_prompt = prompt.replace("${output}", transcription).replace( + "${translate_target_language}", + settings.translate_target_language.as_str(), + ); debug!("Processed prompt length: {} chars", processed_prompt.len()); if provider.id == APPLE_INTELLIGENCE_PROVIDER_ID { @@ -164,6 +179,82 @@ async fn post_process_transcription(settings: &AppSettings, transcription: &str) } } +async fn translate_transcription(settings: &AppSettings, transcription: &str) -> Option { + let target_language = settings.translate_target_language.trim(); + if target_language.is_empty() || target_language == "auto" { + debug!("Translation skipped because translate_target_language is empty or 'auto'"); + return None; + } + + let provider = match settings + .post_process_provider(OPENROUTER_PROVIDER_ID) + .cloned() + { + Some(provider) => provider, + None => { + debug!("Translation skipped because OpenRouter provider is missing"); + return None; + } + }; + + let api_key = settings + .post_process_api_keys + .get(OPENROUTER_PROVIDER_ID) + .cloned() + .unwrap_or_default(); + if api_key.trim().is_empty() { + debug!("Translation skipped because OpenRouter API key is not configured"); + return None; + } + + let model = settings + .post_process_models + .get(OPENROUTER_PROVIDER_ID) + .cloned() + .unwrap_or_default(); + let model = if model.trim().is_empty() { + DEFAULT_OPENROUTER_TRANSLATION_MODEL.to_string() + } else { + model + }; + + let prompt = format!( + "Translate the text below into the language identified by this BCP-47 code: {target_language}.\n\nRules:\n- Preserve meaning and formatting (including line breaks)\n- Return only the translated text\n- If the text is already in the target language, return it unchanged\n\nText:\n{transcription}" + ); + + debug!( + "Starting LLM translation via OpenRouter (model: {}, target: {})", + model, target_language + ); + + match crate::llm_client::send_chat_completion(&provider, api_key, &model, prompt).await { + Ok(Some(content)) => { + let content = content + .replace('\u{200B}', "") + .replace('\u{200C}', "") + .replace('\u{200D}', "") + .replace('\u{FEFF}', ""); + if content.trim().is_empty() { + debug!("Translation returned empty response"); + None + } else { + Some(content) + } + } + Ok(None) => { + error!("LLM API response has no content"); + None + } + Err(e) => { + error!( + "LLM translation failed for provider '{}': {}. Falling back to original transcription.", + provider.id, e + ); + None + } + } +} + async fn maybe_convert_chinese_variant( settings: &AppSettings, transcription: &str, @@ -300,7 +391,7 @@ impl ShortcutAction for TranscribeAction { play_feedback_sound(app, SoundType::Stop); let binding_id = binding_id.to_string(); // Clone binding_id for the async task - let post_process = self.post_process; + let post_action = self.post_action; tauri::async_runtime::spawn(async move { let binding_id = binding_id.clone(); // Clone for the inner async task @@ -341,23 +432,32 @@ impl ShortcutAction for TranscribeAction { // Then apply LLM post-processing if this is the post-process hotkey // Uses final_text which may already have Chinese conversion applied - let processed = if post_process { - post_process_transcription(&settings, &final_text).await - } else { - None + let processed = match post_action { + TranscribePostAction::PostProcess => { + post_process_transcription(&settings, &final_text).await + } + TranscribePostAction::Translate => { + show_translating_overlay(&ah); + translate_transcription(&settings, &final_text).await + } + TranscribePostAction::None => None, }; if let Some(processed_text) = processed { post_processed_text = Some(processed_text.clone()); final_text = processed_text; - // Get the prompt that was used - if let Some(prompt_id) = &settings.post_process_selected_prompt_id { - if let Some(prompt) = settings - .post_process_prompts - .iter() - .find(|p| &p.id == prompt_id) + if post_action == TranscribePostAction::PostProcess { + // Get the prompt that was used + if let Some(prompt_id) = + &settings.post_process_selected_prompt_id { - post_process_prompt = Some(prompt.prompt.clone()); + if let Some(prompt) = settings + .post_process_prompts + .iter() + .find(|p| &p.id == prompt_id) + { + post_process_prompt = Some(prompt.prompt.clone()); + } } } } else if final_text != transcription { @@ -474,12 +574,20 @@ pub static ACTION_MAP: Lazy>> = Lazy::ne map.insert( "transcribe".to_string(), Arc::new(TranscribeAction { - post_process: false, + post_action: TranscribePostAction::None, }) as Arc, ); map.insert( "transcribe_with_post_process".to_string(), - Arc::new(TranscribeAction { post_process: true }) as Arc, + Arc::new(TranscribeAction { + post_action: TranscribePostAction::PostProcess, + }) as Arc, + ); + map.insert( + "transcribe_with_translation".to_string(), + Arc::new(TranscribeAction { + post_action: TranscribePostAction::Translate, + }) as Arc, ); map.insert( "cancel".to_string(), diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index c80a8b98f..20e660c2d 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -248,6 +248,7 @@ pub fn run() { shortcut::change_autostart_setting, shortcut::change_translate_to_english_setting, shortcut::change_selected_language_setting, + shortcut::change_translate_target_language_setting, shortcut::change_overlay_position_setting, shortcut::change_debug_mode_setting, shortcut::change_word_correction_threshold_setting, diff --git a/src-tauri/src/overlay.rs b/src-tauri/src/overlay.rs index 5fa51a112..333bdc657 100644 --- a/src-tauri/src/overlay.rs +++ b/src-tauri/src/overlay.rs @@ -325,6 +325,28 @@ pub fn show_transcribing_overlay(app_handle: &AppHandle) { } } +/// Shows the translating overlay window +pub fn show_translating_overlay(app_handle: &AppHandle) { + // Check if overlay should be shown based on position setting + let settings = settings::get_settings(app_handle); + if settings.overlay_position == OverlayPosition::None { + return; + } + + update_overlay_position(app_handle); + + if let Some(overlay_window) = app_handle.get_webview_window("recording_overlay") { + let _ = overlay_window.show(); + + // On Windows, aggressively re-assert "topmost" in the native Z-order after showing + #[cfg(target_os = "windows")] + force_overlay_topmost(&overlay_window); + + // Emit event to switch to translating state + let _ = overlay_window.emit("show-overlay", "translating"); + } +} + /// Updates the overlay window position based on current settings pub fn update_overlay_position(app_handle: &AppHandle) { if let Some(overlay_window) = app_handle.get_webview_window("recording_overlay") { diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index 44402bc16..e5540844e 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -271,6 +271,8 @@ pub struct AppSettings { pub translate_to_english: bool, #[serde(default = "default_selected_language")] pub selected_language: String, + #[serde(default = "default_translate_target_language")] + pub translate_target_language: String, #[serde(default = "default_overlay_position")] pub overlay_position: OverlayPosition, #[serde(default = "default_debug_mode")] @@ -331,6 +333,10 @@ fn default_translate_to_english() -> bool { false } +fn default_translate_target_language() -> String { + "en".to_string() +} + fn default_start_hidden() -> bool { false } @@ -582,6 +588,28 @@ pub fn get_default_settings() -> AppSettings { current_binding: default_post_process_shortcut.to_string(), }, ); + + #[cfg(target_os = "windows")] + let default_translate_shortcut = "ctrl+alt+space"; + #[cfg(target_os = "macos")] + let default_translate_shortcut = "option+cmd+space"; + #[cfg(target_os = "linux")] + let default_translate_shortcut = "ctrl+alt+space"; + #[cfg(not(any(target_os = "windows", target_os = "macos", target_os = "linux")))] + let default_translate_shortcut = "alt+ctrl+space"; + + bindings.insert( + "transcribe_with_translation".to_string(), + ShortcutBinding { + id: "transcribe_with_translation".to_string(), + name: "Transcribe and Translate".to_string(), + description: + "Converts your speech into text and translates it to your selected language." + .to_string(), + default_binding: default_translate_shortcut.to_string(), + current_binding: default_translate_shortcut.to_string(), + }, + ); bindings.insert( "cancel".to_string(), ShortcutBinding { @@ -609,6 +637,7 @@ pub fn get_default_settings() -> AppSettings { selected_output_device: None, translate_to_english: false, selected_language: "auto".to_string(), + translate_target_language: default_translate_target_language(), overlay_position: default_overlay_position(), debug_mode: false, log_level: default_log_level(), diff --git a/src-tauri/src/shortcut/mod.rs b/src-tauri/src/shortcut/mod.rs index 6ff5a04b7..0205a78c8 100644 --- a/src-tauri/src/shortcut/mod.rs +++ b/src-tauri/src/shortcut/mod.rs @@ -532,6 +532,18 @@ pub fn change_selected_language_setting(app: AppHandle, language: String) -> Res Ok(()) } +#[tauri::command] +#[specta::specta] +pub fn change_translate_target_language_setting( + app: AppHandle, + language: String, +) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + settings.translate_target_language = language; + settings::write_settings(&app, settings); + Ok(()) +} + #[tauri::command] #[specta::specta] pub fn change_overlay_position_setting(app: AppHandle, position: String) -> Result<(), String> { diff --git a/src/bindings.ts b/src/bindings.ts index 325dc7f77..60eced467 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -85,6 +85,14 @@ async changeSelectedLanguageSetting(language: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_translate_target_language_setting", { language }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async changeOverlayPositionSetting(position: string) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("change_overlay_position_setting", { position }) }; @@ -703,7 +711,7 @@ async isLaptop() : Promise> { /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; paste_delay_ms?: number } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; translate_target_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; paste_delay_ms?: number } export type AudioDevice = { index: string; name: string; is_default: boolean } export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null } export type ClipboardHandling = "dont_modify" | "copy_to_clipboard" diff --git a/src/components/settings/TranslateTargetLanguage.tsx b/src/components/settings/TranslateTargetLanguage.tsx new file mode 100644 index 000000000..4f4bb7029 --- /dev/null +++ b/src/components/settings/TranslateTargetLanguage.tsx @@ -0,0 +1,186 @@ +import React, { useEffect, useMemo, useRef, useState } from "react"; +import { useTranslation } from "react-i18next"; +import { SettingContainer } from "../ui/SettingContainer"; +import { ResetButton } from "../ui/ResetButton"; +import { useSettings } from "../../hooks/useSettings"; +import { LANGUAGES } from "../../lib/constants/languages"; + +interface TranslateTargetLanguageProps { + descriptionMode?: "inline" | "tooltip"; + grouped?: boolean; +} + +export const TranslateTargetLanguage: React.FC< + TranslateTargetLanguageProps +> = ({ descriptionMode = "tooltip", grouped = false }) => { + const { t } = useTranslation(); + const { getSetting, updateSetting, resetSetting, isUpdating } = useSettings(); + const [isOpen, setIsOpen] = useState(false); + const [searchQuery, setSearchQuery] = useState(""); + const dropdownRef = useRef(null); + const searchInputRef = useRef(null); + + const selectedLanguage = getSetting("translate_target_language") || "en"; + + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) + ) { + setIsOpen(false); + setSearchQuery(""); + } + }; + + document.addEventListener("mousedown", handleClickOutside); + return () => { + document.removeEventListener("mousedown", handleClickOutside); + }; + }, []); + + useEffect(() => { + if (isOpen && searchInputRef.current) { + searchInputRef.current.focus(); + } + }, [isOpen]); + + const languageOptions = useMemo( + () => LANGUAGES.filter((lang) => lang.value !== "auto"), + [], + ); + + const filteredLanguages = useMemo( + () => + languageOptions.filter((language) => + language.label.toLowerCase().includes(searchQuery.toLowerCase()), + ), + [languageOptions, searchQuery], + ); + + const selectedLanguageName = + languageOptions.find((lang) => lang.value === selectedLanguage)?.label || + selectedLanguage; + + const handleLanguageSelect = async (languageCode: string) => { + await updateSetting("translate_target_language", languageCode); + setIsOpen(false); + setSearchQuery(""); + }; + + const handleReset = async () => { + await resetSetting("translate_target_language"); + }; + + const handleToggle = () => { + if (isUpdating("translate_target_language")) return; + setIsOpen(!isOpen); + }; + + const handleSearchChange = (event: React.ChangeEvent) => { + setSearchQuery(event.target.value); + }; + + const handleKeyDown = (event: React.KeyboardEvent) => { + if (event.key === "Enter" && filteredLanguages.length > 0) { + handleLanguageSelect(filteredLanguages[0].value); + } else if (event.key === "Escape") { + setIsOpen(false); + setSearchQuery(""); + } + }; + + return ( + +
+
+ + + {isOpen && !isUpdating("translate_target_language") && ( +
+
+ +
+ +
+ {filteredLanguages.length === 0 ? ( +
+ {t("settings.general.language.noResults")} +
+ ) : ( + filteredLanguages.map((language) => ( + + )) + )} +
+
+ )} +
+ +
+ + {isUpdating("translate_target_language") && ( +
+
+
+ )} +
+ ); +}; + +TranslateTargetLanguage.displayName = "TranslateTargetLanguage"; diff --git a/src/components/settings/advanced/AdvancedSettings.tsx b/src/components/settings/advanced/AdvancedSettings.tsx index a6104aa74..5b043bf5b 100644 --- a/src/components/settings/advanced/AdvancedSettings.tsx +++ b/src/components/settings/advanced/AdvancedSettings.tsx @@ -2,6 +2,7 @@ import React from "react"; import { useTranslation } from "react-i18next"; import { ShowOverlay } from "../ShowOverlay"; import { TranslateToEnglish } from "../TranslateToEnglish"; +import { TranslateTargetLanguage } from "../TranslateTargetLanguage"; import { ModelUnloadTimeoutSetting } from "../ModelUnloadTimeout"; import { CustomWords } from "../CustomWords"; import { SettingsGroup } from "../../ui/SettingsGroup"; @@ -46,6 +47,7 @@ export const AdvancedSettings: React.FC = () => { {showTranslateToEnglish && ( )} + diff --git a/src/components/settings/general/GeneralSettings.tsx b/src/components/settings/general/GeneralSettings.tsx index ca0f56ebc..68be24529 100644 --- a/src/components/settings/general/GeneralSettings.tsx +++ b/src/components/settings/general/GeneralSettings.tsx @@ -22,6 +22,10 @@ export const GeneralSettings: React.FC = () => {
+ {showLanguageSelector && ( )} diff --git a/src/i18n/locales/en/translation.json b/src/i18n/locales/en/translation.json index 8ad3740c5..4d77fff8d 100644 --- a/src/i18n/locales/en/translation.json +++ b/src/i18n/locales/en/translation.json @@ -123,6 +123,10 @@ "transcribe_with_post_process": { "name": "Post-Processing Hotkey", "description": "Optional: A dedicated hotkey that always applies AI post-processing to your transcription." + }, + "transcribe_with_translation": { + "name": "Translate Hotkey", + "description": "Optional: A dedicated hotkey that transcribes, then translates into your selected language." } }, "errors": { @@ -221,6 +225,10 @@ "description": "Automatically translate speech from other languages to English during transcription.", "descriptionUnsupported": "Translation is not supported by the {{model}} model." }, + "translateTargetLanguage": { + "title": "Translate To", + "description": "Select the target language used by the Translate Hotkey." + }, "modelUnload": { "title": "Unload Model", "description": "Automatically free GPU/CPU memory when the model hasn't been used for the specified time", @@ -285,7 +293,7 @@ "title": "Prompt", "selectedPrompt": { "title": "Selected Prompt", - "description": "Select a template for refining transcriptions or create a new one. Use ${output} inside the prompt text to reference the captured transcript." + "description": "Select a template for refining transcriptions or create a new one. Use ${output} to reference the transcript, and ${translate_target_language} to reference your Translate To language." }, "noPrompts": "No prompts available", "selectPrompt": "Select a prompt", @@ -293,8 +301,8 @@ "promptLabel": "Prompt Label", "promptLabelPlaceholder": "Enter prompt name", "promptInstructions": "Prompt Instructions", - "promptInstructionsPlaceholder": "Write the instructions to run after transcription. Example: Improve grammar and clarity for the following text: ${output}", - "promptTip": "Tip: Use ${output} to insert the transcribed text in your prompt.", + "promptInstructionsPlaceholder": "Write the instructions to run after transcription. Example: Translate this to ${translate_target_language}: ${output}", + "promptTip": "Tip: Use ${output} to insert the transcript, and ${translate_target_language} to insert your Translate To language.", "updatePrompt": "Update Prompt", "deletePrompt": "Delete Prompt", "createPrompt": "Create Prompt", @@ -466,6 +474,7 @@ "description": "Change the language of the Handy interface" }, "overlay": { - "transcribing": "Transcribing..." + "transcribing": "Transcribing...", + "translating": "Translating..." } } diff --git a/src/overlay/RecordingOverlay.tsx b/src/overlay/RecordingOverlay.tsx index b90cdc208..1069158f7 100644 --- a/src/overlay/RecordingOverlay.tsx +++ b/src/overlay/RecordingOverlay.tsx @@ -11,7 +11,7 @@ import { commands } from "@/bindings"; import i18n, { syncLanguageFromSettings } from "@/i18n"; import { getLanguageDirection } from "@/lib/utils/rtl"; -type OverlayState = "recording" | "transcribing"; +type OverlayState = "recording" | "transcribing" | "translating"; const RecordingOverlay: React.FC = () => { const { t } = useTranslation(); @@ -96,6 +96,9 @@ const RecordingOverlay: React.FC = () => { {state === "transcribing" && (
{t("overlay.transcribing")}
)} + {state === "translating" && ( +
{t("overlay.translating")}
+ )}
diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index 620ab7053..4b8433341 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -103,6 +103,8 @@ const settingUpdaters: { commands.updateRecordingRetentionPeriod(value as string), translate_to_english: (value) => commands.changeTranslateToEnglishSetting(value as boolean), + translate_target_language: (value) => + commands.changeTranslateTargetLanguageSetting(value as string), selected_language: (value) => commands.changeSelectedLanguageSetting(value as string), overlay_position: (value) =>