From 571e1dc1b76f534c8a66b8def216406c330a952b Mon Sep 17 00:00:00 2001 From: CJ Pais Date: Fri, 12 Dec 2025 20:15:49 +0700 Subject: [PATCH 1/4] most basic hacked up whisperfile --- src-tauri/Cargo.lock | 101 +++--------- src-tauri/Cargo.toml | 2 +- src-tauri/src/commands/mod.rs | 1 + src-tauri/src/commands/whisperfile.rs | 18 +++ src-tauri/src/lib.rs | 3 + src-tauri/src/managers/mod.rs | 1 + src-tauri/src/managers/transcription.rs | 140 ++++++++++++++--- src-tauri/src/managers/whisperfile.rs | 86 ++++++++++ src-tauri/src/settings.rs | 16 ++ src-tauri/src/shortcut.rs | 17 ++ src/bindings.ts | 22 ++- .../settings/debug/DebugSettings.tsx | 2 + .../settings/debug/WhisperRuntimeSelector.tsx | 147 ++++++++++++++++++ src/i18n/locales/en/translation.json | 4 + src/i18n/locales/es/translation.json | 4 + src/i18n/locales/fr/translation.json | 4 + src/i18n/locales/vi/translation.json | 4 + src/stores/settingsStore.ts | 1 + 18 files changed, 473 insertions(+), 100 deletions(-) create mode 100644 src-tauri/src/commands/whisperfile.rs create mode 100644 src-tauri/src/managers/whisperfile.rs create mode 100644 src/components/settings/debug/WhisperRuntimeSelector.tsx diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 7c143d272..ef0b383ee 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -256,32 +256,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-openai" -version = "0.29.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c58fd812d4b7152e0f748254c03927f27126a5d83fccf265b2baddaaa1aeea41" -dependencies = [ - "async-openai-macros", - "backoff", - "base64 0.22.1", - "bytes", - "derive_builder", - "eventsource-stream", - "futures", - "rand 0.9.2", - "reqwest", - "reqwest-eventsource", - "secrecy", - "serde", - "serde_json", - "thiserror 2.0.17", - "tokio", - "tokio-stream", - "tokio-util", - "tracing", -] - [[package]] name = "async-openai" version = "0.30.1" @@ -483,25 +457,22 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.5" +version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ "bitflags 2.10.0", "cexpr", "clang-sys", "itertools", - "lazy_static", - "lazycell", "log", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash 1.1.0", + "rustc-hash", "shlex", "syn 2.0.108", - "which", ] [[package]] @@ -2407,7 +2378,7 @@ name = "handy" version = "0.6.7" dependencies = [ "anyhow", - "async-openai 0.30.1", + "async-openai", "chrono", "cpal", "enigo", @@ -2512,15 +2483,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "home" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" -dependencies = [ - "windows-sys 0.61.2", -] - [[package]] name = "hound" version = "3.5.1" @@ -2955,9 +2917,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] @@ -3074,12 +3036,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "libappindicator" version = "0.9.0" @@ -4582,7 +4538,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", + "rustc-hash", "rustls", "socket2", "thiserror 2.0.17", @@ -4602,7 +4558,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash 2.1.1", + "rustc-hash", "rustls", "rustls-pki-types", "slab", @@ -5078,12 +5034,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -5145,6 +5095,7 @@ version = "0.23.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" dependencies = [ + "log", "once_cell", "ring", "rustls-pki-types", @@ -6993,12 +6944,8 @@ dependencies = [ [[package]] name = "transcribe-rs" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "856646076d3d9739998ba693ebfff3afe95c0cdb71d4fead32e761b11496094f" +version = "0.1.5" dependencies = [ - "async-openai 0.29.6", - "async-trait", "derive_builder", "env_logger", "hound", @@ -7007,8 +6954,10 @@ dependencies = [ "once_cell", "ort", "regex", + "serde", + "serde_json", "thiserror 2.0.17", - "tokio", + "ureq", "whisper-rs", ] @@ -7164,15 +7113,18 @@ checksum = "99ba1025f18a4a3fc3e9b48c868e9beb4f24f4b4b1a325bada26bd4119f46537" dependencies = [ "base64 0.22.1", "der", + "flate2", "log", "native-tls", "percent-encoding", + "rustls", "rustls-pemfile", "rustls-pki-types", "socks", "ureq-proto", "utf-8", "webpki-root-certs", + "webpki-roots", ] [[package]] @@ -7651,32 +7603,21 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.44", -] - [[package]] name = "whisper-rs" -version = "0.13.2" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6fc553156b521663bfa8e713e7ad58c7ca262d46de9998cd7f2e4de5ba0d9" +checksum = "71ea5d2401f30f51d08126a2d133fee4c1955136519d7ac6cf6f5ac0a91e6bc8" dependencies = [ + "libc", "whisper-rs-sys", ] [[package]] name = "whisper-rs-sys" -version = "0.11.1" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bab42b2c319e3a1e0280137c59368072348d3277873c7588b6466a127dca58" +checksum = "b5e2a6e06e7ac7b8f53c53a5f50bb0bc823ba69b63ecd887339f807a5598bbd2" dependencies = [ "bindgen", "cfg-if", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 5df7b31bf..d57fe2b7c 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -66,7 +66,7 @@ chrono = "0.4" rusqlite = { version = "0.37", features = ["bundled"] } tar = "0.4.44" flate2 = "1.0" -transcribe-rs = "0.1.4" +transcribe-rs = { path = "../../transcribe-rs", features = ["whisper", "parakeet", "whisperfile"] } ferrous-opencc = "0.2.3" specta = "=2.0.0-rc.22" specta-typescript = "0.0.9" diff --git a/src-tauri/src/commands/mod.rs b/src-tauri/src/commands/mod.rs index 35feee0c9..e92de5b2a 100644 --- a/src-tauri/src/commands/mod.rs +++ b/src-tauri/src/commands/mod.rs @@ -2,6 +2,7 @@ pub mod audio; pub mod history; pub mod models; pub mod transcription; +pub mod whisperfile; use crate::settings::{get_settings, write_settings, AppSettings, LogLevel}; use crate::utils::cancel_current_operation; diff --git a/src-tauri/src/commands/whisperfile.rs b/src-tauri/src/commands/whisperfile.rs new file mode 100644 index 000000000..ccf1f63c4 --- /dev/null +++ b/src-tauri/src/commands/whisperfile.rs @@ -0,0 +1,18 @@ +use crate::managers::whisperfile; +use tauri::AppHandle; + +#[tauri::command] +#[specta::specta] +pub async fn download_whisperfile_binary(app: AppHandle) -> Result { + let path = whisperfile::download_whisperfile(&app) + .await + .map_err(|e| format!("Failed to download whisperfile: {}", e))?; + + Ok(path.to_string_lossy().to_string()) +} + +#[tauri::command] +#[specta::specta] +pub fn is_whisperfile_binary_downloaded(app: AppHandle) -> bool { + whisperfile::is_whisperfile_downloaded(&app) +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index ea9f85c8d..2e06b4d11 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -243,6 +243,7 @@ pub fn run() { shortcut::change_selected_language_setting, shortcut::change_overlay_position_setting, shortcut::change_debug_mode_setting, + shortcut::change_whisper_runtime_setting, shortcut::change_word_correction_threshold_setting, shortcut::change_paste_method_setting, shortcut::change_clipboard_handling_setting, @@ -305,6 +306,8 @@ pub fn run() { commands::history::delete_history_entry, commands::history::update_history_limit, commands::history::update_recording_retention_period, + commands::whisperfile::download_whisperfile_binary, + commands::whisperfile::is_whisperfile_binary_downloaded, helpers::clamshell::is_laptop, ]); diff --git a/src-tauri/src/managers/mod.rs b/src-tauri/src/managers/mod.rs index 1239dc26b..bc023271a 100644 --- a/src-tauri/src/managers/mod.rs +++ b/src-tauri/src/managers/mod.rs @@ -2,3 +2,4 @@ pub mod audio; pub mod history; pub mod model; pub mod transcription; +pub mod whisperfile; diff --git a/src-tauri/src/managers/transcription.rs b/src-tauri/src/managers/transcription.rs index 2418db9db..5b7dc573f 100644 --- a/src-tauri/src/managers/transcription.rs +++ b/src-tauri/src/managers/transcription.rs @@ -1,6 +1,7 @@ use crate::audio_toolkit::apply_custom_words; use crate::managers::model::{EngineType, ModelManager}; -use crate::settings::{get_settings, ModelUnloadTimeout}; +use crate::managers::whisperfile::get_whisperfile_path; +use crate::settings::{get_settings, ModelUnloadTimeout, WhisperRuntime}; use anyhow::Result; use log::{debug, error, info, warn}; use serde::Serialize; @@ -15,6 +16,7 @@ use transcribe_rs::{ ParakeetEngine, ParakeetInferenceParams, ParakeetModelParams, TimestampGranularity, }, whisper::{WhisperEngine, WhisperInferenceParams}, + whisperfile::{WhisperfileEngine, WhisperfileInferenceParams, WhisperfileModelParams}, }, TranscriptionEngine, }; @@ -30,6 +32,7 @@ pub struct ModelStateEvent { enum LoadedEngine { Whisper(WhisperEngine), Parakeet(ParakeetEngine), + Whisperfile(WhisperfileEngine), } #[derive(Clone)] @@ -136,15 +139,24 @@ impl TranscriptionManager { let unload_start = std::time::Instant::now(); debug!("Starting to unload model"); - { + let old_engine = { let mut engine = self.engine.lock().unwrap(); if let Some(ref mut loaded_engine) = *engine { match loaded_engine { LoadedEngine::Whisper(ref mut whisper) => whisper.unload_model(), LoadedEngine::Parakeet(ref mut parakeet) => parakeet.unload_model(), + LoadedEngine::Whisperfile(ref mut whisperfile) => whisperfile.unload_model(), } } - *engine = None; // Drop the engine to free memory + engine.take() // Take the engine out to drop it safely + }; + + // Drop the old engine in a separate thread to avoid Tokio runtime drop panic + // This is necessary because WhisperfileEngine contains a Tokio runtime + if old_engine.is_some() { + thread::spawn(move || { + drop(old_engine); + }); } { let mut current_model = self.current_model_id.lock().unwrap(); @@ -205,25 +217,79 @@ impl TranscriptionManager { } let model_path = self.model_manager.get_model_path(model_id)?; + let settings = get_settings(&self.app_handle); // Create appropriate engine based on model type let loaded_engine = match model_info.engine_type { EngineType::Whisper => { - let mut engine = WhisperEngine::new(); - engine.load_model(&model_path).map_err(|e| { - let error_msg = format!("Failed to load whisper model {}: {}", model_id, e); - let _ = self.app_handle.emit( - "model-state-changed", - ModelStateEvent { - event_type: "loading_failed".to_string(), - model_id: Some(model_id.to_string()), - model_name: Some(model_info.name.clone()), - error: Some(error_msg.clone()), - }, - ); - anyhow::anyhow!(error_msg) - })?; - LoadedEngine::Whisper(engine) + // Check if we should use Whisperfile runtime + if settings.whisper_runtime == WhisperRuntime::Whisperfile { + let binary_path = get_whisperfile_path(&self.app_handle).map_err(|e| { + let error_msg = format!("Failed to get whisperfile path: {}", e); + let _ = self.app_handle.emit( + "model-state-changed", + ModelStateEvent { + event_type: "loading_failed".to_string(), + model_id: Some(model_id.to_string()), + model_name: Some(model_info.name.clone()), + error: Some(error_msg.clone()), + }, + ); + anyhow::anyhow!(error_msg) + })?; + + if !binary_path.exists() { + let error_msg = + "Whisperfile binary not found. Please download it in Settings > Debug." + .to_string(); + let _ = self.app_handle.emit( + "model-state-changed", + ModelStateEvent { + event_type: "loading_failed".to_string(), + model_id: Some(model_id.to_string()), + model_name: Some(model_info.name.clone()), + error: Some(error_msg.clone()), + }, + ); + return Err(anyhow::anyhow!(error_msg)); + } + + info!("Using Whisperfile runtime with binary at {:?}", binary_path); + let mut engine = WhisperfileEngine::new(binary_path); + let params = WhisperfileModelParams::default(); + engine.load_model_with_params(&model_path, params).map_err(|e| { + let error_msg = + format!("Failed to load whisperfile model {}: {}", model_id, e); + let _ = self.app_handle.emit( + "model-state-changed", + ModelStateEvent { + event_type: "loading_failed".to_string(), + model_id: Some(model_id.to_string()), + model_name: Some(model_info.name.clone()), + error: Some(error_msg.clone()), + }, + ); + anyhow::anyhow!(error_msg) + })?; + LoadedEngine::Whisperfile(engine) + } else { + // Standard Whisper runtime + let mut engine = WhisperEngine::new(); + engine.load_model(&model_path).map_err(|e| { + let error_msg = format!("Failed to load whisper model {}: {}", model_id, e); + let _ = self.app_handle.emit( + "model-state-changed", + ModelStateEvent { + event_type: "loading_failed".to_string(), + model_id: Some(model_id.to_string()), + model_name: Some(model_info.name.clone()), + error: Some(error_msg.clone()), + }, + ); + anyhow::anyhow!(error_msg) + })?; + LoadedEngine::Whisper(engine) + } } EngineType::Parakeet => { let mut engine = ParakeetEngine::new(); @@ -248,9 +314,21 @@ impl TranscriptionManager { }; // Update the current engine and model ID + // First, take the old engine out to drop it safely in a separate thread + // This prevents "Cannot drop a runtime in a context where blocking is not allowed" panic { let mut engine = self.engine.lock().unwrap(); + let old_engine = engine.take(); *engine = Some(loaded_engine); + + // Drop the old engine in a separate thread to avoid Tokio runtime drop panic + // WhisperfileEngine contains a Tokio runtime, and dropping it from within + // an async context (when called from set_active_model) causes a panic + if old_engine.is_some() { + thread::spawn(move || { + drop(old_engine); + }); + } } { let mut current_model = self.current_model_id.lock().unwrap(); @@ -384,6 +462,32 @@ impl TranscriptionManager { .transcribe_samples(audio, Some(params)) .map_err(|e| anyhow::anyhow!("Parakeet transcription failed: {}", e))? } + LoadedEngine::Whisperfile(whisperfile_engine) => { + // Normalize language code (same as Whisper) + let whisperfile_language = if settings.selected_language == "auto" { + None + } else { + let normalized = if settings.selected_language == "zh-Hans" + || settings.selected_language == "zh-Hant" + { + "zh".to_string() + } else { + settings.selected_language.clone() + }; + Some(normalized) + }; + + let params = WhisperfileInferenceParams { + language: whisperfile_language, + translate: settings.translate_to_english, + temperature: None, + response_format: Some("verbose_json".to_string()), + }; + + whisperfile_engine + .transcribe_samples(audio, Some(params)) + .map_err(|e| anyhow::anyhow!("Whisperfile transcription failed: {}", e))? + } } }; diff --git a/src-tauri/src/managers/whisperfile.rs b/src-tauri/src/managers/whisperfile.rs new file mode 100644 index 000000000..f9e6ebdfb --- /dev/null +++ b/src-tauri/src/managers/whisperfile.rs @@ -0,0 +1,86 @@ +use anyhow::Result; +use futures_util::StreamExt; +use log::info; +use serde::Serialize; +use std::fs::{self, File}; +use std::io::Write; +use std::path::PathBuf; +use tauri::{AppHandle, Emitter, Manager}; + +const WHISPERFILE_URL: &str = + "https://github.com/mozilla-ai/llamafile/releases/download/0.9.3/whisperfile-0.9.3"; +const WHISPERFILE_FILENAME: &str = "whisperfile-0.9.3"; + +#[derive(Debug, Clone, Serialize)] +pub struct WhisperfileDownloadProgress { + pub downloaded: u64, + pub total: u64, + pub percentage: f64, +} + +pub fn get_whisperfile_path(app_handle: &AppHandle) -> Result { + Ok(app_handle + .path() + .app_data_dir() + .map_err(|e| anyhow::anyhow!("Failed to get app data dir: {}", e))? + .join("binaries") + .join(WHISPERFILE_FILENAME)) +} + +pub fn is_whisperfile_downloaded(app_handle: &AppHandle) -> bool { + get_whisperfile_path(app_handle) + .map(|p| p.exists()) + .unwrap_or(false) +} + +pub async fn download_whisperfile(app_handle: &AppHandle) -> Result { + let path = get_whisperfile_path(app_handle)?; + + if path.exists() { + info!("Whisperfile already downloaded at {:?}", path); + return Ok(path); + } + + info!("Downloading whisperfile from {}", WHISPERFILE_URL); + + // Create parent directory + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + + let response = reqwest::get(WHISPERFILE_URL).await?; + let total = response.content_length().unwrap_or(0); + let mut file = File::create(&path)?; + let mut downloaded: u64 = 0; + let mut stream = response.bytes_stream(); + + while let Some(chunk) = stream.next().await { + let chunk = chunk?; + file.write_all(&chunk)?; + downloaded += chunk.len() as u64; + + let progress = WhisperfileDownloadProgress { + downloaded, + total, + percentage: if total > 0 { + (downloaded as f64 / total as f64) * 100.0 + } else { + 0.0 + }, + }; + + let _ = app_handle.emit("whisperfile-download-progress", progress); + } + + // Make executable on Unix + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&path)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(&path, perms)?; + } + + info!("Whisperfile downloaded successfully to {:?}", path); + Ok(path) +} diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index c2b98bd3e..7ccc9001a 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -151,6 +151,19 @@ pub enum RecordingRetentionPeriod { Months3, } +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)] +#[serde(rename_all = "snake_case")] +pub enum WhisperRuntime { + Whisper, + Whisperfile, +} + +impl Default for WhisperRuntime { + fn default() -> Self { + WhisperRuntime::Whisper + } +} + impl Default for ModelUnloadTimeout { fn default() -> Self { ModelUnloadTimeout::Never @@ -260,6 +273,8 @@ pub struct AppSettings { #[serde(default = "default_log_level")] pub log_level: LogLevel, #[serde(default)] + pub whisper_runtime: WhisperRuntime, + #[serde(default)] pub custom_words: Vec, #[serde(default)] pub model_unload_timeout: ModelUnloadTimeout, @@ -538,6 +553,7 @@ pub fn get_default_settings() -> AppSettings { overlay_position: default_overlay_position(), debug_mode: false, log_level: default_log_level(), + whisper_runtime: WhisperRuntime::default(), custom_words: Vec::new(), model_unload_timeout: ModelUnloadTimeout::Never, word_correction_threshold: default_word_correction_threshold(), diff --git a/src-tauri/src/shortcut.rs b/src-tauri/src/shortcut.rs index 288815c3a..8581f6dea 100644 --- a/src-tauri/src/shortcut.rs +++ b/src-tauri/src/shortcut.rs @@ -257,6 +257,23 @@ pub fn change_start_hidden_setting(app: AppHandle, enabled: bool) -> Result<(), Ok(()) } +#[tauri::command] +#[specta::specta] +pub fn change_whisper_runtime_setting(app: AppHandle, runtime: String) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + let parsed = match runtime.as_str() { + "whisper" => settings::WhisperRuntime::Whisper, + "whisperfile" => settings::WhisperRuntime::Whisperfile, + other => { + warn!("Invalid whisper runtime '{}', defaulting to whisper", other); + settings::WhisperRuntime::Whisper + } + }; + settings.whisper_runtime = parsed; + settings::write_settings(&app, settings); + Ok(()) +} + #[tauri::command] #[specta::specta] pub fn change_autostart_setting(app: AppHandle, enabled: bool) -> Result<(), String> { diff --git a/src/bindings.ts b/src/bindings.ts index 702dd2ddb..4c55c3e5f 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -101,6 +101,14 @@ async changeDebugModeSetting(enabled: boolean) : Promise> { else return { status: "error", error: e as any }; } }, +async changeWhisperRuntimeSetting(runtime: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_whisper_runtime_setting", { runtime }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async changeWordCorrectionThresholdSetting(threshold: number) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("change_word_correction_threshold_setting", { threshold }) }; @@ -584,6 +592,17 @@ async updateRecordingRetentionPeriod(period: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("download_whisperfile_binary") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, +async isWhisperfileBinaryDownloaded() : Promise { + return await TAURI_INVOKE("is_whisperfile_binary_downloaded"); +}, /** * Checks if the Mac is a laptop by detecting battery presence * @@ -610,7 +629,7 @@ async isLaptop() : Promise> { /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: string; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; whisper_runtime?: WhisperRuntime; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: string; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean } export type AudioDevice = { index: string; name: string; is_default: boolean } export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null } export type ClipboardHandling = "dont_modify" | "copy_to_clipboard" @@ -628,6 +647,7 @@ export type PostProcessProvider = { id: string; label: string; base_url: string; export type RecordingRetentionPeriod = "never" | "preserve_limit" | "days_3" | "weeks_2" | "months_3" export type ShortcutBinding = { id: string; name: string; description: string; default_binding: string; current_binding: string } export type SoundTheme = "marimba" | "pop" | "custom" +export type WhisperRuntime = "whisper" | "whisperfile" /** tauri-specta globals **/ diff --git a/src/components/settings/debug/DebugSettings.tsx b/src/components/settings/debug/DebugSettings.tsx index 4914010e0..03d4610b2 100644 --- a/src/components/settings/debug/DebugSettings.tsx +++ b/src/components/settings/debug/DebugSettings.tsx @@ -4,6 +4,7 @@ import { type } from "@tauri-apps/plugin-os"; import { WordCorrectionThreshold } from "./WordCorrectionThreshold"; import { LogDirectory } from "./LogDirectory"; import { LogLevelSelector } from "./LogLevelSelector"; +import { WhisperRuntimeSelector } from "./WhisperRuntimeSelector"; import { SettingsGroup } from "../../ui/SettingsGroup"; import { HistoryLimit } from "../HistoryLimit"; import { AlwaysOnMicrophone } from "../AlwaysOnMicrophone"; @@ -28,6 +29,7 @@ export const DebugSettings: React.FC = () => { + = ({ + descriptionMode = "tooltip", + grouped = false, +}) => { + const { t } = useTranslation(); + const { settings, updateSetting, isUpdating } = useSettings(); + const currentRuntime = settings?.whisper_runtime ?? "whisper"; + + const [isDownloaded, setIsDownloaded] = useState(null); + const [isDownloading, setIsDownloading] = useState(false); + const [downloadProgress, setDownloadProgress] = useState(0); + + // Check if binary is downloaded on mount and when runtime changes + useEffect(() => { + const checkDownloaded = async () => { + try { + const downloaded = await commands.isWhisperfileBinaryDownloaded(); + setIsDownloaded(downloaded); + } catch (error) { + console.error("Failed to check whisperfile status:", error); + setIsDownloaded(false); + } + }; + checkDownloaded(); + }, [currentRuntime]); + + // Listen for download progress events + useEffect(() => { + const unlisten = listen( + "whisperfile-download-progress", + (event) => { + setDownloadProgress(Math.round(event.payload.percentage)); + } + ); + + return () => { + unlisten.then((fn) => fn()); + }; + }, []); + + const handleSelect = async (value: string) => { + if (value === currentRuntime) return; + + try { + await updateSetting("whisper_runtime", value as WhisperRuntime); + } catch (error) { + console.error("Failed to update whisper runtime:", error); + } + }; + + const handleDownload = async () => { + setIsDownloading(true); + setDownloadProgress(0); + try { + const result = await commands.downloadWhisperfileBinary(); + if (result.status === "ok") { + setIsDownloaded(true); + } else { + console.error("Failed to download whisperfile:", result.error); + } + } catch (error) { + console.error("Failed to download whisperfile:", error); + } finally { + setIsDownloading(false); + } + }; + + const showDownloadSection = + currentRuntime === "whisperfile" && isDownloaded === false; + + return ( +
+ + + + + {showDownloadSection && ( +
+

+ Whisperfile binary not found. Download required (~150 MB). +

+ {isDownloading ? ( +
+
+
+
+

+ Downloading... {downloadProgress}% +

+
+ ) : ( + + )} +
+ )} + + {currentRuntime === "whisperfile" && isDownloaded === true && ( +
+

+ Whisperfile binary ready. Reload model to use. +

+
+ )} +
+ ); +}; diff --git a/src/i18n/locales/en/translation.json b/src/i18n/locales/en/translation.json index 6e5f92ca8..4de07ca0b 100644 --- a/src/i18n/locales/en/translation.json +++ b/src/i18n/locales/en/translation.json @@ -275,6 +275,10 @@ "title": "Log Level", "description": "Set the verbosity of logging" }, + "whisperRuntime": { + "title": "Whisper Runtime", + "description": "Choose between standard Whisper or experimental Whisperfile. Model must be reloaded for changes to take effect." + }, "updateChecks": { "label": "Check for Updates", "description": "Automatically check for new versions of Handy" diff --git a/src/i18n/locales/es/translation.json b/src/i18n/locales/es/translation.json index 4c72d3c66..3a020a402 100644 --- a/src/i18n/locales/es/translation.json +++ b/src/i18n/locales/es/translation.json @@ -275,6 +275,10 @@ "title": "Nivel de Registro", "description": "Establece el nivel de detalle del registro" }, + "whisperRuntime": { + "title": "Motor de Whisper", + "description": "Elige entre Whisper estándar o Whisperfile experimental. El modelo debe recargarse para que los cambios surtan efecto." + }, "updateChecks": { "label": "Buscar Actualizaciones", "description": "Buscar automáticamente nuevas versiones de Handy" diff --git a/src/i18n/locales/fr/translation.json b/src/i18n/locales/fr/translation.json index 1b373df7c..06352a897 100644 --- a/src/i18n/locales/fr/translation.json +++ b/src/i18n/locales/fr/translation.json @@ -276,6 +276,10 @@ "title": "Niveau de journalisation", "description": "Définir le niveau de détail de la journalisation" }, + "whisperRuntime": { + "title": "Moteur Whisper", + "description": "Choisir entre Whisper standard ou Whisperfile expérimental. Le modèle doit être rechargé pour que les changements prennent effet." + }, "updateChecks": { "label": "Vérifier les mises à jour", "description": "Vérifier automatiquement les nouvelles versions de Handy" diff --git a/src/i18n/locales/vi/translation.json b/src/i18n/locales/vi/translation.json index 4ab2946cc..598c32d89 100644 --- a/src/i18n/locales/vi/translation.json +++ b/src/i18n/locales/vi/translation.json @@ -276,6 +276,10 @@ "title": "Mức nhật ký", "description": "Đặt mức độ chi tiết của nhật ký" }, + "whisperRuntime": { + "title": "Công cụ Whisper", + "description": "Chọn giữa Whisper tiêu chuẩn hoặc Whisperfile thử nghiệm. Mô hình cần được tải lại để thay đổi có hiệu lực." + }, "updateChecks": { "label": "Kiểm tra cập nhật", "description": "Tự động kiểm tra phiên bản mới của Handy" diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index 67d3c4fc8..a1492ff14 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -108,6 +108,7 @@ const settingUpdaters: { overlay_position: (value) => commands.changeOverlayPositionSetting(value as string), debug_mode: (value) => commands.changeDebugModeSetting(value as boolean), + whisper_runtime: (value) => commands.changeWhisperRuntimeSetting(value as string), custom_words: (value) => commands.updateCustomWords(value as string[]), word_correction_threshold: (value) => commands.changeWordCorrectionThresholdSetting(value as number), From 8dde84111a875859f81b978c4dbeff4203e7ec34 Mon Sep 17 00:00:00 2001 From: CJ Pais Date: Sat, 13 Dec 2025 13:09:07 +0700 Subject: [PATCH 2/4] use 0.2.0 transcribe-rs --- src-tauri/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index d57fe2b7c..ee7bb7894 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -66,7 +66,7 @@ chrono = "0.4" rusqlite = { version = "0.37", features = ["bundled"] } tar = "0.4.44" flate2 = "1.0" -transcribe-rs = { path = "../../transcribe-rs", features = ["whisper", "parakeet", "whisperfile"] } +transcribe-rs = { version="0.2.0", features = ["whisper", "parakeet", "whisperfile"] } ferrous-opencc = "0.2.3" specta = "=2.0.0-rc.22" specta-typescript = "0.0.9" From 92116457a314a77ac76940d968013d7000e4295b Mon Sep 17 00:00:00 2001 From: CJ Pais Date: Sat, 13 Dec 2025 13:09:20 +0700 Subject: [PATCH 3/4] Update Cargo.lock --- src-tauri/Cargo.lock | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index ef0b383ee..d8243daad 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -6944,7 +6944,9 @@ dependencies = [ [[package]] name = "transcribe-rs" -version = "0.1.5" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06b34ced239a20ddef245dbe84c9976f871521c5fa479576f5580e2d24814f12" dependencies = [ "derive_builder", "env_logger", From a31b630fc0642c99cb582a1dec84b0eb7d04f957 Mon Sep 17 00:00:00 2001 From: CJ Pais Date: Mon, 15 Dec 2025 17:50:24 +0700 Subject: [PATCH 4/4] move to transcribe-rs 0.2.1 --- src-tauri/Cargo.lock | 63 ++++++++++++++++++++++++++++++++++---------- src-tauri/Cargo.toml | 2 +- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index d8243daad..c64e66b42 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -457,22 +457,25 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.71.1" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "bitflags 2.10.0", "cexpr", "clang-sys", "itertools", + "lazy_static", + "lazycell", "log", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 2.0.108", + "which", ] [[package]] @@ -2483,6 +2486,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "hound" version = "3.5.1" @@ -2917,9 +2929,9 @@ checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" -version = "0.13.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -3036,6 +3048,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libappindicator" version = "0.9.0" @@ -4538,7 +4556,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 2.0.17", @@ -4558,7 +4576,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -5034,6 +5052,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -6944,9 +6968,9 @@ dependencies = [ [[package]] name = "transcribe-rs" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06b34ced239a20ddef245dbe84c9976f871521c5fa479576f5580e2d24814f12" +checksum = "9d8fb085c53485dfaa3f0e377c2c8f5e4affb991ddf7cb04fe32e49fc3e81bc4" dependencies = [ "derive_builder", "env_logger", @@ -7605,21 +7629,32 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + [[package]] name = "whisper-rs" -version = "0.15.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71ea5d2401f30f51d08126a2d133fee4c1955136519d7ac6cf6f5ac0a91e6bc8" +checksum = "40b6fc553156b521663bfa8e713e7ad58c7ca262d46de9998cd7f2e4de5ba0d9" dependencies = [ - "libc", "whisper-rs-sys", ] [[package]] name = "whisper-rs-sys" -version = "0.14.1" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e2a6e06e7ac7b8f53c53a5f50bb0bc823ba69b63ecd887339f807a5598bbd2" +checksum = "76bab42b2c319e3a1e0280137c59368072348d3277873c7588b6466a127dca58" dependencies = [ "bindgen", "cfg-if", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index ee7bb7894..7c7a0ee25 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -66,7 +66,7 @@ chrono = "0.4" rusqlite = { version = "0.37", features = ["bundled"] } tar = "0.4.44" flate2 = "1.0" -transcribe-rs = { version="0.2.0", features = ["whisper", "parakeet", "whisperfile"] } +transcribe-rs = { version="0.2.1", features = ["whisper", "parakeet", "whisperfile"] } ferrous-opencc = "0.2.3" specta = "=2.0.0-rc.22" specta-typescript = "0.0.9"