csheaff · csheaff · Feb 15, 2026 · Feb 15, 2026 · Feb 15, 2026 · Feb 15, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -30,7 +30,7 @@ jobs:
         run: sudo apt-get update && sudo apt-get install -y bats
 
       - name: Install test dependencies
-        run: sudo apt-get install -y ydotool pipewire libnotify-bin socat
+        run: sudo apt-get install -y wtype ydotool pipewire libnotify-bin socat
 
       - name: Run tests
         run: bats test/talktype.bats test/server.bats
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,71 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## What is talktype
+
+Push-to-talk speech-to-text for Linux. Press a hotkey to record, press again to transcribe and type at cursor. No GUI — just a keyboard shortcut bound to the `talktype` script. Works on Wayland (GNOME, Sway, Hyprland) and X11.
+
+## Build and install
+
+```bash
+make install      # Full setup: system deps + Python venv + symlink to ~/.local/bin/talktype
+make deps         # System packages only (requires sudo): wtype, ydotool, ffmpeg, pipewire, etc.
+make venv         # Python venv with faster-whisper only
+make parakeet     # Install Parakeet backend venv (in backends/.parakeet-venv/)
+make moonshine    # Install Moonshine backend venv (in backends/.moonshine-venv/)
+make model        # Pre-download Whisper model
+make clean        # Remove .venv
+make uninstall    # Remove ~/.local/bin/talktype symlink
+```
+
+## Testing
+
+Tests use [BATS](https://github.com/bats-core/bats-core) (Bash Automated Testing System):
+
+```bash
+make test                    # Run all tests
+bats test/talktype.bats      # Core tests (recording lifecycle, transcription, error handling)
+bats test/server.bats        # Server mode tests (daemon lifecycle, socket communication)
+bats test/backends.bats      # Integration tests against real backends + NASA audio fixture
+```
+
+Tests use mocks in `test/mocks/` to avoid requiring actual GPU, models, or system tools. The mock daemon (`test/mock-daemon.py`) simulates server backends.
+
+## Linting
+
+CI runs ShellCheck on all Bash scripts and Python syntax checks on all Python files:
+
+```bash
+shellcheck talktype transcribe-server backends/*-server
+python3 -m py_compile transcribe whisper-daemon.py backends/*-daemon.py
+```
+
+## Architecture
+
+**Core flow:** hotkey → `talktype` (Bash) → record audio (ffmpeg/pw-record) → call `$TALKTYPE_CMD` with WAV path → type result at the cursor via `type_text` (wtype, ydotool, xdotool, or a custom `$TALKTYPE_TYPE_CMD`).
+
+**Main script** (`talktype`, ~116 lines Bash): manages recording state via PID file (`$TALKTYPE_DIR/rec.pid`), sends desktop notifications, delegates transcription to `$TALKTYPE_CMD`.
+
+**Backend pattern — two modes per backend:**
+- **Direct invocation** (`transcribe`, `backends/parakeet`, `backends/moonshine`): Python scripts that load model, transcribe, exit. Simple but slow (model reload each time).
+- **Server mode** (`transcribe-server`, `backends/*-server` + `*-daemon.py`): Bash wrapper manages a Python Unix socket daemon that keeps the model in memory. Subcommands: `start`, `stop`, `transcribe`. Auto-starts daemon if not running.
+
+**Adding a custom backend:** Any executable that takes a WAV file path as its last argument and prints text to stdout. Set `TALKTYPE_CMD` in config.
+
+## Configuration
+
+Config file: `~/.config/talktype/config` (sourced as shell script by `talktype`). Key variables:
+
+- `TALKTYPE_CMD` — transcription command (default: direct faster-whisper via `transcribe`)
+- `TALKTYPE_VENV` — Python venv path (default: `.venv` in script dir)
+- `TALKTYPE_DIR` — runtime dir for PID/audio files (default: `$XDG_RUNTIME_DIR/talktype`)
+- `TALKTYPE_TYPE_CMD` — typing tool (`auto`, `wtype`, `ydotool`, `xdotool`, or custom command; default: `auto`)
+- `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`, `WHISPER_COMPUTE` — Whisper settings
+
+## Key conventions
+
+- Core is intentionally pure Bash. Python is only used for ML model invocation.
+- Follows Unix philosophy: small scripts, stdin/stdout interfaces, pluggable components.
+- Server daemons communicate via Unix sockets using `socat`.
+- State files (PID, audio, notification ID) live in `$TALKTYPE_DIR` (XDG runtime dir).
diff --git a/Makefile b/Makefile
@@ -12,7 +12,7 @@ install: deps venv
 
 # Install system dependencies (requires sudo)
 deps:
-	sudo apt install -y ydotool ffmpeg pipewire libnotify-bin python3-venv socat
+	sudo apt install -y wtype ydotool ffmpeg pipewire libnotify-bin python3-venv socat
 
 # Create Python venv with faster-whisper (default backend)
 venv: .venv/.done

diff --git a/README.md b/README.md
@@ -20,8 +20,10 @@ Or bring your own — anything that reads a WAV and prints text works.
 
 - Linux (Wayland or X11)
 - Audio recorder: [ffmpeg](https://ffmpeg.org/) (preferred) or PipeWire (`pw-record`)
-- [ydotool](https://github.com/ReimuNotMoe/ydotool) for typing text
-  (user must be in the `input` group — see Install)
+- Typing tool (one of):
+  - [wtype](https://github.com/atx/wtype) — recommended for Wayland, no daemon needed
+  - [ydotool](https://github.com/ReimuNotMoe/ydotool) + `ydotoold` — Wayland & X11
+  - [xdotool](https://github.com/jordansissel/xdotool) — X11 only
 - [socat](https://linux.die.net/man/1/socat) (for server-backed transcription)
 
 For the default backend (faster-whisper):
@@ -36,12 +38,14 @@ make install
 ```
 
 This will:
-1. Install system packages (`ydotool`, etc.)
+1. Install system packages (`wtype`, `ydotool`, etc.)
 2. Create a Python venv with `faster-whisper`
 3. Symlink `talktype` into `~/.local/bin/`
 
 ### ydotool permissions
 
+> **Note:** Only needed if you use ydotool. If you use wtype (Wayland) or xdotool (X11), skip this.
+
 `ydotool` needs access to `/dev/uinput`. Add yourself to the `input` group:
 
 ```bash
@@ -74,6 +78,10 @@ EOF
 Any `TALKTYPE_*` variable can go in this file. Environment variables still work
 and are applied after the config file, so they override it.
 
+Set `TALKTYPE_TYPE_CMD` to control which typing tool is used (`auto`, `wtype`,
+`ydotool`, `xdotool`, or any custom command). Default is `auto`, which picks
+the best available tool: wtype (Wayland) → ydotool+daemon → xdotool (X11) → ydotool (bare, with warning).
+
 ## Setup
 
 Bind `talktype` to a keyboard shortcut:
@@ -182,7 +190,7 @@ contract — use whatever model, language, or runtime you want.
                                             ↓
                                      $TALKTYPE_CMD audio.wav
                                             ↓
-                                     ydotool type → text appears at cursor
+                                     type_text → text appears at cursor
 ```
 
 The `talktype` script is ~80 lines of bash. Transcription backends are

diff --git a/talktype b/talktype
@@ -9,7 +9,7 @@
 # Transcription is pluggable: set TALKTYPE_CMD to any command that
 # takes a WAV file path as its last argument and prints text to stdout.
 #
-# Requires: ydotool, pw-record (PipeWire)
+# Requires: wtype, ydotool, or xdotool; ffmpeg or pw-record (PipeWire); notify-send
 #
 set -euo pipefail
 
@@ -58,10 +58,49 @@ notify_close() {
     fi
 }
 
+# ── Typing tool selection ──
+warn_ydotool_no_daemon() {
+    local warnfile="$TALKTYPE_DIR/.ydotool-warned"
+    [ -f "$warnfile" ] && return
+    touch "$warnfile"
+    echo "Warning: ydotool without ydotoold leaks virtual input devices (see issue #7). Install wtype (Wayland) or run ydotoold." >&2
+    notify-send -t 5000 -i dialog-warning "TalkType" "ydotool without daemon — may leak input devices" 2>/dev/null || true
+}
+
+type_text() {
+    local text="$1"
+    local cmd="${TALKTYPE_TYPE_CMD:-auto}"
+
+    if [ "$cmd" = "auto" ]; then
+        if [ -n "${WAYLAND_DISPLAY:-}" ] && command -v wtype &>/dev/null; then
+            cmd=wtype
+        elif command -v ydotool &>/dev/null && pgrep -x ydotoold &>/dev/null; then
+            cmd=ydotool
+        elif [ -n "${DISPLAY:-}" ] && command -v xdotool &>/dev/null; then
+            cmd=xdotool
+        elif command -v ydotool &>/dev/null; then
+            warn_ydotool_no_daemon
+            cmd=ydotool
+        fi
+    fi
+
+    case "$cmd" in
+        wtype)    wtype -- "$text" ;;
+        ydotool)  ydotool type --key-delay 20 -- "$text" ;;
+        xdotool)  xdotool type -- "$text" ;;
+        *)        $cmd "$text" ;;
+    esac
+}
+
 # ── Check core dependencies ──
 check_deps() {
     local missing=()
-    command -v ydotool    &>/dev/null || missing+=(ydotool)
+    local type_cmd="${TALKTYPE_TYPE_CMD:-auto}"
+    if [ "$type_cmd" = "auto" ]; then
+        # Auto mode needs at least one of the supported typing tools installed.
+        command -v wtype &>/dev/null || command -v ydotool &>/dev/null || command -v xdotool &>/dev/null || missing+=("wtype, ydotool, or xdotool")
+    else
+        # A custom TALKTYPE_TYPE_CMD may carry arguments (it is executed
+        # word-split), so only its first word names the executable to look up.
+        local type_bin
+        read -r type_bin _ <<<"$type_cmd"
+        command -v "$type_bin" &>/dev/null || missing+=("$type_bin")
+    fi
     command -v ffmpeg &>/dev/null || command -v pw-record &>/dev/null || missing+=("ffmpeg or pipewire")
     command -v notify-send &>/dev/null || missing+=(libnotify-bin)
 
@@ -96,8 +135,8 @@ if [ -f "$PIDFILE" ]; then
 
     notify_close
 
-    # Type text at cursor via ydotool
-    ydotool type --key-delay 20 -- "$TEXT"
+    # Type text at cursor
+    type_text "$TEXT"
 
 # ── Otherwise → start recording ──
 else

diff --git a/test/mocks/wtype b/test/mocks/wtype
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Test double for wtype: instead of typing anything, append the arguments it
+# was invoked with to wtype.log under $TALKTYPE_DIR.
+logfile="$TALKTYPE_DIR/wtype.log"
+echo "$@" >> "$logfile"
diff --git a/test/mocks/xdotool b/test/mocks/xdotool
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Test double for xdotool: instead of typing anything, append the arguments it
+# was invoked with to xdotool.log under $TALKTYPE_DIR.
+logfile="$TALKTYPE_DIR/xdotool.log"
+echo "$@" >> "$logfile"
diff --git a/test/talktype.bats b/test/talktype.bats
@@ -8,6 +8,7 @@ setup() {
     export TALKTYPE_CONFIG="/dev/null"
     export TALKTYPE_DIR="$BATS_TEST_TMPDIR/talktype"
     export TALKTYPE_CMD="$BATS_TEST_DIRNAME/mock-transcribe"
+    export WAYLAND_DISPLAY=wayland-0
 
     # Put mocks on PATH before real commands
     export PATH="$BATS_TEST_DIRNAME/mocks:$PATH"
@@ -65,6 +66,7 @@ start_fake_recording() {
 
 @test "transcribed text is typed via ydotool" {
     start_fake_recording
+    export TALKTYPE_TYPE_CMD=ydotool
 
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
@@ -76,6 +78,7 @@ start_fake_recording() {
 @test "custom TALKTYPE_CMD is used for transcription" {
     start_fake_recording
     export TALKTYPE_CMD="$BATS_TEST_DIRNAME/mock-transcribe-custom"
+    export TALKTYPE_TYPE_CMD=ydotool
 
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
@@ -92,8 +95,9 @@ start_fake_recording() {
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
 
-    # ydotool should NOT have been called
+    # No typing tool should have been called
     [ ! -f "$TALKTYPE_DIR/ydotool.log" ]
+    [ ! -f "$TALKTYPE_DIR/wtype.log" ]
 }
 
 # ── Error handling ──
@@ -102,6 +106,7 @@ start_fake_recording() {
     # Simulate a crashed recording: PID file points to a dead process
     echo "99999" > "$TALKTYPE_DIR/rec.pid"
     echo "audio data" > "$TALKTYPE_DIR/rec.wav"
+    export TALKTYPE_TYPE_CMD=ydotool
 
     run "$TALKTYPE"
     [ "$status" -eq 0 ]
@@ -120,8 +125,9 @@ start_fake_recording() {
     # Script should fail (set -e catches the non-zero exit)
     [ "$status" -ne 0 ]
 
-    # ydotool should NOT have been called
+    # No typing tool should have been called
     [ ! -f "$TALKTYPE_DIR/ydotool.log" ]
+    [ ! -f "$TALKTYPE_DIR/wtype.log" ]
 }
 
 # ── Recorder selection ──
@@ -161,25 +167,47 @@ start_fake_recording() {
     [[ "$(cat "$TALKTYPE_DIR/recorder.log")" == "pw-record" ]]
 }
 
+# ── Typing tool selection ──
+
+@test "wtype is preferred on Wayland when available" {
+    # Pin everything auto-detection reads, so the outcome does not depend on
+    # variables inherited from the shell that launched the test suite.
+    unset TALKTYPE_TYPE_CMD
+    export WAYLAND_DISPLAY=wayland-0
+    start_fake_recording
+
+    run "$TALKTYPE"
+    [ "$status" -eq 0 ]
+
+    # wtype received the transcription; ydotool was never invoked
+    [ -f "$TALKTYPE_DIR/wtype.log" ]
+    [ ! -f "$TALKTYPE_DIR/ydotool.log" ]
+    [[ "$(cat "$TALKTYPE_DIR/wtype.log")" == *"hello world"* ]]
+}
+
+@test "TALKTYPE_TYPE_CMD overrides auto-detection" {
+    start_fake_recording
+    # An explicit tool choice must win over whatever auto mode would select.
+    export TALKTYPE_TYPE_CMD=xdotool
+
+    run "$TALKTYPE"
+    [ "$status" -eq 0 ]
+
+    # Only the xdotool mock may have logged a call, and it must hold the text.
+    local typed_log="$TALKTYPE_DIR/xdotool.log"
+    [ -f "$typed_log" ]
+    [ ! -f "$TALKTYPE_DIR/wtype.log" ]
+    grep -qF "hello world" "$typed_log"
+}
+
 # ── Dependency checking ──
 
-@test "fails when a required tool is missing" {
-    # Create a minimal PATH with only the tools we want (no ydotool)
+@test "fails when no typing tool is available" {
+    # Auto-detection must be in effect: an inherited TALKTYPE_TYPE_CMD would
+    # switch the script to checking that single command instead.
+    unset TALKTYPE_TYPE_CMD
+
+    # Create a minimal PATH with only recorder + notify (no typing tools)
     local sparse="$BATS_TEST_TMPDIR/sparse_path"
     mkdir -p "$sparse"
-    ln -sf "$(command -v pw-record)" "$sparse/pw-record"
-    ln -sf "$(command -v notify-send)" "$sparse/notify-send"
-    ln -sf "$(command -v bash)" "$sparse/bash"
-    ln -sf "$(command -v mkdir)" "$sparse/mkdir"
-    ln -sf "$(command -v cat)" "$sparse/cat"
-    ln -sf "$(command -v kill)" "$sparse/kill"
-    ln -sf "$(command -v sleep)" "$sparse/sleep"
-    ln -sf "$(command -v echo)" "$sparse/echo"
-    ln -sf "$(command -v rm)" "$sparse/rm"
+    ln -sf "$BATS_TEST_DIRNAME/mocks/pw-record" "$sparse/pw-record"
+    ln -sf "$BATS_TEST_DIRNAME/mocks/notify-send" "$sparse/notify-send"
+    # type -P resolves on-disk executables only. command -v prints the bare
+    # name for builtins such as echo and kill, which would create a symlink
+    # pointing at itself. Tools that are not installed are simply skipped.
+    local cmd path
+    for cmd in bash mkdir cat kill sleep echo rm pgrep; do
+        path=$(type -P "$cmd") && ln -sf "$path" "$sparse/$cmd"
+    done
 
     PATH="$sparse"
 
     run "$TALKTYPE"
     [ "$status" -eq 1 ]
+    # The report must name the typing tools, not merely say something is missing.
     [[ "$output" == *"Missing"*"ydotool"* ]]
 }