Skip to content

Commit 4557df5

Browse files
authored
Add a persistent Parakeet helper for low-latency host integrations (#18861)
Factor the Parakeet transcription core into a shared ParakeetTranscriber class and add a persistent parakeet_helper binary (stdin/stdout protocol) for long-lived host integrations. The helper is built by the existing Parakeet CMake presets, and the README documents the helper workflow. Merged main and resolved the main.cpp conflict against the TransducerRunner refactor by delegating to ParakeetTranscriber (Token is the shared asr::Token alias). Parakeet CI (test-mlx-parakeet, test-parakeet-xnnpack-linux) is passing.
1 parent 30108d7 commit 4557df5

9 files changed

Lines changed: 1320 additions & 249 deletions

examples/models/parakeet/CMakeLists.txt

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -109,32 +109,49 @@ if(EXECUTORCH_BUILD_VULKAN)
109109
executorch_target_link_options_shared_lib(vulkan_backend)
110110
endif()
111111

112-
add_executable(parakeet_runner main.cpp timestamp_utils.cpp tokenizer_utils.cpp)
113-
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
114-
target_link_options_gc_sections(parakeet_runner)
115-
if(NOT APPLE AND NOT MSVC)
116-
target_link_options(parakeet_runner PRIVATE "LINKER:-s")
117-
endif()
118-
endif()
112+
set(parakeet_shared_sources parakeet_transcriber.cpp timestamp_utils.cpp
113+
tokenizer_utils.cpp
114+
)
119115

120-
# Copy MLX metallib for runtime if MLX delegate is enabled
121-
if(TARGET mlxdelegate)
122-
executorch_target_copy_mlx_metallib(parakeet_runner)
123-
endif()
116+
set(parakeet_common_include_directories
117+
${_common_include_directories} ${EXECUTORCH_ROOT}/third-party/json/include
118+
)
124119

125-
target_include_directories(
126-
parakeet_runner PUBLIC ${_common_include_directories}
120+
add_executable(parakeet_runner main.cpp ${parakeet_shared_sources})
121+
add_executable(
122+
parakeet_helper parakeet_helper.cpp parakeet_helper_protocol.cpp
123+
${parakeet_shared_sources}
127124
)
128-
target_link_libraries(parakeet_runner PUBLIC ${link_libraries})
129-
target_compile_options(parakeet_runner PUBLIC ${_common_compile_options})
125+
126+
foreach(parakeet_target parakeet_runner parakeet_helper)
127+
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
128+
target_link_options_gc_sections(${parakeet_target})
129+
if(NOT APPLE AND NOT MSVC)
130+
target_link_options(${parakeet_target} PRIVATE "LINKER:-s")
131+
endif()
132+
endif()
133+
134+
if(TARGET mlxdelegate)
135+
executorch_target_copy_mlx_metallib(${parakeet_target})
136+
endif()
137+
138+
target_include_directories(
139+
${parakeet_target} PUBLIC ${parakeet_common_include_directories}
140+
)
141+
target_link_libraries(${parakeet_target} PUBLIC ${link_libraries})
142+
target_compile_options(${parakeet_target} PUBLIC ${_common_compile_options})
143+
endforeach()
130144

131145
# On Windows, copy required DLLs to the executable directory
132146
if(MSVC AND EXECUTORCH_BUILD_CUDA)
133-
add_custom_command(
134-
TARGET parakeet_runner
135-
POST_BUILD
136-
COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:aoti_cuda_shims>
137-
$<TARGET_FILE_DIR:parakeet_runner>
138-
COMMENT "Copying aoti_cuda_shims.dll to parakeet_runner directory"
139-
)
147+
foreach(parakeet_target parakeet_runner parakeet_helper)
148+
add_custom_command(
149+
TARGET ${parakeet_target}
150+
POST_BUILD
151+
COMMAND
152+
${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:aoti_cuda_shims>
153+
$<TARGET_FILE_DIR:${parakeet_target}>
154+
COMMENT "Copying aoti_cuda_shims.dll to ${parakeet_target} directory"
155+
)
156+
endforeach()
140157
endif()

examples/models/parakeet/CMakePresets.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,42 +89,42 @@
8989
"displayName": "Build Parakeet runner (CPU)",
9090
"configurePreset": "parakeet-cpu",
9191
"configuration": "Release",
92-
"targets": ["parakeet_runner"]
92+
"targets": ["parakeet_runner", "parakeet_helper"]
9393
},
9494
{
9595
"name": "parakeet-cuda",
9696
"displayName": "Build Parakeet runner (CUDA)",
9797
"configurePreset": "parakeet-cuda",
9898
"configuration": "Release",
99-
"targets": ["parakeet_runner"]
99+
"targets": ["parakeet_runner", "parakeet_helper"]
100100
},
101101
{
102102
"name": "parakeet-cuda-debug",
103103
"displayName": "Build Parakeet runner (CUDA, Debug)",
104104
"configurePreset": "parakeet-cuda-debug",
105105
"configuration": "Debug",
106-
"targets": ["parakeet_runner"]
106+
"targets": ["parakeet_runner", "parakeet_helper"]
107107
},
108108
{
109109
"name": "parakeet-metal",
110110
"displayName": "Build Parakeet runner (Metal)",
111111
"configurePreset": "parakeet-metal",
112112
"configuration": "Release",
113-
"targets": ["parakeet_runner"]
113+
"targets": ["parakeet_runner", "parakeet_helper"]
114114
},
115115
{
116116
"name": "parakeet-mlx",
117117
"displayName": "Build Parakeet runner (MLX)",
118118
"configurePreset": "parakeet-mlx",
119119
"configuration": "Release",
120-
"targets": ["parakeet_runner"]
120+
"targets": ["parakeet_runner", "parakeet_helper"]
121121
},
122122
{
123123
"name": "parakeet-vulkan",
124124
"displayName": "Build Parakeet runner (Vulkan)",
125125
"configurePreset": "parakeet-vulkan",
126126
"configuration": "Release",
127-
"targets": ["parakeet_runner"]
127+
"targets": ["parakeet_runner", "parakeet_helper"]
128128
}
129129
],
130130
"workflowPresets": [

examples/models/parakeet/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,11 @@ make parakeet-cuda
242242
make parakeet-mlx
243243
```
244244

245+
Each Parakeet build now produces both:
246+
247+
- `parakeet_runner` for one-shot CLI transcription from an audio file
248+
- `parakeet_helper` for long-lived host integrations: it keeps the model warm and serves PCM requests over stdin/stdout
249+
245250
On Windows (PowerShell), use CMake workflow presets directly:
246251

247252
```powershell
@@ -310,6 +315,26 @@ If your generator is single-config, the runner may be at `.\cmake-out\examples\m
310315
| `--data_path` | Path to data file (.ptd) for delegate data (required for CUDA/CUDA-Windows) |
311316
| `--timestamps` | Timestamp output mode: `none\|token\|word\|segment\|all` (default: `segment`) |
312317

318+
### Persistent Helper
319+
320+
The helper binary uses the same Parakeet transcription stack as `parakeet_runner`,
321+
but keeps the model loaded across multiple requests so host apps can avoid repeated
322+
startup and model load overhead.
323+
324+
Example:
325+
326+
```bash
327+
# Metal
328+
DYLD_LIBRARY_PATH=/usr/lib ./cmake-out/examples/models/parakeet/parakeet_helper \
329+
--model_path examples/models/parakeet/parakeet_metal/model.pte \
330+
--tokenizer_path examples/models/parakeet/parakeet_metal/tokenizer.model
331+
```
332+
333+
The helper accepts framed requests over stdin, validates 16 kHz mono float32 PCM
334+
payloads, and returns status/result messages over stdout. It is intended for app
335+
integrations such as the macOS `ExecuWhisper` frontend in the separate
336+
`executorch-examples` repository.
337+
313338
### Mobile App
314339

315340
Check out a [demo Android app](https://github.com/meta-pytorch/executorch-examples/tree/main/parakeet/android/ParakeetApp) for Parakeet in the separate `executorch-examples` repository.

0 commit comments

Comments
 (0)