Skip to content

Commit

Permalink
examples : add "command" tool (ggerganov#171)
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Nov 25, 2022
1 parent b8ce25d commit bc88eb1
Show file tree
Hide file tree
Showing 9 changed files with 735 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ build-sanitize-thread/

main
stream
command
bench
sync.sh
compile_commands.json
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ libwhisper.so: ggml.o whisper.o
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)

clean:
rm -f *.o main stream bench libwhisper.a libwhisper.so
rm -f *.o main stream command bench libwhisper.a libwhisper.so

#
# Examples
Expand All @@ -149,6 +149,9 @@ main: examples/main/main.cpp ggml.o whisper.o
stream: examples/stream/stream.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)

command: examples/command/command.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/command/command.cpp ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)

bench: examples/bench/bench.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)

Expand Down
51 changes: 26 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,26 +98,27 @@ c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o gg
usage: ./main [options] file0.wav file1.wav ...

options:
-h, --help show this help message and exit
-t N, --threads N number of threads to use during computation (default: 4)
-p N, --processors N number of processors to use during computation (default: 1)
-ot N, --offset-t N time offset in milliseconds (default: 0)
-on N, --offset-n N segment index offset (default: 0)
-mc N, --max-context N maximum number of text context tokens to store (default: max)
-ml N, --max-len N maximum segment length in characters (default: 0)
-wt N, --word-thold N word timestamp probability threshold (default: 0.010000)
-v, --verbose verbose output
--translate translate from source language to english
-otxt, --output-txt output result in a text file
-ovtt, --output-vtt output result in a vtt file
-osrt, --output-srt output result in a srt file
-owts, --output-words output script for generating karaoke video
-ps, --print_special print special tokens
-pc, --print_colors print colors
-nt, --no_timestamps do not print timestamps
-l LANG, --language LANG spoken language (default: en)
-m FNAME, --model FNAME model path (default: models/ggml-base.en.bin)
-f FNAME, --file FNAME input WAV file path
-h, --help [default] show this help message and exit
-t N, --threads N [4 ] number of threads to use during computation
-p N, --processors N [1 ] number of processors to use during computation
-ot N, --offset-t N [0 ] time offset in milliseconds
-on N, --offset-n N [0 ] segment index offset
-d N, --duration N [0 ] duration of audio to process in milliseconds
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
-ml N, --max-len N [0 ] maximum segment length in characters
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-su, --speed-up [false ] speed up audio by x2 (reduced accuracy)
-tr, --translate [false ] translate from source language to english
-otxt, --output-txt [false ] output result in a text file
-ovtt, --output-vtt [false ] output result in a vtt file
-osrt, --output-srt [false ] output result in a srt file
-owts, --output-words [false ] output script for generating karaoke video
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-nt, --no-timestamps [true ] do not print timestamps
-l LANG, --language LANG [en ] spoken language
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-f FNAME, --file FNAME [ ] input WAV file path

bash ./models/download-ggml-model.sh base.en
Downloading ggml model base.en ...
Expand Down Expand Up @@ -149,13 +150,13 @@ whisper_model_load: n_text_layer = 6
whisper_model_load: n_mels = 80
whisper_model_load: f16 = 1
whisper_model_load: type = 2
whisper_model_load: mem_required = 670.00 MB
whisper_model_load: adding 1607 extra tokens
whisper_model_load: ggml ctx size = 140.60 MB
whisper_model_load: memory size = 22.83 MB
whisper_model_load: model size = 140.54 MB
whisper_model_load: mem_required = 506.00 MB
whisper_model_load: ggml ctx size = 140.60 MB
whisper_model_load: memory size = 22.83 MB
whisper_model_load: model size = 140.54 MB

system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |

main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...

Expand Down
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@ if (EMSCRIPTEN)
else()
add_subdirectory(main)
add_subdirectory(stream)
add_subdirectory(command)
add_subdirectory(bench)
endif()
7 changes: 7 additions & 0 deletions examples/command/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
if (WHISPER_SUPPORT_SDL2)
# command
set(TARGET command)
add_executable(${TARGET} command.cpp)
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endif ()
26 changes: 26 additions & 0 deletions examples/command/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# command

This is a basic Voice Assistant example that accepts voice commands from the microphone.
More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/issues/171).

```java
# Run with default arguments and small model
./command -m ./models/ggml-small.en.bin -t 8

# On Raspberry Pi, use tiny or base models + "-ac 768" for better performance
./bin/command -m ../models/ggml-tiny.en.bin -ac 768
```

## Building

The `command` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:

```bash
# Install SDL2 on Linux
sudo apt-get install libsdl2-dev

# Install SDL2 on Mac OS
brew install sdl2

make command
```
Loading

0 comments on commit bc88eb1

Please sign in to comment.