Skip to content

Commit

Permalink
refactoring : move main + stream in examples + other stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Oct 25, 2022
1 parent 4c68f4c commit c6710ef
Show file tree
Hide file tree
Showing 18 changed files with 205 additions and 102 deletions.
20 changes: 12 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
sync.sh
main
stream
*.o
.cache
.cache/
.vs/
.vscode/
.DS_Store

build/
build-em/
build-debug/
build-release/
out/
.vs/
.vscode/
build-sanitize-addr/
build-sanitize-thread/

main
stream
bench
sync.sh
compile_commands.json
.DS_Store

examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
Expand Down
35 changes: 7 additions & 28 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" O

if (NOT MSVC)
if (WHISPER_SANITIZE_THREAD)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
endif()

Expand Down Expand Up @@ -133,7 +133,9 @@ else()
endif()
endif()

#
# whisper - this is the main library of the project
#

set(TARGET whisper)

Expand Down Expand Up @@ -167,40 +169,17 @@ install(TARGETS ${TARGET}
ARCHIVE DESTINATION lib/static
)

#
# bindings
#

add_subdirectory(bindings)

#
# programs, examples and tests
#

if (WHISPER_STANDALONE)
if (NOT EMSCRIPTEN)
# TODO: move to examples
# main
set(TARGET main)
add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})

# TODO: move to examples
if (WHISPER_SUPPORT_SDL2)
if (WHISPER_SUPPORT_SDL2)
# SDL2
find_package(SDL2 REQUIRED)

string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)

message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
endif()

# stream
set(TARGET stream)
add_executable(${TARGET} stream.cpp)
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endif ()
endif()

if (WHISPER_BUILD_TESTS)
enable_testing()
add_subdirectory(tests)
Expand Down
22 changes: 11 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,10 @@ endif
# Compile flags
#

CFLAGS = -O3 -std=c11
CXXFLAGS = -O3 -std=c++11
CFLAGS = -I. -O3 -std=c11
CXXFLAGS = -I. -I./examples -O3 -std=c++11
LDFLAGS =

CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function
CXXFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-function

# OS specific
# TODO: support Windows
ifeq ($(UNAME_S),Linux)
Expand Down Expand Up @@ -76,8 +73,8 @@ endif
# Build library + main
#

main: main.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) main.cpp whisper.o ggml.o -o main $(LDFLAGS)
main: examples/main/main.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/main/main.cpp whisper.o ggml.o -o main $(LDFLAGS)
./main -h

ggml.o: ggml.c ggml.h
Expand All @@ -90,16 +87,19 @@ libwhisper.a: ggml.o whisper.o
ar rcs libwhisper.a ggml.o whisper.o

clean:
rm -f *.o main stream libwhisper.a
rm -f *.o main stream bench libwhisper.a

#
# Examples
#

CC_SDL=`sdl2-config --cflags --libs`

stream: stream.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
stream: examples/stream/stream.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)

bench: examples/bench/bench.cpp ggml.o whisper.o
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)

#
# Audio samples
Expand Down Expand Up @@ -139,7 +139,7 @@ samples:
.PHONY: large

tiny.en tiny base.en base small.en small medium.en medium large: main
bash ./download-ggml-model.sh $@
bash ./models/download-ggml-model.sh $@
@echo ""
@echo "==============================================="
@echo "Running $@ on all samples in ./samples ..."
Expand Down
36 changes: 23 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,32 @@ Supported platforms:
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/issues/7)
- [x] [Android](https://github.com/ggerganov/whisper.cpp/issues/30)

The entire implementation of the model is contained in 2 source files:

- [ggml.h](ggml.h) / [ggml.c](ggml.c)
- [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)

Having such a lightweight implementation of the model makes it easy to integrate it into different platforms and applications.
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device:

https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4

## Usage
## Quick start

To build the main program, run `make`. You can then transcribe a `.wav` file like this:
First, download one of the Whisper models converted in [ggml format](models). For example:

```bash
./main -f input.wav
bash ./models/download-ggml-model.sh base.en
```

Before running the program, make sure to download one of the ggml Whisper models. For example:
Now build the [main](examples/main) example and transcribe an audio file like this:

```bash
bash ./download-ggml-model.sh base.en
# build the main example
make

# transcribe an audio file
./main -f input.wav
```

---
Expand Down Expand Up @@ -73,7 +82,7 @@ options:
-m FNAME, --model FNAME model path (default: models/ggml-base.en.bin)
-f FNAME, --file FNAME input WAV file path

bash ./download-ggml-model.sh base.en
bash ./models/download-ggml-model.sh base.en
Downloading ggml model base.en ...
models/ggml-base.en.bin 100%[=============================================>] 141.11M 3.13MB/s in 79s
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
Expand Down Expand Up @@ -232,7 +241,7 @@ whisper_print_timings: total time = 33686.27 ms
## Real-time audio input example
This is a naive example of performing real-time inference on audio from your microphone.
The `stream` tool samples the audio every half a second and runs the transcription continuously.
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
```java
Expand All @@ -241,7 +250,7 @@ More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/i
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
The `stream` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
The [stream](examples/stream) tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
```bash
# Install SDL2 on Linux
Expand All @@ -264,8 +273,9 @@ to highlight words with high or low confidence:
- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
- The high-level C-style API is implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
- Simple usage is demonstrated in [main.cpp](main.cpp)
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](stream.cpp)
- Sample usage is demonstrated in [main.cpp](examples/main)
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
- Various other examples are available in the [examples](examples) folder
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
Expand All @@ -279,11 +289,11 @@ the Accelerate framework utilizes the special-purpose AMX coprocessor available
This should be similar to the [GreedyDecoder](https://github.com/openai/whisper/blob/main/whisper/decoding.py#L249-L274)
from the original python implementation, so in order to make a fair comparison between the 2 implementations, make sure
to run the python code with the following parameters:
```
whisper --best_of None --beam_size None ...
```
In the future, `whisper.cpp` will support more sampling strategies.
## Memory usage
Expand All @@ -306,7 +316,7 @@ The original models are converted to a custom binary format. This allows to pack
- vocabulary
- weights
You can download the converted models using the [download-ggml-model.sh](download-ggml-model.sh) script or from here:
You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script or from here:
https://ggml.ggerganov.com
Expand Down
15 changes: 14 additions & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,24 @@ find_package(Threads REQUIRED)

# third-party

#add_subdirectory(third-party)
# Locate SDL2 only when SDL2 support was requested via WHISPER_SUPPORT_SDL2.
# find_package(... REQUIRED) aborts the configure step if SDL2 is not found.
if (WHISPER_SUPPORT_SDL2)
# SDL2
find_package(SDL2 REQUIRED)

# Remove leading/trailing whitespace from the library list reported by SDL2.
# NOTE(review): presumably a workaround for SDL2 config files that emit padded
# values - confirm before removing.
string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)

# Print the discovered paths so they show up in the configure log.
message(STATUS "SDL2_INCLUDE_DIRS = ${SDL2_INCLUDE_DIRS}")
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
endif()

# examples

# Add this directory to the include path of every target defined below,
# including those created by the add_subdirectory() calls.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Emscripten builds only get the whisper.wasm example; all other builds get
# the native command-line examples (main, stream, bench).
if (EMSCRIPTEN)
add_subdirectory(whisper.wasm)
else()
add_subdirectory(main)
add_subdirectory(stream)
add_subdirectory(bench)
endif()
3 changes: 3 additions & 0 deletions examples/bench/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# bench: executable built from bench.cpp.
set(TARGET bench)

add_executable(${TARGET}
    bench.cpp
)

# Link against the whisper library and the platform thread library.
target_link_libraries(${TARGET}
    PRIVATE
        whisper
        ${CMAKE_THREAD_LIBS_INIT}
)
3 changes: 3 additions & 0 deletions examples/bench/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# bench

TODO
78 changes: 78 additions & 0 deletions examples/bench/bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#include "whisper.h"

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <string>
#include <thread>

// command-line parameters
//
// Holds the settings of the bench tool together with their default values.
struct whisper_params {
    // Number of threads used for the computation: the hardware concurrency,
    // capped at 4. std::thread::hardware_concurrency() is allowed to return 0
    // when the value cannot be determined, so the result is clamped to at
    // least 1 to avoid defaulting to zero threads.
    int32_t n_threads = std::max(1, std::min(4, (int32_t) std::thread::hardware_concurrency()));

    // Path of the ggml model file to load.
    std::string model = "models/ggml-base.en.bin";
};

void whisper_print_usage(int argc, char ** argv, const whisper_params & params);

// Parse the command-line arguments into `params`.
//
// Recognized options:
//   -t N,     --threads N    number of threads
//   -m FNAME, --model FNAME  model path
//   -h,       --help         print the usage text and exit with status 0
//
// Returns true when all arguments were consumed successfully. On an unknown
// argument, an option that is missing its value, or a thread count that is not
// a valid integer, the problem is reported on stderr, the usage text is printed
// and false is returned so the caller can terminate with a failure status.
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
    for (int i = 1; i < argc; i++) {
        const std::string arg = argv[i];

        if (arg == "-h" || arg == "--help") {
            whisper_print_usage(argc, argv, params);
            std::exit(0);
        }

        const bool is_threads = arg == "-t" || arg == "--threads";
        const bool is_model   = arg == "-m" || arg == "--model";

        if (!is_threads && !is_model) {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
            return false;
        }

        // Both remaining options take a value; make sure it is actually there
        // before advancing, otherwise argv would be read past its last entry.
        if (i + 1 >= argc) {
            fprintf(stderr, "error: missing value for argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
            return false;
        }

        const char * value = argv[++i];

        if (is_model) {
            params.model = value;
            continue;
        }

        // std::stoi throws std::invalid_argument / std::out_of_range on bad
        // input; turn that into a regular parse failure instead of a crash.
        try {
            params.n_threads = std::stoi(value);
        } catch (const std::exception &) {
            fprintf(stderr, "error: invalid thread count: %s\n", value);
            whisper_print_usage(argc, argv, params);
            return false;
        }
    }

    return true;
}

// Write the command-line help text to stderr.
//
// argv[0] is shown as the program name and the current values stored in
// `params` are shown as the defaults. argc is not used.
void whisper_print_usage(int argc, char ** argv, const whisper_params & params) {
    (void) argc;

    const char *  program    = argv[0];
    const int32_t n_threads  = params.n_threads;
    const char *  model_path = params.model.c_str();

    fputs("\n", stderr);
    fprintf(stderr, "usage: %s [options]\n", program);
    fputs("\n", stderr);
    fputs("options:\n", stderr);
    fputs(" -h, --help show this help message and exit\n", stderr);
    fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", n_threads);
    fprintf(stderr, " -m FNAME, --model FNAME model path (default: %s)\n", model_path);
    fputs("\n", stderr);
}

// Entry point of the bench tool.
//
// Parses the command line, creates a whisper context from the selected model,
// calls whisper_set_mel with a null buffer and zero length, runs
// whisper_encode once with the requested number of threads and prints the
// timings collected by the library.
//
// Exit codes:
//   0 - success
//   1 - argument parsing failed
//   2 - the whisper context could not be created
//   3 - whisper_set_mel failed
//   4 - whisper_encode failed
int main(int argc, char ** argv) {
    whisper_params params;

    if (!whisper_params_parse(argc, argv, params)) {
        return 1;
    }

    // whisper init

    struct whisper_context * ctx = whisper_init(params.model.c_str());

    if (ctx == nullptr) {
        fprintf(stderr, "error: failed to initialize whisper context\n");
        return 2;
    }

    // From here on the context is released on every exit path.

    if (const int ret = whisper_set_mel(ctx, nullptr, 0, WHISPER_N_MEL)) {
        fprintf(stderr, "error: failed to set mel: %d\n", ret);
        whisper_free(ctx);
        return 3;
    }

    // Keep the call and the comparison separate: `int ret = f() != 0` would
    // store the result of the comparison (always 1) instead of the error code.
    const int ret = whisper_encode(ctx, 0, params.n_threads);
    if (ret != 0) {
        fprintf(stderr, "error: failed to encode model: %d\n", ret);
        whisper_free(ctx);
        return 4;
    }

    whisper_print_timings(ctx);
    whisper_free(ctx);

    return 0;
}
File renamed without changes.
3 changes: 3 additions & 0 deletions examples/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# main: executable built from main.cpp.
set(TARGET main)

add_executable(${TARGET}
    main.cpp
)

# Link against the whisper library and the platform thread library.
target_link_libraries(${TARGET}
    PRIVATE
        whisper
        ${CMAKE_THREAD_LIBS_INIT}
)
Empty file added examples/main/README.md
Empty file.
Loading

0 comments on commit c6710ef

Please sign in to comment.