Skip to content

Commit

Permalink
Adding sanitizer tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Oct 8, 2022
1 parent 29b041f commit 2f06933
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 10 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
- name: Build
run: |
make
ctest --output-on-failure
ctest -L gh --output-on-failure
ubuntu-latest-clang:
runs-on: ubuntu-latest
Expand All @@ -87,7 +87,7 @@ jobs:
- name: Build
run: |
make
ctest --output-on-failure
ctest -L gh --output-on-failure
ubuntu-latest-gcc-sanitized:
runs-on: ubuntu-latest
Expand All @@ -112,4 +112,4 @@ jobs:
- name: Build
run: |
make
ctest --output-on-failure
ctest -L gh --output-on-failure
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ stream
*.o
.cache
build/
compile_commands.json
62 changes: 62 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# End-to-end tests: each test runs the `main` executable on samples/jfk.wav
# with one of the models/for-tests-ggml-<model>.bin model files.
#
# Every test carries CTest labels so that subsets can be selected with
# `ctest -L <label>` (for example `ctest -L gh` runs only the tests labelled `gh`).

# whisper_add_main_test(<model> <label>...)
#
# Registers the test `test-main-<model>`.
#   <model>    - model name as it appears in the model file name, e.g. `tiny.en`
#   <label>... - CTest labels attached to the test
function(whisper_add_main_test model)
    set(TEST_TARGET test-main-${model})
    add_test(NAME ${TEST_TARGET}
        COMMAND $<TARGET_FILE:main>
        -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-${model}.bin
        -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
    # Quoting ${ARGN} passes all labels as a single ;-separated list value.
    set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "${ARGN}")
endfunction()

whisper_add_main_test(tiny      tiny gh)
whisper_add_main_test(tiny.en   tiny en gh)
whisper_add_main_test(base      base)
whisper_add_main_test(base.en   base en)
whisper_add_main_test(small     small)
whisper_add_main_test(small.en  small en)
whisper_add_main_test(medium    medium)
whisper_add_main_test(medium.en medium en)
whisper_add_main_test(large     large)
11 changes: 9 additions & 2 deletions whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {

// load weights
{
int n_loaded = 0;
size_t total_size = 0;

while (true) {
Expand Down Expand Up @@ -1004,9 +1005,17 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {

//printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
total_size += ggml_nbytes(tensor);
n_loaded++;
}

printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);

if (n_loaded == 0) {
printf("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
} else if (n_loaded != model.tensors.size()) {
fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), n_loaded);
return false;
}
}

fin.close();
Expand Down Expand Up @@ -1772,8 +1781,6 @@ bool whisper_decode(
}

// the most basic sampling scheme - select the top token
// TODO: beam search
// TODO: temperature
whisper_vocab::id whisper_sample_best(
const whisper_vocab & vocab,
const float * probs, bool need_timestamp) {
Expand Down
11 changes: 6 additions & 5 deletions whisper.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,12 @@ extern "C" {
// return the id of the specified language, returns -1 if not found
WHISPER_API int whisper_lang_id(const char * lang);

WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
WHISPER_API float * whisper_get_probs (struct whisper_context * ctx);
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);

WHISPER_API float * whisper_get_probs(struct whisper_context * ctx);

WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);

Expand Down

0 comments on commit 2f06933

Please sign in to comment.