Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 56 additions & 8 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1251,56 +1251,104 @@ jobs:
# TODO: simplify the following workflows using a matrix
# TODO: run lighter CI on PRs and the full CI only on master (if needed)
ggml-ci-x64-cpu-low-perf:
runs-on: [self-hosted, Linux, X64, CPU, low-perf]
runs-on: ubuntu-22.04

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: ggml-org/[email protected]
with:
key: ggml-ci-x64-cpu-low-perf
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libcurl4-openssl-dev

- name: Test
id: ggml-ci
run: |
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

ggml-ci-arm64-cpu-low-perf:
runs-on: [self-hosted, Linux, ARM64, CPU, low-perf]
runs-on: ubuntu-22.04-arm

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: ggml-org/[email protected]
with:
key: ggml-ci-arm64-cpu-low-perf
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libcurl4-openssl-dev

- name: Test
id: ggml-ci
run: |
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

ggml-ci-x64-cpu-high-perf:
runs-on: [self-hosted, Linux, X64, CPU, high-perf]
runs-on: ubuntu-22.04

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: ggml-org/[email protected]
with:
key: ggml-ci-x64-cpu-high-perf
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libcurl4-openssl-dev

- name: Test
id: ggml-ci
run: |
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
LLAMA_ARG_THREADS=$(nproc) bash ./ci/run.sh ./tmp/results ./tmp/mnt

ggml-ci-arm64-cpu-high-perf:
runs-on: [self-hosted, Linux, ARM64, CPU, high-perf]
runs-on: ubuntu-22.04-arm

steps:
- name: Clone
id: checkout
uses: actions/checkout@v4

- name: ccache
uses: ggml-org/[email protected]
with:
key: ggml-ci-arm64-cpu-high-perf
evict-old-files: 1d

- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libcurl4-openssl-dev

- name: Test
id: ggml-ci
run: |
GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

ggml-ci-x64-nvidia-cuda:
runs-on: [self-hosted, Linux, X64, NVIDIA]
Expand Down
26 changes: 15 additions & 11 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ if [ ! -z ${GG_BUILD_MUSA} ]; then
MUSA_ARCH=${MUSA_ARCH:-21}
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
fi

if [ ! -z ${GG_BUILD_NO_SVE} ]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
fi
## helpers

# download a file if it does not exist or if it is outdated
Expand Down Expand Up @@ -345,16 +349,16 @@ function gg_run_qwen3_0_6b {

wiki_test="${path_wiki}/wiki.test.raw"

./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0
./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0
./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1
./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0
./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1
./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k
./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k
./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k
./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k
./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k
./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0 $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0 $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1 $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0 $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1 $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k $(nproc)
./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k $(nproc)

(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
(time ./bin/llama-cli -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-bf16.log
Expand Down Expand Up @@ -427,7 +431,7 @@ function gg_run_qwen3_0_6b {
function gg_sum_qwen3_0_6b {
gg_printf '### %s\n\n' "${ci}"

gg_printf 'Pythia 2.8B:\n'
gg_printf 'Qwen3 0.6B:\n'
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
Expand Down
11 changes: 1 addition & 10 deletions tests/test-quantize-perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,7 @@ int main(int argc, char * argv[]) {

int64_t iterations = params.iterations;


// Initialize GGML, ensures float conversion tables are initialized
struct ggml_init_params ggml_params = {
/* .mem_size = */ 1*1024,
/* .mem_buffer = */ NULL,
/* .no_alloc = */ true,
};
struct ggml_context * ctx = ggml_init(ggml_params);
ggml_cpu_init();

for (int i = 0; i < GGML_TYPE_COUNT; i++) {
ggml_type type = (ggml_type) i;
Expand Down Expand Up @@ -359,7 +352,5 @@ int main(int argc, char * argv[]) {
}
}

ggml_free(ctx);

return 0;
}
Loading