Skip to content

Commit b4458d4

Browse files
threading: support for GGML_SCHED_PRIO_LOW, update thread info on Windows to avoid throttling
We talked about adding LOW priority for GGML threads in the original threadpool PR. It might be useful in some cases to avoid contention. The latest Windows ARM64 releases started parking (offlining) CPU cores more aggressively, which results in suboptimal performance with n_threads > 4. To deal with that, we now disable Power Throttling for our threads at the NORMAL and higher priorities.
1 parent 2f74c35 commit b4458d4

File tree

5 files changed

+27
-3
lines changed

5 files changed

+27
-3
lines changed

common/arg.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1219,9 +1219,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
12191219
));
12201220
add_opt(common_arg(
12211221
{"--prio"}, "N",
1222-
string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
1222+
string_format("set process/thread priority : low(-1), normal(0), medium(1), high(2), realtime(3) (default: %d)\n", params.cpuparams.priority),
12231223
[](common_params & params, int prio) {
1224-
if (prio < 0 || prio > 3) {
1224+
if (prio < GGML_SCHED_PRIO_LOW || prio > GGML_SCHED_PRIO_REALTIME) {
12251225
throw std::invalid_argument("invalid value");
12261226
}
12271227
params.cpuparams.priority = (enum ggml_sched_priority) prio;

common/common.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
203203

204204
DWORD p = NORMAL_PRIORITY_CLASS;
205205
switch (prio) {
206+
case GGML_SCHED_PRIO_LOW: p = BELOW_NORMAL_PRIORITY_CLASS; break;
206207
case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break;
207208
case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break;
208209
case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break;
@@ -228,6 +229,7 @@ bool set_process_priority(enum ggml_sched_priority prio) {
228229

229230
int p = 0;
230231
switch (prio) {
232+
case GGML_SCHED_PRIO_LOW: p = 5; break;
231233
case GGML_SCHED_PRIO_NORMAL: p = 0; break;
232234
case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
233235
case GGML_SCHED_PRIO_HIGH: p = -10; break;

examples/llama-bench/llama-bench.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ static void print_usage(int /* argc */, char ** argv) {
266266
join(cmd_params_defaults.embeddings, ",").c_str());
267267
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
268268
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
269-
printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
269+
printf(" --prio <-1,0|1|2|3> (default: %d)\n", cmd_params_defaults.prio);
270270
printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay);
271271
printf(" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n",
272272
output_format_str(cmd_params_defaults.output_format));

ggml/include/ggml.h

+1
Original file line numberDiff line numberDiff line change
@@ -2138,6 +2138,7 @@ extern "C" {
21382138

21392139
// scheduling priorities
21402140
enum ggml_sched_priority {
2141+
GGML_SCHED_PRIO_LOW = -1,
21412142
GGML_SCHED_PRIO_NORMAL,
21422143
GGML_SCHED_PRIO_MEDIUM,
21432144
GGML_SCHED_PRIO_HIGH,

ggml/src/ggml-cpu/ggml-cpu.c

+21
Original file line numberDiff line numberDiff line change
@@ -2417,12 +2417,30 @@ static bool ggml_thread_apply_priority(int32_t prio) {
24172417
// This is up to the applications.
24182418
DWORD p = THREAD_PRIORITY_NORMAL;
24192419
switch (prio) {
2420+
case GGML_SCHED_PRIO_LOW: p = THREAD_PRIORITY_BELOW_NORMAL; break;
24202421
case GGML_SCHED_PRIO_NORMAL: p = THREAD_PRIORITY_NORMAL; break;
24212422
case GGML_SCHED_PRIO_MEDIUM: p = THREAD_PRIORITY_ABOVE_NORMAL; break;
24222423
case GGML_SCHED_PRIO_HIGH: p = THREAD_PRIORITY_HIGHEST; break;
24232424
case GGML_SCHED_PRIO_REALTIME: p = THREAD_PRIORITY_TIME_CRITICAL; break;
24242425
}
24252426

2427+
if (prio != GGML_SCHED_PRIO_LOW) {
2428+
// Tell Windows that this thread should not be throttled (needs its own CPU core).
2429+
// Newer Windows 11 ARM64 versions aggressively park (offline) CPU cores and often place
2430+
// all our threads onto the first 4 cores which results in terrible performance with
2431+
// n_threads > 4
2432+
THREAD_POWER_THROTTLING_STATE p;
2433+
ZeroMemory(&p, sizeof(p));
2434+
p.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
2435+
p.ControlMask = THREAD_POWER_THROTTLING_EXECUTION_SPEED;
2436+
p.StateMask = 0;
2437+
2438+
if (!SetThreadInformation(GetCurrentThread(), ThreadPowerThrottling, &p, sizeof(p))) {
2439+
fprintf(stderr, "warn: failed to disable thread power throttling %d : (%d)\n", prio, (int) GetLastError());
2440+
return false;
2441+
}
2442+
}
2443+
24262444
if (prio == GGML_SCHED_PRIO_NORMAL) {
24272445
// Keep inherited policy/priority
24282446
return true;
@@ -2450,6 +2468,8 @@ static bool ggml_thread_apply_priority(int32_t prio) {
24502468
struct sched_param p;
24512469
int32_t policy = SCHED_OTHER;
24522470
switch (prio) {
2471+
// TODO: there seems to be no way to set lower prio on Apple platforms
2472+
case GGML_SCHED_PRIO_LOW: policy = SCHED_OTHER; p.sched_priority = 0; break;
24532473
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
24542474
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
24552475
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;
@@ -2506,6 +2526,7 @@ static bool ggml_thread_apply_priority(int32_t prio) {
25062526
struct sched_param p;
25072527
int32_t policy = SCHED_OTHER;
25082528
switch (prio) {
2529+
case GGML_SCHED_PRIO_LOW: policy = SCHED_BATCH; p.sched_priority = 0; break;
25092530
case GGML_SCHED_PRIO_NORMAL: policy = SCHED_OTHER; p.sched_priority = 0; break;
25102531
case GGML_SCHED_PRIO_MEDIUM: policy = SCHED_FIFO; p.sched_priority = 40; break;
25112532
case GGML_SCHED_PRIO_HIGH: policy = SCHED_FIFO; p.sched_priority = 80; break;

0 commit comments

Comments
 (0)