Skip to content

Commit eb062bb

Browse files
authored
Windows fixes (#31)
* Apply fixes suggested to build on windows Issue: ggml-org#22
* Remove unsupported VLAs
* MSVC: Remove features that are only available on MSVC C++20.
* Fix zero initialization of the other fields.
* Change the use of vector for stack allocations.
1 parent 7027a97 commit eb062bb

File tree

4 files changed

+30
-19
lines changed

4 files changed

+30
-19
lines changed

ggml.c

+10-10
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
407407
const int nb = k / QK;
408408
const size_t bs = sizeof(float) + QK/2;
409409

410-
uint8_t * restrict pd = (uint8_t *) (y + 0*bs);
411-
uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float));
410+
uint8_t * restrict pd = ((uint8_t *)y + 0*bs);
411+
uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float));
412412

413413
uint8_t pp[QK/2];
414414

@@ -654,8 +654,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
654654
const int nb = k / QK;
655655
const size_t bs = sizeof(float) + QK/2;
656656

657-
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
658-
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
657+
const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
658+
const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
659659

660660
// scalar
661661
for (int i = 0; i < nb; i++) {
@@ -1301,11 +1301,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
13011301

13021302
const size_t bs = sizeof(float) + QK/2;
13031303

1304-
const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs);
1305-
const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs);
1304+
const uint8_t * restrict pd0 = ((const uint8_t *)x + 0*bs);
1305+
const uint8_t * restrict pd1 = ((const uint8_t *)y + 0*bs);
13061306

1307-
const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float));
1308-
const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float));
1307+
const uint8_t * restrict pb0 = ((const uint8_t *)x + 0*bs + sizeof(float));
1308+
const uint8_t * restrict pb1 = ((const uint8_t *)y + 0*bs + sizeof(float));
13091309

13101310
float sumf = 0.0;
13111311

@@ -1731,8 +1731,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
17311731
const int nb = n / QK;
17321732
const size_t bs = sizeof(float) + QK/2;
17331733

1734-
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
1735-
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
1734+
const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
1735+
const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
17361736

17371737
#if __ARM_NEON
17381738
#if QK == 32

main.cpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
209209
// create the ggml context
210210
{
211211
struct ggml_init_params params = {
212-
.mem_size = ctx_size,
213-
.mem_buffer = NULL,
212+
/*.mem_size =*/ ctx_size,
213+
/*.mem_buffer =*/ NULL,
214214
};
215215

216216
model.ctx = ggml_init(params);
@@ -546,12 +546,13 @@ bool llama_eval(
546546
}
547547

548548
struct ggml_init_params params = {
549-
.mem_size = buf_size,
550-
.mem_buffer = buf,
549+
/*.mem_size =*/ buf_size,
550+
/*.mem_buffer =*/ buf,
551551
};
552552

553553
struct ggml_context * ctx0 = ggml_init(params);
554-
struct ggml_cgraph gf = { .n_threads = n_threads };
554+
ggml_cgraph gf = {};
555+
gf.n_threads = n_threads;
555556

556557
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
557558
memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -733,6 +734,7 @@ bool llama_eval(
733734
}
734735

735736
int main(int argc, char ** argv) {
737+
ggml_time_init();
736738
const int64_t t_main_start_us = ggml_time_us();
737739

738740
gpt_params params;

quantize.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
289289
// ./llama-quantize models/llama/ggml-model.bin models/llama/ggml-model-quant.bin type
290290
//
291291
int main(int argc, char ** argv) {
292+
ggml_time_init();
292293
if (argc != 4) {
293294
fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
294295
fprintf(stderr, " type = 2 - q4_0\n");

utils.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@
55
#include <fstream>
66
#include <regex>
77

8+
#if defined(_MSC_VER) || defined(__MINGW32__)
9+
#include <malloc.h> // using malloc.h with MSC/MINGW
10+
#elif !defined(__FreeBSD__)
11+
#include <alloca.h>
12+
#endif
13+
814
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
915
for (int i = 1; i < argc; i++) {
1016
std::string arg = argv[i];
@@ -472,7 +478,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
472478

473479
assert(k % qk == 0);
474480

475-
uint8_t pp[qk/2];
481+
const size_t pp_size = qk / 2;
482+
uint8_t *pp = static_cast<uint8_t*>(alloca(pp_size));
476483

477484
char * pdst = (char *) dst;
478485

@@ -511,7 +518,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
511518
pp[l/2] = vi0 | (vi1 << 4);
512519
}
513520

514-
memcpy(pb, pp, sizeof(pp));
521+
memcpy(pb, pp, pp_size);
515522
pb += bs;
516523
}
517524
}
@@ -526,7 +533,8 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
526533

527534
assert(k % qk == 0);
528535

529-
uint8_t pp[qk/2];
536+
const size_t pp_size = qk / 2;
537+
uint8_t *pp = static_cast<uint8_t*>(alloca(pp_size));
530538

531539
char * pdst = (char *) dst;
532540

@@ -570,7 +578,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
570578
pp[l/2] = vi0 | (vi1 << 4);
571579
}
572580

573-
memcpy(pb + i*qk/2, pp, sizeof(pp));
581+
memcpy(pb + i*qk/2, pp, pp_size);
574582
}
575583
}
576584
}

0 commit comments

Comments (0)