
Commit 7247503

moving changes from laptop [no ci]

1 parent fed482f

7 files changed: +53 −88 lines

ggml/include/ggml.h

Lines changed: 17 additions & 37 deletions
@@ -532,7 +532,7 @@ extern "C" {
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
         GGML_OP_TRI,
-        GGML_OP_CONST,
+        GGML_OP_FILL,
 
         GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
@@ -2227,40 +2227,10 @@ extern "C" {
             struct ggml_tensor  * a,
             enum   ggml_tri_type  type);
 
-    // Create a tensor of dimensions ne0, ne1, ne2, ne3 filled with the constant c
-    GGML_API struct ggml_tensor * ggml_const(
+    // Fill tensor a with constant c
+    GGML_API struct ggml_tensor * ggml_fill(
             struct ggml_context * ctx,
-            int64_t               ne0,
-            int64_t               ne1,
-            int64_t               ne2,
-            int64_t               ne3,
-            float                 c);
-
-    // Convenience calls
-    GGML_API struct ggml_tensor * ggml_const_1d(
-            struct ggml_context * ctx,
-            int64_t               ne0,
-            float                 c);
-
-    GGML_API struct ggml_tensor * ggml_const_2d(
-            struct ggml_context * ctx,
-            int64_t               ne0,
-            int64_t               ne1,
-            float                 c);
-
-    GGML_API struct ggml_tensor * ggml_const_3d(
-            struct ggml_context * ctx,
-            int64_t               ne0,
-            int64_t               ne1,
-            int64_t               ne2,
-            float                 c);
-
-    GGML_API struct ggml_tensor * ggml_const_4d(
-            struct ggml_context * ctx,
-            int64_t               ne0,
-            int64_t               ne1,
-            int64_t               ne2,
-            int64_t               ne3,
+            struct ggml_tensor  * a,
             float                 c);
 
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
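
For orientation, a minimal sketch of what a call-site migration might look like; the `ctx` setup and shapes are illustrative, and `ggml_fill` now takes an existing tensor instead of allocating one:

    // before: ggml_const_2d(ctx, 4, 4, 1.0f) allocated a fresh F32 tensor
    // after: the destination is created explicitly, then filled
    struct ggml_tensor * a    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
    struct ggml_tensor * ones = ggml_fill(ctx, a, 1.0f);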
@@ -2431,16 +2401,26 @@ extern "C" {
             struct ggml_tensor  * b,
             struct ggml_tensor  * state);
 
-    /* Solves a specific equation of the form Ax=B, where A is a lower triangular matrix
+    /* Solves a specific equation of the form Ax=B, where A is a triangular matrix
+     * without zeroes on the diagonal (i.e. invertible).
      * B can have any number of columns, but must have the same number of rows as A
      * If A is [n, n] and B is [n, m], then the result will be [n, m] as well
      * Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
-     * where n > 100 sparingly, pre-chunk if necessary
+     * where n > 100 sparingly, pre-chunk if necessary.
+     *
+     * If left = false, solves xA=B instead
+     * If lower = false, assumes upper triangular instead
+     * If unitriangular = true, assumes the diagonal of A to be all ones (will override actual values)
+     *
+     * TODO: currently only the left, lower, non-unitriangular variant is implemented
      */
     GGML_API struct ggml_tensor * ggml_solve_tri(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            bool                  left,
+            bool                  lower,
+            bool                  unitriangular);
 
     // custom operators
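
A sketch of the extended call under the shapes used in the tests below; per the TODO, only left = true, lower = true, unitriangular = false is accepted for now:

    // A: 10x10 lower triangular; B: 10 rows x 3 columns (ne0 = 3, ne1 = 10)
    struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 10);
    struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32,  3, 10);
    // solves A x = B; the result has the same shape as B
    struct ggml_tensor * X = ggml_solve_tri(ctx, A, B, /*left=*/true, /*lower=*/true, /*unitriangular=*/false);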

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 3 additions & 3 deletions
@@ -1951,9 +1951,9 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_tri(params, tensor);
             } break;
-        case GGML_OP_CONST:
+        case GGML_OP_FILL:
             {
-                ggml_compute_forward_const(params, tensor);
+                ggml_compute_forward_fill(params, tensor);
             } break;
         case GGML_OP_FLASH_ATTN_EXT:
             {
@@ -2158,7 +2158,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
         case GGML_OP_ACC:
         case GGML_OP_CUMSUM:
         case GGML_OP_TRI:
-        case GGML_OP_CONST:
+        case GGML_OP_FILL:
             {
                 n_tasks = n_threads;
             } break;

ggml/src/ggml-cpu/ops.cpp

Lines changed: 1 addition & 1 deletion
@@ -2211,7 +2211,7 @@ static void ggml_compute_forward_const_f32(const ggml_compute_params * params, g
     }
 }
 
-void ggml_compute_forward_const(const ggml_compute_params * params, ggml_tensor * dst) {
+void ggml_compute_forward_fill(const ggml_compute_params * params, ggml_tensor * dst) {
     ggml_compute_forward_const_f32(params, dst);
 }
 

ggml/src/ggml-cpu/ops.h

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params *
 void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_leaky_relu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst);
-void ggml_compute_forward_const(const struct ggml_compute_params * params, struct ggml_tensor * dst);
+void ggml_compute_forward_fill(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_flash_attn_ext(const struct ggml_compute_params * params, struct ggml_tensor * dst);
 void ggml_compute_forward_flash_attn_back(
         const struct ggml_compute_params * params,

ggml/src/ggml-cpu/vec.h

Lines changed: 0 additions & 2 deletions
@@ -1422,8 +1422,6 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
 // r - current row index
 // dst - output array
 // src - input array
-// keep_org_val - if true, keep original value where mask applies; otherwise use constant 'c'
-// c - constant value to use when not keeping original value
 // bipred - the predicate on coordinates, derived from tri_type
 inline static void ggml_vec_tri_f32(const int n, const int r, float * dst, const float * src, bool (*bipred)(int, int)) {
     for (int i = 0; i < n; ++i) {
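
The `bipred` parameter is a plain coordinate predicate derived from `ggml_tri_type`; the actual definitions are not part of this diff, but they might look like this hypothetical sketch:

    // hypothetical predicates on (column i, row r) for ggml_vec_tri_f32;
    // the real names and argument order may differ
    static bool tri_lower(int i, int r)      { return i <  r; } // strictly below the diagonal
    static bool tri_lower_diag(int i, int r) { return i <= r; } // on or below the diagonal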

ggml/src/ggml.c

Lines changed: 10 additions & 27 deletions
@@ -5098,42 +5098,20 @@ struct ggml_tensor * ggml_tri(
 
 // ggml_const
 
-struct ggml_tensor * ggml_const(
+struct ggml_tensor * ggml_fill(
         struct ggml_context * ctx,
-        const int64_t         ne0,
-        const int64_t         ne1,
-        const int64_t         ne2,
-        const int64_t         ne3,
+        struct ggml_tensor  * dst,
         const float           c) {
 
-    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, ne0, ne1, ne2, ne3);
+    struct ggml_tensor * result = ggml_view_tensor(ctx, dst);
 
     ggml_set_op_params_f32(result, 0, c);
 
-    result->op = GGML_OP_CONST;
+    result->op = GGML_OP_FILL;
 
     return result;
 }
 
-struct ggml_tensor * ggml_const_1d(struct ggml_context * ctx, const int64_t ne0, const float c) {
-    return ggml_const(ctx, ne0, 1, 1, 1, c);
-}
-
-struct ggml_tensor * ggml_const_2d(struct ggml_context * ctx, const int64_t ne0, const int64_t ne1,
-        const float c) {
-    return ggml_const(ctx, ne0, ne1, 1, 1, c);
-}
-
-struct ggml_tensor * ggml_const_3d(struct ggml_context * ctx, const int64_t ne0, const int64_t ne1,
-        const int64_t ne2, const float c) {
-    return ggml_const(ctx, ne0, ne1, ne2, 1, c);
-}
-
-struct ggml_tensor * ggml_const_4d(struct ggml_context * ctx, const int64_t ne0, const int64_t ne1,
-        const int64_t ne2, const int64_t ne3, const float c) {
-    return ggml_const(ctx, ne0, ne1, ne2, ne3, c);
-}
-
 // ggml_argsort
 
 struct ggml_tensor * ggml_argsort(
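
Note the design change here: `ggml_fill` no longer allocates a tensor. It returns a view of `dst` tagged with `GGML_OP_FILL` and stashes `c` in the op params, so the constant is written into the existing buffer only when the graph is computed. A minimal end-to-end sketch using standard ggml graph calls (context sizing and thread count are illustrative):

    struct ggml_tensor * t      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * filled = ggml_fill(ctx, t, 3.0f);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, filled);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);
    // `filled` is a view of `t`, so both now hold 3.0f in every element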
@@ -5993,7 +5971,10 @@ struct ggml_tensor * ggml_opt_step_sgd(
 struct ggml_tensor * ggml_solve_tri(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        struct ggml_tensor  * b) {
+        struct ggml_tensor  * b,
+        bool                  left,
+        bool                  lower,
+        bool                  unitriangular) {
 
     // A must be square and lower triangular
     GGML_ASSERT(a->ne[0] == a->ne[1]);
@@ -6007,6 +5988,8 @@ struct ggml_tensor * ggml_solve_tri(
     GGML_ASSERT(ggml_is_contiguous(a));
     GGML_ASSERT(ggml_is_contiguous(b));
 
+    GGML_ASSERT(left && lower && !unitriangular); // TODO: support other variants
+
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
 
     result->op = GGML_OP_SOLVE_TRI;
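
For reference, the (left, lower) variant asserted above is classic forward substitution, applied independently to each column of B; a self-contained sketch of the math, not the actual kernel:

    // solve L x = b where L is n x n lower triangular (row-major) with a
    // nonzero diagonal; O(n^2) per right-hand side, O(n^2 * m) for m columns,
    // which matches the O(n^3) warning in the header when m is on the order of n
    static void solve_tri_lower_ref(const float * L, const float * b, float * x, int n) {
        for (int i = 0; i < n; ++i) {
            float s = b[i];
            for (int j = 0; j < i; ++j) {
                s -= L[i*n + j] * x[j];
            }
            x[i] = s / L[i*n + i];
        }
    }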

tests/test-backend-ops.cpp

Lines changed: 21 additions & 17 deletions
@@ -5971,20 +5971,24 @@ struct test_tri : public test_case {
     }
 };
 
-// GGML_OP_CONST
-struct test_const : public test_case {
+// GGML_OP_FILL
+struct test_fill : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     float c;
 
     std::string vars() override { return VARS_TO_STR3(type, ne, c); }
 
-    test_const(float c, ggml_type type = GGML_TYPE_F32,
+    test_fill(float c, ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne = { 10, 10, 4, 3 })
         : type(type), ne(ne), c(c) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
-        ggml_tensor * out = ggml_const(ctx, ne[0], ne[1], ne[2], ne[3], c);
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_fill(ctx, a, c);
 
         ggml_set_name(out, "out");
 
@@ -5995,27 +5999,27 @@ struct test_const : public test_case {
 // GGML_OP_SOLVE_TRI
 struct test_solve_tri : public test_case {
     const ggml_type type;
-    const std::array<int64_t, 4> ne;
-    const std::array<int64_t, 4> ne2;
+    const std::array<int64_t, 4> neLHS;
+    const std::array<int64_t, 4> neRHS;
 
-    std::string vars() override { return VARS_TO_STR3(type, ne, ne2); }
+    std::string vars() override { return VARS_TO_STR3(type, neLHS, neRHS); }
 
     test_solve_tri(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = { 10, 10, 4, 3 },
-            std::array<int64_t, 4> ne2 = { 3, 10, 4, 3 }
+            std::array<int64_t, 4> neLHS = { 10, 10, 4, 3 },
+            std::array<int64_t, 4> neRHS = { 3, 10, 4, 3 }
     )
-        : type(type), ne(ne), ne2(ne2) {}
+        : type(type), neLHS(neLHS), neRHS(neRHS) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
-        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
+        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, neLHS[0], neLHS[1], neLHS[2], neLHS[3]);
         ggml_set_param(a);
         ggml_set_name(a, "a");
 
-        ggml_tensor * b = ggml_new_tensor_4d(ctx, type, ne2[0], ne2[1], ne2[2], ne2[3]);
+        ggml_tensor * b = ggml_new_tensor_4d(ctx, type, neRHS[0], neRHS[1], neRHS[2], neRHS[3]);
         ggml_set_param(b);
         ggml_set_name(b, "b");
 
-        ggml_tensor * out = ggml_solve_tri(ctx, a, b);
+        ggml_tensor * out = ggml_solve_tri(ctx, a, b, true, true, false);
         ggml_set_name(out, "out");
 
         return out;
@@ -6024,7 +6028,7 @@ struct test_solve_tri : public test_case {
     void initialize_tensors(ggml_context * ctx) override {
        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
             if (strcmp(t->name, "a") == 0) {
-                init_tensor_causal(t, 0.1, 1.0f);
+                init_tensor_tril(t, 0.1, 1.0f);
             } else {
                 init_tensor_uniform(t, 0.1, 1.0f);
             }
@@ -7528,9 +7532,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER));
     test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG));
 
-    test_cases.emplace_back(new test_const(0.0f));
-    test_cases.emplace_back(new test_const(2.0f, GGML_TYPE_F32, { 303, 207, 11, 3 }));
-    test_cases.emplace_back(new test_const(-152.0f, GGML_TYPE_F32, { 800, 600, 4, 4 }));
+    test_cases.emplace_back(new test_fill(0.0f));
+    test_cases.emplace_back(new test_fill(2.0f, GGML_TYPE_F32, { 303, 207, 11, 3 }));
+    test_cases.emplace_back(new test_fill(-152.0f, GGML_TYPE_F32, { 800, 600, 4, 4 }));
 
     test_cases.emplace_back(new test_solve_tri());
     test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 11, 11, 1, 1 }, { 5, 11, 1, 1 }));
