Skip to content

Commit 29dcae2

Browse files
committed
pre-rebase
1 parent c05a724 commit 29dcae2

File tree

5 files changed: +38 additions, -32 deletions

docs/ops.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Legend:
1616
|-----------|------|------|------|------|------|------|------|------|------|
1717
| ABS |||| 🟡 | 🟡 || 🟡 |||
1818
| ACC ||||||||||
19-
| ADD ||| || 🟡 | 🟡 ||||
19+
| ADD ||| || 🟡 | 🟡 ||||
2020
| ADD1 ||||||||||
2121
| ADD_ID ||||||||||
2222
| ARANGE ||||||||||
@@ -25,7 +25,6 @@ Legend:
2525
| CEIL |||| 🟡 ||||||
2626
| CLAMP ||||| 🟡 | 🟡 || 🟡 ||
2727
| CONCAT |||| 🟡 || 🟡 ||||
28-
| CONST ||||||||||
2928
| CONT || 🟡 |||| 🟡 | 🟡 | 🟡 ||
3029
| CONV_2D ||||||||||
3130
| CONV_2D_DW ||||||||||
@@ -39,11 +38,12 @@ Legend:
3938
| CROSS_ENTROPY_LOSS_BACK ||||||||||
4039
| CUMSUM ||||||||||
4140
| DIAG_MASK_INF ||||| 🟡 | 🟡 ||||
42-
| DIV ||| || 🟡 | 🟡 ||||
41+
| DIV ||| || 🟡 | 🟡 ||||
4342
| DUP |||| 🟡 | 🟡 | 🟡 || 🟡 ||
4443
| ELU |||| 🟡 | 🟡 || 🟡 |||
4544
| EXP |||| 🟡 | 🟡 || 🟡 |||
4645
| EXPM1 |||| 🟡 ||||||
46+
| FILL ||||||||||
4747
| FLASH_ATTN_EXT || 🟡 || 🟡 | 🟡 ||| 🟡 ||
4848
| FLOOR |||| 🟡 ||||||
4949
| GATED_LINEAR_ATTN ||||||||||
@@ -65,7 +65,7 @@ Legend:
6565
| LEAKY_RELU ||||||||||
6666
| LOG ||||||||||
6767
| MEAN ||||||||||
68-
| MUL ||| || 🟡 | 🟡 ||||
68+
| MUL ||| || 🟡 | 🟡 ||||
6969
| MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 |
7070
| MUL_MAT_ID || 🟡 |||| 🟡 | 🟡 |||
7171
| NEG |||| 🟡 | 🟡 || 🟡 |||
@@ -102,13 +102,13 @@ Legend:
102102
| SOFTPLUS |||| 🟡 ||||||
103103
| SOFT_MAX || 🟡 ||||||||
104104
| SOFT_MAX_BACK ||| 🟡 | 🟡 ||| 🟡 |||
105-
| SOLVE_TRI ||| |||||||
105+
| SOLVE_TRI ||| |||||||
106106
| SQR ||||| 🟡 ||| 🟡 ||
107107
| SQRT ||||| 🟡 |||||
108108
| SSM_CONV ||||||||||
109109
| SSM_SCAN ||||||||||
110110
| STEP |||| 🟡 | 🟡 || 🟡 |||
111-
| SUB ||| || 🟡 | 🟡 ||||
111+
| SUB ||| || 🟡 | 🟡 ||||
112112
| SUM |||| 🟡 ||||||
113113
| SUM_ROWS |||| 🟡 ||| 🟡 |||
114114
| SWIGLU ||||| 🟡 ||| 🟡 ||
@@ -117,7 +117,6 @@ Legend:
117117
| TIMESTEP_EMBEDDING ||||||||||
118118
| TOPK_MOE ||||||||||
119119
| TRI ||||||||||
120-
| TRI_SOLVE ||||||||||
121120
| TRUNC |||| 🟡 ||||||
122121
| UPSCALE || 🟡 ||| 🟡 || 🟡 |||
123122
| XIELU ||||||||||

docs/ops/CPU.csv

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5427,6 +5427,7 @@
54275427
"CPU","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
54285428
"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
54295429
"CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","CPU"
5430+
"CPU","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","CPU"
54305431
"CPU","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","CPU"
54315432
"CPU","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","CPU"
54325433
"CPU","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","CPU"
@@ -8418,6 +8419,8 @@
84188419
"CPU","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
84198420
"CPU","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
84208421
"CPU","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","CPU"
8422+
"CPU","EXPM1","type=f16,ne=[10,5,4,3]","support","1","yes","CPU"
8423+
"CPU","SOFTPLUS","type=f16,ne=[10,5,4,3]","support","1","yes","CPU"
84218424
"CPU","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
84228425
"CPU","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
84238426
"CPU","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","CPU"
@@ -8440,6 +8443,8 @@
84408443
"CPU","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
84418444
"CPU","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
84428445
"CPU","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","CPU"
8446+
"CPU","EXPM1","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
8447+
"CPU","SOFTPLUS","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
84438448
"CPU","SQR","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
84448449
"CPU","SQRT","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
84458450
"CPU","LOG","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
@@ -8451,10 +8456,6 @@
84518456
"CPU","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
84528457
"CPU","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
84538458
"CPU","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","CPU"
8454-
"CPU","ADD","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
8455-
"CPU","SUB","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
8456-
"CPU","DIV","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
8457-
"CPU","MUL","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
84588459
"CPU","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","CPU"
84598460
"CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","CPU"
84608461
"CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","CPU"
@@ -9252,15 +9253,20 @@
92529253
"CPU","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","CPU"
92539254
"CPU","CUMSUM","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
92549255
"CPU","XIELU","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9255-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9256-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9257-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9258-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9259-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9260-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9261-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9262-
"CPU","TRI","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9263-
"CPU","TRI_SOLVE","type=f32,ne=[10,5,4,3]","support","1","yes","CPU"
9256+
"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","1","yes","CPU"
9257+
"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","1","yes","CPU"
9258+
"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","1","yes","CPU"
9259+
"CPU","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","1","yes","CPU"
9260+
"CPU","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","1","yes","CPU"
9261+
"CPU","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","CPU"
9262+
"CPU","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","CPU"
9263+
"CPU","SOLVE_TRI","type=f32,neLHS=[10,10,4,3],neRHS=[3,10,4,3]","support","1","yes","CPU"
9264+
"CPU","SOLVE_TRI","type=f32,neLHS=[11,11,1,1],neRHS=[5,11,1,1]","support","1","yes","CPU"
9265+
"CPU","SOLVE_TRI","type=f32,neLHS=[17,17,2,4],neRHS=[9,17,2,4]","support","1","yes","CPU"
9266+
"CPU","SOLVE_TRI","type=f32,neLHS=[30,30,7,1],neRHS=[8,30,7,1]","support","1","yes","CPU"
9267+
"CPU","SOLVE_TRI","type=f32,neLHS=[42,42,5,2],neRHS=[10,42,5,2]","support","1","yes","CPU"
9268+
"CPU","SOLVE_TRI","type=f32,neLHS=[64,64,2,2],neRHS=[10,64,2,2]","support","1","yes","CPU"
9269+
"CPU","SOLVE_TRI","type=f32,neLHS=[100,100,4,4],neRHS=[41,100,4,4]","support","1","yes","CPU"
92649270
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CPU"
92659271
"CPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CPU"
92669272
"CPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","CPU"

docs/ops/CUDA.csv

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9257,16 +9257,16 @@
92579257
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","0","no","CUDA"
92589258
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","0","no","CUDA"
92599259
"CUDA0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","0","no","CUDA"
9260-
"CUDA0","CONST","type=f32,ne=[10,10,4,3],c=0.000000","support","0","no","CUDA"
9261-
"CUDA0","CONST","type=f32,ne=[303,207,11,3],c=2.000000","support","0","no","CUDA"
9262-
"CUDA0","CONST","type=f32,ne=[800,600,4,4],c=-152.000000","support","0","no","CUDA"
9263-
"CUDA0","SOLVE_TRI","type=f32,ne=[10,10,4,3],ne2=[3,10,4,3]","support","0","no","CUDA"
9264-
"CUDA0","SOLVE_TRI","type=f32,ne=[11,11,1,1],ne2=[5,11,1,1]","support","0","no","CUDA"
9265-
"CUDA0","SOLVE_TRI","type=f32,ne=[17,17,2,4],ne2=[9,17,2,4]","support","0","no","CUDA"
9266-
"CUDA0","SOLVE_TRI","type=f32,ne=[30,30,7,1],ne2=[8,30,7,1]","support","0","no","CUDA"
9267-
"CUDA0","SOLVE_TRI","type=f32,ne=[42,42,5,2],ne2=[10,42,5,2]","support","0","no","CUDA"
9268-
"CUDA0","SOLVE_TRI","type=f32,ne=[64,64,2,2],ne2=[10,64,2,2]","support","0","no","CUDA"
9269-
"CUDA0","SOLVE_TRI","type=f32,ne=[100,100,4,4],ne2=[41,100,4,4]","support","0","no","CUDA"
9260+
"CUDA0","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","0","no","CUDA"
9261+
"CUDA0","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","0","no","CUDA"
9262+
"CUDA0","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","0","no","CUDA"
9263+
"CUDA0","SOLVE_TRI","type=f32,neLHS=[10,10,4,3],neRHS=[3,10,4,3]","support","0","no","CUDA"
9264+
"CUDA0","SOLVE_TRI","type=f32,neLHS=[11,11,1,1],neRHS=[5,11,1,1]","support","0","no","CUDA"
9265+
"CUDA0","SOLVE_TRI","type=f32,neLHS=[17,17,2,4],neRHS=[9,17,2,4]","support","0","no","CUDA"
9266+
"CUDA0","SOLVE_TRI","type=f32,neLHS=[30,30,7,1],neRHS=[8,30,7,1]","support","0","no","CUDA"
9267+
"CUDA0","SOLVE_TRI","type=f32,neLHS=[42,42,5,2],neRHS=[10,42,5,2]","support","0","no","CUDA"
9268+
"CUDA0","SOLVE_TRI","type=f32,neLHS=[64,64,2,2],neRHS=[10,64,2,2]","support","0","no","CUDA"
9269+
"CUDA0","SOLVE_TRI","type=f32,neLHS=[100,100,4,4],neRHS=[41,100,4,4]","support","0","no","CUDA"
92709270
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","CUDA"
92719271
"CUDA0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","CUDA"
92729272
"CUDA0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","CUDA"

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2228,6 +2228,7 @@ extern "C" {
22282228
enum ggml_tri_type type);
22292229

22302230
// Fill tensor a with constant c
2231+
// In-place, returns view(a)
22312232
GGML_API struct ggml_tensor * ggml_fill(
22322233
struct ggml_context * ctx,
22332234
struct ggml_tensor * a,

ggml/src/ggml.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
992992
"ARGSORT",
993993
"LEAKY_RELU",
994994
"TRI",
995-
"CONST",
995+
"FILL",
996996

997997
"FLASH_ATTN_EXT",
998998
"FLASH_ATTN_BACK",
@@ -1100,7 +1100,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
11001100
"argsort(x)",
11011101
"leaky_relu(x)",
11021102
"tri(x)",
1103-
"const(c)",
1103+
"fill(x, c)",
11041104

11051105
"flash_attn_ext(x)",
11061106
"flash_attn_back(x)",

0 commit comments

Comments
 (0)