Skip to content

Commit fc105f2

Browse files
authored
Merge pull request #9458 from SuperAuguste/popcount-ctz-clz
Vector support for `@popCount`, `@ctz`, and `@clz`
2 parents a9a4fd3 + dd79615 commit fc105f2

File tree

7 files changed

+258 additions, -49 deletions

doc/langref.html.in

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7366,18 +7366,20 @@ pub const CallOptions = struct {
73667366
{#header_close#}
73677367

73687368
{#header_open|@clz#}
7369-
<pre>{#syntax#}@clz(comptime T: type, integer: T){#endsyntax#}</pre>
7369+
<pre>{#syntax#}@clz(comptime T: type, operand: T){#endsyntax#}</pre>
7370+
<p>{#syntax#}T{#endsyntax#} must be an integer type.</p>
7371+
<p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
73707372
<p>
7371-
This function counts the number of most-significant (leading in a big-Endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
7373+
This function counts the number of most-significant (leading in a big-Endian sense) zeroes in an integer.
73727374
</p>
73737375
<p>
7374-
If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
7376+
If {#syntax#}operand{#endsyntax#} is a {#link|comptime#}-known integer,
73757377
the return type is {#syntax#}comptime_int{#endsyntax#}.
7376-
Otherwise, the return type is an unsigned integer with the minimum number
7378+
Otherwise, the return type is an unsigned integer or vector of unsigned integers with the minimum number
73777379
of bits that can represent the bit count of the integer type.
73787380
</p>
73797381
<p>
7380-
If {#syntax#}integer{#endsyntax#} is zero, {#syntax#}@clz{#endsyntax#} returns the bit width
7382+
If {#syntax#}operand{#endsyntax#} is zero, {#syntax#}@clz{#endsyntax#} returns the bit width
73817383
of integer type {#syntax#}T{#endsyntax#}.
73827384
</p>
73837385
{#see_also|@ctz|@popCount#}
@@ -7509,18 +7511,20 @@ test "main" {
75097511
{#header_close#}
75107512

75117513
{#header_open|@ctz#}
7512-
<pre>{#syntax#}@ctz(comptime T: type, integer: T){#endsyntax#}</pre>
7514+
<pre>{#syntax#}@ctz(comptime T: type, operand: T){#endsyntax#}</pre>
7515+
<p>{#syntax#}T{#endsyntax#} must be an integer type.</p>
7516+
<p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
75137517
<p>
7514-
This function counts the number of least-significant (trailing in a big-Endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
7518+
This function counts the number of least-significant (trailing in a big-Endian sense) zeroes in an integer.
75157519
</p>
75167520
<p>
7517-
If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
7521+
If {#syntax#}operand{#endsyntax#} is a {#link|comptime#}-known integer,
75187522
the return type is {#syntax#}comptime_int{#endsyntax#}.
7519-
Otherwise, the return type is an unsigned integer with the minimum number
7523+
Otherwise, the return type is an unsigned integer or vector of unsigned integers with the minimum number
75207524
of bits that can represent the bit count of the integer type.
75217525
</p>
75227526
<p>
7523-
If {#syntax#}integer{#endsyntax#} is zero, {#syntax#}@ctz{#endsyntax#} returns
7527+
If {#syntax#}operand{#endsyntax#} is zero, {#syntax#}@ctz{#endsyntax#} returns
75247528
the bit width of integer type {#syntax#}T{#endsyntax#}.
75257529
</p>
75267530
{#see_also|@clz|@popCount#}
@@ -8105,12 +8109,14 @@ test "@wasmMemoryGrow" {
81058109
{#header_close#}
81068110

81078111
{#header_open|@popCount#}
8108-
<pre>{#syntax#}@popCount(comptime T: type, integer: T){#endsyntax#}</pre>
8112+
<pre>{#syntax#}@popCount(comptime T: type, operand: T){#endsyntax#}</pre>
8113+
<p>{#syntax#}T{#endsyntax#} must be an integer type.</p>
8114+
<p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
81098115
<p>Counts the number of bits set in an integer.</p>
81108116
<p>
8111-
If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
8117+
If {#syntax#}operand{#endsyntax#} is a {#link|comptime#}-known integer,
81128118
the return type is {#syntax#}comptime_int{#endsyntax#}.
8113-
Otherwise, the return type is an unsigned integer with the minimum number
8119+
Otherwise, the return type is an unsigned integer or vector of unsigned integers with the minimum number
81148120
of bits that can represent the bit count of the integer type.
81158121
</p>
81168122
{#see_also|@ctz|@clz#}

src/stage1/all_types.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,12 +1907,15 @@ struct ZigLLVMFnKey {
19071907
union {
19081908
struct {
19091909
uint32_t bit_count;
1910+
uint32_t vector_len; // 0 means not a vector
19101911
} ctz;
19111912
struct {
19121913
uint32_t bit_count;
1914+
uint32_t vector_len; // 0 means not a vector
19131915
} clz;
19141916
struct {
19151917
uint32_t bit_count;
1918+
uint32_t vector_len; // 0 means not a vector
19161919
} pop_count;
19171920
struct {
19181921
BuiltinFnId op;

src/stage1/analyze.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7883,11 +7883,14 @@ bool type_id_eql(TypeId const *a, TypeId const *b) {
78837883
uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey const *x) {
78847884
switch (x->id) {
78857885
case ZigLLVMFnIdCtz:
7886-
return (uint32_t)(x->data.ctz.bit_count) * (uint32_t)810453934;
7886+
return (uint32_t)(x->data.ctz.bit_count) * (uint32_t)810453934 +
7887+
(uint32_t)(x->data.ctz.vector_len) * (((uint32_t)x->id << 5) + 1025);
78877888
case ZigLLVMFnIdClz:
7888-
return (uint32_t)(x->data.clz.bit_count) * (uint32_t)2428952817;
7889+
return (uint32_t)(x->data.clz.bit_count) * (uint32_t)2428952817 +
7890+
(uint32_t)(x->data.clz.vector_len) * (((uint32_t)x->id << 5) + 1025);
78897891
case ZigLLVMFnIdPopCount:
7890-
return (uint32_t)(x->data.clz.bit_count) * (uint32_t)101195049;
7892+
return (uint32_t)(x->data.pop_count.bit_count) * (uint32_t)101195049 +
7893+
(uint32_t)(x->data.pop_count.vector_len) * (((uint32_t)x->id << 5) + 1025);
78917894
case ZigLLVMFnIdFloatOp:
78927895
return (uint32_t)(x->data.floating.bit_count) * ((uint32_t)x->id + 1025) +
78937896
(uint32_t)(x->data.floating.vector_len) * (((uint32_t)x->id << 5) + 1025) +

src/stage1/codegen.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5070,6 +5070,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFn
50705070
n_args = 1;
50715071
key.id = ZigLLVMFnIdPopCount;
50725072
key.data.pop_count.bit_count = (uint32_t)int_type->data.integral.bit_count;
5073+
key.data.pop_count.vector_len = vector_len;
50735074
} else if (fn_id == BuiltinFnIdBswap) {
50745075
fn_name = "bswap";
50755076
n_args = 1;

src/stage1/ir.cpp

Lines changed: 168 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15945,85 +15945,239 @@ static Stage1AirInst *ir_analyze_instruction_optional_unwrap_ptr(IrAnalyze *ira,
1594515945
}
1594615946

1594715947
static Stage1AirInst *ir_analyze_instruction_ctz(IrAnalyze *ira, Stage1ZirInstCtz *instruction) {
15948+
Error err;
15949+
1594815950
ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
1594915951
if (type_is_invalid(int_type))
1595015952
return ira->codegen->invalid_inst_gen;
1595115953

15952-
Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
15954+
Stage1AirInst *uncasted_op = instruction->op->child;
15955+
if (type_is_invalid(uncasted_op->value->type))
15956+
return ira->codegen->invalid_inst_gen;
15957+
15958+
uint32_t vector_len = UINT32_MAX; // means not a vector
15959+
if (uncasted_op->value->type->id == ZigTypeIdArray) {
15960+
bool can_be_vec_elem;
15961+
if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
15962+
&can_be_vec_elem)))
15963+
{
15964+
return ira->codegen->invalid_inst_gen;
15965+
}
15966+
if (can_be_vec_elem) {
15967+
vector_len = uncasted_op->value->type->data.array.len;
15968+
}
15969+
} else if (uncasted_op->value->type->id == ZigTypeIdVector) {
15970+
vector_len = uncasted_op->value->type->data.vector.len;
15971+
}
15972+
15973+
bool is_vector = (vector_len != UINT32_MAX);
15974+
ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
15975+
15976+
Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
1595315977
if (type_is_invalid(op->value->type))
1595415978
return ira->codegen->invalid_inst_gen;
1595515979

1595615980
if (int_type->data.integral.bit_count == 0)
1595715981
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
1595815982

15983+
ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
15984+
1595915985
if (instr_is_comptime(op)) {
1596015986
ZigValue *val = ir_resolve_const(ira, op, UndefOk);
1596115987
if (val == nullptr)
1596215988
return ira->codegen->invalid_inst_gen;
1596315989
if (val->special == ConstValSpecialUndef)
1596415990
return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
15965-
size_t result_usize = bigint_ctz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
15966-
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
15991+
15992+
if (is_vector) {
15993+
ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
15994+
Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
15995+
expand_undef_array(ira->codegen, val);
15996+
result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len);
15997+
for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
15998+
ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
15999+
if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
16000+
op_elem_val, UndefOk)))
16001+
{
16002+
return ira->codegen->invalid_inst_gen;
16003+
}
16004+
ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
16005+
result_elem_val->type = smallest_type;
16006+
result_elem_val->special = op_elem_val->special;
16007+
if (op_elem_val->special == ConstValSpecialUndef)
16008+
continue;
16009+
size_t value = bigint_ctz(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
16010+
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
16011+
}
16012+
return result;
16013+
} else {
16014+
size_t result_usize = bigint_ctz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
16015+
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
16016+
}
1596716017
}
1596816018

15969-
ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
16019+
ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type;
1597016020
return ir_build_ctz_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
1597116021
}
1597216022

1597316023
static Stage1AirInst *ir_analyze_instruction_clz(IrAnalyze *ira, Stage1ZirInstClz *instruction) {
16024+
Error err;
16025+
1597416026
ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
1597516027
if (type_is_invalid(int_type))
1597616028
return ira->codegen->invalid_inst_gen;
1597716029

15978-
Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
16030+
Stage1AirInst *uncasted_op = instruction->op->child;
16031+
if (type_is_invalid(uncasted_op->value->type))
16032+
return ira->codegen->invalid_inst_gen;
16033+
16034+
uint32_t vector_len = UINT32_MAX; // means not a vector
16035+
if (uncasted_op->value->type->id == ZigTypeIdArray) {
16036+
bool can_be_vec_elem;
16037+
if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
16038+
&can_be_vec_elem)))
16039+
{
16040+
return ira->codegen->invalid_inst_gen;
16041+
}
16042+
if (can_be_vec_elem) {
16043+
vector_len = uncasted_op->value->type->data.array.len;
16044+
}
16045+
} else if (uncasted_op->value->type->id == ZigTypeIdVector) {
16046+
vector_len = uncasted_op->value->type->data.vector.len;
16047+
}
16048+
16049+
bool is_vector = (vector_len != UINT32_MAX);
16050+
ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
16051+
16052+
Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
1597916053
if (type_is_invalid(op->value->type))
1598016054
return ira->codegen->invalid_inst_gen;
1598116055

1598216056
if (int_type->data.integral.bit_count == 0)
1598316057
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
1598416058

16059+
ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
16060+
1598516061
if (instr_is_comptime(op)) {
1598616062
ZigValue *val = ir_resolve_const(ira, op, UndefOk);
1598716063
if (val == nullptr)
1598816064
return ira->codegen->invalid_inst_gen;
1598916065
if (val->special == ConstValSpecialUndef)
1599016066
return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
15991-
size_t result_usize = bigint_clz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
15992-
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
16067+
16068+
if (is_vector) {
16069+
ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
16070+
Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
16071+
expand_undef_array(ira->codegen, val);
16072+
result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len);
16073+
for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
16074+
ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
16075+
if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
16076+
op_elem_val, UndefOk)))
16077+
{
16078+
return ira->codegen->invalid_inst_gen;
16079+
}
16080+
ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
16081+
result_elem_val->type = smallest_type;
16082+
result_elem_val->special = op_elem_val->special;
16083+
if (op_elem_val->special == ConstValSpecialUndef)
16084+
continue;
16085+
size_t value = bigint_clz(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
16086+
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
16087+
}
16088+
return result;
16089+
} else {
16090+
size_t result_usize = bigint_clz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
16091+
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
16092+
}
1599316093
}
1599416094

15995-
ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
16095+
ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type;
1599616096
return ir_build_clz_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
1599716097
}
1599816098

1599916099
static Stage1AirInst *ir_analyze_instruction_pop_count(IrAnalyze *ira, Stage1ZirInstPopCount *instruction) {
16100+
Error err;
16101+
1600016102
ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
1600116103
if (type_is_invalid(int_type))
1600216104
return ira->codegen->invalid_inst_gen;
1600316105

16004-
Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
16106+
Stage1AirInst *uncasted_op = instruction->op->child;
16107+
if (type_is_invalid(uncasted_op->value->type))
16108+
return ira->codegen->invalid_inst_gen;
16109+
16110+
uint32_t vector_len = UINT32_MAX; // means not a vector
16111+
if (uncasted_op->value->type->id == ZigTypeIdArray) {
16112+
bool can_be_vec_elem;
16113+
if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
16114+
&can_be_vec_elem)))
16115+
{
16116+
return ira->codegen->invalid_inst_gen;
16117+
}
16118+
if (can_be_vec_elem) {
16119+
vector_len = uncasted_op->value->type->data.array.len;
16120+
}
16121+
} else if (uncasted_op->value->type->id == ZigTypeIdVector) {
16122+
vector_len = uncasted_op->value->type->data.vector.len;
16123+
}
16124+
16125+
bool is_vector = (vector_len != UINT32_MAX);
16126+
ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
16127+
16128+
Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
1600516129
if (type_is_invalid(op->value->type))
1600616130
return ira->codegen->invalid_inst_gen;
1600716131

1600816132
if (int_type->data.integral.bit_count == 0)
1600916133
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
1601016134

16135+
ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
16136+
1601116137
if (instr_is_comptime(op)) {
1601216138
ZigValue *val = ir_resolve_const(ira, op, UndefOk);
1601316139
if (val == nullptr)
1601416140
return ira->codegen->invalid_inst_gen;
1601516141
if (val->special == ConstValSpecialUndef)
1601616142
return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
16143+
16144+
if (is_vector) {
16145+
ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
16146+
Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
16147+
expand_undef_array(ira->codegen, val);
16148+
result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len);
16149+
for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
16150+
ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
16151+
if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
16152+
op_elem_val, UndefOk)))
16153+
{
16154+
return ira->codegen->invalid_inst_gen;
16155+
}
16156+
ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
16157+
result_elem_val->type = smallest_type;
16158+
result_elem_val->special = op_elem_val->special;
16159+
if (op_elem_val->special == ConstValSpecialUndef)
16160+
continue;
1601716161

16018-
if (bigint_cmp_zero(&val->data.x_bigint) != CmpLT) {
16019-
size_t result = bigint_popcount_unsigned(&val->data.x_bigint);
16162+
if (bigint_cmp_zero(&op_elem_val->data.x_bigint) != CmpLT) {
16163+
size_t value = bigint_popcount_unsigned(&op_elem_val->data.x_bigint);
16164+
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
16165+
}
16166+
size_t value = bigint_popcount_signed(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
16167+
bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
16168+
}
16169+
return result;
16170+
} else {
16171+
if (bigint_cmp_zero(&val->data.x_bigint) != CmpLT) {
16172+
size_t result = bigint_popcount_unsigned(&val->data.x_bigint);
16173+
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result);
16174+
}
16175+
size_t result = bigint_popcount_signed(&val->data.x_bigint, int_type->data.integral.bit_count);
1602016176
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result);
1602116177
}
16022-
size_t result = bigint_popcount_signed(&val->data.x_bigint, int_type->data.integral.bit_count);
16023-
return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result);
1602416178
}
1602516179

16026-
ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
16180+
ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type;
1602716181
return ir_build_pop_count_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
1602816182
}
1602916183

0 commit comments

Comments
 (0)