Skip to content

Commit

Permalink
compiler: simplify generic functions, fix issues with inline calls
Browse files Browse the repository at this point in the history
The original motivation here was to fix regressions caused by #22414.
However, while working on this, I ended up discussing a language
simplification with Andrew, which changes things a little from how they
worked before #22414.

The main user-facing change here is that any reference to a prior
function parameter, even if potentially comptime-known at the usage
site or even not analyzed, now makes a function generic. This applies
even if the parameter being referenced is not a `comptime` parameter,
since it could still be populated when performing an inline call. This
is a breaking language change.

The detection of this is done in AstGen; when evaluating a parameter
type or return type, we track whether it referenced any prior parameter,
and if so, we mark this type as being "generic" in ZIR. This will cause
Sema to not evaluate it until the time of instantiation or inline call.

A lovely consequence of this from an implementation perspective is that
it eliminates the need for most of the "generic poison" system. In
particular, `error.GenericPoison` is now completely unnecessary, because
we identify generic expressions earlier in the pipeline; this simplifies
the compiler and avoids redundant work. This also entirely eliminates
the concept of the "generic poison value". The only remnant of this
system is the "generic poison type" (`Type.generic_poison` and
`InternPool.Index.generic_poison_type`). This type is used in two
places:

* During semantic analysis, to represent an unknown result type.
* When storing generic function types, to represent a generic parameter/return type.

It's possible that these use cases should instead use `.none`, but I
leave that investigation to a future adventurer.

One last thing. Prior to #22414, inline calls were a little inefficient,
because they re-evaluated even non-generic parameter types whenever they
were called. Changing this behavior is what ultimately led to #22538.
Well, because the new logic will mark a type expression as generic if
there is any change its resolved type could differ in an inline call,
this redundant work is unnecessary! So, this is another way in which the
new design reduces redundant work and complexity.

Resolves: #22494
Resolves: #22532
Resolves: #22538
  • Loading branch information
mlugg committed Jan 21, 2025
1 parent 216e0f3 commit 0ec6b2d
Show file tree
Hide file tree
Showing 23 changed files with 265 additions and 378 deletions.
34 changes: 30 additions & 4 deletions lib/std/zig/AstGen.zig
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ fn setExtra(astgen: *AstGen, index: usize, extra: anytype) void {
Zir.Inst.SwitchBlock.Bits,
Zir.Inst.SwitchBlockErrUnion.Bits,
Zir.Inst.FuncFancy.Bits,
Zir.Inst.Param.Type,
Zir.Inst.Func.RetTy,
=> @bitCast(@field(extra, field.name)),

else => @compileError("bad field type"),
Expand Down Expand Up @@ -1384,7 +1386,7 @@ fn fnProtoExprInner(
const tag: Zir.Inst.Tag = if (is_comptime) .param_comptime else .param;
// We pass `prev_param_insts` as `&.{}` here because a function prototype can't refer to previous
// arguments (we haven't set up scopes here).
const param_inst = try block_scope.addParam(&param_gz, &.{}, tag, name_token, param_name);
const param_inst = try block_scope.addParam(&param_gz, &.{}, false, tag, name_token, param_name);
assert(param_inst_expected == param_inst);
}
}
Expand Down Expand Up @@ -1416,6 +1418,7 @@ fn fnProtoExprInner(

.ret_param_refs = &.{},
.param_insts = &.{},
.ret_ty_is_generic = false,

.param_block = block_inst,
.body_gz = null,
Expand Down Expand Up @@ -4336,6 +4339,9 @@ fn fnDeclInner(
// Note that the capacity here may not be sufficient, as this does not include `anytype` parameters.
var param_insts: std.ArrayListUnmanaged(Zir.Inst.Index) = try .initCapacity(astgen.arena, fn_proto.ast.params.len);

// We use this as `is_used_or_discarded` to figure out if parameters / return types are generic.
var any_param_used = false;

var noalias_bits: u32 = 0;
var params_scope = scope;
const is_var_args = is_var_args: {
Expand Down Expand Up @@ -4409,16 +4415,18 @@ fn fnDeclInner(
} else param: {
const param_type_node = param.type_expr;
assert(param_type_node != 0);
any_param_used = false; // we will check this later
var param_gz = decl_gz.makeSubBlock(scope);
defer param_gz.unstack();
const param_type = try fullBodyExpr(&param_gz, params_scope, coerced_type_ri, param_type_node, .normal);
const param_inst_expected: Zir.Inst.Index = @enumFromInt(astgen.instructions.len + 1);
_ = try param_gz.addBreakWithSrcNode(.break_inline, param_inst_expected, param_type, param_type_node);
const param_type_is_generic = any_param_used;

const main_tokens = tree.nodes.items(.main_token);
const name_token = param.name_token orelse main_tokens[param_type_node];
const tag: Zir.Inst.Tag = if (is_comptime) .param_comptime else .param;
const param_inst = try decl_gz.addParam(&param_gz, param_insts.items, tag, name_token, param_name);
const param_inst = try decl_gz.addParam(&param_gz, param_insts.items, param_type_is_generic, tag, name_token, param_name);
assert(param_inst_expected == param_inst);
break :param param_inst.toRef();
};
Expand All @@ -4433,6 +4441,7 @@ fn fnDeclInner(
.inst = param_inst,
.token_src = param.name_token.?,
.id_cat = .@"function parameter",
.is_used_or_discarded = &any_param_used,
};
params_scope = &sub_scope.base;
try param_insts.append(astgen.arena, param_inst.toIndex().?);
Expand All @@ -4446,6 +4455,7 @@ fn fnDeclInner(

var ret_gz = decl_gz.makeSubBlock(params_scope);
defer ret_gz.unstack();
any_param_used = false; // we will check this later
const ret_ref: Zir.Inst.Ref = inst: {
// Parameters are in scope for the return type, so we use `params_scope` here.
// The calling convention will not have parameters in scope, so we'll just use `scope`.
Expand All @@ -4459,6 +4469,7 @@ fn fnDeclInner(
break :inst inst;
};
const ret_body_param_refs = try astgen.fetchRemoveRefEntries(param_insts.items);
const ret_ty_is_generic = any_param_used;

// We're jumping back in source, so restore the cursor.
astgen.restoreSourceCursor(saved_cursor);
Expand Down Expand Up @@ -4556,6 +4567,7 @@ fn fnDeclInner(
.ret_ref = ret_ref,
.ret_gz = &ret_gz,
.ret_param_refs = ret_body_param_refs,
.ret_ty_is_generic = ret_ty_is_generic,
.lbrace_line = lbrace_line,
.lbrace_column = lbrace_column,
.param_block = decl_inst,
Expand Down Expand Up @@ -5028,6 +5040,7 @@ fn testDecl(

.ret_param_refs = &.{},
.param_insts = &.{},
.ret_ty_is_generic = false,

.lbrace_line = lbrace_line,
.lbrace_column = lbrace_column,
Expand Down Expand Up @@ -8546,6 +8559,8 @@ fn localVarRef(
local_val.used = ident_token;
}

if (local_val.is_used_or_discarded) |ptr| ptr.* = true;

const value_inst = if (num_namespaces_out != 0) try tunnelThroughClosure(
gz,
ident,
Expand Down Expand Up @@ -11876,6 +11891,7 @@ const Scope = struct {
/// Track the identifier where it is discarded, like this `_ = foo;`.
/// 0 means never discarded.
discarded: Ast.TokenIndex = 0,
is_used_or_discarded: ?*bool = null,
/// String table index.
name: Zir.NullTerminatedString,
id_cat: IdCat,
Expand Down Expand Up @@ -12223,6 +12239,7 @@ const GenZir = struct {

ret_param_refs: []Zir.Inst.Index,
param_insts: []Zir.Inst.Index, // refs to params in `body_gz` should still be in `astgen.ref_table`
ret_ty_is_generic: bool,

cc_ref: Zir.Inst.Ref,
ret_ref: Zir.Inst.Ref,
Expand Down Expand Up @@ -12322,6 +12339,8 @@ const GenZir = struct {

.has_cc_body = cc_body.len != 0,
.has_ret_ty_body = ret_body.len != 0,

.ret_ty_is_generic = args.ret_ty_is_generic,
},
});

Expand Down Expand Up @@ -12372,7 +12391,10 @@ const GenZir = struct {

const payload_index = astgen.addExtraAssumeCapacity(Zir.Inst.Func{
.param_block = args.param_block,
.ret_body_len = ret_body_len,
.ret_ty = .{
.body_len = @intCast(ret_body_len),
.is_generic = args.ret_ty_is_generic,
},
.body_len = body_len,
});
const zir_datas = astgen.instructions.items(.data);
Expand Down Expand Up @@ -12535,6 +12557,7 @@ const GenZir = struct {
/// Previous parameters, which might be referenced in `param_gz` (the new parameter type).
/// `ref`s of these instructions will be put into this param's type body, and removed from `AstGen.ref_table`.
prev_param_insts: []const Zir.Inst.Index,
ty_is_generic: bool,
tag: Zir.Inst.Tag,
/// Absolute token index. This function does the conversion to Decl offset.
abs_tok_index: Ast.TokenIndex,
Expand All @@ -12548,7 +12571,10 @@ const GenZir = struct {

const payload_index = gz.astgen.addExtraAssumeCapacity(Zir.Inst.Param{
.name = name,
.body_len = @intCast(body_len),
.type = .{
.body_len = @intCast(body_len),
.is_generic = ty_is_generic,
},
});
gz.astgen.appendBodyWithFixupsExtraRefsArrayList(&gz.astgen.extra, param_body, prev_param_insts);
param_gz.unstack();
Expand Down
56 changes: 38 additions & 18 deletions lib/std/zig/Zir.zig
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ pub fn extraData(code: Zir, comptime T: type, index: usize) ExtraData(T) {
Inst.SwitchBlockErrUnion.Bits,
Inst.FuncFancy.Bits,
Inst.Declaration.Flags,
Inst.Param.Type,
Inst.Func.RetTy,
=> @bitCast(code.extra[i]),

else => @compileError("bad field type"),
Expand Down Expand Up @@ -2126,7 +2128,7 @@ pub const Inst = struct {
ref_start_index = static_len,
_,

pub const static_len = 71;
pub const static_len = 70;

pub fn toRef(i: Index) Inst.Ref {
return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i));
Expand Down Expand Up @@ -2229,7 +2231,6 @@ pub const Inst = struct {
bool_true,
bool_false,
empty_tuple,
generic_poison,

/// This Ref does not correspond to any ZIR instruction or constant
/// value and may instead be used as a sentinel to indicate null.
Expand Down Expand Up @@ -2472,24 +2473,31 @@ pub const Inst = struct {
};

/// Trailing:
/// if (ret_body_len == 1) {
/// if (ret_ty.body_len == 1) {
/// 0. return_type: Ref
/// }
/// if (ret_body_len > 1) {
/// 1. return_type: Index // for each ret_body_len
/// if (ret_ty.body_len > 1) {
/// 1. return_type: Index // for each ret_ty.body_len
/// }
/// 2. body: Index // for each body_len
/// 3. src_locs: SrcLocs // if body_len != 0
/// 4. proto_hash: std.zig.SrcHash // if body_len != 0; hash of function prototype
pub const Func = struct {
/// If this is 0 it means a void return type.
/// If this is 1 it means return_type is a simple Ref
ret_body_len: u32,
ret_ty: RetTy,
/// Points to the block that contains the param instructions for this function.
/// If this is a `declaration`, it refers to the declaration's value body.
param_block: Index,
body_len: u32,

pub const RetTy = packed struct(u32) {
/// 0 means `void`.
/// 1 means the type is a simple `Ref`.
/// Otherwise, the length of a trailing body.
body_len: u31,
/// Whether the return type is generic, i.e. refers to one or more previous parameters.
is_generic: bool,
};

pub const SrcLocs = struct {
/// Line index in the source file relative to the parent decl.
lbrace_line: u32,
Expand Down Expand Up @@ -2539,7 +2547,8 @@ pub const Inst = struct {
has_ret_ty_ref: bool,
has_ret_ty_body: bool,
has_any_noalias: bool,
_: u24 = undefined,
ret_ty_is_generic: bool,
_: u23 = undefined,
};
};

Expand Down Expand Up @@ -3708,8 +3717,14 @@ pub const Inst = struct {
pub const Param = struct {
/// Null-terminated string index.
name: NullTerminatedString,
/// The body contains the type of the parameter.
body_len: u32,
type: Type,

pub const Type = packed struct(u32) {
/// The body contains the type of the parameter.
body_len: u31,
/// Whether the type is generic, i.e. refers to one or more previous parameters.
is_generic: bool,
};
};

/// Trailing:
Expand Down Expand Up @@ -4492,19 +4507,19 @@ fn findTrackableInner(

if (extra.data.body_len == 0) {
// This is just a prototype. No need to track.
assert(extra.data.ret_body_len < 2);
assert(extra.data.ret_ty.body_len < 2);
return;
}

assert(contents.func_decl == null);
contents.func_decl = inst;

var extra_index: usize = extra.end;
switch (extra.data.ret_body_len) {
switch (extra.data.ret_ty.body_len) {
0 => {},
1 => extra_index += 1,
else => {
const body = zir.bodySlice(extra_index, extra.data.ret_body_len);
const body = zir.bodySlice(extra_index, extra.data.ret_ty.body_len);
extra_index += body.len;
try zir.findTrackableBody(gpa, contents, defers, body);
},
Expand Down Expand Up @@ -4595,7 +4610,7 @@ fn findTrackableInner(
.param, .param_comptime => {
const inst_data = datas[@intFromEnum(inst)].pl_tok;
const extra = zir.extraData(Inst.Param, inst_data.payload_index);
const body = zir.bodySlice(extra.end, extra.data.body_len);
const body = zir.bodySlice(extra.end, extra.data.type.body_len);
try zir.findTrackableBody(gpa, contents, defers, body);
},

Expand Down Expand Up @@ -4738,6 +4753,7 @@ pub const FnInfo = struct {
ret_ty_body: []const Inst.Index,
body: []const Inst.Index,
ret_ty_ref: Zir.Inst.Ref,
ret_ty_is_generic: bool,
total_params_len: u32,
inferred_error_set: bool,
};
Expand Down Expand Up @@ -4779,6 +4795,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
body: []const Inst.Index,
ret_ty_ref: Inst.Ref,
ret_ty_body: []const Inst.Index,
ret_ty_is_generic: bool,
ies: bool,
} = switch (tags[@intFromEnum(fn_inst)]) {
.func, .func_inferred => |tag| blk: {
Expand All @@ -4789,7 +4806,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
var ret_ty_ref: Inst.Ref = .none;
var ret_ty_body: []const Inst.Index = &.{};

switch (extra.data.ret_body_len) {
switch (extra.data.ret_ty.body_len) {
0 => {
ret_ty_ref = .void_type;
},
Expand All @@ -4798,7 +4815,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
extra_index += 1;
},
else => {
ret_ty_body = zir.bodySlice(extra_index, extra.data.ret_body_len);
ret_ty_body = zir.bodySlice(extra_index, extra.data.ret_ty.body_len);
extra_index += ret_ty_body.len;
},
}
Expand All @@ -4811,6 +4828,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
.ret_ty_ref = ret_ty_ref,
.ret_ty_body = ret_ty_body,
.body = body,
.ret_ty_is_generic = extra.data.ret_ty.is_generic,
.ies = tag == .func_inferred,
};
},
Expand Down Expand Up @@ -4848,6 +4866,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
.ret_ty_ref = ret_ty_ref,
.ret_ty_body = ret_ty_body,
.body = body,
.ret_ty_is_generic = extra.data.bits.ret_ty_is_generic,
.ies = extra.data.bits.is_inferred_error,
};
},
Expand All @@ -4870,6 +4889,7 @@ pub fn getFnInfo(zir: Zir, fn_inst: Inst.Index) FnInfo {
.ret_ty_ref = info.ret_ty_ref,
.body = info.body,
.total_params_len = total_params_len,
.ret_ty_is_generic = info.ret_ty_is_generic,
.inferred_error_set = info.ies,
};
}
Expand Down Expand Up @@ -4967,7 +4987,7 @@ pub fn getAssociatedSrcHash(zir: Zir, inst: Zir.Inst.Index) ?std.zig.SrcHash {
return null;
}
const extra_index = extra.end +
extra.data.ret_body_len +
extra.data.ret_ty.body_len +
extra.data.body_len +
@typeInfo(Inst.Func.SrcLocs).@"struct".fields.len;
return @bitCast([4]u32{
Expand Down
1 change: 0 additions & 1 deletion src/Air.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,6 @@ pub const Inst = struct {
bool_true = @intFromEnum(InternPool.Index.bool_true),
bool_false = @intFromEnum(InternPool.Index.bool_false),
empty_tuple = @intFromEnum(InternPool.Index.empty_tuple),
generic_poison = @intFromEnum(InternPool.Index.generic_poison),

/// This Ref does not correspond to any AIR instruction or constant
/// value and may instead be used as a sentinel to indicate null.
Expand Down
5 changes: 2 additions & 3 deletions src/Air/types_resolved.zig
Original file line number Diff line number Diff line change
Expand Up @@ -455,9 +455,8 @@ pub fn checkVal(val: Value, zcu: *Zcu) bool {

pub fn checkType(ty: Type, zcu: *Zcu) bool {
const ip = &zcu.intern_pool;
return switch (ty.zigTypeTagOrPoison(zcu) catch |err| switch (err) {
error.GenericPoison => return true,
}) {
if (ty.isGenericPoison()) return true;
return switch (ty.zigTypeTag(zcu)) {
.type,
.void,
.bool,
Expand Down
Loading

0 comments on commit 0ec6b2d

Please sign in to comment.