Skip to content

Commit 7596848

Browse files
authored
implement a better statement selection logic (#654)
Specifically, this commit aims to review the implementation of `add_control_flow!` and improves its accuracy. Ideally, it should pass JET's existing test cases as well as the newly added ones, including the test cases from JuliaDebug/LoweredCodeUtils.jl#99. The goal is to share the same high-precision CFG selection logic between LoweredCodeUtils and JET. The new algorithm is based on what was proposed in [^Wei84]. If there is even one active block in the blocks reachable from a conditional branch up to its successors' nearest common post-dominator (referred to as **INFL** in the paper), it is necessary to follow that conditional branch and execute the code. Otherwise, execution can be short-circuited from the conditional branch to the nearest common post-dominator. COMBAK: It is important to note that in Julia's IR (`CodeInfo`), "short-circuiting" a specific code region is not a simple task. Simply ignoring the path to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, a more careful implementation is required for this aspect. [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984.
1 parent 2dfcd4e commit 7596848

File tree

3 files changed

+292
-79
lines changed

3 files changed

+292
-79
lines changed

src/JET.jl

+9-8
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,15 @@ using Core: Builtin, IntrinsicFunction, Intrinsics, SimpleVector, svec
3636
using Core.IR
3737

3838
using .CC: @nospecs, ,
39-
AbstractInterpreter, AbstractLattice, ArgInfo, Bottom, CFG, CachedMethodTable, CallMeta,
40-
ConstCallInfo, InferenceParams, InferenceResult, InferenceState, InternalMethodTable,
41-
InvokeCallInfo, MethodCallResult, MethodMatchInfo, MethodMatches, NOT_FOUND,
42-
OptimizationState, OptimizationParams, OverlayMethodTable, StmtInfo, UnionSplitInfo,
43-
UnionSplitMethodMatches, VarState, VarTable, WorldRange, WorldView,
44-
argextype, argtype_by_index, argtypes_to_type, hasintersect, ignorelimited,
45-
instanceof_tfunc, istopfunction, singleton_type, slot_id, specialize_method,
46-
tmeet, tmerge, typeinf_lattice, widenconst, widenlattice
39+
AbstractInterpreter, AbstractLattice, ArgInfo, BasicBlock, Bottom, CFG, CachedMethodTable,
40+
CallMeta, ConstCallInfo, InferenceParams, InferenceResult, InferenceState,
41+
InternalMethodTable, InvokeCallInfo, MethodCallResult, MethodMatchInfo, MethodMatches,
42+
NOT_FOUND, OptimizationState, OptimizationParams, OverlayMethodTable, StmtInfo,
43+
UnionSplitInfo, UnionSplitMethodMatches, VarState, VarTable, WorldRange, WorldView,
44+
argextype, argtype_by_index, argtypes_to_type, compute_basic_blocks, construct_domtree,
45+
construct_postdomtree, hasintersect, ignorelimited, instanceof_tfunc, istopfunction,
46+
nearest_common_dominator, singleton_type, slot_id, specialize_method, tmeet, tmerge,
47+
typeinf_lattice, widenconst, widenlattice
4748

4849
using Base: IdSet, get_world_counter
4950

src/toplevel/virtualprocess.jl

+172-62
Original file line numberDiff line numberDiff line change
@@ -1091,16 +1091,44 @@ end
10911091

10921092
# select statements that should be concretized, and actually interpreted rather than abstracted
10931093
function select_statements(mod::Module, src::CodeInfo)
1094-
stmts = src.code
10951094
cl = LoweredCodeUtils.CodeLinks(mod, src) # make `CodeEdges` hold `CodeLinks`?
10961095
edges = LoweredCodeUtils.CodeEdges(src, cl)
1097-
1098-
concretize = falses(length(stmts))
1099-
1100-
select_direct_requirement!(concretize, stmts, edges)
1101-
1096+
concretize = falses(length(src.code))
1097+
select_direct_requirement!(concretize, src.code, edges)
11021098
select_dependencies!(concretize, src, edges, cl)
1099+
return concretize
1100+
end
11031101

1102+
# just for testing, and debugging
1103+
function select_statements(mod::Module, src::CodeInfo, names::Symbol...)
1104+
cl = LoweredCodeUtils.CodeLinks(mod, src) # make `CodeEdges` hold `CodeLinks`?
1105+
edges = LoweredCodeUtils.CodeEdges(src, cl)
1106+
concretize = falses(length(src.code))
1107+
objs = Set{GlobalRef}(GlobalRef(mod, name) for name in names)
1108+
LoweredCodeUtils.add_requests!(concretize, objs, edges, ())
1109+
select_dependencies!(concretize, src, edges, cl)
1110+
return concretize
1111+
end
1112+
function select_statements(mod::Module, src::CodeInfo, slots::SlotNumber...)
1113+
cl = LoweredCodeUtils.CodeLinks(mod, src) # make `CodeEdges` hold `CodeLinks`?
1114+
edges = LoweredCodeUtils.CodeEdges(src, cl)
1115+
concretize = falses(length(src.code))
1116+
for slot in slots
1117+
for d in cl.slotassigns[slot.id]
1118+
concretize[d] = true
1119+
end
1120+
end
1121+
select_dependencies!(concretize, src, edges, cl)
1122+
return concretize
1123+
end
1124+
function select_statements(mod::Module, src::CodeInfo, idxs::Int...)
1125+
cl = LoweredCodeUtils.CodeLinks(mod, src) # make `CodeEdges` hold `CodeLinks`?
1126+
edges = LoweredCodeUtils.CodeEdges(src, cl)
1127+
concretize = falses(length(src.code))
1128+
for idx = idxs
1129+
concretize[idx] |= true
1130+
end
1131+
select_dependencies!(concretize, src, edges, cl)
11041132
return concretize
11051133
end
11061134

@@ -1173,66 +1201,41 @@ end
11731201

11741202
# The goal of this function is to request concretization of the minimal necessary control
11751203
# flow to evaluate statements whose concretization have already been requested.
1176-
# The basic approach is to check if there are any active successors for each basic block,
1177-
# and if there is an active successor and the terminator is not a fall-through, then request
1178-
# the concretization of that terminator. Additionally, for conditional terminators, a simple
1179-
# optimization using post-domination analysis is also performed.
1180-
function add_control_flow!(concretize::BitVector, src::CodeInfo, cfg::CFG, postdomtree)
1204+
# The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active
1205+
# block in the blocks reachable from a conditional branch up to its successors' nearest
1206+
# common post-dominator (referred to as 𝑰𝑵𝑭𝑳 in the paper), it is necessary to follow
1207+
# that conditional branch and execute the code. Otherwise, execution can be short-circuited
1208+
# from the conditional branch to the nearest common post-dominator.
1209+
#
1210+
# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`),
1211+
# "short-circuiting" a specific code region is not a simple task. Simply ignoring the path
1212+
# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore,
1213+
# a more careful implementation is required for this aspect.
1214+
#
1215+
# [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984.
1216+
function add_control_flow!(concretize::BitVector, src::CodeInfo, cfg::CFG, domtree, postdomtree)
11811217
local changed::Bool = false
11821218
function mark_concretize!(idx::Int)
11831219
if !concretize[idx]
1184-
concretize[idx] = true
1220+
changed |= concretize[idx] = true
11851221
return true
11861222
end
11871223
return false
11881224
end
1189-
nblocks = length(cfg.blocks)
1190-
for bbidx = 1:nblocks
1191-
bb = cfg.blocks[bbidx] # forward traversal
1225+
for bbidx = 1:length(cfg.blocks) # forward traversal
1226+
bb = cfg.blocks[bbidx]
11921227
nsuccs = length(bb.succs)
11931228
if nsuccs == 0
11941229
continue
11951230
elseif nsuccs == 1
1196-
terminator_idx = bb.stmts[end]
1197-
if src.code[terminator_idx] isa GotoNode
1198-
# If the destination of this `GotoNode` is not active, it's fine to ignore
1199-
# the control flow caused by this `GotoNode` and treat it as a fall-through.
1200-
# If the block that is fallen through to is active and has a dependency on
1201-
# this goto block, then the concretization of this goto block should already
1202-
# be requested (at some point of the higher concretization convergence cycle
1203-
# of `select_dependencies`), and thus, this `GotoNode` will be concretized.
1204-
if any(@view concretize[cfg.blocks[only(bb.succs)].stmts])
1205-
changed |= mark_concretize!(terminator_idx)
1206-
end
1207-
end
1231+
continue # leave a fall-through terminator unmarked: `GotoNode`s are marked later
12081232
elseif nsuccs == 2
1209-
terminator_idx = bb.stmts[end]
1210-
@assert is_conditional_terminator(src.code[terminator_idx]) "invalid IR"
1211-
succ1, succ2 = bb.succs
1212-
succ1_req = any(@view concretize[cfg.blocks[succ1].stmts])
1213-
succ2_req = any(@view concretize[cfg.blocks[succ2].stmts])
1214-
if succ1_req
1215-
if succ2_req
1216-
changed |= mark_concretize!(terminator_idx)
1217-
else
1218-
active_bb, inactive_bb = succ1, succ2
1219-
@goto asymmetric_case
1220-
end
1221-
elseif succ2_req
1222-
active_bb, inactive_bb = succ2, succ1
1223-
@label asymmetric_case
1224-
# We can ignore the control flow of this conditional terminator and treat
1225-
# it as a fall-through if only one of its successors is active and the
1226-
# active block post-dominates the inactive one, since the post-domination
1227-
# ensures that the active basic block will be reached regardless of the
1228-
# control flow.
1229-
if CC.postdominates(postdomtree, active_bb, inactive_bb)
1230-
# fall through this block
1231-
else
1232-
changed |= mark_concretize!(terminator_idx)
1233-
end
1233+
termidx = bb.stmts[end]
1234+
@assert is_conditional_terminator(src.code[termidx]) "invalid IR"
1235+
if is_conditional_block_active(concretize, bb, cfg, postdomtree)
1236+
mark_concretize!(termidx)
12341237
else
1235-
# both successors are inactive, just fall through this block
1238+
# fall-through to the post dominator block (by short-circuiting all statements between)
12361239
end
12371240
end
12381241
end
@@ -1242,6 +1245,46 @@ end
12421245
is_conditional_terminator(@nospecialize stmt) = stmt isa GotoIfNot ||
12431246
(@static @isdefined(EnterNode) ? stmt isa EnterNode : isexpr(stmt, :enter))
12441247

1248+
function is_conditional_block_active(concretize::BitVector, bb::BasicBlock, cfg::CFG, postdomtree)
1249+
return visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
1250+
for blk in 𝑰𝑵𝑭𝑳
1251+
if blk == postdominator
1252+
continue # skip the post-dominator block and continue to a next infl block
1253+
end
1254+
if any(@view concretize[cfg.blocks[blk].stmts])
1255+
return true
1256+
end
1257+
end
1258+
return false
1259+
end
1260+
end
1261+
1262+
function visit_𝑰𝑵𝑭𝑳_blocks(func, bb::BasicBlock, cfg::CFG, postdomtree)
1263+
succ1, succ2 = bb.succs
1264+
postdominator = nearest_common_dominator(postdomtree, succ1, succ2)
1265+
inflblks = reachable_blocks(cfg, succ1, postdominator) reachable_blocks(cfg, succ2, postdominator)
1266+
return func(postdominator, inflblks)
1267+
end
1268+
1269+
function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Int)
1270+
worklist = Int[from_bb]
1271+
visited = BitSet(from_bb)
1272+
if to_bb == from_bb
1273+
return visited
1274+
end
1275+
push!(visited, to_bb)
1276+
function visit!(bb::Int)
1277+
if bb visited
1278+
push!(visited, bb)
1279+
push!(worklist, bb)
1280+
end
1281+
end
1282+
while !isempty(worklist)
1283+
foreach(visit!, cfg.blocks[pop!(worklist)].succs)
1284+
end
1285+
return visited
1286+
end
1287+
12451288
function add_required_inplace!(concretize::BitVector, src::CodeInfo, edges, cl)
12461289
changed = false
12471290
for i = 1:length(src.code)
@@ -1272,31 +1315,98 @@ function is_arg_requested(@nospecialize(arg), concretize, edges, cl)
12721315
return false
12731316
end
12741317

1318+
# The purpose of this function is to find other statements that affect the execution of the
1319+
# statements choosen by `select_direct_dependencies!`. In other words, it extracts the
1320+
# minimal amount of code necessary to realize the required concretization.
1321+
# This technique is generally referred to as "program slicing," and specifically as
1322+
# "static program slicing". The basic algorithm implemented here is an extension of the one
1323+
# proposed in https://dl.acm.org/doi/10.5555/800078.802557, which is especially tuned for
1324+
# Julia's intermediate code representation.
12751325
function select_dependencies!(concretize::BitVector, src::CodeInfo, edges, cl)
12761326
typedefs = LoweredCodeUtils.find_typedefs(src)
1277-
cfg = CC.compute_basic_blocks(src.code)
1278-
postdomtree = CC.construct_postdomtree(cfg.blocks)
1327+
cfg = compute_basic_blocks(src.code)
1328+
domtree = construct_domtree(cfg.blocks)
1329+
postdomtree = construct_postdomtree(cfg.blocks)
12791330

12801331
while true
12811332
changed = false
12821333

1283-
# discover struct/method definitions at the beginning,
1284-
# and propagate the definition requirements by tracking SSA precedessors
1334+
# Discover Dtruct/method definitions at the beginning,
1335+
# and propagate the definition requirements by tracking SSA precedessors.
1336+
# (TODO maybe hoist this out of the loop?)
12851337
changed |= LoweredCodeUtils.add_typedefs!(concretize, src, edges, typedefs, ())
12861338
changed |= add_ssa_preds!(concretize, src, edges, ())
12871339

1288-
# mark some common inplace operations like `push!(x, ...)` and `setindex!(x, ...)`
1289-
# when `x` has been marked already: otherwise we may end up using it with invalid state
1340+
# Mark some common inplace operations like `push!(x, ...)` and `setindex!(x, ...)`
1341+
# when `x` has been marked already: otherwise we may end up using it with invalid state.
1342+
# However, note that this is an incomplete approach, and note that the slice created
1343+
# by this routine will not be sound because of this. This is because
1344+
# `add_required_inplace!` only requires certain special-cased function calls and
1345+
# does not take into account the possibility that arguments may be mutated in
1346+
# arbitrary function calls. Ideally, function calls should be required while
1347+
# considering the effects of these statements, or by some sort of an
1348+
# inter-procedural program slicing
12901349
changed |= add_required_inplace!(concretize, src, edges, cl)
12911350
changed |= add_ssa_preds!(concretize, src, edges, ())
12921351

1293-
# mark necessary control flows,
1294-
# and propagate the definition requirements by tracking SSA precedessors
1295-
changed |= add_control_flow!(concretize, src, cfg, postdomtree)
1352+
# Mark necessary control flows.
1353+
changed |= add_control_flow!(concretize, src, cfg, domtree, postdomtree)
12961354
changed |= add_ssa_preds!(concretize, src, edges, ())
12971355

12981356
changed || break
12991357
end
1358+
1359+
# now mark the active goto nodes
1360+
add_active_gotos!(concretize, src, cfg, postdomtree)
1361+
1362+
nothing
1363+
end
1364+
1365+
function add_active_gotos!(concretize::BitVector, src::CodeInfo, cfg::CFG, postdomtree)
1366+
dead_blocks = compute_dead_blocks(concretize, src, cfg, postdomtree)
1367+
changed = false
1368+
for bbidx = 1:length(cfg.blocks)
1369+
if bbidx dead_blocks
1370+
bb = cfg.blocks[bbidx]
1371+
nsuccs = length(bb.succs)
1372+
if nsuccs == 1
1373+
termidx = bb.stmts[end]
1374+
if src.code[termidx] isa GotoNode
1375+
changed |= concretize[termidx] = true
1376+
end
1377+
end
1378+
end
1379+
end
1380+
return changed
1381+
end
1382+
1383+
# find dead blocks using the same approach as `add_control_flow!`, for the converged `concretize`
1384+
function compute_dead_blocks(concretize::BitVector, src::CodeInfo, cfg::CFG, postdomtree)
1385+
dead_blocks = BitSet()
1386+
for bbidx = 1:length(cfg.blocks)
1387+
bb = cfg.blocks[bbidx]
1388+
nsuccs = length(bb.succs)
1389+
if nsuccs == 2
1390+
termidx = bb.stmts[end]
1391+
@assert is_conditional_terminator(src.code[termidx]) "invalid IR"
1392+
visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
1393+
is_active_inflblks = false
1394+
for blk in 𝑰𝑵𝑭𝑳
1395+
if blk == postdominator
1396+
continue # skip the post-dominator block and continue to a next infl block
1397+
end
1398+
if any(@view concretize[cfg.blocks[blk].stmts])
1399+
is_active_inflblks |= true
1400+
break
1401+
end
1402+
end
1403+
if !is_active_inflblks
1404+
union!(dead_blocks, delete!(𝑰𝑵𝑭𝑳, postdominator))
1405+
end
1406+
end
1407+
end
1408+
end
1409+
return dead_blocks
13001410
end
13011411

13021412
function JuliaInterpreter.step_expr!(interp::ConcreteInterpreter, frame::Frame, @nospecialize(node), istoplevel::Bool)

0 commit comments

Comments
 (0)