diff --git a/charon/src/transform/control_flow/ullbc_to_llbc.rs b/charon/src/transform/control_flow/ullbc_to_llbc.rs index a8a83d20b..47354a65f 100644 --- a/charon/src/transform/control_flow/ullbc_to_llbc.rs +++ b/charon/src/transform/control_flow/ullbc_to_llbc.rs @@ -20,7 +20,7 @@ use petgraph::visit::{ }; use smallvec::SmallVec; use std::cmp::Reverse; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use std::mem; use crate::common::ensure_sufficient_stack; @@ -166,7 +166,7 @@ struct Irreducible(BlockId); impl<'a> CfgInfo<'a> { /// Build the CFGs (the "regular" CFG and the CFG without backward edges) and /// compute some information like the loop entries and the switch blocks. - fn build(body: &'a src::BodyContents) -> Result { + fn build(ctx: &TransformCtx, body: &'a src::BodyContents) -> Result { let start_block = BlockId::ZERO; let empty_flow = body.map_ref(|_| BigRational::new(0u64.into(), 1u64.into())); @@ -303,14 +303,21 @@ impl<'a> CfgInfo<'a> { block_data[block_id].immediately_dominates = dominatees; } - Ok(CfgInfo { + let mut cfg = CfgInfo { cfg, fwd_cfg, loop_entries, switch_blocks, dominator_tree, block_data, - }) + }; + + // Pick an exit block for each loop, if we find one. + ExitInfo::compute_loop_exits(ctx, &mut cfg); + // Pick an exit block for each switch, if we find one. + ExitInfo::compute_switch_exits(&mut cfg); + + Ok(cfg) } fn block_data(&self, block_id: BlockId) -> &BlockData<'a> { @@ -414,9 +421,11 @@ struct LoopExitRank { } impl ExitInfo { - /// Compute the nodes that exit the given loop header. These are the first node on each path - /// that we find that exits the loop. - fn compute_loop_exit_candidates(cfg: &CfgInfo, loop_header: src::BlockId) -> Vec { + /// Compute the first node on each path that exits the loop. + fn compute_loop_exit_starting_points( + cfg: &CfgInfo, + loop_header: src::BlockId, + ) -> Vec { let mut loop_exits = Vec::new(); // Do a dfs from the loop header while keeping track of the path from the loop header to // the current node. @@ -468,7 +477,7 @@ impl ExitInfo { loop_header: src::BlockId, ) -> SeqHashMap { let mut loop_exits: SeqHashMap = SeqHashMap::new(); - for block_id in Self::compute_loop_exit_candidates(cfg, loop_header) { + for block_id in Self::compute_loop_exit_starting_points(cfg, loop_header) { for bid in cfg.block_data(block_id).reachable_including_self() { loop_exits .entry(bid) @@ -582,12 +591,8 @@ impl ExitInfo { /// s /// } /// ``` - fn compute_loop_exits( - ctx: &TransformCtx, - cfg: &CfgInfo, - ) -> HashMap { + fn compute_loop_exits(ctx: &TransformCtx, cfg: &mut CfgInfo) { let mut exits: HashSet = HashSet::new(); - let mut chosen_loop_exits: HashMap = HashMap::new(); // For every loop in topological order. for loop_id in cfg .loop_entries @@ -613,70 +618,87 @@ impl ExitInfo { // If candidate already selected for another loop: ignore .filter(|(candidate_id, _)| !exits.contains(candidate_id)) .max_set_by_key(|&(_, rank)| rank); - let chosen_exit = if best_exits.is_empty() { - None - } else { - // If there is exactly one best candidate, use it. Otherwise we need to split - // further. - let best_exits = best_exits.into_iter().map(|(bid, _)| bid); - match best_exits.exactly_one() { - Ok(best_exit) => Some(best_exit), - Err(best_exits) => { - // Remove the candidates which only lead to errors (panic or unreachable). - // If there is exactly one candidate we select it, otherwise we do not select any - // exit. - // We don't want to select any exit if we are in the below situation - // (all paths lead to errors). We added a sanity check below to - // catch the situations where there are several exits which don't - // lead to errors. - // - // Example: - // ======== - // ``` - // loop { - // match ls { - // List::Nil => { - // panic!() // <-- best candidate - // } - // List::Cons(x, tl) => { - // if i == 0 { - // return x; - // } else { - // ls = tl; - // i -= 1; - // } - // } - // _ => { - // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case) - // } - // } - // } - // ``` - best_exits - .filter(|&bid| !cfg.block_data[bid].only_reach_error) - .exactly_one() - .map_err(|mut candidates| { - // Adding this sanity check so that we can see when there are several - // candidates. - let span = cfg.block_data[loop_id].contents.terminator.span; - sanity_check!(ctx, span, candidates.next().is_none()); - }) - .ok() - } + // If there is exactly one best candidate, use it. Otherwise we need to split + // further. + let chosen_exit = match best_exits.into_iter().map(|(bid, _)| bid).exactly_one() { + Ok(best_exit) => Some(best_exit), + Err(best_exits) => { + // Remove the candidates which only lead to errors (panic or unreachable). + // If there is exactly one candidate we select it, otherwise we do not select any + // exit. + // We don't want to select any exit if we are in the below situation + // (all paths lead to errors). We added a sanity check below to + // catch the situations where there are several exits which don't + // lead to errors. + // + // Example: + // ======== + // ``` + // loop { + // match ls { + // List::Nil => { + // panic!() // <-- best candidate + // } + // List::Cons(x, tl) => { + // if i == 0 { + // return x; + // } else { + // ls = tl; + // i -= 1; + // } + // } + // _ => { + // unreachable!(); // <-- best candidate (Rustc introduces an `unreachable` case) + // } + // } + // } + // ``` + best_exits + .filter(|&bid| !cfg.block_data[bid].only_reach_error) + .exactly_one() + .map_err(|mut candidates| { + // Adding this sanity check so that we can see when there are several + // candidates. + let span = cfg.block_data[loop_id].contents.terminator.span; + sanity_check!(ctx, span, candidates.next().is_none()); + }) + .ok() } }; if let Some(exit_id) = chosen_exit { exits.insert(exit_id); - chosen_loop_exits.insert(loop_id, exit_id); + cfg.block_data[loop_id].exit_info.loop_exit = Some(exit_id); } } - - // Return the chosen exits - trace!("Chosen loop exits: {:?}", chosen_loop_exits); - chosen_loop_exits } - /// See [`Self::compute`] for explanations about what "exits" are. + /// Let's consider the following piece of code: + /// ```text + /// if cond1 { ... } else { ... }; + /// if cond2 { ... } else { ... }; + /// ``` + /// Once converted to MIR, the control-flow is destructured, which means we + /// have gotos everywhere. When reconstructing the control-flow, we have + /// to be careful about the point where we should join the two branches of + /// the first if. + /// For instance, if we don't notice they should be joined at some point (i.e, + /// whatever the branch we take, there is a moment when we go to the exact + /// same place, just before the second if), we might generate code like + /// this, with some duplicata: + /// ```text + /// if cond1 { ...; if cond2 { ... } else { ...} } + /// else { ...; if cond2 { ... } else { ...} } + /// ``` + /// + /// Such a reconstructed program is valid, but it is definitely non-optimal: + /// it is very different from the original program (making it less clean and + /// clear), more bloated, and might involve duplicating the proof effort. + /// + /// For this reason, we need to find the "exit" of the first switch, which is + /// the point where the two branches join. Note that this can be a bit tricky, + /// because there may be more than two branches (if we do `switch(x) { ... }`), + /// and some of them might not join (if they contain a `break`, `panic`, + /// `return`, etc.). /// /// In order to compute the switch exits, we simply recursively compute a /// topologically ordered set of "filtered successors" as follows (note @@ -697,51 +719,20 @@ impl ExitInfo { /// Note that with nested switches, the branches of the inner switches might /// goto the exits of the outer switches: for this reason, we give precedence /// to the outer switches. - fn compute_switch_exits(cfg: &CfgInfo) -> HashMap { - // Compute the successors info map, starting at the root node - trace!( - "- cfg.cfg:\n{:?}\n- cfg.cfg_no_be:\n{:?}\n- cfg.switch_blocks:\n{:?}", - cfg.cfg, cfg.fwd_cfg, cfg.switch_blocks - ); - - // Debugging: print all the successors - { - trace!("Successors info:\n{}\n", { - let mut out = vec![]; - for (bid, info) in cfg.block_data.iter_indexed() { - out.push(format!("{} -> {{flow: {:?}}}", bid, &info.flow).to_string()); - } - out.join("\n") - }); - } - + fn compute_switch_exits(cfg: &mut CfgInfo) { // We need to give precedence to the outer switches: we thus iterate // over the switch blocks in topological order. - let sorted_switch_blocks = cfg + let mut exits_set = HashSet::new(); + for bid in cfg .switch_blocks .iter() .copied() - .sorted_unstable_by_key(|&bid| (cfg.topo_rank(bid), bid)); - - // For every node which is a switch, retrieve the exit. - // As the set of intersection of successors is topologically sorted, the - // exit should be the first node in the set (if the set is non empty). - // Also, we need to explore the nodes in topological order, to give - // precedence to the outer switches. - let mut exits_set = HashSet::new(); - let mut exits = HashMap::new(); - for bid in sorted_switch_blocks { - trace!("Finding exit candidate for: {bid:?}"); + .sorted_unstable_by_key(|&bid| (cfg.topo_rank(bid), bid)) + { let block_data = &cfg.block_data[bid]; - // Find the best successor: this is the node with the highest flow, and the - // highest reverse topological rank. - // - // Remark: in order to rank the nodes, we also use the negation of the - // rank given by the topological order. The last elements of the set - // have the highest flow, that is they are the nodes to which the maximum - // number of paths converge. If several nodes have the same flow, we want - // to take the highest one in the hierarchy: hence the use of the inverse - // of the topological rank. + // Find the best successor: this is the node with the highest flow, and the lowest + // topological rank. If several nodes have the same flow, we want to take the highest + // one in the hierarchy: hence the use of the topological rank. // // Ex.: // ```text @@ -765,190 +756,42 @@ impl ExitInfo { let rank = Reverse(cfg.topo_rank(id)); ((flow, rank), id) }); - let exit_candidate: Option<_> = - best_exit.filter(|exit_id| !exits_set.contains(exit_id)); - if let Some(exit_id) = exit_candidate { - // We have an exit candidate: check that it was not already - // taken by an external switch - trace!("{bid:?} has an exit candidate: {exit_id:?}"); - // It was not taken by an external switch. - // - // We must check that we can't reach the exit of an external switch from one of the - // branches, without going through the exit candidate. - // We do this by simply checking that we can't reach any exits (and use the fact - // that we explore the switch by using a topological order to not discard valid - // exit candidates). - // - // The reason is that it can lead to code like the following: - // ``` - // if ... { // if #1 - // if ... { // if #2 - // ... - // // here, we have a `goto b1`, where b1 is the exit - // // of if #2: we thus stop translating the blocks. - // } - // else { - // ... - // // here, we have a `goto b2`, where b2 is the exit - // // of if #1: we thus stop translating the blocks. - // } - // // We insert code for the block b1 here (which is the exit of - // // the exit of if #2). However, this block should only - // // be executed in the branch "then" of the if #2, not in - // // the branch "else". - // ... - // } - // else { - // ... - // } - // ``` - if cfg.all_paths_go_through(bid, exit_id, &exits_set) { - trace!("Keeping the exit candidate"); - // No intersection: ok - exits_set.insert(exit_id); - exits.insert(bid, exit_id); - } else { - trace!( - "Ignoring the exit candidate because of an intersection with external switches" - ); - } - } else { - trace!("{bid:?} has no successors"); - }; - } - - exits - } - - /// Compute the exits for the loops and the switches (switch on integer and - /// if ... then ... else ...). We need to do this because control-flow in MIR - /// is destructured: we have gotos everywhere. - /// - /// Let's consider the following piece of code: - /// ```text - /// if cond1 { ... } else { ... }; - /// if cond2 { ... } else { ... }; - /// ``` - /// Once converted to MIR, the control-flow is destructured, which means we - /// have gotos everywhere. When reconstructing the control-flow, we have - /// to be careful about the point where we should join the two branches of - /// the first if. - /// For instance, if we don't notice they should be joined at some point (i.e, - /// whatever the branch we take, there is a moment when we go to the exact - /// same place, just before the second if), we might generate code like - /// this, with some duplicata: - /// ```text - /// if cond1 { ...; if cond2 { ... } else { ...} } - /// else { ...; if cond2 { ... } else { ...} } - /// ``` - /// - /// Such a reconstructed program is valid, but it is definitely non-optimal: - /// it is very different from the original program (making it less clean and - /// clear), more bloated, and might involve duplicating the proof effort. - /// - /// For this reason, we need to find the "exit" of the first loop, which is - /// the point where the two branches join. Note that this can be a bit tricky, - /// because there may be more than two branches (if we do `switch(x) { ... }`), - /// and some of them might not join (if they contain a `break`, `panic`, - /// `return`, etc.). - /// - /// Finally, some similar issues arise for loops. For instance, let's consider - /// the following piece of code: - /// ```text - /// while cond1 { - /// e1; - /// if cond2 { - /// break; - /// } - /// e2; - /// } - /// e3; - /// return; - /// ``` - /// - /// Note that in MIR, the loop gets desugared to an if ... then ... else .... - /// From the MIR, We want to generate something like this: - /// ```text - /// loop { - /// if cond1 { - /// e1; - /// if cond2 { - /// break; - /// } - /// e2; - /// continue; - /// } - /// else { - /// break; - /// } - /// }; - /// e3; - /// return; - /// ``` - /// - /// But if we don't pay attention, we might end up with that, once again with - /// duplications: - /// ```text - /// loop { - /// if cond1 { - /// e1; - /// if cond2 { - /// e3; - /// return; - /// } - /// e2; - /// continue; - /// } - /// else { - /// e3; - /// return; - /// } - /// } - /// ``` - /// We thus have to notice that if the loop condition is false, we goto the same - /// block as when following the goto introduced by the break inside the loop, and - /// this block is dubbed the "loop exit". - /// - /// The following function thus computes the "exits" for loops and switches, which - /// are basically the points where control-flow joins. - fn compute(ctx: &TransformCtx, cfg_info: &mut CfgInfo) { - // Compute the loop exits - let loop_exits: HashMap = Self::compute_loop_exits(ctx, cfg_info); - trace!("loop_exits:\n{:?}", loop_exits); - - // Compute the switch exits - let switch_exits: HashMap = Self::compute_switch_exits(cfg_info); - trace!("switch_exits:\n{:?}", switch_exits); - - // Keep track of the exits which were already attributed - let mut all_exits = HashSet::new(); - - // We need to give precedence to the outer switches and loops: we thus iterate - // over the blocks in topological order. - let sorted_blocks = cfg_info - .loop_entries - .iter() - .chain(cfg_info.switch_blocks.iter()) - .copied() - .sorted_unstable_by_key(|&bid| (cfg_info.topo_rank(bid), bid)); - for bid in sorted_blocks { - let block_data = &mut cfg_info.block_data[bid]; - let exit_info = &mut block_data.exit_info; - if block_data.is_loop_header { - // For loops, we always register the exit (if there is one). - // However, the exit may be owned by an outer switch (note that we already took - // care of spreading the exits between the inner/outer loops) - if let Some(&exit_id) = loop_exits.get(&bid) { - exit_info.loop_exit = Some(exit_id); - all_exits.insert(exit_id); - } - } else { - // For switches: check that the exit was not already given to a loop - if let Some(&exit_id) = switch_exits.get(&bid) - && all_exits.insert(exit_id) - { - exit_info.switch_exit = Some(exit_id); - } + // We have an exit candidate: we first check that it was not already taken by an + // external switch. + // + // We then check that we can't reach the exit of an external switch from one of the + // branches, without going through the exit candidate. We do this by simply checking + // that we can't reach any of the exits of outer switches. + // + // The reason is that it can lead to code like the following: + // ``` + // if ... { // if #1 + // if ... { // if #2 + // ... + // // here, we have a `goto b1`, where b1 is the exit + // // of if #2: we thus stop translating the blocks. + // } + // else { + // ... + // // here, we have a `goto b2`, where b2 is the exit + // // of if #1: we thus stop translating the blocks. + // } + // // We insert code for the block b1 here (which is the exit of + // // the exit of if #2). However, this block should only + // // be executed in the branch "then" of the if #2, not in + // // the branch "else". + // ... + // } + // else { + // ... + // } + // ``` + if let Some(exit_id) = best_exit + && !exits_set.contains(&exit_id) + && cfg.all_paths_go_through(bid, exit_id, &exits_set) + { + exits_set.insert(exit_id); + cfg.block_data[bid].exit_info.switch_exit = Some(exit_id); } } } @@ -1000,12 +843,9 @@ struct ReconstructCtx<'a> { impl<'a> ReconstructCtx<'a> { fn build(ctx: &TransformCtx, src_body: &'a src::ExprBody) -> Result { - // Explore the function body to create the control-flow graph without backward - // edges, and identify the loop entries (which are destinations of backward edges). - let mut cfg = CfgInfo::build(&src_body.body)?; - - // Find the exit block for all the loops and switches, if such an exit point exists. - ExitInfo::compute(ctx, &mut cfg); + // Compute all sorts of graph-related information about the control-flow graph, including + // reachability, the dominator tree, loop entries, and loop/switch exits. + let cfg = CfgInfo::build(ctx, &src_body.body)?; // Translate the body by reconstructing the loops and the // conditional branchings. @@ -1250,7 +1090,9 @@ impl<'a> ReconstructCtx<'a> { match self.mode { ReconstructMode::Flow => { // We only support next-block jumps to merge nodes. - if let Some(bid) = block_data.exit_info.switch_exit { + if let Some(bid) = block_data.exit_info.switch_exit + && !block_data.is_loop_header + { self.special_jump_stack.push((bid, SpecialJump::NextBlock)); } } diff --git a/charon/tests/ui/control-flow/issue-297-cfg.out b/charon/tests/ui/control-flow/issue-297-cfg.out index fd91fc04a..510c007d4 100644 --- a/charon/tests/ui/control-flow/issue-297-cfg.out +++ b/charon/tests/ui/control-flow/issue-297-cfg.out @@ -622,7 +622,6 @@ fn f2<'_0, '_1>(@1: &'_0 [u8], @2: &'_1 mut [i16]) -> usize storage_dead(_9) match _8 { Option::None => { - break 0 }, Option::Some => { storage_live(bytes_11) @@ -768,6 +767,7 @@ fn f2<'_0, '_1>(@1: &'_0 [u8], @2: &'_1 mut [i16]) -> usize continue 0 }, } + break 0 } storage_dead(_10) storage_dead(_8) diff --git a/charon/tests/ui/control-flow/loop-break.out b/charon/tests/ui/control-flow/loop-break.out index 4aaec2b45..efb88a874 100644 --- a/charon/tests/ui/control-flow/loop-break.out +++ b/charon/tests/ui/control-flow/loop-break.out @@ -70,18 +70,17 @@ fn main() storage_live(_3) _3 = foo() if move _3 { - break 0 } else { storage_live(_4) _4 = bar() if move _4 { - break 0 } else { storage_dead(_4) storage_dead(_3) continue 0 } } + break 0 } storage_dead(_4) storage_dead(_3) diff --git a/charon/tests/ui/control-flow/loops.out b/charon/tests/ui/control-flow/loops.out index 77038e1d8..969571818 100644 --- a/charon/tests/ui/control-flow/loops.out +++ b/charon/tests/ui/control-flow/loops.out @@ -1634,7 +1634,6 @@ pub fn nested_loops_enum(@1: usize, @2: usize) -> usize storage_dead(_8) match _7 { Option::None => { - break 0 }, Option::Some => { _10 = copy s_3 panic.+ const 1 : usize @@ -1644,6 +1643,7 @@ pub fn nested_loops_enum(@1: usize, @2: usize) -> usize continue 0 }, } + break 0 } storage_dead(_9) storage_dead(_7) @@ -1669,7 +1669,6 @@ pub fn nested_loops_enum(@1: usize, @2: usize) -> usize storage_dead(_16) match _15 { Option::None => { - break 0 }, Option::Some => { storage_live(_18) @@ -1692,7 +1691,6 @@ pub fn nested_loops_enum(@1: usize, @2: usize) -> usize storage_dead(_23) match _22 { Option::None => { - break 0 }, Option::Some => { _25 = copy s_3 panic.+ const 1 : usize @@ -1702,6 +1700,7 @@ pub fn nested_loops_enum(@1: usize, @2: usize) -> usize continue 0 }, } + break 0 } storage_dead(_24) storage_dead(_22) @@ -1712,6 +1711,7 @@ pub fn nested_loops_enum(@1: usize, @2: usize) -> usize continue 0 }, } + break 0 } storage_dead(_17) storage_dead(_15) @@ -1767,7 +1767,6 @@ pub fn loop_inside_if(@1: bool, @2: u32) -> u32 storage_dead(_10) match _9 { Option::None => { - break 0 }, Option::Some => { storage_live(i_12) @@ -1783,6 +1782,7 @@ pub fn loop_inside_if(@1: bool, @2: u32) -> u32 continue 0 }, } + break 0 } storage_dead(_11) storage_dead(_9) diff --git a/charon/tests/ui/gosim-demo.out b/charon/tests/ui/gosim-demo.out index a1d78d158..edf75ad2d 100644 --- a/charon/tests/ui/gosim-demo.out +++ b/charon/tests/ui/gosim-demo.out @@ -574,7 +574,6 @@ where storage_dead(_6) match _5 { Option::None => { - break 0 }, Option::Some => { storage_live(x_8) @@ -622,6 +621,7 @@ where continue 0 }, } + break 0 } _0 = () storage_dead(_7) diff --git a/charon/tests/ui/issue-45-misc.out b/charon/tests/ui/issue-45-misc.out index 82e5628e6..649508f40 100644 --- a/charon/tests/ui/issue-45-misc.out +++ b/charon/tests/ui/issue-45-misc.out @@ -687,7 +687,6 @@ fn cbd(@1: [u8; 33 : usize]) storage_dead(_6) match _5 { Option::None => { - break 0 }, Option::Some => { storage_live(i_8) @@ -709,6 +708,7 @@ fn cbd(@1: [u8; 33 : usize]) continue 0 }, } + break 0 } _0 = () storage_dead(_7) diff --git a/charon/tests/ui/iterator.out b/charon/tests/ui/iterator.out index dbb6c235b..50c38fe21 100644 --- a/charon/tests/ui/iterator.out +++ b/charon/tests/ui/iterator.out @@ -5253,7 +5253,6 @@ fn main() storage_dead(_8) match _7 { Option::None => { - break 0 }, Option::Some => { storage_live(v_10) @@ -5269,6 +5268,7 @@ fn main() continue 0 }, } + break 0 } storage_dead(_9) storage_dead(_7) @@ -5299,7 +5299,6 @@ fn main() storage_dead(_19) match _18 { Option::None => { - break 0 }, Option::Some => { storage_live(v_21) @@ -5319,6 +5318,7 @@ fn main() continue 0 }, } + break 0 } storage_dead(_20) storage_dead(_18) @@ -5347,7 +5347,6 @@ fn main() storage_dead(_31) match _30 { Option::None => { - break 0 }, Option::Some => { _33 = copy i_2 panic.+ const 1 : i32 @@ -5357,6 +5356,7 @@ fn main() continue 0 }, } + break 0 } storage_dead(_32) storage_dead(_30) @@ -5385,7 +5385,6 @@ fn main() storage_dead(_40) match _39 { Option::None => { - break 0 }, Option::Some => { _42 = copy i_2 panic.+ const 1 : i32 @@ -5395,6 +5394,7 @@ fn main() continue 0 }, } + break 0 } storage_dead(_41) storage_dead(_39) diff --git a/charon/tests/ui/simple/range-iter.out b/charon/tests/ui/simple/range-iter.out index f8945c138..28d0b845b 100644 --- a/charon/tests/ui/simple/range-iter.out +++ b/charon/tests/ui/simple/range-iter.out @@ -464,7 +464,6 @@ fn iter(@1: usize) storage_dead(_7) match _6 { Option::None => { - break 0 }, Option::Some => { storage_live(i_9) @@ -475,6 +474,7 @@ fn iter(@1: usize) continue 0 }, } + break 0 } _0 = () storage_dead(_8)