Skip to content

Commit 523c507

Browse files
committed
Auto merge of rust-lang#138157 - scottmcm:inline-more-tiny-things, r=oli-obk
Allow more top-down inlining for single-BB callees This means that things like `<usize as Step>::forward_unchecked` and `<PartialOrd for f32>::le` will inline even if we've already done a bunch of inlining to find the calls to them. Fixes rust-lang#138136 ~~Draft as it's built atop rust-lang#138135, which adds a mir-opt test that's a nice demonstration of this. To see just this change, look at <https://github.com/rust-lang/rust/pull/138157/commits/48f63e3be552605c2933056b77bf23a326757f92>~~ Rebased to be just the inlining change, as the other existing tests show it great.
2 parents addae07 + 91af4aa commit 523c507

19 files changed

+443
-231
lines changed

compiler/rustc_mir_transform/src/cost_checker.rs

+26-21
Original file line numberDiff line numberDiff line change
@@ -37,29 +37,11 @@ impl<'b, 'tcx> CostChecker<'b, 'tcx> {
3737
/// and even the full `Inline` doesn't call `visit_body`, so there's nowhere
3838
/// to put this logic in the visitor.
3939
pub(super) fn add_function_level_costs(&mut self) {
40-
fn is_call_like(bbd: &BasicBlockData<'_>) -> bool {
41-
use TerminatorKind::*;
42-
match bbd.terminator().kind {
43-
Call { .. } | TailCall { .. } | Drop { .. } | Assert { .. } | InlineAsm { .. } => {
44-
true
45-
}
46-
47-
Goto { .. }
48-
| SwitchInt { .. }
49-
| UnwindResume
50-
| UnwindTerminate(_)
51-
| Return
52-
| Unreachable => false,
53-
54-
Yield { .. } | CoroutineDrop | FalseEdge { .. } | FalseUnwind { .. } => {
55-
unreachable!()
56-
}
57-
}
58-
}
59-
6040
// If the only has one Call (or similar), inlining isn't increasing the total
6141
// number of calls, so give extra encouragement to inlining that.
62-
if self.callee_body.basic_blocks.iter().filter(|bbd| is_call_like(bbd)).count() == 1 {
42+
if self.callee_body.basic_blocks.iter().filter(|bbd| is_call_like(bbd.terminator())).count()
43+
== 1
44+
{
6345
self.bonus += CALL_PENALTY;
6446
}
6547
}
@@ -193,3 +175,26 @@ impl<'tcx> Visitor<'tcx> for CostChecker<'_, 'tcx> {
193175
}
194176
}
195177
}
178+
179+
/// A terminator that's more call-like (might do a bunch of work, might panic, etc)
180+
/// than it is goto-/return-like (no side effects, etc).
181+
///
182+
/// Used to treat multi-call functions (which could inline exponentially)
183+
/// different from those that only do one or none of these "complex" things.
184+
pub(super) fn is_call_like(terminator: &Terminator<'_>) -> bool {
185+
use TerminatorKind::*;
186+
match terminator.kind {
187+
Call { .. } | TailCall { .. } | Drop { .. } | Assert { .. } | InlineAsm { .. } => true,
188+
189+
Goto { .. }
190+
| SwitchInt { .. }
191+
| UnwindResume
192+
| UnwindTerminate(_)
193+
| Return
194+
| Unreachable => false,
195+
196+
Yield { .. } | CoroutineDrop | FalseEdge { .. } | FalseUnwind { .. } => {
197+
unreachable!()
198+
}
199+
}
200+
}

compiler/rustc_mir_transform/src/inline.rs

+57-37
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Inlining pass for MIR functions.
22
3+
use std::assert_matches::debug_assert_matches;
34
use std::iter;
45
use std::ops::{Range, RangeFrom};
56

@@ -18,14 +19,15 @@ use rustc_session::config::{DebugInfo, OptLevel};
1819
use rustc_span::source_map::Spanned;
1920
use tracing::{debug, instrument, trace, trace_span};
2021

21-
use crate::cost_checker::CostChecker;
22+
use crate::cost_checker::{CostChecker, is_call_like};
2223
use crate::deref_separator::deref_finder;
2324
use crate::simplify::simplify_cfg;
2425
use crate::validate::validate_types;
2526
use crate::{check_inline, util};
2627

2728
pub(crate) mod cycle;
2829

30+
const HISTORY_DEPTH_LIMIT: usize = 20;
2931
const TOP_DOWN_DEPTH_LIMIT: usize = 5;
3032

3133
#[derive(Clone, Debug)]
@@ -117,6 +119,11 @@ trait Inliner<'tcx> {
117119
/// Should inlining happen for a given callee?
118120
fn should_inline_for_callee(&self, def_id: DefId) -> bool;
119121

122+
fn check_codegen_attributes_extra(
123+
&self,
124+
callee_attrs: &CodegenFnAttrs,
125+
) -> Result<(), &'static str>;
126+
120127
fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool;
121128

122129
/// Returns inlining decision that is based on the examination of callee MIR body.
@@ -128,10 +135,6 @@ trait Inliner<'tcx> {
128135
callee_attrs: &CodegenFnAttrs,
129136
) -> Result<(), &'static str>;
130137

131-
// How many callsites in a body are we allowed to inline? We need to limit this in order
132-
// to prevent super-linear growth in MIR size.
133-
fn inline_limit_for_block(&self) -> Option<usize>;
134-
135138
/// Called when inlining succeeds.
136139
fn on_inline_success(
137140
&mut self,
@@ -142,9 +145,6 @@ trait Inliner<'tcx> {
142145

143146
/// Called when inlining failed or was not performed.
144147
fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str);
145-
146-
/// Called when the inline limit for a body is reached.
147-
fn on_inline_limit_reached(&self) -> bool;
148148
}
149149

150150
struct ForceInliner<'tcx> {
@@ -191,6 +191,14 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
191191
ForceInline::should_run_pass_for_callee(self.tcx(), def_id)
192192
}
193193

194+
fn check_codegen_attributes_extra(
195+
&self,
196+
callee_attrs: &CodegenFnAttrs,
197+
) -> Result<(), &'static str> {
198+
debug_assert_matches!(callee_attrs.inline, InlineAttr::Force { .. });
199+
Ok(())
200+
}
201+
194202
fn check_caller_mir_body(&self, _: &Body<'tcx>) -> bool {
195203
true
196204
}
@@ -224,10 +232,6 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
224232
}
225233
}
226234

227-
fn inline_limit_for_block(&self) -> Option<usize> {
228-
Some(usize::MAX)
229-
}
230-
231235
fn on_inline_success(
232236
&mut self,
233237
callsite: &CallSite<'tcx>,
@@ -261,10 +265,6 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
261265
justification: justification.map(|sym| crate::errors::ForceInlineJustification { sym }),
262266
});
263267
}
264-
265-
fn on_inline_limit_reached(&self) -> bool {
266-
false
267-
}
268268
}
269269

270270
struct NormalInliner<'tcx> {
@@ -278,13 +278,23 @@ struct NormalInliner<'tcx> {
278278
/// The number of `DefId`s is finite, so checking history is enough
279279
/// to ensure that we do not loop endlessly while inlining.
280280
history: Vec<DefId>,
281+
/// How many (multi-call) callsites have we inlined for the top-level call?
282+
///
283+
/// We need to limit this in order to prevent super-linear growth in MIR size.
284+
top_down_counter: usize,
281285
/// Indicates that the caller body has been modified.
282286
changed: bool,
283287
/// Indicates that the caller is #[inline] and just calls another function,
284288
/// and thus we can inline less into it as it'll be inlined itself.
285289
caller_is_inline_forwarder: bool,
286290
}
287291

292+
impl<'tcx> NormalInliner<'tcx> {
293+
fn past_depth_limit(&self) -> bool {
294+
self.history.len() > HISTORY_DEPTH_LIMIT || self.top_down_counter > TOP_DOWN_DEPTH_LIMIT
295+
}
296+
}
297+
288298
impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
289299
fn new(tcx: TyCtxt<'tcx>, def_id: DefId, body: &Body<'tcx>) -> Self {
290300
let typing_env = body.typing_env(tcx);
@@ -295,6 +305,7 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
295305
typing_env,
296306
def_id,
297307
history: Vec::new(),
308+
top_down_counter: 0,
298309
changed: false,
299310
caller_is_inline_forwarder: matches!(
300311
codegen_fn_attrs.inline,
@@ -327,6 +338,17 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
327338
true
328339
}
329340

341+
fn check_codegen_attributes_extra(
342+
&self,
343+
callee_attrs: &CodegenFnAttrs,
344+
) -> Result<(), &'static str> {
345+
if self.past_depth_limit() && matches!(callee_attrs.inline, InlineAttr::None) {
346+
Err("Past depth limit so not inspecting unmarked callee")
347+
} else {
348+
Ok(())
349+
}
350+
}
351+
330352
fn check_caller_mir_body(&self, body: &Body<'tcx>) -> bool {
331353
// Avoid inlining into coroutines, since their `optimized_mir` is used for layout computation,
332354
// which can create a cycle, even when no attempt is made to inline the function in the other
@@ -351,7 +373,11 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
351373
return Err("body has errors");
352374
}
353375

354-
let mut threshold = if self.caller_is_inline_forwarder {
376+
if self.past_depth_limit() && callee_body.basic_blocks.len() > 1 {
377+
return Err("Not inlining multi-block body as we're past a depth limit");
378+
}
379+
380+
let mut threshold = if self.caller_is_inline_forwarder || self.past_depth_limit() {
355381
tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30)
356382
} else if tcx.cross_crate_inlinable(callsite.callee.def_id()) {
357383
tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100)
@@ -431,14 +457,6 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
431457
}
432458
}
433459

434-
fn inline_limit_for_block(&self) -> Option<usize> {
435-
match self.history.len() {
436-
0 => Some(usize::MAX),
437-
1..=TOP_DOWN_DEPTH_LIMIT => Some(1),
438-
_ => None,
439-
}
440-
}
441-
442460
fn on_inline_success(
443461
&mut self,
444462
callsite: &CallSite<'tcx>,
@@ -447,13 +465,21 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
447465
) {
448466
self.changed = true;
449467

468+
let new_calls_count = new_blocks
469+
.clone()
470+
.filter(|&bb| is_call_like(caller_body.basic_blocks[bb].terminator()))
471+
.count();
472+
if new_calls_count > 1 {
473+
self.top_down_counter += 1;
474+
}
475+
450476
self.history.push(callsite.callee.def_id());
451477
process_blocks(self, caller_body, new_blocks);
452478
self.history.pop();
453-
}
454479

455-
fn on_inline_limit_reached(&self) -> bool {
456-
true
480+
if self.history.is_empty() {
481+
self.top_down_counter = 0;
482+
}
457483
}
458484

459485
fn on_inline_failure(&self, _: &CallSite<'tcx>, _: &'static str) {}
@@ -482,8 +508,6 @@ fn process_blocks<'tcx, I: Inliner<'tcx>>(
482508
caller_body: &mut Body<'tcx>,
483509
blocks: Range<BasicBlock>,
484510
) {
485-
let Some(inline_limit) = inliner.inline_limit_for_block() else { return };
486-
let mut inlined_count = 0;
487511
for bb in blocks {
488512
let bb_data = &caller_body[bb];
489513
if bb_data.is_cleanup {
@@ -505,13 +529,6 @@ fn process_blocks<'tcx, I: Inliner<'tcx>>(
505529
Ok(new_blocks) => {
506530
debug!("inlined {}", callsite.callee);
507531
inliner.on_inline_success(&callsite, caller_body, new_blocks);
508-
509-
inlined_count += 1;
510-
if inlined_count == inline_limit {
511-
if inliner.on_inline_limit_reached() {
512-
return;
513-
}
514-
}
515532
}
516533
}
517534
}
@@ -584,6 +601,7 @@ fn try_inlining<'tcx, I: Inliner<'tcx>>(
584601
let callee_attrs = tcx.codegen_fn_attrs(callsite.callee.def_id());
585602
check_inline::is_inline_valid_on_fn(tcx, callsite.callee.def_id())?;
586603
check_codegen_attributes(inliner, callsite, callee_attrs)?;
604+
inliner.check_codegen_attributes_extra(callee_attrs)?;
587605

588606
let terminator = caller_body[callsite.block].terminator.as_ref().unwrap();
589607
let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() };
@@ -770,6 +788,8 @@ fn check_codegen_attributes<'tcx, I: Inliner<'tcx>>(
770788
return Err("has DoNotOptimize attribute");
771789
}
772790

791+
inliner.check_codegen_attributes_extra(callee_attrs)?;
792+
773793
// Reachability pass defines which functions are eligible for inlining. Generally inlining
774794
// other functions is incorrect because they could reference symbols that aren't exported.
775795
let is_generic = callsite.callee.args.non_erasable_generics().next().is_some();

library/core/src/mem/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -856,8 +856,13 @@ pub const fn replace<T>(dest: &mut T, src: T) -> T {
856856
// such that the old value is not duplicated. Nothing is dropped and
857857
// nothing here can panic.
858858
unsafe {
859-
let result = ptr::read(dest);
860-
ptr::write(dest, src);
859+
// Ideally we wouldn't use the intrinsics here, but going through the
860+
// `ptr` methods introduces two unnecessary UbChecks, so until we can
861+
// remove those for pointers that come from references, this uses the
862+
// intrinsics instead so this stays very cheap in MIR (and debug).
863+
864+
let result = crate::intrinsics::read_via_copy(dest);
865+
crate::intrinsics::write_via_move(dest, src);
861866
result
862867
}
863868
}

src/tools/miri/tests/fail/both_borrows/aliasing_mut4.tree.stderr

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
error: Undefined Behavior: write access through <TAG> at ALLOC[0x0] is forbidden
22
--> RUSTLIB/core/src/mem/mod.rs:LL:CC
33
|
4-
LL | ptr::write(dest, src);
5-
| ^^^^^^^^^^^^^^^^^^^^^ write access through <TAG> at ALLOC[0x0] is forbidden
4+
LL | crate::intrinsics::write_via_move(dest, src);
5+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ write access through <TAG> at ALLOC[0x0] is forbidden
66
|
77
= help: this indicates a potential bug in the program: it performed an invalid operation, but the Tree Borrows rules it violated are still experimental
88
= help: the accessed tag <TAG> is foreign to the protected tag <TAG> (i.e., it is not a child)

tests/codegen/range-loop.rs

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//@ ignore-std-debug-assertions
2+
//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
3+
4+
#![crate_type = "lib"]
5+
6+
// Ensure that MIR optimizations have cleaned things up enough that the IR we
7+
// emit is good even without running the LLVM optimizations.
8+
9+
// CHECK-NOT: define
10+
11+
// CHECK-LABEL: define{{.+}}void @call_for_zero_to_n
12+
#[no_mangle]
13+
pub fn call_for_zero_to_n(n: u32, f: fn(u32)) {
14+
// CHECK: start:
15+
// CHECK-NOT: alloca
16+
// CHECK: %[[IND:.+]] = alloca [4 x i8]
17+
// CHECK-NEXT: %[[ALWAYS_SOME_OPTION:.+]] = alloca
18+
// CHECK-NOT: alloca
19+
// CHECK: store i32 0, ptr %[[IND]],
20+
// CHECK: br label %[[HEAD:.+]]
21+
22+
// CHECK: [[HEAD]]:
23+
// CHECK: %[[T1:.+]] = load i32, ptr %[[IND]],
24+
// CHECK: %[[NOT_DONE:.+]] = icmp ult i32 %[[T1]], %n
25+
// CHECK: br i1 %[[NOT_DONE]], label %[[BODY:.+]], label %[[BREAK:.+]]
26+
27+
// CHECK: [[BREAK]]:
28+
// CHECK: ret void
29+
30+
// CHECK: [[BODY]]:
31+
// CHECK: %[[T2:.+]] = load i32, ptr %[[IND]],
32+
// CHECK: %[[T3:.+]] = add nuw i32 %[[T2]], 1
33+
// CHECK: store i32 %[[T3]], ptr %[[IND]],
34+
35+
// CHECK: store i32 %[[T2]]
36+
// CHECK: %[[T4:.+]] = load i32
37+
// CHECK: call void %f(i32{{.+}}%[[T4]])
38+
39+
for i in 0..n {
40+
f(i);
41+
}
42+
}
43+
44+
// CHECK-NOT: define

tests/mir-opt/inline/exponential_runtime.rs

+5
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,15 @@ fn main() {
8787
// CHECK-LABEL: fn main(
8888
// CHECK-NOT: inlined
8989
// CHECK: (inlined <() as G>::call)
90+
// CHECK-NOT: inlined
9091
// CHECK: (inlined <() as F>::call)
92+
// CHECK-NOT: inlined
9193
// CHECK: (inlined <() as E>::call)
94+
// CHECK-NOT: inlined
9295
// CHECK: (inlined <() as D>::call)
96+
// CHECK-NOT: inlined
9397
// CHECK: (inlined <() as C>::call)
98+
// CHECK-NOT: inlined
9499
// CHECK: (inlined <() as B>::call)
95100
// CHECK-NOT: inlined
96101
<() as G>::call();

0 commit comments

Comments
 (0)