Skip to content

Commit 10923a0

Browse files
committed
Auto merge of rust-lang#138157 - scottmcm:inline-more-tiny-things, r=<try>
Allow more top-down inlining for single-BB callees This means that things like `<usize as Step>::forward_unchecked` and `<PartialOrd for f32>::le` will inline even if we've already done a bunch of inlining to find the calls to them. Fixes rust-lang#138136 Draft as it's built atop rust-lang#138135, which adds a mir-opt test that's a nice demonstration of this.
2 parents 03eb454 + 48f63e3 commit 10923a0

18 files changed

+600
-207
lines changed

compiler/rustc_mir_transform/src/inline.rs

+31-35
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use crate::{check_inline, util};
2626

2727
pub(crate) mod cycle;
2828

29+
const HISTORY_DEPTH_LIMIT: usize = 20;
2930
const TOP_DOWN_DEPTH_LIMIT: usize = 5;
3031

3132
#[derive(Clone, Debug)]
@@ -128,10 +129,6 @@ trait Inliner<'tcx> {
128129
callee_attrs: &CodegenFnAttrs,
129130
) -> Result<(), &'static str>;
130131

131-
// How many callsites in a body are we allowed to inline? We need to limit this in order
132-
// to prevent super-linear growth in MIR size.
133-
fn inline_limit_for_block(&self) -> Option<usize>;
134-
135132
/// Called when inlining succeeds.
136133
fn on_inline_success(
137134
&mut self,
@@ -142,9 +139,6 @@ trait Inliner<'tcx> {
142139

143140
/// Called when inlining failed or was not performed.
144141
fn on_inline_failure(&self, callsite: &CallSite<'tcx>, reason: &'static str);
145-
146-
/// Called when the inline limit for a body is reached.
147-
fn on_inline_limit_reached(&self) -> bool;
148142
}
149143

150144
struct ForceInliner<'tcx> {
@@ -224,10 +218,6 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
224218
}
225219
}
226220

227-
fn inline_limit_for_block(&self) -> Option<usize> {
228-
Some(usize::MAX)
229-
}
230-
231221
fn on_inline_success(
232222
&mut self,
233223
callsite: &CallSite<'tcx>,
@@ -261,10 +251,6 @@ impl<'tcx> Inliner<'tcx> for ForceInliner<'tcx> {
261251
justification: justification.map(|sym| crate::errors::ForceInlineJustification { sym }),
262252
});
263253
}
264-
265-
fn on_inline_limit_reached(&self) -> bool {
266-
false
267-
}
268254
}
269255

270256
struct NormalInliner<'tcx> {
@@ -278,6 +264,10 @@ struct NormalInliner<'tcx> {
278264
/// The number of `DefId`s is finite, so checking history is enough
279265
/// to ensure that we do not loop endlessly while inlining.
280266
history: Vec<DefId>,
267+
/// How many (multi-call) callsites have we inlined for the top-level call?
268+
///
269+
/// We need to limit this in order to prevent super-linear growth in MIR size.
270+
top_down_counter: usize,
281271
/// Indicates that the caller body has been modified.
282272
changed: bool,
283273
/// Indicates that the caller is #[inline] and just calls another function,
@@ -295,6 +285,7 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
295285
typing_env,
296286
def_id,
297287
history: Vec::new(),
288+
top_down_counter: 0,
298289
changed: false,
299290
caller_is_inline_forwarder: matches!(
300291
codegen_fn_attrs.inline,
@@ -351,6 +342,15 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
351342
return Err("body has errors");
352343
}
353344

345+
if callee_body.basic_blocks.len() > 1 {
346+
if self.history.len() > HISTORY_DEPTH_LIMIT {
347+
return Err("Too many blocks for deep history");
348+
}
349+
if self.top_down_counter > TOP_DOWN_DEPTH_LIMIT {
350+
return Err("Too many blocks for more top-down inlining");
351+
}
352+
}
353+
354354
let mut threshold = if self.caller_is_inline_forwarder {
355355
tcx.sess.opts.unstable_opts.inline_mir_forwarder_threshold.unwrap_or(30)
356356
} else if tcx.cross_crate_inlinable(callsite.callee.def_id()) {
@@ -431,14 +431,6 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
431431
}
432432
}
433433

434-
fn inline_limit_for_block(&self) -> Option<usize> {
435-
match self.history.len() {
436-
0 => Some(usize::MAX),
437-
1..=TOP_DOWN_DEPTH_LIMIT => Some(1),
438-
_ => None,
439-
}
440-
}
441-
442434
fn on_inline_success(
443435
&mut self,
444436
callsite: &CallSite<'tcx>,
@@ -447,13 +439,26 @@ impl<'tcx> Inliner<'tcx> for NormalInliner<'tcx> {
447439
) {
448440
self.changed = true;
449441

442+
let new_calls_count = new_blocks
443+
.clone()
444+
.filter(|&bb| {
445+
matches!(
446+
caller_body.basic_blocks[bb].terminator().kind,
447+
TerminatorKind::Call { .. },
448+
)
449+
})
450+
.count();
451+
if new_calls_count > 1 {
452+
self.top_down_counter += 1;
453+
}
454+
450455
self.history.push(callsite.callee.def_id());
451456
process_blocks(self, caller_body, new_blocks);
452457
self.history.pop();
453-
}
454458

455-
fn on_inline_limit_reached(&self) -> bool {
456-
true
459+
if self.history.is_empty() {
460+
self.top_down_counter = 0;
461+
}
457462
}
458463

459464
fn on_inline_failure(&self, _: &CallSite<'tcx>, _: &'static str) {}
@@ -482,8 +487,6 @@ fn process_blocks<'tcx, I: Inliner<'tcx>>(
482487
caller_body: &mut Body<'tcx>,
483488
blocks: Range<BasicBlock>,
484489
) {
485-
let Some(inline_limit) = inliner.inline_limit_for_block() else { return };
486-
let mut inlined_count = 0;
487490
for bb in blocks {
488491
let bb_data = &caller_body[bb];
489492
if bb_data.is_cleanup {
@@ -505,13 +508,6 @@ fn process_blocks<'tcx, I: Inliner<'tcx>>(
505508
Ok(new_blocks) => {
506509
debug!("inlined {}", callsite.callee);
507510
inliner.on_inline_success(&callsite, caller_body, new_blocks);
508-
509-
inlined_count += 1;
510-
if inlined_count == inline_limit {
511-
if inliner.on_inline_limit_reached() {
512-
return;
513-
}
514-
}
515511
}
516512
}
517513
}

library/core/src/cmp.rs

+84
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ mod bytewise;
2929
pub(crate) use bytewise::BytewiseEq;
3030

3131
use self::Ordering::*;
32+
use crate::ops::ControlFlow::{self, Break, Continue};
3233

3334
/// Trait for comparisons using the equality operator.
3435
///
@@ -1446,6 +1447,54 @@ pub macro PartialOrd($item:item) {
14461447
/* compiler built-in */
14471448
}
14481449

1450+
/// Helpers for chaining together field PartialOrds into the full type's ordering.
1451+
///
1452+
/// If the two values are equal, returns `ControlFlow::Continue`.
1453+
/// If the two values are not equal, returns `ControlFlow::Break(self OP other)`.
1454+
///
1455+
/// This allows simple types like `i32` and `f64` to just emit two comparisons
1456+
/// directly, instead of needing to optimize the 3-way comparison.
1457+
///
1458+
/// Currently this is done using specialization, but it doesn't need that:
1459+
/// it could be provided methods on `PartialOrd` instead and work fine.
1460+
pub(crate) trait SpecChainingPartialOrd<Rhs>: PartialOrd<Rhs> {
1461+
fn spec_chain_lt(&self, other: &Rhs) -> ControlFlow<bool>;
1462+
fn spec_chain_le(&self, other: &Rhs) -> ControlFlow<bool>;
1463+
fn spec_chain_gt(&self, other: &Rhs) -> ControlFlow<bool>;
1464+
fn spec_chain_ge(&self, other: &Rhs) -> ControlFlow<bool>;
1465+
}
1466+
1467+
impl<T: PartialOrd<U>, U> SpecChainingPartialOrd<U> for T {
1468+
#[inline]
1469+
default fn spec_chain_lt(&self, other: &U) -> ControlFlow<bool> {
1470+
match PartialOrd::partial_cmp(self, other) {
1471+
Some(Equal) => Continue(()),
1472+
c => Break(c.is_some_and(Ordering::is_lt)),
1473+
}
1474+
}
1475+
#[inline]
1476+
default fn spec_chain_le(&self, other: &U) -> ControlFlow<bool> {
1477+
match PartialOrd::partial_cmp(self, other) {
1478+
Some(Equal) => Continue(()),
1479+
c => Break(c.is_some_and(Ordering::is_le)),
1480+
}
1481+
}
1482+
#[inline]
1483+
default fn spec_chain_gt(&self, other: &U) -> ControlFlow<bool> {
1484+
match PartialOrd::partial_cmp(self, other) {
1485+
Some(Equal) => Continue(()),
1486+
c => Break(c.is_some_and(Ordering::is_gt)),
1487+
}
1488+
}
1489+
#[inline]
1490+
default fn spec_chain_ge(&self, other: &U) -> ControlFlow<bool> {
1491+
match PartialOrd::partial_cmp(self, other) {
1492+
Some(Equal) => Continue(()),
1493+
c => Break(c.is_some_and(Ordering::is_ge)),
1494+
}
1495+
}
1496+
}
1497+
14491498
/// Compares and returns the minimum of two values.
14501499
///
14511500
/// Returns the first argument if the comparison determines them to be equal.
@@ -1741,6 +1790,7 @@ where
17411790
mod impls {
17421791
use crate::cmp::Ordering::{self, Equal, Greater, Less};
17431792
use crate::hint::unreachable_unchecked;
1793+
use crate::ops::ControlFlow::{self, Break, Continue};
17441794

17451795
macro_rules! partial_eq_impl {
17461796
($($t:ty)*) => ($(
@@ -1779,6 +1829,36 @@ mod impls {
17791829

17801830
eq_impl! { () bool char usize u8 u16 u32 u64 u128 isize i8 i16 i32 i64 i128 }
17811831

1832+
macro_rules! chaining_impl {
1833+
($t:ty) => {
1834+
// These implementations are the same for `Ord` or `PartialOrd` types
1835+
// because if either is NAN the `==` test will fail so we end up in
1836+
// the `Break` case and the comparison will correctly return `false`.
1837+
impl super::SpecChainingPartialOrd<$t> for $t {
1838+
#[inline]
1839+
fn spec_chain_lt(&self, other: &Self) -> ControlFlow<bool> {
1840+
let (lhs, rhs) = (*self, *other);
1841+
if lhs == rhs { Continue(()) } else { Break(lhs < rhs) }
1842+
}
1843+
#[inline]
1844+
fn spec_chain_le(&self, other: &Self) -> ControlFlow<bool> {
1845+
let (lhs, rhs) = (*self, *other);
1846+
if lhs == rhs { Continue(()) } else { Break(lhs <= rhs) }
1847+
}
1848+
#[inline]
1849+
fn spec_chain_gt(&self, other: &Self) -> ControlFlow<bool> {
1850+
let (lhs, rhs) = (*self, *other);
1851+
if lhs == rhs { Continue(()) } else { Break(lhs > rhs) }
1852+
}
1853+
#[inline]
1854+
fn spec_chain_ge(&self, other: &Self) -> ControlFlow<bool> {
1855+
let (lhs, rhs) = (*self, *other);
1856+
if lhs == rhs { Continue(()) } else { Break(lhs >= rhs) }
1857+
}
1858+
}
1859+
};
1860+
}
1861+
17821862
macro_rules! partial_ord_impl {
17831863
($($t:ty)*) => ($(
17841864
#[stable(feature = "rust1", since = "1.0.0")]
@@ -1801,6 +1881,8 @@ mod impls {
18011881
#[inline(always)]
18021882
fn gt(&self, other: &$t) -> bool { (*self) > (*other) }
18031883
}
1884+
1885+
chaining_impl!($t);
18041886
)*)
18051887
}
18061888

@@ -1840,6 +1922,8 @@ mod impls {
18401922
fn gt(&self, other: &$t) -> bool { (*self) > (*other) }
18411923
}
18421924

1925+
chaining_impl!($t);
1926+
18431927
#[stable(feature = "rust1", since = "1.0.0")]
18441928
impl Ord for $t {
18451929
#[inline]

library/core/src/tuple.rs

+14-11
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// See core/src/primitive_docs.rs for documentation.
22

33
use crate::cmp::Ordering::{self, *};
4+
use crate::cmp::SpecChainingPartialOrd;
45
use crate::marker::{ConstParamTy_, StructuralPartialEq, UnsizedConstParamTy};
6+
use crate::ops::ControlFlow::{Break, Continue};
57

68
// Recursive macro for implementing n-ary tuple functions and operations
79
//
@@ -80,19 +82,19 @@ macro_rules! tuple_impls {
8082
}
8183
#[inline]
8284
fn lt(&self, other: &($($T,)+)) -> bool {
83-
lexical_ord!(lt, Less, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
85+
lexical_ord!(lt, spec_chain_lt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
8486
}
8587
#[inline]
8688
fn le(&self, other: &($($T,)+)) -> bool {
87-
lexical_ord!(le, Less, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
89+
lexical_ord!(le, spec_chain_le, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
8890
}
8991
#[inline]
9092
fn ge(&self, other: &($($T,)+)) -> bool {
91-
lexical_ord!(ge, Greater, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
93+
lexical_ord!(ge, spec_chain_ge, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
9294
}
9395
#[inline]
9496
fn gt(&self, other: &($($T,)+)) -> bool {
95-
lexical_ord!(gt, Greater, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
97+
lexical_ord!(gt, spec_chain_gt, $( ${ignore($T)} self.${index()}, other.${index()} ),+)
9698
}
9799
}
98100
}
@@ -171,15 +173,16 @@ macro_rules! maybe_tuple_doc {
171173
// `(a1, a2, a3) < (b1, b2, b3)` would be `lexical_ord!(lt, opt_is_lt, a1, b1,
172174
// a2, b2, a3, b3)` (and similarly for `lexical_cmp`)
173175
//
174-
// `$ne_rel` is only used to determine the result after checking that they're
175-
// not equal, so `lt` and `le` can both just use `Less`.
176+
// `$chain_rel` is the method from `SpecChainingPartialOrd` to use for all but the
177+
// final value, to produce better results for simple primitives.
176178
macro_rules! lexical_ord {
177-
($rel: ident, $ne_rel: ident, $a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => {{
178-
let c = PartialOrd::partial_cmp(&$a, &$b);
179-
if c != Some(Equal) { c == Some($ne_rel) }
180-
else { lexical_ord!($rel, $ne_rel, $($rest_a, $rest_b),+) }
179+
($rel: ident, $chain_rel: ident, $a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => {{
180+
match SpecChainingPartialOrd::$chain_rel(&$a, &$b) {
181+
Break(val) => val,
182+
Continue(()) => lexical_ord!($rel, $chain_rel, $($rest_a, $rest_b),+),
183+
}
181184
}};
182-
($rel: ident, $ne_rel: ident, $a:expr, $b:expr) => {
185+
($rel: ident, $chain_rel: ident, $a:expr, $b:expr) => {
183186
// Use the specific method for the last element
184187
PartialOrd::$rel(&$a, &$b)
185188
};

tests/mir-opt/inline/exponential_runtime.rs

+5
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,15 @@ fn main() {
8787
// CHECK-LABEL: fn main(
8888
// CHECK-NOT: inlined
8989
// CHECK: (inlined <() as G>::call)
90+
// CHECK-NOT: inlined
9091
// CHECK: (inlined <() as F>::call)
92+
// CHECK-NOT: inlined
9193
// CHECK: (inlined <() as E>::call)
94+
// CHECK-NOT: inlined
9295
// CHECK: (inlined <() as D>::call)
96+
// CHECK-NOT: inlined
9397
// CHECK: (inlined <() as C>::call)
98+
// CHECK-NOT: inlined
9499
// CHECK: (inlined <() as B>::call)
95100
// CHECK-NOT: inlined
96101
<() as G>::call();

tests/mir-opt/inline/inline_diverging.h.Inline.panic-abort.diff

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
let _1: (!, !);
77
+ let mut _2: fn() -> ! {sleep};
88
+ let mut _7: ();
9+
+ let mut _8: ();
910
+ scope 1 (inlined call_twice::<!, fn() -> ! {sleep}>) {
1011
+ debug f => _2;
1112
+ let mut _3: &fn() -> ! {sleep};
@@ -17,6 +18,10 @@
1718
+ scope 3 {
1819
+ debug b => _6;
1920
+ }
21+
+ scope 6 (inlined <fn() -> ! {sleep} as Fn<()>>::call - shim(fn() -> ! {sleep})) {
22+
+ scope 7 (inlined sleep) {
23+
+ }
24+
+ }
2025
+ }
2126
+ scope 4 (inlined <fn() -> ! {sleep} as Fn<()>>::call - shim(fn() -> ! {sleep})) {
2227
+ scope 5 (inlined sleep) {

tests/mir-opt/inline/inline_diverging.h.Inline.panic-unwind.diff

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
let _1: (!, !);
77
+ let mut _2: fn() -> ! {sleep};
88
+ let mut _8: ();
9+
+ let mut _9: ();
910
+ scope 1 (inlined call_twice::<!, fn() -> ! {sleep}>) {
1011
+ debug f => _2;
1112
+ let mut _3: &fn() -> ! {sleep};
@@ -18,6 +19,10 @@
1819
+ scope 3 {
1920
+ debug b => _6;
2021
+ }
22+
+ scope 6 (inlined <fn() -> ! {sleep} as Fn<()>>::call - shim(fn() -> ! {sleep})) {
23+
+ scope 7 (inlined sleep) {
24+
+ }
25+
+ }
2126
+ }
2227
+ scope 4 (inlined <fn() -> ! {sleep} as Fn<()>>::call - shim(fn() -> ! {sleep})) {
2328
+ scope 5 (inlined sleep) {

0 commit comments

Comments
 (0)