Skip to content

Commit ae16fc2

Browse files
committed
Change inlining to favor three underlying division functions
1 parent eb45fc2 commit ae16fc2

File tree

1 file changed

+28
-7
lines changed
  • src/int/specialized_div_rem

1 file changed

+28
-7
lines changed

src/int/specialized_div_rem/mod.rs

+28-7
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,13 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
113113
zero_div_fn()
114114
}
115115

116+
// `inline(never)` is placed on unsigned division functions so that there are just three division
117+
// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
118+
// division functions. The signed functions like `i32_div_rem` will get inlined into the
119+
// `compiler-builtins` signed division functions, so that they directly call the three division
120+
// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
121+
// signed division functions, which results in an explosion in code size.
122+
116123
// Whether `trifecta` or `delegate` is faster for 128-bit division depends on the speed at which a
117124
// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
118125
// faster if the target pointer width is at least 64.
@@ -129,7 +136,9 @@ impl_trifecta!(
129136
u32,
130137
u64,
131138
u128,
132-
i128,;
139+
i128,
140+
inline(never);
141+
inline
133142
);
134143

135144
// If the pointer width is less than 64, then the target architecture almost certainly does not have
@@ -148,7 +157,9 @@ impl_delegate!(
148157
u32,
149158
u64,
150159
u128,
151-
i128,;
160+
i128,
161+
inline(never);
162+
inline
152163
);
153164

154165
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -190,7 +201,9 @@ impl_asymmetric!(
190201
u32,
191202
u64,
192203
u128,
193-
i128,;
204+
i128,
205+
inline(never);
206+
inline
194207
);
195208

196209
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -223,7 +236,9 @@ impl_delegate!(
223236
u16,
224237
u32,
225238
u64,
226-
i64,;
239+
i64,
240+
inline(never);
241+
inline
227242
);
228243

229244
// When not on x86 and the pointer width is 64, use `binary_long`.
@@ -238,7 +253,9 @@ impl_binary_long!(
238253
u64_normalization_shift,
239254
64,
240255
u64,
241-
i64,;
256+
i64,
257+
inline(never);
258+
inline
242259
);
243260

244261
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -280,7 +297,9 @@ impl_asymmetric!(
280297
u16,
281298
u32,
282299
u64,
283-
i64,;
300+
i64,
301+
inline(never);
302+
inline
284303
);
285304

286305
// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
@@ -291,5 +310,7 @@ impl_binary_long!(
291310
u32_normalization_shift,
292311
32,
293312
u32,
294-
i32,;
313+
i32,
314+
inline(never);
315+
inline
295316
);

0 commit comments

Comments
 (0)