@@ -113,6 +113,13 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
113
113
zero_div_fn ( )
114
114
}
115
115
116
+ // `inline(never)` is placed on unsigned division functions so that there are just three division
117
+ // functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
118
+ // division functions. The signed functions like `i32_div_rem` will get inlined into the
119
+ // `compiler-builtins` signed division functions, so that they directly call the three division
120
+ // functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
121
+ // signed division functions, which results in an explosion in code size.
122
+
116
123
// Whether `trifecta` or `delegate` is faster for 128-bit division depends on the speed at which a
117
124
// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
118
125
// faster if the target pointer width is at least 64.
@@ -129,7 +136,9 @@ impl_trifecta!(
129
136
u32 ,
130
137
u64 ,
131
138
u128 ,
132
- i128 , ;
139
+ i128 ,
140
+ inline( never) ;
141
+ inline
133
142
) ;
134
143
135
144
// If the pointer width is less than 64, then the target architecture almost certainly does not have
@@ -148,7 +157,9 @@ impl_delegate!(
148
157
u32 ,
149
158
u64 ,
150
159
u128 ,
151
- i128 , ;
160
+ i128 ,
161
+ inline( never) ;
162
+ inline
152
163
) ;
153
164
154
165
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -190,7 +201,9 @@ impl_asymmetric!(
190
201
u32 ,
191
202
u64 ,
192
203
u128 ,
193
- i128 , ;
204
+ i128 ,
205
+ inline( never) ;
206
+ inline
194
207
) ;
195
208
196
209
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -223,7 +236,9 @@ impl_delegate!(
223
236
u16 ,
224
237
u32 ,
225
238
u64 ,
226
- i64 , ;
239
+ i64 ,
240
+ inline( never) ;
241
+ inline
227
242
) ;
228
243
229
244
// When not on x86 and the pointer width is 64, use `binary_long`.
@@ -238,7 +253,9 @@ impl_binary_long!(
238
253
u64_normalization_shift,
239
254
64 ,
240
255
u64 ,
241
- i64 , ;
256
+ i64 ,
257
+ inline( never) ;
258
+ inline
242
259
) ;
243
260
244
261
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -280,7 +297,9 @@ impl_asymmetric!(
280
297
u16 ,
281
298
u32 ,
282
299
u64 ,
283
- i64 , ;
300
+ i64 ,
301
+ inline( never) ;
302
+ inline
284
303
) ;
285
304
286
305
// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
@@ -291,5 +310,7 @@ impl_binary_long!(
291
310
u32_normalization_shift,
292
311
32 ,
293
312
u32 ,
294
- i32 , ;
313
+ i32 ,
314
+ inline( never) ;
315
+ inline
295
316
) ;
0 commit comments