Skip to content

Commit 9ea4757

Browse files
bors[bot] and cuviper
authored
Merge #216
216: Reduce the computational complexity of formatting r=cuviper a=cuviper

This only matters for very large values, more than about 1000 digits, but it does make a big difference for those.

For the included benchmarks, I got these base-10 results before:

```text
test to_str_radix_10 ... bench: 2,095 ns/iter (+/- 20)
test to_str_radix_10_2 ... bench: 150,702 ns/iter (+/- 1,231)
```

The `_2` variant is over 10,000 bits. With the new code, I get these results:

```text
test to_str_radix_10 ... bench: 2,065 ns/iter (+/- 22)
test to_str_radix_10_2 ... bench: 44,130 ns/iter (+/- 231)
```

Co-authored-by: Josh Stone <[email protected]>
2 parents 3753602 + 3af4287 commit 9ea4757

File tree

4 files changed

+79
-34
lines changed

4 files changed

+79
-34
lines changed

benches/bigint.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -174,35 +174,40 @@ fn fib_to_string(b: &mut Bencher) {
174174
b.iter(|| fib.to_string());
175175
}
176176

177-
fn to_str_radix_bench(b: &mut Bencher, radix: u32) {
177+
fn to_str_radix_bench(b: &mut Bencher, radix: u32, bits: u64) {
178178
let mut rng = get_rng();
179-
let x = rng.gen_bigint(1009);
179+
let x = rng.gen_bigint(bits);
180180
b.iter(|| x.to_str_radix(radix));
181181
}
182182

183183
#[bench]
184184
fn to_str_radix_02(b: &mut Bencher) {
185-
to_str_radix_bench(b, 2);
185+
to_str_radix_bench(b, 2, 1009);
186186
}
187187

188188
#[bench]
189189
fn to_str_radix_08(b: &mut Bencher) {
190-
to_str_radix_bench(b, 8);
190+
to_str_radix_bench(b, 8, 1009);
191191
}
192192

193193
#[bench]
194194
fn to_str_radix_10(b: &mut Bencher) {
195-
to_str_radix_bench(b, 10);
195+
to_str_radix_bench(b, 10, 1009);
196+
}
197+
198+
#[bench]
199+
fn to_str_radix_10_2(b: &mut Bencher) {
200+
to_str_radix_bench(b, 10, 10009);
196201
}
197202

198203
#[bench]
199204
fn to_str_radix_16(b: &mut Bencher) {
200-
to_str_radix_bench(b, 16);
205+
to_str_radix_bench(b, 16, 1009);
201206
}
202207

203208
#[bench]
204209
fn to_str_radix_36(b: &mut Bencher) {
205-
to_str_radix_bench(b, 36);
210+
to_str_radix_bench(b, 36, 1009);
206211
}
207212

208213
fn from_str_radix_bench(b: &mut Bencher, radix: u32) {

src/biguint/convert.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use core::cmp::Ordering::{Equal, Greater, Less};
1515
use core::convert::TryFrom;
1616
use core::mem;
1717
use core::str::FromStr;
18-
use num_integer::Integer;
18+
use num_integer::{Integer, Roots};
1919
use num_traits::float::FloatCore;
2020
use num_traits::{FromPrimitive, Num, PrimInt, ToPrimitive, Zero};
2121

@@ -665,6 +665,39 @@ pub(super) fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec<u8> {
665665
let (base, power) = get_radix_base(radix, big_digit::HALF_BITS);
666666
let radix = radix as BigDigit;
667667

668+
// For very large numbers, the O(n²) loop of repeated `div_rem_digit` dominates the
669+
// performance. We can mitigate this by dividing into chunks of a larger base first.
670+
// The threshold for this was chosen by anecdotal performance measurements to
671+
// approximate where this starts to make a noticeable difference.
672+
if digits.data.len() >= 64 {
673+
let mut big_base = BigUint::from(base * base);
674+
let mut big_power = 2usize;
675+
676+
// Choose a target base length near √n.
677+
let target_len = digits.data.len().sqrt();
678+
while big_base.data.len() < target_len {
679+
big_base = &big_base * &big_base;
680+
big_power *= 2;
681+
}
682+
683+
// This outer loop will run approximately √n times.
684+
while digits > big_base {
685+
// This is still the dominating factor, with n digits divided by √n digits.
686+
let (q, mut big_r) = digits.div_rem(&big_base);
687+
digits = q;
688+
689+
// This inner loop now has O(√n²)=O(n) behavior altogether.
690+
for _ in 0..big_power {
691+
let (q, mut r) = div_rem_digit(big_r, base);
692+
big_r = q;
693+
for _ in 0..power {
694+
res.push((r % radix) as u8);
695+
r /= radix;
696+
}
697+
}
698+
}
699+
}
700+
668701
while digits.data.len() > 1 {
669702
let (q, mut r) = div_rem_digit(digits, base);
670703
for _ in 0..power {

src/biguint/division.rs

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use super::addition::__add2;
22
#[cfg(not(u64_digit))]
33
use super::u32_to_u128;
4-
use super::BigUint;
4+
use super::{cmp_slice, BigUint};
55

66
use crate::big_digit::{self, BigDigit, DoubleBigDigit};
77
use crate::UsizePromotion;
@@ -153,14 +153,14 @@ fn div_rem(mut u: BigUint, mut d: BigUint) -> (BigUint, BigUint) {
153153
//
154154
let shift = d.data.last().unwrap().leading_zeros() as usize;
155155

156-
let (q, r) = if shift == 0 {
156+
if shift == 0 {
157157
// no need to clone d
158-
div_rem_core(u, &d)
158+
div_rem_core(u, &d.data)
159159
} else {
160-
div_rem_core(u << shift, &(d << shift))
161-
};
162-
// renormalize the remainder
163-
(q, r >> shift)
160+
let (q, r) = div_rem_core(u << shift, &(d << shift).data);
161+
// renormalize the remainder
162+
(q, r >> shift)
163+
}
164164
}
165165

166166
pub(super) fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) {
@@ -195,24 +195,21 @@ pub(super) fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) {
195195
//
196196
let shift = d.data.last().unwrap().leading_zeros() as usize;
197197

198-
let (q, r) = if shift == 0 {
198+
if shift == 0 {
199199
// no need to clone d
200-
div_rem_core(u.clone(), d)
200+
div_rem_core(u.clone(), &d.data)
201201
} else {
202-
div_rem_core(u << shift, &(d << shift))
203-
};
204-
// renormalize the remainder
205-
(q, r >> shift)
202+
let (q, r) = div_rem_core(u << shift, &(d << shift).data);
203+
// renormalize the remainder
204+
(q, r >> shift)
205+
}
206206
}
207207

208208
/// An implementation of the base division algorithm.
209209
/// Knuth, TAOCP vol 2 section 4.3.1, algorithm D, with an improvement from exercises 19-21.
210-
fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
211-
debug_assert!(
212-
a.data.len() >= b.data.len()
213-
&& b.data.len() > 1
214-
&& b.data.last().unwrap().leading_zeros() == 0
215-
);
210+
fn div_rem_core(mut a: BigUint, b: &[BigDigit]) -> (BigUint, BigUint) {
211+
debug_assert!(a.data.len() >= b.len() && b.len() > 1);
212+
debug_assert!(b.last().unwrap().leading_zeros() == 0);
216213

217214
// The algorithm works by incrementally calculating "guesses", q0, for the next digit of the
218215
// quotient. Once we have any number q0 such that (q0 << j) * b <= a, we can set
@@ -235,16 +232,16 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
235232
let mut a0 = 0;
236233

237234
// [b1, b0] are the two most significant digits of the divisor. They never change.
238-
let b0 = *b.data.last().unwrap();
239-
let b1 = b.data[b.data.len() - 2];
235+
let b0 = *b.last().unwrap();
236+
let b1 = b[b.len() - 2];
240237

241-
let q_len = a.data.len() - b.data.len() + 1;
238+
let q_len = a.data.len() - b.len() + 1;
242239
let mut q = BigUint {
243240
data: vec![0; q_len],
244241
};
245242

246243
for j in (0..q_len).rev() {
247-
debug_assert!(a.data.len() == b.data.len() + j);
244+
debug_assert!(a.data.len() == b.len() + j);
248245

249246
let a1 = *a.data.last().unwrap();
250247
let a2 = a.data[a.data.len() - 2];
@@ -280,11 +277,11 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
280277
// q0 is now either the correct quotient digit, or in rare cases 1 too large.
281278
// Subtract (q0 << j) from a. This may overflow, in which case we will have to correct.
282279

283-
let mut borrow = sub_mul_digit_same_len(&mut a.data[j..], &b.data, q0);
280+
let mut borrow = sub_mul_digit_same_len(&mut a.data[j..], b, q0);
284281
if borrow > a0 {
285282
// q0 is too large. We need to add back one multiple of b.
286283
q0 -= 1;
287-
borrow -= __add2(&mut a.data[j..], &b.data);
284+
borrow -= __add2(&mut a.data[j..], b);
288285
}
289286
// The top digit of a, stored in a0, has now been zeroed.
290287
debug_assert!(borrow == a0);
@@ -298,7 +295,7 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) {
298295
a.data.push(a0);
299296
a.normalize();
300297

301-
debug_assert!(a < *b);
298+
debug_assert_eq!(cmp_slice(&a.data, b), Less);
302299

303300
(q.normalized(), a)
304301
}

tests/biguint.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,6 +1600,16 @@ fn test_all_str_radix() {
16001600
}
16011601
}
16021602

1603+
#[test]
1604+
fn test_big_str() {
1605+
for n in 2..=20_u32 {
1606+
let x: BigUint = BigUint::from(n).pow(10_000_u32);
1607+
let s = x.to_string();
1608+
let y: BigUint = s.parse().unwrap();
1609+
assert_eq!(x, y);
1610+
}
1611+
}
1612+
16031613
#[test]
16041614
fn test_lower_hex() {
16051615
let a = BigUint::parse_bytes(b"A", 16).unwrap();

0 commit comments

Comments
 (0)