Skip to content

Commit 0da3327

Browse files
Faster _fmpz_vec_scalar_divexact for single-word divisors (#2371)
1 parent 6ac52df commit 0da3327

File tree

6 files changed

+624
-42
lines changed

6 files changed

+624
-42
lines changed

src/fmpz_mpoly/test/t-scalar_divexact_fmpz.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ TEST_FUNCTION_START(fmpz_mpoly_scalar_divexact_fmpz, state)
9292
fmpz_mpoly_randtest_bits(f, state, len, coeff_bits, exp_bits, ctx);
9393
fmpz_mpoly_randtest_bits(h, state, len, coeff_bits, exp_bits, ctx);
9494

95-
fmpz_randtest(c, state, n_randint(state, 200));
95+
fmpz_randtest_not_zero(c, state, n_randint(state, 200) + 1);
9696

9797
fmpz_mpoly_scalar_mul_fmpz(f, f, c, ctx);
9898

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#include "ulong_extras.h"
2+
#include "fmpz.h"
3+
#include "fmpz_vec.h"
4+
#include "profiler.h"
5+
6+
void
7+
_fmpz_vec_scalar_divexact_ui_naive(fmpz * vec1, const fmpz * vec2,
8+
slong len2, ulong c)
9+
{
10+
slong i;
11+
for (i = 0; i < len2; i++)
12+
fmpz_divexact_ui(vec1 + i, vec2 + i, c);
13+
}
14+
15+
int main()
16+
{
17+
slong len, bits;
18+
19+
flint_rand_t state;
20+
flint_rand_init(state);
21+
22+
fmpz *A, *Ac, *B;
23+
ulong c;
24+
25+
double t1, t2, FLINT_SET_BUT_UNUSED(__);
26+
27+
flint_printf(" len bits(A*c) bits(c) old new speedup\n");
28+
29+
for (len = 1; len <= 10; len++)
30+
{
31+
for (bits = 5; bits <= 10000; bits *= 2)
32+
{
33+
A = _fmpz_vec_init(len);
34+
Ac = _fmpz_vec_init(len);
35+
B = _fmpz_vec_init(len);
36+
37+
_fmpz_vec_randtest(A, state, len, bits);
38+
c = n_randtest_not_zero(state);
39+
_fmpz_vec_scalar_mul_ui(Ac, A, len, c);
40+
41+
TIMEIT_START
42+
_fmpz_vec_scalar_divexact_ui_naive(B, Ac, len, c);
43+
TIMEIT_STOP_VALUES(__, t1)
44+
TIMEIT_START
45+
_fmpz_vec_scalar_divexact_ui(B, Ac, len, c);
46+
TIMEIT_STOP_VALUES(__, t2)
47+
48+
flint_printf("%6wd %6wd %6wd %8g %8g %.3f\n",
49+
len, _fmpz_vec_max_bits(Ac, len), FLINT_BIT_COUNT(c),
50+
t1, t2, t1 / t2);
51+
52+
_fmpz_vec_clear(A, len);
53+
_fmpz_vec_clear(Ac, len);
54+
_fmpz_vec_clear(B, len);
55+
}
56+
}
57+
58+
flint_rand_clear(state);
59+
}
60+

src/fmpz_vec/scalar.c

Lines changed: 3 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -126,46 +126,6 @@ _fmpz_vec_scalar_addmul_ui(fmpz * vec1, const fmpz * vec2, slong len2, ulong c)
126126
fmpz_addmul_ui(vec1 + ix, vec2 + ix, c);
127127
}
128128

129-
void
130-
_fmpz_vec_scalar_divexact_fmpz(fmpz * vec1, const fmpz * vec2,
131-
slong len2, const fmpz_t x)
132-
{
133-
fmpz c = *x;
134-
135-
if (!COEFF_IS_MPZ(c))
136-
{
137-
if (c == 1)
138-
_fmpz_vec_set(vec1, vec2, len2);
139-
else if (c == -1)
140-
_fmpz_vec_neg(vec1, vec2, len2);
141-
else
142-
_fmpz_vec_scalar_divexact_si(vec1, vec2, len2, c);
143-
}
144-
else
145-
{
146-
slong i;
147-
for (i = 0; i < len2; i++)
148-
fmpz_divexact(vec1 + i, vec2 + i, x);
149-
}
150-
}
151-
152-
void
153-
_fmpz_vec_scalar_divexact_si(fmpz * vec1, const fmpz * vec2, slong len2, slong c)
154-
{
155-
slong i;
156-
for (i = 0; i < len2; i++)
157-
fmpz_divexact_si(vec1 + i, vec2 + i, c);
158-
}
159-
160-
void
161-
_fmpz_vec_scalar_divexact_ui(fmpz * vec1, const fmpz * vec2,
162-
slong len2, ulong c)
163-
{
164-
slong i;
165-
for (i = 0; i < len2; i++)
166-
fmpz_divexact_ui(vec1 + i, vec2 + i, c);
167-
}
168-
169129
void
170130
_fmpz_vec_scalar_fdiv_q_2exp(fmpz * vec1, const fmpz * vec2, slong len2,
171131
ulong exp)
@@ -209,6 +169,8 @@ _fmpz_vec_scalar_fdiv_r_2exp(fmpz * vec1, const fmpz * vec2, slong len2,
209169
fmpz_fdiv_r_2exp(vec1 + i, vec2 + i, exp);
210170
}
211171

172+
/* todo: preinvert p when appropriate; or better, make sure we don't
173+
call this function in places where we have an fmpz_mod */
212174
void _fmpz_vec_scalar_mod_fmpz(fmpz *res, const fmpz *vec, slong len, const fmpz_t p)
213175
{
214176
slong i;
@@ -267,6 +229,7 @@ _fmpz_vec_scalar_mul_ui(fmpz * vec1, const fmpz * vec2, slong len2, ulong c)
267229
fmpz_mul_ui(vec1 + i, vec2 + i, c);
268230
}
269231

232+
/* todo: preinvert p when appropriate */
270233
void _fmpz_vec_scalar_smod_fmpz(fmpz *res, const fmpz *vec, slong len, const fmpz_t p)
271234
{
272235
slong i;

0 commit comments

Comments
 (0)