From c6e6991b376658976b123053174618d63a7ada22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albin=20Ahlb=C3=A4ck?= Date: Mon, 27 May 2024 02:13:50 +0200 Subject: [PATCH 1/4] Start on tuning suite --- Makefile.in | 78 +++++--- src/limb_types.h | 2 + src/tune/README.md | 58 ++++++ src/tune/clock.h | 67 +++++++ src/tune/n_mod_vec/aors_0.c | 25 +++ src/tune/n_mod_vec/aors_1.c | 25 +++ src/tune/n_mod_vec/param.c | 59 ++++++ src/tune/n_mod_vec/tune_aors.c | 47 +++++ src/tune/tune.c | 300 ++++++++++++++++++++++++++++++ src/tune/tune.h | 67 +++++++ src/tune/ulong_extras/param.c | 59 ++++++ src/tune/ulong_extras/tune_xgcd.c | 42 +++++ src/tune/ulong_extras/xgcd_0.c | 22 +++ src/tune/ulong_extras/xgcd_1.c | 22 +++ src/ulong_extras.h | 11 +- src/ulong_extras/gcdinv.c | 48 ++++- src/ulong_extras/xgcd.c | 44 +++++ 17 files changed, 946 insertions(+), 30 deletions(-) create mode 100644 src/tune/README.md create mode 100644 src/tune/clock.h create mode 100644 src/tune/n_mod_vec/aors_0.c create mode 100644 src/tune/n_mod_vec/aors_1.c create mode 100644 src/tune/n_mod_vec/param.c create mode 100644 src/tune/n_mod_vec/tune_aors.c create mode 100644 src/tune/tune.c create mode 100644 src/tune/tune.h create mode 100644 src/tune/ulong_extras/param.c create mode 100644 src/tune/ulong_extras/tune_xgcd.c create mode 100644 src/tune/ulong_extras/xgcd_0.c create mode 100644 src/tune/ulong_extras/xgcd_1.c diff --git a/Makefile.in b/Makefile.in index 832e95da91..775c11ba86 100644 --- a/Makefile.in +++ b/Makefile.in @@ -12,9 +12,11 @@ FLINT_DIR:=. 
SRC_DIR:=src BUILD_DIR:=build +TUNE_DIR:=src/tune ABS_FLINT_DIR:='$(patsubst %/,%, $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))' ABS_SRC_DIR:=$(ABS_FLINT_DIR)/$(SRC_DIR) ABS_BUILD_DIR:=$(ABS_FLINT_DIR)/$(SRC_DIR) +ABS_TUNE_DIR:=$(ABS_FLINT_DIR)/$(TUNE_DIR) FLINT_VERSION:=@FLINT_VERSION_FULL@ FLINT_MAJOR_SO:=@FLINT_MAJOR_SO@ @@ -81,6 +83,7 @@ CFLAGS:=@CFLAGS@ TESTCFLAGS:=@TESTCFLAGS@ CPPFLAGS:=@CPPFLAGS@ -DBUILDING_FLINT CPPFLAGS2:=-L$(FLINT_DIR) $(CPPFLAGS) +CPPFLAGS3:=-I$(TUNE_DIR) $(CPPFLAGS2) LIB_CPPFLAGS:=@LIB_CPPFLAGS@ CXXFLAGS:=@CXXFLAGS@ LIBS:=@LIBS@ @@ -218,8 +221,15 @@ TEMPLATE_DIRS := \ fq_poly_templates fq_poly_factor_templates \ fq_embed_templates fq_templates +_TUNE_DIRS := \ + ulong_extras + +TUNE_DIRS := $(patsubst %, $(TUNE_DIR)/%, $(_TUNE_DIRS)) +TUNE_BUILD_DIRS := $(patsubst %, $(BUILD_DIR)/tune/%, $(_TUNE_DIRS)) + BUILD_DIRS := \ $(BUILD_DIR) \ + $(TUNE_BUILD_DIRS) \ $(patsubst %, $(BUILD_DIR)/%, $(DIRS)) \ $(patsubst %, $(BUILD_DIR)/%/profile, $(DIRS)) \ $(patsubst %, $(BUILD_DIR)/%/test, $(DIRS)) \ @@ -299,12 +309,12 @@ ifneq ($(WANT_NTL), 0) interfaces_TEST_SOURCES := $(SRC_DIR)/interfaces/test/t-NTL-interface.cpp endif -define xxx_TUNE_SOURCES -$(1)_TUNE_SOURCES := $(wildcard $(SRC_DIR)/$(1)/tune/*.c) +define xxx_OLD_TUNE_SOURCES +$(1)_OLD_TUNE_SOURCES := $(wildcard $(SRC_DIR)/$(1)/tune/*.c) endef -_TUNE_SOURCES := $(wildcard $(SRC_DIR)/tune/*.c) -$(foreach dir, $(DIRS), $(eval $(call xxx_TUNE_SOURCES,$(dir)))) -TUNE_SOURCES := $(foreach dir,$(DIRS),$($(dir)_TUNE_SOURCES)) $(_TUNE_SOURCES) +_OLD_TUNE_SOURCES := $(wildcard $(SRC_DIR)/tune/*.c) +$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNE_SOURCES,$(dir)))) +OLD_TUNE_SOURCES := $(foreach dir,$(DIRS),$($(dir)_OLD_TUNE_SOURCES)) $(_OLD_TUNE_SOURCES) EXMP_SOURCES := $(wildcard $(FLINT_DIR)/examples/*.c) @@ -359,12 +369,12 @@ interfaces_TESTS := $(BUILD_DIR)/interfaces/test/t-NTL-interface$(EXEEXT) endif TESTS := $(_TESTS) $(foreach dir,$(DIRS),$($(dir)_TESTS)) $(interfaces_TESTS) -define 
xxx_TUNES -$(1)_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$($(1)_TUNE_SOURCES)) +define xxx_OLD_TUNES +$(1)_OLD_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$($(1)_OLD_TUNE_SOURCES)) endef -_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(_TUNE_SOURCES)) -$(foreach dir, $(DIRS), $(eval $(call xxx_TUNES,$(dir)))) -TUNES := $(foreach dir,$(DIRS),$($(dir)_TUNES)) $(_TUNES) +_OLD_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(_OLD_TUNE_SOURCES)) +$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNES,$(dir)))) +OLD_TUNES := $(foreach dir,$(DIRS),$($(dir)_OLD_TUNES)) $(_OLD_TUNES) EXMPS := $(patsubst $(FLINT_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(EXMP_SOURCES)) @@ -675,30 +685,20 @@ endif endif ifeq ($(SHARED), 0) -$(BUILD_DIR)/tune/%$(EXEEXT): $(SRC_DIR)/tune/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/tune - @echo " CC $(<:$(SRC_DIR)/%=%)" - @$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS) -else -$(BUILD_DIR)/tune/%$(EXEEXT): $(SRC_DIR)/tune/%.c | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)/tune - @echo " CC $(<:$(SRC_DIR)/%=%)" - @$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS) -endif - -ifeq ($(SHARED), 0) -define xxx_TUNES_rule +define xxx_OLD_TUNES_rule $(BUILD_DIR)/$(1)/tune/%$(EXEEXT): $(SRC_DIR)/$(1)/tune/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/$(1)/tune @echo " CC $$(<:$(SRC_DIR)/%=%)" @$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $$< -o $$@ $(EXE_LDFLAGS) $(LIBS2) $$(DEPFLAGS) endef else -define xxx_TUNES_rule +define xxx_OLD_TUNES_rule $(BUILD_DIR)/$(1)/tune/%$(EXEEXT): $(SRC_DIR)/$(1)/tune/%.c | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)/$(1)/tune @echo " CC $$(<:$(SRC_DIR)/%=%)" @$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $$< -o $$@ $(EXE_LDFLAGS) $(LIBS2) $$(DEPFLAGS) endef endif -$(foreach dir, $(DIRS), $(eval $(call xxx_TUNES_rule,$(dir)))) +$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNES_rule,$(dir)))) ifeq ($(SHARED), 0) 
$(BUILD_DIR)/examples/%$(EXEEXT): $(FLINT_DIR)/examples/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/examples $(BUILD_DIR)/include @@ -834,7 +834,35 @@ endif # tuning ################################################################################ -tune: library $(TUNES) +# Only old tunes, i.e. under src/MOD/tune/ +old_tune: library $(OLD_TUNES) + +TUNE_SOURCE:=$(TUNE_DIR)/tune.c +TUNE_DEPS_SOURCES:=$(foreach dir,$(TUNE_DIRS),$(wildcard $(dir)/*.c)) + +_TUNE_HEADERS:=tune.h clock.h +TUNE_HEADERS:=$(patsubst %,$(TUNE_DIR)/%,$(_TUNE_HEADERS)) + +TUNE_DEPS_OBJS:=$(patsubst $(TUNE_DIR)/%.c,$(BUILD_DIR)/tune/%.o,$(TUNE_DEPS_SOURCES)) + +TUNE_EXE:=$(BUILD_DIR)/tuneup$(EXEEXT) + +# Sloppy to say that all TUNE_BUILD_DIRS have to be built. +$(BUILD_DIR)/tune/%.o: $(TUNE_DIR)/%.c $(TUNE_HEADERS) | $(TUNE_BUILD_DIRS) + @echo " CC $(<:$(SRC_DIR)/%=%)" + @$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) -c $< -o $@ $(DEPFLAGS) + +ifeq ($(SHARED), 0) +$(TUNE_EXE): $(TUNE_SOURCE) $(TUNE_DEPS_OBJS) $(TUNE_HEADERS) $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR) + @echo " CC $(<:$(SRC_DIR)/%=%)" + @$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) $(TUNE_DEPS_OBJS) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS) +else +$(TUNE_EXE): $(TUNE_SOURCE) $(TUNE_DEPS_OBJS) $(TUNE_HEADERS) | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR) + @echo " CC $(<:$(SRC_DIR)/%=%)" + @$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) $(TUNE_DEPS_OBJS) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS) +endif + +tune: $(TUNE_EXE) ################################################################################ # valgrind @@ -988,5 +1016,5 @@ dist: print-%: @echo "$*=$($*)" -.PHONY: all library shared static examples checkexamples profile tests check tune valgrind clean distclean install uninstall dist %_TEST_RUN %_TEST_RUN_% %_TEST_DGB_RUN_ARGS %_VALGRIND_RUN print-% coverage coverage_html debug +.PHONY: all library shared static examples checkexamples profile tests check tune old_tune valgrind clean distclean install 
uninstall dist %_TEST_RUN %_TEST_RUN_% %_TEST_DGB_RUN_ARGS %_VALGRIND_RUN print-% coverage coverage_html debug .PRECIOUS: $(mpn_extras_PIC_S_SOURCES) $(mpn_extras_S_SOURCES) diff --git a/src/limb_types.h b/src/limb_types.h index 05d52238ad..0c6131f653 100644 --- a/src/limb_types.h +++ b/src/limb_types.h @@ -18,6 +18,8 @@ extern "C" { #endif +typedef struct { ulong m0, m1; } nn_pair_t; + #define FLINT_MAX_FACTORS_IN_LIMB 15 typedef struct diff --git a/src/tune/README.md b/src/tune/README.md new file mode 100644 index 0000000000..f2ebb5685c --- /dev/null +++ b/src/tune/README.md @@ -0,0 +1,58 @@ +# Tuning-suite + +Currently a work in progress, but feedback is much appreciated. + +## Usage + +Run `make tune` followed by `./build/tuneup`. This prints all the optimized +parameters to `stdout`. These can then be used to optimize the parameter file +`flint-mparam.h` for your system. + +### Set CPU frequency + +If the tuner is using clock ticks (currently only for x86-64), you can specify your +clock frequency by running `export FLINT_CPU_FREQUENCY=3.1e9` to tell the tuner +that your CPU frequency is 3.1 GHz. + +### Options + +Currently, no command-line options are allowed apart from `-h` and `--help` to +display the usual help message. + +However, it would be optimal to be able to specify: + +- Functions intended to benchmark (currently does all available) +- Minimum number of runs +- Warmup runs (currently, 10 is the default) +- Minimum amount of time to run each function (?) +- Precision required to terminate successfully (currently 1.25 %) +- Percentage of runs required to be within said precision (currently 13.5 %) + +## Issues + +Please open up any issues at <https://github.com/flintlib/flint/issues>. + +## Requirements + +- FLINT was built with Autotools. +- Either that + * `clock_gettime` is available on the system, or that + * the compiler is GCC compatible and the architecture is x86-64. + +## How it works + +The program works in the following order: + +1. Parses options +2. Sets default values +3. 
For each function (that is, each variant of each function) tested: + a. Run a couple of warmups, which are discarded + b. Run hotlaps, of which the time is saved + c. Find the smallest elapsed time $t$ for running the function such that + at least $k$ of the runs have a time in the interval + $[t, (1 + p) t]$, where $p$ is the precision and $k / n$ is the fraction + of runs required to be within said precision, where $n$ is the total + number of runs. If no such $t$ was found, abort. +4. With all $t$ obtained from each family of functions, determine cutoff + points, methods used, etc. +5. Print the associated `#define` into `stdout`. diff --git a/src/tune/clock.h b/src/tune/clock.h new file mode 100644 index 0000000000..83f447bf3d --- /dev/null +++ b/src/tune/clock.h @@ -0,0 +1,67 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See <https://www.gnu.org/licenses/>. 
+*/
+
+#ifndef FLINT_CLOCK_H
+#define FLINT_CLOCK_H
+
+#include <stdlib.h> /* getenv, strtod */
+#include <time.h> /* clock_gettime, struct timespec */
+#include "flint.h"
+
+#define FLINT_CPU_FREQUENCY_DEFAULT 3.2e9 /* Hz; used when FLINT_CPU_FREQUENCY is unset */
+
+#if FLINT64 && defined(__amd64__)
+typedef ulong flint_time_t[1]; /* raw time-stamp counter value */
+
+FLINT_FORCE_INLINE
+double flint_time_nsec_diff(flint_time_t t1, flint_time_t t0)
+{
+    char * str = getenv("FLINT_CPU_FREQUENCY"); /* optional override, in Hz */
+    double freq;
+    double seconds;
+
+    if (str == NULL)
+        freq = FLINT_CPU_FREQUENCY_DEFAULT;
+    else
+        freq = strtod(str, NULL);
+
+    seconds = (double) (*t1 - *t0) / freq; /* ticks / (ticks per second) */
+
+    return seconds * 1e9; /* seconds -> nanoseconds (10e9 would be 1e10) */
+}
+
+FLINT_FORCE_INLINE
+void flint_time_get(flint_time_t t0)
+{
+    __asm__ volatile (
+        "rdtsc\n\t"
+        "shl $32, %%rdx\n\t"
+        "or %%rdx, %0"
+        : "=a" (*t0) : : "rdx"); /* combine edx:eax into one 64-bit count */
+}
+#else
+typedef struct timespec flint_time_t[1];
+
+FLINT_FORCE_INLINE
+double flint_time_nsec_diff(flint_time_t t1, flint_time_t t0)
+{
+    return 1000000000.0 * (t1->tv_sec - t0->tv_sec)
+        + (double) (t1->tv_nsec - t0->tv_nsec);
+}
+
+FLINT_FORCE_INLINE
+void flint_time_get(flint_time_t t0)
+{
+    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t0); /* void function: must not return a value */
+}
+#endif
+
+#endif /* FLINT_CLOCK_H */
diff --git a/src/tune/n_mod_vec/aors_0.c b/src/tune/n_mod_vec/aors_0.c
new file mode 100644
index 0000000000..f5197c3a2a
--- /dev/null
+++ b/src/tune/n_mod_vec/aors_0.c
@@ -0,0 +1,25 @@
+/*
+    Copyright (C) 2024 Albin Ahlbäck
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version. See <https://www.gnu.org/licenses/>.
+*/ + +#include "flint-mparam.h" +#include "n_mod_vec.h" + +#undef N_MOD_VEC_ADD_METHOD +#undef N_MOD_VEC_SUB_METHOD +#define TUNE_PROGRAM 1 + +#define N_MOD_VEC_ADD_METHOD 0 +#define N_MOD_VEC_SUB_METHOD 0 + +#define _n_mod_vec_add _n_mod_vec_add_0 +#define _n_mod_vec_sub _n_mod_vec_sub_0 + +#include "n_mod_vec/aors.c" diff --git a/src/tune/n_mod_vec/aors_1.c b/src/tune/n_mod_vec/aors_1.c new file mode 100644 index 0000000000..2c17b0d6c1 --- /dev/null +++ b/src/tune/n_mod_vec/aors_1.c @@ -0,0 +1,25 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include "flint-mparam.h" +#include "n_mod_vec.h" + +#undef N_MOD_VEC_ADD_METHOD +#undef N_MOD_VEC_SUB_METHOD +#define TUNE_PROGRAM 1 + +#define N_MOD_VEC_ADD_METHOD 1 +#define N_MOD_VEC_SUB_METHOD 1 + +#define _n_mod_vec_add _n_mod_vec_add_1 +#define _n_mod_vec_sub _n_mod_vec_sub_1 + +#include "n_mod_vec/aors.c" diff --git a/src/tune/n_mod_vec/param.c b/src/tune/n_mod_vec/param.c new file mode 100644 index 0000000000..cf7f00f56d --- /dev/null +++ b/src/tune/n_mod_vec/param.c @@ -0,0 +1,59 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . 
+*/ + +#include "tune.h" +#include "n_mod.h" +#include "n_mod_vec.h" + +#if FLINT64 +# define N_0 UWORD(7365182178263871635) +#else +# define N_0 UWORD(1236571635) +#endif + +void * n_mod_vec_param_init_generate_0(void) +{ + struct n_mod_vec_param_0 * param; + nn_ptr rp, ap, bp; + slong len; + flint_rand_t state; + + param = flint_malloc(sizeof(struct n_mod_vec_param_0)); + flint_rand_init(state); + + len = 1000; + rp = flint_malloc(sizeof(ulong) * len); + ap = flint_malloc(sizeof(ulong) * len); + bp = flint_malloc(sizeof(ulong) * len); + n_mod_ctx_init(param->ctx, N_0); + + _n_mod_vec_rand(ap, state, len, param->ctx); + _n_mod_vec_rand(bp, state, len, param->ctx); + + param->rp = rp; + param->ap = ap; + param->bp = bp; + param->len = len; + flint_rand_clear(state); + + return param; +} + +void n_mod_vec_param_clear(void * vparam) +{ + struct n_mod_vec_param_0 * param = vparam; + + flint_free(param->rp); + flint_free(param->ap); + flint_free(param->bp); + n_mod_ctx_clear(param->ctx); + flint_free(param); +} diff --git a/src/tune/n_mod_vec/tune_aors.c b/src/tune/n_mod_vec/tune_aors.c new file mode 100644 index 0000000000..52ee2575f1 --- /dev/null +++ b/src/tune/n_mod_vec/tune_aors.c @@ -0,0 +1,47 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . 
+*/ + +#include "clock.h" +#include "tune.h" +#include "n_mod.h" +#include "n_mod_vec.h" + +void _n_mod_vec_add_0(nn_ptr, nn_srcptr, nn_srcptr, slong, ulong); +void _n_mod_vec_sub_0(nn_ptr, nn_srcptr, nn_srcptr, slong, ulong); +void _n_mod_vec_add_1(nn_ptr, nn_srcptr, nn_srcptr, slong, ulong); +void _n_mod_vec_sub_1(nn_ptr, nn_srcptr, nn_srcptr, slong, ulong); + +#define DEFINE_IT(name, func) \ +double name(void * vparam) \ +{ \ + struct n_mod_vec_param_0 * param = vparam; \ + nn_ptr rp, ap, bp; \ + slong len; \ + ulong nu; \ + flint_time_t t0, t1; \ + \ + rp = param->rp; \ + ap = param->ap; \ + bp = param->bp; \ + len = param->len; \ + nu = param->ctx->nu; \ + \ + flint_time_get(t0); \ + func(rp, ap, bp, len, nu); \ + flint_time_get(t1); \ + \ + return flint_time_nsec_diff(t1, t0); \ +} + +DEFINE_IT(_tune_n_mod_vec_add_0, _n_mod_vec_add_0) +DEFINE_IT(_tune_n_mod_vec_add_1, _n_mod_vec_add_1) +DEFINE_IT(_tune_n_mod_vec_sub_0, _n_mod_vec_sub_0) +DEFINE_IT(_tune_n_mod_vec_sub_1, _n_mod_vec_sub_1) diff --git a/src/tune/tune.c b/src/tune/tune.c new file mode 100644 index 0000000000..f336535342 --- /dev/null +++ b/src/tune/tune.c @@ -0,0 +1,300 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +/* TODO: + * Allow to pass function inputs. + * Allow to pass number of warmup runs. + * Allow to pass number of runs. + * Allow to pass minimum amount of milliseconds each function has to run. + * Support considering other things than simply the mean run time. +*/ + +/* NOT-TODO: + * Do not allow passing inputs of a specific kind. 
+*/
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "tune.h"
+
+#if 0
+static void print_copyright(void)
+{
+    printf("/*\n"
+           " Copyright (C) 2024 Albin Ahlbäck\n"
+           "\n"
+           " This file is part of FLINT.\n"
+           "\n"
+           " FLINT is free software: you can redistribute it and/or modify it under\n"
+           " the terms of the GNU Lesser General Public License (LGPL) as published\n"
+           " by the Free Software Foundation; either version 3 of the License, or\n"
+           " (at your option) any later version. See <https://www.gnu.org/licenses/>.\n"
+           "*/\n"
+           "\n");
+}
+#endif
+/* qsort comparator: orders doubles ascending. */
+int compare_doubles(const void * ap, const void * bp)
+{
+    double a = *((const double *) ap);
+    double b = *((const double *) bp);
+
+    return (a < b) ? -1 : (a > b) ? 1 : 0;
+}
+/* Calls fun `warmups` times untimed, then `runs` times timed; parameter order matches the callers. */
+static double measure_func(tune_func_t fun, void * params, int warmups, int runs)
+{
+    double * times = malloc(sizeof(double) * runs);
+    int ix;
+    const double deviation = 1.0125; /* 1.25 % larger */
+    const double percentage_within_deviation = 0.135; /* 13.5 % */
+    const int check_step = percentage_within_deviation * runs; /* samples that must lie within the deviation */
+    double ret = 0.0; /* always overwritten before use; initialised to silence warnings */
+
+    for (ix = 0; ix < warmups; ix++)
+        fun(params);
+
+    for (ix = 0; ix < runs; ix++)
+        times[ix] = fun(params);
+
+    qsort(times, runs, sizeof(double), compare_doubles);
+    /* Smallest t = times[ix] such that the sample check_step places later is <= deviation * t. */
+    for (ix = 0; ix + check_step < runs; ix++)
+    {
+        if (deviation * times[ix] >= times[ix + check_step])
+        {
+            ret = times[ix];
+            break;
+        }
+    }
+
+    if (ix + check_step >= runs) /* loop ran to completion: no cluster found */
+    {
+        fprintf(stderr, "Could not find appropriate measurements.\n");
+        exit(1);
+    }
+
+    free(times);
+
+    return ret;
+}
+/* Relative speedup as a fraction: 0.25 means less_time is 25 % faster. */
+#define SPEEDUP(more_time, less_time) ((double) (more_time) / (double) (less_time) - 1.0)
+
+#if 0
+static void print_define(const char * str, int val)
+{
+    printf("#define %-36s %4d\n", str, val);
+}
+#endif
+/* Prints the chosen #define; `speedup` is a fraction and is converted to percent here. */
+static void
+print_define_with_speedup(const char * str, int val, int other_val, double speedup)
+{
+    printf("#define %-36s %4d /* %.2f%% faster than %d */\n",
+            str, val, 100.0 * speedup, other_val);
+}
+
+static void tune_n_xgcd(int warmups, int min_runs)
+{
+    void * 
params = n_param_init_generate_0(); + double t0, t1; + int chosen_method, other_method; + double speedup; + + t0 = measure_func(_tune_n_xgcd_0, params, warmups, min_runs); + t1 = measure_func(_tune_n_xgcd_1, params, warmups, min_runs); + + if (t0 <= t1) + { + chosen_method = 0; + other_method = 1; + speedup = SPEEDUP(t1, t0); + } + else + { + chosen_method = 1; + other_method = 0; + speedup = SPEEDUP(t0, t1); + } + + print_define_with_speedup("N_GCDEXT_METHOD", chosen_method, other_method, speedup); + + n_param_clear(params); +} + +#if WANT_N_MOD +static void tune_n_mod_vec_add(int warmups, int min_runs) +{ + void * params = n_mod_vec_param_init_generate_0(); + double t0, t1; + int chosen_method, other_method; + double speedup; + + t0 = measure_func(_tune_n_mod_vec_add_0, params, warmups, min_runs); + t1 = measure_func(_tune_n_mod_vec_add_1, params, warmups, min_runs); + + if (t0 <= t1) + { + chosen_method = 0; + other_method = 1; + speedup = SPEEDUP(t1, t0); + } + else + { + chosen_method = 1; + other_method = 0; + speedup = SPEEDUP(t0, t1); + } + + print_define_with_speedup("N_MOD_VEC_ADD_METHOD", chosen_method, other_method, speedup); + + n_mod_vec_param_clear(params); +} + +static void tune_n_mod_vec_sub(int warmups, int min_runs) +{ + void * params = n_mod_vec_param_init_generate_0(); + double t0, t1; + int chosen_method, other_method; + double speedup; + + t0 = measure_func(_tune_n_mod_vec_sub_0, params, warmups, min_runs); + t1 = measure_func(_tune_n_mod_vec_sub_1, params, warmups, min_runs); + + if (t0 <= t1) + { + chosen_method = 0; + other_method = 1; + speedup = SPEEDUP(t1, t0); + } + else + { + chosen_method = 1; + other_method = 0; + speedup = SPEEDUP(t0, t1); + } + + print_define_with_speedup("N_MOD_VEC_SUB_METHOD", chosen_method, other_method, speedup); + + n_mod_vec_param_clear(params); +} +#endif + +struct tune_t +{ + char * name; + void (* tune_function)(int, int); +}; + +static const struct tune_t tunes[] = +{ + {"n_xgcd", tune_n_xgcd}, +#if 
WANT_N_MOD + {"_n_mod_vec_add", tune_n_mod_vec_add}, + {"_n_mod_vec_sub", tune_n_mod_vec_sub}, +#endif +}; + +#define DEFAULT(x, val) do { if ((x) <= 0) (x) = (val); } while (0) +#define numberof(x) (sizeof(x) / sizeof((x)[0])) + +/* FIXME: Use funcs if funcs != NULL */ +static void +distribute_tune(int num, const char ** funcs, int warmups, int min_runs) +{ + int ix; + + DEFAULT(warmups, 10); + DEFAULT(min_runs, 1000); + + if (funcs == NULL) + { + for (ix = 0; ix < numberof(tunes); ix++) + tunes[ix].tune_function(warmups, min_runs); + } + else + { + for (ix = 0; ix < num; ix++) + { + int jx; + + for (jx = 0; jx < numberof(tunes); jx++) + if (strcmp(funcs[ix], tunes[jx].name) == 0) + { + tunes[jx].tune_function(warmups, min_runs); + break; + } + + if (jx == numberof(tunes)) + { + printf("Unknown function %s.\n", funcs[ix]); + exit(1); + } + } + } +} + +static void usage(int argc, char ** argv) +{ + /* FIXME: Write something more nice */ + printf("Usage: %s [options]\n" + "\n" + "Tunes the parameters, that could then be pushed into flint-mparam.h\n" + "\n" + "Options:\n" + " -h, --help Display this help message\n" + "\n", argv[0]); +} + +int main(int argc, char ** argv) +{ + int c; + + while (1) + { + int option_index = 0; + static struct option long_options[] = + { + {"help", no_argument, NULL, 'h'}, + { NULL, 0, NULL, 0} + }; + + c = getopt_long(argc, argv, "h", long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 'h': + usage(argc, argv); + exit(0); + + default: + usage(argc, argv); + exit(1); + } + } + + if (optind < argc) + { + /* distribute_tune(argc - optind, argv + optind); */ + printf("Currently does not support arguments.\n\n"); + usage(argc, argv); + exit(1); + } + else + distribute_tune(0, NULL, -1, -1); + + exit(0); +} diff --git a/src/tune/tune.h b/src/tune/tune.h new file mode 100644 index 0000000000..4aac8012fc --- /dev/null +++ b/src/tune/tune.h @@ -0,0 +1,67 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part 
of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#ifndef FLINT_TUNE_H +#define FLINT_TUNE_H + +#define WANT_N_MOD 0 + +#include "flint.h" +#if WANT_N_MOD +# include "n_mod.h" +#endif + +/* tune_func_t + + Should run a function once and return the time to run the main function. */ +typedef double (* tune_func_t)(void *); + +/* ulong_extras **************************************************************/ + +struct n_param_0 +{ + nn_ptr ap; + nn_ptr bp; + nn_ptr xp; + nn_ptr yp; + slong len; +}; + +/* Only sets xp and yp, and orders xp[ix] >= yp[ix] */ +void * n_param_init_generate_0(void); +void n_param_clear(void *); + +double _tune_n_xgcd_0(void *); +double _tune_n_xgcd_1(void *); + +/* n_mod_vec *****************************************************************/ +#if WANT_N_MOD +struct n_mod_vec_param_0 +{ + nn_ptr rp; + nn_ptr ap; + nn_ptr bp; + slong len; + n_mod_ctx_t ctx; +}; + +/* Only sets ap and bp */ +void * n_mod_vec_param_init_generate_0(void); +void n_mod_vec_param_clear(void *); + +double _tune_n_mod_vec_add_0(void *); +double _tune_n_mod_vec_add_1(void *); + +double _tune_n_mod_vec_sub_0(void *); +double _tune_n_mod_vec_sub_1(void *); +#endif + +#endif /* FLINT_TUNE_H */ diff --git a/src/tune/ulong_extras/param.c b/src/tune/ulong_extras/param.c new file mode 100644 index 0000000000..d4edb7ccd1 --- /dev/null +++ b/src/tune/ulong_extras/param.c @@ -0,0 +1,59 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . 
+*/ + +#include "tune.h" + +void * n_param_init_generate_0(void) +{ + struct n_param_0 * param; + nn_ptr ap, bp, xp, yp; + slong len; + slong ix; + flint_rand_t state; + + param = flint_malloc(sizeof(struct n_param_0)); + flint_rand_init(state); + + len = 100; + ap = flint_malloc(sizeof(ulong) * len); + bp = flint_malloc(sizeof(ulong) * len); + xp = flint_malloc(sizeof(ulong) * len); + yp = flint_malloc(sizeof(ulong) * len); + + for (ix = 0; ix < len; ix++) + { + xp[ix] = n_randlimb(state); + yp[ix] = n_randlimb(state); + if (xp[ix] < yp[ix]) + FLINT_SWAP(ulong, xp[ix], yp[ix]); + } + + param->ap = ap; + param->bp = bp; + param->xp = xp; + param->yp = yp; + param->len = len; + + flint_rand_clear(state); + + return param; +} + +void n_param_clear(void * vparam) +{ + struct n_param_0 * param = vparam; + + flint_free(param->ap); + flint_free(param->bp); + flint_free(param->xp); + flint_free(param->yp); + flint_free(param); +} diff --git a/src/tune/ulong_extras/tune_xgcd.c b/src/tune/ulong_extras/tune_xgcd.c new file mode 100644 index 0000000000..95999e0522 --- /dev/null +++ b/src/tune/ulong_extras/tune_xgcd.c @@ -0,0 +1,42 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . 
+*/
+
+#include "clock.h"
+#include "tune.h"
+
+ulong n_xgcd_0(nn_ptr, nn_ptr, ulong, ulong); /* built from xgcd_0.c */
+ulong n_xgcd_1(nn_ptr, nn_ptr, ulong, ulong); /* built from xgcd_1.c */
+/* DEFINE_IT(name, func): defines `double name(void *)` timing func over all (xp[ix], yp[ix]) pairs. */
+#define DEFINE_IT(name, func)                   \
+double name(void * vparam)                      \
+{                                               \
+    struct n_param_0 * param = vparam;          \
+    nn_ptr ap, bp, xp, yp;                      \
+    slong len;                                  \
+    flint_time_t t0, t1;                        \
+    slong ix;                                   \
+                                                \
+    ap = param->ap;                             \
+    bp = param->bp;                             \
+    xp = param->xp;                             \
+    yp = param->yp;                             \
+    len = param->len;                           \
+                                                \
+    flint_time_get(t0);                         \
+    for (ix = 0; ix < len; ix++)                \
+        func(ap, bp, xp[ix], yp[ix]);           \
+    flint_time_get(t1);                         \
+                                                \
+    return flint_time_nsec_diff(t1, t0);        \
+}
+
+DEFINE_IT(_tune_n_xgcd_0, n_xgcd_0)
+DEFINE_IT(_tune_n_xgcd_1, n_xgcd_1) /* must time method 1, not method 0 again */
diff --git a/src/tune/ulong_extras/xgcd_0.c b/src/tune/ulong_extras/xgcd_0.c
new file mode 100644
index 0000000000..9a622af929
--- /dev/null
+++ b/src/tune/ulong_extras/xgcd_0.c
@@ -0,0 +1,22 @@
+/*
+    Copyright (C) 2024 Albin Ahlbäck
+
+    This file is part of FLINT.
+
+    FLINT is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version. See <https://www.gnu.org/licenses/>.
+*/ + +#include "flint-mparam.h" +#include "ulong_extras.h" + +#undef N_GCDEXT_METHOD +#define TUNE_PROGRAM 1 + +#define N_GCDEXT_METHOD 1 + +#define n_xgcd n_xgcd_1 + +#include "ulong_extras/xgcd.c" diff --git a/src/ulong_extras.h b/src/ulong_extras.h index 729e83dbec..38f56f7cde 100644 --- a/src/ulong_extras.h +++ b/src/ulong_extras.h @@ -110,7 +110,16 @@ ulong n_gcd(ulong x, ulong y) } ulong n_xgcd(ulong * a, ulong * b, ulong x, ulong y); -ulong n_gcdinv(ulong * a, ulong x, ulong y); + +nn_pair_t _n_gcdinv(ulong x, ulong y); + +ULONG_EXTRAS_INLINE ulong n_gcdinv(ulong * a, ulong x, ulong y) +{ + nn_pair_t ret = _n_gcdinv(x, y); + *a = ret.m0; + return ret.m1; +} + ulong n_CRT(ulong r1, ulong m1, ulong r2, ulong m2); /* Checked arithmetic ********************************************************/ diff --git a/src/ulong_extras/gcdinv.c b/src/ulong_extras/gcdinv.c index 0ae711ca86..d87cd03786 100644 --- a/src/ulong_extras/gcdinv.c +++ b/src/ulong_extras/gcdinv.c @@ -11,9 +11,11 @@ #include "ulong_extras.h" -ulong -n_gcdinv(ulong * s, ulong x, ulong y) +#if N_GCDEXT_METHOD == 0 +nn_pair_t +_n_gcdinv(ulong x, ulong y) { + nn_pair_t ret; slong v1, v2, t2; ulong d, r, quot, rem; @@ -111,7 +113,45 @@ n_gcdinv(ulong * s, ulong x, ulong y) if (v1 < WORD(0)) v1 += y; - (*s) = v1; + ret.m0 = v1; + ret.m1 = x; - return x; + return ret; } +#elif N_GCDEXT_METHOD == 1 +nn_pair_t +_n_gcdinv(ulong x, ulong y) +{ + nn_pair_t ret; + slong v1, v2, t2; + ulong d, r, quot, rem; + + FLINT_ASSERT(y > x); + + v1 = 0; + v2 = 1; + r = x; + x = y; + + while (r) + { + quot = x / r; + rem = x - r * quot; + x = r; + t2 = v2; + v2 = v1 - quot * v2; + v1 = t2; + r = rem; + } + + if (v1 < WORD(0)) + v1 += y; + + ret.m0 = v1; + ret.m1 = x; + + return ret; +} +#else +# error +#endif diff --git a/src/ulong_extras/xgcd.c b/src/ulong_extras/xgcd.c index 18f19e6eba..61482b8f97 100644 --- a/src/ulong_extras/xgcd.c +++ b/src/ulong_extras/xgcd.c @@ -11,6 +11,7 @@ #include "ulong_extras.h" +#if 
N_GCDEXT_METHOD == 0 ulong n_xgcd(ulong * a, ulong * b, ulong x, ulong y) { @@ -145,3 +146,46 @@ n_xgcd(ulong * a, ulong * b, ulong x, ulong y) return u3; } +#elif N_GCDEXT_METHOD == 1 +ulong +n_xgcd(ulong * a, ulong * b, ulong x, ulong y) +{ + slong u1, u2, v1, v2, t1, t2; + ulong u3, v3, quot, rem; + + FLINT_ASSERT(x >= y); + + u1 = v2 = 1; + u2 = v1 = 0; + u3 = x; + v3 = y; + + while (v3) + { + quot = u3 / v3; + rem = u3 - v3 * quot; + t1 = u2; + u2 = u1 - quot * u2; + u1 = t1; + u3 = v3; + t2 = v2; + v2 = v1 - quot * v2; + v1 = t2; + v3 = rem; + } + + /* Remarkably, |u1| < x/2, thus comparison with 0 is valid */ + if (u1 <= WORD(0)) + { + u1 += y; + v1 -= x; + } + + *a = u1; + *b = -v1; + + return u3; +} +#else +# error +#endif From b6905965d9ea617d21b66722481e29e8aee9d380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albin=20Ahlb=C3=A4ck?= Date: Mon, 27 May 2024 20:58:02 +0200 Subject: [PATCH 2/4] Prepare flint_mpn_(mul|sqr)high_n for tuning Move related parameters to flint-mparam.h --- src/mpn_extras.h | 13 +- src/mpn_extras/arm64/applem1/flint-mparam.h | 152 ++++++++++++++++++ src/mpn_extras/arm64/flint-mparam.h | 152 ++++++++++++++++++ src/mpn_extras/generic/flint-mparam.h | 152 ++++++++++++++++++ src/mpn_extras/mulhigh.c | 78 +-------- src/mpn_extras/sqrhigh.c | 72 +-------- .../x86_64/broadwell/flint-mparam.h | 152 ++++++++++++++++++ src/mpn_extras/x86_64/flint-mparam.h | 152 ++++++++++++++++++ src/mpn_extras/x86_64/skylake/flint-mparam.h | 152 ++++++++++++++++++ src/mpn_extras/x86_64/zen3/flint-mparam.h | 152 ++++++++++++++++++ 10 files changed, 1074 insertions(+), 153 deletions(-) diff --git a/src/mpn_extras.h b/src/mpn_extras.h index 365776e11f..68e997ffea 100644 --- a/src/mpn_extras.h +++ b/src/mpn_extras.h @@ -22,6 +22,7 @@ #endif #include +#include "flint-mparam.h" #include "longlong.h" #ifdef __cplusplus @@ -594,13 +595,7 @@ FLINT_DLL extern const flint_mpn_sqrhigh_normalised_func_t flint_mpn_sqrhigh_nor #endif -/* FIXME: this tuning is for x86_64_adx 
with fft_small */ -/* NOTE: we assume that the same cutoff is optimal for both mulhigh and mullow */ -#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 -#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 -#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 - -FLINT_DLL extern const signed short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE]; +FLINT_DLL extern const short flint_mpn_mulhigh_k_tab[]; mp_limb_t flint_mpn_mullow_basecase(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n); void _flint_mpn_mullow_n_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n); @@ -667,10 +662,6 @@ void flint_mpn_mul_or_mulhigh_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t flint_mpn_mul_n(rp, xp, yp, n); } -#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 -#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 -#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 - #if FLINT_HAVE_ASSEMBLY_x86_64_adx mp_limb_t _flint_mpn_sqrhigh_basecase_even(mp_ptr, mp_srcptr, mp_size_t); mp_limb_t _flint_mpn_sqrhigh_basecase_odd(mp_ptr, mp_srcptr, mp_size_t); diff --git a/src/mpn_extras/arm64/applem1/flint-mparam.h b/src/mpn_extras/arm64/applem1/flint-mparam.h index 2e739d4f77..77ca7e07c7 100644 --- a/src/mpn_extras/arm64/applem1/flint-mparam.h +++ b/src/mpn_extras/arm64/applem1/flint-mparam.h @@ -27,4 +27,156 @@ #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 +/* FIXME: This tuning is for x86_64_adx with fft_small */ +/* NOTE: We assume that the same cutoff is optimal for both mulhigh and mullow */ +#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 +#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 +#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 +#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 +#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_MULHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, \ + 18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 
36, 36, \ + 38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, \ + 60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, \ + 88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, \ + 80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, \ + 120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140, \ + 144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, \ + 160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, \ + 172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, \ + 188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, \ + 184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, \ + 188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, \ + 284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, \ + 284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, \ + 276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 
328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324, \ + 300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, \ + 300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, \ + 472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, \ + 480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480, \ + 472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, \ + 480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, \ + 576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, \ + 576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, \ + 552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, \ + 568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, \ + 576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, \ + 776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, \ + 792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 
776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824, \ + 870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, \ + 840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, \ + 856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, \ + 872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, \ + 896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, \ + 928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, \ + 928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, \ + 928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, \ + 1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104, \ + 1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, \ + 1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, \ + 1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 
1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136, \ + 1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, \ + 1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, \ + 1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 
1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, \ + 1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, \ + 1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, \ + 1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, \ + 1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, \ + 1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712, \ + 1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, \ + 1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, \ + 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, \ + 1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 
1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, \ + 1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, \ + 1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, \ + 1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, \ + 1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920, \ + 1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, \ + 1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696 + +#define FLINT_MPN_SQRHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, \ + 0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, \ + 48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, \ + 64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, \ + 88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, \ + 104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108, \ + 108, 108, 132, 108, 120, 112, 112, 116, 
132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, \ + 128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, \ + 140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, \ + 156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, \ + 204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, \ + 216, 204, 216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, \ + 216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, \ + 212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, \ + 228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, \ + 260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276, \ + 264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, \ + 344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, \ + 288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344, \ + 344, 328, 344, 344, 368, 360, 352, 
360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, \ + 392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, \ + 376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, \ + 448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, \ + 384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, \ + 456, 504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, \ + 552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, \ + 568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, \ + 592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, \ + 664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872, \ + 632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, \ + 848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, \ + 872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, \ + 920, 888, 888, 872, 880, 880, 
888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, \ + 880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, \ + 896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, \ + 880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, \ + 912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, \ + 912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, \ + 928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, \ + 1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, \ + 1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, \ + 1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, \ + 1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, \ + 1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 
1152, 1104, \ + 1120, 1152, 1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, \ + 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, \ + 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, \ + 1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, \ + 1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552, \ + 1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, \ + 1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 
1616, 1632, 1632, 1632, 1584, 1632, \ + 1632, 1552, 1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, \ + 1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 1696, 1712, \ + 1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, \ + 1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, \ + 1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, \ + 1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680, \ + 1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, \ + 1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, \ + 1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 
1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792, \ + 1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, \ + 1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856 + #endif diff --git a/src/mpn_extras/arm64/flint-mparam.h b/src/mpn_extras/arm64/flint-mparam.h index 0112747333..8c5e03d736 100644 --- a/src/mpn_extras/arm64/flint-mparam.h +++ b/src/mpn_extras/arm64/flint-mparam.h @@ -29,4 +29,156 @@ #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 +/* FIXME: This tuning is for x86_64_adx with fft_small */ +/* NOTE: We assume that the same cutoff is optimal for both mulhigh and mullow */ +#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 +#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 +#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 +#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 +#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_MULHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, \ + 18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36, \ + 38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, \ + 60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, \ + 88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, \ + 80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, \ + 120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140, \ + 144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 
144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, \ + 160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, \ + 172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, \ + 188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, \ + 184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, \ + 188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, \ + 284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, \ + 284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, \ + 276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324, \ + 300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, \ + 300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, \ + 472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, \ + 480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480, \ + 472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 
480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, \ + 480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, \ + 576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, \ + 576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, \ + 552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, \ + 568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, \ + 576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, \ + 776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, \ + 792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824, \ + 870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, \ + 840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, \ + 856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, \ + 872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, \ + 896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 
928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, \ + 928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, \ + 928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, \ + 928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, \ + 1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104, \ + 1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, \ + 1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, \ + 1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136, \ + 1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 
1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, \ + 1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, \ + 1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, \ + 1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, \ + 1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 
1632, 1648, 1669, 1648, 1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, \ + 1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, \ + 1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, \ + 1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712, \ + 1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, \ + 1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, \ + 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, \ + 1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, \ + 1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, \ + 1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, \ + 1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, \ + 1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 
1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920, \ + 1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, \ + 1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696 + +#define FLINT_MPN_SQRHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, \ + 0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, \ + 48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, \ + 64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, \ + 88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, \ + 104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108, \ + 108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, \ + 128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, \ + 140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, \ + 156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, \ + 204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, \ + 216, 204, 
216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, \ + 216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, \ + 212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, \ + 228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, \ + 260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276, \ + 264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, \ + 344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, \ + 288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344, \ + 344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, \ + 392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, \ + 376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, \ + 448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, \ + 384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, \ + 456, 
504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, \ + 552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, \ + 568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, \ + 592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, \ + 664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872, \ + 632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, \ + 848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, \ + 872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, \ + 920, 888, 888, 872, 880, 880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, \ + 880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, \ + 896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, \ + 880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, \ + 912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, \ + 
912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, \ + 928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, \ + 1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, \ + 1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, \ + 1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, \ + 1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, \ + 1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104, \ + 1120, 1152, 1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, \ + 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, \ + 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 
1152, \ + 1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, \ + 1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552, \ + 1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, \ + 1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632, \ + 1632, 1552, 1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, \ + 1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 
1728, 1664, 1712, 1696, 1712, \ + 1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, \ + 1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, \ + 1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, \ + 1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680, \ + 1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, \ + 1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, \ + 1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792, \ + 1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, \ + 1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856 + #endif diff --git a/src/mpn_extras/generic/flint-mparam.h b/src/mpn_extras/generic/flint-mparam.h index 0112747333..8c5e03d736 100644 --- a/src/mpn_extras/generic/flint-mparam.h +++ b/src/mpn_extras/generic/flint-mparam.h @@ -29,4 +29,156 @@ #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 +/* FIXME: This tuning is for x86_64_adx with fft_small */ +/* NOTE: We assume that the same cutoff is optimal for both mulhigh and mullow */ 
+#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 +#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 +#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 +#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 +#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_MULHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, \ + 18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36, \ + 38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, \ + 60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, \ + 88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, \ + 80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, \ + 120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140, \ + 144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, \ + 160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, \ + 172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, \ + 188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, \ + 184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, \ + 188, 192, 192, 192, 212, 
188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, \ + 284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, \ + 284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, \ + 276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324, \ + 300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, \ + 300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, \ + 472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, \ + 480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480, \ + 472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, \ + 480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, \ + 576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, \ + 576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, \ + 552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, \ + 568, 576, 568, 576, 
576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, \ + 576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, \ + 776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, \ + 792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824, \ + 870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, \ + 840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, \ + 856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, \ + 872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, \ + 896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, \ + 928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, \ + 928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, \ + 928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, \ + 1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 
1056, 1120, 1088, 1072, 1104, 1104, \ + 1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, \ + 1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, \ + 1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136, \ + 1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 
1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, \ + 1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, \ + 1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, \ + 1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, \ + 1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, \ + 1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, \ + 1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, \ + 1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712, \ + 1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 
1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, \ + 1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, \ + 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, \ + 1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, \ + 1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, \ + 1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, \ + 1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, \ + 1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920, \ + 1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, \ + 1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696 + +#define FLINT_MPN_SQRHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, \ + 0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, \ + 48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 
52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, \ + 64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, \ + 88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, \ + 104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108, \ + 108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, \ + 128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, \ + 140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, \ + 156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, \ + 204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, \ + 216, 204, 216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, \ + 216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, \ + 212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, \ + 228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, \ + 260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 
276, 292, 268, 264, 276, \ + 264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, \ + 344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, \ + 288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344, \ + 344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, \ + 392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, \ + 376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, \ + 448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, \ + 384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, \ + 456, 504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, \ + 552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, \ + 568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, \ + 592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, \ + 664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 
656, 664, 696, 872, 872, 872, \ + 632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, \ + 848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, \ + 872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, \ + 920, 888, 888, 872, 880, 880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, \ + 880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, \ + 896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, \ + 880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, \ + 912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, \ + 912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, \ + 928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, \ + 1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, \ + 1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, \ + 1152, 
1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, \ + 1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, \ + 1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104, \ + 1120, 1152, 1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, \ + 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, \ + 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, \ + 1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, \ + 1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 
1552, 1552, 1552, \ + 1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, \ + 1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632, \ + 1632, 1552, 1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, \ + 1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 1696, 1712, \ + 1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, \ + 1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, \ + 1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, \ + 1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 
1712, 1712, 1728, 1712, 1696, 1728, 1680, \ + 1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, \ + 1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, \ + 1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792, \ + 1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, \ + 1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856 + #endif diff --git a/src/mpn_extras/mulhigh.c b/src/mpn_extras/mulhigh.c index cd6b738818..7362da4b93 100644 --- a/src/mpn_extras/mulhigh.c +++ b/src/mpn_extras/mulhigh.c @@ -18,77 +18,11 @@ #include #include "mpn_extras.h" -/* Generated by tune-mulhigh.c */ -const signed short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE] = -{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, -18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36, -38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, -60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, -88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, -80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, -120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 
120, 120, 120, 144, 120, 132, 144, 136, 140, -144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, -160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, -172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, -188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, -184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, -188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, -284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, -284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, -276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324, -300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, -300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, -472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, -480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 
464, 480, -472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, -480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, -576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, -576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, -552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, -568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, -576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, -776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, -792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824, -870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, -840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, -856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, -872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, -896, 904, 928, 896, 912, 
896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, -928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, -928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, -928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, -1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104, -1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, -1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, -1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136, -1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, -1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, -1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, -1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 
1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, -1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, -1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, -1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, -1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, -1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, -1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, -1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, -1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, -1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, -1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, -1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 1671, 
1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, -1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, -1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, -1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712, -1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, -1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, -1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, -1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, -1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, -1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, -1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, -1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 
1984, 1952, 1936, 1952, 1904, 1968, 1920, -1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, -1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696, }; +#if TUNE_PROGRAM +short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE]; +#else +const short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE] = {FLINT_MPN_MULHIGH_K_TAB}; +#endif void _flint_mpn_mulhigh_n_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) @@ -168,6 +102,7 @@ _flint_mpn_mulhigh_n_mul(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n) return bot; } +#if !TUNE_PROGRAM mp_limb_t _flint_mpn_mulhigh_n(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n) { @@ -203,3 +138,4 @@ mp_limb_pair_t _flint_mpn_mulhigh_normalised(mp_ptr rp, mp_srcptr xp, mp_srcptr return ret; } +#endif diff --git a/src/mpn_extras/sqrhigh.c b/src/mpn_extras/sqrhigh.c index d1578e3c5d..c5f77c0abf 100644 --- a/src/mpn_extras/sqrhigh.c +++ b/src/mpn_extras/sqrhigh.c @@ -17,77 +17,7 @@ #include "mpn_extras.h" -/* Generated by tune-sqrhigh.c */ -static const signed short flint_mpn_sqrhigh_k_tab[FLINT_MPN_SQRHIGH_K_TAB_SIZE] = -{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, -0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, -48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, -64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, -88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, -104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 
120, 116, 104, 108, 132, 116, 108, 120, 108, -108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, -128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, -140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, -156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, -204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, -216, 204, 216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, -216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, -212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, -228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, -260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276, -264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, -344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, -288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 
312, 344, -344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, -392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, -376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, -448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, -384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, -456, 504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, -552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, -568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, -592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, -664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872, -632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, -848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, -872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, -920, 888, 888, 872, 880, 
880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, -880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, -896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, -880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, -912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, -912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, -928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, -1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, -1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, -1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, -1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, -1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104, -1120, 1152, 
1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, -1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, -1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, -1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, -1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, -1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, -1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, -1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552, -1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, -1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, -1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632, -1632, 1552, 1632, 
1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, -1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, -1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, -1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 1696, 1712, -1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, -1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, -1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, -1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680, -1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, -1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, -1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792, -1856, 1824, 1856, 1824, 
1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, -1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856, }; +static const short flint_mpn_sqrhigh_k_tab[FLINT_MPN_SQRHIGH_K_TAB_SIZE] = {FLINT_MPN_SQRHIGH_K_TAB}; void _flint_mpn_sqrhigh_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_size_t n) diff --git a/src/mpn_extras/x86_64/broadwell/flint-mparam.h b/src/mpn_extras/x86_64/broadwell/flint-mparam.h index 0112747333..8c5e03d736 100644 --- a/src/mpn_extras/x86_64/broadwell/flint-mparam.h +++ b/src/mpn_extras/x86_64/broadwell/flint-mparam.h @@ -29,4 +29,156 @@ #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 +/* FIXME: This tuning is for x86_64_adx with fft_small */ +/* NOTE: We assume that the same cutoff is optimal for both mulhigh and mullow */ +#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 +#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 +#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 +#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 +#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_MULHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, \ + 18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36, \ + 38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, \ + 60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, \ + 88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, \ + 80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, \ + 120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 
120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140, \ + 144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, \ + 160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, \ + 172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, \ + 188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, \ + 184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, \ + 188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, \ + 284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, \ + 284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, \ + 276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324, \ + 300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, \ + 300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, \ + 472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, \ + 480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 
480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480, \ + 472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, \ + 480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, \ + 576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, \ + 576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, \ + 552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, \ + 568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, \ + 576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, \ + 776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, \ + 792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824, \ + 870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, \ + 840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, \ + 856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, \ + 872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 
904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, \ + 896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, \ + 928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, \ + 928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, \ + 928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, \ + 1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104, \ + 1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, \ + 1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, \ + 1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136, \ + 1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 
\ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, \ + 1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, \ + 1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, \ + 1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, \ + 1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 
1632, 1632, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, \ + 1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, \ + 1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, \ + 1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712, \ + 1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, \ + 1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, \ + 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, \ + 1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, \ + 1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, \ + 1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, \ + 1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 
1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, \ + 1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920, \ + 1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, \ + 1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696 + +#define FLINT_MPN_SQRHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, \ + 0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, \ + 48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, \ + 64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, \ + 88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, \ + 104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108, \ + 108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, \ + 128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, \ + 140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, \ + 156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, \ + 204, 176, 188, 180, 188, 180, 204, 
180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, \ + 216, 204, 216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, \ + 216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, \ + 212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, \ + 228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, \ + 260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276, \ + 264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, \ + 344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, \ + 288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344, \ + 344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, \ + 392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, \ + 376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, \ + 448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, \ + 384, 416, 424, 432, 400, 472, 
480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, \ + 456, 504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, \ + 552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, \ + 568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, \ + 592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, \ + 664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872, \ + 632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, \ + 848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, \ + 872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, \ + 920, 888, 888, 872, 880, 880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, \ + 880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, \ + 896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, \ + 880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, \ + 912, 928, 912, 928, 912, 
928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, \ + 912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, \ + 928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, \ + 1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, \ + 1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, \ + 1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, \ + 1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, \ + 1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104, \ + 1120, 1152, 1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, \ + 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, \ + 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 
1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, \ + 1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, \ + 1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552, \ + 1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, \ + 1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632, \ + 1632, 1552, 1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, \ + 1680, 1632, 1632, 
1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 1696, 1712, \ + 1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, \ + 1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, \ + 1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, \ + 1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680, \ + 1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, \ + 1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, \ + 1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792, \ + 1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, \ + 1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856 + #endif diff --git a/src/mpn_extras/x86_64/flint-mparam.h b/src/mpn_extras/x86_64/flint-mparam.h index 0112747333..8c5e03d736 100644 --- a/src/mpn_extras/x86_64/flint-mparam.h +++ b/src/mpn_extras/x86_64/flint-mparam.h @@ -29,4 +29,156 @@ #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 +/* FIXME: This 
tuning is for x86_64_adx with fft_small */ +/* NOTE: We assume that the same cutoff is optimal for both mulhigh and mullow */ +#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 +#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 +#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 +#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 +#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_MULHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, \ + 18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36, \ + 38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, \ + 60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, \ + 88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, \ + 80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, \ + 120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140, \ + 144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, \ + 160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, \ + 172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, \ + 188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, \ + 184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 
220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, \ + 188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, \ + 284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, \ + 284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, \ + 276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324, \ + 300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, \ + 300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, \ + 472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, \ + 480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480, \ + 472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, \ + 480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, \ + 576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, \ + 576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, \ + 552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 
560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, \ + 568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, \ + 576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, \ + 776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, \ + 792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824, \ + 870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, \ + 840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, \ + 856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, \ + 872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, \ + 896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, \ + 928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, \ + 928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, \ + 928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, \ + 1024, 1088, 1040, 
1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104, \ + 1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, \ + 1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, \ + 1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136, \ + 1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 
1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, \ + 1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, \ + 1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, \ + 1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, \ + 1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, \ + 1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, \ + 1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, \ + 1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 
1680, 1696, 1696, 1696, 1712, \ + 1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, \ + 1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, \ + 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, \ + 1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, \ + 1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, \ + 1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, \ + 1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, \ + 1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920, \ + 1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, \ + 1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696 + +#define FLINT_MPN_SQRHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, \ + 0, 32, 0, 34, 0, 36, 36, 40, 0, 
40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, \ + 48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, \ + 64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, \ + 88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, \ + 104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108, \ + 108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, \ + 128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, \ + 140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, \ + 156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, \ + 204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, \ + 216, 204, 216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, \ + 216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, \ + 212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, \ + 228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, \ 
+ 260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276, \ + 264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, \ + 344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, \ + 288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344, \ + 344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, \ + 392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, \ + 376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, \ + 448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, \ + 384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, \ + 456, 504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, \ + 552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, \ + 568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, \ + 592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, 
\ + 664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872, \ + 632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, \ + 848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, \ + 872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, \ + 920, 888, 888, 872, 880, 880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, \ + 880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, \ + 896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, \ + 880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, \ + 912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, \ + 912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, \ + 928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, \ + 1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, \ + 1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 
1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, \ + 1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, \ + 1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, \ + 1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104, \ + 1120, 1152, 1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, \ + 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, \ + 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, \ + 1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, \ + 1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 
1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552, \ + 1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, \ + 1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632, \ + 1632, 1552, 1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, \ + 1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 1696, 1712, \ + 1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, \ + 1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, \ + 1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, \ + 1712, 1664, 1712, 
1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680, \ + 1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, \ + 1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, \ + 1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792, \ + 1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, \ + 1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856 + #endif diff --git a/src/mpn_extras/x86_64/skylake/flint-mparam.h b/src/mpn_extras/x86_64/skylake/flint-mparam.h index 4ddf1efd71..6e8652f1cd 100644 --- a/src/mpn_extras/x86_64/skylake/flint-mparam.h +++ b/src/mpn_extras/x86_64/skylake/flint-mparam.h @@ -27,4 +27,156 @@ #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 +/* FIXME: This tuning is for x86_64_adx with fft_small */ +/* NOTE: We assume that the same cutoff is optimal for both mulhigh and mullow */ +#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 +#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 +#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 +#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 +#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_MULHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, \ + 18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36, \ + 38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 
44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, \ + 60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, \ + 88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, \ + 80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, \ + 120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140, \ + 144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, \ + 160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, \ + 172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, \ + 188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, \ + 184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, \ + 188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, \ + 284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, \ + 284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, \ + 276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 
324, \ + 300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, \ + 300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, \ + 472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, \ + 480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480, \ + 472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, \ + 480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, \ + 576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, \ + 576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, \ + 552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, \ + 568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, \ + 576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, \ + 776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, \ + 792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 
848, 824, \ + 870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, \ + 840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, \ + 856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, \ + 872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, \ + 896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, \ + 928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, \ + 928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, \ + 928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, \ + 1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104, \ + 1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, \ + 1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, \ + 1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 
1136, \ + 1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, \ + 1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, \ + 1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 
1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, \ + 1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, \ + 1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, \ + 1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, \ + 1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, \ + 1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712, \ + 1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, \ + 1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, \ + 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, \ + 1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 
1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, \ + 1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, \ + 1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, \ + 1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, \ + 1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920, \ + 1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, \ + 1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696 + +#define FLINT_MPN_SQRHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, \ + 0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, \ + 48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, \ + 64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, \ + 88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, \ + 104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108, \ + 108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 
120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, \ + 128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, \ + 140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, \ + 156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, \ + 204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, \ + 216, 204, 216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, \ + 216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, \ + 212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, \ + 228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, \ + 260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276, \ + 264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, \ + 344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, \ + 288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344, \ + 344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 
344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, \ + 392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, \ + 376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, \ + 448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, \ + 384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, \ + 456, 504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, \ + 552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, \ + 568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, \ + 592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, \ + 664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872, \ + 632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, \ + 848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, \ + 872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, \ + 920, 888, 888, 872, 880, 880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 
904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, \ + 880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, \ + 896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, \ + 880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, \ + 912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, \ + 912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, \ + 928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, \ + 1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, \ + 1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, \ + 1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, \ + 1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, \ + 1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104, \ + 1120, 1152, 1136, 1136, 1136, 1136, 
1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, \ + 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, \ + 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, \ + 1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, \ + 1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552, \ + 1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, \ + 1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632, \ + 1632, 1552, 
1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, \ + 1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 1696, 1712, \ + 1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, \ + 1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, \ + 1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, \ + 1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680, \ + 1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, \ + 1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, \ + 1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 
1792, 1792, \ + 1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, \ + 1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856 + #endif diff --git a/src/mpn_extras/x86_64/zen3/flint-mparam.h b/src/mpn_extras/x86_64/zen3/flint-mparam.h index d1e50ebe30..373cdd7f32 100644 --- a/src/mpn_extras/x86_64/zen3/flint-mparam.h +++ b/src/mpn_extras/x86_64/zen3/flint-mparam.h @@ -27,4 +27,156 @@ #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 +/* FIXME: This tuning is for x86_64_adx with fft_small */ +/* NOTE: We assume that the same cutoff is optimal for both mulhigh and mullow */ +#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50 +#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000 +#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90 +#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000 +#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048 + +#define FLINT_MPN_MULHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 14, 14, 16, 15, 15, 18, 18, \ + 18, 19, 20, 18, 22, 22, 20, 20, 26, 22, 22, 22, 24, 24, 24, 26, 25, 26, 30, 30, 28, 30, 31, 32, 32, 30, 36, 36, 36, 36, \ + 38, 39, 39, 38, 39, 40, 40, 40, 44, 40, 44, 44, 40, 44, 44, 48, 44, 48, 44, 48, 48, 52, 52, 52, 44, 52, 52, 52, 52, 56, \ + 60, 60, 52, 60, 60, 52, 52, 60, 64, 72, 56, 60, 72, 60, 60, 60, 76, 64, 60, 60, 72, 60, 72, 80, 72, 72, 80, 72, 68, 76, \ + 88, 76, 68, 76, 72, 72, 80, 88, 72, 72, 88, 72, 80, 76, 76, 80, 80, 88, 80, 88, 84, 88, 80, 96, 80, 80, 88, 80, 88, 88, \ + 80, 88, 96, 96, 88, 96, 92, 96, 96, 92, 100, 88, 96, 104, 88, 108, 96, 104, 104, 104, 112, 112, 108, 104, 104, 112, 112, 120, 104, 112, \ + 120, 112, 112, 120, 124, 124, 116, 124, 108, 120, 124, 116, 120, 120, 116, 120, 124, 120, 120, 140, 120, 120, 120, 120, 144, 120, 132, 144, 136, 140, \ + 144, 144, 144, 144, 144, 144, 144, 144, 140, 156, 140, 140, 144, 144, 144, 160, 144, 144, 
156, 156, 144, 160, 160, 160, 160, 152, 160, 156, 156, 156, \ + 160, 160, 144, 160, 164, 156, 156, 156, 172, 156, 156, 160, 176, 160, 160, 164, 176, 156, 160, 160, 156, 156, 160, 160, 156, 160, 172, 160, 188, 172, \ + 172, 172, 160, 172, 176, 160, 160, 176, 180, 176, 164, 188, 192, 176, 172, 188, 188, 188, 172, 188, 192, 188, 180, 192, 192, 188, 188, 192, 188, 188, \ + 188, 188, 192, 160, 156, 204, 160, 164, 164, 164, 164, 176, 180, 168, 172, 184, 188, 200, 216, 188, 164, 188, 220, 188, 208, 176, 180, 188, 172, 188, \ + 184, 188, 204, 208, 220, 196, 220, 196, 208, 212, 188, 220, 176, 176, 184, 192, 208, 184, 188, 196, 204, 244, 208, 212, 212, 228, 256, 188, 204, 196, \ + 188, 192, 192, 192, 212, 188, 292, 212, 220, 236, 228, 248, 260, 224, 264, 196, 200, 196, 212, 208, 204, 216, 208, 228, 216, 220, 252, 220, 268, 264, \ + 284, 268, 300, 220, 208, 212, 220, 236, 244, 224, 252, 252, 260, 264, 256, 256, 292, 272, 288, 292, 328, 224, 256, 236, 252, 268, 256, 252, 260, 272, \ + 284, 296, 300, 280, 300, 284, 252, 236, 328, 324, 264, 264, 256, 264, 280, 268, 284, 284, 292, 304, 260, 304, 264, 256, 328, 328, 260, 276, 328, 284, \ + 276, 296, 300, 320, 320, 304, 328, 304, 272, 268, 280, 268, 288, 292, 288, 284, 316, 288, 328, 328, 300, 328, 328, 280, 264, 328, 300, 328, 316, 324, \ + 300, 324, 300, 324, 316, 316, 328, 348, 276, 376, 288, 296, 296, 304, 320, 316, 328, 328, 324, 328, 340, 384, 348, 376, 300, 396, 304, 300, 304, 324, \ + 300, 324, 328, 328, 328, 440, 448, 384, 376, 456, 464, 384, 376, 472, 480, 376, 352, 328, 376, 352, 376, 392, 392, 384, 456, 456, 480, 448, 456, 456, \ + 472, 472, 472, 352, 464, 472, 472, 472, 480, 440, 480, 480, 480, 480, 456, 472, 472, 464, 464, 464, 456, 472, 480, 472, 480, 480, 480, 480, 448, 456, \ + 480, 448, 456, 464, 456, 464, 456, 480, 472, 464, 464, 472, 472, 472, 480, 472, 480, 480, 472, 480, 480, 480, 480, 464, 464, 464, 456, 472, 464, 480, \ + 472, 472, 480, 472, 480, 480, 464, 464, 472, 464, 472, 472, 480, 464, 480, 472, 480, 
480, 576, 576, 560, 480, 472, 480, 568, 480, 480, 464, 480, 472, \ + 480, 576, 480, 552, 560, 560, 560, 560, 568, 560, 560, 576, 576, 560, 568, 472, 480, 480, 544, 568, 552, 544, 560, 544, 560, 568, 552, 576, 568, 560, \ + 576, 576, 568, 576, 560, 576, 568, 536, 576, 568, 560, 544, 560, 552, 560, 568, 560, 576, 568, 560, 560, 560, 568, 576, 568, 576, 576, 576, 576, 544, \ + 576, 576, 568, 576, 560, 576, 576, 576, 544, 552, 568, 576, 552, 560, 576, 560, 568, 560, 576, 560, 544, 576, 576, 576, 576, 568, 576, 568, 560, 576, \ + 552, 552, 576, 560, 568, 568, 568, 576, 576, 576, 560, 552, 576, 560, 568, 560, 576, 560, 568, 560, 568, 568, 568, 576, 552, 576, 560, 576, 576, 560, \ + 568, 576, 568, 576, 576, 576, 576, 560, 576, 568, 568, 568, 560, 560, 576, 576, 568, 568, 576, 560, 576, 576, 568, 576, 560, 576, 576, 568, 576, 568, \ + 576, 568, 576, 576, 568, 576, 576, 576, 576, 568, 576, 576, 568, 568, 576, 576, 784, 576, 776, 576, 568, 576, 576, 576, 576, 576, 576, 576, 776, 776, \ + 776, 776, 776, 776, 776, 784, 776, 776, 784, 776, 776, 776, 800, 776, 776, 776, 776, 776, 776, 800, 776, 808, 792, 800, 776, 792, 776, 776, 776, 776, \ + 792, 776, 776, 784, 792, 784, 800, 776, 784, 808, 784, 776, 776, 776, 808, 784, 792, 776, 792, 832, 800, 800, 816, 792, 816, 816, 856, 808, 848, 824, \ + 870, 832, 792, 776, 784, 784, 784, 784, 800, 792, 800, 792, 784, 800, 800, 800, 816, 824, 824, 824, 832, 816, 816, 832, 824, 824, 848, 832, 856, 856, \ + 840, 872, 864, 872, 872, 880, 880, 880, 872, 888, 880, 880, 872, 880, 880, 880, 840, 872, 872, 848, 880, 848, 856, 840, 848, 840, 880, 872, 856, 872, \ + 856, 888, 880, 872, 888, 880, 872, 920, 888, 872, 880, 872, 888, 888, 888, 880, 880, 928, 880, 928, 928, 928, 920, 920, 904, 912, 880, 904, 928, 872, \ + 872, 880, 888, 880, 896, 880, 872, 896, 888, 896, 896, 928, 904, 896, 896, 912, 904, 904, 920, 880, 912, 920, 928, 928, 880, 920, 920, 880, 888, 904, \ + 896, 904, 928, 896, 912, 896, 912, 920, 912, 912, 928, 928, 928, 920, 928, 928, 
928, 928, 928, 928, 912, 904, 912, 896, 904, 904, 920, 920, 920, 928, \ + 928, 920, 928, 928, 928, 912, 928, 912, 928, 928, 928, 912, 912, 912, 928, 928, 928, 896, 928, 928, 912, 928, 928, 928, 912, 928, 912, 928, 928, 912, \ + 928, 912, 928, 928, 928, 928, 928, 912, 928, 928, 928, 928, 912, 912, 928, 912, 928, 1024, 928, 928, 928, 928, 928, 928, 1056, 912, 928, 928, 1024, 1024, \ + 928, 928, 1024, 928, 928, 928, 928, 928, 928, 1040, 1040, 928, 1056, 1024, 1072, 1024, 1040, 1040, 1040, 1024, 1088, 1056, 1056, 1088, 1040, 1056, 1072, 1072, 1056, 1056, \ + 1024, 1088, 1040, 1024, 1040, 1040, 1024, 1056, 1056, 1056, 1040, 1072, 1056, 1040, 1056, 1056, 1056, 1056, 1056, 1056, 1056, 1120, 1056, 1088, 1056, 1120, 1088, 1072, 1104, 1104, \ + 1104, 1120, 1088, 1088, 1072, 1088, 1120, 1104, 1088, 1104, 1088, 1072, 1104, 1088, 1120, 1088, 1072, 1072, 1072, 1088, 1088, 1072, 1072, 1088, 1104, 1152, 1104, 1104, 1088, 1104, \ + 1136, 1088, 1104, 1152, 1152, 1152, 1136, 1120, 1136, 1152, 1120, 1152, 1088, 1120, 1104, 1120, 1136, 1104, 1136, 1088, 1136, 1104, 1088, 1104, 1120, 1104, 1104, 1120, 1136, 1136, \ + 1120, 1136, 1136, 1136, 1120, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1104, 1152, 1120, 1136, 1120, 1120, 1152, 1120, 1136, 1152, 1136, 1152, 1120, 1152, 1136, 1136, 1136, 1136, \ + 1136, 1152, 1152, 1152, 1152, 1120, 1120, 1152, 1136, 1136, 1136, 1152, 1152, 1120, 1152, 1152, 1152, 1152, 1152, 1104, 1152, 1152, 1120, 1136, 1152, 1120, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 
1152, 1136, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1344, 1152, 1344, 1344, 1344, 1152, 1152, 1152, 1152, 1344, 1328, 1328, 1328, 1152, 1344, 1152, 1344, 1152, 1344, 1152, 1152, 1328, 1152, 1328, 1344, 1328, 1344, 1328, 1344, \ + 1344, 1312, 1328, 1328, 1328, 1344, 1344, 1344, 1328, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1344, 1328, 1344, \ + 1328, 1344, 1344, 1344, 1344, 1328, 1344, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1573, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1583, 1568, 1568, 1552, 1568, 1552, 1568, \ + 1568, 1568, 1584, 1568, 1584, 1568, 1568, 1568, 1552, 1552, 1552, 1568, 1552, 1584, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1584, 1552, 1584, 1552, 1617, 1568, 1584, \ + 1552, 1552, 1584, 1584, 1552, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1632, 1636, 1632, 1638, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1648, 1669, 1648, 
1671, 1632, 1664, 1648, 1632, 1648, 1648, 1648, 1632, \ + 1632, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1632, 1648, 1664, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1648, 1664, 1648, 1648, 1648, 1648, 1648, 1648, \ + 1680, 1664, 1680, 1696, 1680, 1680, 1664, 1680, 1680, 1648, 1632, 1680, 1696, 1632, 1648, 1648, 1632, 1680, 1680, 1664, 1664, 1664, 1648, 1680, 1664, 1680, 1664, 1680, 1664, 1664, \ + 1680, 1696, 1664, 1696, 1712, 1712, 1696, 1680, 1712, 1696, 1728, 1712, 1696, 1728, 1728, 1712, 1728, 1648, 1680, 1696, 1712, 1696, 1712, 1696, 1696, 1680, 1696, 1696, 1696, 1712, \ + 1696, 1696, 1696, 1696, 1712, 1728, 1696, 1728, 1696, 1696, 1712, 1728, 1712, 1728, 1712, 1680, 1696, 1728, 1712, 1696, 1696, 1696, 1712, 1712, 1728, 1696, 1728, 1712, 1712, 1728, \ + 1696, 1696, 1696, 1712, 1696, 1728, 1712, 1712, 1712, 1728, 1696, 1712, 1728, 1728, 1696, 1728, 1728, 1728, 1728, 1728, 1680, 1712, 1728, 1696, 1728, 1728, 1728, 1728, 1696, 1728, \ + 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1728, 1696, 1728, 1712, 1712, 1712, 1712, 1728, 1712, 1712, 1712, 1728, 1712, 1728, 1728, 1728, \ + 1728, 1712, 1728, 1728, 1712, 1728, 1728, 1712, 1712, 1728, 1712, 1712, 1728, 1728, 1712, 1728, 1712, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1712, \ + 1728, 1712, 1712, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1712, 1712, 1728, 1728, 1728, 1712, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, 1728, \ + 1728, 1728, 1712, 1728, 1728, 1824, 1728, 1728, 1728, 1728, 1824, 1728, 1840, 1728, 1728, 1728, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1728, 1728, 1856, 1856, 1856, 1856, 1728, \ + 1728, 1856, 1856, 1728, 1856, 1728, 1856, 1728, 1840, 1856, 1856, 1840, 1856, 1856, 1856, 1840, 1856, 1856, 1856, 1856, 1856, 1856, 1856, 1840, 1904, 1856, 1856, 1840, 1840, 1856, \ + 1856, 1840, 1856, 1840, 1856, 1856, 1856, 1856, 1952, 1856, 1856, 1856, 1856, 1952, 1904, 1904, 1856, 
1856, 1856, 1920, 1952, 2001, 1952, 1984, 1952, 1936, 1952, 1904, 1968, 1920, \ + 1984, 1920, 1968, 1920, 1936, 1856, 2000, 1920, 1936, 1952, 2000, 1968, 1984, 1968, 1984, 2000, 1952, 2000, 2016, 1984, 2000, 2016, 1984, 1664, 2016, 1984, 2016, 1968, 2016, 2016, \ + 1744, 2016, 2016, 1968, 2000, 1728, 1712, 1696 + +#define FLINT_MPN_SQRHIGH_K_TAB \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31, \ + 0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48, \ + 48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64, \ + 64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80, \ + 88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92, \ + 104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108, \ + 108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124, \ + 128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144, \ + 140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156, \ + 156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176, \ + 204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216, \ + 216, 204, 216, 192, 204, 212, 228, 
204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212, \ + 216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212, \ + 212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228, \ + 228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260, \ + 260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276, \ + 264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320, \ + 344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304, \ + 288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344, \ + 344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376, \ + 392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376, \ + 376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392, \ + 448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392, \ + 384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448, \ + 456, 504, 504, 512, 496, 504, 
424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552, \ + 552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576, \ + 568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592, \ + 592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664, \ + 664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872, \ + 632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880, \ + 848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872, \ + 872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904, \ + 920, 888, 888, 872, 880, 880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872, \ + 880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880, \ + 896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880, \ + 880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896, \ + 912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040, \ + 912, 928, 1024, 1024, 
928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040, \ + 928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040, \ + 1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120, \ + 1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152, \ + 1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072, \ + 1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152, \ + 1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104, \ + 1120, 1152, 1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120, \ + 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152, \ + 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, \ + 1120, 1152, 
1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, \ + 1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136, \ + 1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152, \ + 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552, \ + 1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, \ + 1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632, \ + 1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632, \ + 1632, 1552, 1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632, \ + 1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632, \ + 1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696, \ + 1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 
1696, 1712, \ + 1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712, \ + 1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728, \ + 1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712, \ + 1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680, \ + 1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840, \ + 1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856, \ + 1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792, \ + 1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840, \ + 1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856 + #endif From 83de09070720263ef1c0d6cd37def3f01ecde708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albin=20Ahlb=C3=A4ck?= Date: Mon, 27 May 2024 23:55:08 +0200 Subject: [PATCH 3/4] Start on tuning flint_mpn_mulhigh_n --- src/tune/mpn_extras/mulhigh_0.c | 25 +++++++ src/tune/mpn_extras/tune_mulhigh_n.c | 108 +++++++++++++++++++++++++++ src/tune/tune.c | 2 +- src/tune/tune.h | 4 + 4 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 
src/tune/mpn_extras/mulhigh_0.c create mode 100644 src/tune/mpn_extras/tune_mulhigh_n.c diff --git a/src/tune/mpn_extras/mulhigh_0.c b/src/tune/mpn_extras/mulhigh_0.c new file mode 100644 index 0000000000..e38740df2a --- /dev/null +++ b/src/tune/mpn_extras/mulhigh_0.c @@ -0,0 +1,25 @@ +/* + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See <https://www.gnu.org/licenses/>. +*/ + +#include "flint-mparam.h" +#include "mpn_extras.h" +#include "tune.h" + +#undef FLINT_MPN_MULHIGH_K_TAB_SIZE +#undef FLINT_MPN_MULHIGH_K_TAB +#define TUNE_PROGRAM 1 + +#define FLINT_MPN_MULHIGH_K_TAB_SIZE FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE +#define flint_mpn_mulhigh_k_tab flint_mpn_mulhigh_k_tab_0 +#define _flint_mpn_mulhigh_n_mulders_recursive _flint_mpn_mulhigh_n_mulders_recursive_0 +#define _flint_mpn_mulhigh_n_mulders _flint_mpn_mulhigh_n_mulders_0 + +#include "mpn_extras/mulhigh.c" diff --git a/src/tune/mpn_extras/tune_mulhigh_n.c b/src/tune/mpn_extras/tune_mulhigh_n.c new file mode 100644 index 0000000000..a1fdcb95a7 --- /dev/null +++ b/src/tune/mpn_extras/tune_mulhigh_n.c @@ -0,0 +1,108 @@ +/* + Copyright (C) 2024 Fredrik Johansson + Copyright (C) 2024 Albin Ahlbäck + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See <https://www.gnu.org/licenses/>. 
+*/ + +#include "mpn_extras.h" +#include "tune.h" + +#undef FLINT_MPN_MULHIGH_K_TAB_SIZE +#define FLINT_MPN_MULHIGH_K_TAB_SIZE FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE + +#define flint_mpn_mulhigh_k_tab flint_mpn_mulhigh_k_tab_0 +#define _flint_mpn_mulhigh_n_mulders _flint_mpn_mulhigh_n_mulders_0 + +FLINT_DLL extern short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE]; +mp_limb_t _flint_mpn_mulhigh_n_mulders(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); + +/* IDEA: + + 1. Set all entries in flint_mpn_mulhigh_k_tab to zeroes. + 2. Skip any further altering of those entries having a corresponding + hardcoded version. + 3. When reasonable, check if basecase is faster. + 4. The sequence of k should be weakly increasing. We only need to consider k + between n / 2 and n. + TODO: How can we prune the search with this information? + 5. Run some warmups and run some hotlaps for each k. Only use the fastest + consistent time (see tune.c). + 6. Use the k that corresponds to the fastest time and push that to + flint_mpn_mulhigh_k_tab so that it can be used in consecutive runs. + 7. For large enough n, check if _flint_mpn_mulhigh_n_mul is faster. When we + have 50 consecutive runs of full multiplication that are faster than high + multiplication, we exit. 
+*/ + +#define BASECASE_REASONABLE(n) ((n) < 200) + +double measure_func(tune_func_t, void *, int, int); + +#if 0 +double _tune_flint_mpn_mulhigh_n(void * vparam) +{ + struct mulhigh__param_0 * param = vparam; + nn_ptr ap, bp, xp, yp; + slong len; + flint_time_t t0, t1; + slong ix; + + ap = param->ap; + bp = param->bp; + xp = param->xp; + yp = param->yp; + len = param->len; + + flint_time_get(t0); + for (ix = 0; ix < len; ix++) + func(ap, bp, xp[ix], yp[ix]); + flint_time_get(t1); + + return flint_time_nsec_diff(t1, t0); +} + +void +tune_flint_mpn_mulhigh_n(int FLINT_UNUSED(warmups), int FLINT_UNUSED(min_runs)) +{ + slong n; + mp_ptr rp, xp, yp; + flint_rand_t state; + + /* Initialize flint_mpn_mulhigh_k_tab */ + for (n = 0; n < FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE; n++) + flint_mpn_mulhigh_k_tab[n] = 0; + + rp = flint_malloc(2 * sizeof(mp_limb_t) * FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE); + xp = flint_malloc(sizeof(mp_limb_t) * FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE); + yp = flint_malloc(sizeof(mp_limb_t) * FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE); + + flint_rand_init(state); + flint_mpn_rrandom(xp, state, FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE); + flint_mpn_rrandom(yp, state, FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE); + flint_rand_clear(state); + + for (n = 1; n < FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE; n++) + { + if (FLINT_HAVE_MULHIGH_FUNC(n)) + continue; + + if (BASECASE_REASONABLE(n)) + { + + } + else + { + } + } + + flint_free(rp); + flint_free(xp); + flint_free(yp); +} +#endif diff --git a/src/tune/tune.c b/src/tune/tune.c index f336535342..b95394252b 100644 --- a/src/tune/tune.c +++ b/src/tune/tune.c @@ -52,7 +52,7 @@ int compare_doubles(const void * ap, const void * bp) return (a < b) ? -1 : (a > b) ? 
1 : 0; } -static double measure_func(tune_func_t fun, void * params, int runs, int warmups) +double measure_func(tune_func_t fun, void * params, int runs, int warmups) { double * times = malloc(sizeof(double) * runs); int ix; diff --git a/src/tune/tune.h b/src/tune/tune.h index 4aac8012fc..e56fbcabc3 100644 --- a/src/tune/tune.h +++ b/src/tune/tune.h @@ -42,6 +42,10 @@ void n_param_clear(void *); double _tune_n_xgcd_0(void *); double _tune_n_xgcd_1(void *); +/* mpn_extras ****************************************************************/ + +#define FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE 4096 + /* n_mod_vec *****************************************************************/ #if WANT_N_MOD struct n_mod_vec_param_0 From 43059067eeb285435346f7a37971e045f4babba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albin=20Ahlb=C3=A4ck?= Date: Mon, 17 Jun 2024 13:37:50 +0200 Subject: [PATCH 4/4] Stash IDK --- src/tune/mpn_extras/tune_mulhigh_n.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/tune/mpn_extras/tune_mulhigh_n.c b/src/tune/mpn_extras/tune_mulhigh_n.c index a1fdcb95a7..3afe77825a 100644 --- a/src/tune/mpn_extras/tune_mulhigh_n.c +++ b/src/tune/mpn_extras/tune_mulhigh_n.c @@ -44,10 +44,9 @@ mp_limb_t _flint_mpn_mulhigh_n_mulders(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); double measure_func(tune_func_t, void *, int, int); -#if 0 double _tune_flint_mpn_mulhigh_n(void * vparam) { - struct mulhigh__param_0 * param = vparam; + struct mulhigh_param_0 * param = vparam; nn_ptr ap, bp, xp, yp; slong len; flint_time_t t0, t1; @@ -105,4 +104,3 @@ tune_flint_mpn_mulhigh_n(int FLINT_UNUSED(warmups), int FLINT_UNUSED(min_runs)) flint_free(xp); flint_free(yp); } -#endif