/* Header for speed and threshold things.
Copyright 1999 - 2003 , 2005 , 2006 , 2008 - 2017 , 2019 - 2022 Free Software
Foundation , Inc .
This file is part of the GNU MP Library .
The GNU MP Library is free software ; you can redistribute it and / or modify
it under the terms of either :
* the GNU Lesser General Public License as published by the Free
Software Foundation ; either version 3 of the License , or ( at your
option ) any later version .
or
* the GNU General Public License as published by the Free Software
Foundation ; either version 2 of the License , or ( at your option ) any
later version .
or both in parallel , as here .
The GNU MP Library is distributed in the hope that it will be useful , but
WITHOUT ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License
for more details .
You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library . If not ,
see https://www.gnu.org/licenses/. */
#ifndef __SPEED_H__
#define __SPEED_H__
/* Pad ptr,oldsize with zero limbs (at the most significant end) to make it
newsize long. */
#define MPN_ZERO_EXTEND(ptr, oldsize, newsize) \
do { \
ASSERT ((newsize) >= (oldsize)); \
MPN_ZERO ((ptr)+(oldsize), (newsize)-(oldsize)); \
}
while (
0 )
/* A mask of the least significant n bits. Note 1<<32 doesn't give zero on
x86 family CPUs, hence the separate case for GMP_LIMB_BITS. */
#define MP_LIMB_T_LOWBITMASK(n) \
((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t)
1 << (n)) -
1 )
/* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice */
#define TMP_ALLOC_ALIGNED(bytes, align) \
align_pointer (TMP_ALLOC ((bytes) + (align)-
1 ), (align))
#define TMP_ALLOC_LIMBS_ALIGNED(limbs, align) \
((mp_ptr) TMP_ALLOC_ALIGNED ((limbs)*
sizeof (mp_limb_t), align))
/* CACHE_LINE_SIZE is our default alignment for speed operands, and the
limit on what s - > align_xp etc and then request for off - alignment . Maybe
this should be an option of some sort , but in any case here are some line
sizes ,
bytes
32 pentium
64 athlon
64 itanium - 2 L1
128 itanium - 2 L2
*/
#define CACHE_LINE_SIZE
64 /* bytes */
#define SPEED_TMP_ALLOC_ADJUST_MASK (CACHE_LINE_SIZE/GMP_LIMB_BYTES -
1 )
/* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb
alignment. */
#define SPEED_TMP_ALLOC_LIMBS(ptr, limbs, align) \
do { \
mp_ptr __ptr; \
mp_size_t __ptr_align, __ptr_add; \
\
ASSERT ((CACHE_LINE_SIZE % GMP_LIMB_BYTES) ==
0 ); \
__ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK); \
__ptr_align = (__ptr - (mp_ptr) NULL); \
__ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK; \
(ptr) = __ptr + __ptr_add; \
}
while (
0 )
/* This is the size for s->xp_block and s->yp_block, used in certain
routines that want to run across many different data values and use
s - > size for a different purpose , eg . SPEED_ROUTINE_MPN_GCD_1 .
512 means 2 kbytes of data for each of xp_block and yp_block , making 4 k
total, which should fit easily in any L1 data cache. */
#define SPEED_BLOCK_SIZE
512 /* limbs */
extern double speed_unittime;
extern double speed_cycletime;
extern int speed_precision;
extern char speed_time_string[];
void speed_time_init (
void );
void speed_cycletime_fail (
const char *str);
void speed_cycletime_init (
void );
void speed_cycletime_need_cycles (
void );
void speed_cycletime_need_seconds (
void );
void speed_starttime (
void );
double speed_endtime (
void );
struct speed_params {
unsigned reps;
/* how many times to run the routine */
mp_ptr xp;
/* first argument */
mp_ptr yp;
/* second argument */
mp_size_t size;
/* size of both arguments */
mp_limb_t r;
/* user supplied parameter */
mp_size_t align_xp;
/* alignment of xp */
mp_size_t align_yp;
/* alignment of yp */
mp_size_t align_wp;
/* intended alignment of wp */
mp_size_t align_wp2;
/* intended alignment of wp2 */
mp_ptr xp_block;
/* first special SPEED_BLOCK_SIZE block */
mp_ptr yp_block;
/* second special SPEED_BLOCK_SIZE block */
double time_divisor;
/* optionally set by the speed routine */
/* used by the cache priming things */
int cache;
unsigned src_num, dst_num;
struct {
mp_ptr ptr;
mp_size_t size;
} src[
5 ], dst[
4 ];
};
typedef double (*speed_function_t) (
struct speed_params *);
double speed_measure (speed_function_t fun,
struct speed_params *);
/* Prototypes for speed measuring routines */
double speed_back_to_back (
struct speed_params *);
double speed_count_leading_zeros (
struct speed_params *);
double speed_count_trailing_zeros (
struct speed_params *);
double speed_find_a (
struct speed_params *);
double speed_gmp_allocate_free (
struct speed_params *);
double speed_gmp_allocate_reallocate_free (
struct speed_params *);
double speed_invert_limb (
struct speed_params *);
double speed_malloc_free (
struct speed_params *);
double speed_malloc_realloc_free (
struct speed_params *);
double speed_memcpy (
struct speed_params *);
double speed_binvert_limb (
struct speed_params *);
double speed_binvert_limb_mul1 (
struct speed_params *);
double speed_binvert_limb_loop (
struct speed_params *);
double speed_binvert_limb_cond (
struct speed_params *);
double speed_binvert_limb_arith (
struct speed_params *);
double speed_mpf_init_clear (
struct speed_params *);
double speed_mpn_add_n (
struct speed_params *);
double speed_mpn_add_1 (
struct speed_params *);
double speed_mpn_add_1_inplace (
struct speed_params *);
double speed_mpn_add_err1_n (
struct speed_params *);
double speed_mpn_add_err2_n (
struct speed_params *);
double speed_mpn_add_err3_n (
struct speed_params *);
double speed_mpn_addlsh_n (
struct speed_params *);
double speed_mpn_addlsh1_n (
struct speed_params *);
double speed_mpn_addlsh2_n (
struct speed_params *);
double speed_mpn_addlsh_n_ip1 (
struct speed_params *);
double speed_mpn_addlsh1_n_ip1 (
struct speed_params *);
double speed_mpn_addlsh2_n_ip1 (
struct speed_params *);
double speed_mpn_addlsh_n_ip2 (
struct speed_params *);
double speed_mpn_addlsh1_n_ip2 (
struct speed_params *);
double speed_mpn_addlsh2_n_ip2 (
struct speed_params *);
double speed_mpn_add_n_sub_n (
struct speed_params *);
double speed_mpn_and_n (
struct speed_params *);
double speed_mpn_andn_n (
struct speed_params *);
double speed_mpn_addmul_1 (
struct speed_params *);
double speed_mpn_addmul_2 (
struct speed_params *);
double speed_mpn_addmul_3 (
struct speed_params *);
double speed_mpn_addmul_4 (
struct speed_params *);
double speed_mpn_addmul_5 (
struct speed_params *);
double speed_mpn_addmul_6 (
struct speed_params *);
double speed_mpn_addmul_7 (
struct speed_params *);
double speed_mpn_addmul_8 (
struct speed_params *);
double speed_mpn_addaddmul_1msb0 (
struct speed_params *);
double speed_mpn_cnd_add_n (
struct speed_params *);
double speed_mpn_cnd_sub_n (
struct speed_params *);
double speed_mpn_com (
struct speed_params *);
double speed_mpn_neg (
struct speed_params *);
double speed_mpn_copyd (
struct speed_params *);
double speed_mpn_copyi (
struct speed_params *);
double speed_MPN_COPY (
struct speed_params *);
double speed_MPN_COPY_DECR (
struct speed_params *);
double speed_MPN_COPY_INCR (
struct speed_params *);
double speed_mpn_sec_tabselect (
struct speed_params *);
double speed_mpn_divexact_1 (
struct speed_params *);
double speed_mpn_divexact_by3 (
struct speed_params *);
double speed_mpn_bdiv_q_1 (
struct speed_params *);
double speed_mpn_pi1_bdiv_q_1 (
struct speed_params *);
double speed_mpn_bdiv_dbm1c (
struct speed_params *);
double speed_mpn_divrem_1 (
struct speed_params *);
double speed_mpn_divrem_1f (
struct speed_params *);
double speed_mpn_divrem_1c (
struct speed_params *);
double speed_mpn_divrem_1cf (
struct speed_params *);
double speed_mpn_divrem_1_div (
struct speed_params *);
double speed_mpn_divrem_1f_div (
struct speed_params *);
double speed_mpn_divrem_1_inv (
struct speed_params *);
double speed_mpn_divrem_1f_inv (
struct speed_params *);
double speed_mpn_divrem_2 (
struct speed_params *);
double speed_mpn_divrem_2_div (
struct speed_params *);
double speed_mpn_divrem_2_inv (
struct speed_params *);
double speed_mpn_div_qr_1n_pi1 (
struct speed_params *);
double speed_mpn_div_qr_1n_pi1_1 (
struct speed_params *);
double speed_mpn_div_qr_1n_pi1_2 (
struct speed_params *);
double speed_mpn_div_qr_1n_pi1_3 (
struct speed_params *);
double speed_mpn_div_qr_1n_pi1_4 (
struct speed_params *);
double speed_mpn_div_qr_1 (
struct speed_params *);
double speed_mpn_div_qr_2n (
struct speed_params *);
double speed_mpn_div_qr_2u (
struct speed_params *);
double speed_mpn_fib2_ui (
struct speed_params *);
double speed_mpn_matrix22_mul (
struct speed_params *);
double speed_mpn_hgcd2 (
struct speed_params *);
double speed_mpn_hgcd2_1 (
struct speed_params *);
double speed_mpn_hgcd2_2 (
struct speed_params *);
double speed_mpn_hgcd2_3 (
struct speed_params *);
double speed_mpn_hgcd2_4 (
struct speed_params *);
double speed_mpn_hgcd2_5 (
struct speed_params *);
double speed_mpn_hgcd (
struct speed_params *);
double speed_mpn_hgcd_lehmer (
struct speed_params *);
double speed_mpn_hgcd_appr (
struct speed_params *);
double speed_mpn_hgcd_appr_lehmer (
struct speed_params *);
double speed_mpn_hgcd_reduce (
struct speed_params *);
double speed_mpn_hgcd_reduce_1 (
struct speed_params *);
double speed_mpn_hgcd_reduce_2 (
struct speed_params *);
double speed_mpn_gcd (
struct speed_params *);
double speed_mpn_gcd_1 (
struct speed_params *);
double speed_mpn_gcd_11 (
struct speed_params *);
double speed_mpn_gcd_1N (
struct speed_params *);
double speed_mpn_gcd_22 (
struct speed_params *);
double speed_mpn_gcdext (
struct speed_params *);
double speed_mpn_gcdext_double (
struct speed_params *);
double speed_mpn_gcdext_one_double (
struct speed_params *);
double speed_mpn_gcdext_one_single (
struct speed_params *);
double speed_mpn_gcdext_single (
struct speed_params *);
double speed_mpn_get_str (
struct speed_params *);
double speed_mpn_hamdist (
struct speed_params *);
double speed_mpn_ior_n (
struct speed_params *);
double speed_mpn_iorn_n (
struct speed_params *);
double speed_mpn_jacobi_base (
struct speed_params *);
double speed_mpn_jacobi_base_1 (
struct speed_params *);
double speed_mpn_jacobi_base_2 (
struct speed_params *);
double speed_mpn_jacobi_base_3 (
struct speed_params *);
double speed_mpn_jacobi_base_4 (
struct speed_params *);
double speed_mpn_lshift (
struct speed_params *);
double speed_mpn_lshiftc (
struct speed_params *);
double speed_mpn_mod_1 (
struct speed_params *);
double speed_mpn_mod_1c (
struct speed_params *);
double speed_mpn_mod_1_div (
struct speed_params *);
double speed_mpn_mod_1_inv (
struct speed_params *);
double speed_mpn_mod_1_1 (
struct speed_params *);
double speed_mpn_mod_1_1_1 (
struct speed_params *);
double speed_mpn_mod_1_1_2 (
struct speed_params *);
double speed_mpn_mod_1_2 (
struct speed_params *);
double speed_mpn_mod_1_3 (
struct speed_params *);
double speed_mpn_mod_1_4 (
struct speed_params *);
double speed_mpn_mod_34lsub1 (
struct speed_params *);
double speed_mpn_modexact_1_odd (
struct speed_params *);
double speed_mpn_modexact_1c_odd (
struct speed_params *);
double speed_mpn_mul_1 (
struct speed_params *);
double speed_mpn_mul_1_inplace (
struct speed_params *);
double speed_mpn_mul_2 (
struct speed_params *);
double speed_mpn_mul_3 (
struct speed_params *);
double speed_mpn_mul_4 (
struct speed_params *);
double speed_mpn_mul_5 (
struct speed_params *);
double speed_mpn_mul_6 (
struct speed_params *);
double speed_mpn_mul (
struct speed_params *);
double speed_mpn_mul_basecase (
struct speed_params *);
double speed_mpn_mulmid (
struct speed_params *);
double speed_mpn_mulmid_basecase (
struct speed_params *);
double speed_mpn_mul_fft (
struct speed_params *);
double speed_mpn_mul_fft_sqr (
struct speed_params *);
double speed_mpn_fft_mul (
struct speed_params *);
double speed_mpn_fft_sqr (
struct speed_params *);
#if WANT_OLD_FFT_FULL
double speed_mpn_mul_fft_full (
struct speed_params *);
double speed_mpn_mul_fft_full_sqr (
struct speed_params *);
#endif
double speed_mpn_nussbaumer_mul (
struct speed_params *);
double speed_mpn_nussbaumer_mul_sqr (
struct speed_params *);
double speed_mpn_mul_n (
struct speed_params *);
double speed_mpn_mul_n_sqr (
struct speed_params *);
double speed_mpn_mulmid_n (
struct speed_params *);
double speed_mpn_sqrlo (
struct speed_params *);
double speed_mpn_sqrlo_basecase (
struct speed_params *);
double speed_mpn_mullo_n (
struct speed_params *);
double speed_mpn_mullo_basecase (
struct speed_params *);
double speed_mpn_nand_n (
struct speed_params *);
double speed_mpn_nior_n (
struct speed_params *);
double speed_mpn_popcount (
struct speed_params *);
double speed_mpn_preinv_divrem_1 (
struct speed_params *);
double speed_mpn_preinv_divrem_1f (
struct speed_params *);
double speed_mpn_preinv_mod_1 (
struct speed_params *);
double speed_mpn_sbpi1_div_qr (
struct speed_params *);
double speed_mpn_dcpi1_div_qr (
struct speed_params *);
double speed_mpn_sbpi1_divappr_q (
struct speed_params *);
double speed_mpn_dcpi1_divappr_q (
struct speed_params *);
double speed_mpn_mu_div_qr (
struct speed_params *);
double speed_mpn_mu_divappr_q (
struct speed_params *);
double speed_mpn_mupi_div_qr (
struct speed_params *);
double speed_mpn_mu_div_q (
struct speed_params *);
double speed_mpn_sbpi1_bdiv_qr (
struct speed_params *);
double speed_mpn_dcpi1_bdiv_qr (
struct speed_params *);
double speed_mpn_sbpi1_bdiv_q (
struct speed_params *);
double speed_mpn_dcpi1_bdiv_q (
struct speed_params *);
double speed_mpn_sbpi1_bdiv_r (
struct speed_params *);
double speed_mpn_mu_bdiv_q (
struct speed_params *);
double speed_mpn_mu_bdiv_qr (
struct speed_params *);
double speed_mpn_broot (
struct speed_params *);
double speed_mpn_broot_invm1 (
struct speed_params *);
double speed_mpn_brootinv (
struct speed_params *);
double speed_mpn_invert (
struct speed_params *);
double speed_mpn_invertappr (
struct speed_params *);
double speed_mpn_ni_invertappr (
struct speed_params *);
double speed_mpn_sec_invert (
struct speed_params *s);
double speed_mpn_binvert (
struct speed_params *);
double speed_mpn_redc_1 (
struct speed_params *);
double speed_mpn_redc_2 (
struct speed_params *);
double speed_mpn_redc_n (
struct speed_params *);
double speed_mpn_rsblsh_n (
struct speed_params *);
double speed_mpn_rsblsh1_n (
struct speed_params *);
double speed_mpn_rsblsh2_n (
struct speed_params *);
double speed_mpn_rsh1add_n (
struct speed_params *);
double speed_mpn_rsh1sub_n (
struct speed_params *);
double speed_mpn_rshift (
struct speed_params *);
double speed_mpn_sb_divrem_m3 (
struct speed_params *);
double speed_mpn_sb_divrem_m3_div (
struct speed_params *);
double speed_mpn_sb_divrem_m3_inv (
struct speed_params *);
double speed_mpn_set_str (
struct speed_params *);
double speed_mpn_bc_set_str (
struct speed_params *);
double speed_mpn_dc_set_str (
struct speed_params *);
double speed_mpn_set_str_pre (
struct speed_params *);
double speed_mpn_sqr_basecase (
struct speed_params *);
double speed_mpn_sqr_diag_addlsh1 (
struct speed_params *);
double speed_mpn_sqr_diagonal (
struct speed_params *);
double speed_mpn_sqr (
struct speed_params *);
double speed_mpn_sqrtrem (
struct speed_params *);
double speed_mpn_rootrem (
struct speed_params *);
double speed_mpn_sqrt (
struct speed_params *);
double speed_mpn_root (
struct speed_params *);
double speed_mpn_perfect_power_p (
struct speed_params *);
double speed_mpn_perfect_square_p (
struct speed_params *);
double speed_mpn_sub_n (
struct speed_params *);
double speed_mpn_sub_1 (
struct speed_params *);
double speed_mpn_sub_1_inplace (
struct speed_params *);
double speed_mpn_sub_err1_n (
struct speed_params *);
double speed_mpn_sub_err2_n (
struct speed_params *);
double speed_mpn_sub_err3_n (
struct speed_params *);
double speed_mpn_sublsh_n (
struct speed_params *);
double speed_mpn_sublsh1_n (
struct speed_params *);
double speed_mpn_sublsh2_n (
struct speed_params *);
double speed_mpn_sublsh_n_ip1 (
struct speed_params *);
double speed_mpn_sublsh1_n_ip1 (
struct speed_params *);
double speed_mpn_sublsh2_n_ip1 (
struct speed_params *);
double speed_mpn_submul_1 (
struct speed_params *);
double speed_mpn_toom2_sqr (
struct speed_params *);
double speed_mpn_toom3_sqr (
struct speed_params *);
double speed_mpn_toom4_sqr (
struct speed_params *);
double speed_mpn_toom6_sqr (
struct speed_params *);
double speed_mpn_toom8_sqr (
struct speed_params *);
double speed_mpn_toom22_mul (
struct speed_params *);
double speed_mpn_toom33_mul (
struct speed_params *);
double speed_mpn_toom44_mul (
struct speed_params *);
double speed_mpn_toom6h_mul (
struct speed_params *);
double speed_mpn_toom8h_mul (
struct speed_params *);
double speed_mpn_toom32_mul (
struct speed_params *);
double speed_mpn_toom42_mul (
struct speed_params *);
double speed_mpn_toom43_mul (
struct speed_params *);
double speed_mpn_toom63_mul (
struct speed_params *);
double speed_mpn_toom32_for_toom43_mul (
struct speed_params *);
double speed_mpn_toom43_for_toom32_mul (
struct speed_params *);
double speed_mpn_toom32_for_toom53_mul (
struct speed_params *);
double speed_mpn_toom53_for_toom32_mul (
struct speed_params *);
double speed_mpn_toom42_for_toom53_mul (
struct speed_params *);
double speed_mpn_toom53_for_toom42_mul (
struct speed_params *);
double speed_mpn_toom43_for_toom54_mul (
struct speed_params *);
double speed_mpn_toom54_for_toom43_mul (
struct speed_params *);
double speed_mpn_toom42_mulmid (
struct speed_params *);
double speed_mpn_mulmod_bnm1 (
struct speed_params *);
double speed_mpn_bc_mulmod_bnm1 (
struct speed_params *);
double speed_mpn_mulmod_bnm1_rounded (
struct speed_params *);
double speed_mpn_sqrmod_bnm1 (
struct speed_params *);
double speed_mpn_mulmod_bknp1 (
struct speed_params *);
double speed_mpn_sqrmod_bknp1 (
struct speed_params *);
double speed_mpn_mulmod_bnp1 (
struct speed_params *);
double speed_mpn_sqrmod_bnp1 (
struct speed_params *);
double speed_mpn_udiv_qrnnd (
struct speed_params *);
double speed_mpn_udiv_qrnnd_r (
struct speed_params *);
double speed_mpn_umul_ppmm (
struct speed_params *);
double speed_mpn_umul_ppmm_r (
struct speed_params *);
double speed_mpn_xnor_n (
struct speed_params *);
double speed_mpn_xor_n (
struct speed_params *);
double speed_MPN_ZERO (
struct speed_params *);
double speed_mpq_init_clear (
struct speed_params *);
double speed_mpz_add (
struct speed_params *);
double speed_mpz_invert (
struct speed_params *);
double speed_mpz_bin_uiui (
struct speed_params *);
double speed_mpz_bin_ui (
struct speed_params *);
double speed_mpz_fac_ui (
struct speed_params *);
double speed_mpz_2fac_ui (
struct speed_params *);
double speed_mpz_mfac_uiui (
struct speed_params *);
double speed_mpz_primorial_ui (
struct speed_params *);
double speed_mpz_fib_ui (
struct speed_params *);
double speed_mpz_fib2_ui (
struct speed_params *);
double speed_mpz_init_clear (
struct speed_params *);
double speed_mpz_init_realloc_clear (
struct speed_params *);
double speed_gmp_primesieve (
struct speed_params *);
double speed_mpz_nextprime (
struct speed_params *);
double speed_mpz_nextprime_1 (
struct speed_params *);
double speed_mpz_prevprime (
struct speed_params *);
double speed_mpz_prevprime_1 (
struct speed_params *);
double speed_mpz_jacobi (
struct speed_params *);
double speed_mpz_lucnum_ui (
struct speed_params *);
double speed_mpz_lucnum2_ui (
struct speed_params *);
double speed_mpz_mod (
struct speed_params *);
double speed_mpz_powm (
struct speed_params *);
double speed_mpz_powm_mod (
struct speed_params *);
double speed_mpz_powm_redc (
struct speed_params *);
double speed_mpz_powm_sec (
struct speed_params *);
double speed_mpz_powm_ui (
struct speed_params *);
double speed_mpz_urandomb (
struct speed_params *);
double speed_gmp_randseed (
struct speed_params *);
double speed_gmp_randseed_ui (
struct speed_params *);
double speed_noop (
struct speed_params *);
double speed_noop_wxs (
struct speed_params *);
double speed_noop_wxys (
struct speed_params *);
double speed_operator_div (
struct speed_params *);
double speed_operator_mod (
struct speed_params *);
double speed_udiv_qrnnd (
struct speed_params *);
double speed_udiv_qrnnd_preinv1 (
struct speed_params *);
double speed_udiv_qrnnd_preinv2 (
struct speed_params *);
double speed_udiv_qrnnd_preinv3 (
struct speed_params *);
double speed_udiv_qrnnd_c (
struct speed_params *);
double speed_umul_ppmm (
struct speed_params *);
/* Prototypes for other routines */
#if defined (__cplusplus)
extern "C" {
#endif
/* low 32-bits in p[0], high 32-bits in p[1] */
void speed_cyclecounter (
unsigned p[
2 ]);
#if defined (__cplusplus)
}
#endif
void mftb_function (
unsigned p[
2 ]);
double speed_cyclecounter_diff (
const unsigned [
2 ],
const unsigned [
2 ]);
int gettimeofday_microseconds_p (
void );
int getrusage_microseconds_p (
void );
int cycles_works_p (
void );
long clk_tck (
void );
double freq_measure (
const char *,
double (*)(
void ));
int double_cmp_ptr (
const double *,
const double *);
void pentium_wbinvd (
void );
typedef int (*qsort_function_t) (
const void *,
const void *);
void noop (
void );
void noop_1 (mp_limb_t);
void noop_wxs (mp_ptr, mp_srcptr, mp_size_t);
void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
void mpn_cache_fill (mp_srcptr, mp_size_t);
void mpn_cache_fill_dummy (mp_limb_t);
void speed_cache_fill (
struct speed_params *);
void speed_operand_src (
struct speed_params *, mp_ptr, mp_size_t);
void speed_operand_dst (
struct speed_params *, mp_ptr, mp_size_t);
extern int speed_option_addrs;
extern int speed_option_verbose;
extern int speed_option_cycles_broken;
void speed_option_set (
const char *);
mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_l
imb_t);
mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
mp_limb_t mpn_div_qr_1n_pi1_3 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
mp_limb_t mpn_div_qr_1n_pi1_4 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int );
int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int );
int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int );
int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int );
int mpn_hgcd2_1 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
int mpn_hgcd2_2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
int mpn_hgcd2_3 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
int mpn_hgcd2_4 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
int mpn_hgcd2_5 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1*);
mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4 ]);
mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4 ]);
void mpn_mod_1_1p_cps_1 (mp_limb_t [4 ], mp_limb_t);
void mpn_mod_1_1p_cps_2 (mp_limb_t [4 ], mp_limb_t);
mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
mp_size_t mpn_hgcd_lehmer_itch (mp_size_t);
int mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);
mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);
mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);
mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int );
void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr);
void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int , int );
/* "get" is called repeatedly until it ticks over, just in case on a fast
processor it takes less than a microsecond , though this is probably
unlikely if it ' s a system call .
speed_cyclecounter is called on the same side of the " get " for the start
and end measurements . It doesn ' t matter how long it takes from the " get "
sample to the cycles sample , since that period will cancel out in the
difference calculation ( assuming it ' s the same each time ) .
Letting the test run for more than a process time slice is probably only
going to reduce accuracy , especially for getrusage when the cycle counter
is real time , or for gettimeofday if the cycle counter is in fact process
time . Use CLK_TCK / 2 as a reasonable stop .
It ' d be desirable to be quite accurate here . The default speed_precision
for a cycle counter is 10000 cycles , so to mix that with getrusage or
gettimeofday the frequency should be at least that accurate . But running
measurements for 10000 microseconds ( or more ) is too long . Be satisfied
with just a half clock tick (5000 microseconds usually). */
#define FREQ_MEASURE_ONE(name, type, get, getc, sec, usec) \
do { \
type st1, st, et1, et; \
unsigned sc[2 ], ec[2 ]; \
long dt, half_tick; \
double dc, cyc; \
\
half_tick = (1000000 L / clk_tck()) / 2 ; \
\
get (st1); \
do { \
get (st); \
} while (usec(st) == usec(st1) && sec(st) == sec(st1)); \
\
getc (sc); \
\
for (;;) \
{ \
get (et1); \
do { \
get (et); \
} while (usec(et) == usec(et1) && sec(et) == sec(et1)); \
\
getc (ec); \
\
dc = speed_cyclecounter_diff (ec, sc); \
\
/* allow secs to cancel before multiplying */ \
dt = sec(et) - sec(st); \
dt = dt * 1000000 L + (usec(et) - usec(st)); \
\
if (dt >= half_tick) \
break ; \
} \
\
cyc = dt * 1 e-6 / dc; \
\
if (speed_option_verbose >= 2 ) \
printf ("freq_measure_%s_one() dc=%.6g dt=%ld cyc=%.6g\n" , \
name, dc, dt, cyc); \
\
return dt * 1 e-6 / dc; \
\
} while (0 )
/* The measuring routines use these big macros to save duplication for
similar forms . They also get used for some automatically generated
measuring of new implementations of functions .
Having something like SPEED_ROUTINE_BINARY_N as a subroutine accepting a
function pointer is considered undesirable since it ' s not the way a
normal application will be calling , and some processors might do
different things with an indirect call , like not branch predicting , or
doing a full pipe flush . At least some of the " functions " measured are
actually macros too .
The net effect is to bloat the object code , possibly in a big way , but
only what ' s being measured is being run , so that doesn ' t matter .
The loop forms don ' t try to cope with _ _ GMP_ATTRIBUTE_PURE or
ATTRIBUTE_CONST on the called functions . Adding a cast to a non - pure
function pointer doesn ' t work in gcc 3 . 2 . Using an actual non - pure
function pointer variable works , but stands a real risk of a
non - optimizing compiler generating unnecessary overheads in the call .
Currently the best idea is not to use those attributes for a timing
program build . _ _ GMP_NO_ATTRIBUTE_CONST_PURE will tell gmp . h and
gmp-impl.h to omit them from routines there. */
#define SPEED_RESTRICT_COND(cond) if (!(cond)) return -1 .0 ;
/* For mpn_copy or similar. */
#define SPEED_ROUTINE_MPN_COPY_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_COPY(function) \
SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size))
#define SPEED_ROUTINE_MPN_TABSELECT(function) \
{ \
mp_ptr xp, wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
if (s->r == 0 ) \
s->r = s->size; /* default to a quadratic shape */ \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (xp, s->size * s->r, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_src (s, xp, s->size * s->r); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, xp, s->size, s->r, (s->r) / 2 ); \
while (--i != 0 ); \
t = speed_endtime () / s->r; \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_COPYC(function) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, s->xp, s->size, 0 ); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* s->size is still in limbs, and it's limbs which are copied, but
"function" takes a size in bytes not limbs. */
#define SPEED_ROUTINE_MPN_COPY_BYTES(function) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, s->xp, s->size * GMP_LIMB_BYTES); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* For mpn_add_n, mpn_sub_n, or similar. */
#define SPEED_ROUTINE_MPN_BINARY_N_CALL(call) \
{ \
mp_ptr wp; \
mp_ptr xp, yp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
xp = s->xp; \
yp = s->yp; \
\
if (s->r == 0 ) ; \
else if (s->r == 1 ) { xp = wp; } \
else if (s->r == 2 ) { yp = wp; } \
else if (s->r == 3 ) { xp = wp; yp = wp; } \
else if (s->r == 4 ) { yp = xp; } \
else { \
TMP_FREE; \
return -1 .0 ; \
} \
\
/* initialize wp if operand overlap */ \
if (xp == wp || yp == wp) \
MPN_COPY (wp, s->xp, s->size); \
\
speed_operand_src (s, xp, s->size); \
speed_operand_src (s, yp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* For mpn_aors_errK_n, where 1 <= K <= 3. */
#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K) \
{ \
mp_ptr wp; \
mp_ptr xp, yp; \
mp_ptr zp[K]; \
mp_limb_t ep[2 *K]; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
/* (don't have a mechanism to specify zp alignments) */ \
for (i = 0 ; i < K; i++) \
SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0 ); \
\
xp = s->xp; \
yp = s->yp; \
\
if (s->r == 0 ) ; \
else if (s->r == 1 ) { xp = wp; } \
else if (s->r == 2 ) { yp = wp; } \
else if (s->r == 3 ) { xp = wp; yp = wp; } \
else if (s->r == 4 ) { yp = xp; } \
else { \
TMP_FREE; \
return -1 .0 ; \
} \
\
/* initialize wp if operand overlap */ \
if (xp == wp || yp == wp) \
MPN_COPY (wp, s->xp, s->size); \
\
speed_operand_src (s, xp, s->size); \
speed_operand_src (s, yp, s->size); \
for (i = 0 ; i < K; i++) \
speed_operand_src (s, zp[i], s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function) \
SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0 ], s->size, 0 ), 1 )
#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function) \
SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0 ], zp[1 ], s->size, 0 ), 2 )
#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function) \
SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0 ], zp[1 ], zp[2 ], s->size, 0 ), 3 )
/* For mpn_add_n, mpn_sub_n, or similar. */
#define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call) \
{ \
mp_ptr ap, sp; \
mp_ptr xp, yp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (sp, s->size, s->align_wp); \
\
xp = s->xp; \
yp = s->yp; \
\
if ((s->r & 1 ) != 0 ) { xp = ap; } \
if ((s->r & 2 ) != 0 ) { yp = ap; } \
if ((s->r & 4 ) != 0 ) { xp = sp; } \
if ((s->r & 8 ) != 0 ) { yp = sp; } \
if ((s->r & 3 ) == 3 || (s->r & 12 ) == 12 ) \
{ \
TMP_FREE; \
return -1 .0 ; \
} \
\
/* initialize ap if operand overlap */ \
if (xp == ap || yp == ap) \
MPN_COPY (ap, s->xp, s->size); \
/* initialize sp if operand overlap */ \
if (xp == sp || yp == sp) \
MPN_COPY (sp, s->xp, s->size); \
\
speed_operand_src (s, xp, s->size); \
speed_operand_src (s, yp, s->size); \
speed_operand_dst (s, ap, s->size); \
speed_operand_dst (s, sp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_BINARY_N(function) \
SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size))
#define SPEED_ROUTINE_MPN_BINARY_NC(function) \
SPEED_ROUTINE_MPN_BINARY_N_CALL ((*function) (wp, xp, yp, s->size, 0 ))
/* For mpn_lshift, mpn_rshift, mpn_mul_1, with r, or similar. */
#define SPEED_ROUTINE_MPN_UNARY_1_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_UNARY_1(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
#define SPEED_ROUTINE_MPN_UNARY_1C(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0 ))
/* FIXME: wp is uninitialized here, should start it off from xp */
#define SPEED_ROUTINE_MPN_UNARY_1_INPLACE(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, wp, s->size, s->r))
#define SPEED_ROUTINE_MPN_DIVEXACT_1(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
#define SPEED_ROUTINE_MPN_BDIV_Q_1(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call) \
{ \
unsigned shift; \
mp_limb_t dinv; \
\
SPEED_RESTRICT_COND (s->size > 0 ); \
SPEED_RESTRICT_COND (s->r != 0 ); \
\
count_trailing_zeros (shift, s->r); \
binvert_limb (dinv, s->r >> shift); \
\
SPEED_ROUTINE_MPN_UNARY_1_CALL (call); \
}
#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function) \
SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL \
((*function) (wp, s->xp, s->size, s->r, dinv, shift))
#define SPEED_ROUTINE_MPN_BDIV_DBM1C(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0 ))
#define SPEED_ROUTINE_MPN_DIVREM_1(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0 , s->xp, s->size, s->r))
#define SPEED_ROUTINE_MPN_DIVREM_1C(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, 0 , s->xp, s->size, s->r, 0 ))
#define SPEED_ROUTINE_MPN_DIVREM_1F(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0 , s->r))
#define SPEED_ROUTINE_MPN_DIVREM_1CF(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->size, s->xp, 0 , s->r, 0 ))
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL(call) \
{ \
unsigned shift; \
mp_limb_t dinv; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
SPEED_RESTRICT_COND (s->r != 0 ); \
\
count_leading_zeros (shift, s->r); \
invert_limb (dinv, s->r << shift); \
\
SPEED_ROUTINE_MPN_UNARY_1_CALL (call); \
} \
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1(function) \
SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL \
((*function) (wp, 0 , s->xp, s->size, s->r, dinv, shift))
/* s->size limbs worth of fraction part */
#define SPEED_ROUTINE_MPN_PREINV_DIVREM_1F(function) \
SPEED_ROUTINE_MPN_PREINV_DIVREM_1_CALL \
((*function) (wp, s->size, s->xp, 0 , s->r, dinv, shift))
/* s->r is duplicated to form the multiplier, defaulting to
MP_BASES_BIG_BASE_10 . Not sure if that ' s particularly useful , but at
least it provides some control. */
#define SPEED_ROUTINE_MPN_UNARY_N(function,N) \
{ \
mp_ptr wp; \
mp_size_t wn; \
unsigned i; \
double t; \
mp_limb_t yp[N]; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= N); \
\
TMP_MARK; \
wn = s->size + N-1 ; \
SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \
for (i = 0 ; i < N; i++) \
yp[i] = (s->r != 0 ? s->r : MP_BASES_BIG_BASE_10); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, yp, (mp_size_t) N); \
speed_operand_dst (s, wp, wn); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, s->xp, s->size, yp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_UNARY_2(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 2 )
#define SPEED_ROUTINE_MPN_UNARY_3(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 3 )
#define SPEED_ROUTINE_MPN_UNARY_4(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 4 )
#define SPEED_ROUTINE_MPN_UNARY_5(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 5 )
#define SPEED_ROUTINE_MPN_UNARY_6(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 6 )
#define SPEED_ROUTINE_MPN_UNARY_7(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 7 )
#define SPEED_ROUTINE_MPN_UNARY_8(function) \
SPEED_ROUTINE_MPN_UNARY_N (function, 8 )
#define SPEED_ROUTINE_MPN_ADDADDMUL1_MSB0(function) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
mp_limb_t r; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
r = s->r != 0 ? s->r : MP_BASES_BIG_BASE_10; \
r &= ~GMP_NUMB_HIGHBIT; \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, s->xp, s->yp, s->size, r, r); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
#define SPEED_ROUTINE_MPN_MUL(function) \
{ \
mp_ptr wp; \
mp_size_t size1; \
unsigned i; \
double t; \
TMP_DECL; \
\
size1 = (s->r == 0 ? s->size : s->r); \
if (size1 < 0 ) size1 = -size1 - s->size; \
\
SPEED_RESTRICT_COND (size1 >= 1 ); \
SPEED_RESTRICT_COND (s->size >= size1); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, size1); \
speed_operand_dst (s, wp, size1 + s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, s->xp, s->size, s->yp, size1); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MUL_N_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, 2 *s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, 2 *s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size));
#define SPEED_ROUTINE_MPN_MULLO_N_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MULLO_N(function) \
SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size));
#define SPEED_ROUTINE_MPN_MULLO_BASECASE(function) \
SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size));
#define SPEED_ROUTINE_MPN_SQRLO(function) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, s->xp, s->size); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size. */
#define SPEED_ROUTINE_MPN_MULMID(function) \
{ \
mp_ptr wp, xp; \
mp_size_t size1; \
unsigned i; \
double t; \
TMP_DECL; \
\
size1 = (s->r == 0 ? (2 * s->size - 1 ) : s->r); \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
SPEED_RESTRICT_COND (size1 >= s->size); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3 , s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
\
speed_operand_src (s, xp, size1); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, size1 - s->size + 3 ); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, xp, size1, s->yp, s->size); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MULMID_N(function) \
{ \
mp_ptr wp, xp; \
mp_size_t size1; \
unsigned i; \
double t; \
TMP_DECL; \
\
size1 = 2 * s->size - 1 ; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3 , s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
\
speed_operand_src (s, xp, size1); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, size1 - s->size + 3 ); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, xp, s->yp, s->size); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function) \
{ \
mp_ptr wp, xp, scratch; \
mp_size_t size1, scratch_size; \
unsigned i; \
double t; \
TMP_DECL; \
\
size1 = 2 * s->size - 1 ; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3 , s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
scratch_size = mpn_toom42_mulmid_itch (s->size); \
SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0 ); \
\
speed_operand_src (s, xp, size1); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, size1 - s->size + 3 ); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, xp, s->yp, s->size, scratch); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call) \
{ \
mp_ptr wp, tp; \
unsigned i; \
double t; \
mp_size_t itch; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, 2 * s->size); \
speed_operand_dst (s, tp, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function) \
{ \
mp_ptr wp, tp; \
unsigned i; \
double t; \
mp_size_t size, itch; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
size = mpn_mulmod_bnm1_next_size (s->size); \
itch = mpn_mulmod_bnm1_itch (size, size, size); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, size); \
speed_operand_dst (s, tp, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, size, s->xp, s->size, s->yp, s->size, tp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#ifndef MOD_BKNP1_USE11
#define MOD_BKNP1_USE11 0
#endif
#ifndef MOD_BKNP1_ONLY3
#define MOD_BKNP1_ONLY3 0
#endif
#define SPEED_ROUTINE_MPN_MULMOD_BNP1_CALL(call,use_r) \
{ \
mp_ptr wp, tp; \
unsigned i, k; \
double t; \
mp_size_t itch, nk; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
SPEED_RESTRICT_COND (!use_r || (s->r == 0 ) || \
(s->r == 3 ) || (s->r == 5 ) || (s->r == 7 ) || \
(s->r == 13 ) || (s->r == 17 ) || \
((MOD_BKNP1_USE11) && (s->r == 11 ))); \
\
if (!use_r || (s->r < 2 )) \
{ \
if (s->size % 3 == 0 ) {nk = s->size / (k = 3 );} \
else if (s->size % 5 == 0 ) {nk = s->size / (k = 5 );} \
else if (s->size % 7 == 0 ) {nk = s->size / (k = 7 );} \
else if (s->size % 11 == 0 ) {nk = s->size / (k = 11 );} \
else if (s->size % 13 == 0 ) {nk = s->size / (k = 13 );} \
else if (s->size % 17 == 0 ) {nk = s->size / (k = 17 );} \
else nk = s->size / (k = 1 ); \
} \
else nk = s->size / (k = s->r); \
\
if (MOD_BKNP1_ONLY3) \
k = 3 ; \
SPEED_RESTRICT_COND ((!use_r || (k > 2 )) && (s->size == k * nk)); \
SPEED_RESTRICT_COND ((GMP_NUMB_MAX % k == 0 ) || (nk % 3 != 0 ) || \
((MOD_BKNP1_USE11) && (k == 11 ))); \
\
itch = mpn_mulmod_bknp1_itch (s->size); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size + 2 , s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
\
s->xp [s->size] &= 1 ; \
s->yp [s->size] &= 1 ; \
speed_operand_src (s, s->xp, s->size + 1 ); \
speed_operand_src (s, s->yp, s->size + 1 ); \
speed_operand_dst (s, wp, 2 * s->size + 2 ); \
speed_operand_dst (s, tp, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
{ \
mp_ptr wp, tspace; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= minsize); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, 2 *s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, 2 *s->size); \
speed_operand_dst (s, tspace, tsize); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size, tspace), \
mpn_toom22_mul_itch (s->size, s->size), \
MPN_TOOM22_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size, tspace), \
mpn_toom33_mul_itch (s->size, s->size), \
MPN_TOOM33_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size, tspace), \
mpn_toom44_mul_itch (s->size, s->size), \
MPN_TOOM44_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size, tspace), \
mpn_toom6h_mul_itch (s->size, s->size), \
MPN_TOOM6H_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size, tspace), \
mpn_toom8h_mul_itch (s->size, s->size), \
MPN_TOOM8H_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 2 *s->size/3 , tspace), \
mpn_toom32_mul_itch (s->size, 2 *s->size/3 ), \
MPN_TOOM32_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size/2 , tspace), \
mpn_toom42_mul_itch (s->size, s->size/2 ), \
MPN_TOOM42_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size*3 /4 , tspace), \
mpn_toom43_mul_itch (s->size, s->size*3 /4 ), \
MPN_TOOM43_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, s->size/2 , tspace), \
mpn_toom63_mul_itch (s->size, s->size/2 ), \
MPN_TOOM63_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 17 *s->size/24 , tspace), \
mpn_toom32_mul_itch (s->size, 17 *s->size/24 ), \
MPN_TOOM32_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 17 *s->size/24 , tspace), \
mpn_toom43_mul_itch (s->size, 17 *s->size/24 ), \
MPN_TOOM43_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 19 *s->size/30 , tspace), \
mpn_toom32_mul_itch (s->size, 19 *s->size/30 ), \
MPN_TOOM32_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 19 *s->size/30 , tspace), \
mpn_toom53_mul_itch (s->size, 19 *s->size/30 ), \
MPN_TOOM53_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 11 *s->size/20 , tspace), \
mpn_toom42_mul_itch (s->size, 11 *s->size/20 ), \
MPN_TOOM42_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 11 *s->size/20 , tspace), \
mpn_toom53_mul_itch (s->size, 11 *s->size/20 ), \
MPN_TOOM53_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 5 *s->size/6 , tspace), \
mpn_toom42_mul_itch (s->size, 5 *s->size/6 ), \
MPN_TOOM54_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
(function (wp, s->xp, s->size, s->yp, 5 *s->size/6 , tspace), \
mpn_toom54_mul_itch (s->size, 5 *s->size/6 ), \
MPN_TOOM54_MUL_MINSIZE)
#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, 2 *s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, 2 *s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_SQR(function) \
SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size))
#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call) \
{ \
mp_ptr wp, tp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, tp, 2 * s->size); \
speed_operand_dst (s, wp, 2 * s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime () / 2 ; \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize) \
{ \
mp_ptr wp, tspace; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= minsize); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, 2 *s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tspace, tsize, s->align_wp2); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, 2 *s->size); \
speed_operand_dst (s, tspace, tsize); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_TOOM2_SQR(function) \
SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
mpn_toom2_sqr_itch (s->size), \
MPN_TOOM2_SQR_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM3_SQR(function) \
SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
mpn_toom3_sqr_itch (s->size), \
MPN_TOOM3_SQR_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM4_SQR(function) \
SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
mpn_toom4_sqr_itch (s->size), \
MPN_TOOM4_SQR_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM6_SQR(function) \
SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
mpn_toom6_sqr_itch (s->size), \
MPN_TOOM6_SQR_MINSIZE)
#define SPEED_ROUTINE_MPN_TOOM8_SQR(function) \
SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
mpn_toom8_sqr_itch (s->size), \
MPN_TOOM8_SQR_MINSIZE)
#define SPEED_ROUTINE_MPN_MOD_CALL(call) \
{ \
unsigned i; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
speed_operand_src (s, s->xp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
\
return speed_endtime (); \
}
#define SPEED_ROUTINE_MPN_MOD_1(function) \
SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size, s->r))
#define SPEED_ROUTINE_MPN_MOD_1C(function) \
SPEED_ROUTINE_MPN_MOD_CALL ((*function)(s->xp, s->size, s->r, CNST_LIMB(0 )))
#define SPEED_ROUTINE_MPN_MODEXACT_1_ODD(function) \
SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r));
#define SPEED_ROUTINE_MPN_MODEXACT_1C_ODD(function) \
SPEED_ROUTINE_MPN_MOD_CALL (function (s->xp, s->size, s->r, CNST_LIMB(0 )));
#define SPEED_ROUTINE_MPN_MOD_34LSUB1(function) \
SPEED_ROUTINE_MPN_MOD_CALL ((*function) (s->xp, s->size))
#define SPEED_ROUTINE_MPN_PREINV_MOD_1(function) \
{ \
unsigned i; \
mp_limb_t inv; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
SPEED_RESTRICT_COND (s->r & GMP_LIMB_HIGHBIT); \
\
invert_limb (inv, s->r); \
speed_operand_src (s, s->xp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
(*function) (s->xp, s->size, s->r, inv); \
while (--i != 0 ); \
\
return speed_endtime (); \
}
#define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc) \
{ \
unsigned i; \
mp_limb_t inv[4 ]; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
\
mpn_mod_1_1p_cps (inv, s->r); \
speed_operand_src (s, s->xp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
pfunc (inv, s->r); \
function (s->xp, s->size, s->r << inv[1 ], inv); \
} while (--i != 0 ); \
\
return speed_endtime (); \
}
#define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N) \
{ \
unsigned i; \
mp_limb_t inv[N+3 ]; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / N); \
\
speed_operand_src (s, s->xp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
pfunc (inv, s->r); \
function (s->xp, s->size, s->r, inv); \
} while (--i != 0 ); \
\
return speed_endtime (); \
}
/* A division of 2*s->size by s->size limbs */
#define SPEED_ROUTINE_MPN_DC_DIVREM_CALL(call) \
{ \
unsigned i; \
mp_ptr a, d, q, r; \
double t; \
gmp_pi1_t dinv; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (a, 2 *s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (d, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (q, s->size+1 , s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (r, s->size, s->align_wp2); \
\
MPN_COPY (a, s->xp, s->size); \
MPN_COPY (a+s->size, s->xp, s->size); \
\
MPN_COPY (d, s->yp, s->size); \
\
/* normalize the data */ \
d[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
a[2 *s->size-1 ] = d[s->size-1 ] - 1 ; \
\
invert_pi1 (dinv, d[s->size-1 ], d[s->size-2 ]); \
\
speed_operand_src (s, a, 2 *s->size); \
speed_operand_src (s, d, s->size); \
speed_operand_dst (s, q, s->size+1 ); \
speed_operand_dst (s, r, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* A remainder 2*s->size by s->size limbs */
#define SPEED_ROUTINE_MPZ_MOD(function) \
{ \
unsigned i; \
mpz_t a, d, r; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
mpz_init_set_n (d, s->yp, s->size); \
\
/* high part less than d, low part a duplicate copied in */ \
mpz_init_set_n (a, s->xp, s->size); \
mpz_mod (a, a, d); \
mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size); \
MPN_COPY (PTR(a), s->xp, s->size); \
\
mpz_init (r); \
\
speed_operand_src (s, PTR(a), SIZ(a)); \
speed_operand_src (s, PTR(d), SIZ(d)); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (r, a, d); \
while (--i != 0 ); \
return speed_endtime (); \
}
#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN) \
{ \
unsigned i; \
mp_ptr dp, tp, ap, qp; \
gmp_pi1_t inv; \
double t; \
mp_size_t size1; \
TMP_DECL; \
\
size1 = (s->r == 0 ? 2 * s->size : s->r); \
\
SPEED_RESTRICT_COND (s->size >= DMIN); \
SPEED_RESTRICT_COND (size1 - s->size >= QMIN); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2); \
\
/* we don't fill in dividend completely when size1 > s->size */ \
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (ap + size1 - s->size, s->xp, s->size); \
\
MPN_COPY (dp, s->yp, s->size); \
\
/* normalize the data */ \
dp[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
ap[size1 - 1 ] = dp[s->size - 1 ] - 1 ; \
\
invert_pi1 (inv, dp[s->size-1 ], dp[s->size-2 ]); \
\
speed_operand_src (s, ap, size1); \
speed_operand_dst (s, tp, size1); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, qp, size1 - s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, ap, size1); \
function (qp, tp, size1, dp, s->size, INV); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn) \
{ \
unsigned i; \
mp_ptr dp, tp, qp, scratch; \
double t; \
mp_size_t itch; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
\
itch = itchfn (2 * s->size, s->size, 0 ); \
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
\
MPN_COPY (tp, s->xp, s->size); \
MPN_COPY (tp+s->size, s->xp, s->size); \
\
/* normalize the data */ \
dp[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
tp[2 *s->size-1 ] = dp[s->size-1 ] - 1 ; \
\
speed_operand_dst (s, qp, s->size); \
speed_operand_src (s, tp, 2 * s->size); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, scratch, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
function (qp, tp, 2 * s->size, dp, s->size, scratch); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn) \
{ \
unsigned i; \
mp_ptr dp, tp, qp, rp, scratch; \
double t; \
mp_size_t size1, itch; \
TMP_DECL; \
\
size1 = (s->r == 0 ? 2 * s->size : s->r); \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
SPEED_RESTRICT_COND (size1 >= s->size); \
\
itch = itchfn (size1, s->size, 0 ); \
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
\
/* we don't fill in dividend completely when size1 > s->size */ \
MPN_COPY (tp, s->xp, s->size); \
MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
\
MPN_COPY (dp, s->yp, s->size); \
\
/* normalize the data */ \
dp[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
tp[size1 - 1 ] = dp[s->size - 1 ] - 1 ; \
\
speed_operand_dst (s, qp, size1 - s->size); \
speed_operand_dst (s, rp, s->size); \
speed_operand_src (s, tp, size1); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, scratch, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
function (qp, rp, tp, size1, dp, s->size, scratch); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn) \
{ \
unsigned i; \
mp_ptr dp, tp, qp, rp, ip, scratch, tmp; \
double t; \
mp_size_t size1, itch; \
TMP_DECL; \
\
size1 = (s->r == 0 ? 2 * s->size : s->r); \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
SPEED_RESTRICT_COND (size1 >= s->size); \
\
itch = itchfn (size1, s->size, s->size); \
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */ \
\
/* we don't fill in dividend completely when size1 > s->size */ \
MPN_COPY (tp, s->xp, s->size); \
MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
\
MPN_COPY (dp, s->yp, s->size); \
\
/* normalize the data */ \
dp[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
tp[size1 - 1 ] = dp[s->size-1 ] - 1 ; \
\
tmp = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size)); \
mpn_invert (ip, dp, s->size, tmp); \
\
speed_operand_dst (s, qp, size1 - s->size); \
speed_operand_dst (s, rp, s->size); \
speed_operand_src (s, tp, size1); \
speed_operand_src (s, dp, s->size); \
speed_operand_src (s, ip, s->size); \
speed_operand_dst (s, scratch, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function) \
{ \
unsigned i; \
mp_ptr dp, tp, ap, qp; \
mp_limb_t inv; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, 2 *s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2 *s->size, s->align_wp2); \
\
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (ap+s->size, s->xp, s->size); \
\
/* divisor must be odd */ \
MPN_COPY (dp, s->yp, s->size); \
dp[0 ] |= 1 ; \
binvert_limb (inv, dp[0 ]); \
inv = -inv; \
\
speed_operand_src (s, ap, 2 *s->size); \
speed_operand_dst (s, tp, 2 *s->size); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, qp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, ap, 2 *s->size); \
function (qp, tp, 2 *s->size, dp, s->size, inv); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function) \
{ \
unsigned i; \
mp_ptr dp, tp, qp; \
mp_limb_t inv; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2); \
\
/* divisor must be odd */ \
MPN_COPY (dp, s->yp, s->size); \
dp[0 ] |= 1 ; \
binvert_limb (inv, dp[0 ]); \
inv = -inv; \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, tp, s->size); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, qp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, s->xp, s->size); \
function (qp, tp, s->size, dp, s->size, inv); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_PI1_BDIV_R(function) \
{ \
unsigned i; \
mp_ptr dp, tp, ap; \
mp_limb_t inv; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, 2 *s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2 *s->size, s->align_wp2); \
\
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (ap+s->size, s->xp, s->size); \
\
/* divisor must be odd */ \
MPN_COPY (dp, s->yp, s->size); \
dp[0 ] |= 1 ; \
binvert_limb (inv, dp[0 ]); \
inv = -inv; \
\
speed_operand_src (s, ap, 2 *s->size); \
speed_operand_dst (s, tp, 2 *s->size); \
speed_operand_src (s, dp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, ap, 2 *s->size); \
function (tp, 2 *s->size, dp, s->size, inv); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn) \
{ \
unsigned i; \
mp_ptr dp, qp, scratch; \
double t; \
mp_size_t itch; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
\
itch = itchfn (s->size, s->size); \
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
\
/* divisor must be odd */ \
MPN_COPY (dp, s->yp, s->size); \
dp[0 ] |= 1 ; \
\
speed_operand_dst (s, qp, s->size); \
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, scratch, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
function (qp, s->xp, s->size, dp, s->size, scratch); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn) \
{ \
unsigned i; \
mp_ptr dp, tp, qp, rp, scratch; \
double t; \
mp_size_t itch; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
\
itch = itchfn (2 * s->size, s->size); \
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
\
MPN_COPY (tp, s->xp, s->size); \
MPN_COPY (tp+s->size, s->xp, s->size); \
\
/* divisor must be odd */ \
MPN_COPY (dp, s->yp, s->size); \
dp[0 ] |= 1 ; \
\
speed_operand_dst (s, qp, s->size); \
speed_operand_dst (s, rp, s->size); \
speed_operand_src (s, tp, 2 * s->size); \
speed_operand_src (s, dp, s->size); \
speed_operand_dst (s, scratch, itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
function (qp, rp, tp, 2 * s->size, dp, s->size, scratch); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_BROOT(function) \
{ \
SPEED_RESTRICT_COND (s->r & 1 ); \
s->xp[0 ] |= 1 ; \
SPEED_ROUTINE_MPN_UNARY_1_CALL \
((*function) (wp, s->xp, s->size, s->r)); \
}
#define SPEED_ROUTINE_MPN_BROOTINV(function, itch) \
{ \
mp_ptr wp, tp; \
unsigned i; \
double t; \
TMP_DECL; \
TMP_MARK; \
SPEED_RESTRICT_COND (s->size >= 1 ); \
SPEED_RESTRICT_COND (s->r & 1 ); \
wp = TMP_ALLOC_LIMBS (s->size); \
tp = TMP_ALLOC_LIMBS ( (itch)); \
s->xp[0 ] |= 1 ; \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
(*function) (wp, s->xp, s->size, s->r, tp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_INVERT(function,itchfn) \
{ \
long i; \
mp_ptr up, tp, ip; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
\
MPN_COPY (up, s->xp, s->size); \
\
/* normalize the data */ \
up[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
\
speed_operand_src (s, up, s->size); \
speed_operand_dst (s, tp, s->size); \
speed_operand_dst (s, ip, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (ip, up, s->size, tp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn) \
{ \
long i; \
mp_ptr up, tp, ip; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
\
MPN_COPY (up, s->xp, s->size); \
\
/* normalize the data */ \
up[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
\
speed_operand_src (s, up, s->size); \
speed_operand_dst (s, tp, s->size); \
speed_operand_dst (s, ip, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (ip, up, s->size, tp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn) \
{ \
long i; \
mp_ptr up, tp, ip; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 3 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
\
MPN_COPY (up, s->xp, s->size); \
\
/* normalize the data */ \
up[s->size-1 ] |= GMP_NUMB_HIGHBIT; \
\
speed_operand_src (s, up, s->size); \
speed_operand_dst (s, tp, s->size); \
speed_operand_dst (s, ip, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (ip, up, s->size, tp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn) \
{ \
long i; \
mp_ptr up, tp, ip; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
\
MPN_COPY (up, s->xp, s->size); \
\
/* normalize the data */ \
up[0 ] |= 1 ; \
\
speed_operand_src (s, up, s->size); \
speed_operand_dst (s, tp, s->size); \
speed_operand_dst (s, ip, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (ip, up, s->size, tp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_SEC_INVERT(function,itchfn) \
{ \
long i; \
mp_ptr up, mp, tp, ip; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
\
speed_operand_src (s, up, s->size); \
speed_operand_dst (s, tp, s->size); \
speed_operand_dst (s, ip, s->size); \
speed_cache_fill (s); \
\
MPN_COPY (mp, s->yp, s->size); \
/* Must be odd */ \
mp[0 ] |= 1 ; \
speed_starttime (); \
i = s->reps; \
do \
{ \
MPN_COPY (up, s->xp, s->size); \
function (ip, up, mp, s->size, 2 *s->size*GMP_NUMB_BITS, tp); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_REDC_1(function) \
{ \
unsigned i; \
mp_ptr cp, mp, tp, ap; \
mp_limb_t inv; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, 2 *s->size+1 , s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2 *s->size+1 , s->align_wp2); \
\
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (ap+s->size, s->xp, s->size); \
\
/* modulus must be odd */ \
MPN_COPY (mp, s->yp, s->size); \
mp[0 ] |= 1 ; \
binvert_limb (inv, mp[0 ]); \
inv = -inv; \
\
speed_operand_src (s, ap, 2 *s->size+1 ); \
speed_operand_dst (s, tp, 2 *s->size+1 ); \
speed_operand_src (s, mp, s->size); \
speed_operand_dst (s, cp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, ap, 2 *s->size); \
function (cp, tp, mp, s->size, inv); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_REDC_2(function) \
{ \
unsigned i; \
mp_ptr cp, mp, tp, ap; \
mp_limb_t invp[2 ]; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, 2 *s->size+1 , s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2 *s->size+1 , s->align_wp2); \
\
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (ap+s->size, s->xp, s->size); \
\
/* modulus must be odd */ \
MPN_COPY (mp, s->yp, s->size); \
mp[0 ] |= 1 ; \
mpn_binvert (invp, mp, 2 , tp); \
invp[0 ] = -invp[0 ]; invp[1 ] = ~invp[1 ]; \
\
speed_operand_src (s, ap, 2 *s->size+1 ); \
speed_operand_dst (s, tp, 2 *s->size+1 ); \
speed_operand_src (s, mp, s->size); \
speed_operand_dst (s, cp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, ap, 2 *s->size); \
function (cp, tp, mp, s->size, invp); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_REDC_N(function) \
{ \
unsigned i; \
mp_ptr cp, mp, tp, ap, invp; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size > 8 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, 2 *s->size+1 , s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2 *s->size+1 , s->align_wp2); \
SPEED_TMP_ALLOC_LIMBS (invp, s->size, s->align_wp2); /* align? */ \
\
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (ap+s->size, s->xp, s->size); \
\
/* modulus must be odd */ \
MPN_COPY (mp, s->yp, s->size); \
mp[0 ] |= 1 ; \
mpn_binvert (invp, mp, s->size, tp); \
\
speed_operand_src (s, ap, 2 *s->size+1 ); \
speed_operand_dst (s, tp, 2 *s->size+1 ); \
speed_operand_src (s, mp, s->size); \
speed_operand_dst (s, cp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, ap, 2 *s->size); \
function (cp, tp, mp, s->size, invp); \
} while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_POPCOUNT(function) \
{ \
unsigned i; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
speed_operand_src (s, s->xp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (s->xp, s->size); \
while (--i != 0 ); \
\
return speed_endtime (); \
}
#define SPEED_ROUTINE_MPN_HAMDIST(function) \
{ \
unsigned i; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (s->xp, s->yp, s->size); \
while (--i != 0 ); \
\
return speed_endtime (); \
}
#define SPEED_ROUTINE_MPZ_UI(function) \
{ \
mpz_t z; \
unsigned i; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
mpz_init (z); \
\
speed_starttime (); \
i = s->reps; \
do \
function (z, s->size); \
while (--i != 0 ); \
t = speed_endtime (); \
\
mpz_clear (z); \
return t; \
}
#define SPEED_ROUTINE_MPZ_FAC_UI(function) SPEED_ROUTINE_MPZ_UI(function)
#define SPEED_ROUTINE_MPZ_FIB_UI(function) SPEED_ROUTINE_MPZ_UI(function)
#define SPEED_ROUTINE_MPZ_LUCNUM_UI(function) SPEED_ROUTINE_MPZ_UI(function)
#define SPEED_ROUTINE_MPZ_UNARY_1(function) \
{ \
mpz_t z, a; \
unsigned i; \
mp_limb_t ls; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
mpz_init (z); \
ls = s->size; \
mpz_roinit_n (a, &ls, s->size != 0 ); \
\
if (s->r < 2 ) \
{ \
speed_starttime (); \
i = s->reps; \
do \
function (z, a); \
while (--i != 0 ); \
t = speed_endtime (); \
} \
else \
{ \
speed_starttime (); \
i = s->reps; \
do \
{ \
int j = s->r; \
mpz_set (z, a); \
do \
{ \
function (z, z); \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
s->time_divisor = s->r; \
} \
\
mpz_clear (z); \
return t; \
}
#define SPEED_ROUTINE_MPZ_2_UI(function) \
{ \
mpz_t z, z2; \
unsigned i; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
mpz_init (z); \
mpz_init (z2); \
\
speed_starttime (); \
i = s->reps; \
do \
function (z, z2, s->size); \
while (--i != 0 ); \
t = speed_endtime (); \
\
mpz_clear (z); \
mpz_clear (z2); \
return t; \
}
#define SPEED_ROUTINE_MPZ_FIB2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function)
#define SPEED_ROUTINE_MPZ_LUCNUM2_UI(function) SPEED_ROUTINE_MPZ_2_UI(function)
#define SPEED_ROUTINE_MPN_FIB2_UI(function) \
{ \
mp_ptr fp, f1p; \
mp_size_t alloc; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
TMP_MARK; \
alloc = MPN_FIB2_SIZE (s->size); \
SPEED_TMP_ALLOC_LIMBS (fp, alloc, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (f1p, alloc, s->align_yp); \
\
speed_starttime (); \
i = s->reps; \
do \
function (fp, f1p, s->size); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* Calculate b^e mod m for random b and m of s->size limbs and random e of 6
limbs . m is forced to odd so that redc can be used . e is limited in
size so the calculation doesn't take too long. */
#define SPEED_ROUTINE_MPZ_POWM(function) \
{ \
mpz_t r, b, e, m; \
unsigned i; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
mpz_init (r); \
if (s->r < 2 ) \
mpz_init_set_n (b, s->xp, s->size); \
else \
mpz_init_set_ui (b, s->r); \
mpz_init_set_n (m, s->yp, s->size); \
mpz_setbit (m, 0 ); /* force m to odd */ \
mpz_init_set_n (e, s->xp_block, 6 ); \
\
speed_starttime (); \
i = s->reps; \
do \
function (r, b, e, m); \
while (--i != 0 ); \
t = speed_endtime (); \
\
mpz_clear (r); \
mpz_clear (b); \
mpz_clear (e); \
mpz_clear (m); \
return t; \
}
/* (m-2)^0xAAAAAAAA mod m */
#define SPEED_ROUTINE_MPZ_POWM_UI(function) \
{ \
mpz_t r, b, m; \
unsigned long e; \
unsigned i; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
mpz_init (r); \
\
/* force m to odd */ \
mpz_init (m); \
mpz_set_n (m, s->xp, s->size); \
PTR(m)[0 ] |= 1 ; \
\
e = (~ (unsigned long ) 0 ) / 3 ; \
if (s->r != 0 ) \
e = s->r; \
\
mpz_init_set (b, m); \
mpz_sub_ui (b, b, 2 ); \
/* printf ("%X\n", mpz_get_ui(m)); */ \
i = s->reps; \
speed_starttime (); \
do \
function (r, b, e, m); \
while (--i != 0 ); \
t = speed_endtime (); \
\
mpz_clear (r); \
mpz_clear (b); \
mpz_clear (m); \
return t; \
}
#define SPEED_ROUTINE_MPN_ADDSUB_CALL(call) \
{ \
mp_ptr wp, wp2, xp, yp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2); \
xp = s->xp; \
yp = s->yp; \
\
if (s->r == 0 ) ; \
else if (s->r == 1 ) { xp = wp; } \
else if (s->r == 2 ) { yp = wp2; } \
else if (s->r == 3 ) { xp = wp; yp = wp2; } \
else if (s->r == 4 ) { xp = wp2; yp = wp; } \
else { \
TMP_FREE; \
return -1 .0 ; \
} \
if (xp != s->xp) MPN_COPY (xp, s->xp, s->size); \
if (yp != s->yp) MPN_COPY (yp, s->yp, s->size); \
\
speed_operand_src (s, xp, s->size); \
speed_operand_src (s, yp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_operand_dst (s, wp2, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_ADDSUB_N(function) \
SPEED_ROUTINE_MPN_ADDSUB_CALL \
(function (wp, wp2, xp, yp, s->size));
#define SPEED_ROUTINE_MPN_ADDSUB_NC(function) \
SPEED_ROUTINE_MPN_ADDSUB_CALL \
(function (wp, wp2, xp, yp, s->size, 0 ));
/* Doing an Nx1 gcd with the given r. */
#define SPEED_ROUTINE_MPN_GCD_1N(function) \
{ \
mp_ptr xp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
SPEED_RESTRICT_COND (s->r != 0 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp); \
MPN_COPY (xp, s->xp, s->size); \
xp[0 ] |= refmpn_zero_p (xp, s->size); \
\
speed_operand_src (s, s->xp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (xp, s->size, s->r); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* SPEED_BLOCK_SIZE many one GCDs of s->size bits each. */
#define SPEED_ROUTINE_MPN_GCD_1_CALL(setup, call) \
{ \
unsigned i, j; \
mp_ptr px, py; \
mp_limb_t x_mask, y_mask; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
SPEED_RESTRICT_COND (s->size <= mp_bits_per_limb); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (px, SPEED_BLOCK_SIZE, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (py, SPEED_BLOCK_SIZE, s->align_yp); \
MPN_COPY (px, s->xp_block, SPEED_BLOCK_SIZE); \
MPN_COPY (py, s->yp_block, SPEED_BLOCK_SIZE); \
\
x_mask = MP_LIMB_T_LOWBITMASK (s->size); \
y_mask = MP_LIMB_T_LOWBITMASK (s->r != 0 ? s->r : s->size); \
for (i = 0 ; i < SPEED_BLOCK_SIZE; i++) \
{ \
px[i] &= x_mask; px[i] += (px[i] == 0 ); \
py[i] &= y_mask; py[i] += (py[i] == 0 ); \
setup; \
} \
\
speed_operand_src (s, px, SPEED_BLOCK_SIZE); \
speed_operand_src (s, py, SPEED_BLOCK_SIZE); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
j = SPEED_BLOCK_SIZE; \
do \
{ \
call; \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
\
s->time_divisor = SPEED_BLOCK_SIZE; \
return t; \
}
#define SPEED_ROUTINE_MPN_GCD_1(function) \
SPEED_ROUTINE_MPN_GCD_1_CALL( , function (&px[j-1 ], 1 , py[j-1 ]))
#define SPEED_ROUTINE_MPN_GCD_11(function) \
SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1 , py[i] |= 1 ), \
function (px[j-1 ], py[j-1 ]))
/* Multiply limbs by (B+1). Then we get a gcd exceeding one limb, so
we can measure gcd_22 loop only, without gcd_11. */
#define SPEED_ROUTINE_MPN_GCD_22(function) \
SPEED_ROUTINE_MPN_GCD_1_CALL((px[i] |= 1 , py[i] |= 1 ), \
function (px[j-1 ], px[j-1 ], py[j-1 ], py[j-1 ]))
#define SPEED_ROUTINE_MPN_JACBASE(function) \
SPEED_ROUTINE_MPN_GCD_1_CALL \
({ \
/* require x<y, y odd, y!=1 */ \
px[i] %= py[i]; \
px[i] |= 1 ; \
py[i] |= 1 ; \
if (py[i]==1 ) py[i]=3 ; \
}, \
function (px[j-1 ], py[j-1 ], 0 ))
#define SPEED_ROUTINE_MPN_HGCD2(function) \
{ \
unsigned i, j; \
struct hgcd_matrix1 m = {{{0 ,0 },{0 ,0 }}}; \
double t; \
\
speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE); \
speed_operand_src (s, s->yp_block, SPEED_BLOCK_SIZE); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
mp_limb_t chain = 0 ; \
do \
{ \
for (j = 0 ; j < SPEED_BLOCK_SIZE; j+= 2 ) \
{ \
/* randomized but successively dependent */ \
function (s->xp_block[j] | GMP_NUMB_HIGHBIT, \
s->xp_block[j+1 ] + chain, \
s->yp_block[j] | GMP_NUMB_HIGHBIT, \
s->yp_block[j+1 ], &m); \
chain += m.u[0 ][0 ]; \
} \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
/* make sure the compiler won't optimize away chain */ \
noop_1 (chain); \
\
s->time_divisor = SPEED_BLOCK_SIZE / 2 ; \
return t; \
}
#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc) \
{ \
mp_size_t hgcd_init_itch, hgcd_itch; \
mp_ptr ap, bp, wp, tmp1; \
struct hgcd_matrix hgcd; \
int res; \
unsigned i; \
double t; \
TMP_DECL; \
\
if (s->size < 2 ) \
return -1 ; \
\
TMP_MARK; \
\
SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1 , s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1 , s->align_yp); \
\
s->xp[s->size - 1 ] |= 1 ; \
s->yp[s->size - 1 ] |= 1 ; \
\
hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
hgcd_itch = itchfunc (s->size); \
\
SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, ap, s->size + 1 ); \
speed_operand_dst (s, bp, s->size + 1 ); \
speed_operand_dst (s, wp, hgcd_itch); \
speed_operand_dst (s, tmp1, hgcd_init_itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (bp, s->yp, s->size); \
mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
res = func (ap, bp, s->size, &hgcd, wp); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) \
{ \
mp_size_t hgcd_init_itch, hgcd_step_itch; \
mp_ptr ap, bp, wp, tmp1; \
struct hgcd_matrix hgcd; \
mp_size_t p = s->size/2 ; \
int res; \
unsigned i; \
double t; \
TMP_DECL; \
\
if (s->size < 2 ) \
return -1 ; \
\
TMP_MARK; \
\
SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1 , s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1 , s->align_yp); \
\
s->xp[s->size - 1 ] |= 1 ; \
s->yp[s->size - 1 ] |= 1 ; \
\
hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
hgcd_step_itch = itchfunc (s->size, p); \
\
SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, ap, s->size + 1 ); \
speed_operand_dst (s, bp, s->size + 1 ); \
speed_operand_dst (s, wp, hgcd_step_itch); \
speed_operand_dst (s, tmp1, hgcd_init_itch); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
MPN_COPY (ap, s->xp, s->size); \
MPN_COPY (bp, s->yp, s->size); \
mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
res = func (&hgcd, ap, bp, s->size, p, wp); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
TMP_FREE; \
return t; \
}
/* Run some GCDs of s->size limbs each. The number of different data values
is decreased as s - > size * * 2 , since GCD is a quadratic algorithm .
SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
though, because the plain gcd is about twice as fast as gcdext. */
#define SPEED_ROUTINE_MPN_GCD_CALL(datafactor, call) \
{ \
unsigned i; \
mp_size_t j, pieces, psize; \
mp_ptr wp, wp2, xtmp, ytmp, px, py; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1 , s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1 , s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (wp, s->size+1 , s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1 , s->align_wp2); \
\
pieces = SPEED_BLOCK_SIZE * datafactor / s->size / s->size; \
pieces = MIN (pieces, SPEED_BLOCK_SIZE / s->size); \
pieces = MAX (pieces, 1 ); \
\
psize = pieces * s->size; \
px = TMP_ALLOC_LIMBS (psize); \
py = TMP_ALLOC_LIMBS (psize); \
MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize); \
MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize); \
\
/* Requirements: x >= y, y must be odd, high limbs != 0. \
No need to ensure random numbers are really great. */ \
for (j = 0 ; j < pieces; j++) \
{ \
mp_ptr x = px + j * s->size; \
mp_ptr y = py + j * s->size; \
if (x[s->size - 1 ] == 0 ) x[s->size - 1 ] = 1 ; \
if (y[s->size - 1 ] == 0 ) y[s->size - 1 ] = 1 ; \
\
if (x[s->size - 1 ] < y[s->size - 1 ]) \
MP_LIMB_T_SWAP (x[s->size - 1 ], y[s->size - 1 ]); \
else if (x[s->size - 1 ] == y[s->size - 1 ]) \
{ \
x[s->size - 1 ] = 2 ; \
y[s->size - 1 ] = 1 ; \
} \
y[0 ] |= 1 ; \
} \
\
speed_operand_src (s, px, psize); \
speed_operand_src (s, py, psize); \
speed_operand_dst (s, xtmp, s->size); \
speed_operand_dst (s, ytmp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
j = pieces; \
do \
{ \
MPN_COPY (xtmp, px+(j - 1 )*s->size, s->size); \
MPN_COPY (ytmp, py+(j - 1 )*s->size, s->size); \
call; \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
\
s->time_divisor = pieces; \
return t; \
}
#define SPEED_ROUTINE_MPN_GCD(function) \
SPEED_ROUTINE_MPN_GCD_CALL (8 , function (wp, xtmp, s->size, ytmp, s->size))
#define SPEED_ROUTINE_MPN_GCDEXT(function) \
SPEED_ROUTINE_MPN_GCD_CALL \
(4 , { mp_size_t wp2size; \
function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); })
#define SPEED_ROUTINE_MPN_GCDEXT_ONE(function) \
{ \
unsigned i; \
mp_size_t j, pieces, psize, wp2size; \
mp_ptr wp, wp2, xtmp, ytmp, px, py; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
\
SPEED_TMP_ALLOC_LIMBS (xtmp, s->size+1 , s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (ytmp, s->size+1 , s->align_yp); \
MPN_COPY (xtmp, s->xp, s->size); \
MPN_COPY (ytmp, s->yp, s->size); \
\
SPEED_TMP_ALLOC_LIMBS (wp, s->size+1 , s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (wp2, s->size+1 , s->align_wp2); \
\
pieces = SPEED_BLOCK_SIZE / 3 ; \
psize = 3 * pieces; \
px = TMP_ALLOC_LIMBS (psize); \
py = TMP_ALLOC_LIMBS (psize); \
MPN_COPY (px, s->xp_block, psize); \
MPN_COPY (py, s->yp_block, psize); \
\
/* x must have at least as many bits as y, \
high limbs must be non-zero */ \
for (j = 0 ; j < pieces; j++) \
{ \
mp_ptr x = px+3 *j; \
mp_ptr y = py+3 *j; \
x[2 ] += (x[2 ] == 0 ); \
y[2 ] += (y[2 ] == 0 ); \
if (x[2 ] < y[2 ]) \
MP_LIMB_T_SWAP (x[2 ], y[2 ]); \
} \
\
speed_operand_src (s, px, psize); \
speed_operand_src (s, py, psize); \
speed_operand_dst (s, xtmp, s->size); \
speed_operand_dst (s, ytmp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
mp_ptr x = px; \
mp_ptr y = py; \
mp_ptr xth = &xtmp[s->size-3 ]; \
mp_ptr yth = &ytmp[s->size-3 ]; \
j = pieces; \
do \
{ \
xth[0 ] = x[0 ], xth[1 ] = x[1 ], xth[2 ] = x[2 ]; \
yth[0 ] = y[0 ], yth[1 ] = y[1 ], yth[2 ] = y[2 ]; \
\
ytmp[0 ] |= 1 ; /* y must be odd, */ \
\
function (wp, wp2, &wp2size, xtmp, s->size, ytmp, s->size); \
\
x += 3 ; \
y += 3 ; \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
\
s->time_divisor = pieces; \
return t; \
}
#define SPEED_ROUTINE_GMP_PRIMESIEVE(function) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
mp_limb_t a = s->size * GMP_LIMB_BITS * 3 ; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, a); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* Calculate nextprime(n) for random n of s->size bits (not limbs). */
#define SPEED_ROUTINE_MPZ_NEXTPRIME(function) \
{ \
unsigned i, j; \
mpz_t wp, n; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 10 ); \
\
mpz_init (wp); \
mpz_init_set_n (n, s->xp, s->size); \
/* limit to s->size bits, as this function is very slow */ \
mpz_tdiv_r_2exp (n, n, s->size); \
/* set high bits so operand and result are genaral s->size bits */ \
mpz_setbit (n, s->size - 1 ); \
mpz_clrbit (n, s->size - 2 ); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
/* nextprime timing is variable, so average over many calls */ \
j = SPEED_BLOCK_SIZE - 1 ; \
/* starts on random, after measures prime to next prime */ \
function (wp, n); \
do \
{ \
function (wp, wp); \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
mpz_clear (wp); \
mpz_clear (n); \
\
s->time_divisor = SPEED_BLOCK_SIZE; \
return t; \
}
#define SPEED_ROUTINE_MPZ_JACOBI(function) \
{ \
mpz_t a, b; \
unsigned i; \
mp_size_t j, pieces, psize; \
mp_ptr px, py; \
double t; \
TMP_DECL; \
\
TMP_MARK; \
pieces = SPEED_BLOCK_SIZE / MAX (s->size, 1 ); \
pieces = MAX (pieces, 1 ); \
s->time_divisor = pieces; \
\
psize = pieces * s->size; \
px = TMP_ALLOC_LIMBS (psize); \
py = TMP_ALLOC_LIMBS (psize); \
MPN_COPY (px, pieces==1 ? s->xp : s->xp_block, psize); \
MPN_COPY (py, pieces==1 ? s->yp : s->yp_block, psize); \
\
for (j = 0 ; j < pieces; j++) \
{ \
mp_ptr x = px+j*s->size; \
mp_ptr y = py+j*s->size; \
\
/* y odd */ \
y[0 ] |= 1 ; \
\
/* high limbs non-zero */ \
if (x[s->size-1 ] == 0 ) x[s->size-1 ] = 1 ; \
if (y[s->size-1 ] == 0 ) y[s->size-1 ] = 1 ; \
} \
\
SIZ(a) = s->size; \
SIZ(b) = s->size; \
\
speed_operand_src (s, px, psize); \
speed_operand_src (s, py, psize); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
j = pieces; \
do \
{ \
PTR(a) = px+(j-1 )*s->size; \
PTR(b) = py+(j-1 )*s->size; \
function (a, b); \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_DIVREM_2(function) \
{ \
mp_ptr wp, xp; \
mp_limb_t yp[2 ]; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (xp, s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
/* source is destroyed */ \
MPN_COPY (xp, s->xp, s->size); \
\
/* divisor must be normalized */ \
MPN_COPY (yp, s->yp_block, 2 ); \
yp[1 ] |= GMP_NUMB_HIGHBIT; \
\
speed_operand_src (s, xp, s->size); \
speed_operand_src (s, yp, 2 ); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, 0 , xp, s->size, yp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_DIV_QR_1(function) \
{ \
mp_ptr wp, xp; \
mp_limb_t d; \
mp_limb_t r; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
d = s->r; \
if (d == 0 ) \
d = 1 ; \
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
r = function (wp, wp+s->size-1 , s->xp, s->size, d); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function) \
{ \
mp_ptr wp, xp; \
mp_limb_t d, dinv; \
mp_limb_t r; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
d = s->r; \
/* divisor must be normalized */ \
SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT); \
invert_limb (dinv, d); \
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
r = function (wp, s->xp, s->size, 0 , d, dinv); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm) \
{ \
mp_ptr wp, xp; \
mp_limb_t yp[2 ]; \
mp_limb_t rp[2 ]; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
\
/* divisor must be normalized */ \
MPN_COPY (yp, s->yp_block, 2 ); \
if (norm) \
yp[1 ] |= GMP_NUMB_HIGHBIT; \
else \
{ \
yp[1 ] &= ~GMP_NUMB_HIGHBIT; \
if (yp[1 ] == 0 ) \
yp[1 ] = 1 ; \
} \
speed_operand_src (s, s->xp, s->size); \
speed_operand_src (s, yp, 2 ); \
speed_operand_dst (s, wp, s->size); \
speed_operand_dst (s, rp, 2 ); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
function (wp, rp, s->xp, s->size, yp); \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MODLIMB_INVERT(function) \
{ \
unsigned i, j; \
mp_ptr xp; \
mp_limb_t n = 1 ; \
double t; \
\
xp = s->xp_block-1 ; \
\
speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
j = SPEED_BLOCK_SIZE; \
do \
{ \
/* randomized but successively dependent */ \
n += (xp[j] << 1 ); \
\
function (n, n); \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
/* make sure the compiler won't optimize away n */ \
noop_1 (n); \
\
s->time_divisor = SPEED_BLOCK_SIZE; \
return t; \
}
#define SPEED_ROUTINE_MPN_SQRTROOT_CALL(call) \
{ \
mp_ptr wp, wp2; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (wp2, s->size, s->align_wp2); \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, wp, s->size); \
speed_operand_dst (s, wp2, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* Calculate worst case for perfect_power
Worst case is multiple prime factors larger than trial div limit. */
#define SPEED_ROUTINE_MPN_PERFECT_POWER(function) \
{ \
mpz_t r; \
unsigned i, power; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 10 ); \
\
mpz_init (r); \
power = s->size * GMP_NUMB_BITS / 17 ; \
mpz_ui_pow_ui(r, (1 << 17 ) - 1 , power - 1 ); \
mpz_mul_ui(r, r, (1 << 16 ) + 1 ); /* larger than 1000th prime */ \
\
speed_starttime (); \
i = s->reps; \
do \
function (PTR(r), SIZ(r)); \
while (--i != 0 ); \
t = speed_endtime (); \
\
mpz_clear (r); \
return t; \
}
/* Calculate worst case (larger prime) for perfect_square */
#define SPEED_ROUTINE_MPN_PERFECT_SQUARE(function) \
{ \
mpz_t r; \
unsigned i; \
double t; \
\
SPEED_RESTRICT_COND (s->size >= 2 ); \
mpz_init_set_n (r, s->xp, s->size / 2 ); \
mpz_setbit (r, s->size * GMP_NUMB_BITS / 2 - 1 ); \
mpz_mul (r, r, r); \
\
speed_starttime (); \
i = s->reps; \
do \
function (PTR(r), SIZ(r)); \
while (--i != 0 ); \
t = speed_endtime (); \
\
mpz_clear (r); \
return t; \
}
/* s->size controls the number of limbs in the input, s->r is the base, or
decimal by default. */
#define SPEED_ROUTINE_MPN_GET_STR(function) \
{ \
unsigned char *wp; \
mp_size_t wn; \
mp_ptr xp; \
int base; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
base = s->r == 0 ? 10 : s->r; \
SPEED_RESTRICT_COND (base >= 2 && base <= 256 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (xp, s->size + 1 , s->align_xp); \
\
MPN_SIZEINBASE (wn, s->xp, s->size, base); \
wp = (unsigned char *) TMP_ALLOC (wn); \
\
/* use this during development to guard against overflowing wp */ \
/* \
MPN_COPY ( xp , s - > xp , s - > size ) ; \
ASSERT_ALWAYS ( mpn_get_str ( wp , base , xp , s - > size ) < = wn ) ; \
*/ \
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, xp, s->size); \
speed_operand_dst (s, (mp_ptr) wp, wn/GMP_LIMB_BYTES); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
MPN_COPY (xp, s->xp, s->size); \
function (wp, base, xp, s->size); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* s->size controls the number of digits in the input, s->r is the base, or
decimal by default. */
#define SPEED_ROUTINE_MPN_SET_STR_CALL(call) \
{ \
unsigned char *xp; \
mp_ptr wp; \
mp_size_t wn; \
unsigned i; \
int base; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1 ); \
\
base = s->r == 0 ? 10 : s->r; \
SPEED_RESTRICT_COND (base >= 2 && base <= 256 ); \
\
TMP_MARK; \
\
xp = (unsigned char *) TMP_ALLOC (s->size); \
for (i = 0 ; i < s->size; i++) \
xp[i] = s->xp[i] % base; \
\
LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base); \
SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \
\
/* use this during development to check wn is big enough */ \
/* \
ASSERT_ALWAYS ( mpn_set_str ( wp , xp , s - > size , base ) < = wn ) ; \
*/ \
\
speed_operand_src (s, (mp_ptr) xp, s->size/GMP_LIMB_BYTES); \
speed_operand_dst (s, wp, wn); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
/* Run an accel gcd find_a() function over various data values. A set of
values is used in case some run particularly fast or slow . The size
parameter is ignored, the amount of data tested is fixed. */
#define SPEED_ROUTINE_MPN_GCD_FINDA(function) \
{ \
unsigned i, j; \
mp_limb_t cp[SPEED_BLOCK_SIZE][2 ]; \
double t; \
TMP_DECL; \
\
TMP_MARK; \
\
/* low must be odd, high must be non-zero */ \
for (i = 0 ; i < SPEED_BLOCK_SIZE; i++) \
{ \
cp[i][0 ] = s->xp_block[i] | 1 ; \
cp[i][1 ] = s->yp_block[i] + (s->yp_block[i] == 0 ); \
} \
\
speed_operand_src (s, &cp[0 ][0 ], 2 *SPEED_BLOCK_SIZE); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
j = SPEED_BLOCK_SIZE; \
do \
{ \
function (cp[j-1 ]); \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
\
s->time_divisor = SPEED_BLOCK_SIZE; \
return t; \
}
/* "call" should do "count_foo_zeros(c,n)".
Give leading = 1 if foo is leading zeros , leading = 0 for trailing .
Give zero=1 if n=0 is allowed in the call, zero=0 if not. */
#define SPEED_ROUTINE_COUNT_ZEROS_A(leading, zero) \
{ \
mp_ptr xp; \
int i, c; \
unsigned j; \
mp_limb_t n; \
double t; \
TMP_DECL; \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (xp, SPEED_BLOCK_SIZE, s->align_xp); \
\
if (! speed_routine_count_zeros_setup (s, xp, leading, zero)) \
return -1 .0 ; \
speed_operand_src (s, xp, SPEED_BLOCK_SIZE); \
speed_cache_fill (s); \
\
c = 0 ; \
speed_starttime (); \
j = s->reps; \
do { \
for (i = 0 ; i < SPEED_BLOCK_SIZE; i++) \
{ \
n = xp[i]; \
n ^= c; \
#define SPEED_ROUTINE_COUNT_ZEROS_B() \
} \
} while (--j != 0 ); \
t = speed_endtime (); \
\
/* don't let c go dead */ \
noop_1 (c); \
\
s->time_divisor = SPEED_BLOCK_SIZE; \
\
TMP_FREE; \
return t; \
} \
#define SPEED_ROUTINE_COUNT_ZEROS_C(call, leading, zero) \
do { \
SPEED_ROUTINE_COUNT_ZEROS_A (leading, zero); \
call; \
SPEED_ROUTINE_COUNT_ZEROS_B (); \
} while (0 ) \
#define SPEED_ROUTINE_COUNT_LEADING_ZEROS_C(call,zero) \
SPEED_ROUTINE_COUNT_ZEROS_C (call, 1 , zero)
#define SPEED_ROUTINE_COUNT_LEADING_ZEROS(fun) \
SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 1 , 0 )
#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS_C(call,zero) \
SPEED_ROUTINE_COUNT_ZEROS_C (call, 0 , zero)
#define SPEED_ROUTINE_COUNT_TRAILING_ZEROS(call) \
SPEED_ROUTINE_COUNT_ZEROS_C (fun (c, n), 0 , 0 )
#define SPEED_ROUTINE_INVERT_LIMB_CALL(call) \
{ \
unsigned i, j; \
mp_limb_t d, dinv=0 ; \
mp_ptr xp = s->xp_block - 1 ; \
\
s->time_divisor = SPEED_BLOCK_SIZE; \
\
speed_starttime (); \
i = s->reps; \
do \
{ \
j = SPEED_BLOCK_SIZE; \
do \
{ \
d = dinv ^ xp[j]; \
d |= GMP_LIMB_HIGHBIT; \
do { call; } while (0 ); \
} \
while (--j != 0 ); \
} \
while (--i != 0 ); \
\
/* don't let the compiler optimize everything away */ \
noop_1 (dinv); \
\
return speed_endtime(); \
}
#define SPEED_ROUTINE_MPN_BACK_TO_BACK(function) \
{ \
unsigned i; \
speed_starttime (); \
i = s->reps; \
do \
function (); \
while (--i != 0 ); \
return speed_endtime (); \
}
#define SPEED_ROUTINE_MPN_ZERO_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 0 ); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
speed_operand_dst (s, wp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0 ); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
#define SPEED_ROUTINE_MPN_ZERO(function) \
SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
#endif
Messung V0.5 in Prozent C=94 H=81 G=87
¤ Dauer der Verarbeitung: 0.81 Sekunden
¤
*© Formatika GbR, Deutschland