/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
Contributed to the GNU project by Marco Bodrato .
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE . IT IS ONLY
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES . IN FACT , IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE .
Copyright 2009 , 2011 , 2012 Free Software Foundation , Inc .
This file is part of the GNU MP Library .
The GNU MP Library is free software ; you can redistribute it and / or modify
it under the terms of either :
* the GNU Lesser General Public License as published by the Free
Software Foundation ; either version 3 of the License , or ( at your
option ) any later version .
or
* the GNU General Public License as published by the Free Software
Foundation ; either version 2 of the License , or ( at your option ) any
later version .
or both in parallel , as here .
The GNU MP Library is distributed in the hope that it will be useful , but
WITHOUT ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License
for more details .
You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library . If not ,
see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
#define BINVERT_3 MODLIMB_INVERSE_3
#define BINVERT_15 \
((((GMP_NUMB_MAX >> (GMP_NUMB_BITS %
4 )) /
15 ) *
14 *
16 & GMP_NUMB_MAX) +
15 )
#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
#ifndef mpn_divexact_by3
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,
3 ,BINVERT_3,
0 )
#else
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3 )
#endif
#endif
#ifndef mpn_divexact_by45
#if GMP_NUMB_BITS % 12 == 0
#define mpn_divexact_by45(dst,src,size) \
(63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45 )))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45 ,BINVERT_45,0 )
#else
#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45 )
#endif
#endif
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
#else
#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2 ,ws)
#endif
#if HAVE_NATIVE_mpn_sublsh_n
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
return mpn_submul_1(dst,src,n,CNST_LIMB(1 ) <<(s));
#else
mp_limb_t __cy;
__cy = mpn_lshift (ws,src,n,s);
return __cy + mpn_sub_n (dst,dst,ws,n);
#endif
}
#endif
#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
#else
/* This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
do { \
mp_limb_t __cy; \
MPN_DECR_U (dst, nd, src[0 ] >> s); \
__cy = DO_mpn_sublsh_n (dst, src + 1 , ns - 1 , GMP_NUMB_BITS - s, ws); \
MPN_DECR_U (dst + ns - 1 , nd - ns + 1 , __cy); \
} while (0 )
#endif
/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
points : infinity ( 4 . 5 only ) , 4 , - 4 , 2 , - 2 , 1 , - 1 , 0 . More precisely ,
we want to compute f ( 2 ^ ( GMP_NUMB_BITS * n ) ) for a polynomial f of
degree 7 ( or 6 ) , given the 8 ( rsp . 7 ) values :
r1 = limit at infinity of f ( x ) / x ^ 7 ,
r2 = f ( 4 ) ,
r3 = f ( - 4 ) ,
r4 = f ( 2 ) ,
r5 = f ( - 2 ) ,
r6 = f ( 1 ) ,
r7 = f ( - 1 ) ,
r8 = f ( 0 ) .
All couples of the form f ( n ) , f ( - n ) must be already mixed with
toom_couple_handling ( f ( n ) , . . . , f ( - n ) , . . . )
The result is stored in { pp , spt + 7 * n ( or 6 * n ) } .
At entry , r8 is stored at { pp , 2 n } ,
r5 is stored at { pp + 3 n , 3 n + 1 } .
The other values are 2 n + . . . limbs each ( with most significant limbs small ) .
All intermediate results are positive .
Inputs are destroyed .
*/
void
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
mp_ptr r3, mp_ptr r7,
mp_size_t spt, mp_ptr ws)
{
mp_limb_signed_t cy;
mp_ptr r5, r1;
r5 = (pp + 3 * n); /* 3n+1 */
r1 = (pp + 7 * n); /* spt */
/******************************* interpolation *****************************/
DO_mpn_subrsh(r3+n, 2 * n + 1 , pp, 2 * n, 4 , ws);
cy = DO_mpn_sublsh_n (r3, r1, spt, 12 , ws);
MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
DO_mpn_subrsh(r5+n, 2 * n + 1 , pp, 2 * n, 2 , ws);
cy = DO_mpn_sublsh_n (r5, r1, spt, 6 , ws);
MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
r7[3 *n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
cy = mpn_sub_n (r7, r7, r1, spt);
MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1 ));
ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1 , 2 ));
ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1 ));
ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1 ));
mpn_divexact_by45 (r3, r3, 3 * n + 1 );
ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1 ));
ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1 , ws));
/* last interpolation steps... */
/* ... are mixed with recomposition */
/***************************** recomposition *******************************/
/*
pp [ ] prior to operations :
| _ H r1 | _ L r1 | _ _ _ _ | | _ H r5 | _ M_r5 | _ L r5 | _ _ _ _ _ | _ H r8 | _ L r8 | pp
summation scheme for remaining operations :
| _ _ _ _ 8 | n___7 | n___6 | n___5 | n___4 | n___3 | n___2 | n____ | n____ | pp
| _ H r1 | _ L r1 | _ _ _ _ | | _ H * r5 | _ M r5 | _ L r5 | _ _ _ _ _ | _ H_r8 | _ L r8 | pp
| | _ H r3 | _ M r3 | _ L * r3 |
| | _ H_r7 | _ M_r7 | _ L_r7 |
| | - H r3 | - M r3 | - L * r3 |
| | - H * r5 | - M_r5 | - L_r5 |
*/
cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
cy-= mpn_sub_n (pp + n, pp + n, r5, n);
if (cy > 0 ) {
MPN_INCR_U (r7 + n, 2 *n + 1 , 1 );
cy = 0 ;
}
cy = mpn_sub_nc (pp + 2 *n, r7 + n, r5 + n, n, -cy); /* Mr7-Mr5 */
MPN_DECR_U (r7 + 2 *n, n + 1 , cy);
cy = mpn_add_n (pp + 3 *n, r5, r7+ 2 *n, n+1 ); /* Hr7+Lr5 */
r5[3 *n]+= mpn_add_n (r5 + 2 *n, r5 + 2 *n, r3, n); /* Hr5+Lr3 */
cy-= mpn_sub_n (pp + 3 *n, pp + 3 *n, r5 + 2 *n, n+1 ); /* Hr7-Hr5+Lr5-Lr3 */
if (UNLIKELY(0 > cy))
MPN_DECR_U (r5 + n + 1 , 2 *n, 1 );
else
MPN_INCR_U (r5 + n + 1 , 2 *n, cy);
ASSERT_NOCARRY(mpn_sub_n(pp + 4 *n, r5 + n, r3 + n, 2 *n +1 )); /* Mr5-Mr3,Hr5-Hr3 */
cy = mpn_add_1 (pp + 6 *n, r3 + n, n, pp[6 *n]);
MPN_INCR_U (r3 + 2 *n, n + 1 , cy);
cy = mpn_add_n (pp + 7 *n, pp + 7 *n, r3 + 2 *n, n);
if (LIKELY(spt != n))
MPN_INCR_U (pp + 8 *n, spt - n, cy + r3[3 *n]);
else
ASSERT (r3[3 *n] + cy == 0 );
}
Messung V0.5 in Prozent C=95 H=93 G=93
¤ Dauer der Verarbeitung: 0.5 Sekunden
¤
*© Formatika GbR, Deutschland