/* mpn_mulmid -- middle product
Contributed by David Harvey .
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE . IT IS ONLY
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES . IN FACT , IT IS ALMOST
GUARANTEED THAT IT ' LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE .
Copyright 2011 Free Software Foundation , Inc .
This file is part of the GNU MP Library .
The GNU MP Library is free software ; you can redistribute it and / or modify
it under the terms of either :
* the GNU Lesser General Public License as published by the Free
Software Foundation ; either version 3 of the License , or ( at your
option ) any later version .
or
* the GNU General Public License as published by the Free Software
Foundation ; either version 2 of the License , or ( at your option ) any
later version .
or both in parallel , as here .
The GNU MP Library is distributed in the hope that it will be useful , but
WITHOUT ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License
for more details .
You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library . If not ,
see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
#define CHUNK (
200 + MULMID_TOOM42_THRESHOLD)
void
mpn_mulmid (mp_ptr rp,
mp_srcptr ap, mp_size_t an,
mp_srcptr bp, mp_size_t bn)
{
mp_size_t rn, k;
mp_ptr scratch, temp;
ASSERT (an >= bn);
ASSERT (bn >=
1 );
ASSERT (! MPN_OVERLAP_P (rp, an - bn +
3 , ap, an));
ASSERT (! MPN_OVERLAP_P (rp, an - bn +
3 , bp, bn));
if (bn < MULMID_TOOM42_THRESHOLD)
{
/* region not tall enough to make toom42 worthwhile for any portion */
if (an < CHUNK)
{
/* region not too wide either, just call basecase directly */
mpn_mulmid_basecase (rp, ap, an, bp, bn);
return ;
}
/* Region quite wide. For better locality, use basecase on chunks:
AAABBBCC . .
. AAABBBCC .
. . AAABBBCC
*/
k = CHUNK - bn +
1 ;
/* number of diagonals per chunk */
/* first chunk (marked A in the above diagram) */
mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
/* remaining chunks (B, C, etc) */
an -= k;
while (an >= CHUNK)
{
mp_limb_t t0, t1, cy;
ap += k, rp += k;
t0 = rp[
0 ], t1 = rp[
1 ];
mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
ADDC_LIMB (cy, rp[
0 ], rp[
0 ], t0);
/* add back saved limbs */
MPN_INCR_U (rp +
1 , k +
1 , t1 + cy);
an -= k;
}
if (an >= bn)
{
/* last remaining chunk */
mp_limb_t t0, t1, cy;
ap += k, rp += k;
t0 = rp[
0 ], t1 = rp[
1 ];
mpn_mulmid_basecase (rp, ap, an, bp, bn);
ADDC_LIMB (cy, rp[
0 ], rp[
0 ], t0);
MPN_INCR_U (rp +
1 , an - bn +
2 , t1 + cy);
}
return ;
}
/* region is tall enough for toom42 */
rn = an - bn +
1 ;
if (rn < MULMID_TOOM42_THRESHOLD)
{
/* region not wide enough to make toom42 worthwhile for any portion */
TMP_DECL;
if (bn < CHUNK)
{
/* region not too tall either, just call basecase directly */
mpn_mulmid_basecase (rp, ap, an, bp, bn);
return ;
}
/* Region quite tall. For better locality, use basecase on chunks:
AAAAA . . . .
. AAAAA . . .
. . BBBBB . .
. . . BBBBB .
. . . . CCCCC
*/
TMP_MARK;
temp = TMP_ALLOC_LIMBS (rn +
2 );
/* first chunk (marked A in the above diagram) */
bp += bn - CHUNK, an -= bn - CHUNK;
mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
/* remaining chunks (B, C, etc) */
bn -= CHUNK;
while (bn >= CHUNK)
{
ap += CHUNK, bp -= CHUNK;
mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
mpn_add_n (rp, rp, temp, rn +
2 );
bn -= CHUNK;
}
if (bn)
{
/* last remaining chunk */
ap += CHUNK, bp -= bn;
mpn_mulmid_basecase (temp, ap, rn + bn -
1 , bp, bn);
mpn_add_n (rp, rp, temp, rn +
2 );
}
TMP_FREE;
return ;
}
/* we're definitely going to use toom42 somewhere */
if (bn > rn)
{
/* slice region into chunks, use toom42 on all chunks except possibly
the last :
AA . . . .
. AA . . .
. . BB . .
. . . BB .
. . . . CC
*/
TMP_DECL;
TMP_MARK;
temp = TMP_ALLOC_LIMBS (rn +
2 + mpn_toom42_mulmid_itch (rn));
scratch = temp + rn +
2 ;
/* first chunk (marked A in the above diagram) */
bp += bn - rn;
mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
/* remaining chunks (B, C, etc) */
bn -= rn;
while (bn >= rn)
{
ap += rn, bp -= rn;
mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
mpn_add_n (rp, rp, temp, rn +
2 );
bn -= rn;
}
if (bn)
{
/* last remaining chunk */
ap += rn, bp -= bn;
mpn_mulmid (temp, ap, rn + bn -
1 , bp, bn);
mpn_add_n (rp, rp, temp, rn +
2 );
}
TMP_FREE;
}
else
{
/* slice region into chunks, use toom42 on all chunks except possibly
the last :
AAABBBCC . .
. AAABBBCC .
. . AAABBBCC
*/
TMP_DECL;
TMP_MARK;
scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
/* first chunk (marked A in the above diagram) */
mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
/* remaining chunks (B, C, etc) */
rn -= bn;
while (rn >= bn)
{
mp_limb_t t0, t1, cy;
ap += bn, rp += bn;
t0 = rp[
0 ], t1 = rp[
1 ];
mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
ADDC_LIMB (cy, rp[
0 ], rp[
0 ], t0);
/* add back saved limbs */
MPN_INCR_U (rp +
1 , bn +
1 , t1 + cy);
rn -= bn;
}
TMP_FREE;
if (rn)
{
/* last remaining chunk */
mp_limb_t t0, t1, cy;
ap += bn, rp += bn;
t0 = rp[
0 ], t1 = rp[
1 ];
mpn_mulmid (rp, ap, rn + bn -
1 , bp, bn);
ADDC_LIMB (cy, rp[
0 ], rp[
0 ], t0);
MPN_INCR_U (rp +
1 , rn +
1 , t1 + cy);
}
}
}
Messung V0.5 in Prozent C=93 H=86 G=89
¤ Dauer der Verarbeitung: 0.10 Sekunden
(vorverarbeitet am 2026-06-10)
¤
*© Formatika GbR, Deutschland