YoushouldhavereceivedcopiesoftheGNUGeneralPublicLicenseandthe GNULesserGeneralPublicLicensealongwiththeGNUMPLibrary.Ifnot,
see https://www.gnu.org/licenses/. */
if (UNLIKELY (k == 2)) return mpn_sqrtrem (rootp, remp, up, un); /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */ if (remp == NULL && (un + 2) / 3 > k) /* Pad {up,un} with k zero limbs. This will produce an approximate root
with one more limb, allowing us to compute the exact integral result. */
{
mp_ptr sp, wp;
mp_size_t rn, sn, wn;
TMP_DECL;
TMP_MARK;
wn = un + k;
sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
sp, sn); /* approximate root of padded input */
MPN_COPY (wp + k, up, un);
MPN_FILL (wp, k, 0);
rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1); /* The approximate root S = {sp,sn} is either the correct root of {sp,sn},or1toolarge.Thusunlesstheleastsignificantlimbof Sis0or1,wecandeducetherootof{up,un}isStruncatedbyone limb.(Incasesp[0]=1,wecandeducetheroot,butnotdecide
whether it is exact or not.) */
MPN_COPY (rootp, sp + 1, sn - 1);
TMP_FREE; return rn;
} else
{ return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
}
}
#define LOGROOT_USED_BITS 8 #define LOGROOT_NEEDS_TWO_CORRECTIONS 1 #define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS) /* Puts in *rootp some bits of the k^nt root of the number 2^bitn*1.op;whereoprepresentsthe"fractional"bits.
/* if approx is non-zero, does not compute the final remainder */ static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
mp_limb_t k, int approx)
{
mp_ptr qp, rp, sp, wp, scratch;
mp_size_t qn, rn, sn, wn, nl, bn;
mp_limb_t save, save2, cy, uh;
mp_bitcnt_t unb; /* number of significant bits of {up,un} */
mp_bitcnt_t xnb; /* number of significant bits of the result */
mp_bitcnt_t b, kk;
mp_bitcnt_t sizes[GMP_NUMB_BITS + 1]; int ni; int perf_pow; unsigned ulz, snb, c, logk;
TMP_DECL;
/* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
uh = up[un - 1];
count_leading_zeros (ulz, uh);
ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz; /* unb is the (truncated) logarithm of the input U in base 2*/
if (unb < k) /* root is 1 */
{
rootp[0] = 1; if (remp == NULL)
un -= (*up == CNST_LIMB (1)); /* Non-zero iif {up,un} > 1 */ else
{
mpn_sub_1 (remp, up, un, CNST_LIMB (1));
un -= (remp [un - 1] == 0); /* There should be at most one zero limb,
if we demand u to be normalized */
} return un;
} /* if (unb - k < k/2 + k/16) // root is 2 */
xnb = logbased_root (rootp, uh, unb, k);
snb = LOGROOT_RETURNED_BITS - 1; /* xnb+1 is the number of bits of the root R */ /* snb+1 is the number of bits of the current approximation S */
kk = k * xnb; /* number of truncated bits in the input */
/* FIXME: Should we skip the next two loops when xnb <= snb ? */ for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
; /* logk = ceil(log(k)/log(2)) + 1 */
/* xnb is the number of remaining bits to determine in the kth root */ for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
{ /* invariant: here we want xnb+1 total bits for the kth root */
/* if c is the new value of xnb, this means that we'll go from a rootofc+1bits(says')toarootofxnb+1bits. Itisprovedinthebook"ModernComputerArithmetic"byBrent andZimmermann,Chapter1,that ifs'>=k*beta,thenatmostonecorrectionisnecessary. Herebeta=2^(xnb-c),ands'>=2^c,thusitsufficesthat
c >= ceil((xnb + log2(k))/2). */ if (xnb > logk)
xnb = (xnb + logk) / 2; else
--xnb; /* add just one bit at a time */
}
*rootp >>= snb - xnb;
kk -= xnb;
ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1); /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with sizes[i]<=2*sizes[i+1]. Newtoniterationwillfirstcomputesizes[ni-1]extrabits,
then sizes[ni-2], ..., then sizes[0] = b. */
TMP_MARK; /* qp and wp need enough space to store S'^k where S' is an approximate root.SinceS'canbeaslargeasS+2,theworstcaseiswhenS=2and S'=4.ButthensinceweknowthenumberofbitsofSinadvance,S' canonlybe3atmost.SimilarlyforS=4,thenS'canbe6atmost. SotheworstcaseisS'/S=3/2,thusS'^k<=(3/2)^k*S^k.SinceS^k fitsinunlimbs,thenumberofextralimbsneededisboundedby
ceil(k*log2(3/2)/GMP_NUMB_BITS). */ /* THINK: with the use of logbased_root, maybe the constant is
258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */ #define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
qp, un + EXTRA, /* will contain quotient and remainder
of R/(k*S^(k-1)), and S^k */
wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
and temporary for mpn_pow_1 */
if (remp == NULL)
rp = scratch; /* will contain the remainder */ else
rp = remp;
sp = rootp;
sn = 1; /* Initial approximation has one limb */
for (b = xnb; ni != 0; --ni)
{ /* 1: loop invariant: {sp,sn}isthecurrentapproximationoftheroot,whichhas exactly1+sizes[ni]bits. {rp,rn}isthecurrentremainder {wp,wn}={sp,sn}^(k-1) kk=numberoftruncatedbitsoftheinput
*/
/* Since each iteration treats b bits from the root and thus k*b bits fromtheinput,andwealreadyconsideredbbitsfromtheinput,
we now have to take another (k-1)*b bits from the input. */
kk -= (k - 1) * b; /* remaining input bits */ /* {rp, rn} = floor({up, un} / 2^kk) */
rn = un - kk / GMP_NUMB_BITS;
MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
rn -= rp[rn - 1] == 0;
/* 9: current buffers: {sp,sn}, {rp,rn} */
for (c = 0;; c++)
{ /* Compute S^k in {qp,qn}. */ /* W <- S^(k-1) for the next iteration,
and S^k = W * S. */
wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
mpn_mul (qp, wp, wn, sp, sn);
qn = wn + sn;
qn -= qp[qn - 1] == 0;
perf_pow = 1; /* if S^k > floor(U/2^kk), the root approximation was too large */ if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
MPN_DECR_U (sp, sn, 1); else break;
}
/* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
/* sometimes two corrections are needed with logbased_root*/
ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
ASSERT_ALWAYS (rn >= qn);
b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
next iteration */
bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */
kk = kk - b; /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS); /* nl = 1 + floor((kk + b - 1) / GMP_NUMB_BITS) -floor(kk/GMP_NUMB_BITS) <=1+(kk+b-1)/GMP_NUMB_BITS -(kk-GMP_NUMB_BITS+1)/GMP_NUMB_BITS =2+(b-2)/GMP_NUMB_BITS thussincenlisaninteger:
nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
/* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
/* R = R - Q = floor(U/2^kk) - S^k */ if (perf_pow != 0)
{
mpn_sub (rp, rp, rn, qp, qn);
MPN_NORMALIZE_NOT_ZERO (rp, rn);
/* first multiply the remainder by 2^b */
MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
rn = rn + bn; if (cy != 0)
{
rp[rn] = cy;
rn++;
}
save = rp[bn]; /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */ if (nl - 1 > bn)
save2 = rp[bn + 1];
} else
{
rn = bn;
save2 = save = 0;
} /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
/* Now insert bits [kk,kk+b-1] from the input U */
MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS); /* set to zero high bits of rp[bn] */
rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1; /* restore corresponding bits */
rp[bn] |= save; if (nl - 1 > bn)
rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since theystartbybit0inrp[0],sotheyuse
at most ceil(b/GMP_NUMB_BITS) limbs */ /* FIXME: Should we normalise {rp,rn} here ?*/
/* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
/* multiply the root approximation by 2^b */
MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
sn = sn + b / GMP_NUMB_BITS; if (cy != 0)
{
sp[sn] = cy;
sn++;
}
save = sp[b / GMP_NUMB_BITS];
/* Number of limbs used by b bits, when least significant bit is
aligned to least limb */
bn = (b - 1) / GMP_NUMB_BITS + 1;
/* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
/* now divide {rp, rn} by {wp, wn} to get the low part of the root */ if (UNLIKELY (rn < wn))
{
MPN_FILL (sp, bn, 0);
} else
{
qn = rn - wn; /* expected quotient size */ if (qn <= bn) { /* Divide only if result is not too big. */
mpn_div_q (qp, rp, rn, wp, wn, scratch);
qn += qp[qn] != 0;
}
/* 5: current buffers: {sp,sn}, {qp,qn}. Note:{rp,rn}isnotneededanymoresincewe'llcomputeitfrom scratchattheendoftheloop.
*/
/* the quotient should be smaller than 2^b, since the previous
approximation was correctly rounded toward zero */ if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
{ for (qn = 1; qn < bn; ++qn)
sp[qn - 1] = GMP_NUMB_MAX;
sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
} else
{ /* 7: current buffers: {sp,sn}, {qp,qn} */
/* Combine sB and q to form sB + q. */
MPN_COPY (sp, qp, qn);
MPN_ZERO (sp + qn, bn - qn);
}
}
sp[b / GMP_NUMB_BITS] |= save;
/* 8: current buffer: {sp,sn} */
}
/* otherwise we have rn > 0, thus the return value is ok */ if (!approx || sp[0] <= CNST_LIMB (1))
{ for (c = 0;; c++)
{ /* Compute S^k in {qp,qn}. */ /* Last iteration: we don't need W anymore. */ /* mpn_pow_1 requires that both qp and wp have enough
space to store the result {sp,sn}^k + 1 limb */
qn = mpn_pow_1 (qp, sp, sn, k, wp);
perf_pow = 1; if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
MPN_DECR_U (sp, sn, 1); else break;
};
/* sometimes two corrections are needed with logbased_root*/
ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.