/* mpfr_sqrt -- square root of a floating-point number

Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of the MPFR Library.

The MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPFR Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp.h"
#include "gmp-impl.h"
#include "mpfr.h"
#include "mpfr-impl.h"

/* #define DEBUG */

/* Set r to the square root of u, rounded according to rnd_mode.

   Special inputs, as handled below:
     - NaN, -Inf and strictly negative numbers give NaN;
     - -0 gives -0, +0 gives +0, +Inf gives +Inf (all exact).

   For a regular positive u, the mantissa (shifted right by one bit when
   the exponent is odd, so that the remaining exponent is even) is fed to
   mpn_sqrtrem on a window of rsize = 2*rrsize limbs.  The window grows
   until either mpfr_can_round_raw says the truncated root can be rounded
   to MPFR_PREC(r) bits, or every limb of u has been consumed, in which
   case the root and remainder are known exactly and rounding is decided
   by hand.

   The value returned on the regular path is `inexact': 0 when the stored
   result is exact, negative when it was truncated and positive when it
   was rounded up (see the assignments to `inexact' below).  */
int
mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mp_rnd_t rnd_mode)
{
  mp_ptr up, rp, tmp, remp;
  mp_size_t usize, rrsize;
  mp_size_t rsize;
  mp_size_t err;
  mp_limb_t q_limb;
  int odd_exp_u;
  long rw, nw, k;
  int inexact = 0, t;
  unsigned long cc = 0;
  int can_round = 0;
  TMP_DECL(marker);

  /* ---- special values ------------------------------------------------ */

  if (MPFR_IS_NAN(u))
    {
      MPFR_SET_NAN(r);
      MPFR_RET_NAN;
    }

  if (MPFR_SIGN(u) < 0)
    {
      if (MPFR_IS_INF(u) || MPFR_NOTZERO(u))
        {
          /* sqrt of -Inf or of a negative number is NaN */
          MPFR_SET_NAN(r);
          MPFR_RET_NAN;
        }
      else
        {
          /* sqrt(-0) = -0 */
          MPFR_CLEAR_FLAGS(r);
          MPFR_SET_ZERO(r);
          MPFR_SET_NEG(r);
          MPFR_RET(0);
        }
    }

  MPFR_CLEAR_NAN(r);
  MPFR_SET_POS(r);

  if (MPFR_IS_INF(u))
    {
      MPFR_SET_INF(r);
      MPFR_RET(0);
    }

  MPFR_CLEAR_INF(r);

  if (MPFR_IS_ZERO(u))
    {
      MPFR_SET_ZERO(r);
      MPFR_RET(0); /* zero is exact */
    }

  /* ---- regular positive input ---------------------------------------- */

  up = MPFR_MANT(u);
  usize = (MPFR_PREC(u) - 1)/BITS_PER_MP_LIMB + 1;

#ifdef DEBUG
  printf("Entering square root : ");
  for(k = usize - 1; k >= 0; k--) { printf("%lu ", up[k]); }
  printf(".\n");
#endif

  /* Parity of the exponent of u and size of the working buffers */

  odd_exp_u = (unsigned int) MPFR_GET_EXP (u) & 1;
  MPFR_ASSERTN(MPFR_PREC(r) <= MPFR_INTPREC_MAX - 3);
  rrsize = (MPFR_PREC(r) + 2 + odd_exp_u) / BITS_PER_MP_LIMB + 1;
  MPFR_ASSERTN(rrsize <= MP_SIZE_T_MAX/2);
  rsize = rrsize << 1;
  /* One extra bit is needed in order to get the square root with enough
     precision ; take one extra bit for rrsize in order to solve more
     easily the problem of rounding to nearest.
     Need to have 2*rrsize = rsize...
     Take one extra bit if the exponent of u is odd since we shall have
     to shift then.
  */

  TMP_MARK(marker);
  if (odd_exp_u) /* Shift u one bit to the right */
    {
      if (MPFR_PREC(u) & (BITS_PER_MP_LIMB - 1))
        {
          /* the last limb of u is not full: the shifted-out bit fits */
          up = TMP_ALLOC(usize * BYTES_PER_MP_LIMB);
          mpn_rshift (up, MPFR_MANT(u), usize, 1);
        }
      else
        {
          /* the last limb of u is full: keep the shifted-out bit in an
             additional low limb */
          up = TMP_ALLOC((usize + 1) * BYTES_PER_MP_LIMB);
          if (mpn_rshift (up + 1, MPFR_MANT(u), usize, 1))
            up[0] = MPFR_LIMB_HIGHBIT;
          else
            up[0] = 0;
          usize++;
        }
    }

  MPFR_SET_EXP(r, MPFR_GET_EXP(u) != MPFR_EMAX_MAX
               ? (MPFR_GET_EXP(u) + odd_exp_u) / 2
               : (MPFR_EMAX_MAX - 1) / 2 + 1);

  do
    {
      /* err = number of bits of rp[] handed to the rounding routines,
         capped at the size of rp[] */
      err = rsize * BITS_PER_MP_LIMB;
      if (rsize < usize)
        err--;
      if (err > rrsize * BITS_PER_MP_LIMB)
        err = rrsize * BITS_PER_MP_LIMB;

      tmp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB);
      rp = (mp_ptr) TMP_ALLOC (rrsize * BYTES_PER_MP_LIMB);
      remp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB);

      /* tmp <- the rsize most significant limbs of up, zero-padded at the
         low end when up is shorter */
      if (usize >= rsize)
        {
          MPN_COPY (tmp, up + usize - rsize, rsize);
        }
      else
        {
          MPN_COPY (tmp + rsize - usize, up, usize);
          MPN_ZERO (tmp, rsize - usize);
        }

      /* Do the real job */

#ifdef DEBUG
      printf("Taking the sqrt of : ");
      for(k = rsize - 1; k >= 0; k--)
        printf("+%lu*2^%lu",tmp[k],k*BITS_PER_MP_LIMB);
      printf(".\n");
#endif

      q_limb = mpn_sqrtrem (rp, remp, tmp, rsize);

#ifdef DEBUG
      printf ("The result is : \n");
      printf ("sqrt : ");
      for (k = rrsize - 1; k >= 0; k--)
        printf ("%lu ", rp[k]);
      printf ("(inexact = %lu)\n", q_limb);
#endif

      can_round = mpfr_can_round_raw (rp, rrsize, 1, err,
                                      GMP_RNDZ, rnd_mode, MPFR_PREC(r));

      /* If we used all the limbs of both the dividend and the divisor,
         then we have the correct RNDZ rounding */

#ifdef DEBUG
      if (!can_round && (rsize < usize))
        printf("Increasing the precision.\n");
#endif
    }
  /* Retry with one more result limb (two more input limbs) while rounding
     is impossible and some limbs of up are still unused.  The two trailing
     operands only perform the size updates; both are nonzero. */
  while (!can_round && (rsize < usize) && (rsize += 2) && (rrsize++));

#ifdef DEBUG
  printf ("can_round = %d\n", can_round);
#endif

  /* This part may be deplaced upper to avoid a few mpfr_can_round_raw */
  /* when the square root is exact. It is however very unlikely that */
  /* it would improve the behaviour of the present code on average. */

  if (!q_limb) /* the sqrtrem call was exact, possible exact square root */
    {
      /* if we have taken into account the whole of up */
      for (k = usize - rsize - 1; k >= 0; k--)
        if (up[k] != 0)
          {
            q_limb = 1; /* simulate positive remainder */
            break;
          }

      if (k < 0)
        /* warning: the value in rp[] is the exact square root,
           but it may have too many bits, so it still has to go through
           the rounding code below */
        can_round = 1;
    }

  if (can_round)
    {
      cc = mpfr_round_raw (rp, rp, err, 0, MPFR_PREC(r), rnd_mode, &inexact);
      if (inexact == 0) /* exact high part: inex flag depends on remainder */
        inexact = -q_limb;
      rrsize = (MPFR_PREC(r) - 1)/BITS_PER_MP_LIMB + 1;
    }
  else
    {
      /* Use the return value of sqrtrem to decide of the rounding */
      /* Note that at this point the sqrt has been computed */
      /* EXACTLY. */
      /* On this path q_limb is nonzero: a zero q_limb either got replaced
         by 1 or forced can_round to 1 in the block above. */
      switch (rnd_mode)
        {
        case GMP_RNDZ :
        case GMP_RNDD :
          inexact = -1; /* result is truncated */
          break;

        case GMP_RNDN :
          /* round bit is bit rw of word nw (nw counted from the top) */
          rw = (MPFR_PREC(r) + 1) & (BITS_PER_MP_LIMB - 1);
          nw = (MPFR_PREC(r) + 1) / BITS_PER_MP_LIMB + 1;
          if (rw != 0)
            rw = BITS_PER_MP_LIMB - rw;
          else
            nw--;
          if (((rp[rrsize - nw] >> rw) & 1) &&          /* Not 0111111111 */
              (q_limb ||                                /* Nonzero remainder */
               (rw ? (rp[rrsize - nw] >> (rw + 1)) & 1 :
                (rp[rrsize - nw] >> (BITS_PER_MP_LIMB - 1)) & 1)))
                                                        /* or even r. */
            {
              /* Add one at the round bit.  Exactly nw limbs lie between
                 rp + rrsize - nw and the end of rp[], so nw is the limb
                 count: a larger count would let a full carry chain run
                 past the buffer and lose the carry-out needed below. */
              cc = mpn_add_1 (rp + rrsize - nw, rp + rrsize - nw, nw,
                              MP_LIMB_T_ONE << rw);
              inexact = 1;
            }
          else
            inexact = -1;
          break;

        default: /* necessarily rnd_mode = GMP_RNDU */
          /* we should arrive here only when the result is inexact, i.e.
             either q_limb > 0 (the remainder from mpn_sqrtrem is non-zero)
             or up[0..usize-rsize-1] is non zero, thus we have to add one
             ulp, and inexact = 1 */
          inexact = 1;
          t = MPFR_PREC(r) & (BITS_PER_MP_LIMB - 1);
          rsize = (MPFR_PREC(r) - 1)/BITS_PER_MP_LIMB + 1;
          cc = mpn_add_1 (rp + rrsize - rsize, rp + rrsize - rsize, rsize,
                          t != 0 ?
                          MP_LIMB_T_ONE << (BITS_PER_MP_LIMB - t) :
                          MP_LIMB_T_ONE);
        }
    }

  if (cc)
    {
      /* Carry out of the mantissa: renormalize and bump the exponent. */
      /* Is a shift necessary here? Isn't the result 1.0000...? */
      mpn_rshift (rp, rp, rrsize, 1);
      rp[rrsize-1] |= MPFR_LIMB_HIGHBIT;
      MPFR_SET_EXP (r, MPFR_EXP (r) + 1);
    }

  /* Copy the most significant limbs of rp[] into r and clear the bits of
     the low limb that lie below the precision of r. */
  rsize = rrsize;
  rrsize = (MPFR_PREC(r) - 1)/BITS_PER_MP_LIMB + 1;
  MPN_COPY(MPFR_MANT(r), rp + rsize - rrsize, rrsize);

  if (MPFR_PREC(r) & (BITS_PER_MP_LIMB - 1))
    MPFR_MANT(r)[0] &= ~((MP_LIMB_T_ONE <<
                          (BITS_PER_MP_LIMB -
                           (MPFR_PREC(r) & (BITS_PER_MP_LIMB - 1)))) - 1);

  TMP_FREE(marker);
  return inexact;
}