diff -Naurd mpfr-4.0.1-a/PATCHES mpfr-4.0.1-b/PATCHES
--- mpfr-4.0.1-a/PATCHES	2018-04-27 12:47:54.194308761 +0000
+++ mpfr-4.0.1-b/PATCHES	2018-04-27 12:47:54.230308407 +0000
@@ -0,0 +1 @@
+sqr_1n-underflow
diff -Naurd mpfr-4.0.1-a/VERSION mpfr-4.0.1-b/VERSION
--- mpfr-4.0.1-a/VERSION	2018-04-27 12:45:53.171452379 +0000
+++ mpfr-4.0.1-b/VERSION	2018-04-27 12:47:54.226308446 +0000
@@ -1 +1 @@
-4.0.1-p2
+4.0.1-p3
diff -Naurd mpfr-4.0.1-a/src/mpfr.h mpfr-4.0.1-b/src/mpfr.h
--- mpfr-4.0.1-a/src/mpfr.h	2018-04-27 12:45:53.171452379 +0000
+++ mpfr-4.0.1-b/src/mpfr.h	2018-04-27 12:47:54.226308446 +0000
@@ -27,7 +27,7 @@
 #define MPFR_VERSION_MAJOR 4
 #define MPFR_VERSION_MINOR 0
 #define MPFR_VERSION_PATCHLEVEL 1
-#define MPFR_VERSION_STRING "4.0.1-p2"
+#define MPFR_VERSION_STRING "4.0.1-p3"
 
 /* User macros:
    MPFR_USE_FILE:        Define it to make MPFR define functions dealing
diff -Naurd mpfr-4.0.1-a/src/sqr.c mpfr-4.0.1-b/src/sqr.c
--- mpfr-4.0.1-a/src/sqr.c	2018-01-09 12:30:58.000000000 +0000
+++ mpfr-4.0.1-b/src/sqr.c	2018-04-27 12:47:54.218308525 +0000
@@ -161,15 +161,18 @@
   if (MPFR_UNLIKELY(ax < __gmpfr_emin))
     {
       /* As seen in mpfr_mul_1, we cannot have a0 = 111...111 here if there
-         was not exponent decrease (ax--) above.
-         In the case of an exponent decrease, it is not possible for
-         GMP_NUMB_BITS=32 since the largest b0 such that b0^2 < 2^(2*32-1)
-         is b0=3037000499, but its square has only 30 leading ones.
-         For GMP_NUMB_BITS=64 it is possible: the largest b0 is
-         13043817825332782212, and its square has 64 leading ones. */
-      if ((ax == __gmpfr_emin - 1) && (ap[0] == ~MPFR_LIMB_HIGHBIT) &&
-          ((rnd_mode == MPFR_RNDN && rb) ||
-           (MPFR_IS_LIKE_RNDA(rnd_mode, MPFR_IS_NEG (a)) && (rb | sb))))
+         was not an exponent decrease (ax--) above.
+         In the case of an exponent decrease:
+         - For GMP_NUMB_BITS=32, a0 = 111...111 is not possible since the
+           largest b0 such that b0^2 < 2^(2*32-1) is b0=3037000499, but
+           its square has only 30 leading ones.
+         - For GMP_NUMB_BITS=64, a0 = 111...111 is possible: the largest b0
+           is 13043817825332782212, and its square has 64 leading ones; but
+           since the next bit is rb=0, for RNDN, we always have an underflow.
+         For the test below, note that a is positive.
+      */
+      if (ax == __gmpfr_emin - 1 && ap[0] == MPFR_LIMB_MAX &&
+          MPFR_IS_LIKE_RNDA (rnd_mode, 0))
         goto rounding; /* no underflow */
       /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2)
          we have to change to RNDZ. This corresponds to:
diff -Naurd mpfr-4.0.1-a/src/version.c mpfr-4.0.1-b/src/version.c
--- mpfr-4.0.1-a/src/version.c	2018-04-27 12:45:53.171452379 +0000
+++ mpfr-4.0.1-b/src/version.c	2018-04-27 12:47:54.226308446 +0000
@@ -25,5 +25,5 @@
 const char *
 mpfr_get_version (void)
 {
-  return "4.0.1-p2";
+  return "4.0.1-p3";
 }
diff -Naurd mpfr-4.0.1-a/tests/tsqr.c mpfr-4.0.1-b/tests/tsqr.c
--- mpfr-4.0.1-a/tests/tsqr.c	2018-01-09 12:30:58.000000000 +0000
+++ mpfr-4.0.1-b/tests/tsqr.c	2018-04-27 12:47:54.218308525 +0000
@@ -188,6 +188,156 @@
 #endif
 }
 
+static void
+coverage (mpfr_prec_t pmax)
+{
+  mpfr_prec_t p;
+
+  for (p = MPFR_PREC_MIN; p <= pmax; p++)
+    {
+      mpfr_t a, b, c;
+      int inex;
+      mpfr_exp_t emin;
+
+      mpfr_init2 (a, p);
+      mpfr_init2 (b, p);
+      mpfr_init2 (c, p);
+
+      /* exercise carry in most significant bits of a, with overflow */
+      mpfr_set_ui_2exp (b, 1, mpfr_get_emax (), MPFR_RNDZ);
+      mpfr_sqrt (b, b, MPFR_RNDU);
+      mpfr_div_2exp (c, b, 1, MPFR_RNDN);
+      mpfr_sqr (c, c, MPFR_RNDN);
+      mpfr_clear_flags ();
+      inex = mpfr_sqr (a, b, MPFR_RNDN);
+      /* if EXP(c) > emax-2, there is overflow */
+      if (mpfr_get_exp (c) > mpfr_get_emax () - 2)
+        {
+          MPFR_ASSERTN(inex > 0);
+          MPFR_ASSERTN(mpfr_inf_p (a) && mpfr_sgn (a) > 0);
+          MPFR_ASSERTN(mpfr_overflow_p ());
+        }
+      else /* no overflow */
+        {
+          /* 2^p-1 is a square only for p=1 */
+          MPFR_ASSERTN((p == 1 && inex == 0) || (p > 1 && inex < 0));
+          MPFR_ASSERTN(!mpfr_overflow_p ());
+          mpfr_set_ui_2exp (c, 1, mpfr_get_emax (), MPFR_RNDZ);
+          MPFR_ASSERTN(mpfr_equal_p (a, c));
+        }
+
+      /* same as above, with RNDU */
+      mpfr_set_ui_2exp (b, 1, mpfr_get_emax (), MPFR_RNDZ);
+      mpfr_sqrt (b, b, MPFR_RNDU);
+      mpfr_div_2exp (c, b, 1, MPFR_RNDN);
+      mpfr_sqr (c, c, MPFR_RNDU);
+      mpfr_clear_flags ();
+      inex = mpfr_sqr (a, b, MPFR_RNDU);
+      /* if EXP(c) > emax-2, there is overflow */
+      if (mpfr_get_exp (c) > mpfr_get_emax () - 2)
+        {
+          MPFR_ASSERTN(inex > 0);
+          MPFR_ASSERTN(mpfr_inf_p (a) && mpfr_sgn (a) > 0);
+          MPFR_ASSERTN(mpfr_overflow_p ());
+        }
+      else /* no overflow */
+        {
+          /* 2^p-1 is a square only for p=1 */
+          MPFR_ASSERTN((p == 1 && inex == 0) || (p > 1 && inex < 0));
+          MPFR_ASSERTN(!mpfr_overflow_p ());
+          mpfr_set_ui_2exp (c, 1, mpfr_get_emax (), MPFR_RNDZ);
+          MPFR_ASSERTN(mpfr_equal_p (a, c));
+        }
+
+      /* exercise trivial overflow */
+      mpfr_set_ui_2exp (b, 1, mpfr_get_emax (), MPFR_RNDZ);
+      mpfr_sqrt (b, b, MPFR_RNDU);
+      mpfr_mul_2exp (b, b, 1, MPFR_RNDN);
+      mpfr_clear_flags ();
+      inex = mpfr_sqr (a, b, MPFR_RNDN);
+      MPFR_ASSERTN(inex > 0);
+      MPFR_ASSERTN(mpfr_inf_p (a) && mpfr_sgn (a) > 0);
+      MPFR_ASSERTN(mpfr_overflow_p ());
+
+      /* exercise trivial underflow */
+      mpfr_set_ui_2exp (b, 1, mpfr_get_emin () - 1, MPFR_RNDZ);
+      mpfr_sqrt (b, b, MPFR_RNDU);
+      mpfr_div_2exp (b, b, 1, MPFR_RNDN);
+      mpfr_clear_flags ();
+      inex = mpfr_sqr (a, b, MPFR_RNDN);
+      MPFR_ASSERTN(inex < 0);
+      MPFR_ASSERTN(mpfr_zero_p (a) && mpfr_signbit (a) == 0);
+      MPFR_ASSERTN(mpfr_underflow_p ());
+
+      /* exercise square between 0.5*2^emin and its predecessor (emin even) */
+      emin = mpfr_get_emin ();
+      mpfr_set_emin (emin + (emin & 1)); /* now emin is even */
+      mpfr_set_ui_2exp (b, 1, mpfr_get_emin () - 1, MPFR_RNDN);
+      inex = mpfr_sqrt (b, b, MPFR_RNDZ);
+      MPFR_ASSERTN(inex != 0); /* sqrt(2) is not exact */
+      mpfr_mul_2exp (c, b, 1, MPFR_RNDN);
+      mpfr_sqr (c, c, MPFR_RNDN);
+      mpfr_clear_flags ();
+      inex = mpfr_sqr (a, b, MPFR_RNDN);
+      if (mpfr_get_exp (c) < mpfr_get_emin () + 2) /* underflow */
+        {
+          /* if c > 0.5*2^(emin+1), we should round to 0.5*2^emin */
+          if (mpfr_cmp_ui_2exp (c, 1, mpfr_get_emin ()) > 0)
+            {
+              MPFR_ASSERTN(inex > 0);
+              MPFR_ASSERTN(mpfr_cmp_ui_2exp (a, 1, mpfr_get_emin () - 1) == 0);
+              MPFR_ASSERTN(mpfr_underflow_p ());
+            }
+          else /* we should round to 0 */
+            {
+              MPFR_ASSERTN(inex < 0);
+              MPFR_ASSERTN(mpfr_zero_p (a) && mpfr_signbit (a) == 0);
+              MPFR_ASSERTN(mpfr_underflow_p ());
+            }
+        }
+      else
+        {
+          MPFR_ASSERTN(inex > 0);
+          MPFR_ASSERTN(mpfr_cmp_ui_2exp (a, 1, mpfr_get_emin () - 1) == 0);
+          MPFR_ASSERTN(!mpfr_underflow_p ());
+        }
+      mpfr_set_emin (emin);
+
+      /* exercise exact square root 2^(emin-2) for emin even */
+      emin = mpfr_get_emin ();
+      mpfr_set_emin (emin + (emin & 1)); /* now emin is even */
+      mpfr_set_ui_2exp (b, 1, (mpfr_get_emin () - 2) / 2, MPFR_RNDN);
+      inex = mpfr_sqr (a, b, MPFR_RNDN);
+      MPFR_ASSERTN(inex < 0);
+      MPFR_ASSERTN(mpfr_zero_p (a) && mpfr_signbit (a) == 0);
+      MPFR_ASSERTN(mpfr_underflow_p ());
+      mpfr_set_emin (emin);
+
+      /* same as above, for RNDU */
+      emin = mpfr_get_emin ();
+      mpfr_set_emin (emin + (emin & 1)); /* now emin is even */
+      mpfr_set_ui_2exp (b, 1, mpfr_get_emin () - 1, MPFR_RNDN);
+      inex = mpfr_sqrt (b, b, MPFR_RNDZ);
+      MPFR_ASSERTN(inex != 0); /* sqrt(2) is not exact */
+      mpfr_mul_2exp (c, b, 1, MPFR_RNDN);
+      mpfr_sqr (c, c, MPFR_RNDU);
+      mpfr_clear_flags ();
+      inex = mpfr_sqr (a, b, MPFR_RNDU);
+      MPFR_ASSERTN(inex > 0);
+      MPFR_ASSERTN(mpfr_cmp_ui_2exp (a, 1, mpfr_get_emin () - 1) == 0);
+      /* we have underflow if c < 2^(emin+1) */
+      if (mpfr_cmp_ui_2exp (c, 1, mpfr_get_emin () + 1) < 0)
+        MPFR_ASSERTN(mpfr_underflow_p ());
+      else
+        MPFR_ASSERTN(!mpfr_underflow_p ());
+      mpfr_set_emin (emin);
+
+      mpfr_clear (a);
+      mpfr_clear (b);
+      mpfr_clear (c);
+    }
+}
+
 int
 main (void)
 {
@@ -195,6 +345,7 @@
 
   tests_start_mpfr ();
 
+  coverage (1024);
   check_mpn_sqr ();
   check_special ();
   test_underflow ();