git: 3085fc9d97bd - main - [libm] implementation of rsqrt, rsqrtf, and rsqrtl

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sun, 10 May 2026 16:37:37 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=3085fc9d97bd83785ba3ba43e0378d7d67987d1f

commit 3085fc9d97bd83785ba3ba43e0378d7d67987d1f
Author:     Steve Kargl <kargl@FreeBSD.org>
AuthorDate: 2026-05-08 14:06:08 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-05-10 16:36:33 +0000

    [libm] implementation of rsqrt, rsqrtf, and rsqrtl
    
    From the PR:
    The attached diff implements the inverse square root function, i.e,
    rsqrt(x) = 1 / sqrt(x).  Exhaustive testing of the float version
    suggests that it is correctly rounded in round-to-nearest for all
    test values in the range [0x1p-127,0x1p126].
    Exhaustive testing of rsqrt and rsqrtl cannot be done, but 1100M
    values of x for rsqrt and 400M values for rsqrtl were tested.  All
    tested values were correctly rounded.
    
    I do not have access to LD128 (i.e., IEEE 128-bit floating point)
    hardware, so the implementation of rsqrtl() is untested.
    
    The following is a summary of changes to source code.
    
    * lib/msun/Makefile:
      . Add s_rsqrt.c and s_rsqrtf.c to COMMON_SRCS.
      . For non-53-bit long double targets, add s_rsqrtl.c to COMMON_SRCS.
      . Add MLINKS for rsqrt.3, rsqrtf.3, and rsqrtl.3 to sqrt.3.
    
    * lib/msun/Symbol.map:
      . Add rsqrt, rsqrtf, and rsqrtl to the Symbol map for shared libm.so.
    
    * lib/msun/man/sqrt.3:
      . Update the sqrt.3 manual page to include information for rsqrt[fl].
      . Note, these function come from ISO C23 (and IEEE-754 2008).
    
    * lib/msun/src/math.h:
      . Add prototypes for new functions.
    
    * lib/msun/src/math_private.h:
      . Add _SPLIT, _FAST2SUM, _SLOW2SUM, _XADD, _MUL, and _XMUL
        macros to perform type-type arthimetic (i.e., float-float).
    
    * src/s_rsqrt.c:
      . New file with the implementation of 'double rsqrt(double)'.
      . For 53-bit long double targets, add a weak reference for rsqrtl.
    
    * src/s_rsqrtf.c:
      . New file with the implementation of 'float rsqrt(float)'.
    
    * src/s_rsqrtl.c
      . New file with the implementation of 'long double rsqrt(long double)'.
        Note, the LD80 version uses bit twiddling and LD128 version is a
        straight C language implementation.  The LD128 is untested due to
        lack of hardware.
    
    PR:     295089
    MFC after:      1 week
---
 lib/msun/Makefile           |   6 +-
 lib/msun/Symbol.map         |   3 +
 lib/msun/man/sqrt.3         |  53 +++++++++++-
 lib/msun/src/math.h         |   3 +
 lib/msun/src/math_private.h |  83 ++++++++++++++++++
 lib/msun/src/s_rsqrt.c      | 153 +++++++++++++++++++++++++++++++++
 lib/msun/src/s_rsqrtf.c     | 155 +++++++++++++++++++++++++++++++++
 lib/msun/src/s_rsqrtl.c     | 203 ++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 654 insertions(+), 5 deletions(-)

diff --git a/lib/msun/Makefile b/lib/msun/Makefile
index a9b07babc0a6..5e9adb533760 100644
--- a/lib/msun/Makefile
+++ b/lib/msun/Makefile
@@ -88,7 +88,7 @@ COMMON_SRCS= b_tgamma.c \
 	s_lround.c s_lroundf.c s_lroundl.c s_modff.c \
 	s_nan.c s_nearbyint.c s_nextafter.c s_nextafterf.c \
 	s_nexttowardf.c s_remquo.c s_remquof.c \
-	s_rint.c s_rintf.c s_round.c s_roundf.c \
+	s_rint.c s_rintf.c s_round.c s_roundf.c s_rsqrt.c s_rsqrtf.c \
 	s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \
 	s_signgam.c s_significand.c s_significandf.c s_sin.c \
 	s_sincos.c s_sincosf.c s_sinf.c \
@@ -143,7 +143,7 @@ COMMON_SRCS+=	b_tgammal.c catrigl.c \
 	s_fminimum_numl.c s_fminimum_mag_numl.c \
 	s_frexpl.c s_logbl.c s_logl.c s_nanl.c \
 	s_nextafterl.c s_nexttoward.c s_remquol.c s_rintl.c s_roundl.c \
-	s_scalbnl.c s_sinl.c s_sincosl.c s_sinpil.c \
+	s_scalbnl.c s_sinl.c s_sincosl.c s_sinpil.c s_rsqrtl.c \
 	s_tanhl.c s_tanl.c s_tanpil.c s_truncl.c w_cabsl.c
 # Work around this warning from gcc:
 #     lib/msun/ld80/e_powl.c:275:1: error: floating constant exceeds range of
@@ -289,7 +289,7 @@ MLINKS+=sincos.3 sincosf.3 sin.3 sincosl.3
 MLINKS+=sinh.3 sinhf.3 sinh.3 sinhl.3
 MLINKS+=sinpi.3 sinpif.3 sinpi.3 sinpil.3
 MLINKS+=sqrt.3 cbrt.3 sqrt.3 cbrtf.3 sqrt.3 cbrtl.3 sqrt.3 sqrtf.3 \
-	sqrt.3 sqrtl.3
+	sqrt.3 sqrtl.3 sqrt.3 rsqrt.3 sqrt.3 rsqrtf.3 sqrt.3 rsqrtl.3
 MLINKS+=tan.3 tanf.3 tan.3 tanl.3
 MLINKS+=tanh.3 tanhf.3 tanh.3 tanhl.3
 MLINKS+=tanpi.3 tanpif.3 tanpi.3 tanpil.3
diff --git a/lib/msun/Symbol.map b/lib/msun/Symbol.map
index 00222c960f80..2484cc13013b 100644
--- a/lib/msun/Symbol.map
+++ b/lib/msun/Symbol.map
@@ -344,4 +344,7 @@ FBSD_1.9 {
 	fminimum_mag_num;
 	fminimum_mag_numf;
 	fminimum_mag_numl;
+	rsqrt;
+	rsqrtf;
+	rsqrtl;
 };
diff --git a/lib/msun/man/sqrt.3 b/lib/msun/man/sqrt.3
index f4a217353af0..6da6407ecc4b 100644
--- a/lib/msun/man/sqrt.3
+++ b/lib/msun/man/sqrt.3
@@ -25,17 +25,20 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd February 15, 2020
+.Dd May 7, 2026
 .Dt SQRT 3
 .Os
 .Sh NAME
 .Nm cbrt ,
 .Nm cbrtf ,
 .Nm cbrtl ,
+.Nm rsqrt ,
+.Nm rsqrtf ,
+.Nm rsqrtl,
 .Nm sqrt ,
 .Nm sqrtf ,
 .Nm sqrtl
-.Nd cube root and square root functions
+.Nd cube root, square root, and inverse square root functions
 .Sh LIBRARY
 .Lb libm
 .Sh SYNOPSIS
@@ -47,6 +50,12 @@
 .Ft long double
 .Fn cbrtl "long double x"
 .Ft double
+.Fn rsqrt "double x"
+.Ft float
+.Fn rsqrtf "float x"
+.Ft long double
+.Fn rsqrtl "long double x"
+.Ft double
 .Fn sqrt "double x"
 .Ft float
 .Fn sqrtf "float x"
@@ -63,6 +72,15 @@ the cube root of
 .Fa x .
 .Pp
 The
+.Fn rsqrt ,
+.Fn rsqrtf ,
+and
+.Fn rsqrtl
+functions compute
+the inverse square root of
+.Fa x .
+.Pp
+The
 .Fn sqrt ,
 .Fn sqrtf ,
 and
@@ -77,6 +95,23 @@ The
 and
 .Fn cbrtl
 functions return the requested cube root.
+.Pp
+The
+.Fn rsqrt ,
+.Fn rsqrtf ,
+and
+.Fn rsqrtl
+functions return 1 divided by the square root of
+.Fa x
+unless an error occurs.
+An attempt to take the
+.Fn rsqrt
+of negative
+.Fa x
+raises an invalid exception and causes an \*(Na to be returned.
+The inverse square root of \*(Pm0 returns \*(Pm\(if and
+raises a divide-by-zero exception.
+.Pp
 The
 .Fn sqrt ,
 .Fn sqrtf ,
@@ -104,6 +139,13 @@ and
 .Fn sqrtl
 functions conform to
 .St -isoC-99 .
+The
+.Fn rsqrt ,
+.Fn rsqrtf ,
+and
+.Fn rsqrtl
+functions conform to ISO/IEC 9899:2024 ("ISO C23").
+.\" .St -isoC-24 .
 .Sh HISTORY
 The
 .Fn cbrt
@@ -120,3 +162,10 @@ The
 .Fn cbrtl
 function appeared in
 .Fx 9.0 .
+The
+.Fn rsqrt ,
+.Fn rsqrtf ,
+and
+.Fn rsqrtl
+functions appeared in
+.Fx 16.0 .
diff --git a/lib/msun/src/math.h b/lib/msun/src/math.h
index 9894401160d4..e5c2ccb786a4 100644
--- a/lib/msun/src/math.h
+++ b/lib/msun/src/math.h
@@ -544,6 +544,9 @@ long double	fminimum_mag_numl(long double, long double);
 double		fminimum_num(double, double);
 float		fminimum_numf(float, float);
 long double	fminimum_numl(long double, long double);
+double		rsqrt(double);
+float		rsqrtf(float);
+long double	rsqrtl(long double);
 #endif /* __ISO_C_VISIBLE >= 2023 */
 
 __END_DECLS
diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h
index fbd84e246ca7..e9055a9a8c61 100644
--- a/lib/msun/src/math_private.h
+++ b/lib/msun/src/math_private.h
@@ -471,6 +471,89 @@ do {								\
 	(a) = __tmp;		\
 } while (0)
 
+/*
+ * Split x into high and low bits where CC is 0x1p(N/2) + 1 where
+ * N is rounded up for types with odd precisions.
+ *
+ * #define _CC (0x1p12F + 1)	// float
+ * #define _CC (0x1p27  + 1)	// double
+ * #define _CC (0x1p32L + 1)	// long double (LD80)
+ * #define _CC (0x1p57L + 1)	// long double (LD128)
+ */
+#define _SPLIT(x, xh, xl)		\
+do {					\
+	typeof(x) __t1;			\
+	__t1 = (x) * _CC;		\
+	xh = __t1 + ((x) - __t1);	\
+	xl = (x) - xh;			\
+} while(0)
+
+/*
+ * FAST2SUM requires |x| >= |y|.  x and y are full precision.
+ * Note, _2SUMF(x,y) above destroys x and y.
+ */
+#define _FAST2SUM(x, y, hi, lo)	\
+do {				\
+	hi = (x) + (y);		\
+	lo = (y) - (hi - (x));	\
+} while(0)
+
+/*
+ * SLOW2SUM does not require |x| >= |y|.  Here, x and y are full precision.
+ * The t1 temporary variable is volatile to prevent compiler optimizations.
+ * Note, _2SUM(x,y) above destroys x and y.
+ */
+#define _SLOW2SUM(x, y, hi, lo)			\
+do {						\
+	volatile typeof(x) __t1;		\
+	typeof(x) __t2;				\
+	hi = (x) + (y);				\
+	__t1 = hi - (y);			\
+	__t2 = hi - __t1;			\
+	lo = ((x) - __t1) + ((y) - __t2);	\
+} while(0)
+
+/*
+ * x and y are full precision quantities that have been split into high
+ * and low parts via the _SPLIT macro.  x and y are added to give z as
+ * high and low parts.
+ */
+#define _XADD(xh, xl, yh, yl, zh, zl)			\
+do {							\
+	typeof(xh) __s1, __s2, __s3, __s4, __s5, __s6;	\
+	_SLOW2SUM(xh, yh, __s1, __s2);			\
+	_SLOW2SUM(xl, yl, __s3, __s4);			\
+	_FAST2SUM(__s1, __s2 + __s3, __s5, __s6);	\
+	_FAST2SUM(__s5, __s6 + __s4, zh, zl);		\
+} while(0)
+
+/*
+ * x and y are full precision quantities.  r1 and r2 are full precision
+ * high and low parts of the multiplication x * y.
+ */
+#define _MUL(x, y, r1 ,r2)				\
+do  {							\
+	typeof(x) __xh, __xl, __yh, __yl;		\
+	typeof(x) __t1;				\
+	_SPLIT(x, __xh, __xl);				\
+	_SPLIT(y, __yh, __yl);				\
+	r1 = (x) * (y);					\
+	__t1 = __xh * __yl + (__xh * __yh - r1);	\
+	r2 = __xl * __yl + (__xl * __yh + __t1);	\
+} while(0)
+
+/*
+ * x and y are full precision quantities that have been split into high
+ * and low parts via the _SPLIT macro.  x and y are multiplied to give z
+ * as high and low parts.
+ */
+#define _XMUL(xh, xl, yh, yl, ph, pl)	\
+do {					\
+    _MUL(xh, yh, ph, pl);		\
+    pl += xl * yl + xl * yh + xh * yl;	\
+} while(0)
+
+
 /*
  * Common routine to process the arguments to nan(), nanf(), and nanl().
  */
diff --git a/lib/msun/src/s_rsqrt.c b/lib/msun/src/s_rsqrt.c
new file mode 100644
index 000000000000..0a513afe8ed2
--- /dev/null
+++ b/lib/msun/src/s_rsqrt.c
@@ -0,0 +1,153 @@
+/*-
+ * Copyright (c) 2026 Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).
+ *
+ * First, filter out special cases:
+ *
+ *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.
+ *   2. rsqrt(nan) = NaN.
+ *   3. rsqrt(+inf) returns +0.
+ *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.
+ *
+ * If x is a subnormal, scale x into the normal range by x*0x1pN; while
+ * recording the exponent of the scale factor N.  Split the possibly
+ * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).
+ * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is
+ * in [0.25,1) with n even.
+ *
+ * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive
+ * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in
+ * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then
+ * used with one iteration of Goldschmidt's algorithm:
+ *
+ *	z = x * y
+ *	h = y / 2
+ *	r = 0.5 - h * z
+ *	y = h * r + h
+ *
+ * A factor of 2 appears missing in the above, but it is included in the
+ * exponent m.
+ */
+#include <fenv.h>
+#include <float.h>
+#include "math.h"
+#include "math_private.h"
+
+#pragma STDC FENV_ACCESS ON
+
+#ifdef _CC
+#undef _CC
+#endif
+#define _CC (0x1p27 + 1)
+
+double
+rsqrt(double x)
+{
+	volatile static const double vzero = 0;
+	static const double half = 0.5;
+	int hx, m, rnd;
+	uint32_t lx, ux;
+	double h, ph, pl, rh, rl, y, zh, zl;
+
+	EXTRACT_WORDS(hx, lx, x);
+	ux = (uint32_t)hx & 0x7fffffff;
+
+	/* x = +-0.  Raise exception. */
+	if ((ux | lx) == 0)
+	    return (1 / x);
+
+	/* x is NaN. */
+	if (ux > 0x7ff00000)
+	    return (x + x);
+
+	/* x is +-inf. */
+	if (ux == 0x7ff00000)
+	    return (hx & 0x80000000 ? vzero / vzero : 0.);
+
+	/* x < 0.  Raise exception. */
+	if (hx < 0)
+	    return (vzero / vzero);
+
+	/*
+	 * If x is subnormal, then scale it into the normal range.
+	 * Split x into significand and exponent, x = f * 2^m, with
+	 * f in [0.5,1) and m a biased exponent.
+	 */
+	m = 0;
+	if (hx < 0x00100000) {		/* Subnormal */
+	    x *= 0x1p54;
+	    GET_HIGH_WORD(hx, x);
+	    m = -54;
+	}
+	m += (hx >> 20) - 1022;
+	hx = (hx & 0x000fffff) | 0x3fe00000;
+	SET_HIGH_WORD(x, hx);
+
+	/* m is odd.  Put x into [0.25,5) and increase m. */
+	if (m & 1) {
+	    x /= 2;
+	    m += 1;
+	}
+	m = -(m >> 1);			/* Prepare for 2^(-m/2). */
+
+	y = 1 / sqrt(x);	/* ~52-bit estimate. */
+
+	h = y / 2;
+
+	/*
+	 * For values of x with a representation of 0x1.ffffffffffffepN
+	 * with N an odd integer, the computed rsqrt() is not correctly
+	 * rounded in round-to-nearest without toggling the rounding mode
+	 * to FE_TOWARDZERO.  Note, FE_DOWNWARD also works.  However,
+	 * messing with the rounding mode is expensive, so only do it
+	 * when necessary. Example, x = 3.9999999999999991
+	 * gives y --> hx = 0x3ff00000, lx = 0x00000001
+	 */
+	EXTRACT_WORDS(hx, lx, y);
+	if ((hx & 0x000fffff) == 0 && lx == 1) {
+	    rnd = fegetround();
+	    fesetround(FE_TOWARDZERO);
+	    _MUL(x, y, zh, zl);
+	    _XMUL(zh, zl, h, 0, ph, pl);
+	    fesetround(rnd);
+	} else {
+	    _MUL(x, y, zh, zl);
+	    _XMUL(zh, zl, h, 0, ph, pl);
+	}
+
+	_XADD(-ph, -pl, half, 0, rh, rl);
+	y = rh * h + h;
+
+	ux = (m + 1024) << 20;
+	INSERT_WORDS(x, ux, 0);
+	return (y *= x);
+}
+
+#if LDBL_MANT_DIG == 53
+__weak_reference(rsqrt, rsqrtl);
+#endif
diff --git a/lib/msun/src/s_rsqrtf.c b/lib/msun/src/s_rsqrtf.c
new file mode 100644
index 000000000000..b71f7baf5657
--- /dev/null
+++ b/lib/msun/src/s_rsqrtf.c
@@ -0,0 +1,155 @@
+/*-
+ * Copyright (c) 2026 Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).
+ *
+ * First, filter out special cases:
+ *
+ *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.
+ *   2. rsqrt(nan) = NaN.
+ *   3. rsqrt(+inf) returns +0.
+ *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.
+ *
+ * If x is a subnormal, scale x into the normal range by x*0x1pN; while
+ * recording the exponent of the scale factor N.  Split the possibly
+ * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).
+ * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is
+ * in [0.25,1) with n even.
+ *
+ * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive
+ * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in
+ * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then
+ * used with one iteration of Goldschmidt's algorithm:
+ *
+ *	z = x * y
+ *	h = y / 2
+ *	r = 0.5 - h * z
+ *	y = h * r + h
+ *
+ * A factor of 2 appears missing in the above, but it is included in the
+ * exponent m.
+ */
+#include <fenv.h>
+#include <float.h>
+#include "math.h"
+#include "math_private.h"
+
+#pragma STDC FENV_ACCESS ON
+
+#ifdef _CC
+#undef _CC
+#endif
+#define _CC (0x1p12F + 1)
+
+float
+rsqrtf(float x)
+{
+	volatile static const float vzero = 0;
+	static const float half = 0.5;
+	uint32_t ix, ux;
+	int m, rnd;
+	float h, ph, pl, rh, rl, y, zh, zl;
+
+	GET_FLOAT_WORD(ix, x);
+	ux = ix & 0x7fffffff;
+
+	/* x = +-0.  Raise exception. */
+	if (ux == 0)
+	    return (1 / x);
+
+	/* x is NaN. */
+	if (ux > 0x7f800000)
+	    return (x + x);
+
+	/* x is +-inf. */
+	if (ux == 0x7f800000)
+	    return (ix & 0x80000000 ? vzero / vzero : 0.F);
+
+	/* x < 0.  Raise exception. */
+	if (ix & 0x80000000)
+	    return (vzero / vzero);
+
+	/*
+	 * If x is subnormal, then scale it into the normal range.
+	 * Split x into significand and exponent, x = f * 2^m, with
+	 * f in [0.5,1) and m a biased exponent.
+	 */
+	m = 0;
+	if (ux < 0x00800000) {		/* Subnormal */
+	    x *= 0x1p25f;
+	    GET_FLOAT_WORD(ix, x);
+	    m = -25;
+	}
+	m += (ix >> 23) - 126;		/* Unbiased exponent */
+	ix = (ix & 0x007fffff) | 0x3f000000;
+	SET_FLOAT_WORD(x, ix);		/* x is in [0.5,1). */
+
+	/* m is odd.  Put x into [0.25,5) and increase m. */
+	if (m & 1) {
+	    x /= 2;
+	    m += 1;
+	}
+	m = -(m >> 1);		/* Prepare for 2^(-m/2). */
+
+	/*
+	 * Exhaustive testing of rsqrtf(x) = 1 / sqrtf(x) with x in
+	 * [0x1p-127,0x1p126] shows the this approximation gives a
+	 * 22- to 23-bit estimate of rsqrt(f).  This is equivalent to
+	 * a max ulp of ~1.49.
+	 */
+	y = 1 / sqrtf(x);
+
+	h = y / 2;
+
+	/*
+	 * For values of x with a representation of 0x1.fffffcpN with
+	 * N an odd integer, the computed rsqrtf() is not correctly
+	 * rounded in round-to-nearest without toggling the rounding
+	 * mode  to FE_TOWARDZERO.  Note, FE_DOWNWARD also works.
+	 * However, messing with the rounding mode is expensive, so
+	 * only do it when necessary. Example, x = 0x1.fffffcp3 gives
+	 * y --> 0x3f800001.
+	 */
+	GET_FLOAT_WORD(ix, y);
+	if ((ix & 0x000fffff) == 1) {
+	    rnd = fegetround();
+	    fesetround(FE_TOWARDZERO);
+	    _MUL(x, y, zh, zl);
+	    _XMUL(zh, zl, h, 0, ph, pl);
+	   fesetround(rnd);
+	} else {
+	    _MUL(x, y, zh, zl);
+	    _XMUL(zh, zl, h, 0, ph, pl);
+	}
+
+	_XADD(-ph, -pl, half, 0, rh, rl);
+	y = h * rh + h;
+
+	ix = (uint32_t)(m + 128) << 23;
+	SET_FLOAT_WORD(x, ix);
+	return (y *= x);
+}
diff --git a/lib/msun/src/s_rsqrtl.c b/lib/msun/src/s_rsqrtl.c
new file mode 100644
index 000000000000..34d5cdc48173
--- /dev/null
+++ b/lib/msun/src/s_rsqrtl.c
@@ -0,0 +1,203 @@
+/*-
+ * Copyright (c) 2026 Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Compute the inverse sqrt of x, i.e., rsqrt(x) = 1 / sqrt(x).
+ *
+ * First, filter out special cases:
+ *
+ *   1. rsqrt(+-0) = +-inf, and raise FE_DIVBYZERO exception.
+ *   2. rsqrt(nan) = NaN.
+ *   3. rsqrt(+inf) returns +0.
+ *   2. rsqrt(x<0) = NaN, and raises FE_INVALID.
+ *
+ * If x is a subnormal, scale x into the normal range by x*0x1pN; while
+ * recording the exponent of the scale factor N.  Split the possibly
+ * scaled x into f*2^n with f in [0.5,1).  Set m=n or m=n-N (subnormal).
+ * If n is odd, then set f = f/2 and increase n to n+1.  Thus, f is
+ * in [0.25,1) with n even.
+ *
+ * An initial estimate of y = rqrt[f](x) is 1 / sqrt[f](x).  Exhaustive
+ * testing of rsqrtf() gave a max ULP of 1.49; while testing 500M x in
+ * [0,1000] gave a max ULP of 1.24 for rsqrt().  The value of y is then
+ * used with one iteration of Goldschmidt's algorithm:
+ *
+ *	z = x * y
+ *	h = y / 2
+ *	r = 0.5 - h * z
+ *	y = h * r + h
+ *
+ * A factor of 2 appears missing in the above, but it is included in the
+ * exponent m.
+ */
+#include <fenv.h>
+#include <float.h>
+#include "math.h"
+#include "math_private.h"
+#include "fpmath.h"
+
+#pragma STDC FENV_ACCESS ON
+
+#if LDBL_MANT_DIG == 64
+
+#ifdef _CC
+#undef _CC
+#endif
+#define _CC (0x1p32L + 1)
+
+long double
+rsqrtl(long double x)
+{
+	volatile static const double vzero = 0;
+	static const double half = 0.5;
+	uint32_t ux;
+	int m, rnd;
+	long double h, ph, pl, rh, rl, y, zh, zl;
+	union IEEEl2bits u;
+
+	u.e = x;
+	ux = (u.bits.manl | u.bits.manh);
+
+	/* x = +-0.  Raise exception. */
+	if ((u.bits.exp | ux) == 0)
+	    return (1 / x);
+
+	/* x is NaN or x is +-inf. */
+	if (u.bits.exp == 0x7fff)
+	    return (ux ? (x + x) : (u.bits.sign ? vzero / vzero : 0));
+
+	/* x < 0.  Raise exception. */
+	if (u.bits.sign)
+	    return (vzero / vzero);
+
+	/*
+	 * If x is subnormal, then scale it into the normal range.
+	 * Split x into significand and exponent, x = f * 2^m, with
+	 * f in [0.5,1) and m a biased exponent.
+	 */
+	ENTERI();
+
+	if (u.bits.exp == 0) {		/* Subnormal */
+	    u.e *= 0x1p512;
+	    m = u.bits.exp - 0x41fe;
+	} else {
+	    m = u.bits.exp - 0x3ffe;
+	}
+	u.bits.exp = 0x3ffe;
+
+	/* m is odd.  Put x into [0.25,5) and increase m. */
+	if (m & 1) {
+	    u.e /= 2;
+	    m += 1;
+	}
+	m = -(m >> 1);			/* Prepare for 2^(-m/2). */
+
+	y = 1 / sqrt((double)u.e);	/* ~52-bit estimate. */
+	y -= y * (u.e * y * y - 1) / 2;	/* ~63-bit estimate. */
+
+	h = y / 2;
+
+	_MUL(u.e, y, zh, zl);
+	_XMUL(zh, zl, h, 0, ph, pl);
+	_XADD(-ph, -pl, half, 0, rh, rl);
+	y = rh * h + h;
+
+	u.e = 1;
+	u.xbits.expsign = 0x3fff + m + 1;
+	RETURNI(y * u.e);
+}
+
+#else
+
+#ifdef _CC
+#undef _CC
+#endif
+#define _CC (0x1p57L + 1)
+
+long double
+rsqrtl(long double x)
+{
+	volatile static const double vzero = 0;
+	int hx, m, rnd;
+	long double y;
+
+	/* x = +-0.  Raise exception. */
+	if (x == 0)
+	    return (1 / x);
+
+	/* x is NaN. */
+	if (isnan(x))
+	    return (x + x);
+
+	/* x is +-inf. */
+	if (isinf(x))
+	    return (x > 0 ? 0 : vzero / vzero);
+
+	/* x < 0.  Raise exception. */
+	if (x < 0)
+	    return (vzero / vzero);
+
+	/*
+	 * If x is subnormal, then scale it into the normal range.
+	 * Split x into significand and exponent, x = f * 2^m, with
+	 * f in [0.5,1) and m a biased exponent.
+	 */
+	m = 0;
+	if (!isnormal(x)) {
+	    x *= 0x1p114L;
+	    m = -114;
+	}
+	x = frexpl(x, &hx);
+	m += hx;
+
+	/* m is odd.  Put x into [0.25,5) and increase m. */
+	if (m & 1) {
+	    x /= 2;
+	    m += 1;
+	}
+	m = -(m >> 1);			/* Prepare for 2^(-m/2). */
+
+	y = 1 / sqrt((double)x);	/* ~52-bit estimate. */
+	y -= y * (x * y * y - 1) / 2;	/* ~104-bit estimate. */
+
+	static const double half = 0.5;
+	long double h, ph, pl, rh, rl, zh, zl;
+
+	h = y / 2;
+
+	rnd = fegetround();
+	fesetround(FE_TOWARDZERO);
+	_MUL(x, y, zh, zl);
+	_XMUL(zh, zl, -h, 0, ph, pl);
+	fesetround(rnd);
+
+	_XADD(ph, pl, half, 0, rh, rl);
+	y = rh * h + h;
+	m++;
+
+	RETURNI(ldexpl(y, m));
+}
+#endif