mirror of
https://github.com/ptitSeb/box64.git
synced 2025-05-08 07:59:18 +08:00
84 lines
1.9 KiB
C
84 lines
1.9 KiB
C
#include <stdint.h>
|
|
#include <math.h>
|
|
#include "libm.h"
|
|
#include "sqrt_data.h"
|
|
|
|
#define FENV_SUPPORT 1
|
|
|
|
static inline uint32_t mul32(uint32_t a, uint32_t b)
|
|
{
|
|
return (uint64_t)a*b >> 32;
|
|
}
|
|
|
|
/* see sqrt.c for more detailed comments. */
|
|
|
|
float __cdecl sqrtf(float x)
|
|
{
|
|
uint32_t ix, m, m1, m0, even, ey;
|
|
|
|
ix = asuint(x);
|
|
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
|
/* x < 0x1p-126 or inf or nan. */
|
|
if (ix * 2 == 0)
|
|
return x;
|
|
if (ix == 0x7f800000)
|
|
return x;
|
|
if (ix > 0x7f800000)
|
|
return math_error(_DOMAIN, "sqrtf", x, 0, (x - x) / (x - x));
|
|
/* x is subnormal, normalize it. */
|
|
ix = asuint(x * 0x1p23f);
|
|
ix -= 23 << 23;
|
|
}
|
|
|
|
/* x = 4^e m; with int e and m in [1, 4). */
|
|
even = ix & 0x00800000;
|
|
m1 = (ix << 8) | 0x80000000;
|
|
m0 = (ix << 7) & 0x7fffffff;
|
|
m = even ? m0 : m1;
|
|
|
|
/* 2^e is the exponent part of the return value. */
|
|
ey = ix >> 1;
|
|
ey += 0x3f800000 >> 1;
|
|
ey &= 0x7f800000;
|
|
|
|
/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. */
|
|
static const uint32_t three = 0xc0000000;
|
|
uint32_t r, s, d, u, i;
|
|
i = (ix >> 17) % 128;
|
|
r = (uint32_t)__rsqrt_tab[i] << 16;
|
|
/* |r*sqrt(m) - 1| < 0x1p-8 */
|
|
s = mul32(m, r);
|
|
/* |s/sqrt(m) - 1| < 0x1p-8 */
|
|
d = mul32(s, r);
|
|
u = three - d;
|
|
r = mul32(r, u) << 1;
|
|
/* |r*sqrt(m) - 1| < 0x1.7bp-16 */
|
|
s = mul32(s, u) << 1;
|
|
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
|
|
d = mul32(s, r);
|
|
u = three - d;
|
|
s = mul32(s, u);
|
|
/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
|
|
s = (s - 1)>>6;
|
|
/* s < sqrt(m) < s + 0x1.08p-23 */
|
|
|
|
/* compute nearest rounded result. */
|
|
uint32_t d0, d1, d2;
|
|
float y, t;
|
|
d0 = (m << 16) - s*s;
|
|
d1 = s - d0;
|
|
d2 = d1 + s + 1;
|
|
s += d1 >> 31;
|
|
s &= 0x007fffff;
|
|
s |= ey;
|
|
y = asfloat(s);
|
|
if (FENV_SUPPORT) {
|
|
/* handle rounding and inexact exception. */
|
|
uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
|
|
tiny |= (d1^d2) & 0x80000000;
|
|
t = asfloat(tiny);
|
|
y = eval_as_float(y + t);
|
|
}
|
|
return y;
|
|
}
|