1
0
Fork 0
mirror of https://github.com/yuzu-emu/unicorn.git synced 2025-04-01 23:07:03 +00:00

fpu/softfloat: re-factor muladd

We can now add float16_muladd and use the common decompose and
canonicalize functions to have a single implementation for
float16/32/64 muladd functions.

Backports commit d446830a3aac33e7221e361dad3ab1e1892646cb from qemu
This commit is contained in:
Alex Bennée 2018-03-08 10:55:02 -05:00 committed by Lioncash
parent 5ea008e178
commit d92d5c6910
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
17 changed files with 287 additions and 573 deletions

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_aarch64
#define float16_maybe_silence_nan float16_maybe_silence_nan_aarch64
#define float16_mul float16_mul_aarch64
#define float16_muladd float16_muladd_aarch64
#define float16_squash_input_denormal float16_squash_input_denormal_aarch64
#define float16_sub float16_sub_aarch64
#define float16_to_float32 float16_to_float32_aarch64

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_aarch64eb
#define float16_maybe_silence_nan float16_maybe_silence_nan_aarch64eb
#define float16_mul float16_mul_aarch64eb
#define float16_muladd float16_muladd_aarch64eb
#define float16_squash_input_denormal float16_squash_input_denormal_aarch64eb
#define float16_sub float16_sub_aarch64eb
#define float16_to_float32 float16_to_float32_aarch64eb

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_arm
#define float16_maybe_silence_nan float16_maybe_silence_nan_arm
#define float16_mul float16_mul_arm
#define float16_muladd float16_muladd_arm
#define float16_squash_input_denormal float16_squash_input_denormal_arm
#define float16_sub float16_sub_arm
#define float16_to_float32 float16_to_float32_arm

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_armeb
#define float16_maybe_silence_nan float16_maybe_silence_nan_armeb
#define float16_mul float16_mul_armeb
#define float16_muladd float16_muladd_armeb
#define float16_squash_input_denormal float16_squash_input_denormal_armeb
#define float16_sub float16_sub_armeb
#define float16_to_float32 float16_to_float32_armeb

View file

@ -726,57 +726,6 @@ static float32 propagateFloat32NaN(float32 a, float32 b, float_status *status)
}
}
/*----------------------------------------------------------------------------
| Takes three single-precision floating-point values `a', `b' and `c', one
| of which is a NaN, and returns the NaN to propagate.  Raises the invalid
| exception if any of the three is a signaling NaN.  `infzero' reports
| whether a*b was 0*inf or inf*0 (in which case c is certainly a NaN and
| which NaN to return is implementation defined, so the decision is left
| to the target-specific pickNaNMulAdd()).
*----------------------------------------------------------------------------*/
static float32 propagateFloat32MulAddNaN(float32 a, float32 b,
                                         float32 c, flag infzero,
                                         float_status *status)
{
    flag a_is_qnan = float32_is_quiet_nan(a, status);
    flag a_is_snan = float32_is_signaling_nan(a, status);
    flag b_is_qnan = float32_is_quiet_nan(b, status);
    flag b_is_snan = float32_is_signaling_nan(b, status);
    flag c_is_qnan = float32_is_quiet_nan(c, status);
    flag c_is_snan = float32_is_signaling_nan(c, status);
    int choice;

    if (a_is_snan || b_is_snan || c_is_snan) {
        float_raise(float_flag_invalid, status);
    }

    /* Consult the target hook even in default-NaN mode: it must get the
     * opportunity to set the Invalid flag for the infzero case.
     */
    choice = pickNaNMulAdd(a_is_qnan, a_is_snan,
                           b_is_qnan, b_is_snan,
                           c_is_qnan, c_is_snan, infzero, status);

    if (status->default_nan_mode) {
        return float32_default_nan(status);
    }

    if (choice == 0) {
        return float32_maybe_silence_nan(a, status);
    }
    if (choice == 1) {
        return float32_maybe_silence_nan(b, status);
    }
    if (choice == 2) {
        return float32_maybe_silence_nan(c, status);
    }
    /* choice == 3 (or anything unexpected): produce the default NaN */
    return float32_default_nan(status);
}
#ifdef NO_SIGNALING_NANS
int float64_is_quiet_nan(float64 a_, float_status *status)
{
@ -932,57 +881,6 @@ static float64 propagateFloat64NaN(float64 a, float64 b, float_status *status)
}
}
/*----------------------------------------------------------------------------
| Takes three double-precision floating-point values `a', `b' and `c', one
| of which is a NaN, and returns the NaN to propagate.  Raises the invalid
| exception if any of the three is a signaling NaN.  `infzero' reports
| whether a*b was 0*inf or inf*0 (in which case c is certainly a NaN and
| which NaN to return is implementation defined, so the decision is left
| to the target-specific pickNaNMulAdd()).
*----------------------------------------------------------------------------*/
static float64 propagateFloat64MulAddNaN(float64 a, float64 b,
                                         float64 c, flag infzero,
                                         float_status *status)
{
    flag a_is_qnan = float64_is_quiet_nan(a, status);
    flag a_is_snan = float64_is_signaling_nan(a, status);
    flag b_is_qnan = float64_is_quiet_nan(b, status);
    flag b_is_snan = float64_is_signaling_nan(b, status);
    flag c_is_qnan = float64_is_quiet_nan(c, status);
    flag c_is_snan = float64_is_signaling_nan(c, status);
    int choice;

    if (a_is_snan || b_is_snan || c_is_snan) {
        float_raise(float_flag_invalid, status);
    }

    /* Consult the target hook even in default-NaN mode: it must get the
     * opportunity to set the Invalid flag for the infzero case.
     */
    choice = pickNaNMulAdd(a_is_qnan, a_is_snan,
                           b_is_qnan, b_is_snan,
                           c_is_qnan, c_is_snan, infzero, status);

    if (status->default_nan_mode) {
        return float64_default_nan(status);
    }

    if (choice == 0) {
        return float64_maybe_silence_nan(a, status);
    }
    if (choice == 1) {
        return float64_maybe_silence_nan(b, status);
    }
    if (choice == 2) {
        return float64_maybe_silence_nan(c, status);
    }
    /* choice == 3 (or anything unexpected): produce the default NaN */
    return float64_default_nan(status);
}
#ifdef NO_SIGNALING_NANS
int floatx80_is_quiet_nan(floatx80 a_, float_status *status)
{

View file

@ -669,6 +669,40 @@ static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
g_assert_not_reached();
}
/* Given that at least one of a, b and c is a NaN, decide which value a
 * fused multiply-add should propagate.  Any signaling NaN raises the
 * invalid exception.  Outside default-NaN mode the actual selection is
 * delegated to the target-specific pickNaNMulAdd(); the chosen operand
 * is marked float_class_msnan so a later step can quieten it.
 */
static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
                                  bool inf_zero, float_status *s)
{
    int which;

    if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
        s->float_exception_flags |= float_flag_invalid;
    }

    if (s->default_nan_mode) {
        a.cls = float_class_dnan;
        return a;
    }

    which = pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
                          is_qnan(b.cls), is_snan(b.cls),
                          is_qnan(c.cls), is_snan(c.cls),
                          inf_zero, s);

    if (which == 3) {
        /* No operand is propagated: produce the default NaN. */
        a.cls = float_class_dnan;
        return a;
    }

    if (which == 1) {
        a = b;
    } else if (which == 2) {
        a = c;
    } else if (which != 0) {
        g_assert_not_reached();
    }

    /* Whichever NaN was chosen, mark it for silencing. */
    a.cls = float_class_msnan;
    return a;
}
/*
* Returns the result of adding or subtracting the floating-point
* values `a' and `b'. The operation is performed according to the
@ -816,6 +850,244 @@ float64 QEMU_FLATTEN float64_mul(float64 a, float64 b,
return float64_round_pack_canonical(pr, status);
}
/*
* Returns the result of multiplying the floating-point values `a' and
* `b' then adding 'c', with no intermediate rounding step after the
* multiplication. The operation is performed according to the
* IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
* The flags argument allows the caller to select negation of the
* addend, the intermediate product, or the final result. (The
* difference between this and having the caller do a separate
* negation is that negating externally will flip the sign bit on
* NaNs.)
*/
/* Core of the fused multiply-add: computes a * b + c on decomposed
 * FloatParts with a single rounding (performed later by the caller's
 * round-and-pack step).  `flags' is a mask of float_muladd_* options;
 * see the block comment above for their semantics. */
static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
int flags, float_status *s)
{
/* a*b was inf*0 or 0*inf iff the pair of classes {a.cls, b.cls} is
 * exactly {inf, zero}; test that by comparing bitmasks over the
 * FloatClass enum. */
bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
((1 << float_class_inf) | (1 << float_class_zero));
bool p_sign;            /* sign of the intermediate product */
bool sign_flip = flags & float_muladd_negate_result;
FloatClass p_class;     /* class of the intermediate product */
uint64_t hi, lo;        /* 128-bit product significand */
int p_exp;              /* exponent of the intermediate product */
/* It is implementation-defined whether the cases of (0,inf,qnan)
 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
 * they return if they do), so we have to hand this information
 * off to the target-specific pick-a-NaN routine.
 */
if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
return pick_nan_muladd(a, b, c, inf_zero, s);
}
/* inf*0 with no NaN operand: invalid operation, default NaN result */
if (inf_zero) {
s->float_exception_flags |= float_flag_invalid;
a.cls = float_class_dnan;
return a;
}
if (flags & float_muladd_negate_c) {
c.sign ^= 1;
}
/* Work out the sign and class of the product */
p_sign = a.sign ^ b.sign;
if (flags & float_muladd_negate_product) {
p_sign ^= 1;
}
if (a.cls == float_class_inf || b.cls == float_class_inf) {
p_class = float_class_inf;
} else if (a.cls == float_class_zero || b.cls == float_class_zero) {
p_class = float_class_zero;
} else {
p_class = float_class_normal;
}
/* Infinite addend dominates, unless we are adding opposite-signed
 * infinities, which is invalid. */
if (c.cls == float_class_inf) {
if (p_class == float_class_inf && p_sign != c.sign) {
s->float_exception_flags |= float_flag_invalid;
a.cls = float_class_dnan;
} else {
a.cls = float_class_inf;
a.sign = c.sign ^ sign_flip;
}
return a;
}
if (p_class == float_class_inf) {
a.cls = float_class_inf;
a.sign = p_sign ^ sign_flip;
return a;
}
/* Zero product: the result is just c (possibly halved/negated) */
if (p_class == float_class_zero) {
if (c.cls == float_class_zero) {
if (p_sign != c.sign) {
/* exact zero from opposite signs: round-down mode yields -0 */
p_sign = s->float_rounding_mode == float_round_down;
}
c.sign = p_sign;
} else if (flags & float_muladd_halve_result) {
c.exp -= 1;
}
c.sign ^= sign_flip;
return c;
}
/* a & b should be normals now... */
assert(a.cls == float_class_normal &&
b.cls == float_class_normal);
p_exp = a.exp + b.exp;
/* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
 * result.
 */
mul64To128(a.frac, b.frac, &hi, &lo);
/* binary point now at bit 124 */
/* check for overflow */
if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
shift128RightJamming(hi, lo, 1, &hi, &lo);
p_exp += 1;
}
/* + add/sub */
if (c.cls == float_class_zero) {
/* move binary point back to 62 */
shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
} else {
int exp_diff = p_exp - c.exp;
if (p_sign == c.sign) {
/* Addition */
if (exp_diff <= 0) {
/* c's exponent is at least as big: shift the product down to
 * c's scale (restoring the binary point to 62 at the same
 * time) and add c's fraction directly. */
shift128RightJamming(hi, lo,
DECOMPOSED_BINARY_POINT - exp_diff,
&hi, &lo);
lo += c.frac;
p_exp = c.exp;
} else {
uint64_t c_hi, c_lo;
/* shift c to the same binary point as the product (124) */
c_hi = c.frac >> 2;
c_lo = 0;
shift128RightJamming(c_hi, c_lo,
exp_diff,
&c_hi, &c_lo);
add128(hi, lo, c_hi, c_lo, &hi, &lo);
/* move binary point back to 62 */
shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
}
/* carry out of the addition: renormalize by one bit */
if (lo & DECOMPOSED_OVERFLOW_BIT) {
shift64RightJamming(lo, 1, &lo);
p_exp += 1;
}
} else {
/* Subtraction */
uint64_t c_hi, c_lo;
/* make C binary point match product at bit 124 */
c_hi = c.frac >> 2;
c_lo = 0;
if (exp_diff <= 0) {
shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
/* subtract the smaller magnitude from the larger; when c wins
 * the result's sign flips and c's exponent is kept */
if (exp_diff == 0
&&
(hi > c_hi || (hi == c_hi && lo >= c_lo))) {
sub128(hi, lo, c_hi, c_lo, &hi, &lo);
} else {
sub128(c_hi, c_lo, hi, lo, &hi, &lo);
p_sign ^= 1;
p_exp = c.exp;
}
} else {
shift128RightJamming(c_hi, c_lo,
exp_diff,
&c_hi, &c_lo);
sub128(hi, lo, c_hi, c_lo, &hi, &lo);
}
if (hi == 0 && lo == 0) {
/* exact cancellation: signed zero per the rounding mode */
a.cls = float_class_zero;
a.sign = s->float_rounding_mode == float_round_down;
a.sign ^= sign_flip;
return a;
} else {
int shift;
if (hi != 0) {
shift = clz64(hi);
} else {
shift = clz64(lo) + 64;
}
/* Normalizing to a binary point of 124 is the
correct adjust for the exponent. However since we're
shifting, we might as well put the binary point back
at 62 where we really want it. Therefore shift as
if we're leaving 1 bit at the top of the word, but
adjust the exponent as if we're leaving 3 bits. */
shift -= 1;
if (shift >= 64) {
lo = lo << (shift - 64);
} else {
hi = (hi << shift) | (lo >> (64 - shift));
/* fold any remaining low bits in as a sticky bit */
lo = hi | ((lo << shift) != 0);
}
p_exp -= shift - 2;
}
}
}
if (flags & float_muladd_halve_result) {
p_exp -= 1;
}
/* finally prepare our result */
a.cls = float_class_normal;
a.sign = p_sign ^ sign_flip;
a.exp = p_exp;
a.frac = lo;
return a;
}
/*
 * IEEE-754-2008 fused multiply-add for half precision: computes
 * a * b + c with a single rounding at the end.  `flags' selects
 * optional negation of the addend, product or result
 * (float_muladd_* bits).
 */
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
int flags, float_status *status)
{
    FloatParts parts_a = float16_unpack_canonical(a, status);
    FloatParts parts_b = float16_unpack_canonical(b, status);
    FloatParts parts_c = float16_unpack_canonical(c, status);
    FloatParts result = muladd_floats(parts_a, parts_b, parts_c,
                                      flags, status);

    return float16_round_pack_canonical(result, status);
}
/*
 * IEEE-754-2008 fused multiply-add for single precision: computes
 * a * b + c with a single rounding at the end.  `flags' selects
 * optional negation of the addend, product or result
 * (float_muladd_* bits).
 */
float32 QEMU_FLATTEN float32_muladd(float32 a, float32 b, float32 c,
int flags, float_status *status)
{
    FloatParts parts_a = float32_unpack_canonical(a, status);
    FloatParts parts_b = float32_unpack_canonical(b, status);
    FloatParts parts_c = float32_unpack_canonical(c, status);
    FloatParts result = muladd_floats(parts_a, parts_b, parts_c,
                                      flags, status);

    return float32_round_pack_canonical(result, status);
}
/*
 * IEEE-754-2008 fused multiply-add for double precision: computes
 * a * b + c with a single rounding at the end.  `flags' selects
 * optional negation of the addend, product or result
 * (float_muladd_* bits).
 */
float64 QEMU_FLATTEN float64_muladd(float64 a, float64 b, float64 c,
int flags, float_status *status)
{
    FloatParts parts_a = float64_unpack_canonical(a, status);
    FloatParts parts_b = float64_unpack_canonical(b, status);
    FloatParts parts_c = float64_unpack_canonical(c, status);
    FloatParts result = muladd_floats(parts_a, parts_b, parts_c,
                                      flags, status);

    return float64_round_pack_canonical(result, status);
}
/*
* Returns the result of dividing the floating-point value `a' by the
* corresponding value `b'. The operation is performed according to
@ -2787,231 +3059,6 @@ float32 float32_rem(float32 a, float32 b, float_status *status)
}
/*----------------------------------------------------------------------------
| Returns the result of multiplying the single-precision floating-point values
| `a' and `b' then adding 'c', with no intermediate rounding step after the
| multiplication. The operation is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic 754-2008.
| The flags argument allows the caller to select negation of the
| addend, the intermediate product, or the final result. (The difference
| between this and having the caller do a separate negation is that negating
| externally will flip the sign bit on NaNs.)
*----------------------------------------------------------------------------*/
float32 float32_muladd(float32 a, float32 b, float32 c, int flags, float_status *status)
{
flag aSign, bSign, cSign, zSign;
int aExp, bExp, cExp, pExp, zExp, expDiff;
uint32_t aSig, bSig, cSig;
flag pInf, pZero, pSign;    /* classification/sign of the product a*b */
uint64_t pSig64, cSig64, zSig64;
uint32_t pSig;
int shiftcount;
flag signflip, infzero;
a = float32_squash_input_denormal(a, status);
b = float32_squash_input_denormal(b, status);
c = float32_squash_input_denormal(c, status);
aSig = extractFloat32Frac(a);
aExp = extractFloat32Exp(a);
aSign = extractFloat32Sign(a);
bSig = extractFloat32Frac(b);
bExp = extractFloat32Exp(b);
bSign = extractFloat32Sign(b);
cSig = extractFloat32Frac(c);
cExp = extractFloat32Exp(c);
cSign = extractFloat32Sign(c);
/* a*b is 0*inf or inf*0 (exp 0xff with zero frac == infinity) */
infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
(aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
/* It is implementation-defined whether the cases of (0,inf,qnan)
 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
 * they return if they do), so we have to hand this information
 * off to the target-specific pick-a-NaN routine.
 */
if (((aExp == 0xff) && aSig) ||
((bExp == 0xff) && bSig) ||
((cExp == 0xff) && cSig)) {
return propagateFloat32MulAddNaN(a, b, c, infzero, status);
}
/* inf*0 with no NaN operand: invalid operation */
if (infzero) {
float_raise(float_flag_invalid, status);
return float32_default_nan(status);
}
if (flags & float_muladd_negate_c) {
cSign ^= 1;
}
signflip = (flags & float_muladd_negate_result) ? 1 : 0;
/* Work out the sign and type of the product */
pSign = aSign ^ bSign;
if (flags & float_muladd_negate_product) {
pSign ^= 1;
}
pInf = (aExp == 0xff) || (bExp == 0xff);
pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
if (cExp == 0xff) {
if (pInf && (pSign ^ cSign)) {
/* addition of opposite-signed infinities => InvalidOperation */
float_raise(float_flag_invalid, status);
return float32_default_nan(status);
}
/* Otherwise generate an infinity of the same sign */
return packFloat32(cSign ^ signflip, 0xff, 0);
}
if (pInf) {
return packFloat32(pSign ^ signflip, 0xff, 0);
}
/* From here on the product is finite; handle a zero product specially */
if (pZero) {
if (cExp == 0) {
if (cSig == 0) {
/* Adding two exact zeroes */
if (pSign == cSign) {
zSign = pSign;
} else if (status->float_rounding_mode == float_round_down) {
/* opposite-signed zeroes: -0 only in round-down mode */
zSign = 1;
} else {
zSign = 0;
}
return packFloat32(zSign ^ signflip, 0, 0);
}
/* Exact zero plus a denorm */
if (status->flush_to_zero) {
float_raise(float_flag_output_denormal, status);
return packFloat32(cSign ^ signflip, 0, 0);
}
}
/* Zero plus something non-zero : just return the something */
if (flags & float_muladd_halve_result) {
if (cExp == 0) {
normalizeFloat32Subnormal(cSig, &cExp, &cSig);
}
/* Subtract one to halve, and one again because roundAndPackFloat32
 * wants one less than the true exponent.
 */
cExp -= 2;
cSig = (cSig | 0x00800000) << 7;
return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status);
}
return packFloat32(cSign ^ signflip, cExp, cSig);
}
if (aExp == 0) {
normalizeFloat32Subnormal(aSig, &aExp, &aSig);
}
if (bExp == 0) {
normalizeFloat32Subnormal(bSig, &bExp, &bSig);
}
/* Calculate the actual result a * b + c */
/* Multiply first; this is easy. */
/* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
 * because we want the true exponent, not the "one-less-than"
 * flavour that roundAndPackFloat32() takes.
 */
pExp = aExp + bExp - 0x7e;
aSig = (aSig | 0x00800000) << 7;
bSig = (bSig | 0x00800000) << 8;
pSig64 = (uint64_t)aSig * bSig;
/* normalize so the explicit bit lands in position 62 */
if ((int64_t)(pSig64 << 1) >= 0) {
pSig64 <<= 1;
pExp--;
}
zSign = pSign ^ signflip;
/* Now pSig64 is the significand of the multiply, with the explicit bit in
 * position 62.
 */
if (cExp == 0) {
if (!cSig) {
/* Throw out the special case of c being an exact zero now */
shift64RightJamming(pSig64, 32, &pSig64);
pSig = (uint32_t)pSig64;
if (flags & float_muladd_halve_result) {
pExp--;
}
return roundAndPackFloat32(zSign, pExp - 1,
pSig, status);
}
normalizeFloat32Subnormal(cSig, &cExp, &cSig);
}
/* Align c's significand to the product's: explicit bit at position 62 */
cSig64 = (uint64_t)cSig << (62 - 23);
cSig64 |= LIT64(0x4000000000000000);
expDiff = pExp - cExp;
if (pSign == cSign) {
/* Addition */
if (expDiff > 0) {
/* scale c to match p */
shift64RightJamming(cSig64, expDiff, &cSig64);
zExp = pExp;
} else if (expDiff < 0) {
/* scale p to match c */
shift64RightJamming(pSig64, -expDiff, &pSig64);
zExp = cExp;
} else {
/* no scaling needed */
zExp = cExp;
}
/* Add significands and make sure explicit bit ends up in posn 62 */
zSig64 = pSig64 + cSig64;
if ((int64_t)zSig64 < 0) {
shift64RightJamming(zSig64, 1, &zSig64);
} else {
zExp--;
}
} else {
/* Subtraction */
if (expDiff > 0) {
shift64RightJamming(cSig64, expDiff, &cSig64);
zSig64 = pSig64 - cSig64;
zExp = pExp;
} else if (expDiff < 0) {
shift64RightJamming(pSig64, -expDiff, &pSig64);
zSig64 = cSig64 - pSig64;
zExp = cExp;
zSign ^= 1;
} else {
/* equal exponents: subtract the smaller magnitude from the larger */
zExp = pExp;
if (cSig64 < pSig64) {
zSig64 = pSig64 - cSig64;
} else if (pSig64 < cSig64) {
zSig64 = cSig64 - pSig64;
zSign ^= 1;
} else {
/* Exact zero */
zSign = signflip;
if (status->float_rounding_mode == float_round_down) {
zSign ^= 1;
}
return packFloat32(zSign, 0, 0);
}
}
--zExp;
/* Normalize to put the explicit bit back into bit 62. */
shiftcount = countLeadingZeros64(zSig64) - 1;
zSig64 <<= shiftcount;
zExp -= shiftcount;
}
if (flags & float_muladd_halve_result) {
zExp--;
}
/* drop to 32 bits (with jamming) for single-precision rounding */
shift64RightJamming(zSig64, 32, &zSig64);
return roundAndPackFloat32(zSign, zExp, (uint32_t)zSig64, status);
}
/*----------------------------------------------------------------------------
| Returns the square root of the single-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
@ -4232,252 +4279,6 @@ float64 float64_rem(float64 a, float64 b, float_status *status)
}
/*----------------------------------------------------------------------------
| Returns the result of multiplying the double-precision floating-point values
| `a' and `b' then adding 'c', with no intermediate rounding step after the
| multiplication. The operation is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic 754-2008.
| The flags argument allows the caller to select negation of the
| addend, the intermediate product, or the final result. (The difference
| between this and having the caller do a separate negation is that negating
| externally will flip the sign bit on NaNs.)
*----------------------------------------------------------------------------*/
float64 float64_muladd(float64 a, float64 b, float64 c, int flags, float_status *status)
{
flag aSign, bSign, cSign, zSign;
int aExp, bExp, cExp, pExp, zExp, expDiff;
uint64_t aSig, bSig, cSig;
flag pInf, pZero, pSign;    /* classification/sign of the product a*b */
uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
int shiftcount;
flag signflip, infzero;
a = float64_squash_input_denormal(a, status);
b = float64_squash_input_denormal(b, status);
c = float64_squash_input_denormal(c, status);
aSig = extractFloat64Frac(a);
aExp = extractFloat64Exp(a);
aSign = extractFloat64Sign(a);
bSig = extractFloat64Frac(b);
bExp = extractFloat64Exp(b);
bSign = extractFloat64Sign(b);
cSig = extractFloat64Frac(c);
cExp = extractFloat64Exp(c);
cSign = extractFloat64Sign(c);
/* a*b is 0*inf or inf*0 (exp 0x7ff with zero frac == infinity) */
infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
(aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
/* It is implementation-defined whether the cases of (0,inf,qnan)
 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
 * they return if they do), so we have to hand this information
 * off to the target-specific pick-a-NaN routine.
 */
if (((aExp == 0x7ff) && aSig) ||
((bExp == 0x7ff) && bSig) ||
((cExp == 0x7ff) && cSig)) {
return propagateFloat64MulAddNaN(a, b, c, infzero, status);
}
/* inf*0 with no NaN operand: invalid operation */
if (infzero) {
float_raise(float_flag_invalid, status);
return float64_default_nan(status);
}
if (flags & float_muladd_negate_c) {
cSign ^= 1;
}
signflip = (flags & float_muladd_negate_result) ? 1 : 0;
/* Work out the sign and type of the product */
pSign = aSign ^ bSign;
if (flags & float_muladd_negate_product) {
pSign ^= 1;
}
pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
if (cExp == 0x7ff) {
if (pInf && (pSign ^ cSign)) {
/* addition of opposite-signed infinities => InvalidOperation */
float_raise(float_flag_invalid, status);
return float64_default_nan(status);
}
/* Otherwise generate an infinity of the same sign */
return packFloat64(cSign ^ signflip, 0x7ff, 0);
}
if (pInf) {
return packFloat64(pSign ^ signflip, 0x7ff, 0);
}
/* From here on the product is finite; handle a zero product specially */
if (pZero) {
if (cExp == 0) {
if (cSig == 0) {
/* Adding two exact zeroes */
if (pSign == cSign) {
zSign = pSign;
} else if (status->float_rounding_mode == float_round_down) {
/* opposite-signed zeroes: -0 only in round-down mode */
zSign = 1;
} else {
zSign = 0;
}
return packFloat64(zSign ^ signflip, 0, 0);
}
/* Exact zero plus a denorm */
if (status->flush_to_zero) {
float_raise(float_flag_output_denormal, status);
return packFloat64(cSign ^ signflip, 0, 0);
}
}
/* Zero plus something non-zero : just return the something */
if (flags & float_muladd_halve_result) {
if (cExp == 0) {
normalizeFloat64Subnormal(cSig, &cExp, &cSig);
}
/* Subtract one to halve, and one again because roundAndPackFloat64
 * wants one less than the true exponent.
 */
cExp -= 2;
cSig = (cSig | 0x0010000000000000ULL) << 10;
return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status);
}
return packFloat64(cSign ^ signflip, cExp, cSig);
}
if (aExp == 0) {
normalizeFloat64Subnormal(aSig, &aExp, &aSig);
}
if (bExp == 0) {
normalizeFloat64Subnormal(bSig, &bExp, &bSig);
}
/* Calculate the actual result a * b + c */
/* Multiply first; this is easy. */
/* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
 * because we want the true exponent, not the "one-less-than"
 * flavour that roundAndPackFloat64() takes.
 */
pExp = aExp + bExp - 0x3fe;
aSig = (aSig | LIT64(0x0010000000000000))<<10;
bSig = (bSig | LIT64(0x0010000000000000))<<11;
mul64To128(aSig, bSig, &pSig0, &pSig1);
/* normalize so the explicit bit lands in position 126 */
if ((int64_t)(pSig0 << 1) >= 0) {
shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
pExp--;
}
zSign = pSign ^ signflip;
/* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
 * bit in position 126.
 */
if (cExp == 0) {
if (!cSig) {
/* Throw out the special case of c being an exact zero now */
shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
if (flags & float_muladd_halve_result) {
pExp--;
}
return roundAndPackFloat64(zSign, pExp - 1,
pSig1, status);
}
normalizeFloat64Subnormal(cSig, &cExp, &cSig);
}
/* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
 * significand of the addend, with the explicit bit in position 126.
 */
cSig0 = cSig << (126 - 64 - 52);
cSig1 = 0;
cSig0 |= LIT64(0x4000000000000000);
expDiff = pExp - cExp;
if (pSign == cSign) {
/* Addition */
if (expDiff > 0) {
/* scale c to match p */
shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
zExp = pExp;
} else if (expDiff < 0) {
/* scale p to match c */
shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
zExp = cExp;
} else {
/* no scaling needed */
zExp = cExp;
}
/* Add significands and make sure explicit bit ends up in posn 126 */
add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
if ((int64_t)zSig0 < 0) {
shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
} else {
zExp--;
}
/* drop to 64 bits (with jamming) for double-precision rounding */
shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
if (flags & float_muladd_halve_result) {
zExp--;
}
return roundAndPackFloat64(zSign, zExp, zSig1, status);
} else {
/* Subtraction */
if (expDiff > 0) {
shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
zExp = pExp;
} else if (expDiff < 0) {
shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
zExp = cExp;
zSign ^= 1;
} else {
/* equal exponents: subtract the smaller magnitude from the larger */
zExp = pExp;
if (lt128(cSig0, cSig1, pSig0, pSig1)) {
sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
} else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
zSign ^= 1;
} else {
/* Exact zero */
zSign = signflip;
if (status->float_rounding_mode == float_round_down) {
zSign ^= 1;
}
return packFloat64(zSign, 0, 0);
}
}
--zExp;
/* Do the equivalent of normalizeRoundAndPackFloat64() but
 * starting with the significand in a pair of uint64_t.
 */
if (zSig0) {
shiftcount = countLeadingZeros64(zSig0) - 1;
shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
/* keep a sticky bit for anything still set in the low word */
if (zSig1) {
zSig0 |= 1;
}
zExp -= shiftcount;
} else {
shiftcount = countLeadingZeros64(zSig1);
if (shiftcount == 0) {
zSig0 = (zSig1 >> 1) | (zSig1 & 1);
zExp -= 63;
} else {
shiftcount--;
zSig0 = zSig1 << shiftcount;
zExp -= (shiftcount + 64);
}
}
if (flags & float_muladd_halve_result) {
zExp--;
}
return roundAndPackFloat64(zSign, zExp, zSig0, status);
}
}
/*----------------------------------------------------------------------------
| Returns the square root of the double-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary

View file

@ -500,6 +500,7 @@ symbols = (
'float16_is_signaling_nan',
'float16_maybe_silence_nan',
'float16_mul',
'float16_muladd',
'float16_squash_input_denormal',
'float16_sub',
'float16_to_float32',

View file

@ -247,6 +247,7 @@ float64 float16_to_float64(float16 a, flag ieee, float_status *status);
float16 float16_add(float16, float16, float_status *status);
float16 float16_sub(float16, float16, float_status *status);
float16 float16_mul(float16, float16, float_status *status);
float16 float16_muladd(float16, float16, float16, int, float_status *status);
float16 float16_div(float16, float16, float_status *status);
int float16_is_quiet_nan(float16, float_status *status);

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_m68k
#define float16_maybe_silence_nan float16_maybe_silence_nan_m68k
#define float16_mul float16_mul_m68k
#define float16_muladd float16_muladd_m68k
#define float16_squash_input_denormal float16_squash_input_denormal_m68k
#define float16_sub float16_sub_m68k
#define float16_to_float32 float16_to_float32_m68k

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_mips
#define float16_maybe_silence_nan float16_maybe_silence_nan_mips
#define float16_mul float16_mul_mips
#define float16_muladd float16_muladd_mips
#define float16_squash_input_denormal float16_squash_input_denormal_mips
#define float16_sub float16_sub_mips
#define float16_to_float32 float16_to_float32_mips

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_mips64
#define float16_maybe_silence_nan float16_maybe_silence_nan_mips64
#define float16_mul float16_mul_mips64
#define float16_muladd float16_muladd_mips64
#define float16_squash_input_denormal float16_squash_input_denormal_mips64
#define float16_sub float16_sub_mips64
#define float16_to_float32 float16_to_float32_mips64

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_mips64el
#define float16_maybe_silence_nan float16_maybe_silence_nan_mips64el
#define float16_mul float16_mul_mips64el
#define float16_muladd float16_muladd_mips64el
#define float16_squash_input_denormal float16_squash_input_denormal_mips64el
#define float16_sub float16_sub_mips64el
#define float16_to_float32 float16_to_float32_mips64el

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_mipsel
#define float16_maybe_silence_nan float16_maybe_silence_nan_mipsel
#define float16_mul float16_mul_mipsel
#define float16_muladd float16_muladd_mipsel
#define float16_squash_input_denormal float16_squash_input_denormal_mipsel
#define float16_sub float16_sub_mipsel
#define float16_to_float32 float16_to_float32_mipsel

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_powerpc
#define float16_maybe_silence_nan float16_maybe_silence_nan_powerpc
#define float16_mul float16_mul_powerpc
#define float16_muladd float16_muladd_powerpc
#define float16_squash_input_denormal float16_squash_input_denormal_powerpc
#define float16_sub float16_sub_powerpc
#define float16_to_float32 float16_to_float32_powerpc

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_sparc
#define float16_maybe_silence_nan float16_maybe_silence_nan_sparc
#define float16_mul float16_mul_sparc
#define float16_muladd float16_muladd_sparc
#define float16_squash_input_denormal float16_squash_input_denormal_sparc
#define float16_sub float16_sub_sparc
#define float16_to_float32 float16_to_float32_sparc

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_sparc64
#define float16_maybe_silence_nan float16_maybe_silence_nan_sparc64
#define float16_mul float16_mul_sparc64
#define float16_muladd float16_muladd_sparc64
#define float16_squash_input_denormal float16_squash_input_denormal_sparc64
#define float16_sub float16_sub_sparc64
#define float16_to_float32 float16_to_float32_sparc64

View file

@ -494,6 +494,7 @@
#define float16_is_signaling_nan float16_is_signaling_nan_x86_64
#define float16_maybe_silence_nan float16_maybe_silence_nan_x86_64
#define float16_mul float16_mul_x86_64
#define float16_muladd float16_muladd_x86_64
#define float16_squash_input_denormal float16_squash_input_denormal_x86_64
#define float16_sub float16_sub_x86_64
#define float16_to_float32 float16_to_float32_x86_64