aboutsummaryrefslogtreecommitdiff
path: root/fpu
diff options
context:
space:
mode:
authorAlex Bennée <alex.bennee@linaro.org>2017-11-27 14:15:17 +0000
committerAlex Bennée <alex.bennee@linaro.org>2018-02-21 10:20:53 +0000
commit6fff216769cf7eaa3961c85dee7a72838696d365 (patch)
tree1d0dbc2ee3e15e5257029c84bb47e4b9c1adb366 /fpu
parenta90119b5a2c174250601be6503b91e5c9df6e83b (diff)
fpu/softfloat: re-factor add/sub
We can now add float16_add/sub and use the common decompose and canonicalize functions to have a single implementation for float16/32/64 add and sub functions. Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Diffstat (limited to 'fpu')
-rw-r--r--fpu/softfloat.c892
1 files changed, 465 insertions, 427 deletions
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 568d555595..2190e7de56 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -83,6 +83,7 @@ this code that are retained.
* target-dependent and needs the TARGET_* macros.
*/
#include "qemu/osdep.h"
+#include "qemu/bitops.h"
#include "fpu/softfloat.h"
/* We only need stdlib for abort() */
@@ -270,6 +271,470 @@ static const FloatFmt float64_params = {
FLOAT_PARAMS(11, 52)
};
+/* Unpack a float to parts, but do not canonicalize. */
+static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
+{
+ const int sign_pos = fmt.frac_size + fmt.exp_size;
+
+ return (FloatParts) {
+ .cls = float_class_unclassified,
+ .sign = extract64(raw, sign_pos, 1),
+ .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
+ .frac = extract64(raw, 0, fmt.frac_size),
+ };
+}
+
+static inline FloatParts float16_unpack_raw(float16 f)
+{
+ return unpack_raw(float16_params, f);
+}
+
+static inline FloatParts float32_unpack_raw(float32 f)
+{
+ return unpack_raw(float32_params, f);
+}
+
+static inline FloatParts float64_unpack_raw(float64 f)
+{
+ return unpack_raw(float64_params, f);
+}
+
+/* Pack a float from parts, but do not canonicalize. */
+static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
+{
+ const int sign_pos = fmt.frac_size + fmt.exp_size;
+ uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
+ return deposit64(ret, sign_pos, 1, p.sign);
+}
+
+static inline float16 float16_pack_raw(FloatParts p)
+{
+ return make_float16(pack_raw(float16_params, p));
+}
+
+static inline float32 float32_pack_raw(FloatParts p)
+{
+ return make_float32(pack_raw(float32_params, p));
+}
+
+static inline float64 float64_pack_raw(FloatParts p)
+{
+ return make_float64(pack_raw(float64_params, p));
+}
+
+/* Canonicalize EXP and FRAC, setting CLS. */
+static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
+ float_status *status)
+{
+ if (part.exp == parm->exp_max) {
+ if (part.frac == 0) {
+ part.cls = float_class_inf;
+ } else {
+#ifdef NO_SIGNALING_NANS
+ part.cls = float_class_qnan;
+#else
+ int64_t msb = part.frac << (parm->frac_shift + 2);
+ if ((msb < 0) == status->snan_bit_is_one) {
+ part.cls = float_class_snan;
+ } else {
+ part.cls = float_class_qnan;
+ }
+#endif
+ }
+ } else if (part.exp == 0) {
+ if (likely(part.frac == 0)) {
+ part.cls = float_class_zero;
+ } else if (status->flush_inputs_to_zero) {
+ float_raise(float_flag_input_denormal, status);
+ part.cls = float_class_zero;
+ part.frac = 0;
+ } else {
+ int shift = clz64(part.frac) - 1;
+ part.cls = float_class_normal;
+ part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
+ part.frac <<= shift;
+ }
+ } else {
+ part.cls = float_class_normal;
+ part.exp -= parm->exp_bias;
+ part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
+ }
+ return part;
+}
+
+/* Round and uncanonicalize a floating-point number by parts. There
+ * are FRAC_SHIFT bits that may require rounding at the bottom of the
+ * fraction; these bits will be removed. The exponent will be biased
+ * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
+ */
+
+static FloatParts round_canonical(FloatParts p, float_status *s,
+ const FloatFmt *parm)
+{
+ const uint64_t frac_lsbm1 = parm->frac_lsbm1;
+ const uint64_t round_mask = parm->round_mask;
+ const uint64_t roundeven_mask = parm->roundeven_mask;
+ const int exp_max = parm->exp_max;
+ const int frac_shift = parm->frac_shift;
+ uint64_t frac, inc;
+ int exp, flags = 0;
+ bool overflow_norm;
+
+ frac = p.frac;
+ exp = p.exp;
+
+ switch (p.cls) {
+ case float_class_normal:
+ switch (s->float_rounding_mode) {
+ case float_round_nearest_even:
+ overflow_norm = false;
+ inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
+ break;
+ case float_round_ties_away:
+ overflow_norm = false;
+ inc = frac_lsbm1;
+ break;
+ case float_round_to_zero:
+ overflow_norm = true;
+ inc = 0;
+ break;
+ case float_round_up:
+ inc = p.sign ? 0 : round_mask;
+ overflow_norm = p.sign;
+ break;
+ case float_round_down:
+ inc = p.sign ? round_mask : 0;
+ overflow_norm = !p.sign;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ exp += parm->exp_bias;
+ if (likely(exp > 0)) {
+ if (frac & round_mask) {
+ flags |= float_flag_inexact;
+ frac += inc;
+ if (frac & DECOMPOSED_OVERFLOW_BIT) {
+ frac >>= 1;
+ exp++;
+ }
+ }
+ frac >>= frac_shift;
+
+ if (unlikely(exp >= exp_max)) {
+ flags |= float_flag_overflow | float_flag_inexact;
+ if (overflow_norm) {
+ exp = exp_max - 1;
+ frac = -1;
+ } else {
+ p.cls = float_class_inf;
+ goto do_inf;
+ }
+ }
+ } else if (s->flush_to_zero) {
+ flags |= float_flag_output_denormal;
+ p.cls = float_class_zero;
+ goto do_zero;
+ } else {
+ bool is_tiny = (s->float_detect_tininess
+ == float_tininess_before_rounding)
+ || (exp < 0)
+ || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
+
+ shift64RightJamming(frac, 1 - exp, &frac);
+ if (frac & round_mask) {
+ /* Need to recompute round-to-even. */
+ if (s->float_rounding_mode == float_round_nearest_even) {
+ inc = ((frac & roundeven_mask) != frac_lsbm1
+ ? frac_lsbm1 : 0);
+ }
+ flags |= float_flag_inexact;
+ frac += inc;
+ }
+
+ exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
+ frac >>= frac_shift;
+
+ if (is_tiny && (flags & float_flag_inexact)) {
+ flags |= float_flag_underflow;
+ }
+ if (exp == 0 && frac == 0) {
+ p.cls = float_class_zero;
+ }
+ }
+ break;
+
+ case float_class_zero:
+ do_zero:
+ exp = 0;
+ frac = 0;
+ break;
+
+ case float_class_inf:
+ do_inf:
+ exp = exp_max;
+ frac = 0;
+ break;
+
+ case float_class_qnan:
+ case float_class_snan:
+ exp = exp_max;
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ float_raise(flags, s);
+ p.exp = exp;
+ p.frac = frac;
+ return p;
+}
+
+static FloatParts float16_unpack_canonical(float16 f, float_status *s)
+{
+ return canonicalize(float16_unpack_raw(f), &float16_params, s);
+}
+
+static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
+{
+ switch (p.cls) {
+ case float_class_dnan:
+ return float16_default_nan(s);
+ case float_class_msnan:
+ return float16_maybe_silence_nan(float16_pack_raw(p), s);
+ default:
+ p = round_canonical(p, s, &float16_params);
+ return float16_pack_raw(p);
+ }
+}
+
+static FloatParts float32_unpack_canonical(float32 f, float_status *s)
+{
+ return canonicalize(float32_unpack_raw(f), &float32_params, s);
+}
+
+static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
+{
+ switch (p.cls) {
+ case float_class_dnan:
+ return float32_default_nan(s);
+ case float_class_msnan:
+ return float32_maybe_silence_nan(float32_pack_raw(p), s);
+ default:
+ p = round_canonical(p, s, &float32_params);
+ return float32_pack_raw(p);
+ }
+}
+
+static FloatParts float64_unpack_canonical(float64 f, float_status *s)
+{
+ return canonicalize(float64_unpack_raw(f), &float64_params, s);
+}
+
+static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
+{
+ switch (p.cls) {
+ case float_class_dnan:
+ return float64_default_nan(s);
+ case float_class_msnan:
+ return float64_maybe_silence_nan(float64_pack_raw(p), s);
+ default:
+ p = round_canonical(p, s, &float64_params);
+ return float64_pack_raw(p);
+ }
+}
+
+/* Simple helpers for checking if what NaN we have */
+static bool is_nan(FloatClass c)
+{
+ return unlikely(c >= float_class_qnan);
+}
+static bool is_snan(FloatClass c)
+{
+ return c == float_class_snan;
+}
+static bool is_qnan(FloatClass c)
+{
+ return c == float_class_qnan;
+}
+
+static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
+{
+ if (is_snan(a.cls) || is_snan(b.cls)) {
+ s->float_exception_flags |= float_flag_invalid;
+ }
+
+ if (s->default_nan_mode) {
+ a.cls = float_class_dnan;
+ } else {
+ if (pickNaN(is_qnan(a.cls), is_snan(a.cls),
+ is_qnan(b.cls), is_snan(b.cls),
+ a.frac > b.frac ||
+ (a.frac == b.frac && a.sign < b.sign))) {
+ a = b;
+ }
+ a.cls = float_class_msnan;
+ }
+ return a;
+}
+
+/*
+ * Returns the result of adding or subtracting the values of the
+ * floating-point values `a' and `b'. The operation is performed
+ * according to the IEC/IEEE Standard for Binary Floating-Point
+ * Arithmetic.
+ */
+
+static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
+ float_status *s)
+{
+ bool a_sign = a.sign;
+ bool b_sign = b.sign ^ subtract;
+
+ if (a_sign != b_sign) {
+ /* Subtraction */
+
+ if (a.cls == float_class_normal && b.cls == float_class_normal) {
+ if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
+ shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
+ a.frac = a.frac - b.frac;
+ } else {
+ shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
+ a.frac = b.frac - a.frac;
+ a.exp = b.exp;
+ a_sign ^= 1;
+ }
+
+ if (a.frac == 0) {
+ a.cls = float_class_zero;
+ a.sign = s->float_rounding_mode == float_round_down;
+ } else {
+ int shift = clz64(a.frac) - 1;
+ a.frac = a.frac << shift;
+ a.exp = a.exp - shift;
+ a.sign = a_sign;
+ }
+ return a;
+ }
+ if (is_nan(a.cls) || is_nan(b.cls)) {
+ return pick_nan(a, b, s);
+ }
+ if (a.cls == float_class_inf) {
+ if (b.cls == float_class_inf) {
+ float_raise(float_flag_invalid, s);
+ a.cls = float_class_dnan;
+ }
+ return a;
+ }
+ if (a.cls == float_class_zero && b.cls == float_class_zero) {
+ a.sign = s->float_rounding_mode == float_round_down;
+ return a;
+ }
+ if (a.cls == float_class_zero || b.cls == float_class_inf) {
+ b.sign = a_sign ^ 1;
+ return b;
+ }
+ if (b.cls == float_class_zero) {
+ return a;
+ }
+ } else {
+ /* Addition */
+ if (a.cls == float_class_normal && b.cls == float_class_normal) {
+ if (a.exp > b.exp) {
+ shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
+ } else if (a.exp < b.exp) {
+ shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
+ a.exp = b.exp;
+ }
+ a.frac += b.frac;
+ if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
+ a.frac >>= 1;
+ a.exp += 1;
+ }
+ return a;
+ }
+ if (is_nan(a.cls) || is_nan(b.cls)) {
+ return pick_nan(a, b, s);
+ }
+ if (a.cls == float_class_inf || b.cls == float_class_zero) {
+ return a;
+ }
+ if (b.cls == float_class_inf || a.cls == float_class_zero) {
+ b.sign = b_sign;
+ return b;
+ }
+ }
+ g_assert_not_reached();
+}
+
+/*
+ * Returns the result of adding or subtracting the floating-point
+ * values `a' and `b'. The operation is performed according to the
+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ */
+
+float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
+ float_status *status)
+{
+ FloatParts pa = float16_unpack_canonical(a, status);
+ FloatParts pb = float16_unpack_canonical(b, status);
+ FloatParts pr = addsub_floats(pa, pb, false, status);
+
+ return float16_round_pack_canonical(pr, status);
+}
+
+float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
+ float_status *status)
+{
+ FloatParts pa = float32_unpack_canonical(a, status);
+ FloatParts pb = float32_unpack_canonical(b, status);
+ FloatParts pr = addsub_floats(pa, pb, false, status);
+
+ return float32_round_pack_canonical(pr, status);
+}
+
+float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
+ float_status *status)
+{
+ FloatParts pa = float64_unpack_canonical(a, status);
+ FloatParts pb = float64_unpack_canonical(b, status);
+ FloatParts pr = addsub_floats(pa, pb, false, status);
+
+ return float64_round_pack_canonical(pr, status);
+}
+
+float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
+ float_status *status)
+{
+ FloatParts pa = float16_unpack_canonical(a, status);
+ FloatParts pb = float16_unpack_canonical(b, status);
+ FloatParts pr = addsub_floats(pa, pb, true, status);
+
+ return float16_round_pack_canonical(pr, status);
+}
+
+float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
+ float_status *status)
+{
+ FloatParts pa = float32_unpack_canonical(a, status);
+ FloatParts pb = float32_unpack_canonical(b, status);
+ FloatParts pr = addsub_floats(pa, pb, true, status);
+
+ return float32_round_pack_canonical(pr, status);
+}
+
+float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
+ float_status *status)
+{
+ FloatParts pa = float64_unpack_canonical(a, status);
+ FloatParts pb = float64_unpack_canonical(b, status);
+ FloatParts pr = addsub_floats(pa, pb, true, status);
+
+ return float64_round_pack_canonical(pr, status);
+}
+
/*----------------------------------------------------------------------------
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
| and 7, and returns the properly rounded 32-bit integer corresponding to the
@@ -2082,220 +2547,6 @@ float32 float32_round_to_int(float32 a, float_status *status)
}
/*----------------------------------------------------------------------------
-| Returns the result of adding the absolute values of the single-precision
-| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
-| before being returned. `zSign' is ignored if the result is a NaN.
-| The addition is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
- float_status *status)
-{
- int aExp, bExp, zExp;
- uint32_t aSig, bSig, zSig;
- int expDiff;
-
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- bSig = extractFloat32Frac( b );
- bExp = extractFloat32Exp( b );
- expDiff = aExp - bExp;
- aSig <<= 6;
- bSig <<= 6;
- if ( 0 < expDiff ) {
- if ( aExp == 0xFF ) {
- if (aSig) {
- return propagateFloat32NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig |= 0x20000000;
- }
- shift32RightJamming( bSig, expDiff, &bSig );
- zExp = aExp;
- }
- else if ( expDiff < 0 ) {
- if ( bExp == 0xFF ) {
- if (bSig) {
- return propagateFloat32NaN(a, b, status);
- }
- return packFloat32( zSign, 0xFF, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig |= 0x20000000;
- }
- shift32RightJamming( aSig, - expDiff, &aSig );
- zExp = bExp;
- }
- else {
- if ( aExp == 0xFF ) {
- if (aSig | bSig) {
- return propagateFloat32NaN(a, b, status);
- }
- return a;
- }
- if ( aExp == 0 ) {
- if (status->flush_to_zero) {
- if (aSig | bSig) {
- float_raise(float_flag_output_denormal, status);
- }
- return packFloat32(zSign, 0, 0);
- }
- return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
- }
- zSig = 0x40000000 + aSig + bSig;
- zExp = aExp;
- goto roundAndPack;
- }
- aSig |= 0x20000000;
- zSig = ( aSig + bSig )<<1;
- --zExp;
- if ( (int32_t) zSig < 0 ) {
- zSig = aSig + bSig;
- ++zExp;
- }
- roundAndPack:
- return roundAndPackFloat32(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the absolute values of the single-
-| precision floating-point values `a' and `b'. If `zSign' is 1, the
-| difference is negated before being returned. `zSign' is ignored if the
-| result is a NaN. The subtraction is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
- float_status *status)
-{
- int aExp, bExp, zExp;
- uint32_t aSig, bSig, zSig;
- int expDiff;
-
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- bSig = extractFloat32Frac( b );
- bExp = extractFloat32Exp( b );
- expDiff = aExp - bExp;
- aSig <<= 7;
- bSig <<= 7;
- if ( 0 < expDiff ) goto aExpBigger;
- if ( expDiff < 0 ) goto bExpBigger;
- if ( aExp == 0xFF ) {
- if (aSig | bSig) {
- return propagateFloat32NaN(a, b, status);
- }
- float_raise(float_flag_invalid, status);
- return float32_default_nan(status);
- }
- if ( aExp == 0 ) {
- aExp = 1;
- bExp = 1;
- }
- if ( bSig < aSig ) goto aBigger;
- if ( aSig < bSig ) goto bBigger;
- return packFloat32(status->float_rounding_mode == float_round_down, 0, 0);
- bExpBigger:
- if ( bExp == 0xFF ) {
- if (bSig) {
- return propagateFloat32NaN(a, b, status);
- }
- return packFloat32( zSign ^ 1, 0xFF, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig |= 0x40000000;
- }
- shift32RightJamming( aSig, - expDiff, &aSig );
- bSig |= 0x40000000;
- bBigger:
- zSig = bSig - aSig;
- zExp = bExp;
- zSign ^= 1;
- goto normalizeRoundAndPack;
- aExpBigger:
- if ( aExp == 0xFF ) {
- if (aSig) {
- return propagateFloat32NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig |= 0x40000000;
- }
- shift32RightJamming( bSig, expDiff, &bSig );
- aSig |= 0x40000000;
- aBigger:
- zSig = aSig - bSig;
- zExp = aExp;
- normalizeRoundAndPack:
- --zExp;
- return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the single-precision floating-point values `a'
-| and `b'. The operation is performed according to the IEC/IEEE Standard for
-| Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 float32_add(float32 a, float32 b, float_status *status)
-{
- flag aSign, bSign;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- aSign = extractFloat32Sign( a );
- bSign = extractFloat32Sign( b );
- if ( aSign == bSign ) {
- return addFloat32Sigs(a, b, aSign, status);
- }
- else {
- return subFloat32Sigs(a, b, aSign, status);
- }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the single-precision floating-point values
-| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 float32_sub(float32 a, float32 b, float_status *status)
-{
- flag aSign, bSign;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- aSign = extractFloat32Sign( a );
- bSign = extractFloat32Sign( b );
- if ( aSign == bSign ) {
- return subFloat32Sigs(a, b, aSign, status);
- }
- else {
- return addFloat32Sigs(a, b, aSign, status);
- }
-
-}
-
-/*----------------------------------------------------------------------------
| Returns the result of multiplying the single-precision floating-point values
| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
@@ -3891,219 +4142,6 @@ float64 float64_trunc_to_int(float64 a, float_status *status)
return res;
}
-/*----------------------------------------------------------------------------
-| Returns the result of adding the absolute values of the double-precision
-| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
-| before being returned. `zSign' is ignored if the result is a NaN.
-| The addition is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
- float_status *status)
-{
- int aExp, bExp, zExp;
- uint64_t aSig, bSig, zSig;
- int expDiff;
-
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- bSig = extractFloat64Frac( b );
- bExp = extractFloat64Exp( b );
- expDiff = aExp - bExp;
- aSig <<= 9;
- bSig <<= 9;
- if ( 0 < expDiff ) {
- if ( aExp == 0x7FF ) {
- if (aSig) {
- return propagateFloat64NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig |= LIT64( 0x2000000000000000 );
- }
- shift64RightJamming( bSig, expDiff, &bSig );
- zExp = aExp;
- }
- else if ( expDiff < 0 ) {
- if ( bExp == 0x7FF ) {
- if (bSig) {
- return propagateFloat64NaN(a, b, status);
- }
- return packFloat64( zSign, 0x7FF, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig |= LIT64( 0x2000000000000000 );
- }
- shift64RightJamming( aSig, - expDiff, &aSig );
- zExp = bExp;
- }
- else {
- if ( aExp == 0x7FF ) {
- if (aSig | bSig) {
- return propagateFloat64NaN(a, b, status);
- }
- return a;
- }
- if ( aExp == 0 ) {
- if (status->flush_to_zero) {
- if (aSig | bSig) {
- float_raise(float_flag_output_denormal, status);
- }
- return packFloat64(zSign, 0, 0);
- }
- return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
- }
- zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
- zExp = aExp;
- goto roundAndPack;
- }
- aSig |= LIT64( 0x2000000000000000 );
- zSig = ( aSig + bSig )<<1;
- --zExp;
- if ( (int64_t) zSig < 0 ) {
- zSig = aSig + bSig;
- ++zExp;
- }
- roundAndPack:
- return roundAndPackFloat64(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the absolute values of the double-
-| precision floating-point values `a' and `b'. If `zSign' is 1, the
-| difference is negated before being returned. `zSign' is ignored if the
-| result is a NaN. The subtraction is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
- float_status *status)
-{
- int aExp, bExp, zExp;
- uint64_t aSig, bSig, zSig;
- int expDiff;
-
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- bSig = extractFloat64Frac( b );
- bExp = extractFloat64Exp( b );
- expDiff = aExp - bExp;
- aSig <<= 10;
- bSig <<= 10;
- if ( 0 < expDiff ) goto aExpBigger;
- if ( expDiff < 0 ) goto bExpBigger;
- if ( aExp == 0x7FF ) {
- if (aSig | bSig) {
- return propagateFloat64NaN(a, b, status);
- }
- float_raise(float_flag_invalid, status);
- return float64_default_nan(status);
- }
- if ( aExp == 0 ) {
- aExp = 1;
- bExp = 1;
- }
- if ( bSig < aSig ) goto aBigger;
- if ( aSig < bSig ) goto bBigger;
- return packFloat64(status->float_rounding_mode == float_round_down, 0, 0);
- bExpBigger:
- if ( bExp == 0x7FF ) {
- if (bSig) {
- return propagateFloat64NaN(a, b, status);
- }
- return packFloat64( zSign ^ 1, 0x7FF, 0 );
- }
- if ( aExp == 0 ) {
- ++expDiff;
- }
- else {
- aSig |= LIT64( 0x4000000000000000 );
- }
- shift64RightJamming( aSig, - expDiff, &aSig );
- bSig |= LIT64( 0x4000000000000000 );
- bBigger:
- zSig = bSig - aSig;
- zExp = bExp;
- zSign ^= 1;
- goto normalizeRoundAndPack;
- aExpBigger:
- if ( aExp == 0x7FF ) {
- if (aSig) {
- return propagateFloat64NaN(a, b, status);
- }
- return a;
- }
- if ( bExp == 0 ) {
- --expDiff;
- }
- else {
- bSig |= LIT64( 0x4000000000000000 );
- }
- shift64RightJamming( bSig, expDiff, &bSig );
- aSig |= LIT64( 0x4000000000000000 );
- aBigger:
- zSig = aSig - bSig;
- zExp = aExp;
- normalizeRoundAndPack:
- --zExp;
- return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the double-precision floating-point values `a'
-| and `b'. The operation is performed according to the IEC/IEEE Standard for
-| Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 float64_add(float64 a, float64 b, float_status *status)
-{
- flag aSign, bSign;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- aSign = extractFloat64Sign( a );
- bSign = extractFloat64Sign( b );
- if ( aSign == bSign ) {
- return addFloat64Sigs(a, b, aSign, status);
- }
- else {
- return subFloat64Sigs(a, b, aSign, status);
- }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the double-precision floating-point values
-| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 float64_sub(float64 a, float64 b, float_status *status)
-{
- flag aSign, bSign;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- aSign = extractFloat64Sign( a );
- bSign = extractFloat64Sign( b );
- if ( aSign == bSign ) {
- return subFloat64Sigs(a, b, aSign, status);
- }
- else {
- return addFloat64Sigs(a, b, aSign, status);
- }
-
-}
/*----------------------------------------------------------------------------
| Returns the result of multiplying the double-precision floating-point values