blob: 83ccc4b8cd31254d449a28e29d88375a8d928aee [file] [log] [blame]
Andreas Färber8d725fa2011-03-07 01:34:04 +01001/*
2 * QEMU float support
3 *
4 * Derived from SoftFloat.
5 */
bellard158142c2005-03-13 16:54:06 +00006
7/*============================================================================
8
9This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10Package, Release 2b.
11
12Written by John R. Hauser. This work was made possible in part by the
13International Computer Science Institute, located at Suite 600, 1947 Center
14Street, Berkeley, California 94704. Funding was partially provided by the
15National Science Foundation under grant MIP-9311980. The original version
16of this code was written as part of a project to build a fixed-point vector
17processor in collaboration with the University of California at Berkeley,
18overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20arithmetic/SoftFloat.html'.
21
22THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30
31Derivative works are acceptable, even for commercial purposes, so long as
32(1) the source code for the derivative work includes prominent notice that
33the work is derivative, and (2) the source code includes prominent notice with
34these four paragraphs for those parts of this code that are retained.
35
36=============================================================================*/
37
Peter Maydell2ac8bd02011-09-26 16:56:55 +010038/* softfloat (and in particular the code in softfloat-specialize.h) is
39 * target-dependent and needs the TARGET_* macros.
40 */
41#include "config.h"
42
Paolo Bonzini6b4c3052012-10-24 13:12:00 +020043#include "fpu/softfloat.h"
bellard158142c2005-03-13 16:54:06 +000044
45/*----------------------------------------------------------------------------
46| Primitive arithmetic functions, including multi-word arithmetic, and
47| division and square root approximations. (Can be specialized to target if
48| desired.)
49*----------------------------------------------------------------------------*/
50#include "softfloat-macros.h"
51
52/*----------------------------------------------------------------------------
53| Functions and definitions to determine: (1) whether tininess for underflow
54| is detected before or after rounding by default, (2) what (if anything)
55| happens when exceptions are raised, (3) how signaling NaNs are distinguished
56| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
57| are propagated from function inputs to output. These details are target-
58| specific.
59*----------------------------------------------------------------------------*/
60#include "softfloat-specialize.h"
61
62void set_float_rounding_mode(int val STATUS_PARAM)
63{
64 STATUS(float_rounding_mode) = val;
65}
66
bellard1d6bda32005-03-13 18:52:29 +000067void set_float_exception_flags(int val STATUS_PARAM)
68{
69 STATUS(float_exception_flags) = val;
70}
71
bellard158142c2005-03-13 16:54:06 +000072void set_floatx80_rounding_precision(int val STATUS_PARAM)
73{
74 STATUS(floatx80_rounding_precision) = val;
75}
bellard158142c2005-03-13 16:54:06 +000076
77/*----------------------------------------------------------------------------
Peter Maydellbb4d4bb2011-02-10 11:28:56 +000078| Returns the fraction bits of the half-precision floating-point value `a'.
79*----------------------------------------------------------------------------*/
80
81INLINE uint32_t extractFloat16Frac(float16 a)
82{
83 return float16_val(a) & 0x3ff;
84}
85
86/*----------------------------------------------------------------------------
87| Returns the exponent bits of the half-precision floating-point value `a'.
88*----------------------------------------------------------------------------*/
89
Andreas Färber94a49d82012-04-26 00:15:56 +020090INLINE int_fast16_t extractFloat16Exp(float16 a)
Peter Maydellbb4d4bb2011-02-10 11:28:56 +000091{
92 return (float16_val(a) >> 10) & 0x1f;
93}
94
95/*----------------------------------------------------------------------------
96| Returns the sign bit of the single-precision floating-point value `a'.
97*----------------------------------------------------------------------------*/
98
99INLINE flag extractFloat16Sign(float16 a)
100{
101 return float16_val(a)>>15;
102}
103
104/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000105| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
106| and 7, and returns the properly rounded 32-bit integer corresponding to the
107| input. If `zSign' is 1, the input is negated before being converted to an
108| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
109| is simply rounded to an integer, with the inexact exception raised if the
110| input cannot be represented exactly as an integer. However, if the fixed-
111| point input is too large, the invalid exception is raised and the largest
112| positive or negative integer is returned.
113*----------------------------------------------------------------------------*/
114
Andreas Färberbb98fe42011-03-07 01:34:06 +0100115static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000116{
117 int8 roundingMode;
118 flag roundNearestEven;
119 int8 roundIncrement, roundBits;
Peter Maydell760e1412012-04-05 19:12:35 +0100120 int32_t z;
bellard158142c2005-03-13 16:54:06 +0000121
122 roundingMode = STATUS(float_rounding_mode);
123 roundNearestEven = ( roundingMode == float_round_nearest_even );
124 roundIncrement = 0x40;
125 if ( ! roundNearestEven ) {
126 if ( roundingMode == float_round_to_zero ) {
127 roundIncrement = 0;
128 }
129 else {
130 roundIncrement = 0x7F;
131 if ( zSign ) {
132 if ( roundingMode == float_round_up ) roundIncrement = 0;
133 }
134 else {
135 if ( roundingMode == float_round_down ) roundIncrement = 0;
136 }
137 }
138 }
139 roundBits = absZ & 0x7F;
140 absZ = ( absZ + roundIncrement )>>7;
141 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
142 z = absZ;
143 if ( zSign ) z = - z;
144 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
145 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +0100146 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +0000147 }
148 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
149 return z;
150
151}
152
153/*----------------------------------------------------------------------------
154| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
155| `absZ1', with binary point between bits 63 and 64 (between the input words),
156| and returns the properly rounded 64-bit integer corresponding to the input.
157| If `zSign' is 1, the input is negated before being converted to an integer.
158| Ordinarily, the fixed-point input is simply rounded to an integer, with
159| the inexact exception raised if the input cannot be represented exactly as
160| an integer. However, if the fixed-point input is too large, the invalid
161| exception is raised and the largest positive or negative integer is
162| returned.
163*----------------------------------------------------------------------------*/
164
Andreas Färberbb98fe42011-03-07 01:34:06 +0100165static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000166{
167 int8 roundingMode;
168 flag roundNearestEven, increment;
Peter Maydell760e1412012-04-05 19:12:35 +0100169 int64_t z;
bellard158142c2005-03-13 16:54:06 +0000170
171 roundingMode = STATUS(float_rounding_mode);
172 roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färberbb98fe42011-03-07 01:34:06 +0100173 increment = ( (int64_t) absZ1 < 0 );
bellard158142c2005-03-13 16:54:06 +0000174 if ( ! roundNearestEven ) {
175 if ( roundingMode == float_round_to_zero ) {
176 increment = 0;
177 }
178 else {
179 if ( zSign ) {
180 increment = ( roundingMode == float_round_down ) && absZ1;
181 }
182 else {
183 increment = ( roundingMode == float_round_up ) && absZ1;
184 }
185 }
186 }
187 if ( increment ) {
188 ++absZ0;
189 if ( absZ0 == 0 ) goto overflow;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100190 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
bellard158142c2005-03-13 16:54:06 +0000191 }
192 z = absZ0;
193 if ( zSign ) z = - z;
194 if ( z && ( ( z < 0 ) ^ zSign ) ) {
195 overflow:
196 float_raise( float_flag_invalid STATUS_VAR);
197 return
Andreas Färberbb98fe42011-03-07 01:34:06 +0100198 zSign ? (int64_t) LIT64( 0x8000000000000000 )
bellard158142c2005-03-13 16:54:06 +0000199 : LIT64( 0x7FFFFFFFFFFFFFFF );
200 }
201 if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
202 return z;
203
204}
205
206/*----------------------------------------------------------------------------
207| Returns the fraction bits of the single-precision floating-point value `a'.
208*----------------------------------------------------------------------------*/
209
Andreas Färberbb98fe42011-03-07 01:34:06 +0100210INLINE uint32_t extractFloat32Frac( float32 a )
bellard158142c2005-03-13 16:54:06 +0000211{
212
pbrookf090c9d2007-11-18 14:33:24 +0000213 return float32_val(a) & 0x007FFFFF;
bellard158142c2005-03-13 16:54:06 +0000214
215}
216
217/*----------------------------------------------------------------------------
218| Returns the exponent bits of the single-precision floating-point value `a'.
219*----------------------------------------------------------------------------*/
220
Andreas Färber94a49d82012-04-26 00:15:56 +0200221INLINE int_fast16_t extractFloat32Exp(float32 a)
bellard158142c2005-03-13 16:54:06 +0000222{
223
pbrookf090c9d2007-11-18 14:33:24 +0000224 return ( float32_val(a)>>23 ) & 0xFF;
bellard158142c2005-03-13 16:54:06 +0000225
226}
227
228/*----------------------------------------------------------------------------
229| Returns the sign bit of the single-precision floating-point value `a'.
230*----------------------------------------------------------------------------*/
231
232INLINE flag extractFloat32Sign( float32 a )
233{
234
pbrookf090c9d2007-11-18 14:33:24 +0000235 return float32_val(a)>>31;
bellard158142c2005-03-13 16:54:06 +0000236
237}
238
239/*----------------------------------------------------------------------------
Peter Maydell37d18662011-01-06 19:37:53 +0000240| If `a' is denormal and we are in flush-to-zero mode then set the
241| input-denormal exception and return zero. Otherwise just return the value.
242*----------------------------------------------------------------------------*/
243static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
244{
245 if (STATUS(flush_inputs_to_zero)) {
246 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
247 float_raise(float_flag_input_denormal STATUS_VAR);
248 return make_float32(float32_val(a) & 0x80000000);
249 }
250 }
251 return a;
252}
253
254/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000255| Normalizes the subnormal single-precision floating-point value represented
256| by the denormalized significand `aSig'. The normalized exponent and
257| significand are stored at the locations pointed to by `zExpPtr' and
258| `zSigPtr', respectively.
259*----------------------------------------------------------------------------*/
260
261static void
Andreas Färber94a49d82012-04-26 00:15:56 +0200262 normalizeFloat32Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, uint32_t *zSigPtr)
bellard158142c2005-03-13 16:54:06 +0000263{
264 int8 shiftCount;
265
266 shiftCount = countLeadingZeros32( aSig ) - 8;
267 *zSigPtr = aSig<<shiftCount;
268 *zExpPtr = 1 - shiftCount;
269
270}
271
272/*----------------------------------------------------------------------------
273| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
274| single-precision floating-point value, returning the result. After being
275| shifted into the proper positions, the three fields are simply added
276| together to form the result. This means that any integer portion of `zSig'
277| will be added into the exponent. Since a properly normalized significand
278| will have an integer portion equal to 1, the `zExp' input should be 1 less
279| than the desired result exponent whenever `zSig' is a complete, normalized
280| significand.
281*----------------------------------------------------------------------------*/
282
Andreas Färber94a49d82012-04-26 00:15:56 +0200283INLINE float32 packFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig)
bellard158142c2005-03-13 16:54:06 +0000284{
285
pbrookf090c9d2007-11-18 14:33:24 +0000286 return make_float32(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100287 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
bellard158142c2005-03-13 16:54:06 +0000288
289}
290
291/*----------------------------------------------------------------------------
292| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
293| and significand `zSig', and returns the proper single-precision floating-
294| point value corresponding to the abstract input. Ordinarily, the abstract
295| value is simply rounded and packed into the single-precision format, with
296| the inexact exception raised if the abstract input cannot be represented
297| exactly. However, if the abstract value is too large, the overflow and
298| inexact exceptions are raised and an infinity or maximal finite value is
299| returned. If the abstract value is too small, the input value is rounded to
300| a subnormal number, and the underflow and inexact exceptions are raised if
301| the abstract input cannot be represented exactly as a subnormal single-
302| precision floating-point number.
303| The input significand `zSig' has its binary point between bits 30
304| and 29, which is 7 bits to the left of the usual location. This shifted
305| significand must be normalized or smaller. If `zSig' is not normalized,
306| `zExp' must be 0; in that case, the result returned is a subnormal number,
307| and it must not require rounding. In the usual case that `zSig' is
308| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
309| The handling of underflow and overflow follows the IEC/IEEE Standard for
310| Binary Floating-Point Arithmetic.
311*----------------------------------------------------------------------------*/
312
Andreas Färber94a49d82012-04-26 00:15:56 +0200313static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000314{
315 int8 roundingMode;
316 flag roundNearestEven;
317 int8 roundIncrement, roundBits;
318 flag isTiny;
319
320 roundingMode = STATUS(float_rounding_mode);
321 roundNearestEven = ( roundingMode == float_round_nearest_even );
322 roundIncrement = 0x40;
323 if ( ! roundNearestEven ) {
324 if ( roundingMode == float_round_to_zero ) {
325 roundIncrement = 0;
326 }
327 else {
328 roundIncrement = 0x7F;
329 if ( zSign ) {
330 if ( roundingMode == float_round_up ) roundIncrement = 0;
331 }
332 else {
333 if ( roundingMode == float_round_down ) roundIncrement = 0;
334 }
335 }
336 }
337 roundBits = zSig & 0x7F;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100338 if ( 0xFD <= (uint16_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +0000339 if ( ( 0xFD < zExp )
340 || ( ( zExp == 0xFD )
Andreas Färberbb98fe42011-03-07 01:34:06 +0100341 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
bellard158142c2005-03-13 16:54:06 +0000342 ) {
343 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
pbrookf090c9d2007-11-18 14:33:24 +0000344 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard158142c2005-03-13 16:54:06 +0000345 }
346 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100347 if (STATUS(flush_to_zero)) {
348 float_raise(float_flag_output_denormal STATUS_VAR);
349 return packFloat32(zSign, 0, 0);
350 }
bellard158142c2005-03-13 16:54:06 +0000351 isTiny =
352 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
353 || ( zExp < -1 )
354 || ( zSig + roundIncrement < 0x80000000 );
355 shift32RightJamming( zSig, - zExp, &zSig );
356 zExp = 0;
357 roundBits = zSig & 0x7F;
358 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
359 }
360 }
361 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
362 zSig = ( zSig + roundIncrement )>>7;
363 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
364 if ( zSig == 0 ) zExp = 0;
365 return packFloat32( zSign, zExp, zSig );
366
367}
368
369/*----------------------------------------------------------------------------
370| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
371| and significand `zSig', and returns the proper single-precision floating-
372| point value corresponding to the abstract input. This routine is just like
373| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
374| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
375| floating-point exponent.
376*----------------------------------------------------------------------------*/
377
378static float32
Andreas Färber94a49d82012-04-26 00:15:56 +0200379 normalizeRoundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000380{
381 int8 shiftCount;
382
383 shiftCount = countLeadingZeros32( zSig ) - 1;
384 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
385
386}
387
388/*----------------------------------------------------------------------------
389| Returns the fraction bits of the double-precision floating-point value `a'.
390*----------------------------------------------------------------------------*/
391
Andreas Färberbb98fe42011-03-07 01:34:06 +0100392INLINE uint64_t extractFloat64Frac( float64 a )
bellard158142c2005-03-13 16:54:06 +0000393{
394
pbrookf090c9d2007-11-18 14:33:24 +0000395 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard158142c2005-03-13 16:54:06 +0000396
397}
398
399/*----------------------------------------------------------------------------
400| Returns the exponent bits of the double-precision floating-point value `a'.
401*----------------------------------------------------------------------------*/
402
Andreas Färber94a49d82012-04-26 00:15:56 +0200403INLINE int_fast16_t extractFloat64Exp(float64 a)
bellard158142c2005-03-13 16:54:06 +0000404{
405
pbrookf090c9d2007-11-18 14:33:24 +0000406 return ( float64_val(a)>>52 ) & 0x7FF;
bellard158142c2005-03-13 16:54:06 +0000407
408}
409
410/*----------------------------------------------------------------------------
411| Returns the sign bit of the double-precision floating-point value `a'.
412*----------------------------------------------------------------------------*/
413
414INLINE flag extractFloat64Sign( float64 a )
415{
416
pbrookf090c9d2007-11-18 14:33:24 +0000417 return float64_val(a)>>63;
bellard158142c2005-03-13 16:54:06 +0000418
419}
420
421/*----------------------------------------------------------------------------
Peter Maydell37d18662011-01-06 19:37:53 +0000422| If `a' is denormal and we are in flush-to-zero mode then set the
423| input-denormal exception and return zero. Otherwise just return the value.
424*----------------------------------------------------------------------------*/
425static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
426{
427 if (STATUS(flush_inputs_to_zero)) {
428 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
429 float_raise(float_flag_input_denormal STATUS_VAR);
430 return make_float64(float64_val(a) & (1ULL << 63));
431 }
432 }
433 return a;
434}
435
436/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000437| Normalizes the subnormal double-precision floating-point value represented
438| by the denormalized significand `aSig'. The normalized exponent and
439| significand are stored at the locations pointed to by `zExpPtr' and
440| `zSigPtr', respectively.
441*----------------------------------------------------------------------------*/
442
443static void
Andreas Färber94a49d82012-04-26 00:15:56 +0200444 normalizeFloat64Subnormal(uint64_t aSig, int_fast16_t *zExpPtr, uint64_t *zSigPtr)
bellard158142c2005-03-13 16:54:06 +0000445{
446 int8 shiftCount;
447
448 shiftCount = countLeadingZeros64( aSig ) - 11;
449 *zSigPtr = aSig<<shiftCount;
450 *zExpPtr = 1 - shiftCount;
451
452}
453
454/*----------------------------------------------------------------------------
455| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
456| double-precision floating-point value, returning the result. After being
457| shifted into the proper positions, the three fields are simply added
458| together to form the result. This means that any integer portion of `zSig'
459| will be added into the exponent. Since a properly normalized significand
460| will have an integer portion equal to 1, the `zExp' input should be 1 less
461| than the desired result exponent whenever `zSig' is a complete, normalized
462| significand.
463*----------------------------------------------------------------------------*/
464
Andreas Färber94a49d82012-04-26 00:15:56 +0200465INLINE float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig)
bellard158142c2005-03-13 16:54:06 +0000466{
467
pbrookf090c9d2007-11-18 14:33:24 +0000468 return make_float64(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100469 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
bellard158142c2005-03-13 16:54:06 +0000470
471}
472
473/*----------------------------------------------------------------------------
474| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
475| and significand `zSig', and returns the proper double-precision floating-
476| point value corresponding to the abstract input. Ordinarily, the abstract
477| value is simply rounded and packed into the double-precision format, with
478| the inexact exception raised if the abstract input cannot be represented
479| exactly. However, if the abstract value is too large, the overflow and
480| inexact exceptions are raised and an infinity or maximal finite value is
481| returned. If the abstract value is too small, the input value is rounded
482| to a subnormal number, and the underflow and inexact exceptions are raised
483| if the abstract input cannot be represented exactly as a subnormal double-
484| precision floating-point number.
485| The input significand `zSig' has its binary point between bits 62
486| and 61, which is 10 bits to the left of the usual location. This shifted
487| significand must be normalized or smaller. If `zSig' is not normalized,
488| `zExp' must be 0; in that case, the result returned is a subnormal number,
489| and it must not require rounding. In the usual case that `zSig' is
490| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
491| The handling of underflow and overflow follows the IEC/IEEE Standard for
492| Binary Floating-Point Arithmetic.
493*----------------------------------------------------------------------------*/
494
Andreas Färber94a49d82012-04-26 00:15:56 +0200495static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000496{
497 int8 roundingMode;
498 flag roundNearestEven;
Andreas Färber94a49d82012-04-26 00:15:56 +0200499 int_fast16_t roundIncrement, roundBits;
bellard158142c2005-03-13 16:54:06 +0000500 flag isTiny;
501
502 roundingMode = STATUS(float_rounding_mode);
503 roundNearestEven = ( roundingMode == float_round_nearest_even );
504 roundIncrement = 0x200;
505 if ( ! roundNearestEven ) {
506 if ( roundingMode == float_round_to_zero ) {
507 roundIncrement = 0;
508 }
509 else {
510 roundIncrement = 0x3FF;
511 if ( zSign ) {
512 if ( roundingMode == float_round_up ) roundIncrement = 0;
513 }
514 else {
515 if ( roundingMode == float_round_down ) roundIncrement = 0;
516 }
517 }
518 }
519 roundBits = zSig & 0x3FF;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100520 if ( 0x7FD <= (uint16_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +0000521 if ( ( 0x7FD < zExp )
522 || ( ( zExp == 0x7FD )
Andreas Färberbb98fe42011-03-07 01:34:06 +0100523 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
bellard158142c2005-03-13 16:54:06 +0000524 ) {
525 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
pbrookf090c9d2007-11-18 14:33:24 +0000526 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard158142c2005-03-13 16:54:06 +0000527 }
528 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100529 if (STATUS(flush_to_zero)) {
530 float_raise(float_flag_output_denormal STATUS_VAR);
531 return packFloat64(zSign, 0, 0);
532 }
bellard158142c2005-03-13 16:54:06 +0000533 isTiny =
534 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
535 || ( zExp < -1 )
536 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
537 shift64RightJamming( zSig, - zExp, &zSig );
538 zExp = 0;
539 roundBits = zSig & 0x3FF;
540 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
541 }
542 }
543 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
544 zSig = ( zSig + roundIncrement )>>10;
545 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
546 if ( zSig == 0 ) zExp = 0;
547 return packFloat64( zSign, zExp, zSig );
548
549}
550
551/*----------------------------------------------------------------------------
552| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
553| and significand `zSig', and returns the proper double-precision floating-
554| point value corresponding to the abstract input. This routine is just like
555| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
556| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
557| floating-point exponent.
558*----------------------------------------------------------------------------*/
559
560static float64
Andreas Färber94a49d82012-04-26 00:15:56 +0200561 normalizeRoundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000562{
563 int8 shiftCount;
564
565 shiftCount = countLeadingZeros64( zSig ) - 1;
566 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
567
568}
569
bellard158142c2005-03-13 16:54:06 +0000570/*----------------------------------------------------------------------------
571| Returns the fraction bits of the extended double-precision floating-point
572| value `a'.
573*----------------------------------------------------------------------------*/
574
Andreas Färberbb98fe42011-03-07 01:34:06 +0100575INLINE uint64_t extractFloatx80Frac( floatx80 a )
bellard158142c2005-03-13 16:54:06 +0000576{
577
578 return a.low;
579
580}
581
582/*----------------------------------------------------------------------------
583| Returns the exponent bits of the extended double-precision floating-point
584| value `a'.
585*----------------------------------------------------------------------------*/
586
587INLINE int32 extractFloatx80Exp( floatx80 a )
588{
589
590 return a.high & 0x7FFF;
591
592}
593
594/*----------------------------------------------------------------------------
595| Returns the sign bit of the extended double-precision floating-point value
596| `a'.
597*----------------------------------------------------------------------------*/
598
599INLINE flag extractFloatx80Sign( floatx80 a )
600{
601
602 return a.high>>15;
603
604}
605
606/*----------------------------------------------------------------------------
607| Normalizes the subnormal extended double-precision floating-point value
608| represented by the denormalized significand `aSig'. The normalized exponent
609| and significand are stored at the locations pointed to by `zExpPtr' and
610| `zSigPtr', respectively.
611*----------------------------------------------------------------------------*/
612
613static void
Andreas Färberbb98fe42011-03-07 01:34:06 +0100614 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
bellard158142c2005-03-13 16:54:06 +0000615{
616 int8 shiftCount;
617
618 shiftCount = countLeadingZeros64( aSig );
619 *zSigPtr = aSig<<shiftCount;
620 *zExpPtr = 1 - shiftCount;
621
622}
623
624/*----------------------------------------------------------------------------
625| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
626| extended double-precision floating-point value, returning the result.
627*----------------------------------------------------------------------------*/
628
Andreas Färberbb98fe42011-03-07 01:34:06 +0100629INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
bellard158142c2005-03-13 16:54:06 +0000630{
631 floatx80 z;
632
633 z.low = zSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100634 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
bellard158142c2005-03-13 16:54:06 +0000635 return z;
636
637}
638
639/*----------------------------------------------------------------------------
640| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
641| and extended significand formed by the concatenation of `zSig0' and `zSig1',
642| and returns the proper extended double-precision floating-point value
643| corresponding to the abstract input. Ordinarily, the abstract value is
644| rounded and packed into the extended double-precision format, with the
645| inexact exception raised if the abstract input cannot be represented
646| exactly. However, if the abstract value is too large, the overflow and
647| inexact exceptions are raised and an infinity or maximal finite value is
648| returned. If the abstract value is too small, the input value is rounded to
649| a subnormal number, and the underflow and inexact exceptions are raised if
650| the abstract input cannot be represented exactly as a subnormal extended
651| double-precision floating-point number.
652| If `roundingPrecision' is 32 or 64, the result is rounded to the same
653| number of bits as single or double precision, respectively. Otherwise, the
654| result is rounded to the full precision of the extended double-precision
655| format.
656| The input significand must be normalized or smaller. If the input
657| significand is not normalized, `zExp' must be 0; in that case, the result
658| returned is a subnormal number, and it must not require rounding. The
659| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
660| Floating-Point Arithmetic.
661*----------------------------------------------------------------------------*/
662
663static floatx80
664 roundAndPackFloatx80(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100665 int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard158142c2005-03-13 16:54:06 +0000666 STATUS_PARAM)
667{
668 int8 roundingMode;
669 flag roundNearestEven, increment, isTiny;
670 int64 roundIncrement, roundMask, roundBits;
671
672 roundingMode = STATUS(float_rounding_mode);
673 roundNearestEven = ( roundingMode == float_round_nearest_even );
674 if ( roundingPrecision == 80 ) goto precision80;
675 if ( roundingPrecision == 64 ) {
676 roundIncrement = LIT64( 0x0000000000000400 );
677 roundMask = LIT64( 0x00000000000007FF );
678 }
679 else if ( roundingPrecision == 32 ) {
680 roundIncrement = LIT64( 0x0000008000000000 );
681 roundMask = LIT64( 0x000000FFFFFFFFFF );
682 }
683 else {
684 goto precision80;
685 }
686 zSig0 |= ( zSig1 != 0 );
687 if ( ! roundNearestEven ) {
688 if ( roundingMode == float_round_to_zero ) {
689 roundIncrement = 0;
690 }
691 else {
692 roundIncrement = roundMask;
693 if ( zSign ) {
694 if ( roundingMode == float_round_up ) roundIncrement = 0;
695 }
696 else {
697 if ( roundingMode == float_round_down ) roundIncrement = 0;
698 }
699 }
700 }
701 roundBits = zSig0 & roundMask;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100702 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard158142c2005-03-13 16:54:06 +0000703 if ( ( 0x7FFE < zExp )
704 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
705 ) {
706 goto overflow;
707 }
708 if ( zExp <= 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100709 if (STATUS(flush_to_zero)) {
710 float_raise(float_flag_output_denormal STATUS_VAR);
711 return packFloatx80(zSign, 0, 0);
712 }
bellard158142c2005-03-13 16:54:06 +0000713 isTiny =
714 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
715 || ( zExp < 0 )
716 || ( zSig0 <= zSig0 + roundIncrement );
717 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
718 zExp = 0;
719 roundBits = zSig0 & roundMask;
720 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
721 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
722 zSig0 += roundIncrement;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100723 if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard158142c2005-03-13 16:54:06 +0000724 roundIncrement = roundMask + 1;
725 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
726 roundMask |= roundIncrement;
727 }
728 zSig0 &= ~ roundMask;
729 return packFloatx80( zSign, zExp, zSig0 );
730 }
731 }
732 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
733 zSig0 += roundIncrement;
734 if ( zSig0 < roundIncrement ) {
735 ++zExp;
736 zSig0 = LIT64( 0x8000000000000000 );
737 }
738 roundIncrement = roundMask + 1;
739 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
740 roundMask |= roundIncrement;
741 }
742 zSig0 &= ~ roundMask;
743 if ( zSig0 == 0 ) zExp = 0;
744 return packFloatx80( zSign, zExp, zSig0 );
745 precision80:
Andreas Färberbb98fe42011-03-07 01:34:06 +0100746 increment = ( (int64_t) zSig1 < 0 );
bellard158142c2005-03-13 16:54:06 +0000747 if ( ! roundNearestEven ) {
748 if ( roundingMode == float_round_to_zero ) {
749 increment = 0;
750 }
751 else {
752 if ( zSign ) {
753 increment = ( roundingMode == float_round_down ) && zSig1;
754 }
755 else {
756 increment = ( roundingMode == float_round_up ) && zSig1;
757 }
758 }
759 }
Andreas Färberbb98fe42011-03-07 01:34:06 +0100760 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard158142c2005-03-13 16:54:06 +0000761 if ( ( 0x7FFE < zExp )
762 || ( ( zExp == 0x7FFE )
763 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
764 && increment
765 )
766 ) {
767 roundMask = 0;
768 overflow:
769 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
770 if ( ( roundingMode == float_round_to_zero )
771 || ( zSign && ( roundingMode == float_round_up ) )
772 || ( ! zSign && ( roundingMode == float_round_down ) )
773 ) {
774 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
775 }
776 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
777 }
778 if ( zExp <= 0 ) {
779 isTiny =
780 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
781 || ( zExp < 0 )
782 || ! increment
783 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
784 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
785 zExp = 0;
786 if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
787 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
788 if ( roundNearestEven ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +0100789 increment = ( (int64_t) zSig1 < 0 );
bellard158142c2005-03-13 16:54:06 +0000790 }
791 else {
792 if ( zSign ) {
793 increment = ( roundingMode == float_round_down ) && zSig1;
794 }
795 else {
796 increment = ( roundingMode == float_round_up ) && zSig1;
797 }
798 }
799 if ( increment ) {
800 ++zSig0;
801 zSig0 &=
Andreas Färberbb98fe42011-03-07 01:34:06 +0100802 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
803 if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard158142c2005-03-13 16:54:06 +0000804 }
805 return packFloatx80( zSign, zExp, zSig0 );
806 }
807 }
808 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
809 if ( increment ) {
810 ++zSig0;
811 if ( zSig0 == 0 ) {
812 ++zExp;
813 zSig0 = LIT64( 0x8000000000000000 );
814 }
815 else {
Andreas Färberbb98fe42011-03-07 01:34:06 +0100816 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
bellard158142c2005-03-13 16:54:06 +0000817 }
818 }
819 else {
820 if ( zSig0 == 0 ) zExp = 0;
821 }
822 return packFloatx80( zSign, zExp, zSig0 );
823
824}
825
826/*----------------------------------------------------------------------------
827| Takes an abstract floating-point value having sign `zSign', exponent
828| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
829| and returns the proper extended double-precision floating-point value
830| corresponding to the abstract input. This routine is just like
831| `roundAndPackFloatx80' except that the input significand does not have to be
832| normalized.
833*----------------------------------------------------------------------------*/
834
835static floatx80
836 normalizeRoundAndPackFloatx80(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100837 int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard158142c2005-03-13 16:54:06 +0000838 STATUS_PARAM)
839{
840 int8 shiftCount;
841
842 if ( zSig0 == 0 ) {
843 zSig0 = zSig1;
844 zSig1 = 0;
845 zExp -= 64;
846 }
847 shiftCount = countLeadingZeros64( zSig0 );
848 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
849 zExp -= shiftCount;
850 return
851 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
852
853}
854
bellard158142c2005-03-13 16:54:06 +0000855/*----------------------------------------------------------------------------
856| Returns the least-significant 64 fraction bits of the quadruple-precision
857| floating-point value `a'.
858*----------------------------------------------------------------------------*/
859
Andreas Färberbb98fe42011-03-07 01:34:06 +0100860INLINE uint64_t extractFloat128Frac1( float128 a )
bellard158142c2005-03-13 16:54:06 +0000861{
862
863 return a.low;
864
865}
866
867/*----------------------------------------------------------------------------
868| Returns the most-significant 48 fraction bits of the quadruple-precision
869| floating-point value `a'.
870*----------------------------------------------------------------------------*/
871
Andreas Färberbb98fe42011-03-07 01:34:06 +0100872INLINE uint64_t extractFloat128Frac0( float128 a )
bellard158142c2005-03-13 16:54:06 +0000873{
874
875 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
876
877}
878
879/*----------------------------------------------------------------------------
880| Returns the exponent bits of the quadruple-precision floating-point value
881| `a'.
882*----------------------------------------------------------------------------*/
883
884INLINE int32 extractFloat128Exp( float128 a )
885{
886
887 return ( a.high>>48 ) & 0x7FFF;
888
889}
890
891/*----------------------------------------------------------------------------
892| Returns the sign bit of the quadruple-precision floating-point value `a'.
893*----------------------------------------------------------------------------*/
894
895INLINE flag extractFloat128Sign( float128 a )
896{
897
898 return a.high>>63;
899
900}
901
902/*----------------------------------------------------------------------------
903| Normalizes the subnormal quadruple-precision floating-point value
904| represented by the denormalized significand formed by the concatenation of
905| `aSig0' and `aSig1'. The normalized exponent is stored at the location
906| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
907| significand are stored at the location pointed to by `zSig0Ptr', and the
908| least significant 64 bits of the normalized significand are stored at the
909| location pointed to by `zSig1Ptr'.
910*----------------------------------------------------------------------------*/
911
912static void
913 normalizeFloat128Subnormal(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100914 uint64_t aSig0,
915 uint64_t aSig1,
bellard158142c2005-03-13 16:54:06 +0000916 int32 *zExpPtr,
Andreas Färberbb98fe42011-03-07 01:34:06 +0100917 uint64_t *zSig0Ptr,
918 uint64_t *zSig1Ptr
bellard158142c2005-03-13 16:54:06 +0000919 )
920{
921 int8 shiftCount;
922
923 if ( aSig0 == 0 ) {
924 shiftCount = countLeadingZeros64( aSig1 ) - 15;
925 if ( shiftCount < 0 ) {
926 *zSig0Ptr = aSig1>>( - shiftCount );
927 *zSig1Ptr = aSig1<<( shiftCount & 63 );
928 }
929 else {
930 *zSig0Ptr = aSig1<<shiftCount;
931 *zSig1Ptr = 0;
932 }
933 *zExpPtr = - shiftCount - 63;
934 }
935 else {
936 shiftCount = countLeadingZeros64( aSig0 ) - 15;
937 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
938 *zExpPtr = 1 - shiftCount;
939 }
940
941}
942
943/*----------------------------------------------------------------------------
944| Packs the sign `zSign', the exponent `zExp', and the significand formed
945| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
946| floating-point value, returning the result. After being shifted into the
947| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
948| added together to form the most significant 32 bits of the result. This
949| means that any integer portion of `zSig0' will be added into the exponent.
950| Since a properly normalized significand will have an integer portion equal
951| to 1, the `zExp' input should be 1 less than the desired result exponent
952| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
953| significand.
954*----------------------------------------------------------------------------*/
955
956INLINE float128
Andreas Färberbb98fe42011-03-07 01:34:06 +0100957 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
bellard158142c2005-03-13 16:54:06 +0000958{
959 float128 z;
960
961 z.low = zSig1;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100962 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
bellard158142c2005-03-13 16:54:06 +0000963 return z;
964
965}
966
967/*----------------------------------------------------------------------------
968| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
969| and extended significand formed by the concatenation of `zSig0', `zSig1',
970| and `zSig2', and returns the proper quadruple-precision floating-point value
971| corresponding to the abstract input. Ordinarily, the abstract value is
972| simply rounded and packed into the quadruple-precision format, with the
973| inexact exception raised if the abstract input cannot be represented
974| exactly. However, if the abstract value is too large, the overflow and
975| inexact exceptions are raised and an infinity or maximal finite value is
976| returned. If the abstract value is too small, the input value is rounded to
977| a subnormal number, and the underflow and inexact exceptions are raised if
978| the abstract input cannot be represented exactly as a subnormal quadruple-
979| precision floating-point number.
980| The input significand must be normalized or smaller. If the input
981| significand is not normalized, `zExp' must be 0; in that case, the result
982| returned is a subnormal number, and it must not require rounding. In the
983| usual case that the input significand is normalized, `zExp' must be 1 less
984| than the ``true'' floating-point exponent. The handling of underflow and
985| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
986*----------------------------------------------------------------------------*/
987
988static float128
989 roundAndPackFloat128(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100990 flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000991{
992 int8 roundingMode;
993 flag roundNearestEven, increment, isTiny;
994
995 roundingMode = STATUS(float_rounding_mode);
996 roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färberbb98fe42011-03-07 01:34:06 +0100997 increment = ( (int64_t) zSig2 < 0 );
bellard158142c2005-03-13 16:54:06 +0000998 if ( ! roundNearestEven ) {
999 if ( roundingMode == float_round_to_zero ) {
1000 increment = 0;
1001 }
1002 else {
1003 if ( zSign ) {
1004 increment = ( roundingMode == float_round_down ) && zSig2;
1005 }
1006 else {
1007 increment = ( roundingMode == float_round_up ) && zSig2;
1008 }
1009 }
1010 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001011 if ( 0x7FFD <= (uint32_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +00001012 if ( ( 0x7FFD < zExp )
1013 || ( ( zExp == 0x7FFD )
1014 && eq128(
1015 LIT64( 0x0001FFFFFFFFFFFF ),
1016 LIT64( 0xFFFFFFFFFFFFFFFF ),
1017 zSig0,
1018 zSig1
1019 )
1020 && increment
1021 )
1022 ) {
1023 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1024 if ( ( roundingMode == float_round_to_zero )
1025 || ( zSign && ( roundingMode == float_round_up ) )
1026 || ( ! zSign && ( roundingMode == float_round_down ) )
1027 ) {
1028 return
1029 packFloat128(
1030 zSign,
1031 0x7FFE,
1032 LIT64( 0x0000FFFFFFFFFFFF ),
1033 LIT64( 0xFFFFFFFFFFFFFFFF )
1034 );
1035 }
1036 return packFloat128( zSign, 0x7FFF, 0, 0 );
1037 }
1038 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01001039 if (STATUS(flush_to_zero)) {
1040 float_raise(float_flag_output_denormal STATUS_VAR);
1041 return packFloat128(zSign, 0, 0, 0);
1042 }
bellard158142c2005-03-13 16:54:06 +00001043 isTiny =
1044 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1045 || ( zExp < -1 )
1046 || ! increment
1047 || lt128(
1048 zSig0,
1049 zSig1,
1050 LIT64( 0x0001FFFFFFFFFFFF ),
1051 LIT64( 0xFFFFFFFFFFFFFFFF )
1052 );
1053 shift128ExtraRightJamming(
1054 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1055 zExp = 0;
1056 if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1057 if ( roundNearestEven ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01001058 increment = ( (int64_t) zSig2 < 0 );
bellard158142c2005-03-13 16:54:06 +00001059 }
1060 else {
1061 if ( zSign ) {
1062 increment = ( roundingMode == float_round_down ) && zSig2;
1063 }
1064 else {
1065 increment = ( roundingMode == float_round_up ) && zSig2;
1066 }
1067 }
1068 }
1069 }
1070 if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1071 if ( increment ) {
1072 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1073 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1074 }
1075 else {
1076 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1077 }
1078 return packFloat128( zSign, zExp, zSig0, zSig1 );
1079
1080}
1081
1082/*----------------------------------------------------------------------------
1083| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1084| and significand formed by the concatenation of `zSig0' and `zSig1', and
1085| returns the proper quadruple-precision floating-point value corresponding
1086| to the abstract input. This routine is just like `roundAndPackFloat128'
1087| except that the input significand has fewer bits and does not have to be
1088| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1089| point exponent.
1090*----------------------------------------------------------------------------*/
1091
1092static float128
1093 normalizeRoundAndPackFloat128(
Andreas Färberbb98fe42011-03-07 01:34:06 +01001094 flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001095{
1096 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001097 uint64_t zSig2;
bellard158142c2005-03-13 16:54:06 +00001098
1099 if ( zSig0 == 0 ) {
1100 zSig0 = zSig1;
1101 zSig1 = 0;
1102 zExp -= 64;
1103 }
1104 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1105 if ( 0 <= shiftCount ) {
1106 zSig2 = 0;
1107 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1108 }
1109 else {
1110 shift128ExtraRightJamming(
1111 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1112 }
1113 zExp -= shiftCount;
1114 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1115
1116}
1117
bellard158142c2005-03-13 16:54:06 +00001118/*----------------------------------------------------------------------------
1119| Returns the result of converting the 32-bit two's complement integer `a'
1120| to the single-precision floating-point format. The conversion is performed
1121| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1122*----------------------------------------------------------------------------*/
1123
1124float32 int32_to_float32( int32 a STATUS_PARAM )
1125{
1126 flag zSign;
1127
pbrookf090c9d2007-11-18 14:33:24 +00001128 if ( a == 0 ) return float32_zero;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001129 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
bellard158142c2005-03-13 16:54:06 +00001130 zSign = ( a < 0 );
1131 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1132
1133}
1134
1135/*----------------------------------------------------------------------------
1136| Returns the result of converting the 32-bit two's complement integer `a'
1137| to the double-precision floating-point format. The conversion is performed
1138| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1139*----------------------------------------------------------------------------*/
1140
1141float64 int32_to_float64( int32 a STATUS_PARAM )
1142{
1143 flag zSign;
1144 uint32 absA;
1145 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001146 uint64_t zSig;
bellard158142c2005-03-13 16:54:06 +00001147
pbrookf090c9d2007-11-18 14:33:24 +00001148 if ( a == 0 ) return float64_zero;
bellard158142c2005-03-13 16:54:06 +00001149 zSign = ( a < 0 );
1150 absA = zSign ? - a : a;
1151 shiftCount = countLeadingZeros32( absA ) + 21;
1152 zSig = absA;
1153 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1154
1155}
1156
bellard158142c2005-03-13 16:54:06 +00001157/*----------------------------------------------------------------------------
1158| Returns the result of converting the 32-bit two's complement integer `a'
1159| to the extended double-precision floating-point format. The conversion
1160| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1161| Arithmetic.
1162*----------------------------------------------------------------------------*/
1163
1164floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
1165{
1166 flag zSign;
1167 uint32 absA;
1168 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001169 uint64_t zSig;
bellard158142c2005-03-13 16:54:06 +00001170
1171 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1172 zSign = ( a < 0 );
1173 absA = zSign ? - a : a;
1174 shiftCount = countLeadingZeros32( absA ) + 32;
1175 zSig = absA;
1176 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1177
1178}
1179
bellard158142c2005-03-13 16:54:06 +00001180/*----------------------------------------------------------------------------
1181| Returns the result of converting the 32-bit two's complement integer `a' to
1182| the quadruple-precision floating-point format. The conversion is performed
1183| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1184*----------------------------------------------------------------------------*/
1185
1186float128 int32_to_float128( int32 a STATUS_PARAM )
1187{
1188 flag zSign;
1189 uint32 absA;
1190 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001191 uint64_t zSig0;
bellard158142c2005-03-13 16:54:06 +00001192
1193 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1194 zSign = ( a < 0 );
1195 absA = zSign ? - a : a;
1196 shiftCount = countLeadingZeros32( absA ) + 17;
1197 zSig0 = absA;
1198 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1199
1200}
1201
bellard158142c2005-03-13 16:54:06 +00001202/*----------------------------------------------------------------------------
1203| Returns the result of converting the 64-bit two's complement integer `a'
1204| to the single-precision floating-point format. The conversion is performed
1205| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1206*----------------------------------------------------------------------------*/
1207
1208float32 int64_to_float32( int64 a STATUS_PARAM )
1209{
1210 flag zSign;
1211 uint64 absA;
1212 int8 shiftCount;
1213
pbrookf090c9d2007-11-18 14:33:24 +00001214 if ( a == 0 ) return float32_zero;
bellard158142c2005-03-13 16:54:06 +00001215 zSign = ( a < 0 );
1216 absA = zSign ? - a : a;
1217 shiftCount = countLeadingZeros64( absA ) - 40;
1218 if ( 0 <= shiftCount ) {
1219 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1220 }
1221 else {
1222 shiftCount += 7;
1223 if ( shiftCount < 0 ) {
1224 shift64RightJamming( absA, - shiftCount, &absA );
1225 }
1226 else {
1227 absA <<= shiftCount;
1228 }
1229 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1230 }
1231
1232}
1233
j_mayer3430b0b2007-03-20 22:25:37 +00001234float32 uint64_to_float32( uint64 a STATUS_PARAM )
j_mayer75d62a52007-03-20 22:10:42 +00001235{
1236 int8 shiftCount;
1237
pbrookf090c9d2007-11-18 14:33:24 +00001238 if ( a == 0 ) return float32_zero;
j_mayer75d62a52007-03-20 22:10:42 +00001239 shiftCount = countLeadingZeros64( a ) - 40;
1240 if ( 0 <= shiftCount ) {
Peter Maydelle744c062012-09-28 16:17:03 +01001241 return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
j_mayer75d62a52007-03-20 22:10:42 +00001242 }
1243 else {
1244 shiftCount += 7;
1245 if ( shiftCount < 0 ) {
1246 shift64RightJamming( a, - shiftCount, &a );
1247 }
1248 else {
1249 a <<= shiftCount;
1250 }
Peter Maydelle744c062012-09-28 16:17:03 +01001251 return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00001252 }
1253}
1254
bellard158142c2005-03-13 16:54:06 +00001255/*----------------------------------------------------------------------------
1256| Returns the result of converting the 64-bit two's complement integer `a'
1257| to the double-precision floating-point format. The conversion is performed
1258| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259*----------------------------------------------------------------------------*/
1260
1261float64 int64_to_float64( int64 a STATUS_PARAM )
1262{
1263 flag zSign;
1264
pbrookf090c9d2007-11-18 14:33:24 +00001265 if ( a == 0 ) return float64_zero;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001266 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
bellard158142c2005-03-13 16:54:06 +00001267 return packFloat64( 1, 0x43E, 0 );
1268 }
1269 zSign = ( a < 0 );
1270 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1271
1272}
1273
Richard Henderson17ed2292012-12-31 10:09:03 -08001274float64 uint64_to_float64(uint64 a STATUS_PARAM)
j_mayer75d62a52007-03-20 22:10:42 +00001275{
Richard Henderson17ed2292012-12-31 10:09:03 -08001276 int exp = 0x43C;
j_mayer75d62a52007-03-20 22:10:42 +00001277
Richard Henderson17ed2292012-12-31 10:09:03 -08001278 if (a == 0) {
1279 return float64_zero;
1280 }
1281 if ((int64_t)a < 0) {
1282 shift64RightJamming(a, 1, &a);
1283 exp += 1;
1284 }
1285 return normalizeRoundAndPackFloat64(0, exp, a STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00001286}
1287
bellard158142c2005-03-13 16:54:06 +00001288/*----------------------------------------------------------------------------
1289| Returns the result of converting the 64-bit two's complement integer `a'
1290| to the extended double-precision floating-point format. The conversion
1291| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1292| Arithmetic.
1293*----------------------------------------------------------------------------*/
1294
1295floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
1296{
1297 flag zSign;
1298 uint64 absA;
1299 int8 shiftCount;
1300
1301 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1302 zSign = ( a < 0 );
1303 absA = zSign ? - a : a;
1304 shiftCount = countLeadingZeros64( absA );
1305 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1306
1307}
1308
bellard158142c2005-03-13 16:54:06 +00001309/*----------------------------------------------------------------------------
1310| Returns the result of converting the 64-bit two's complement integer `a' to
1311| the quadruple-precision floating-point format. The conversion is performed
1312| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1313*----------------------------------------------------------------------------*/
1314
1315float128 int64_to_float128( int64 a STATUS_PARAM )
1316{
1317 flag zSign;
1318 uint64 absA;
1319 int8 shiftCount;
1320 int32 zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001321 uint64_t zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00001322
1323 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1324 zSign = ( a < 0 );
1325 absA = zSign ? - a : a;
1326 shiftCount = countLeadingZeros64( absA ) + 49;
1327 zExp = 0x406E - shiftCount;
1328 if ( 64 <= shiftCount ) {
1329 zSig1 = 0;
1330 zSig0 = absA;
1331 shiftCount -= 64;
1332 }
1333 else {
1334 zSig1 = absA;
1335 zSig0 = 0;
1336 }
1337 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1338 return packFloat128( zSign, zExp, zSig0, zSig1 );
1339
1340}
1341
Richard Henderson1e397ea2012-12-31 10:09:04 -08001342float128 uint64_to_float128(uint64 a STATUS_PARAM)
1343{
1344 if (a == 0) {
1345 return float128_zero;
1346 }
1347 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0 STATUS_VAR);
1348}
1349
bellard158142c2005-03-13 16:54:06 +00001350/*----------------------------------------------------------------------------
1351| Returns the result of converting the single-precision floating-point value
1352| `a' to the 32-bit two's complement integer format. The conversion is
1353| performed according to the IEC/IEEE Standard for Binary Floating-Point
1354| Arithmetic---which means in particular that the conversion is rounded
1355| according to the current rounding mode. If `a' is a NaN, the largest
1356| positive integer is returned. Otherwise, if the conversion overflows, the
1357| largest integer with the same sign as `a' is returned.
1358*----------------------------------------------------------------------------*/
1359
1360int32 float32_to_int32( float32 a STATUS_PARAM )
1361{
1362 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001363 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001364 uint32_t aSig;
1365 uint64_t aSig64;
bellard158142c2005-03-13 16:54:06 +00001366
Peter Maydell37d18662011-01-06 19:37:53 +00001367 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001368 aSig = extractFloat32Frac( a );
1369 aExp = extractFloat32Exp( a );
1370 aSign = extractFloat32Sign( a );
1371 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1372 if ( aExp ) aSig |= 0x00800000;
1373 shiftCount = 0xAF - aExp;
1374 aSig64 = aSig;
1375 aSig64 <<= 32;
1376 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1377 return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1378
1379}
1380
1381/*----------------------------------------------------------------------------
1382| Returns the result of converting the single-precision floating-point value
1383| `a' to the 32-bit two's complement integer format. The conversion is
1384| performed according to the IEC/IEEE Standard for Binary Floating-Point
1385| Arithmetic, except that the conversion is always rounded toward zero.
1386| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1387| the conversion overflows, the largest integer with the same sign as `a' is
1388| returned.
1389*----------------------------------------------------------------------------*/
1390
1391int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1392{
1393 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001394 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001395 uint32_t aSig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01001396 int32_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00001397 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001398
1399 aSig = extractFloat32Frac( a );
1400 aExp = extractFloat32Exp( a );
1401 aSign = extractFloat32Sign( a );
1402 shiftCount = aExp - 0x9E;
1403 if ( 0 <= shiftCount ) {
pbrookf090c9d2007-11-18 14:33:24 +00001404 if ( float32_val(a) != 0xCF000000 ) {
bellard158142c2005-03-13 16:54:06 +00001405 float_raise( float_flag_invalid STATUS_VAR);
1406 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1407 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001408 return (int32_t) 0x80000000;
bellard158142c2005-03-13 16:54:06 +00001409 }
1410 else if ( aExp <= 0x7E ) {
1411 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1412 return 0;
1413 }
1414 aSig = ( aSig | 0x00800000 )<<8;
1415 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001416 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00001417 STATUS(float_exception_flags) |= float_flag_inexact;
1418 }
1419 if ( aSign ) z = - z;
1420 return z;
1421
1422}
1423
1424/*----------------------------------------------------------------------------
1425| Returns the result of converting the single-precision floating-point value
Peter Maydellcbcef452010-12-07 15:37:34 +00001426| `a' to the 16-bit two's complement integer format. The conversion is
1427| performed according to the IEC/IEEE Standard for Binary Floating-Point
1428| Arithmetic, except that the conversion is always rounded toward zero.
1429| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1430| the conversion overflows, the largest integer with the same sign as `a' is
1431| returned.
1432*----------------------------------------------------------------------------*/
1433
Andreas Färber94a49d82012-04-26 00:15:56 +02001434int_fast16_t float32_to_int16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00001435{
1436 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001437 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001438 uint32_t aSig;
Peter Maydellcbcef452010-12-07 15:37:34 +00001439 int32 z;
1440
1441 aSig = extractFloat32Frac( a );
1442 aExp = extractFloat32Exp( a );
1443 aSign = extractFloat32Sign( a );
1444 shiftCount = aExp - 0x8E;
1445 if ( 0 <= shiftCount ) {
1446 if ( float32_val(a) != 0xC7000000 ) {
1447 float_raise( float_flag_invalid STATUS_VAR);
1448 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1449 return 0x7FFF;
1450 }
1451 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001452 return (int32_t) 0xffff8000;
Peter Maydellcbcef452010-12-07 15:37:34 +00001453 }
1454 else if ( aExp <= 0x7E ) {
1455 if ( aExp | aSig ) {
1456 STATUS(float_exception_flags) |= float_flag_inexact;
1457 }
1458 return 0;
1459 }
1460 shiftCount -= 0x10;
1461 aSig = ( aSig | 0x00800000 )<<8;
1462 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001463 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
Peter Maydellcbcef452010-12-07 15:37:34 +00001464 STATUS(float_exception_flags) |= float_flag_inexact;
1465 }
1466 if ( aSign ) {
1467 z = - z;
1468 }
1469 return z;
1470
1471}
1472
1473/*----------------------------------------------------------------------------
1474| Returns the result of converting the single-precision floating-point value
bellard158142c2005-03-13 16:54:06 +00001475| `a' to the 64-bit two's complement integer format. The conversion is
1476| performed according to the IEC/IEEE Standard for Binary Floating-Point
1477| Arithmetic---which means in particular that the conversion is rounded
1478| according to the current rounding mode. If `a' is a NaN, the largest
1479| positive integer is returned. Otherwise, if the conversion overflows, the
1480| largest integer with the same sign as `a' is returned.
1481*----------------------------------------------------------------------------*/
1482
1483int64 float32_to_int64( float32 a STATUS_PARAM )
1484{
1485 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001486 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001487 uint32_t aSig;
1488 uint64_t aSig64, aSigExtra;
Peter Maydell37d18662011-01-06 19:37:53 +00001489 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001490
1491 aSig = extractFloat32Frac( a );
1492 aExp = extractFloat32Exp( a );
1493 aSign = extractFloat32Sign( a );
1494 shiftCount = 0xBE - aExp;
1495 if ( shiftCount < 0 ) {
1496 float_raise( float_flag_invalid STATUS_VAR);
1497 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1498 return LIT64( 0x7FFFFFFFFFFFFFFF );
1499 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001500 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00001501 }
1502 if ( aExp ) aSig |= 0x00800000;
1503 aSig64 = aSig;
1504 aSig64 <<= 40;
1505 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1506 return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1507
1508}
1509
1510/*----------------------------------------------------------------------------
1511| Returns the result of converting the single-precision floating-point value
1512| `a' to the 64-bit two's complement integer format. The conversion is
1513| performed according to the IEC/IEEE Standard for Binary Floating-Point
1514| Arithmetic, except that the conversion is always rounded toward zero. If
1515| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
1516| conversion overflows, the largest integer with the same sign as `a' is
1517| returned.
1518*----------------------------------------------------------------------------*/
1519
1520int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1521{
1522 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001523 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001524 uint32_t aSig;
1525 uint64_t aSig64;
bellard158142c2005-03-13 16:54:06 +00001526 int64 z;
Peter Maydell37d18662011-01-06 19:37:53 +00001527 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001528
1529 aSig = extractFloat32Frac( a );
1530 aExp = extractFloat32Exp( a );
1531 aSign = extractFloat32Sign( a );
1532 shiftCount = aExp - 0xBE;
1533 if ( 0 <= shiftCount ) {
pbrookf090c9d2007-11-18 14:33:24 +00001534 if ( float32_val(a) != 0xDF000000 ) {
bellard158142c2005-03-13 16:54:06 +00001535 float_raise( float_flag_invalid STATUS_VAR);
1536 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1537 return LIT64( 0x7FFFFFFFFFFFFFFF );
1538 }
1539 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001540 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00001541 }
1542 else if ( aExp <= 0x7E ) {
1543 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1544 return 0;
1545 }
1546 aSig64 = aSig | 0x00800000;
1547 aSig64 <<= 40;
1548 z = aSig64>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001549 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00001550 STATUS(float_exception_flags) |= float_flag_inexact;
1551 }
1552 if ( aSign ) z = - z;
1553 return z;
1554
1555}
1556
1557/*----------------------------------------------------------------------------
1558| Returns the result of converting the single-precision floating-point value
1559| `a' to the double-precision floating-point format. The conversion is
1560| performed according to the IEC/IEEE Standard for Binary Floating-Point
1561| Arithmetic.
1562*----------------------------------------------------------------------------*/
1563
1564float64 float32_to_float64( float32 a STATUS_PARAM )
1565{
1566 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001567 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001568 uint32_t aSig;
Peter Maydell37d18662011-01-06 19:37:53 +00001569 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001570
1571 aSig = extractFloat32Frac( a );
1572 aExp = extractFloat32Exp( a );
1573 aSign = extractFloat32Sign( a );
1574 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001575 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001576 return packFloat64( aSign, 0x7FF, 0 );
1577 }
1578 if ( aExp == 0 ) {
1579 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1580 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1581 --aExp;
1582 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001583 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
bellard158142c2005-03-13 16:54:06 +00001584
1585}
1586
bellard158142c2005-03-13 16:54:06 +00001587/*----------------------------------------------------------------------------
1588| Returns the result of converting the single-precision floating-point value
1589| `a' to the extended double-precision floating-point format. The conversion
1590| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1591| Arithmetic.
1592*----------------------------------------------------------------------------*/
1593
1594floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1595{
1596 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001597 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001598 uint32_t aSig;
bellard158142c2005-03-13 16:54:06 +00001599
Peter Maydell37d18662011-01-06 19:37:53 +00001600 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001601 aSig = extractFloat32Frac( a );
1602 aExp = extractFloat32Exp( a );
1603 aSign = extractFloat32Sign( a );
1604 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001605 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001606 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1607 }
1608 if ( aExp == 0 ) {
1609 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1610 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1611 }
1612 aSig |= 0x00800000;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001613 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
bellard158142c2005-03-13 16:54:06 +00001614
1615}
1616
bellard158142c2005-03-13 16:54:06 +00001617/*----------------------------------------------------------------------------
1618| Returns the result of converting the single-precision floating-point value
| `a' to the quadruple-precision floating-point format.  The conversion is
1620| performed according to the IEC/IEEE Standard for Binary Floating-Point
1621| Arithmetic.
1622*----------------------------------------------------------------------------*/
1623
1624float128 float32_to_float128( float32 a STATUS_PARAM )
1625{
1626 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001627 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001628 uint32_t aSig;
bellard158142c2005-03-13 16:54:06 +00001629
Peter Maydell37d18662011-01-06 19:37:53 +00001630 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001631 aSig = extractFloat32Frac( a );
1632 aExp = extractFloat32Exp( a );
1633 aSign = extractFloat32Sign( a );
1634 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001635 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001636 return packFloat128( aSign, 0x7FFF, 0, 0 );
1637 }
1638 if ( aExp == 0 ) {
1639 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1640 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1641 --aExp;
1642 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001643 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
bellard158142c2005-03-13 16:54:06 +00001644
1645}
1646
bellard158142c2005-03-13 16:54:06 +00001647/*----------------------------------------------------------------------------
1648| Rounds the single-precision floating-point value `a' to an integer, and
1649| returns the result as a single-precision floating-point value. The
1650| operation is performed according to the IEC/IEEE Standard for Binary
1651| Floating-Point Arithmetic.
1652*----------------------------------------------------------------------------*/
1653
1654float32 float32_round_to_int( float32 a STATUS_PARAM)
1655{
1656 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001657 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001658 uint32_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00001659 int8 roundingMode;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001660 uint32_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00001661 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001662
1663 aExp = extractFloat32Exp( a );
1664 if ( 0x96 <= aExp ) {
1665 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1666 return propagateFloat32NaN( a, a STATUS_VAR );
1667 }
1668 return a;
1669 }
1670 if ( aExp <= 0x7E ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01001671 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00001672 STATUS(float_exception_flags) |= float_flag_inexact;
1673 aSign = extractFloat32Sign( a );
1674 switch ( STATUS(float_rounding_mode) ) {
1675 case float_round_nearest_even:
1676 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1677 return packFloat32( aSign, 0x7F, 0 );
1678 }
1679 break;
1680 case float_round_down:
pbrookf090c9d2007-11-18 14:33:24 +00001681 return make_float32(aSign ? 0xBF800000 : 0);
bellard158142c2005-03-13 16:54:06 +00001682 case float_round_up:
pbrookf090c9d2007-11-18 14:33:24 +00001683 return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard158142c2005-03-13 16:54:06 +00001684 }
1685 return packFloat32( aSign, 0, 0 );
1686 }
1687 lastBitMask = 1;
1688 lastBitMask <<= 0x96 - aExp;
1689 roundBitsMask = lastBitMask - 1;
pbrookf090c9d2007-11-18 14:33:24 +00001690 z = float32_val(a);
bellard158142c2005-03-13 16:54:06 +00001691 roundingMode = STATUS(float_rounding_mode);
1692 if ( roundingMode == float_round_nearest_even ) {
1693 z += lastBitMask>>1;
1694 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1695 }
1696 else if ( roundingMode != float_round_to_zero ) {
pbrookf090c9d2007-11-18 14:33:24 +00001697 if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard158142c2005-03-13 16:54:06 +00001698 z += roundBitsMask;
1699 }
1700 }
1701 z &= ~ roundBitsMask;
pbrookf090c9d2007-11-18 14:33:24 +00001702 if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1703 return make_float32(z);
bellard158142c2005-03-13 16:54:06 +00001704
1705}
1706
1707/*----------------------------------------------------------------------------
1708| Returns the result of adding the absolute values of the single-precision
1709| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1710| before being returned. `zSign' is ignored if the result is a NaN.
1711| The addition is performed according to the IEC/IEEE Standard for Binary
1712| Floating-Point Arithmetic.
1713*----------------------------------------------------------------------------*/
1714
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int_fast16_t aExp, bExp, zExp;
    uint32_t aSig, bSig, zSig;
    int_fast16_t expDiff;

    /* Unpack both operands; their own sign bits are not consulted here. */
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    expDiff = aExp - bExp;
    /*
     * Shift the 23-bit fractions up by 6 so that the implicit leading one
     * sits at 0x20000000 and low bits are available for alignment shifts.
     */
    aSig <<= 6;
    bSig <<= 6;
    if ( 0 < expDiff ) {
        /* a has the larger exponent. */
        if ( aExp == 0xFF ) {
            /* a is a NaN (non-zero fraction) or an infinity. */
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
            return a;
        }
        if ( bExp == 0 ) {
            /* b is subnormal/zero: no implicit one, one less shift. */
            --expDiff;
        }
        else {
            bSig |= 0x20000000;
        }
        /* Align b to a's exponent. */
        shift32RightJamming( bSig, expDiff, &bSig );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: mirror image of the case above. */
        if ( bExp == 0xFF ) {
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
            /* b is infinite: the result is an infinity with sign zSign. */
            return packFloat32( zSign, 0xFF, 0 );
        }
        if ( aExp == 0 ) {
            ++expDiff;
        }
        else {
            aSig |= 0x20000000;
        }
        shift32RightJamming( aSig, - expDiff, &aSig );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0xFF ) {
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
            return a;
        }
        if ( aExp == 0 ) {
            /* Both operands are subnormal or zero. */
            if (STATUS(flush_to_zero)) {
                /* Flush the result to zero, flagging it if it was non-zero. */
                if (aSig | bSig) {
                    float_raise(float_flag_output_denormal STATUS_VAR);
                }
                return packFloat32(zSign, 0, 0);
            }
            /* Undo the 6-bit pre-shift and pack the raw sum directly. */
            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
        }
        /* 0x40000000 is the sum of the two implicit leading ones. */
        zSig = 0x40000000 + aSig + bSig;
        zExp = aExp;
        goto roundAndPack;
    }
    /*
     * Unequal exponents: add the one implicit leading bit that has not yet
     * been included.
     * NOTE(review): in the bExp-larger path this bit logically belongs to
     * b; ORing it into aSig appears to rely on bit 29 of the aligned aSig
     * being clear at this point -- confirm.
     */
    aSig |= 0x20000000;
    /*
     * Try the sum shifted left by one with the exponent decremented; if
     * that sets bit 31, fall back to the unshifted sum and the original
     * exponent.
     */
    zSig = ( aSig + bSig )<<1;
    --zExp;
    if ( (int32_t) zSig < 0 ) {
        zSig = aSig + bSig;
        ++zExp;
    }
 roundAndPack:
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );

}
1785
1786/*----------------------------------------------------------------------------
1787| Returns the result of subtracting the absolute values of the single-
1788| precision floating-point values `a' and `b'. If `zSign' is 1, the
1789| difference is negated before being returned. `zSign' is ignored if the
1790| result is a NaN. The subtraction is performed according to the IEC/IEEE
1791| Standard for Binary Floating-Point Arithmetic.
1792*----------------------------------------------------------------------------*/
1793
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int_fast16_t aExp, bExp, zExp;
    uint32_t aSig, bSig, zSig;
    int_fast16_t expDiff;

    /* Unpack both operands; their own sign bits are not consulted here. */
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    expDiff = aExp - bExp;
    /*
     * Shift the 23-bit fractions up by 7 so that the implicit leading one
     * sits at 0x40000000 and low bits are available for alignment shifts.
     */
    aSig <<= 7;
    bSig <<= 7;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here until the bExpBigger label. */
    if ( aExp == 0xFF ) {
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
        /* Both operands are infinities: invalid operation. */
        float_raise( float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if ( aExp == 0 ) {
        /* Both subnormal/zero: use exponent 1 for the result computation. */
        aExp = 1;
        bExp = 1;
    }
    /*
     * The implicit leading ones are not added on this path; the fractions
     * are compared and subtracted directly.
     */
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, with the sign bit set only in round-down mode. */
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
 bExpBigger:
    if ( bExp == 0xFF ) {
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
        /* b is infinite: the result is an infinity with the opposite sign. */
        return packFloat32( zSign ^ 1, 0xFF, 0 );
    }
    if ( aExp == 0 ) {
        /* a is subnormal/zero: no implicit one, one less shift. */
        ++expDiff;
    }
    else {
        aSig |= 0x40000000;
    }
    /* Align a to b's exponent, then give b its implicit leading one. */
    shift32RightJamming( aSig, - expDiff, &aSig );
    bSig |= 0x40000000;
 bBigger:
    /* b dominates: compute b - a and flip the result sign. */
    zSig = bSig - aSig;
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0xFF ) {
        /* a is a NaN (non-zero fraction) or an infinity. */
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
        return a;
    }
    if ( bExp == 0 ) {
        /* b is subnormal/zero: no implicit one, one less shift. */
        --expDiff;
    }
    else {
        bSig |= 0x40000000;
    }
    /* Align b to a's exponent, then give a its implicit leading one. */
    shift32RightJamming( bSig, expDiff, &bSig );
    aSig |= 0x40000000;
 aBigger:
    /* a dominates: compute a - b, result sign stays zSign. */
    zSig = aSig - bSig;
    zExp = aExp;
 normalizeRoundAndPack:
    /* The exponent is passed one lower than the operand exponent. */
    --zExp;
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );

}
1860
1861/*----------------------------------------------------------------------------
1862| Returns the result of adding the single-precision floating-point values `a'
1863| and `b'. The operation is performed according to the IEC/IEEE Standard for
1864| Binary Floating-Point Arithmetic.
1865*----------------------------------------------------------------------------*/
1866
1867float32 float32_add( float32 a, float32 b STATUS_PARAM )
1868{
1869 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00001870 a = float32_squash_input_denormal(a STATUS_VAR);
1871 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001872
1873 aSign = extractFloat32Sign( a );
1874 bSign = extractFloat32Sign( b );
1875 if ( aSign == bSign ) {
1876 return addFloat32Sigs( a, b, aSign STATUS_VAR);
1877 }
1878 else {
1879 return subFloat32Sigs( a, b, aSign STATUS_VAR );
1880 }
1881
1882}
1883
1884/*----------------------------------------------------------------------------
1885| Returns the result of subtracting the single-precision floating-point values
1886| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1887| for Binary Floating-Point Arithmetic.
1888*----------------------------------------------------------------------------*/
1889
1890float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1891{
1892 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00001893 a = float32_squash_input_denormal(a STATUS_VAR);
1894 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001895
1896 aSign = extractFloat32Sign( a );
1897 bSign = extractFloat32Sign( b );
1898 if ( aSign == bSign ) {
1899 return subFloat32Sigs( a, b, aSign STATUS_VAR );
1900 }
1901 else {
1902 return addFloat32Sigs( a, b, aSign STATUS_VAR );
1903 }
1904
1905}
1906
1907/*----------------------------------------------------------------------------
1908| Returns the result of multiplying the single-precision floating-point values
1909| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1910| for Binary Floating-Point Arithmetic.
1911*----------------------------------------------------------------------------*/
1912
1913float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1914{
1915 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001916 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001917 uint32_t aSig, bSig;
1918 uint64_t zSig64;
1919 uint32_t zSig;
bellard158142c2005-03-13 16:54:06 +00001920
Peter Maydell37d18662011-01-06 19:37:53 +00001921 a = float32_squash_input_denormal(a STATUS_VAR);
1922 b = float32_squash_input_denormal(b STATUS_VAR);
1923
bellard158142c2005-03-13 16:54:06 +00001924 aSig = extractFloat32Frac( a );
1925 aExp = extractFloat32Exp( a );
1926 aSign = extractFloat32Sign( a );
1927 bSig = extractFloat32Frac( b );
1928 bExp = extractFloat32Exp( b );
1929 bSign = extractFloat32Sign( b );
1930 zSign = aSign ^ bSign;
1931 if ( aExp == 0xFF ) {
1932 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1933 return propagateFloat32NaN( a, b STATUS_VAR );
1934 }
1935 if ( ( bExp | bSig ) == 0 ) {
1936 float_raise( float_flag_invalid STATUS_VAR);
1937 return float32_default_nan;
1938 }
1939 return packFloat32( zSign, 0xFF, 0 );
1940 }
1941 if ( bExp == 0xFF ) {
1942 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1943 if ( ( aExp | aSig ) == 0 ) {
1944 float_raise( float_flag_invalid STATUS_VAR);
1945 return float32_default_nan;
1946 }
1947 return packFloat32( zSign, 0xFF, 0 );
1948 }
1949 if ( aExp == 0 ) {
1950 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1951 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1952 }
1953 if ( bExp == 0 ) {
1954 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1955 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1956 }
1957 zExp = aExp + bExp - 0x7F;
1958 aSig = ( aSig | 0x00800000 )<<7;
1959 bSig = ( bSig | 0x00800000 )<<8;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001960 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
bellard158142c2005-03-13 16:54:06 +00001961 zSig = zSig64;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001962 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00001963 zSig <<= 1;
1964 --zExp;
1965 }
1966 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1967
1968}
1969
1970/*----------------------------------------------------------------------------
1971| Returns the result of dividing the single-precision floating-point value `a'
1972| by the corresponding value `b'. The operation is performed according to the
1973| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1974*----------------------------------------------------------------------------*/
1975
1976float32 float32_div( float32 a, float32 b STATUS_PARAM )
1977{
1978 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001979 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001980 uint32_t aSig, bSig, zSig;
Peter Maydell37d18662011-01-06 19:37:53 +00001981 a = float32_squash_input_denormal(a STATUS_VAR);
1982 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001983
1984 aSig = extractFloat32Frac( a );
1985 aExp = extractFloat32Exp( a );
1986 aSign = extractFloat32Sign( a );
1987 bSig = extractFloat32Frac( b );
1988 bExp = extractFloat32Exp( b );
1989 bSign = extractFloat32Sign( b );
1990 zSign = aSign ^ bSign;
1991 if ( aExp == 0xFF ) {
1992 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1993 if ( bExp == 0xFF ) {
1994 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1995 float_raise( float_flag_invalid STATUS_VAR);
1996 return float32_default_nan;
1997 }
1998 return packFloat32( zSign, 0xFF, 0 );
1999 }
2000 if ( bExp == 0xFF ) {
2001 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2002 return packFloat32( zSign, 0, 0 );
2003 }
2004 if ( bExp == 0 ) {
2005 if ( bSig == 0 ) {
2006 if ( ( aExp | aSig ) == 0 ) {
2007 float_raise( float_flag_invalid STATUS_VAR);
2008 return float32_default_nan;
2009 }
2010 float_raise( float_flag_divbyzero STATUS_VAR);
2011 return packFloat32( zSign, 0xFF, 0 );
2012 }
2013 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2014 }
2015 if ( aExp == 0 ) {
2016 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2017 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2018 }
2019 zExp = aExp - bExp + 0x7D;
2020 aSig = ( aSig | 0x00800000 )<<7;
2021 bSig = ( bSig | 0x00800000 )<<8;
2022 if ( bSig <= ( aSig + aSig ) ) {
2023 aSig >>= 1;
2024 ++zExp;
2025 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002026 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard158142c2005-03-13 16:54:06 +00002027 if ( ( zSig & 0x3F ) == 0 ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002028 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
bellard158142c2005-03-13 16:54:06 +00002029 }
2030 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2031
2032}
2033
2034/*----------------------------------------------------------------------------
2035| Returns the remainder of the single-precision floating-point value `a'
2036| with respect to the corresponding value `b'. The operation is performed
2037| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2038*----------------------------------------------------------------------------*/
2039
float32 float32_rem( float32 a, float32 b STATUS_PARAM )
{
    flag aSign, zSign;
    int_fast16_t aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a STATUS_VAR);
    b = float32_squash_input_denormal(b STATUS_VAR);

    /* Unpack; the sign of b is never consulted. */
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    if ( aExp == 0xFF ) {
        /* Any NaN operand propagates; otherwise a is infinite: invalid. */
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN( a, b STATUS_VAR );
        }
        float_raise( float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if ( bExp == 0xFF ) {
        /* b NaN propagates; b infinite leaves a unchanged. */
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder with respect to zero: invalid. */
            float_raise( float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* A zero dividend is returned unchanged. */
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the leading ones explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: the quotient fits in 32 bits. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* a's exponent is at least two below b's: a is returned as is. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        /* First quotient bit by direct comparison. */
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            /* Obtain the remaining quotient bits from one 64/32 division. */
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /*
         * Large exponent gap: reduce the dividend in steps of 62 exponent
         * units using estimated 64-bit quotients.  Each estimate is lowered
         * by 2 (clamped at 0) before use.
         * NOTE(review): presumably so that it never exceeds the true
         * quotient -- confirm against estimateDiv128To64.
         */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        /* Final partial step covering the remaining expDiff bits. */
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /*
     * Keep subtracting bSig until the running remainder goes negative,
     * remembering the last non-negative value in alternateASig.
     */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    /*
     * Choose between the negative candidate (aSig) and the non-negative one
     * (alternateASig) from the sign of their sum; on an exact tie the low
     * bit of q decides.
     */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* A negative remainder flips the result sign relative to a. */
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );

}
2134
2135/*----------------------------------------------------------------------------
Peter Maydell369be8f2011-10-19 16:14:06 +00002136| Returns the result of multiplying the single-precision floating-point values
2137| `a' and `b' then adding 'c', with no intermediate rounding step after the
2138| multiplication. The operation is performed according to the IEC/IEEE
2139| Standard for Binary Floating-Point Arithmetic 754-2008.
2140| The flags argument allows the caller to select negation of the
2141| addend, the intermediate product, or the final result. (The difference
2142| between this and having the caller do a separate negation is that negating
2143| externally will flip the sign bit on NaNs.)
2144*----------------------------------------------------------------------------*/
2145
2146float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
2147{
2148 flag aSign, bSign, cSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002149 int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell369be8f2011-10-19 16:14:06 +00002150 uint32_t aSig, bSig, cSig;
2151 flag pInf, pZero, pSign;
2152 uint64_t pSig64, cSig64, zSig64;
2153 uint32_t pSig;
2154 int shiftcount;
2155 flag signflip, infzero;
2156
2157 a = float32_squash_input_denormal(a STATUS_VAR);
2158 b = float32_squash_input_denormal(b STATUS_VAR);
2159 c = float32_squash_input_denormal(c STATUS_VAR);
2160 aSig = extractFloat32Frac(a);
2161 aExp = extractFloat32Exp(a);
2162 aSign = extractFloat32Sign(a);
2163 bSig = extractFloat32Frac(b);
2164 bExp = extractFloat32Exp(b);
2165 bSign = extractFloat32Sign(b);
2166 cSig = extractFloat32Frac(c);
2167 cExp = extractFloat32Exp(c);
2168 cSign = extractFloat32Sign(c);
2169
2170 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2171 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2172
2173 /* It is implementation-defined whether the cases of (0,inf,qnan)
2174 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
2175 * they return if they do), so we have to hand this information
2176 * off to the target-specific pick-a-NaN routine.
2177 */
2178 if (((aExp == 0xff) && aSig) ||
2179 ((bExp == 0xff) && bSig) ||
2180 ((cExp == 0xff) && cSig)) {
2181 return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
2182 }
2183
2184 if (infzero) {
2185 float_raise(float_flag_invalid STATUS_VAR);
2186 return float32_default_nan;
2187 }
2188
2189 if (flags & float_muladd_negate_c) {
2190 cSign ^= 1;
2191 }
2192
2193 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2194
2195 /* Work out the sign and type of the product */
2196 pSign = aSign ^ bSign;
2197 if (flags & float_muladd_negate_product) {
2198 pSign ^= 1;
2199 }
2200 pInf = (aExp == 0xff) || (bExp == 0xff);
2201 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2202
2203 if (cExp == 0xff) {
2204 if (pInf && (pSign ^ cSign)) {
2205 /* addition of opposite-signed infinities => InvalidOperation */
2206 float_raise(float_flag_invalid STATUS_VAR);
2207 return float32_default_nan;
2208 }
2209 /* Otherwise generate an infinity of the same sign */
2210 return packFloat32(cSign ^ signflip, 0xff, 0);
2211 }
2212
2213 if (pInf) {
2214 return packFloat32(pSign ^ signflip, 0xff, 0);
2215 }
2216
2217 if (pZero) {
2218 if (cExp == 0) {
2219 if (cSig == 0) {
2220 /* Adding two exact zeroes */
2221 if (pSign == cSign) {
2222 zSign = pSign;
2223 } else if (STATUS(float_rounding_mode) == float_round_down) {
2224 zSign = 1;
2225 } else {
2226 zSign = 0;
2227 }
2228 return packFloat32(zSign ^ signflip, 0, 0);
2229 }
2230 /* Exact zero plus a denorm */
2231 if (STATUS(flush_to_zero)) {
2232 float_raise(float_flag_output_denormal STATUS_VAR);
2233 return packFloat32(cSign ^ signflip, 0, 0);
2234 }
2235 }
2236 /* Zero plus something non-zero : just return the something */
Richard Sandiforda6e7c182013-01-22 17:03:05 +00002237 return packFloat32(cSign ^ signflip, cExp, cSig);
Peter Maydell369be8f2011-10-19 16:14:06 +00002238 }
2239
2240 if (aExp == 0) {
2241 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2242 }
2243 if (bExp == 0) {
2244 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2245 }
2246
2247 /* Calculate the actual result a * b + c */
2248
2249 /* Multiply first; this is easy. */
2250 /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
2251 * because we want the true exponent, not the "one-less-than"
2252 * flavour that roundAndPackFloat32() takes.
2253 */
2254 pExp = aExp + bExp - 0x7e;
2255 aSig = (aSig | 0x00800000) << 7;
2256 bSig = (bSig | 0x00800000) << 8;
2257 pSig64 = (uint64_t)aSig * bSig;
2258 if ((int64_t)(pSig64 << 1) >= 0) {
2259 pSig64 <<= 1;
2260 pExp--;
2261 }
2262
2263 zSign = pSign ^ signflip;
2264
2265 /* Now pSig64 is the significand of the multiply, with the explicit bit in
2266 * position 62.
2267 */
2268 if (cExp == 0) {
2269 if (!cSig) {
2270 /* Throw out the special case of c being an exact zero now */
2271 shift64RightJamming(pSig64, 32, &pSig64);
2272 pSig = pSig64;
2273 return roundAndPackFloat32(zSign, pExp - 1,
2274 pSig STATUS_VAR);
2275 }
2276 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2277 }
2278
2279 cSig64 = (uint64_t)cSig << (62 - 23);
2280 cSig64 |= LIT64(0x4000000000000000);
2281 expDiff = pExp - cExp;
2282
2283 if (pSign == cSign) {
2284 /* Addition */
2285 if (expDiff > 0) {
2286 /* scale c to match p */
2287 shift64RightJamming(cSig64, expDiff, &cSig64);
2288 zExp = pExp;
2289 } else if (expDiff < 0) {
2290 /* scale p to match c */
2291 shift64RightJamming(pSig64, -expDiff, &pSig64);
2292 zExp = cExp;
2293 } else {
2294 /* no scaling needed */
2295 zExp = cExp;
2296 }
2297 /* Add significands and make sure explicit bit ends up in posn 62 */
2298 zSig64 = pSig64 + cSig64;
2299 if ((int64_t)zSig64 < 0) {
2300 shift64RightJamming(zSig64, 1, &zSig64);
2301 } else {
2302 zExp--;
2303 }
2304 } else {
2305 /* Subtraction */
2306 if (expDiff > 0) {
2307 shift64RightJamming(cSig64, expDiff, &cSig64);
2308 zSig64 = pSig64 - cSig64;
2309 zExp = pExp;
2310 } else if (expDiff < 0) {
2311 shift64RightJamming(pSig64, -expDiff, &pSig64);
2312 zSig64 = cSig64 - pSig64;
2313 zExp = cExp;
2314 zSign ^= 1;
2315 } else {
2316 zExp = pExp;
2317 if (cSig64 < pSig64) {
2318 zSig64 = pSig64 - cSig64;
2319 } else if (pSig64 < cSig64) {
2320 zSig64 = cSig64 - pSig64;
2321 zSign ^= 1;
2322 } else {
2323 /* Exact zero */
2324 zSign = signflip;
2325 if (STATUS(float_rounding_mode) == float_round_down) {
2326 zSign ^= 1;
2327 }
2328 return packFloat32(zSign, 0, 0);
2329 }
2330 }
2331 --zExp;
2332 /* Normalize to put the explicit bit back into bit 62. */
2333 shiftcount = countLeadingZeros64(zSig64) - 1;
2334 zSig64 <<= shiftcount;
2335 zExp -= shiftcount;
2336 }
2337 shift64RightJamming(zSig64, 32, &zSig64);
2338 return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
2339}
2340
2341
2342/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002343| Returns the square root of the single-precision floating-point value `a'.
2344| The operation is performed according to the IEC/IEEE Standard for Binary
2345| Floating-Point Arithmetic.
2346*----------------------------------------------------------------------------*/
2347
2348float32 float32_sqrt( float32 a STATUS_PARAM )
2349{
2350 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002351 int_fast16_t aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002352 uint32_t aSig, zSig;
2353 uint64_t rem, term;
Peter Maydell37d18662011-01-06 19:37:53 +00002354 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002355
2356 aSig = extractFloat32Frac( a );
2357 aExp = extractFloat32Exp( a );
2358 aSign = extractFloat32Sign( a );
2359 if ( aExp == 0xFF ) {
pbrookf090c9d2007-11-18 14:33:24 +00002360 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00002361 if ( ! aSign ) return a;
2362 float_raise( float_flag_invalid STATUS_VAR);
2363 return float32_default_nan;
2364 }
2365 if ( aSign ) {
2366 if ( ( aExp | aSig ) == 0 ) return a;
2367 float_raise( float_flag_invalid STATUS_VAR);
2368 return float32_default_nan;
2369 }
2370 if ( aExp == 0 ) {
pbrookf090c9d2007-11-18 14:33:24 +00002371 if ( aSig == 0 ) return float32_zero;
bellard158142c2005-03-13 16:54:06 +00002372 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2373 }
2374 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2375 aSig = ( aSig | 0x00800000 )<<8;
2376 zSig = estimateSqrt32( aExp, aSig ) + 2;
2377 if ( ( zSig & 0x7F ) <= 5 ) {
2378 if ( zSig < 2 ) {
2379 zSig = 0x7FFFFFFF;
2380 goto roundAndPack;
2381 }
2382 aSig >>= aExp & 1;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002383 term = ( (uint64_t) zSig ) * zSig;
2384 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2385 while ( (int64_t) rem < 0 ) {
bellard158142c2005-03-13 16:54:06 +00002386 --zSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002387 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
bellard158142c2005-03-13 16:54:06 +00002388 }
2389 zSig |= ( rem != 0 );
2390 }
2391 shift32RightJamming( zSig, 1, &zSig );
2392 roundAndPack:
2393 return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2394
2395}
2396
2397/*----------------------------------------------------------------------------
Aurelien Jarno8229c992009-02-05 12:04:05 +01002398| Returns the binary exponential of the single-precision floating-point value
2399| `a'. The operation is performed according to the IEC/IEEE Standard for
2400| Binary Floating-Point Arithmetic.
2401|
2402| Uses the following identities:
2403|
2404| 1. -------------------------------------------------------------------------
2405| x x*ln(2)
2406| 2 = e
2407|
2408| 2. -------------------------------------------------------------------------
2409| 2 3 4 5 n
2410| x x x x x x x
2411| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2412| 1! 2! 3! 4! 5! n!
2413*----------------------------------------------------------------------------*/
2414
2415static const float64 float32_exp2_coefficients[15] =
2416{
Peter Maydelld5138cf2011-02-10 13:59:34 +00002417 const_float64( 0x3ff0000000000000ll ), /* 1 */
2418 const_float64( 0x3fe0000000000000ll ), /* 2 */
2419 const_float64( 0x3fc5555555555555ll ), /* 3 */
2420 const_float64( 0x3fa5555555555555ll ), /* 4 */
2421 const_float64( 0x3f81111111111111ll ), /* 5 */
2422 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
2423 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
2424 const_float64( 0x3efa01a01a01a01all ), /* 8 */
2425 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
2426 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2427 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2428 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2429 const_float64( 0x3de6124613a86d09ll ), /* 13 */
2430 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2431 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
Aurelien Jarno8229c992009-02-05 12:04:05 +01002432};
2433
2434float32 float32_exp2( float32 a STATUS_PARAM )
2435{
2436 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002437 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002438 uint32_t aSig;
Aurelien Jarno8229c992009-02-05 12:04:05 +01002439 float64 r, x, xn;
2440 int i;
Peter Maydell37d18662011-01-06 19:37:53 +00002441 a = float32_squash_input_denormal(a STATUS_VAR);
Aurelien Jarno8229c992009-02-05 12:04:05 +01002442
2443 aSig = extractFloat32Frac( a );
2444 aExp = extractFloat32Exp( a );
2445 aSign = extractFloat32Sign( a );
2446
2447 if ( aExp == 0xFF) {
2448 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2449 return (aSign) ? float32_zero : a;
2450 }
2451 if (aExp == 0) {
2452 if (aSig == 0) return float32_one;
2453 }
2454
2455 float_raise( float_flag_inexact STATUS_VAR);
2456
2457 /* ******************************* */
2458 /* using float64 for approximation */
2459 /* ******************************* */
2460 x = float32_to_float64(a STATUS_VAR);
2461 x = float64_mul(x, float64_ln2 STATUS_VAR);
2462
2463 xn = x;
2464 r = float64_one;
2465 for (i = 0 ; i < 15 ; i++) {
2466 float64 f;
2467
2468 f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
2469 r = float64_add(r, f STATUS_VAR);
2470
2471 xn = float64_mul(xn, x STATUS_VAR);
2472 }
2473
2474 return float64_to_float32(r, status);
2475}
2476
2477/*----------------------------------------------------------------------------
aurel32374dfc32009-02-05 13:42:47 +00002478| Returns the binary log of the single-precision floating-point value `a'.
2479| The operation is performed according to the IEC/IEEE Standard for Binary
2480| Floating-Point Arithmetic.
2481*----------------------------------------------------------------------------*/
2482float32 float32_log2( float32 a STATUS_PARAM )
2483{
2484 flag aSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002485 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002486 uint32_t aSig, zSig, i;
aurel32374dfc32009-02-05 13:42:47 +00002487
Peter Maydell37d18662011-01-06 19:37:53 +00002488 a = float32_squash_input_denormal(a STATUS_VAR);
aurel32374dfc32009-02-05 13:42:47 +00002489 aSig = extractFloat32Frac( a );
2490 aExp = extractFloat32Exp( a );
2491 aSign = extractFloat32Sign( a );
2492
2493 if ( aExp == 0 ) {
2494 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2495 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2496 }
2497 if ( aSign ) {
2498 float_raise( float_flag_invalid STATUS_VAR);
2499 return float32_default_nan;
2500 }
2501 if ( aExp == 0xFF ) {
2502 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2503 return a;
2504 }
2505
2506 aExp -= 0x7F;
2507 aSig |= 0x00800000;
2508 zSign = aExp < 0;
2509 zSig = aExp << 23;
2510
2511 for (i = 1 << 22; i > 0; i >>= 1) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002512 aSig = ( (uint64_t)aSig * aSig ) >> 23;
aurel32374dfc32009-02-05 13:42:47 +00002513 if ( aSig & 0x01000000 ) {
2514 aSig >>= 1;
2515 zSig |= i;
2516 }
2517 }
2518
2519 if ( zSign )
2520 zSig = -zSig;
2521
2522 return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2523}
2524
2525/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002526| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarnob6893622011-04-14 00:49:29 +02002527| the corresponding value `b', and 0 otherwise. The invalid exception is
2528| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00002529| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2530*----------------------------------------------------------------------------*/
2531
Aurelien Jarnob6893622011-04-14 00:49:29 +02002532int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002533{
Aurelien Jarnob6893622011-04-14 00:49:29 +02002534 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002535 a = float32_squash_input_denormal(a STATUS_VAR);
2536 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002537
2538 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2539 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2540 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02002541 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002542 return 0;
2543 }
Aurelien Jarnob6893622011-04-14 00:49:29 +02002544 av = float32_val(a);
2545 bv = float32_val(b);
2546 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00002547}
2548
2549/*----------------------------------------------------------------------------
2550| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002551| or equal to the corresponding value `b', and 0 otherwise. The invalid
2552| exception is raised if either operand is a NaN. The comparison is performed
2553| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002554*----------------------------------------------------------------------------*/
2555
bellard750afe92006-10-28 19:27:11 +00002556int float32_le( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002557{
2558 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002559 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002560 a = float32_squash_input_denormal(a STATUS_VAR);
2561 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002562
2563 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2564 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2565 ) {
2566 float_raise( float_flag_invalid STATUS_VAR);
2567 return 0;
2568 }
2569 aSign = extractFloat32Sign( a );
2570 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002571 av = float32_val(a);
2572 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002573 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002574 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002575
2576}
2577
2578/*----------------------------------------------------------------------------
2579| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002580| the corresponding value `b', and 0 otherwise. The invalid exception is
2581| raised if either operand is a NaN. The comparison is performed according
2582| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002583*----------------------------------------------------------------------------*/
2584
bellard750afe92006-10-28 19:27:11 +00002585int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002586{
2587 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002588 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002589 a = float32_squash_input_denormal(a STATUS_VAR);
2590 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002591
2592 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2593 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2594 ) {
2595 float_raise( float_flag_invalid STATUS_VAR);
2596 return 0;
2597 }
2598 aSign = extractFloat32Sign( a );
2599 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002600 av = float32_val(a);
2601 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002602 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002603 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002604
2605}
2606
2607/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02002608| Returns 1 if the single-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002609| be compared, and 0 otherwise. The invalid exception is raised if either
2610| operand is a NaN. The comparison is performed according to the IEC/IEEE
2611| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02002612*----------------------------------------------------------------------------*/
2613
2614int float32_unordered( float32 a, float32 b STATUS_PARAM )
2615{
2616 a = float32_squash_input_denormal(a STATUS_VAR);
2617 b = float32_squash_input_denormal(b STATUS_VAR);
2618
2619 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2620 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2621 ) {
2622 float_raise( float_flag_invalid STATUS_VAR);
2623 return 1;
2624 }
2625 return 0;
2626}
Aurelien Jarnob6893622011-04-14 00:49:29 +02002627
Aurelien Jarno67b78612011-04-14 00:49:29 +02002628/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002629| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002630| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2631| exception. The comparison is performed according to the IEC/IEEE Standard
2632| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002633*----------------------------------------------------------------------------*/
2634
Aurelien Jarnob6893622011-04-14 00:49:29 +02002635int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002636{
Peter Maydell37d18662011-01-06 19:37:53 +00002637 a = float32_squash_input_denormal(a STATUS_VAR);
2638 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002639
2640 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2641 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2642 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02002643 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2644 float_raise( float_flag_invalid STATUS_VAR);
2645 }
bellard158142c2005-03-13 16:54:06 +00002646 return 0;
2647 }
Aurelien Jarnob6893622011-04-14 00:49:29 +02002648 return ( float32_val(a) == float32_val(b) ) ||
2649 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00002650}
2651
2652/*----------------------------------------------------------------------------
2653| Returns 1 if the single-precision floating-point value `a' is less than or
2654| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2655| cause an exception. Otherwise, the comparison is performed according to the
2656| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2657*----------------------------------------------------------------------------*/
2658
bellard750afe92006-10-28 19:27:11 +00002659int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002660{
2661 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002662 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002663 a = float32_squash_input_denormal(a STATUS_VAR);
2664 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002665
2666 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2667 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2668 ) {
2669 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2670 float_raise( float_flag_invalid STATUS_VAR);
2671 }
2672 return 0;
2673 }
2674 aSign = extractFloat32Sign( a );
2675 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002676 av = float32_val(a);
2677 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002678 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002679 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002680
2681}
2682
2683/*----------------------------------------------------------------------------
2684| Returns 1 if the single-precision floating-point value `a' is less than
2685| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2686| exception. Otherwise, the comparison is performed according to the IEC/IEEE
2687| Standard for Binary Floating-Point Arithmetic.
2688*----------------------------------------------------------------------------*/
2689
bellard750afe92006-10-28 19:27:11 +00002690int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002691{
2692 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002693 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002694 a = float32_squash_input_denormal(a STATUS_VAR);
2695 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002696
2697 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2698 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2699 ) {
2700 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2701 float_raise( float_flag_invalid STATUS_VAR);
2702 }
2703 return 0;
2704 }
2705 aSign = extractFloat32Sign( a );
2706 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002707 av = float32_val(a);
2708 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002709 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002710 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002711
2712}
2713
2714/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02002715| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2716| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
2717| comparison is performed according to the IEC/IEEE Standard for Binary
2718| Floating-Point Arithmetic.
2719*----------------------------------------------------------------------------*/
2720
2721int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
2722{
2723 a = float32_squash_input_denormal(a STATUS_VAR);
2724 b = float32_squash_input_denormal(b STATUS_VAR);
2725
2726 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2727 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2728 ) {
2729 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2730 float_raise( float_flag_invalid STATUS_VAR);
2731 }
2732 return 1;
2733 }
2734 return 0;
2735}
2736
2737/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002738| Returns the result of converting the double-precision floating-point value
2739| `a' to the 32-bit two's complement integer format. The conversion is
2740| performed according to the IEC/IEEE Standard for Binary Floating-Point
2741| Arithmetic---which means in particular that the conversion is rounded
2742| according to the current rounding mode. If `a' is a NaN, the largest
2743| positive integer is returned. Otherwise, if the conversion overflows, the
2744| largest integer with the same sign as `a' is returned.
2745*----------------------------------------------------------------------------*/
2746
2747int32 float64_to_int32( float64 a STATUS_PARAM )
2748{
2749 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002750 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002751 uint64_t aSig;
Peter Maydell37d18662011-01-06 19:37:53 +00002752 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002753
2754 aSig = extractFloat64Frac( a );
2755 aExp = extractFloat64Exp( a );
2756 aSign = extractFloat64Sign( a );
2757 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2758 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2759 shiftCount = 0x42C - aExp;
2760 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2761 return roundAndPackInt32( aSign, aSig STATUS_VAR );
2762
2763}
2764
2765/*----------------------------------------------------------------------------
2766| Returns the result of converting the double-precision floating-point value
2767| `a' to the 32-bit two's complement integer format. The conversion is
2768| performed according to the IEC/IEEE Standard for Binary Floating-Point
2769| Arithmetic, except that the conversion is always rounded toward zero.
2770| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2771| the conversion overflows, the largest integer with the same sign as `a' is
2772| returned.
2773*----------------------------------------------------------------------------*/
2774
2775int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2776{
2777 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002778 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002779 uint64_t aSig, savedASig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01002780 int32_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00002781 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002782
2783 aSig = extractFloat64Frac( a );
2784 aExp = extractFloat64Exp( a );
2785 aSign = extractFloat64Sign( a );
2786 if ( 0x41E < aExp ) {
2787 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2788 goto invalid;
2789 }
2790 else if ( aExp < 0x3FF ) {
2791 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2792 return 0;
2793 }
2794 aSig |= LIT64( 0x0010000000000000 );
2795 shiftCount = 0x433 - aExp;
2796 savedASig = aSig;
2797 aSig >>= shiftCount;
2798 z = aSig;
2799 if ( aSign ) z = - z;
2800 if ( ( z < 0 ) ^ aSign ) {
2801 invalid:
2802 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002803 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00002804 }
2805 if ( ( aSig<<shiftCount ) != savedASig ) {
2806 STATUS(float_exception_flags) |= float_flag_inexact;
2807 }
2808 return z;
2809
2810}
2811
2812/*----------------------------------------------------------------------------
2813| Returns the result of converting the double-precision floating-point value
Peter Maydellcbcef452010-12-07 15:37:34 +00002814| `a' to the 16-bit two's complement integer format. The conversion is
2815| performed according to the IEC/IEEE Standard for Binary Floating-Point
2816| Arithmetic, except that the conversion is always rounded toward zero.
2817| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2818| the conversion overflows, the largest integer with the same sign as `a' is
2819| returned.
2820*----------------------------------------------------------------------------*/
2821
Andreas Färber94a49d82012-04-26 00:15:56 +02002822int_fast16_t float64_to_int16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00002823{
2824 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002825 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002826 uint64_t aSig, savedASig;
Peter Maydellcbcef452010-12-07 15:37:34 +00002827 int32 z;
2828
2829 aSig = extractFloat64Frac( a );
2830 aExp = extractFloat64Exp( a );
2831 aSign = extractFloat64Sign( a );
2832 if ( 0x40E < aExp ) {
2833 if ( ( aExp == 0x7FF ) && aSig ) {
2834 aSign = 0;
2835 }
2836 goto invalid;
2837 }
2838 else if ( aExp < 0x3FF ) {
2839 if ( aExp || aSig ) {
2840 STATUS(float_exception_flags) |= float_flag_inexact;
2841 }
2842 return 0;
2843 }
2844 aSig |= LIT64( 0x0010000000000000 );
2845 shiftCount = 0x433 - aExp;
2846 savedASig = aSig;
2847 aSig >>= shiftCount;
2848 z = aSig;
2849 if ( aSign ) {
2850 z = - z;
2851 }
2852 if ( ( (int16_t)z < 0 ) ^ aSign ) {
2853 invalid:
2854 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002855 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
Peter Maydellcbcef452010-12-07 15:37:34 +00002856 }
2857 if ( ( aSig<<shiftCount ) != savedASig ) {
2858 STATUS(float_exception_flags) |= float_flag_inexact;
2859 }
2860 return z;
2861}
2862
2863/*----------------------------------------------------------------------------
2864| Returns the result of converting the double-precision floating-point value
bellard158142c2005-03-13 16:54:06 +00002865| `a' to the 64-bit two's complement integer format. The conversion is
2866| performed according to the IEC/IEEE Standard for Binary Floating-Point
2867| Arithmetic---which means in particular that the conversion is rounded
2868| according to the current rounding mode. If `a' is a NaN, the largest
2869| positive integer is returned. Otherwise, if the conversion overflows, the
2870| largest integer with the same sign as `a' is returned.
2871*----------------------------------------------------------------------------*/
2872
2873int64 float64_to_int64( float64 a STATUS_PARAM )
2874{
2875 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002876 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002877 uint64_t aSig, aSigExtra;
Peter Maydell37d18662011-01-06 19:37:53 +00002878 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002879
2880 aSig = extractFloat64Frac( a );
2881 aExp = extractFloat64Exp( a );
2882 aSign = extractFloat64Sign( a );
2883 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2884 shiftCount = 0x433 - aExp;
2885 if ( shiftCount <= 0 ) {
2886 if ( 0x43E < aExp ) {
2887 float_raise( float_flag_invalid STATUS_VAR);
2888 if ( ! aSign
2889 || ( ( aExp == 0x7FF )
2890 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2891 ) {
2892 return LIT64( 0x7FFFFFFFFFFFFFFF );
2893 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002894 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00002895 }
2896 aSigExtra = 0;
2897 aSig <<= - shiftCount;
2898 }
2899 else {
2900 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2901 }
2902 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2903
2904}
2905
2906/*----------------------------------------------------------------------------
2907| Returns the result of converting the double-precision floating-point value
2908| `a' to the 64-bit two's complement integer format. The conversion is
2909| performed according to the IEC/IEEE Standard for Binary Floating-Point
2910| Arithmetic, except that the conversion is always rounded toward zero.
2911| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2912| the conversion overflows, the largest integer with the same sign as `a' is
2913| returned.
2914*----------------------------------------------------------------------------*/
2915
2916int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2917{
2918 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002919 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002920 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00002921 int64 z;
Peter Maydell37d18662011-01-06 19:37:53 +00002922 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002923
2924 aSig = extractFloat64Frac( a );
2925 aExp = extractFloat64Exp( a );
2926 aSign = extractFloat64Sign( a );
2927 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2928 shiftCount = aExp - 0x433;
2929 if ( 0 <= shiftCount ) {
2930 if ( 0x43E <= aExp ) {
pbrookf090c9d2007-11-18 14:33:24 +00002931 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard158142c2005-03-13 16:54:06 +00002932 float_raise( float_flag_invalid STATUS_VAR);
2933 if ( ! aSign
2934 || ( ( aExp == 0x7FF )
2935 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2936 ) {
2937 return LIT64( 0x7FFFFFFFFFFFFFFF );
2938 }
2939 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002940 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00002941 }
2942 z = aSig<<shiftCount;
2943 }
2944 else {
2945 if ( aExp < 0x3FE ) {
2946 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2947 return 0;
2948 }
2949 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01002950 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00002951 STATUS(float_exception_flags) |= float_flag_inexact;
2952 }
2953 }
2954 if ( aSign ) z = - z;
2955 return z;
2956
2957}
2958
2959/*----------------------------------------------------------------------------
2960| Returns the result of converting the double-precision floating-point value
2961| `a' to the single-precision floating-point format. The conversion is
2962| performed according to the IEC/IEEE Standard for Binary Floating-Point
2963| Arithmetic.
2964*----------------------------------------------------------------------------*/
2965
2966float32 float64_to_float32( float64 a STATUS_PARAM )
2967{
2968 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002969 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002970 uint64_t aSig;
2971 uint32_t zSig;
Peter Maydell37d18662011-01-06 19:37:53 +00002972 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002973
2974 aSig = extractFloat64Frac( a );
2975 aExp = extractFloat64Exp( a );
2976 aSign = extractFloat64Sign( a );
2977 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00002978 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00002979 return packFloat32( aSign, 0xFF, 0 );
2980 }
2981 shift64RightJamming( aSig, 22, &aSig );
2982 zSig = aSig;
2983 if ( aExp || zSig ) {
2984 zSig |= 0x40000000;
2985 aExp -= 0x381;
2986 }
2987 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2988
2989}
2990
Paul Brook60011492009-11-19 16:45:20 +00002991
2992/*----------------------------------------------------------------------------
2993| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2994| half-precision floating-point value, returning the result. After being
2995| shifted into the proper positions, the three fields are simply added
2996| together to form the result. This means that any integer portion of `zSig'
2997| will be added into the exponent. Since a properly normalized significand
2998| will have an integer portion equal to 1, the `zExp' input should be 1 less
2999| than the desired result exponent whenever `zSig' is a complete, normalized
3000| significand.
3001*----------------------------------------------------------------------------*/
Andreas Färber94a49d82012-04-26 00:15:56 +02003002static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
Paul Brook60011492009-11-19 16:45:20 +00003003{
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003004 return make_float16(
Andreas Färberbb98fe42011-03-07 01:34:06 +01003005 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
Paul Brook60011492009-11-19 16:45:20 +00003006}
3007
3008/* Half precision floats come in two formats: standard IEEE and "ARM" format.
3009 The latter gains extra exponent range by omitting the NaN/Inf encodings. */
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003010
3011float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
Paul Brook60011492009-11-19 16:45:20 +00003012{
3013 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003014 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003015 uint32_t aSig;
Paul Brook60011492009-11-19 16:45:20 +00003016
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003017 aSign = extractFloat16Sign(a);
3018 aExp = extractFloat16Exp(a);
3019 aSig = extractFloat16Frac(a);
Paul Brook60011492009-11-19 16:45:20 +00003020
3021 if (aExp == 0x1f && ieee) {
3022 if (aSig) {
Peter Maydellf591e1b2011-02-10 11:28:59 +00003023 return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003024 }
Peter Maydell4be8eea2012-09-24 17:28:35 +01003025 return packFloat32(aSign, 0xff, 0);
Paul Brook60011492009-11-19 16:45:20 +00003026 }
3027 if (aExp == 0) {
3028 int8 shiftCount;
3029
3030 if (aSig == 0) {
3031 return packFloat32(aSign, 0, 0);
3032 }
3033
3034 shiftCount = countLeadingZeros32( aSig ) - 21;
3035 aSig = aSig << shiftCount;
3036 aExp = -shiftCount;
3037 }
3038 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3039}
3040
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003041float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
Paul Brook60011492009-11-19 16:45:20 +00003042{
3043 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003044 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003045 uint32_t aSig;
3046 uint32_t mask;
3047 uint32_t increment;
Paul Brook60011492009-11-19 16:45:20 +00003048 int8 roundingMode;
Peter Maydell37d18662011-01-06 19:37:53 +00003049 a = float32_squash_input_denormal(a STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003050
3051 aSig = extractFloat32Frac( a );
3052 aExp = extractFloat32Exp( a );
3053 aSign = extractFloat32Sign( a );
3054 if ( aExp == 0xFF ) {
3055 if (aSig) {
Peter Maydell600e30d2011-02-10 11:28:58 +00003056 /* Input is a NaN */
3057 float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3058 if (!ieee) {
3059 return packFloat16(aSign, 0, 0);
3060 }
3061 return r;
Paul Brook60011492009-11-19 16:45:20 +00003062 }
Peter Maydell600e30d2011-02-10 11:28:58 +00003063 /* Infinity */
3064 if (!ieee) {
3065 float_raise(float_flag_invalid STATUS_VAR);
3066 return packFloat16(aSign, 0x1f, 0x3ff);
3067 }
3068 return packFloat16(aSign, 0x1f, 0);
Paul Brook60011492009-11-19 16:45:20 +00003069 }
Peter Maydell600e30d2011-02-10 11:28:58 +00003070 if (aExp == 0 && aSig == 0) {
Paul Brook60011492009-11-19 16:45:20 +00003071 return packFloat16(aSign, 0, 0);
3072 }
3073 /* Decimal point between bits 22 and 23. */
3074 aSig |= 0x00800000;
3075 aExp -= 0x7f;
3076 if (aExp < -14) {
Peter Maydell600e30d2011-02-10 11:28:58 +00003077 mask = 0x00ffffff;
3078 if (aExp >= -24) {
3079 mask >>= 25 + aExp;
Paul Brook60011492009-11-19 16:45:20 +00003080 }
3081 } else {
3082 mask = 0x00001fff;
3083 }
3084 if (aSig & mask) {
3085 float_raise( float_flag_underflow STATUS_VAR );
3086 roundingMode = STATUS(float_rounding_mode);
3087 switch (roundingMode) {
3088 case float_round_nearest_even:
3089 increment = (mask + 1) >> 1;
3090 if ((aSig & mask) == increment) {
3091 increment = aSig & (increment << 1);
3092 }
3093 break;
3094 case float_round_up:
3095 increment = aSign ? 0 : mask;
3096 break;
3097 case float_round_down:
3098 increment = aSign ? mask : 0;
3099 break;
3100 default: /* round_to_zero */
3101 increment = 0;
3102 break;
3103 }
3104 aSig += increment;
3105 if (aSig >= 0x01000000) {
3106 aSig >>= 1;
3107 aExp++;
3108 }
3109 } else if (aExp < -14
3110 && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
3111 float_raise( float_flag_underflow STATUS_VAR);
3112 }
3113
3114 if (ieee) {
3115 if (aExp > 15) {
3116 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
3117 return packFloat16(aSign, 0x1f, 0);
3118 }
3119 } else {
3120 if (aExp > 16) {
Peter Maydell600e30d2011-02-10 11:28:58 +00003121 float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003122 return packFloat16(aSign, 0x1f, 0x3ff);
3123 }
3124 }
3125 if (aExp < -24) {
3126 return packFloat16(aSign, 0, 0);
3127 }
3128 if (aExp < -14) {
3129 aSig >>= -14 - aExp;
3130 aExp = -14;
3131 }
3132 return packFloat16(aSign, aExp + 14, aSig >> 13);
3133}
3134
bellard158142c2005-03-13 16:54:06 +00003135/*----------------------------------------------------------------------------
3136| Returns the result of converting the double-precision floating-point value
3137| `a' to the extended double-precision floating-point format. The conversion
3138| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3139| Arithmetic.
3140*----------------------------------------------------------------------------*/
3141
3142floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
3143{
3144 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003145 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003146 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00003147
Peter Maydell37d18662011-01-06 19:37:53 +00003148 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003149 aSig = extractFloat64Frac( a );
3150 aExp = extractFloat64Exp( a );
3151 aSign = extractFloat64Sign( a );
3152 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00003153 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00003154 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3155 }
3156 if ( aExp == 0 ) {
3157 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3158 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3159 }
3160 return
3161 packFloatx80(
3162 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3163
3164}
3165
bellard158142c2005-03-13 16:54:06 +00003166/*----------------------------------------------------------------------------
3167| Returns the result of converting the double-precision floating-point value
3168| `a' to the quadruple-precision floating-point format. The conversion is
3169| performed according to the IEC/IEEE Standard for Binary Floating-Point
3170| Arithmetic.
3171*----------------------------------------------------------------------------*/
3172
3173float128 float64_to_float128( float64 a STATUS_PARAM )
3174{
3175 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003176 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003177 uint64_t aSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00003178
Peter Maydell37d18662011-01-06 19:37:53 +00003179 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003180 aSig = extractFloat64Frac( a );
3181 aExp = extractFloat64Exp( a );
3182 aSign = extractFloat64Sign( a );
3183 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00003184 if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00003185 return packFloat128( aSign, 0x7FFF, 0, 0 );
3186 }
3187 if ( aExp == 0 ) {
3188 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3189 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3190 --aExp;
3191 }
3192 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3193 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3194
3195}
3196
bellard158142c2005-03-13 16:54:06 +00003197/*----------------------------------------------------------------------------
3198| Rounds the double-precision floating-point value `a' to an integer, and
3199| returns the result as a double-precision floating-point value. The
3200| operation is performed according to the IEC/IEEE Standard for Binary
3201| Floating-Point Arithmetic.
3202*----------------------------------------------------------------------------*/
3203
3204float64 float64_round_to_int( float64 a STATUS_PARAM )
3205{
3206 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003207 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003208 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00003209 int8 roundingMode;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003210 uint64_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00003211 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003212
3213 aExp = extractFloat64Exp( a );
3214 if ( 0x433 <= aExp ) {
3215 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3216 return propagateFloat64NaN( a, a STATUS_VAR );
3217 }
3218 return a;
3219 }
3220 if ( aExp < 0x3FF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01003221 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00003222 STATUS(float_exception_flags) |= float_flag_inexact;
3223 aSign = extractFloat64Sign( a );
3224 switch ( STATUS(float_rounding_mode) ) {
3225 case float_round_nearest_even:
3226 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3227 return packFloat64( aSign, 0x3FF, 0 );
3228 }
3229 break;
3230 case float_round_down:
pbrookf090c9d2007-11-18 14:33:24 +00003231 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard158142c2005-03-13 16:54:06 +00003232 case float_round_up:
pbrookf090c9d2007-11-18 14:33:24 +00003233 return make_float64(
3234 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard158142c2005-03-13 16:54:06 +00003235 }
3236 return packFloat64( aSign, 0, 0 );
3237 }
3238 lastBitMask = 1;
3239 lastBitMask <<= 0x433 - aExp;
3240 roundBitsMask = lastBitMask - 1;
pbrookf090c9d2007-11-18 14:33:24 +00003241 z = float64_val(a);
bellard158142c2005-03-13 16:54:06 +00003242 roundingMode = STATUS(float_rounding_mode);
3243 if ( roundingMode == float_round_nearest_even ) {
3244 z += lastBitMask>>1;
3245 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
3246 }
3247 else if ( roundingMode != float_round_to_zero ) {
pbrookf090c9d2007-11-18 14:33:24 +00003248 if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard158142c2005-03-13 16:54:06 +00003249 z += roundBitsMask;
3250 }
3251 }
3252 z &= ~ roundBitsMask;
pbrookf090c9d2007-11-18 14:33:24 +00003253 if ( z != float64_val(a) )
3254 STATUS(float_exception_flags) |= float_flag_inexact;
3255 return make_float64(z);
bellard158142c2005-03-13 16:54:06 +00003256
3257}
3258
pbrooke6e59062006-10-22 00:18:54 +00003259float64 float64_trunc_to_int( float64 a STATUS_PARAM)
3260{
3261 int oldmode;
3262 float64 res;
3263 oldmode = STATUS(float_rounding_mode);
3264 STATUS(float_rounding_mode) = float_round_to_zero;
3265 res = float64_round_to_int(a STATUS_VAR);
3266 STATUS(float_rounding_mode) = oldmode;
3267 return res;
3268}
3269
bellard158142c2005-03-13 16:54:06 +00003270/*----------------------------------------------------------------------------
3271| Returns the result of adding the absolute values of the double-precision
3272| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
3273| before being returned. `zSign' is ignored if the result is a NaN.
3274| The addition is performed according to the IEC/IEEE Standard for Binary
3275| Floating-Point Arithmetic.
3276*----------------------------------------------------------------------------*/
3277
3278static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3279{
Andreas Färber94a49d82012-04-26 00:15:56 +02003280 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003281 uint64_t aSig, bSig, zSig;
Andreas Färber94a49d82012-04-26 00:15:56 +02003282 int_fast16_t expDiff;
bellard158142c2005-03-13 16:54:06 +00003283
3284 aSig = extractFloat64Frac( a );
3285 aExp = extractFloat64Exp( a );
3286 bSig = extractFloat64Frac( b );
3287 bExp = extractFloat64Exp( b );
3288 expDiff = aExp - bExp;
3289 aSig <<= 9;
3290 bSig <<= 9;
3291 if ( 0 < expDiff ) {
3292 if ( aExp == 0x7FF ) {
3293 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3294 return a;
3295 }
3296 if ( bExp == 0 ) {
3297 --expDiff;
3298 }
3299 else {
3300 bSig |= LIT64( 0x2000000000000000 );
3301 }
3302 shift64RightJamming( bSig, expDiff, &bSig );
3303 zExp = aExp;
3304 }
3305 else if ( expDiff < 0 ) {
3306 if ( bExp == 0x7FF ) {
3307 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3308 return packFloat64( zSign, 0x7FF, 0 );
3309 }
3310 if ( aExp == 0 ) {
3311 ++expDiff;
3312 }
3313 else {
3314 aSig |= LIT64( 0x2000000000000000 );
3315 }
3316 shift64RightJamming( aSig, - expDiff, &aSig );
3317 zExp = bExp;
3318 }
3319 else {
3320 if ( aExp == 0x7FF ) {
3321 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3322 return a;
3323 }
pbrookfe76d972008-12-19 14:33:59 +00003324 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01003325 if (STATUS(flush_to_zero)) {
3326 if (aSig | bSig) {
3327 float_raise(float_flag_output_denormal STATUS_VAR);
3328 }
3329 return packFloat64(zSign, 0, 0);
3330 }
pbrookfe76d972008-12-19 14:33:59 +00003331 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3332 }
bellard158142c2005-03-13 16:54:06 +00003333 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3334 zExp = aExp;
3335 goto roundAndPack;
3336 }
3337 aSig |= LIT64( 0x2000000000000000 );
3338 zSig = ( aSig + bSig )<<1;
3339 --zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003340 if ( (int64_t) zSig < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003341 zSig = aSig + bSig;
3342 ++zExp;
3343 }
3344 roundAndPack:
3345 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3346
3347}
3348
3349/*----------------------------------------------------------------------------
3350| Returns the result of subtracting the absolute values of the double-
3351| precision floating-point values `a' and `b'. If `zSign' is 1, the
3352| difference is negated before being returned. `zSign' is ignored if the
3353| result is a NaN. The subtraction is performed according to the IEC/IEEE
3354| Standard for Binary Floating-Point Arithmetic.
3355*----------------------------------------------------------------------------*/
3356
3357static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3358{
Andreas Färber94a49d82012-04-26 00:15:56 +02003359 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003360 uint64_t aSig, bSig, zSig;
Andreas Färber94a49d82012-04-26 00:15:56 +02003361 int_fast16_t expDiff;
bellard158142c2005-03-13 16:54:06 +00003362
3363 aSig = extractFloat64Frac( a );
3364 aExp = extractFloat64Exp( a );
3365 bSig = extractFloat64Frac( b );
3366 bExp = extractFloat64Exp( b );
3367 expDiff = aExp - bExp;
3368 aSig <<= 10;
3369 bSig <<= 10;
3370 if ( 0 < expDiff ) goto aExpBigger;
3371 if ( expDiff < 0 ) goto bExpBigger;
3372 if ( aExp == 0x7FF ) {
3373 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3374 float_raise( float_flag_invalid STATUS_VAR);
3375 return float64_default_nan;
3376 }
3377 if ( aExp == 0 ) {
3378 aExp = 1;
3379 bExp = 1;
3380 }
3381 if ( bSig < aSig ) goto aBigger;
3382 if ( aSig < bSig ) goto bBigger;
3383 return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3384 bExpBigger:
3385 if ( bExp == 0x7FF ) {
3386 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3387 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3388 }
3389 if ( aExp == 0 ) {
3390 ++expDiff;
3391 }
3392 else {
3393 aSig |= LIT64( 0x4000000000000000 );
3394 }
3395 shift64RightJamming( aSig, - expDiff, &aSig );
3396 bSig |= LIT64( 0x4000000000000000 );
3397 bBigger:
3398 zSig = bSig - aSig;
3399 zExp = bExp;
3400 zSign ^= 1;
3401 goto normalizeRoundAndPack;
3402 aExpBigger:
3403 if ( aExp == 0x7FF ) {
3404 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3405 return a;
3406 }
3407 if ( bExp == 0 ) {
3408 --expDiff;
3409 }
3410 else {
3411 bSig |= LIT64( 0x4000000000000000 );
3412 }
3413 shift64RightJamming( bSig, expDiff, &bSig );
3414 aSig |= LIT64( 0x4000000000000000 );
3415 aBigger:
3416 zSig = aSig - bSig;
3417 zExp = aExp;
3418 normalizeRoundAndPack:
3419 --zExp;
3420 return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3421
3422}
3423
3424/*----------------------------------------------------------------------------
3425| Returns the result of adding the double-precision floating-point values `a'
3426| and `b'. The operation is performed according to the IEC/IEEE Standard for
3427| Binary Floating-Point Arithmetic.
3428*----------------------------------------------------------------------------*/
3429
3430float64 float64_add( float64 a, float64 b STATUS_PARAM )
3431{
3432 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00003433 a = float64_squash_input_denormal(a STATUS_VAR);
3434 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003435
3436 aSign = extractFloat64Sign( a );
3437 bSign = extractFloat64Sign( b );
3438 if ( aSign == bSign ) {
3439 return addFloat64Sigs( a, b, aSign STATUS_VAR );
3440 }
3441 else {
3442 return subFloat64Sigs( a, b, aSign STATUS_VAR );
3443 }
3444
3445}
3446
3447/*----------------------------------------------------------------------------
3448| Returns the result of subtracting the double-precision floating-point values
3449| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3450| for Binary Floating-Point Arithmetic.
3451*----------------------------------------------------------------------------*/
3452
3453float64 float64_sub( float64 a, float64 b STATUS_PARAM )
3454{
3455 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00003456 a = float64_squash_input_denormal(a STATUS_VAR);
3457 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003458
3459 aSign = extractFloat64Sign( a );
3460 bSign = extractFloat64Sign( b );
3461 if ( aSign == bSign ) {
3462 return subFloat64Sigs( a, b, aSign STATUS_VAR );
3463 }
3464 else {
3465 return addFloat64Sigs( a, b, aSign STATUS_VAR );
3466 }
3467
3468}
3469
3470/*----------------------------------------------------------------------------
3471| Returns the result of multiplying the double-precision floating-point values
3472| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3473| for Binary Floating-Point Arithmetic.
3474*----------------------------------------------------------------------------*/
3475
3476float64 float64_mul( float64 a, float64 b STATUS_PARAM )
3477{
3478 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003479 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003480 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00003481
Peter Maydell37d18662011-01-06 19:37:53 +00003482 a = float64_squash_input_denormal(a STATUS_VAR);
3483 b = float64_squash_input_denormal(b STATUS_VAR);
3484
bellard158142c2005-03-13 16:54:06 +00003485 aSig = extractFloat64Frac( a );
3486 aExp = extractFloat64Exp( a );
3487 aSign = extractFloat64Sign( a );
3488 bSig = extractFloat64Frac( b );
3489 bExp = extractFloat64Exp( b );
3490 bSign = extractFloat64Sign( b );
3491 zSign = aSign ^ bSign;
3492 if ( aExp == 0x7FF ) {
3493 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3494 return propagateFloat64NaN( a, b STATUS_VAR );
3495 }
3496 if ( ( bExp | bSig ) == 0 ) {
3497 float_raise( float_flag_invalid STATUS_VAR);
3498 return float64_default_nan;
3499 }
3500 return packFloat64( zSign, 0x7FF, 0 );
3501 }
3502 if ( bExp == 0x7FF ) {
3503 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3504 if ( ( aExp | aSig ) == 0 ) {
3505 float_raise( float_flag_invalid STATUS_VAR);
3506 return float64_default_nan;
3507 }
3508 return packFloat64( zSign, 0x7FF, 0 );
3509 }
3510 if ( aExp == 0 ) {
3511 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3512 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3513 }
3514 if ( bExp == 0 ) {
3515 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
3516 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3517 }
3518 zExp = aExp + bExp - 0x3FF;
3519 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3520 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3521 mul64To128( aSig, bSig, &zSig0, &zSig1 );
3522 zSig0 |= ( zSig1 != 0 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003523 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00003524 zSig0 <<= 1;
3525 --zExp;
3526 }
3527 return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
3528
3529}
3530
3531/*----------------------------------------------------------------------------
3532| Returns the result of dividing the double-precision floating-point value `a'
3533| by the corresponding value `b'. The operation is performed according to
3534| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3535*----------------------------------------------------------------------------*/
3536
3537float64 float64_div( float64 a, float64 b STATUS_PARAM )
3538{
3539 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003540 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003541 uint64_t aSig, bSig, zSig;
3542 uint64_t rem0, rem1;
3543 uint64_t term0, term1;
Peter Maydell37d18662011-01-06 19:37:53 +00003544 a = float64_squash_input_denormal(a STATUS_VAR);
3545 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003546
3547 aSig = extractFloat64Frac( a );
3548 aExp = extractFloat64Exp( a );
3549 aSign = extractFloat64Sign( a );
3550 bSig = extractFloat64Frac( b );
3551 bExp = extractFloat64Exp( b );
3552 bSign = extractFloat64Sign( b );
3553 zSign = aSign ^ bSign;
3554 if ( aExp == 0x7FF ) {
3555 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3556 if ( bExp == 0x7FF ) {
3557 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3558 float_raise( float_flag_invalid STATUS_VAR);
3559 return float64_default_nan;
3560 }
3561 return packFloat64( zSign, 0x7FF, 0 );
3562 }
3563 if ( bExp == 0x7FF ) {
3564 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3565 return packFloat64( zSign, 0, 0 );
3566 }
3567 if ( bExp == 0 ) {
3568 if ( bSig == 0 ) {
3569 if ( ( aExp | aSig ) == 0 ) {
3570 float_raise( float_flag_invalid STATUS_VAR);
3571 return float64_default_nan;
3572 }
3573 float_raise( float_flag_divbyzero STATUS_VAR);
3574 return packFloat64( zSign, 0x7FF, 0 );
3575 }
3576 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3577 }
3578 if ( aExp == 0 ) {
3579 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3580 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3581 }
3582 zExp = aExp - bExp + 0x3FD;
3583 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3584 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3585 if ( bSig <= ( aSig + aSig ) ) {
3586 aSig >>= 1;
3587 ++zExp;
3588 }
3589 zSig = estimateDiv128To64( aSig, 0, bSig );
3590 if ( ( zSig & 0x1FF ) <= 2 ) {
3591 mul64To128( bSig, zSig, &term0, &term1 );
3592 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003593 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003594 --zSig;
3595 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3596 }
3597 zSig |= ( rem1 != 0 );
3598 }
3599 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3600
3601}
3602
3603/*----------------------------------------------------------------------------
3604| Returns the remainder of the double-precision floating-point value `a'
3605| with respect to the corresponding value `b'. The operation is performed
3606| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3607*----------------------------------------------------------------------------*/
3608
3609float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3610{
Blue Swirled086f32010-03-07 13:49:58 +00003611 flag aSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003612 int_fast16_t aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003613 uint64_t aSig, bSig;
3614 uint64_t q, alternateASig;
3615 int64_t sigMean;
bellard158142c2005-03-13 16:54:06 +00003616
Peter Maydell37d18662011-01-06 19:37:53 +00003617 a = float64_squash_input_denormal(a STATUS_VAR);
3618 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003619 aSig = extractFloat64Frac( a );
3620 aExp = extractFloat64Exp( a );
3621 aSign = extractFloat64Sign( a );
3622 bSig = extractFloat64Frac( b );
3623 bExp = extractFloat64Exp( b );
bellard158142c2005-03-13 16:54:06 +00003624 if ( aExp == 0x7FF ) {
3625 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3626 return propagateFloat64NaN( a, b STATUS_VAR );
3627 }
3628 float_raise( float_flag_invalid STATUS_VAR);
3629 return float64_default_nan;
3630 }
3631 if ( bExp == 0x7FF ) {
3632 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3633 return a;
3634 }
3635 if ( bExp == 0 ) {
3636 if ( bSig == 0 ) {
3637 float_raise( float_flag_invalid STATUS_VAR);
3638 return float64_default_nan;
3639 }
3640 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3641 }
3642 if ( aExp == 0 ) {
3643 if ( aSig == 0 ) return a;
3644 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3645 }
3646 expDiff = aExp - bExp;
3647 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3648 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3649 if ( expDiff < 0 ) {
3650 if ( expDiff < -1 ) return a;
3651 aSig >>= 1;
3652 }
3653 q = ( bSig <= aSig );
3654 if ( q ) aSig -= bSig;
3655 expDiff -= 64;
3656 while ( 0 < expDiff ) {
3657 q = estimateDiv128To64( aSig, 0, bSig );
3658 q = ( 2 < q ) ? q - 2 : 0;
3659 aSig = - ( ( bSig>>2 ) * q );
3660 expDiff -= 62;
3661 }
3662 expDiff += 64;
3663 if ( 0 < expDiff ) {
3664 q = estimateDiv128To64( aSig, 0, bSig );
3665 q = ( 2 < q ) ? q - 2 : 0;
3666 q >>= 64 - expDiff;
3667 bSig >>= 2;
3668 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3669 }
3670 else {
3671 aSig >>= 2;
3672 bSig >>= 2;
3673 }
3674 do {
3675 alternateASig = aSig;
3676 ++q;
3677 aSig -= bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003678 } while ( 0 <= (int64_t) aSig );
bellard158142c2005-03-13 16:54:06 +00003679 sigMean = aSig + alternateASig;
3680 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3681 aSig = alternateASig;
3682 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01003683 zSign = ( (int64_t) aSig < 0 );
bellard158142c2005-03-13 16:54:06 +00003684 if ( zSign ) aSig = - aSig;
3685 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3686
3687}
3688
3689/*----------------------------------------------------------------------------
Peter Maydell369be8f2011-10-19 16:14:06 +00003690| Returns the result of multiplying the double-precision floating-point values
3691| `a' and `b' then adding 'c', with no intermediate rounding step after the
3692| multiplication. The operation is performed according to the IEC/IEEE
3693| Standard for Binary Floating-Point Arithmetic 754-2008.
3694| The flags argument allows the caller to select negation of the
3695| addend, the intermediate product, or the final result. (The difference
3696| between this and having the caller do a separate negation is that negating
3697| externally will flip the sign bit on NaNs.)
3698*----------------------------------------------------------------------------*/
3699
3700float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
3701{
3702 flag aSign, bSign, cSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003703 int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell369be8f2011-10-19 16:14:06 +00003704 uint64_t aSig, bSig, cSig;
3705 flag pInf, pZero, pSign;
3706 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
3707 int shiftcount;
3708 flag signflip, infzero;
3709
3710 a = float64_squash_input_denormal(a STATUS_VAR);
3711 b = float64_squash_input_denormal(b STATUS_VAR);
3712 c = float64_squash_input_denormal(c STATUS_VAR);
3713 aSig = extractFloat64Frac(a);
3714 aExp = extractFloat64Exp(a);
3715 aSign = extractFloat64Sign(a);
3716 bSig = extractFloat64Frac(b);
3717 bExp = extractFloat64Exp(b);
3718 bSign = extractFloat64Sign(b);
3719 cSig = extractFloat64Frac(c);
3720 cExp = extractFloat64Exp(c);
3721 cSign = extractFloat64Sign(c);
3722
3723 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
3724 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
3725
3726 /* It is implementation-defined whether the cases of (0,inf,qnan)
3727 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
3728 * they return if they do), so we have to hand this information
3729 * off to the target-specific pick-a-NaN routine.
3730 */
3731 if (((aExp == 0x7ff) && aSig) ||
3732 ((bExp == 0x7ff) && bSig) ||
3733 ((cExp == 0x7ff) && cSig)) {
3734 return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
3735 }
3736
3737 if (infzero) {
3738 float_raise(float_flag_invalid STATUS_VAR);
3739 return float64_default_nan;
3740 }
3741
3742 if (flags & float_muladd_negate_c) {
3743 cSign ^= 1;
3744 }
3745
3746 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
3747
3748 /* Work out the sign and type of the product */
3749 pSign = aSign ^ bSign;
3750 if (flags & float_muladd_negate_product) {
3751 pSign ^= 1;
3752 }
3753 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
3754 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
3755
3756 if (cExp == 0x7ff) {
3757 if (pInf && (pSign ^ cSign)) {
3758 /* addition of opposite-signed infinities => InvalidOperation */
3759 float_raise(float_flag_invalid STATUS_VAR);
3760 return float64_default_nan;
3761 }
3762 /* Otherwise generate an infinity of the same sign */
3763 return packFloat64(cSign ^ signflip, 0x7ff, 0);
3764 }
3765
3766 if (pInf) {
3767 return packFloat64(pSign ^ signflip, 0x7ff, 0);
3768 }
3769
3770 if (pZero) {
3771 if (cExp == 0) {
3772 if (cSig == 0) {
3773 /* Adding two exact zeroes */
3774 if (pSign == cSign) {
3775 zSign = pSign;
3776 } else if (STATUS(float_rounding_mode) == float_round_down) {
3777 zSign = 1;
3778 } else {
3779 zSign = 0;
3780 }
3781 return packFloat64(zSign ^ signflip, 0, 0);
3782 }
3783 /* Exact zero plus a denorm */
3784 if (STATUS(flush_to_zero)) {
3785 float_raise(float_flag_output_denormal STATUS_VAR);
3786 return packFloat64(cSign ^ signflip, 0, 0);
3787 }
3788 }
3789 /* Zero plus something non-zero : just return the something */
Richard Sandiforda6e7c182013-01-22 17:03:05 +00003790 return packFloat64(cSign ^ signflip, cExp, cSig);
Peter Maydell369be8f2011-10-19 16:14:06 +00003791 }
3792
3793 if (aExp == 0) {
3794 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
3795 }
3796 if (bExp == 0) {
3797 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
3798 }
3799
3800 /* Calculate the actual result a * b + c */
3801
3802 /* Multiply first; this is easy. */
3803 /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
3804 * because we want the true exponent, not the "one-less-than"
3805 * flavour that roundAndPackFloat64() takes.
3806 */
3807 pExp = aExp + bExp - 0x3fe;
3808 aSig = (aSig | LIT64(0x0010000000000000))<<10;
3809 bSig = (bSig | LIT64(0x0010000000000000))<<11;
3810 mul64To128(aSig, bSig, &pSig0, &pSig1);
3811 if ((int64_t)(pSig0 << 1) >= 0) {
3812 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
3813 pExp--;
3814 }
3815
3816 zSign = pSign ^ signflip;
3817
3818 /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
3819 * bit in position 126.
3820 */
3821 if (cExp == 0) {
3822 if (!cSig) {
3823 /* Throw out the special case of c being an exact zero now */
3824 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
3825 return roundAndPackFloat64(zSign, pExp - 1,
3826 pSig1 STATUS_VAR);
3827 }
3828 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
3829 }
3830
3831 /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
3832 * significand of the addend, with the explicit bit in position 126.
3833 */
3834 cSig0 = cSig << (126 - 64 - 52);
3835 cSig1 = 0;
3836 cSig0 |= LIT64(0x4000000000000000);
3837 expDiff = pExp - cExp;
3838
3839 if (pSign == cSign) {
3840 /* Addition */
3841 if (expDiff > 0) {
3842 /* scale c to match p */
3843 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
3844 zExp = pExp;
3845 } else if (expDiff < 0) {
3846 /* scale p to match c */
3847 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
3848 zExp = cExp;
3849 } else {
3850 /* no scaling needed */
3851 zExp = cExp;
3852 }
3853 /* Add significands and make sure explicit bit ends up in posn 126 */
3854 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3855 if ((int64_t)zSig0 < 0) {
3856 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
3857 } else {
3858 zExp--;
3859 }
3860 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
3861 return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
3862 } else {
3863 /* Subtraction */
3864 if (expDiff > 0) {
3865 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
3866 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3867 zExp = pExp;
3868 } else if (expDiff < 0) {
3869 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
3870 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
3871 zExp = cExp;
3872 zSign ^= 1;
3873 } else {
3874 zExp = pExp;
3875 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
3876 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3877 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
3878 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
3879 zSign ^= 1;
3880 } else {
3881 /* Exact zero */
3882 zSign = signflip;
3883 if (STATUS(float_rounding_mode) == float_round_down) {
3884 zSign ^= 1;
3885 }
3886 return packFloat64(zSign, 0, 0);
3887 }
3888 }
3889 --zExp;
3890 /* Do the equivalent of normalizeRoundAndPackFloat64() but
3891 * starting with the significand in a pair of uint64_t.
3892 */
3893 if (zSig0) {
3894 shiftcount = countLeadingZeros64(zSig0) - 1;
3895 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
3896 if (zSig1) {
3897 zSig0 |= 1;
3898 }
3899 zExp -= shiftcount;
3900 } else {
3901 shiftcount = countLeadingZeros64(zSig1) - 1;
3902 zSig0 = zSig1 << shiftcount;
3903 zExp -= (shiftcount + 64);
3904 }
3905 return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
3906 }
3907}
3908
3909/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00003910| Returns the square root of the double-precision floating-point value `a'.
3911| The operation is performed according to the IEC/IEEE Standard for Binary
3912| Floating-Point Arithmetic.
3913*----------------------------------------------------------------------------*/
3914
3915float64 float64_sqrt( float64 a STATUS_PARAM )
3916{
3917 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003918 int_fast16_t aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003919 uint64_t aSig, zSig, doubleZSig;
3920 uint64_t rem0, rem1, term0, term1;
Peter Maydell37d18662011-01-06 19:37:53 +00003921 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003922
3923 aSig = extractFloat64Frac( a );
3924 aExp = extractFloat64Exp( a );
3925 aSign = extractFloat64Sign( a );
3926 if ( aExp == 0x7FF ) {
3927 if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3928 if ( ! aSign ) return a;
3929 float_raise( float_flag_invalid STATUS_VAR);
3930 return float64_default_nan;
3931 }
3932 if ( aSign ) {
3933 if ( ( aExp | aSig ) == 0 ) return a;
3934 float_raise( float_flag_invalid STATUS_VAR);
3935 return float64_default_nan;
3936 }
3937 if ( aExp == 0 ) {
pbrookf090c9d2007-11-18 14:33:24 +00003938 if ( aSig == 0 ) return float64_zero;
bellard158142c2005-03-13 16:54:06 +00003939 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3940 }
3941 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3942 aSig |= LIT64( 0x0010000000000000 );
3943 zSig = estimateSqrt32( aExp, aSig>>21 );
3944 aSig <<= 9 - ( aExp & 1 );
3945 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3946 if ( ( zSig & 0x1FF ) <= 5 ) {
3947 doubleZSig = zSig<<1;
3948 mul64To128( zSig, zSig, &term0, &term1 );
3949 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003950 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003951 --zSig;
3952 doubleZSig -= 2;
3953 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3954 }
3955 zSig |= ( ( rem0 | rem1 ) != 0 );
3956 }
3957 return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3958
3959}
3960
3961/*----------------------------------------------------------------------------
aurel32374dfc32009-02-05 13:42:47 +00003962| Returns the binary log of the double-precision floating-point value `a'.
3963| The operation is performed according to the IEC/IEEE Standard for Binary
3964| Floating-Point Arithmetic.
3965*----------------------------------------------------------------------------*/
3966float64 float64_log2( float64 a STATUS_PARAM )
3967{
3968 flag aSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003969 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003970 uint64_t aSig, aSig0, aSig1, zSig, i;
Peter Maydell37d18662011-01-06 19:37:53 +00003971 a = float64_squash_input_denormal(a STATUS_VAR);
aurel32374dfc32009-02-05 13:42:47 +00003972
3973 aSig = extractFloat64Frac( a );
3974 aExp = extractFloat64Exp( a );
3975 aSign = extractFloat64Sign( a );
3976
3977 if ( aExp == 0 ) {
3978 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3979 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3980 }
3981 if ( aSign ) {
3982 float_raise( float_flag_invalid STATUS_VAR);
3983 return float64_default_nan;
3984 }
3985 if ( aExp == 0x7FF ) {
3986 if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3987 return a;
3988 }
3989
3990 aExp -= 0x3FF;
3991 aSig |= LIT64( 0x0010000000000000 );
3992 zSign = aExp < 0;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003993 zSig = (uint64_t)aExp << 52;
aurel32374dfc32009-02-05 13:42:47 +00003994 for (i = 1LL << 51; i > 0; i >>= 1) {
3995 mul64To128( aSig, aSig, &aSig0, &aSig1 );
3996 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3997 if ( aSig & LIT64( 0x0020000000000000 ) ) {
3998 aSig >>= 1;
3999 zSig |= i;
4000 }
4001 }
4002
4003 if ( zSign )
4004 zSig = -zSig;
4005 return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
4006}
4007
4008/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00004009| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarnob6893622011-04-14 00:49:29 +02004010| corresponding value `b', and 0 otherwise. The invalid exception is raised
4011| if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00004012| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4013*----------------------------------------------------------------------------*/
4014
Aurelien Jarnob6893622011-04-14 00:49:29 +02004015int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004016{
Andreas Färberbb98fe42011-03-07 01:34:06 +01004017 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004018 a = float64_squash_input_denormal(a STATUS_VAR);
4019 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004020
4021 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4022 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4023 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02004024 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004025 return 0;
4026 }
pbrookf090c9d2007-11-18 14:33:24 +00004027 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004028 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004029 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00004030
4031}
4032
4033/*----------------------------------------------------------------------------
4034| Returns 1 if the double-precision floating-point value `a' is less than or
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004035| equal to the corresponding value `b', and 0 otherwise. The invalid
4036| exception is raised if either operand is a NaN. The comparison is performed
4037| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004038*----------------------------------------------------------------------------*/
4039
bellard750afe92006-10-28 19:27:11 +00004040int float64_le( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004041{
4042 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004043 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004044 a = float64_squash_input_denormal(a STATUS_VAR);
4045 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004046
4047 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4048 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4049 ) {
4050 float_raise( float_flag_invalid STATUS_VAR);
4051 return 0;
4052 }
4053 aSign = extractFloat64Sign( a );
4054 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004055 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004056 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004057 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004058 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004059
4060}
4061
4062/*----------------------------------------------------------------------------
4063| Returns 1 if the double-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004064| the corresponding value `b', and 0 otherwise. The invalid exception is
4065| raised if either operand is a NaN. The comparison is performed according
4066| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004067*----------------------------------------------------------------------------*/
4068
bellard750afe92006-10-28 19:27:11 +00004069int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004070{
4071 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004072 uint64_t av, bv;
bellard158142c2005-03-13 16:54:06 +00004073
Peter Maydell37d18662011-01-06 19:37:53 +00004074 a = float64_squash_input_denormal(a STATUS_VAR);
4075 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004076 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4077 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4078 ) {
4079 float_raise( float_flag_invalid STATUS_VAR);
4080 return 0;
4081 }
4082 aSign = extractFloat64Sign( a );
4083 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004084 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004085 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004086 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004087 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004088
4089}
4090
4091/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02004092| Returns 1 if the double-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004093| be compared, and 0 otherwise. The invalid exception is raised if either
4094| operand is a NaN. The comparison is performed according to the IEC/IEEE
4095| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02004096*----------------------------------------------------------------------------*/
4097
4098int float64_unordered( float64 a, float64 b STATUS_PARAM )
4099{
4100 a = float64_squash_input_denormal(a STATUS_VAR);
4101 b = float64_squash_input_denormal(b STATUS_VAR);
4102
4103 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4104 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4105 ) {
4106 float_raise( float_flag_invalid STATUS_VAR);
4107 return 1;
4108 }
4109 return 0;
4110}
4111
4112/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00004113| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004114| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4115| exception.The comparison is performed according to the IEC/IEEE Standard
4116| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004117*----------------------------------------------------------------------------*/
4118
Aurelien Jarnob6893622011-04-14 00:49:29 +02004119int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004120{
Andreas Färberbb98fe42011-03-07 01:34:06 +01004121 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004122 a = float64_squash_input_denormal(a STATUS_VAR);
4123 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004124
4125 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4126 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4127 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02004128 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4129 float_raise( float_flag_invalid STATUS_VAR);
4130 }
bellard158142c2005-03-13 16:54:06 +00004131 return 0;
4132 }
pbrookf090c9d2007-11-18 14:33:24 +00004133 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004134 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004135 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00004136
4137}
4138
4139/*----------------------------------------------------------------------------
4140| Returns 1 if the double-precision floating-point value `a' is less than or
4141| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4142| cause an exception. Otherwise, the comparison is performed according to the
4143| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4144*----------------------------------------------------------------------------*/
4145
bellard750afe92006-10-28 19:27:11 +00004146int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004147{
4148 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004149 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004150 a = float64_squash_input_denormal(a STATUS_VAR);
4151 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004152
4153 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4154 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4155 ) {
4156 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4157 float_raise( float_flag_invalid STATUS_VAR);
4158 }
4159 return 0;
4160 }
4161 aSign = extractFloat64Sign( a );
4162 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004163 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004164 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004165 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004166 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004167
4168}
4169
4170/*----------------------------------------------------------------------------
4171| Returns 1 if the double-precision floating-point value `a' is less than
4172| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4173| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4174| Standard for Binary Floating-Point Arithmetic.
4175*----------------------------------------------------------------------------*/
4176
bellard750afe92006-10-28 19:27:11 +00004177int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004178{
4179 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004180 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004181 a = float64_squash_input_denormal(a STATUS_VAR);
4182 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004183
4184 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4185 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4186 ) {
4187 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4188 float_raise( float_flag_invalid STATUS_VAR);
4189 }
4190 return 0;
4191 }
4192 aSign = extractFloat64Sign( a );
4193 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004194 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004195 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004196 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004197 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004198
4199}
4200
Aurelien Jarno67b78612011-04-14 00:49:29 +02004201/*----------------------------------------------------------------------------
4202| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4203| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4204| comparison is performed according to the IEC/IEEE Standard for Binary
4205| Floating-Point Arithmetic.
4206*----------------------------------------------------------------------------*/
4207
4208int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
4209{
4210 a = float64_squash_input_denormal(a STATUS_VAR);
4211 b = float64_squash_input_denormal(b STATUS_VAR);
4212
4213 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4214 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4215 ) {
4216 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4217 float_raise( float_flag_invalid STATUS_VAR);
4218 }
4219 return 1;
4220 }
4221 return 0;
4222}
4223
bellard158142c2005-03-13 16:54:06 +00004224/*----------------------------------------------------------------------------
4225| Returns the result of converting the extended double-precision floating-
4226| point value `a' to the 32-bit two's complement integer format. The
4227| conversion is performed according to the IEC/IEEE Standard for Binary
4228| Floating-Point Arithmetic---which means in particular that the conversion
4229| is rounded according to the current rounding mode. If `a' is a NaN, the
4230| largest positive integer is returned. Otherwise, if the conversion
4231| overflows, the largest integer with the same sign as `a' is returned.
4232*----------------------------------------------------------------------------*/
4233
4234int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
4235{
4236 flag aSign;
4237 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004238 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004239
4240 aSig = extractFloatx80Frac( a );
4241 aExp = extractFloatx80Exp( a );
4242 aSign = extractFloatx80Sign( a );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004243 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard158142c2005-03-13 16:54:06 +00004244 shiftCount = 0x4037 - aExp;
4245 if ( shiftCount <= 0 ) shiftCount = 1;
4246 shift64RightJamming( aSig, shiftCount, &aSig );
4247 return roundAndPackInt32( aSign, aSig STATUS_VAR );
4248
4249}
4250
4251/*----------------------------------------------------------------------------
4252| Returns the result of converting the extended double-precision floating-
4253| point value `a' to the 32-bit two's complement integer format. The
4254| conversion is performed according to the IEC/IEEE Standard for Binary
4255| Floating-Point Arithmetic, except that the conversion is always rounded
4256| toward zero. If `a' is a NaN, the largest positive integer is returned.
4257| Otherwise, if the conversion overflows, the largest integer with the same
4258| sign as `a' is returned.
4259*----------------------------------------------------------------------------*/
4260
4261int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
4262{
4263 flag aSign;
4264 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004265 uint64_t aSig, savedASig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01004266 int32_t z;
bellard158142c2005-03-13 16:54:06 +00004267
4268 aSig = extractFloatx80Frac( a );
4269 aExp = extractFloatx80Exp( a );
4270 aSign = extractFloatx80Sign( a );
4271 if ( 0x401E < aExp ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004272 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard158142c2005-03-13 16:54:06 +00004273 goto invalid;
4274 }
4275 else if ( aExp < 0x3FFF ) {
4276 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
4277 return 0;
4278 }
4279 shiftCount = 0x403E - aExp;
4280 savedASig = aSig;
4281 aSig >>= shiftCount;
4282 z = aSig;
4283 if ( aSign ) z = - z;
4284 if ( ( z < 0 ) ^ aSign ) {
4285 invalid:
4286 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004287 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00004288 }
4289 if ( ( aSig<<shiftCount ) != savedASig ) {
4290 STATUS(float_exception_flags) |= float_flag_inexact;
4291 }
4292 return z;
4293
4294}
4295
4296/*----------------------------------------------------------------------------
4297| Returns the result of converting the extended double-precision floating-
4298| point value `a' to the 64-bit two's complement integer format. The
4299| conversion is performed according to the IEC/IEEE Standard for Binary
4300| Floating-Point Arithmetic---which means in particular that the conversion
4301| is rounded according to the current rounding mode. If `a' is a NaN,
4302| the largest positive integer is returned. Otherwise, if the conversion
4303| overflows, the largest integer with the same sign as `a' is returned.
4304*----------------------------------------------------------------------------*/
4305
4306int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
4307{
4308 flag aSign;
4309 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004310 uint64_t aSig, aSigExtra;
bellard158142c2005-03-13 16:54:06 +00004311
4312 aSig = extractFloatx80Frac( a );
4313 aExp = extractFloatx80Exp( a );
4314 aSign = extractFloatx80Sign( a );
4315 shiftCount = 0x403E - aExp;
4316 if ( shiftCount <= 0 ) {
4317 if ( shiftCount ) {
4318 float_raise( float_flag_invalid STATUS_VAR);
4319 if ( ! aSign
4320 || ( ( aExp == 0x7FFF )
4321 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4322 ) {
4323 return LIT64( 0x7FFFFFFFFFFFFFFF );
4324 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01004325 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00004326 }
4327 aSigExtra = 0;
4328 }
4329 else {
4330 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4331 }
4332 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
4333
4334}
4335
4336/*----------------------------------------------------------------------------
4337| Returns the result of converting the extended double-precision floating-
4338| point value `a' to the 64-bit two's complement integer format. The
4339| conversion is performed according to the IEC/IEEE Standard for Binary
4340| Floating-Point Arithmetic, except that the conversion is always rounded
4341| toward zero. If `a' is a NaN, the largest positive integer is returned.
4342| Otherwise, if the conversion overflows, the largest integer with the same
4343| sign as `a' is returned.
4344*----------------------------------------------------------------------------*/
4345
4346int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
4347{
4348 flag aSign;
4349 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004350 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004351 int64 z;
4352
4353 aSig = extractFloatx80Frac( a );
4354 aExp = extractFloatx80Exp( a );
4355 aSign = extractFloatx80Sign( a );
4356 shiftCount = aExp - 0x403E;
4357 if ( 0 <= shiftCount ) {
4358 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4359 if ( ( a.high != 0xC03E ) || aSig ) {
4360 float_raise( float_flag_invalid STATUS_VAR);
4361 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4362 return LIT64( 0x7FFFFFFFFFFFFFFF );
4363 }
4364 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01004365 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00004366 }
4367 else if ( aExp < 0x3FFF ) {
4368 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
4369 return 0;
4370 }
4371 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004372 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00004373 STATUS(float_exception_flags) |= float_flag_inexact;
4374 }
4375 if ( aSign ) z = - z;
4376 return z;
4377
4378}
4379
4380/*----------------------------------------------------------------------------
4381| Returns the result of converting the extended double-precision floating-
4382| point value `a' to the single-precision floating-point format. The
4383| conversion is performed according to the IEC/IEEE Standard for Binary
4384| Floating-Point Arithmetic.
4385*----------------------------------------------------------------------------*/
4386
4387float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
4388{
4389 flag aSign;
4390 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004391 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004392
4393 aSig = extractFloatx80Frac( a );
4394 aExp = extractFloatx80Exp( a );
4395 aSign = extractFloatx80Sign( a );
4396 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004397 if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004398 return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004399 }
4400 return packFloat32( aSign, 0xFF, 0 );
4401 }
4402 shift64RightJamming( aSig, 33, &aSig );
4403 if ( aExp || aSig ) aExp -= 0x3F81;
4404 return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
4405
4406}
4407
4408/*----------------------------------------------------------------------------
4409| Returns the result of converting the extended double-precision floating-
4410| point value `a' to the double-precision floating-point format. The
4411| conversion is performed according to the IEC/IEEE Standard for Binary
4412| Floating-Point Arithmetic.
4413*----------------------------------------------------------------------------*/
4414
4415float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
4416{
4417 flag aSign;
4418 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004419 uint64_t aSig, zSig;
bellard158142c2005-03-13 16:54:06 +00004420
4421 aSig = extractFloatx80Frac( a );
4422 aExp = extractFloatx80Exp( a );
4423 aSign = extractFloatx80Sign( a );
4424 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004425 if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004426 return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004427 }
4428 return packFloat64( aSign, 0x7FF, 0 );
4429 }
4430 shift64RightJamming( aSig, 1, &zSig );
4431 if ( aExp || aSig ) aExp -= 0x3C01;
4432 return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
4433
4434}
4435
bellard158142c2005-03-13 16:54:06 +00004436/*----------------------------------------------------------------------------
4437| Returns the result of converting the extended double-precision floating-
4438| point value `a' to the quadruple-precision floating-point format. The
4439| conversion is performed according to the IEC/IEEE Standard for Binary
4440| Floating-Point Arithmetic.
4441*----------------------------------------------------------------------------*/
4442
4443float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
4444{
4445 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02004446 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004447 uint64_t aSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004448
4449 aSig = extractFloatx80Frac( a );
4450 aExp = extractFloatx80Exp( a );
4451 aSign = extractFloatx80Sign( a );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004452 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004453 return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004454 }
4455 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4456 return packFloat128( aSign, aExp, zSig0, zSig1 );
4457
4458}
4459
bellard158142c2005-03-13 16:54:06 +00004460/*----------------------------------------------------------------------------
4461| Rounds the extended double-precision floating-point value `a' to an integer,
| and returns the result as an extended double-precision floating-point
4463| value. The operation is performed according to the IEC/IEEE Standard for
4464| Binary Floating-Point Arithmetic.
4465*----------------------------------------------------------------------------*/
4466
4467floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
4468{
4469 flag aSign;
4470 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004471 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00004472 int8 roundingMode;
4473 floatx80 z;
4474
4475 aExp = extractFloatx80Exp( a );
4476 if ( 0x403E <= aExp ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004477 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004478 return propagateFloatx80NaN( a, a STATUS_VAR );
4479 }
4480 return a;
4481 }
4482 if ( aExp < 0x3FFF ) {
4483 if ( ( aExp == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01004484 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
bellard158142c2005-03-13 16:54:06 +00004485 return a;
4486 }
4487 STATUS(float_exception_flags) |= float_flag_inexact;
4488 aSign = extractFloatx80Sign( a );
4489 switch ( STATUS(float_rounding_mode) ) {
4490 case float_round_nearest_even:
Andreas Färberbb98fe42011-03-07 01:34:06 +01004491 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
bellard158142c2005-03-13 16:54:06 +00004492 ) {
4493 return
4494 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4495 }
4496 break;
4497 case float_round_down:
4498 return
4499 aSign ?
4500 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4501 : packFloatx80( 0, 0, 0 );
4502 case float_round_up:
4503 return
4504 aSign ? packFloatx80( 1, 0, 0 )
4505 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4506 }
4507 return packFloatx80( aSign, 0, 0 );
4508 }
4509 lastBitMask = 1;
4510 lastBitMask <<= 0x403E - aExp;
4511 roundBitsMask = lastBitMask - 1;
4512 z = a;
4513 roundingMode = STATUS(float_rounding_mode);
4514 if ( roundingMode == float_round_nearest_even ) {
4515 z.low += lastBitMask>>1;
4516 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4517 }
4518 else if ( roundingMode != float_round_to_zero ) {
4519 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
4520 z.low += roundBitsMask;
4521 }
4522 }
4523 z.low &= ~ roundBitsMask;
4524 if ( z.low == 0 ) {
4525 ++z.high;
4526 z.low = LIT64( 0x8000000000000000 );
4527 }
4528 if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
4529 return z;
4530
4531}
4532
4533/*----------------------------------------------------------------------------
4534| Returns the result of adding the absolute values of the extended double-
4535| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4536| negated before being returned. `zSign' is ignored if the result is a NaN.
4537| The addition is performed according to the IEC/IEEE Standard for Binary
4538| Floating-Point Arithmetic.
4539*----------------------------------------------------------------------------*/
4540
4541static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
4542{
4543 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004544 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004545 int32 expDiff;
4546
4547 aSig = extractFloatx80Frac( a );
4548 aExp = extractFloatx80Exp( a );
4549 bSig = extractFloatx80Frac( b );
4550 bExp = extractFloatx80Exp( b );
4551 expDiff = aExp - bExp;
4552 if ( 0 < expDiff ) {
4553 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004554 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004555 return a;
4556 }
4557 if ( bExp == 0 ) --expDiff;
4558 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4559 zExp = aExp;
4560 }
4561 else if ( expDiff < 0 ) {
4562 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004563 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004564 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4565 }
4566 if ( aExp == 0 ) ++expDiff;
4567 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4568 zExp = bExp;
4569 }
4570 else {
4571 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004572 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004573 return propagateFloatx80NaN( a, b STATUS_VAR );
4574 }
4575 return a;
4576 }
4577 zSig1 = 0;
4578 zSig0 = aSig + bSig;
4579 if ( aExp == 0 ) {
4580 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4581 goto roundAndPack;
4582 }
4583 zExp = aExp;
4584 goto shiftRight1;
4585 }
4586 zSig0 = aSig + bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004587 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
bellard158142c2005-03-13 16:54:06 +00004588 shiftRight1:
4589 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4590 zSig0 |= LIT64( 0x8000000000000000 );
4591 ++zExp;
4592 roundAndPack:
4593 return
4594 roundAndPackFloatx80(
4595 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4596
4597}
4598
4599/*----------------------------------------------------------------------------
4600| Returns the result of subtracting the absolute values of the extended
4601| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4602| difference is negated before being returned. `zSign' is ignored if the
4603| result is a NaN. The subtraction is performed according to the IEC/IEEE
4604| Standard for Binary Floating-Point Arithmetic.
4605*----------------------------------------------------------------------------*/
4606
4607static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
4608{
4609 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004610 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004611 int32 expDiff;
4612 floatx80 z;
4613
4614 aSig = extractFloatx80Frac( a );
4615 aExp = extractFloatx80Exp( a );
4616 bSig = extractFloatx80Frac( b );
4617 bExp = extractFloatx80Exp( b );
4618 expDiff = aExp - bExp;
4619 if ( 0 < expDiff ) goto aExpBigger;
4620 if ( expDiff < 0 ) goto bExpBigger;
4621 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004622 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004623 return propagateFloatx80NaN( a, b STATUS_VAR );
4624 }
4625 float_raise( float_flag_invalid STATUS_VAR);
4626 z.low = floatx80_default_nan_low;
4627 z.high = floatx80_default_nan_high;
4628 return z;
4629 }
4630 if ( aExp == 0 ) {
4631 aExp = 1;
4632 bExp = 1;
4633 }
4634 zSig1 = 0;
4635 if ( bSig < aSig ) goto aBigger;
4636 if ( aSig < bSig ) goto bBigger;
4637 return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
4638 bExpBigger:
4639 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004640 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004641 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
4642 }
4643 if ( aExp == 0 ) ++expDiff;
4644 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4645 bBigger:
4646 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4647 zExp = bExp;
4648 zSign ^= 1;
4649 goto normalizeRoundAndPack;
4650 aExpBigger:
4651 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004652 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004653 return a;
4654 }
4655 if ( bExp == 0 ) --expDiff;
4656 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4657 aBigger:
4658 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4659 zExp = aExp;
4660 normalizeRoundAndPack:
4661 return
4662 normalizeRoundAndPackFloatx80(
4663 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4664
4665}
4666
4667/*----------------------------------------------------------------------------
4668| Returns the result of adding the extended double-precision floating-point
4669| values `a' and `b'. The operation is performed according to the IEC/IEEE
4670| Standard for Binary Floating-Point Arithmetic.
4671*----------------------------------------------------------------------------*/
4672
4673floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
4674{
4675 flag aSign, bSign;
4676
4677 aSign = extractFloatx80Sign( a );
4678 bSign = extractFloatx80Sign( b );
4679 if ( aSign == bSign ) {
4680 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4681 }
4682 else {
4683 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4684 }
4685
4686}
4687
4688/*----------------------------------------------------------------------------
4689| Returns the result of subtracting the extended double-precision floating-
4690| point values `a' and `b'. The operation is performed according to the
4691| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4692*----------------------------------------------------------------------------*/
4693
4694floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
4695{
4696 flag aSign, bSign;
4697
4698 aSign = extractFloatx80Sign( a );
4699 bSign = extractFloatx80Sign( b );
4700 if ( aSign == bSign ) {
4701 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4702 }
4703 else {
4704 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4705 }
4706
4707}
4708
4709/*----------------------------------------------------------------------------
4710| Returns the result of multiplying the extended double-precision floating-
4711| point values `a' and `b'. The operation is performed according to the
4712| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4713*----------------------------------------------------------------------------*/
4714
4715floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
4716{
4717 flag aSign, bSign, zSign;
4718 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004719 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004720 floatx80 z;
4721
4722 aSig = extractFloatx80Frac( a );
4723 aExp = extractFloatx80Exp( a );
4724 aSign = extractFloatx80Sign( a );
4725 bSig = extractFloatx80Frac( b );
4726 bExp = extractFloatx80Exp( b );
4727 bSign = extractFloatx80Sign( b );
4728 zSign = aSign ^ bSign;
4729 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004730 if ( (uint64_t) ( aSig<<1 )
4731 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00004732 return propagateFloatx80NaN( a, b STATUS_VAR );
4733 }
4734 if ( ( bExp | bSig ) == 0 ) goto invalid;
4735 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4736 }
4737 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004738 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004739 if ( ( aExp | aSig ) == 0 ) {
4740 invalid:
4741 float_raise( float_flag_invalid STATUS_VAR);
4742 z.low = floatx80_default_nan_low;
4743 z.high = floatx80_default_nan_high;
4744 return z;
4745 }
4746 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4747 }
4748 if ( aExp == 0 ) {
4749 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4750 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4751 }
4752 if ( bExp == 0 ) {
4753 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
4754 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4755 }
4756 zExp = aExp + bExp - 0x3FFE;
4757 mul64To128( aSig, bSig, &zSig0, &zSig1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004758 if ( 0 < (int64_t) zSig0 ) {
bellard158142c2005-03-13 16:54:06 +00004759 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
4760 --zExp;
4761 }
4762 return
4763 roundAndPackFloatx80(
4764 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4765
4766}
4767
4768/*----------------------------------------------------------------------------
4769| Returns the result of dividing the extended double-precision floating-point
4770| value `a' by the corresponding value `b'. The operation is performed
4771| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4772*----------------------------------------------------------------------------*/
4773
4774floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4775{
4776 flag aSign, bSign, zSign;
4777 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004778 uint64_t aSig, bSig, zSig0, zSig1;
4779 uint64_t rem0, rem1, rem2, term0, term1, term2;
bellard158142c2005-03-13 16:54:06 +00004780 floatx80 z;
4781
4782 aSig = extractFloatx80Frac( a );
4783 aExp = extractFloatx80Exp( a );
4784 aSign = extractFloatx80Sign( a );
4785 bSig = extractFloatx80Frac( b );
4786 bExp = extractFloatx80Exp( b );
4787 bSign = extractFloatx80Sign( b );
4788 zSign = aSign ^ bSign;
4789 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004790 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004791 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004792 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004793 goto invalid;
4794 }
4795 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4796 }
4797 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004798 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004799 return packFloatx80( zSign, 0, 0 );
4800 }
4801 if ( bExp == 0 ) {
4802 if ( bSig == 0 ) {
4803 if ( ( aExp | aSig ) == 0 ) {
4804 invalid:
4805 float_raise( float_flag_invalid STATUS_VAR);
4806 z.low = floatx80_default_nan_low;
4807 z.high = floatx80_default_nan_high;
4808 return z;
4809 }
4810 float_raise( float_flag_divbyzero STATUS_VAR);
4811 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4812 }
4813 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4814 }
4815 if ( aExp == 0 ) {
4816 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4817 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4818 }
4819 zExp = aExp - bExp + 0x3FFE;
4820 rem1 = 0;
4821 if ( bSig <= aSig ) {
4822 shift128Right( aSig, 0, 1, &aSig, &rem1 );
4823 ++zExp;
4824 }
4825 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4826 mul64To128( bSig, zSig0, &term0, &term1 );
4827 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004828 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004829 --zSig0;
4830 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4831 }
4832 zSig1 = estimateDiv128To64( rem1, 0, bSig );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004833 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
bellard158142c2005-03-13 16:54:06 +00004834 mul64To128( bSig, zSig1, &term1, &term2 );
4835 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004836 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004837 --zSig1;
4838 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4839 }
4840 zSig1 |= ( ( rem1 | rem2 ) != 0 );
4841 }
4842 return
4843 roundAndPackFloatx80(
4844 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4845
4846}
4847
4848/*----------------------------------------------------------------------------
4849| Returns the remainder of the extended double-precision floating-point value
4850| `a' with respect to the corresponding value `b'. The operation is performed
4851| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4852*----------------------------------------------------------------------------*/
4853
4854floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4855{
Blue Swirled086f32010-03-07 13:49:58 +00004856 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00004857 int32 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004858 uint64_t aSig0, aSig1, bSig;
4859 uint64_t q, term0, term1, alternateASig0, alternateASig1;
bellard158142c2005-03-13 16:54:06 +00004860 floatx80 z;
4861
4862 aSig0 = extractFloatx80Frac( a );
4863 aExp = extractFloatx80Exp( a );
4864 aSign = extractFloatx80Sign( a );
4865 bSig = extractFloatx80Frac( b );
4866 bExp = extractFloatx80Exp( b );
bellard158142c2005-03-13 16:54:06 +00004867 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004868 if ( (uint64_t) ( aSig0<<1 )
4869 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00004870 return propagateFloatx80NaN( a, b STATUS_VAR );
4871 }
4872 goto invalid;
4873 }
4874 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004875 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004876 return a;
4877 }
4878 if ( bExp == 0 ) {
4879 if ( bSig == 0 ) {
4880 invalid:
4881 float_raise( float_flag_invalid STATUS_VAR);
4882 z.low = floatx80_default_nan_low;
4883 z.high = floatx80_default_nan_high;
4884 return z;
4885 }
4886 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4887 }
4888 if ( aExp == 0 ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004889 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00004890 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4891 }
4892 bSig |= LIT64( 0x8000000000000000 );
4893 zSign = aSign;
4894 expDiff = aExp - bExp;
4895 aSig1 = 0;
4896 if ( expDiff < 0 ) {
4897 if ( expDiff < -1 ) return a;
4898 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4899 expDiff = 0;
4900 }
4901 q = ( bSig <= aSig0 );
4902 if ( q ) aSig0 -= bSig;
4903 expDiff -= 64;
4904 while ( 0 < expDiff ) {
4905 q = estimateDiv128To64( aSig0, aSig1, bSig );
4906 q = ( 2 < q ) ? q - 2 : 0;
4907 mul64To128( bSig, q, &term0, &term1 );
4908 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4909 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4910 expDiff -= 62;
4911 }
4912 expDiff += 64;
4913 if ( 0 < expDiff ) {
4914 q = estimateDiv128To64( aSig0, aSig1, bSig );
4915 q = ( 2 < q ) ? q - 2 : 0;
4916 q >>= 64 - expDiff;
4917 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4918 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4919 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4920 while ( le128( term0, term1, aSig0, aSig1 ) ) {
4921 ++q;
4922 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4923 }
4924 }
4925 else {
4926 term1 = 0;
4927 term0 = bSig;
4928 }
4929 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4930 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4931 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4932 && ( q & 1 ) )
4933 ) {
4934 aSig0 = alternateASig0;
4935 aSig1 = alternateASig1;
4936 zSign = ! zSign;
4937 }
4938 return
4939 normalizeRoundAndPackFloatx80(
4940 80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4941
4942}
4943
4944/*----------------------------------------------------------------------------
4945| Returns the square root of the extended double-precision floating-point
4946| value `a'. The operation is performed according to the IEC/IEEE Standard
4947| for Binary Floating-Point Arithmetic.
4948*----------------------------------------------------------------------------*/
4949
4950floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4951{
4952 flag aSign;
4953 int32 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004954 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4955 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00004956 floatx80 z;
4957
4958 aSig0 = extractFloatx80Frac( a );
4959 aExp = extractFloatx80Exp( a );
4960 aSign = extractFloatx80Sign( a );
4961 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004962 if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004963 if ( ! aSign ) return a;
4964 goto invalid;
4965 }
4966 if ( aSign ) {
4967 if ( ( aExp | aSig0 ) == 0 ) return a;
4968 invalid:
4969 float_raise( float_flag_invalid STATUS_VAR);
4970 z.low = floatx80_default_nan_low;
4971 z.high = floatx80_default_nan_high;
4972 return z;
4973 }
4974 if ( aExp == 0 ) {
4975 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4976 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4977 }
4978 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4979 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4980 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4981 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4982 doubleZSig0 = zSig0<<1;
4983 mul64To128( zSig0, zSig0, &term0, &term1 );
4984 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004985 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004986 --zSig0;
4987 doubleZSig0 -= 2;
4988 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4989 }
4990 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4991 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4992 if ( zSig1 == 0 ) zSig1 = 1;
4993 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4994 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4995 mul64To128( zSig1, zSig1, &term2, &term3 );
4996 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004997 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004998 --zSig1;
4999 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5000 term3 |= 1;
5001 term2 |= doubleZSig0;
5002 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5003 }
5004 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5005 }
5006 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5007 zSig0 |= doubleZSig0;
5008 return
5009 roundAndPackFloatx80(
5010 STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
5011
5012}
5013
5014/*----------------------------------------------------------------------------
Aurelien Jarnob6893622011-04-14 00:49:29 +02005015| Returns 1 if the extended double-precision floating-point value `a' is equal
5016| to the corresponding value `b', and 0 otherwise. The invalid exception is
5017| raised if either operand is a NaN. Otherwise, the comparison is performed
5018| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005019*----------------------------------------------------------------------------*/
5020
Aurelien Jarnob6893622011-04-14 00:49:29 +02005021int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005022{
5023
5024 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005025 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005026 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005027 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005028 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02005029 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00005030 return 0;
5031 }
5032 return
5033 ( a.low == b.low )
5034 && ( ( a.high == b.high )
5035 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005036 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00005037 );
5038
5039}
5040
5041/*----------------------------------------------------------------------------
5042| Returns 1 if the extended double-precision floating-point value `a' is
5043| less than or equal to the corresponding value `b', and 0 otherwise. The
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005044| invalid exception is raised if either operand is a NaN. The comparison is
5045| performed according to the IEC/IEEE Standard for Binary Floating-Point
5046| Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005047*----------------------------------------------------------------------------*/
5048
bellard750afe92006-10-28 19:27:11 +00005049int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005050{
5051 flag aSign, bSign;
5052
5053 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005054 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005055 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005056 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005057 ) {
5058 float_raise( float_flag_invalid STATUS_VAR);
5059 return 0;
5060 }
5061 aSign = extractFloatx80Sign( a );
5062 bSign = extractFloatx80Sign( b );
5063 if ( aSign != bSign ) {
5064 return
5065 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005066 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005067 == 0 );
5068 }
5069 return
5070 aSign ? le128( b.high, b.low, a.high, a.low )
5071 : le128( a.high, a.low, b.high, b.low );
5072
5073}
5074
5075/*----------------------------------------------------------------------------
5076| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005077| less than the corresponding value `b', and 0 otherwise. The invalid
5078| exception is raised if either operand is a NaN. The comparison is performed
5079| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005080*----------------------------------------------------------------------------*/
5081
bellard750afe92006-10-28 19:27:11 +00005082int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005083{
5084 flag aSign, bSign;
5085
5086 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005087 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005088 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005089 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005090 ) {
5091 float_raise( float_flag_invalid STATUS_VAR);
5092 return 0;
5093 }
5094 aSign = extractFloatx80Sign( a );
5095 bSign = extractFloatx80Sign( b );
5096 if ( aSign != bSign ) {
5097 return
5098 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005099 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005100 != 0 );
5101 }
5102 return
5103 aSign ? lt128( b.high, b.low, a.high, a.low )
5104 : lt128( a.high, a.low, b.high, b.low );
5105
5106}
5107
5108/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02005109| Returns 1 if the extended double-precision floating-point values `a' and `b'
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005110| cannot be compared, and 0 otherwise. The invalid exception is raised if
5111| either operand is a NaN. The comparison is performed according to the
5112| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02005113*----------------------------------------------------------------------------*/
5114int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
5115{
5116 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5117 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5118 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5119 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5120 ) {
5121 float_raise( float_flag_invalid STATUS_VAR);
5122 return 1;
5123 }
5124 return 0;
5125}
5126
5127/*----------------------------------------------------------------------------
Aurelien Jarnob6893622011-04-14 00:49:29 +02005128| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005129| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5130| cause an exception. The comparison is performed according to the IEC/IEEE
5131| Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005132*----------------------------------------------------------------------------*/
5133
Aurelien Jarnob6893622011-04-14 00:49:29 +02005134int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005135{
5136
5137 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005138 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005139 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005140 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005141 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02005142 if ( floatx80_is_signaling_nan( a )
5143 || floatx80_is_signaling_nan( b ) ) {
5144 float_raise( float_flag_invalid STATUS_VAR);
5145 }
bellard158142c2005-03-13 16:54:06 +00005146 return 0;
5147 }
5148 return
5149 ( a.low == b.low )
5150 && ( ( a.high == b.high )
5151 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005152 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00005153 );
5154
5155}
5156
5157/*----------------------------------------------------------------------------
5158| Returns 1 if the extended double-precision floating-point value `a' is less
5159| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5160| do not cause an exception. Otherwise, the comparison is performed according
5161| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5162*----------------------------------------------------------------------------*/
5163
bellard750afe92006-10-28 19:27:11 +00005164int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005165{
5166 flag aSign, bSign;
5167
5168 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005169 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005170 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005171 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005172 ) {
5173 if ( floatx80_is_signaling_nan( a )
5174 || floatx80_is_signaling_nan( b ) ) {
5175 float_raise( float_flag_invalid STATUS_VAR);
5176 }
5177 return 0;
5178 }
5179 aSign = extractFloatx80Sign( a );
5180 bSign = extractFloatx80Sign( b );
5181 if ( aSign != bSign ) {
5182 return
5183 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005184 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005185 == 0 );
5186 }
5187 return
5188 aSign ? le128( b.high, b.low, a.high, a.low )
5189 : le128( a.high, a.low, b.high, b.low );
5190
5191}
5192
5193/*----------------------------------------------------------------------------
5194| Returns 1 if the extended double-precision floating-point value `a' is less
5195| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5196| an exception. Otherwise, the comparison is performed according to the
5197| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5198*----------------------------------------------------------------------------*/
5199
bellard750afe92006-10-28 19:27:11 +00005200int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005201{
5202 flag aSign, bSign;
5203
5204 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005205 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005206 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005207 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005208 ) {
5209 if ( floatx80_is_signaling_nan( a )
5210 || floatx80_is_signaling_nan( b ) ) {
5211 float_raise( float_flag_invalid STATUS_VAR);
5212 }
5213 return 0;
5214 }
5215 aSign = extractFloatx80Sign( a );
5216 bSign = extractFloatx80Sign( b );
5217 if ( aSign != bSign ) {
5218 return
5219 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005220 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005221 != 0 );
5222 }
5223 return
5224 aSign ? lt128( b.high, b.low, a.high, a.low )
5225 : lt128( a.high, a.low, b.high, b.low );
5226
5227}
5228
Aurelien Jarno67b78612011-04-14 00:49:29 +02005229/*----------------------------------------------------------------------------
5230| Returns 1 if the extended double-precision floating-point values `a' and `b'
5231| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5232| The comparison is performed according to the IEC/IEEE Standard for Binary
5233| Floating-Point Arithmetic.
5234*----------------------------------------------------------------------------*/
5235int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5236{
5237 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5238 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5239 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5240 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5241 ) {
5242 if ( floatx80_is_signaling_nan( a )
5243 || floatx80_is_signaling_nan( b ) ) {
5244 float_raise( float_flag_invalid STATUS_VAR);
5245 }
5246 return 1;
5247 }
5248 return 0;
5249}
5250
bellard158142c2005-03-13 16:54:06 +00005251/*----------------------------------------------------------------------------
5252| Returns the result of converting the quadruple-precision floating-point
5253| value `a' to the 32-bit two's complement integer format. The conversion
5254| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5255| Arithmetic---which means in particular that the conversion is rounded
5256| according to the current rounding mode. If `a' is a NaN, the largest
5257| positive integer is returned. Otherwise, if the conversion overflows, the
5258| largest integer with the same sign as `a' is returned.
5259*----------------------------------------------------------------------------*/
5260
5261int32 float128_to_int32( float128 a STATUS_PARAM )
5262{
5263 flag aSign;
5264 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005265 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005266
5267 aSig1 = extractFloat128Frac1( a );
5268 aSig0 = extractFloat128Frac0( a );
5269 aExp = extractFloat128Exp( a );
5270 aSign = extractFloat128Sign( a );
5271 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5272 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5273 aSig0 |= ( aSig1 != 0 );
5274 shiftCount = 0x4028 - aExp;
5275 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5276 return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
5277
5278}
5279
5280/*----------------------------------------------------------------------------
5281| Returns the result of converting the quadruple-precision floating-point
5282| value `a' to the 32-bit two's complement integer format. The conversion
5283| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5284| Arithmetic, except that the conversion is always rounded toward zero. If
5285| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5286| conversion overflows, the largest integer with the same sign as `a' is
5287| returned.
5288*----------------------------------------------------------------------------*/
5289
5290int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
5291{
5292 flag aSign;
5293 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005294 uint64_t aSig0, aSig1, savedASig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01005295 int32_t z;
bellard158142c2005-03-13 16:54:06 +00005296
5297 aSig1 = extractFloat128Frac1( a );
5298 aSig0 = extractFloat128Frac0( a );
5299 aExp = extractFloat128Exp( a );
5300 aSign = extractFloat128Sign( a );
5301 aSig0 |= ( aSig1 != 0 );
5302 if ( 0x401E < aExp ) {
5303 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5304 goto invalid;
5305 }
5306 else if ( aExp < 0x3FFF ) {
5307 if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
5308 return 0;
5309 }
5310 aSig0 |= LIT64( 0x0001000000000000 );
5311 shiftCount = 0x402F - aExp;
5312 savedASig = aSig0;
5313 aSig0 >>= shiftCount;
5314 z = aSig0;
5315 if ( aSign ) z = - z;
5316 if ( ( z < 0 ) ^ aSign ) {
5317 invalid:
5318 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01005319 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00005320 }
5321 if ( ( aSig0<<shiftCount ) != savedASig ) {
5322 STATUS(float_exception_flags) |= float_flag_inexact;
5323 }
5324 return z;
5325
5326}
5327
5328/*----------------------------------------------------------------------------
5329| Returns the result of converting the quadruple-precision floating-point
5330| value `a' to the 64-bit two's complement integer format. The conversion
5331| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5332| Arithmetic---which means in particular that the conversion is rounded
5333| according to the current rounding mode. If `a' is a NaN, the largest
5334| positive integer is returned. Otherwise, if the conversion overflows, the
5335| largest integer with the same sign as `a' is returned.
5336*----------------------------------------------------------------------------*/
5337
5338int64 float128_to_int64( float128 a STATUS_PARAM )
5339{
5340 flag aSign;
5341 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005342 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005343
5344 aSig1 = extractFloat128Frac1( a );
5345 aSig0 = extractFloat128Frac0( a );
5346 aExp = extractFloat128Exp( a );
5347 aSign = extractFloat128Sign( a );
5348 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5349 shiftCount = 0x402F - aExp;
5350 if ( shiftCount <= 0 ) {
5351 if ( 0x403E < aExp ) {
5352 float_raise( float_flag_invalid STATUS_VAR);
5353 if ( ! aSign
5354 || ( ( aExp == 0x7FFF )
5355 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5356 )
5357 ) {
5358 return LIT64( 0x7FFFFFFFFFFFFFFF );
5359 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01005360 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00005361 }
5362 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5363 }
5364 else {
5365 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5366 }
5367 return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
5368
5369}
5370
5371/*----------------------------------------------------------------------------
5372| Returns the result of converting the quadruple-precision floating-point
5373| value `a' to the 64-bit two's complement integer format. The conversion
5374| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5375| Arithmetic, except that the conversion is always rounded toward zero.
5376| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5377| the conversion overflows, the largest integer with the same sign as `a' is
5378| returned.
5379*----------------------------------------------------------------------------*/
5380
5381int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
5382{
5383 flag aSign;
5384 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005385 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005386 int64 z;
5387
5388 aSig1 = extractFloat128Frac1( a );
5389 aSig0 = extractFloat128Frac0( a );
5390 aExp = extractFloat128Exp( a );
5391 aSign = extractFloat128Sign( a );
5392 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5393 shiftCount = aExp - 0x402F;
5394 if ( 0 < shiftCount ) {
5395 if ( 0x403E <= aExp ) {
5396 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5397 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5398 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5399 if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
5400 }
5401 else {
5402 float_raise( float_flag_invalid STATUS_VAR);
5403 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5404 return LIT64( 0x7FFFFFFFFFFFFFFF );
5405 }
5406 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01005407 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00005408 }
5409 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005410 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
bellard158142c2005-03-13 16:54:06 +00005411 STATUS(float_exception_flags) |= float_flag_inexact;
5412 }
5413 }
5414 else {
5415 if ( aExp < 0x3FFF ) {
5416 if ( aExp | aSig0 | aSig1 ) {
5417 STATUS(float_exception_flags) |= float_flag_inexact;
5418 }
5419 return 0;
5420 }
5421 z = aSig0>>( - shiftCount );
5422 if ( aSig1
Andreas Färberbb98fe42011-03-07 01:34:06 +01005423 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
bellard158142c2005-03-13 16:54:06 +00005424 STATUS(float_exception_flags) |= float_flag_inexact;
5425 }
5426 }
5427 if ( aSign ) z = - z;
5428 return z;
5429
5430}
5431
5432/*----------------------------------------------------------------------------
5433| Returns the result of converting the quadruple-precision floating-point
5434| value `a' to the single-precision floating-point format. The conversion
5435| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5436| Arithmetic.
5437*----------------------------------------------------------------------------*/
5438
5439float32 float128_to_float32( float128 a STATUS_PARAM )
5440{
5441 flag aSign;
5442 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005443 uint64_t aSig0, aSig1;
5444 uint32_t zSig;
bellard158142c2005-03-13 16:54:06 +00005445
5446 aSig1 = extractFloat128Frac1( a );
5447 aSig0 = extractFloat128Frac0( a );
5448 aExp = extractFloat128Exp( a );
5449 aSign = extractFloat128Sign( a );
5450 if ( aExp == 0x7FFF ) {
5451 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005452 return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005453 }
5454 return packFloat32( aSign, 0xFF, 0 );
5455 }
5456 aSig0 |= ( aSig1 != 0 );
5457 shift64RightJamming( aSig0, 18, &aSig0 );
5458 zSig = aSig0;
5459 if ( aExp || zSig ) {
5460 zSig |= 0x40000000;
5461 aExp -= 0x3F81;
5462 }
5463 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5464
5465}
5466
5467/*----------------------------------------------------------------------------
5468| Returns the result of converting the quadruple-precision floating-point
5469| value `a' to the double-precision floating-point format. The conversion
5470| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5471| Arithmetic.
5472*----------------------------------------------------------------------------*/
5473
5474float64 float128_to_float64( float128 a STATUS_PARAM )
5475{
5476 flag aSign;
5477 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005478 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005479
5480 aSig1 = extractFloat128Frac1( a );
5481 aSig0 = extractFloat128Frac0( a );
5482 aExp = extractFloat128Exp( a );
5483 aSign = extractFloat128Sign( a );
5484 if ( aExp == 0x7FFF ) {
5485 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005486 return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005487 }
5488 return packFloat64( aSign, 0x7FF, 0 );
5489 }
5490 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5491 aSig0 |= ( aSig1 != 0 );
5492 if ( aExp || aSig0 ) {
5493 aSig0 |= LIT64( 0x4000000000000000 );
5494 aExp -= 0x3C01;
5495 }
5496 return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5497
5498}
5499
bellard158142c2005-03-13 16:54:06 +00005500/*----------------------------------------------------------------------------
5501| Returns the result of converting the quadruple-precision floating-point
5502| value `a' to the extended double-precision floating-point format. The
5503| conversion is performed according to the IEC/IEEE Standard for Binary
5504| Floating-Point Arithmetic.
5505*----------------------------------------------------------------------------*/
5506
5507floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5508{
5509 flag aSign;
5510 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005511 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005512
5513 aSig1 = extractFloat128Frac1( a );
5514 aSig0 = extractFloat128Frac0( a );
5515 aExp = extractFloat128Exp( a );
5516 aSign = extractFloat128Sign( a );
5517 if ( aExp == 0x7FFF ) {
5518 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005519 return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005520 }
5521 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5522 }
5523 if ( aExp == 0 ) {
5524 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5525 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5526 }
5527 else {
5528 aSig0 |= LIT64( 0x0001000000000000 );
5529 }
5530 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5531 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5532
5533}
5534
bellard158142c2005-03-13 16:54:06 +00005535/*----------------------------------------------------------------------------
5536| Rounds the quadruple-precision floating-point value `a' to an integer, and
5537| returns the result as a quadruple-precision floating-point value. The
5538| operation is performed according to the IEC/IEEE Standard for Binary
5539| Floating-Point Arithmetic.
5540*----------------------------------------------------------------------------*/
5541
5542float128 float128_round_to_int( float128 a STATUS_PARAM )
5543{
5544 flag aSign;
5545 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005546 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00005547 int8 roundingMode;
5548 float128 z;
5549
5550 aExp = extractFloat128Exp( a );
5551 if ( 0x402F <= aExp ) {
5552 if ( 0x406F <= aExp ) {
5553 if ( ( aExp == 0x7FFF )
5554 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5555 ) {
5556 return propagateFloat128NaN( a, a STATUS_VAR );
5557 }
5558 return a;
5559 }
5560 lastBitMask = 1;
5561 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5562 roundBitsMask = lastBitMask - 1;
5563 z = a;
5564 roundingMode = STATUS(float_rounding_mode);
5565 if ( roundingMode == float_round_nearest_even ) {
5566 if ( lastBitMask ) {
5567 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5568 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5569 }
5570 else {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005571 if ( (int64_t) z.low < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005572 ++z.high;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005573 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
bellard158142c2005-03-13 16:54:06 +00005574 }
5575 }
5576 }
5577 else if ( roundingMode != float_round_to_zero ) {
5578 if ( extractFloat128Sign( z )
5579 ^ ( roundingMode == float_round_up ) ) {
5580 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5581 }
5582 }
5583 z.low &= ~ roundBitsMask;
5584 }
5585 else {
5586 if ( aExp < 0x3FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005587 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00005588 STATUS(float_exception_flags) |= float_flag_inexact;
5589 aSign = extractFloat128Sign( a );
5590 switch ( STATUS(float_rounding_mode) ) {
5591 case float_round_nearest_even:
5592 if ( ( aExp == 0x3FFE )
5593 && ( extractFloat128Frac0( a )
5594 | extractFloat128Frac1( a ) )
5595 ) {
5596 return packFloat128( aSign, 0x3FFF, 0, 0 );
5597 }
5598 break;
5599 case float_round_down:
5600 return
5601 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5602 : packFloat128( 0, 0, 0, 0 );
5603 case float_round_up:
5604 return
5605 aSign ? packFloat128( 1, 0, 0, 0 )
5606 : packFloat128( 0, 0x3FFF, 0, 0 );
5607 }
5608 return packFloat128( aSign, 0, 0, 0 );
5609 }
5610 lastBitMask = 1;
5611 lastBitMask <<= 0x402F - aExp;
5612 roundBitsMask = lastBitMask - 1;
5613 z.low = 0;
5614 z.high = a.high;
5615 roundingMode = STATUS(float_rounding_mode);
5616 if ( roundingMode == float_round_nearest_even ) {
5617 z.high += lastBitMask>>1;
5618 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5619 z.high &= ~ lastBitMask;
5620 }
5621 }
5622 else if ( roundingMode != float_round_to_zero ) {
5623 if ( extractFloat128Sign( z )
5624 ^ ( roundingMode == float_round_up ) ) {
5625 z.high |= ( a.low != 0 );
5626 z.high += roundBitsMask;
5627 }
5628 }
5629 z.high &= ~ roundBitsMask;
5630 }
5631 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
5632 STATUS(float_exception_flags) |= float_flag_inexact;
5633 }
5634 return z;
5635
5636}
5637
5638/*----------------------------------------------------------------------------
5639| Returns the result of adding the absolute values of the quadruple-precision
5640| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
5641| before being returned. `zSign' is ignored if the result is a NaN.
5642| The addition is performed according to the IEC/IEEE Standard for Binary
5643| Floating-Point Arithmetic.
5644*----------------------------------------------------------------------------*/
5645
5646static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5647{
5648 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005649 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
bellard158142c2005-03-13 16:54:06 +00005650 int32 expDiff;
5651
5652 aSig1 = extractFloat128Frac1( a );
5653 aSig0 = extractFloat128Frac0( a );
5654 aExp = extractFloat128Exp( a );
5655 bSig1 = extractFloat128Frac1( b );
5656 bSig0 = extractFloat128Frac0( b );
5657 bExp = extractFloat128Exp( b );
5658 expDiff = aExp - bExp;
5659 if ( 0 < expDiff ) {
5660 if ( aExp == 0x7FFF ) {
5661 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5662 return a;
5663 }
5664 if ( bExp == 0 ) {
5665 --expDiff;
5666 }
5667 else {
5668 bSig0 |= LIT64( 0x0001000000000000 );
5669 }
5670 shift128ExtraRightJamming(
5671 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5672 zExp = aExp;
5673 }
5674 else if ( expDiff < 0 ) {
5675 if ( bExp == 0x7FFF ) {
5676 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5677 return packFloat128( zSign, 0x7FFF, 0, 0 );
5678 }
5679 if ( aExp == 0 ) {
5680 ++expDiff;
5681 }
5682 else {
5683 aSig0 |= LIT64( 0x0001000000000000 );
5684 }
5685 shift128ExtraRightJamming(
5686 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5687 zExp = bExp;
5688 }
5689 else {
5690 if ( aExp == 0x7FFF ) {
5691 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5692 return propagateFloat128NaN( a, b STATUS_VAR );
5693 }
5694 return a;
5695 }
5696 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
pbrookfe76d972008-12-19 14:33:59 +00005697 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01005698 if (STATUS(flush_to_zero)) {
5699 if (zSig0 | zSig1) {
5700 float_raise(float_flag_output_denormal STATUS_VAR);
5701 }
5702 return packFloat128(zSign, 0, 0, 0);
5703 }
pbrookfe76d972008-12-19 14:33:59 +00005704 return packFloat128( zSign, 0, zSig0, zSig1 );
5705 }
bellard158142c2005-03-13 16:54:06 +00005706 zSig2 = 0;
5707 zSig0 |= LIT64( 0x0002000000000000 );
5708 zExp = aExp;
5709 goto shiftRight1;
5710 }
5711 aSig0 |= LIT64( 0x0001000000000000 );
5712 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5713 --zExp;
5714 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5715 ++zExp;
5716 shiftRight1:
5717 shift128ExtraRightJamming(
5718 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5719 roundAndPack:
5720 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5721
5722}
5723
5724/*----------------------------------------------------------------------------
5725| Returns the result of subtracting the absolute values of the quadruple-
5726| precision floating-point values `a' and `b'. If `zSign' is 1, the
5727| difference is negated before being returned. `zSign' is ignored if the
5728| result is a NaN. The subtraction is performed according to the IEC/IEEE
5729| Standard for Binary Floating-Point Arithmetic.
5730*----------------------------------------------------------------------------*/
5731
5732static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5733{
5734 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005735 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00005736 int32 expDiff;
5737 float128 z;
5738
5739 aSig1 = extractFloat128Frac1( a );
5740 aSig0 = extractFloat128Frac0( a );
5741 aExp = extractFloat128Exp( a );
5742 bSig1 = extractFloat128Frac1( b );
5743 bSig0 = extractFloat128Frac0( b );
5744 bExp = extractFloat128Exp( b );
5745 expDiff = aExp - bExp;
5746 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5747 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5748 if ( 0 < expDiff ) goto aExpBigger;
5749 if ( expDiff < 0 ) goto bExpBigger;
5750 if ( aExp == 0x7FFF ) {
5751 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5752 return propagateFloat128NaN( a, b STATUS_VAR );
5753 }
5754 float_raise( float_flag_invalid STATUS_VAR);
5755 z.low = float128_default_nan_low;
5756 z.high = float128_default_nan_high;
5757 return z;
5758 }
5759 if ( aExp == 0 ) {
5760 aExp = 1;
5761 bExp = 1;
5762 }
5763 if ( bSig0 < aSig0 ) goto aBigger;
5764 if ( aSig0 < bSig0 ) goto bBigger;
5765 if ( bSig1 < aSig1 ) goto aBigger;
5766 if ( aSig1 < bSig1 ) goto bBigger;
5767 return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5768 bExpBigger:
5769 if ( bExp == 0x7FFF ) {
5770 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5771 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5772 }
5773 if ( aExp == 0 ) {
5774 ++expDiff;
5775 }
5776 else {
5777 aSig0 |= LIT64( 0x4000000000000000 );
5778 }
5779 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5780 bSig0 |= LIT64( 0x4000000000000000 );
5781 bBigger:
5782 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5783 zExp = bExp;
5784 zSign ^= 1;
5785 goto normalizeRoundAndPack;
5786 aExpBigger:
5787 if ( aExp == 0x7FFF ) {
5788 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5789 return a;
5790 }
5791 if ( bExp == 0 ) {
5792 --expDiff;
5793 }
5794 else {
5795 bSig0 |= LIT64( 0x4000000000000000 );
5796 }
5797 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5798 aSig0 |= LIT64( 0x4000000000000000 );
5799 aBigger:
5800 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5801 zExp = aExp;
5802 normalizeRoundAndPack:
5803 --zExp;
5804 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5805
5806}
5807
5808/*----------------------------------------------------------------------------
5809| Returns the result of adding the quadruple-precision floating-point values
5810| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
5811| for Binary Floating-Point Arithmetic.
5812*----------------------------------------------------------------------------*/
5813
5814float128 float128_add( float128 a, float128 b STATUS_PARAM )
5815{
5816 flag aSign, bSign;
5817
5818 aSign = extractFloat128Sign( a );
5819 bSign = extractFloat128Sign( b );
5820 if ( aSign == bSign ) {
5821 return addFloat128Sigs( a, b, aSign STATUS_VAR );
5822 }
5823 else {
5824 return subFloat128Sigs( a, b, aSign STATUS_VAR );
5825 }
5826
5827}
5828
5829/*----------------------------------------------------------------------------
5830| Returns the result of subtracting the quadruple-precision floating-point
5831| values `a' and `b'. The operation is performed according to the IEC/IEEE
5832| Standard for Binary Floating-Point Arithmetic.
5833*----------------------------------------------------------------------------*/
5834
5835float128 float128_sub( float128 a, float128 b STATUS_PARAM )
5836{
5837 flag aSign, bSign;
5838
5839 aSign = extractFloat128Sign( a );
5840 bSign = extractFloat128Sign( b );
5841 if ( aSign == bSign ) {
5842 return subFloat128Sigs( a, b, aSign STATUS_VAR );
5843 }
5844 else {
5845 return addFloat128Sigs( a, b, aSign STATUS_VAR );
5846 }
5847
5848}
5849
5850/*----------------------------------------------------------------------------
5851| Returns the result of multiplying the quadruple-precision floating-point
5852| values `a' and `b'. The operation is performed according to the IEC/IEEE
5853| Standard for Binary Floating-Point Arithmetic.
5854*----------------------------------------------------------------------------*/
5855
5856float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5857{
5858 flag aSign, bSign, zSign;
5859 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005860 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
bellard158142c2005-03-13 16:54:06 +00005861 float128 z;
5862
5863 aSig1 = extractFloat128Frac1( a );
5864 aSig0 = extractFloat128Frac0( a );
5865 aExp = extractFloat128Exp( a );
5866 aSign = extractFloat128Sign( a );
5867 bSig1 = extractFloat128Frac1( b );
5868 bSig0 = extractFloat128Frac0( b );
5869 bExp = extractFloat128Exp( b );
5870 bSign = extractFloat128Sign( b );
5871 zSign = aSign ^ bSign;
5872 if ( aExp == 0x7FFF ) {
5873 if ( ( aSig0 | aSig1 )
5874 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5875 return propagateFloat128NaN( a, b STATUS_VAR );
5876 }
5877 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5878 return packFloat128( zSign, 0x7FFF, 0, 0 );
5879 }
5880 if ( bExp == 0x7FFF ) {
5881 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5882 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5883 invalid:
5884 float_raise( float_flag_invalid STATUS_VAR);
5885 z.low = float128_default_nan_low;
5886 z.high = float128_default_nan_high;
5887 return z;
5888 }
5889 return packFloat128( zSign, 0x7FFF, 0, 0 );
5890 }
5891 if ( aExp == 0 ) {
5892 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5893 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5894 }
5895 if ( bExp == 0 ) {
5896 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5897 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5898 }
5899 zExp = aExp + bExp - 0x4000;
5900 aSig0 |= LIT64( 0x0001000000000000 );
5901 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5902 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5903 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5904 zSig2 |= ( zSig3 != 0 );
5905 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5906 shift128ExtraRightJamming(
5907 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5908 ++zExp;
5909 }
5910 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5911
5912}
5913
5914/*----------------------------------------------------------------------------
5915| Returns the result of dividing the quadruple-precision floating-point value
5916| `a' by the corresponding value `b'. The operation is performed according to
5917| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5918*----------------------------------------------------------------------------*/
5919
5920float128 float128_div( float128 a, float128 b STATUS_PARAM )
5921{
5922 flag aSign, bSign, zSign;
5923 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005924 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5925 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00005926 float128 z;
5927
5928 aSig1 = extractFloat128Frac1( a );
5929 aSig0 = extractFloat128Frac0( a );
5930 aExp = extractFloat128Exp( a );
5931 aSign = extractFloat128Sign( a );
5932 bSig1 = extractFloat128Frac1( b );
5933 bSig0 = extractFloat128Frac0( b );
5934 bExp = extractFloat128Exp( b );
5935 bSign = extractFloat128Sign( b );
5936 zSign = aSign ^ bSign;
5937 if ( aExp == 0x7FFF ) {
5938 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5939 if ( bExp == 0x7FFF ) {
5940 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5941 goto invalid;
5942 }
5943 return packFloat128( zSign, 0x7FFF, 0, 0 );
5944 }
5945 if ( bExp == 0x7FFF ) {
5946 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5947 return packFloat128( zSign, 0, 0, 0 );
5948 }
5949 if ( bExp == 0 ) {
5950 if ( ( bSig0 | bSig1 ) == 0 ) {
5951 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5952 invalid:
5953 float_raise( float_flag_invalid STATUS_VAR);
5954 z.low = float128_default_nan_low;
5955 z.high = float128_default_nan_high;
5956 return z;
5957 }
5958 float_raise( float_flag_divbyzero STATUS_VAR);
5959 return packFloat128( zSign, 0x7FFF, 0, 0 );
5960 }
5961 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5962 }
5963 if ( aExp == 0 ) {
5964 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5965 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5966 }
5967 zExp = aExp - bExp + 0x3FFD;
5968 shortShift128Left(
5969 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5970 shortShift128Left(
5971 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5972 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5973 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5974 ++zExp;
5975 }
5976 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5977 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5978 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005979 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005980 --zSig0;
5981 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5982 }
5983 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5984 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5985 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5986 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005987 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005988 --zSig1;
5989 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5990 }
5991 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5992 }
5993 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5994 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5995
5996}
5997
5998/*----------------------------------------------------------------------------
5999| Returns the remainder of the quadruple-precision floating-point value `a'
6000| with respect to the corresponding value `b'. The operation is performed
6001| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6002*----------------------------------------------------------------------------*/
6003
6004float128 float128_rem( float128 a, float128 b STATUS_PARAM )
6005{
Blue Swirled086f32010-03-07 13:49:58 +00006006 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00006007 int32 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006008 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6009 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6010 int64_t sigMean0;
bellard158142c2005-03-13 16:54:06 +00006011 float128 z;
6012
6013 aSig1 = extractFloat128Frac1( a );
6014 aSig0 = extractFloat128Frac0( a );
6015 aExp = extractFloat128Exp( a );
6016 aSign = extractFloat128Sign( a );
6017 bSig1 = extractFloat128Frac1( b );
6018 bSig0 = extractFloat128Frac0( b );
6019 bExp = extractFloat128Exp( b );
bellard158142c2005-03-13 16:54:06 +00006020 if ( aExp == 0x7FFF ) {
6021 if ( ( aSig0 | aSig1 )
6022 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6023 return propagateFloat128NaN( a, b STATUS_VAR );
6024 }
6025 goto invalid;
6026 }
6027 if ( bExp == 0x7FFF ) {
6028 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6029 return a;
6030 }
6031 if ( bExp == 0 ) {
6032 if ( ( bSig0 | bSig1 ) == 0 ) {
6033 invalid:
6034 float_raise( float_flag_invalid STATUS_VAR);
6035 z.low = float128_default_nan_low;
6036 z.high = float128_default_nan_high;
6037 return z;
6038 }
6039 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6040 }
6041 if ( aExp == 0 ) {
6042 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6043 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6044 }
6045 expDiff = aExp - bExp;
6046 if ( expDiff < -1 ) return a;
6047 shortShift128Left(
6048 aSig0 | LIT64( 0x0001000000000000 ),
6049 aSig1,
6050 15 - ( expDiff < 0 ),
6051 &aSig0,
6052 &aSig1
6053 );
6054 shortShift128Left(
6055 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6056 q = le128( bSig0, bSig1, aSig0, aSig1 );
6057 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6058 expDiff -= 64;
6059 while ( 0 < expDiff ) {
6060 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6061 q = ( 4 < q ) ? q - 4 : 0;
6062 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6063 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6064 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6065 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6066 expDiff -= 61;
6067 }
6068 if ( -64 < expDiff ) {
6069 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6070 q = ( 4 < q ) ? q - 4 : 0;
6071 q >>= - expDiff;
6072 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6073 expDiff += 52;
6074 if ( expDiff < 0 ) {
6075 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6076 }
6077 else {
6078 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6079 }
6080 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6081 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6082 }
6083 else {
6084 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6085 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6086 }
6087 do {
6088 alternateASig0 = aSig0;
6089 alternateASig1 = aSig1;
6090 ++q;
6091 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006092 } while ( 0 <= (int64_t) aSig0 );
bellard158142c2005-03-13 16:54:06 +00006093 add128(
Andreas Färberbb98fe42011-03-07 01:34:06 +01006094 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
bellard158142c2005-03-13 16:54:06 +00006095 if ( ( sigMean0 < 0 )
6096 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6097 aSig0 = alternateASig0;
6098 aSig1 = alternateASig1;
6099 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01006100 zSign = ( (int64_t) aSig0 < 0 );
bellard158142c2005-03-13 16:54:06 +00006101 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6102 return
6103 normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
6104
6105}
6106
6107/*----------------------------------------------------------------------------
6108| Returns the square root of the quadruple-precision floating-point value `a'.
6109| The operation is performed according to the IEC/IEEE Standard for Binary
6110| Floating-Point Arithmetic.
6111*----------------------------------------------------------------------------*/
6112
6113float128 float128_sqrt( float128 a STATUS_PARAM )
6114{
6115 flag aSign;
6116 int32 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006117 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6118 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00006119 float128 z;
6120
6121 aSig1 = extractFloat128Frac1( a );
6122 aSig0 = extractFloat128Frac0( a );
6123 aExp = extractFloat128Exp( a );
6124 aSign = extractFloat128Sign( a );
6125 if ( aExp == 0x7FFF ) {
6126 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
6127 if ( ! aSign ) return a;
6128 goto invalid;
6129 }
6130 if ( aSign ) {
6131 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6132 invalid:
6133 float_raise( float_flag_invalid STATUS_VAR);
6134 z.low = float128_default_nan_low;
6135 z.high = float128_default_nan_high;
6136 return z;
6137 }
6138 if ( aExp == 0 ) {
6139 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6140 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6141 }
6142 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6143 aSig0 |= LIT64( 0x0001000000000000 );
6144 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6145 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6146 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6147 doubleZSig0 = zSig0<<1;
6148 mul64To128( zSig0, zSig0, &term0, &term1 );
6149 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006150 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006151 --zSig0;
6152 doubleZSig0 -= 2;
6153 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6154 }
6155 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6156 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6157 if ( zSig1 == 0 ) zSig1 = 1;
6158 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6159 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6160 mul64To128( zSig1, zSig1, &term2, &term3 );
6161 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006162 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006163 --zSig1;
6164 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6165 term3 |= 1;
6166 term2 |= doubleZSig0;
6167 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6168 }
6169 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6170 }
6171 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6172 return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6173
6174}
6175
6176/*----------------------------------------------------------------------------
6177| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarnob6893622011-04-14 00:49:29 +02006178| the corresponding value `b', and 0 otherwise. The invalid exception is
6179| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00006180| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6181*----------------------------------------------------------------------------*/
6182
Aurelien Jarnob6893622011-04-14 00:49:29 +02006183int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006184{
6185
6186 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6187 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6188 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6189 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6190 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02006191 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00006192 return 0;
6193 }
6194 return
6195 ( a.low == b.low )
6196 && ( ( a.high == b.high )
6197 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01006198 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00006199 );
6200
6201}
6202
6203/*----------------------------------------------------------------------------
6204| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006205| or equal to the corresponding value `b', and 0 otherwise. The invalid
6206| exception is raised if either operand is a NaN. The comparison is performed
6207| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006208*----------------------------------------------------------------------------*/
6209
bellard750afe92006-10-28 19:27:11 +00006210int float128_le( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006211{
6212 flag aSign, bSign;
6213
6214 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6215 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6216 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6217 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6218 ) {
6219 float_raise( float_flag_invalid STATUS_VAR);
6220 return 0;
6221 }
6222 aSign = extractFloat128Sign( a );
6223 bSign = extractFloat128Sign( b );
6224 if ( aSign != bSign ) {
6225 return
6226 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006227 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006228 == 0 );
6229 }
6230 return
6231 aSign ? le128( b.high, b.low, a.high, a.low )
6232 : le128( a.high, a.low, b.high, b.low );
6233
6234}
6235
6236/*----------------------------------------------------------------------------
6237| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006238| the corresponding value `b', and 0 otherwise. The invalid exception is
6239| raised if either operand is a NaN. The comparison is performed according
6240| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006241*----------------------------------------------------------------------------*/
6242
bellard750afe92006-10-28 19:27:11 +00006243int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006244{
6245 flag aSign, bSign;
6246
6247 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6248 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6249 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6250 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6251 ) {
6252 float_raise( float_flag_invalid STATUS_VAR);
6253 return 0;
6254 }
6255 aSign = extractFloat128Sign( a );
6256 bSign = extractFloat128Sign( b );
6257 if ( aSign != bSign ) {
6258 return
6259 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006260 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006261 != 0 );
6262 }
6263 return
6264 aSign ? lt128( b.high, b.low, a.high, a.low )
6265 : lt128( a.high, a.low, b.high, b.low );
6266
6267}
6268
6269/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02006270| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006271| be compared, and 0 otherwise. The invalid exception is raised if either
6272| operand is a NaN. The comparison is performed according to the IEC/IEEE
6273| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02006274*----------------------------------------------------------------------------*/
6275
6276int float128_unordered( float128 a, float128 b STATUS_PARAM )
6277{
6278 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6279 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6280 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6281 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6282 ) {
6283 float_raise( float_flag_invalid STATUS_VAR);
6284 return 1;
6285 }
6286 return 0;
6287}
6288
6289/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00006290| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006291| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6292| exception. The comparison is performed according to the IEC/IEEE Standard
6293| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006294*----------------------------------------------------------------------------*/
6295
Aurelien Jarnob6893622011-04-14 00:49:29 +02006296int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006297{
6298
6299 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6300 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6301 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6302 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6303 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02006304 if ( float128_is_signaling_nan( a )
6305 || float128_is_signaling_nan( b ) ) {
6306 float_raise( float_flag_invalid STATUS_VAR);
6307 }
bellard158142c2005-03-13 16:54:06 +00006308 return 0;
6309 }
6310 return
6311 ( a.low == b.low )
6312 && ( ( a.high == b.high )
6313 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01006314 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00006315 );
6316
6317}
6318
6319/*----------------------------------------------------------------------------
6320| Returns 1 if the quadruple-precision floating-point value `a' is less than
6321| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6322| cause an exception. Otherwise, the comparison is performed according to the
6323| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6324*----------------------------------------------------------------------------*/
6325
bellard750afe92006-10-28 19:27:11 +00006326int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006327{
6328 flag aSign, bSign;
6329
6330 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6331 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6332 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6333 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6334 ) {
6335 if ( float128_is_signaling_nan( a )
6336 || float128_is_signaling_nan( b ) ) {
6337 float_raise( float_flag_invalid STATUS_VAR);
6338 }
6339 return 0;
6340 }
6341 aSign = extractFloat128Sign( a );
6342 bSign = extractFloat128Sign( b );
6343 if ( aSign != bSign ) {
6344 return
6345 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006346 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006347 == 0 );
6348 }
6349 return
6350 aSign ? le128( b.high, b.low, a.high, a.low )
6351 : le128( a.high, a.low, b.high, b.low );
6352
6353}
6354
6355/*----------------------------------------------------------------------------
6356| Returns 1 if the quadruple-precision floating-point value `a' is less than
6357| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6358| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6359| Standard for Binary Floating-Point Arithmetic.
6360*----------------------------------------------------------------------------*/
6361
bellard750afe92006-10-28 19:27:11 +00006362int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006363{
6364 flag aSign, bSign;
6365
6366 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6367 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6368 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6369 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6370 ) {
6371 if ( float128_is_signaling_nan( a )
6372 || float128_is_signaling_nan( b ) ) {
6373 float_raise( float_flag_invalid STATUS_VAR);
6374 }
6375 return 0;
6376 }
6377 aSign = extractFloat128Sign( a );
6378 bSign = extractFloat128Sign( b );
6379 if ( aSign != bSign ) {
6380 return
6381 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006382 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006383 != 0 );
6384 }
6385 return
6386 aSign ? lt128( b.high, b.low, a.high, a.low )
6387 : lt128( a.high, a.low, b.high, b.low );
6388
6389}
6390
Aurelien Jarno67b78612011-04-14 00:49:29 +02006391/*----------------------------------------------------------------------------
6392| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6393| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6394| comparison is performed according to the IEC/IEEE Standard for Binary
6395| Floating-Point Arithmetic.
6396*----------------------------------------------------------------------------*/
6397
6398int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
6399{
6400 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6401 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6402 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6403 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6404 ) {
6405 if ( float128_is_signaling_nan( a )
6406 || float128_is_signaling_nan( b ) ) {
6407 float_raise( float_flag_invalid STATUS_VAR);
6408 }
6409 return 1;
6410 }
6411 return 0;
6412}
6413
bellard1d6bda32005-03-13 18:52:29 +00006414/* misc functions */
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006415float32 uint32_to_float32( uint32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006416{
6417 return int64_to_float32(a STATUS_VAR);
6418}
6419
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006420float64 uint32_to_float64( uint32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006421{
6422 return int64_to_float64(a STATUS_VAR);
6423}
6424
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006425uint32 float32_to_uint32( float32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006426{
6427 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006428 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006429
6430 v = float32_to_int64(a STATUS_VAR);
6431 if (v < 0) {
6432 res = 0;
6433 float_raise( float_flag_invalid STATUS_VAR);
6434 } else if (v > 0xffffffff) {
6435 res = 0xffffffff;
6436 float_raise( float_flag_invalid STATUS_VAR);
6437 } else {
6438 res = v;
6439 }
6440 return res;
6441}
6442
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006443uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006444{
6445 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006446 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006447
6448 v = float32_to_int64_round_to_zero(a STATUS_VAR);
6449 if (v < 0) {
6450 res = 0;
6451 float_raise( float_flag_invalid STATUS_VAR);
6452 } else if (v > 0xffffffff) {
6453 res = 0xffffffff;
6454 float_raise( float_flag_invalid STATUS_VAR);
6455 } else {
6456 res = v;
6457 }
6458 return res;
6459}
6460
Andreas Färber5aea4c52012-04-26 00:15:55 +02006461uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00006462{
6463 int64_t v;
Andreas Färber5aea4c52012-04-26 00:15:55 +02006464 uint_fast16_t res;
Peter Maydellcbcef452010-12-07 15:37:34 +00006465
6466 v = float32_to_int64_round_to_zero(a STATUS_VAR);
6467 if (v < 0) {
6468 res = 0;
6469 float_raise( float_flag_invalid STATUS_VAR);
6470 } else if (v > 0xffff) {
6471 res = 0xffff;
6472 float_raise( float_flag_invalid STATUS_VAR);
6473 } else {
6474 res = v;
6475 }
6476 return res;
6477}
6478
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006479uint32 float64_to_uint32( float64 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006480{
6481 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006482 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006483
6484 v = float64_to_int64(a STATUS_VAR);
6485 if (v < 0) {
6486 res = 0;
6487 float_raise( float_flag_invalid STATUS_VAR);
6488 } else if (v > 0xffffffff) {
6489 res = 0xffffffff;
6490 float_raise( float_flag_invalid STATUS_VAR);
6491 } else {
6492 res = v;
6493 }
6494 return res;
6495}
6496
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006497uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006498{
6499 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006500 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006501
6502 v = float64_to_int64_round_to_zero(a STATUS_VAR);
6503 if (v < 0) {
6504 res = 0;
6505 float_raise( float_flag_invalid STATUS_VAR);
6506 } else if (v > 0xffffffff) {
6507 res = 0xffffffff;
6508 float_raise( float_flag_invalid STATUS_VAR);
6509 } else {
6510 res = v;
6511 }
6512 return res;
6513}
6514
Andreas Färber5aea4c52012-04-26 00:15:55 +02006515uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00006516{
6517 int64_t v;
Andreas Färber5aea4c52012-04-26 00:15:55 +02006518 uint_fast16_t res;
Peter Maydellcbcef452010-12-07 15:37:34 +00006519
6520 v = float64_to_int64_round_to_zero(a STATUS_VAR);
6521 if (v < 0) {
6522 res = 0;
6523 float_raise( float_flag_invalid STATUS_VAR);
6524 } else if (v > 0xffff) {
6525 res = 0xffff;
6526 float_raise( float_flag_invalid STATUS_VAR);
6527 } else {
6528 res = v;
6529 }
6530 return res;
6531}
6532
pbrookf090c9d2007-11-18 14:33:24 +00006533/* FIXME: This looks broken. */
j_mayer75d62a52007-03-20 22:10:42 +00006534uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
6535{
6536 int64_t v;
6537
pbrookf090c9d2007-11-18 14:33:24 +00006538 v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6539 v += float64_val(a);
6540 v = float64_to_int64(make_float64(v) STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00006541
6542 return v - INT64_MIN;
6543}
6544
6545uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
6546{
6547 int64_t v;
6548
pbrookf090c9d2007-11-18 14:33:24 +00006549 v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6550 v += float64_val(a);
6551 v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00006552
6553 return v - INT64_MIN;
6554}
6555
bellard1d6bda32005-03-13 18:52:29 +00006556#define COMPARE(s, nan_exp) \
bellard750afe92006-10-28 19:27:11 +00006557INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard1d6bda32005-03-13 18:52:29 +00006558 int is_quiet STATUS_PARAM ) \
6559{ \
6560 flag aSign, bSign; \
Andreas Färberbb98fe42011-03-07 01:34:06 +01006561 uint ## s ## _t av, bv; \
Peter Maydell37d18662011-01-06 19:37:53 +00006562 a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
6563 b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
bellard1d6bda32005-03-13 18:52:29 +00006564 \
6565 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
6566 extractFloat ## s ## Frac( a ) ) || \
6567 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
6568 extractFloat ## s ## Frac( b ) )) { \
6569 if (!is_quiet || \
6570 float ## s ## _is_signaling_nan( a ) || \
6571 float ## s ## _is_signaling_nan( b ) ) { \
6572 float_raise( float_flag_invalid STATUS_VAR); \
6573 } \
6574 return float_relation_unordered; \
6575 } \
6576 aSign = extractFloat ## s ## Sign( a ); \
6577 bSign = extractFloat ## s ## Sign( b ); \
pbrookf090c9d2007-11-18 14:33:24 +00006578 av = float ## s ## _val(a); \
blueswir1cd8a2532007-11-21 18:57:44 +00006579 bv = float ## s ## _val(b); \
bellard1d6bda32005-03-13 18:52:29 +00006580 if ( aSign != bSign ) { \
Andreas Färberbb98fe42011-03-07 01:34:06 +01006581 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
bellard1d6bda32005-03-13 18:52:29 +00006582 /* zero case */ \
6583 return float_relation_equal; \
6584 } else { \
6585 return 1 - (2 * aSign); \
6586 } \
6587 } else { \
pbrookf090c9d2007-11-18 14:33:24 +00006588 if (av == bv) { \
bellard1d6bda32005-03-13 18:52:29 +00006589 return float_relation_equal; \
6590 } else { \
pbrookf090c9d2007-11-18 14:33:24 +00006591 return 1 - 2 * (aSign ^ ( av < bv )); \
bellard1d6bda32005-03-13 18:52:29 +00006592 } \
6593 } \
6594} \
6595 \
bellard750afe92006-10-28 19:27:11 +00006596int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard1d6bda32005-03-13 18:52:29 +00006597{ \
6598 return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
6599} \
6600 \
bellard750afe92006-10-28 19:27:11 +00006601int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard1d6bda32005-03-13 18:52:29 +00006602{ \
6603 return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
6604}
6605
6606COMPARE(32, 0xff)
6607COMPARE(64, 0x7ff)
pbrook9ee6e8b2007-11-11 00:04:49 +00006608
Aurelien Jarnof6714d32011-04-20 13:04:22 +02006609INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6610 int is_quiet STATUS_PARAM )
6611{
6612 flag aSign, bSign;
6613
6614 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6615 ( extractFloatx80Frac( a )<<1 ) ) ||
6616 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6617 ( extractFloatx80Frac( b )<<1 ) )) {
6618 if (!is_quiet ||
6619 floatx80_is_signaling_nan( a ) ||
6620 floatx80_is_signaling_nan( b ) ) {
6621 float_raise( float_flag_invalid STATUS_VAR);
6622 }
6623 return float_relation_unordered;
6624 }
6625 aSign = extractFloatx80Sign( a );
6626 bSign = extractFloatx80Sign( b );
6627 if ( aSign != bSign ) {
6628
6629 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6630 ( ( a.low | b.low ) == 0 ) ) {
6631 /* zero case */
6632 return float_relation_equal;
6633 } else {
6634 return 1 - (2 * aSign);
6635 }
6636 } else {
6637 if (a.low == b.low && a.high == b.high) {
6638 return float_relation_equal;
6639 } else {
6640 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6641 }
6642 }
6643}
6644
6645int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
6646{
6647 return floatx80_compare_internal(a, b, 0 STATUS_VAR);
6648}
6649
6650int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
6651{
6652 return floatx80_compare_internal(a, b, 1 STATUS_VAR);
6653}
6654
blueswir11f587322007-11-25 18:40:20 +00006655INLINE int float128_compare_internal( float128 a, float128 b,
6656 int is_quiet STATUS_PARAM )
6657{
6658 flag aSign, bSign;
6659
6660 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6661 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6662 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6663 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6664 if (!is_quiet ||
6665 float128_is_signaling_nan( a ) ||
6666 float128_is_signaling_nan( b ) ) {
6667 float_raise( float_flag_invalid STATUS_VAR);
6668 }
6669 return float_relation_unordered;
6670 }
6671 aSign = extractFloat128Sign( a );
6672 bSign = extractFloat128Sign( b );
6673 if ( aSign != bSign ) {
6674 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6675 /* zero case */
6676 return float_relation_equal;
6677 } else {
6678 return 1 - (2 * aSign);
6679 }
6680 } else {
6681 if (a.low == b.low && a.high == b.high) {
6682 return float_relation_equal;
6683 } else {
6684 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6685 }
6686 }
6687}
6688
6689int float128_compare( float128 a, float128 b STATUS_PARAM )
6690{
6691 return float128_compare_internal(a, b, 0 STATUS_VAR);
6692}
6693
6694int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
6695{
6696 return float128_compare_internal(a, b, 1 STATUS_VAR);
6697}
6698
Peter Maydell274f1b02011-03-11 08:12:25 +00006699/* min() and max() functions. These can't be implemented as
6700 * 'compare and pick one input' because that would mishandle
6701 * NaNs and +0 vs -0.
6702 */
6703#define MINMAX(s, nan_exp) \
6704INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
6705 int ismin STATUS_PARAM ) \
6706{ \
6707 flag aSign, bSign; \
6708 uint ## s ## _t av, bv; \
6709 a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
6710 b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
6711 if (float ## s ## _is_any_nan(a) || \
6712 float ## s ## _is_any_nan(b)) { \
6713 return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
6714 } \
6715 aSign = extractFloat ## s ## Sign(a); \
6716 bSign = extractFloat ## s ## Sign(b); \
6717 av = float ## s ## _val(a); \
6718 bv = float ## s ## _val(b); \
6719 if (aSign != bSign) { \
6720 if (ismin) { \
6721 return aSign ? a : b; \
6722 } else { \
6723 return aSign ? b : a; \
6724 } \
6725 } else { \
6726 if (ismin) { \
6727 return (aSign ^ (av < bv)) ? a : b; \
6728 } else { \
6729 return (aSign ^ (av < bv)) ? b : a; \
6730 } \
6731 } \
6732} \
6733 \
6734float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
6735{ \
6736 return float ## s ## _minmax(a, b, 1 STATUS_VAR); \
6737} \
6738 \
6739float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
6740{ \
6741 return float ## s ## _minmax(a, b, 0 STATUS_VAR); \
6742}
6743
6744MINMAX(32, 0xff)
6745MINMAX(64, 0x7ff)
6746
6747
pbrook9ee6e8b2007-11-11 00:04:49 +00006748/* Multiply A by 2 raised to the power N. */
6749float32 float32_scalbn( float32 a, int n STATUS_PARAM )
6750{
6751 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006752 int16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006753 uint32_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00006754
Peter Maydell37d18662011-01-06 19:37:53 +00006755 a = float32_squash_input_denormal(a STATUS_VAR);
pbrook9ee6e8b2007-11-11 00:04:49 +00006756 aSig = extractFloat32Frac( a );
6757 aExp = extractFloat32Exp( a );
6758 aSign = extractFloat32Sign( a );
6759
6760 if ( aExp == 0xFF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006761 if ( aSig ) {
6762 return propagateFloat32NaN( a, a STATUS_VAR );
6763 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006764 return a;
6765 }
pbrook69397542008-12-19 12:59:28 +00006766 if ( aExp != 0 )
6767 aSig |= 0x00800000;
6768 else if ( aSig == 0 )
6769 return a;
6770
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006771 if (n > 0x200) {
6772 n = 0x200;
6773 } else if (n < -0x200) {
6774 n = -0x200;
6775 }
6776
pbrook69397542008-12-19 12:59:28 +00006777 aExp += n - 1;
6778 aSig <<= 7;
6779 return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006780}
6781
6782float64 float64_scalbn( float64 a, int n STATUS_PARAM )
6783{
6784 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006785 int16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006786 uint64_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00006787
Peter Maydell37d18662011-01-06 19:37:53 +00006788 a = float64_squash_input_denormal(a STATUS_VAR);
pbrook9ee6e8b2007-11-11 00:04:49 +00006789 aSig = extractFloat64Frac( a );
6790 aExp = extractFloat64Exp( a );
6791 aSign = extractFloat64Sign( a );
6792
6793 if ( aExp == 0x7FF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006794 if ( aSig ) {
6795 return propagateFloat64NaN( a, a STATUS_VAR );
6796 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006797 return a;
6798 }
pbrook69397542008-12-19 12:59:28 +00006799 if ( aExp != 0 )
6800 aSig |= LIT64( 0x0010000000000000 );
6801 else if ( aSig == 0 )
6802 return a;
6803
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006804 if (n > 0x1000) {
6805 n = 0x1000;
6806 } else if (n < -0x1000) {
6807 n = -0x1000;
6808 }
6809
pbrook69397542008-12-19 12:59:28 +00006810 aExp += n - 1;
6811 aSig <<= 10;
6812 return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006813}
6814
pbrook9ee6e8b2007-11-11 00:04:49 +00006815floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
6816{
6817 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006818 int32_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006819 uint64_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00006820
6821 aSig = extractFloatx80Frac( a );
6822 aExp = extractFloatx80Exp( a );
6823 aSign = extractFloatx80Sign( a );
6824
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006825 if ( aExp == 0x7FFF ) {
6826 if ( aSig<<1 ) {
6827 return propagateFloatx80NaN( a, a STATUS_VAR );
6828 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006829 return a;
6830 }
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006831
pbrook69397542008-12-19 12:59:28 +00006832 if (aExp == 0 && aSig == 0)
6833 return a;
6834
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006835 if (n > 0x10000) {
6836 n = 0x10000;
6837 } else if (n < -0x10000) {
6838 n = -0x10000;
6839 }
6840
pbrook9ee6e8b2007-11-11 00:04:49 +00006841 aExp += n;
pbrook69397542008-12-19 12:59:28 +00006842 return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
6843 aSign, aExp, aSig, 0 STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006844}
pbrook9ee6e8b2007-11-11 00:04:49 +00006845
pbrook9ee6e8b2007-11-11 00:04:49 +00006846float128 float128_scalbn( float128 a, int n STATUS_PARAM )
6847{
6848 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006849 int32_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006850 uint64_t aSig0, aSig1;
pbrook9ee6e8b2007-11-11 00:04:49 +00006851
6852 aSig1 = extractFloat128Frac1( a );
6853 aSig0 = extractFloat128Frac0( a );
6854 aExp = extractFloat128Exp( a );
6855 aSign = extractFloat128Sign( a );
6856 if ( aExp == 0x7FFF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006857 if ( aSig0 | aSig1 ) {
6858 return propagateFloat128NaN( a, a STATUS_VAR );
6859 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006860 return a;
6861 }
pbrook69397542008-12-19 12:59:28 +00006862 if ( aExp != 0 )
6863 aSig0 |= LIT64( 0x0001000000000000 );
6864 else if ( aSig0 == 0 && aSig1 == 0 )
6865 return a;
6866
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006867 if (n > 0x10000) {
6868 n = 0x10000;
6869 } else if (n < -0x10000) {
6870 n = -0x10000;
6871 }
6872
pbrook69397542008-12-19 12:59:28 +00006873 aExp += n - 1;
6874 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
6875 STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006876
6877}