Blame - fpu/softfloat.c - people/peter.maydell/qemu-arm.git

blob: 83ccc4b8cd31254d449a28e29d88375a8d928aee [file] [log] [blame]

Andreas Färber	8d725fa	2011-03-07 01:34:04 +0100	[diff] [blame]	1	/*
				2	* QEMU float support
				3	*
				4	* Derived from SoftFloat.
				5	*/
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6
				7	/*============================================================================
				8
				9	This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
				10	Package, Release 2b.
				11
				12	Written by John R. Hauser. This work was made possible in part by the
				13	International Computer Science Institute, located at Suite 600, 1947 Center
				14	Street, Berkeley, California 94704. Funding was partially provided by the
				15	National Science Foundation under grant MIP-9311980. The original version
				16	of this code was written as part of a project to build a fixed-point vector
				17	processor in collaboration with the University of California at Berkeley,
				18	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				19	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
				20	arithmetic/SoftFloat.html'.
				21
				22	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
				23	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
				24	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
				25	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
				26	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
				27	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
				28	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
				29	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
				30
				31	Derivative works are acceptable, even for commercial purposes, so long as
				32	(1) the source code for the derivative work includes prominent notice that
				33	the work is derivative, and (2) the source code includes prominent notice with
				34	these four paragraphs for those parts of this code that are retained.
				35
				36	=============================================================================*/
				37
Peter Maydell	2ac8bd0	2011-09-26 16:56:55 +0100	[diff] [blame]	38	/* softfloat (and in particular the code in softfloat-specialize.h) is
				39	* target-dependent and needs the TARGET_* macros.
				40	*/
				41	#include "config.h"
				42
Paolo Bonzini	6b4c305	2012-10-24 13:12:00 +0200	[diff] [blame]	43	#include "fpu/softfloat.h"
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	44
				45	/*----------------------------------------------------------------------------
				46	\| Primitive arithmetic functions, including multi-word arithmetic, and
				47	\| division and square root approximations. (Can be specialized to target if
				48	\| desired.)
				49	----------------------------------------------------------------------------/
				50	#include "softfloat-macros.h"
				51
				52	/*----------------------------------------------------------------------------
				53	\| Functions and definitions to determine: (1) whether tininess for underflow
				54	\| is detected before or after rounding by default, (2) what (if anything)
				55	\| happens when exceptions are raised, (3) how signaling NaNs are distinguished
				56	\| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				57	\| are propagated from function inputs to output. These details are target-
				58	\| specific.
				59	----------------------------------------------------------------------------/
				60	#include "softfloat-specialize.h"
				61
				62	void set_float_rounding_mode(int val STATUS_PARAM)
				63	{
				64	STATUS(float_rounding_mode) = val;
				65	}
				66
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	67	void set_float_exception_flags(int val STATUS_PARAM)
				68	{
				69	STATUS(float_exception_flags) = val;
				70	}
				71
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	72	void set_floatx80_rounding_precision(int val STATUS_PARAM)
				73	{
				74	STATUS(floatx80_rounding_precision) = val;
				75	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	76
				77	/*----------------------------------------------------------------------------
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	78	\| Returns the fraction bits of the half-precision floating-point value `a'.
				79	----------------------------------------------------------------------------/
				80
				81	INLINE uint32_t extractFloat16Frac(float16 a)
				82	{
				83	return float16_val(a) & 0x3ff;
				84	}
				85
				86	/*----------------------------------------------------------------------------
				87	\| Returns the exponent bits of the half-precision floating-point value `a'.
				88	----------------------------------------------------------------------------/
				89
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	90	INLINE int_fast16_t extractFloat16Exp(float16 a)
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	91	{
				92	return (float16_val(a) >> 10) & 0x1f;
				93	}
				94
				95	/*----------------------------------------------------------------------------
				96	\| Returns the sign bit of the single-precision floating-point value `a'.
				97	----------------------------------------------------------------------------/
				98
				99	INLINE flag extractFloat16Sign(float16 a)
				100	{
				101	return float16_val(a)>>15;
				102	}
				103
				104	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	105	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				106	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
				107	\| input. If `zSign' is 1, the input is negated before being converted to an
				108	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
				109	\| is simply rounded to an integer, with the inexact exception raised if the
				110	\| input cannot be represented exactly as an integer. However, if the fixed-
				111	\| point input is too large, the invalid exception is raised and the largest
				112	\| positive or negative integer is returned.
				113	----------------------------------------------------------------------------/
				114
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	115	static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	116	{
				117	int8 roundingMode;
				118	flag roundNearestEven;
				119	int8 roundIncrement, roundBits;
Peter Maydell	760e141	2012-04-05 19:12:35 +0100	[diff] [blame]	120	int32_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	121
				122	roundingMode = STATUS(float_rounding_mode);
				123	roundNearestEven = ( roundingMode == float_round_nearest_even );
				124	roundIncrement = 0x40;
				125	if ( ! roundNearestEven ) {
				126	if ( roundingMode == float_round_to_zero ) {
				127	roundIncrement = 0;
				128	}
				129	else {
				130	roundIncrement = 0x7F;
				131	if ( zSign ) {
				132	if ( roundingMode == float_round_up ) roundIncrement = 0;
				133	}
				134	else {
				135	if ( roundingMode == float_round_down ) roundIncrement = 0;
				136	}
				137	}
				138	}
				139	roundBits = absZ & 0x7F;
				140	absZ = ( absZ + roundIncrement )>>7;
				141	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				142	z = absZ;
				143	if ( zSign ) z = - z;
				144	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				145	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	146	return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	147	}
				148	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				149	return z;
				150
				151	}
				152
				153	/*----------------------------------------------------------------------------
				154	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
				155	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
				156	\| and returns the properly rounded 64-bit integer corresponding to the input.
				157	\| If `zSign' is 1, the input is negated before being converted to an integer.
				158	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
				159	\| the inexact exception raised if the input cannot be represented exactly as
				160	\| an integer. However, if the fixed-point input is too large, the invalid
				161	\| exception is raised and the largest positive or negative integer is
				162	\| returned.
				163	----------------------------------------------------------------------------/
				164
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	165	static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	166	{
				167	int8 roundingMode;
				168	flag roundNearestEven, increment;
Peter Maydell	760e141	2012-04-05 19:12:35 +0100	[diff] [blame]	169	int64_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	170
				171	roundingMode = STATUS(float_rounding_mode);
				172	roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	173	increment = ( (int64_t) absZ1 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	174	if ( ! roundNearestEven ) {
				175	if ( roundingMode == float_round_to_zero ) {
				176	increment = 0;
				177	}
				178	else {
				179	if ( zSign ) {
				180	increment = ( roundingMode == float_round_down ) && absZ1;
				181	}
				182	else {
				183	increment = ( roundingMode == float_round_up ) && absZ1;
				184	}
				185	}
				186	}
				187	if ( increment ) {
				188	++absZ0;
				189	if ( absZ0 == 0 ) goto overflow;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	190	absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	191	}
				192	z = absZ0;
				193	if ( zSign ) z = - z;
				194	if ( z && ( ( z < 0 ) ^ zSign ) ) {
				195	overflow:
				196	float_raise( float_flag_invalid STATUS_VAR);
				197	return
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	198	zSign ? (int64_t) LIT64( 0x8000000000000000 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	199	: LIT64( 0x7FFFFFFFFFFFFFFF );
				200	}
				201	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				202	return z;
				203
				204	}
				205
				206	/*----------------------------------------------------------------------------
				207	\| Returns the fraction bits of the single-precision floating-point value `a'.
				208	----------------------------------------------------------------------------/
				209
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	210	INLINE uint32_t extractFloat32Frac( float32 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	211	{
				212
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	213	return float32_val(a) & 0x007FFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	214
				215	}
				216
				217	/*----------------------------------------------------------------------------
				218	\| Returns the exponent bits of the single-precision floating-point value `a'.
				219	----------------------------------------------------------------------------/
				220
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	221	INLINE int_fast16_t extractFloat32Exp(float32 a)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	222	{
				223
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	224	return ( float32_val(a)>>23 ) & 0xFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	225
				226	}
				227
				228	/*----------------------------------------------------------------------------
				229	\| Returns the sign bit of the single-precision floating-point value `a'.
				230	----------------------------------------------------------------------------/
				231
				232	INLINE flag extractFloat32Sign( float32 a )
				233	{
				234
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	235	return float32_val(a)>>31;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	236
				237	}
				238
				239	/*----------------------------------------------------------------------------
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	240	\| If `a' is denormal and we are in flush-to-zero mode then set the
				241	\| input-denormal exception and return zero. Otherwise just return the value.
				242	----------------------------------------------------------------------------/
				243	static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
				244	{
				245	if (STATUS(flush_inputs_to_zero)) {
				246	if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
				247	float_raise(float_flag_input_denormal STATUS_VAR);
				248	return make_float32(float32_val(a) & 0x80000000);
				249	}
				250	}
				251	return a;
				252	}
				253
				254	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	255	\| Normalizes the subnormal single-precision floating-point value represented
				256	\| by the denormalized significand `aSig'. The normalized exponent and
				257	\| significand are stored at the locations pointed to by `zExpPtr' and
				258	\| `zSigPtr', respectively.
				259	----------------------------------------------------------------------------/
				260
				261	static void
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	262	normalizeFloat32Subnormal(uint32_t aSig, int_fast16_t zExpPtr, uint32_t zSigPtr)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	263	{
				264	int8 shiftCount;
				265
				266	shiftCount = countLeadingZeros32( aSig ) - 8;
				267	*zSigPtr = aSig<<shiftCount;
				268	*zExpPtr = 1 - shiftCount;
				269
				270	}
				271
				272	/*----------------------------------------------------------------------------
				273	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				274	\| single-precision floating-point value, returning the result. After being
				275	\| shifted into the proper positions, the three fields are simply added
				276	\| together to form the result. This means that any integer portion of `zSig'
				277	\| will be added into the exponent. Since a properly normalized significand
				278	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				279	\| than the desired result exponent whenever `zSig' is a complete, normalized
				280	\| significand.
				281	----------------------------------------------------------------------------/
				282
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	283	INLINE float32 packFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	284	{
				285
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	286	return make_float32(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	287	( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	288
				289	}
				290
				291	/*----------------------------------------------------------------------------
				292	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				293	\| and significand `zSig', and returns the proper single-precision floating-
				294	\| point value corresponding to the abstract input. Ordinarily, the abstract
				295	\| value is simply rounded and packed into the single-precision format, with
				296	\| the inexact exception raised if the abstract input cannot be represented
				297	\| exactly. However, if the abstract value is too large, the overflow and
				298	\| inexact exceptions are raised and an infinity or maximal finite value is
				299	\| returned. If the abstract value is too small, the input value is rounded to
				300	\| a subnormal number, and the underflow and inexact exceptions are raised if
				301	\| the abstract input cannot be represented exactly as a subnormal single-
				302	\| precision floating-point number.
				303	\| The input significand `zSig' has its binary point between bits 30
				304	\| and 29, which is 7 bits to the left of the usual location. This shifted
				305	\| significand must be normalized or smaller. If `zSig' is not normalized,
				306	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				307	\| and it must not require rounding. In the usual case that `zSig' is
				308	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				309	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				310	\| Binary Floating-Point Arithmetic.
				311	----------------------------------------------------------------------------/
				312
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	313	static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	314	{
				315	int8 roundingMode;
				316	flag roundNearestEven;
				317	int8 roundIncrement, roundBits;
				318	flag isTiny;
				319
				320	roundingMode = STATUS(float_rounding_mode);
				321	roundNearestEven = ( roundingMode == float_round_nearest_even );
				322	roundIncrement = 0x40;
				323	if ( ! roundNearestEven ) {
				324	if ( roundingMode == float_round_to_zero ) {
				325	roundIncrement = 0;
				326	}
				327	else {
				328	roundIncrement = 0x7F;
				329	if ( zSign ) {
				330	if ( roundingMode == float_round_up ) roundIncrement = 0;
				331	}
				332	else {
				333	if ( roundingMode == float_round_down ) roundIncrement = 0;
				334	}
				335	}
				336	}
				337	roundBits = zSig & 0x7F;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	338	if ( 0xFD <= (uint16_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	339	if ( ( 0xFD < zExp )
				340	\|\| ( ( zExp == 0xFD )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	341	&& ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	342	) {
				343	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	344	return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	345	}
				346	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	347	if (STATUS(flush_to_zero)) {
				348	float_raise(float_flag_output_denormal STATUS_VAR);
				349	return packFloat32(zSign, 0, 0);
				350	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	351	isTiny =
				352	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				353	\|\| ( zExp < -1 )
				354	\|\| ( zSig + roundIncrement < 0x80000000 );
				355	shift32RightJamming( zSig, - zExp, &zSig );
				356	zExp = 0;
				357	roundBits = zSig & 0x7F;
				358	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				359	}
				360	}
				361	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				362	zSig = ( zSig + roundIncrement )>>7;
				363	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				364	if ( zSig == 0 ) zExp = 0;
				365	return packFloat32( zSign, zExp, zSig );
				366
				367	}
				368
				369	/*----------------------------------------------------------------------------
				370	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				371	\| and significand `zSig', and returns the proper single-precision floating-
				372	\| point value corresponding to the abstract input. This routine is just like
				373	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
				374	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				375	\| floating-point exponent.
				376	----------------------------------------------------------------------------/
				377
				378	static float32
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	379	normalizeRoundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	380	{
				381	int8 shiftCount;
				382
				383	shiftCount = countLeadingZeros32( zSig ) - 1;
				384	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				385
				386	}
				387
				388	/*----------------------------------------------------------------------------
				389	\| Returns the fraction bits of the double-precision floating-point value `a'.
				390	----------------------------------------------------------------------------/
				391
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	392	INLINE uint64_t extractFloat64Frac( float64 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	393	{
				394
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	395	return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	396
				397	}
				398
				399	/*----------------------------------------------------------------------------
				400	\| Returns the exponent bits of the double-precision floating-point value `a'.
				401	----------------------------------------------------------------------------/
				402
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	403	INLINE int_fast16_t extractFloat64Exp(float64 a)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	404	{
				405
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	406	return ( float64_val(a)>>52 ) & 0x7FF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	407
				408	}
				409
				410	/*----------------------------------------------------------------------------
				411	\| Returns the sign bit of the double-precision floating-point value `a'.
				412	----------------------------------------------------------------------------/
				413
				414	INLINE flag extractFloat64Sign( float64 a )
				415	{
				416
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	417	return float64_val(a)>>63;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	418
				419	}
				420
				421	/*----------------------------------------------------------------------------
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	422	\| If `a' is denormal and we are in flush-to-zero mode then set the
				423	\| input-denormal exception and return zero. Otherwise just return the value.
				424	----------------------------------------------------------------------------/
				425	static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
				426	{
				427	if (STATUS(flush_inputs_to_zero)) {
				428	if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
				429	float_raise(float_flag_input_denormal STATUS_VAR);
				430	return make_float64(float64_val(a) & (1ULL << 63));
				431	}
				432	}
				433	return a;
				434	}
				435
				436	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	437	\| Normalizes the subnormal double-precision floating-point value represented
				438	\| by the denormalized significand `aSig'. The normalized exponent and
				439	\| significand are stored at the locations pointed to by `zExpPtr' and
				440	\| `zSigPtr', respectively.
				441	----------------------------------------------------------------------------/
				442
				443	static void
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	444	normalizeFloat64Subnormal(uint64_t aSig, int_fast16_t zExpPtr, uint64_t zSigPtr)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	445	{
				446	int8 shiftCount;
				447
				448	shiftCount = countLeadingZeros64( aSig ) - 11;
				449	*zSigPtr = aSig<<shiftCount;
				450	*zExpPtr = 1 - shiftCount;
				451
				452	}
				453
				454	/*----------------------------------------------------------------------------
				455	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				456	\| double-precision floating-point value, returning the result. After being
				457	\| shifted into the proper positions, the three fields are simply added
				458	\| together to form the result. This means that any integer portion of `zSig'
				459	\| will be added into the exponent. Since a properly normalized significand
				460	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				461	\| than the desired result exponent whenever `zSig' is a complete, normalized
				462	\| significand.
				463	----------------------------------------------------------------------------/
				464
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	465	INLINE float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	466	{
				467
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	468	return make_float64(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	469	( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	470
				471	}
				472
				473	/*----------------------------------------------------------------------------
				474	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				475	\| and significand `zSig', and returns the proper double-precision floating-
				476	\| point value corresponding to the abstract input. Ordinarily, the abstract
				477	\| value is simply rounded and packed into the double-precision format, with
				478	\| the inexact exception raised if the abstract input cannot be represented
				479	\| exactly. However, if the abstract value is too large, the overflow and
				480	\| inexact exceptions are raised and an infinity or maximal finite value is
				481	\| returned. If the abstract value is too small, the input value is rounded
				482	\| to a subnormal number, and the underflow and inexact exceptions are raised
				483	\| if the abstract input cannot be represented exactly as a subnormal double-
				484	\| precision floating-point number.
				485	\| The input significand `zSig' has its binary point between bits 62
				486	\| and 61, which is 10 bits to the left of the usual location. This shifted
				487	\| significand must be normalized or smaller. If `zSig' is not normalized,
				488	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				489	\| and it must not require rounding. In the usual case that `zSig' is
				490	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				491	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				492	\| Binary Floating-Point Arithmetic.
				493	----------------------------------------------------------------------------/
				494
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	495	static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	496	{
				497	int8 roundingMode;
				498	flag roundNearestEven;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	499	int_fast16_t roundIncrement, roundBits;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	500	flag isTiny;
				501
				502	roundingMode = STATUS(float_rounding_mode);
				503	roundNearestEven = ( roundingMode == float_round_nearest_even );
				504	roundIncrement = 0x200;
				505	if ( ! roundNearestEven ) {
				506	if ( roundingMode == float_round_to_zero ) {
				507	roundIncrement = 0;
				508	}
				509	else {
				510	roundIncrement = 0x3FF;
				511	if ( zSign ) {
				512	if ( roundingMode == float_round_up ) roundIncrement = 0;
				513	}
				514	else {
				515	if ( roundingMode == float_round_down ) roundIncrement = 0;
				516	}
				517	}
				518	}
				519	roundBits = zSig & 0x3FF;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	520	if ( 0x7FD <= (uint16_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	521	if ( ( 0x7FD < zExp )
				522	\|\| ( ( zExp == 0x7FD )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	523	&& ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	524	) {
				525	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	526	return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	527	}
				528	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	529	if (STATUS(flush_to_zero)) {
				530	float_raise(float_flag_output_denormal STATUS_VAR);
				531	return packFloat64(zSign, 0, 0);
				532	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	533	isTiny =
				534	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				535	\|\| ( zExp < -1 )
				536	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				537	shift64RightJamming( zSig, - zExp, &zSig );
				538	zExp = 0;
				539	roundBits = zSig & 0x3FF;
				540	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				541	}
				542	}
				543	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				544	zSig = ( zSig + roundIncrement )>>10;
				545	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				546	if ( zSig == 0 ) zExp = 0;
				547	return packFloat64( zSign, zExp, zSig );
				548
				549	}
				550
				551	/*----------------------------------------------------------------------------
				552	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				553	\| and significand `zSig', and returns the proper double-precision floating-
				554	\| point value corresponding to the abstract input. This routine is just like
				555	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
				556	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				557	\| floating-point exponent.
				558	----------------------------------------------------------------------------/
				559
				560	static float64
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	561	normalizeRoundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	562	{
				563	int8 shiftCount;
				564
				565	shiftCount = countLeadingZeros64( zSig ) - 1;
				566	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				567
				568	}
				569
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	570	/*----------------------------------------------------------------------------
				571	\| Returns the fraction bits of the extended double-precision floating-point
				572	\| value `a'.
				573	----------------------------------------------------------------------------/
				574
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	575	INLINE uint64_t extractFloatx80Frac( floatx80 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	576	{
				577
				578	return a.low;
				579
				580	}
				581
				582	/*----------------------------------------------------------------------------
				583	\| Returns the exponent bits of the extended double-precision floating-point
				584	\| value `a'.
				585	----------------------------------------------------------------------------/
				586
				587	INLINE int32 extractFloatx80Exp( floatx80 a )
				588	{
				589
				590	return a.high & 0x7FFF;
				591
				592	}
				593
				594	/*----------------------------------------------------------------------------
				595	\| Returns the sign bit of the extended double-precision floating-point value
				596	\| `a'.
				597	----------------------------------------------------------------------------/
				598
				599	INLINE flag extractFloatx80Sign( floatx80 a )
				600	{
				601
				602	return a.high>>15;
				603
				604	}
				605
				606	/*----------------------------------------------------------------------------
				607	\| Normalizes the subnormal extended double-precision floating-point value
				608	\| represented by the denormalized significand `aSig'. The normalized exponent
				609	\| and significand are stored at the locations pointed to by `zExpPtr' and
				610	\| `zSigPtr', respectively.
				611	----------------------------------------------------------------------------/
				612
				613	static void
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	614	normalizeFloatx80Subnormal( uint64_t aSig, int32 zExpPtr, uint64_t zSigPtr )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	615	{
				616	int8 shiftCount;
				617
				618	shiftCount = countLeadingZeros64( aSig );
				619	*zSigPtr = aSig<<shiftCount;
				620	*zExpPtr = 1 - shiftCount;
				621
				622	}
				623
				624	/*----------------------------------------------------------------------------
				625	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				626	\| extended double-precision floating-point value, returning the result.
				627	----------------------------------------------------------------------------/
				628
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	629	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	630	{
				631	floatx80 z;
				632
				633	z.low = zSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	634	z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	635	return z;
				636
				637	}
				638
				639	/*----------------------------------------------------------------------------
				640	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				641	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
				642	\| and returns the proper extended double-precision floating-point value
				643	\| corresponding to the abstract input. Ordinarily, the abstract value is
				644	\| rounded and packed into the extended double-precision format, with the
				645	\| inexact exception raised if the abstract input cannot be represented
				646	\| exactly. However, if the abstract value is too large, the overflow and
				647	\| inexact exceptions are raised and an infinity or maximal finite value is
				648	\| returned. If the abstract value is too small, the input value is rounded to
				649	\| a subnormal number, and the underflow and inexact exceptions are raised if
				650	\| the abstract input cannot be represented exactly as a subnormal extended
				651	\| double-precision floating-point number.
				652	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
				653	\| number of bits as single or double precision, respectively. Otherwise, the
				654	\| result is rounded to the full precision of the extended double-precision
				655	\| format.
				656	\| The input significand must be normalized or smaller. If the input
				657	\| significand is not normalized, `zExp' must be 0; in that case, the result
				658	\| returned is a subnormal number, and it must not require rounding. The
				659	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				660	\| Floating-Point Arithmetic.
				661	----------------------------------------------------------------------------/
				662
				663	static floatx80
				664	roundAndPackFloatx80(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	665	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	666	STATUS_PARAM)
				667	{
				668	int8 roundingMode;
				669	flag roundNearestEven, increment, isTiny;
				670	int64 roundIncrement, roundMask, roundBits;
				671
				672	roundingMode = STATUS(float_rounding_mode);
				673	roundNearestEven = ( roundingMode == float_round_nearest_even );
				674	if ( roundingPrecision == 80 ) goto precision80;
				675	if ( roundingPrecision == 64 ) {
				676	roundIncrement = LIT64( 0x0000000000000400 );
				677	roundMask = LIT64( 0x00000000000007FF );
				678	}
				679	else if ( roundingPrecision == 32 ) {
				680	roundIncrement = LIT64( 0x0000008000000000 );
				681	roundMask = LIT64( 0x000000FFFFFFFFFF );
				682	}
				683	else {
				684	goto precision80;
				685	}
				686	zSig0 \|= ( zSig1 != 0 );
				687	if ( ! roundNearestEven ) {
				688	if ( roundingMode == float_round_to_zero ) {
				689	roundIncrement = 0;
				690	}
				691	else {
				692	roundIncrement = roundMask;
				693	if ( zSign ) {
				694	if ( roundingMode == float_round_up ) roundIncrement = 0;
				695	}
				696	else {
				697	if ( roundingMode == float_round_down ) roundIncrement = 0;
				698	}
				699	}
				700	}
				701	roundBits = zSig0 & roundMask;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	702	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	703	if ( ( 0x7FFE < zExp )
				704	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				705	) {
				706	goto overflow;
				707	}
				708	if ( zExp <= 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	709	if (STATUS(flush_to_zero)) {
				710	float_raise(float_flag_output_denormal STATUS_VAR);
				711	return packFloatx80(zSign, 0, 0);
				712	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	713	isTiny =
				714	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				715	\|\| ( zExp < 0 )
				716	\|\| ( zSig0 <= zSig0 + roundIncrement );
				717	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				718	zExp = 0;
				719	roundBits = zSig0 & roundMask;
				720	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				721	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				722	zSig0 += roundIncrement;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	723	if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	724	roundIncrement = roundMask + 1;
				725	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				726	roundMask \|= roundIncrement;
				727	}
				728	zSig0 &= ~ roundMask;
				729	return packFloatx80( zSign, zExp, zSig0 );
				730	}
				731	}
				732	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				733	zSig0 += roundIncrement;
				734	if ( zSig0 < roundIncrement ) {
				735	++zExp;
				736	zSig0 = LIT64( 0x8000000000000000 );
				737	}
				738	roundIncrement = roundMask + 1;
				739	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				740	roundMask \|= roundIncrement;
				741	}
				742	zSig0 &= ~ roundMask;
				743	if ( zSig0 == 0 ) zExp = 0;
				744	return packFloatx80( zSign, zExp, zSig0 );
				745	precision80:
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	746	increment = ( (int64_t) zSig1 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	747	if ( ! roundNearestEven ) {
				748	if ( roundingMode == float_round_to_zero ) {
				749	increment = 0;
				750	}
				751	else {
				752	if ( zSign ) {
				753	increment = ( roundingMode == float_round_down ) && zSig1;
				754	}
				755	else {
				756	increment = ( roundingMode == float_round_up ) && zSig1;
				757	}
				758	}
				759	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	760	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	761	if ( ( 0x7FFE < zExp )
				762	\|\| ( ( zExp == 0x7FFE )
				763	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				764	&& increment
				765	)
				766	) {
				767	roundMask = 0;
				768	overflow:
				769	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				770	if ( ( roundingMode == float_round_to_zero )
				771	\|\| ( zSign && ( roundingMode == float_round_up ) )
				772	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				773	) {
				774	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				775	}
				776	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				777	}
				778	if ( zExp <= 0 ) {
				779	isTiny =
				780	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				781	\|\| ( zExp < 0 )
				782	\|\| ! increment
				783	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				784	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				785	zExp = 0;
				786	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
				787	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				788	if ( roundNearestEven ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	789	increment = ( (int64_t) zSig1 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	790	}
				791	else {
				792	if ( zSign ) {
				793	increment = ( roundingMode == float_round_down ) && zSig1;
				794	}
				795	else {
				796	increment = ( roundingMode == float_round_up ) && zSig1;
				797	}
				798	}
				799	if ( increment ) {
				800	++zSig0;
				801	zSig0 &=
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	802	~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				803	if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	804	}
				805	return packFloatx80( zSign, zExp, zSig0 );
				806	}
				807	}
				808	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				809	if ( increment ) {
				810	++zSig0;
				811	if ( zSig0 == 0 ) {
				812	++zExp;
				813	zSig0 = LIT64( 0x8000000000000000 );
				814	}
				815	else {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	816	zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	817	}
				818	}
				819	else {
				820	if ( zSig0 == 0 ) zExp = 0;
				821	}
				822	return packFloatx80( zSign, zExp, zSig0 );
				823
				824	}
				825
				826	/*----------------------------------------------------------------------------
				827	\| Takes an abstract floating-point value having sign `zSign', exponent
				828	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				829	\| and returns the proper extended double-precision floating-point value
				830	\| corresponding to the abstract input. This routine is just like
				831	\| `roundAndPackFloatx80' except that the input significand does not have to be
				832	\| normalized.
				833	----------------------------------------------------------------------------/
				834
				835	static floatx80
				836	normalizeRoundAndPackFloatx80(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	837	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	838	STATUS_PARAM)
				839	{
				840	int8 shiftCount;
				841
				842	if ( zSig0 == 0 ) {
				843	zSig0 = zSig1;
				844	zSig1 = 0;
				845	zExp -= 64;
				846	}
				847	shiftCount = countLeadingZeros64( zSig0 );
				848	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				849	zExp -= shiftCount;
				850	return
				851	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
				852
				853	}
				854
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	855	/*----------------------------------------------------------------------------
				856	\| Returns the least-significant 64 fraction bits of the quadruple-precision
				857	\| floating-point value `a'.
				858	----------------------------------------------------------------------------/
				859
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	860	INLINE uint64_t extractFloat128Frac1( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	861	{
				862
				863	return a.low;
				864
				865	}
				866
				867	/*----------------------------------------------------------------------------
				868	\| Returns the most-significant 48 fraction bits of the quadruple-precision
				869	\| floating-point value `a'.
				870	----------------------------------------------------------------------------/
				871
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	872	INLINE uint64_t extractFloat128Frac0( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	873	{
				874
				875	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
				876
				877	}
				878
				879	/*----------------------------------------------------------------------------
				880	\| Returns the exponent bits of the quadruple-precision floating-point value
				881	\| `a'.
				882	----------------------------------------------------------------------------/
				883
				884	INLINE int32 extractFloat128Exp( float128 a )
				885	{
				886
				887	return ( a.high>>48 ) & 0x7FFF;
				888
				889	}
				890
				891	/*----------------------------------------------------------------------------
				892	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
				893	----------------------------------------------------------------------------/
				894
				895	INLINE flag extractFloat128Sign( float128 a )
				896	{
				897
				898	return a.high>>63;
				899
				900	}
				901
				902	/*----------------------------------------------------------------------------
				903	\| Normalizes the subnormal quadruple-precision floating-point value
				904	\| represented by the denormalized significand formed by the concatenation of
				905	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
				906	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
				907	\| significand are stored at the location pointed to by `zSig0Ptr', and the
				908	\| least significant 64 bits of the normalized significand are stored at the
				909	\| location pointed to by `zSig1Ptr'.
				910	----------------------------------------------------------------------------/
				911
				912	static void
				913	normalizeFloat128Subnormal(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	914	uint64_t aSig0,
				915	uint64_t aSig1,
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	916	int32 *zExpPtr,
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	917	uint64_t *zSig0Ptr,
				918	uint64_t *zSig1Ptr
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	919	)
				920	{
				921	int8 shiftCount;
				922
				923	if ( aSig0 == 0 ) {
				924	shiftCount = countLeadingZeros64( aSig1 ) - 15;
				925	if ( shiftCount < 0 ) {
				926	*zSig0Ptr = aSig1>>( - shiftCount );
				927	*zSig1Ptr = aSig1<<( shiftCount & 63 );
				928	}
				929	else {
				930	*zSig0Ptr = aSig1<<shiftCount;
				931	*zSig1Ptr = 0;
				932	}
				933	*zExpPtr = - shiftCount - 63;
				934	}
				935	else {
				936	shiftCount = countLeadingZeros64( aSig0 ) - 15;
				937	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
				938	*zExpPtr = 1 - shiftCount;
				939	}
				940
				941	}
				942
				943	/*----------------------------------------------------------------------------
				944	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
				945	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
				946	\| floating-point value, returning the result. After being shifted into the
				947	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
				948	\| added together to form the most significant 32 bits of the result. This
				949	\| means that any integer portion of `zSig0' will be added into the exponent.
				950	\| Since a properly normalized significand will have an integer portion equal
				951	\| to 1, the `zExp' input should be 1 less than the desired result exponent
				952	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
				953	\| significand.
				954	----------------------------------------------------------------------------/
				955
				956	INLINE float128
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	957	packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	958	{
				959	float128 z;
				960
				961	z.low = zSig1;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	962	z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	963	return z;
				964
				965	}
				966
				967	/*----------------------------------------------------------------------------
				968	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				969	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
				970	\| and `zSig2', and returns the proper quadruple-precision floating-point value
				971	\| corresponding to the abstract input. Ordinarily, the abstract value is
				972	\| simply rounded and packed into the quadruple-precision format, with the
				973	\| inexact exception raised if the abstract input cannot be represented
				974	\| exactly. However, if the abstract value is too large, the overflow and
				975	\| inexact exceptions are raised and an infinity or maximal finite value is
				976	\| returned. If the abstract value is too small, the input value is rounded to
				977	\| a subnormal number, and the underflow and inexact exceptions are raised if
				978	\| the abstract input cannot be represented exactly as a subnormal quadruple-
				979	\| precision floating-point number.
				980	\| The input significand must be normalized or smaller. If the input
				981	\| significand is not normalized, `zExp' must be 0; in that case, the result
				982	\| returned is a subnormal number, and it must not require rounding. In the
				983	\| usual case that the input significand is normalized, `zExp' must be 1 less
				984	\| than the ``true'' floating-point exponent. The handling of underflow and
				985	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				986	----------------------------------------------------------------------------/
				987
				988	static float128
				989	roundAndPackFloat128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	990	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	991	{
				992	int8 roundingMode;
				993	flag roundNearestEven, increment, isTiny;
				994
				995	roundingMode = STATUS(float_rounding_mode);
				996	roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	997	increment = ( (int64_t) zSig2 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	998	if ( ! roundNearestEven ) {
				999	if ( roundingMode == float_round_to_zero ) {
				1000	increment = 0;
				1001	}
				1002	else {
				1003	if ( zSign ) {
				1004	increment = ( roundingMode == float_round_down ) && zSig2;
				1005	}
				1006	else {
				1007	increment = ( roundingMode == float_round_up ) && zSig2;
				1008	}
				1009	}
				1010	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1011	if ( 0x7FFD <= (uint32_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1012	if ( ( 0x7FFD < zExp )
				1013	\|\| ( ( zExp == 0x7FFD )
				1014	&& eq128(
				1015	LIT64( 0x0001FFFFFFFFFFFF ),
				1016	LIT64( 0xFFFFFFFFFFFFFFFF ),
				1017	zSig0,
				1018	zSig1
				1019	)
				1020	&& increment
				1021	)
				1022	) {
				1023	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				1024	if ( ( roundingMode == float_round_to_zero )
				1025	\|\| ( zSign && ( roundingMode == float_round_up ) )
				1026	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				1027	) {
				1028	return
				1029	packFloat128(
				1030	zSign,
				1031	0x7FFE,
				1032	LIT64( 0x0000FFFFFFFFFFFF ),
				1033	LIT64( 0xFFFFFFFFFFFFFFFF )
				1034	);
				1035	}
				1036	return packFloat128( zSign, 0x7FFF, 0, 0 );
				1037	}
				1038	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	1039	if (STATUS(flush_to_zero)) {
				1040	float_raise(float_flag_output_denormal STATUS_VAR);
				1041	return packFloat128(zSign, 0, 0, 0);
				1042	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1043	isTiny =
				1044	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				1045	\|\| ( zExp < -1 )
				1046	\|\| ! increment
				1047	\|\| lt128(
				1048	zSig0,
				1049	zSig1,
				1050	LIT64( 0x0001FFFFFFFFFFFF ),
				1051	LIT64( 0xFFFFFFFFFFFFFFFF )
				1052	);
				1053	shift128ExtraRightJamming(
				1054	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
				1055	zExp = 0;
				1056	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
				1057	if ( roundNearestEven ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1058	increment = ( (int64_t) zSig2 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1059	}
				1060	else {
				1061	if ( zSign ) {
				1062	increment = ( roundingMode == float_round_down ) && zSig2;
				1063	}
				1064	else {
				1065	increment = ( roundingMode == float_round_up ) && zSig2;
				1066	}
				1067	}
				1068	}
				1069	}
				1070	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1071	if ( increment ) {
				1072	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
				1073	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
				1074	}
				1075	else {
				1076	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
				1077	}
				1078	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1079
				1080	}
				1081
				1082	/*----------------------------------------------------------------------------
				1083	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				1084	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
				1085	\| returns the proper quadruple-precision floating-point value corresponding
				1086	\| to the abstract input. This routine is just like `roundAndPackFloat128'
				1087	\| except that the input significand has fewer bits and does not have to be
				1088	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
				1089	\| point exponent.
				1090	----------------------------------------------------------------------------/
				1091
				1092	static float128
				1093	normalizeRoundAndPackFloat128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1094	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1095	{
				1096	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1097	uint64_t zSig2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1098
				1099	if ( zSig0 == 0 ) {
				1100	zSig0 = zSig1;
				1101	zSig1 = 0;
				1102	zExp -= 64;
				1103	}
				1104	shiftCount = countLeadingZeros64( zSig0 ) - 15;
				1105	if ( 0 <= shiftCount ) {
				1106	zSig2 = 0;
				1107	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1108	}
				1109	else {
				1110	shift128ExtraRightJamming(
				1111	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
				1112	}
				1113	zExp -= shiftCount;
				1114	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
				1115
				1116	}
				1117
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1118	/*----------------------------------------------------------------------------
				1119	\| Returns the result of converting the 32-bit two's complement integer `a'
				1120	\| to the single-precision floating-point format. The conversion is performed
				1121	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1122	----------------------------------------------------------------------------/
				1123
				1124	float32 int32_to_float32( int32 a STATUS_PARAM )
				1125	{
				1126	flag zSign;
				1127
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1128	if ( a == 0 ) return float32_zero;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1129	if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1130	zSign = ( a < 0 );
				1131	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
				1132
				1133	}
				1134
				1135	/*----------------------------------------------------------------------------
				1136	\| Returns the result of converting the 32-bit two's complement integer `a'
				1137	\| to the double-precision floating-point format. The conversion is performed
				1138	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1139	----------------------------------------------------------------------------/
				1140
				1141	float64 int32_to_float64( int32 a STATUS_PARAM )
				1142	{
				1143	flag zSign;
				1144	uint32 absA;
				1145	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1146	uint64_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1147
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1148	if ( a == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1149	zSign = ( a < 0 );
				1150	absA = zSign ? - a : a;
				1151	shiftCount = countLeadingZeros32( absA ) + 21;
				1152	zSig = absA;
				1153	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
				1154
				1155	}
				1156
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1157	/*----------------------------------------------------------------------------
				1158	\| Returns the result of converting the 32-bit two's complement integer `a'
				1159	\| to the extended double-precision floating-point format. The conversion
				1160	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1161	\| Arithmetic.
				1162	----------------------------------------------------------------------------/
				1163
				1164	floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
				1165	{
				1166	flag zSign;
				1167	uint32 absA;
				1168	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1169	uint64_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1170
				1171	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1172	zSign = ( a < 0 );
				1173	absA = zSign ? - a : a;
				1174	shiftCount = countLeadingZeros32( absA ) + 32;
				1175	zSig = absA;
				1176	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				1177
				1178	}
				1179
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1180	/*----------------------------------------------------------------------------
				1181	\| Returns the result of converting the 32-bit two's complement integer `a' to
				1182	\| the quadruple-precision floating-point format. The conversion is performed
				1183	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1184	----------------------------------------------------------------------------/
				1185
				1186	float128 int32_to_float128( int32 a STATUS_PARAM )
				1187	{
				1188	flag zSign;
				1189	uint32 absA;
				1190	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1191	uint64_t zSig0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1192
				1193	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1194	zSign = ( a < 0 );
				1195	absA = zSign ? - a : a;
				1196	shiftCount = countLeadingZeros32( absA ) + 17;
				1197	zSig0 = absA;
				1198	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
				1199
				1200	}
				1201
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1202	/*----------------------------------------------------------------------------
				1203	\| Returns the result of converting the 64-bit two's complement integer `a'
				1204	\| to the single-precision floating-point format. The conversion is performed
				1205	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1206	----------------------------------------------------------------------------/
				1207
				1208	float32 int64_to_float32( int64 a STATUS_PARAM )
				1209	{
				1210	flag zSign;
				1211	uint64 absA;
				1212	int8 shiftCount;
				1213
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1214	if ( a == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1215	zSign = ( a < 0 );
				1216	absA = zSign ? - a : a;
				1217	shiftCount = countLeadingZeros64( absA ) - 40;
				1218	if ( 0 <= shiftCount ) {
				1219	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
				1220	}
				1221	else {
				1222	shiftCount += 7;
				1223	if ( shiftCount < 0 ) {
				1224	shift64RightJamming( absA, - shiftCount, &absA );
				1225	}
				1226	else {
				1227	absA <<= shiftCount;
				1228	}
				1229	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
				1230	}
				1231
				1232	}
				1233
j_mayer	3430b0b	2007-03-20 22:25:37 +0000	[diff] [blame]	1234	float32 uint64_to_float32( uint64 a STATUS_PARAM )
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1235	{
				1236	int8 shiftCount;
				1237
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1238	if ( a == 0 ) return float32_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1239	shiftCount = countLeadingZeros64( a ) - 40;
				1240	if ( 0 <= shiftCount ) {
Peter Maydell	e744c06	2012-09-28 16:17:03 +0100	[diff] [blame]	1241	return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1242	}
				1243	else {
				1244	shiftCount += 7;
				1245	if ( shiftCount < 0 ) {
				1246	shift64RightJamming( a, - shiftCount, &a );
				1247	}
				1248	else {
				1249	a <<= shiftCount;
				1250	}
Peter Maydell	e744c06	2012-09-28 16:17:03 +0100	[diff] [blame]	1251	return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1252	}
				1253	}
				1254
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1255	/*----------------------------------------------------------------------------
				1256	\| Returns the result of converting the 64-bit two's complement integer `a'
				1257	\| to the double-precision floating-point format. The conversion is performed
				1258	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1259	----------------------------------------------------------------------------/
				1260
				1261	float64 int64_to_float64( int64 a STATUS_PARAM )
				1262	{
				1263	flag zSign;
				1264
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1265	if ( a == 0 ) return float64_zero;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1266	if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1267	return packFloat64( 1, 0x43E, 0 );
				1268	}
				1269	zSign = ( a < 0 );
				1270	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
				1271
				1272	}
				1273
Richard Henderson	17ed229	2012-12-31 10:09:03 -0800	[diff] [blame]	1274	float64 uint64_to_float64(uint64 a STATUS_PARAM)
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1275	{
Richard Henderson	17ed229	2012-12-31 10:09:03 -0800	[diff] [blame]	1276	int exp = 0x43C;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1277
Richard Henderson	17ed229	2012-12-31 10:09:03 -0800	[diff] [blame]	1278	if (a == 0) {
				1279	return float64_zero;
				1280	}
				1281	if ((int64_t)a < 0) {
				1282	shift64RightJamming(a, 1, &a);
				1283	exp += 1;
				1284	}
				1285	return normalizeRoundAndPackFloat64(0, exp, a STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1286	}
				1287
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1288	/*----------------------------------------------------------------------------
				1289	\| Returns the result of converting the 64-bit two's complement integer `a'
				1290	\| to the extended double-precision floating-point format. The conversion
				1291	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1292	\| Arithmetic.
				1293	----------------------------------------------------------------------------/
				1294
				1295	floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
				1296	{
				1297	flag zSign;
				1298	uint64 absA;
				1299	int8 shiftCount;
				1300
				1301	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1302	zSign = ( a < 0 );
				1303	absA = zSign ? - a : a;
				1304	shiftCount = countLeadingZeros64( absA );
				1305	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
				1306
				1307	}
				1308
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1309	/*----------------------------------------------------------------------------
				1310	\| Returns the result of converting the 64-bit two's complement integer `a' to
				1311	\| the quadruple-precision floating-point format. The conversion is performed
				1312	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1313	----------------------------------------------------------------------------/
				1314
				1315	float128 int64_to_float128( int64 a STATUS_PARAM )
				1316	{
				1317	flag zSign;
				1318	uint64 absA;
				1319	int8 shiftCount;
				1320	int32 zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1321	uint64_t zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1322
				1323	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1324	zSign = ( a < 0 );
				1325	absA = zSign ? - a : a;
				1326	shiftCount = countLeadingZeros64( absA ) + 49;
				1327	zExp = 0x406E - shiftCount;
				1328	if ( 64 <= shiftCount ) {
				1329	zSig1 = 0;
				1330	zSig0 = absA;
				1331	shiftCount -= 64;
				1332	}
				1333	else {
				1334	zSig1 = absA;
				1335	zSig0 = 0;
				1336	}
				1337	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1338	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1339
				1340	}
				1341
Richard Henderson	1e397ea	2012-12-31 10:09:04 -0800	[diff] [blame]	1342	float128 uint64_to_float128(uint64 a STATUS_PARAM)
				1343	{
				1344	if (a == 0) {
				1345	return float128_zero;
				1346	}
				1347	return normalizeRoundAndPackFloat128(0, 0x406E, a, 0 STATUS_VAR);
				1348	}
				1349
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1350	/*----------------------------------------------------------------------------
				1351	\| Returns the result of converting the single-precision floating-point value
				1352	\| `a' to the 32-bit two's complement integer format. The conversion is
				1353	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1354	\| Arithmetic---which means in particular that the conversion is rounded
				1355	\| according to the current rounding mode. If `a' is a NaN, the largest
				1356	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1357	\| largest integer with the same sign as `a' is returned.
				1358	----------------------------------------------------------------------------/
				1359
				1360	int32 float32_to_int32( float32 a STATUS_PARAM )
				1361	{
				1362	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1363	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1364	uint32_t aSig;
				1365	uint64_t aSig64;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1366
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1367	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1368	aSig = extractFloat32Frac( a );
				1369	aExp = extractFloat32Exp( a );
				1370	aSign = extractFloat32Sign( a );
				1371	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
				1372	if ( aExp ) aSig \|= 0x00800000;
				1373	shiftCount = 0xAF - aExp;
				1374	aSig64 = aSig;
				1375	aSig64 <<= 32;
				1376	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
				1377	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
				1378
				1379	}
				1380
				1381	/*----------------------------------------------------------------------------
				1382	\| Returns the result of converting the single-precision floating-point value
				1383	\| `a' to the 32-bit two's complement integer format. The conversion is
				1384	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1385	\| Arithmetic, except that the conversion is always rounded toward zero.
				1386	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1387	\| the conversion overflows, the largest integer with the same sign as `a' is
				1388	\| returned.
				1389	----------------------------------------------------------------------------/
				1390
				1391	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
				1392	{
				1393	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1394	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1395	uint32_t aSig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	1396	int32_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1397	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1398
				1399	aSig = extractFloat32Frac( a );
				1400	aExp = extractFloat32Exp( a );
				1401	aSign = extractFloat32Sign( a );
				1402	shiftCount = aExp - 0x9E;
				1403	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1404	if ( float32_val(a) != 0xCF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1405	float_raise( float_flag_invalid STATUS_VAR);
				1406	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				1407	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1408	return (int32_t) 0x80000000;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1409	}
				1410	else if ( aExp <= 0x7E ) {
				1411	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1412	return 0;
				1413	}
				1414	aSig = ( aSig \| 0x00800000 )<<8;
				1415	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1416	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1417	STATUS(float_exception_flags) \|= float_flag_inexact;
				1418	}
				1419	if ( aSign ) z = - z;
				1420	return z;
				1421
				1422	}
				1423
				1424	/*----------------------------------------------------------------------------
				1425	\| Returns the result of converting the single-precision floating-point value
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1426	\| `a' to the 16-bit two's complement integer format. The conversion is
				1427	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1428	\| Arithmetic, except that the conversion is always rounded toward zero.
				1429	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1430	\| the conversion overflows, the largest integer with the same sign as `a' is
				1431	\| returned.
				1432	----------------------------------------------------------------------------/
				1433
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1434	int_fast16_t float32_to_int16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1435	{
				1436	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1437	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1438	uint32_t aSig;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1439	int32 z;
				1440
				1441	aSig = extractFloat32Frac( a );
				1442	aExp = extractFloat32Exp( a );
				1443	aSign = extractFloat32Sign( a );
				1444	shiftCount = aExp - 0x8E;
				1445	if ( 0 <= shiftCount ) {
				1446	if ( float32_val(a) != 0xC7000000 ) {
				1447	float_raise( float_flag_invalid STATUS_VAR);
				1448	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1449	return 0x7FFF;
				1450	}
				1451	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1452	return (int32_t) 0xffff8000;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1453	}
				1454	else if ( aExp <= 0x7E ) {
				1455	if ( aExp \| aSig ) {
				1456	STATUS(float_exception_flags) \|= float_flag_inexact;
				1457	}
				1458	return 0;
				1459	}
				1460	shiftCount -= 0x10;
				1461	aSig = ( aSig \| 0x00800000 )<<8;
				1462	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1463	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1464	STATUS(float_exception_flags) \|= float_flag_inexact;
				1465	}
				1466	if ( aSign ) {
				1467	z = - z;
				1468	}
				1469	return z;
				1470
				1471	}
				1472
				1473	/*----------------------------------------------------------------------------
				1474	\| Returns the result of converting the single-precision floating-point value
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1475	\| `a' to the 64-bit two's complement integer format. The conversion is
				1476	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1477	\| Arithmetic---which means in particular that the conversion is rounded
				1478	\| according to the current rounding mode. If `a' is a NaN, the largest
				1479	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1480	\| largest integer with the same sign as `a' is returned.
				1481	----------------------------------------------------------------------------/
				1482
				1483	int64 float32_to_int64( float32 a STATUS_PARAM )
				1484	{
				1485	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1486	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1487	uint32_t aSig;
				1488	uint64_t aSig64, aSigExtra;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1489	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1490
				1491	aSig = extractFloat32Frac( a );
				1492	aExp = extractFloat32Exp( a );
				1493	aSign = extractFloat32Sign( a );
				1494	shiftCount = 0xBE - aExp;
				1495	if ( shiftCount < 0 ) {
				1496	float_raise( float_flag_invalid STATUS_VAR);
				1497	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1498	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1499	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1500	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1501	}
				1502	if ( aExp ) aSig \|= 0x00800000;
				1503	aSig64 = aSig;
				1504	aSig64 <<= 40;
				1505	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
				1506	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
				1507
				1508	}
				1509
				1510	/*----------------------------------------------------------------------------
				1511	\| Returns the result of converting the single-precision floating-point value
				1512	\| `a' to the 64-bit two's complement integer format. The conversion is
				1513	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1514	\| Arithmetic, except that the conversion is always rounded toward zero. If
				1515	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1516	\| conversion overflows, the largest integer with the same sign as `a' is
				1517	\| returned.
				1518	----------------------------------------------------------------------------/
				1519
				1520	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
				1521	{
				1522	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1523	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1524	uint32_t aSig;
				1525	uint64_t aSig64;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1526	int64 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1527	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1528
				1529	aSig = extractFloat32Frac( a );
				1530	aExp = extractFloat32Exp( a );
				1531	aSign = extractFloat32Sign( a );
				1532	shiftCount = aExp - 0xBE;
				1533	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1534	if ( float32_val(a) != 0xDF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1535	float_raise( float_flag_invalid STATUS_VAR);
				1536	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1537	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1538	}
				1539	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1540	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1541	}
				1542	else if ( aExp <= 0x7E ) {
				1543	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1544	return 0;
				1545	}
				1546	aSig64 = aSig \| 0x00800000;
				1547	aSig64 <<= 40;
				1548	z = aSig64>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1549	if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1550	STATUS(float_exception_flags) \|= float_flag_inexact;
				1551	}
				1552	if ( aSign ) z = - z;
				1553	return z;
				1554
				1555	}
				1556
				1557	/*----------------------------------------------------------------------------
				1558	\| Returns the result of converting the single-precision floating-point value
				1559	\| `a' to the double-precision floating-point format. The conversion is
				1560	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1561	\| Arithmetic.
				1562	----------------------------------------------------------------------------/
				1563
				1564	float64 float32_to_float64( float32 a STATUS_PARAM )
				1565	{
				1566	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1567	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1568	uint32_t aSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1569	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1570
				1571	aSig = extractFloat32Frac( a );
				1572	aExp = extractFloat32Exp( a );
				1573	aSign = extractFloat32Sign( a );
				1574	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1575	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1576	return packFloat64( aSign, 0x7FF, 0 );
				1577	}
				1578	if ( aExp == 0 ) {
				1579	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				1580	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1581	--aExp;
				1582	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1583	return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1584
				1585	}
				1586
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1587	/*----------------------------------------------------------------------------
				1588	\| Returns the result of converting the single-precision floating-point value
				1589	\| `a' to the extended double-precision floating-point format. The conversion
				1590	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1591	\| Arithmetic.
				1592	----------------------------------------------------------------------------/
				1593
				1594	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
				1595	{
				1596	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1597	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1598	uint32_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1599
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1600	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1601	aSig = extractFloat32Frac( a );
				1602	aExp = extractFloat32Exp( a );
				1603	aSign = extractFloat32Sign( a );
				1604	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1605	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1606	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1607	}
				1608	if ( aExp == 0 ) {
				1609	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1610	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1611	}
				1612	aSig \|= 0x00800000;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1613	return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1614
				1615	}
				1616
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1617	/*----------------------------------------------------------------------------
				1618	\| Returns the result of converting the single-precision floating-point value
				1619	\| `a' to the double-precision floating-point format. The conversion is
				1620	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1621	\| Arithmetic.
				1622	----------------------------------------------------------------------------/
				1623
				1624	float128 float32_to_float128( float32 a STATUS_PARAM )
				1625	{
				1626	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1627	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1628	uint32_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1629
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1630	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1631	aSig = extractFloat32Frac( a );
				1632	aExp = extractFloat32Exp( a );
				1633	aSign = extractFloat32Sign( a );
				1634	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1635	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1636	return packFloat128( aSign, 0x7FFF, 0, 0 );
				1637	}
				1638	if ( aExp == 0 ) {
				1639	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				1640	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1641	--aExp;
				1642	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1643	return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1644
				1645	}
				1646
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1647	/*----------------------------------------------------------------------------
				1648	\| Rounds the single-precision floating-point value `a' to an integer, and
				1649	\| returns the result as a single-precision floating-point value. The
				1650	\| operation is performed according to the IEC/IEEE Standard for Binary
				1651	\| Floating-Point Arithmetic.
				1652	----------------------------------------------------------------------------/
				1653
				1654	float32 float32_round_to_int( float32 a STATUS_PARAM)
				1655	{
				1656	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1657	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1658	uint32_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1659	int8 roundingMode;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1660	uint32_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1661	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1662
				1663	aExp = extractFloat32Exp( a );
				1664	if ( 0x96 <= aExp ) {
				1665	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				1666	return propagateFloat32NaN( a, a STATUS_VAR );
				1667	}
				1668	return a;
				1669	}
				1670	if ( aExp <= 0x7E ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1671	if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1672	STATUS(float_exception_flags) \|= float_flag_inexact;
				1673	aSign = extractFloat32Sign( a );
				1674	switch ( STATUS(float_rounding_mode) ) {
				1675	case float_round_nearest_even:
				1676	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				1677	return packFloat32( aSign, 0x7F, 0 );
				1678	}
				1679	break;
				1680	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1681	return make_float32(aSign ? 0xBF800000 : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1682	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1683	return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1684	}
				1685	return packFloat32( aSign, 0, 0 );
				1686	}
				1687	lastBitMask = 1;
				1688	lastBitMask <<= 0x96 - aExp;
				1689	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1690	z = float32_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1691	roundingMode = STATUS(float_rounding_mode);
				1692	if ( roundingMode == float_round_nearest_even ) {
				1693	z += lastBitMask>>1;
				1694	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1695	}
				1696	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1697	if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1698	z += roundBitsMask;
				1699	}
				1700	}
				1701	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1702	if ( z != float32_val(a) ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1703	return make_float32(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1704
				1705	}
				1706
				1707	/*----------------------------------------------------------------------------
				1708	\| Returns the result of adding the absolute values of the single-precision
				1709	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				1710	\| before being returned. `zSign' is ignored if the result is a NaN.
				1711	\| The addition is performed according to the IEC/IEEE Standard for Binary
				1712	\| Floating-Point Arithmetic.
				1713	----------------------------------------------------------------------------/
				1714
				1715	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1716	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1717	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1718	uint32_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1719	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1720
				1721	aSig = extractFloat32Frac( a );
				1722	aExp = extractFloat32Exp( a );
				1723	bSig = extractFloat32Frac( b );
				1724	bExp = extractFloat32Exp( b );
				1725	expDiff = aExp - bExp;
				1726	aSig <<= 6;
				1727	bSig <<= 6;
				1728	if ( 0 < expDiff ) {
				1729	if ( aExp == 0xFF ) {
				1730	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1731	return a;
				1732	}
				1733	if ( bExp == 0 ) {
				1734	--expDiff;
				1735	}
				1736	else {
				1737	bSig \|= 0x20000000;
				1738	}
				1739	shift32RightJamming( bSig, expDiff, &bSig );
				1740	zExp = aExp;
				1741	}
				1742	else if ( expDiff < 0 ) {
				1743	if ( bExp == 0xFF ) {
				1744	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1745	return packFloat32( zSign, 0xFF, 0 );
				1746	}
				1747	if ( aExp == 0 ) {
				1748	++expDiff;
				1749	}
				1750	else {
				1751	aSig \|= 0x20000000;
				1752	}
				1753	shift32RightJamming( aSig, - expDiff, &aSig );
				1754	zExp = bExp;
				1755	}
				1756	else {
				1757	if ( aExp == 0xFF ) {
				1758	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1759	return a;
				1760	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	1761	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	1762	if (STATUS(flush_to_zero)) {
				1763	if (aSig \| bSig) {
				1764	float_raise(float_flag_output_denormal STATUS_VAR);
				1765	}
				1766	return packFloat32(zSign, 0, 0);
				1767	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	1768	return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1769	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1770	zSig = 0x40000000 + aSig + bSig;
				1771	zExp = aExp;
				1772	goto roundAndPack;
				1773	}
				1774	aSig \|= 0x20000000;
				1775	zSig = ( aSig + bSig )<<1;
				1776	--zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1777	if ( (int32_t) zSig < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1778	zSig = aSig + bSig;
				1779	++zExp;
				1780	}
				1781	roundAndPack:
				1782	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1783
				1784	}
				1785
				1786	/*----------------------------------------------------------------------------
				1787	\| Returns the result of subtracting the absolute values of the single-
				1788	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				1789	\| difference is negated before being returned. `zSign' is ignored if the
				1790	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				1791	\| Standard for Binary Floating-Point Arithmetic.
				1792	----------------------------------------------------------------------------/
				1793
				1794	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1795	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1796	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1797	uint32_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1798	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1799
				1800	aSig = extractFloat32Frac( a );
				1801	aExp = extractFloat32Exp( a );
				1802	bSig = extractFloat32Frac( b );
				1803	bExp = extractFloat32Exp( b );
				1804	expDiff = aExp - bExp;
				1805	aSig <<= 7;
				1806	bSig <<= 7;
				1807	if ( 0 < expDiff ) goto aExpBigger;
				1808	if ( expDiff < 0 ) goto bExpBigger;
				1809	if ( aExp == 0xFF ) {
				1810	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1811	float_raise( float_flag_invalid STATUS_VAR);
				1812	return float32_default_nan;
				1813	}
				1814	if ( aExp == 0 ) {
				1815	aExp = 1;
				1816	bExp = 1;
				1817	}
				1818	if ( bSig < aSig ) goto aBigger;
				1819	if ( aSig < bSig ) goto bBigger;
				1820	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				1821	bExpBigger:
				1822	if ( bExp == 0xFF ) {
				1823	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1824	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1825	}
				1826	if ( aExp == 0 ) {
				1827	++expDiff;
				1828	}
				1829	else {
				1830	aSig \|= 0x40000000;
				1831	}
				1832	shift32RightJamming( aSig, - expDiff, &aSig );
				1833	bSig \|= 0x40000000;
				1834	bBigger:
				1835	zSig = bSig - aSig;
				1836	zExp = bExp;
				1837	zSign ^= 1;
				1838	goto normalizeRoundAndPack;
				1839	aExpBigger:
				1840	if ( aExp == 0xFF ) {
				1841	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1842	return a;
				1843	}
				1844	if ( bExp == 0 ) {
				1845	--expDiff;
				1846	}
				1847	else {
				1848	bSig \|= 0x40000000;
				1849	}
				1850	shift32RightJamming( bSig, expDiff, &bSig );
				1851	aSig \|= 0x40000000;
				1852	aBigger:
				1853	zSig = aSig - bSig;
				1854	zExp = aExp;
				1855	normalizeRoundAndPack:
				1856	--zExp;
				1857	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1858
				1859	}
				1860
				1861	/*----------------------------------------------------------------------------
				1862	\| Returns the result of adding the single-precision floating-point values `a'
				1863	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				1864	\| Binary Floating-Point Arithmetic.
				1865	----------------------------------------------------------------------------/
				1866
				1867	float32 float32_add( float32 a, float32 b STATUS_PARAM )
				1868	{
				1869	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1870	a = float32_squash_input_denormal(a STATUS_VAR);
				1871	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1872
				1873	aSign = extractFloat32Sign( a );
				1874	bSign = extractFloat32Sign( b );
				1875	if ( aSign == bSign ) {
				1876	return addFloat32Sigs( a, b, aSign STATUS_VAR);
				1877	}
				1878	else {
				1879	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1880	}
				1881
				1882	}
				1883
				1884	/*----------------------------------------------------------------------------
				1885	\| Returns the result of subtracting the single-precision floating-point values
				1886	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1887	\| for Binary Floating-Point Arithmetic.
				1888	----------------------------------------------------------------------------/
				1889
				1890	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
				1891	{
				1892	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1893	a = float32_squash_input_denormal(a STATUS_VAR);
				1894	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1895
				1896	aSign = extractFloat32Sign( a );
				1897	bSign = extractFloat32Sign( b );
				1898	if ( aSign == bSign ) {
				1899	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1900	}
				1901	else {
				1902	return addFloat32Sigs( a, b, aSign STATUS_VAR );
				1903	}
				1904
				1905	}
				1906
				1907	/*----------------------------------------------------------------------------
				1908	\| Returns the result of multiplying the single-precision floating-point values
				1909	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1910	\| for Binary Floating-Point Arithmetic.
				1911	----------------------------------------------------------------------------/
				1912
				1913	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
				1914	{
				1915	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1916	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1917	uint32_t aSig, bSig;
				1918	uint64_t zSig64;
				1919	uint32_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1920
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1921	a = float32_squash_input_denormal(a STATUS_VAR);
				1922	b = float32_squash_input_denormal(b STATUS_VAR);
				1923
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1924	aSig = extractFloat32Frac( a );
				1925	aExp = extractFloat32Exp( a );
				1926	aSign = extractFloat32Sign( a );
				1927	bSig = extractFloat32Frac( b );
				1928	bExp = extractFloat32Exp( b );
				1929	bSign = extractFloat32Sign( b );
				1930	zSign = aSign ^ bSign;
				1931	if ( aExp == 0xFF ) {
				1932	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1933	return propagateFloat32NaN( a, b STATUS_VAR );
				1934	}
				1935	if ( ( bExp \| bSig ) == 0 ) {
				1936	float_raise( float_flag_invalid STATUS_VAR);
				1937	return float32_default_nan;
				1938	}
				1939	return packFloat32( zSign, 0xFF, 0 );
				1940	}
				1941	if ( bExp == 0xFF ) {
				1942	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1943	if ( ( aExp \| aSig ) == 0 ) {
				1944	float_raise( float_flag_invalid STATUS_VAR);
				1945	return float32_default_nan;
				1946	}
				1947	return packFloat32( zSign, 0xFF, 0 );
				1948	}
				1949	if ( aExp == 0 ) {
				1950	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1951	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1952	}
				1953	if ( bExp == 0 ) {
				1954	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1955	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1956	}
				1957	zExp = aExp + bExp - 0x7F;
				1958	aSig = ( aSig \| 0x00800000 )<<7;
				1959	bSig = ( bSig \| 0x00800000 )<<8;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1960	shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1961	zSig = zSig64;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1962	if ( 0 <= (int32_t) ( zSig<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1963	zSig <<= 1;
				1964	--zExp;
				1965	}
				1966	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1967
				1968	}
				1969
				1970	/*----------------------------------------------------------------------------
				1971	\| Returns the result of dividing the single-precision floating-point value `a'
				1972	\| by the corresponding value `b'. The operation is performed according to the
				1973	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1974	----------------------------------------------------------------------------/
				1975
				1976	float32 float32_div( float32 a, float32 b STATUS_PARAM )
				1977	{
				1978	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1979	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1980	uint32_t aSig, bSig, zSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1981	a = float32_squash_input_denormal(a STATUS_VAR);
				1982	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1983
				1984	aSig = extractFloat32Frac( a );
				1985	aExp = extractFloat32Exp( a );
				1986	aSign = extractFloat32Sign( a );
				1987	bSig = extractFloat32Frac( b );
				1988	bExp = extractFloat32Exp( b );
				1989	bSign = extractFloat32Sign( b );
				1990	zSign = aSign ^ bSign;
				1991	if ( aExp == 0xFF ) {
				1992	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1993	if ( bExp == 0xFF ) {
				1994	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1995	float_raise( float_flag_invalid STATUS_VAR);
				1996	return float32_default_nan;
				1997	}
				1998	return packFloat32( zSign, 0xFF, 0 );
				1999	}
				2000	if ( bExp == 0xFF ) {
				2001	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2002	return packFloat32( zSign, 0, 0 );
				2003	}
				2004	if ( bExp == 0 ) {
				2005	if ( bSig == 0 ) {
				2006	if ( ( aExp \| aSig ) == 0 ) {
				2007	float_raise( float_flag_invalid STATUS_VAR);
				2008	return float32_default_nan;
				2009	}
				2010	float_raise( float_flag_divbyzero STATUS_VAR);
				2011	return packFloat32( zSign, 0xFF, 0 );
				2012	}
				2013	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				2014	}
				2015	if ( aExp == 0 ) {
				2016	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				2017	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2018	}
				2019	zExp = aExp - bExp + 0x7D;
				2020	aSig = ( aSig \| 0x00800000 )<<7;
				2021	bSig = ( bSig \| 0x00800000 )<<8;
				2022	if ( bSig <= ( aSig + aSig ) ) {
				2023	aSig >>= 1;
				2024	++zExp;
				2025	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2026	zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2027	if ( ( zSig & 0x3F ) == 0 ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2028	zSig \|= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2029	}
				2030	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				2031
				2032	}
				2033
				2034	/*----------------------------------------------------------------------------
				2035	\| Returns the remainder of the single-precision floating-point value `a'
				2036	\| with respect to the corresponding value `b'. The operation is performed
				2037	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2038	----------------------------------------------------------------------------/
				2039
				2040	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
				2041	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	2042	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2043	int_fast16_t aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2044	uint32_t aSig, bSig;
				2045	uint32_t q;
				2046	uint64_t aSig64, bSig64, q64;
				2047	uint32_t alternateASig;
				2048	int32_t sigMean;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2049	a = float32_squash_input_denormal(a STATUS_VAR);
				2050	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2051
				2052	aSig = extractFloat32Frac( a );
				2053	aExp = extractFloat32Exp( a );
				2054	aSign = extractFloat32Sign( a );
				2055	bSig = extractFloat32Frac( b );
				2056	bExp = extractFloat32Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2057	if ( aExp == 0xFF ) {
				2058	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				2059	return propagateFloat32NaN( a, b STATUS_VAR );
				2060	}
				2061	float_raise( float_flag_invalid STATUS_VAR);
				2062	return float32_default_nan;
				2063	}
				2064	if ( bExp == 0xFF ) {
				2065	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2066	return a;
				2067	}
				2068	if ( bExp == 0 ) {
				2069	if ( bSig == 0 ) {
				2070	float_raise( float_flag_invalid STATUS_VAR);
				2071	return float32_default_nan;
				2072	}
				2073	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				2074	}
				2075	if ( aExp == 0 ) {
				2076	if ( aSig == 0 ) return a;
				2077	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2078	}
				2079	expDiff = aExp - bExp;
				2080	aSig \|= 0x00800000;
				2081	bSig \|= 0x00800000;
				2082	if ( expDiff < 32 ) {
				2083	aSig <<= 8;
				2084	bSig <<= 8;
				2085	if ( expDiff < 0 ) {
				2086	if ( expDiff < -1 ) return a;
				2087	aSig >>= 1;
				2088	}
				2089	q = ( bSig <= aSig );
				2090	if ( q ) aSig -= bSig;
				2091	if ( 0 < expDiff ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2092	q = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2093	q >>= 32 - expDiff;
				2094	bSig >>= 2;
				2095	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2096	}
				2097	else {
				2098	aSig >>= 2;
				2099	bSig >>= 2;
				2100	}
				2101	}
				2102	else {
				2103	if ( bSig <= aSig ) aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2104	aSig64 = ( (uint64_t) aSig )<<40;
				2105	bSig64 = ( (uint64_t) bSig )<<40;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2106	expDiff -= 64;
				2107	while ( 0 < expDiff ) {
				2108	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				2109	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				2110	aSig64 = - ( ( bSig * q64 )<<38 );
				2111	expDiff -= 62;
				2112	}
				2113	expDiff += 64;
				2114	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				2115	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				2116	q = q64>>( 64 - expDiff );
				2117	bSig <<= 6;
				2118	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				2119	}
				2120	do {
				2121	alternateASig = aSig;
				2122	++q;
				2123	aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2124	} while ( 0 <= (int32_t) aSig );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2125	sigMean = aSig + alternateASig;
				2126	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2127	aSig = alternateASig;
				2128	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2129	zSign = ( (int32_t) aSig < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2130	if ( zSign ) aSig = - aSig;
				2131	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
				2132
				2133	}
				2134
				2135	/*----------------------------------------------------------------------------
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2136	\| Returns the result of multiplying the single-precision floating-point values
				2137	\| `a' and `b' then adding 'c', with no intermediate rounding step after the
				2138	\| multiplication. The operation is performed according to the IEC/IEEE
				2139	\| Standard for Binary Floating-Point Arithmetic 754-2008.
				2140	\| The flags argument allows the caller to select negation of the
				2141	\| addend, the intermediate product, or the final result. (The difference
				2142	\| between this and having the caller do a separate negation is that negating
				2143	\| externally will flip the sign bit on NaNs.)
				2144	----------------------------------------------------------------------------/
				2145
				2146	float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
				2147	{
				2148	flag aSign, bSign, cSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2149	int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2150	uint32_t aSig, bSig, cSig;
				2151	flag pInf, pZero, pSign;
				2152	uint64_t pSig64, cSig64, zSig64;
				2153	uint32_t pSig;
				2154	int shiftcount;
				2155	flag signflip, infzero;
				2156
				2157	a = float32_squash_input_denormal(a STATUS_VAR);
				2158	b = float32_squash_input_denormal(b STATUS_VAR);
				2159	c = float32_squash_input_denormal(c STATUS_VAR);
				2160	aSig = extractFloat32Frac(a);
				2161	aExp = extractFloat32Exp(a);
				2162	aSign = extractFloat32Sign(a);
				2163	bSig = extractFloat32Frac(b);
				2164	bExp = extractFloat32Exp(b);
				2165	bSign = extractFloat32Sign(b);
				2166	cSig = extractFloat32Frac(c);
				2167	cExp = extractFloat32Exp(c);
				2168	cSign = extractFloat32Sign(c);
				2169
				2170	infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) \|\|
				2171	(aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
				2172
				2173	/* It is implementation-defined whether the cases of (0,inf,qnan)
				2174	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
				2175	* they return if they do), so we have to hand this information
				2176	* off to the target-specific pick-a-NaN routine.
				2177	*/
				2178	if (((aExp == 0xff) && aSig) \|\|
				2179	((bExp == 0xff) && bSig) \|\|
				2180	((cExp == 0xff) && cSig)) {
				2181	return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
				2182	}
				2183
				2184	if (infzero) {
				2185	float_raise(float_flag_invalid STATUS_VAR);
				2186	return float32_default_nan;
				2187	}
				2188
				2189	if (flags & float_muladd_negate_c) {
				2190	cSign ^= 1;
				2191	}
				2192
				2193	signflip = (flags & float_muladd_negate_result) ? 1 : 0;
				2194
				2195	/* Work out the sign and type of the product */
				2196	pSign = aSign ^ bSign;
				2197	if (flags & float_muladd_negate_product) {
				2198	pSign ^= 1;
				2199	}
				2200	pInf = (aExp == 0xff) \|\| (bExp == 0xff);
				2201	pZero = ((aExp \| aSig) == 0) \|\| ((bExp \| bSig) == 0);
				2202
				2203	if (cExp == 0xff) {
				2204	if (pInf && (pSign ^ cSign)) {
				2205	/* addition of opposite-signed infinities => InvalidOperation */
				2206	float_raise(float_flag_invalid STATUS_VAR);
				2207	return float32_default_nan;
				2208	}
				2209	/* Otherwise generate an infinity of the same sign */
				2210	return packFloat32(cSign ^ signflip, 0xff, 0);
				2211	}
				2212
				2213	if (pInf) {
				2214	return packFloat32(pSign ^ signflip, 0xff, 0);
				2215	}
				2216
				2217	if (pZero) {
				2218	if (cExp == 0) {
				2219	if (cSig == 0) {
				2220	/* Adding two exact zeroes */
				2221	if (pSign == cSign) {
				2222	zSign = pSign;
				2223	} else if (STATUS(float_rounding_mode) == float_round_down) {
				2224	zSign = 1;
				2225	} else {
				2226	zSign = 0;
				2227	}
				2228	return packFloat32(zSign ^ signflip, 0, 0);
				2229	}
				2230	/* Exact zero plus a denorm */
				2231	if (STATUS(flush_to_zero)) {
				2232	float_raise(float_flag_output_denormal STATUS_VAR);
				2233	return packFloat32(cSign ^ signflip, 0, 0);
				2234	}
				2235	}
				2236	/* Zero plus something non-zero : just return the something */
Richard Sandiford	a6e7c18	2013-01-22 17:03:05 +0000	[diff] [blame^]	2237	return packFloat32(cSign ^ signflip, cExp, cSig);
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2238	}
				2239
				2240	if (aExp == 0) {
				2241	normalizeFloat32Subnormal(aSig, &aExp, &aSig);
				2242	}
				2243	if (bExp == 0) {
				2244	normalizeFloat32Subnormal(bSig, &bExp, &bSig);
				2245	}
				2246
				2247	/* Calculate the actual result a * b + c */
				2248
				2249	/* Multiply first; this is easy. */
				2250	/* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
				2251	* because we want the true exponent, not the "one-less-than"
				2252	* flavour that roundAndPackFloat32() takes.
				2253	*/
				2254	pExp = aExp + bExp - 0x7e;
				2255	aSig = (aSig \| 0x00800000) << 7;
				2256	bSig = (bSig \| 0x00800000) << 8;
				2257	pSig64 = (uint64_t)aSig * bSig;
				2258	if ((int64_t)(pSig64 << 1) >= 0) {
				2259	pSig64 <<= 1;
				2260	pExp--;
				2261	}
				2262
				2263	zSign = pSign ^ signflip;
				2264
				2265	/* Now pSig64 is the significand of the multiply, with the explicit bit in
				2266	* position 62.
				2267	*/
				2268	if (cExp == 0) {
				2269	if (!cSig) {
				2270	/* Throw out the special case of c being an exact zero now */
				2271	shift64RightJamming(pSig64, 32, &pSig64);
				2272	pSig = pSig64;
				2273	return roundAndPackFloat32(zSign, pExp - 1,
				2274	pSig STATUS_VAR);
				2275	}
				2276	normalizeFloat32Subnormal(cSig, &cExp, &cSig);
				2277	}
				2278
				2279	cSig64 = (uint64_t)cSig << (62 - 23);
				2280	cSig64 \|= LIT64(0x4000000000000000);
				2281	expDiff = pExp - cExp;
				2282
				2283	if (pSign == cSign) {
				2284	/* Addition */
				2285	if (expDiff > 0) {
				2286	/* scale c to match p */
				2287	shift64RightJamming(cSig64, expDiff, &cSig64);
				2288	zExp = pExp;
				2289	} else if (expDiff < 0) {
				2290	/* scale p to match c */
				2291	shift64RightJamming(pSig64, -expDiff, &pSig64);
				2292	zExp = cExp;
				2293	} else {
				2294	/* no scaling needed */
				2295	zExp = cExp;
				2296	}
				2297	/* Add significands and make sure explicit bit ends up in posn 62 */
				2298	zSig64 = pSig64 + cSig64;
				2299	if ((int64_t)zSig64 < 0) {
				2300	shift64RightJamming(zSig64, 1, &zSig64);
				2301	} else {
				2302	zExp--;
				2303	}
				2304	} else {
				2305	/* Subtraction */
				2306	if (expDiff > 0) {
				2307	shift64RightJamming(cSig64, expDiff, &cSig64);
				2308	zSig64 = pSig64 - cSig64;
				2309	zExp = pExp;
				2310	} else if (expDiff < 0) {
				2311	shift64RightJamming(pSig64, -expDiff, &pSig64);
				2312	zSig64 = cSig64 - pSig64;
				2313	zExp = cExp;
				2314	zSign ^= 1;
				2315	} else {
				2316	zExp = pExp;
				2317	if (cSig64 < pSig64) {
				2318	zSig64 = pSig64 - cSig64;
				2319	} else if (pSig64 < cSig64) {
				2320	zSig64 = cSig64 - pSig64;
				2321	zSign ^= 1;
				2322	} else {
				2323	/* Exact zero */
				2324	zSign = signflip;
				2325	if (STATUS(float_rounding_mode) == float_round_down) {
				2326	zSign ^= 1;
				2327	}
				2328	return packFloat32(zSign, 0, 0);
				2329	}
				2330	}
				2331	--zExp;
				2332	/* Normalize to put the explicit bit back into bit 62. */
				2333	shiftcount = countLeadingZeros64(zSig64) - 1;
				2334	zSig64 <<= shiftcount;
				2335	zExp -= shiftcount;
				2336	}
				2337	shift64RightJamming(zSig64, 32, &zSig64);
				2338	return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
				2339	}
				2340
				2341
				2342	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2343	\| Returns the square root of the single-precision floating-point value `a'.
				2344	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2345	\| Floating-Point Arithmetic.
				2346	----------------------------------------------------------------------------/
				2347
				2348	float32 float32_sqrt( float32 a STATUS_PARAM )
				2349	{
				2350	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2351	int_fast16_t aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2352	uint32_t aSig, zSig;
				2353	uint64_t rem, term;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2354	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2355
				2356	aSig = extractFloat32Frac( a );
				2357	aExp = extractFloat32Exp( a );
				2358	aSign = extractFloat32Sign( a );
				2359	if ( aExp == 0xFF ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2360	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2361	if ( ! aSign ) return a;
				2362	float_raise( float_flag_invalid STATUS_VAR);
				2363	return float32_default_nan;
				2364	}
				2365	if ( aSign ) {
				2366	if ( ( aExp \| aSig ) == 0 ) return a;
				2367	float_raise( float_flag_invalid STATUS_VAR);
				2368	return float32_default_nan;
				2369	}
				2370	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2371	if ( aSig == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2372	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2373	}
				2374	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				2375	aSig = ( aSig \| 0x00800000 )<<8;
				2376	zSig = estimateSqrt32( aExp, aSig ) + 2;
				2377	if ( ( zSig & 0x7F ) <= 5 ) {
				2378	if ( zSig < 2 ) {
				2379	zSig = 0x7FFFFFFF;
				2380	goto roundAndPack;
				2381	}
				2382	aSig >>= aExp & 1;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2383	term = ( (uint64_t) zSig ) * zSig;
				2384	rem = ( ( (uint64_t) aSig )<<32 ) - term;
				2385	while ( (int64_t) rem < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2386	--zSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2387	rem += ( ( (uint64_t) zSig )<<1 ) \| 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2388	}
				2389	zSig \|= ( rem != 0 );
				2390	}
				2391	shift32RightJamming( zSig, 1, &zSig );
				2392	roundAndPack:
				2393	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
				2394
				2395	}
				2396
				2397	/*----------------------------------------------------------------------------
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2398	\| Returns the binary exponential of the single-precision floating-point value
				2399	\| `a'. The operation is performed according to the IEC/IEEE Standard for
				2400	\| Binary Floating-Point Arithmetic.
				2401	\|
				2402	\| Uses the following identities:
				2403	\|
				2404	\| 1. -------------------------------------------------------------------------
				2405	\| x x*ln(2)
				2406	\| 2 = e
				2407	\|
				2408	\| 2. -------------------------------------------------------------------------
				2409	\| 2 3 4 5 n
				2410	\| x x x x x x x
				2411	\| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
				2412	\| 1! 2! 3! 4! 5! n!
				2413	----------------------------------------------------------------------------/
				2414
				2415	static const float64 float32_exp2_coefficients[15] =
				2416	{
Peter Maydell	d5138cf	2011-02-10 13:59:34 +0000	[diff] [blame]	2417	const_float64( 0x3ff0000000000000ll ), /* 1 */
				2418	const_float64( 0x3fe0000000000000ll ), /* 2 */
				2419	const_float64( 0x3fc5555555555555ll ), /* 3 */
				2420	const_float64( 0x3fa5555555555555ll ), /* 4 */
				2421	const_float64( 0x3f81111111111111ll ), /* 5 */
				2422	const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
				2423	const_float64( 0x3f2a01a01a01a01all ), /* 7 */
				2424	const_float64( 0x3efa01a01a01a01all ), /* 8 */
				2425	const_float64( 0x3ec71de3a556c734ll ), /* 9 */
				2426	const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
				2427	const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
				2428	const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
				2429	const_float64( 0x3de6124613a86d09ll ), /* 13 */
				2430	const_float64( 0x3da93974a8c07c9dll ), /* 14 */
				2431	const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2432	};
				2433
				2434	float32 float32_exp2( float32 a STATUS_PARAM )
				2435	{
				2436	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2437	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2438	uint32_t aSig;
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2439	float64 r, x, xn;
				2440	int i;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2441	a = float32_squash_input_denormal(a STATUS_VAR);
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2442
				2443	aSig = extractFloat32Frac( a );
				2444	aExp = extractFloat32Exp( a );
				2445	aSign = extractFloat32Sign( a );
				2446
				2447	if ( aExp == 0xFF) {
				2448	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
				2449	return (aSign) ? float32_zero : a;
				2450	}
				2451	if (aExp == 0) {
				2452	if (aSig == 0) return float32_one;
				2453	}
				2454
				2455	float_raise( float_flag_inexact STATUS_VAR);
				2456
				2457	/* ******************************* */
				2458	/* using float64 for approximation */
				2459	/* ******************************* */
				2460	x = float32_to_float64(a STATUS_VAR);
				2461	x = float64_mul(x, float64_ln2 STATUS_VAR);
				2462
				2463	xn = x;
				2464	r = float64_one;
				2465	for (i = 0 ; i < 15 ; i++) {
				2466	float64 f;
				2467
				2468	f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
				2469	r = float64_add(r, f STATUS_VAR);
				2470
				2471	xn = float64_mul(xn, x STATUS_VAR);
				2472	}
				2473
				2474	return float64_to_float32(r, status);
				2475	}
				2476
				2477	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2478	\| Returns the binary log of the single-precision floating-point value `a'.
				2479	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2480	\| Floating-Point Arithmetic.
				2481	----------------------------------------------------------------------------/
				2482	float32 float32_log2( float32 a STATUS_PARAM )
				2483	{
				2484	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2485	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2486	uint32_t aSig, zSig, i;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2487
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2488	a = float32_squash_input_denormal(a STATUS_VAR);
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2489	aSig = extractFloat32Frac( a );
				2490	aExp = extractFloat32Exp( a );
				2491	aSign = extractFloat32Sign( a );
				2492
				2493	if ( aExp == 0 ) {
				2494	if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
				2495	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2496	}
				2497	if ( aSign ) {
				2498	float_raise( float_flag_invalid STATUS_VAR);
				2499	return float32_default_nan;
				2500	}
				2501	if ( aExp == 0xFF ) {
				2502	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
				2503	return a;
				2504	}
				2505
				2506	aExp -= 0x7F;
				2507	aSig \|= 0x00800000;
				2508	zSign = aExp < 0;
				2509	zSig = aExp << 23;
				2510
				2511	for (i = 1 << 22; i > 0; i >>= 1) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2512	aSig = ( (uint64_t)aSig * aSig ) >> 23;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2513	if ( aSig & 0x01000000 ) {
				2514	aSig >>= 1;
				2515	zSig \|= i;
				2516	}
				2517	}
				2518
				2519	if ( zSign )
				2520	zSig = -zSig;
				2521
				2522	return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
				2523	}
				2524
				2525	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2526	\| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2527	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2528	\| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2529	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2530	----------------------------------------------------------------------------/
				2531
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2532	int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2533	{
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2534	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2535	a = float32_squash_input_denormal(a STATUS_VAR);
				2536	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2537
				2538	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2539	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2540	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2541	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2542	return 0;
				2543	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2544	av = float32_val(a);
				2545	bv = float32_val(b);
				2546	return ( av == bv ) \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2547	}
				2548
				2549	/*----------------------------------------------------------------------------
				2550	\| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2551	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
				2552	\| exception is raised if either operand is a NaN. The comparison is performed
				2553	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2554	----------------------------------------------------------------------------/
				2555
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2556	int float32_le( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2557	{
				2558	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2559	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2560	a = float32_squash_input_denormal(a STATUS_VAR);
				2561	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2562
				2563	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2564	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2565	) {
				2566	float_raise( float_flag_invalid STATUS_VAR);
				2567	return 0;
				2568	}
				2569	aSign = extractFloat32Sign( a );
				2570	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2571	av = float32_val(a);
				2572	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2573	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2574	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2575
				2576	}
				2577
				2578	/*----------------------------------------------------------------------------
				2579	\| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2580	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2581	\| raised if either operand is a NaN. The comparison is performed according
				2582	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2583	----------------------------------------------------------------------------/
				2584
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2585	int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2586	{
				2587	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2588	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2589	a = float32_squash_input_denormal(a STATUS_VAR);
				2590	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2591
				2592	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2593	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2594	) {
				2595	float_raise( float_flag_invalid STATUS_VAR);
				2596	return 0;
				2597	}
				2598	aSign = extractFloat32Sign( a );
				2599	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2600	av = float32_val(a);
				2601	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2602	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2603	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2604
				2605	}
				2606
				2607	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2608	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2609	\| be compared, and 0 otherwise. The invalid exception is raised if either
				2610	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				2611	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2612	----------------------------------------------------------------------------/
				2613
				2614	int float32_unordered( float32 a, float32 b STATUS_PARAM )
				2615	{
				2616	a = float32_squash_input_denormal(a STATUS_VAR);
				2617	b = float32_squash_input_denormal(b STATUS_VAR);
				2618
				2619	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2620	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2621	) {
				2622	float_raise( float_flag_invalid STATUS_VAR);
				2623	return 1;
				2624	}
				2625	return 0;
				2626	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2627
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2628	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2629	\| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2630	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2631	\| exception. The comparison is performed according to the IEC/IEEE Standard
				2632	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2633	----------------------------------------------------------------------------/
				2634
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2635	int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2636	{
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2637	a = float32_squash_input_denormal(a STATUS_VAR);
				2638	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2639
				2640	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2641	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2642	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2643	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2644	float_raise( float_flag_invalid STATUS_VAR);
				2645	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2646	return 0;
				2647	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2648	return ( float32_val(a) == float32_val(b) ) \|\|
				2649	( (uint32_t) ( ( float32_val(a) \| float32_val(b) )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2650	}
				2651
				2652	/*----------------------------------------------------------------------------
				2653	\| Returns 1 if the single-precision floating-point value `a' is less than or
				2654	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2655	\| cause an exception. Otherwise, the comparison is performed according to the
				2656	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2657	----------------------------------------------------------------------------/
				2658
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2659	int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2660	{
				2661	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2662	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2663	a = float32_squash_input_denormal(a STATUS_VAR);
				2664	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2665
				2666	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2667	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2668	) {
				2669	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2670	float_raise( float_flag_invalid STATUS_VAR);
				2671	}
				2672	return 0;
				2673	}
				2674	aSign = extractFloat32Sign( a );
				2675	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2676	av = float32_val(a);
				2677	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2678	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2679	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2680
				2681	}
				2682
				2683	/*----------------------------------------------------------------------------
				2684	\| Returns 1 if the single-precision floating-point value `a' is less than
				2685	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2686	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2687	\| Standard for Binary Floating-Point Arithmetic.
				2688	----------------------------------------------------------------------------/
				2689
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2690	int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2691	{
				2692	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2693	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2694	a = float32_squash_input_denormal(a STATUS_VAR);
				2695	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2696
				2697	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2698	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2699	) {
				2700	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2701	float_raise( float_flag_invalid STATUS_VAR);
				2702	}
				2703	return 0;
				2704	}
				2705	aSign = extractFloat32Sign( a );
				2706	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2707	av = float32_val(a);
				2708	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2709	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2710	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2711
				2712	}
				2713
				2714	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2715	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
				2716	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				2717	\| comparison is performed according to the IEC/IEEE Standard for Binary
				2718	\| Floating-Point Arithmetic.
				2719	----------------------------------------------------------------------------/
				2720
				2721	int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
				2722	{
				2723	a = float32_squash_input_denormal(a STATUS_VAR);
				2724	b = float32_squash_input_denormal(b STATUS_VAR);
				2725
				2726	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2727	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2728	) {
				2729	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2730	float_raise( float_flag_invalid STATUS_VAR);
				2731	}
				2732	return 1;
				2733	}
				2734	return 0;
				2735	}
				2736
				2737	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2738	\| Returns the result of converting the double-precision floating-point value
				2739	\| `a' to the 32-bit two's complement integer format. The conversion is
				2740	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2741	\| Arithmetic---which means in particular that the conversion is rounded
				2742	\| according to the current rounding mode. If `a' is a NaN, the largest
				2743	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2744	\| largest integer with the same sign as `a' is returned.
				2745	----------------------------------------------------------------------------/
				2746
				2747	int32 float64_to_int32( float64 a STATUS_PARAM )
				2748	{
				2749	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2750	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2751	uint64_t aSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2752	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2753
				2754	aSig = extractFloat64Frac( a );
				2755	aExp = extractFloat64Exp( a );
				2756	aSign = extractFloat64Sign( a );
				2757	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2758	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2759	shiftCount = 0x42C - aExp;
				2760	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				2761	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				2762
				2763	}
				2764
				2765	/*----------------------------------------------------------------------------
				2766	\| Returns the result of converting the double-precision floating-point value
				2767	\| `a' to the 32-bit two's complement integer format. The conversion is
				2768	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2769	\| Arithmetic, except that the conversion is always rounded toward zero.
				2770	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2771	\| the conversion overflows, the largest integer with the same sign as `a' is
				2772	\| returned.
				2773	----------------------------------------------------------------------------/
				2774
				2775	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
				2776	{
				2777	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2778	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2779	uint64_t aSig, savedASig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	2780	int32_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2781	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2782
				2783	aSig = extractFloat64Frac( a );
				2784	aExp = extractFloat64Exp( a );
				2785	aSign = extractFloat64Sign( a );
				2786	if ( 0x41E < aExp ) {
				2787	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2788	goto invalid;
				2789	}
				2790	else if ( aExp < 0x3FF ) {
				2791	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2792	return 0;
				2793	}
				2794	aSig \|= LIT64( 0x0010000000000000 );
				2795	shiftCount = 0x433 - aExp;
				2796	savedASig = aSig;
				2797	aSig >>= shiftCount;
				2798	z = aSig;
				2799	if ( aSign ) z = - z;
				2800	if ( ( z < 0 ) ^ aSign ) {
				2801	invalid:
				2802	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2803	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2804	}
				2805	if ( ( aSig<<shiftCount ) != savedASig ) {
				2806	STATUS(float_exception_flags) \|= float_flag_inexact;
				2807	}
				2808	return z;
				2809
				2810	}
				2811
				2812	/*----------------------------------------------------------------------------
				2813	\| Returns the result of converting the double-precision floating-point value
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2814	\| `a' to the 16-bit two's complement integer format. The conversion is
				2815	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2816	\| Arithmetic, except that the conversion is always rounded toward zero.
				2817	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2818	\| the conversion overflows, the largest integer with the same sign as `a' is
				2819	\| returned.
				2820	----------------------------------------------------------------------------/
				2821
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2822	int_fast16_t float64_to_int16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2823	{
				2824	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2825	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2826	uint64_t aSig, savedASig;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2827	int32 z;
				2828
				2829	aSig = extractFloat64Frac( a );
				2830	aExp = extractFloat64Exp( a );
				2831	aSign = extractFloat64Sign( a );
				2832	if ( 0x40E < aExp ) {
				2833	if ( ( aExp == 0x7FF ) && aSig ) {
				2834	aSign = 0;
				2835	}
				2836	goto invalid;
				2837	}
				2838	else if ( aExp < 0x3FF ) {
				2839	if ( aExp \|\| aSig ) {
				2840	STATUS(float_exception_flags) \|= float_flag_inexact;
				2841	}
				2842	return 0;
				2843	}
				2844	aSig \|= LIT64( 0x0010000000000000 );
				2845	shiftCount = 0x433 - aExp;
				2846	savedASig = aSig;
				2847	aSig >>= shiftCount;
				2848	z = aSig;
				2849	if ( aSign ) {
				2850	z = - z;
				2851	}
				2852	if ( ( (int16_t)z < 0 ) ^ aSign ) {
				2853	invalid:
				2854	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2855	return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2856	}
				2857	if ( ( aSig<<shiftCount ) != savedASig ) {
				2858	STATUS(float_exception_flags) \|= float_flag_inexact;
				2859	}
				2860	return z;
				2861	}
				2862
				2863	/*----------------------------------------------------------------------------
				2864	\| Returns the result of converting the double-precision floating-point value
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2865	\| `a' to the 64-bit two's complement integer format. The conversion is
				2866	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2867	\| Arithmetic---which means in particular that the conversion is rounded
				2868	\| according to the current rounding mode. If `a' is a NaN, the largest
				2869	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2870	\| largest integer with the same sign as `a' is returned.
				2871	----------------------------------------------------------------------------/
				2872
				2873	int64 float64_to_int64( float64 a STATUS_PARAM )
				2874	{
				2875	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2876	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2877	uint64_t aSig, aSigExtra;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2878	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2879
				2880	aSig = extractFloat64Frac( a );
				2881	aExp = extractFloat64Exp( a );
				2882	aSign = extractFloat64Sign( a );
				2883	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2884	shiftCount = 0x433 - aExp;
				2885	if ( shiftCount <= 0 ) {
				2886	if ( 0x43E < aExp ) {
				2887	float_raise( float_flag_invalid STATUS_VAR);
				2888	if ( ! aSign
				2889	\|\| ( ( aExp == 0x7FF )
				2890	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2891	) {
				2892	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2893	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2894	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2895	}
				2896	aSigExtra = 0;
				2897	aSig <<= - shiftCount;
				2898	}
				2899	else {
				2900	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				2901	}
				2902	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				2903
				2904	}
				2905
				2906	/*----------------------------------------------------------------------------
				2907	\| Returns the result of converting the double-precision floating-point value
				2908	\| `a' to the 64-bit two's complement integer format. The conversion is
				2909	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2910	\| Arithmetic, except that the conversion is always rounded toward zero.
				2911	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2912	\| the conversion overflows, the largest integer with the same sign as `a' is
				2913	\| returned.
				2914	----------------------------------------------------------------------------/
				2915
				2916	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
				2917	{
				2918	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2919	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2920	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2921	int64 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2922	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2923
				2924	aSig = extractFloat64Frac( a );
				2925	aExp = extractFloat64Exp( a );
				2926	aSign = extractFloat64Sign( a );
				2927	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2928	shiftCount = aExp - 0x433;
				2929	if ( 0 <= shiftCount ) {
				2930	if ( 0x43E <= aExp ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2931	if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2932	float_raise( float_flag_invalid STATUS_VAR);
				2933	if ( ! aSign
				2934	\|\| ( ( aExp == 0x7FF )
				2935	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2936	) {
				2937	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2938	}
				2939	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2940	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2941	}
				2942	z = aSig<<shiftCount;
				2943	}
				2944	else {
				2945	if ( aExp < 0x3FE ) {
				2946	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2947	return 0;
				2948	}
				2949	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2950	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2951	STATUS(float_exception_flags) \|= float_flag_inexact;
				2952	}
				2953	}
				2954	if ( aSign ) z = - z;
				2955	return z;
				2956
				2957	}
				2958
				2959	/*----------------------------------------------------------------------------
				2960	\| Returns the result of converting the double-precision floating-point value
				2961	\| `a' to the single-precision floating-point format. The conversion is
				2962	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2963	\| Arithmetic.
				2964	----------------------------------------------------------------------------/
				2965
				2966	float32 float64_to_float32( float64 a STATUS_PARAM )
				2967	{
				2968	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2969	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2970	uint64_t aSig;
				2971	uint32_t zSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2972	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2973
				2974	aSig = extractFloat64Frac( a );
				2975	aExp = extractFloat64Exp( a );
				2976	aSign = extractFloat64Sign( a );
				2977	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	2978	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2979	return packFloat32( aSign, 0xFF, 0 );
				2980	}
				2981	shift64RightJamming( aSig, 22, &aSig );
				2982	zSig = aSig;
				2983	if ( aExp \|\| zSig ) {
				2984	zSig \|= 0x40000000;
				2985	aExp -= 0x381;
				2986	}
				2987	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				2988
				2989	}
				2990
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	2991
				2992	/*----------------------------------------------------------------------------
				2993	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				2994	\| half-precision floating-point value, returning the result. After being
				2995	\| shifted into the proper positions, the three fields are simply added
				2996	\| together to form the result. This means that any integer portion of `zSig'
				2997	\| will be added into the exponent. Since a properly normalized significand
				2998	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				2999	\| than the desired result exponent whenever `zSig' is a complete, normalized
				3000	\| significand.
				3001	----------------------------------------------------------------------------/
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3002	static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3003	{
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3004	return make_float16(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3005	(((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3006	}
				3007
				3008	/* Half precision floats come in two formats: standard IEEE and "ARM" format.
				3009	The latter gains extra exponent range by omitting the NaN/Inf encodings. */
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3010
				3011	float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3012	{
				3013	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3014	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3015	uint32_t aSig;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3016
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3017	aSign = extractFloat16Sign(a);
				3018	aExp = extractFloat16Exp(a);
				3019	aSig = extractFloat16Frac(a);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3020
				3021	if (aExp == 0x1f && ieee) {
				3022	if (aSig) {
Peter Maydell	f591e1b	2011-02-10 11:28:59 +0000	[diff] [blame]	3023	return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3024	}
Peter Maydell	4be8eea	2012-09-24 17:28:35 +0100	[diff] [blame]	3025	return packFloat32(aSign, 0xff, 0);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3026	}
				3027	if (aExp == 0) {
				3028	int8 shiftCount;
				3029
				3030	if (aSig == 0) {
				3031	return packFloat32(aSign, 0, 0);
				3032	}
				3033
				3034	shiftCount = countLeadingZeros32( aSig ) - 21;
				3035	aSig = aSig << shiftCount;
				3036	aExp = -shiftCount;
				3037	}
				3038	return packFloat32( aSign, aExp + 0x70, aSig << 13);
				3039	}
				3040
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3041	float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3042	{
				3043	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3044	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3045	uint32_t aSig;
				3046	uint32_t mask;
				3047	uint32_t increment;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3048	int8 roundingMode;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3049	a = float32_squash_input_denormal(a STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3050
				3051	aSig = extractFloat32Frac( a );
				3052	aExp = extractFloat32Exp( a );
				3053	aSign = extractFloat32Sign( a );
				3054	if ( aExp == 0xFF ) {
				3055	if (aSig) {
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3056	/* Input is a NaN */
				3057	float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
				3058	if (!ieee) {
				3059	return packFloat16(aSign, 0, 0);
				3060	}
				3061	return r;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3062	}
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3063	/* Infinity */
				3064	if (!ieee) {
				3065	float_raise(float_flag_invalid STATUS_VAR);
				3066	return packFloat16(aSign, 0x1f, 0x3ff);
				3067	}
				3068	return packFloat16(aSign, 0x1f, 0);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3069	}
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3070	if (aExp == 0 && aSig == 0) {
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3071	return packFloat16(aSign, 0, 0);
				3072	}
				3073	/* Decimal point between bits 22 and 23. */
				3074	aSig \|= 0x00800000;
				3075	aExp -= 0x7f;
				3076	if (aExp < -14) {
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3077	mask = 0x00ffffff;
				3078	if (aExp >= -24) {
				3079	mask >>= 25 + aExp;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3080	}
				3081	} else {
				3082	mask = 0x00001fff;
				3083	}
				3084	if (aSig & mask) {
				3085	float_raise( float_flag_underflow STATUS_VAR );
				3086	roundingMode = STATUS(float_rounding_mode);
				3087	switch (roundingMode) {
				3088	case float_round_nearest_even:
				3089	increment = (mask + 1) >> 1;
				3090	if ((aSig & mask) == increment) {
				3091	increment = aSig & (increment << 1);
				3092	}
				3093	break;
				3094	case float_round_up:
				3095	increment = aSign ? 0 : mask;
				3096	break;
				3097	case float_round_down:
				3098	increment = aSign ? mask : 0;
				3099	break;
				3100	default: /* round_to_zero */
				3101	increment = 0;
				3102	break;
				3103	}
				3104	aSig += increment;
				3105	if (aSig >= 0x01000000) {
				3106	aSig >>= 1;
				3107	aExp++;
				3108	}
				3109	} else if (aExp < -14
				3110	&& STATUS(float_detect_tininess) == float_tininess_before_rounding) {
				3111	float_raise( float_flag_underflow STATUS_VAR);
				3112	}
				3113
				3114	if (ieee) {
				3115	if (aExp > 15) {
				3116	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				3117	return packFloat16(aSign, 0x1f, 0);
				3118	}
				3119	} else {
				3120	if (aExp > 16) {
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3121	float_raise(float_flag_invalid \| float_flag_inexact STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3122	return packFloat16(aSign, 0x1f, 0x3ff);
				3123	}
				3124	}
				3125	if (aExp < -24) {
				3126	return packFloat16(aSign, 0, 0);
				3127	}
				3128	if (aExp < -14) {
				3129	aSig >>= -14 - aExp;
				3130	aExp = -14;
				3131	}
				3132	return packFloat16(aSign, aExp + 14, aSig >> 13);
				3133	}
				3134
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3135	/*----------------------------------------------------------------------------
				3136	\| Returns the result of converting the double-precision floating-point value
				3137	\| `a' to the extended double-precision floating-point format. The conversion
				3138	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				3139	\| Arithmetic.
				3140	----------------------------------------------------------------------------/
				3141
				3142	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
				3143	{
				3144	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3145	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3146	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3147
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3148	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3149	aSig = extractFloat64Frac( a );
				3150	aExp = extractFloat64Exp( a );
				3151	aSign = extractFloat64Sign( a );
				3152	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	3153	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3154	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3155	}
				3156	if ( aExp == 0 ) {
				3157	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				3158	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3159	}
				3160	return
				3161	packFloatx80(
				3162	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				3163
				3164	}
				3165
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3166	/*----------------------------------------------------------------------------
				3167	\| Returns the result of converting the double-precision floating-point value
				3168	\| `a' to the quadruple-precision floating-point format. The conversion is
				3169	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3170	\| Arithmetic.
				3171	----------------------------------------------------------------------------/
				3172
				3173	float128 float64_to_float128( float64 a STATUS_PARAM )
				3174	{
				3175	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3176	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3177	uint64_t aSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3178
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3179	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3180	aSig = extractFloat64Frac( a );
				3181	aExp = extractFloat64Exp( a );
				3182	aSign = extractFloat64Sign( a );
				3183	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	3184	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3185	return packFloat128( aSign, 0x7FFF, 0, 0 );
				3186	}
				3187	if ( aExp == 0 ) {
				3188	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				3189	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3190	--aExp;
				3191	}
				3192	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
				3193	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
				3194
				3195	}
				3196
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3197	/*----------------------------------------------------------------------------
				3198	\| Rounds the double-precision floating-point value `a' to an integer, and
				3199	\| returns the result as a double-precision floating-point value. The
				3200	\| operation is performed according to the IEC/IEEE Standard for Binary
				3201	\| Floating-Point Arithmetic.
				3202	----------------------------------------------------------------------------/
				3203
				3204	float64 float64_round_to_int( float64 a STATUS_PARAM )
				3205	{
				3206	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3207	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3208	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3209	int8 roundingMode;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3210	uint64_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3211	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3212
				3213	aExp = extractFloat64Exp( a );
				3214	if ( 0x433 <= aExp ) {
				3215	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				3216	return propagateFloat64NaN( a, a STATUS_VAR );
				3217	}
				3218	return a;
				3219	}
				3220	if ( aExp < 0x3FF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3221	if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3222	STATUS(float_exception_flags) \|= float_flag_inexact;
				3223	aSign = extractFloat64Sign( a );
				3224	switch ( STATUS(float_rounding_mode) ) {
				3225	case float_round_nearest_even:
				3226	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				3227	return packFloat64( aSign, 0x3FF, 0 );
				3228	}
				3229	break;
				3230	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3231	return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3232	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3233	return make_float64(
				3234	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3235	}
				3236	return packFloat64( aSign, 0, 0 );
				3237	}
				3238	lastBitMask = 1;
				3239	lastBitMask <<= 0x433 - aExp;
				3240	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3241	z = float64_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3242	roundingMode = STATUS(float_rounding_mode);
				3243	if ( roundingMode == float_round_nearest_even ) {
				3244	z += lastBitMask>>1;
				3245	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				3246	}
				3247	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3248	if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3249	z += roundBitsMask;
				3250	}
				3251	}
				3252	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3253	if ( z != float64_val(a) )
				3254	STATUS(float_exception_flags) \|= float_flag_inexact;
				3255	return make_float64(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3256
				3257	}
				3258
pbrook	e6e5906	2006-10-22 00:18:54 +0000	[diff] [blame]	3259	float64 float64_trunc_to_int( float64 a STATUS_PARAM)
				3260	{
				3261	int oldmode;
				3262	float64 res;
				3263	oldmode = STATUS(float_rounding_mode);
				3264	STATUS(float_rounding_mode) = float_round_to_zero;
				3265	res = float64_round_to_int(a STATUS_VAR);
				3266	STATUS(float_rounding_mode) = oldmode;
				3267	return res;
				3268	}
				3269
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3270	/*----------------------------------------------------------------------------
				3271	\| Returns the result of adding the absolute values of the double-precision
				3272	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				3273	\| before being returned. `zSign' is ignored if the result is a NaN.
				3274	\| The addition is performed according to the IEC/IEEE Standard for Binary
				3275	\| Floating-Point Arithmetic.
				3276	----------------------------------------------------------------------------/
				3277
				3278	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				3279	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3280	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3281	uint64_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3282	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3283
				3284	aSig = extractFloat64Frac( a );
				3285	aExp = extractFloat64Exp( a );
				3286	bSig = extractFloat64Frac( b );
				3287	bExp = extractFloat64Exp( b );
				3288	expDiff = aExp - bExp;
				3289	aSig <<= 9;
				3290	bSig <<= 9;
				3291	if ( 0 < expDiff ) {
				3292	if ( aExp == 0x7FF ) {
				3293	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3294	return a;
				3295	}
				3296	if ( bExp == 0 ) {
				3297	--expDiff;
				3298	}
				3299	else {
				3300	bSig \|= LIT64( 0x2000000000000000 );
				3301	}
				3302	shift64RightJamming( bSig, expDiff, &bSig );
				3303	zExp = aExp;
				3304	}
				3305	else if ( expDiff < 0 ) {
				3306	if ( bExp == 0x7FF ) {
				3307	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3308	return packFloat64( zSign, 0x7FF, 0 );
				3309	}
				3310	if ( aExp == 0 ) {
				3311	++expDiff;
				3312	}
				3313	else {
				3314	aSig \|= LIT64( 0x2000000000000000 );
				3315	}
				3316	shift64RightJamming( aSig, - expDiff, &aSig );
				3317	zExp = bExp;
				3318	}
				3319	else {
				3320	if ( aExp == 0x7FF ) {
				3321	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3322	return a;
				3323	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	3324	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	3325	if (STATUS(flush_to_zero)) {
				3326	if (aSig \| bSig) {
				3327	float_raise(float_flag_output_denormal STATUS_VAR);
				3328	}
				3329	return packFloat64(zSign, 0, 0);
				3330	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	3331	return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				3332	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3333	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				3334	zExp = aExp;
				3335	goto roundAndPack;
				3336	}
				3337	aSig \|= LIT64( 0x2000000000000000 );
				3338	zSig = ( aSig + bSig )<<1;
				3339	--zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3340	if ( (int64_t) zSig < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3341	zSig = aSig + bSig;
				3342	++zExp;
				3343	}
				3344	roundAndPack:
				3345	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3346
				3347	}
				3348
				3349	/*----------------------------------------------------------------------------
				3350	\| Returns the result of subtracting the absolute values of the double-
				3351	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				3352	\| difference is negated before being returned. `zSign' is ignored if the
				3353	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				3354	\| Standard for Binary Floating-Point Arithmetic.
				3355	----------------------------------------------------------------------------/
				3356
				3357	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				3358	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3359	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3360	uint64_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3361	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3362
				3363	aSig = extractFloat64Frac( a );
				3364	aExp = extractFloat64Exp( a );
				3365	bSig = extractFloat64Frac( b );
				3366	bExp = extractFloat64Exp( b );
				3367	expDiff = aExp - bExp;
				3368	aSig <<= 10;
				3369	bSig <<= 10;
				3370	if ( 0 < expDiff ) goto aExpBigger;
				3371	if ( expDiff < 0 ) goto bExpBigger;
				3372	if ( aExp == 0x7FF ) {
				3373	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3374	float_raise( float_flag_invalid STATUS_VAR);
				3375	return float64_default_nan;
				3376	}
				3377	if ( aExp == 0 ) {
				3378	aExp = 1;
				3379	bExp = 1;
				3380	}
				3381	if ( bSig < aSig ) goto aBigger;
				3382	if ( aSig < bSig ) goto bBigger;
				3383	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				3384	bExpBigger:
				3385	if ( bExp == 0x7FF ) {
				3386	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3387	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				3388	}
				3389	if ( aExp == 0 ) {
				3390	++expDiff;
				3391	}
				3392	else {
				3393	aSig \|= LIT64( 0x4000000000000000 );
				3394	}
				3395	shift64RightJamming( aSig, - expDiff, &aSig );
				3396	bSig \|= LIT64( 0x4000000000000000 );
				3397	bBigger:
				3398	zSig = bSig - aSig;
				3399	zExp = bExp;
				3400	zSign ^= 1;
				3401	goto normalizeRoundAndPack;
				3402	aExpBigger:
				3403	if ( aExp == 0x7FF ) {
				3404	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3405	return a;
				3406	}
				3407	if ( bExp == 0 ) {
				3408	--expDiff;
				3409	}
				3410	else {
				3411	bSig \|= LIT64( 0x4000000000000000 );
				3412	}
				3413	shift64RightJamming( bSig, expDiff, &bSig );
				3414	aSig \|= LIT64( 0x4000000000000000 );
				3415	aBigger:
				3416	zSig = aSig - bSig;
				3417	zExp = aExp;
				3418	normalizeRoundAndPack:
				3419	--zExp;
				3420	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3421
				3422	}
				3423
				3424	/*----------------------------------------------------------------------------
				3425	\| Returns the result of adding the double-precision floating-point values `a'
				3426	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				3427	\| Binary Floating-Point Arithmetic.
				3428	----------------------------------------------------------------------------/
				3429
				3430	float64 float64_add( float64 a, float64 b STATUS_PARAM )
				3431	{
				3432	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3433	a = float64_squash_input_denormal(a STATUS_VAR);
				3434	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3435
				3436	aSign = extractFloat64Sign( a );
				3437	bSign = extractFloat64Sign( b );
				3438	if ( aSign == bSign ) {
				3439	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				3440	}
				3441	else {
				3442	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				3443	}
				3444
				3445	}
				3446
				3447	/*----------------------------------------------------------------------------
				3448	\| Returns the result of subtracting the double-precision floating-point values
				3449	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				3450	\| for Binary Floating-Point Arithmetic.
				3451	----------------------------------------------------------------------------/
				3452
				3453	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
				3454	{
				3455	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3456	a = float64_squash_input_denormal(a STATUS_VAR);
				3457	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3458
				3459	aSign = extractFloat64Sign( a );
				3460	bSign = extractFloat64Sign( b );
				3461	if ( aSign == bSign ) {
				3462	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				3463	}
				3464	else {
				3465	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				3466	}
				3467
				3468	}
				3469
				3470	/*----------------------------------------------------------------------------
				3471	\| Returns the result of multiplying the double-precision floating-point values
				3472	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				3473	\| for Binary Floating-Point Arithmetic.
				3474	----------------------------------------------------------------------------/
				3475
				3476	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
				3477	{
				3478	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3479	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3480	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3481
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3482	a = float64_squash_input_denormal(a STATUS_VAR);
				3483	b = float64_squash_input_denormal(b STATUS_VAR);
				3484
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3485	aSig = extractFloat64Frac( a );
				3486	aExp = extractFloat64Exp( a );
				3487	aSign = extractFloat64Sign( a );
				3488	bSig = extractFloat64Frac( b );
				3489	bExp = extractFloat64Exp( b );
				3490	bSign = extractFloat64Sign( b );
				3491	zSign = aSign ^ bSign;
				3492	if ( aExp == 0x7FF ) {
				3493	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				3494	return propagateFloat64NaN( a, b STATUS_VAR );
				3495	}
				3496	if ( ( bExp \| bSig ) == 0 ) {
				3497	float_raise( float_flag_invalid STATUS_VAR);
				3498	return float64_default_nan;
				3499	}
				3500	return packFloat64( zSign, 0x7FF, 0 );
				3501	}
				3502	if ( bExp == 0x7FF ) {
				3503	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3504	if ( ( aExp \| aSig ) == 0 ) {
				3505	float_raise( float_flag_invalid STATUS_VAR);
				3506	return float64_default_nan;
				3507	}
				3508	return packFloat64( zSign, 0x7FF, 0 );
				3509	}
				3510	if ( aExp == 0 ) {
				3511	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				3512	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3513	}
				3514	if ( bExp == 0 ) {
				3515	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				3516	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3517	}
				3518	zExp = aExp + bExp - 0x3FF;
				3519	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				3520	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3521	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				3522	zSig0 \|= ( zSig1 != 0 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3523	if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3524	zSig0 <<= 1;
				3525	--zExp;
				3526	}
				3527	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
				3528
				3529	}
				3530
				3531	/*----------------------------------------------------------------------------
				3532	\| Returns the result of dividing the double-precision floating-point value `a'
				3533	\| by the corresponding value `b'. The operation is performed according to
				3534	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3535	----------------------------------------------------------------------------/
				3536
				3537	float64 float64_div( float64 a, float64 b STATUS_PARAM )
				3538	{
				3539	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3540	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3541	uint64_t aSig, bSig, zSig;
				3542	uint64_t rem0, rem1;
				3543	uint64_t term0, term1;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3544	a = float64_squash_input_denormal(a STATUS_VAR);
				3545	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3546
				3547	aSig = extractFloat64Frac( a );
				3548	aExp = extractFloat64Exp( a );
				3549	aSign = extractFloat64Sign( a );
				3550	bSig = extractFloat64Frac( b );
				3551	bExp = extractFloat64Exp( b );
				3552	bSign = extractFloat64Sign( b );
				3553	zSign = aSign ^ bSign;
				3554	if ( aExp == 0x7FF ) {
				3555	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3556	if ( bExp == 0x7FF ) {
				3557	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3558	float_raise( float_flag_invalid STATUS_VAR);
				3559	return float64_default_nan;
				3560	}
				3561	return packFloat64( zSign, 0x7FF, 0 );
				3562	}
				3563	if ( bExp == 0x7FF ) {
				3564	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3565	return packFloat64( zSign, 0, 0 );
				3566	}
				3567	if ( bExp == 0 ) {
				3568	if ( bSig == 0 ) {
				3569	if ( ( aExp \| aSig ) == 0 ) {
				3570	float_raise( float_flag_invalid STATUS_VAR);
				3571	return float64_default_nan;
				3572	}
				3573	float_raise( float_flag_divbyzero STATUS_VAR);
				3574	return packFloat64( zSign, 0x7FF, 0 );
				3575	}
				3576	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3577	}
				3578	if ( aExp == 0 ) {
				3579	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				3580	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3581	}
				3582	zExp = aExp - bExp + 0x3FD;
				3583	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				3584	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3585	if ( bSig <= ( aSig + aSig ) ) {
				3586	aSig >>= 1;
				3587	++zExp;
				3588	}
				3589	zSig = estimateDiv128To64( aSig, 0, bSig );
				3590	if ( ( zSig & 0x1FF ) <= 2 ) {
				3591	mul64To128( bSig, zSig, &term0, &term1 );
				3592	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3593	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3594	--zSig;
				3595	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3596	}
				3597	zSig \|= ( rem1 != 0 );
				3598	}
				3599	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3600
				3601	}
				3602
				3603	/*----------------------------------------------------------------------------
				3604	\| Returns the remainder of the double-precision floating-point value `a'
				3605	\| with respect to the corresponding value `b'. The operation is performed
				3606	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3607	----------------------------------------------------------------------------/
				3608
				3609	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
				3610	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	3611	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3612	int_fast16_t aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3613	uint64_t aSig, bSig;
				3614	uint64_t q, alternateASig;
				3615	int64_t sigMean;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3616
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3617	a = float64_squash_input_denormal(a STATUS_VAR);
				3618	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3619	aSig = extractFloat64Frac( a );
				3620	aExp = extractFloat64Exp( a );
				3621	aSign = extractFloat64Sign( a );
				3622	bSig = extractFloat64Frac( b );
				3623	bExp = extractFloat64Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3624	if ( aExp == 0x7FF ) {
				3625	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				3626	return propagateFloat64NaN( a, b STATUS_VAR );
				3627	}
				3628	float_raise( float_flag_invalid STATUS_VAR);
				3629	return float64_default_nan;
				3630	}
				3631	if ( bExp == 0x7FF ) {
				3632	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3633	return a;
				3634	}
				3635	if ( bExp == 0 ) {
				3636	if ( bSig == 0 ) {
				3637	float_raise( float_flag_invalid STATUS_VAR);
				3638	return float64_default_nan;
				3639	}
				3640	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3641	}
				3642	if ( aExp == 0 ) {
				3643	if ( aSig == 0 ) return a;
				3644	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3645	}
				3646	expDiff = aExp - bExp;
				3647	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				3648	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3649	if ( expDiff < 0 ) {
				3650	if ( expDiff < -1 ) return a;
				3651	aSig >>= 1;
				3652	}
				3653	q = ( bSig <= aSig );
				3654	if ( q ) aSig -= bSig;
				3655	expDiff -= 64;
				3656	while ( 0 < expDiff ) {
				3657	q = estimateDiv128To64( aSig, 0, bSig );
				3658	q = ( 2 < q ) ? q - 2 : 0;
				3659	aSig = - ( ( bSig>>2 ) * q );
				3660	expDiff -= 62;
				3661	}
				3662	expDiff += 64;
				3663	if ( 0 < expDiff ) {
				3664	q = estimateDiv128To64( aSig, 0, bSig );
				3665	q = ( 2 < q ) ? q - 2 : 0;
				3666	q >>= 64 - expDiff;
				3667	bSig >>= 2;
				3668	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				3669	}
				3670	else {
				3671	aSig >>= 2;
				3672	bSig >>= 2;
				3673	}
				3674	do {
				3675	alternateASig = aSig;
				3676	++q;
				3677	aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3678	} while ( 0 <= (int64_t) aSig );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3679	sigMean = aSig + alternateASig;
				3680	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				3681	aSig = alternateASig;
				3682	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3683	zSign = ( (int64_t) aSig < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3684	if ( zSign ) aSig = - aSig;
				3685	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
				3686
				3687	}
				3688
				3689	/*----------------------------------------------------------------------------
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	3690	\| Returns the result of multiplying the double-precision floating-point values
				3691	\| `a' and `b' then adding 'c', with no intermediate rounding step after the
				3692	\| multiplication. The operation is performed according to the IEC/IEEE
				3693	\| Standard for Binary Floating-Point Arithmetic 754-2008.
				3694	\| The flags argument allows the caller to select negation of the
				3695	\| addend, the intermediate product, or the final result. (The difference
				3696	\| between this and having the caller do a separate negation is that negating
				3697	\| externally will flip the sign bit on NaNs.)
				3698	----------------------------------------------------------------------------/
				3699
				3700	float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
				3701	{
				3702	flag aSign, bSign, cSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3703	int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	3704	uint64_t aSig, bSig, cSig;
				3705	flag pInf, pZero, pSign;
				3706	uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
				3707	int shiftcount;
				3708	flag signflip, infzero;
				3709
				3710	a = float64_squash_input_denormal(a STATUS_VAR);
				3711	b = float64_squash_input_denormal(b STATUS_VAR);
				3712	c = float64_squash_input_denormal(c STATUS_VAR);
				3713	aSig = extractFloat64Frac(a);
				3714	aExp = extractFloat64Exp(a);
				3715	aSign = extractFloat64Sign(a);
				3716	bSig = extractFloat64Frac(b);
				3717	bExp = extractFloat64Exp(b);
				3718	bSign = extractFloat64Sign(b);
				3719	cSig = extractFloat64Frac(c);
				3720	cExp = extractFloat64Exp(c);
				3721	cSign = extractFloat64Sign(c);
				3722
				3723	infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) \|\|
				3724	(aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
				3725
				3726	/* It is implementation-defined whether the cases of (0,inf,qnan)
				3727	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
				3728	* they return if they do), so we have to hand this information
				3729	* off to the target-specific pick-a-NaN routine.
				3730	*/
				3731	if (((aExp == 0x7ff) && aSig) \|\|
				3732	((bExp == 0x7ff) && bSig) \|\|
				3733	((cExp == 0x7ff) && cSig)) {
				3734	return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
				3735	}
				3736
				3737	if (infzero) {
				3738	float_raise(float_flag_invalid STATUS_VAR);
				3739	return float64_default_nan;
				3740	}
				3741
				3742	if (flags & float_muladd_negate_c) {
				3743	cSign ^= 1;
				3744	}
				3745
				3746	signflip = (flags & float_muladd_negate_result) ? 1 : 0;
				3747
				3748	/* Work out the sign and type of the product */
				3749	pSign = aSign ^ bSign;
				3750	if (flags & float_muladd_negate_product) {
				3751	pSign ^= 1;
				3752	}
				3753	pInf = (aExp == 0x7ff) \|\| (bExp == 0x7ff);
				3754	pZero = ((aExp \| aSig) == 0) \|\| ((bExp \| bSig) == 0);
				3755
				3756	if (cExp == 0x7ff) {
				3757	if (pInf && (pSign ^ cSign)) {
				3758	/* addition of opposite-signed infinities => InvalidOperation */
				3759	float_raise(float_flag_invalid STATUS_VAR);
				3760	return float64_default_nan;
				3761	}
				3762	/* Otherwise generate an infinity of the same sign */
				3763	return packFloat64(cSign ^ signflip, 0x7ff, 0);
				3764	}
				3765
				3766	if (pInf) {
				3767	return packFloat64(pSign ^ signflip, 0x7ff, 0);
				3768	}
				3769
				3770	if (pZero) {
				3771	if (cExp == 0) {
				3772	if (cSig == 0) {
				3773	/* Adding two exact zeroes */
				3774	if (pSign == cSign) {
				3775	zSign = pSign;
				3776	} else if (STATUS(float_rounding_mode) == float_round_down) {
				3777	zSign = 1;
				3778	} else {
				3779	zSign = 0;
				3780	}
				3781	return packFloat64(zSign ^ signflip, 0, 0);
				3782	}
				3783	/* Exact zero plus a denorm */
				3784	if (STATUS(flush_to_zero)) {
				3785	float_raise(float_flag_output_denormal STATUS_VAR);
				3786	return packFloat64(cSign ^ signflip, 0, 0);
				3787	}
				3788	}
				3789	/* Zero plus something non-zero : just return the something */
Richard Sandiford	a6e7c18	2013-01-22 17:03:05 +0000	[diff] [blame^]	3790	return packFloat64(cSign ^ signflip, cExp, cSig);
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	3791	}
				3792
				3793	if (aExp == 0) {
				3794	normalizeFloat64Subnormal(aSig, &aExp, &aSig);
				3795	}
				3796	if (bExp == 0) {
				3797	normalizeFloat64Subnormal(bSig, &bExp, &bSig);
				3798	}
				3799
				3800	/* Calculate the actual result a * b + c */
				3801
				3802	/* Multiply first; this is easy. */
				3803	/* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
				3804	* because we want the true exponent, not the "one-less-than"
				3805	* flavour that roundAndPackFloat64() takes.
				3806	*/
				3807	pExp = aExp + bExp - 0x3fe;
				3808	aSig = (aSig \| LIT64(0x0010000000000000))<<10;
				3809	bSig = (bSig \| LIT64(0x0010000000000000))<<11;
				3810	mul64To128(aSig, bSig, &pSig0, &pSig1);
				3811	if ((int64_t)(pSig0 << 1) >= 0) {
				3812	shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
				3813	pExp--;
				3814	}
				3815
				3816	zSign = pSign ^ signflip;
				3817
				3818	/* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
				3819	* bit in position 126.
				3820	*/
				3821	if (cExp == 0) {
				3822	if (!cSig) {
				3823	/* Throw out the special case of c being an exact zero now */
				3824	shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
				3825	return roundAndPackFloat64(zSign, pExp - 1,
				3826	pSig1 STATUS_VAR);
				3827	}
				3828	normalizeFloat64Subnormal(cSig, &cExp, &cSig);
				3829	}
				3830
				3831	/* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
				3832	* significand of the addend, with the explicit bit in position 126.
				3833	*/
				3834	cSig0 = cSig << (126 - 64 - 52);
				3835	cSig1 = 0;
				3836	cSig0 \|= LIT64(0x4000000000000000);
				3837	expDiff = pExp - cExp;
				3838
				3839	if (pSign == cSign) {
				3840	/* Addition */
				3841	if (expDiff > 0) {
				3842	/* scale c to match p */
				3843	shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
				3844	zExp = pExp;
				3845	} else if (expDiff < 0) {
				3846	/* scale p to match c */
				3847	shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
				3848	zExp = cExp;
				3849	} else {
				3850	/* no scaling needed */
				3851	zExp = cExp;
				3852	}
				3853	/* Add significands and make sure explicit bit ends up in posn 126 */
				3854	add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				3855	if ((int64_t)zSig0 < 0) {
				3856	shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
				3857	} else {
				3858	zExp--;
				3859	}
				3860	shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
				3861	return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
				3862	} else {
				3863	/* Subtraction */
				3864	if (expDiff > 0) {
				3865	shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
				3866	sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				3867	zExp = pExp;
				3868	} else if (expDiff < 0) {
				3869	shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
				3870	sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
				3871	zExp = cExp;
				3872	zSign ^= 1;
				3873	} else {
				3874	zExp = pExp;
				3875	if (lt128(cSig0, cSig1, pSig0, pSig1)) {
				3876	sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				3877	} else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
				3878	sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
				3879	zSign ^= 1;
				3880	} else {
				3881	/* Exact zero */
				3882	zSign = signflip;
				3883	if (STATUS(float_rounding_mode) == float_round_down) {
				3884	zSign ^= 1;
				3885	}
				3886	return packFloat64(zSign, 0, 0);
				3887	}
				3888	}
				3889	--zExp;
				3890	/* Do the equivalent of normalizeRoundAndPackFloat64() but
				3891	* starting with the significand in a pair of uint64_t.
				3892	*/
				3893	if (zSig0) {
				3894	shiftcount = countLeadingZeros64(zSig0) - 1;
				3895	shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
				3896	if (zSig1) {
				3897	zSig0 \|= 1;
				3898	}
				3899	zExp -= shiftcount;
				3900	} else {
				3901	shiftcount = countLeadingZeros64(zSig1) - 1;
				3902	zSig0 = zSig1 << shiftcount;
				3903	zExp -= (shiftcount + 64);
				3904	}
				3905	return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
				3906	}
				3907	}
				3908
				3909	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3910	\| Returns the square root of the double-precision floating-point value `a'.
				3911	\| The operation is performed according to the IEC/IEEE Standard for Binary
				3912	\| Floating-Point Arithmetic.
				3913	----------------------------------------------------------------------------/
				3914
				3915	float64 float64_sqrt( float64 a STATUS_PARAM )
				3916	{
				3917	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3918	int_fast16_t aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3919	uint64_t aSig, zSig, doubleZSig;
				3920	uint64_t rem0, rem1, term0, term1;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3921	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3922
				3923	aSig = extractFloat64Frac( a );
				3924	aExp = extractFloat64Exp( a );
				3925	aSign = extractFloat64Sign( a );
				3926	if ( aExp == 0x7FF ) {
				3927	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
				3928	if ( ! aSign ) return a;
				3929	float_raise( float_flag_invalid STATUS_VAR);
				3930	return float64_default_nan;
				3931	}
				3932	if ( aSign ) {
				3933	if ( ( aExp \| aSig ) == 0 ) return a;
				3934	float_raise( float_flag_invalid STATUS_VAR);
				3935	return float64_default_nan;
				3936	}
				3937	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3938	if ( aSig == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3939	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3940	}
				3941	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				3942	aSig \|= LIT64( 0x0010000000000000 );
				3943	zSig = estimateSqrt32( aExp, aSig>>21 );
				3944	aSig <<= 9 - ( aExp & 1 );
				3945	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
				3946	if ( ( zSig & 0x1FF ) <= 5 ) {
				3947	doubleZSig = zSig<<1;
				3948	mul64To128( zSig, zSig, &term0, &term1 );
				3949	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3950	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3951	--zSig;
				3952	doubleZSig -= 2;
				3953	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
				3954	}
				3955	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				3956	}
				3957	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
				3958
				3959	}
				3960
				3961	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	3962	\| Returns the binary log of the double-precision floating-point value `a'.
				3963	\| The operation is performed according to the IEC/IEEE Standard for Binary
				3964	\| Floating-Point Arithmetic.
				3965	----------------------------------------------------------------------------/
				3966	float64 float64_log2( float64 a STATUS_PARAM )
				3967	{
				3968	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3969	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3970	uint64_t aSig, aSig0, aSig1, zSig, i;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3971	a = float64_squash_input_denormal(a STATUS_VAR);
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	3972
				3973	aSig = extractFloat64Frac( a );
				3974	aExp = extractFloat64Exp( a );
				3975	aSign = extractFloat64Sign( a );
				3976
				3977	if ( aExp == 0 ) {
				3978	if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
				3979	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3980	}
				3981	if ( aSign ) {
				3982	float_raise( float_flag_invalid STATUS_VAR);
				3983	return float64_default_nan;
				3984	}
				3985	if ( aExp == 0x7FF ) {
				3986	if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
				3987	return a;
				3988	}
				3989
				3990	aExp -= 0x3FF;
				3991	aSig \|= LIT64( 0x0010000000000000 );
				3992	zSign = aExp < 0;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3993	zSig = (uint64_t)aExp << 52;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	3994	for (i = 1LL << 51; i > 0; i >>= 1) {
				3995	mul64To128( aSig, aSig, &aSig0, &aSig1 );
				3996	aSig = ( aSig0 << 12 ) \| ( aSig1 >> 52 );
				3997	if ( aSig & LIT64( 0x0020000000000000 ) ) {
				3998	aSig >>= 1;
				3999	zSig \|= i;
				4000	}
				4001	}
				4002
				4003	if ( zSign )
				4004	zSig = -zSig;
				4005	return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
				4006	}
				4007
				4008	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4009	\| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4010	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
				4011	\| if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4012	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4013	----------------------------------------------------------------------------/
				4014
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4015	int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4016	{
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4017	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4018	a = float64_squash_input_denormal(a STATUS_VAR);
				4019	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4020
				4021	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4022	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4023	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4024	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4025	return 0;
				4026	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4027	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4028	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4029	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4030
				4031	}
				4032
				4033	/*----------------------------------------------------------------------------
				4034	\| Returns 1 if the double-precision floating-point value `a' is less than or
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4035	\| equal to the corresponding value `b', and 0 otherwise. The invalid
				4036	\| exception is raised if either operand is a NaN. The comparison is performed
				4037	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4038	----------------------------------------------------------------------------/
				4039
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4040	int float64_le( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4041	{
				4042	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4043	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4044	a = float64_squash_input_denormal(a STATUS_VAR);
				4045	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4046
				4047	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4048	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4049	) {
				4050	float_raise( float_flag_invalid STATUS_VAR);
				4051	return 0;
				4052	}
				4053	aSign = extractFloat64Sign( a );
				4054	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4055	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4056	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4057	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4058	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4059
				4060	}
				4061
				4062	/*----------------------------------------------------------------------------
				4063	\| Returns 1 if the double-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4064	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				4065	\| raised if either operand is a NaN. The comparison is performed according
				4066	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4067	----------------------------------------------------------------------------/
				4068
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4069	int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4070	{
				4071	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4072	uint64_t av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4073
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4074	a = float64_squash_input_denormal(a STATUS_VAR);
				4075	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4076	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4077	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4078	) {
				4079	float_raise( float_flag_invalid STATUS_VAR);
				4080	return 0;
				4081	}
				4082	aSign = extractFloat64Sign( a );
				4083	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4084	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4085	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4086	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4087	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4088
				4089	}
				4090
				4091	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4092	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4093	\| be compared, and 0 otherwise. The invalid exception is raised if either
				4094	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				4095	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4096	----------------------------------------------------------------------------/
				4097
				4098	int float64_unordered( float64 a, float64 b STATUS_PARAM )
				4099	{
				4100	a = float64_squash_input_denormal(a STATUS_VAR);
				4101	b = float64_squash_input_denormal(b STATUS_VAR);
				4102
				4103	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4104	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4105	) {
				4106	float_raise( float_flag_invalid STATUS_VAR);
				4107	return 1;
				4108	}
				4109	return 0;
				4110	}
				4111
				4112	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4113	\| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4114	\| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				4115	\| exception.The comparison is performed according to the IEC/IEEE Standard
				4116	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4117	----------------------------------------------------------------------------/
				4118
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4119	int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4120	{
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4121	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4122	a = float64_squash_input_denormal(a STATUS_VAR);
				4123	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4124
				4125	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4126	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4127	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4128	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4129	float_raise( float_flag_invalid STATUS_VAR);
				4130	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4131	return 0;
				4132	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4133	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4134	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4135	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4136
				4137	}
				4138
				4139	/*----------------------------------------------------------------------------
				4140	\| Returns 1 if the double-precision floating-point value `a' is less than or
				4141	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				4142	\| cause an exception. Otherwise, the comparison is performed according to the
				4143	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4144	----------------------------------------------------------------------------/
				4145
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4146	int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4147	{
				4148	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4149	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4150	a = float64_squash_input_denormal(a STATUS_VAR);
				4151	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4152
				4153	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4154	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4155	) {
				4156	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4157	float_raise( float_flag_invalid STATUS_VAR);
				4158	}
				4159	return 0;
				4160	}
				4161	aSign = extractFloat64Sign( a );
				4162	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4163	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4164	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4165	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4166	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4167
				4168	}
				4169
				4170	/*----------------------------------------------------------------------------
				4171	\| Returns 1 if the double-precision floating-point value `a' is less than
				4172	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				4173	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				4174	\| Standard for Binary Floating-Point Arithmetic.
				4175	----------------------------------------------------------------------------/
				4176
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4177	int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4178	{
				4179	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4180	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4181	a = float64_squash_input_denormal(a STATUS_VAR);
				4182	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4183
				4184	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4185	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4186	) {
				4187	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4188	float_raise( float_flag_invalid STATUS_VAR);
				4189	}
				4190	return 0;
				4191	}
				4192	aSign = extractFloat64Sign( a );
				4193	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4194	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4195	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4196	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4197	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4198
				4199	}
				4200
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4201	/*----------------------------------------------------------------------------
				4202	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
				4203	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				4204	\| comparison is performed according to the IEC/IEEE Standard for Binary
				4205	\| Floating-Point Arithmetic.
				4206	----------------------------------------------------------------------------/
				4207
				4208	int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
				4209	{
				4210	a = float64_squash_input_denormal(a STATUS_VAR);
				4211	b = float64_squash_input_denormal(b STATUS_VAR);
				4212
				4213	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4214	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4215	) {
				4216	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4217	float_raise( float_flag_invalid STATUS_VAR);
				4218	}
				4219	return 1;
				4220	}
				4221	return 0;
				4222	}
				4223
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4224	/*----------------------------------------------------------------------------
				4225	\| Returns the result of converting the extended double-precision floating-
				4226	\| point value `a' to the 32-bit two's complement integer format. The
				4227	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4228	\| Floating-Point Arithmetic---which means in particular that the conversion
				4229	\| is rounded according to the current rounding mode. If `a' is a NaN, the
				4230	\| largest positive integer is returned. Otherwise, if the conversion
				4231	\| overflows, the largest integer with the same sign as `a' is returned.
				4232	----------------------------------------------------------------------------/
				4233
				4234	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
				4235	{
				4236	flag aSign;
				4237	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4238	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4239
				4240	aSig = extractFloatx80Frac( a );
				4241	aExp = extractFloatx80Exp( a );
				4242	aSign = extractFloatx80Sign( a );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4243	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4244	shiftCount = 0x4037 - aExp;
				4245	if ( shiftCount <= 0 ) shiftCount = 1;
				4246	shift64RightJamming( aSig, shiftCount, &aSig );
				4247	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				4248
				4249	}
				4250
				4251	/*----------------------------------------------------------------------------
				4252	\| Returns the result of converting the extended double-precision floating-
				4253	\| point value `a' to the 32-bit two's complement integer format. The
				4254	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4255	\| Floating-Point Arithmetic, except that the conversion is always rounded
				4256	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				4257	\| Otherwise, if the conversion overflows, the largest integer with the same
				4258	\| sign as `a' is returned.
				4259	----------------------------------------------------------------------------/
				4260
				4261	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
				4262	{
				4263	flag aSign;
				4264	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4265	uint64_t aSig, savedASig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	4266	int32_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4267
				4268	aSig = extractFloatx80Frac( a );
				4269	aExp = extractFloatx80Exp( a );
				4270	aSign = extractFloatx80Sign( a );
				4271	if ( 0x401E < aExp ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4272	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4273	goto invalid;
				4274	}
				4275	else if ( aExp < 0x3FFF ) {
				4276	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4277	return 0;
				4278	}
				4279	shiftCount = 0x403E - aExp;
				4280	savedASig = aSig;
				4281	aSig >>= shiftCount;
				4282	z = aSig;
				4283	if ( aSign ) z = - z;
				4284	if ( ( z < 0 ) ^ aSign ) {
				4285	invalid:
				4286	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4287	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4288	}
				4289	if ( ( aSig<<shiftCount ) != savedASig ) {
				4290	STATUS(float_exception_flags) \|= float_flag_inexact;
				4291	}
				4292	return z;
				4293
				4294	}
				4295
				4296	/*----------------------------------------------------------------------------
				4297	\| Returns the result of converting the extended double-precision floating-
				4298	\| point value `a' to the 64-bit two's complement integer format. The
				4299	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4300	\| Floating-Point Arithmetic---which means in particular that the conversion
				4301	\| is rounded according to the current rounding mode. If `a' is a NaN,
				4302	\| the largest positive integer is returned. Otherwise, if the conversion
				4303	\| overflows, the largest integer with the same sign as `a' is returned.
				4304	----------------------------------------------------------------------------/
				4305
				4306	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
				4307	{
				4308	flag aSign;
				4309	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4310	uint64_t aSig, aSigExtra;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4311
				4312	aSig = extractFloatx80Frac( a );
				4313	aExp = extractFloatx80Exp( a );
				4314	aSign = extractFloatx80Sign( a );
				4315	shiftCount = 0x403E - aExp;
				4316	if ( shiftCount <= 0 ) {
				4317	if ( shiftCount ) {
				4318	float_raise( float_flag_invalid STATUS_VAR);
				4319	if ( ! aSign
				4320	\|\| ( ( aExp == 0x7FFF )
				4321	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
				4322	) {
				4323	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4324	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4325	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4326	}
				4327	aSigExtra = 0;
				4328	}
				4329	else {
				4330	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				4331	}
				4332	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				4333
				4334	}
				4335
				4336	/*----------------------------------------------------------------------------
				4337	\| Returns the result of converting the extended double-precision floating-
				4338	\| point value `a' to the 64-bit two's complement integer format. The
				4339	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4340	\| Floating-Point Arithmetic, except that the conversion is always rounded
				4341	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				4342	\| Otherwise, if the conversion overflows, the largest integer with the same
				4343	\| sign as `a' is returned.
				4344	----------------------------------------------------------------------------/
				4345
				4346	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
				4347	{
				4348	flag aSign;
				4349	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4350	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4351	int64 z;
				4352
				4353	aSig = extractFloatx80Frac( a );
				4354	aExp = extractFloatx80Exp( a );
				4355	aSign = extractFloatx80Sign( a );
				4356	shiftCount = aExp - 0x403E;
				4357	if ( 0 <= shiftCount ) {
				4358	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
				4359	if ( ( a.high != 0xC03E ) \|\| aSig ) {
				4360	float_raise( float_flag_invalid STATUS_VAR);
				4361	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
				4362	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4363	}
				4364	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4365	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4366	}
				4367	else if ( aExp < 0x3FFF ) {
				4368	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4369	return 0;
				4370	}
				4371	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4372	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4373	STATUS(float_exception_flags) \|= float_flag_inexact;
				4374	}
				4375	if ( aSign ) z = - z;
				4376	return z;
				4377
				4378	}
				4379
				4380	/*----------------------------------------------------------------------------
				4381	\| Returns the result of converting the extended double-precision floating-
				4382	\| point value `a' to the single-precision floating-point format. The
				4383	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4384	\| Floating-Point Arithmetic.
				4385	----------------------------------------------------------------------------/
				4386
				4387	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
				4388	{
				4389	flag aSign;
				4390	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4391	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4392
				4393	aSig = extractFloatx80Frac( a );
				4394	aExp = extractFloatx80Exp( a );
				4395	aSign = extractFloatx80Sign( a );
				4396	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4397	if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4398	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4399	}
				4400	return packFloat32( aSign, 0xFF, 0 );
				4401	}
				4402	shift64RightJamming( aSig, 33, &aSig );
				4403	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				4404	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
				4405
				4406	}
				4407
				4408	/*----------------------------------------------------------------------------
				4409	\| Returns the result of converting the extended double-precision floating-
				4410	\| point value `a' to the double-precision floating-point format. The
				4411	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4412	\| Floating-Point Arithmetic.
				4413	----------------------------------------------------------------------------/
				4414
				4415	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
				4416	{
				4417	flag aSign;
				4418	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4419	uint64_t aSig, zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4420
				4421	aSig = extractFloatx80Frac( a );
				4422	aExp = extractFloatx80Exp( a );
				4423	aSign = extractFloatx80Sign( a );
				4424	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4425	if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4426	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4427	}
				4428	return packFloat64( aSign, 0x7FF, 0 );
				4429	}
				4430	shift64RightJamming( aSig, 1, &zSig );
				4431	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				4432	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
				4433
				4434	}
				4435
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4436	/*----------------------------------------------------------------------------
				4437	\| Returns the result of converting the extended double-precision floating-
				4438	\| point value `a' to the quadruple-precision floating-point format. The
				4439	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4440	\| Floating-Point Arithmetic.
				4441	----------------------------------------------------------------------------/
				4442
				4443	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
				4444	{
				4445	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	4446	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4447	uint64_t aSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4448
				4449	aSig = extractFloatx80Frac( a );
				4450	aExp = extractFloatx80Exp( a );
				4451	aSign = extractFloatx80Sign( a );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4452	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4453	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4454	}
				4455	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
				4456	return packFloat128( aSign, aExp, zSig0, zSig1 );
				4457
				4458	}
				4459
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4460	/*----------------------------------------------------------------------------
				4461	\| Rounds the extended double-precision floating-point value `a' to an integer,
				4462	\| and returns the result as an extended quadruple-precision floating-point
				4463	\| value. The operation is performed according to the IEC/IEEE Standard for
				4464	\| Binary Floating-Point Arithmetic.
				4465	----------------------------------------------------------------------------/
				4466
				4467	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
				4468	{
				4469	flag aSign;
				4470	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4471	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4472	int8 roundingMode;
				4473	floatx80 z;
				4474
				4475	aExp = extractFloatx80Exp( a );
				4476	if ( 0x403E <= aExp ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4477	if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4478	return propagateFloatx80NaN( a, a STATUS_VAR );
				4479	}
				4480	return a;
				4481	}
				4482	if ( aExp < 0x3FFF ) {
				4483	if ( ( aExp == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4484	&& ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4485	return a;
				4486	}
				4487	STATUS(float_exception_flags) \|= float_flag_inexact;
				4488	aSign = extractFloatx80Sign( a );
				4489	switch ( STATUS(float_rounding_mode) ) {
				4490	case float_round_nearest_even:
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4491	if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4492	) {
				4493	return
				4494	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				4495	}
				4496	break;
				4497	case float_round_down:
				4498	return
				4499	aSign ?
				4500	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				4501	: packFloatx80( 0, 0, 0 );
				4502	case float_round_up:
				4503	return
				4504	aSign ? packFloatx80( 1, 0, 0 )
				4505	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				4506	}
				4507	return packFloatx80( aSign, 0, 0 );
				4508	}
				4509	lastBitMask = 1;
				4510	lastBitMask <<= 0x403E - aExp;
				4511	roundBitsMask = lastBitMask - 1;
				4512	z = a;
				4513	roundingMode = STATUS(float_rounding_mode);
				4514	if ( roundingMode == float_round_nearest_even ) {
				4515	z.low += lastBitMask>>1;
				4516	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				4517	}
				4518	else if ( roundingMode != float_round_to_zero ) {
				4519	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				4520	z.low += roundBitsMask;
				4521	}
				4522	}
				4523	z.low &= ~ roundBitsMask;
				4524	if ( z.low == 0 ) {
				4525	++z.high;
				4526	z.low = LIT64( 0x8000000000000000 );
				4527	}
				4528	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4529	return z;
				4530
				4531	}
				4532
				4533	/*----------------------------------------------------------------------------
				4534	\| Returns the result of adding the absolute values of the extended double-
				4535	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
				4536	\| negated before being returned. `zSign' is ignored if the result is a NaN.
				4537	\| The addition is performed according to the IEC/IEEE Standard for Binary
				4538	\| Floating-Point Arithmetic.
				4539	----------------------------------------------------------------------------/
				4540
				4541	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
				4542	{
				4543	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4544	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4545	int32 expDiff;
				4546
				4547	aSig = extractFloatx80Frac( a );
				4548	aExp = extractFloatx80Exp( a );
				4549	bSig = extractFloatx80Frac( b );
				4550	bExp = extractFloatx80Exp( b );
				4551	expDiff = aExp - bExp;
				4552	if ( 0 < expDiff ) {
				4553	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4554	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4555	return a;
				4556	}
				4557	if ( bExp == 0 ) --expDiff;
				4558	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				4559	zExp = aExp;
				4560	}
				4561	else if ( expDiff < 0 ) {
				4562	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4563	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4564	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4565	}
				4566	if ( aExp == 0 ) ++expDiff;
				4567	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				4568	zExp = bExp;
				4569	}
				4570	else {
				4571	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4572	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4573	return propagateFloatx80NaN( a, b STATUS_VAR );
				4574	}
				4575	return a;
				4576	}
				4577	zSig1 = 0;
				4578	zSig0 = aSig + bSig;
				4579	if ( aExp == 0 ) {
				4580	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				4581	goto roundAndPack;
				4582	}
				4583	zExp = aExp;
				4584	goto shiftRight1;
				4585	}
				4586	zSig0 = aSig + bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4587	if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4588	shiftRight1:
				4589	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				4590	zSig0 \|= LIT64( 0x8000000000000000 );
				4591	++zExp;
				4592	roundAndPack:
				4593	return
				4594	roundAndPackFloatx80(
				4595	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4596
				4597	}
				4598
				4599	/*----------------------------------------------------------------------------
				4600	\| Returns the result of subtracting the absolute values of the extended
				4601	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
				4602	\| difference is negated before being returned. `zSign' is ignored if the
				4603	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				4604	\| Standard for Binary Floating-Point Arithmetic.
				4605	----------------------------------------------------------------------------/
				4606
				4607	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
				4608	{
				4609	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4610	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4611	int32 expDiff;
				4612	floatx80 z;
				4613
				4614	aSig = extractFloatx80Frac( a );
				4615	aExp = extractFloatx80Exp( a );
				4616	bSig = extractFloatx80Frac( b );
				4617	bExp = extractFloatx80Exp( b );
				4618	expDiff = aExp - bExp;
				4619	if ( 0 < expDiff ) goto aExpBigger;
				4620	if ( expDiff < 0 ) goto bExpBigger;
				4621	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4622	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4623	return propagateFloatx80NaN( a, b STATUS_VAR );
				4624	}
				4625	float_raise( float_flag_invalid STATUS_VAR);
				4626	z.low = floatx80_default_nan_low;
				4627	z.high = floatx80_default_nan_high;
				4628	return z;
				4629	}
				4630	if ( aExp == 0 ) {
				4631	aExp = 1;
				4632	bExp = 1;
				4633	}
				4634	zSig1 = 0;
				4635	if ( bSig < aSig ) goto aBigger;
				4636	if ( aSig < bSig ) goto bBigger;
				4637	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				4638	bExpBigger:
				4639	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4640	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4641	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4642	}
				4643	if ( aExp == 0 ) ++expDiff;
				4644	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				4645	bBigger:
				4646	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				4647	zExp = bExp;
				4648	zSign ^= 1;
				4649	goto normalizeRoundAndPack;
				4650	aExpBigger:
				4651	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4652	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4653	return a;
				4654	}
				4655	if ( bExp == 0 ) --expDiff;
				4656	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				4657	aBigger:
				4658	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				4659	zExp = aExp;
				4660	normalizeRoundAndPack:
				4661	return
				4662	normalizeRoundAndPackFloatx80(
				4663	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4664
				4665	}
				4666
				4667	/*----------------------------------------------------------------------------
				4668	\| Returns the result of adding the extended double-precision floating-point
				4669	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4670	\| Standard for Binary Floating-Point Arithmetic.
				4671	----------------------------------------------------------------------------/
				4672
				4673	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
				4674	{
				4675	flag aSign, bSign;
				4676
				4677	aSign = extractFloatx80Sign( a );
				4678	bSign = extractFloatx80Sign( b );
				4679	if ( aSign == bSign ) {
				4680	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				4681	}
				4682	else {
				4683	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				4684	}
				4685
				4686	}
				4687
				4688	/*----------------------------------------------------------------------------
				4689	\| Returns the result of subtracting the extended double-precision floating-
				4690	\| point values `a' and `b'. The operation is performed according to the
				4691	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4692	----------------------------------------------------------------------------/
				4693
				4694	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
				4695	{
				4696	flag aSign, bSign;
				4697
				4698	aSign = extractFloatx80Sign( a );
				4699	bSign = extractFloatx80Sign( b );
				4700	if ( aSign == bSign ) {
				4701	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				4702	}
				4703	else {
				4704	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				4705	}
				4706
				4707	}
				4708
				4709	/*----------------------------------------------------------------------------
				4710	\| Returns the result of multiplying the extended double-precision floating-
				4711	\| point values `a' and `b'. The operation is performed according to the
				4712	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4713	----------------------------------------------------------------------------/
				4714
				4715	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
				4716	{
				4717	flag aSign, bSign, zSign;
				4718	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4719	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4720	floatx80 z;
				4721
				4722	aSig = extractFloatx80Frac( a );
				4723	aExp = extractFloatx80Exp( a );
				4724	aSign = extractFloatx80Sign( a );
				4725	bSig = extractFloatx80Frac( b );
				4726	bExp = extractFloatx80Exp( b );
				4727	bSign = extractFloatx80Sign( b );
				4728	zSign = aSign ^ bSign;
				4729	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4730	if ( (uint64_t) ( aSig<<1 )
				4731	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4732	return propagateFloatx80NaN( a, b STATUS_VAR );
				4733	}
				4734	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				4735	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4736	}
				4737	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4738	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4739	if ( ( aExp \| aSig ) == 0 ) {
				4740	invalid:
				4741	float_raise( float_flag_invalid STATUS_VAR);
				4742	z.low = floatx80_default_nan_low;
				4743	z.high = floatx80_default_nan_high;
				4744	return z;
				4745	}
				4746	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4747	}
				4748	if ( aExp == 0 ) {
				4749	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				4750	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				4751	}
				4752	if ( bExp == 0 ) {
				4753	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				4754	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				4755	}
				4756	zExp = aExp + bExp - 0x3FFE;
				4757	mul64To128( aSig, bSig, &zSig0, &zSig1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4758	if ( 0 < (int64_t) zSig0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4759	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				4760	--zExp;
				4761	}
				4762	return
				4763	roundAndPackFloatx80(
				4764	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4765
				4766	}
				4767
				4768	/*----------------------------------------------------------------------------
				4769	\| Returns the result of dividing the extended double-precision floating-point
				4770	\| value `a' by the corresponding value `b'. The operation is performed
				4771	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4772	----------------------------------------------------------------------------/
				4773
				4774	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
				4775	{
				4776	flag aSign, bSign, zSign;
				4777	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4778	uint64_t aSig, bSig, zSig0, zSig1;
				4779	uint64_t rem0, rem1, rem2, term0, term1, term2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4780	floatx80 z;
				4781
				4782	aSig = extractFloatx80Frac( a );
				4783	aExp = extractFloatx80Exp( a );
				4784	aSign = extractFloatx80Sign( a );
				4785	bSig = extractFloatx80Frac( b );
				4786	bExp = extractFloatx80Exp( b );
				4787	bSign = extractFloatx80Sign( b );
				4788	zSign = aSign ^ bSign;
				4789	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4790	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4791	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4792	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4793	goto invalid;
				4794	}
				4795	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4796	}
				4797	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4798	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4799	return packFloatx80( zSign, 0, 0 );
				4800	}
				4801	if ( bExp == 0 ) {
				4802	if ( bSig == 0 ) {
				4803	if ( ( aExp \| aSig ) == 0 ) {
				4804	invalid:
				4805	float_raise( float_flag_invalid STATUS_VAR);
				4806	z.low = floatx80_default_nan_low;
				4807	z.high = floatx80_default_nan_high;
				4808	return z;
				4809	}
				4810	float_raise( float_flag_divbyzero STATUS_VAR);
				4811	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4812	}
				4813	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				4814	}
				4815	if ( aExp == 0 ) {
				4816	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				4817	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				4818	}
				4819	zExp = aExp - bExp + 0x3FFE;
				4820	rem1 = 0;
				4821	if ( bSig <= aSig ) {
				4822	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				4823	++zExp;
				4824	}
				4825	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				4826	mul64To128( bSig, zSig0, &term0, &term1 );
				4827	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4828	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4829	--zSig0;
				4830	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				4831	}
				4832	zSig1 = estimateDiv128To64( rem1, 0, bSig );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4833	if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4834	mul64To128( bSig, zSig1, &term1, &term2 );
				4835	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4836	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4837	--zSig1;
				4838	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				4839	}
				4840	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				4841	}
				4842	return
				4843	roundAndPackFloatx80(
				4844	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4845
				4846	}
				4847
				4848	/*----------------------------------------------------------------------------
				4849	\| Returns the remainder of the extended double-precision floating-point value
				4850	\| `a' with respect to the corresponding value `b'. The operation is performed
				4851	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4852	----------------------------------------------------------------------------/
				4853
				4854	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
				4855	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	4856	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4857	int32 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4858	uint64_t aSig0, aSig1, bSig;
				4859	uint64_t q, term0, term1, alternateASig0, alternateASig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4860	floatx80 z;
				4861
				4862	aSig0 = extractFloatx80Frac( a );
				4863	aExp = extractFloatx80Exp( a );
				4864	aSign = extractFloatx80Sign( a );
				4865	bSig = extractFloatx80Frac( b );
				4866	bExp = extractFloatx80Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4867	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4868	if ( (uint64_t) ( aSig0<<1 )
				4869	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4870	return propagateFloatx80NaN( a, b STATUS_VAR );
				4871	}
				4872	goto invalid;
				4873	}
				4874	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4875	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4876	return a;
				4877	}
				4878	if ( bExp == 0 ) {
				4879	if ( bSig == 0 ) {
				4880	invalid:
				4881	float_raise( float_flag_invalid STATUS_VAR);
				4882	z.low = floatx80_default_nan_low;
				4883	z.high = floatx80_default_nan_high;
				4884	return z;
				4885	}
				4886	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				4887	}
				4888	if ( aExp == 0 ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4889	if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4890	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				4891	}
				4892	bSig \|= LIT64( 0x8000000000000000 );
				4893	zSign = aSign;
				4894	expDiff = aExp - bExp;
				4895	aSig1 = 0;
				4896	if ( expDiff < 0 ) {
				4897	if ( expDiff < -1 ) return a;
				4898	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				4899	expDiff = 0;
				4900	}
				4901	q = ( bSig <= aSig0 );
				4902	if ( q ) aSig0 -= bSig;
				4903	expDiff -= 64;
				4904	while ( 0 < expDiff ) {
				4905	q = estimateDiv128To64( aSig0, aSig1, bSig );
				4906	q = ( 2 < q ) ? q - 2 : 0;
				4907	mul64To128( bSig, q, &term0, &term1 );
				4908	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				4909	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				4910	expDiff -= 62;
				4911	}
				4912	expDiff += 64;
				4913	if ( 0 < expDiff ) {
				4914	q = estimateDiv128To64( aSig0, aSig1, bSig );
				4915	q = ( 2 < q ) ? q - 2 : 0;
				4916	q >>= 64 - expDiff;
				4917	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				4918	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				4919	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				4920	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				4921	++q;
				4922	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				4923	}
				4924	}
				4925	else {
				4926	term1 = 0;
				4927	term0 = bSig;
				4928	}
				4929	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				4930	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				4931	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				4932	&& ( q & 1 ) )
				4933	) {
				4934	aSig0 = alternateASig0;
				4935	aSig1 = alternateASig1;
				4936	zSign = ! zSign;
				4937	}
				4938	return
				4939	normalizeRoundAndPackFloatx80(
				4940	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
				4941
				4942	}
				4943
				4944	/*----------------------------------------------------------------------------
				4945	\| Returns the square root of the extended double-precision floating-point
				4946	\| value `a'. The operation is performed according to the IEC/IEEE Standard
				4947	\| for Binary Floating-Point Arithmetic.
				4948	----------------------------------------------------------------------------/
				4949
				4950	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
				4951	{
				4952	flag aSign;
				4953	int32 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4954	uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
				4955	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4956	floatx80 z;
				4957
				4958	aSig0 = extractFloatx80Frac( a );
				4959	aExp = extractFloatx80Exp( a );
				4960	aSign = extractFloatx80Sign( a );
				4961	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4962	if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4963	if ( ! aSign ) return a;
				4964	goto invalid;
				4965	}
				4966	if ( aSign ) {
				4967	if ( ( aExp \| aSig0 ) == 0 ) return a;
				4968	invalid:
				4969	float_raise( float_flag_invalid STATUS_VAR);
				4970	z.low = floatx80_default_nan_low;
				4971	z.high = floatx80_default_nan_high;
				4972	return z;
				4973	}
				4974	if ( aExp == 0 ) {
				4975	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				4976	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				4977	}
				4978	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				4979	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				4980	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
				4981	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				4982	doubleZSig0 = zSig0<<1;
				4983	mul64To128( zSig0, zSig0, &term0, &term1 );
				4984	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4985	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4986	--zSig0;
				4987	doubleZSig0 -= 2;
				4988	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				4989	}
				4990	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				4991	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
				4992	if ( zSig1 == 0 ) zSig1 = 1;
				4993	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				4994	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				4995	mul64To128( zSig1, zSig1, &term2, &term3 );
				4996	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4997	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4998	--zSig1;
				4999	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				5000	term3 \|= 1;
				5001	term2 \|= doubleZSig0;
				5002	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				5003	}
				5004	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				5005	}
				5006	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
				5007	zSig0 \|= doubleZSig0;
				5008	return
				5009	roundAndPackFloatx80(
				5010	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
				5011
				5012	}
				5013
				5014	/*----------------------------------------------------------------------------
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5015	\| Returns 1 if the extended double-precision floating-point value `a' is equal
				5016	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
				5017	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				5018	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5019	----------------------------------------------------------------------------/
				5020
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5021	int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5022	{
				5023
				5024	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5025	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5026	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5027	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5028	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5029	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5030	return 0;
				5031	}
				5032	return
				5033	( a.low == b.low )
				5034	&& ( ( a.high == b.high )
				5035	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5036	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5037	);
				5038
				5039	}
				5040
				5041	/*----------------------------------------------------------------------------
				5042	\| Returns 1 if the extended double-precision floating-point value `a' is
				5043	\| less than or equal to the corresponding value `b', and 0 otherwise. The
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5044	\| invalid exception is raised if either operand is a NaN. The comparison is
				5045	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				5046	\| Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5047	----------------------------------------------------------------------------/
				5048
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5049	int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5050	{
				5051	flag aSign, bSign;
				5052
				5053	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5054	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5055	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5056	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5057	) {
				5058	float_raise( float_flag_invalid STATUS_VAR);
				5059	return 0;
				5060	}
				5061	aSign = extractFloatx80Sign( a );
				5062	bSign = extractFloatx80Sign( b );
				5063	if ( aSign != bSign ) {
				5064	return
				5065	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5066	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5067	== 0 );
				5068	}
				5069	return
				5070	aSign ? le128( b.high, b.low, a.high, a.low )
				5071	: le128( a.high, a.low, b.high, b.low );
				5072
				5073	}
				5074
				5075	/*----------------------------------------------------------------------------
				5076	\| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5077	\| less than the corresponding value `b', and 0 otherwise. The invalid
				5078	\| exception is raised if either operand is a NaN. The comparison is performed
				5079	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5080	----------------------------------------------------------------------------/
				5081
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5082	int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5083	{
				5084	flag aSign, bSign;
				5085
				5086	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5087	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5088	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5089	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5090	) {
				5091	float_raise( float_flag_invalid STATUS_VAR);
				5092	return 0;
				5093	}
				5094	aSign = extractFloatx80Sign( a );
				5095	bSign = extractFloatx80Sign( b );
				5096	if ( aSign != bSign ) {
				5097	return
				5098	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5099	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5100	!= 0 );
				5101	}
				5102	return
				5103	aSign ? lt128( b.high, b.low, a.high, a.low )
				5104	: lt128( a.high, a.low, b.high, b.low );
				5105
				5106	}
				5107
				5108	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5109	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5110	\| cannot be compared, and 0 otherwise. The invalid exception is raised if
				5111	\| either operand is a NaN. The comparison is performed according to the
				5112	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5113	----------------------------------------------------------------------------/
				5114	int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
				5115	{
				5116	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				5117	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
				5118	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				5119	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
				5120	) {
				5121	float_raise( float_flag_invalid STATUS_VAR);
				5122	return 1;
				5123	}
				5124	return 0;
				5125	}
				5126
				5127	/*----------------------------------------------------------------------------
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5128	\| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5129	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				5130	\| cause an exception. The comparison is performed according to the IEC/IEEE
				5131	\| Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5132	----------------------------------------------------------------------------/
				5133
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5134	int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5135	{
				5136
				5137	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5138	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5139	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5140	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5141	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5142	if ( floatx80_is_signaling_nan( a )
				5143	\|\| floatx80_is_signaling_nan( b ) ) {
				5144	float_raise( float_flag_invalid STATUS_VAR);
				5145	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5146	return 0;
				5147	}
				5148	return
				5149	( a.low == b.low )
				5150	&& ( ( a.high == b.high )
				5151	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5152	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5153	);
				5154
				5155	}
				5156
				5157	/*----------------------------------------------------------------------------
				5158	\| Returns 1 if the extended double-precision floating-point value `a' is less
				5159	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				5160	\| do not cause an exception. Otherwise, the comparison is performed according
				5161	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5162	----------------------------------------------------------------------------/
				5163
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5164	int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5165	{
				5166	flag aSign, bSign;
				5167
				5168	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5169	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5170	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5171	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5172	) {
				5173	if ( floatx80_is_signaling_nan( a )
				5174	\|\| floatx80_is_signaling_nan( b ) ) {
				5175	float_raise( float_flag_invalid STATUS_VAR);
				5176	}
				5177	return 0;
				5178	}
				5179	aSign = extractFloatx80Sign( a );
				5180	bSign = extractFloatx80Sign( b );
				5181	if ( aSign != bSign ) {
				5182	return
				5183	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5184	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5185	== 0 );
				5186	}
				5187	return
				5188	aSign ? le128( b.high, b.low, a.high, a.low )
				5189	: le128( a.high, a.low, b.high, b.low );
				5190
				5191	}
				5192
				5193	/*----------------------------------------------------------------------------
				5194	\| Returns 1 if the extended double-precision floating-point value `a' is less
				5195	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				5196	\| an exception. Otherwise, the comparison is performed according to the
				5197	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5198	----------------------------------------------------------------------------/
				5199
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5200	int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5201	{
				5202	flag aSign, bSign;
				5203
				5204	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5205	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5206	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5207	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5208	) {
				5209	if ( floatx80_is_signaling_nan( a )
				5210	\|\| floatx80_is_signaling_nan( b ) ) {
				5211	float_raise( float_flag_invalid STATUS_VAR);
				5212	}
				5213	return 0;
				5214	}
				5215	aSign = extractFloatx80Sign( a );
				5216	bSign = extractFloatx80Sign( b );
				5217	if ( aSign != bSign ) {
				5218	return
				5219	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5220	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5221	!= 0 );
				5222	}
				5223	return
				5224	aSign ? lt128( b.high, b.low, a.high, a.low )
				5225	: lt128( a.high, a.low, b.high, b.low );
				5226
				5227	}
				5228
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5229	/*----------------------------------------------------------------------------
				5230	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
				5231	\| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
				5232	\| The comparison is performed according to the IEC/IEEE Standard for Binary
				5233	\| Floating-Point Arithmetic.
				5234	----------------------------------------------------------------------------/
				5235	int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
				5236	{
				5237	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				5238	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
				5239	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				5240	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
				5241	) {
				5242	if ( floatx80_is_signaling_nan( a )
				5243	\|\| floatx80_is_signaling_nan( b ) ) {
				5244	float_raise( float_flag_invalid STATUS_VAR);
				5245	}
				5246	return 1;
				5247	}
				5248	return 0;
				5249	}
				5250
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5251	/*----------------------------------------------------------------------------
				5252	\| Returns the result of converting the quadruple-precision floating-point
				5253	\| value `a' to the 32-bit two's complement integer format. The conversion
				5254	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5255	\| Arithmetic---which means in particular that the conversion is rounded
				5256	\| according to the current rounding mode. If `a' is a NaN, the largest
				5257	\| positive integer is returned. Otherwise, if the conversion overflows, the
				5258	\| largest integer with the same sign as `a' is returned.
				5259	----------------------------------------------------------------------------/
				5260
				5261	int32 float128_to_int32( float128 a STATUS_PARAM )
				5262	{
				5263	flag aSign;
				5264	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5265	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5266
				5267	aSig1 = extractFloat128Frac1( a );
				5268	aSig0 = extractFloat128Frac0( a );
				5269	aExp = extractFloat128Exp( a );
				5270	aSign = extractFloat128Sign( a );
				5271	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
				5272	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5273	aSig0 \|= ( aSig1 != 0 );
				5274	shiftCount = 0x4028 - aExp;
				5275	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
				5276	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
				5277
				5278	}
				5279
				5280	/*----------------------------------------------------------------------------
				5281	\| Returns the result of converting the quadruple-precision floating-point
				5282	\| value `a' to the 32-bit two's complement integer format. The conversion
				5283	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5284	\| Arithmetic, except that the conversion is always rounded toward zero. If
				5285	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				5286	\| conversion overflows, the largest integer with the same sign as `a' is
				5287	\| returned.
				5288	----------------------------------------------------------------------------/
				5289
				5290	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
				5291	{
				5292	flag aSign;
				5293	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5294	uint64_t aSig0, aSig1, savedASig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	5295	int32_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5296
				5297	aSig1 = extractFloat128Frac1( a );
				5298	aSig0 = extractFloat128Frac0( a );
				5299	aExp = extractFloat128Exp( a );
				5300	aSign = extractFloat128Sign( a );
				5301	aSig0 \|= ( aSig1 != 0 );
				5302	if ( 0x401E < aExp ) {
				5303	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
				5304	goto invalid;
				5305	}
				5306	else if ( aExp < 0x3FFF ) {
				5307	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				5308	return 0;
				5309	}
				5310	aSig0 \|= LIT64( 0x0001000000000000 );
				5311	shiftCount = 0x402F - aExp;
				5312	savedASig = aSig0;
				5313	aSig0 >>= shiftCount;
				5314	z = aSig0;
				5315	if ( aSign ) z = - z;
				5316	if ( ( z < 0 ) ^ aSign ) {
				5317	invalid:
				5318	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5319	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5320	}
				5321	if ( ( aSig0<<shiftCount ) != savedASig ) {
				5322	STATUS(float_exception_flags) \|= float_flag_inexact;
				5323	}
				5324	return z;
				5325
				5326	}
				5327
				5328	/*----------------------------------------------------------------------------
				5329	\| Returns the result of converting the quadruple-precision floating-point
				5330	\| value `a' to the 64-bit two's complement integer format. The conversion
				5331	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5332	\| Arithmetic---which means in particular that the conversion is rounded
				5333	\| according to the current rounding mode. If `a' is a NaN, the largest
				5334	\| positive integer is returned. Otherwise, if the conversion overflows, the
				5335	\| largest integer with the same sign as `a' is returned.
				5336	----------------------------------------------------------------------------/
				5337
				5338	int64 float128_to_int64( float128 a STATUS_PARAM )
				5339	{
				5340	flag aSign;
				5341	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5342	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5343
				5344	aSig1 = extractFloat128Frac1( a );
				5345	aSig0 = extractFloat128Frac0( a );
				5346	aExp = extractFloat128Exp( a );
				5347	aSign = extractFloat128Sign( a );
				5348	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5349	shiftCount = 0x402F - aExp;
				5350	if ( shiftCount <= 0 ) {
				5351	if ( 0x403E < aExp ) {
				5352	float_raise( float_flag_invalid STATUS_VAR);
				5353	if ( ! aSign
				5354	\|\| ( ( aExp == 0x7FFF )
				5355	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
				5356	)
				5357	) {
				5358	return LIT64( 0x7FFFFFFFFFFFFFFF );
				5359	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5360	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5361	}
				5362	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
				5363	}
				5364	else {
				5365	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
				5366	}
				5367	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
				5368
				5369	}
				5370
				5371	/*----------------------------------------------------------------------------
				5372	\| Returns the result of converting the quadruple-precision floating-point
				5373	\| value `a' to the 64-bit two's complement integer format. The conversion
				5374	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5375	\| Arithmetic, except that the conversion is always rounded toward zero.
				5376	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				5377	\| the conversion overflows, the largest integer with the same sign as `a' is
				5378	\| returned.
				5379	----------------------------------------------------------------------------/
				5380
				5381	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
				5382	{
				5383	flag aSign;
				5384	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5385	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5386	int64 z;
				5387
				5388	aSig1 = extractFloat128Frac1( a );
				5389	aSig0 = extractFloat128Frac0( a );
				5390	aExp = extractFloat128Exp( a );
				5391	aSign = extractFloat128Sign( a );
				5392	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5393	shiftCount = aExp - 0x402F;
				5394	if ( 0 < shiftCount ) {
				5395	if ( 0x403E <= aExp ) {
				5396	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
				5397	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
				5398	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
				5399	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				5400	}
				5401	else {
				5402	float_raise( float_flag_invalid STATUS_VAR);
				5403	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
				5404	return LIT64( 0x7FFFFFFFFFFFFFFF );
				5405	}
				5406	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5407	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5408	}
				5409	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5410	if ( (uint64_t) ( aSig1<<shiftCount ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5411	STATUS(float_exception_flags) \|= float_flag_inexact;
				5412	}
				5413	}
				5414	else {
				5415	if ( aExp < 0x3FFF ) {
				5416	if ( aExp \| aSig0 \| aSig1 ) {
				5417	STATUS(float_exception_flags) \|= float_flag_inexact;
				5418	}
				5419	return 0;
				5420	}
				5421	z = aSig0>>( - shiftCount );
				5422	if ( aSig1
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5423	\|\| ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5424	STATUS(float_exception_flags) \|= float_flag_inexact;
				5425	}
				5426	}
				5427	if ( aSign ) z = - z;
				5428	return z;
				5429
				5430	}
				5431
				5432	/*----------------------------------------------------------------------------
				5433	\| Returns the result of converting the quadruple-precision floating-point
				5434	\| value `a' to the single-precision floating-point format. The conversion
				5435	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5436	\| Arithmetic.
				5437	----------------------------------------------------------------------------/
				5438
				5439	float32 float128_to_float32( float128 a STATUS_PARAM )
				5440	{
				5441	flag aSign;
				5442	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5443	uint64_t aSig0, aSig1;
				5444	uint32_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5445
				5446	aSig1 = extractFloat128Frac1( a );
				5447	aSig0 = extractFloat128Frac0( a );
				5448	aExp = extractFloat128Exp( a );
				5449	aSign = extractFloat128Sign( a );
				5450	if ( aExp == 0x7FFF ) {
				5451	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5452	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5453	}
				5454	return packFloat32( aSign, 0xFF, 0 );
				5455	}
				5456	aSig0 \|= ( aSig1 != 0 );
				5457	shift64RightJamming( aSig0, 18, &aSig0 );
				5458	zSig = aSig0;
				5459	if ( aExp \|\| zSig ) {
				5460	zSig \|= 0x40000000;
				5461	aExp -= 0x3F81;
				5462	}
				5463	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				5464
				5465	}
				5466
				5467	/*----------------------------------------------------------------------------
				5468	\| Returns the result of converting the quadruple-precision floating-point
				5469	\| value `a' to the double-precision floating-point format. The conversion
				5470	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5471	\| Arithmetic.
				5472	----------------------------------------------------------------------------/
				5473
				5474	float64 float128_to_float64( float128 a STATUS_PARAM )
				5475	{
				5476	flag aSign;
				5477	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5478	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5479
				5480	aSig1 = extractFloat128Frac1( a );
				5481	aSig0 = extractFloat128Frac0( a );
				5482	aExp = extractFloat128Exp( a );
				5483	aSign = extractFloat128Sign( a );
				5484	if ( aExp == 0x7FFF ) {
				5485	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5486	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5487	}
				5488	return packFloat64( aSign, 0x7FF, 0 );
				5489	}
				5490	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				5491	aSig0 \|= ( aSig1 != 0 );
				5492	if ( aExp \|\| aSig0 ) {
				5493	aSig0 \|= LIT64( 0x4000000000000000 );
				5494	aExp -= 0x3C01;
				5495	}
				5496	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
				5497
				5498	}
				5499
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5500	/*----------------------------------------------------------------------------
				5501	\| Returns the result of converting the quadruple-precision floating-point
				5502	\| value `a' to the extended double-precision floating-point format. The
				5503	\| conversion is performed according to the IEC/IEEE Standard for Binary
				5504	\| Floating-Point Arithmetic.
				5505	----------------------------------------------------------------------------/
				5506
				5507	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
				5508	{
				5509	flag aSign;
				5510	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5511	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5512
				5513	aSig1 = extractFloat128Frac1( a );
				5514	aSig0 = extractFloat128Frac0( a );
				5515	aExp = extractFloat128Exp( a );
				5516	aSign = extractFloat128Sign( a );
				5517	if ( aExp == 0x7FFF ) {
				5518	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5519	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5520	}
				5521	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5522	}
				5523	if ( aExp == 0 ) {
				5524	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
				5525	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5526	}
				5527	else {
				5528	aSig0 \|= LIT64( 0x0001000000000000 );
				5529	}
				5530	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
				5531	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
				5532
				5533	}
				5534
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5535	/*----------------------------------------------------------------------------
				5536	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
				5537	\| returns the result as a quadruple-precision floating-point value. The
				5538	\| operation is performed according to the IEC/IEEE Standard for Binary
				5539	\| Floating-Point Arithmetic.
				5540	----------------------------------------------------------------------------/
				5541
				5542	float128 float128_round_to_int( float128 a STATUS_PARAM )
				5543	{
				5544	flag aSign;
				5545	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5546	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5547	int8 roundingMode;
				5548	float128 z;
				5549
				5550	aExp = extractFloat128Exp( a );
				5551	if ( 0x402F <= aExp ) {
				5552	if ( 0x406F <= aExp ) {
				5553	if ( ( aExp == 0x7FFF )
				5554	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
				5555	) {
				5556	return propagateFloat128NaN( a, a STATUS_VAR );
				5557	}
				5558	return a;
				5559	}
				5560	lastBitMask = 1;
				5561	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
				5562	roundBitsMask = lastBitMask - 1;
				5563	z = a;
				5564	roundingMode = STATUS(float_rounding_mode);
				5565	if ( roundingMode == float_round_nearest_even ) {
				5566	if ( lastBitMask ) {
				5567	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
				5568	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				5569	}
				5570	else {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5571	if ( (int64_t) z.low < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5572	++z.high;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5573	if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5574	}
				5575	}
				5576	}
				5577	else if ( roundingMode != float_round_to_zero ) {
				5578	if ( extractFloat128Sign( z )
				5579	^ ( roundingMode == float_round_up ) ) {
				5580	add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
				5581	}
				5582	}
				5583	z.low &= ~ roundBitsMask;
				5584	}
				5585	else {
				5586	if ( aExp < 0x3FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5587	if ( ( ( (uint64_t) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5588	STATUS(float_exception_flags) \|= float_flag_inexact;
				5589	aSign = extractFloat128Sign( a );
				5590	switch ( STATUS(float_rounding_mode) ) {
				5591	case float_round_nearest_even:
				5592	if ( ( aExp == 0x3FFE )
				5593	&& ( extractFloat128Frac0( a )
				5594	\| extractFloat128Frac1( a ) )
				5595	) {
				5596	return packFloat128( aSign, 0x3FFF, 0, 0 );
				5597	}
				5598	break;
				5599	case float_round_down:
				5600	return
				5601	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
				5602	: packFloat128( 0, 0, 0, 0 );
				5603	case float_round_up:
				5604	return
				5605	aSign ? packFloat128( 1, 0, 0, 0 )
				5606	: packFloat128( 0, 0x3FFF, 0, 0 );
				5607	}
				5608	return packFloat128( aSign, 0, 0, 0 );
				5609	}
				5610	lastBitMask = 1;
				5611	lastBitMask <<= 0x402F - aExp;
				5612	roundBitsMask = lastBitMask - 1;
				5613	z.low = 0;
				5614	z.high = a.high;
				5615	roundingMode = STATUS(float_rounding_mode);
				5616	if ( roundingMode == float_round_nearest_even ) {
				5617	z.high += lastBitMask>>1;
				5618	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
				5619	z.high &= ~ lastBitMask;
				5620	}
				5621	}
				5622	else if ( roundingMode != float_round_to_zero ) {
				5623	if ( extractFloat128Sign( z )
				5624	^ ( roundingMode == float_round_up ) ) {
				5625	z.high \|= ( a.low != 0 );
				5626	z.high += roundBitsMask;
				5627	}
				5628	}
				5629	z.high &= ~ roundBitsMask;
				5630	}
				5631	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
				5632	STATUS(float_exception_flags) \|= float_flag_inexact;
				5633	}
				5634	return z;
				5635
				5636	}
				5637
				5638	/*----------------------------------------------------------------------------
				5639	\| Returns the result of adding the absolute values of the quadruple-precision
				5640	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				5641	\| before being returned. `zSign' is ignored if the result is a NaN.
				5642	\| The addition is performed according to the IEC/IEEE Standard for Binary
				5643	\| Floating-Point Arithmetic.
				5644	----------------------------------------------------------------------------/
				5645
				5646	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				5647	{
				5648	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5649	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5650	int32 expDiff;
				5651
				5652	aSig1 = extractFloat128Frac1( a );
				5653	aSig0 = extractFloat128Frac0( a );
				5654	aExp = extractFloat128Exp( a );
				5655	bSig1 = extractFloat128Frac1( b );
				5656	bSig0 = extractFloat128Frac0( b );
				5657	bExp = extractFloat128Exp( b );
				5658	expDiff = aExp - bExp;
				5659	if ( 0 < expDiff ) {
				5660	if ( aExp == 0x7FFF ) {
				5661	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5662	return a;
				5663	}
				5664	if ( bExp == 0 ) {
				5665	--expDiff;
				5666	}
				5667	else {
				5668	bSig0 \|= LIT64( 0x0001000000000000 );
				5669	}
				5670	shift128ExtraRightJamming(
				5671	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
				5672	zExp = aExp;
				5673	}
				5674	else if ( expDiff < 0 ) {
				5675	if ( bExp == 0x7FFF ) {
				5676	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5677	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5678	}
				5679	if ( aExp == 0 ) {
				5680	++expDiff;
				5681	}
				5682	else {
				5683	aSig0 \|= LIT64( 0x0001000000000000 );
				5684	}
				5685	shift128ExtraRightJamming(
				5686	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
				5687	zExp = bExp;
				5688	}
				5689	else {
				5690	if ( aExp == 0x7FFF ) {
				5691	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				5692	return propagateFloat128NaN( a, b STATUS_VAR );
				5693	}
				5694	return a;
				5695	}
				5696	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	5697	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	5698	if (STATUS(flush_to_zero)) {
				5699	if (zSig0 \| zSig1) {
				5700	float_raise(float_flag_output_denormal STATUS_VAR);
				5701	}
				5702	return packFloat128(zSign, 0, 0, 0);
				5703	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	5704	return packFloat128( zSign, 0, zSig0, zSig1 );
				5705	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5706	zSig2 = 0;
				5707	zSig0 \|= LIT64( 0x0002000000000000 );
				5708	zExp = aExp;
				5709	goto shiftRight1;
				5710	}
				5711	aSig0 \|= LIT64( 0x0001000000000000 );
				5712	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				5713	--zExp;
				5714	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
				5715	++zExp;
				5716	shiftRight1:
				5717	shift128ExtraRightJamming(
				5718	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				5719	roundAndPack:
				5720	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5721
				5722	}
				5723
				5724	/*----------------------------------------------------------------------------
				5725	\| Returns the result of subtracting the absolute values of the quadruple-
				5726	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				5727	\| difference is negated before being returned. `zSign' is ignored if the
				5728	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				5729	\| Standard for Binary Floating-Point Arithmetic.
				5730	----------------------------------------------------------------------------/
				5731
				5732	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				5733	{
				5734	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5735	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5736	int32 expDiff;
				5737	float128 z;
				5738
				5739	aSig1 = extractFloat128Frac1( a );
				5740	aSig0 = extractFloat128Frac0( a );
				5741	aExp = extractFloat128Exp( a );
				5742	bSig1 = extractFloat128Frac1( b );
				5743	bSig0 = extractFloat128Frac0( b );
				5744	bExp = extractFloat128Exp( b );
				5745	expDiff = aExp - bExp;
				5746	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				5747	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
				5748	if ( 0 < expDiff ) goto aExpBigger;
				5749	if ( expDiff < 0 ) goto bExpBigger;
				5750	if ( aExp == 0x7FFF ) {
				5751	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				5752	return propagateFloat128NaN( a, b STATUS_VAR );
				5753	}
				5754	float_raise( float_flag_invalid STATUS_VAR);
				5755	z.low = float128_default_nan_low;
				5756	z.high = float128_default_nan_high;
				5757	return z;
				5758	}
				5759	if ( aExp == 0 ) {
				5760	aExp = 1;
				5761	bExp = 1;
				5762	}
				5763	if ( bSig0 < aSig0 ) goto aBigger;
				5764	if ( aSig0 < bSig0 ) goto bBigger;
				5765	if ( bSig1 < aSig1 ) goto aBigger;
				5766	if ( aSig1 < bSig1 ) goto bBigger;
				5767	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
				5768	bExpBigger:
				5769	if ( bExp == 0x7FFF ) {
				5770	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5771	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
				5772	}
				5773	if ( aExp == 0 ) {
				5774	++expDiff;
				5775	}
				5776	else {
				5777	aSig0 \|= LIT64( 0x4000000000000000 );
				5778	}
				5779	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				5780	bSig0 \|= LIT64( 0x4000000000000000 );
				5781	bBigger:
				5782	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
				5783	zExp = bExp;
				5784	zSign ^= 1;
				5785	goto normalizeRoundAndPack;
				5786	aExpBigger:
				5787	if ( aExp == 0x7FFF ) {
				5788	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5789	return a;
				5790	}
				5791	if ( bExp == 0 ) {
				5792	--expDiff;
				5793	}
				5794	else {
				5795	bSig0 \|= LIT64( 0x4000000000000000 );
				5796	}
				5797	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
				5798	aSig0 \|= LIT64( 0x4000000000000000 );
				5799	aBigger:
				5800	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				5801	zExp = aExp;
				5802	normalizeRoundAndPack:
				5803	--zExp;
				5804	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
				5805
				5806	}
				5807
				5808	/*----------------------------------------------------------------------------
				5809	\| Returns the result of adding the quadruple-precision floating-point values
				5810	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				5811	\| for Binary Floating-Point Arithmetic.
				5812	----------------------------------------------------------------------------/
				5813
				5814	float128 float128_add( float128 a, float128 b STATUS_PARAM )
				5815	{
				5816	flag aSign, bSign;
				5817
				5818	aSign = extractFloat128Sign( a );
				5819	bSign = extractFloat128Sign( b );
				5820	if ( aSign == bSign ) {
				5821	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				5822	}
				5823	else {
				5824	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				5825	}
				5826
				5827	}
				5828
				5829	/*----------------------------------------------------------------------------
				5830	\| Returns the result of subtracting the quadruple-precision floating-point
				5831	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				5832	\| Standard for Binary Floating-Point Arithmetic.
				5833	----------------------------------------------------------------------------/
				5834
				5835	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
				5836	{
				5837	flag aSign, bSign;
				5838
				5839	aSign = extractFloat128Sign( a );
				5840	bSign = extractFloat128Sign( b );
				5841	if ( aSign == bSign ) {
				5842	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				5843	}
				5844	else {
				5845	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				5846	}
				5847
				5848	}
				5849
				5850	/*----------------------------------------------------------------------------
				5851	\| Returns the result of multiplying the quadruple-precision floating-point
				5852	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				5853	\| Standard for Binary Floating-Point Arithmetic.
				5854	----------------------------------------------------------------------------/
				5855
				5856	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
				5857	{
				5858	flag aSign, bSign, zSign;
				5859	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5860	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5861	float128 z;
				5862
				5863	aSig1 = extractFloat128Frac1( a );
				5864	aSig0 = extractFloat128Frac0( a );
				5865	aExp = extractFloat128Exp( a );
				5866	aSign = extractFloat128Sign( a );
				5867	bSig1 = extractFloat128Frac1( b );
				5868	bSig0 = extractFloat128Frac0( b );
				5869	bExp = extractFloat128Exp( b );
				5870	bSign = extractFloat128Sign( b );
				5871	zSign = aSign ^ bSign;
				5872	if ( aExp == 0x7FFF ) {
				5873	if ( ( aSig0 \| aSig1 )
				5874	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				5875	return propagateFloat128NaN( a, b STATUS_VAR );
				5876	}
				5877	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
				5878	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5879	}
				5880	if ( bExp == 0x7FFF ) {
				5881	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5882	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				5883	invalid:
				5884	float_raise( float_flag_invalid STATUS_VAR);
				5885	z.low = float128_default_nan_low;
				5886	z.high = float128_default_nan_high;
				5887	return z;
				5888	}
				5889	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5890	}
				5891	if ( aExp == 0 ) {
				5892	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				5893	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5894	}
				5895	if ( bExp == 0 ) {
				5896	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				5897	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				5898	}
				5899	zExp = aExp + bExp - 0x4000;
				5900	aSig0 \|= LIT64( 0x0001000000000000 );
				5901	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
				5902	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
				5903	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
				5904	zSig2 \|= ( zSig3 != 0 );
				5905	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
				5906	shift128ExtraRightJamming(
				5907	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				5908	++zExp;
				5909	}
				5910	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5911
				5912	}
				5913
				5914	/*----------------------------------------------------------------------------
				5915	\| Returns the result of dividing the quadruple-precision floating-point value
				5916	\| `a' by the corresponding value `b'. The operation is performed according to
				5917	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5918	----------------------------------------------------------------------------/
				5919
				5920	float128 float128_div( float128 a, float128 b STATUS_PARAM )
				5921	{
				5922	flag aSign, bSign, zSign;
				5923	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5924	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				5925	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5926	float128 z;
				5927
				5928	aSig1 = extractFloat128Frac1( a );
				5929	aSig0 = extractFloat128Frac0( a );
				5930	aExp = extractFloat128Exp( a );
				5931	aSign = extractFloat128Sign( a );
				5932	bSig1 = extractFloat128Frac1( b );
				5933	bSig0 = extractFloat128Frac0( b );
				5934	bExp = extractFloat128Exp( b );
				5935	bSign = extractFloat128Sign( b );
				5936	zSign = aSign ^ bSign;
				5937	if ( aExp == 0x7FFF ) {
				5938	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5939	if ( bExp == 0x7FFF ) {
				5940	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5941	goto invalid;
				5942	}
				5943	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5944	}
				5945	if ( bExp == 0x7FFF ) {
				5946	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5947	return packFloat128( zSign, 0, 0, 0 );
				5948	}
				5949	if ( bExp == 0 ) {
				5950	if ( ( bSig0 \| bSig1 ) == 0 ) {
				5951	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				5952	invalid:
				5953	float_raise( float_flag_invalid STATUS_VAR);
				5954	z.low = float128_default_nan_low;
				5955	z.high = float128_default_nan_high;
				5956	return z;
				5957	}
				5958	float_raise( float_flag_divbyzero STATUS_VAR);
				5959	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5960	}
				5961	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				5962	}
				5963	if ( aExp == 0 ) {
				5964	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				5965	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5966	}
				5967	zExp = aExp - bExp + 0x3FFD;
				5968	shortShift128Left(
				5969	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
				5970	shortShift128Left(
				5971	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				5972	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
				5973	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
				5974	++zExp;
				5975	}
				5976	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
				5977	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
				5978	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5979	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5980	--zSig0;
				5981	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
				5982	}
				5983	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
				5984	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
				5985	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
				5986	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5987	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5988	--zSig1;
				5989	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
				5990	}
				5991	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				5992	}
				5993	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
				5994	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5995
				5996	}
				5997
				5998	/*----------------------------------------------------------------------------
				5999	\| Returns the remainder of the quadruple-precision floating-point value `a'
				6000	\| with respect to the corresponding value `b'. The operation is performed
				6001	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6002	----------------------------------------------------------------------------/
				6003
				6004	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
				6005	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	6006	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6007	int32 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6008	uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
				6009	uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
				6010	int64_t sigMean0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6011	float128 z;
				6012
				6013	aSig1 = extractFloat128Frac1( a );
				6014	aSig0 = extractFloat128Frac0( a );
				6015	aExp = extractFloat128Exp( a );
				6016	aSign = extractFloat128Sign( a );
				6017	bSig1 = extractFloat128Frac1( b );
				6018	bSig0 = extractFloat128Frac0( b );
				6019	bExp = extractFloat128Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6020	if ( aExp == 0x7FFF ) {
				6021	if ( ( aSig0 \| aSig1 )
				6022	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				6023	return propagateFloat128NaN( a, b STATUS_VAR );
				6024	}
				6025	goto invalid;
				6026	}
				6027	if ( bExp == 0x7FFF ) {
				6028	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6029	return a;
				6030	}
				6031	if ( bExp == 0 ) {
				6032	if ( ( bSig0 \| bSig1 ) == 0 ) {
				6033	invalid:
				6034	float_raise( float_flag_invalid STATUS_VAR);
				6035	z.low = float128_default_nan_low;
				6036	z.high = float128_default_nan_high;
				6037	return z;
				6038	}
				6039	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				6040	}
				6041	if ( aExp == 0 ) {
				6042	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
				6043	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6044	}
				6045	expDiff = aExp - bExp;
				6046	if ( expDiff < -1 ) return a;
				6047	shortShift128Left(
				6048	aSig0 \| LIT64( 0x0001000000000000 ),
				6049	aSig1,
				6050	15 - ( expDiff < 0 ),
				6051	&aSig0,
				6052	&aSig1
				6053	);
				6054	shortShift128Left(
				6055	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				6056	q = le128( bSig0, bSig1, aSig0, aSig1 );
				6057	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				6058	expDiff -= 64;
				6059	while ( 0 < expDiff ) {
				6060	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				6061	q = ( 4 < q ) ? q - 4 : 0;
				6062	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				6063	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
				6064	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
				6065	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
				6066	expDiff -= 61;
				6067	}
				6068	if ( -64 < expDiff ) {
				6069	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				6070	q = ( 4 < q ) ? q - 4 : 0;
				6071	q >>= - expDiff;
				6072	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				6073	expDiff += 52;
				6074	if ( expDiff < 0 ) {
				6075	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				6076	}
				6077	else {
				6078	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
				6079	}
				6080	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				6081	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
				6082	}
				6083	else {
				6084	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
				6085	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				6086	}
				6087	do {
				6088	alternateASig0 = aSig0;
				6089	alternateASig1 = aSig1;
				6090	++q;
				6091	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6092	} while ( 0 <= (int64_t) aSig0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6093	add128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6094	aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6095	if ( ( sigMean0 < 0 )
				6096	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
				6097	aSig0 = alternateASig0;
				6098	aSig1 = alternateASig1;
				6099	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6100	zSign = ( (int64_t) aSig0 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6101	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
				6102	return
				6103	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
				6104
				6105	}
				6106
				6107	/*----------------------------------------------------------------------------
				6108	\| Returns the square root of the quadruple-precision floating-point value `a'.
				6109	\| The operation is performed according to the IEC/IEEE Standard for Binary
				6110	\| Floating-Point Arithmetic.
				6111	----------------------------------------------------------------------------/
				6112
				6113	float128 float128_sqrt( float128 a STATUS_PARAM )
				6114	{
				6115	flag aSign;
				6116	int32 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6117	uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
				6118	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6119	float128 z;
				6120
				6121	aSig1 = extractFloat128Frac1( a );
				6122	aSig0 = extractFloat128Frac0( a );
				6123	aExp = extractFloat128Exp( a );
				6124	aSign = extractFloat128Sign( a );
				6125	if ( aExp == 0x7FFF ) {
				6126	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
				6127	if ( ! aSign ) return a;
				6128	goto invalid;
				6129	}
				6130	if ( aSign ) {
				6131	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
				6132	invalid:
				6133	float_raise( float_flag_invalid STATUS_VAR);
				6134	z.low = float128_default_nan_low;
				6135	z.high = float128_default_nan_high;
				6136	return z;
				6137	}
				6138	if ( aExp == 0 ) {
				6139	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
				6140	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6141	}
				6142	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
				6143	aSig0 \|= LIT64( 0x0001000000000000 );
				6144	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
				6145	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
				6146	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				6147	doubleZSig0 = zSig0<<1;
				6148	mul64To128( zSig0, zSig0, &term0, &term1 );
				6149	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6150	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6151	--zSig0;
				6152	doubleZSig0 -= 2;
				6153	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				6154	}
				6155	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				6156	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
				6157	if ( zSig1 == 0 ) zSig1 = 1;
				6158	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				6159	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				6160	mul64To128( zSig1, zSig1, &term2, &term3 );
				6161	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6162	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6163	--zSig1;
				6164	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				6165	term3 \|= 1;
				6166	term2 \|= doubleZSig0;
				6167	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				6168	}
				6169	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				6170	}
				6171	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
				6172	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				6173
				6174	}
				6175
				6176	/*----------------------------------------------------------------------------
				6177	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6178	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				6179	\| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6180	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6181	----------------------------------------------------------------------------/
				6182
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6183	int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6184	{
				6185
				6186	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6187	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6188	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6189	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6190	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6191	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6192	return 0;
				6193	}
				6194	return
				6195	( a.low == b.low )
				6196	&& ( ( a.high == b.high )
				6197	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6198	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6199	);
				6200
				6201	}
				6202
				6203	/*----------------------------------------------------------------------------
				6204	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6205	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
				6206	\| exception is raised if either operand is a NaN. The comparison is performed
				6207	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6208	----------------------------------------------------------------------------/
				6209
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6210	int float128_le( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6211	{
				6212	flag aSign, bSign;
				6213
				6214	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6215	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6216	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6217	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6218	) {
				6219	float_raise( float_flag_invalid STATUS_VAR);
				6220	return 0;
				6221	}
				6222	aSign = extractFloat128Sign( a );
				6223	bSign = extractFloat128Sign( b );
				6224	if ( aSign != bSign ) {
				6225	return
				6226	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6227	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6228	== 0 );
				6229	}
				6230	return
				6231	aSign ? le128( b.high, b.low, a.high, a.low )
				6232	: le128( a.high, a.low, b.high, b.low );
				6233
				6234	}
				6235
				6236	/*----------------------------------------------------------------------------
				6237	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6238	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				6239	\| raised if either operand is a NaN. The comparison is performed according
				6240	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6241	----------------------------------------------------------------------------/
				6242
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6243	int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6244	{
				6245	flag aSign, bSign;
				6246
				6247	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6248	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6249	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6250	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6251	) {
				6252	float_raise( float_flag_invalid STATUS_VAR);
				6253	return 0;
				6254	}
				6255	aSign = extractFloat128Sign( a );
				6256	bSign = extractFloat128Sign( b );
				6257	if ( aSign != bSign ) {
				6258	return
				6259	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6260	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6261	!= 0 );
				6262	}
				6263	return
				6264	aSign ? lt128( b.high, b.low, a.high, a.low )
				6265	: lt128( a.high, a.low, b.high, b.low );
				6266
				6267	}
				6268
				6269	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6270	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6271	\| be compared, and 0 otherwise. The invalid exception is raised if either
				6272	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				6273	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6274	----------------------------------------------------------------------------/
				6275
				6276	int float128_unordered( float128 a, float128 b STATUS_PARAM )
				6277	{
				6278	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6279	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6280	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6281	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6282	) {
				6283	float_raise( float_flag_invalid STATUS_VAR);
				6284	return 1;
				6285	}
				6286	return 0;
				6287	}
				6288
				6289	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6290	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6291	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				6292	\| exception. The comparison is performed according to the IEC/IEEE Standard
				6293	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6294	----------------------------------------------------------------------------/
				6295
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6296	int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6297	{
				6298
				6299	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6300	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6301	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6302	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6303	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6304	if ( float128_is_signaling_nan( a )
				6305	\|\| float128_is_signaling_nan( b ) ) {
				6306	float_raise( float_flag_invalid STATUS_VAR);
				6307	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6308	return 0;
				6309	}
				6310	return
				6311	( a.low == b.low )
				6312	&& ( ( a.high == b.high )
				6313	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6314	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6315	);
				6316
				6317	}
				6318
				6319	/*----------------------------------------------------------------------------
				6320	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				6321	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				6322	\| cause an exception. Otherwise, the comparison is performed according to the
				6323	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6324	----------------------------------------------------------------------------/
				6325
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6326	int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6327	{
				6328	flag aSign, bSign;
				6329
				6330	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6331	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6332	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6333	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6334	) {
				6335	if ( float128_is_signaling_nan( a )
				6336	\|\| float128_is_signaling_nan( b ) ) {
				6337	float_raise( float_flag_invalid STATUS_VAR);
				6338	}
				6339	return 0;
				6340	}
				6341	aSign = extractFloat128Sign( a );
				6342	bSign = extractFloat128Sign( b );
				6343	if ( aSign != bSign ) {
				6344	return
				6345	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6346	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6347	== 0 );
				6348	}
				6349	return
				6350	aSign ? le128( b.high, b.low, a.high, a.low )
				6351	: le128( a.high, a.low, b.high, b.low );
				6352
				6353	}
				6354
				6355	/*----------------------------------------------------------------------------
				6356	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				6357	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				6358	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				6359	\| Standard for Binary Floating-Point Arithmetic.
				6360	----------------------------------------------------------------------------/
				6361
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6362	int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6363	{
				6364	flag aSign, bSign;
				6365
				6366	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6367	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6368	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6369	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6370	) {
				6371	if ( float128_is_signaling_nan( a )
				6372	\|\| float128_is_signaling_nan( b ) ) {
				6373	float_raise( float_flag_invalid STATUS_VAR);
				6374	}
				6375	return 0;
				6376	}
				6377	aSign = extractFloat128Sign( a );
				6378	bSign = extractFloat128Sign( b );
				6379	if ( aSign != bSign ) {
				6380	return
				6381	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6382	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6383	!= 0 );
				6384	}
				6385	return
				6386	aSign ? lt128( b.high, b.low, a.high, a.low )
				6387	: lt128( a.high, a.low, b.high, b.low );
				6388
				6389	}
				6390
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6391	/*----------------------------------------------------------------------------
				6392	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
				6393	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				6394	\| comparison is performed according to the IEC/IEEE Standard for Binary
				6395	\| Floating-Point Arithmetic.
				6396	----------------------------------------------------------------------------/
				6397
				6398	int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
				6399	{
				6400	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6401	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6402	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6403	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6404	) {
				6405	if ( float128_is_signaling_nan( a )
				6406	\|\| float128_is_signaling_nan( b ) ) {
				6407	float_raise( float_flag_invalid STATUS_VAR);
				6408	}
				6409	return 1;
				6410	}
				6411	return 0;
				6412	}
				6413
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6414	/* misc functions */
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6415	float32 uint32_to_float32( uint32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6416	{
				6417	return int64_to_float32(a STATUS_VAR);
				6418	}
				6419
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6420	float64 uint32_to_float64( uint32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6421	{
				6422	return int64_to_float64(a STATUS_VAR);
				6423	}
				6424
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6425	uint32 float32_to_uint32( float32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6426	{
				6427	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6428	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6429
				6430	v = float32_to_int64(a STATUS_VAR);
				6431	if (v < 0) {
				6432	res = 0;
				6433	float_raise( float_flag_invalid STATUS_VAR);
				6434	} else if (v > 0xffffffff) {
				6435	res = 0xffffffff;
				6436	float_raise( float_flag_invalid STATUS_VAR);
				6437	} else {
				6438	res = v;
				6439	}
				6440	return res;
				6441	}
				6442
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6443	uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6444	{
				6445	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6446	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6447
				6448	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				6449	if (v < 0) {
				6450	res = 0;
				6451	float_raise( float_flag_invalid STATUS_VAR);
				6452	} else if (v > 0xffffffff) {
				6453	res = 0xffffffff;
				6454	float_raise( float_flag_invalid STATUS_VAR);
				6455	} else {
				6456	res = v;
				6457	}
				6458	return res;
				6459	}
				6460
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	6461	uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6462	{
				6463	int64_t v;
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	6464	uint_fast16_t res;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6465
				6466	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				6467	if (v < 0) {
				6468	res = 0;
				6469	float_raise( float_flag_invalid STATUS_VAR);
				6470	} else if (v > 0xffff) {
				6471	res = 0xffff;
				6472	float_raise( float_flag_invalid STATUS_VAR);
				6473	} else {
				6474	res = v;
				6475	}
				6476	return res;
				6477	}
				6478
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6479	uint32 float64_to_uint32( float64 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6480	{
				6481	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6482	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6483
				6484	v = float64_to_int64(a STATUS_VAR);
				6485	if (v < 0) {
				6486	res = 0;
				6487	float_raise( float_flag_invalid STATUS_VAR);
				6488	} else if (v > 0xffffffff) {
				6489	res = 0xffffffff;
				6490	float_raise( float_flag_invalid STATUS_VAR);
				6491	} else {
				6492	res = v;
				6493	}
				6494	return res;
				6495	}
				6496
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6497	uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6498	{
				6499	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6500	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6501
				6502	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				6503	if (v < 0) {
				6504	res = 0;
				6505	float_raise( float_flag_invalid STATUS_VAR);
				6506	} else if (v > 0xffffffff) {
				6507	res = 0xffffffff;
				6508	float_raise( float_flag_invalid STATUS_VAR);
				6509	} else {
				6510	res = v;
				6511	}
				6512	return res;
				6513	}
				6514
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	6515	uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6516	{
				6517	int64_t v;
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	6518	uint_fast16_t res;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6519
				6520	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				6521	if (v < 0) {
				6522	res = 0;
				6523	float_raise( float_flag_invalid STATUS_VAR);
				6524	} else if (v > 0xffff) {
				6525	res = 0xffff;
				6526	float_raise( float_flag_invalid STATUS_VAR);
				6527	} else {
				6528	res = v;
				6529	}
				6530	return res;
				6531	}
				6532
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6533	/* FIXME: This looks broken. */
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	6534	uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
				6535	{
				6536	int64_t v;
				6537
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6538	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				6539	v += float64_val(a);
				6540	v = float64_to_int64(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	6541
				6542	return v - INT64_MIN;
				6543	}
				6544
				6545	uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
				6546	{
				6547	int64_t v;
				6548
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6549	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				6550	v += float64_val(a);
				6551	v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	6552
				6553	return v - INT64_MIN;
				6554	}
				6555
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6556	#define COMPARE(s, nan_exp) \
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6557	INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6558	int is_quiet STATUS_PARAM ) \
				6559	{ \
				6560	flag aSign, bSign; \
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6561	uint ## s ## _t av, bv; \
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	6562	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
				6563	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6564	\
				6565	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
				6566	extractFloat ## s ## Frac( a ) ) \|\| \
				6567	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
				6568	extractFloat ## s ## Frac( b ) )) { \
				6569	if (!is_quiet \|\| \
				6570	float ## s ## _is_signaling_nan( a ) \|\| \
				6571	float ## s ## _is_signaling_nan( b ) ) { \
				6572	float_raise( float_flag_invalid STATUS_VAR); \
				6573	} \
				6574	return float_relation_unordered; \
				6575	} \
				6576	aSign = extractFloat ## s ## Sign( a ); \
				6577	bSign = extractFloat ## s ## Sign( b ); \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6578	av = float ## s ## _val(a); \
blueswir1	cd8a253	2007-11-21 18:57:44 +0000	[diff] [blame]	6579	bv = float ## s ## _val(b); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6580	if ( aSign != bSign ) { \
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6581	if ( (uint ## s ## _t) ( ( av \| bv )<<1 ) == 0 ) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6582	/* zero case */ \
				6583	return float_relation_equal; \
				6584	} else { \
				6585	return 1 - (2 * aSign); \
				6586	} \
				6587	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6588	if (av == bv) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6589	return float_relation_equal; \
				6590	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6591	return 1 - 2 * (aSign ^ ( av < bv )); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6592	} \
				6593	} \
				6594	} \
				6595	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6596	int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6597	{ \
				6598	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
				6599	} \
				6600	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6601	int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6602	{ \
				6603	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
				6604	}
				6605
				6606	COMPARE(32, 0xff)
				6607	COMPARE(64, 0x7ff)
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6608
Aurelien Jarno	f6714d3	2011-04-20 13:04:22 +0200	[diff] [blame]	6609	INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
				6610	int is_quiet STATUS_PARAM )
				6611	{
				6612	flag aSign, bSign;
				6613
				6614	if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
				6615	( extractFloatx80Frac( a )<<1 ) ) \|\|
				6616	( ( extractFloatx80Exp( b ) == 0x7fff ) &&
				6617	( extractFloatx80Frac( b )<<1 ) )) {
				6618	if (!is_quiet \|\|
				6619	floatx80_is_signaling_nan( a ) \|\|
				6620	floatx80_is_signaling_nan( b ) ) {
				6621	float_raise( float_flag_invalid STATUS_VAR);
				6622	}
				6623	return float_relation_unordered;
				6624	}
				6625	aSign = extractFloatx80Sign( a );
				6626	bSign = extractFloatx80Sign( b );
				6627	if ( aSign != bSign ) {
				6628
				6629	if ( ( ( (uint16_t) ( ( a.high \| b.high ) << 1 ) ) == 0) &&
				6630	( ( a.low \| b.low ) == 0 ) ) {
				6631	/* zero case */
				6632	return float_relation_equal;
				6633	} else {
				6634	return 1 - (2 * aSign);
				6635	}
				6636	} else {
				6637	if (a.low == b.low && a.high == b.high) {
				6638	return float_relation_equal;
				6639	} else {
				6640	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				6641	}
				6642	}
				6643	}
				6644
				6645	int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
				6646	{
				6647	return floatx80_compare_internal(a, b, 0 STATUS_VAR);
				6648	}
				6649
				6650	int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
				6651	{
				6652	return floatx80_compare_internal(a, b, 1 STATUS_VAR);
				6653	}
				6654
blueswir1	1f58732	2007-11-25 18:40:20 +0000	[diff] [blame]	6655	INLINE int float128_compare_internal( float128 a, float128 b,
				6656	int is_quiet STATUS_PARAM )
				6657	{
				6658	flag aSign, bSign;
				6659
				6660	if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
				6661	( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) ) \|\|
				6662	( ( extractFloat128Exp( b ) == 0x7fff ) &&
				6663	( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )) {
				6664	if (!is_quiet \|\|
				6665	float128_is_signaling_nan( a ) \|\|
				6666	float128_is_signaling_nan( b ) ) {
				6667	float_raise( float_flag_invalid STATUS_VAR);
				6668	}
				6669	return float_relation_unordered;
				6670	}
				6671	aSign = extractFloat128Sign( a );
				6672	bSign = extractFloat128Sign( b );
				6673	if ( aSign != bSign ) {
				6674	if ( ( ( ( a.high \| b.high )<<1 ) \| a.low \| b.low ) == 0 ) {
				6675	/* zero case */
				6676	return float_relation_equal;
				6677	} else {
				6678	return 1 - (2 * aSign);
				6679	}
				6680	} else {
				6681	if (a.low == b.low && a.high == b.high) {
				6682	return float_relation_equal;
				6683	} else {
				6684	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				6685	}
				6686	}
				6687	}
				6688
				6689	int float128_compare( float128 a, float128 b STATUS_PARAM )
				6690	{
				6691	return float128_compare_internal(a, b, 0 STATUS_VAR);
				6692	}
				6693
				6694	int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
				6695	{
				6696	return float128_compare_internal(a, b, 1 STATUS_VAR);
				6697	}
				6698
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	6699	/* min() and max() functions. These can't be implemented as
				6700	* 'compare and pick one input' because that would mishandle
				6701	* NaNs and +0 vs -0.
				6702	*/
				6703	#define MINMAX(s, nan_exp) \
				6704	INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
				6705	int ismin STATUS_PARAM ) \
				6706	{ \
				6707	flag aSign, bSign; \
				6708	uint ## s ## _t av, bv; \
				6709	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
				6710	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
				6711	if (float ## s ## _is_any_nan(a) \|\| \
				6712	float ## s ## _is_any_nan(b)) { \
				6713	return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
				6714	} \
				6715	aSign = extractFloat ## s ## Sign(a); \
				6716	bSign = extractFloat ## s ## Sign(b); \
				6717	av = float ## s ## _val(a); \
				6718	bv = float ## s ## _val(b); \
				6719	if (aSign != bSign) { \
				6720	if (ismin) { \
				6721	return aSign ? a : b; \
				6722	} else { \
				6723	return aSign ? b : a; \
				6724	} \
				6725	} else { \
				6726	if (ismin) { \
				6727	return (aSign ^ (av < bv)) ? a : b; \
				6728	} else { \
				6729	return (aSign ^ (av < bv)) ? b : a; \
				6730	} \
				6731	} \
				6732	} \
				6733	\
				6734	float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
				6735	{ \
				6736	return float ## s ## _minmax(a, b, 1 STATUS_VAR); \
				6737	} \
				6738	\
				6739	float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
				6740	{ \
				6741	return float ## s ## _minmax(a, b, 0 STATUS_VAR); \
				6742	}
				6743
				6744	MINMAX(32, 0xff)
				6745	MINMAX(64, 0x7ff)
				6746
				6747
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6748	/* Multiply A by 2 raised to the power N. */
				6749	float32 float32_scalbn( float32 a, int n STATUS_PARAM )
				6750	{
				6751	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6752	int16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6753	uint32_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6754
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	6755	a = float32_squash_input_denormal(a STATUS_VAR);
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6756	aSig = extractFloat32Frac( a );
				6757	aExp = extractFloat32Exp( a );
				6758	aSign = extractFloat32Sign( a );
				6759
				6760	if ( aExp == 0xFF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6761	if ( aSig ) {
				6762	return propagateFloat32NaN( a, a STATUS_VAR );
				6763	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6764	return a;
				6765	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6766	if ( aExp != 0 )
				6767	aSig \|= 0x00800000;
				6768	else if ( aSig == 0 )
				6769	return a;
				6770
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6771	if (n > 0x200) {
				6772	n = 0x200;
				6773	} else if (n < -0x200) {
				6774	n = -0x200;
				6775	}
				6776
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6777	aExp += n - 1;
				6778	aSig <<= 7;
				6779	return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6780	}
				6781
				6782	float64 float64_scalbn( float64 a, int n STATUS_PARAM )
				6783	{
				6784	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6785	int16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6786	uint64_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6787
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	6788	a = float64_squash_input_denormal(a STATUS_VAR);
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6789	aSig = extractFloat64Frac( a );
				6790	aExp = extractFloat64Exp( a );
				6791	aSign = extractFloat64Sign( a );
				6792
				6793	if ( aExp == 0x7FF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6794	if ( aSig ) {
				6795	return propagateFloat64NaN( a, a STATUS_VAR );
				6796	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6797	return a;
				6798	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6799	if ( aExp != 0 )
				6800	aSig \|= LIT64( 0x0010000000000000 );
				6801	else if ( aSig == 0 )
				6802	return a;
				6803
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6804	if (n > 0x1000) {
				6805	n = 0x1000;
				6806	} else if (n < -0x1000) {
				6807	n = -0x1000;
				6808	}
				6809
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6810	aExp += n - 1;
				6811	aSig <<= 10;
				6812	return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6813	}
				6814
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6815	floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
				6816	{
				6817	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6818	int32_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6819	uint64_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6820
				6821	aSig = extractFloatx80Frac( a );
				6822	aExp = extractFloatx80Exp( a );
				6823	aSign = extractFloatx80Sign( a );
				6824
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6825	if ( aExp == 0x7FFF ) {
				6826	if ( aSig<<1 ) {
				6827	return propagateFloatx80NaN( a, a STATUS_VAR );
				6828	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6829	return a;
				6830	}
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6831
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6832	if (aExp == 0 && aSig == 0)
				6833	return a;
				6834
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6835	if (n > 0x10000) {
				6836	n = 0x10000;
				6837	} else if (n < -0x10000) {
				6838	n = -0x10000;
				6839	}
				6840
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6841	aExp += n;
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6842	return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
				6843	aSign, aExp, aSig, 0 STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6844	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6845
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6846	float128 float128_scalbn( float128 a, int n STATUS_PARAM )
				6847	{
				6848	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6849	int32_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6850	uint64_t aSig0, aSig1;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6851
				6852	aSig1 = extractFloat128Frac1( a );
				6853	aSig0 = extractFloat128Frac0( a );
				6854	aExp = extractFloat128Exp( a );
				6855	aSign = extractFloat128Sign( a );
				6856	if ( aExp == 0x7FFF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6857	if ( aSig0 \| aSig1 ) {
				6858	return propagateFloat128NaN( a, a STATUS_VAR );
				6859	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6860	return a;
				6861	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6862	if ( aExp != 0 )
				6863	aSig0 \|= LIT64( 0x0001000000000000 );
				6864	else if ( aSig0 == 0 && aSig1 == 0 )
				6865	return a;
				6866
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6867	if (n > 0x10000) {
				6868	n = 0x10000;
				6869	} else if (n < -0x10000) {
				6870	n = -0x10000;
				6871	}
				6872
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6873	aExp += n - 1;
				6874	return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
				6875	STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6876
				6877	}