Blame - fpu/softfloat.c - people/peter.maydell/qemu-arm.git

blob: e66a5d46c81a6415509ef945b3b5eada2fe80d21 [file] [log] [blame]

bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1
				2	/*============================================================================
				3
				4	This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
				5	Package, Release 2b.
				6
				7	Written by John R. Hauser. This work was made possible in part by the
				8	International Computer Science Institute, located at Suite 600, 1947 Center
				9	Street, Berkeley, California 94704. Funding was partially provided by the
				10	National Science Foundation under grant MIP-9311980. The original version
				11	of this code was written as part of a project to build a fixed-point vector
				12	processor in collaboration with the University of California at Berkeley,
				13	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				14	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
				15	arithmetic/SoftFloat.html'.
				16
				17	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
				18	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
				19	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
				20	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
				21	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
				22	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
				23	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
				24	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
				25
				26	Derivative works are acceptable, even for commercial purposes, so long as
				27	(1) the source code for the derivative work includes prominent notice that
				28	the work is derivative, and (2) the source code includes prominent notice with
				29	these four paragraphs for those parts of this code that are retained.
				30
				31	=============================================================================*/
				32
				33	#include "softfloat.h"
				34
				35	/*----------------------------------------------------------------------------
				36	\| Primitive arithmetic functions, including multi-word arithmetic, and
				37	\| division and square root approximations. (Can be specialized to target if
				38	\| desired.)
				39	----------------------------------------------------------------------------/
				40	#include "softfloat-macros.h"
				41
				42	/*----------------------------------------------------------------------------
				43	\| Functions and definitions to determine: (1) whether tininess for underflow
				44	\| is detected before or after rounding by default, (2) what (if anything)
				45	\| happens when exceptions are raised, (3) how signaling NaNs are distinguished
				46	\| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				47	\| are propagated from function inputs to output. These details are target-
				48	\| specific.
				49	----------------------------------------------------------------------------/
				50	#include "softfloat-specialize.h"
				51
				52	void set_float_rounding_mode(int val STATUS_PARAM)
				53	{
				54	STATUS(float_rounding_mode) = val;
				55	}
				56
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	57	void set_float_exception_flags(int val STATUS_PARAM)
				58	{
				59	STATUS(float_exception_flags) = val;
				60	}
				61
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	62	#ifdef FLOATX80
				63	void set_floatx80_rounding_precision(int val STATUS_PARAM)
				64	{
				65	STATUS(floatx80_rounding_precision) = val;
				66	}
				67	#endif
				68
				69	/*----------------------------------------------------------------------------
				70	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				71	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
				72	\| input. If `zSign' is 1, the input is negated before being converted to an
				73	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
				74	\| is simply rounded to an integer, with the inexact exception raised if the
				75	\| input cannot be represented exactly as an integer. However, if the fixed-
				76	\| point input is too large, the invalid exception is raised and the largest
				77	\| positive or negative integer is returned.
				78	----------------------------------------------------------------------------/
				79
				80	static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
				81	{
				82	int8 roundingMode;
				83	flag roundNearestEven;
				84	int8 roundIncrement, roundBits;
				85	int32 z;
				86
				87	roundingMode = STATUS(float_rounding_mode);
				88	roundNearestEven = ( roundingMode == float_round_nearest_even );
				89	roundIncrement = 0x40;
				90	if ( ! roundNearestEven ) {
				91	if ( roundingMode == float_round_to_zero ) {
				92	roundIncrement = 0;
				93	}
				94	else {
				95	roundIncrement = 0x7F;
				96	if ( zSign ) {
				97	if ( roundingMode == float_round_up ) roundIncrement = 0;
				98	}
				99	else {
				100	if ( roundingMode == float_round_down ) roundIncrement = 0;
				101	}
				102	}
				103	}
				104	roundBits = absZ & 0x7F;
				105	absZ = ( absZ + roundIncrement )>>7;
				106	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				107	z = absZ;
				108	if ( zSign ) z = - z;
				109	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				110	float_raise( float_flag_invalid STATUS_VAR);
				111	return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				112	}
				113	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				114	return z;
				115
				116	}
				117
				118	/*----------------------------------------------------------------------------
				119	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
				120	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
				121	\| and returns the properly rounded 64-bit integer corresponding to the input.
				122	\| If `zSign' is 1, the input is negated before being converted to an integer.
				123	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
				124	\| the inexact exception raised if the input cannot be represented exactly as
				125	\| an integer. However, if the fixed-point input is too large, the invalid
				126	\| exception is raised and the largest positive or negative integer is
				127	\| returned.
				128	----------------------------------------------------------------------------/
				129
				130	static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
				131	{
				132	int8 roundingMode;
				133	flag roundNearestEven, increment;
				134	int64 z;
				135
				136	roundingMode = STATUS(float_rounding_mode);
				137	roundNearestEven = ( roundingMode == float_round_nearest_even );
				138	increment = ( (sbits64) absZ1 < 0 );
				139	if ( ! roundNearestEven ) {
				140	if ( roundingMode == float_round_to_zero ) {
				141	increment = 0;
				142	}
				143	else {
				144	if ( zSign ) {
				145	increment = ( roundingMode == float_round_down ) && absZ1;
				146	}
				147	else {
				148	increment = ( roundingMode == float_round_up ) && absZ1;
				149	}
				150	}
				151	}
				152	if ( increment ) {
				153	++absZ0;
				154	if ( absZ0 == 0 ) goto overflow;
				155	absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
				156	}
				157	z = absZ0;
				158	if ( zSign ) z = - z;
				159	if ( z && ( ( z < 0 ) ^ zSign ) ) {
				160	overflow:
				161	float_raise( float_flag_invalid STATUS_VAR);
				162	return
				163	zSign ? (sbits64) LIT64( 0x8000000000000000 )
				164	: LIT64( 0x7FFFFFFFFFFFFFFF );
				165	}
				166	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				167	return z;
				168
				169	}
				170
				171	/*----------------------------------------------------------------------------
				172	\| Returns the fraction bits of the single-precision floating-point value `a'.
				173	----------------------------------------------------------------------------/
				174
				175	INLINE bits32 extractFloat32Frac( float32 a )
				176	{
				177
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	178	return float32_val(a) & 0x007FFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	179
				180	}
				181
				182	/*----------------------------------------------------------------------------
				183	\| Returns the exponent bits of the single-precision floating-point value `a'.
				184	----------------------------------------------------------------------------/
				185
				186	INLINE int16 extractFloat32Exp( float32 a )
				187	{
				188
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	189	return ( float32_val(a)>>23 ) & 0xFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	190
				191	}
				192
				193	/*----------------------------------------------------------------------------
				194	\| Returns the sign bit of the single-precision floating-point value `a'.
				195	----------------------------------------------------------------------------/
				196
				197	INLINE flag extractFloat32Sign( float32 a )
				198	{
				199
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	200	return float32_val(a)>>31;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	201
				202	}
				203
				204	/*----------------------------------------------------------------------------
				205	\| Normalizes the subnormal single-precision floating-point value represented
				206	\| by the denormalized significand `aSig'. The normalized exponent and
				207	\| significand are stored at the locations pointed to by `zExpPtr' and
				208	\| `zSigPtr', respectively.
				209	----------------------------------------------------------------------------/
				210
				211	static void
				212	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
				213	{
				214	int8 shiftCount;
				215
				216	shiftCount = countLeadingZeros32( aSig ) - 8;
				217	*zSigPtr = aSig<<shiftCount;
				218	*zExpPtr = 1 - shiftCount;
				219
				220	}
				221
				222	/*----------------------------------------------------------------------------
				223	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				224	\| single-precision floating-point value, returning the result. After being
				225	\| shifted into the proper positions, the three fields are simply added
				226	\| together to form the result. This means that any integer portion of `zSig'
				227	\| will be added into the exponent. Since a properly normalized significand
				228	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				229	\| than the desired result exponent whenever `zSig' is a complete, normalized
				230	\| significand.
				231	----------------------------------------------------------------------------/
				232
				233	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
				234	{
				235
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	236	return make_float32(
				237	( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	238
				239	}
				240
				241	/*----------------------------------------------------------------------------
				242	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				243	\| and significand `zSig', and returns the proper single-precision floating-
				244	\| point value corresponding to the abstract input. Ordinarily, the abstract
				245	\| value is simply rounded and packed into the single-precision format, with
				246	\| the inexact exception raised if the abstract input cannot be represented
				247	\| exactly. However, if the abstract value is too large, the overflow and
				248	\| inexact exceptions are raised and an infinity or maximal finite value is
				249	\| returned. If the abstract value is too small, the input value is rounded to
				250	\| a subnormal number, and the underflow and inexact exceptions are raised if
				251	\| the abstract input cannot be represented exactly as a subnormal single-
				252	\| precision floating-point number.
				253	\| The input significand `zSig' has its binary point between bits 30
				254	\| and 29, which is 7 bits to the left of the usual location. This shifted
				255	\| significand must be normalized or smaller. If `zSig' is not normalized,
				256	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				257	\| and it must not require rounding. In the usual case that `zSig' is
				258	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				259	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				260	\| Binary Floating-Point Arithmetic.
				261	----------------------------------------------------------------------------/
				262
				263	static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
				264	{
				265	int8 roundingMode;
				266	flag roundNearestEven;
				267	int8 roundIncrement, roundBits;
				268	flag isTiny;
				269
				270	roundingMode = STATUS(float_rounding_mode);
				271	roundNearestEven = ( roundingMode == float_round_nearest_even );
				272	roundIncrement = 0x40;
				273	if ( ! roundNearestEven ) {
				274	if ( roundingMode == float_round_to_zero ) {
				275	roundIncrement = 0;
				276	}
				277	else {
				278	roundIncrement = 0x7F;
				279	if ( zSign ) {
				280	if ( roundingMode == float_round_up ) roundIncrement = 0;
				281	}
				282	else {
				283	if ( roundingMode == float_round_down ) roundIncrement = 0;
				284	}
				285	}
				286	}
				287	roundBits = zSig & 0x7F;
				288	if ( 0xFD <= (bits16) zExp ) {
				289	if ( ( 0xFD < zExp )
				290	\|\| ( ( zExp == 0xFD )
				291	&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
				292	) {
				293	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	294	return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	295	}
				296	if ( zExp < 0 ) {
				297	isTiny =
				298	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				299	\|\| ( zExp < -1 )
				300	\|\| ( zSig + roundIncrement < 0x80000000 );
				301	shift32RightJamming( zSig, - zExp, &zSig );
				302	zExp = 0;
				303	roundBits = zSig & 0x7F;
				304	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				305	}
				306	}
				307	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				308	zSig = ( zSig + roundIncrement )>>7;
				309	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				310	if ( zSig == 0 ) zExp = 0;
				311	return packFloat32( zSign, zExp, zSig );
				312
				313	}
				314
				315	/*----------------------------------------------------------------------------
				316	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				317	\| and significand `zSig', and returns the proper single-precision floating-
				318	\| point value corresponding to the abstract input. This routine is just like
				319	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
				320	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				321	\| floating-point exponent.
				322	----------------------------------------------------------------------------/
				323
				324	static float32
				325	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
				326	{
				327	int8 shiftCount;
				328
				329	shiftCount = countLeadingZeros32( zSig ) - 1;
				330	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				331
				332	}
				333
				334	/*----------------------------------------------------------------------------
				335	\| Returns the fraction bits of the double-precision floating-point value `a'.
				336	----------------------------------------------------------------------------/
				337
				338	INLINE bits64 extractFloat64Frac( float64 a )
				339	{
				340
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	341	return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	342
				343	}
				344
				345	/*----------------------------------------------------------------------------
				346	\| Returns the exponent bits of the double-precision floating-point value `a'.
				347	----------------------------------------------------------------------------/
				348
				349	INLINE int16 extractFloat64Exp( float64 a )
				350	{
				351
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	352	return ( float64_val(a)>>52 ) & 0x7FF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	353
				354	}
				355
				356	/*----------------------------------------------------------------------------
				357	\| Returns the sign bit of the double-precision floating-point value `a'.
				358	----------------------------------------------------------------------------/
				359
				360	INLINE flag extractFloat64Sign( float64 a )
				361	{
				362
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	363	return float64_val(a)>>63;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	364
				365	}
				366
				367	/*----------------------------------------------------------------------------
				368	\| Normalizes the subnormal double-precision floating-point value represented
				369	\| by the denormalized significand `aSig'. The normalized exponent and
				370	\| significand are stored at the locations pointed to by `zExpPtr' and
				371	\| `zSigPtr', respectively.
				372	----------------------------------------------------------------------------/
				373
				374	static void
				375	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
				376	{
				377	int8 shiftCount;
				378
				379	shiftCount = countLeadingZeros64( aSig ) - 11;
				380	*zSigPtr = aSig<<shiftCount;
				381	*zExpPtr = 1 - shiftCount;
				382
				383	}
				384
				385	/*----------------------------------------------------------------------------
				386	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				387	\| double-precision floating-point value, returning the result. After being
				388	\| shifted into the proper positions, the three fields are simply added
				389	\| together to form the result. This means that any integer portion of `zSig'
				390	\| will be added into the exponent. Since a properly normalized significand
				391	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				392	\| than the desired result exponent whenever `zSig' is a complete, normalized
				393	\| significand.
				394	----------------------------------------------------------------------------/
				395
				396	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
				397	{
				398
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	399	return make_float64(
				400	( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	401
				402	}
				403
				404	/*----------------------------------------------------------------------------
				405	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				406	\| and significand `zSig', and returns the proper double-precision floating-
				407	\| point value corresponding to the abstract input. Ordinarily, the abstract
				408	\| value is simply rounded and packed into the double-precision format, with
				409	\| the inexact exception raised if the abstract input cannot be represented
				410	\| exactly. However, if the abstract value is too large, the overflow and
				411	\| inexact exceptions are raised and an infinity or maximal finite value is
				412	\| returned. If the abstract value is too small, the input value is rounded
				413	\| to a subnormal number, and the underflow and inexact exceptions are raised
				414	\| if the abstract input cannot be represented exactly as a subnormal double-
				415	\| precision floating-point number.
				416	\| The input significand `zSig' has its binary point between bits 62
				417	\| and 61, which is 10 bits to the left of the usual location. This shifted
				418	\| significand must be normalized or smaller. If `zSig' is not normalized,
				419	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				420	\| and it must not require rounding. In the usual case that `zSig' is
				421	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				422	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				423	\| Binary Floating-Point Arithmetic.
				424	----------------------------------------------------------------------------/
				425
				426	static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
				427	{
				428	int8 roundingMode;
				429	flag roundNearestEven;
				430	int16 roundIncrement, roundBits;
				431	flag isTiny;
				432
				433	roundingMode = STATUS(float_rounding_mode);
				434	roundNearestEven = ( roundingMode == float_round_nearest_even );
				435	roundIncrement = 0x200;
				436	if ( ! roundNearestEven ) {
				437	if ( roundingMode == float_round_to_zero ) {
				438	roundIncrement = 0;
				439	}
				440	else {
				441	roundIncrement = 0x3FF;
				442	if ( zSign ) {
				443	if ( roundingMode == float_round_up ) roundIncrement = 0;
				444	}
				445	else {
				446	if ( roundingMode == float_round_down ) roundIncrement = 0;
				447	}
				448	}
				449	}
				450	roundBits = zSig & 0x3FF;
				451	if ( 0x7FD <= (bits16) zExp ) {
				452	if ( ( 0x7FD < zExp )
				453	\|\| ( ( zExp == 0x7FD )
				454	&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
				455	) {
				456	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	457	return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	458	}
				459	if ( zExp < 0 ) {
				460	isTiny =
				461	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				462	\|\| ( zExp < -1 )
				463	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				464	shift64RightJamming( zSig, - zExp, &zSig );
				465	zExp = 0;
				466	roundBits = zSig & 0x3FF;
				467	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				468	}
				469	}
				470	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				471	zSig = ( zSig + roundIncrement )>>10;
				472	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				473	if ( zSig == 0 ) zExp = 0;
				474	return packFloat64( zSign, zExp, zSig );
				475
				476	}
				477
				478	/*----------------------------------------------------------------------------
				479	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				480	\| and significand `zSig', and returns the proper double-precision floating-
				481	\| point value corresponding to the abstract input. This routine is just like
				482	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
				483	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				484	\| floating-point exponent.
				485	----------------------------------------------------------------------------/
				486
				487	static float64
				488	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
				489	{
				490	int8 shiftCount;
				491
				492	shiftCount = countLeadingZeros64( zSig ) - 1;
				493	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				494
				495	}
				496
				497	#ifdef FLOATX80
				498
				499	/*----------------------------------------------------------------------------
				500	\| Returns the fraction bits of the extended double-precision floating-point
				501	\| value `a'.
				502	----------------------------------------------------------------------------/
				503
				504	INLINE bits64 extractFloatx80Frac( floatx80 a )
				505	{
				506
				507	return a.low;
				508
				509	}
				510
				511	/*----------------------------------------------------------------------------
				512	\| Returns the exponent bits of the extended double-precision floating-point
				513	\| value `a'.
				514	----------------------------------------------------------------------------/
				515
				516	INLINE int32 extractFloatx80Exp( floatx80 a )
				517	{
				518
				519	return a.high & 0x7FFF;
				520
				521	}
				522
				523	/*----------------------------------------------------------------------------
				524	\| Returns the sign bit of the extended double-precision floating-point value
				525	\| `a'.
				526	----------------------------------------------------------------------------/
				527
				528	INLINE flag extractFloatx80Sign( floatx80 a )
				529	{
				530
				531	return a.high>>15;
				532
				533	}
				534
				535	/*----------------------------------------------------------------------------
				536	\| Normalizes the subnormal extended double-precision floating-point value
				537	\| represented by the denormalized significand `aSig'. The normalized exponent
				538	\| and significand are stored at the locations pointed to by `zExpPtr' and
				539	\| `zSigPtr', respectively.
				540	----------------------------------------------------------------------------/
				541
				542	static void
				543	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
				544	{
				545	int8 shiftCount;
				546
				547	shiftCount = countLeadingZeros64( aSig );
				548	*zSigPtr = aSig<<shiftCount;
				549	*zExpPtr = 1 - shiftCount;
				550
				551	}
				552
				553	/*----------------------------------------------------------------------------
				554	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				555	\| extended double-precision floating-point value, returning the result.
				556	----------------------------------------------------------------------------/
				557
				558	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
				559	{
				560	floatx80 z;
				561
				562	z.low = zSig;
				563	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
				564	return z;
				565
				566	}
				567
				568	/*----------------------------------------------------------------------------
				569	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				570	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
				571	\| and returns the proper extended double-precision floating-point value
				572	\| corresponding to the abstract input. Ordinarily, the abstract value is
				573	\| rounded and packed into the extended double-precision format, with the
				574	\| inexact exception raised if the abstract input cannot be represented
				575	\| exactly. However, if the abstract value is too large, the overflow and
				576	\| inexact exceptions are raised and an infinity or maximal finite value is
				577	\| returned. If the abstract value is too small, the input value is rounded to
				578	\| a subnormal number, and the underflow and inexact exceptions are raised if
				579	\| the abstract input cannot be represented exactly as a subnormal extended
				580	\| double-precision floating-point number.
				581	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
				582	\| number of bits as single or double precision, respectively. Otherwise, the
				583	\| result is rounded to the full precision of the extended double-precision
				584	\| format.
				585	\| The input significand must be normalized or smaller. If the input
				586	\| significand is not normalized, `zExp' must be 0; in that case, the result
				587	\| returned is a subnormal number, and it must not require rounding. The
				588	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				589	\| Floating-Point Arithmetic.
				590	----------------------------------------------------------------------------/
				591
				592	static floatx80
				593	roundAndPackFloatx80(
				594	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				595	STATUS_PARAM)
				596	{
				597	int8 roundingMode;
				598	flag roundNearestEven, increment, isTiny;
				599	int64 roundIncrement, roundMask, roundBits;
				600
				601	roundingMode = STATUS(float_rounding_mode);
				602	roundNearestEven = ( roundingMode == float_round_nearest_even );
				603	if ( roundingPrecision == 80 ) goto precision80;
				604	if ( roundingPrecision == 64 ) {
				605	roundIncrement = LIT64( 0x0000000000000400 );
				606	roundMask = LIT64( 0x00000000000007FF );
				607	}
				608	else if ( roundingPrecision == 32 ) {
				609	roundIncrement = LIT64( 0x0000008000000000 );
				610	roundMask = LIT64( 0x000000FFFFFFFFFF );
				611	}
				612	else {
				613	goto precision80;
				614	}
				615	zSig0 \|= ( zSig1 != 0 );
				616	if ( ! roundNearestEven ) {
				617	if ( roundingMode == float_round_to_zero ) {
				618	roundIncrement = 0;
				619	}
				620	else {
				621	roundIncrement = roundMask;
				622	if ( zSign ) {
				623	if ( roundingMode == float_round_up ) roundIncrement = 0;
				624	}
				625	else {
				626	if ( roundingMode == float_round_down ) roundIncrement = 0;
				627	}
				628	}
				629	}
				630	roundBits = zSig0 & roundMask;
				631	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				632	if ( ( 0x7FFE < zExp )
				633	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				634	) {
				635	goto overflow;
				636	}
				637	if ( zExp <= 0 ) {
				638	isTiny =
				639	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				640	\|\| ( zExp < 0 )
				641	\|\| ( zSig0 <= zSig0 + roundIncrement );
				642	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				643	zExp = 0;
				644	roundBits = zSig0 & roundMask;
				645	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				646	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				647	zSig0 += roundIncrement;
				648	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				649	roundIncrement = roundMask + 1;
				650	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				651	roundMask \|= roundIncrement;
				652	}
				653	zSig0 &= ~ roundMask;
				654	return packFloatx80( zSign, zExp, zSig0 );
				655	}
				656	}
				657	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				658	zSig0 += roundIncrement;
				659	if ( zSig0 < roundIncrement ) {
				660	++zExp;
				661	zSig0 = LIT64( 0x8000000000000000 );
				662	}
				663	roundIncrement = roundMask + 1;
				664	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				665	roundMask \|= roundIncrement;
				666	}
				667	zSig0 &= ~ roundMask;
				668	if ( zSig0 == 0 ) zExp = 0;
				669	return packFloatx80( zSign, zExp, zSig0 );
				670	precision80:
				671	increment = ( (sbits64) zSig1 < 0 );
				672	if ( ! roundNearestEven ) {
				673	if ( roundingMode == float_round_to_zero ) {
				674	increment = 0;
				675	}
				676	else {
				677	if ( zSign ) {
				678	increment = ( roundingMode == float_round_down ) && zSig1;
				679	}
				680	else {
				681	increment = ( roundingMode == float_round_up ) && zSig1;
				682	}
				683	}
				684	}
				685	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				686	if ( ( 0x7FFE < zExp )
				687	\|\| ( ( zExp == 0x7FFE )
				688	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				689	&& increment
				690	)
				691	) {
				692	roundMask = 0;
				693	overflow:
				694	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				695	if ( ( roundingMode == float_round_to_zero )
				696	\|\| ( zSign && ( roundingMode == float_round_up ) )
				697	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				698	) {
				699	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				700	}
				701	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				702	}
				703	if ( zExp <= 0 ) {
				704	isTiny =
				705	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				706	\|\| ( zExp < 0 )
				707	\|\| ! increment
				708	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				709	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				710	zExp = 0;
				711	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
				712	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				713	if ( roundNearestEven ) {
				714	increment = ( (sbits64) zSig1 < 0 );
				715	}
				716	else {
				717	if ( zSign ) {
				718	increment = ( roundingMode == float_round_down ) && zSig1;
				719	}
				720	else {
				721	increment = ( roundingMode == float_round_up ) && zSig1;
				722	}
				723	}
				724	if ( increment ) {
				725	++zSig0;
				726	zSig0 &=
				727	~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				728	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				729	}
				730	return packFloatx80( zSign, zExp, zSig0 );
				731	}
				732	}
				733	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				734	if ( increment ) {
				735	++zSig0;
				736	if ( zSig0 == 0 ) {
				737	++zExp;
				738	zSig0 = LIT64( 0x8000000000000000 );
				739	}
				740	else {
				741	zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				742	}
				743	}
				744	else {
				745	if ( zSig0 == 0 ) zExp = 0;
				746	}
				747	return packFloatx80( zSign, zExp, zSig0 );
				748
				749	}
				750
				751	/*----------------------------------------------------------------------------
				752	\| Takes an abstract floating-point value having sign `zSign', exponent
				753	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				754	\| and returns the proper extended double-precision floating-point value
				755	\| corresponding to the abstract input. This routine is just like
				756	\| `roundAndPackFloatx80' except that the input significand does not have to be
				757	\| normalized.
				758	----------------------------------------------------------------------------/
				759
				760	static floatx80
				761	normalizeRoundAndPackFloatx80(
				762	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				763	STATUS_PARAM)
				764	{
				765	int8 shiftCount;
				766
				767	if ( zSig0 == 0 ) {
				768	zSig0 = zSig1;
				769	zSig1 = 0;
				770	zExp -= 64;
				771	}
				772	shiftCount = countLeadingZeros64( zSig0 );
				773	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				774	zExp -= shiftCount;
				775	return
				776	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
				777
				778	}
				779
				780	#endif
				781
				782	#ifdef FLOAT128
				783
				784	/*----------------------------------------------------------------------------
				785	\| Returns the least-significant 64 fraction bits of the quadruple-precision
				786	\| floating-point value `a'.
				787	----------------------------------------------------------------------------/
				788
				789	INLINE bits64 extractFloat128Frac1( float128 a )
				790	{
				791
				792	return a.low;
				793
				794	}
				795
				796	/*----------------------------------------------------------------------------
				797	\| Returns the most-significant 48 fraction bits of the quadruple-precision
				798	\| floating-point value `a'.
				799	----------------------------------------------------------------------------/
				800
				801	INLINE bits64 extractFloat128Frac0( float128 a )
				802	{
				803
				804	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
				805
				806	}
				807
				808	/*----------------------------------------------------------------------------
				809	\| Returns the exponent bits of the quadruple-precision floating-point value
				810	\| `a'.
				811	----------------------------------------------------------------------------/
				812
				813	INLINE int32 extractFloat128Exp( float128 a )
				814	{
				815
				816	return ( a.high>>48 ) & 0x7FFF;
				817
				818	}
				819
				820	/*----------------------------------------------------------------------------
				821	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
				822	----------------------------------------------------------------------------/
				823
				824	INLINE flag extractFloat128Sign( float128 a )
				825	{
				826
				827	return a.high>>63;
				828
				829	}
				830
				831	/*----------------------------------------------------------------------------
				832	\| Normalizes the subnormal quadruple-precision floating-point value
				833	\| represented by the denormalized significand formed by the concatenation of
				834	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
				835	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
				836	\| significand are stored at the location pointed to by `zSig0Ptr', and the
				837	\| least significant 64 bits of the normalized significand are stored at the
				838	\| location pointed to by `zSig1Ptr'.
				839	----------------------------------------------------------------------------/
				840
				841	static void
				842	normalizeFloat128Subnormal(
				843	bits64 aSig0,
				844	bits64 aSig1,
				845	int32 *zExpPtr,
				846	bits64 *zSig0Ptr,
				847	bits64 *zSig1Ptr
				848	)
				849	{
				850	int8 shiftCount;
				851
				852	if ( aSig0 == 0 ) {
				853	shiftCount = countLeadingZeros64( aSig1 ) - 15;
				854	if ( shiftCount < 0 ) {
				855	*zSig0Ptr = aSig1>>( - shiftCount );
				856	*zSig1Ptr = aSig1<<( shiftCount & 63 );
				857	}
				858	else {
				859	*zSig0Ptr = aSig1<<shiftCount;
				860	*zSig1Ptr = 0;
				861	}
				862	*zExpPtr = - shiftCount - 63;
				863	}
				864	else {
				865	shiftCount = countLeadingZeros64( aSig0 ) - 15;
				866	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
				867	*zExpPtr = 1 - shiftCount;
				868	}
				869
				870	}
				871
				872	/*----------------------------------------------------------------------------
				873	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
				874	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
				875	\| floating-point value, returning the result. After being shifted into the
				876	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
				877	\| added together to form the most significant 32 bits of the result. This
				878	\| means that any integer portion of `zSig0' will be added into the exponent.
				879	\| Since a properly normalized significand will have an integer portion equal
				880	\| to 1, the `zExp' input should be 1 less than the desired result exponent
				881	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
				882	\| significand.
				883	----------------------------------------------------------------------------/
				884
				885	INLINE float128
				886	packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
				887	{
				888	float128 z;
				889
				890	z.low = zSig1;
				891	z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
				892	return z;
				893
				894	}
				895
				896	/*----------------------------------------------------------------------------
				897	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				898	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
				899	\| and `zSig2', and returns the proper quadruple-precision floating-point value
				900	\| corresponding to the abstract input. Ordinarily, the abstract value is
				901	\| simply rounded and packed into the quadruple-precision format, with the
				902	\| inexact exception raised if the abstract input cannot be represented
				903	\| exactly. However, if the abstract value is too large, the overflow and
				904	\| inexact exceptions are raised and an infinity or maximal finite value is
				905	\| returned. If the abstract value is too small, the input value is rounded to
				906	\| a subnormal number, and the underflow and inexact exceptions are raised if
				907	\| the abstract input cannot be represented exactly as a subnormal quadruple-
				908	\| precision floating-point number.
				909	\| The input significand must be normalized or smaller. If the input
				910	\| significand is not normalized, `zExp' must be 0; in that case, the result
				911	\| returned is a subnormal number, and it must not require rounding. In the
				912	\| usual case that the input significand is normalized, `zExp' must be 1 less
				913	\| than the ``true'' floating-point exponent. The handling of underflow and
				914	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				915	----------------------------------------------------------------------------/
				916
				917	static float128
				918	roundAndPackFloat128(
				919	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
				920	{
				921	int8 roundingMode;
				922	flag roundNearestEven, increment, isTiny;
				923
				924	roundingMode = STATUS(float_rounding_mode);
				925	roundNearestEven = ( roundingMode == float_round_nearest_even );
				926	increment = ( (sbits64) zSig2 < 0 );
				927	if ( ! roundNearestEven ) {
				928	if ( roundingMode == float_round_to_zero ) {
				929	increment = 0;
				930	}
				931	else {
				932	if ( zSign ) {
				933	increment = ( roundingMode == float_round_down ) && zSig2;
				934	}
				935	else {
				936	increment = ( roundingMode == float_round_up ) && zSig2;
				937	}
				938	}
				939	}
				940	if ( 0x7FFD <= (bits32) zExp ) {
				941	if ( ( 0x7FFD < zExp )
				942	\|\| ( ( zExp == 0x7FFD )
				943	&& eq128(
				944	LIT64( 0x0001FFFFFFFFFFFF ),
				945	LIT64( 0xFFFFFFFFFFFFFFFF ),
				946	zSig0,
				947	zSig1
				948	)
				949	&& increment
				950	)
				951	) {
				952	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				953	if ( ( roundingMode == float_round_to_zero )
				954	\|\| ( zSign && ( roundingMode == float_round_up ) )
				955	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				956	) {
				957	return
				958	packFloat128(
				959	zSign,
				960	0x7FFE,
				961	LIT64( 0x0000FFFFFFFFFFFF ),
				962	LIT64( 0xFFFFFFFFFFFFFFFF )
				963	);
				964	}
				965	return packFloat128( zSign, 0x7FFF, 0, 0 );
				966	}
				967	if ( zExp < 0 ) {
				968	isTiny =
				969	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				970	\|\| ( zExp < -1 )
				971	\|\| ! increment
				972	\|\| lt128(
				973	zSig0,
				974	zSig1,
				975	LIT64( 0x0001FFFFFFFFFFFF ),
				976	LIT64( 0xFFFFFFFFFFFFFFFF )
				977	);
				978	shift128ExtraRightJamming(
				979	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
				980	zExp = 0;
				981	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
				982	if ( roundNearestEven ) {
				983	increment = ( (sbits64) zSig2 < 0 );
				984	}
				985	else {
				986	if ( zSign ) {
				987	increment = ( roundingMode == float_round_down ) && zSig2;
				988	}
				989	else {
				990	increment = ( roundingMode == float_round_up ) && zSig2;
				991	}
				992	}
				993	}
				994	}
				995	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				996	if ( increment ) {
				997	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
				998	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
				999	}
				1000	else {
				1001	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
				1002	}
				1003	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1004
				1005	}
				1006
				1007	/*----------------------------------------------------------------------------
				1008	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				1009	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
				1010	\| returns the proper quadruple-precision floating-point value corresponding
				1011	\| to the abstract input. This routine is just like `roundAndPackFloat128'
				1012	\| except that the input significand has fewer bits and does not have to be
				1013	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
				1014	\| point exponent.
				1015	----------------------------------------------------------------------------/
				1016
				1017	static float128
				1018	normalizeRoundAndPackFloat128(
				1019	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
				1020	{
				1021	int8 shiftCount;
				1022	bits64 zSig2;
				1023
				1024	if ( zSig0 == 0 ) {
				1025	zSig0 = zSig1;
				1026	zSig1 = 0;
				1027	zExp -= 64;
				1028	}
				1029	shiftCount = countLeadingZeros64( zSig0 ) - 15;
				1030	if ( 0 <= shiftCount ) {
				1031	zSig2 = 0;
				1032	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1033	}
				1034	else {
				1035	shift128ExtraRightJamming(
				1036	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
				1037	}
				1038	zExp -= shiftCount;
				1039	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
				1040
				1041	}
				1042
				1043	#endif
				1044
				1045	/*----------------------------------------------------------------------------
				1046	\| Returns the result of converting the 32-bit two's complement integer `a'
				1047	\| to the single-precision floating-point format. The conversion is performed
				1048	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1049	----------------------------------------------------------------------------/
				1050
				1051	float32 int32_to_float32( int32 a STATUS_PARAM )
				1052	{
				1053	flag zSign;
				1054
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1055	if ( a == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1056	if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
				1057	zSign = ( a < 0 );
				1058	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
				1059
				1060	}
				1061
				1062	/*----------------------------------------------------------------------------
				1063	\| Returns the result of converting the 32-bit two's complement integer `a'
				1064	\| to the double-precision floating-point format. The conversion is performed
				1065	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1066	----------------------------------------------------------------------------/
				1067
				1068	float64 int32_to_float64( int32 a STATUS_PARAM )
				1069	{
				1070	flag zSign;
				1071	uint32 absA;
				1072	int8 shiftCount;
				1073	bits64 zSig;
				1074
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1075	if ( a == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1076	zSign = ( a < 0 );
				1077	absA = zSign ? - a : a;
				1078	shiftCount = countLeadingZeros32( absA ) + 21;
				1079	zSig = absA;
				1080	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
				1081
				1082	}
				1083
				1084	#ifdef FLOATX80
				1085
				1086	/*----------------------------------------------------------------------------
				1087	\| Returns the result of converting the 32-bit two's complement integer `a'
				1088	\| to the extended double-precision floating-point format. The conversion
				1089	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1090	\| Arithmetic.
				1091	----------------------------------------------------------------------------/
				1092
				1093	floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
				1094	{
				1095	flag zSign;
				1096	uint32 absA;
				1097	int8 shiftCount;
				1098	bits64 zSig;
				1099
				1100	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1101	zSign = ( a < 0 );
				1102	absA = zSign ? - a : a;
				1103	shiftCount = countLeadingZeros32( absA ) + 32;
				1104	zSig = absA;
				1105	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				1106
				1107	}
				1108
				1109	#endif
				1110
				1111	#ifdef FLOAT128
				1112
				1113	/*----------------------------------------------------------------------------
				1114	\| Returns the result of converting the 32-bit two's complement integer `a' to
				1115	\| the quadruple-precision floating-point format. The conversion is performed
				1116	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1117	----------------------------------------------------------------------------/
				1118
				1119	float128 int32_to_float128( int32 a STATUS_PARAM )
				1120	{
				1121	flag zSign;
				1122	uint32 absA;
				1123	int8 shiftCount;
				1124	bits64 zSig0;
				1125
				1126	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1127	zSign = ( a < 0 );
				1128	absA = zSign ? - a : a;
				1129	shiftCount = countLeadingZeros32( absA ) + 17;
				1130	zSig0 = absA;
				1131	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
				1132
				1133	}
				1134
				1135	#endif
				1136
				1137	/*----------------------------------------------------------------------------
				1138	\| Returns the result of converting the 64-bit two's complement integer `a'
				1139	\| to the single-precision floating-point format. The conversion is performed
				1140	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1141	----------------------------------------------------------------------------/
				1142
				1143	float32 int64_to_float32( int64 a STATUS_PARAM )
				1144	{
				1145	flag zSign;
				1146	uint64 absA;
				1147	int8 shiftCount;
				1148
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1149	if ( a == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1150	zSign = ( a < 0 );
				1151	absA = zSign ? - a : a;
				1152	shiftCount = countLeadingZeros64( absA ) - 40;
				1153	if ( 0 <= shiftCount ) {
				1154	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
				1155	}
				1156	else {
				1157	shiftCount += 7;
				1158	if ( shiftCount < 0 ) {
				1159	shift64RightJamming( absA, - shiftCount, &absA );
				1160	}
				1161	else {
				1162	absA <<= shiftCount;
				1163	}
				1164	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
				1165	}
				1166
				1167	}
				1168
j_mayer	3430b0b	2007-03-20 22:25:37 +0000	[diff] [blame]	1169	float32 uint64_to_float32( uint64 a STATUS_PARAM )
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1170	{
				1171	int8 shiftCount;
				1172
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1173	if ( a == 0 ) return float32_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1174	shiftCount = countLeadingZeros64( a ) - 40;
				1175	if ( 0 <= shiftCount ) {
				1176	return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
				1177	}
				1178	else {
				1179	shiftCount += 7;
				1180	if ( shiftCount < 0 ) {
				1181	shift64RightJamming( a, - shiftCount, &a );
				1182	}
				1183	else {
				1184	a <<= shiftCount;
				1185	}
				1186	return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
				1187	}
				1188	}
				1189
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1190	/*----------------------------------------------------------------------------
				1191	\| Returns the result of converting the 64-bit two's complement integer `a'
				1192	\| to the double-precision floating-point format. The conversion is performed
				1193	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1194	----------------------------------------------------------------------------/
				1195
				1196	float64 int64_to_float64( int64 a STATUS_PARAM )
				1197	{
				1198	flag zSign;
				1199
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1200	if ( a == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1201	if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
				1202	return packFloat64( 1, 0x43E, 0 );
				1203	}
				1204	zSign = ( a < 0 );
				1205	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
				1206
				1207	}
				1208
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1209	float64 uint64_to_float64( uint64 a STATUS_PARAM )
				1210	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1211	if ( a == 0 ) return float64_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1212	return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
				1213
				1214	}
				1215
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1216	#ifdef FLOATX80
				1217
				1218	/*----------------------------------------------------------------------------
				1219	\| Returns the result of converting the 64-bit two's complement integer `a'
				1220	\| to the extended double-precision floating-point format. The conversion
				1221	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1222	\| Arithmetic.
				1223	----------------------------------------------------------------------------/
				1224
				1225	floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
				1226	{
				1227	flag zSign;
				1228	uint64 absA;
				1229	int8 shiftCount;
				1230
				1231	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1232	zSign = ( a < 0 );
				1233	absA = zSign ? - a : a;
				1234	shiftCount = countLeadingZeros64( absA );
				1235	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
				1236
				1237	}
				1238
				1239	#endif
				1240
				1241	#ifdef FLOAT128
				1242
				1243	/*----------------------------------------------------------------------------
				1244	\| Returns the result of converting the 64-bit two's complement integer `a' to
				1245	\| the quadruple-precision floating-point format. The conversion is performed
				1246	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1247	----------------------------------------------------------------------------/
				1248
				1249	float128 int64_to_float128( int64 a STATUS_PARAM )
				1250	{
				1251	flag zSign;
				1252	uint64 absA;
				1253	int8 shiftCount;
				1254	int32 zExp;
				1255	bits64 zSig0, zSig1;
				1256
				1257	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1258	zSign = ( a < 0 );
				1259	absA = zSign ? - a : a;
				1260	shiftCount = countLeadingZeros64( absA ) + 49;
				1261	zExp = 0x406E - shiftCount;
				1262	if ( 64 <= shiftCount ) {
				1263	zSig1 = 0;
				1264	zSig0 = absA;
				1265	shiftCount -= 64;
				1266	}
				1267	else {
				1268	zSig1 = absA;
				1269	zSig0 = 0;
				1270	}
				1271	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1272	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1273
				1274	}
				1275
				1276	#endif
				1277
				1278	/*----------------------------------------------------------------------------
				1279	\| Returns the result of converting the single-precision floating-point value
				1280	\| `a' to the 32-bit two's complement integer format. The conversion is
				1281	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1282	\| Arithmetic---which means in particular that the conversion is rounded
				1283	\| according to the current rounding mode. If `a' is a NaN, the largest
				1284	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1285	\| largest integer with the same sign as `a' is returned.
				1286	----------------------------------------------------------------------------/
				1287
				1288	int32 float32_to_int32( float32 a STATUS_PARAM )
				1289	{
				1290	flag aSign;
				1291	int16 aExp, shiftCount;
				1292	bits32 aSig;
				1293	bits64 aSig64;
				1294
				1295	aSig = extractFloat32Frac( a );
				1296	aExp = extractFloat32Exp( a );
				1297	aSign = extractFloat32Sign( a );
				1298	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
				1299	if ( aExp ) aSig \|= 0x00800000;
				1300	shiftCount = 0xAF - aExp;
				1301	aSig64 = aSig;
				1302	aSig64 <<= 32;
				1303	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
				1304	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
				1305
				1306	}
				1307
				1308	/*----------------------------------------------------------------------------
				1309	\| Returns the result of converting the single-precision floating-point value
				1310	\| `a' to the 32-bit two's complement integer format. The conversion is
				1311	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1312	\| Arithmetic, except that the conversion is always rounded toward zero.
				1313	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1314	\| the conversion overflows, the largest integer with the same sign as `a' is
				1315	\| returned.
				1316	----------------------------------------------------------------------------/
				1317
				1318	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
				1319	{
				1320	flag aSign;
				1321	int16 aExp, shiftCount;
				1322	bits32 aSig;
				1323	int32 z;
				1324
				1325	aSig = extractFloat32Frac( a );
				1326	aExp = extractFloat32Exp( a );
				1327	aSign = extractFloat32Sign( a );
				1328	shiftCount = aExp - 0x9E;
				1329	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1330	if ( float32_val(a) != 0xCF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1331	float_raise( float_flag_invalid STATUS_VAR);
				1332	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				1333	}
				1334	return (sbits32) 0x80000000;
				1335	}
				1336	else if ( aExp <= 0x7E ) {
				1337	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1338	return 0;
				1339	}
				1340	aSig = ( aSig \| 0x00800000 )<<8;
				1341	z = aSig>>( - shiftCount );
				1342	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
				1343	STATUS(float_exception_flags) \|= float_flag_inexact;
				1344	}
				1345	if ( aSign ) z = - z;
				1346	return z;
				1347
				1348	}
				1349
				1350	/*----------------------------------------------------------------------------
				1351	\| Returns the result of converting the single-precision floating-point value
				1352	\| `a' to the 64-bit two's complement integer format. The conversion is
				1353	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1354	\| Arithmetic---which means in particular that the conversion is rounded
				1355	\| according to the current rounding mode. If `a' is a NaN, the largest
				1356	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1357	\| largest integer with the same sign as `a' is returned.
				1358	----------------------------------------------------------------------------/
				1359
				1360	int64 float32_to_int64( float32 a STATUS_PARAM )
				1361	{
				1362	flag aSign;
				1363	int16 aExp, shiftCount;
				1364	bits32 aSig;
				1365	bits64 aSig64, aSigExtra;
				1366
				1367	aSig = extractFloat32Frac( a );
				1368	aExp = extractFloat32Exp( a );
				1369	aSign = extractFloat32Sign( a );
				1370	shiftCount = 0xBE - aExp;
				1371	if ( shiftCount < 0 ) {
				1372	float_raise( float_flag_invalid STATUS_VAR);
				1373	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1374	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1375	}
				1376	return (sbits64) LIT64( 0x8000000000000000 );
				1377	}
				1378	if ( aExp ) aSig \|= 0x00800000;
				1379	aSig64 = aSig;
				1380	aSig64 <<= 40;
				1381	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
				1382	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
				1383
				1384	}
				1385
				1386	/*----------------------------------------------------------------------------
				1387	\| Returns the result of converting the single-precision floating-point value
				1388	\| `a' to the 64-bit two's complement integer format. The conversion is
				1389	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1390	\| Arithmetic, except that the conversion is always rounded toward zero. If
				1391	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1392	\| conversion overflows, the largest integer with the same sign as `a' is
				1393	\| returned.
				1394	----------------------------------------------------------------------------/
				1395
				1396	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
				1397	{
				1398	flag aSign;
				1399	int16 aExp, shiftCount;
				1400	bits32 aSig;
				1401	bits64 aSig64;
				1402	int64 z;
				1403
				1404	aSig = extractFloat32Frac( a );
				1405	aExp = extractFloat32Exp( a );
				1406	aSign = extractFloat32Sign( a );
				1407	shiftCount = aExp - 0xBE;
				1408	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1409	if ( float32_val(a) != 0xDF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1410	float_raise( float_flag_invalid STATUS_VAR);
				1411	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1412	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1413	}
				1414	}
				1415	return (sbits64) LIT64( 0x8000000000000000 );
				1416	}
				1417	else if ( aExp <= 0x7E ) {
				1418	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1419	return 0;
				1420	}
				1421	aSig64 = aSig \| 0x00800000;
				1422	aSig64 <<= 40;
				1423	z = aSig64>>( - shiftCount );
				1424	if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
				1425	STATUS(float_exception_flags) \|= float_flag_inexact;
				1426	}
				1427	if ( aSign ) z = - z;
				1428	return z;
				1429
				1430	}
				1431
				1432	/*----------------------------------------------------------------------------
				1433	\| Returns the result of converting the single-precision floating-point value
				1434	\| `a' to the double-precision floating-point format. The conversion is
				1435	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1436	\| Arithmetic.
				1437	----------------------------------------------------------------------------/
				1438
				1439	float64 float32_to_float64( float32 a STATUS_PARAM )
				1440	{
				1441	flag aSign;
				1442	int16 aExp;
				1443	bits32 aSig;
				1444
				1445	aSig = extractFloat32Frac( a );
				1446	aExp = extractFloat32Exp( a );
				1447	aSign = extractFloat32Sign( a );
				1448	if ( aExp == 0xFF ) {
				1449	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
				1450	return packFloat64( aSign, 0x7FF, 0 );
				1451	}
				1452	if ( aExp == 0 ) {
				1453	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				1454	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1455	--aExp;
				1456	}
				1457	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
				1458
				1459	}
				1460
				1461	#ifdef FLOATX80
				1462
				1463	/*----------------------------------------------------------------------------
				1464	\| Returns the result of converting the single-precision floating-point value
				1465	\| `a' to the extended double-precision floating-point format. The conversion
				1466	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1467	\| Arithmetic.
				1468	----------------------------------------------------------------------------/
				1469
				1470	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
				1471	{
				1472	flag aSign;
				1473	int16 aExp;
				1474	bits32 aSig;
				1475
				1476	aSig = extractFloat32Frac( a );
				1477	aExp = extractFloat32Exp( a );
				1478	aSign = extractFloat32Sign( a );
				1479	if ( aExp == 0xFF ) {
				1480	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
				1481	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1482	}
				1483	if ( aExp == 0 ) {
				1484	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1485	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1486	}
				1487	aSig \|= 0x00800000;
				1488	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
				1489
				1490	}
				1491
				1492	#endif
				1493
				1494	#ifdef FLOAT128
				1495
				1496	/*----------------------------------------------------------------------------
				1497	\| Returns the result of converting the single-precision floating-point value
				1498	\| `a' to the double-precision floating-point format. The conversion is
				1499	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1500	\| Arithmetic.
				1501	----------------------------------------------------------------------------/
				1502
				1503	float128 float32_to_float128( float32 a STATUS_PARAM )
				1504	{
				1505	flag aSign;
				1506	int16 aExp;
				1507	bits32 aSig;
				1508
				1509	aSig = extractFloat32Frac( a );
				1510	aExp = extractFloat32Exp( a );
				1511	aSign = extractFloat32Sign( a );
				1512	if ( aExp == 0xFF ) {
				1513	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
				1514	return packFloat128( aSign, 0x7FFF, 0, 0 );
				1515	}
				1516	if ( aExp == 0 ) {
				1517	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				1518	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1519	--aExp;
				1520	}
				1521	return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
				1522
				1523	}
				1524
				1525	#endif
				1526
				1527	/*----------------------------------------------------------------------------
				1528	\| Rounds the single-precision floating-point value `a' to an integer, and
				1529	\| returns the result as a single-precision floating-point value. The
				1530	\| operation is performed according to the IEC/IEEE Standard for Binary
				1531	\| Floating-Point Arithmetic.
				1532	----------------------------------------------------------------------------/
				1533
				1534	float32 float32_round_to_int( float32 a STATUS_PARAM)
				1535	{
				1536	flag aSign;
				1537	int16 aExp;
				1538	bits32 lastBitMask, roundBitsMask;
				1539	int8 roundingMode;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1540	bits32 z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1541
				1542	aExp = extractFloat32Exp( a );
				1543	if ( 0x96 <= aExp ) {
				1544	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				1545	return propagateFloat32NaN( a, a STATUS_VAR );
				1546	}
				1547	return a;
				1548	}
				1549	if ( aExp <= 0x7E ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1550	if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1551	STATUS(float_exception_flags) \|= float_flag_inexact;
				1552	aSign = extractFloat32Sign( a );
				1553	switch ( STATUS(float_rounding_mode) ) {
				1554	case float_round_nearest_even:
				1555	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				1556	return packFloat32( aSign, 0x7F, 0 );
				1557	}
				1558	break;
				1559	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1560	return make_float32(aSign ? 0xBF800000 : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1561	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1562	return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1563	}
				1564	return packFloat32( aSign, 0, 0 );
				1565	}
				1566	lastBitMask = 1;
				1567	lastBitMask <<= 0x96 - aExp;
				1568	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1569	z = float32_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1570	roundingMode = STATUS(float_rounding_mode);
				1571	if ( roundingMode == float_round_nearest_even ) {
				1572	z += lastBitMask>>1;
				1573	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1574	}
				1575	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1576	if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1577	z += roundBitsMask;
				1578	}
				1579	}
				1580	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1581	if ( z != float32_val(a) ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1582	return make_float32(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1583
				1584	}
				1585
				1586	/*----------------------------------------------------------------------------
				1587	\| Returns the result of adding the absolute values of the single-precision
				1588	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				1589	\| before being returned. `zSign' is ignored if the result is a NaN.
				1590	\| The addition is performed according to the IEC/IEEE Standard for Binary
				1591	\| Floating-Point Arithmetic.
				1592	----------------------------------------------------------------------------/
				1593
				1594	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1595	{
				1596	int16 aExp, bExp, zExp;
				1597	bits32 aSig, bSig, zSig;
				1598	int16 expDiff;
				1599
				1600	aSig = extractFloat32Frac( a );
				1601	aExp = extractFloat32Exp( a );
				1602	bSig = extractFloat32Frac( b );
				1603	bExp = extractFloat32Exp( b );
				1604	expDiff = aExp - bExp;
				1605	aSig <<= 6;
				1606	bSig <<= 6;
				1607	if ( 0 < expDiff ) {
				1608	if ( aExp == 0xFF ) {
				1609	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1610	return a;
				1611	}
				1612	if ( bExp == 0 ) {
				1613	--expDiff;
				1614	}
				1615	else {
				1616	bSig \|= 0x20000000;
				1617	}
				1618	shift32RightJamming( bSig, expDiff, &bSig );
				1619	zExp = aExp;
				1620	}
				1621	else if ( expDiff < 0 ) {
				1622	if ( bExp == 0xFF ) {
				1623	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1624	return packFloat32( zSign, 0xFF, 0 );
				1625	}
				1626	if ( aExp == 0 ) {
				1627	++expDiff;
				1628	}
				1629	else {
				1630	aSig \|= 0x20000000;
				1631	}
				1632	shift32RightJamming( aSig, - expDiff, &aSig );
				1633	zExp = bExp;
				1634	}
				1635	else {
				1636	if ( aExp == 0xFF ) {
				1637	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1638	return a;
				1639	}
				1640	if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1641	zSig = 0x40000000 + aSig + bSig;
				1642	zExp = aExp;
				1643	goto roundAndPack;
				1644	}
				1645	aSig \|= 0x20000000;
				1646	zSig = ( aSig + bSig )<<1;
				1647	--zExp;
				1648	if ( (sbits32) zSig < 0 ) {
				1649	zSig = aSig + bSig;
				1650	++zExp;
				1651	}
				1652	roundAndPack:
				1653	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1654
				1655	}
				1656
				1657	/*----------------------------------------------------------------------------
				1658	\| Returns the result of subtracting the absolute values of the single-
				1659	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				1660	\| difference is negated before being returned. `zSign' is ignored if the
				1661	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				1662	\| Standard for Binary Floating-Point Arithmetic.
				1663	----------------------------------------------------------------------------/
				1664
				1665	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1666	{
				1667	int16 aExp, bExp, zExp;
				1668	bits32 aSig, bSig, zSig;
				1669	int16 expDiff;
				1670
				1671	aSig = extractFloat32Frac( a );
				1672	aExp = extractFloat32Exp( a );
				1673	bSig = extractFloat32Frac( b );
				1674	bExp = extractFloat32Exp( b );
				1675	expDiff = aExp - bExp;
				1676	aSig <<= 7;
				1677	bSig <<= 7;
				1678	if ( 0 < expDiff ) goto aExpBigger;
				1679	if ( expDiff < 0 ) goto bExpBigger;
				1680	if ( aExp == 0xFF ) {
				1681	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1682	float_raise( float_flag_invalid STATUS_VAR);
				1683	return float32_default_nan;
				1684	}
				1685	if ( aExp == 0 ) {
				1686	aExp = 1;
				1687	bExp = 1;
				1688	}
				1689	if ( bSig < aSig ) goto aBigger;
				1690	if ( aSig < bSig ) goto bBigger;
				1691	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				1692	bExpBigger:
				1693	if ( bExp == 0xFF ) {
				1694	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1695	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1696	}
				1697	if ( aExp == 0 ) {
				1698	++expDiff;
				1699	}
				1700	else {
				1701	aSig \|= 0x40000000;
				1702	}
				1703	shift32RightJamming( aSig, - expDiff, &aSig );
				1704	bSig \|= 0x40000000;
				1705	bBigger:
				1706	zSig = bSig - aSig;
				1707	zExp = bExp;
				1708	zSign ^= 1;
				1709	goto normalizeRoundAndPack;
				1710	aExpBigger:
				1711	if ( aExp == 0xFF ) {
				1712	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1713	return a;
				1714	}
				1715	if ( bExp == 0 ) {
				1716	--expDiff;
				1717	}
				1718	else {
				1719	bSig \|= 0x40000000;
				1720	}
				1721	shift32RightJamming( bSig, expDiff, &bSig );
				1722	aSig \|= 0x40000000;
				1723	aBigger:
				1724	zSig = aSig - bSig;
				1725	zExp = aExp;
				1726	normalizeRoundAndPack:
				1727	--zExp;
				1728	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1729
				1730	}
				1731
				1732	/*----------------------------------------------------------------------------
				1733	\| Returns the result of adding the single-precision floating-point values `a'
				1734	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				1735	\| Binary Floating-Point Arithmetic.
				1736	----------------------------------------------------------------------------/
				1737
				1738	float32 float32_add( float32 a, float32 b STATUS_PARAM )
				1739	{
				1740	flag aSign, bSign;
				1741
				1742	aSign = extractFloat32Sign( a );
				1743	bSign = extractFloat32Sign( b );
				1744	if ( aSign == bSign ) {
				1745	return addFloat32Sigs( a, b, aSign STATUS_VAR);
				1746	}
				1747	else {
				1748	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1749	}
				1750
				1751	}
				1752
				1753	/*----------------------------------------------------------------------------
				1754	\| Returns the result of subtracting the single-precision floating-point values
				1755	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1756	\| for Binary Floating-Point Arithmetic.
				1757	----------------------------------------------------------------------------/
				1758
				1759	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
				1760	{
				1761	flag aSign, bSign;
				1762
				1763	aSign = extractFloat32Sign( a );
				1764	bSign = extractFloat32Sign( b );
				1765	if ( aSign == bSign ) {
				1766	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1767	}
				1768	else {
				1769	return addFloat32Sigs( a, b, aSign STATUS_VAR );
				1770	}
				1771
				1772	}
				1773
				1774	/*----------------------------------------------------------------------------
				1775	\| Returns the result of multiplying the single-precision floating-point values
				1776	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1777	\| for Binary Floating-Point Arithmetic.
				1778	----------------------------------------------------------------------------/
				1779
				1780	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
				1781	{
				1782	flag aSign, bSign, zSign;
				1783	int16 aExp, bExp, zExp;
				1784	bits32 aSig, bSig;
				1785	bits64 zSig64;
				1786	bits32 zSig;
				1787
				1788	aSig = extractFloat32Frac( a );
				1789	aExp = extractFloat32Exp( a );
				1790	aSign = extractFloat32Sign( a );
				1791	bSig = extractFloat32Frac( b );
				1792	bExp = extractFloat32Exp( b );
				1793	bSign = extractFloat32Sign( b );
				1794	zSign = aSign ^ bSign;
				1795	if ( aExp == 0xFF ) {
				1796	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1797	return propagateFloat32NaN( a, b STATUS_VAR );
				1798	}
				1799	if ( ( bExp \| bSig ) == 0 ) {
				1800	float_raise( float_flag_invalid STATUS_VAR);
				1801	return float32_default_nan;
				1802	}
				1803	return packFloat32( zSign, 0xFF, 0 );
				1804	}
				1805	if ( bExp == 0xFF ) {
				1806	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1807	if ( ( aExp \| aSig ) == 0 ) {
				1808	float_raise( float_flag_invalid STATUS_VAR);
				1809	return float32_default_nan;
				1810	}
				1811	return packFloat32( zSign, 0xFF, 0 );
				1812	}
				1813	if ( aExp == 0 ) {
				1814	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1815	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1816	}
				1817	if ( bExp == 0 ) {
				1818	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1819	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1820	}
				1821	zExp = aExp + bExp - 0x7F;
				1822	aSig = ( aSig \| 0x00800000 )<<7;
				1823	bSig = ( bSig \| 0x00800000 )<<8;
				1824	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
				1825	zSig = zSig64;
				1826	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
				1827	zSig <<= 1;
				1828	--zExp;
				1829	}
				1830	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1831
				1832	}
				1833
				1834	/*----------------------------------------------------------------------------
				1835	\| Returns the result of dividing the single-precision floating-point value `a'
				1836	\| by the corresponding value `b'. The operation is performed according to the
				1837	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1838	----------------------------------------------------------------------------/
				1839
				1840	float32 float32_div( float32 a, float32 b STATUS_PARAM )
				1841	{
				1842	flag aSign, bSign, zSign;
				1843	int16 aExp, bExp, zExp;
				1844	bits32 aSig, bSig, zSig;
				1845
				1846	aSig = extractFloat32Frac( a );
				1847	aExp = extractFloat32Exp( a );
				1848	aSign = extractFloat32Sign( a );
				1849	bSig = extractFloat32Frac( b );
				1850	bExp = extractFloat32Exp( b );
				1851	bSign = extractFloat32Sign( b );
				1852	zSign = aSign ^ bSign;
				1853	if ( aExp == 0xFF ) {
				1854	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1855	if ( bExp == 0xFF ) {
				1856	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1857	float_raise( float_flag_invalid STATUS_VAR);
				1858	return float32_default_nan;
				1859	}
				1860	return packFloat32( zSign, 0xFF, 0 );
				1861	}
				1862	if ( bExp == 0xFF ) {
				1863	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1864	return packFloat32( zSign, 0, 0 );
				1865	}
				1866	if ( bExp == 0 ) {
				1867	if ( bSig == 0 ) {
				1868	if ( ( aExp \| aSig ) == 0 ) {
				1869	float_raise( float_flag_invalid STATUS_VAR);
				1870	return float32_default_nan;
				1871	}
				1872	float_raise( float_flag_divbyzero STATUS_VAR);
				1873	return packFloat32( zSign, 0xFF, 0 );
				1874	}
				1875	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1876	}
				1877	if ( aExp == 0 ) {
				1878	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1879	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1880	}
				1881	zExp = aExp - bExp + 0x7D;
				1882	aSig = ( aSig \| 0x00800000 )<<7;
				1883	bSig = ( bSig \| 0x00800000 )<<8;
				1884	if ( bSig <= ( aSig + aSig ) ) {
				1885	aSig >>= 1;
				1886	++zExp;
				1887	}
				1888	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
				1889	if ( ( zSig & 0x3F ) == 0 ) {
				1890	zSig \|= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
				1891	}
				1892	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1893
				1894	}
				1895
				1896	/*----------------------------------------------------------------------------
				1897	\| Returns the remainder of the single-precision floating-point value `a'
				1898	\| with respect to the corresponding value `b'. The operation is performed
				1899	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1900	----------------------------------------------------------------------------/
				1901
				1902	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
				1903	{
				1904	flag aSign, bSign, zSign;
				1905	int16 aExp, bExp, expDiff;
				1906	bits32 aSig, bSig;
				1907	bits32 q;
				1908	bits64 aSig64, bSig64, q64;
				1909	bits32 alternateASig;
				1910	sbits32 sigMean;
				1911
				1912	aSig = extractFloat32Frac( a );
				1913	aExp = extractFloat32Exp( a );
				1914	aSign = extractFloat32Sign( a );
				1915	bSig = extractFloat32Frac( b );
				1916	bExp = extractFloat32Exp( b );
				1917	bSign = extractFloat32Sign( b );
				1918	if ( aExp == 0xFF ) {
				1919	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1920	return propagateFloat32NaN( a, b STATUS_VAR );
				1921	}
				1922	float_raise( float_flag_invalid STATUS_VAR);
				1923	return float32_default_nan;
				1924	}
				1925	if ( bExp == 0xFF ) {
				1926	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1927	return a;
				1928	}
				1929	if ( bExp == 0 ) {
				1930	if ( bSig == 0 ) {
				1931	float_raise( float_flag_invalid STATUS_VAR);
				1932	return float32_default_nan;
				1933	}
				1934	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1935	}
				1936	if ( aExp == 0 ) {
				1937	if ( aSig == 0 ) return a;
				1938	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1939	}
				1940	expDiff = aExp - bExp;
				1941	aSig \|= 0x00800000;
				1942	bSig \|= 0x00800000;
				1943	if ( expDiff < 32 ) {
				1944	aSig <<= 8;
				1945	bSig <<= 8;
				1946	if ( expDiff < 0 ) {
				1947	if ( expDiff < -1 ) return a;
				1948	aSig >>= 1;
				1949	}
				1950	q = ( bSig <= aSig );
				1951	if ( q ) aSig -= bSig;
				1952	if ( 0 < expDiff ) {
				1953	q = ( ( (bits64) aSig )<<32 ) / bSig;
				1954	q >>= 32 - expDiff;
				1955	bSig >>= 2;
				1956	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				1957	}
				1958	else {
				1959	aSig >>= 2;
				1960	bSig >>= 2;
				1961	}
				1962	}
				1963	else {
				1964	if ( bSig <= aSig ) aSig -= bSig;
				1965	aSig64 = ( (bits64) aSig )<<40;
				1966	bSig64 = ( (bits64) bSig )<<40;
				1967	expDiff -= 64;
				1968	while ( 0 < expDiff ) {
				1969	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1970	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1971	aSig64 = - ( ( bSig * q64 )<<38 );
				1972	expDiff -= 62;
				1973	}
				1974	expDiff += 64;
				1975	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1976	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1977	q = q64>>( 64 - expDiff );
				1978	bSig <<= 6;
				1979	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				1980	}
				1981	do {
				1982	alternateASig = aSig;
				1983	++q;
				1984	aSig -= bSig;
				1985	} while ( 0 <= (sbits32) aSig );
				1986	sigMean = aSig + alternateASig;
				1987	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				1988	aSig = alternateASig;
				1989	}
				1990	zSign = ( (sbits32) aSig < 0 );
				1991	if ( zSign ) aSig = - aSig;
				1992	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
				1993
				1994	}
				1995
				1996	/*----------------------------------------------------------------------------
				1997	\| Returns the square root of the single-precision floating-point value `a'.
				1998	\| The operation is performed according to the IEC/IEEE Standard for Binary
				1999	\| Floating-Point Arithmetic.
				2000	----------------------------------------------------------------------------/
				2001
				2002	float32 float32_sqrt( float32 a STATUS_PARAM )
				2003	{
				2004	flag aSign;
				2005	int16 aExp, zExp;
				2006	bits32 aSig, zSig;
				2007	bits64 rem, term;
				2008
				2009	aSig = extractFloat32Frac( a );
				2010	aExp = extractFloat32Exp( a );
				2011	aSign = extractFloat32Sign( a );
				2012	if ( aExp == 0xFF ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2013	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2014	if ( ! aSign ) return a;
				2015	float_raise( float_flag_invalid STATUS_VAR);
				2016	return float32_default_nan;
				2017	}
				2018	if ( aSign ) {
				2019	if ( ( aExp \| aSig ) == 0 ) return a;
				2020	float_raise( float_flag_invalid STATUS_VAR);
				2021	return float32_default_nan;
				2022	}
				2023	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2024	if ( aSig == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2025	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2026	}
				2027	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				2028	aSig = ( aSig \| 0x00800000 )<<8;
				2029	zSig = estimateSqrt32( aExp, aSig ) + 2;
				2030	if ( ( zSig & 0x7F ) <= 5 ) {
				2031	if ( zSig < 2 ) {
				2032	zSig = 0x7FFFFFFF;
				2033	goto roundAndPack;
				2034	}
				2035	aSig >>= aExp & 1;
				2036	term = ( (bits64) zSig ) * zSig;
				2037	rem = ( ( (bits64) aSig )<<32 ) - term;
				2038	while ( (sbits64) rem < 0 ) {
				2039	--zSig;
				2040	rem += ( ( (bits64) zSig )<<1 ) \| 1;
				2041	}
				2042	zSig \|= ( rem != 0 );
				2043	}
				2044	shift32RightJamming( zSig, 1, &zSig );
				2045	roundAndPack:
				2046	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
				2047
				2048	}
				2049
				2050	/*----------------------------------------------------------------------------
				2051	\| Returns 1 if the single-precision floating-point value `a' is equal to
				2052	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				2053	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2054	----------------------------------------------------------------------------/
				2055
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2056	int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2057	{
				2058
				2059	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2060	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2061	) {
				2062	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2063	float_raise( float_flag_invalid STATUS_VAR);
				2064	}
				2065	return 0;
				2066	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2067	return ( float32_val(a) == float32_val(b) ) \|\|
				2068	( (bits32) ( ( float32_val(a) \| float32_val(b) )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2069
				2070	}
				2071
				2072	/*----------------------------------------------------------------------------
				2073	\| Returns 1 if the single-precision floating-point value `a' is less than
				2074	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
				2075	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				2076	\| Arithmetic.
				2077	----------------------------------------------------------------------------/
				2078
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2079	int float32_le( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2080	{
				2081	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2082	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2083
				2084	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2085	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2086	) {
				2087	float_raise( float_flag_invalid STATUS_VAR);
				2088	return 0;
				2089	}
				2090	aSign = extractFloat32Sign( a );
				2091	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2092	av = float32_val(a);
				2093	bv = float32_val(b);
				2094	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( av \| bv )<<1 ) == 0 );
				2095	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2096
				2097	}
				2098
				2099	/*----------------------------------------------------------------------------
				2100	\| Returns 1 if the single-precision floating-point value `a' is less than
				2101	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				2102	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2103	----------------------------------------------------------------------------/
				2104
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2105	int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2106	{
				2107	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2108	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2109
				2110	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2111	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2112	) {
				2113	float_raise( float_flag_invalid STATUS_VAR);
				2114	return 0;
				2115	}
				2116	aSign = extractFloat32Sign( a );
				2117	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2118	av = float32_val(a);
				2119	bv = float32_val(b);
				2120	if ( aSign != bSign ) return aSign && ( (bits32) ( ( av \| bv )<<1 ) != 0 );
				2121	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2122
				2123	}
				2124
				2125	/*----------------------------------------------------------------------------
				2126	\| Returns 1 if the single-precision floating-point value `a' is equal to
				2127	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2128	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				2129	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2130	----------------------------------------------------------------------------/
				2131
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2132	int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2133	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2134	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2135
				2136	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2137	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2138	) {
				2139	float_raise( float_flag_invalid STATUS_VAR);
				2140	return 0;
				2141	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2142	av = float32_val(a);
				2143	bv = float32_val(b);
				2144	return ( av == bv ) \|\| ( (bits32) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2145
				2146	}
				2147
				2148	/*----------------------------------------------------------------------------
				2149	\| Returns 1 if the single-precision floating-point value `a' is less than or
				2150	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2151	\| cause an exception. Otherwise, the comparison is performed according to the
				2152	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2153	----------------------------------------------------------------------------/
				2154
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2155	int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2156	{
				2157	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2158	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2159
				2160	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2161	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2162	) {
				2163	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2164	float_raise( float_flag_invalid STATUS_VAR);
				2165	}
				2166	return 0;
				2167	}
				2168	aSign = extractFloat32Sign( a );
				2169	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2170	av = float32_val(a);
				2171	bv = float32_val(b);
				2172	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( av \| bv )<<1 ) == 0 );
				2173	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2174
				2175	}
				2176
				2177	/*----------------------------------------------------------------------------
				2178	\| Returns 1 if the single-precision floating-point value `a' is less than
				2179	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2180	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2181	\| Standard for Binary Floating-Point Arithmetic.
				2182	----------------------------------------------------------------------------/
				2183
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2184	int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2185	{
				2186	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2187	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2188
				2189	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2190	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2191	) {
				2192	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2193	float_raise( float_flag_invalid STATUS_VAR);
				2194	}
				2195	return 0;
				2196	}
				2197	aSign = extractFloat32Sign( a );
				2198	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2199	av = float32_val(a);
				2200	bv = float32_val(b);
				2201	if ( aSign != bSign ) return aSign && ( (bits32) ( ( av \| bv )<<1 ) != 0 );
				2202	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2203
				2204	}
				2205
				2206	/*----------------------------------------------------------------------------
				2207	\| Returns the result of converting the double-precision floating-point value
				2208	\| `a' to the 32-bit two's complement integer format. The conversion is
				2209	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2210	\| Arithmetic---which means in particular that the conversion is rounded
				2211	\| according to the current rounding mode. If `a' is a NaN, the largest
				2212	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2213	\| largest integer with the same sign as `a' is returned.
				2214	----------------------------------------------------------------------------/
				2215
				2216	int32 float64_to_int32( float64 a STATUS_PARAM )
				2217	{
				2218	flag aSign;
				2219	int16 aExp, shiftCount;
				2220	bits64 aSig;
				2221
				2222	aSig = extractFloat64Frac( a );
				2223	aExp = extractFloat64Exp( a );
				2224	aSign = extractFloat64Sign( a );
				2225	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2226	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2227	shiftCount = 0x42C - aExp;
				2228	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				2229	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				2230
				2231	}
				2232
				2233	/*----------------------------------------------------------------------------
				2234	\| Returns the result of converting the double-precision floating-point value
				2235	\| `a' to the 32-bit two's complement integer format. The conversion is
				2236	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2237	\| Arithmetic, except that the conversion is always rounded toward zero.
				2238	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2239	\| the conversion overflows, the largest integer with the same sign as `a' is
				2240	\| returned.
				2241	----------------------------------------------------------------------------/
				2242
				2243	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
				2244	{
				2245	flag aSign;
				2246	int16 aExp, shiftCount;
				2247	bits64 aSig, savedASig;
				2248	int32 z;
				2249
				2250	aSig = extractFloat64Frac( a );
				2251	aExp = extractFloat64Exp( a );
				2252	aSign = extractFloat64Sign( a );
				2253	if ( 0x41E < aExp ) {
				2254	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2255	goto invalid;
				2256	}
				2257	else if ( aExp < 0x3FF ) {
				2258	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2259	return 0;
				2260	}
				2261	aSig \|= LIT64( 0x0010000000000000 );
				2262	shiftCount = 0x433 - aExp;
				2263	savedASig = aSig;
				2264	aSig >>= shiftCount;
				2265	z = aSig;
				2266	if ( aSign ) z = - z;
				2267	if ( ( z < 0 ) ^ aSign ) {
				2268	invalid:
				2269	float_raise( float_flag_invalid STATUS_VAR);
				2270	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				2271	}
				2272	if ( ( aSig<<shiftCount ) != savedASig ) {
				2273	STATUS(float_exception_flags) \|= float_flag_inexact;
				2274	}
				2275	return z;
				2276
				2277	}
				2278
				2279	/*----------------------------------------------------------------------------
				2280	\| Returns the result of converting the double-precision floating-point value
				2281	\| `a' to the 64-bit two's complement integer format. The conversion is
				2282	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2283	\| Arithmetic---which means in particular that the conversion is rounded
				2284	\| according to the current rounding mode. If `a' is a NaN, the largest
				2285	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2286	\| largest integer with the same sign as `a' is returned.
				2287	----------------------------------------------------------------------------/
				2288
				2289	int64 float64_to_int64( float64 a STATUS_PARAM )
				2290	{
				2291	flag aSign;
				2292	int16 aExp, shiftCount;
				2293	bits64 aSig, aSigExtra;
				2294
				2295	aSig = extractFloat64Frac( a );
				2296	aExp = extractFloat64Exp( a );
				2297	aSign = extractFloat64Sign( a );
				2298	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2299	shiftCount = 0x433 - aExp;
				2300	if ( shiftCount <= 0 ) {
				2301	if ( 0x43E < aExp ) {
				2302	float_raise( float_flag_invalid STATUS_VAR);
				2303	if ( ! aSign
				2304	\|\| ( ( aExp == 0x7FF )
				2305	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2306	) {
				2307	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2308	}
				2309	return (sbits64) LIT64( 0x8000000000000000 );
				2310	}
				2311	aSigExtra = 0;
				2312	aSig <<= - shiftCount;
				2313	}
				2314	else {
				2315	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				2316	}
				2317	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				2318
				2319	}
				2320
				2321	/*----------------------------------------------------------------------------
				2322	\| Returns the result of converting the double-precision floating-point value
				2323	\| `a' to the 64-bit two's complement integer format. The conversion is
				2324	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2325	\| Arithmetic, except that the conversion is always rounded toward zero.
				2326	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2327	\| the conversion overflows, the largest integer with the same sign as `a' is
				2328	\| returned.
				2329	----------------------------------------------------------------------------/
				2330
				2331	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
				2332	{
				2333	flag aSign;
				2334	int16 aExp, shiftCount;
				2335	bits64 aSig;
				2336	int64 z;
				2337
				2338	aSig = extractFloat64Frac( a );
				2339	aExp = extractFloat64Exp( a );
				2340	aSign = extractFloat64Sign( a );
				2341	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2342	shiftCount = aExp - 0x433;
				2343	if ( 0 <= shiftCount ) {
				2344	if ( 0x43E <= aExp ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2345	if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2346	float_raise( float_flag_invalid STATUS_VAR);
				2347	if ( ! aSign
				2348	\|\| ( ( aExp == 0x7FF )
				2349	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2350	) {
				2351	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2352	}
				2353	}
				2354	return (sbits64) LIT64( 0x8000000000000000 );
				2355	}
				2356	z = aSig<<shiftCount;
				2357	}
				2358	else {
				2359	if ( aExp < 0x3FE ) {
				2360	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2361	return 0;
				2362	}
				2363	z = aSig>>( - shiftCount );
				2364	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
				2365	STATUS(float_exception_flags) \|= float_flag_inexact;
				2366	}
				2367	}
				2368	if ( aSign ) z = - z;
				2369	return z;
				2370
				2371	}
				2372
				2373	/*----------------------------------------------------------------------------
				2374	\| Returns the result of converting the double-precision floating-point value
				2375	\| `a' to the single-precision floating-point format. The conversion is
				2376	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2377	\| Arithmetic.
				2378	----------------------------------------------------------------------------/
				2379
				2380	float32 float64_to_float32( float64 a STATUS_PARAM )
				2381	{
				2382	flag aSign;
				2383	int16 aExp;
				2384	bits64 aSig;
				2385	bits32 zSig;
				2386
				2387	aSig = extractFloat64Frac( a );
				2388	aExp = extractFloat64Exp( a );
				2389	aSign = extractFloat64Sign( a );
				2390	if ( aExp == 0x7FF ) {
				2391	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
				2392	return packFloat32( aSign, 0xFF, 0 );
				2393	}
				2394	shift64RightJamming( aSig, 22, &aSig );
				2395	zSig = aSig;
				2396	if ( aExp \|\| zSig ) {
				2397	zSig \|= 0x40000000;
				2398	aExp -= 0x381;
				2399	}
				2400	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				2401
				2402	}
				2403
				2404	#ifdef FLOATX80
				2405
				2406	/*----------------------------------------------------------------------------
				2407	\| Returns the result of converting the double-precision floating-point value
				2408	\| `a' to the extended double-precision floating-point format. The conversion
				2409	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				2410	\| Arithmetic.
				2411	----------------------------------------------------------------------------/
				2412
				2413	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
				2414	{
				2415	flag aSign;
				2416	int16 aExp;
				2417	bits64 aSig;
				2418
				2419	aSig = extractFloat64Frac( a );
				2420	aExp = extractFloat64Exp( a );
				2421	aSign = extractFloat64Sign( a );
				2422	if ( aExp == 0x7FF ) {
				2423	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
				2424	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2425	}
				2426	if ( aExp == 0 ) {
				2427	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				2428	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2429	}
				2430	return
				2431	packFloatx80(
				2432	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				2433
				2434	}
				2435
				2436	#endif
				2437
				2438	#ifdef FLOAT128
				2439
				2440	/*----------------------------------------------------------------------------
				2441	\| Returns the result of converting the double-precision floating-point value
				2442	\| `a' to the quadruple-precision floating-point format. The conversion is
				2443	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2444	\| Arithmetic.
				2445	----------------------------------------------------------------------------/
				2446
				2447	float128 float64_to_float128( float64 a STATUS_PARAM )
				2448	{
				2449	flag aSign;
				2450	int16 aExp;
				2451	bits64 aSig, zSig0, zSig1;
				2452
				2453	aSig = extractFloat64Frac( a );
				2454	aExp = extractFloat64Exp( a );
				2455	aSign = extractFloat64Sign( a );
				2456	if ( aExp == 0x7FF ) {
				2457	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
				2458	return packFloat128( aSign, 0x7FFF, 0, 0 );
				2459	}
				2460	if ( aExp == 0 ) {
				2461	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				2462	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2463	--aExp;
				2464	}
				2465	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
				2466	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
				2467
				2468	}
				2469
				2470	#endif
				2471
				2472	/*----------------------------------------------------------------------------
				2473	\| Rounds the double-precision floating-point value `a' to an integer, and
				2474	\| returns the result as a double-precision floating-point value. The
				2475	\| operation is performed according to the IEC/IEEE Standard for Binary
				2476	\| Floating-Point Arithmetic.
				2477	----------------------------------------------------------------------------/
				2478
				2479	float64 float64_round_to_int( float64 a STATUS_PARAM )
				2480	{
				2481	flag aSign;
				2482	int16 aExp;
				2483	bits64 lastBitMask, roundBitsMask;
				2484	int8 roundingMode;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2485	bits64 z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2486
				2487	aExp = extractFloat64Exp( a );
				2488	if ( 0x433 <= aExp ) {
				2489	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				2490	return propagateFloat64NaN( a, a STATUS_VAR );
				2491	}
				2492	return a;
				2493	}
				2494	if ( aExp < 0x3FF ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2495	if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2496	STATUS(float_exception_flags) \|= float_flag_inexact;
				2497	aSign = extractFloat64Sign( a );
				2498	switch ( STATUS(float_rounding_mode) ) {
				2499	case float_round_nearest_even:
				2500	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				2501	return packFloat64( aSign, 0x3FF, 0 );
				2502	}
				2503	break;
				2504	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2505	return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2506	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2507	return make_float64(
				2508	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2509	}
				2510	return packFloat64( aSign, 0, 0 );
				2511	}
				2512	lastBitMask = 1;
				2513	lastBitMask <<= 0x433 - aExp;
				2514	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2515	z = float64_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2516	roundingMode = STATUS(float_rounding_mode);
				2517	if ( roundingMode == float_round_nearest_even ) {
				2518	z += lastBitMask>>1;
				2519	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				2520	}
				2521	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2522	if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2523	z += roundBitsMask;
				2524	}
				2525	}
				2526	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2527	if ( z != float64_val(a) )
				2528	STATUS(float_exception_flags) \|= float_flag_inexact;
				2529	return make_float64(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2530
				2531	}
				2532
pbrook	e6e5906	2006-10-22 00:18:54 +0000	[diff] [blame]	2533	float64 float64_trunc_to_int( float64 a STATUS_PARAM)
				2534	{
				2535	int oldmode;
				2536	float64 res;
				2537	oldmode = STATUS(float_rounding_mode);
				2538	STATUS(float_rounding_mode) = float_round_to_zero;
				2539	res = float64_round_to_int(a STATUS_VAR);
				2540	STATUS(float_rounding_mode) = oldmode;
				2541	return res;
				2542	}
				2543
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2544	/*----------------------------------------------------------------------------
				2545	\| Returns the result of adding the absolute values of the double-precision
				2546	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				2547	\| before being returned. `zSign' is ignored if the result is a NaN.
				2548	\| The addition is performed according to the IEC/IEEE Standard for Binary
				2549	\| Floating-Point Arithmetic.
				2550	----------------------------------------------------------------------------/
				2551
				2552	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				2553	{
				2554	int16 aExp, bExp, zExp;
				2555	bits64 aSig, bSig, zSig;
				2556	int16 expDiff;
				2557
				2558	aSig = extractFloat64Frac( a );
				2559	aExp = extractFloat64Exp( a );
				2560	bSig = extractFloat64Frac( b );
				2561	bExp = extractFloat64Exp( b );
				2562	expDiff = aExp - bExp;
				2563	aSig <<= 9;
				2564	bSig <<= 9;
				2565	if ( 0 < expDiff ) {
				2566	if ( aExp == 0x7FF ) {
				2567	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2568	return a;
				2569	}
				2570	if ( bExp == 0 ) {
				2571	--expDiff;
				2572	}
				2573	else {
				2574	bSig \|= LIT64( 0x2000000000000000 );
				2575	}
				2576	shift64RightJamming( bSig, expDiff, &bSig );
				2577	zExp = aExp;
				2578	}
				2579	else if ( expDiff < 0 ) {
				2580	if ( bExp == 0x7FF ) {
				2581	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2582	return packFloat64( zSign, 0x7FF, 0 );
				2583	}
				2584	if ( aExp == 0 ) {
				2585	++expDiff;
				2586	}
				2587	else {
				2588	aSig \|= LIT64( 0x2000000000000000 );
				2589	}
				2590	shift64RightJamming( aSig, - expDiff, &aSig );
				2591	zExp = bExp;
				2592	}
				2593	else {
				2594	if ( aExp == 0x7FF ) {
				2595	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2596	return a;
				2597	}
				2598	if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				2599	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				2600	zExp = aExp;
				2601	goto roundAndPack;
				2602	}
				2603	aSig \|= LIT64( 0x2000000000000000 );
				2604	zSig = ( aSig + bSig )<<1;
				2605	--zExp;
				2606	if ( (sbits64) zSig < 0 ) {
				2607	zSig = aSig + bSig;
				2608	++zExp;
				2609	}
				2610	roundAndPack:
				2611	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2612
				2613	}
				2614
				2615	/*----------------------------------------------------------------------------
				2616	\| Returns the result of subtracting the absolute values of the double-
				2617	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				2618	\| difference is negated before being returned. `zSign' is ignored if the
				2619	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				2620	\| Standard for Binary Floating-Point Arithmetic.
				2621	----------------------------------------------------------------------------/
				2622
				2623	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				2624	{
				2625	int16 aExp, bExp, zExp;
				2626	bits64 aSig, bSig, zSig;
				2627	int16 expDiff;
				2628
				2629	aSig = extractFloat64Frac( a );
				2630	aExp = extractFloat64Exp( a );
				2631	bSig = extractFloat64Frac( b );
				2632	bExp = extractFloat64Exp( b );
				2633	expDiff = aExp - bExp;
				2634	aSig <<= 10;
				2635	bSig <<= 10;
				2636	if ( 0 < expDiff ) goto aExpBigger;
				2637	if ( expDiff < 0 ) goto bExpBigger;
				2638	if ( aExp == 0x7FF ) {
				2639	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2640	float_raise( float_flag_invalid STATUS_VAR);
				2641	return float64_default_nan;
				2642	}
				2643	if ( aExp == 0 ) {
				2644	aExp = 1;
				2645	bExp = 1;
				2646	}
				2647	if ( bSig < aSig ) goto aBigger;
				2648	if ( aSig < bSig ) goto bBigger;
				2649	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				2650	bExpBigger:
				2651	if ( bExp == 0x7FF ) {
				2652	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2653	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				2654	}
				2655	if ( aExp == 0 ) {
				2656	++expDiff;
				2657	}
				2658	else {
				2659	aSig \|= LIT64( 0x4000000000000000 );
				2660	}
				2661	shift64RightJamming( aSig, - expDiff, &aSig );
				2662	bSig \|= LIT64( 0x4000000000000000 );
				2663	bBigger:
				2664	zSig = bSig - aSig;
				2665	zExp = bExp;
				2666	zSign ^= 1;
				2667	goto normalizeRoundAndPack;
				2668	aExpBigger:
				2669	if ( aExp == 0x7FF ) {
				2670	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2671	return a;
				2672	}
				2673	if ( bExp == 0 ) {
				2674	--expDiff;
				2675	}
				2676	else {
				2677	bSig \|= LIT64( 0x4000000000000000 );
				2678	}
				2679	shift64RightJamming( bSig, expDiff, &bSig );
				2680	aSig \|= LIT64( 0x4000000000000000 );
				2681	aBigger:
				2682	zSig = aSig - bSig;
				2683	zExp = aExp;
				2684	normalizeRoundAndPack:
				2685	--zExp;
				2686	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2687
				2688	}
				2689
				2690	/*----------------------------------------------------------------------------
				2691	\| Returns the result of adding the double-precision floating-point values `a'
				2692	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				2693	\| Binary Floating-Point Arithmetic.
				2694	----------------------------------------------------------------------------/
				2695
				2696	float64 float64_add( float64 a, float64 b STATUS_PARAM )
				2697	{
				2698	flag aSign, bSign;
				2699
				2700	aSign = extractFloat64Sign( a );
				2701	bSign = extractFloat64Sign( b );
				2702	if ( aSign == bSign ) {
				2703	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				2704	}
				2705	else {
				2706	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				2707	}
				2708
				2709	}
				2710
				2711	/*----------------------------------------------------------------------------
				2712	\| Returns the result of subtracting the double-precision floating-point values
				2713	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2714	\| for Binary Floating-Point Arithmetic.
				2715	----------------------------------------------------------------------------/
				2716
				2717	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
				2718	{
				2719	flag aSign, bSign;
				2720
				2721	aSign = extractFloat64Sign( a );
				2722	bSign = extractFloat64Sign( b );
				2723	if ( aSign == bSign ) {
				2724	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				2725	}
				2726	else {
				2727	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				2728	}
				2729
				2730	}
				2731
				2732	/*----------------------------------------------------------------------------
				2733	\| Returns the result of multiplying the double-precision floating-point values
				2734	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2735	\| for Binary Floating-Point Arithmetic.
				2736	----------------------------------------------------------------------------/
				2737
				2738	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
				2739	{
				2740	flag aSign, bSign, zSign;
				2741	int16 aExp, bExp, zExp;
				2742	bits64 aSig, bSig, zSig0, zSig1;
				2743
				2744	aSig = extractFloat64Frac( a );
				2745	aExp = extractFloat64Exp( a );
				2746	aSign = extractFloat64Sign( a );
				2747	bSig = extractFloat64Frac( b );
				2748	bExp = extractFloat64Exp( b );
				2749	bSign = extractFloat64Sign( b );
				2750	zSign = aSign ^ bSign;
				2751	if ( aExp == 0x7FF ) {
				2752	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2753	return propagateFloat64NaN( a, b STATUS_VAR );
				2754	}
				2755	if ( ( bExp \| bSig ) == 0 ) {
				2756	float_raise( float_flag_invalid STATUS_VAR);
				2757	return float64_default_nan;
				2758	}
				2759	return packFloat64( zSign, 0x7FF, 0 );
				2760	}
				2761	if ( bExp == 0x7FF ) {
				2762	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2763	if ( ( aExp \| aSig ) == 0 ) {
				2764	float_raise( float_flag_invalid STATUS_VAR);
				2765	return float64_default_nan;
				2766	}
				2767	return packFloat64( zSign, 0x7FF, 0 );
				2768	}
				2769	if ( aExp == 0 ) {
				2770	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2771	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2772	}
				2773	if ( bExp == 0 ) {
				2774	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				2775	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2776	}
				2777	zExp = aExp + bExp - 0x3FF;
				2778	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2779	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2780	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2781	zSig0 \|= ( zSig1 != 0 );
				2782	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
				2783	zSig0 <<= 1;
				2784	--zExp;
				2785	}
				2786	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
				2787
				2788	}
				2789
				2790	/*----------------------------------------------------------------------------
				2791	\| Returns the result of dividing the double-precision floating-point value `a'
				2792	\| by the corresponding value `b'. The operation is performed according to
				2793	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2794	----------------------------------------------------------------------------/
				2795
				2796	float64 float64_div( float64 a, float64 b STATUS_PARAM )
				2797	{
				2798	flag aSign, bSign, zSign;
				2799	int16 aExp, bExp, zExp;
				2800	bits64 aSig, bSig, zSig;
				2801	bits64 rem0, rem1;
				2802	bits64 term0, term1;
				2803
				2804	aSig = extractFloat64Frac( a );
				2805	aExp = extractFloat64Exp( a );
				2806	aSign = extractFloat64Sign( a );
				2807	bSig = extractFloat64Frac( b );
				2808	bExp = extractFloat64Exp( b );
				2809	bSign = extractFloat64Sign( b );
				2810	zSign = aSign ^ bSign;
				2811	if ( aExp == 0x7FF ) {
				2812	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2813	if ( bExp == 0x7FF ) {
				2814	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2815	float_raise( float_flag_invalid STATUS_VAR);
				2816	return float64_default_nan;
				2817	}
				2818	return packFloat64( zSign, 0x7FF, 0 );
				2819	}
				2820	if ( bExp == 0x7FF ) {
				2821	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2822	return packFloat64( zSign, 0, 0 );
				2823	}
				2824	if ( bExp == 0 ) {
				2825	if ( bSig == 0 ) {
				2826	if ( ( aExp \| aSig ) == 0 ) {
				2827	float_raise( float_flag_invalid STATUS_VAR);
				2828	return float64_default_nan;
				2829	}
				2830	float_raise( float_flag_divbyzero STATUS_VAR);
				2831	return packFloat64( zSign, 0x7FF, 0 );
				2832	}
				2833	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2834	}
				2835	if ( aExp == 0 ) {
				2836	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2837	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2838	}
				2839	zExp = aExp - bExp + 0x3FD;
				2840	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2841	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2842	if ( bSig <= ( aSig + aSig ) ) {
				2843	aSig >>= 1;
				2844	++zExp;
				2845	}
				2846	zSig = estimateDiv128To64( aSig, 0, bSig );
				2847	if ( ( zSig & 0x1FF ) <= 2 ) {
				2848	mul64To128( bSig, zSig, &term0, &term1 );
				2849	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2850	while ( (sbits64) rem0 < 0 ) {
				2851	--zSig;
				2852	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				2853	}
				2854	zSig \|= ( rem1 != 0 );
				2855	}
				2856	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2857
				2858	}
				2859
				2860	/*----------------------------------------------------------------------------
				2861	\| Returns the remainder of the double-precision floating-point value `a'
				2862	\| with respect to the corresponding value `b'. The operation is performed
				2863	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2864	----------------------------------------------------------------------------/
				2865
				2866	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
				2867	{
				2868	flag aSign, bSign, zSign;
				2869	int16 aExp, bExp, expDiff;
				2870	bits64 aSig, bSig;
				2871	bits64 q, alternateASig;
				2872	sbits64 sigMean;
				2873
				2874	aSig = extractFloat64Frac( a );
				2875	aExp = extractFloat64Exp( a );
				2876	aSign = extractFloat64Sign( a );
				2877	bSig = extractFloat64Frac( b );
				2878	bExp = extractFloat64Exp( b );
				2879	bSign = extractFloat64Sign( b );
				2880	if ( aExp == 0x7FF ) {
				2881	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2882	return propagateFloat64NaN( a, b STATUS_VAR );
				2883	}
				2884	float_raise( float_flag_invalid STATUS_VAR);
				2885	return float64_default_nan;
				2886	}
				2887	if ( bExp == 0x7FF ) {
				2888	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2889	return a;
				2890	}
				2891	if ( bExp == 0 ) {
				2892	if ( bSig == 0 ) {
				2893	float_raise( float_flag_invalid STATUS_VAR);
				2894	return float64_default_nan;
				2895	}
				2896	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2897	}
				2898	if ( aExp == 0 ) {
				2899	if ( aSig == 0 ) return a;
				2900	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2901	}
				2902	expDiff = aExp - bExp;
				2903	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				2904	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2905	if ( expDiff < 0 ) {
				2906	if ( expDiff < -1 ) return a;
				2907	aSig >>= 1;
				2908	}
				2909	q = ( bSig <= aSig );
				2910	if ( q ) aSig -= bSig;
				2911	expDiff -= 64;
				2912	while ( 0 < expDiff ) {
				2913	q = estimateDiv128To64( aSig, 0, bSig );
				2914	q = ( 2 < q ) ? q - 2 : 0;
				2915	aSig = - ( ( bSig>>2 ) * q );
				2916	expDiff -= 62;
				2917	}
				2918	expDiff += 64;
				2919	if ( 0 < expDiff ) {
				2920	q = estimateDiv128To64( aSig, 0, bSig );
				2921	q = ( 2 < q ) ? q - 2 : 0;
				2922	q >>= 64 - expDiff;
				2923	bSig >>= 2;
				2924	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2925	}
				2926	else {
				2927	aSig >>= 2;
				2928	bSig >>= 2;
				2929	}
				2930	do {
				2931	alternateASig = aSig;
				2932	++q;
				2933	aSig -= bSig;
				2934	} while ( 0 <= (sbits64) aSig );
				2935	sigMean = aSig + alternateASig;
				2936	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2937	aSig = alternateASig;
				2938	}
				2939	zSign = ( (sbits64) aSig < 0 );
				2940	if ( zSign ) aSig = - aSig;
				2941	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
				2942
				2943	}
				2944
				2945	/*----------------------------------------------------------------------------
				2946	\| Returns the square root of the double-precision floating-point value `a'.
				2947	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2948	\| Floating-Point Arithmetic.
				2949	----------------------------------------------------------------------------/
				2950
				2951	float64 float64_sqrt( float64 a STATUS_PARAM )
				2952	{
				2953	flag aSign;
				2954	int16 aExp, zExp;
				2955	bits64 aSig, zSig, doubleZSig;
				2956	bits64 rem0, rem1, term0, term1;
				2957
				2958	aSig = extractFloat64Frac( a );
				2959	aExp = extractFloat64Exp( a );
				2960	aSign = extractFloat64Sign( a );
				2961	if ( aExp == 0x7FF ) {
				2962	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
				2963	if ( ! aSign ) return a;
				2964	float_raise( float_flag_invalid STATUS_VAR);
				2965	return float64_default_nan;
				2966	}
				2967	if ( aSign ) {
				2968	if ( ( aExp \| aSig ) == 0 ) return a;
				2969	float_raise( float_flag_invalid STATUS_VAR);
				2970	return float64_default_nan;
				2971	}
				2972	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2973	if ( aSig == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2974	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2975	}
				2976	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				2977	aSig \|= LIT64( 0x0010000000000000 );
				2978	zSig = estimateSqrt32( aExp, aSig>>21 );
				2979	aSig <<= 9 - ( aExp & 1 );
				2980	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
				2981	if ( ( zSig & 0x1FF ) <= 5 ) {
				2982	doubleZSig = zSig<<1;
				2983	mul64To128( zSig, zSig, &term0, &term1 );
				2984	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2985	while ( (sbits64) rem0 < 0 ) {
				2986	--zSig;
				2987	doubleZSig -= 2;
				2988	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
				2989	}
				2990	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				2991	}
				2992	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
				2993
				2994	}
				2995
				2996	/*----------------------------------------------------------------------------
				2997	\| Returns 1 if the double-precision floating-point value `a' is equal to the
				2998	\| corresponding value `b', and 0 otherwise. The comparison is performed
				2999	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3000	----------------------------------------------------------------------------/
				3001
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3002	int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3003	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3004	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3005
				3006	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3007	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3008	) {
				3009	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3010	float_raise( float_flag_invalid STATUS_VAR);
				3011	}
				3012	return 0;
				3013	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3014	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3015	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3016	return ( av == bv ) \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3017
				3018	}
				3019
				3020	/*----------------------------------------------------------------------------
				3021	\| Returns 1 if the double-precision floating-point value `a' is less than or
				3022	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
				3023	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3024	\| Arithmetic.
				3025	----------------------------------------------------------------------------/
				3026
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3027	int float64_le( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3028	{
				3029	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3030	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3031
				3032	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3033	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3034	) {
				3035	float_raise( float_flag_invalid STATUS_VAR);
				3036	return 0;
				3037	}
				3038	aSign = extractFloat64Sign( a );
				3039	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3040	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3041	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3042	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
				3043	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3044
				3045	}
				3046
				3047	/*----------------------------------------------------------------------------
				3048	\| Returns 1 if the double-precision floating-point value `a' is less than
				3049	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				3050	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3051	----------------------------------------------------------------------------/
				3052
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3053	int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3054	{
				3055	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3056	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3057
				3058	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3059	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3060	) {
				3061	float_raise( float_flag_invalid STATUS_VAR);
				3062	return 0;
				3063	}
				3064	aSign = extractFloat64Sign( a );
				3065	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3066	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3067	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3068	if ( aSign != bSign ) return aSign && ( (bits64) ( ( av \| bv )<<1 ) != 0 );
				3069	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3070
				3071	}
				3072
				3073	/*----------------------------------------------------------------------------
				3074	\| Returns 1 if the double-precision floating-point value `a' is equal to the
				3075	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
				3076	\| if either operand is a NaN. Otherwise, the comparison is performed
				3077	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3078	----------------------------------------------------------------------------/
				3079
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3080	int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3081	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3082	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3083
				3084	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3085	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3086	) {
				3087	float_raise( float_flag_invalid STATUS_VAR);
				3088	return 0;
				3089	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3090	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3091	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3092	return ( av == bv ) \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3093
				3094	}
				3095
				3096	/*----------------------------------------------------------------------------
				3097	\| Returns 1 if the double-precision floating-point value `a' is less than or
				3098	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				3099	\| cause an exception. Otherwise, the comparison is performed according to the
				3100	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3101	----------------------------------------------------------------------------/
				3102
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3103	int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3104	{
				3105	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3106	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3107
				3108	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3109	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3110	) {
				3111	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3112	float_raise( float_flag_invalid STATUS_VAR);
				3113	}
				3114	return 0;
				3115	}
				3116	aSign = extractFloat64Sign( a );
				3117	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3118	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3119	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3120	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
				3121	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3122
				3123	}
				3124
				3125	/*----------------------------------------------------------------------------
				3126	\| Returns 1 if the double-precision floating-point value `a' is less than
				3127	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				3128	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				3129	\| Standard for Binary Floating-Point Arithmetic.
				3130	----------------------------------------------------------------------------/
				3131
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3132	int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3133	{
				3134	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3135	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3136
				3137	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3138	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3139	) {
				3140	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3141	float_raise( float_flag_invalid STATUS_VAR);
				3142	}
				3143	return 0;
				3144	}
				3145	aSign = extractFloat64Sign( a );
				3146	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3147	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3148	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3149	if ( aSign != bSign ) return aSign && ( (bits64) ( ( av \| bv )<<1 ) != 0 );
				3150	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3151
				3152	}
				3153
				3154	#ifdef FLOATX80
				3155
				3156	/*----------------------------------------------------------------------------
				3157	\| Returns the result of converting the extended double-precision floating-
				3158	\| point value `a' to the 32-bit two's complement integer format. The
				3159	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3160	\| Floating-Point Arithmetic---which means in particular that the conversion
				3161	\| is rounded according to the current rounding mode. If `a' is a NaN, the
				3162	\| largest positive integer is returned. Otherwise, if the conversion
				3163	\| overflows, the largest integer with the same sign as `a' is returned.
				3164	----------------------------------------------------------------------------/
				3165
				3166	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
				3167	{
				3168	flag aSign;
				3169	int32 aExp, shiftCount;
				3170	bits64 aSig;
				3171
				3172	aSig = extractFloatx80Frac( a );
				3173	aExp = extractFloatx80Exp( a );
				3174	aSign = extractFloatx80Sign( a );
				3175	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				3176	shiftCount = 0x4037 - aExp;
				3177	if ( shiftCount <= 0 ) shiftCount = 1;
				3178	shift64RightJamming( aSig, shiftCount, &aSig );
				3179	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				3180
				3181	}
				3182
				3183	/*----------------------------------------------------------------------------
				3184	\| Returns the result of converting the extended double-precision floating-
				3185	\| point value `a' to the 32-bit two's complement integer format. The
				3186	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3187	\| Floating-Point Arithmetic, except that the conversion is always rounded
				3188	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				3189	\| Otherwise, if the conversion overflows, the largest integer with the same
				3190	\| sign as `a' is returned.
				3191	----------------------------------------------------------------------------/
				3192
				3193	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
				3194	{
				3195	flag aSign;
				3196	int32 aExp, shiftCount;
				3197	bits64 aSig, savedASig;
				3198	int32 z;
				3199
				3200	aSig = extractFloatx80Frac( a );
				3201	aExp = extractFloatx80Exp( a );
				3202	aSign = extractFloatx80Sign( a );
				3203	if ( 0x401E < aExp ) {
				3204	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				3205	goto invalid;
				3206	}
				3207	else if ( aExp < 0x3FFF ) {
				3208	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3209	return 0;
				3210	}
				3211	shiftCount = 0x403E - aExp;
				3212	savedASig = aSig;
				3213	aSig >>= shiftCount;
				3214	z = aSig;
				3215	if ( aSign ) z = - z;
				3216	if ( ( z < 0 ) ^ aSign ) {
				3217	invalid:
				3218	float_raise( float_flag_invalid STATUS_VAR);
				3219	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				3220	}
				3221	if ( ( aSig<<shiftCount ) != savedASig ) {
				3222	STATUS(float_exception_flags) \|= float_flag_inexact;
				3223	}
				3224	return z;
				3225
				3226	}
				3227
				3228	/*----------------------------------------------------------------------------
				3229	\| Returns the result of converting the extended double-precision floating-
				3230	\| point value `a' to the 64-bit two's complement integer format. The
				3231	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3232	\| Floating-Point Arithmetic---which means in particular that the conversion
				3233	\| is rounded according to the current rounding mode. If `a' is a NaN,
				3234	\| the largest positive integer is returned. Otherwise, if the conversion
				3235	\| overflows, the largest integer with the same sign as `a' is returned.
				3236	----------------------------------------------------------------------------/
				3237
				3238	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
				3239	{
				3240	flag aSign;
				3241	int32 aExp, shiftCount;
				3242	bits64 aSig, aSigExtra;
				3243
				3244	aSig = extractFloatx80Frac( a );
				3245	aExp = extractFloatx80Exp( a );
				3246	aSign = extractFloatx80Sign( a );
				3247	shiftCount = 0x403E - aExp;
				3248	if ( shiftCount <= 0 ) {
				3249	if ( shiftCount ) {
				3250	float_raise( float_flag_invalid STATUS_VAR);
				3251	if ( ! aSign
				3252	\|\| ( ( aExp == 0x7FFF )
				3253	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
				3254	) {
				3255	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3256	}
				3257	return (sbits64) LIT64( 0x8000000000000000 );
				3258	}
				3259	aSigExtra = 0;
				3260	}
				3261	else {
				3262	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				3263	}
				3264	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				3265
				3266	}
				3267
				3268	/*----------------------------------------------------------------------------
				3269	\| Returns the result of converting the extended double-precision floating-
				3270	\| point value `a' to the 64-bit two's complement integer format. The
				3271	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3272	\| Floating-Point Arithmetic, except that the conversion is always rounded
				3273	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				3274	\| Otherwise, if the conversion overflows, the largest integer with the same
				3275	\| sign as `a' is returned.
				3276	----------------------------------------------------------------------------/
				3277
				3278	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
				3279	{
				3280	flag aSign;
				3281	int32 aExp, shiftCount;
				3282	bits64 aSig;
				3283	int64 z;
				3284
				3285	aSig = extractFloatx80Frac( a );
				3286	aExp = extractFloatx80Exp( a );
				3287	aSign = extractFloatx80Sign( a );
				3288	shiftCount = aExp - 0x403E;
				3289	if ( 0 <= shiftCount ) {
				3290	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
				3291	if ( ( a.high != 0xC03E ) \|\| aSig ) {
				3292	float_raise( float_flag_invalid STATUS_VAR);
				3293	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
				3294	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3295	}
				3296	}
				3297	return (sbits64) LIT64( 0x8000000000000000 );
				3298	}
				3299	else if ( aExp < 0x3FFF ) {
				3300	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3301	return 0;
				3302	}
				3303	z = aSig>>( - shiftCount );
				3304	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
				3305	STATUS(float_exception_flags) \|= float_flag_inexact;
				3306	}
				3307	if ( aSign ) z = - z;
				3308	return z;
				3309
				3310	}
				3311
				3312	/*----------------------------------------------------------------------------
				3313	\| Returns the result of converting the extended double-precision floating-
				3314	\| point value `a' to the single-precision floating-point format. The
				3315	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3316	\| Floating-Point Arithmetic.
				3317	----------------------------------------------------------------------------/
				3318
				3319	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
				3320	{
				3321	flag aSign;
				3322	int32 aExp;
				3323	bits64 aSig;
				3324
				3325	aSig = extractFloatx80Frac( a );
				3326	aExp = extractFloatx80Exp( a );
				3327	aSign = extractFloatx80Sign( a );
				3328	if ( aExp == 0x7FFF ) {
				3329	if ( (bits64) ( aSig<<1 ) ) {
				3330	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
				3331	}
				3332	return packFloat32( aSign, 0xFF, 0 );
				3333	}
				3334	shift64RightJamming( aSig, 33, &aSig );
				3335	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				3336	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
				3337
				3338	}
				3339
				3340	/*----------------------------------------------------------------------------
				3341	\| Returns the result of converting the extended double-precision floating-
				3342	\| point value `a' to the double-precision floating-point format. The
				3343	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3344	\| Floating-Point Arithmetic.
				3345	----------------------------------------------------------------------------/
				3346
				3347	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
				3348	{
				3349	flag aSign;
				3350	int32 aExp;
				3351	bits64 aSig, zSig;
				3352
				3353	aSig = extractFloatx80Frac( a );
				3354	aExp = extractFloatx80Exp( a );
				3355	aSign = extractFloatx80Sign( a );
				3356	if ( aExp == 0x7FFF ) {
				3357	if ( (bits64) ( aSig<<1 ) ) {
				3358	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
				3359	}
				3360	return packFloat64( aSign, 0x7FF, 0 );
				3361	}
				3362	shift64RightJamming( aSig, 1, &zSig );
				3363	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				3364	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
				3365
				3366	}
				3367
				3368	#ifdef FLOAT128
				3369
				3370	/*----------------------------------------------------------------------------
				3371	\| Returns the result of converting the extended double-precision floating-
				3372	\| point value `a' to the quadruple-precision floating-point format. The
				3373	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3374	\| Floating-Point Arithmetic.
				3375	----------------------------------------------------------------------------/
				3376
				3377	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
				3378	{
				3379	flag aSign;
				3380	int16 aExp;
				3381	bits64 aSig, zSig0, zSig1;
				3382
				3383	aSig = extractFloatx80Frac( a );
				3384	aExp = extractFloatx80Exp( a );
				3385	aSign = extractFloatx80Sign( a );
				3386	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
				3387	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
				3388	}
				3389	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
				3390	return packFloat128( aSign, aExp, zSig0, zSig1 );
				3391
				3392	}
				3393
				3394	#endif
				3395
				3396	/*----------------------------------------------------------------------------
				3397	\| Rounds the extended double-precision floating-point value `a' to an integer,
				3398	\| and returns the result as an extended quadruple-precision floating-point
				3399	\| value. The operation is performed according to the IEC/IEEE Standard for
				3400	\| Binary Floating-Point Arithmetic.
				3401	----------------------------------------------------------------------------/
				3402
				3403	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
				3404	{
				3405	flag aSign;
				3406	int32 aExp;
				3407	bits64 lastBitMask, roundBitsMask;
				3408	int8 roundingMode;
				3409	floatx80 z;
				3410
				3411	aExp = extractFloatx80Exp( a );
				3412	if ( 0x403E <= aExp ) {
				3413	if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
				3414	return propagateFloatx80NaN( a, a STATUS_VAR );
				3415	}
				3416	return a;
				3417	}
				3418	if ( aExp < 0x3FFF ) {
				3419	if ( ( aExp == 0 )
				3420	&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
				3421	return a;
				3422	}
				3423	STATUS(float_exception_flags) \|= float_flag_inexact;
				3424	aSign = extractFloatx80Sign( a );
				3425	switch ( STATUS(float_rounding_mode) ) {
				3426	case float_round_nearest_even:
				3427	if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
				3428	) {
				3429	return
				3430	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				3431	}
				3432	break;
				3433	case float_round_down:
				3434	return
				3435	aSign ?
				3436	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				3437	: packFloatx80( 0, 0, 0 );
				3438	case float_round_up:
				3439	return
				3440	aSign ? packFloatx80( 1, 0, 0 )
				3441	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				3442	}
				3443	return packFloatx80( aSign, 0, 0 );
				3444	}
				3445	lastBitMask = 1;
				3446	lastBitMask <<= 0x403E - aExp;
				3447	roundBitsMask = lastBitMask - 1;
				3448	z = a;
				3449	roundingMode = STATUS(float_rounding_mode);
				3450	if ( roundingMode == float_round_nearest_even ) {
				3451	z.low += lastBitMask>>1;
				3452	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				3453	}
				3454	else if ( roundingMode != float_round_to_zero ) {
				3455	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				3456	z.low += roundBitsMask;
				3457	}
				3458	}
				3459	z.low &= ~ roundBitsMask;
				3460	if ( z.low == 0 ) {
				3461	++z.high;
				3462	z.low = LIT64( 0x8000000000000000 );
				3463	}
				3464	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3465	return z;
				3466
				3467	}
				3468
				3469	/*----------------------------------------------------------------------------
				3470	\| Returns the result of adding the absolute values of the extended double-
				3471	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
				3472	\| negated before being returned. `zSign' is ignored if the result is a NaN.
				3473	\| The addition is performed according to the IEC/IEEE Standard for Binary
				3474	\| Floating-Point Arithmetic.
				3475	----------------------------------------------------------------------------/
				3476
				3477	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
				3478	{
				3479	int32 aExp, bExp, zExp;
				3480	bits64 aSig, bSig, zSig0, zSig1;
				3481	int32 expDiff;
				3482
				3483	aSig = extractFloatx80Frac( a );
				3484	aExp = extractFloatx80Exp( a );
				3485	bSig = extractFloatx80Frac( b );
				3486	bExp = extractFloatx80Exp( b );
				3487	expDiff = aExp - bExp;
				3488	if ( 0 < expDiff ) {
				3489	if ( aExp == 0x7FFF ) {
				3490	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3491	return a;
				3492	}
				3493	if ( bExp == 0 ) --expDiff;
				3494	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				3495	zExp = aExp;
				3496	}
				3497	else if ( expDiff < 0 ) {
				3498	if ( bExp == 0x7FFF ) {
				3499	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3500	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3501	}
				3502	if ( aExp == 0 ) ++expDiff;
				3503	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				3504	zExp = bExp;
				3505	}
				3506	else {
				3507	if ( aExp == 0x7FFF ) {
				3508	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				3509	return propagateFloatx80NaN( a, b STATUS_VAR );
				3510	}
				3511	return a;
				3512	}
				3513	zSig1 = 0;
				3514	zSig0 = aSig + bSig;
				3515	if ( aExp == 0 ) {
				3516	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				3517	goto roundAndPack;
				3518	}
				3519	zExp = aExp;
				3520	goto shiftRight1;
				3521	}
				3522	zSig0 = aSig + bSig;
				3523	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
				3524	shiftRight1:
				3525	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				3526	zSig0 \|= LIT64( 0x8000000000000000 );
				3527	++zExp;
				3528	roundAndPack:
				3529	return
				3530	roundAndPackFloatx80(
				3531	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3532
				3533	}
				3534
				3535	/*----------------------------------------------------------------------------
				3536	\| Returns the result of subtracting the absolute values of the extended
				3537	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
				3538	\| difference is negated before being returned. `zSign' is ignored if the
				3539	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				3540	\| Standard for Binary Floating-Point Arithmetic.
				3541	----------------------------------------------------------------------------/
				3542
				3543	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
				3544	{
				3545	int32 aExp, bExp, zExp;
				3546	bits64 aSig, bSig, zSig0, zSig1;
				3547	int32 expDiff;
				3548	floatx80 z;
				3549
				3550	aSig = extractFloatx80Frac( a );
				3551	aExp = extractFloatx80Exp( a );
				3552	bSig = extractFloatx80Frac( b );
				3553	bExp = extractFloatx80Exp( b );
				3554	expDiff = aExp - bExp;
				3555	if ( 0 < expDiff ) goto aExpBigger;
				3556	if ( expDiff < 0 ) goto bExpBigger;
				3557	if ( aExp == 0x7FFF ) {
				3558	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				3559	return propagateFloatx80NaN( a, b STATUS_VAR );
				3560	}
				3561	float_raise( float_flag_invalid STATUS_VAR);
				3562	z.low = floatx80_default_nan_low;
				3563	z.high = floatx80_default_nan_high;
				3564	return z;
				3565	}
				3566	if ( aExp == 0 ) {
				3567	aExp = 1;
				3568	bExp = 1;
				3569	}
				3570	zSig1 = 0;
				3571	if ( bSig < aSig ) goto aBigger;
				3572	if ( aSig < bSig ) goto bBigger;
				3573	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				3574	bExpBigger:
				3575	if ( bExp == 0x7FFF ) {
				3576	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3577	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3578	}
				3579	if ( aExp == 0 ) ++expDiff;
				3580	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				3581	bBigger:
				3582	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				3583	zExp = bExp;
				3584	zSign ^= 1;
				3585	goto normalizeRoundAndPack;
				3586	aExpBigger:
				3587	if ( aExp == 0x7FFF ) {
				3588	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3589	return a;
				3590	}
				3591	if ( bExp == 0 ) --expDiff;
				3592	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				3593	aBigger:
				3594	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				3595	zExp = aExp;
				3596	normalizeRoundAndPack:
				3597	return
				3598	normalizeRoundAndPackFloatx80(
				3599	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3600
				3601	}
				3602
				3603	/*----------------------------------------------------------------------------
				3604	\| Returns the result of adding the extended double-precision floating-point
				3605	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				3606	\| Standard for Binary Floating-Point Arithmetic.
				3607	----------------------------------------------------------------------------/
				3608
				3609	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
				3610	{
				3611	flag aSign, bSign;
				3612
				3613	aSign = extractFloatx80Sign( a );
				3614	bSign = extractFloatx80Sign( b );
				3615	if ( aSign == bSign ) {
				3616	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				3617	}
				3618	else {
				3619	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				3620	}
				3621
				3622	}
				3623
				3624	/*----------------------------------------------------------------------------
				3625	\| Returns the result of subtracting the extended double-precision floating-
				3626	\| point values `a' and `b'. The operation is performed according to the
				3627	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3628	----------------------------------------------------------------------------/
				3629
				3630	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
				3631	{
				3632	flag aSign, bSign;
				3633
				3634	aSign = extractFloatx80Sign( a );
				3635	bSign = extractFloatx80Sign( b );
				3636	if ( aSign == bSign ) {
				3637	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				3638	}
				3639	else {
				3640	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				3641	}
				3642
				3643	}
				3644
				3645	/*----------------------------------------------------------------------------
				3646	\| Returns the result of multiplying the extended double-precision floating-
				3647	\| point values `a' and `b'. The operation is performed according to the
				3648	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3649	----------------------------------------------------------------------------/
				3650
				3651	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
				3652	{
				3653	flag aSign, bSign, zSign;
				3654	int32 aExp, bExp, zExp;
				3655	bits64 aSig, bSig, zSig0, zSig1;
				3656	floatx80 z;
				3657
				3658	aSig = extractFloatx80Frac( a );
				3659	aExp = extractFloatx80Exp( a );
				3660	aSign = extractFloatx80Sign( a );
				3661	bSig = extractFloatx80Frac( b );
				3662	bExp = extractFloatx80Exp( b );
				3663	bSign = extractFloatx80Sign( b );
				3664	zSign = aSign ^ bSign;
				3665	if ( aExp == 0x7FFF ) {
				3666	if ( (bits64) ( aSig<<1 )
				3667	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3668	return propagateFloatx80NaN( a, b STATUS_VAR );
				3669	}
				3670	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				3671	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3672	}
				3673	if ( bExp == 0x7FFF ) {
				3674	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3675	if ( ( aExp \| aSig ) == 0 ) {
				3676	invalid:
				3677	float_raise( float_flag_invalid STATUS_VAR);
				3678	z.low = floatx80_default_nan_low;
				3679	z.high = floatx80_default_nan_high;
				3680	return z;
				3681	}
				3682	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3683	}
				3684	if ( aExp == 0 ) {
				3685	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3686	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3687	}
				3688	if ( bExp == 0 ) {
				3689	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3690	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3691	}
				3692	zExp = aExp + bExp - 0x3FFE;
				3693	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				3694	if ( 0 < (sbits64) zSig0 ) {
				3695	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				3696	--zExp;
				3697	}
				3698	return
				3699	roundAndPackFloatx80(
				3700	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3701
				3702	}
				3703
				3704	/*----------------------------------------------------------------------------
				3705	\| Returns the result of dividing the extended double-precision floating-point
				3706	\| value `a' by the corresponding value `b'. The operation is performed
				3707	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3708	----------------------------------------------------------------------------/
				3709
				3710	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
				3711	{
				3712	flag aSign, bSign, zSign;
				3713	int32 aExp, bExp, zExp;
				3714	bits64 aSig, bSig, zSig0, zSig1;
				3715	bits64 rem0, rem1, rem2, term0, term1, term2;
				3716	floatx80 z;
				3717
				3718	aSig = extractFloatx80Frac( a );
				3719	aExp = extractFloatx80Exp( a );
				3720	aSign = extractFloatx80Sign( a );
				3721	bSig = extractFloatx80Frac( b );
				3722	bExp = extractFloatx80Exp( b );
				3723	bSign = extractFloatx80Sign( b );
				3724	zSign = aSign ^ bSign;
				3725	if ( aExp == 0x7FFF ) {
				3726	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3727	if ( bExp == 0x7FFF ) {
				3728	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3729	goto invalid;
				3730	}
				3731	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3732	}
				3733	if ( bExp == 0x7FFF ) {
				3734	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3735	return packFloatx80( zSign, 0, 0 );
				3736	}
				3737	if ( bExp == 0 ) {
				3738	if ( bSig == 0 ) {
				3739	if ( ( aExp \| aSig ) == 0 ) {
				3740	invalid:
				3741	float_raise( float_flag_invalid STATUS_VAR);
				3742	z.low = floatx80_default_nan_low;
				3743	z.high = floatx80_default_nan_high;
				3744	return z;
				3745	}
				3746	float_raise( float_flag_divbyzero STATUS_VAR);
				3747	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3748	}
				3749	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3750	}
				3751	if ( aExp == 0 ) {
				3752	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3753	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3754	}
				3755	zExp = aExp - bExp + 0x3FFE;
				3756	rem1 = 0;
				3757	if ( bSig <= aSig ) {
				3758	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				3759	++zExp;
				3760	}
				3761	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				3762	mul64To128( bSig, zSig0, &term0, &term1 );
				3763	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
				3764	while ( (sbits64) rem0 < 0 ) {
				3765	--zSig0;
				3766	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3767	}
				3768	zSig1 = estimateDiv128To64( rem1, 0, bSig );
				3769	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
				3770	mul64To128( bSig, zSig1, &term1, &term2 );
				3771	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3772	while ( (sbits64) rem1 < 0 ) {
				3773	--zSig1;
				3774	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				3775	}
				3776	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				3777	}
				3778	return
				3779	roundAndPackFloatx80(
				3780	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3781
				3782	}
				3783
				3784	/*----------------------------------------------------------------------------
				3785	\| Returns the remainder of the extended double-precision floating-point value
				3786	\| `a' with respect to the corresponding value `b'. The operation is performed
				3787	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3788	----------------------------------------------------------------------------/
				3789
				3790	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
				3791	{
				3792	flag aSign, bSign, zSign;
				3793	int32 aExp, bExp, expDiff;
				3794	bits64 aSig0, aSig1, bSig;
				3795	bits64 q, term0, term1, alternateASig0, alternateASig1;
				3796	floatx80 z;
				3797
				3798	aSig0 = extractFloatx80Frac( a );
				3799	aExp = extractFloatx80Exp( a );
				3800	aSign = extractFloatx80Sign( a );
				3801	bSig = extractFloatx80Frac( b );
				3802	bExp = extractFloatx80Exp( b );
				3803	bSign = extractFloatx80Sign( b );
				3804	if ( aExp == 0x7FFF ) {
				3805	if ( (bits64) ( aSig0<<1 )
				3806	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3807	return propagateFloatx80NaN( a, b STATUS_VAR );
				3808	}
				3809	goto invalid;
				3810	}
				3811	if ( bExp == 0x7FFF ) {
				3812	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3813	return a;
				3814	}
				3815	if ( bExp == 0 ) {
				3816	if ( bSig == 0 ) {
				3817	invalid:
				3818	float_raise( float_flag_invalid STATUS_VAR);
				3819	z.low = floatx80_default_nan_low;
				3820	z.high = floatx80_default_nan_high;
				3821	return z;
				3822	}
				3823	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3824	}
				3825	if ( aExp == 0 ) {
				3826	if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
				3827	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3828	}
				3829	bSig \|= LIT64( 0x8000000000000000 );
				3830	zSign = aSign;
				3831	expDiff = aExp - bExp;
				3832	aSig1 = 0;
				3833	if ( expDiff < 0 ) {
				3834	if ( expDiff < -1 ) return a;
				3835	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				3836	expDiff = 0;
				3837	}
				3838	q = ( bSig <= aSig0 );
				3839	if ( q ) aSig0 -= bSig;
				3840	expDiff -= 64;
				3841	while ( 0 < expDiff ) {
				3842	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3843	q = ( 2 < q ) ? q - 2 : 0;
				3844	mul64To128( bSig, q, &term0, &term1 );
				3845	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3846	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				3847	expDiff -= 62;
				3848	}
				3849	expDiff += 64;
				3850	if ( 0 < expDiff ) {
				3851	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3852	q = ( 2 < q ) ? q - 2 : 0;
				3853	q >>= 64 - expDiff;
				3854	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				3855	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3856	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				3857	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				3858	++q;
				3859	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3860	}
				3861	}
				3862	else {
				3863	term1 = 0;
				3864	term0 = bSig;
				3865	}
				3866	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				3867	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3868	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3869	&& ( q & 1 ) )
				3870	) {
				3871	aSig0 = alternateASig0;
				3872	aSig1 = alternateASig1;
				3873	zSign = ! zSign;
				3874	}
				3875	return
				3876	normalizeRoundAndPackFloatx80(
				3877	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
				3878
				3879	}
				3880
				3881	/*----------------------------------------------------------------------------
				3882	\| Returns the square root of the extended double-precision floating-point
				3883	\| value `a'. The operation is performed according to the IEC/IEEE Standard
				3884	\| for Binary Floating-Point Arithmetic.
				3885	----------------------------------------------------------------------------/
				3886
				3887	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
				3888	{
				3889	flag aSign;
				3890	int32 aExp, zExp;
				3891	bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
				3892	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				3893	floatx80 z;
				3894
				3895	aSig0 = extractFloatx80Frac( a );
				3896	aExp = extractFloatx80Exp( a );
				3897	aSign = extractFloatx80Sign( a );
				3898	if ( aExp == 0x7FFF ) {
				3899	if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
				3900	if ( ! aSign ) return a;
				3901	goto invalid;
				3902	}
				3903	if ( aSign ) {
				3904	if ( ( aExp \| aSig0 ) == 0 ) return a;
				3905	invalid:
				3906	float_raise( float_flag_invalid STATUS_VAR);
				3907	z.low = floatx80_default_nan_low;
				3908	z.high = floatx80_default_nan_high;
				3909	return z;
				3910	}
				3911	if ( aExp == 0 ) {
				3912	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				3913	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3914	}
				3915	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				3916	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				3917	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
				3918	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				3919	doubleZSig0 = zSig0<<1;
				3920	mul64To128( zSig0, zSig0, &term0, &term1 );
				3921	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				3922	while ( (sbits64) rem0 < 0 ) {
				3923	--zSig0;
				3924	doubleZSig0 -= 2;
				3925	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				3926	}
				3927	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				3928	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
				3929	if ( zSig1 == 0 ) zSig1 = 1;
				3930	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				3931	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3932	mul64To128( zSig1, zSig1, &term2, &term3 );
				3933	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				3934	while ( (sbits64) rem1 < 0 ) {
				3935	--zSig1;
				3936	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				3937	term3 \|= 1;
				3938	term2 \|= doubleZSig0;
				3939	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				3940	}
				3941	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				3942	}
				3943	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
				3944	zSig0 \|= doubleZSig0;
				3945	return
				3946	roundAndPackFloatx80(
				3947	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
				3948
				3949	}
				3950
				3951	/*----------------------------------------------------------------------------
				3952	\| Returns 1 if the extended double-precision floating-point value `a' is
				3953	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
				3954	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3955	\| Arithmetic.
				3956	----------------------------------------------------------------------------/
				3957
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3958	int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3959	{
				3960
				3961	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3962	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3963	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3964	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3965	) {
				3966	if ( floatx80_is_signaling_nan( a )
				3967	\|\| floatx80_is_signaling_nan( b ) ) {
				3968	float_raise( float_flag_invalid STATUS_VAR);
				3969	}
				3970	return 0;
				3971	}
				3972	return
				3973	( a.low == b.low )
				3974	&& ( ( a.high == b.high )
				3975	\|\| ( ( a.low == 0 )
				3976	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3977	);
				3978
				3979	}
				3980
				3981	/*----------------------------------------------------------------------------
				3982	\| Returns 1 if the extended double-precision floating-point value `a' is
				3983	\| less than or equal to the corresponding value `b', and 0 otherwise. The
				3984	\| comparison is performed according to the IEC/IEEE Standard for Binary
				3985	\| Floating-Point Arithmetic.
				3986	----------------------------------------------------------------------------/
				3987
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3988	int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3989	{
				3990	flag aSign, bSign;
				3991
				3992	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3993	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3994	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3995	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3996	) {
				3997	float_raise( float_flag_invalid STATUS_VAR);
				3998	return 0;
				3999	}
				4000	aSign = extractFloatx80Sign( a );
				4001	bSign = extractFloatx80Sign( b );
				4002	if ( aSign != bSign ) {
				4003	return
				4004	aSign
				4005	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4006	== 0 );
				4007	}
				4008	return
				4009	aSign ? le128( b.high, b.low, a.high, a.low )
				4010	: le128( a.high, a.low, b.high, b.low );
				4011
				4012	}
				4013
				4014	/*----------------------------------------------------------------------------
				4015	\| Returns 1 if the extended double-precision floating-point value `a' is
				4016	\| less than the corresponding value `b', and 0 otherwise. The comparison
				4017	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4018	\| Arithmetic.
				4019	----------------------------------------------------------------------------/
				4020
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4021	int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4022	{
				4023	flag aSign, bSign;
				4024
				4025	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4026	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4027	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4028	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4029	) {
				4030	float_raise( float_flag_invalid STATUS_VAR);
				4031	return 0;
				4032	}
				4033	aSign = extractFloatx80Sign( a );
				4034	bSign = extractFloatx80Sign( b );
				4035	if ( aSign != bSign ) {
				4036	return
				4037	aSign
				4038	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4039	!= 0 );
				4040	}
				4041	return
				4042	aSign ? lt128( b.high, b.low, a.high, a.low )
				4043	: lt128( a.high, a.low, b.high, b.low );
				4044
				4045	}
				4046
				4047	/*----------------------------------------------------------------------------
				4048	\| Returns 1 if the extended double-precision floating-point value `a' is equal
				4049	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
				4050	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				4051	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4052	----------------------------------------------------------------------------/
				4053
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4054	int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4055	{
				4056
				4057	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4058	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4059	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4060	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4061	) {
				4062	float_raise( float_flag_invalid STATUS_VAR);
				4063	return 0;
				4064	}
				4065	return
				4066	( a.low == b.low )
				4067	&& ( ( a.high == b.high )
				4068	\|\| ( ( a.low == 0 )
				4069	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				4070	);
				4071
				4072	}
				4073
				4074	/*----------------------------------------------------------------------------
				4075	\| Returns 1 if the extended double-precision floating-point value `a' is less
				4076	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				4077	\| do not cause an exception. Otherwise, the comparison is performed according
				4078	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4079	----------------------------------------------------------------------------/
				4080
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4081	int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4082	{
				4083	flag aSign, bSign;
				4084
				4085	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4086	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4087	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4088	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4089	) {
				4090	if ( floatx80_is_signaling_nan( a )
				4091	\|\| floatx80_is_signaling_nan( b ) ) {
				4092	float_raise( float_flag_invalid STATUS_VAR);
				4093	}
				4094	return 0;
				4095	}
				4096	aSign = extractFloatx80Sign( a );
				4097	bSign = extractFloatx80Sign( b );
				4098	if ( aSign != bSign ) {
				4099	return
				4100	aSign
				4101	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4102	== 0 );
				4103	}
				4104	return
				4105	aSign ? le128( b.high, b.low, a.high, a.low )
				4106	: le128( a.high, a.low, b.high, b.low );
				4107
				4108	}
				4109
				4110	/*----------------------------------------------------------------------------
				4111	\| Returns 1 if the extended double-precision floating-point value `a' is less
				4112	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				4113	\| an exception. Otherwise, the comparison is performed according to the
				4114	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4115	----------------------------------------------------------------------------/
				4116
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4117	int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4118	{
				4119	flag aSign, bSign;
				4120
				4121	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4122	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4123	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4124	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4125	) {
				4126	if ( floatx80_is_signaling_nan( a )
				4127	\|\| floatx80_is_signaling_nan( b ) ) {
				4128	float_raise( float_flag_invalid STATUS_VAR);
				4129	}
				4130	return 0;
				4131	}
				4132	aSign = extractFloatx80Sign( a );
				4133	bSign = extractFloatx80Sign( b );
				4134	if ( aSign != bSign ) {
				4135	return
				4136	aSign
				4137	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4138	!= 0 );
				4139	}
				4140	return
				4141	aSign ? lt128( b.high, b.low, a.high, a.low )
				4142	: lt128( a.high, a.low, b.high, b.low );
				4143
				4144	}
				4145
				4146	#endif
				4147
				4148	#ifdef FLOAT128
				4149
				4150	/*----------------------------------------------------------------------------
				4151	\| Returns the result of converting the quadruple-precision floating-point
				4152	\| value `a' to the 32-bit two's complement integer format. The conversion
				4153	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4154	\| Arithmetic---which means in particular that the conversion is rounded
				4155	\| according to the current rounding mode. If `a' is a NaN, the largest
				4156	\| positive integer is returned. Otherwise, if the conversion overflows, the
				4157	\| largest integer with the same sign as `a' is returned.
				4158	----------------------------------------------------------------------------/
				4159
				4160	int32 float128_to_int32( float128 a STATUS_PARAM )
				4161	{
				4162	flag aSign;
				4163	int32 aExp, shiftCount;
				4164	bits64 aSig0, aSig1;
				4165
				4166	aSig1 = extractFloat128Frac1( a );
				4167	aSig0 = extractFloat128Frac0( a );
				4168	aExp = extractFloat128Exp( a );
				4169	aSign = extractFloat128Sign( a );
				4170	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
				4171	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4172	aSig0 \|= ( aSig1 != 0 );
				4173	shiftCount = 0x4028 - aExp;
				4174	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
				4175	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
				4176
				4177	}
				4178
				4179	/*----------------------------------------------------------------------------
				4180	\| Returns the result of converting the quadruple-precision floating-point
				4181	\| value `a' to the 32-bit two's complement integer format. The conversion
				4182	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4183	\| Arithmetic, except that the conversion is always rounded toward zero. If
				4184	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				4185	\| conversion overflows, the largest integer with the same sign as `a' is
				4186	\| returned.
				4187	----------------------------------------------------------------------------/
				4188
				4189	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
				4190	{
				4191	flag aSign;
				4192	int32 aExp, shiftCount;
				4193	bits64 aSig0, aSig1, savedASig;
				4194	int32 z;
				4195
				4196	aSig1 = extractFloat128Frac1( a );
				4197	aSig0 = extractFloat128Frac0( a );
				4198	aExp = extractFloat128Exp( a );
				4199	aSign = extractFloat128Sign( a );
				4200	aSig0 \|= ( aSig1 != 0 );
				4201	if ( 0x401E < aExp ) {
				4202	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
				4203	goto invalid;
				4204	}
				4205	else if ( aExp < 0x3FFF ) {
				4206	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4207	return 0;
				4208	}
				4209	aSig0 \|= LIT64( 0x0001000000000000 );
				4210	shiftCount = 0x402F - aExp;
				4211	savedASig = aSig0;
				4212	aSig0 >>= shiftCount;
				4213	z = aSig0;
				4214	if ( aSign ) z = - z;
				4215	if ( ( z < 0 ) ^ aSign ) {
				4216	invalid:
				4217	float_raise( float_flag_invalid STATUS_VAR);
				4218	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				4219	}
				4220	if ( ( aSig0<<shiftCount ) != savedASig ) {
				4221	STATUS(float_exception_flags) \|= float_flag_inexact;
				4222	}
				4223	return z;
				4224
				4225	}
				4226
				4227	/*----------------------------------------------------------------------------
				4228	\| Returns the result of converting the quadruple-precision floating-point
				4229	\| value `a' to the 64-bit two's complement integer format. The conversion
				4230	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4231	\| Arithmetic---which means in particular that the conversion is rounded
				4232	\| according to the current rounding mode. If `a' is a NaN, the largest
				4233	\| positive integer is returned. Otherwise, if the conversion overflows, the
				4234	\| largest integer with the same sign as `a' is returned.
				4235	----------------------------------------------------------------------------/
				4236
				4237	int64 float128_to_int64( float128 a STATUS_PARAM )
				4238	{
				4239	flag aSign;
				4240	int32 aExp, shiftCount;
				4241	bits64 aSig0, aSig1;
				4242
				4243	aSig1 = extractFloat128Frac1( a );
				4244	aSig0 = extractFloat128Frac0( a );
				4245	aExp = extractFloat128Exp( a );
				4246	aSign = extractFloat128Sign( a );
				4247	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4248	shiftCount = 0x402F - aExp;
				4249	if ( shiftCount <= 0 ) {
				4250	if ( 0x403E < aExp ) {
				4251	float_raise( float_flag_invalid STATUS_VAR);
				4252	if ( ! aSign
				4253	\|\| ( ( aExp == 0x7FFF )
				4254	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
				4255	)
				4256	) {
				4257	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4258	}
				4259	return (sbits64) LIT64( 0x8000000000000000 );
				4260	}
				4261	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
				4262	}
				4263	else {
				4264	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
				4265	}
				4266	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
				4267
				4268	}
				4269
				4270	/*----------------------------------------------------------------------------
				4271	\| Returns the result of converting the quadruple-precision floating-point
				4272	\| value `a' to the 64-bit two's complement integer format. The conversion
				4273	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4274	\| Arithmetic, except that the conversion is always rounded toward zero.
				4275	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				4276	\| the conversion overflows, the largest integer with the same sign as `a' is
				4277	\| returned.
				4278	----------------------------------------------------------------------------/
				4279
				4280	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
				4281	{
				4282	flag aSign;
				4283	int32 aExp, shiftCount;
				4284	bits64 aSig0, aSig1;
				4285	int64 z;
				4286
				4287	aSig1 = extractFloat128Frac1( a );
				4288	aSig0 = extractFloat128Frac0( a );
				4289	aExp = extractFloat128Exp( a );
				4290	aSign = extractFloat128Sign( a );
				4291	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4292	shiftCount = aExp - 0x402F;
				4293	if ( 0 < shiftCount ) {
				4294	if ( 0x403E <= aExp ) {
				4295	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
				4296	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
				4297	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
				4298	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4299	}
				4300	else {
				4301	float_raise( float_flag_invalid STATUS_VAR);
				4302	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
				4303	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4304	}
				4305	}
				4306	return (sbits64) LIT64( 0x8000000000000000 );
				4307	}
				4308	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
				4309	if ( (bits64) ( aSig1<<shiftCount ) ) {
				4310	STATUS(float_exception_flags) \|= float_flag_inexact;
				4311	}
				4312	}
				4313	else {
				4314	if ( aExp < 0x3FFF ) {
				4315	if ( aExp \| aSig0 \| aSig1 ) {
				4316	STATUS(float_exception_flags) \|= float_flag_inexact;
				4317	}
				4318	return 0;
				4319	}
				4320	z = aSig0>>( - shiftCount );
				4321	if ( aSig1
				4322	\|\| ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
				4323	STATUS(float_exception_flags) \|= float_flag_inexact;
				4324	}
				4325	}
				4326	if ( aSign ) z = - z;
				4327	return z;
				4328
				4329	}
				4330
				4331	/*----------------------------------------------------------------------------
				4332	\| Returns the result of converting the quadruple-precision floating-point
				4333	\| value `a' to the single-precision floating-point format. The conversion
				4334	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4335	\| Arithmetic.
				4336	----------------------------------------------------------------------------/
				4337
				4338	float32 float128_to_float32( float128 a STATUS_PARAM )
				4339	{
				4340	flag aSign;
				4341	int32 aExp;
				4342	bits64 aSig0, aSig1;
				4343	bits32 zSig;
				4344
				4345	aSig1 = extractFloat128Frac1( a );
				4346	aSig0 = extractFloat128Frac0( a );
				4347	aExp = extractFloat128Exp( a );
				4348	aSign = extractFloat128Sign( a );
				4349	if ( aExp == 0x7FFF ) {
				4350	if ( aSig0 \| aSig1 ) {
				4351	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
				4352	}
				4353	return packFloat32( aSign, 0xFF, 0 );
				4354	}
				4355	aSig0 \|= ( aSig1 != 0 );
				4356	shift64RightJamming( aSig0, 18, &aSig0 );
				4357	zSig = aSig0;
				4358	if ( aExp \|\| zSig ) {
				4359	zSig \|= 0x40000000;
				4360	aExp -= 0x3F81;
				4361	}
				4362	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				4363
				4364	}
				4365
				4366	/*----------------------------------------------------------------------------
				4367	\| Returns the result of converting the quadruple-precision floating-point
				4368	\| value `a' to the double-precision floating-point format. The conversion
				4369	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4370	\| Arithmetic.
				4371	----------------------------------------------------------------------------/
				4372
				4373	float64 float128_to_float64( float128 a STATUS_PARAM )
				4374	{
				4375	flag aSign;
				4376	int32 aExp;
				4377	bits64 aSig0, aSig1;
				4378
				4379	aSig1 = extractFloat128Frac1( a );
				4380	aSig0 = extractFloat128Frac0( a );
				4381	aExp = extractFloat128Exp( a );
				4382	aSign = extractFloat128Sign( a );
				4383	if ( aExp == 0x7FFF ) {
				4384	if ( aSig0 \| aSig1 ) {
				4385	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
				4386	}
				4387	return packFloat64( aSign, 0x7FF, 0 );
				4388	}
				4389	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				4390	aSig0 \|= ( aSig1 != 0 );
				4391	if ( aExp \|\| aSig0 ) {
				4392	aSig0 \|= LIT64( 0x4000000000000000 );
				4393	aExp -= 0x3C01;
				4394	}
				4395	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
				4396
				4397	}
				4398
				4399	#ifdef FLOATX80
				4400
				4401	/*----------------------------------------------------------------------------
				4402	\| Returns the result of converting the quadruple-precision floating-point
				4403	\| value `a' to the extended double-precision floating-point format. The
				4404	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4405	\| Floating-Point Arithmetic.
				4406	----------------------------------------------------------------------------/
				4407
				4408	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
				4409	{
				4410	flag aSign;
				4411	int32 aExp;
				4412	bits64 aSig0, aSig1;
				4413
				4414	aSig1 = extractFloat128Frac1( a );
				4415	aSig0 = extractFloat128Frac0( a );
				4416	aExp = extractFloat128Exp( a );
				4417	aSign = extractFloat128Sign( a );
				4418	if ( aExp == 0x7FFF ) {
				4419	if ( aSig0 \| aSig1 ) {
				4420	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
				4421	}
				4422	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4423	}
				4424	if ( aExp == 0 ) {
				4425	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
				4426	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4427	}
				4428	else {
				4429	aSig0 \|= LIT64( 0x0001000000000000 );
				4430	}
				4431	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
				4432	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
				4433
				4434	}
				4435
				4436	#endif
				4437
				4438	/*----------------------------------------------------------------------------
				4439	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
				4440	\| returns the result as a quadruple-precision floating-point value. The
				4441	\| operation is performed according to the IEC/IEEE Standard for Binary
				4442	\| Floating-Point Arithmetic.
				4443	----------------------------------------------------------------------------/
				4444
				4445	float128 float128_round_to_int( float128 a STATUS_PARAM )
				4446	{
				4447	flag aSign;
				4448	int32 aExp;
				4449	bits64 lastBitMask, roundBitsMask;
				4450	int8 roundingMode;
				4451	float128 z;
				4452
				4453	aExp = extractFloat128Exp( a );
				4454	if ( 0x402F <= aExp ) {
				4455	if ( 0x406F <= aExp ) {
				4456	if ( ( aExp == 0x7FFF )
				4457	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
				4458	) {
				4459	return propagateFloat128NaN( a, a STATUS_VAR );
				4460	}
				4461	return a;
				4462	}
				4463	lastBitMask = 1;
				4464	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
				4465	roundBitsMask = lastBitMask - 1;
				4466	z = a;
				4467	roundingMode = STATUS(float_rounding_mode);
				4468	if ( roundingMode == float_round_nearest_even ) {
				4469	if ( lastBitMask ) {
				4470	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
				4471	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				4472	}
				4473	else {
				4474	if ( (sbits64) z.low < 0 ) {
				4475	++z.high;
				4476	if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
				4477	}
				4478	}
				4479	}
				4480	else if ( roundingMode != float_round_to_zero ) {
				4481	if ( extractFloat128Sign( z )
				4482	^ ( roundingMode == float_round_up ) ) {
				4483	add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
				4484	}
				4485	}
				4486	z.low &= ~ roundBitsMask;
				4487	}
				4488	else {
				4489	if ( aExp < 0x3FFF ) {
				4490	if ( ( ( (bits64) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
				4491	STATUS(float_exception_flags) \|= float_flag_inexact;
				4492	aSign = extractFloat128Sign( a );
				4493	switch ( STATUS(float_rounding_mode) ) {
				4494	case float_round_nearest_even:
				4495	if ( ( aExp == 0x3FFE )
				4496	&& ( extractFloat128Frac0( a )
				4497	\| extractFloat128Frac1( a ) )
				4498	) {
				4499	return packFloat128( aSign, 0x3FFF, 0, 0 );
				4500	}
				4501	break;
				4502	case float_round_down:
				4503	return
				4504	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
				4505	: packFloat128( 0, 0, 0, 0 );
				4506	case float_round_up:
				4507	return
				4508	aSign ? packFloat128( 1, 0, 0, 0 )
				4509	: packFloat128( 0, 0x3FFF, 0, 0 );
				4510	}
				4511	return packFloat128( aSign, 0, 0, 0 );
				4512	}
				4513	lastBitMask = 1;
				4514	lastBitMask <<= 0x402F - aExp;
				4515	roundBitsMask = lastBitMask - 1;
				4516	z.low = 0;
				4517	z.high = a.high;
				4518	roundingMode = STATUS(float_rounding_mode);
				4519	if ( roundingMode == float_round_nearest_even ) {
				4520	z.high += lastBitMask>>1;
				4521	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
				4522	z.high &= ~ lastBitMask;
				4523	}
				4524	}
				4525	else if ( roundingMode != float_round_to_zero ) {
				4526	if ( extractFloat128Sign( z )
				4527	^ ( roundingMode == float_round_up ) ) {
				4528	z.high \|= ( a.low != 0 );
				4529	z.high += roundBitsMask;
				4530	}
				4531	}
				4532	z.high &= ~ roundBitsMask;
				4533	}
				4534	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
				4535	STATUS(float_exception_flags) \|= float_flag_inexact;
				4536	}
				4537	return z;
				4538
				4539	}
				4540
				4541	/*----------------------------------------------------------------------------
				4542	\| Returns the result of adding the absolute values of the quadruple-precision
				4543	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				4544	\| before being returned. `zSign' is ignored if the result is a NaN.
				4545	\| The addition is performed according to the IEC/IEEE Standard for Binary
				4546	\| Floating-Point Arithmetic.
				4547	----------------------------------------------------------------------------/
				4548
				4549	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				4550	{
				4551	int32 aExp, bExp, zExp;
				4552	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				4553	int32 expDiff;
				4554
				4555	aSig1 = extractFloat128Frac1( a );
				4556	aSig0 = extractFloat128Frac0( a );
				4557	aExp = extractFloat128Exp( a );
				4558	bSig1 = extractFloat128Frac1( b );
				4559	bSig0 = extractFloat128Frac0( b );
				4560	bExp = extractFloat128Exp( b );
				4561	expDiff = aExp - bExp;
				4562	if ( 0 < expDiff ) {
				4563	if ( aExp == 0x7FFF ) {
				4564	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4565	return a;
				4566	}
				4567	if ( bExp == 0 ) {
				4568	--expDiff;
				4569	}
				4570	else {
				4571	bSig0 \|= LIT64( 0x0001000000000000 );
				4572	}
				4573	shift128ExtraRightJamming(
				4574	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
				4575	zExp = aExp;
				4576	}
				4577	else if ( expDiff < 0 ) {
				4578	if ( bExp == 0x7FFF ) {
				4579	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4580	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4581	}
				4582	if ( aExp == 0 ) {
				4583	++expDiff;
				4584	}
				4585	else {
				4586	aSig0 \|= LIT64( 0x0001000000000000 );
				4587	}
				4588	shift128ExtraRightJamming(
				4589	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
				4590	zExp = bExp;
				4591	}
				4592	else {
				4593	if ( aExp == 0x7FFF ) {
				4594	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				4595	return propagateFloat128NaN( a, b STATUS_VAR );
				4596	}
				4597	return a;
				4598	}
				4599	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4600	if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
				4601	zSig2 = 0;
				4602	zSig0 \|= LIT64( 0x0002000000000000 );
				4603	zExp = aExp;
				4604	goto shiftRight1;
				4605	}
				4606	aSig0 \|= LIT64( 0x0001000000000000 );
				4607	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4608	--zExp;
				4609	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
				4610	++zExp;
				4611	shiftRight1:
				4612	shift128ExtraRightJamming(
				4613	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				4614	roundAndPack:
				4615	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4616
				4617	}
				4618
				4619	/*----------------------------------------------------------------------------
				4620	\| Returns the result of subtracting the absolute values of the quadruple-
				4621	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				4622	\| difference is negated before being returned. `zSign' is ignored if the
				4623	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				4624	\| Standard for Binary Floating-Point Arithmetic.
				4625	----------------------------------------------------------------------------/
				4626
				4627	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				4628	{
				4629	int32 aExp, bExp, zExp;
				4630	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
				4631	int32 expDiff;
				4632	float128 z;
				4633
				4634	aSig1 = extractFloat128Frac1( a );
				4635	aSig0 = extractFloat128Frac0( a );
				4636	aExp = extractFloat128Exp( a );
				4637	bSig1 = extractFloat128Frac1( b );
				4638	bSig0 = extractFloat128Frac0( b );
				4639	bExp = extractFloat128Exp( b );
				4640	expDiff = aExp - bExp;
				4641	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				4642	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
				4643	if ( 0 < expDiff ) goto aExpBigger;
				4644	if ( expDiff < 0 ) goto bExpBigger;
				4645	if ( aExp == 0x7FFF ) {
				4646	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				4647	return propagateFloat128NaN( a, b STATUS_VAR );
				4648	}
				4649	float_raise( float_flag_invalid STATUS_VAR);
				4650	z.low = float128_default_nan_low;
				4651	z.high = float128_default_nan_high;
				4652	return z;
				4653	}
				4654	if ( aExp == 0 ) {
				4655	aExp = 1;
				4656	bExp = 1;
				4657	}
				4658	if ( bSig0 < aSig0 ) goto aBigger;
				4659	if ( aSig0 < bSig0 ) goto bBigger;
				4660	if ( bSig1 < aSig1 ) goto aBigger;
				4661	if ( aSig1 < bSig1 ) goto bBigger;
				4662	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
				4663	bExpBigger:
				4664	if ( bExp == 0x7FFF ) {
				4665	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4666	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
				4667	}
				4668	if ( aExp == 0 ) {
				4669	++expDiff;
				4670	}
				4671	else {
				4672	aSig0 \|= LIT64( 0x4000000000000000 );
				4673	}
				4674	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				4675	bSig0 \|= LIT64( 0x4000000000000000 );
				4676	bBigger:
				4677	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
				4678	zExp = bExp;
				4679	zSign ^= 1;
				4680	goto normalizeRoundAndPack;
				4681	aExpBigger:
				4682	if ( aExp == 0x7FFF ) {
				4683	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4684	return a;
				4685	}
				4686	if ( bExp == 0 ) {
				4687	--expDiff;
				4688	}
				4689	else {
				4690	bSig0 \|= LIT64( 0x4000000000000000 );
				4691	}
				4692	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
				4693	aSig0 \|= LIT64( 0x4000000000000000 );
				4694	aBigger:
				4695	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4696	zExp = aExp;
				4697	normalizeRoundAndPack:
				4698	--zExp;
				4699	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
				4700
				4701	}
				4702
				4703	/*----------------------------------------------------------------------------
				4704	\| Returns the result of adding the quadruple-precision floating-point values
				4705	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				4706	\| for Binary Floating-Point Arithmetic.
				4707	----------------------------------------------------------------------------/
				4708
				4709	float128 float128_add( float128 a, float128 b STATUS_PARAM )
				4710	{
				4711	flag aSign, bSign;
				4712
				4713	aSign = extractFloat128Sign( a );
				4714	bSign = extractFloat128Sign( b );
				4715	if ( aSign == bSign ) {
				4716	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				4717	}
				4718	else {
				4719	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				4720	}
				4721
				4722	}
				4723
				4724	/*----------------------------------------------------------------------------
				4725	\| Returns the result of subtracting the quadruple-precision floating-point
				4726	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4727	\| Standard for Binary Floating-Point Arithmetic.
				4728	----------------------------------------------------------------------------/
				4729
				4730	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
				4731	{
				4732	flag aSign, bSign;
				4733
				4734	aSign = extractFloat128Sign( a );
				4735	bSign = extractFloat128Sign( b );
				4736	if ( aSign == bSign ) {
				4737	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				4738	}
				4739	else {
				4740	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				4741	}
				4742
				4743	}
				4744
				4745	/*----------------------------------------------------------------------------
				4746	\| Returns the result of multiplying the quadruple-precision floating-point
				4747	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4748	\| Standard for Binary Floating-Point Arithmetic.
				4749	----------------------------------------------------------------------------/
				4750
				4751	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
				4752	{
				4753	flag aSign, bSign, zSign;
				4754	int32 aExp, bExp, zExp;
				4755	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
				4756	float128 z;
				4757
				4758	aSig1 = extractFloat128Frac1( a );
				4759	aSig0 = extractFloat128Frac0( a );
				4760	aExp = extractFloat128Exp( a );
				4761	aSign = extractFloat128Sign( a );
				4762	bSig1 = extractFloat128Frac1( b );
				4763	bSig0 = extractFloat128Frac0( b );
				4764	bExp = extractFloat128Exp( b );
				4765	bSign = extractFloat128Sign( b );
				4766	zSign = aSign ^ bSign;
				4767	if ( aExp == 0x7FFF ) {
				4768	if ( ( aSig0 \| aSig1 )
				4769	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				4770	return propagateFloat128NaN( a, b STATUS_VAR );
				4771	}
				4772	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
				4773	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4774	}
				4775	if ( bExp == 0x7FFF ) {
				4776	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4777	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				4778	invalid:
				4779	float_raise( float_flag_invalid STATUS_VAR);
				4780	z.low = float128_default_nan_low;
				4781	z.high = float128_default_nan_high;
				4782	return z;
				4783	}
				4784	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4785	}
				4786	if ( aExp == 0 ) {
				4787	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4788	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4789	}
				4790	if ( bExp == 0 ) {
				4791	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4792	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4793	}
				4794	zExp = aExp + bExp - 0x4000;
				4795	aSig0 \|= LIT64( 0x0001000000000000 );
				4796	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
				4797	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
				4798	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
				4799	zSig2 \|= ( zSig3 != 0 );
				4800	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
				4801	shift128ExtraRightJamming(
				4802	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				4803	++zExp;
				4804	}
				4805	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4806
				4807	}
				4808
				4809	/*----------------------------------------------------------------------------
				4810	\| Returns the result of dividing the quadruple-precision floating-point value
				4811	\| `a' by the corresponding value `b'. The operation is performed according to
				4812	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4813	----------------------------------------------------------------------------/
				4814
				4815	float128 float128_div( float128 a, float128 b STATUS_PARAM )
				4816	{
				4817	flag aSign, bSign, zSign;
				4818	int32 aExp, bExp, zExp;
				4819	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				4820	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				4821	float128 z;
				4822
				4823	aSig1 = extractFloat128Frac1( a );
				4824	aSig0 = extractFloat128Frac0( a );
				4825	aExp = extractFloat128Exp( a );
				4826	aSign = extractFloat128Sign( a );
				4827	bSig1 = extractFloat128Frac1( b );
				4828	bSig0 = extractFloat128Frac0( b );
				4829	bExp = extractFloat128Exp( b );
				4830	bSign = extractFloat128Sign( b );
				4831	zSign = aSign ^ bSign;
				4832	if ( aExp == 0x7FFF ) {
				4833	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4834	if ( bExp == 0x7FFF ) {
				4835	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4836	goto invalid;
				4837	}
				4838	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4839	}
				4840	if ( bExp == 0x7FFF ) {
				4841	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4842	return packFloat128( zSign, 0, 0, 0 );
				4843	}
				4844	if ( bExp == 0 ) {
				4845	if ( ( bSig0 \| bSig1 ) == 0 ) {
				4846	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				4847	invalid:
				4848	float_raise( float_flag_invalid STATUS_VAR);
				4849	z.low = float128_default_nan_low;
				4850	z.high = float128_default_nan_high;
				4851	return z;
				4852	}
				4853	float_raise( float_flag_divbyzero STATUS_VAR);
				4854	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4855	}
				4856	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4857	}
				4858	if ( aExp == 0 ) {
				4859	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4860	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4861	}
				4862	zExp = aExp - bExp + 0x3FFD;
				4863	shortShift128Left(
				4864	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
				4865	shortShift128Left(
				4866	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				4867	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
				4868	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
				4869	++zExp;
				4870	}
				4871	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
				4872	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
				4873	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
				4874	while ( (sbits64) rem0 < 0 ) {
				4875	--zSig0;
				4876	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
				4877	}
				4878	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
				4879	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
				4880	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
				4881	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
				4882	while ( (sbits64) rem1 < 0 ) {
				4883	--zSig1;
				4884	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
				4885	}
				4886	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				4887	}
				4888	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
				4889	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4890
				4891	}
				4892
				4893	/*----------------------------------------------------------------------------
				4894	\| Returns the remainder of the quadruple-precision floating-point value `a'
				4895	\| with respect to the corresponding value `b'. The operation is performed
				4896	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4897	----------------------------------------------------------------------------/
				4898
				4899	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
				4900	{
				4901	flag aSign, bSign, zSign;
				4902	int32 aExp, bExp, expDiff;
				4903	bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
				4904	bits64 allZero, alternateASig0, alternateASig1, sigMean1;
				4905	sbits64 sigMean0;
				4906	float128 z;
				4907
				4908	aSig1 = extractFloat128Frac1( a );
				4909	aSig0 = extractFloat128Frac0( a );
				4910	aExp = extractFloat128Exp( a );
				4911	aSign = extractFloat128Sign( a );
				4912	bSig1 = extractFloat128Frac1( b );
				4913	bSig0 = extractFloat128Frac0( b );
				4914	bExp = extractFloat128Exp( b );
				4915	bSign = extractFloat128Sign( b );
				4916	if ( aExp == 0x7FFF ) {
				4917	if ( ( aSig0 \| aSig1 )
				4918	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				4919	return propagateFloat128NaN( a, b STATUS_VAR );
				4920	}
				4921	goto invalid;
				4922	}
				4923	if ( bExp == 0x7FFF ) {
				4924	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4925	return a;
				4926	}
				4927	if ( bExp == 0 ) {
				4928	if ( ( bSig0 \| bSig1 ) == 0 ) {
				4929	invalid:
				4930	float_raise( float_flag_invalid STATUS_VAR);
				4931	z.low = float128_default_nan_low;
				4932	z.high = float128_default_nan_high;
				4933	return z;
				4934	}
				4935	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4936	}
				4937	if ( aExp == 0 ) {
				4938	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
				4939	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4940	}
				4941	expDiff = aExp - bExp;
				4942	if ( expDiff < -1 ) return a;
				4943	shortShift128Left(
				4944	aSig0 \| LIT64( 0x0001000000000000 ),
				4945	aSig1,
				4946	15 - ( expDiff < 0 ),
				4947	&aSig0,
				4948	&aSig1
				4949	);
				4950	shortShift128Left(
				4951	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				4952	q = le128( bSig0, bSig1, aSig0, aSig1 );
				4953	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				4954	expDiff -= 64;
				4955	while ( 0 < expDiff ) {
				4956	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				4957	q = ( 4 < q ) ? q - 4 : 0;
				4958	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				4959	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
				4960	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
				4961	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
				4962	expDiff -= 61;
				4963	}
				4964	if ( -64 < expDiff ) {
				4965	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				4966	q = ( 4 < q ) ? q - 4 : 0;
				4967	q >>= - expDiff;
				4968	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				4969	expDiff += 52;
				4970	if ( expDiff < 0 ) {
				4971	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				4972	}
				4973	else {
				4974	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
				4975	}
				4976	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				4977	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
				4978	}
				4979	else {
				4980	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
				4981	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				4982	}
				4983	do {
				4984	alternateASig0 = aSig0;
				4985	alternateASig1 = aSig1;
				4986	++q;
				4987	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				4988	} while ( 0 <= (sbits64) aSig0 );
				4989	add128(
blueswir1	b55266b	2008-09-20 08:07:15 +0000	[diff] [blame]	4990	aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4991	if ( ( sigMean0 < 0 )
				4992	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
				4993	aSig0 = alternateASig0;
				4994	aSig1 = alternateASig1;
				4995	}
				4996	zSign = ( (sbits64) aSig0 < 0 );
				4997	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
				4998	return
				4999	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
				5000
				5001	}
				5002
				5003	/*----------------------------------------------------------------------------
				5004	\| Returns the square root of the quadruple-precision floating-point value `a'.
				5005	\| The operation is performed according to the IEC/IEEE Standard for Binary
				5006	\| Floating-Point Arithmetic.
				5007	----------------------------------------------------------------------------/
				5008
				5009	float128 float128_sqrt( float128 a STATUS_PARAM )
				5010	{
				5011	flag aSign;
				5012	int32 aExp, zExp;
				5013	bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
				5014	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				5015	float128 z;
				5016
				5017	aSig1 = extractFloat128Frac1( a );
				5018	aSig0 = extractFloat128Frac0( a );
				5019	aExp = extractFloat128Exp( a );
				5020	aSign = extractFloat128Sign( a );
				5021	if ( aExp == 0x7FFF ) {
				5022	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
				5023	if ( ! aSign ) return a;
				5024	goto invalid;
				5025	}
				5026	if ( aSign ) {
				5027	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
				5028	invalid:
				5029	float_raise( float_flag_invalid STATUS_VAR);
				5030	z.low = float128_default_nan_low;
				5031	z.high = float128_default_nan_high;
				5032	return z;
				5033	}
				5034	if ( aExp == 0 ) {
				5035	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
				5036	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5037	}
				5038	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
				5039	aSig0 \|= LIT64( 0x0001000000000000 );
				5040	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
				5041	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
				5042	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				5043	doubleZSig0 = zSig0<<1;
				5044	mul64To128( zSig0, zSig0, &term0, &term1 );
				5045	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				5046	while ( (sbits64) rem0 < 0 ) {
				5047	--zSig0;
				5048	doubleZSig0 -= 2;
				5049	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				5050	}
				5051	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				5052	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
				5053	if ( zSig1 == 0 ) zSig1 = 1;
				5054	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				5055	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				5056	mul64To128( zSig1, zSig1, &term2, &term3 );
				5057	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				5058	while ( (sbits64) rem1 < 0 ) {
				5059	--zSig1;
				5060	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				5061	term3 \|= 1;
				5062	term2 \|= doubleZSig0;
				5063	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				5064	}
				5065	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				5066	}
				5067	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
				5068	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5069
				5070	}
				5071
				5072	/*----------------------------------------------------------------------------
				5073	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
				5074	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				5075	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5076	----------------------------------------------------------------------------/
				5077
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5078	int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5079	{
				5080
				5081	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5082	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5083	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5084	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5085	) {
				5086	if ( float128_is_signaling_nan( a )
				5087	\|\| float128_is_signaling_nan( b ) ) {
				5088	float_raise( float_flag_invalid STATUS_VAR);
				5089	}
				5090	return 0;
				5091	}
				5092	return
				5093	( a.low == b.low )
				5094	&& ( ( a.high == b.high )
				5095	\|\| ( ( a.low == 0 )
				5096	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
				5097	);
				5098
				5099	}
				5100
				5101	/*----------------------------------------------------------------------------
				5102	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5103	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
				5104	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5105	\| Arithmetic.
				5106	----------------------------------------------------------------------------/
				5107
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5108	int float128_le( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5109	{
				5110	flag aSign, bSign;
				5111
				5112	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5113	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5114	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5115	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5116	) {
				5117	float_raise( float_flag_invalid STATUS_VAR);
				5118	return 0;
				5119	}
				5120	aSign = extractFloat128Sign( a );
				5121	bSign = extractFloat128Sign( b );
				5122	if ( aSign != bSign ) {
				5123	return
				5124	aSign
				5125	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5126	== 0 );
				5127	}
				5128	return
				5129	aSign ? le128( b.high, b.low, a.high, a.low )
				5130	: le128( a.high, a.low, b.high, b.low );
				5131
				5132	}
				5133
				5134	/*----------------------------------------------------------------------------
				5135	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5136	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				5137	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5138	----------------------------------------------------------------------------/
				5139
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5140	int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5141	{
				5142	flag aSign, bSign;
				5143
				5144	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5145	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5146	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5147	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5148	) {
				5149	float_raise( float_flag_invalid STATUS_VAR);
				5150	return 0;
				5151	}
				5152	aSign = extractFloat128Sign( a );
				5153	bSign = extractFloat128Sign( b );
				5154	if ( aSign != bSign ) {
				5155	return
				5156	aSign
				5157	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5158	!= 0 );
				5159	}
				5160	return
				5161	aSign ? lt128( b.high, b.low, a.high, a.low )
				5162	: lt128( a.high, a.low, b.high, b.low );
				5163
				5164	}
				5165
				5166	/*----------------------------------------------------------------------------
				5167	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
				5168	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				5169	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				5170	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5171	----------------------------------------------------------------------------/
				5172
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5173	int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5174	{
				5175
				5176	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5177	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5178	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5179	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5180	) {
				5181	float_raise( float_flag_invalid STATUS_VAR);
				5182	return 0;
				5183	}
				5184	return
				5185	( a.low == b.low )
				5186	&& ( ( a.high == b.high )
				5187	\|\| ( ( a.low == 0 )
				5188	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
				5189	);
				5190
				5191	}
				5192
				5193	/*----------------------------------------------------------------------------
				5194	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5195	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				5196	\| cause an exception. Otherwise, the comparison is performed according to the
				5197	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5198	----------------------------------------------------------------------------/
				5199
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5200	int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5201	{
				5202	flag aSign, bSign;
				5203
				5204	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5205	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5206	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5207	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5208	) {
				5209	if ( float128_is_signaling_nan( a )
				5210	\|\| float128_is_signaling_nan( b ) ) {
				5211	float_raise( float_flag_invalid STATUS_VAR);
				5212	}
				5213	return 0;
				5214	}
				5215	aSign = extractFloat128Sign( a );
				5216	bSign = extractFloat128Sign( b );
				5217	if ( aSign != bSign ) {
				5218	return
				5219	aSign
				5220	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5221	== 0 );
				5222	}
				5223	return
				5224	aSign ? le128( b.high, b.low, a.high, a.low )
				5225	: le128( a.high, a.low, b.high, b.low );
				5226
				5227	}
				5228
				5229	/*----------------------------------------------------------------------------
				5230	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5231	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				5232	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				5233	\| Standard for Binary Floating-Point Arithmetic.
				5234	----------------------------------------------------------------------------/
				5235
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5236	int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5237	{
				5238	flag aSign, bSign;
				5239
				5240	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5241	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5242	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5243	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5244	) {
				5245	if ( float128_is_signaling_nan( a )
				5246	\|\| float128_is_signaling_nan( b ) ) {
				5247	float_raise( float_flag_invalid STATUS_VAR);
				5248	}
				5249	return 0;
				5250	}
				5251	aSign = extractFloat128Sign( a );
				5252	bSign = extractFloat128Sign( b );
				5253	if ( aSign != bSign ) {
				5254	return
				5255	aSign
				5256	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5257	!= 0 );
				5258	}
				5259	return
				5260	aSign ? lt128( b.high, b.low, a.high, a.low )
				5261	: lt128( a.high, a.low, b.high, b.low );
				5262
				5263	}
				5264
				5265	#endif
				5266
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5267	/* misc functions */
				5268	float32 uint32_to_float32( unsigned int a STATUS_PARAM )
				5269	{
				5270	return int64_to_float32(a STATUS_VAR);
				5271	}
				5272
				5273	float64 uint32_to_float64( unsigned int a STATUS_PARAM )
				5274	{
				5275	return int64_to_float64(a STATUS_VAR);
				5276	}
				5277
				5278	unsigned int float32_to_uint32( float32 a STATUS_PARAM )
				5279	{
				5280	int64_t v;
				5281	unsigned int res;
				5282
				5283	v = float32_to_int64(a STATUS_VAR);
				5284	if (v < 0) {
				5285	res = 0;
				5286	float_raise( float_flag_invalid STATUS_VAR);
				5287	} else if (v > 0xffffffff) {
				5288	res = 0xffffffff;
				5289	float_raise( float_flag_invalid STATUS_VAR);
				5290	} else {
				5291	res = v;
				5292	}
				5293	return res;
				5294	}
				5295
				5296	unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
				5297	{
				5298	int64_t v;
				5299	unsigned int res;
				5300
				5301	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				5302	if (v < 0) {
				5303	res = 0;
				5304	float_raise( float_flag_invalid STATUS_VAR);
				5305	} else if (v > 0xffffffff) {
				5306	res = 0xffffffff;
				5307	float_raise( float_flag_invalid STATUS_VAR);
				5308	} else {
				5309	res = v;
				5310	}
				5311	return res;
				5312	}
				5313
				5314	unsigned int float64_to_uint32( float64 a STATUS_PARAM )
				5315	{
				5316	int64_t v;
				5317	unsigned int res;
				5318
				5319	v = float64_to_int64(a STATUS_VAR);
				5320	if (v < 0) {
				5321	res = 0;
				5322	float_raise( float_flag_invalid STATUS_VAR);
				5323	} else if (v > 0xffffffff) {
				5324	res = 0xffffffff;
				5325	float_raise( float_flag_invalid STATUS_VAR);
				5326	} else {
				5327	res = v;
				5328	}
				5329	return res;
				5330	}
				5331
				5332	unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
				5333	{
				5334	int64_t v;
				5335	unsigned int res;
				5336
				5337	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				5338	if (v < 0) {
				5339	res = 0;
				5340	float_raise( float_flag_invalid STATUS_VAR);
				5341	} else if (v > 0xffffffff) {
				5342	res = 0xffffffff;
				5343	float_raise( float_flag_invalid STATUS_VAR);
				5344	} else {
				5345	res = v;
				5346	}
				5347	return res;
				5348	}
				5349
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5350	/* FIXME: This looks broken. */
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	5351	uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
				5352	{
				5353	int64_t v;
				5354
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5355	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				5356	v += float64_val(a);
				5357	v = float64_to_int64(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	5358
				5359	return v - INT64_MIN;
				5360	}
				5361
				5362	uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
				5363	{
				5364	int64_t v;
				5365
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5366	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				5367	v += float64_val(a);
				5368	v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	5369
				5370	return v - INT64_MIN;
				5371	}
				5372
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5373	#define COMPARE(s, nan_exp) \
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5374	INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5375	int is_quiet STATUS_PARAM ) \
				5376	{ \
				5377	flag aSign, bSign; \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5378	bits ## s av, bv; \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5379	\
				5380	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
				5381	extractFloat ## s ## Frac( a ) ) \|\| \
				5382	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
				5383	extractFloat ## s ## Frac( b ) )) { \
				5384	if (!is_quiet \|\| \
				5385	float ## s ## _is_signaling_nan( a ) \|\| \
				5386	float ## s ## _is_signaling_nan( b ) ) { \
				5387	float_raise( float_flag_invalid STATUS_VAR); \
				5388	} \
				5389	return float_relation_unordered; \
				5390	} \
				5391	aSign = extractFloat ## s ## Sign( a ); \
				5392	bSign = extractFloat ## s ## Sign( b ); \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5393	av = float ## s ## _val(a); \
blueswir1	cd8a253	2007-11-21 18:57:44 +0000	[diff] [blame]	5394	bv = float ## s ## _val(b); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5395	if ( aSign != bSign ) { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5396	if ( (bits ## s) ( ( av \| bv )<<1 ) == 0 ) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5397	/* zero case */ \
				5398	return float_relation_equal; \
				5399	} else { \
				5400	return 1 - (2 * aSign); \
				5401	} \
				5402	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5403	if (av == bv) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5404	return float_relation_equal; \
				5405	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5406	return 1 - 2 * (aSign ^ ( av < bv )); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5407	} \
				5408	} \
				5409	} \
				5410	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5411	int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5412	{ \
				5413	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
				5414	} \
				5415	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5416	int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5417	{ \
				5418	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
				5419	}
				5420
				5421	COMPARE(32, 0xff)
				5422	COMPARE(64, 0x7ff)
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5423
blueswir1	1f58732	2007-11-25 18:40:20 +0000	[diff] [blame]	5424	INLINE int float128_compare_internal( float128 a, float128 b,
				5425	int is_quiet STATUS_PARAM )
				5426	{
				5427	flag aSign, bSign;
				5428
				5429	if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
				5430	( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) ) \|\|
				5431	( ( extractFloat128Exp( b ) == 0x7fff ) &&
				5432	( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )) {
				5433	if (!is_quiet \|\|
				5434	float128_is_signaling_nan( a ) \|\|
				5435	float128_is_signaling_nan( b ) ) {
				5436	float_raise( float_flag_invalid STATUS_VAR);
				5437	}
				5438	return float_relation_unordered;
				5439	}
				5440	aSign = extractFloat128Sign( a );
				5441	bSign = extractFloat128Sign( b );
				5442	if ( aSign != bSign ) {
				5443	if ( ( ( ( a.high \| b.high )<<1 ) \| a.low \| b.low ) == 0 ) {
				5444	/* zero case */
				5445	return float_relation_equal;
				5446	} else {
				5447	return 1 - (2 * aSign);
				5448	}
				5449	} else {
				5450	if (a.low == b.low && a.high == b.high) {
				5451	return float_relation_equal;
				5452	} else {
				5453	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				5454	}
				5455	}
				5456	}
				5457
				5458	int float128_compare( float128 a, float128 b STATUS_PARAM )
				5459	{
				5460	return float128_compare_internal(a, b, 0 STATUS_VAR);
				5461	}
				5462
				5463	int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
				5464	{
				5465	return float128_compare_internal(a, b, 1 STATUS_VAR);
				5466	}
				5467
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5468	/* Multiply A by 2 raised to the power N. */
				5469	float32 float32_scalbn( float32 a, int n STATUS_PARAM )
				5470	{
				5471	flag aSign;
				5472	int16 aExp;
				5473	bits32 aSig;
				5474
				5475	aSig = extractFloat32Frac( a );
				5476	aExp = extractFloat32Exp( a );
				5477	aSign = extractFloat32Sign( a );
				5478
				5479	if ( aExp == 0xFF ) {
				5480	return a;
				5481	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame^]	5482	if ( aExp != 0 )
				5483	aSig \|= 0x00800000;
				5484	else if ( aSig == 0 )
				5485	return a;
				5486
				5487	aExp += n - 1;
				5488	aSig <<= 7;
				5489	return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5490	}
				5491
				5492	float64 float64_scalbn( float64 a, int n STATUS_PARAM )
				5493	{
				5494	flag aSign;
				5495	int16 aExp;
				5496	bits64 aSig;
				5497
				5498	aSig = extractFloat64Frac( a );
				5499	aExp = extractFloat64Exp( a );
				5500	aSign = extractFloat64Sign( a );
				5501
				5502	if ( aExp == 0x7FF ) {
				5503	return a;
				5504	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame^]	5505	if ( aExp != 0 )
				5506	aSig \|= LIT64( 0x0010000000000000 );
				5507	else if ( aSig == 0 )
				5508	return a;
				5509
				5510	aExp += n - 1;
				5511	aSig <<= 10;
				5512	return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5513	}
				5514
				5515	#ifdef FLOATX80
				5516	floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
				5517	{
				5518	flag aSign;
				5519	int16 aExp;
				5520	bits64 aSig;
				5521
				5522	aSig = extractFloatx80Frac( a );
				5523	aExp = extractFloatx80Exp( a );
				5524	aSign = extractFloatx80Sign( a );
				5525
				5526	if ( aExp == 0x7FF ) {
				5527	return a;
				5528	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame^]	5529	if (aExp == 0 && aSig == 0)
				5530	return a;
				5531
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5532	aExp += n;
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame^]	5533	return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
				5534	aSign, aExp, aSig, 0 STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5535	}
				5536	#endif
				5537
				5538	#ifdef FLOAT128
				5539	float128 float128_scalbn( float128 a, int n STATUS_PARAM )
				5540	{
				5541	flag aSign;
				5542	int32 aExp;
				5543	bits64 aSig0, aSig1;
				5544
				5545	aSig1 = extractFloat128Frac1( a );
				5546	aSig0 = extractFloat128Frac0( a );
				5547	aExp = extractFloat128Exp( a );
				5548	aSign = extractFloat128Sign( a );
				5549	if ( aExp == 0x7FFF ) {
				5550	return a;
				5551	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame^]	5552	if ( aExp != 0 )
				5553	aSig0 \|= LIT64( 0x0001000000000000 );
				5554	else if ( aSig0 == 0 && aSig1 == 0 )
				5555	return a;
				5556
				5557	aExp += n - 1;
				5558	return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
				5559	STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5560
				5561	}
				5562	#endif