Blame - fpu/softfloat.c - people/peter.maydell/qemu-arm.git

blob: 8ebb69264830862602f27f8a2e7fcbe1a0e61c29 [file] [log] [blame]

bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1
				2	/*============================================================================
				3
				4	This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
				5	Package, Release 2b.
				6
				7	Written by John R. Hauser. This work was made possible in part by the
				8	International Computer Science Institute, located at Suite 600, 1947 Center
				9	Street, Berkeley, California 94704. Funding was partially provided by the
				10	National Science Foundation under grant MIP-9311980. The original version
				11	of this code was written as part of a project to build a fixed-point vector
				12	processor in collaboration with the University of California at Berkeley,
				13	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				14	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
				15	arithmetic/SoftFloat.html'.
				16
				17	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
				18	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
				19	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
				20	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
				21	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
				22	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
				23	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
				24	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
				25
				26	Derivative works are acceptable, even for commercial purposes, so long as
				27	(1) the source code for the derivative work includes prominent notice that
				28	the work is derivative, and (2) the source code includes prominent notice with
				29	these four paragraphs for those parts of this code that are retained.
				30
				31	=============================================================================*/
				32
				33	#include "softfloat.h"
				34
				35	/*----------------------------------------------------------------------------
				36	\| Primitive arithmetic functions, including multi-word arithmetic, and
				37	\| division and square root approximations. (Can be specialized to target if
				38	\| desired.)
				39	----------------------------------------------------------------------------/
				40	#include "softfloat-macros.h"
				41
				42	/*----------------------------------------------------------------------------
				43	\| Functions and definitions to determine: (1) whether tininess for underflow
				44	\| is detected before or after rounding by default, (2) what (if anything)
				45	\| happens when exceptions are raised, (3) how signaling NaNs are distinguished
				46	\| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				47	\| are propagated from function inputs to output. These details are target-
				48	\| specific.
				49	----------------------------------------------------------------------------/
				50	#include "softfloat-specialize.h"
				51
				52	void set_float_rounding_mode(int val STATUS_PARAM)
				53	{
				54	STATUS(float_rounding_mode) = val;
				55	}
				56
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	57	void set_float_exception_flags(int val STATUS_PARAM)
				58	{
				59	STATUS(float_exception_flags) = val;
				60	}
				61
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	62	#ifdef FLOATX80
				63	void set_floatx80_rounding_precision(int val STATUS_PARAM)
				64	{
				65	STATUS(floatx80_rounding_precision) = val;
				66	}
				67	#endif
				68
				69	/*----------------------------------------------------------------------------
				70	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				71	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
				72	\| input. If `zSign' is 1, the input is negated before being converted to an
				73	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
				74	\| is simply rounded to an integer, with the inexact exception raised if the
				75	\| input cannot be represented exactly as an integer. However, if the fixed-
				76	\| point input is too large, the invalid exception is raised and the largest
				77	\| positive or negative integer is returned.
				78	----------------------------------------------------------------------------/
				79
				80	static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
				81	{
				82	int8 roundingMode;
				83	flag roundNearestEven;
				84	int8 roundIncrement, roundBits;
				85	int32 z;
				86
				87	roundingMode = STATUS(float_rounding_mode);
				88	roundNearestEven = ( roundingMode == float_round_nearest_even );
				89	roundIncrement = 0x40;
				90	if ( ! roundNearestEven ) {
				91	if ( roundingMode == float_round_to_zero ) {
				92	roundIncrement = 0;
				93	}
				94	else {
				95	roundIncrement = 0x7F;
				96	if ( zSign ) {
				97	if ( roundingMode == float_round_up ) roundIncrement = 0;
				98	}
				99	else {
				100	if ( roundingMode == float_round_down ) roundIncrement = 0;
				101	}
				102	}
				103	}
				104	roundBits = absZ & 0x7F;
				105	absZ = ( absZ + roundIncrement )>>7;
				106	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				107	z = absZ;
				108	if ( zSign ) z = - z;
				109	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				110	float_raise( float_flag_invalid STATUS_VAR);
				111	return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				112	}
				113	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				114	return z;
				115
				116	}
				117
				118	/*----------------------------------------------------------------------------
				119	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
				120	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
				121	\| and returns the properly rounded 64-bit integer corresponding to the input.
				122	\| If `zSign' is 1, the input is negated before being converted to an integer.
				123	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
				124	\| the inexact exception raised if the input cannot be represented exactly as
				125	\| an integer. However, if the fixed-point input is too large, the invalid
				126	\| exception is raised and the largest positive or negative integer is
				127	\| returned.
				128	----------------------------------------------------------------------------/
				129
				130	static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
				131	{
				132	int8 roundingMode;
				133	flag roundNearestEven, increment;
				134	int64 z;
				135
				136	roundingMode = STATUS(float_rounding_mode);
				137	roundNearestEven = ( roundingMode == float_round_nearest_even );
				138	increment = ( (sbits64) absZ1 < 0 );
				139	if ( ! roundNearestEven ) {
				140	if ( roundingMode == float_round_to_zero ) {
				141	increment = 0;
				142	}
				143	else {
				144	if ( zSign ) {
				145	increment = ( roundingMode == float_round_down ) && absZ1;
				146	}
				147	else {
				148	increment = ( roundingMode == float_round_up ) && absZ1;
				149	}
				150	}
				151	}
				152	if ( increment ) {
				153	++absZ0;
				154	if ( absZ0 == 0 ) goto overflow;
				155	absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
				156	}
				157	z = absZ0;
				158	if ( zSign ) z = - z;
				159	if ( z && ( ( z < 0 ) ^ zSign ) ) {
				160	overflow:
				161	float_raise( float_flag_invalid STATUS_VAR);
				162	return
				163	zSign ? (sbits64) LIT64( 0x8000000000000000 )
				164	: LIT64( 0x7FFFFFFFFFFFFFFF );
				165	}
				166	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				167	return z;
				168
				169	}
				170
				171	/*----------------------------------------------------------------------------
				172	\| Returns the fraction bits of the single-precision floating-point value `a'.
				173	----------------------------------------------------------------------------/
				174
				175	INLINE bits32 extractFloat32Frac( float32 a )
				176	{
				177
				178	return a & 0x007FFFFF;
				179
				180	}
				181
				182	/*----------------------------------------------------------------------------
				183	\| Returns the exponent bits of the single-precision floating-point value `a'.
				184	----------------------------------------------------------------------------/
				185
				186	INLINE int16 extractFloat32Exp( float32 a )
				187	{
				188
				189	return ( a>>23 ) & 0xFF;
				190
				191	}
				192
				193	/*----------------------------------------------------------------------------
				194	\| Returns the sign bit of the single-precision floating-point value `a'.
				195	----------------------------------------------------------------------------/
				196
				197	INLINE flag extractFloat32Sign( float32 a )
				198	{
				199
				200	return a>>31;
				201
				202	}
				203
				204	/*----------------------------------------------------------------------------
				205	\| Normalizes the subnormal single-precision floating-point value represented
				206	\| by the denormalized significand `aSig'. The normalized exponent and
				207	\| significand are stored at the locations pointed to by `zExpPtr' and
				208	\| `zSigPtr', respectively.
				209	----------------------------------------------------------------------------/
				210
				211	static void
				212	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
				213	{
				214	int8 shiftCount;
				215
				216	shiftCount = countLeadingZeros32( aSig ) - 8;
				217	*zSigPtr = aSig<<shiftCount;
				218	*zExpPtr = 1 - shiftCount;
				219
				220	}
				221
				222	/*----------------------------------------------------------------------------
				223	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				224	\| single-precision floating-point value, returning the result. After being
				225	\| shifted into the proper positions, the three fields are simply added
				226	\| together to form the result. This means that any integer portion of `zSig'
				227	\| will be added into the exponent. Since a properly normalized significand
				228	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				229	\| than the desired result exponent whenever `zSig' is a complete, normalized
				230	\| significand.
				231	----------------------------------------------------------------------------/
				232
				233	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
				234	{
				235
				236	return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
				237
				238	}
				239
				240	/*----------------------------------------------------------------------------
				241	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				242	\| and significand `zSig', and returns the proper single-precision floating-
				243	\| point value corresponding to the abstract input. Ordinarily, the abstract
				244	\| value is simply rounded and packed into the single-precision format, with
				245	\| the inexact exception raised if the abstract input cannot be represented
				246	\| exactly. However, if the abstract value is too large, the overflow and
				247	\| inexact exceptions are raised and an infinity or maximal finite value is
				248	\| returned. If the abstract value is too small, the input value is rounded to
				249	\| a subnormal number, and the underflow and inexact exceptions are raised if
				250	\| the abstract input cannot be represented exactly as a subnormal single-
				251	\| precision floating-point number.
				252	\| The input significand `zSig' has its binary point between bits 30
				253	\| and 29, which is 7 bits to the left of the usual location. This shifted
				254	\| significand must be normalized or smaller. If `zSig' is not normalized,
				255	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				256	\| and it must not require rounding. In the usual case that `zSig' is
				257	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				258	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				259	\| Binary Floating-Point Arithmetic.
				260	----------------------------------------------------------------------------/
				261
				262	static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
				263	{
				264	int8 roundingMode;
				265	flag roundNearestEven;
				266	int8 roundIncrement, roundBits;
				267	flag isTiny;
				268
				269	roundingMode = STATUS(float_rounding_mode);
				270	roundNearestEven = ( roundingMode == float_round_nearest_even );
				271	roundIncrement = 0x40;
				272	if ( ! roundNearestEven ) {
				273	if ( roundingMode == float_round_to_zero ) {
				274	roundIncrement = 0;
				275	}
				276	else {
				277	roundIncrement = 0x7F;
				278	if ( zSign ) {
				279	if ( roundingMode == float_round_up ) roundIncrement = 0;
				280	}
				281	else {
				282	if ( roundingMode == float_round_down ) roundIncrement = 0;
				283	}
				284	}
				285	}
				286	roundBits = zSig & 0x7F;
				287	if ( 0xFD <= (bits16) zExp ) {
				288	if ( ( 0xFD < zExp )
				289	\|\| ( ( zExp == 0xFD )
				290	&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
				291	) {
				292	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				293	return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
				294	}
				295	if ( zExp < 0 ) {
				296	isTiny =
				297	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				298	\|\| ( zExp < -1 )
				299	\|\| ( zSig + roundIncrement < 0x80000000 );
				300	shift32RightJamming( zSig, - zExp, &zSig );
				301	zExp = 0;
				302	roundBits = zSig & 0x7F;
				303	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				304	}
				305	}
				306	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				307	zSig = ( zSig + roundIncrement )>>7;
				308	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				309	if ( zSig == 0 ) zExp = 0;
				310	return packFloat32( zSign, zExp, zSig );
				311
				312	}
				313
				314	/*----------------------------------------------------------------------------
				315	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				316	\| and significand `zSig', and returns the proper single-precision floating-
				317	\| point value corresponding to the abstract input. This routine is just like
				318	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
				319	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				320	\| floating-point exponent.
				321	----------------------------------------------------------------------------/
				322
				323	static float32
				324	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
				325	{
				326	int8 shiftCount;
				327
				328	shiftCount = countLeadingZeros32( zSig ) - 1;
				329	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				330
				331	}
				332
				333	/*----------------------------------------------------------------------------
				334	\| Returns the fraction bits of the double-precision floating-point value `a'.
				335	----------------------------------------------------------------------------/
				336
				337	INLINE bits64 extractFloat64Frac( float64 a )
				338	{
				339
				340	return a & LIT64( 0x000FFFFFFFFFFFFF );
				341
				342	}
				343
				344	/*----------------------------------------------------------------------------
				345	\| Returns the exponent bits of the double-precision floating-point value `a'.
				346	----------------------------------------------------------------------------/
				347
				348	INLINE int16 extractFloat64Exp( float64 a )
				349	{
				350
				351	return ( a>>52 ) & 0x7FF;
				352
				353	}
				354
				355	/*----------------------------------------------------------------------------
				356	\| Returns the sign bit of the double-precision floating-point value `a'.
				357	----------------------------------------------------------------------------/
				358
				359	INLINE flag extractFloat64Sign( float64 a )
				360	{
				361
				362	return a>>63;
				363
				364	}
				365
				366	/*----------------------------------------------------------------------------
				367	\| Normalizes the subnormal double-precision floating-point value represented
				368	\| by the denormalized significand `aSig'. The normalized exponent and
				369	\| significand are stored at the locations pointed to by `zExpPtr' and
				370	\| `zSigPtr', respectively.
				371	----------------------------------------------------------------------------/
				372
				373	static void
				374	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
				375	{
				376	int8 shiftCount;
				377
				378	shiftCount = countLeadingZeros64( aSig ) - 11;
				379	*zSigPtr = aSig<<shiftCount;
				380	*zExpPtr = 1 - shiftCount;
				381
				382	}
				383
				384	/*----------------------------------------------------------------------------
				385	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				386	\| double-precision floating-point value, returning the result. After being
				387	\| shifted into the proper positions, the three fields are simply added
				388	\| together to form the result. This means that any integer portion of `zSig'
				389	\| will be added into the exponent. Since a properly normalized significand
				390	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				391	\| than the desired result exponent whenever `zSig' is a complete, normalized
				392	\| significand.
				393	----------------------------------------------------------------------------/
				394
				395	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
				396	{
				397
				398	return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
				399
				400	}
				401
				402	/*----------------------------------------------------------------------------
				403	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				404	\| and significand `zSig', and returns the proper double-precision floating-
				405	\| point value corresponding to the abstract input. Ordinarily, the abstract
				406	\| value is simply rounded and packed into the double-precision format, with
				407	\| the inexact exception raised if the abstract input cannot be represented
				408	\| exactly. However, if the abstract value is too large, the overflow and
				409	\| inexact exceptions are raised and an infinity or maximal finite value is
				410	\| returned. If the abstract value is too small, the input value is rounded
				411	\| to a subnormal number, and the underflow and inexact exceptions are raised
				412	\| if the abstract input cannot be represented exactly as a subnormal double-
				413	\| precision floating-point number.
				414	\| The input significand `zSig' has its binary point between bits 62
				415	\| and 61, which is 10 bits to the left of the usual location. This shifted
				416	\| significand must be normalized or smaller. If `zSig' is not normalized,
				417	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				418	\| and it must not require rounding. In the usual case that `zSig' is
				419	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				420	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				421	\| Binary Floating-Point Arithmetic.
				422	----------------------------------------------------------------------------/
				423
				424	static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
				425	{
				426	int8 roundingMode;
				427	flag roundNearestEven;
				428	int16 roundIncrement, roundBits;
				429	flag isTiny;
				430
				431	roundingMode = STATUS(float_rounding_mode);
				432	roundNearestEven = ( roundingMode == float_round_nearest_even );
				433	roundIncrement = 0x200;
				434	if ( ! roundNearestEven ) {
				435	if ( roundingMode == float_round_to_zero ) {
				436	roundIncrement = 0;
				437	}
				438	else {
				439	roundIncrement = 0x3FF;
				440	if ( zSign ) {
				441	if ( roundingMode == float_round_up ) roundIncrement = 0;
				442	}
				443	else {
				444	if ( roundingMode == float_round_down ) roundIncrement = 0;
				445	}
				446	}
				447	}
				448	roundBits = zSig & 0x3FF;
				449	if ( 0x7FD <= (bits16) zExp ) {
				450	if ( ( 0x7FD < zExp )
				451	\|\| ( ( zExp == 0x7FD )
				452	&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
				453	) {
				454	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				455	return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
				456	}
				457	if ( zExp < 0 ) {
				458	isTiny =
				459	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				460	\|\| ( zExp < -1 )
				461	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				462	shift64RightJamming( zSig, - zExp, &zSig );
				463	zExp = 0;
				464	roundBits = zSig & 0x3FF;
				465	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				466	}
				467	}
				468	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				469	zSig = ( zSig + roundIncrement )>>10;
				470	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				471	if ( zSig == 0 ) zExp = 0;
				472	return packFloat64( zSign, zExp, zSig );
				473
				474	}
				475
				476	/*----------------------------------------------------------------------------
				477	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				478	\| and significand `zSig', and returns the proper double-precision floating-
				479	\| point value corresponding to the abstract input. This routine is just like
				480	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
				481	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				482	\| floating-point exponent.
				483	----------------------------------------------------------------------------/
				484
				485	static float64
				486	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
				487	{
				488	int8 shiftCount;
				489
				490	shiftCount = countLeadingZeros64( zSig ) - 1;
				491	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				492
				493	}
				494
				495	#ifdef FLOATX80
				496
				497	/*----------------------------------------------------------------------------
				498	\| Returns the fraction bits of the extended double-precision floating-point
				499	\| value `a'.
				500	----------------------------------------------------------------------------/
				501
				502	INLINE bits64 extractFloatx80Frac( floatx80 a )
				503	{
				504
				505	return a.low;
				506
				507	}
				508
				509	/*----------------------------------------------------------------------------
				510	\| Returns the exponent bits of the extended double-precision floating-point
				511	\| value `a'.
				512	----------------------------------------------------------------------------/
				513
				514	INLINE int32 extractFloatx80Exp( floatx80 a )
				515	{
				516
				517	return a.high & 0x7FFF;
				518
				519	}
				520
				521	/*----------------------------------------------------------------------------
				522	\| Returns the sign bit of the extended double-precision floating-point value
				523	\| `a'.
				524	----------------------------------------------------------------------------/
				525
				526	INLINE flag extractFloatx80Sign( floatx80 a )
				527	{
				528
				529	return a.high>>15;
				530
				531	}
				532
				533	/*----------------------------------------------------------------------------
				534	\| Normalizes the subnormal extended double-precision floating-point value
				535	\| represented by the denormalized significand `aSig'. The normalized exponent
				536	\| and significand are stored at the locations pointed to by `zExpPtr' and
				537	\| `zSigPtr', respectively.
				538	----------------------------------------------------------------------------/
				539
				540	static void
				541	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
				542	{
				543	int8 shiftCount;
				544
				545	shiftCount = countLeadingZeros64( aSig );
				546	*zSigPtr = aSig<<shiftCount;
				547	*zExpPtr = 1 - shiftCount;
				548
				549	}
				550
				551	/*----------------------------------------------------------------------------
				552	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				553	\| extended double-precision floating-point value, returning the result.
				554	----------------------------------------------------------------------------/
				555
				556	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
				557	{
				558	floatx80 z;
				559
				560	z.low = zSig;
				561	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
				562	return z;
				563
				564	}
				565
				566	/*----------------------------------------------------------------------------
				567	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				568	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
				569	\| and returns the proper extended double-precision floating-point value
				570	\| corresponding to the abstract input. Ordinarily, the abstract value is
				571	\| rounded and packed into the extended double-precision format, with the
				572	\| inexact exception raised if the abstract input cannot be represented
				573	\| exactly. However, if the abstract value is too large, the overflow and
				574	\| inexact exceptions are raised and an infinity or maximal finite value is
				575	\| returned. If the abstract value is too small, the input value is rounded to
				576	\| a subnormal number, and the underflow and inexact exceptions are raised if
				577	\| the abstract input cannot be represented exactly as a subnormal extended
				578	\| double-precision floating-point number.
				579	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
				580	\| number of bits as single or double precision, respectively. Otherwise, the
				581	\| result is rounded to the full precision of the extended double-precision
				582	\| format.
				583	\| The input significand must be normalized or smaller. If the input
				584	\| significand is not normalized, `zExp' must be 0; in that case, the result
				585	\| returned is a subnormal number, and it must not require rounding. The
				586	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				587	\| Floating-Point Arithmetic.
				588	----------------------------------------------------------------------------/
				589
				590	static floatx80
				591	roundAndPackFloatx80(
				592	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				593	STATUS_PARAM)
				594	{
				595	int8 roundingMode;
				596	flag roundNearestEven, increment, isTiny;
				597	int64 roundIncrement, roundMask, roundBits;
				598
				599	roundingMode = STATUS(float_rounding_mode);
				600	roundNearestEven = ( roundingMode == float_round_nearest_even );
				601	if ( roundingPrecision == 80 ) goto precision80;
				602	if ( roundingPrecision == 64 ) {
				603	roundIncrement = LIT64( 0x0000000000000400 );
				604	roundMask = LIT64( 0x00000000000007FF );
				605	}
				606	else if ( roundingPrecision == 32 ) {
				607	roundIncrement = LIT64( 0x0000008000000000 );
				608	roundMask = LIT64( 0x000000FFFFFFFFFF );
				609	}
				610	else {
				611	goto precision80;
				612	}
				613	zSig0 \|= ( zSig1 != 0 );
				614	if ( ! roundNearestEven ) {
				615	if ( roundingMode == float_round_to_zero ) {
				616	roundIncrement = 0;
				617	}
				618	else {
				619	roundIncrement = roundMask;
				620	if ( zSign ) {
				621	if ( roundingMode == float_round_up ) roundIncrement = 0;
				622	}
				623	else {
				624	if ( roundingMode == float_round_down ) roundIncrement = 0;
				625	}
				626	}
				627	}
				628	roundBits = zSig0 & roundMask;
				629	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				630	if ( ( 0x7FFE < zExp )
				631	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				632	) {
				633	goto overflow;
				634	}
				635	if ( zExp <= 0 ) {
				636	isTiny =
				637	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				638	\|\| ( zExp < 0 )
				639	\|\| ( zSig0 <= zSig0 + roundIncrement );
				640	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				641	zExp = 0;
				642	roundBits = zSig0 & roundMask;
				643	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				644	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				645	zSig0 += roundIncrement;
				646	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				647	roundIncrement = roundMask + 1;
				648	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				649	roundMask \|= roundIncrement;
				650	}
				651	zSig0 &= ~ roundMask;
				652	return packFloatx80( zSign, zExp, zSig0 );
				653	}
				654	}
				655	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				656	zSig0 += roundIncrement;
				657	if ( zSig0 < roundIncrement ) {
				658	++zExp;
				659	zSig0 = LIT64( 0x8000000000000000 );
				660	}
				661	roundIncrement = roundMask + 1;
				662	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				663	roundMask \|= roundIncrement;
				664	}
				665	zSig0 &= ~ roundMask;
				666	if ( zSig0 == 0 ) zExp = 0;
				667	return packFloatx80( zSign, zExp, zSig0 );
				668	precision80:
				669	increment = ( (sbits64) zSig1 < 0 );
				670	if ( ! roundNearestEven ) {
				671	if ( roundingMode == float_round_to_zero ) {
				672	increment = 0;
				673	}
				674	else {
				675	if ( zSign ) {
				676	increment = ( roundingMode == float_round_down ) && zSig1;
				677	}
				678	else {
				679	increment = ( roundingMode == float_round_up ) && zSig1;
				680	}
				681	}
				682	}
				683	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				684	if ( ( 0x7FFE < zExp )
				685	\|\| ( ( zExp == 0x7FFE )
				686	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				687	&& increment
				688	)
				689	) {
				690	roundMask = 0;
				691	overflow:
				692	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				693	if ( ( roundingMode == float_round_to_zero )
				694	\|\| ( zSign && ( roundingMode == float_round_up ) )
				695	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				696	) {
				697	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				698	}
				699	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				700	}
				701	if ( zExp <= 0 ) {
				702	isTiny =
				703	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				704	\|\| ( zExp < 0 )
				705	\|\| ! increment
				706	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				707	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				708	zExp = 0;
				709	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
				710	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				711	if ( roundNearestEven ) {
				712	increment = ( (sbits64) zSig1 < 0 );
				713	}
				714	else {
				715	if ( zSign ) {
				716	increment = ( roundingMode == float_round_down ) && zSig1;
				717	}
				718	else {
				719	increment = ( roundingMode == float_round_up ) && zSig1;
				720	}
				721	}
				722	if ( increment ) {
				723	++zSig0;
				724	zSig0 &=
				725	~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				726	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				727	}
				728	return packFloatx80( zSign, zExp, zSig0 );
				729	}
				730	}
				731	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				732	if ( increment ) {
				733	++zSig0;
				734	if ( zSig0 == 0 ) {
				735	++zExp;
				736	zSig0 = LIT64( 0x8000000000000000 );
				737	}
				738	else {
				739	zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				740	}
				741	}
				742	else {
				743	if ( zSig0 == 0 ) zExp = 0;
				744	}
				745	return packFloatx80( zSign, zExp, zSig0 );
				746
				747	}
				748
				749	/*----------------------------------------------------------------------------
				750	\| Takes an abstract floating-point value having sign `zSign', exponent
				751	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				752	\| and returns the proper extended double-precision floating-point value
				753	\| corresponding to the abstract input. This routine is just like
				754	\| `roundAndPackFloatx80' except that the input significand does not have to be
				755	\| normalized.
				756	----------------------------------------------------------------------------/
				757
				758	static floatx80
				759	normalizeRoundAndPackFloatx80(
				760	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				761	STATUS_PARAM)
				762	{
				763	int8 shiftCount;
				764
				765	if ( zSig0 == 0 ) {
				766	zSig0 = zSig1;
				767	zSig1 = 0;
				768	zExp -= 64;
				769	}
				770	shiftCount = countLeadingZeros64( zSig0 );
				771	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				772	zExp -= shiftCount;
				773	return
				774	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
				775
				776	}
				777
				778	#endif
				779
				780	#ifdef FLOAT128
				781
				782	/*----------------------------------------------------------------------------
				783	\| Returns the least-significant 64 fraction bits of the quadruple-precision
				784	\| floating-point value `a'.
				785	----------------------------------------------------------------------------/
				786
				787	INLINE bits64 extractFloat128Frac1( float128 a )
				788	{
				789
				790	return a.low;
				791
				792	}
				793
				794	/*----------------------------------------------------------------------------
				795	\| Returns the most-significant 48 fraction bits of the quadruple-precision
				796	\| floating-point value `a'.
				797	----------------------------------------------------------------------------/
				798
				799	INLINE bits64 extractFloat128Frac0( float128 a )
				800	{
				801
				802	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
				803
				804	}
				805
				806	/*----------------------------------------------------------------------------
				807	\| Returns the exponent bits of the quadruple-precision floating-point value
				808	\| `a'.
				809	----------------------------------------------------------------------------/
				810
				811	INLINE int32 extractFloat128Exp( float128 a )
				812	{
				813
				814	return ( a.high>>48 ) & 0x7FFF;
				815
				816	}
				817
				818	/*----------------------------------------------------------------------------
				819	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
				820	----------------------------------------------------------------------------/
				821
				822	INLINE flag extractFloat128Sign( float128 a )
				823	{
				824
				825	return a.high>>63;
				826
				827	}
				828
				829	/*----------------------------------------------------------------------------
				830	\| Normalizes the subnormal quadruple-precision floating-point value
				831	\| represented by the denormalized significand formed by the concatenation of
				832	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
				833	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
				834	\| significand are stored at the location pointed to by `zSig0Ptr', and the
				835	\| least significant 64 bits of the normalized significand are stored at the
				836	\| location pointed to by `zSig1Ptr'.
				837	----------------------------------------------------------------------------/
				838
				839	static void
				840	normalizeFloat128Subnormal(
				841	bits64 aSig0,
				842	bits64 aSig1,
				843	int32 *zExpPtr,
				844	bits64 *zSig0Ptr,
				845	bits64 *zSig1Ptr
				846	)
				847	{
				848	int8 shiftCount;
				849
				850	if ( aSig0 == 0 ) {
				851	shiftCount = countLeadingZeros64( aSig1 ) - 15;
				852	if ( shiftCount < 0 ) {
				853	*zSig0Ptr = aSig1>>( - shiftCount );
				854	*zSig1Ptr = aSig1<<( shiftCount & 63 );
				855	}
				856	else {
				857	*zSig0Ptr = aSig1<<shiftCount;
				858	*zSig1Ptr = 0;
				859	}
				860	*zExpPtr = - shiftCount - 63;
				861	}
				862	else {
				863	shiftCount = countLeadingZeros64( aSig0 ) - 15;
				864	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
				865	*zExpPtr = 1 - shiftCount;
				866	}
				867
				868	}
				869
				870	/*----------------------------------------------------------------------------
				871	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
				872	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
				873	\| floating-point value, returning the result. After being shifted into the
				874	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
				875	\| added together to form the most significant 32 bits of the result. This
				876	\| means that any integer portion of `zSig0' will be added into the exponent.
				877	\| Since a properly normalized significand will have an integer portion equal
				878	\| to 1, the `zExp' input should be 1 less than the desired result exponent
				879	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
				880	\| significand.
				881	----------------------------------------------------------------------------/
				882
				883	INLINE float128
				884	packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
				885	{
				886	float128 z;
				887
				888	z.low = zSig1;
				889	z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
				890	return z;
				891
				892	}
				893
				894	/*----------------------------------------------------------------------------
				895	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				896	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
				897	\| and `zSig2', and returns the proper quadruple-precision floating-point value
				898	\| corresponding to the abstract input. Ordinarily, the abstract value is
				899	\| simply rounded and packed into the quadruple-precision format, with the
				900	\| inexact exception raised if the abstract input cannot be represented
				901	\| exactly. However, if the abstract value is too large, the overflow and
				902	\| inexact exceptions are raised and an infinity or maximal finite value is
				903	\| returned. If the abstract value is too small, the input value is rounded to
				904	\| a subnormal number, and the underflow and inexact exceptions are raised if
				905	\| the abstract input cannot be represented exactly as a subnormal quadruple-
				906	\| precision floating-point number.
				907	\| The input significand must be normalized or smaller. If the input
				908	\| significand is not normalized, `zExp' must be 0; in that case, the result
				909	\| returned is a subnormal number, and it must not require rounding. In the
				910	\| usual case that the input significand is normalized, `zExp' must be 1 less
				911	\| than the ``true'' floating-point exponent. The handling of underflow and
				912	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				913	----------------------------------------------------------------------------/
				914
				915	static float128
				916	roundAndPackFloat128(
				917	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
				918	{
				919	int8 roundingMode;
				920	flag roundNearestEven, increment, isTiny;
				921
				922	roundingMode = STATUS(float_rounding_mode);
				923	roundNearestEven = ( roundingMode == float_round_nearest_even );
				924	increment = ( (sbits64) zSig2 < 0 );
				925	if ( ! roundNearestEven ) {
				926	if ( roundingMode == float_round_to_zero ) {
				927	increment = 0;
				928	}
				929	else {
				930	if ( zSign ) {
				931	increment = ( roundingMode == float_round_down ) && zSig2;
				932	}
				933	else {
				934	increment = ( roundingMode == float_round_up ) && zSig2;
				935	}
				936	}
				937	}
				938	if ( 0x7FFD <= (bits32) zExp ) {
				939	if ( ( 0x7FFD < zExp )
				940	\|\| ( ( zExp == 0x7FFD )
				941	&& eq128(
				942	LIT64( 0x0001FFFFFFFFFFFF ),
				943	LIT64( 0xFFFFFFFFFFFFFFFF ),
				944	zSig0,
				945	zSig1
				946	)
				947	&& increment
				948	)
				949	) {
				950	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				951	if ( ( roundingMode == float_round_to_zero )
				952	\|\| ( zSign && ( roundingMode == float_round_up ) )
				953	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				954	) {
				955	return
				956	packFloat128(
				957	zSign,
				958	0x7FFE,
				959	LIT64( 0x0000FFFFFFFFFFFF ),
				960	LIT64( 0xFFFFFFFFFFFFFFFF )
				961	);
				962	}
				963	return packFloat128( zSign, 0x7FFF, 0, 0 );
				964	}
				965	if ( zExp < 0 ) {
				966	isTiny =
				967	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				968	\|\| ( zExp < -1 )
				969	\|\| ! increment
				970	\|\| lt128(
				971	zSig0,
				972	zSig1,
				973	LIT64( 0x0001FFFFFFFFFFFF ),
				974	LIT64( 0xFFFFFFFFFFFFFFFF )
				975	);
				976	shift128ExtraRightJamming(
				977	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
				978	zExp = 0;
				979	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
				980	if ( roundNearestEven ) {
				981	increment = ( (sbits64) zSig2 < 0 );
				982	}
				983	else {
				984	if ( zSign ) {
				985	increment = ( roundingMode == float_round_down ) && zSig2;
				986	}
				987	else {
				988	increment = ( roundingMode == float_round_up ) && zSig2;
				989	}
				990	}
				991	}
				992	}
				993	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				994	if ( increment ) {
				995	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
				996	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
				997	}
				998	else {
				999	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
				1000	}
				1001	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1002
				1003	}
				1004
				1005	/*----------------------------------------------------------------------------
				1006	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				1007	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
				1008	\| returns the proper quadruple-precision floating-point value corresponding
				1009	\| to the abstract input. This routine is just like `roundAndPackFloat128'
				1010	\| except that the input significand has fewer bits and does not have to be
				1011	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
				1012	\| point exponent.
				1013	----------------------------------------------------------------------------/
				1014
				1015	static float128
				1016	normalizeRoundAndPackFloat128(
				1017	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
				1018	{
				1019	int8 shiftCount;
				1020	bits64 zSig2;
				1021
				1022	if ( zSig0 == 0 ) {
				1023	zSig0 = zSig1;
				1024	zSig1 = 0;
				1025	zExp -= 64;
				1026	}
				1027	shiftCount = countLeadingZeros64( zSig0 ) - 15;
				1028	if ( 0 <= shiftCount ) {
				1029	zSig2 = 0;
				1030	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1031	}
				1032	else {
				1033	shift128ExtraRightJamming(
				1034	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
				1035	}
				1036	zExp -= shiftCount;
				1037	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
				1038
				1039	}
				1040
				1041	#endif
				1042
				1043	/*----------------------------------------------------------------------------
				1044	\| Returns the result of converting the 32-bit two's complement integer `a'
				1045	\| to the single-precision floating-point format. The conversion is performed
				1046	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1047	----------------------------------------------------------------------------/
				1048
				1049	float32 int32_to_float32( int32 a STATUS_PARAM )
				1050	{
				1051	flag zSign;
				1052
				1053	if ( a == 0 ) return 0;
				1054	if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
				1055	zSign = ( a < 0 );
				1056	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
				1057
				1058	}
				1059
				1060	/*----------------------------------------------------------------------------
				1061	\| Returns the result of converting the 32-bit two's complement integer `a'
				1062	\| to the double-precision floating-point format. The conversion is performed
				1063	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1064	----------------------------------------------------------------------------/
				1065
				1066	float64 int32_to_float64( int32 a STATUS_PARAM )
				1067	{
				1068	flag zSign;
				1069	uint32 absA;
				1070	int8 shiftCount;
				1071	bits64 zSig;
				1072
				1073	if ( a == 0 ) return 0;
				1074	zSign = ( a < 0 );
				1075	absA = zSign ? - a : a;
				1076	shiftCount = countLeadingZeros32( absA ) + 21;
				1077	zSig = absA;
				1078	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
				1079
				1080	}
				1081
				1082	#ifdef FLOATX80
				1083
				1084	/*----------------------------------------------------------------------------
				1085	\| Returns the result of converting the 32-bit two's complement integer `a'
				1086	\| to the extended double-precision floating-point format. The conversion
				1087	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1088	\| Arithmetic.
				1089	----------------------------------------------------------------------------/
				1090
				1091	floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
				1092	{
				1093	flag zSign;
				1094	uint32 absA;
				1095	int8 shiftCount;
				1096	bits64 zSig;
				1097
				1098	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1099	zSign = ( a < 0 );
				1100	absA = zSign ? - a : a;
				1101	shiftCount = countLeadingZeros32( absA ) + 32;
				1102	zSig = absA;
				1103	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				1104
				1105	}
				1106
				1107	#endif
				1108
				1109	#ifdef FLOAT128
				1110
				1111	/*----------------------------------------------------------------------------
				1112	\| Returns the result of converting the 32-bit two's complement integer `a' to
				1113	\| the quadruple-precision floating-point format. The conversion is performed
				1114	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1115	----------------------------------------------------------------------------/
				1116
				1117	float128 int32_to_float128( int32 a STATUS_PARAM )
				1118	{
				1119	flag zSign;
				1120	uint32 absA;
				1121	int8 shiftCount;
				1122	bits64 zSig0;
				1123
				1124	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1125	zSign = ( a < 0 );
				1126	absA = zSign ? - a : a;
				1127	shiftCount = countLeadingZeros32( absA ) + 17;
				1128	zSig0 = absA;
				1129	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
				1130
				1131	}
				1132
				1133	#endif
				1134
				1135	/*----------------------------------------------------------------------------
				1136	\| Returns the result of converting the 64-bit two's complement integer `a'
				1137	\| to the single-precision floating-point format. The conversion is performed
				1138	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1139	----------------------------------------------------------------------------/
				1140
				1141	float32 int64_to_float32( int64 a STATUS_PARAM )
				1142	{
				1143	flag zSign;
				1144	uint64 absA;
				1145	int8 shiftCount;
				1146
				1147	if ( a == 0 ) return 0;
				1148	zSign = ( a < 0 );
				1149	absA = zSign ? - a : a;
				1150	shiftCount = countLeadingZeros64( absA ) - 40;
				1151	if ( 0 <= shiftCount ) {
				1152	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
				1153	}
				1154	else {
				1155	shiftCount += 7;
				1156	if ( shiftCount < 0 ) {
				1157	shift64RightJamming( absA, - shiftCount, &absA );
				1158	}
				1159	else {
				1160	absA <<= shiftCount;
				1161	}
				1162	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
				1163	}
				1164
				1165	}
				1166
j_mayer	3430b0b	2007-03-20 22:25:37 +0000	[diff] [blame]	1167	float32 uint64_to_float32( uint64 a STATUS_PARAM )
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1168	{
				1169	int8 shiftCount;
				1170
				1171	if ( a == 0 ) return 0;
				1172	shiftCount = countLeadingZeros64( a ) - 40;
				1173	if ( 0 <= shiftCount ) {
				1174	return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
				1175	}
				1176	else {
				1177	shiftCount += 7;
				1178	if ( shiftCount < 0 ) {
				1179	shift64RightJamming( a, - shiftCount, &a );
				1180	}
				1181	else {
				1182	a <<= shiftCount;
				1183	}
				1184	return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
				1185	}
				1186	}
				1187
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1188	/*----------------------------------------------------------------------------
				1189	\| Returns the result of converting the 64-bit two's complement integer `a'
				1190	\| to the double-precision floating-point format. The conversion is performed
				1191	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1192	----------------------------------------------------------------------------/
				1193
				1194	float64 int64_to_float64( int64 a STATUS_PARAM )
				1195	{
				1196	flag zSign;
				1197
				1198	if ( a == 0 ) return 0;
				1199	if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
				1200	return packFloat64( 1, 0x43E, 0 );
				1201	}
				1202	zSign = ( a < 0 );
				1203	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
				1204
				1205	}
				1206
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1207	float64 uint64_to_float64( uint64 a STATUS_PARAM )
				1208	{
				1209	if ( a == 0 ) return 0;
				1210	return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
				1211
				1212	}
				1213
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1214	#ifdef FLOATX80
				1215
				1216	/*----------------------------------------------------------------------------
				1217	\| Returns the result of converting the 64-bit two's complement integer `a'
				1218	\| to the extended double-precision floating-point format. The conversion
				1219	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1220	\| Arithmetic.
				1221	----------------------------------------------------------------------------/
				1222
				1223	floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
				1224	{
				1225	flag zSign;
				1226	uint64 absA;
				1227	int8 shiftCount;
				1228
				1229	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1230	zSign = ( a < 0 );
				1231	absA = zSign ? - a : a;
				1232	shiftCount = countLeadingZeros64( absA );
				1233	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
				1234
				1235	}
				1236
				1237	#endif
				1238
				1239	#ifdef FLOAT128
				1240
				1241	/*----------------------------------------------------------------------------
				1242	\| Returns the result of converting the 64-bit two's complement integer `a' to
				1243	\| the quadruple-precision floating-point format. The conversion is performed
				1244	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1245	----------------------------------------------------------------------------/
				1246
				1247	float128 int64_to_float128( int64 a STATUS_PARAM )
				1248	{
				1249	flag zSign;
				1250	uint64 absA;
				1251	int8 shiftCount;
				1252	int32 zExp;
				1253	bits64 zSig0, zSig1;
				1254
				1255	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1256	zSign = ( a < 0 );
				1257	absA = zSign ? - a : a;
				1258	shiftCount = countLeadingZeros64( absA ) + 49;
				1259	zExp = 0x406E - shiftCount;
				1260	if ( 64 <= shiftCount ) {
				1261	zSig1 = 0;
				1262	zSig0 = absA;
				1263	shiftCount -= 64;
				1264	}
				1265	else {
				1266	zSig1 = absA;
				1267	zSig0 = 0;
				1268	}
				1269	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1270	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1271
				1272	}
				1273
				1274	#endif
				1275
				1276	/*----------------------------------------------------------------------------
				1277	\| Returns the result of converting the single-precision floating-point value
				1278	\| `a' to the 32-bit two's complement integer format. The conversion is
				1279	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1280	\| Arithmetic---which means in particular that the conversion is rounded
				1281	\| according to the current rounding mode. If `a' is a NaN, the largest
				1282	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1283	\| largest integer with the same sign as `a' is returned.
				1284	----------------------------------------------------------------------------/
				1285
				1286	int32 float32_to_int32( float32 a STATUS_PARAM )
				1287	{
				1288	flag aSign;
				1289	int16 aExp, shiftCount;
				1290	bits32 aSig;
				1291	bits64 aSig64;
				1292
				1293	aSig = extractFloat32Frac( a );
				1294	aExp = extractFloat32Exp( a );
				1295	aSign = extractFloat32Sign( a );
				1296	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
				1297	if ( aExp ) aSig \|= 0x00800000;
				1298	shiftCount = 0xAF - aExp;
				1299	aSig64 = aSig;
				1300	aSig64 <<= 32;
				1301	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
				1302	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
				1303
				1304	}
				1305
				1306	/*----------------------------------------------------------------------------
				1307	\| Returns the result of converting the single-precision floating-point value
				1308	\| `a' to the 32-bit two's complement integer format. The conversion is
				1309	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1310	\| Arithmetic, except that the conversion is always rounded toward zero.
				1311	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1312	\| the conversion overflows, the largest integer with the same sign as `a' is
				1313	\| returned.
				1314	----------------------------------------------------------------------------/
				1315
				1316	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
				1317	{
				1318	flag aSign;
				1319	int16 aExp, shiftCount;
				1320	bits32 aSig;
				1321	int32 z;
				1322
				1323	aSig = extractFloat32Frac( a );
				1324	aExp = extractFloat32Exp( a );
				1325	aSign = extractFloat32Sign( a );
				1326	shiftCount = aExp - 0x9E;
				1327	if ( 0 <= shiftCount ) {
				1328	if ( a != 0xCF000000 ) {
				1329	float_raise( float_flag_invalid STATUS_VAR);
				1330	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				1331	}
				1332	return (sbits32) 0x80000000;
				1333	}
				1334	else if ( aExp <= 0x7E ) {
				1335	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1336	return 0;
				1337	}
				1338	aSig = ( aSig \| 0x00800000 )<<8;
				1339	z = aSig>>( - shiftCount );
				1340	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
				1341	STATUS(float_exception_flags) \|= float_flag_inexact;
				1342	}
				1343	if ( aSign ) z = - z;
				1344	return z;
				1345
				1346	}
				1347
				1348	/*----------------------------------------------------------------------------
				1349	\| Returns the result of converting the single-precision floating-point value
				1350	\| `a' to the 64-bit two's complement integer format. The conversion is
				1351	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1352	\| Arithmetic---which means in particular that the conversion is rounded
				1353	\| according to the current rounding mode. If `a' is a NaN, the largest
				1354	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1355	\| largest integer with the same sign as `a' is returned.
				1356	----------------------------------------------------------------------------/
				1357
				1358	int64 float32_to_int64( float32 a STATUS_PARAM )
				1359	{
				1360	flag aSign;
				1361	int16 aExp, shiftCount;
				1362	bits32 aSig;
				1363	bits64 aSig64, aSigExtra;
				1364
				1365	aSig = extractFloat32Frac( a );
				1366	aExp = extractFloat32Exp( a );
				1367	aSign = extractFloat32Sign( a );
				1368	shiftCount = 0xBE - aExp;
				1369	if ( shiftCount < 0 ) {
				1370	float_raise( float_flag_invalid STATUS_VAR);
				1371	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1372	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1373	}
				1374	return (sbits64) LIT64( 0x8000000000000000 );
				1375	}
				1376	if ( aExp ) aSig \|= 0x00800000;
				1377	aSig64 = aSig;
				1378	aSig64 <<= 40;
				1379	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
				1380	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
				1381
				1382	}
				1383
				1384	/*----------------------------------------------------------------------------
				1385	\| Returns the result of converting the single-precision floating-point value
				1386	\| `a' to the 64-bit two's complement integer format. The conversion is
				1387	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1388	\| Arithmetic, except that the conversion is always rounded toward zero. If
				1389	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1390	\| conversion overflows, the largest integer with the same sign as `a' is
				1391	\| returned.
				1392	----------------------------------------------------------------------------/
				1393
				1394	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
				1395	{
				1396	flag aSign;
				1397	int16 aExp, shiftCount;
				1398	bits32 aSig;
				1399	bits64 aSig64;
				1400	int64 z;
				1401
				1402	aSig = extractFloat32Frac( a );
				1403	aExp = extractFloat32Exp( a );
				1404	aSign = extractFloat32Sign( a );
				1405	shiftCount = aExp - 0xBE;
				1406	if ( 0 <= shiftCount ) {
				1407	if ( a != 0xDF000000 ) {
				1408	float_raise( float_flag_invalid STATUS_VAR);
				1409	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1410	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1411	}
				1412	}
				1413	return (sbits64) LIT64( 0x8000000000000000 );
				1414	}
				1415	else if ( aExp <= 0x7E ) {
				1416	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1417	return 0;
				1418	}
				1419	aSig64 = aSig \| 0x00800000;
				1420	aSig64 <<= 40;
				1421	z = aSig64>>( - shiftCount );
				1422	if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
				1423	STATUS(float_exception_flags) \|= float_flag_inexact;
				1424	}
				1425	if ( aSign ) z = - z;
				1426	return z;
				1427
				1428	}
				1429
				1430	/*----------------------------------------------------------------------------
				1431	\| Returns the result of converting the single-precision floating-point value
				1432	\| `a' to the double-precision floating-point format. The conversion is
				1433	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1434	\| Arithmetic.
				1435	----------------------------------------------------------------------------/
				1436
				1437	float64 float32_to_float64( float32 a STATUS_PARAM )
				1438	{
				1439	flag aSign;
				1440	int16 aExp;
				1441	bits32 aSig;
				1442
				1443	aSig = extractFloat32Frac( a );
				1444	aExp = extractFloat32Exp( a );
				1445	aSign = extractFloat32Sign( a );
				1446	if ( aExp == 0xFF ) {
				1447	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
				1448	return packFloat64( aSign, 0x7FF, 0 );
				1449	}
				1450	if ( aExp == 0 ) {
				1451	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				1452	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1453	--aExp;
				1454	}
				1455	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
				1456
				1457	}
				1458
				1459	#ifdef FLOATX80
				1460
				1461	/*----------------------------------------------------------------------------
				1462	\| Returns the result of converting the single-precision floating-point value
				1463	\| `a' to the extended double-precision floating-point format. The conversion
				1464	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1465	\| Arithmetic.
				1466	----------------------------------------------------------------------------/
				1467
				1468	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
				1469	{
				1470	flag aSign;
				1471	int16 aExp;
				1472	bits32 aSig;
				1473
				1474	aSig = extractFloat32Frac( a );
				1475	aExp = extractFloat32Exp( a );
				1476	aSign = extractFloat32Sign( a );
				1477	if ( aExp == 0xFF ) {
				1478	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
				1479	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1480	}
				1481	if ( aExp == 0 ) {
				1482	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1483	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1484	}
				1485	aSig \|= 0x00800000;
				1486	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
				1487
				1488	}
				1489
				1490	#endif
				1491
				1492	#ifdef FLOAT128
				1493
				1494	/*----------------------------------------------------------------------------
				1495	\| Returns the result of converting the single-precision floating-point value
				1496	\| `a' to the double-precision floating-point format. The conversion is
				1497	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1498	\| Arithmetic.
				1499	----------------------------------------------------------------------------/
				1500
				1501	float128 float32_to_float128( float32 a STATUS_PARAM )
				1502	{
				1503	flag aSign;
				1504	int16 aExp;
				1505	bits32 aSig;
				1506
				1507	aSig = extractFloat32Frac( a );
				1508	aExp = extractFloat32Exp( a );
				1509	aSign = extractFloat32Sign( a );
				1510	if ( aExp == 0xFF ) {
				1511	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
				1512	return packFloat128( aSign, 0x7FFF, 0, 0 );
				1513	}
				1514	if ( aExp == 0 ) {
				1515	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				1516	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1517	--aExp;
				1518	}
				1519	return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
				1520
				1521	}
				1522
				1523	#endif
				1524
				1525	/*----------------------------------------------------------------------------
				1526	\| Rounds the single-precision floating-point value `a' to an integer, and
				1527	\| returns the result as a single-precision floating-point value. The
				1528	\| operation is performed according to the IEC/IEEE Standard for Binary
				1529	\| Floating-Point Arithmetic.
				1530	----------------------------------------------------------------------------/
				1531
				1532	float32 float32_round_to_int( float32 a STATUS_PARAM)
				1533	{
				1534	flag aSign;
				1535	int16 aExp;
				1536	bits32 lastBitMask, roundBitsMask;
				1537	int8 roundingMode;
				1538	float32 z;
				1539
				1540	aExp = extractFloat32Exp( a );
				1541	if ( 0x96 <= aExp ) {
				1542	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				1543	return propagateFloat32NaN( a, a STATUS_VAR );
				1544	}
				1545	return a;
				1546	}
				1547	if ( aExp <= 0x7E ) {
				1548	if ( (bits32) ( a<<1 ) == 0 ) return a;
				1549	STATUS(float_exception_flags) \|= float_flag_inexact;
				1550	aSign = extractFloat32Sign( a );
				1551	switch ( STATUS(float_rounding_mode) ) {
				1552	case float_round_nearest_even:
				1553	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				1554	return packFloat32( aSign, 0x7F, 0 );
				1555	}
				1556	break;
				1557	case float_round_down:
				1558	return aSign ? 0xBF800000 : 0;
				1559	case float_round_up:
				1560	return aSign ? 0x80000000 : 0x3F800000;
				1561	}
				1562	return packFloat32( aSign, 0, 0 );
				1563	}
				1564	lastBitMask = 1;
				1565	lastBitMask <<= 0x96 - aExp;
				1566	roundBitsMask = lastBitMask - 1;
				1567	z = a;
				1568	roundingMode = STATUS(float_rounding_mode);
				1569	if ( roundingMode == float_round_nearest_even ) {
				1570	z += lastBitMask>>1;
				1571	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1572	}
				1573	else if ( roundingMode != float_round_to_zero ) {
				1574	if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				1575	z += roundBitsMask;
				1576	}
				1577	}
				1578	z &= ~ roundBitsMask;
				1579	if ( z != a ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1580	return z;
				1581
				1582	}
				1583
				1584	/*----------------------------------------------------------------------------
				1585	\| Returns the result of adding the absolute values of the single-precision
				1586	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				1587	\| before being returned. `zSign' is ignored if the result is a NaN.
				1588	\| The addition is performed according to the IEC/IEEE Standard for Binary
				1589	\| Floating-Point Arithmetic.
				1590	----------------------------------------------------------------------------/
				1591
				1592	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1593	{
				1594	int16 aExp, bExp, zExp;
				1595	bits32 aSig, bSig, zSig;
				1596	int16 expDiff;
				1597
				1598	aSig = extractFloat32Frac( a );
				1599	aExp = extractFloat32Exp( a );
				1600	bSig = extractFloat32Frac( b );
				1601	bExp = extractFloat32Exp( b );
				1602	expDiff = aExp - bExp;
				1603	aSig <<= 6;
				1604	bSig <<= 6;
				1605	if ( 0 < expDiff ) {
				1606	if ( aExp == 0xFF ) {
				1607	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1608	return a;
				1609	}
				1610	if ( bExp == 0 ) {
				1611	--expDiff;
				1612	}
				1613	else {
				1614	bSig \|= 0x20000000;
				1615	}
				1616	shift32RightJamming( bSig, expDiff, &bSig );
				1617	zExp = aExp;
				1618	}
				1619	else if ( expDiff < 0 ) {
				1620	if ( bExp == 0xFF ) {
				1621	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1622	return packFloat32( zSign, 0xFF, 0 );
				1623	}
				1624	if ( aExp == 0 ) {
				1625	++expDiff;
				1626	}
				1627	else {
				1628	aSig \|= 0x20000000;
				1629	}
				1630	shift32RightJamming( aSig, - expDiff, &aSig );
				1631	zExp = bExp;
				1632	}
				1633	else {
				1634	if ( aExp == 0xFF ) {
				1635	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1636	return a;
				1637	}
				1638	if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1639	zSig = 0x40000000 + aSig + bSig;
				1640	zExp = aExp;
				1641	goto roundAndPack;
				1642	}
				1643	aSig \|= 0x20000000;
				1644	zSig = ( aSig + bSig )<<1;
				1645	--zExp;
				1646	if ( (sbits32) zSig < 0 ) {
				1647	zSig = aSig + bSig;
				1648	++zExp;
				1649	}
				1650	roundAndPack:
				1651	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1652
				1653	}
				1654
				1655	/*----------------------------------------------------------------------------
				1656	\| Returns the result of subtracting the absolute values of the single-
				1657	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				1658	\| difference is negated before being returned. `zSign' is ignored if the
				1659	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				1660	\| Standard for Binary Floating-Point Arithmetic.
				1661	----------------------------------------------------------------------------/
				1662
				1663	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1664	{
				1665	int16 aExp, bExp, zExp;
				1666	bits32 aSig, bSig, zSig;
				1667	int16 expDiff;
				1668
				1669	aSig = extractFloat32Frac( a );
				1670	aExp = extractFloat32Exp( a );
				1671	bSig = extractFloat32Frac( b );
				1672	bExp = extractFloat32Exp( b );
				1673	expDiff = aExp - bExp;
				1674	aSig <<= 7;
				1675	bSig <<= 7;
				1676	if ( 0 < expDiff ) goto aExpBigger;
				1677	if ( expDiff < 0 ) goto bExpBigger;
				1678	if ( aExp == 0xFF ) {
				1679	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1680	float_raise( float_flag_invalid STATUS_VAR);
				1681	return float32_default_nan;
				1682	}
				1683	if ( aExp == 0 ) {
				1684	aExp = 1;
				1685	bExp = 1;
				1686	}
				1687	if ( bSig < aSig ) goto aBigger;
				1688	if ( aSig < bSig ) goto bBigger;
				1689	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				1690	bExpBigger:
				1691	if ( bExp == 0xFF ) {
				1692	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1693	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1694	}
				1695	if ( aExp == 0 ) {
				1696	++expDiff;
				1697	}
				1698	else {
				1699	aSig \|= 0x40000000;
				1700	}
				1701	shift32RightJamming( aSig, - expDiff, &aSig );
				1702	bSig \|= 0x40000000;
				1703	bBigger:
				1704	zSig = bSig - aSig;
				1705	zExp = bExp;
				1706	zSign ^= 1;
				1707	goto normalizeRoundAndPack;
				1708	aExpBigger:
				1709	if ( aExp == 0xFF ) {
				1710	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1711	return a;
				1712	}
				1713	if ( bExp == 0 ) {
				1714	--expDiff;
				1715	}
				1716	else {
				1717	bSig \|= 0x40000000;
				1718	}
				1719	shift32RightJamming( bSig, expDiff, &bSig );
				1720	aSig \|= 0x40000000;
				1721	aBigger:
				1722	zSig = aSig - bSig;
				1723	zExp = aExp;
				1724	normalizeRoundAndPack:
				1725	--zExp;
				1726	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1727
				1728	}
				1729
				1730	/*----------------------------------------------------------------------------
				1731	\| Returns the result of adding the single-precision floating-point values `a'
				1732	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				1733	\| Binary Floating-Point Arithmetic.
				1734	----------------------------------------------------------------------------/
				1735
				1736	float32 float32_add( float32 a, float32 b STATUS_PARAM )
				1737	{
				1738	flag aSign, bSign;
				1739
				1740	aSign = extractFloat32Sign( a );
				1741	bSign = extractFloat32Sign( b );
				1742	if ( aSign == bSign ) {
				1743	return addFloat32Sigs( a, b, aSign STATUS_VAR);
				1744	}
				1745	else {
				1746	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1747	}
				1748
				1749	}
				1750
				1751	/*----------------------------------------------------------------------------
				1752	\| Returns the result of subtracting the single-precision floating-point values
				1753	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1754	\| for Binary Floating-Point Arithmetic.
				1755	----------------------------------------------------------------------------/
				1756
				1757	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
				1758	{
				1759	flag aSign, bSign;
				1760
				1761	aSign = extractFloat32Sign( a );
				1762	bSign = extractFloat32Sign( b );
				1763	if ( aSign == bSign ) {
				1764	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1765	}
				1766	else {
				1767	return addFloat32Sigs( a, b, aSign STATUS_VAR );
				1768	}
				1769
				1770	}
				1771
				1772	/*----------------------------------------------------------------------------
				1773	\| Returns the result of multiplying the single-precision floating-point values
				1774	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1775	\| for Binary Floating-Point Arithmetic.
				1776	----------------------------------------------------------------------------/
				1777
				1778	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
				1779	{
				1780	flag aSign, bSign, zSign;
				1781	int16 aExp, bExp, zExp;
				1782	bits32 aSig, bSig;
				1783	bits64 zSig64;
				1784	bits32 zSig;
				1785
				1786	aSig = extractFloat32Frac( a );
				1787	aExp = extractFloat32Exp( a );
				1788	aSign = extractFloat32Sign( a );
				1789	bSig = extractFloat32Frac( b );
				1790	bExp = extractFloat32Exp( b );
				1791	bSign = extractFloat32Sign( b );
				1792	zSign = aSign ^ bSign;
				1793	if ( aExp == 0xFF ) {
				1794	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1795	return propagateFloat32NaN( a, b STATUS_VAR );
				1796	}
				1797	if ( ( bExp \| bSig ) == 0 ) {
				1798	float_raise( float_flag_invalid STATUS_VAR);
				1799	return float32_default_nan;
				1800	}
				1801	return packFloat32( zSign, 0xFF, 0 );
				1802	}
				1803	if ( bExp == 0xFF ) {
				1804	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1805	if ( ( aExp \| aSig ) == 0 ) {
				1806	float_raise( float_flag_invalid STATUS_VAR);
				1807	return float32_default_nan;
				1808	}
				1809	return packFloat32( zSign, 0xFF, 0 );
				1810	}
				1811	if ( aExp == 0 ) {
				1812	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1813	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1814	}
				1815	if ( bExp == 0 ) {
				1816	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1817	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1818	}
				1819	zExp = aExp + bExp - 0x7F;
				1820	aSig = ( aSig \| 0x00800000 )<<7;
				1821	bSig = ( bSig \| 0x00800000 )<<8;
				1822	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
				1823	zSig = zSig64;
				1824	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
				1825	zSig <<= 1;
				1826	--zExp;
				1827	}
				1828	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1829
				1830	}
				1831
				1832	/*----------------------------------------------------------------------------
				1833	\| Returns the result of dividing the single-precision floating-point value `a'
				1834	\| by the corresponding value `b'. The operation is performed according to the
				1835	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1836	----------------------------------------------------------------------------/
				1837
				1838	float32 float32_div( float32 a, float32 b STATUS_PARAM )
				1839	{
				1840	flag aSign, bSign, zSign;
				1841	int16 aExp, bExp, zExp;
				1842	bits32 aSig, bSig, zSig;
				1843
				1844	aSig = extractFloat32Frac( a );
				1845	aExp = extractFloat32Exp( a );
				1846	aSign = extractFloat32Sign( a );
				1847	bSig = extractFloat32Frac( b );
				1848	bExp = extractFloat32Exp( b );
				1849	bSign = extractFloat32Sign( b );
				1850	zSign = aSign ^ bSign;
				1851	if ( aExp == 0xFF ) {
				1852	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1853	if ( bExp == 0xFF ) {
				1854	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1855	float_raise( float_flag_invalid STATUS_VAR);
				1856	return float32_default_nan;
				1857	}
				1858	return packFloat32( zSign, 0xFF, 0 );
				1859	}
				1860	if ( bExp == 0xFF ) {
				1861	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1862	return packFloat32( zSign, 0, 0 );
				1863	}
				1864	if ( bExp == 0 ) {
				1865	if ( bSig == 0 ) {
				1866	if ( ( aExp \| aSig ) == 0 ) {
				1867	float_raise( float_flag_invalid STATUS_VAR);
				1868	return float32_default_nan;
				1869	}
				1870	float_raise( float_flag_divbyzero STATUS_VAR);
				1871	return packFloat32( zSign, 0xFF, 0 );
				1872	}
				1873	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1874	}
				1875	if ( aExp == 0 ) {
				1876	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1877	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1878	}
				1879	zExp = aExp - bExp + 0x7D;
				1880	aSig = ( aSig \| 0x00800000 )<<7;
				1881	bSig = ( bSig \| 0x00800000 )<<8;
				1882	if ( bSig <= ( aSig + aSig ) ) {
				1883	aSig >>= 1;
				1884	++zExp;
				1885	}
				1886	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
				1887	if ( ( zSig & 0x3F ) == 0 ) {
				1888	zSig \|= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
				1889	}
				1890	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1891
				1892	}
				1893
				1894	/*----------------------------------------------------------------------------
				1895	\| Returns the remainder of the single-precision floating-point value `a'
				1896	\| with respect to the corresponding value `b'. The operation is performed
				1897	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1898	----------------------------------------------------------------------------/
				1899
				1900	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
				1901	{
				1902	flag aSign, bSign, zSign;
				1903	int16 aExp, bExp, expDiff;
				1904	bits32 aSig, bSig;
				1905	bits32 q;
				1906	bits64 aSig64, bSig64, q64;
				1907	bits32 alternateASig;
				1908	sbits32 sigMean;
				1909
				1910	aSig = extractFloat32Frac( a );
				1911	aExp = extractFloat32Exp( a );
				1912	aSign = extractFloat32Sign( a );
				1913	bSig = extractFloat32Frac( b );
				1914	bExp = extractFloat32Exp( b );
				1915	bSign = extractFloat32Sign( b );
				1916	if ( aExp == 0xFF ) {
				1917	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1918	return propagateFloat32NaN( a, b STATUS_VAR );
				1919	}
				1920	float_raise( float_flag_invalid STATUS_VAR);
				1921	return float32_default_nan;
				1922	}
				1923	if ( bExp == 0xFF ) {
				1924	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1925	return a;
				1926	}
				1927	if ( bExp == 0 ) {
				1928	if ( bSig == 0 ) {
				1929	float_raise( float_flag_invalid STATUS_VAR);
				1930	return float32_default_nan;
				1931	}
				1932	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1933	}
				1934	if ( aExp == 0 ) {
				1935	if ( aSig == 0 ) return a;
				1936	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1937	}
				1938	expDiff = aExp - bExp;
				1939	aSig \|= 0x00800000;
				1940	bSig \|= 0x00800000;
				1941	if ( expDiff < 32 ) {
				1942	aSig <<= 8;
				1943	bSig <<= 8;
				1944	if ( expDiff < 0 ) {
				1945	if ( expDiff < -1 ) return a;
				1946	aSig >>= 1;
				1947	}
				1948	q = ( bSig <= aSig );
				1949	if ( q ) aSig -= bSig;
				1950	if ( 0 < expDiff ) {
				1951	q = ( ( (bits64) aSig )<<32 ) / bSig;
				1952	q >>= 32 - expDiff;
				1953	bSig >>= 2;
				1954	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				1955	}
				1956	else {
				1957	aSig >>= 2;
				1958	bSig >>= 2;
				1959	}
				1960	}
				1961	else {
				1962	if ( bSig <= aSig ) aSig -= bSig;
				1963	aSig64 = ( (bits64) aSig )<<40;
				1964	bSig64 = ( (bits64) bSig )<<40;
				1965	expDiff -= 64;
				1966	while ( 0 < expDiff ) {
				1967	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1968	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1969	aSig64 = - ( ( bSig * q64 )<<38 );
				1970	expDiff -= 62;
				1971	}
				1972	expDiff += 64;
				1973	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1974	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1975	q = q64>>( 64 - expDiff );
				1976	bSig <<= 6;
				1977	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				1978	}
				1979	do {
				1980	alternateASig = aSig;
				1981	++q;
				1982	aSig -= bSig;
				1983	} while ( 0 <= (sbits32) aSig );
				1984	sigMean = aSig + alternateASig;
				1985	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				1986	aSig = alternateASig;
				1987	}
				1988	zSign = ( (sbits32) aSig < 0 );
				1989	if ( zSign ) aSig = - aSig;
				1990	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
				1991
				1992	}
				1993
				1994	/*----------------------------------------------------------------------------
				1995	\| Returns the square root of the single-precision floating-point value `a'.
				1996	\| The operation is performed according to the IEC/IEEE Standard for Binary
				1997	\| Floating-Point Arithmetic.
				1998	----------------------------------------------------------------------------/
				1999
				2000	float32 float32_sqrt( float32 a STATUS_PARAM )
				2001	{
				2002	flag aSign;
				2003	int16 aExp, zExp;
				2004	bits32 aSig, zSig;
				2005	bits64 rem, term;
				2006
				2007	aSig = extractFloat32Frac( a );
				2008	aExp = extractFloat32Exp( a );
				2009	aSign = extractFloat32Sign( a );
				2010	if ( aExp == 0xFF ) {
				2011	if ( aSig ) return propagateFloat32NaN( a, 0 STATUS_VAR );
				2012	if ( ! aSign ) return a;
				2013	float_raise( float_flag_invalid STATUS_VAR);
				2014	return float32_default_nan;
				2015	}
				2016	if ( aSign ) {
				2017	if ( ( aExp \| aSig ) == 0 ) return a;
				2018	float_raise( float_flag_invalid STATUS_VAR);
				2019	return float32_default_nan;
				2020	}
				2021	if ( aExp == 0 ) {
				2022	if ( aSig == 0 ) return 0;
				2023	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2024	}
				2025	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				2026	aSig = ( aSig \| 0x00800000 )<<8;
				2027	zSig = estimateSqrt32( aExp, aSig ) + 2;
				2028	if ( ( zSig & 0x7F ) <= 5 ) {
				2029	if ( zSig < 2 ) {
				2030	zSig = 0x7FFFFFFF;
				2031	goto roundAndPack;
				2032	}
				2033	aSig >>= aExp & 1;
				2034	term = ( (bits64) zSig ) * zSig;
				2035	rem = ( ( (bits64) aSig )<<32 ) - term;
				2036	while ( (sbits64) rem < 0 ) {
				2037	--zSig;
				2038	rem += ( ( (bits64) zSig )<<1 ) \| 1;
				2039	}
				2040	zSig \|= ( rem != 0 );
				2041	}
				2042	shift32RightJamming( zSig, 1, &zSig );
				2043	roundAndPack:
				2044	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
				2045
				2046	}
				2047
				2048	/*----------------------------------------------------------------------------
				2049	\| Returns 1 if the single-precision floating-point value `a' is equal to
				2050	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				2051	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2052	----------------------------------------------------------------------------/
				2053
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2054	int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2055	{
				2056
				2057	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2058	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2059	) {
				2060	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2061	float_raise( float_flag_invalid STATUS_VAR);
				2062	}
				2063	return 0;
				2064	}
				2065	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				2066
				2067	}
				2068
				2069	/*----------------------------------------------------------------------------
				2070	\| Returns 1 if the single-precision floating-point value `a' is less than
				2071	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
				2072	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				2073	\| Arithmetic.
				2074	----------------------------------------------------------------------------/
				2075
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2076	int float32_le( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2077	{
				2078	flag aSign, bSign;
				2079
				2080	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2081	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2082	) {
				2083	float_raise( float_flag_invalid STATUS_VAR);
				2084	return 0;
				2085	}
				2086	aSign = extractFloat32Sign( a );
				2087	bSign = extractFloat32Sign( b );
				2088	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				2089	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2090
				2091	}
				2092
				2093	/*----------------------------------------------------------------------------
				2094	\| Returns 1 if the single-precision floating-point value `a' is less than
				2095	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				2096	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2097	----------------------------------------------------------------------------/
				2098
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2099	int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2100	{
				2101	flag aSign, bSign;
				2102
				2103	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2104	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2105	) {
				2106	float_raise( float_flag_invalid STATUS_VAR);
				2107	return 0;
				2108	}
				2109	aSign = extractFloat32Sign( a );
				2110	bSign = extractFloat32Sign( b );
				2111	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				2112	return ( a != b ) && ( aSign ^ ( a < b ) );
				2113
				2114	}
				2115
				2116	/*----------------------------------------------------------------------------
				2117	\| Returns 1 if the single-precision floating-point value `a' is equal to
				2118	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2119	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				2120	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2121	----------------------------------------------------------------------------/
				2122
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2123	int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2124	{
				2125
				2126	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2127	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2128	) {
				2129	float_raise( float_flag_invalid STATUS_VAR);
				2130	return 0;
				2131	}
				2132	return ( a == b ) \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				2133
				2134	}
				2135
				2136	/*----------------------------------------------------------------------------
				2137	\| Returns 1 if the single-precision floating-point value `a' is less than or
				2138	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2139	\| cause an exception. Otherwise, the comparison is performed according to the
				2140	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2141	----------------------------------------------------------------------------/
				2142
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2143	int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2144	{
				2145	flag aSign, bSign;
				2146
				2147	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2148	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2149	) {
				2150	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2151	float_raise( float_flag_invalid STATUS_VAR);
				2152	}
				2153	return 0;
				2154	}
				2155	aSign = extractFloat32Sign( a );
				2156	bSign = extractFloat32Sign( b );
				2157	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( a \| b )<<1 ) == 0 );
				2158	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				2159
				2160	}
				2161
				2162	/*----------------------------------------------------------------------------
				2163	\| Returns 1 if the single-precision floating-point value `a' is less than
				2164	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2165	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2166	\| Standard for Binary Floating-Point Arithmetic.
				2167	----------------------------------------------------------------------------/
				2168
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2169	int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2170	{
				2171	flag aSign, bSign;
				2172
				2173	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2174	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2175	) {
				2176	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2177	float_raise( float_flag_invalid STATUS_VAR);
				2178	}
				2179	return 0;
				2180	}
				2181	aSign = extractFloat32Sign( a );
				2182	bSign = extractFloat32Sign( b );
				2183	if ( aSign != bSign ) return aSign && ( (bits32) ( ( a \| b )<<1 ) != 0 );
				2184	return ( a != b ) && ( aSign ^ ( a < b ) );
				2185
				2186	}
				2187
				2188	/*----------------------------------------------------------------------------
				2189	\| Returns the result of converting the double-precision floating-point value
				2190	\| `a' to the 32-bit two's complement integer format. The conversion is
				2191	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2192	\| Arithmetic---which means in particular that the conversion is rounded
				2193	\| according to the current rounding mode. If `a' is a NaN, the largest
				2194	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2195	\| largest integer with the same sign as `a' is returned.
				2196	----------------------------------------------------------------------------/
				2197
				2198	int32 float64_to_int32( float64 a STATUS_PARAM )
				2199	{
				2200	flag aSign;
				2201	int16 aExp, shiftCount;
				2202	bits64 aSig;
				2203
				2204	aSig = extractFloat64Frac( a );
				2205	aExp = extractFloat64Exp( a );
				2206	aSign = extractFloat64Sign( a );
				2207	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2208	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2209	shiftCount = 0x42C - aExp;
				2210	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				2211	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				2212
				2213	}
				2214
				2215	/*----------------------------------------------------------------------------
				2216	\| Returns the result of converting the double-precision floating-point value
				2217	\| `a' to the 32-bit two's complement integer format. The conversion is
				2218	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2219	\| Arithmetic, except that the conversion is always rounded toward zero.
				2220	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2221	\| the conversion overflows, the largest integer with the same sign as `a' is
				2222	\| returned.
				2223	----------------------------------------------------------------------------/
				2224
				2225	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
				2226	{
				2227	flag aSign;
				2228	int16 aExp, shiftCount;
				2229	bits64 aSig, savedASig;
				2230	int32 z;
				2231
				2232	aSig = extractFloat64Frac( a );
				2233	aExp = extractFloat64Exp( a );
				2234	aSign = extractFloat64Sign( a );
				2235	if ( 0x41E < aExp ) {
				2236	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2237	goto invalid;
				2238	}
				2239	else if ( aExp < 0x3FF ) {
				2240	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2241	return 0;
				2242	}
				2243	aSig \|= LIT64( 0x0010000000000000 );
				2244	shiftCount = 0x433 - aExp;
				2245	savedASig = aSig;
				2246	aSig >>= shiftCount;
				2247	z = aSig;
				2248	if ( aSign ) z = - z;
				2249	if ( ( z < 0 ) ^ aSign ) {
				2250	invalid:
				2251	float_raise( float_flag_invalid STATUS_VAR);
				2252	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				2253	}
				2254	if ( ( aSig<<shiftCount ) != savedASig ) {
				2255	STATUS(float_exception_flags) \|= float_flag_inexact;
				2256	}
				2257	return z;
				2258
				2259	}
				2260
				2261	/*----------------------------------------------------------------------------
				2262	\| Returns the result of converting the double-precision floating-point value
				2263	\| `a' to the 64-bit two's complement integer format. The conversion is
				2264	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2265	\| Arithmetic---which means in particular that the conversion is rounded
				2266	\| according to the current rounding mode. If `a' is a NaN, the largest
				2267	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2268	\| largest integer with the same sign as `a' is returned.
				2269	----------------------------------------------------------------------------/
				2270
				2271	int64 float64_to_int64( float64 a STATUS_PARAM )
				2272	{
				2273	flag aSign;
				2274	int16 aExp, shiftCount;
				2275	bits64 aSig, aSigExtra;
				2276
				2277	aSig = extractFloat64Frac( a );
				2278	aExp = extractFloat64Exp( a );
				2279	aSign = extractFloat64Sign( a );
				2280	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2281	shiftCount = 0x433 - aExp;
				2282	if ( shiftCount <= 0 ) {
				2283	if ( 0x43E < aExp ) {
				2284	float_raise( float_flag_invalid STATUS_VAR);
				2285	if ( ! aSign
				2286	\|\| ( ( aExp == 0x7FF )
				2287	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2288	) {
				2289	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2290	}
				2291	return (sbits64) LIT64( 0x8000000000000000 );
				2292	}
				2293	aSigExtra = 0;
				2294	aSig <<= - shiftCount;
				2295	}
				2296	else {
				2297	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				2298	}
				2299	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				2300
				2301	}
				2302
				2303	/*----------------------------------------------------------------------------
				2304	\| Returns the result of converting the double-precision floating-point value
				2305	\| `a' to the 64-bit two's complement integer format. The conversion is
				2306	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2307	\| Arithmetic, except that the conversion is always rounded toward zero.
				2308	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2309	\| the conversion overflows, the largest integer with the same sign as `a' is
				2310	\| returned.
				2311	----------------------------------------------------------------------------/
				2312
				2313	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
				2314	{
				2315	flag aSign;
				2316	int16 aExp, shiftCount;
				2317	bits64 aSig;
				2318	int64 z;
				2319
				2320	aSig = extractFloat64Frac( a );
				2321	aExp = extractFloat64Exp( a );
				2322	aSign = extractFloat64Sign( a );
				2323	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2324	shiftCount = aExp - 0x433;
				2325	if ( 0 <= shiftCount ) {
				2326	if ( 0x43E <= aExp ) {
				2327	if ( a != LIT64( 0xC3E0000000000000 ) ) {
				2328	float_raise( float_flag_invalid STATUS_VAR);
				2329	if ( ! aSign
				2330	\|\| ( ( aExp == 0x7FF )
				2331	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2332	) {
				2333	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2334	}
				2335	}
				2336	return (sbits64) LIT64( 0x8000000000000000 );
				2337	}
				2338	z = aSig<<shiftCount;
				2339	}
				2340	else {
				2341	if ( aExp < 0x3FE ) {
				2342	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2343	return 0;
				2344	}
				2345	z = aSig>>( - shiftCount );
				2346	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
				2347	STATUS(float_exception_flags) \|= float_flag_inexact;
				2348	}
				2349	}
				2350	if ( aSign ) z = - z;
				2351	return z;
				2352
				2353	}
				2354
				2355	/*----------------------------------------------------------------------------
				2356	\| Returns the result of converting the double-precision floating-point value
				2357	\| `a' to the single-precision floating-point format. The conversion is
				2358	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2359	\| Arithmetic.
				2360	----------------------------------------------------------------------------/
				2361
				2362	float32 float64_to_float32( float64 a STATUS_PARAM )
				2363	{
				2364	flag aSign;
				2365	int16 aExp;
				2366	bits64 aSig;
				2367	bits32 zSig;
				2368
				2369	aSig = extractFloat64Frac( a );
				2370	aExp = extractFloat64Exp( a );
				2371	aSign = extractFloat64Sign( a );
				2372	if ( aExp == 0x7FF ) {
				2373	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
				2374	return packFloat32( aSign, 0xFF, 0 );
				2375	}
				2376	shift64RightJamming( aSig, 22, &aSig );
				2377	zSig = aSig;
				2378	if ( aExp \|\| zSig ) {
				2379	zSig \|= 0x40000000;
				2380	aExp -= 0x381;
				2381	}
				2382	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				2383
				2384	}
				2385
				2386	#ifdef FLOATX80
				2387
				2388	/*----------------------------------------------------------------------------
				2389	\| Returns the result of converting the double-precision floating-point value
				2390	\| `a' to the extended double-precision floating-point format. The conversion
				2391	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				2392	\| Arithmetic.
				2393	----------------------------------------------------------------------------/
				2394
				2395	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
				2396	{
				2397	flag aSign;
				2398	int16 aExp;
				2399	bits64 aSig;
				2400
				2401	aSig = extractFloat64Frac( a );
				2402	aExp = extractFloat64Exp( a );
				2403	aSign = extractFloat64Sign( a );
				2404	if ( aExp == 0x7FF ) {
				2405	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
				2406	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2407	}
				2408	if ( aExp == 0 ) {
				2409	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				2410	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2411	}
				2412	return
				2413	packFloatx80(
				2414	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				2415
				2416	}
				2417
				2418	#endif
				2419
				2420	#ifdef FLOAT128
				2421
				2422	/*----------------------------------------------------------------------------
				2423	\| Returns the result of converting the double-precision floating-point value
				2424	\| `a' to the quadruple-precision floating-point format. The conversion is
				2425	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2426	\| Arithmetic.
				2427	----------------------------------------------------------------------------/
				2428
				2429	float128 float64_to_float128( float64 a STATUS_PARAM )
				2430	{
				2431	flag aSign;
				2432	int16 aExp;
				2433	bits64 aSig, zSig0, zSig1;
				2434
				2435	aSig = extractFloat64Frac( a );
				2436	aExp = extractFloat64Exp( a );
				2437	aSign = extractFloat64Sign( a );
				2438	if ( aExp == 0x7FF ) {
				2439	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
				2440	return packFloat128( aSign, 0x7FFF, 0, 0 );
				2441	}
				2442	if ( aExp == 0 ) {
				2443	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				2444	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2445	--aExp;
				2446	}
				2447	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
				2448	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
				2449
				2450	}
				2451
				2452	#endif
				2453
				2454	/*----------------------------------------------------------------------------
				2455	\| Rounds the double-precision floating-point value `a' to an integer, and
				2456	\| returns the result as a double-precision floating-point value. The
				2457	\| operation is performed according to the IEC/IEEE Standard for Binary
				2458	\| Floating-Point Arithmetic.
				2459	----------------------------------------------------------------------------/
				2460
				2461	float64 float64_round_to_int( float64 a STATUS_PARAM )
				2462	{
				2463	flag aSign;
				2464	int16 aExp;
				2465	bits64 lastBitMask, roundBitsMask;
				2466	int8 roundingMode;
				2467	float64 z;
				2468
				2469	aExp = extractFloat64Exp( a );
				2470	if ( 0x433 <= aExp ) {
				2471	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				2472	return propagateFloat64NaN( a, a STATUS_VAR );
				2473	}
				2474	return a;
				2475	}
				2476	if ( aExp < 0x3FF ) {
				2477	if ( (bits64) ( a<<1 ) == 0 ) return a;
				2478	STATUS(float_exception_flags) \|= float_flag_inexact;
				2479	aSign = extractFloat64Sign( a );
				2480	switch ( STATUS(float_rounding_mode) ) {
				2481	case float_round_nearest_even:
				2482	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				2483	return packFloat64( aSign, 0x3FF, 0 );
				2484	}
				2485	break;
				2486	case float_round_down:
				2487	return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
				2488	case float_round_up:
				2489	return
				2490	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
				2491	}
				2492	return packFloat64( aSign, 0, 0 );
				2493	}
				2494	lastBitMask = 1;
				2495	lastBitMask <<= 0x433 - aExp;
				2496	roundBitsMask = lastBitMask - 1;
				2497	z = a;
				2498	roundingMode = STATUS(float_rounding_mode);
				2499	if ( roundingMode == float_round_nearest_even ) {
				2500	z += lastBitMask>>1;
				2501	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				2502	}
				2503	else if ( roundingMode != float_round_to_zero ) {
				2504	if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				2505	z += roundBitsMask;
				2506	}
				2507	}
				2508	z &= ~ roundBitsMask;
				2509	if ( z != a ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2510	return z;
				2511
				2512	}
				2513
pbrook	e6e5906	2006-10-22 00:18:54 +0000	[diff] [blame]	2514	float64 float64_trunc_to_int( float64 a STATUS_PARAM)
				2515	{
				2516	int oldmode;
				2517	float64 res;
				2518	oldmode = STATUS(float_rounding_mode);
				2519	STATUS(float_rounding_mode) = float_round_to_zero;
				2520	res = float64_round_to_int(a STATUS_VAR);
				2521	STATUS(float_rounding_mode) = oldmode;
				2522	return res;
				2523	}
				2524
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2525	/*----------------------------------------------------------------------------
				2526	\| Returns the result of adding the absolute values of the double-precision
				2527	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				2528	\| before being returned. `zSign' is ignored if the result is a NaN.
				2529	\| The addition is performed according to the IEC/IEEE Standard for Binary
				2530	\| Floating-Point Arithmetic.
				2531	----------------------------------------------------------------------------/
				2532
				2533	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				2534	{
				2535	int16 aExp, bExp, zExp;
				2536	bits64 aSig, bSig, zSig;
				2537	int16 expDiff;
				2538
				2539	aSig = extractFloat64Frac( a );
				2540	aExp = extractFloat64Exp( a );
				2541	bSig = extractFloat64Frac( b );
				2542	bExp = extractFloat64Exp( b );
				2543	expDiff = aExp - bExp;
				2544	aSig <<= 9;
				2545	bSig <<= 9;
				2546	if ( 0 < expDiff ) {
				2547	if ( aExp == 0x7FF ) {
				2548	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2549	return a;
				2550	}
				2551	if ( bExp == 0 ) {
				2552	--expDiff;
				2553	}
				2554	else {
				2555	bSig \|= LIT64( 0x2000000000000000 );
				2556	}
				2557	shift64RightJamming( bSig, expDiff, &bSig );
				2558	zExp = aExp;
				2559	}
				2560	else if ( expDiff < 0 ) {
				2561	if ( bExp == 0x7FF ) {
				2562	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2563	return packFloat64( zSign, 0x7FF, 0 );
				2564	}
				2565	if ( aExp == 0 ) {
				2566	++expDiff;
				2567	}
				2568	else {
				2569	aSig \|= LIT64( 0x2000000000000000 );
				2570	}
				2571	shift64RightJamming( aSig, - expDiff, &aSig );
				2572	zExp = bExp;
				2573	}
				2574	else {
				2575	if ( aExp == 0x7FF ) {
				2576	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2577	return a;
				2578	}
				2579	if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				2580	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				2581	zExp = aExp;
				2582	goto roundAndPack;
				2583	}
				2584	aSig \|= LIT64( 0x2000000000000000 );
				2585	zSig = ( aSig + bSig )<<1;
				2586	--zExp;
				2587	if ( (sbits64) zSig < 0 ) {
				2588	zSig = aSig + bSig;
				2589	++zExp;
				2590	}
				2591	roundAndPack:
				2592	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2593
				2594	}
				2595
				2596	/*----------------------------------------------------------------------------
				2597	\| Returns the result of subtracting the absolute values of the double-
				2598	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				2599	\| difference is negated before being returned. `zSign' is ignored if the
				2600	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				2601	\| Standard for Binary Floating-Point Arithmetic.
				2602	----------------------------------------------------------------------------/
				2603
				2604	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				2605	{
				2606	int16 aExp, bExp, zExp;
				2607	bits64 aSig, bSig, zSig;
				2608	int16 expDiff;
				2609
				2610	aSig = extractFloat64Frac( a );
				2611	aExp = extractFloat64Exp( a );
				2612	bSig = extractFloat64Frac( b );
				2613	bExp = extractFloat64Exp( b );
				2614	expDiff = aExp - bExp;
				2615	aSig <<= 10;
				2616	bSig <<= 10;
				2617	if ( 0 < expDiff ) goto aExpBigger;
				2618	if ( expDiff < 0 ) goto bExpBigger;
				2619	if ( aExp == 0x7FF ) {
				2620	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2621	float_raise( float_flag_invalid STATUS_VAR);
				2622	return float64_default_nan;
				2623	}
				2624	if ( aExp == 0 ) {
				2625	aExp = 1;
				2626	bExp = 1;
				2627	}
				2628	if ( bSig < aSig ) goto aBigger;
				2629	if ( aSig < bSig ) goto bBigger;
				2630	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				2631	bExpBigger:
				2632	if ( bExp == 0x7FF ) {
				2633	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2634	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				2635	}
				2636	if ( aExp == 0 ) {
				2637	++expDiff;
				2638	}
				2639	else {
				2640	aSig \|= LIT64( 0x4000000000000000 );
				2641	}
				2642	shift64RightJamming( aSig, - expDiff, &aSig );
				2643	bSig \|= LIT64( 0x4000000000000000 );
				2644	bBigger:
				2645	zSig = bSig - aSig;
				2646	zExp = bExp;
				2647	zSign ^= 1;
				2648	goto normalizeRoundAndPack;
				2649	aExpBigger:
				2650	if ( aExp == 0x7FF ) {
				2651	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2652	return a;
				2653	}
				2654	if ( bExp == 0 ) {
				2655	--expDiff;
				2656	}
				2657	else {
				2658	bSig \|= LIT64( 0x4000000000000000 );
				2659	}
				2660	shift64RightJamming( bSig, expDiff, &bSig );
				2661	aSig \|= LIT64( 0x4000000000000000 );
				2662	aBigger:
				2663	zSig = aSig - bSig;
				2664	zExp = aExp;
				2665	normalizeRoundAndPack:
				2666	--zExp;
				2667	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2668
				2669	}
				2670
				2671	/*----------------------------------------------------------------------------
				2672	\| Returns the result of adding the double-precision floating-point values `a'
				2673	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				2674	\| Binary Floating-Point Arithmetic.
				2675	----------------------------------------------------------------------------/
				2676
				2677	float64 float64_add( float64 a, float64 b STATUS_PARAM )
				2678	{
				2679	flag aSign, bSign;
				2680
				2681	aSign = extractFloat64Sign( a );
				2682	bSign = extractFloat64Sign( b );
				2683	if ( aSign == bSign ) {
				2684	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				2685	}
				2686	else {
				2687	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				2688	}
				2689
				2690	}
				2691
				2692	/*----------------------------------------------------------------------------
				2693	\| Returns the result of subtracting the double-precision floating-point values
				2694	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2695	\| for Binary Floating-Point Arithmetic.
				2696	----------------------------------------------------------------------------/
				2697
				2698	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
				2699	{
				2700	flag aSign, bSign;
				2701
				2702	aSign = extractFloat64Sign( a );
				2703	bSign = extractFloat64Sign( b );
				2704	if ( aSign == bSign ) {
				2705	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				2706	}
				2707	else {
				2708	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				2709	}
				2710
				2711	}
				2712
				2713	/*----------------------------------------------------------------------------
				2714	\| Returns the result of multiplying the double-precision floating-point values
				2715	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2716	\| for Binary Floating-Point Arithmetic.
				2717	----------------------------------------------------------------------------/
				2718
				2719	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
				2720	{
				2721	flag aSign, bSign, zSign;
				2722	int16 aExp, bExp, zExp;
				2723	bits64 aSig, bSig, zSig0, zSig1;
				2724
				2725	aSig = extractFloat64Frac( a );
				2726	aExp = extractFloat64Exp( a );
				2727	aSign = extractFloat64Sign( a );
				2728	bSig = extractFloat64Frac( b );
				2729	bExp = extractFloat64Exp( b );
				2730	bSign = extractFloat64Sign( b );
				2731	zSign = aSign ^ bSign;
				2732	if ( aExp == 0x7FF ) {
				2733	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2734	return propagateFloat64NaN( a, b STATUS_VAR );
				2735	}
				2736	if ( ( bExp \| bSig ) == 0 ) {
				2737	float_raise( float_flag_invalid STATUS_VAR);
				2738	return float64_default_nan;
				2739	}
				2740	return packFloat64( zSign, 0x7FF, 0 );
				2741	}
				2742	if ( bExp == 0x7FF ) {
				2743	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2744	if ( ( aExp \| aSig ) == 0 ) {
				2745	float_raise( float_flag_invalid STATUS_VAR);
				2746	return float64_default_nan;
				2747	}
				2748	return packFloat64( zSign, 0x7FF, 0 );
				2749	}
				2750	if ( aExp == 0 ) {
				2751	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2752	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2753	}
				2754	if ( bExp == 0 ) {
				2755	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				2756	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2757	}
				2758	zExp = aExp + bExp - 0x3FF;
				2759	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2760	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2761	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2762	zSig0 \|= ( zSig1 != 0 );
				2763	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
				2764	zSig0 <<= 1;
				2765	--zExp;
				2766	}
				2767	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
				2768
				2769	}
				2770
				2771	/*----------------------------------------------------------------------------
				2772	\| Returns the result of dividing the double-precision floating-point value `a'
				2773	\| by the corresponding value `b'. The operation is performed according to
				2774	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2775	----------------------------------------------------------------------------/
				2776
				2777	float64 float64_div( float64 a, float64 b STATUS_PARAM )
				2778	{
				2779	flag aSign, bSign, zSign;
				2780	int16 aExp, bExp, zExp;
				2781	bits64 aSig, bSig, zSig;
				2782	bits64 rem0, rem1;
				2783	bits64 term0, term1;
				2784
				2785	aSig = extractFloat64Frac( a );
				2786	aExp = extractFloat64Exp( a );
				2787	aSign = extractFloat64Sign( a );
				2788	bSig = extractFloat64Frac( b );
				2789	bExp = extractFloat64Exp( b );
				2790	bSign = extractFloat64Sign( b );
				2791	zSign = aSign ^ bSign;
				2792	if ( aExp == 0x7FF ) {
				2793	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2794	if ( bExp == 0x7FF ) {
				2795	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2796	float_raise( float_flag_invalid STATUS_VAR);
				2797	return float64_default_nan;
				2798	}
				2799	return packFloat64( zSign, 0x7FF, 0 );
				2800	}
				2801	if ( bExp == 0x7FF ) {
				2802	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2803	return packFloat64( zSign, 0, 0 );
				2804	}
				2805	if ( bExp == 0 ) {
				2806	if ( bSig == 0 ) {
				2807	if ( ( aExp \| aSig ) == 0 ) {
				2808	float_raise( float_flag_invalid STATUS_VAR);
				2809	return float64_default_nan;
				2810	}
				2811	float_raise( float_flag_divbyzero STATUS_VAR);
				2812	return packFloat64( zSign, 0x7FF, 0 );
				2813	}
				2814	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2815	}
				2816	if ( aExp == 0 ) {
				2817	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2818	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2819	}
				2820	zExp = aExp - bExp + 0x3FD;
				2821	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2822	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2823	if ( bSig <= ( aSig + aSig ) ) {
				2824	aSig >>= 1;
				2825	++zExp;
				2826	}
				2827	zSig = estimateDiv128To64( aSig, 0, bSig );
				2828	if ( ( zSig & 0x1FF ) <= 2 ) {
				2829	mul64To128( bSig, zSig, &term0, &term1 );
				2830	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2831	while ( (sbits64) rem0 < 0 ) {
				2832	--zSig;
				2833	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				2834	}
				2835	zSig \|= ( rem1 != 0 );
				2836	}
				2837	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2838
				2839	}
				2840
				2841	/*----------------------------------------------------------------------------
				2842	\| Returns the remainder of the double-precision floating-point value `a'
				2843	\| with respect to the corresponding value `b'. The operation is performed
				2844	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2845	----------------------------------------------------------------------------/
				2846
				2847	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
				2848	{
				2849	flag aSign, bSign, zSign;
				2850	int16 aExp, bExp, expDiff;
				2851	bits64 aSig, bSig;
				2852	bits64 q, alternateASig;
				2853	sbits64 sigMean;
				2854
				2855	aSig = extractFloat64Frac( a );
				2856	aExp = extractFloat64Exp( a );
				2857	aSign = extractFloat64Sign( a );
				2858	bSig = extractFloat64Frac( b );
				2859	bExp = extractFloat64Exp( b );
				2860	bSign = extractFloat64Sign( b );
				2861	if ( aExp == 0x7FF ) {
				2862	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2863	return propagateFloat64NaN( a, b STATUS_VAR );
				2864	}
				2865	float_raise( float_flag_invalid STATUS_VAR);
				2866	return float64_default_nan;
				2867	}
				2868	if ( bExp == 0x7FF ) {
				2869	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2870	return a;
				2871	}
				2872	if ( bExp == 0 ) {
				2873	if ( bSig == 0 ) {
				2874	float_raise( float_flag_invalid STATUS_VAR);
				2875	return float64_default_nan;
				2876	}
				2877	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2878	}
				2879	if ( aExp == 0 ) {
				2880	if ( aSig == 0 ) return a;
				2881	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2882	}
				2883	expDiff = aExp - bExp;
				2884	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				2885	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2886	if ( expDiff < 0 ) {
				2887	if ( expDiff < -1 ) return a;
				2888	aSig >>= 1;
				2889	}
				2890	q = ( bSig <= aSig );
				2891	if ( q ) aSig -= bSig;
				2892	expDiff -= 64;
				2893	while ( 0 < expDiff ) {
				2894	q = estimateDiv128To64( aSig, 0, bSig );
				2895	q = ( 2 < q ) ? q - 2 : 0;
				2896	aSig = - ( ( bSig>>2 ) * q );
				2897	expDiff -= 62;
				2898	}
				2899	expDiff += 64;
				2900	if ( 0 < expDiff ) {
				2901	q = estimateDiv128To64( aSig, 0, bSig );
				2902	q = ( 2 < q ) ? q - 2 : 0;
				2903	q >>= 64 - expDiff;
				2904	bSig >>= 2;
				2905	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2906	}
				2907	else {
				2908	aSig >>= 2;
				2909	bSig >>= 2;
				2910	}
				2911	do {
				2912	alternateASig = aSig;
				2913	++q;
				2914	aSig -= bSig;
				2915	} while ( 0 <= (sbits64) aSig );
				2916	sigMean = aSig + alternateASig;
				2917	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2918	aSig = alternateASig;
				2919	}
				2920	zSign = ( (sbits64) aSig < 0 );
				2921	if ( zSign ) aSig = - aSig;
				2922	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
				2923
				2924	}
				2925
				2926	/*----------------------------------------------------------------------------
				2927	\| Returns the square root of the double-precision floating-point value `a'.
				2928	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2929	\| Floating-Point Arithmetic.
				2930	----------------------------------------------------------------------------/
				2931
				2932	float64 float64_sqrt( float64 a STATUS_PARAM )
				2933	{
				2934	flag aSign;
				2935	int16 aExp, zExp;
				2936	bits64 aSig, zSig, doubleZSig;
				2937	bits64 rem0, rem1, term0, term1;
				2938
				2939	aSig = extractFloat64Frac( a );
				2940	aExp = extractFloat64Exp( a );
				2941	aSign = extractFloat64Sign( a );
				2942	if ( aExp == 0x7FF ) {
				2943	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
				2944	if ( ! aSign ) return a;
				2945	float_raise( float_flag_invalid STATUS_VAR);
				2946	return float64_default_nan;
				2947	}
				2948	if ( aSign ) {
				2949	if ( ( aExp \| aSig ) == 0 ) return a;
				2950	float_raise( float_flag_invalid STATUS_VAR);
				2951	return float64_default_nan;
				2952	}
				2953	if ( aExp == 0 ) {
				2954	if ( aSig == 0 ) return 0;
				2955	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2956	}
				2957	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				2958	aSig \|= LIT64( 0x0010000000000000 );
				2959	zSig = estimateSqrt32( aExp, aSig>>21 );
				2960	aSig <<= 9 - ( aExp & 1 );
				2961	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
				2962	if ( ( zSig & 0x1FF ) <= 5 ) {
				2963	doubleZSig = zSig<<1;
				2964	mul64To128( zSig, zSig, &term0, &term1 );
				2965	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2966	while ( (sbits64) rem0 < 0 ) {
				2967	--zSig;
				2968	doubleZSig -= 2;
				2969	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
				2970	}
				2971	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				2972	}
				2973	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
				2974
				2975	}
				2976
				2977	/*----------------------------------------------------------------------------
				2978	\| Returns 1 if the double-precision floating-point value `a' is equal to the
				2979	\| corresponding value `b', and 0 otherwise. The comparison is performed
				2980	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2981	----------------------------------------------------------------------------/
				2982
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2983	int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2984	{
				2985
				2986	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				2987	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				2988	) {
				2989	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				2990	float_raise( float_flag_invalid STATUS_VAR);
				2991	}
				2992	return 0;
				2993	}
				2994	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				2995
				2996	}
				2997
				2998	/*----------------------------------------------------------------------------
				2999	\| Returns 1 if the double-precision floating-point value `a' is less than or
				3000	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
				3001	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3002	\| Arithmetic.
				3003	----------------------------------------------------------------------------/
				3004
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3005	int float64_le( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3006	{
				3007	flag aSign, bSign;
				3008
				3009	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3010	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3011	) {
				3012	float_raise( float_flag_invalid STATUS_VAR);
				3013	return 0;
				3014	}
				3015	aSign = extractFloat64Sign( a );
				3016	bSign = extractFloat64Sign( b );
				3017	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				3018	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				3019
				3020	}
				3021
				3022	/*----------------------------------------------------------------------------
				3023	\| Returns 1 if the double-precision floating-point value `a' is less than
				3024	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				3025	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3026	----------------------------------------------------------------------------/
				3027
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3028	int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3029	{
				3030	flag aSign, bSign;
				3031
				3032	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3033	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3034	) {
				3035	float_raise( float_flag_invalid STATUS_VAR);
				3036	return 0;
				3037	}
				3038	aSign = extractFloat64Sign( a );
				3039	bSign = extractFloat64Sign( b );
				3040	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				3041	return ( a != b ) && ( aSign ^ ( a < b ) );
				3042
				3043	}
				3044
				3045	/*----------------------------------------------------------------------------
				3046	\| Returns 1 if the double-precision floating-point value `a' is equal to the
				3047	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
				3048	\| if either operand is a NaN. Otherwise, the comparison is performed
				3049	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3050	----------------------------------------------------------------------------/
				3051
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3052	int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3053	{
				3054
				3055	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3056	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3057	) {
				3058	float_raise( float_flag_invalid STATUS_VAR);
				3059	return 0;
				3060	}
				3061	return ( a == b ) \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				3062
				3063	}
				3064
				3065	/*----------------------------------------------------------------------------
				3066	\| Returns 1 if the double-precision floating-point value `a' is less than or
				3067	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				3068	\| cause an exception. Otherwise, the comparison is performed according to the
				3069	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3070	----------------------------------------------------------------------------/
				3071
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3072	int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3073	{
				3074	flag aSign, bSign;
				3075
				3076	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3077	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3078	) {
				3079	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3080	float_raise( float_flag_invalid STATUS_VAR);
				3081	}
				3082	return 0;
				3083	}
				3084	aSign = extractFloat64Sign( a );
				3085	bSign = extractFloat64Sign( b );
				3086	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( a \| b )<<1 ) == 0 );
				3087	return ( a == b ) \|\| ( aSign ^ ( a < b ) );
				3088
				3089	}
				3090
				3091	/*----------------------------------------------------------------------------
				3092	\| Returns 1 if the double-precision floating-point value `a' is less than
				3093	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				3094	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				3095	\| Standard for Binary Floating-Point Arithmetic.
				3096	----------------------------------------------------------------------------/
				3097
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3098	int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3099	{
				3100	flag aSign, bSign;
				3101
				3102	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3103	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3104	) {
				3105	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3106	float_raise( float_flag_invalid STATUS_VAR);
				3107	}
				3108	return 0;
				3109	}
				3110	aSign = extractFloat64Sign( a );
				3111	bSign = extractFloat64Sign( b );
				3112	if ( aSign != bSign ) return aSign && ( (bits64) ( ( a \| b )<<1 ) != 0 );
				3113	return ( a != b ) && ( aSign ^ ( a < b ) );
				3114
				3115	}
				3116
				3117	#ifdef FLOATX80
				3118
				3119	/*----------------------------------------------------------------------------
				3120	\| Returns the result of converting the extended double-precision floating-
				3121	\| point value `a' to the 32-bit two's complement integer format. The
				3122	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3123	\| Floating-Point Arithmetic---which means in particular that the conversion
				3124	\| is rounded according to the current rounding mode. If `a' is a NaN, the
				3125	\| largest positive integer is returned. Otherwise, if the conversion
				3126	\| overflows, the largest integer with the same sign as `a' is returned.
				3127	----------------------------------------------------------------------------/
				3128
				3129	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
				3130	{
				3131	flag aSign;
				3132	int32 aExp, shiftCount;
				3133	bits64 aSig;
				3134
				3135	aSig = extractFloatx80Frac( a );
				3136	aExp = extractFloatx80Exp( a );
				3137	aSign = extractFloatx80Sign( a );
				3138	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				3139	shiftCount = 0x4037 - aExp;
				3140	if ( shiftCount <= 0 ) shiftCount = 1;
				3141	shift64RightJamming( aSig, shiftCount, &aSig );
				3142	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				3143
				3144	}
				3145
				3146	/*----------------------------------------------------------------------------
				3147	\| Returns the result of converting the extended double-precision floating-
				3148	\| point value `a' to the 32-bit two's complement integer format. The
				3149	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3150	\| Floating-Point Arithmetic, except that the conversion is always rounded
				3151	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				3152	\| Otherwise, if the conversion overflows, the largest integer with the same
				3153	\| sign as `a' is returned.
				3154	----------------------------------------------------------------------------/
				3155
				3156	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
				3157	{
				3158	flag aSign;
				3159	int32 aExp, shiftCount;
				3160	bits64 aSig, savedASig;
				3161	int32 z;
				3162
				3163	aSig = extractFloatx80Frac( a );
				3164	aExp = extractFloatx80Exp( a );
				3165	aSign = extractFloatx80Sign( a );
				3166	if ( 0x401E < aExp ) {
				3167	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				3168	goto invalid;
				3169	}
				3170	else if ( aExp < 0x3FFF ) {
				3171	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3172	return 0;
				3173	}
				3174	shiftCount = 0x403E - aExp;
				3175	savedASig = aSig;
				3176	aSig >>= shiftCount;
				3177	z = aSig;
				3178	if ( aSign ) z = - z;
				3179	if ( ( z < 0 ) ^ aSign ) {
				3180	invalid:
				3181	float_raise( float_flag_invalid STATUS_VAR);
				3182	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				3183	}
				3184	if ( ( aSig<<shiftCount ) != savedASig ) {
				3185	STATUS(float_exception_flags) \|= float_flag_inexact;
				3186	}
				3187	return z;
				3188
				3189	}
				3190
				3191	/*----------------------------------------------------------------------------
				3192	\| Returns the result of converting the extended double-precision floating-
				3193	\| point value `a' to the 64-bit two's complement integer format. The
				3194	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3195	\| Floating-Point Arithmetic---which means in particular that the conversion
				3196	\| is rounded according to the current rounding mode. If `a' is a NaN,
				3197	\| the largest positive integer is returned. Otherwise, if the conversion
				3198	\| overflows, the largest integer with the same sign as `a' is returned.
				3199	----------------------------------------------------------------------------/
				3200
				3201	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
				3202	{
				3203	flag aSign;
				3204	int32 aExp, shiftCount;
				3205	bits64 aSig, aSigExtra;
				3206
				3207	aSig = extractFloatx80Frac( a );
				3208	aExp = extractFloatx80Exp( a );
				3209	aSign = extractFloatx80Sign( a );
				3210	shiftCount = 0x403E - aExp;
				3211	if ( shiftCount <= 0 ) {
				3212	if ( shiftCount ) {
				3213	float_raise( float_flag_invalid STATUS_VAR);
				3214	if ( ! aSign
				3215	\|\| ( ( aExp == 0x7FFF )
				3216	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
				3217	) {
				3218	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3219	}
				3220	return (sbits64) LIT64( 0x8000000000000000 );
				3221	}
				3222	aSigExtra = 0;
				3223	}
				3224	else {
				3225	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				3226	}
				3227	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				3228
				3229	}
				3230
				3231	/*----------------------------------------------------------------------------
				3232	\| Returns the result of converting the extended double-precision floating-
				3233	\| point value `a' to the 64-bit two's complement integer format. The
				3234	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3235	\| Floating-Point Arithmetic, except that the conversion is always rounded
				3236	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				3237	\| Otherwise, if the conversion overflows, the largest integer with the same
				3238	\| sign as `a' is returned.
				3239	----------------------------------------------------------------------------/
				3240
				3241	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
				3242	{
				3243	flag aSign;
				3244	int32 aExp, shiftCount;
				3245	bits64 aSig;
				3246	int64 z;
				3247
				3248	aSig = extractFloatx80Frac( a );
				3249	aExp = extractFloatx80Exp( a );
				3250	aSign = extractFloatx80Sign( a );
				3251	shiftCount = aExp - 0x403E;
				3252	if ( 0 <= shiftCount ) {
				3253	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
				3254	if ( ( a.high != 0xC03E ) \|\| aSig ) {
				3255	float_raise( float_flag_invalid STATUS_VAR);
				3256	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
				3257	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3258	}
				3259	}
				3260	return (sbits64) LIT64( 0x8000000000000000 );
				3261	}
				3262	else if ( aExp < 0x3FFF ) {
				3263	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3264	return 0;
				3265	}
				3266	z = aSig>>( - shiftCount );
				3267	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
				3268	STATUS(float_exception_flags) \|= float_flag_inexact;
				3269	}
				3270	if ( aSign ) z = - z;
				3271	return z;
				3272
				3273	}
				3274
				3275	/*----------------------------------------------------------------------------
				3276	\| Returns the result of converting the extended double-precision floating-
				3277	\| point value `a' to the single-precision floating-point format. The
				3278	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3279	\| Floating-Point Arithmetic.
				3280	----------------------------------------------------------------------------/
				3281
				3282	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
				3283	{
				3284	flag aSign;
				3285	int32 aExp;
				3286	bits64 aSig;
				3287
				3288	aSig = extractFloatx80Frac( a );
				3289	aExp = extractFloatx80Exp( a );
				3290	aSign = extractFloatx80Sign( a );
				3291	if ( aExp == 0x7FFF ) {
				3292	if ( (bits64) ( aSig<<1 ) ) {
				3293	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
				3294	}
				3295	return packFloat32( aSign, 0xFF, 0 );
				3296	}
				3297	shift64RightJamming( aSig, 33, &aSig );
				3298	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				3299	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
				3300
				3301	}
				3302
				3303	/*----------------------------------------------------------------------------
				3304	\| Returns the result of converting the extended double-precision floating-
				3305	\| point value `a' to the double-precision floating-point format. The
				3306	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3307	\| Floating-Point Arithmetic.
				3308	----------------------------------------------------------------------------/
				3309
				3310	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
				3311	{
				3312	flag aSign;
				3313	int32 aExp;
				3314	bits64 aSig, zSig;
				3315
				3316	aSig = extractFloatx80Frac( a );
				3317	aExp = extractFloatx80Exp( a );
				3318	aSign = extractFloatx80Sign( a );
				3319	if ( aExp == 0x7FFF ) {
				3320	if ( (bits64) ( aSig<<1 ) ) {
				3321	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
				3322	}
				3323	return packFloat64( aSign, 0x7FF, 0 );
				3324	}
				3325	shift64RightJamming( aSig, 1, &zSig );
				3326	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				3327	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
				3328
				3329	}
				3330
				3331	#ifdef FLOAT128
				3332
				3333	/*----------------------------------------------------------------------------
				3334	\| Returns the result of converting the extended double-precision floating-
				3335	\| point value `a' to the quadruple-precision floating-point format. The
				3336	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3337	\| Floating-Point Arithmetic.
				3338	----------------------------------------------------------------------------/
				3339
				3340	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
				3341	{
				3342	flag aSign;
				3343	int16 aExp;
				3344	bits64 aSig, zSig0, zSig1;
				3345
				3346	aSig = extractFloatx80Frac( a );
				3347	aExp = extractFloatx80Exp( a );
				3348	aSign = extractFloatx80Sign( a );
				3349	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
				3350	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
				3351	}
				3352	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
				3353	return packFloat128( aSign, aExp, zSig0, zSig1 );
				3354
				3355	}
				3356
				3357	#endif
				3358
				3359	/*----------------------------------------------------------------------------
				3360	\| Rounds the extended double-precision floating-point value `a' to an integer,
				3361	\| and returns the result as an extended quadruple-precision floating-point
				3362	\| value. The operation is performed according to the IEC/IEEE Standard for
				3363	\| Binary Floating-Point Arithmetic.
				3364	----------------------------------------------------------------------------/
				3365
				3366	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
				3367	{
				3368	flag aSign;
				3369	int32 aExp;
				3370	bits64 lastBitMask, roundBitsMask;
				3371	int8 roundingMode;
				3372	floatx80 z;
				3373
				3374	aExp = extractFloatx80Exp( a );
				3375	if ( 0x403E <= aExp ) {
				3376	if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
				3377	return propagateFloatx80NaN( a, a STATUS_VAR );
				3378	}
				3379	return a;
				3380	}
				3381	if ( aExp < 0x3FFF ) {
				3382	if ( ( aExp == 0 )
				3383	&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
				3384	return a;
				3385	}
				3386	STATUS(float_exception_flags) \|= float_flag_inexact;
				3387	aSign = extractFloatx80Sign( a );
				3388	switch ( STATUS(float_rounding_mode) ) {
				3389	case float_round_nearest_even:
				3390	if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
				3391	) {
				3392	return
				3393	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				3394	}
				3395	break;
				3396	case float_round_down:
				3397	return
				3398	aSign ?
				3399	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				3400	: packFloatx80( 0, 0, 0 );
				3401	case float_round_up:
				3402	return
				3403	aSign ? packFloatx80( 1, 0, 0 )
				3404	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				3405	}
				3406	return packFloatx80( aSign, 0, 0 );
				3407	}
				3408	lastBitMask = 1;
				3409	lastBitMask <<= 0x403E - aExp;
				3410	roundBitsMask = lastBitMask - 1;
				3411	z = a;
				3412	roundingMode = STATUS(float_rounding_mode);
				3413	if ( roundingMode == float_round_nearest_even ) {
				3414	z.low += lastBitMask>>1;
				3415	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				3416	}
				3417	else if ( roundingMode != float_round_to_zero ) {
				3418	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				3419	z.low += roundBitsMask;
				3420	}
				3421	}
				3422	z.low &= ~ roundBitsMask;
				3423	if ( z.low == 0 ) {
				3424	++z.high;
				3425	z.low = LIT64( 0x8000000000000000 );
				3426	}
				3427	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3428	return z;
				3429
				3430	}
				3431
				3432	/*----------------------------------------------------------------------------
				3433	\| Returns the result of adding the absolute values of the extended double-
				3434	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
				3435	\| negated before being returned. `zSign' is ignored if the result is a NaN.
				3436	\| The addition is performed according to the IEC/IEEE Standard for Binary
				3437	\| Floating-Point Arithmetic.
				3438	----------------------------------------------------------------------------/
				3439
				3440	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
				3441	{
				3442	int32 aExp, bExp, zExp;
				3443	bits64 aSig, bSig, zSig0, zSig1;
				3444	int32 expDiff;
				3445
				3446	aSig = extractFloatx80Frac( a );
				3447	aExp = extractFloatx80Exp( a );
				3448	bSig = extractFloatx80Frac( b );
				3449	bExp = extractFloatx80Exp( b );
				3450	expDiff = aExp - bExp;
				3451	if ( 0 < expDiff ) {
				3452	if ( aExp == 0x7FFF ) {
				3453	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3454	return a;
				3455	}
				3456	if ( bExp == 0 ) --expDiff;
				3457	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				3458	zExp = aExp;
				3459	}
				3460	else if ( expDiff < 0 ) {
				3461	if ( bExp == 0x7FFF ) {
				3462	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3463	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3464	}
				3465	if ( aExp == 0 ) ++expDiff;
				3466	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				3467	zExp = bExp;
				3468	}
				3469	else {
				3470	if ( aExp == 0x7FFF ) {
				3471	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				3472	return propagateFloatx80NaN( a, b STATUS_VAR );
				3473	}
				3474	return a;
				3475	}
				3476	zSig1 = 0;
				3477	zSig0 = aSig + bSig;
				3478	if ( aExp == 0 ) {
				3479	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				3480	goto roundAndPack;
				3481	}
				3482	zExp = aExp;
				3483	goto shiftRight1;
				3484	}
				3485	zSig0 = aSig + bSig;
				3486	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
				3487	shiftRight1:
				3488	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				3489	zSig0 \|= LIT64( 0x8000000000000000 );
				3490	++zExp;
				3491	roundAndPack:
				3492	return
				3493	roundAndPackFloatx80(
				3494	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3495
				3496	}
				3497
				3498	/*----------------------------------------------------------------------------
				3499	\| Returns the result of subtracting the absolute values of the extended
				3500	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
				3501	\| difference is negated before being returned. `zSign' is ignored if the
				3502	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				3503	\| Standard for Binary Floating-Point Arithmetic.
				3504	----------------------------------------------------------------------------/
				3505
				3506	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
				3507	{
				3508	int32 aExp, bExp, zExp;
				3509	bits64 aSig, bSig, zSig0, zSig1;
				3510	int32 expDiff;
				3511	floatx80 z;
				3512
				3513	aSig = extractFloatx80Frac( a );
				3514	aExp = extractFloatx80Exp( a );
				3515	bSig = extractFloatx80Frac( b );
				3516	bExp = extractFloatx80Exp( b );
				3517	expDiff = aExp - bExp;
				3518	if ( 0 < expDiff ) goto aExpBigger;
				3519	if ( expDiff < 0 ) goto bExpBigger;
				3520	if ( aExp == 0x7FFF ) {
				3521	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				3522	return propagateFloatx80NaN( a, b STATUS_VAR );
				3523	}
				3524	float_raise( float_flag_invalid STATUS_VAR);
				3525	z.low = floatx80_default_nan_low;
				3526	z.high = floatx80_default_nan_high;
				3527	return z;
				3528	}
				3529	if ( aExp == 0 ) {
				3530	aExp = 1;
				3531	bExp = 1;
				3532	}
				3533	zSig1 = 0;
				3534	if ( bSig < aSig ) goto aBigger;
				3535	if ( aSig < bSig ) goto bBigger;
				3536	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				3537	bExpBigger:
				3538	if ( bExp == 0x7FFF ) {
				3539	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3540	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3541	}
				3542	if ( aExp == 0 ) ++expDiff;
				3543	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				3544	bBigger:
				3545	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				3546	zExp = bExp;
				3547	zSign ^= 1;
				3548	goto normalizeRoundAndPack;
				3549	aExpBigger:
				3550	if ( aExp == 0x7FFF ) {
				3551	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3552	return a;
				3553	}
				3554	if ( bExp == 0 ) --expDiff;
				3555	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				3556	aBigger:
				3557	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				3558	zExp = aExp;
				3559	normalizeRoundAndPack:
				3560	return
				3561	normalizeRoundAndPackFloatx80(
				3562	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3563
				3564	}
				3565
				3566	/*----------------------------------------------------------------------------
				3567	\| Returns the result of adding the extended double-precision floating-point
				3568	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				3569	\| Standard for Binary Floating-Point Arithmetic.
				3570	----------------------------------------------------------------------------/
				3571
				3572	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
				3573	{
				3574	flag aSign, bSign;
				3575
				3576	aSign = extractFloatx80Sign( a );
				3577	bSign = extractFloatx80Sign( b );
				3578	if ( aSign == bSign ) {
				3579	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				3580	}
				3581	else {
				3582	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				3583	}
				3584
				3585	}
				3586
				3587	/*----------------------------------------------------------------------------
				3588	\| Returns the result of subtracting the extended double-precision floating-
				3589	\| point values `a' and `b'. The operation is performed according to the
				3590	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3591	----------------------------------------------------------------------------/
				3592
				3593	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
				3594	{
				3595	flag aSign, bSign;
				3596
				3597	aSign = extractFloatx80Sign( a );
				3598	bSign = extractFloatx80Sign( b );
				3599	if ( aSign == bSign ) {
				3600	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				3601	}
				3602	else {
				3603	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				3604	}
				3605
				3606	}
				3607
				3608	/*----------------------------------------------------------------------------
				3609	\| Returns the result of multiplying the extended double-precision floating-
				3610	\| point values `a' and `b'. The operation is performed according to the
				3611	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3612	----------------------------------------------------------------------------/
				3613
				3614	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
				3615	{
				3616	flag aSign, bSign, zSign;
				3617	int32 aExp, bExp, zExp;
				3618	bits64 aSig, bSig, zSig0, zSig1;
				3619	floatx80 z;
				3620
				3621	aSig = extractFloatx80Frac( a );
				3622	aExp = extractFloatx80Exp( a );
				3623	aSign = extractFloatx80Sign( a );
				3624	bSig = extractFloatx80Frac( b );
				3625	bExp = extractFloatx80Exp( b );
				3626	bSign = extractFloatx80Sign( b );
				3627	zSign = aSign ^ bSign;
				3628	if ( aExp == 0x7FFF ) {
				3629	if ( (bits64) ( aSig<<1 )
				3630	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3631	return propagateFloatx80NaN( a, b STATUS_VAR );
				3632	}
				3633	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				3634	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3635	}
				3636	if ( bExp == 0x7FFF ) {
				3637	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3638	if ( ( aExp \| aSig ) == 0 ) {
				3639	invalid:
				3640	float_raise( float_flag_invalid STATUS_VAR);
				3641	z.low = floatx80_default_nan_low;
				3642	z.high = floatx80_default_nan_high;
				3643	return z;
				3644	}
				3645	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3646	}
				3647	if ( aExp == 0 ) {
				3648	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3649	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3650	}
				3651	if ( bExp == 0 ) {
				3652	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3653	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3654	}
				3655	zExp = aExp + bExp - 0x3FFE;
				3656	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				3657	if ( 0 < (sbits64) zSig0 ) {
				3658	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				3659	--zExp;
				3660	}
				3661	return
				3662	roundAndPackFloatx80(
				3663	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3664
				3665	}
				3666
				3667	/*----------------------------------------------------------------------------
				3668	\| Returns the result of dividing the extended double-precision floating-point
				3669	\| value `a' by the corresponding value `b'. The operation is performed
				3670	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3671	----------------------------------------------------------------------------/
				3672
				3673	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
				3674	{
				3675	flag aSign, bSign, zSign;
				3676	int32 aExp, bExp, zExp;
				3677	bits64 aSig, bSig, zSig0, zSig1;
				3678	bits64 rem0, rem1, rem2, term0, term1, term2;
				3679	floatx80 z;
				3680
				3681	aSig = extractFloatx80Frac( a );
				3682	aExp = extractFloatx80Exp( a );
				3683	aSign = extractFloatx80Sign( a );
				3684	bSig = extractFloatx80Frac( b );
				3685	bExp = extractFloatx80Exp( b );
				3686	bSign = extractFloatx80Sign( b );
				3687	zSign = aSign ^ bSign;
				3688	if ( aExp == 0x7FFF ) {
				3689	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3690	if ( bExp == 0x7FFF ) {
				3691	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3692	goto invalid;
				3693	}
				3694	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3695	}
				3696	if ( bExp == 0x7FFF ) {
				3697	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3698	return packFloatx80( zSign, 0, 0 );
				3699	}
				3700	if ( bExp == 0 ) {
				3701	if ( bSig == 0 ) {
				3702	if ( ( aExp \| aSig ) == 0 ) {
				3703	invalid:
				3704	float_raise( float_flag_invalid STATUS_VAR);
				3705	z.low = floatx80_default_nan_low;
				3706	z.high = floatx80_default_nan_high;
				3707	return z;
				3708	}
				3709	float_raise( float_flag_divbyzero STATUS_VAR);
				3710	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3711	}
				3712	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3713	}
				3714	if ( aExp == 0 ) {
				3715	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3716	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3717	}
				3718	zExp = aExp - bExp + 0x3FFE;
				3719	rem1 = 0;
				3720	if ( bSig <= aSig ) {
				3721	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				3722	++zExp;
				3723	}
				3724	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				3725	mul64To128( bSig, zSig0, &term0, &term1 );
				3726	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
				3727	while ( (sbits64) rem0 < 0 ) {
				3728	--zSig0;
				3729	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3730	}
				3731	zSig1 = estimateDiv128To64( rem1, 0, bSig );
				3732	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
				3733	mul64To128( bSig, zSig1, &term1, &term2 );
				3734	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3735	while ( (sbits64) rem1 < 0 ) {
				3736	--zSig1;
				3737	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				3738	}
				3739	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				3740	}
				3741	return
				3742	roundAndPackFloatx80(
				3743	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3744
				3745	}
				3746
				3747	/*----------------------------------------------------------------------------
				3748	\| Returns the remainder of the extended double-precision floating-point value
				3749	\| `a' with respect to the corresponding value `b'. The operation is performed
				3750	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3751	----------------------------------------------------------------------------/
				3752
				3753	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
				3754	{
				3755	flag aSign, bSign, zSign;
				3756	int32 aExp, bExp, expDiff;
				3757	bits64 aSig0, aSig1, bSig;
				3758	bits64 q, term0, term1, alternateASig0, alternateASig1;
				3759	floatx80 z;
				3760
				3761	aSig0 = extractFloatx80Frac( a );
				3762	aExp = extractFloatx80Exp( a );
				3763	aSign = extractFloatx80Sign( a );
				3764	bSig = extractFloatx80Frac( b );
				3765	bExp = extractFloatx80Exp( b );
				3766	bSign = extractFloatx80Sign( b );
				3767	if ( aExp == 0x7FFF ) {
				3768	if ( (bits64) ( aSig0<<1 )
				3769	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3770	return propagateFloatx80NaN( a, b STATUS_VAR );
				3771	}
				3772	goto invalid;
				3773	}
				3774	if ( bExp == 0x7FFF ) {
				3775	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3776	return a;
				3777	}
				3778	if ( bExp == 0 ) {
				3779	if ( bSig == 0 ) {
				3780	invalid:
				3781	float_raise( float_flag_invalid STATUS_VAR);
				3782	z.low = floatx80_default_nan_low;
				3783	z.high = floatx80_default_nan_high;
				3784	return z;
				3785	}
				3786	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3787	}
				3788	if ( aExp == 0 ) {
				3789	if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
				3790	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3791	}
				3792	bSig \|= LIT64( 0x8000000000000000 );
				3793	zSign = aSign;
				3794	expDiff = aExp - bExp;
				3795	aSig1 = 0;
				3796	if ( expDiff < 0 ) {
				3797	if ( expDiff < -1 ) return a;
				3798	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				3799	expDiff = 0;
				3800	}
				3801	q = ( bSig <= aSig0 );
				3802	if ( q ) aSig0 -= bSig;
				3803	expDiff -= 64;
				3804	while ( 0 < expDiff ) {
				3805	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3806	q = ( 2 < q ) ? q - 2 : 0;
				3807	mul64To128( bSig, q, &term0, &term1 );
				3808	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3809	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				3810	expDiff -= 62;
				3811	}
				3812	expDiff += 64;
				3813	if ( 0 < expDiff ) {
				3814	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3815	q = ( 2 < q ) ? q - 2 : 0;
				3816	q >>= 64 - expDiff;
				3817	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				3818	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3819	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				3820	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				3821	++q;
				3822	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3823	}
				3824	}
				3825	else {
				3826	term1 = 0;
				3827	term0 = bSig;
				3828	}
				3829	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				3830	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3831	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3832	&& ( q & 1 ) )
				3833	) {
				3834	aSig0 = alternateASig0;
				3835	aSig1 = alternateASig1;
				3836	zSign = ! zSign;
				3837	}
				3838	return
				3839	normalizeRoundAndPackFloatx80(
				3840	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
				3841
				3842	}
				3843
				3844	/*----------------------------------------------------------------------------
				3845	\| Returns the square root of the extended double-precision floating-point
				3846	\| value `a'. The operation is performed according to the IEC/IEEE Standard
				3847	\| for Binary Floating-Point Arithmetic.
				3848	----------------------------------------------------------------------------/
				3849
				3850	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
				3851	{
				3852	flag aSign;
				3853	int32 aExp, zExp;
				3854	bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
				3855	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				3856	floatx80 z;
				3857
				3858	aSig0 = extractFloatx80Frac( a );
				3859	aExp = extractFloatx80Exp( a );
				3860	aSign = extractFloatx80Sign( a );
				3861	if ( aExp == 0x7FFF ) {
				3862	if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
				3863	if ( ! aSign ) return a;
				3864	goto invalid;
				3865	}
				3866	if ( aSign ) {
				3867	if ( ( aExp \| aSig0 ) == 0 ) return a;
				3868	invalid:
				3869	float_raise( float_flag_invalid STATUS_VAR);
				3870	z.low = floatx80_default_nan_low;
				3871	z.high = floatx80_default_nan_high;
				3872	return z;
				3873	}
				3874	if ( aExp == 0 ) {
				3875	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				3876	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3877	}
				3878	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				3879	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				3880	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
				3881	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				3882	doubleZSig0 = zSig0<<1;
				3883	mul64To128( zSig0, zSig0, &term0, &term1 );
				3884	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				3885	while ( (sbits64) rem0 < 0 ) {
				3886	--zSig0;
				3887	doubleZSig0 -= 2;
				3888	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				3889	}
				3890	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				3891	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
				3892	if ( zSig1 == 0 ) zSig1 = 1;
				3893	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				3894	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3895	mul64To128( zSig1, zSig1, &term2, &term3 );
				3896	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				3897	while ( (sbits64) rem1 < 0 ) {
				3898	--zSig1;
				3899	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				3900	term3 \|= 1;
				3901	term2 \|= doubleZSig0;
				3902	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				3903	}
				3904	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				3905	}
				3906	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
				3907	zSig0 \|= doubleZSig0;
				3908	return
				3909	roundAndPackFloatx80(
				3910	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
				3911
				3912	}
				3913
				3914	/*----------------------------------------------------------------------------
				3915	\| Returns 1 if the extended double-precision floating-point value `a' is
				3916	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
				3917	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3918	\| Arithmetic.
				3919	----------------------------------------------------------------------------/
				3920
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3921	int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3922	{
				3923
				3924	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3925	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3926	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3927	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3928	) {
				3929	if ( floatx80_is_signaling_nan( a )
				3930	\|\| floatx80_is_signaling_nan( b ) ) {
				3931	float_raise( float_flag_invalid STATUS_VAR);
				3932	}
				3933	return 0;
				3934	}
				3935	return
				3936	( a.low == b.low )
				3937	&& ( ( a.high == b.high )
				3938	\|\| ( ( a.low == 0 )
				3939	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				3940	);
				3941
				3942	}
				3943
				3944	/*----------------------------------------------------------------------------
				3945	\| Returns 1 if the extended double-precision floating-point value `a' is
				3946	\| less than or equal to the corresponding value `b', and 0 otherwise. The
				3947	\| comparison is performed according to the IEC/IEEE Standard for Binary
				3948	\| Floating-Point Arithmetic.
				3949	----------------------------------------------------------------------------/
				3950
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3951	int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3952	{
				3953	flag aSign, bSign;
				3954
				3955	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3956	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3957	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3958	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3959	) {
				3960	float_raise( float_flag_invalid STATUS_VAR);
				3961	return 0;
				3962	}
				3963	aSign = extractFloatx80Sign( a );
				3964	bSign = extractFloatx80Sign( b );
				3965	if ( aSign != bSign ) {
				3966	return
				3967	aSign
				3968	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				3969	== 0 );
				3970	}
				3971	return
				3972	aSign ? le128( b.high, b.low, a.high, a.low )
				3973	: le128( a.high, a.low, b.high, b.low );
				3974
				3975	}
				3976
				3977	/*----------------------------------------------------------------------------
				3978	\| Returns 1 if the extended double-precision floating-point value `a' is
				3979	\| less than the corresponding value `b', and 0 otherwise. The comparison
				3980	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				3981	\| Arithmetic.
				3982	----------------------------------------------------------------------------/
				3983
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3984	int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3985	{
				3986	flag aSign, bSign;
				3987
				3988	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				3989	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				3990	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				3991	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				3992	) {
				3993	float_raise( float_flag_invalid STATUS_VAR);
				3994	return 0;
				3995	}
				3996	aSign = extractFloatx80Sign( a );
				3997	bSign = extractFloatx80Sign( b );
				3998	if ( aSign != bSign ) {
				3999	return
				4000	aSign
				4001	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4002	!= 0 );
				4003	}
				4004	return
				4005	aSign ? lt128( b.high, b.low, a.high, a.low )
				4006	: lt128( a.high, a.low, b.high, b.low );
				4007
				4008	}
				4009
				4010	/*----------------------------------------------------------------------------
				4011	\| Returns 1 if the extended double-precision floating-point value `a' is equal
				4012	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
				4013	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				4014	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4015	----------------------------------------------------------------------------/
				4016
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4017	int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4018	{
				4019
				4020	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4021	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4022	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4023	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4024	) {
				4025	float_raise( float_flag_invalid STATUS_VAR);
				4026	return 0;
				4027	}
				4028	return
				4029	( a.low == b.low )
				4030	&& ( ( a.high == b.high )
				4031	\|\| ( ( a.low == 0 )
				4032	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				4033	);
				4034
				4035	}
				4036
				4037	/*----------------------------------------------------------------------------
				4038	\| Returns 1 if the extended double-precision floating-point value `a' is less
				4039	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				4040	\| do not cause an exception. Otherwise, the comparison is performed according
				4041	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4042	----------------------------------------------------------------------------/
				4043
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4044	int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4045	{
				4046	flag aSign, bSign;
				4047
				4048	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4049	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4050	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4051	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4052	) {
				4053	if ( floatx80_is_signaling_nan( a )
				4054	\|\| floatx80_is_signaling_nan( b ) ) {
				4055	float_raise( float_flag_invalid STATUS_VAR);
				4056	}
				4057	return 0;
				4058	}
				4059	aSign = extractFloatx80Sign( a );
				4060	bSign = extractFloatx80Sign( b );
				4061	if ( aSign != bSign ) {
				4062	return
				4063	aSign
				4064	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4065	== 0 );
				4066	}
				4067	return
				4068	aSign ? le128( b.high, b.low, a.high, a.low )
				4069	: le128( a.high, a.low, b.high, b.low );
				4070
				4071	}
				4072
				4073	/*----------------------------------------------------------------------------
				4074	\| Returns 1 if the extended double-precision floating-point value `a' is less
				4075	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				4076	\| an exception. Otherwise, the comparison is performed according to the
				4077	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4078	----------------------------------------------------------------------------/
				4079
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4080	int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4081	{
				4082	flag aSign, bSign;
				4083
				4084	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4085	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4086	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4087	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4088	) {
				4089	if ( floatx80_is_signaling_nan( a )
				4090	\|\| floatx80_is_signaling_nan( b ) ) {
				4091	float_raise( float_flag_invalid STATUS_VAR);
				4092	}
				4093	return 0;
				4094	}
				4095	aSign = extractFloatx80Sign( a );
				4096	bSign = extractFloatx80Sign( b );
				4097	if ( aSign != bSign ) {
				4098	return
				4099	aSign
				4100	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4101	!= 0 );
				4102	}
				4103	return
				4104	aSign ? lt128( b.high, b.low, a.high, a.low )
				4105	: lt128( a.high, a.low, b.high, b.low );
				4106
				4107	}
				4108
				4109	#endif
				4110
				4111	#ifdef FLOAT128
				4112
				4113	/*----------------------------------------------------------------------------
				4114	\| Returns the result of converting the quadruple-precision floating-point
				4115	\| value `a' to the 32-bit two's complement integer format. The conversion
				4116	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4117	\| Arithmetic---which means in particular that the conversion is rounded
				4118	\| according to the current rounding mode. If `a' is a NaN, the largest
				4119	\| positive integer is returned. Otherwise, if the conversion overflows, the
				4120	\| largest integer with the same sign as `a' is returned.
				4121	----------------------------------------------------------------------------/
				4122
				4123	int32 float128_to_int32( float128 a STATUS_PARAM )
				4124	{
				4125	flag aSign;
				4126	int32 aExp, shiftCount;
				4127	bits64 aSig0, aSig1;
				4128
				4129	aSig1 = extractFloat128Frac1( a );
				4130	aSig0 = extractFloat128Frac0( a );
				4131	aExp = extractFloat128Exp( a );
				4132	aSign = extractFloat128Sign( a );
				4133	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
				4134	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4135	aSig0 \|= ( aSig1 != 0 );
				4136	shiftCount = 0x4028 - aExp;
				4137	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
				4138	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
				4139
				4140	}
				4141
				4142	/*----------------------------------------------------------------------------
				4143	\| Returns the result of converting the quadruple-precision floating-point
				4144	\| value `a' to the 32-bit two's complement integer format. The conversion
				4145	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4146	\| Arithmetic, except that the conversion is always rounded toward zero. If
				4147	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				4148	\| conversion overflows, the largest integer with the same sign as `a' is
				4149	\| returned.
				4150	----------------------------------------------------------------------------/
				4151
				4152	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
				4153	{
				4154	flag aSign;
				4155	int32 aExp, shiftCount;
				4156	bits64 aSig0, aSig1, savedASig;
				4157	int32 z;
				4158
				4159	aSig1 = extractFloat128Frac1( a );
				4160	aSig0 = extractFloat128Frac0( a );
				4161	aExp = extractFloat128Exp( a );
				4162	aSign = extractFloat128Sign( a );
				4163	aSig0 \|= ( aSig1 != 0 );
				4164	if ( 0x401E < aExp ) {
				4165	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
				4166	goto invalid;
				4167	}
				4168	else if ( aExp < 0x3FFF ) {
				4169	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4170	return 0;
				4171	}
				4172	aSig0 \|= LIT64( 0x0001000000000000 );
				4173	shiftCount = 0x402F - aExp;
				4174	savedASig = aSig0;
				4175	aSig0 >>= shiftCount;
				4176	z = aSig0;
				4177	if ( aSign ) z = - z;
				4178	if ( ( z < 0 ) ^ aSign ) {
				4179	invalid:
				4180	float_raise( float_flag_invalid STATUS_VAR);
				4181	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				4182	}
				4183	if ( ( aSig0<<shiftCount ) != savedASig ) {
				4184	STATUS(float_exception_flags) \|= float_flag_inexact;
				4185	}
				4186	return z;
				4187
				4188	}
				4189
				4190	/*----------------------------------------------------------------------------
				4191	\| Returns the result of converting the quadruple-precision floating-point
				4192	\| value `a' to the 64-bit two's complement integer format. The conversion
				4193	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4194	\| Arithmetic---which means in particular that the conversion is rounded
				4195	\| according to the current rounding mode. If `a' is a NaN, the largest
				4196	\| positive integer is returned. Otherwise, if the conversion overflows, the
				4197	\| largest integer with the same sign as `a' is returned.
				4198	----------------------------------------------------------------------------/
				4199
				4200	int64 float128_to_int64( float128 a STATUS_PARAM )
				4201	{
				4202	flag aSign;
				4203	int32 aExp, shiftCount;
				4204	bits64 aSig0, aSig1;
				4205
				4206	aSig1 = extractFloat128Frac1( a );
				4207	aSig0 = extractFloat128Frac0( a );
				4208	aExp = extractFloat128Exp( a );
				4209	aSign = extractFloat128Sign( a );
				4210	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4211	shiftCount = 0x402F - aExp;
				4212	if ( shiftCount <= 0 ) {
				4213	if ( 0x403E < aExp ) {
				4214	float_raise( float_flag_invalid STATUS_VAR);
				4215	if ( ! aSign
				4216	\|\| ( ( aExp == 0x7FFF )
				4217	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
				4218	)
				4219	) {
				4220	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4221	}
				4222	return (sbits64) LIT64( 0x8000000000000000 );
				4223	}
				4224	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
				4225	}
				4226	else {
				4227	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
				4228	}
				4229	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
				4230
				4231	}
				4232
				4233	/*----------------------------------------------------------------------------
				4234	\| Returns the result of converting the quadruple-precision floating-point
				4235	\| value `a' to the 64-bit two's complement integer format. The conversion
				4236	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4237	\| Arithmetic, except that the conversion is always rounded toward zero.
				4238	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				4239	\| the conversion overflows, the largest integer with the same sign as `a' is
				4240	\| returned.
				4241	----------------------------------------------------------------------------/
				4242
				4243	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
				4244	{
				4245	flag aSign;
				4246	int32 aExp, shiftCount;
				4247	bits64 aSig0, aSig1;
				4248	int64 z;
				4249
				4250	aSig1 = extractFloat128Frac1( a );
				4251	aSig0 = extractFloat128Frac0( a );
				4252	aExp = extractFloat128Exp( a );
				4253	aSign = extractFloat128Sign( a );
				4254	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4255	shiftCount = aExp - 0x402F;
				4256	if ( 0 < shiftCount ) {
				4257	if ( 0x403E <= aExp ) {
				4258	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
				4259	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
				4260	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
				4261	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4262	}
				4263	else {
				4264	float_raise( float_flag_invalid STATUS_VAR);
				4265	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
				4266	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4267	}
				4268	}
				4269	return (sbits64) LIT64( 0x8000000000000000 );
				4270	}
				4271	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
				4272	if ( (bits64) ( aSig1<<shiftCount ) ) {
				4273	STATUS(float_exception_flags) \|= float_flag_inexact;
				4274	}
				4275	}
				4276	else {
				4277	if ( aExp < 0x3FFF ) {
				4278	if ( aExp \| aSig0 \| aSig1 ) {
				4279	STATUS(float_exception_flags) \|= float_flag_inexact;
				4280	}
				4281	return 0;
				4282	}
				4283	z = aSig0>>( - shiftCount );
				4284	if ( aSig1
				4285	\|\| ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
				4286	STATUS(float_exception_flags) \|= float_flag_inexact;
				4287	}
				4288	}
				4289	if ( aSign ) z = - z;
				4290	return z;
				4291
				4292	}
				4293
				4294	/*----------------------------------------------------------------------------
				4295	\| Returns the result of converting the quadruple-precision floating-point
				4296	\| value `a' to the single-precision floating-point format. The conversion
				4297	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4298	\| Arithmetic.
				4299	----------------------------------------------------------------------------/
				4300
				4301	float32 float128_to_float32( float128 a STATUS_PARAM )
				4302	{
				4303	flag aSign;
				4304	int32 aExp;
				4305	bits64 aSig0, aSig1;
				4306	bits32 zSig;
				4307
				4308	aSig1 = extractFloat128Frac1( a );
				4309	aSig0 = extractFloat128Frac0( a );
				4310	aExp = extractFloat128Exp( a );
				4311	aSign = extractFloat128Sign( a );
				4312	if ( aExp == 0x7FFF ) {
				4313	if ( aSig0 \| aSig1 ) {
				4314	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
				4315	}
				4316	return packFloat32( aSign, 0xFF, 0 );
				4317	}
				4318	aSig0 \|= ( aSig1 != 0 );
				4319	shift64RightJamming( aSig0, 18, &aSig0 );
				4320	zSig = aSig0;
				4321	if ( aExp \|\| zSig ) {
				4322	zSig \|= 0x40000000;
				4323	aExp -= 0x3F81;
				4324	}
				4325	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				4326
				4327	}
				4328
				4329	/*----------------------------------------------------------------------------
				4330	\| Returns the result of converting the quadruple-precision floating-point
				4331	\| value `a' to the double-precision floating-point format. The conversion
				4332	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4333	\| Arithmetic.
				4334	----------------------------------------------------------------------------/
				4335
				4336	float64 float128_to_float64( float128 a STATUS_PARAM )
				4337	{
				4338	flag aSign;
				4339	int32 aExp;
				4340	bits64 aSig0, aSig1;
				4341
				4342	aSig1 = extractFloat128Frac1( a );
				4343	aSig0 = extractFloat128Frac0( a );
				4344	aExp = extractFloat128Exp( a );
				4345	aSign = extractFloat128Sign( a );
				4346	if ( aExp == 0x7FFF ) {
				4347	if ( aSig0 \| aSig1 ) {
				4348	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
				4349	}
				4350	return packFloat64( aSign, 0x7FF, 0 );
				4351	}
				4352	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				4353	aSig0 \|= ( aSig1 != 0 );
				4354	if ( aExp \|\| aSig0 ) {
				4355	aSig0 \|= LIT64( 0x4000000000000000 );
				4356	aExp -= 0x3C01;
				4357	}
				4358	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
				4359
				4360	}
				4361
				4362	#ifdef FLOATX80
				4363
				4364	/*----------------------------------------------------------------------------
				4365	\| Returns the result of converting the quadruple-precision floating-point
				4366	\| value `a' to the extended double-precision floating-point format. The
				4367	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4368	\| Floating-Point Arithmetic.
				4369	----------------------------------------------------------------------------/
				4370
				4371	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
				4372	{
				4373	flag aSign;
				4374	int32 aExp;
				4375	bits64 aSig0, aSig1;
				4376
				4377	aSig1 = extractFloat128Frac1( a );
				4378	aSig0 = extractFloat128Frac0( a );
				4379	aExp = extractFloat128Exp( a );
				4380	aSign = extractFloat128Sign( a );
				4381	if ( aExp == 0x7FFF ) {
				4382	if ( aSig0 \| aSig1 ) {
				4383	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
				4384	}
				4385	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4386	}
				4387	if ( aExp == 0 ) {
				4388	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
				4389	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4390	}
				4391	else {
				4392	aSig0 \|= LIT64( 0x0001000000000000 );
				4393	}
				4394	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
				4395	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
				4396
				4397	}
				4398
				4399	#endif
				4400
				4401	/*----------------------------------------------------------------------------
				4402	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
				4403	\| returns the result as a quadruple-precision floating-point value. The
				4404	\| operation is performed according to the IEC/IEEE Standard for Binary
				4405	\| Floating-Point Arithmetic.
				4406	----------------------------------------------------------------------------/
				4407
				4408	float128 float128_round_to_int( float128 a STATUS_PARAM )
				4409	{
				4410	flag aSign;
				4411	int32 aExp;
				4412	bits64 lastBitMask, roundBitsMask;
				4413	int8 roundingMode;
				4414	float128 z;
				4415
				4416	aExp = extractFloat128Exp( a );
				4417	if ( 0x402F <= aExp ) {
				4418	if ( 0x406F <= aExp ) {
				4419	if ( ( aExp == 0x7FFF )
				4420	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
				4421	) {
				4422	return propagateFloat128NaN( a, a STATUS_VAR );
				4423	}
				4424	return a;
				4425	}
				4426	lastBitMask = 1;
				4427	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
				4428	roundBitsMask = lastBitMask - 1;
				4429	z = a;
				4430	roundingMode = STATUS(float_rounding_mode);
				4431	if ( roundingMode == float_round_nearest_even ) {
				4432	if ( lastBitMask ) {
				4433	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
				4434	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				4435	}
				4436	else {
				4437	if ( (sbits64) z.low < 0 ) {
				4438	++z.high;
				4439	if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
				4440	}
				4441	}
				4442	}
				4443	else if ( roundingMode != float_round_to_zero ) {
				4444	if ( extractFloat128Sign( z )
				4445	^ ( roundingMode == float_round_up ) ) {
				4446	add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
				4447	}
				4448	}
				4449	z.low &= ~ roundBitsMask;
				4450	}
				4451	else {
				4452	if ( aExp < 0x3FFF ) {
				4453	if ( ( ( (bits64) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
				4454	STATUS(float_exception_flags) \|= float_flag_inexact;
				4455	aSign = extractFloat128Sign( a );
				4456	switch ( STATUS(float_rounding_mode) ) {
				4457	case float_round_nearest_even:
				4458	if ( ( aExp == 0x3FFE )
				4459	&& ( extractFloat128Frac0( a )
				4460	\| extractFloat128Frac1( a ) )
				4461	) {
				4462	return packFloat128( aSign, 0x3FFF, 0, 0 );
				4463	}
				4464	break;
				4465	case float_round_down:
				4466	return
				4467	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
				4468	: packFloat128( 0, 0, 0, 0 );
				4469	case float_round_up:
				4470	return
				4471	aSign ? packFloat128( 1, 0, 0, 0 )
				4472	: packFloat128( 0, 0x3FFF, 0, 0 );
				4473	}
				4474	return packFloat128( aSign, 0, 0, 0 );
				4475	}
				4476	lastBitMask = 1;
				4477	lastBitMask <<= 0x402F - aExp;
				4478	roundBitsMask = lastBitMask - 1;
				4479	z.low = 0;
				4480	z.high = a.high;
				4481	roundingMode = STATUS(float_rounding_mode);
				4482	if ( roundingMode == float_round_nearest_even ) {
				4483	z.high += lastBitMask>>1;
				4484	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
				4485	z.high &= ~ lastBitMask;
				4486	}
				4487	}
				4488	else if ( roundingMode != float_round_to_zero ) {
				4489	if ( extractFloat128Sign( z )
				4490	^ ( roundingMode == float_round_up ) ) {
				4491	z.high \|= ( a.low != 0 );
				4492	z.high += roundBitsMask;
				4493	}
				4494	}
				4495	z.high &= ~ roundBitsMask;
				4496	}
				4497	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
				4498	STATUS(float_exception_flags) \|= float_flag_inexact;
				4499	}
				4500	return z;
				4501
				4502	}
				4503
				4504	/*----------------------------------------------------------------------------
				4505	\| Returns the result of adding the absolute values of the quadruple-precision
				4506	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				4507	\| before being returned. `zSign' is ignored if the result is a NaN.
				4508	\| The addition is performed according to the IEC/IEEE Standard for Binary
				4509	\| Floating-Point Arithmetic.
				4510	----------------------------------------------------------------------------/
				4511
				4512	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				4513	{
				4514	int32 aExp, bExp, zExp;
				4515	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				4516	int32 expDiff;
				4517
				4518	aSig1 = extractFloat128Frac1( a );
				4519	aSig0 = extractFloat128Frac0( a );
				4520	aExp = extractFloat128Exp( a );
				4521	bSig1 = extractFloat128Frac1( b );
				4522	bSig0 = extractFloat128Frac0( b );
				4523	bExp = extractFloat128Exp( b );
				4524	expDiff = aExp - bExp;
				4525	if ( 0 < expDiff ) {
				4526	if ( aExp == 0x7FFF ) {
				4527	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4528	return a;
				4529	}
				4530	if ( bExp == 0 ) {
				4531	--expDiff;
				4532	}
				4533	else {
				4534	bSig0 \|= LIT64( 0x0001000000000000 );
				4535	}
				4536	shift128ExtraRightJamming(
				4537	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
				4538	zExp = aExp;
				4539	}
				4540	else if ( expDiff < 0 ) {
				4541	if ( bExp == 0x7FFF ) {
				4542	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4543	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4544	}
				4545	if ( aExp == 0 ) {
				4546	++expDiff;
				4547	}
				4548	else {
				4549	aSig0 \|= LIT64( 0x0001000000000000 );
				4550	}
				4551	shift128ExtraRightJamming(
				4552	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
				4553	zExp = bExp;
				4554	}
				4555	else {
				4556	if ( aExp == 0x7FFF ) {
				4557	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				4558	return propagateFloat128NaN( a, b STATUS_VAR );
				4559	}
				4560	return a;
				4561	}
				4562	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4563	if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
				4564	zSig2 = 0;
				4565	zSig0 \|= LIT64( 0x0002000000000000 );
				4566	zExp = aExp;
				4567	goto shiftRight1;
				4568	}
				4569	aSig0 \|= LIT64( 0x0001000000000000 );
				4570	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4571	--zExp;
				4572	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
				4573	++zExp;
				4574	shiftRight1:
				4575	shift128ExtraRightJamming(
				4576	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				4577	roundAndPack:
				4578	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4579
				4580	}
				4581
				4582	/*----------------------------------------------------------------------------
				4583	\| Returns the result of subtracting the absolute values of the quadruple-
				4584	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				4585	\| difference is negated before being returned. `zSign' is ignored if the
				4586	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				4587	\| Standard for Binary Floating-Point Arithmetic.
				4588	----------------------------------------------------------------------------/
				4589
				4590	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				4591	{
				4592	int32 aExp, bExp, zExp;
				4593	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
				4594	int32 expDiff;
				4595	float128 z;
				4596
				4597	aSig1 = extractFloat128Frac1( a );
				4598	aSig0 = extractFloat128Frac0( a );
				4599	aExp = extractFloat128Exp( a );
				4600	bSig1 = extractFloat128Frac1( b );
				4601	bSig0 = extractFloat128Frac0( b );
				4602	bExp = extractFloat128Exp( b );
				4603	expDiff = aExp - bExp;
				4604	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				4605	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
				4606	if ( 0 < expDiff ) goto aExpBigger;
				4607	if ( expDiff < 0 ) goto bExpBigger;
				4608	if ( aExp == 0x7FFF ) {
				4609	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				4610	return propagateFloat128NaN( a, b STATUS_VAR );
				4611	}
				4612	float_raise( float_flag_invalid STATUS_VAR);
				4613	z.low = float128_default_nan_low;
				4614	z.high = float128_default_nan_high;
				4615	return z;
				4616	}
				4617	if ( aExp == 0 ) {
				4618	aExp = 1;
				4619	bExp = 1;
				4620	}
				4621	if ( bSig0 < aSig0 ) goto aBigger;
				4622	if ( aSig0 < bSig0 ) goto bBigger;
				4623	if ( bSig1 < aSig1 ) goto aBigger;
				4624	if ( aSig1 < bSig1 ) goto bBigger;
				4625	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
				4626	bExpBigger:
				4627	if ( bExp == 0x7FFF ) {
				4628	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4629	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
				4630	}
				4631	if ( aExp == 0 ) {
				4632	++expDiff;
				4633	}
				4634	else {
				4635	aSig0 \|= LIT64( 0x4000000000000000 );
				4636	}
				4637	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				4638	bSig0 \|= LIT64( 0x4000000000000000 );
				4639	bBigger:
				4640	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
				4641	zExp = bExp;
				4642	zSign ^= 1;
				4643	goto normalizeRoundAndPack;
				4644	aExpBigger:
				4645	if ( aExp == 0x7FFF ) {
				4646	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4647	return a;
				4648	}
				4649	if ( bExp == 0 ) {
				4650	--expDiff;
				4651	}
				4652	else {
				4653	bSig0 \|= LIT64( 0x4000000000000000 );
				4654	}
				4655	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
				4656	aSig0 \|= LIT64( 0x4000000000000000 );
				4657	aBigger:
				4658	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4659	zExp = aExp;
				4660	normalizeRoundAndPack:
				4661	--zExp;
				4662	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
				4663
				4664	}
				4665
				4666	/*----------------------------------------------------------------------------
				4667	\| Returns the result of adding the quadruple-precision floating-point values
				4668	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				4669	\| for Binary Floating-Point Arithmetic.
				4670	----------------------------------------------------------------------------/
				4671
				4672	float128 float128_add( float128 a, float128 b STATUS_PARAM )
				4673	{
				4674	flag aSign, bSign;
				4675
				4676	aSign = extractFloat128Sign( a );
				4677	bSign = extractFloat128Sign( b );
				4678	if ( aSign == bSign ) {
				4679	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				4680	}
				4681	else {
				4682	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				4683	}
				4684
				4685	}
				4686
				4687	/*----------------------------------------------------------------------------
				4688	\| Returns the result of subtracting the quadruple-precision floating-point
				4689	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4690	\| Standard for Binary Floating-Point Arithmetic.
				4691	----------------------------------------------------------------------------/
				4692
				4693	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
				4694	{
				4695	flag aSign, bSign;
				4696
				4697	aSign = extractFloat128Sign( a );
				4698	bSign = extractFloat128Sign( b );
				4699	if ( aSign == bSign ) {
				4700	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				4701	}
				4702	else {
				4703	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				4704	}
				4705
				4706	}
				4707
				4708	/*----------------------------------------------------------------------------
				4709	\| Returns the result of multiplying the quadruple-precision floating-point
				4710	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4711	\| Standard for Binary Floating-Point Arithmetic.
				4712	----------------------------------------------------------------------------/
				4713
				4714	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
				4715	{
				4716	flag aSign, bSign, zSign;
				4717	int32 aExp, bExp, zExp;
				4718	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
				4719	float128 z;
				4720
				4721	aSig1 = extractFloat128Frac1( a );
				4722	aSig0 = extractFloat128Frac0( a );
				4723	aExp = extractFloat128Exp( a );
				4724	aSign = extractFloat128Sign( a );
				4725	bSig1 = extractFloat128Frac1( b );
				4726	bSig0 = extractFloat128Frac0( b );
				4727	bExp = extractFloat128Exp( b );
				4728	bSign = extractFloat128Sign( b );
				4729	zSign = aSign ^ bSign;
				4730	if ( aExp == 0x7FFF ) {
				4731	if ( ( aSig0 \| aSig1 )
				4732	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				4733	return propagateFloat128NaN( a, b STATUS_VAR );
				4734	}
				4735	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
				4736	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4737	}
				4738	if ( bExp == 0x7FFF ) {
				4739	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4740	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				4741	invalid:
				4742	float_raise( float_flag_invalid STATUS_VAR);
				4743	z.low = float128_default_nan_low;
				4744	z.high = float128_default_nan_high;
				4745	return z;
				4746	}
				4747	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4748	}
				4749	if ( aExp == 0 ) {
				4750	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4751	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4752	}
				4753	if ( bExp == 0 ) {
				4754	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4755	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4756	}
				4757	zExp = aExp + bExp - 0x4000;
				4758	aSig0 \|= LIT64( 0x0001000000000000 );
				4759	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
				4760	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
				4761	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
				4762	zSig2 \|= ( zSig3 != 0 );
				4763	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
				4764	shift128ExtraRightJamming(
				4765	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				4766	++zExp;
				4767	}
				4768	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4769
				4770	}
				4771
				4772	/*----------------------------------------------------------------------------
				4773	\| Returns the result of dividing the quadruple-precision floating-point value
				4774	\| `a' by the corresponding value `b'. The operation is performed according to
				4775	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4776	----------------------------------------------------------------------------/
				4777
				4778	float128 float128_div( float128 a, float128 b STATUS_PARAM )
				4779	{
				4780	flag aSign, bSign, zSign;
				4781	int32 aExp, bExp, zExp;
				4782	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				4783	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				4784	float128 z;
				4785
				4786	aSig1 = extractFloat128Frac1( a );
				4787	aSig0 = extractFloat128Frac0( a );
				4788	aExp = extractFloat128Exp( a );
				4789	aSign = extractFloat128Sign( a );
				4790	bSig1 = extractFloat128Frac1( b );
				4791	bSig0 = extractFloat128Frac0( b );
				4792	bExp = extractFloat128Exp( b );
				4793	bSign = extractFloat128Sign( b );
				4794	zSign = aSign ^ bSign;
				4795	if ( aExp == 0x7FFF ) {
				4796	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4797	if ( bExp == 0x7FFF ) {
				4798	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4799	goto invalid;
				4800	}
				4801	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4802	}
				4803	if ( bExp == 0x7FFF ) {
				4804	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4805	return packFloat128( zSign, 0, 0, 0 );
				4806	}
				4807	if ( bExp == 0 ) {
				4808	if ( ( bSig0 \| bSig1 ) == 0 ) {
				4809	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				4810	invalid:
				4811	float_raise( float_flag_invalid STATUS_VAR);
				4812	z.low = float128_default_nan_low;
				4813	z.high = float128_default_nan_high;
				4814	return z;
				4815	}
				4816	float_raise( float_flag_divbyzero STATUS_VAR);
				4817	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4818	}
				4819	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4820	}
				4821	if ( aExp == 0 ) {
				4822	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4823	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4824	}
				4825	zExp = aExp - bExp + 0x3FFD;
				4826	shortShift128Left(
				4827	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
				4828	shortShift128Left(
				4829	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				4830	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
				4831	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
				4832	++zExp;
				4833	}
				4834	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
				4835	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
				4836	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
				4837	while ( (sbits64) rem0 < 0 ) {
				4838	--zSig0;
				4839	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
				4840	}
				4841	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
				4842	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
				4843	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
				4844	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
				4845	while ( (sbits64) rem1 < 0 ) {
				4846	--zSig1;
				4847	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
				4848	}
				4849	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				4850	}
				4851	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
				4852	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4853
				4854	}
				4855
				4856	/*----------------------------------------------------------------------------
				4857	\| Returns the remainder of the quadruple-precision floating-point value `a'
				4858	\| with respect to the corresponding value `b'. The operation is performed
				4859	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4860	----------------------------------------------------------------------------/
				4861
				4862	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
				4863	{
				4864	flag aSign, bSign, zSign;
				4865	int32 aExp, bExp, expDiff;
				4866	bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
				4867	bits64 allZero, alternateASig0, alternateASig1, sigMean1;
				4868	sbits64 sigMean0;
				4869	float128 z;
				4870
				4871	aSig1 = extractFloat128Frac1( a );
				4872	aSig0 = extractFloat128Frac0( a );
				4873	aExp = extractFloat128Exp( a );
				4874	aSign = extractFloat128Sign( a );
				4875	bSig1 = extractFloat128Frac1( b );
				4876	bSig0 = extractFloat128Frac0( b );
				4877	bExp = extractFloat128Exp( b );
				4878	bSign = extractFloat128Sign( b );
				4879	if ( aExp == 0x7FFF ) {
				4880	if ( ( aSig0 \| aSig1 )
				4881	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				4882	return propagateFloat128NaN( a, b STATUS_VAR );
				4883	}
				4884	goto invalid;
				4885	}
				4886	if ( bExp == 0x7FFF ) {
				4887	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4888	return a;
				4889	}
				4890	if ( bExp == 0 ) {
				4891	if ( ( bSig0 \| bSig1 ) == 0 ) {
				4892	invalid:
				4893	float_raise( float_flag_invalid STATUS_VAR);
				4894	z.low = float128_default_nan_low;
				4895	z.high = float128_default_nan_high;
				4896	return z;
				4897	}
				4898	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4899	}
				4900	if ( aExp == 0 ) {
				4901	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
				4902	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4903	}
				4904	expDiff = aExp - bExp;
				4905	if ( expDiff < -1 ) return a;
				4906	shortShift128Left(
				4907	aSig0 \| LIT64( 0x0001000000000000 ),
				4908	aSig1,
				4909	15 - ( expDiff < 0 ),
				4910	&aSig0,
				4911	&aSig1
				4912	);
				4913	shortShift128Left(
				4914	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				4915	q = le128( bSig0, bSig1, aSig0, aSig1 );
				4916	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				4917	expDiff -= 64;
				4918	while ( 0 < expDiff ) {
				4919	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				4920	q = ( 4 < q ) ? q - 4 : 0;
				4921	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				4922	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
				4923	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
				4924	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
				4925	expDiff -= 61;
				4926	}
				4927	if ( -64 < expDiff ) {
				4928	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				4929	q = ( 4 < q ) ? q - 4 : 0;
				4930	q >>= - expDiff;
				4931	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				4932	expDiff += 52;
				4933	if ( expDiff < 0 ) {
				4934	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				4935	}
				4936	else {
				4937	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
				4938	}
				4939	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				4940	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
				4941	}
				4942	else {
				4943	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
				4944	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				4945	}
				4946	do {
				4947	alternateASig0 = aSig0;
				4948	alternateASig1 = aSig1;
				4949	++q;
				4950	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				4951	} while ( 0 <= (sbits64) aSig0 );
				4952	add128(
				4953	aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
				4954	if ( ( sigMean0 < 0 )
				4955	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
				4956	aSig0 = alternateASig0;
				4957	aSig1 = alternateASig1;
				4958	}
				4959	zSign = ( (sbits64) aSig0 < 0 );
				4960	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
				4961	return
				4962	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
				4963
				4964	}
				4965
				4966	/*----------------------------------------------------------------------------
				4967	\| Returns the square root of the quadruple-precision floating-point value `a'.
				4968	\| The operation is performed according to the IEC/IEEE Standard for Binary
				4969	\| Floating-Point Arithmetic.
				4970	----------------------------------------------------------------------------/
				4971
				4972	float128 float128_sqrt( float128 a STATUS_PARAM )
				4973	{
				4974	flag aSign;
				4975	int32 aExp, zExp;
				4976	bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
				4977	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				4978	float128 z;
				4979
				4980	aSig1 = extractFloat128Frac1( a );
				4981	aSig0 = extractFloat128Frac0( a );
				4982	aExp = extractFloat128Exp( a );
				4983	aSign = extractFloat128Sign( a );
				4984	if ( aExp == 0x7FFF ) {
				4985	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
				4986	if ( ! aSign ) return a;
				4987	goto invalid;
				4988	}
				4989	if ( aSign ) {
				4990	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
				4991	invalid:
				4992	float_raise( float_flag_invalid STATUS_VAR);
				4993	z.low = float128_default_nan_low;
				4994	z.high = float128_default_nan_high;
				4995	return z;
				4996	}
				4997	if ( aExp == 0 ) {
				4998	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
				4999	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5000	}
				5001	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
				5002	aSig0 \|= LIT64( 0x0001000000000000 );
				5003	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
				5004	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
				5005	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				5006	doubleZSig0 = zSig0<<1;
				5007	mul64To128( zSig0, zSig0, &term0, &term1 );
				5008	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				5009	while ( (sbits64) rem0 < 0 ) {
				5010	--zSig0;
				5011	doubleZSig0 -= 2;
				5012	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				5013	}
				5014	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				5015	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
				5016	if ( zSig1 == 0 ) zSig1 = 1;
				5017	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				5018	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				5019	mul64To128( zSig1, zSig1, &term2, &term3 );
				5020	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				5021	while ( (sbits64) rem1 < 0 ) {
				5022	--zSig1;
				5023	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				5024	term3 \|= 1;
				5025	term2 \|= doubleZSig0;
				5026	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				5027	}
				5028	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				5029	}
				5030	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
				5031	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5032
				5033	}
				5034
				5035	/*----------------------------------------------------------------------------
				5036	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
				5037	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				5038	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5039	----------------------------------------------------------------------------/
				5040
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5041	int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5042	{
				5043
				5044	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5045	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5046	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5047	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5048	) {
				5049	if ( float128_is_signaling_nan( a )
				5050	\|\| float128_is_signaling_nan( b ) ) {
				5051	float_raise( float_flag_invalid STATUS_VAR);
				5052	}
				5053	return 0;
				5054	}
				5055	return
				5056	( a.low == b.low )
				5057	&& ( ( a.high == b.high )
				5058	\|\| ( ( a.low == 0 )
				5059	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
				5060	);
				5061
				5062	}
				5063
				5064	/*----------------------------------------------------------------------------
				5065	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5066	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
				5067	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5068	\| Arithmetic.
				5069	----------------------------------------------------------------------------/
				5070
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5071	int float128_le( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5072	{
				5073	flag aSign, bSign;
				5074
				5075	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5076	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5077	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5078	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5079	) {
				5080	float_raise( float_flag_invalid STATUS_VAR);
				5081	return 0;
				5082	}
				5083	aSign = extractFloat128Sign( a );
				5084	bSign = extractFloat128Sign( b );
				5085	if ( aSign != bSign ) {
				5086	return
				5087	aSign
				5088	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5089	== 0 );
				5090	}
				5091	return
				5092	aSign ? le128( b.high, b.low, a.high, a.low )
				5093	: le128( a.high, a.low, b.high, b.low );
				5094
				5095	}
				5096
				5097	/*----------------------------------------------------------------------------
				5098	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5099	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				5100	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5101	----------------------------------------------------------------------------/
				5102
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5103	int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5104	{
				5105	flag aSign, bSign;
				5106
				5107	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5108	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5109	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5110	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5111	) {
				5112	float_raise( float_flag_invalid STATUS_VAR);
				5113	return 0;
				5114	}
				5115	aSign = extractFloat128Sign( a );
				5116	bSign = extractFloat128Sign( b );
				5117	if ( aSign != bSign ) {
				5118	return
				5119	aSign
				5120	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5121	!= 0 );
				5122	}
				5123	return
				5124	aSign ? lt128( b.high, b.low, a.high, a.low )
				5125	: lt128( a.high, a.low, b.high, b.low );
				5126
				5127	}
				5128
				5129	/*----------------------------------------------------------------------------
				5130	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
				5131	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				5132	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				5133	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5134	----------------------------------------------------------------------------/
				5135
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5136	int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5137	{
				5138
				5139	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5140	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5141	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5142	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5143	) {
				5144	float_raise( float_flag_invalid STATUS_VAR);
				5145	return 0;
				5146	}
				5147	return
				5148	( a.low == b.low )
				5149	&& ( ( a.high == b.high )
				5150	\|\| ( ( a.low == 0 )
				5151	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
				5152	);
				5153
				5154	}
				5155
				5156	/*----------------------------------------------------------------------------
				5157	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5158	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				5159	\| cause an exception. Otherwise, the comparison is performed according to the
				5160	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5161	----------------------------------------------------------------------------/
				5162
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5163	int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5164	{
				5165	flag aSign, bSign;
				5166
				5167	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5168	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5169	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5170	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5171	) {
				5172	if ( float128_is_signaling_nan( a )
				5173	\|\| float128_is_signaling_nan( b ) ) {
				5174	float_raise( float_flag_invalid STATUS_VAR);
				5175	}
				5176	return 0;
				5177	}
				5178	aSign = extractFloat128Sign( a );
				5179	bSign = extractFloat128Sign( b );
				5180	if ( aSign != bSign ) {
				5181	return
				5182	aSign
				5183	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5184	== 0 );
				5185	}
				5186	return
				5187	aSign ? le128( b.high, b.low, a.high, a.low )
				5188	: le128( a.high, a.low, b.high, b.low );
				5189
				5190	}
				5191
				5192	/*----------------------------------------------------------------------------
				5193	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5194	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				5195	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				5196	\| Standard for Binary Floating-Point Arithmetic.
				5197	----------------------------------------------------------------------------/
				5198
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5199	int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5200	{
				5201	flag aSign, bSign;
				5202
				5203	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5204	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5205	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5206	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5207	) {
				5208	if ( float128_is_signaling_nan( a )
				5209	\|\| float128_is_signaling_nan( b ) ) {
				5210	float_raise( float_flag_invalid STATUS_VAR);
				5211	}
				5212	return 0;
				5213	}
				5214	aSign = extractFloat128Sign( a );
				5215	bSign = extractFloat128Sign( b );
				5216	if ( aSign != bSign ) {
				5217	return
				5218	aSign
				5219	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5220	!= 0 );
				5221	}
				5222	return
				5223	aSign ? lt128( b.high, b.low, a.high, a.low )
				5224	: lt128( a.high, a.low, b.high, b.low );
				5225
				5226	}
				5227
				5228	#endif
				5229
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5230	/* misc functions */
				5231	float32 uint32_to_float32( unsigned int a STATUS_PARAM )
				5232	{
				5233	return int64_to_float32(a STATUS_VAR);
				5234	}
				5235
				5236	float64 uint32_to_float64( unsigned int a STATUS_PARAM )
				5237	{
				5238	return int64_to_float64(a STATUS_VAR);
				5239	}
				5240
				5241	unsigned int float32_to_uint32( float32 a STATUS_PARAM )
				5242	{
				5243	int64_t v;
				5244	unsigned int res;
				5245
				5246	v = float32_to_int64(a STATUS_VAR);
				5247	if (v < 0) {
				5248	res = 0;
				5249	float_raise( float_flag_invalid STATUS_VAR);
				5250	} else if (v > 0xffffffff) {
				5251	res = 0xffffffff;
				5252	float_raise( float_flag_invalid STATUS_VAR);
				5253	} else {
				5254	res = v;
				5255	}
				5256	return res;
				5257	}
				5258
				5259	unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
				5260	{
				5261	int64_t v;
				5262	unsigned int res;
				5263
				5264	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				5265	if (v < 0) {
				5266	res = 0;
				5267	float_raise( float_flag_invalid STATUS_VAR);
				5268	} else if (v > 0xffffffff) {
				5269	res = 0xffffffff;
				5270	float_raise( float_flag_invalid STATUS_VAR);
				5271	} else {
				5272	res = v;
				5273	}
				5274	return res;
				5275	}
				5276
				5277	unsigned int float64_to_uint32( float64 a STATUS_PARAM )
				5278	{
				5279	int64_t v;
				5280	unsigned int res;
				5281
				5282	v = float64_to_int64(a STATUS_VAR);
				5283	if (v < 0) {
				5284	res = 0;
				5285	float_raise( float_flag_invalid STATUS_VAR);
				5286	} else if (v > 0xffffffff) {
				5287	res = 0xffffffff;
				5288	float_raise( float_flag_invalid STATUS_VAR);
				5289	} else {
				5290	res = v;
				5291	}
				5292	return res;
				5293	}
				5294
				5295	unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
				5296	{
				5297	int64_t v;
				5298	unsigned int res;
				5299
				5300	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				5301	if (v < 0) {
				5302	res = 0;
				5303	float_raise( float_flag_invalid STATUS_VAR);
				5304	} else if (v > 0xffffffff) {
				5305	res = 0xffffffff;
				5306	float_raise( float_flag_invalid STATUS_VAR);
				5307	} else {
				5308	res = v;
				5309	}
				5310	return res;
				5311	}
				5312
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	5313	uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
				5314	{
				5315	int64_t v;
				5316
				5317	v = int64_to_float64(INT64_MIN STATUS_VAR);
				5318	v = float64_to_int64((a + v) STATUS_VAR);
				5319
				5320	return v - INT64_MIN;
				5321	}
				5322
				5323	uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
				5324	{
				5325	int64_t v;
				5326
				5327	v = int64_to_float64(INT64_MIN STATUS_VAR);
				5328	v = float64_to_int64_round_to_zero((a + v) STATUS_VAR);
				5329
				5330	return v - INT64_MIN;
				5331	}
				5332
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5333	#define COMPARE(s, nan_exp) \
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5334	INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5335	int is_quiet STATUS_PARAM ) \
				5336	{ \
				5337	flag aSign, bSign; \
				5338	\
				5339	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
				5340	extractFloat ## s ## Frac( a ) ) \|\| \
				5341	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
				5342	extractFloat ## s ## Frac( b ) )) { \
				5343	if (!is_quiet \|\| \
				5344	float ## s ## _is_signaling_nan( a ) \|\| \
				5345	float ## s ## _is_signaling_nan( b ) ) { \
				5346	float_raise( float_flag_invalid STATUS_VAR); \
				5347	} \
				5348	return float_relation_unordered; \
				5349	} \
				5350	aSign = extractFloat ## s ## Sign( a ); \
				5351	bSign = extractFloat ## s ## Sign( b ); \
				5352	if ( aSign != bSign ) { \
				5353	if ( (bits ## s) ( ( a \| b )<<1 ) == 0 ) { \
				5354	/* zero case */ \
				5355	return float_relation_equal; \
				5356	} else { \
				5357	return 1 - (2 * aSign); \
				5358	} \
				5359	} else { \
				5360	if (a == b) { \
				5361	return float_relation_equal; \
				5362	} else { \
				5363	return 1 - 2 * (aSign ^ ( a < b )); \
				5364	} \
				5365	} \
				5366	} \
				5367	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5368	int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5369	{ \
				5370	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
				5371	} \
				5372	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5373	int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5374	{ \
				5375	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
				5376	}
				5377
				5378	COMPARE(32, 0xff)
				5379	COMPARE(64, 0x7ff)
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame^]	5380
				5381	/* Multiply A by 2 raised to the power N. */
				5382	float32 float32_scalbn( float32 a, int n STATUS_PARAM )
				5383	{
				5384	flag aSign;
				5385	int16 aExp;
				5386	bits32 aSig;
				5387
				5388	aSig = extractFloat32Frac( a );
				5389	aExp = extractFloat32Exp( a );
				5390	aSign = extractFloat32Sign( a );
				5391
				5392	if ( aExp == 0xFF ) {
				5393	return a;
				5394	}
				5395	aExp += n;
				5396	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
				5397	}
				5398
				5399	float64 float64_scalbn( float64 a, int n STATUS_PARAM )
				5400	{
				5401	flag aSign;
				5402	int16 aExp;
				5403	bits64 aSig;
				5404
				5405	aSig = extractFloat64Frac( a );
				5406	aExp = extractFloat64Exp( a );
				5407	aSign = extractFloat64Sign( a );
				5408
				5409	if ( aExp == 0x7FF ) {
				5410	return a;
				5411	}
				5412	aExp += n;
				5413	return roundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
				5414	}
				5415
				5416	#ifdef FLOATX80
				5417	floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
				5418	{
				5419	flag aSign;
				5420	int16 aExp;
				5421	bits64 aSig;
				5422
				5423	aSig = extractFloatx80Frac( a );
				5424	aExp = extractFloatx80Exp( a );
				5425	aSign = extractFloatx80Sign( a );
				5426
				5427	if ( aExp == 0x7FF ) {
				5428	return a;
				5429	}
				5430	aExp += n;
				5431	return roundAndPackFloatx80( STATUS(floatx80_rounding_precision),
				5432	aSign, aExp, aSig, 0 STATUS_VAR );
				5433	}
				5434	#endif
				5435
				5436	#ifdef FLOAT128
				5437	float128 float128_scalbn( float128 a, int n STATUS_PARAM )
				5438	{
				5439	flag aSign;
				5440	int32 aExp;
				5441	bits64 aSig0, aSig1;
				5442
				5443	aSig1 = extractFloat128Frac1( a );
				5444	aSig0 = extractFloat128Frac0( a );
				5445	aExp = extractFloat128Exp( a );
				5446	aSign = extractFloat128Sign( a );
				5447	if ( aExp == 0x7FFF ) {
				5448	return a;
				5449	}
				5450	aExp += n;
				5451	return roundAndPackFloat128( aSign, aExp, aSig0, aSig1, 0 STATUS_VAR );
				5452
				5453	}
				5454	#endif