Commit 20495218834824723ef81306c6e1fd27fc3ae560

Authored by bellard
1 parent 158142c2

use the generic soft float code


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1333 c046a42c-6fe2-441c-8c8c-71466251a162
target-arm/nwfpe/ARM-gcc.h deleted 100644 → 0
1 -/*  
2 --------------------------------------------------------------------------------  
3 -The macro `BITS64' can be defined to indicate that 64-bit integer types are  
4 -supported by the compiler.  
5 --------------------------------------------------------------------------------  
6 -*/  
7 -#define BITS64  
8 -  
9 -/*  
10 --------------------------------------------------------------------------------  
11 -Each of the following `typedef's defines the most convenient type that holds  
12 -integers of at least as many bits as specified. For example, `uint8' should  
13 -be the most convenient type that can hold unsigned integers of as many as  
14 -8 bits. The `flag' type must be able to hold either a 0 or 1. For most  
15 -implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed  
16 -to the same as `int'.  
17 --------------------------------------------------------------------------------  
18 -*/  
19 -typedef char flag;  
20 -typedef unsigned char uint8;  
21 -typedef signed char int8;  
22 -typedef int uint16;  
23 -typedef int int16;  
24 -typedef unsigned int uint32;  
25 -typedef signed int int32;  
26 -#ifdef BITS64  
27 -typedef unsigned long long int bits64;  
28 -typedef signed long long int sbits64;  
29 -#endif  
30 -  
31 -/*  
32 --------------------------------------------------------------------------------  
33 -Each of the following `typedef's defines a type that holds integers  
34 -of _exactly_ the number of bits specified. For instance, for most  
35 -implementation of C, `bits16' and `sbits16' should be `typedef'ed to  
36 -`unsigned short int' and `signed short int' (or `short int'), respectively.  
37 --------------------------------------------------------------------------------  
38 -*/  
39 -typedef unsigned char bits8;  
40 -typedef signed char sbits8;  
41 -typedef unsigned short int bits16;  
42 -typedef signed short int sbits16;  
43 -typedef unsigned int bits32;  
44 -typedef signed int sbits32;  
45 -#ifdef BITS64  
46 -typedef unsigned long long int uint64;  
47 -typedef signed long long int int64;  
48 -#endif  
49 -  
50 -#ifdef BITS64  
51 -/*  
52 --------------------------------------------------------------------------------  
53 -The `LIT64' macro takes as its argument a textual integer literal and if  
54 -necessary ``marks'' the literal as having a 64-bit integer type. For  
55 -example, the Gnu C Compiler (`gcc') requires that 64-bit literals be  
56 -appended with the letters `LL' standing for `long long', which is `gcc's  
57 -name for the 64-bit integer type. Some compilers may allow `LIT64' to be  
58 -defined as the identity macro: `#define LIT64( a ) a'.  
59 --------------------------------------------------------------------------------  
60 -*/  
61 -#define LIT64( a ) a##LL  
62 -#endif  
63 -  
64 -/*  
65 --------------------------------------------------------------------------------  
66 -The macro `INLINE' can be used before functions that should be inlined. If  
67 -a compiler does not support explicit inlining, this macro should be defined  
68 -to be `static'.  
69 --------------------------------------------------------------------------------  
70 -*/  
71 -#define INLINE extern __inline__  
72 -  
73 -  
74 -/* For use as a GCC soft-float library we need some special function names. */  
75 -  
76 -#ifdef __LIBFLOAT__  
77 -  
78 -/* Some 32-bit ops can be mapped straight across by just changing the name. */  
79 -#define float32_add __addsf3  
80 -#define float32_sub __subsf3  
81 -#define float32_mul __mulsf3  
82 -#define float32_div __divsf3  
83 -#define int32_to_float32 __floatsisf  
84 -#define float32_to_int32_round_to_zero __fixsfsi  
85 -#define float32_to_uint32_round_to_zero __fixunssfsi  
86 -  
87 -/* These ones go through the glue code. To avoid namespace pollution  
88 - we rename the internal functions too. */  
89 -#define float32_eq ___float32_eq  
90 -#define float32_le ___float32_le  
91 -#define float32_lt ___float32_lt  
92 -  
93 -/* All the 64-bit ops have to go through the glue, so we pull the same  
94 - trick. */  
95 -#define float64_add ___float64_add  
96 -#define float64_sub ___float64_sub  
97 -#define float64_mul ___float64_mul  
98 -#define float64_div ___float64_div  
99 -#define int32_to_float64 ___int32_to_float64  
100 -#define float64_to_int32_round_to_zero ___float64_to_int32_round_to_zero  
101 -#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero  
102 -#define float64_to_float32 ___float64_to_float32  
103 -#define float32_to_float64 ___float32_to_float64  
104 -#define float64_eq ___float64_eq  
105 -#define float64_le ___float64_le  
106 -#define float64_lt ___float64_lt  
107 -  
108 -#if 0  
109 -#define float64_add __adddf3  
110 -#define float64_sub __subdf3  
111 -#define float64_mul __muldf3  
112 -#define float64_div __divdf3  
113 -#define int32_to_float64 __floatsidf  
114 -#define float64_to_int32_round_to_zero __fixdfsi  
115 -#define float64_to_uint32_round_to_zero __fixunsdfsi  
116 -#define float64_to_float32 __truncdfsf2  
117 -#define float32_to_float64 __extendsfdf2  
118 -#endif  
119 -  
120 -#endif  
target-arm/nwfpe/double_cpdo.c
@@ -53,7 +53,7 @@ unsigned int DoubleCPDO(const unsigned int opcode) @@ -53,7 +53,7 @@ unsigned int DoubleCPDO(const unsigned int opcode)
53 switch (fpa11->fType[Fm]) 53 switch (fpa11->fType[Fm])
54 { 54 {
55 case typeSingle: 55 case typeSingle:
56 - rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle); 56 + rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle, &fpa11->fp_status);
57 break; 57 break;
58 58
59 case typeDouble: 59 case typeDouble:
@@ -79,7 +79,7 @@ unsigned int DoubleCPDO(const unsigned int opcode) @@ -79,7 +79,7 @@ unsigned int DoubleCPDO(const unsigned int opcode)
79 switch (fpa11->fType[Fn]) 79 switch (fpa11->fType[Fn])
80 { 80 {
81 case typeSingle: 81 case typeSingle:
82 - rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle); 82 + rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
83 break; 83 break;
84 84
85 case typeDouble: 85 case typeDouble:
@@ -96,30 +96,30 @@ unsigned int DoubleCPDO(const unsigned int opcode) @@ -96,30 +96,30 @@ unsigned int DoubleCPDO(const unsigned int opcode)
96 { 96 {
97 /* dyadic opcodes */ 97 /* dyadic opcodes */
98 case ADF_CODE: 98 case ADF_CODE:
99 - fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm); 99 + fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm, &fpa11->fp_status);
100 break; 100 break;
101 101
102 case MUF_CODE: 102 case MUF_CODE:
103 case FML_CODE: 103 case FML_CODE:
104 - fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm); 104 + fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm, &fpa11->fp_status);
105 break; 105 break;
106 106
107 case SUF_CODE: 107 case SUF_CODE:
108 - fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm); 108 + fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm, &fpa11->fp_status);
109 break; 109 break;
110 110
111 case RSF_CODE: 111 case RSF_CODE:
112 - fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn); 112 + fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn, &fpa11->fp_status);
113 break; 113 break;
114 114
115 case DVF_CODE: 115 case DVF_CODE:
116 case FDV_CODE: 116 case FDV_CODE:
117 - fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm); 117 + fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm, &fpa11->fp_status);
118 break; 118 break;
119 119
120 case RDF_CODE: 120 case RDF_CODE:
121 case FRD_CODE: 121 case FRD_CODE:
122 - fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn); 122 + fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn, &fpa11->fp_status);
123 break; 123 break;
124 124
125 #if 0 125 #if 0
@@ -133,7 +133,7 @@ unsigned int DoubleCPDO(const unsigned int opcode) @@ -133,7 +133,7 @@ unsigned int DoubleCPDO(const unsigned int opcode)
133 #endif 133 #endif
134 134
135 case RMF_CODE: 135 case RMF_CODE:
136 - fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm); 136 + fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm, &fpa11->fp_status);
137 break; 137 break;
138 138
139 #if 0 139 #if 0
@@ -173,11 +173,11 @@ unsigned int DoubleCPDO(const unsigned int opcode) @@ -173,11 +173,11 @@ unsigned int DoubleCPDO(const unsigned int opcode)
173 173
174 case RND_CODE: 174 case RND_CODE:
175 case URD_CODE: 175 case URD_CODE:
176 - fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm); 176 + fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm, &fpa11->fp_status);
177 break; 177 break;
178 178
179 case SQT_CODE: 179 case SQT_CODE:
180 - fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm); 180 + fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm, &fpa11->fp_status);
181 break; 181 break;
182 182
183 #if 0 183 #if 0
target-arm/nwfpe/extended_cpdo.c
@@ -53,11 +53,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode) @@ -53,11 +53,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
53 switch (fpa11->fType[Fm]) 53 switch (fpa11->fType[Fm])
54 { 54 {
55 case typeSingle: 55 case typeSingle:
56 - rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle); 56 + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle, &fpa11->fp_status);
57 break; 57 break;
58 58
59 case typeDouble: 59 case typeDouble:
60 - rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble); 60 + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble, &fpa11->fp_status);
61 break; 61 break;
62 62
63 case typeExtended: 63 case typeExtended:
@@ -74,11 +74,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode) @@ -74,11 +74,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
74 switch (fpa11->fType[Fn]) 74 switch (fpa11->fType[Fn])
75 { 75 {
76 case typeSingle: 76 case typeSingle:
77 - rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle); 77 + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
78 break; 78 break;
79 79
80 case typeDouble: 80 case typeDouble:
81 - rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble); 81 + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
82 break; 82 break;
83 83
84 case typeExtended: 84 case typeExtended:
@@ -94,30 +94,30 @@ unsigned int ExtendedCPDO(const unsigned int opcode) @@ -94,30 +94,30 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
94 { 94 {
95 /* dyadic opcodes */ 95 /* dyadic opcodes */
96 case ADF_CODE: 96 case ADF_CODE:
97 - fpa11->fpreg[Fd].fExtended = floatx80_add(rFn,rFm); 97 + fpa11->fpreg[Fd].fExtended = floatx80_add(rFn,rFm, &fpa11->fp_status);
98 break; 98 break;
99 99
100 case MUF_CODE: 100 case MUF_CODE:
101 case FML_CODE: 101 case FML_CODE:
102 - fpa11->fpreg[Fd].fExtended = floatx80_mul(rFn,rFm); 102 + fpa11->fpreg[Fd].fExtended = floatx80_mul(rFn,rFm, &fpa11->fp_status);
103 break; 103 break;
104 104
105 case SUF_CODE: 105 case SUF_CODE:
106 - fpa11->fpreg[Fd].fExtended = floatx80_sub(rFn,rFm); 106 + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFn,rFm, &fpa11->fp_status);
107 break; 107 break;
108 108
109 case RSF_CODE: 109 case RSF_CODE:
110 - fpa11->fpreg[Fd].fExtended = floatx80_sub(rFm,rFn); 110 + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFm,rFn, &fpa11->fp_status);
111 break; 111 break;
112 112
113 case DVF_CODE: 113 case DVF_CODE:
114 case FDV_CODE: 114 case FDV_CODE:
115 - fpa11->fpreg[Fd].fExtended = floatx80_div(rFn,rFm); 115 + fpa11->fpreg[Fd].fExtended = floatx80_div(rFn,rFm, &fpa11->fp_status);
116 break; 116 break;
117 117
118 case RDF_CODE: 118 case RDF_CODE:
119 case FRD_CODE: 119 case FRD_CODE:
120 - fpa11->fpreg[Fd].fExtended = floatx80_div(rFm,rFn); 120 + fpa11->fpreg[Fd].fExtended = floatx80_div(rFm,rFn, &fpa11->fp_status);
121 break; 121 break;
122 122
123 #if 0 123 #if 0
@@ -131,7 +131,7 @@ unsigned int ExtendedCPDO(const unsigned int opcode) @@ -131,7 +131,7 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
131 #endif 131 #endif
132 132
133 case RMF_CODE: 133 case RMF_CODE:
134 - fpa11->fpreg[Fd].fExtended = floatx80_rem(rFn,rFm); 134 + fpa11->fpreg[Fd].fExtended = floatx80_rem(rFn,rFm, &fpa11->fp_status);
135 break; 135 break;
136 136
137 #if 0 137 #if 0
@@ -157,11 +157,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode) @@ -157,11 +157,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
157 157
158 case RND_CODE: 158 case RND_CODE:
159 case URD_CODE: 159 case URD_CODE:
160 - fpa11->fpreg[Fd].fExtended = floatx80_round_to_int(rFm); 160 + fpa11->fpreg[Fd].fExtended = floatx80_round_to_int(rFm, &fpa11->fp_status);
161 break; 161 break;
162 162
163 case SQT_CODE: 163 case SQT_CODE:
164 - fpa11->fpreg[Fd].fExtended = floatx80_sqrt(rFm); 164 + fpa11->fpreg[Fd].fExtended = floatx80_sqrt(rFm, &fpa11->fp_status);
165 break; 165 break;
166 166
167 #if 0 167 #if 0
target-arm/nwfpe/fpa11.c
@@ -61,74 +61,79 @@ void resetFPA11(void) @@ -61,74 +61,79 @@ void resetFPA11(void)
61 61
62 void SetRoundingMode(const unsigned int opcode) 62 void SetRoundingMode(const unsigned int opcode)
63 { 63 {
64 -#if MAINTAIN_FPCR 64 + int rounding_mode;
65 FPA11 *fpa11 = GET_FPA11(); 65 FPA11 *fpa11 = GET_FPA11();
  66 +
  67 +#if MAINTAIN_FPCR
66 fpa11->fpcr &= ~MASK_ROUNDING_MODE; 68 fpa11->fpcr &= ~MASK_ROUNDING_MODE;
67 #endif 69 #endif
68 switch (opcode & MASK_ROUNDING_MODE) 70 switch (opcode & MASK_ROUNDING_MODE)
69 { 71 {
70 default: 72 default:
71 case ROUND_TO_NEAREST: 73 case ROUND_TO_NEAREST:
72 - float_rounding_mode = float_round_nearest_even; 74 + rounding_mode = float_round_nearest_even;
73 #if MAINTAIN_FPCR 75 #if MAINTAIN_FPCR
74 fpa11->fpcr |= ROUND_TO_NEAREST; 76 fpa11->fpcr |= ROUND_TO_NEAREST;
75 #endif 77 #endif
76 break; 78 break;
77 79
78 case ROUND_TO_PLUS_INFINITY: 80 case ROUND_TO_PLUS_INFINITY:
79 - float_rounding_mode = float_round_up; 81 + rounding_mode = float_round_up;
80 #if MAINTAIN_FPCR 82 #if MAINTAIN_FPCR
81 fpa11->fpcr |= ROUND_TO_PLUS_INFINITY; 83 fpa11->fpcr |= ROUND_TO_PLUS_INFINITY;
82 #endif 84 #endif
83 break; 85 break;
84 86
85 case ROUND_TO_MINUS_INFINITY: 87 case ROUND_TO_MINUS_INFINITY:
86 - float_rounding_mode = float_round_down; 88 + rounding_mode = float_round_down;
87 #if MAINTAIN_FPCR 89 #if MAINTAIN_FPCR
88 fpa11->fpcr |= ROUND_TO_MINUS_INFINITY; 90 fpa11->fpcr |= ROUND_TO_MINUS_INFINITY;
89 #endif 91 #endif
90 break; 92 break;
91 93
92 case ROUND_TO_ZERO: 94 case ROUND_TO_ZERO:
93 - float_rounding_mode = float_round_to_zero; 95 + rounding_mode = float_round_to_zero;
94 #if MAINTAIN_FPCR 96 #if MAINTAIN_FPCR
95 fpa11->fpcr |= ROUND_TO_ZERO; 97 fpa11->fpcr |= ROUND_TO_ZERO;
96 #endif 98 #endif
97 break; 99 break;
98 } 100 }
  101 + set_float_rounding_mode(rounding_mode, &fpa11->fp_status);
99 } 102 }
100 103
101 void SetRoundingPrecision(const unsigned int opcode) 104 void SetRoundingPrecision(const unsigned int opcode)
102 { 105 {
103 -#if MAINTAIN_FPCR 106 + int rounding_precision;
104 FPA11 *fpa11 = GET_FPA11(); 107 FPA11 *fpa11 = GET_FPA11();
  108 +#if MAINTAIN_FPCR
105 fpa11->fpcr &= ~MASK_ROUNDING_PRECISION; 109 fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
106 #endif 110 #endif
107 switch (opcode & MASK_ROUNDING_PRECISION) 111 switch (opcode & MASK_ROUNDING_PRECISION)
108 { 112 {
109 case ROUND_SINGLE: 113 case ROUND_SINGLE:
110 - floatx80_rounding_precision = 32; 114 + rounding_precision = 32;
111 #if MAINTAIN_FPCR 115 #if MAINTAIN_FPCR
112 fpa11->fpcr |= ROUND_SINGLE; 116 fpa11->fpcr |= ROUND_SINGLE;
113 #endif 117 #endif
114 break; 118 break;
115 119
116 case ROUND_DOUBLE: 120 case ROUND_DOUBLE:
117 - floatx80_rounding_precision = 64; 121 + rounding_precision = 64;
118 #if MAINTAIN_FPCR 122 #if MAINTAIN_FPCR
119 fpa11->fpcr |= ROUND_DOUBLE; 123 fpa11->fpcr |= ROUND_DOUBLE;
120 #endif 124 #endif
121 break; 125 break;
122 126
123 case ROUND_EXTENDED: 127 case ROUND_EXTENDED:
124 - floatx80_rounding_precision = 80; 128 + rounding_precision = 80;
125 #if MAINTAIN_FPCR 129 #if MAINTAIN_FPCR
126 fpa11->fpcr |= ROUND_EXTENDED; 130 fpa11->fpcr |= ROUND_EXTENDED;
127 #endif 131 #endif
128 break; 132 break;
129 133
130 - default: floatx80_rounding_precision = 80; 134 + default: rounding_precision = 80;
131 } 135 }
  136 + set_floatx80_rounding_precision(rounding_precision, &fpa11->fp_status);
132 } 137 }
133 138
134 /* Emulate the instruction in the opcode. */ 139 /* Emulate the instruction in the opcode. */
target-arm/nwfpe/fpa11.h
@@ -83,6 +83,7 @@ typedef struct tagFPA11 { @@ -83,6 +83,7 @@ typedef struct tagFPA11 {
83 so we can use it to detect whether this 83 so we can use it to detect whether this
84 instance of the emulator needs to be 84 instance of the emulator needs to be
85 initialised. */ 85 initialised. */
  86 + float_status fp_status; /* QEMU float emulator status */
86 } FPA11; 87 } FPA11;
87 88
88 extern FPA11* qemufpa; 89 extern FPA11* qemufpa;
target-arm/nwfpe/fpa11_cpdo.c
@@ -80,10 +80,10 @@ unsigned int EmulateCPDO(const unsigned int opcode) @@ -80,10 +80,10 @@ unsigned int EmulateCPDO(const unsigned int opcode)
80 { 80 {
81 if (typeDouble == nType) 81 if (typeDouble == nType)
82 fpa11->fpreg[Fd].fSingle = 82 fpa11->fpreg[Fd].fSingle =
83 - float64_to_float32(fpa11->fpreg[Fd].fDouble); 83 + float64_to_float32(fpa11->fpreg[Fd].fDouble, &fpa11->fp_status);
84 else 84 else
85 fpa11->fpreg[Fd].fSingle = 85 fpa11->fpreg[Fd].fSingle =
86 - floatx80_to_float32(fpa11->fpreg[Fd].fExtended); 86 + floatx80_to_float32(fpa11->fpreg[Fd].fExtended, &fpa11->fp_status);
87 } 87 }
88 break; 88 break;
89 89
@@ -91,10 +91,10 @@ unsigned int EmulateCPDO(const unsigned int opcode) @@ -91,10 +91,10 @@ unsigned int EmulateCPDO(const unsigned int opcode)
91 { 91 {
92 if (typeSingle == nType) 92 if (typeSingle == nType)
93 fpa11->fpreg[Fd].fDouble = 93 fpa11->fpreg[Fd].fDouble =
94 - float32_to_float64(fpa11->fpreg[Fd].fSingle); 94 + float32_to_float64(fpa11->fpreg[Fd].fSingle, &fpa11->fp_status);
95 else 95 else
96 fpa11->fpreg[Fd].fDouble = 96 fpa11->fpreg[Fd].fDouble =
97 - floatx80_to_float64(fpa11->fpreg[Fd].fExtended); 97 + floatx80_to_float64(fpa11->fpreg[Fd].fExtended, &fpa11->fp_status);
98 } 98 }
99 break; 99 break;
100 100
@@ -102,10 +102,10 @@ unsigned int EmulateCPDO(const unsigned int opcode) @@ -102,10 +102,10 @@ unsigned int EmulateCPDO(const unsigned int opcode)
102 { 102 {
103 if (typeSingle == nType) 103 if (typeSingle == nType)
104 fpa11->fpreg[Fd].fExtended = 104 fpa11->fpreg[Fd].fExtended =
105 - float32_to_floatx80(fpa11->fpreg[Fd].fSingle); 105 + float32_to_floatx80(fpa11->fpreg[Fd].fSingle, &fpa11->fp_status);
106 else 106 else
107 fpa11->fpreg[Fd].fExtended = 107 fpa11->fpreg[Fd].fExtended =
108 - float64_to_floatx80(fpa11->fpreg[Fd].fDouble); 108 + float64_to_floatx80(fpa11->fpreg[Fd].fDouble, &fpa11->fp_status);
109 } 109 }
110 break; 110 break;
111 } 111 }
target-arm/nwfpe/fpa11_cpdt.c
@@ -106,11 +106,11 @@ void storeSingle(const unsigned int Fn,unsigned int *pMem) @@ -106,11 +106,11 @@ void storeSingle(const unsigned int Fn,unsigned int *pMem)
106 switch (fpa11->fType[Fn]) 106 switch (fpa11->fType[Fn])
107 { 107 {
108 case typeDouble: 108 case typeDouble:
109 - val = float64_to_float32(fpa11->fpreg[Fn].fDouble); 109 + val = float64_to_float32(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
110 break; 110 break;
111 111
112 case typeExtended: 112 case typeExtended:
113 - val = floatx80_to_float32(fpa11->fpreg[Fn].fExtended); 113 + val = floatx80_to_float32(fpa11->fpreg[Fn].fExtended, &fpa11->fp_status);
114 break; 114 break;
115 115
116 default: val = fpa11->fpreg[Fn].fSingle; 116 default: val = fpa11->fpreg[Fn].fSingle;
@@ -129,11 +129,11 @@ void storeDouble(const unsigned int Fn,unsigned int *pMem) @@ -129,11 +129,11 @@ void storeDouble(const unsigned int Fn,unsigned int *pMem)
129 switch (fpa11->fType[Fn]) 129 switch (fpa11->fType[Fn])
130 { 130 {
131 case typeSingle: 131 case typeSingle:
132 - val = float32_to_float64(fpa11->fpreg[Fn].fSingle); 132 + val = float32_to_float64(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
133 break; 133 break;
134 134
135 case typeExtended: 135 case typeExtended:
136 - val = floatx80_to_float64(fpa11->fpreg[Fn].fExtended); 136 + val = floatx80_to_float64(fpa11->fpreg[Fn].fExtended, &fpa11->fp_status);
137 break; 137 break;
138 138
139 default: val = fpa11->fpreg[Fn].fDouble; 139 default: val = fpa11->fpreg[Fn].fDouble;
@@ -157,11 +157,11 @@ void storeExtended(const unsigned int Fn,unsigned int *pMem) @@ -157,11 +157,11 @@ void storeExtended(const unsigned int Fn,unsigned int *pMem)
157 switch (fpa11->fType[Fn]) 157 switch (fpa11->fType[Fn])
158 { 158 {
159 case typeSingle: 159 case typeSingle:
160 - val = float32_to_floatx80(fpa11->fpreg[Fn].fSingle); 160 + val = float32_to_floatx80(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
161 break; 161 break;
162 162
163 case typeDouble: 163 case typeDouble:
164 - val = float64_to_floatx80(fpa11->fpreg[Fn].fDouble); 164 + val = float64_to_floatx80(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
165 break; 165 break;
166 166
167 default: val = fpa11->fpreg[Fn].fExtended; 167 default: val = fpa11->fpreg[Fn].fExtended;
target-arm/nwfpe/fpa11_cprt.c
@@ -21,7 +21,6 @@ @@ -21,7 +21,6 @@
21 */ 21 */
22 22
23 #include "fpa11.h" 23 #include "fpa11.h"
24 -#include "milieu.h"  
25 #include "softfloat.h" 24 #include "softfloat.h"
26 #include "fpopcode.h" 25 #include "fpopcode.h"
27 #include "fpa11.inl" 26 #include "fpa11.inl"
@@ -89,7 +88,7 @@ unsigned int PerformFLT(const unsigned int opcode) @@ -89,7 +88,7 @@ unsigned int PerformFLT(const unsigned int opcode)
89 { 88 {
90 fpa11->fType[getFn(opcode)] = typeSingle; 89 fpa11->fType[getFn(opcode)] = typeSingle;
91 fpa11->fpreg[getFn(opcode)].fSingle = 90 fpa11->fpreg[getFn(opcode)].fSingle =
92 - int32_to_float32(readRegister(getRd(opcode))); 91 + int32_to_float32(readRegister(getRd(opcode)), &fpa11->fp_status);
93 } 92 }
94 break; 93 break;
95 94
@@ -97,7 +96,7 @@ unsigned int PerformFLT(const unsigned int opcode) @@ -97,7 +96,7 @@ unsigned int PerformFLT(const unsigned int opcode)
97 { 96 {
98 fpa11->fType[getFn(opcode)] = typeDouble; 97 fpa11->fType[getFn(opcode)] = typeDouble;
99 fpa11->fpreg[getFn(opcode)].fDouble = 98 fpa11->fpreg[getFn(opcode)].fDouble =
100 - int32_to_float64(readRegister(getRd(opcode))); 99 + int32_to_float64(readRegister(getRd(opcode)), &fpa11->fp_status);
101 } 100 }
102 break; 101 break;
103 102
@@ -105,7 +104,7 @@ unsigned int PerformFLT(const unsigned int opcode) @@ -105,7 +104,7 @@ unsigned int PerformFLT(const unsigned int opcode)
105 { 104 {
106 fpa11->fType[getFn(opcode)] = typeExtended; 105 fpa11->fType[getFn(opcode)] = typeExtended;
107 fpa11->fpreg[getFn(opcode)].fExtended = 106 fpa11->fpreg[getFn(opcode)].fExtended =
108 - int32_to_floatx80(readRegister(getRd(opcode))); 107 + int32_to_floatx80(readRegister(getRd(opcode)), &fpa11->fp_status);
109 } 108 }
110 break; 109 break;
111 110
@@ -128,7 +127,7 @@ unsigned int PerformFIX(const unsigned int opcode) @@ -128,7 +127,7 @@ unsigned int PerformFIX(const unsigned int opcode)
128 case typeSingle: 127 case typeSingle:
129 { 128 {
130 writeRegister(getRd(opcode), 129 writeRegister(getRd(opcode),
131 - float32_to_int32(fpa11->fpreg[Fn].fSingle)); 130 + float32_to_int32(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status));
132 } 131 }
133 break; 132 break;
134 133
@@ -136,14 +135,14 @@ unsigned int PerformFIX(const unsigned int opcode) @@ -136,14 +135,14 @@ unsigned int PerformFIX(const unsigned int opcode)
136 { 135 {
137 //printf("F%d is 0x%llx\n",Fn,fpa11->fpreg[Fn].fDouble); 136 //printf("F%d is 0x%llx\n",Fn,fpa11->fpreg[Fn].fDouble);
138 writeRegister(getRd(opcode), 137 writeRegister(getRd(opcode),
139 - float64_to_int32(fpa11->fpreg[Fn].fDouble)); 138 + float64_to_int32(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status));
140 } 139 }
141 break; 140 break;
142 141
143 case typeExtended: 142 case typeExtended:
144 { 143 {
145 writeRegister(getRd(opcode), 144 writeRegister(getRd(opcode),
146 - floatx80_to_int32(fpa11->fpreg[Fn].fExtended)); 145 + floatx80_to_int32(fpa11->fpreg[Fn].fExtended, &fpa11->fp_status));
147 } 146 }
148 break; 147 break;
149 148
@@ -157,22 +156,23 @@ unsigned int PerformFIX(const unsigned int opcode) @@ -157,22 +156,23 @@ unsigned int PerformFIX(const unsigned int opcode)
157 static unsigned int __inline__ 156 static unsigned int __inline__
158 PerformComparisonOperation(floatx80 Fn, floatx80 Fm) 157 PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
159 { 158 {
  159 + FPA11 *fpa11 = GET_FPA11();
160 unsigned int flags = 0; 160 unsigned int flags = 0;
161 161
162 /* test for less than condition */ 162 /* test for less than condition */
163 - if (floatx80_lt(Fn,Fm)) 163 + if (floatx80_lt(Fn,Fm, &fpa11->fp_status))
164 { 164 {
165 flags |= CC_NEGATIVE; 165 flags |= CC_NEGATIVE;
166 } 166 }
167 167
168 /* test for equal condition */ 168 /* test for equal condition */
169 - if (floatx80_eq(Fn,Fm)) 169 + if (floatx80_eq(Fn,Fm, &fpa11->fp_status))
170 { 170 {
171 flags |= CC_ZERO; 171 flags |= CC_ZERO;
172 } 172 }
173 173
174 /* test for greater than or equal condition */ 174 /* test for greater than or equal condition */
175 - if (floatx80_lt(Fm,Fn)) 175 + if (floatx80_lt(Fm,Fn, &fpa11->fp_status))
176 { 176 {
177 flags |= CC_CARRY; 177 flags |= CC_CARRY;
178 } 178 }
@@ -208,14 +208,14 @@ static unsigned int PerformComparison(const unsigned int opcode) @@ -208,14 +208,14 @@ static unsigned int PerformComparison(const unsigned int opcode)
208 //printk("single.\n"); 208 //printk("single.\n");
209 if (float32_is_nan(fpa11->fpreg[Fn].fSingle)) 209 if (float32_is_nan(fpa11->fpreg[Fn].fSingle))
210 goto unordered; 210 goto unordered;
211 - rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle); 211 + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
212 break; 212 break;
213 213
214 case typeDouble: 214 case typeDouble:
215 //printk("double.\n"); 215 //printk("double.\n");
216 if (float64_is_nan(fpa11->fpreg[Fn].fDouble)) 216 if (float64_is_nan(fpa11->fpreg[Fn].fDouble))
217 goto unordered; 217 goto unordered;
218 - rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble); 218 + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
219 break; 219 break;
220 220
221 case typeExtended: 221 case typeExtended:
@@ -244,14 +244,14 @@ static unsigned int PerformComparison(const unsigned int opcode) @@ -244,14 +244,14 @@ static unsigned int PerformComparison(const unsigned int opcode)
244 //printk("single.\n"); 244 //printk("single.\n");
245 if (float32_is_nan(fpa11->fpreg[Fm].fSingle)) 245 if (float32_is_nan(fpa11->fpreg[Fm].fSingle))
246 goto unordered; 246 goto unordered;
247 - rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle); 247 + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle, &fpa11->fp_status);
248 break; 248 break;
249 249
250 case typeDouble: 250 case typeDouble:
251 //printk("double.\n"); 251 //printk("double.\n");
252 if (float64_is_nan(fpa11->fpreg[Fm].fDouble)) 252 if (float64_is_nan(fpa11->fpreg[Fm].fDouble))
253 goto unordered; 253 goto unordered;
254 - rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble); 254 + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble, &fpa11->fp_status);
255 break; 255 break;
256 256
257 case typeExtended: 257 case typeExtended:
@@ -283,7 +283,7 @@ static unsigned int PerformComparison(const unsigned int opcode) @@ -283,7 +283,7 @@ static unsigned int PerformComparison(const unsigned int opcode)
283 283
284 if (BIT_AC & readFPSR()) flags |= CC_CARRY; 284 if (BIT_AC & readFPSR()) flags |= CC_CARRY;
285 285
286 - if (e_flag) float_raise(float_flag_invalid); 286 + if (e_flag) float_raise(float_flag_invalid, &fpa11->fp_status);
287 287
288 writeConditionCodes(flags); 288 writeConditionCodes(flags);
289 return 1; 289 return 1;
target-arm/nwfpe/fpopcode.c
@@ -27,14 +27,14 @@ @@ -27,14 +27,14 @@
27 //#include "fpmodule.inl" 27 //#include "fpmodule.inl"
28 28
29 const floatx80 floatx80Constant[] = { 29 const floatx80 floatx80Constant[] = {
30 - { 0x0000, 0x0000000000000000ULL}, /* extended 0.0 */  
31 - { 0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */  
32 - { 0x4000, 0x8000000000000000ULL}, /* extended 2.0 */  
33 - { 0x4000, 0xc000000000000000ULL}, /* extended 3.0 */  
34 - { 0x4001, 0x8000000000000000ULL}, /* extended 4.0 */  
35 - { 0x4001, 0xa000000000000000ULL}, /* extended 5.0 */  
36 - { 0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */  
37 - { 0x4002, 0xa000000000000000ULL} /* extended 10.0 */ 30 + { 0x0000000000000000ULL, 0x0000}, /* extended 0.0 */
  31 + { 0x8000000000000000ULL, 0x3fff}, /* extended 1.0 */
  32 + { 0x8000000000000000ULL, 0x4000}, /* extended 2.0 */
  33 + { 0xc000000000000000ULL, 0x4000}, /* extended 3.0 */
  34 + { 0x8000000000000000ULL, 0x4001}, /* extended 4.0 */
  35 + { 0xa000000000000000ULL, 0x4001}, /* extended 5.0 */
  36 + { 0x8000000000000000ULL, 0x3ffe}, /* extended 0.5 */
  37 + { 0xa000000000000000ULL, 0x4002} /* extended 10.0 */
38 }; 38 };
39 39
40 const float64 float64Constant[] = { 40 const float64 float64Constant[] = {
target-arm/nwfpe/milieu.h deleted 100644 → 0
1 -  
2 -/*  
3 -===============================================================================  
4 -  
5 -This C header file is part of the SoftFloat IEC/IEEE Floating-point  
6 -Arithmetic Package, Release 2.  
7 -  
8 -Written by John R. Hauser. This work was made possible in part by the  
9 -International Computer Science Institute, located at Suite 600, 1947 Center  
10 -Street, Berkeley, California 94704. Funding was partially provided by the  
11 -National Science Foundation under grant MIP-9311980. The original version  
12 -of this code was written as part of a project to build a fixed-point vector  
13 -processor in collaboration with the University of California at Berkeley,  
14 -overseen by Profs. Nelson Morgan and John Wawrzynek. More information  
15 -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/  
16 -arithmetic/softfloat.html'.  
17 -  
18 -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort  
19 -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT  
20 -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO  
21 -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY  
22 -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.  
23 -  
24 -Derivative works are acceptable, even for commercial purposes, so long as  
25 -(1) they include prominent notice that the work is derivative, and (2) they  
26 -include prominent notice akin to these three paragraphs for those parts of  
27 -this code that are retained.  
28 -  
29 -===============================================================================  
30 -*/  
31 -  
32 -/*  
33 --------------------------------------------------------------------------------  
34 -Include common integer types and flags.  
35 --------------------------------------------------------------------------------  
36 -*/  
37 -#include "ARM-gcc.h"  
38 -  
39 -/*  
40 --------------------------------------------------------------------------------  
41 -Symbolic Boolean literals.  
42 --------------------------------------------------------------------------------  
43 -*/  
44 -enum {  
45 - FALSE = 0,  
46 - TRUE = 1  
47 -};  
48 -  
target-arm/nwfpe/single_cpdo.c
@@ -76,30 +76,30 @@ unsigned int SingleCPDO(const unsigned int opcode) @@ -76,30 +76,30 @@ unsigned int SingleCPDO(const unsigned int opcode)
76 { 76 {
77 /* dyadic opcodes */ 77 /* dyadic opcodes */
78 case ADF_CODE: 78 case ADF_CODE:
79 - fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm); 79 + fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm, &fpa11->fp_status);
80 break; 80 break;
81 81
82 case MUF_CODE: 82 case MUF_CODE:
83 case FML_CODE: 83 case FML_CODE:
84 - fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm); 84 + fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm, &fpa11->fp_status);
85 break; 85 break;
86 86
87 case SUF_CODE: 87 case SUF_CODE:
88 - fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm); 88 + fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm, &fpa11->fp_status);
89 break; 89 break;
90 90
91 case RSF_CODE: 91 case RSF_CODE:
92 - fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn); 92 + fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn, &fpa11->fp_status);
93 break; 93 break;
94 94
95 case DVF_CODE: 95 case DVF_CODE:
96 case FDV_CODE: 96 case FDV_CODE:
97 - fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm); 97 + fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm, &fpa11->fp_status);
98 break; 98 break;
99 99
100 case RDF_CODE: 100 case RDF_CODE:
101 case FRD_CODE: 101 case FRD_CODE:
102 - fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn); 102 + fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn, &fpa11->fp_status);
103 break; 103 break;
104 104
105 #if 0 105 #if 0
@@ -113,7 +113,7 @@ unsigned int SingleCPDO(const unsigned int opcode) @@ -113,7 +113,7 @@ unsigned int SingleCPDO(const unsigned int opcode)
113 #endif 113 #endif
114 114
115 case RMF_CODE: 115 case RMF_CODE:
116 - fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm); 116 + fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm, &fpa11->fp_status);
117 break; 117 break;
118 118
119 #if 0 119 #if 0
@@ -139,11 +139,11 @@ unsigned int SingleCPDO(const unsigned int opcode) @@ -139,11 +139,11 @@ unsigned int SingleCPDO(const unsigned int opcode)
139 139
140 case RND_CODE: 140 case RND_CODE:
141 case URD_CODE: 141 case URD_CODE:
142 - fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm); 142 + fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm, &fpa11->fp_status);
143 break; 143 break;
144 144
145 case SQT_CODE: 145 case SQT_CODE:
146 - fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm); 146 + fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm, &fpa11->fp_status);
147 break; 147 break;
148 148
149 #if 0 149 #if 0
target-arm/nwfpe/softfloat-macros deleted 100644 → 0
1 -  
2 -/*  
3 -===============================================================================  
4 -  
5 -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point  
6 -Arithmetic Package, Release 2.  
7 -  
8 -Written by John R. Hauser. This work was made possible in part by the  
9 -International Computer Science Institute, located at Suite 600, 1947 Center  
10 -Street, Berkeley, California 94704. Funding was partially provided by the  
11 -National Science Foundation under grant MIP-9311980. The original version  
12 -of this code was written as part of a project to build a fixed-point vector  
13 -processor in collaboration with the University of California at Berkeley,  
14 -overseen by Profs. Nelson Morgan and John Wawrzynek. More information  
15 -is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/  
16 -arithmetic/softfloat.html'.  
17 -  
18 -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort  
19 -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT  
20 -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO  
21 -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY  
22 -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.  
23 -  
24 -Derivative works are acceptable, even for commercial purposes, so long as  
25 -(1) they include prominent notice that the work is derivative, and (2) they  
26 -include prominent notice akin to these three paragraphs for those parts of  
27 -this code that are retained.  
28 -  
29 -===============================================================================  
30 -*/  
31 -  
32 -/*  
33 --------------------------------------------------------------------------------  
34 -Shifts `a' right by the number of bits given in `count'. If any nonzero  
35 -bits are shifted off, they are ``jammed'' into the least significant bit of  
36 -the result by setting the least significant bit to 1. The value of `count'  
37 -can be arbitrarily large; in particular, if `count' is greater than 32, the  
38 -result will be either 0 or 1, depending on whether `a' is zero or nonzero.  
39 -The result is stored in the location pointed to by `zPtr'.  
40 --------------------------------------------------------------------------------  
41 -*/  
42 -INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )  
43 -{  
44 - bits32 z;  
45 - if ( count == 0 ) {  
46 - z = a;  
47 - }  
48 - else if ( count < 32 ) {  
49 - z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );  
50 - }  
51 - else {  
52 - z = ( a != 0 );  
53 - }  
54 - *zPtr = z;  
55 -}  
56 -  
57 -/*  
58 --------------------------------------------------------------------------------  
59 -Shifts `a' right by the number of bits given in `count'. If any nonzero  
60 -bits are shifted off, they are ``jammed'' into the least significant bit of  
61 -the result by setting the least significant bit to 1. The value of `count'  
62 -can be arbitrarily large; in particular, if `count' is greater than 64, the  
63 -result will be either 0 or 1, depending on whether `a' is zero or nonzero.  
64 -The result is stored in the location pointed to by `zPtr'.  
65 --------------------------------------------------------------------------------  
66 -*/  
67 -INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )  
68 -{  
69 - bits64 z;  
70 -  
71 -// __asm__("@shift64RightJamming -- start");  
72 - if ( count == 0 ) {  
73 - z = a;  
74 - }  
75 - else if ( count < 64 ) {  
76 - z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );  
77 - }  
78 - else {  
79 - z = ( a != 0 );  
80 - }  
81 -// __asm__("@shift64RightJamming -- end");  
82 - *zPtr = z;  
83 -}  
84 -  
85 -/*  
86 --------------------------------------------------------------------------------  
87 -Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64  
88 -_plus_ the number of bits given in `count'. The shifted result is at most  
89 -64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The  
90 -bits shifted off form a second 64-bit result as follows: The _last_ bit  
91 -shifted off is the most-significant bit of the extra result, and the other  
92 -63 bits of the extra result are all zero if and only if _all_but_the_last_  
93 -bits shifted off were all zero. This extra result is stored in the location  
94 -pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.  
95 - (This routine makes more sense if `a0' and `a1' are considered to form a  
96 -fixed-point value with binary point between `a0' and `a1'. This fixed-point  
97 -value is shifted right by the number of bits given in `count', and the  
98 -integer part of the result is returned at the location pointed to by  
99 -`z0Ptr'. The fractional part of the result may be slightly corrupted as  
100 -described above, and is returned at the location pointed to by `z1Ptr'.)  
101 --------------------------------------------------------------------------------  
102 -*/  
103 -INLINE void  
104 - shift64ExtraRightJamming(  
105 - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )  
106 -{  
107 - bits64 z0, z1;  
108 - int8 negCount = ( - count ) & 63;  
109 -  
110 - if ( count == 0 ) {  
111 - z1 = a1;  
112 - z0 = a0;  
113 - }  
114 - else if ( count < 64 ) {  
115 - z1 = ( a0<<negCount ) | ( a1 != 0 );  
116 - z0 = a0>>count;  
117 - }  
118 - else {  
119 - if ( count == 64 ) {  
120 - z1 = a0 | ( a1 != 0 );  
121 - }  
122 - else {  
123 - z1 = ( ( a0 | a1 ) != 0 );  
124 - }  
125 - z0 = 0;  
126 - }  
127 - *z1Ptr = z1;  
128 - *z0Ptr = z0;  
129 -  
130 -}  
131 -  
132 -/*  
133 --------------------------------------------------------------------------------  
134 -Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the  
135 -number of bits given in `count'. Any bits shifted off are lost. The value  
136 -of `count' can be arbitrarily large; in particular, if `count' is greater  
137 -than 128, the result will be 0. The result is broken into two 64-bit pieces  
138 -which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  
139 --------------------------------------------------------------------------------  
140 -*/  
141 -INLINE void  
142 - shift128Right(  
143 - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )  
144 -{  
145 - bits64 z0, z1;  
146 - int8 negCount = ( - count ) & 63;  
147 -  
148 - if ( count == 0 ) {  
149 - z1 = a1;  
150 - z0 = a0;  
151 - }  
152 - else if ( count < 64 ) {  
153 - z1 = ( a0<<negCount ) | ( a1>>count );  
154 - z0 = a0>>count;  
155 - }  
156 - else {  
157 - z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;  
158 - z0 = 0;  
159 - }  
160 - *z1Ptr = z1;  
161 - *z0Ptr = z0;  
162 -  
163 -}  
164 -  
165 -/*  
166 --------------------------------------------------------------------------------  
167 -Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the  
168 -number of bits given in `count'. If any nonzero bits are shifted off, they  
169 -are ``jammed'' into the least significant bit of the result by setting the  
170 -least significant bit to 1. The value of `count' can be arbitrarily large;  
171 -in particular, if `count' is greater than 128, the result will be either 0  
172 -or 1, depending on whether the concatenation of `a0' and `a1' is zero or  
173 -nonzero. The result is broken into two 64-bit pieces which are stored at  
174 -the locations pointed to by `z0Ptr' and `z1Ptr'.  
175 --------------------------------------------------------------------------------  
176 -*/  
177 -INLINE void  
178 - shift128RightJamming(  
179 - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )  
180 -{  
181 - bits64 z0, z1;  
182 - int8 negCount = ( - count ) & 63;  
183 -  
184 - if ( count == 0 ) {  
185 - z1 = a1;  
186 - z0 = a0;  
187 - }  
188 - else if ( count < 64 ) {  
189 - z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );  
190 - z0 = a0>>count;  
191 - }  
192 - else {  
193 - if ( count == 64 ) {  
194 - z1 = a0 | ( a1 != 0 );  
195 - }  
196 - else if ( count < 128 ) {  
197 - z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );  
198 - }  
199 - else {  
200 - z1 = ( ( a0 | a1 ) != 0 );  
201 - }  
202 - z0 = 0;  
203 - }  
204 - *z1Ptr = z1;  
205 - *z0Ptr = z0;  
206 -  
207 -}  
208 -  
209 -/*  
210 --------------------------------------------------------------------------------  
211 -Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right  
212 -by 64 _plus_ the number of bits given in `count'. The shifted result is  
213 -at most 128 nonzero bits; these are broken into two 64-bit pieces which are  
214 -stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted  
215 -off form a third 64-bit result as follows: The _last_ bit shifted off is  
216 -the most-significant bit of the extra result, and the other 63 bits of the  
217 -extra result are all zero if and only if _all_but_the_last_ bits shifted off  
218 -were all zero. This extra result is stored in the location pointed to by  
219 -`z2Ptr'. The value of `count' can be arbitrarily large.  
220 - (This routine makes more sense if `a0', `a1', and `a2' are considered  
221 -to form a fixed-point value with binary point between `a1' and `a2'. This  
222 -fixed-point value is shifted right by the number of bits given in `count',  
223 -and the integer part of the result is returned at the locations pointed to  
224 -by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly  
225 -corrupted as described above, and is returned at the location pointed to by  
226 -`z2Ptr'.)  
227 --------------------------------------------------------------------------------  
228 -*/  
229 -INLINE void  
230 - shift128ExtraRightJamming(  
231 - bits64 a0,  
232 - bits64 a1,  
233 - bits64 a2,  
234 - int16 count,  
235 - bits64 *z0Ptr,  
236 - bits64 *z1Ptr,  
237 - bits64 *z2Ptr  
238 - )  
239 -{  
240 - bits64 z0, z1, z2;  
241 - int8 negCount = ( - count ) & 63;  
242 -  
243 - if ( count == 0 ) {  
244 - z2 = a2;  
245 - z1 = a1;  
246 - z0 = a0;  
247 - }  
248 - else {  
249 - if ( count < 64 ) {  
250 - z2 = a1<<negCount;  
251 - z1 = ( a0<<negCount ) | ( a1>>count );  
252 - z0 = a0>>count;  
253 - }  
254 - else {  
255 - if ( count == 64 ) {  
256 - z2 = a1;  
257 - z1 = a0;  
258 - }  
259 - else {  
260 - a2 |= a1;  
261 - if ( count < 128 ) {  
262 - z2 = a0<<negCount;  
263 - z1 = a0>>( count & 63 );  
264 - }  
265 - else {  
266 - z2 = ( count == 128 ) ? a0 : ( a0 != 0 );  
267 - z1 = 0;  
268 - }  
269 - }  
270 - z0 = 0;  
271 - }  
272 - z2 |= ( a2 != 0 );  
273 - }  
274 - *z2Ptr = z2;  
275 - *z1Ptr = z1;  
276 - *z0Ptr = z0;  
277 -  
278 -}  
279 -  
280 -/*  
281 --------------------------------------------------------------------------------  
282 -Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the  
283 -number of bits given in `count'. Any bits shifted off are lost. The value  
284 -of `count' must be less than 64. The result is broken into two 64-bit  
285 -pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  
286 --------------------------------------------------------------------------------  
287 -*/  
288 -INLINE void  
289 - shortShift128Left(  
290 - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )  
291 -{  
292 -  
293 - *z1Ptr = a1<<count;  
294 - *z0Ptr =  
295 - ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );  
296 -  
297 -}  
298 -  
299 -/*  
300 --------------------------------------------------------------------------------  
301 -Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left  
302 -by the number of bits given in `count'. Any bits shifted off are lost.  
303 -The value of `count' must be less than 64. The result is broken into three  
304 -64-bit pieces which are stored at the locations pointed to by `z0Ptr',  
305 -`z1Ptr', and `z2Ptr'.  
306 --------------------------------------------------------------------------------  
307 -*/  
308 -INLINE void  
309 - shortShift192Left(  
310 - bits64 a0,  
311 - bits64 a1,  
312 - bits64 a2,  
313 - int16 count,  
314 - bits64 *z0Ptr,  
315 - bits64 *z1Ptr,  
316 - bits64 *z2Ptr  
317 - )  
318 -{  
319 - bits64 z0, z1, z2;  
320 - int8 negCount;  
321 -  
322 - z2 = a2<<count;  
323 - z1 = a1<<count;  
324 - z0 = a0<<count;  
325 - if ( 0 < count ) {  
326 - negCount = ( ( - count ) & 63 );  
327 - z1 |= a2>>negCount;  
328 - z0 |= a1>>negCount;  
329 - }  
330 - *z2Ptr = z2;  
331 - *z1Ptr = z1;  
332 - *z0Ptr = z0;  
333 -  
334 -}  
335 -  
336 -/*  
337 --------------------------------------------------------------------------------  
338 -Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit  
339 -value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so  
340 -any carry out is lost. The result is broken into two 64-bit pieces which  
341 -are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  
342 --------------------------------------------------------------------------------  
343 -*/  
344 -INLINE void  
345 - add128(  
346 - bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )  
347 -{  
348 - bits64 z1;  
349 -  
350 - z1 = a1 + b1;  
351 - *z1Ptr = z1;  
352 - *z0Ptr = a0 + b0 + ( z1 < a1 );  
353 -  
354 -}  
355 -  
356 -/*  
357 --------------------------------------------------------------------------------  
358 -Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the  
359 -192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is  
360 -modulo 2^192, so any carry out is lost. The result is broken into three  
361 -64-bit pieces which are stored at the locations pointed to by `z0Ptr',  
362 -`z1Ptr', and `z2Ptr'.  
363 --------------------------------------------------------------------------------  
364 -*/  
365 -INLINE void  
366 - add192(  
367 - bits64 a0,  
368 - bits64 a1,  
369 - bits64 a2,  
370 - bits64 b0,  
371 - bits64 b1,  
372 - bits64 b2,  
373 - bits64 *z0Ptr,  
374 - bits64 *z1Ptr,  
375 - bits64 *z2Ptr  
376 - )  
377 -{  
378 - bits64 z0, z1, z2;  
379 - int8 carry0, carry1;  
380 -  
381 - z2 = a2 + b2;  
382 - carry1 = ( z2 < a2 );  
383 - z1 = a1 + b1;  
384 - carry0 = ( z1 < a1 );  
385 - z0 = a0 + b0;  
386 - z1 += carry1;  
387 - z0 += ( z1 < carry1 );  
388 - z0 += carry0;  
389 - *z2Ptr = z2;  
390 - *z1Ptr = z1;  
391 - *z0Ptr = z0;  
392 -  
393 -}  
394 -  
395 -/*  
396 --------------------------------------------------------------------------------  
397 -Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the  
398 -128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo  
399 -2^128, so any borrow out (carry out) is lost. The result is broken into two  
400 -64-bit pieces which are stored at the locations pointed to by `z0Ptr' and  
401 -`z1Ptr'.  
402 --------------------------------------------------------------------------------  
403 -*/  
404 -INLINE void  
405 - sub128(  
406 - bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )  
407 -{  
408 -  
409 - *z1Ptr = a1 - b1;  
410 - *z0Ptr = a0 - b0 - ( a1 < b1 );  
411 -  
412 -}  
413 -  
414 -/*  
415 --------------------------------------------------------------------------------  
416 -Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'  
417 -from the 192-bit value formed by concatenating `a0', `a1', and `a2'.  
418 -Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The  
419 -result is broken into three 64-bit pieces which are stored at the locations  
420 -pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.  
421 --------------------------------------------------------------------------------  
422 -*/  
423 -INLINE void  
424 - sub192(  
425 - bits64 a0,  
426 - bits64 a1,  
427 - bits64 a2,  
428 - bits64 b0,  
429 - bits64 b1,  
430 - bits64 b2,  
431 - bits64 *z0Ptr,  
432 - bits64 *z1Ptr,  
433 - bits64 *z2Ptr  
434 - )  
435 -{  
436 - bits64 z0, z1, z2;  
437 - int8 borrow0, borrow1;  
438 -  
439 - z2 = a2 - b2;  
440 - borrow1 = ( a2 < b2 );  
441 - z1 = a1 - b1;  
442 - borrow0 = ( a1 < b1 );  
443 - z0 = a0 - b0;  
444 - z0 -= ( z1 < borrow1 );  
445 - z1 -= borrow1;  
446 - z0 -= borrow0;  
447 - *z2Ptr = z2;  
448 - *z1Ptr = z1;  
449 - *z0Ptr = z0;  
450 -  
451 -}  
452 -  
453 -/*  
454 --------------------------------------------------------------------------------  
455 -Multiplies `a' by `b' to obtain a 128-bit product. The product is broken  
456 -into two 64-bit pieces which are stored at the locations pointed to by  
457 -`z0Ptr' and `z1Ptr'.  
458 --------------------------------------------------------------------------------  
459 -*/  
460 -INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )  
461 -{  
462 - bits32 aHigh, aLow, bHigh, bLow;  
463 - bits64 z0, zMiddleA, zMiddleB, z1;  
464 -  
465 - aLow = a;  
466 - aHigh = a>>32;  
467 - bLow = b;  
468 - bHigh = b>>32;  
469 - z1 = ( (bits64) aLow ) * bLow;  
470 - zMiddleA = ( (bits64) aLow ) * bHigh;  
471 - zMiddleB = ( (bits64) aHigh ) * bLow;  
472 - z0 = ( (bits64) aHigh ) * bHigh;  
473 - zMiddleA += zMiddleB;  
474 - z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );  
475 - zMiddleA <<= 32;  
476 - z1 += zMiddleA;  
477 - z0 += ( z1 < zMiddleA );  
478 - *z1Ptr = z1;  
479 - *z0Ptr = z0;  
480 -  
481 -}  
482 -  
483 -/*  
484 --------------------------------------------------------------------------------  
485 -Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to  
486 -obtain a 192-bit product. The product is broken into three 64-bit pieces  
487 -which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and  
488 -`z2Ptr'.  
489 --------------------------------------------------------------------------------  
490 -*/  
491 -INLINE void  
492 - mul128By64To192(  
493 - bits64 a0,  
494 - bits64 a1,  
495 - bits64 b,  
496 - bits64 *z0Ptr,  
497 - bits64 *z1Ptr,  
498 - bits64 *z2Ptr  
499 - )  
500 -{  
501 - bits64 z0, z1, z2, more1;  
502 -  
503 - mul64To128( a1, b, &z1, &z2 );  
504 - mul64To128( a0, b, &z0, &more1 );  
505 - add128( z0, more1, 0, z1, &z0, &z1 );  
506 - *z2Ptr = z2;  
507 - *z1Ptr = z1;  
508 - *z0Ptr = z0;  
509 -  
510 -}  
511 -  
512 -/*  
513 --------------------------------------------------------------------------------  
514 -Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the  
515 -128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit  
516 -product. The product is broken into four 64-bit pieces which are stored at  
517 -the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.  
518 --------------------------------------------------------------------------------  
519 -*/  
520 -INLINE void  
521 - mul128To256(  
522 - bits64 a0,  
523 - bits64 a1,  
524 - bits64 b0,  
525 - bits64 b1,  
526 - bits64 *z0Ptr,  
527 - bits64 *z1Ptr,  
528 - bits64 *z2Ptr,  
529 - bits64 *z3Ptr  
530 - )  
531 -{  
532 - bits64 z0, z1, z2, z3;  
533 - bits64 more1, more2;  
534 -  
535 - mul64To128( a1, b1, &z2, &z3 );  
536 - mul64To128( a1, b0, &z1, &more2 );  
537 - add128( z1, more2, 0, z2, &z1, &z2 );  
538 - mul64To128( a0, b0, &z0, &more1 );  
539 - add128( z0, more1, 0, z1, &z0, &z1 );  
540 - mul64To128( a0, b1, &more1, &more2 );  
541 - add128( more1, more2, 0, z2, &more1, &z2 );  
542 - add128( z0, z1, 0, more1, &z0, &z1 );  
543 - *z3Ptr = z3;  
544 - *z2Ptr = z2;  
545 - *z1Ptr = z1;  
546 - *z0Ptr = z0;  
547 -  
548 -}  
549 -  
550 -/*  
551 --------------------------------------------------------------------------------  
552 -Returns an approximation to the 64-bit integer quotient obtained by dividing  
553 -`b' into the 128-bit value formed by concatenating `a0' and `a1'. The  
554 -divisor `b' must be at least 2^63. If q is the exact quotient truncated  
555 -toward zero, the approximation returned lies between q and q + 2 inclusive.  
556 -If the exact quotient q is larger than 64 bits, the maximum positive 64-bit  
557 -unsigned integer is returned.  
558 --------------------------------------------------------------------------------  
559 -*/  
560 -static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )  
561 -{  
562 - bits64 b0, b1;  
563 - bits64 rem0, rem1, term0, term1;  
564 - bits64 z;  
565 - if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );  
566 - b0 = b>>32;  
567 - z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;  
568 - mul64To128( b, z, &term0, &term1 );  
569 - sub128( a0, a1, term0, term1, &rem0, &rem1 );  
570 - while ( ( (sbits64) rem0 ) < 0 ) {  
571 - z -= LIT64( 0x100000000 );  
572 - b1 = b<<32;  
573 - add128( rem0, rem1, b0, b1, &rem0, &rem1 );  
574 - }  
575 - rem0 = ( rem0<<32 ) | ( rem1>>32 );  
576 - z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;  
577 - return z;  
578 -  
579 -}  
580 -  
581 -/*  
582 --------------------------------------------------------------------------------  
583 -Returns an approximation to the square root of the 32-bit significand given  
584 -by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of  
585 -`aExp' (the least significant bit) is 1, the integer returned approximates  
586 -2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'  
587 -is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either  
588 -case, the approximation returned lies strictly within +/-2 of the exact  
589 -value.  
590 --------------------------------------------------------------------------------  
591 -*/  
592 -static bits32 estimateSqrt32( int16 aExp, bits32 a )  
593 -{  
594 - static const bits16 sqrtOddAdjustments[] = {  
595 - 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,  
596 - 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67  
597 - };  
598 - static const bits16 sqrtEvenAdjustments[] = {  
599 - 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,  
600 - 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002  
601 - };  
602 - int8 index;  
603 - bits32 z;  
604 -  
605 - index = ( a>>27 ) & 15;  
606 - if ( aExp & 1 ) {  
607 - z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];  
608 - z = ( ( a / z )<<14 ) + ( z<<15 );  
609 - a >>= 1;  
610 - }  
611 - else {  
612 - z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];  
613 - z = a / z + z;  
614 - z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );  
615 - if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );  
616 - }  
617 - return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );  
618 -  
619 -}  
620 -  
621 -/*  
622 --------------------------------------------------------------------------------  
623 -Returns the number of leading 0 bits before the most-significant 1 bit  
624 -of `a'. If `a' is zero, 32 is returned.  
625 --------------------------------------------------------------------------------  
626 -*/  
627 -static int8 countLeadingZeros32( bits32 a )  
628 -{  
629 - static const int8 countLeadingZerosHigh[] = {  
630 - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,  
631 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  
632 - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  
633 - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  
634 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  
635 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  
636 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  
637 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  
638 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  
639 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  
640 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  
641 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  
642 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  
643 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  
644 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  
645 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  
646 - };  
647 - int8 shiftCount;  
648 -  
649 - shiftCount = 0;  
650 - if ( a < 0x10000 ) {  
651 - shiftCount += 16;  
652 - a <<= 16;  
653 - }  
654 - if ( a < 0x1000000 ) {  
655 - shiftCount += 8;  
656 - a <<= 8;  
657 - }  
658 - shiftCount += countLeadingZerosHigh[ a>>24 ];  
659 - return shiftCount;  
660 -  
661 -}  
662 -  
663 -/*  
664 --------------------------------------------------------------------------------  
665 -Returns the number of leading 0 bits before the most-significant 1 bit  
666 -of `a'. If `a' is zero, 64 is returned.  
667 --------------------------------------------------------------------------------  
668 -*/  
669 -static int8 countLeadingZeros64( bits64 a )  
670 -{  
671 - int8 shiftCount;  
672 -  
673 - shiftCount = 0;  
674 - if ( a < ( (bits64) 1 )<<32 ) {  
675 - shiftCount += 32;  
676 - }  
677 - else {  
678 - a >>= 32;  
679 - }  
680 - shiftCount += countLeadingZeros32( a );  
681 - return shiftCount;  
682 -  
683 -}  
684 -  
685 -/*  
686 --------------------------------------------------------------------------------  
687 -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'  
688 -is equal to the 128-bit value formed by concatenating `b0' and `b1'.  
689 -Otherwise, returns 0.  
690 --------------------------------------------------------------------------------  
691 -*/  
692 -INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )  
693 -{  
694 -  
695 - return ( a0 == b0 ) && ( a1 == b1 );  
696 -  
697 -}  
698 -  
699 -/*  
700 --------------------------------------------------------------------------------  
701 -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less  
702 -than or equal to the 128-bit value formed by concatenating `b0' and `b1'.  
703 -Otherwise, returns 0.  
704 --------------------------------------------------------------------------------  
705 -*/  
706 -INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )  
707 -{  
708 -  
709 - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );  
710 -  
711 -}  
712 -  
713 -/*  
714 --------------------------------------------------------------------------------  
715 -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less  
716 -than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,  
717 -returns 0.  
718 --------------------------------------------------------------------------------  
719 -*/  
720 -INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )  
721 -{  
722 -  
723 - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );  
724 -  
725 -}  
726 -  
727 -/*  
728 --------------------------------------------------------------------------------  
729 -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is  
730 -not equal to the 128-bit value formed by concatenating `b0' and `b1'.  
731 -Otherwise, returns 0.  
732 --------------------------------------------------------------------------------  
733 -*/  
734 -INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )  
735 -{  
736 -  
737 - return ( a0 != b0 ) || ( a1 != b1 );  
738 -  
739 -}  
740 -  
target-arm/nwfpe/softfloat-specialize deleted 100644 → 0
1 -  
2 -/*  
3 -===============================================================================  
4 -  
5 -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point  
6 -Arithmetic Package, Release 2.  
7 -  
8 -Written by John R. Hauser. This work was made possible in part by the  
9 -International Computer Science Institute, located at Suite 600, 1947 Center  
10 -Street, Berkeley, California 94704. Funding was partially provided by the  
11 -National Science Foundation under grant MIP-9311980. The original version  
12 -of this code was written as part of a project to build a fixed-point vector  
13 -processor in collaboration with the University of California at Berkeley,  
14 -overseen by Profs. Nelson Morgan and John Wawrzynek. More information  
15 -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/  
16 -arithmetic/softfloat.html'.  
17 -  
18 -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort  
19 -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT  
20 -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO  
21 -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY  
22 -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.  
23 -  
24 -Derivative works are acceptable, even for commercial purposes, so long as  
25 -(1) they include prominent notice that the work is derivative, and (2) they  
26 -include prominent notice akin to these three paragraphs for those parts of  
27 -this code that are retained.  
28 -  
29 -===============================================================================  
30 -*/  
31 -  
32 -/*  
33 --------------------------------------------------------------------------------  
34 -Underflow tininess-detection mode, statically initialized to default value.  
35 -(The declaration in `softfloat.h' must match the `int8' type here.)  
36 --------------------------------------------------------------------------------  
37 -*/  
38 -int8 float_detect_tininess = float_tininess_after_rounding;  
39 -  
40 -/*  
41 --------------------------------------------------------------------------------  
42 -Raises the exceptions specified by `flags'. Floating-point traps can be  
43 -defined here if desired. It is currently not possible for such a trap to  
44 -substitute a result value. If traps are not implemented, this routine  
45 -should be simply `float_exception_flags |= flags;'.  
46 -  
47 -ScottB: November 4, 1998  
48 -Moved this function out of softfloat-specialize into fpmodule.c.  
49 -This effectively isolates all the changes required for integrating with the  
50 -Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying  
51 -fpmodule.c to integrate with the NetBSD kernel (I hope!).  
52 --------------------------------------------------------------------------------  
53 -*/  
54 -void float_raise( int8 flags )  
55 -{  
56 - float_exception_flags |= flags;  
57 -}  
58 -  
59 -/*  
60 --------------------------------------------------------------------------------  
61 -Internal canonical NaN format.  
62 --------------------------------------------------------------------------------  
63 -*/  
64 -typedef struct {  
65 - flag sign;  
66 - bits64 high, low;  
67 -} commonNaNT;  
68 -  
69 -/*  
70 --------------------------------------------------------------------------------  
71 -The pattern for a default generated single-precision NaN.  
72 --------------------------------------------------------------------------------  
73 -*/  
74 -#define float32_default_nan 0xFFFFFFFF  
75 -  
76 -/*  
77 --------------------------------------------------------------------------------  
78 -Returns 1 if the single-precision floating-point value `a' is a NaN;  
79 -otherwise returns 0.  
80 --------------------------------------------------------------------------------  
81 -*/  
82 -flag float32_is_nan( float32 a )  
83 -{  
84 -  
85 - return ( 0xFF000000 < (bits32) ( a<<1 ) );  
86 -  
87 -}  
88 -  
89 -/*  
90 --------------------------------------------------------------------------------  
91 -Returns 1 if the single-precision floating-point value `a' is a signaling  
92 -NaN; otherwise returns 0.  
93 --------------------------------------------------------------------------------  
94 -*/  
95 -flag float32_is_signaling_nan( float32 a )  
96 -{  
97 -  
98 - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );  
99 -  
100 -}  
101 -  
102 -/*  
103 --------------------------------------------------------------------------------  
104 -Returns the result of converting the single-precision floating-point NaN  
105 -`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid  
106 -exception is raised.  
107 --------------------------------------------------------------------------------  
108 -*/  
109 -static commonNaNT float32ToCommonNaN( float32 a )  
110 -{  
111 - commonNaNT z;  
112 -  
113 - if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );  
114 - z.sign = a>>31;  
115 - z.low = 0;  
116 - z.high = ( (bits64) a )<<41;  
117 - return z;  
118 -  
119 -}  
120 -  
121 -/*  
122 --------------------------------------------------------------------------------  
123 -Returns the result of converting the canonical NaN `a' to the single-  
124 -precision floating-point format.  
125 --------------------------------------------------------------------------------  
126 -*/  
127 -static float32 commonNaNToFloat32( commonNaNT a )  
128 -{  
129 -  
130 - return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );  
131 -  
132 -}  
133 -  
134 -/*  
135 --------------------------------------------------------------------------------  
136 -Takes two single-precision floating-point values `a' and `b', one of which  
137 -is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a  
138 -signaling NaN, the invalid exception is raised.  
139 --------------------------------------------------------------------------------  
140 -*/  
141 -static float32 propagateFloat32NaN( float32 a, float32 b )  
142 -{  
143 - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;  
144 -  
145 - aIsNaN = float32_is_nan( a );  
146 - aIsSignalingNaN = float32_is_signaling_nan( a );  
147 - bIsNaN = float32_is_nan( b );  
148 - bIsSignalingNaN = float32_is_signaling_nan( b );  
149 - a |= 0x00400000;  
150 - b |= 0x00400000;  
151 - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );  
152 - if ( aIsNaN ) {  
153 - return ( aIsSignalingNaN & bIsNaN ) ? b : a;  
154 - }  
155 - else {  
156 - return b;  
157 - }  
158 -  
159 -}  
160 -  
161 -/*  
162 --------------------------------------------------------------------------------  
163 -The pattern for a default generated double-precision NaN.  
164 --------------------------------------------------------------------------------  
165 -*/  
166 -#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )  
167 -  
168 -/*  
169 --------------------------------------------------------------------------------  
170 -Returns 1 if the double-precision floating-point value `a' is a NaN;  
171 -otherwise returns 0.  
172 --------------------------------------------------------------------------------  
173 -*/  
174 -flag float64_is_nan( float64 a )  
175 -{  
176 -  
177 - return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );  
178 -  
179 -}  
180 -  
181 -/*  
182 --------------------------------------------------------------------------------  
183 -Returns 1 if the double-precision floating-point value `a' is a signaling  
184 -NaN; otherwise returns 0.  
185 --------------------------------------------------------------------------------  
186 -*/  
187 -flag float64_is_signaling_nan( float64 a )  
188 -{  
189 -  
190 - return  
191 - ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )  
192 - && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );  
193 -  
194 -}  
195 -  
196 -/*  
197 --------------------------------------------------------------------------------  
198 -Returns the result of converting the double-precision floating-point NaN  
199 -`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid  
200 -exception is raised.  
201 --------------------------------------------------------------------------------  
202 -*/  
203 -static commonNaNT float64ToCommonNaN( float64 a )  
204 -{  
205 - commonNaNT z;  
206 -  
207 - if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );  
208 - z.sign = a>>63;  
209 - z.low = 0;  
210 - z.high = a<<12;  
211 - return z;  
212 -  
213 -}  
214 -  
215 -/*  
216 --------------------------------------------------------------------------------  
217 -Returns the result of converting the canonical NaN `a' to the double-  
218 -precision floating-point format.  
219 --------------------------------------------------------------------------------  
220 -*/  
221 -static float64 commonNaNToFloat64( commonNaNT a )  
222 -{  
223 -  
224 - return  
225 - ( ( (bits64) a.sign )<<63 )  
226 - | LIT64( 0x7FF8000000000000 )  
227 - | ( a.high>>12 );  
228 -  
229 -}  
230 -  
231 -/*  
232 --------------------------------------------------------------------------------  
233 -Takes two double-precision floating-point values `a' and `b', one of which  
234 -is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a  
235 -signaling NaN, the invalid exception is raised.  
236 --------------------------------------------------------------------------------  
237 -*/  
238 -static float64 propagateFloat64NaN( float64 a, float64 b )  
239 -{  
240 - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;  
241 -  
242 - aIsNaN = float64_is_nan( a );  
243 - aIsSignalingNaN = float64_is_signaling_nan( a );  
244 - bIsNaN = float64_is_nan( b );  
245 - bIsSignalingNaN = float64_is_signaling_nan( b );  
246 - a |= LIT64( 0x0008000000000000 );  
247 - b |= LIT64( 0x0008000000000000 );  
248 - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );  
249 - if ( aIsNaN ) {  
250 - return ( aIsSignalingNaN & bIsNaN ) ? b : a;  
251 - }  
252 - else {  
253 - return b;  
254 - }  
255 -  
256 -}  
257 -  
258 -#ifdef FLOATX80  
259 -  
260 -/*  
261 --------------------------------------------------------------------------------  
262 -The pattern for a default generated extended double-precision NaN. The  
263 -`high' and `low' values hold the most- and least-significant bits,  
264 -respectively.  
265 --------------------------------------------------------------------------------  
266 -*/  
267 -#define floatx80_default_nan_high 0xFFFF  
268 -#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )  
269 -  
270 -/*  
271 --------------------------------------------------------------------------------  
272 -Returns 1 if the extended double-precision floating-point value `a' is a  
273 -NaN; otherwise returns 0.  
274 --------------------------------------------------------------------------------  
275 -*/  
276 -flag floatx80_is_nan( floatx80 a )  
277 -{  
278 -  
279 - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );  
280 -  
281 -}  
282 -  
283 -/*  
284 --------------------------------------------------------------------------------  
285 -Returns 1 if the extended double-precision floating-point value `a' is a  
286 -signaling NaN; otherwise returns 0.  
287 --------------------------------------------------------------------------------  
288 -*/  
289 -flag floatx80_is_signaling_nan( floatx80 a )  
290 -{  
291 - //register int lr;  
292 - bits64 aLow;  
293 -  
294 - //__asm__("mov %0, lr" : : "g" (lr));  
295 - //fp_printk("floatx80_is_signalling_nan() called from 0x%08x\n",lr);  
296 - aLow = a.low & ~ LIT64( 0x4000000000000000 );  
297 - return  
298 - ( ( a.high & 0x7FFF ) == 0x7FFF )  
299 - && (bits64) ( aLow<<1 )  
300 - && ( a.low == aLow );  
301 -  
302 -}  
303 -  
304 -/*  
305 --------------------------------------------------------------------------------  
306 -Returns the result of converting the extended double-precision floating-  
307 -point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the  
308 -invalid exception is raised.  
309 --------------------------------------------------------------------------------  
310 -*/  
311 -static commonNaNT floatx80ToCommonNaN( floatx80 a )  
312 -{  
313 - commonNaNT z;  
314 -  
315 - if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );  
316 - z.sign = a.high>>15;  
317 - z.low = 0;  
318 - z.high = a.low<<1;  
319 - return z;  
320 -  
321 -}  
322 -  
323 -/*  
324 --------------------------------------------------------------------------------  
325 -Returns the result of converting the canonical NaN `a' to the extended  
326 -double-precision floating-point format.  
327 --------------------------------------------------------------------------------  
328 -*/  
329 -static floatx80 commonNaNToFloatx80( commonNaNT a )  
330 -{  
331 - floatx80 z;  
332 -  
333 - z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );  
334 - z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;  
335 - return z;  
336 -  
337 -}  
338 -  
339 -/*  
340 --------------------------------------------------------------------------------  
341 -Takes two extended double-precision floating-point values `a' and `b', one  
342 -of which is a NaN, and returns the appropriate NaN result. If either `a' or  
343 -`b' is a signaling NaN, the invalid exception is raised.  
344 --------------------------------------------------------------------------------  
345 -*/  
346 -static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )  
347 -{  
348 - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;  
349 -  
350 - aIsNaN = floatx80_is_nan( a );  
351 - aIsSignalingNaN = floatx80_is_signaling_nan( a );  
352 - bIsNaN = floatx80_is_nan( b );  
353 - bIsSignalingNaN = floatx80_is_signaling_nan( b );  
354 - a.low |= LIT64( 0xC000000000000000 );  
355 - b.low |= LIT64( 0xC000000000000000 );  
356 - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );  
357 - if ( aIsNaN ) {  
358 - return ( aIsSignalingNaN & bIsNaN ) ? b : a;  
359 - }  
360 - else {  
361 - return b;  
362 - }  
363 -  
364 -}  
365 -  
366 -#endif  
target-arm/nwfpe/softfloat.c deleted 100644 → 0
1 -/*  
2 -===============================================================================  
3 -  
4 -This C source file is part of the SoftFloat IEC/IEEE Floating-point  
5 -Arithmetic Package, Release 2.  
6 -  
7 -Written by John R. Hauser. This work was made possible in part by the  
8 -International Computer Science Institute, located at Suite 600, 1947 Center  
9 -Street, Berkeley, California 94704. Funding was partially provided by the  
10 -National Science Foundation under grant MIP-9311980. The original version  
11 -of this code was written as part of a project to build a fixed-point vector  
12 -processor in collaboration with the University of California at Berkeley,  
13 -overseen by Profs. Nelson Morgan and John Wawrzynek. More information  
14 -is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/  
15 -arithmetic/softfloat.html'.  
16 -  
17 -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort  
18 -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT  
19 -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO  
20 -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY  
21 -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.  
22 -  
23 -Derivative works are acceptable, even for commercial purposes, so long as  
24 -(1) they include prominent notice that the work is derivative, and (2) they  
25 -include prominent notice akin to these three paragraphs for those parts of  
26 -this code that are retained.  
27 -  
28 -===============================================================================  
29 -*/  
30 -  
31 -#include "fpa11.h"  
32 -#include "milieu.h"  
33 -#include "softfloat.h"  
34 -  
35 -/*  
36 --------------------------------------------------------------------------------  
37 -Floating-point rounding mode, extended double-precision rounding precision,  
38 -and exception flags.  
39 --------------------------------------------------------------------------------  
40 -*/  
41 -int8 float_rounding_mode = float_round_nearest_even;  
42 -int8 floatx80_rounding_precision = 80;  
43 -int8 float_exception_flags;  
44 -  
45 -/*  
46 --------------------------------------------------------------------------------  
47 -Primitive arithmetic functions, including multi-word arithmetic, and  
48 -division and square root approximations. (Can be specialized to target if  
49 -desired.)  
50 --------------------------------------------------------------------------------  
51 -*/  
52 -#include "softfloat-macros"  
53 -  
54 -/*  
55 --------------------------------------------------------------------------------  
56 -Functions and definitions to determine: (1) whether tininess for underflow  
57 -is detected before or after rounding by default, (2) what (if anything)  
58 -happens when exceptions are raised, (3) how signaling NaNs are distinguished  
59 -from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs  
60 -are propagated from function inputs to output. These details are target-  
61 -specific.  
62 --------------------------------------------------------------------------------  
63 -*/  
64 -#include "softfloat-specialize"  
65 -  
66 -/*  
67 --------------------------------------------------------------------------------  
68 -Takes a 64-bit fixed-point value `absZ' with binary point between bits 6  
69 -and 7, and returns the properly rounded 32-bit integer corresponding to the  
70 -input. If `zSign' is nonzero, the input is negated before being converted  
71 -to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point  
72 -input is simply rounded to an integer, with the inexact exception raised if  
73 -the input cannot be represented exactly as an integer. If the fixed-point  
74 -input is too large, however, the invalid exception is raised and the largest  
75 -positive or negative integer is returned.  
76 --------------------------------------------------------------------------------  
77 -*/  
78 -static int32 roundAndPackInt32( flag zSign, bits64 absZ )  
79 -{  
80 - int8 roundingMode;  
81 - flag roundNearestEven;  
82 - int8 roundIncrement, roundBits;  
83 - int32 z;  
84 -  
85 - roundingMode = float_rounding_mode;  
86 - roundNearestEven = ( roundingMode == float_round_nearest_even );  
87 - roundIncrement = 0x40;  
88 - if ( ! roundNearestEven ) {  
89 - if ( roundingMode == float_round_to_zero ) {  
90 - roundIncrement = 0;  
91 - }  
92 - else {  
93 - roundIncrement = 0x7F;  
94 - if ( zSign ) {  
95 - if ( roundingMode == float_round_up ) roundIncrement = 0;  
96 - }  
97 - else {  
98 - if ( roundingMode == float_round_down ) roundIncrement = 0;  
99 - }  
100 - }  
101 - }  
102 - roundBits = absZ & 0x7F;  
103 - absZ = ( absZ + roundIncrement )>>7;  
104 - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );  
105 - z = absZ;  
106 - if ( zSign ) z = - z;  
107 - if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {  
108 - float_exception_flags |= float_flag_invalid;  
109 - return zSign ? 0x80000000 : 0x7FFFFFFF;  
110 - }  
111 - if ( roundBits ) float_exception_flags |= float_flag_inexact;  
112 - return z;  
113 -  
114 -}  
115 -  
116 -/*  
117 --------------------------------------------------------------------------------  
118 -Returns the fraction bits of the single-precision floating-point value `a'.  
119 --------------------------------------------------------------------------------  
120 -*/  
121 -INLINE bits32 extractFloat32Frac( float32 a )  
122 -{  
123 -  
124 - return a & 0x007FFFFF;  
125 -  
126 -}  
127 -  
128 -/*  
129 --------------------------------------------------------------------------------  
130 -Returns the exponent bits of the single-precision floating-point value `a'.  
131 --------------------------------------------------------------------------------  
132 -*/  
133 -INLINE int16 extractFloat32Exp( float32 a )  
134 -{  
135 -  
136 - return ( a>>23 ) & 0xFF;  
137 -  
138 -}  
139 -  
140 -/*  
141 --------------------------------------------------------------------------------  
142 -Returns the sign bit of the single-precision floating-point value `a'.  
143 --------------------------------------------------------------------------------  
144 -*/  
145 -INLINE flag extractFloat32Sign( float32 a )  
146 -{  
147 -  
148 - return a>>31;  
149 -  
150 -}  
151 -  
152 -/*  
153 --------------------------------------------------------------------------------  
154 -Normalizes the subnormal single-precision floating-point value represented  
155 -by the denormalized significand `aSig'. The normalized exponent and  
156 -significand are stored at the locations pointed to by `zExpPtr' and  
157 -`zSigPtr', respectively.  
158 --------------------------------------------------------------------------------  
159 -*/  
160 -static void  
161 - normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )  
162 -{  
163 - int8 shiftCount;  
164 -  
165 - shiftCount = countLeadingZeros32( aSig ) - 8;  
166 - *zSigPtr = aSig<<shiftCount;  
167 - *zExpPtr = 1 - shiftCount;  
168 -  
169 -}  
170 -  
171 -/*  
172 --------------------------------------------------------------------------------  
173 -Packs the sign `zSign', exponent `zExp', and significand `zSig' into a  
174 -single-precision floating-point value, returning the result. After being  
175 -shifted into the proper positions, the three fields are simply added  
176 -together to form the result. This means that any integer portion of `zSig'  
177 -will be added into the exponent. Since a properly normalized significand  
178 -will have an integer portion equal to 1, the `zExp' input should be 1 less  
179 -than the desired result exponent whenever `zSig' is a complete, normalized  
180 -significand.  
181 --------------------------------------------------------------------------------  
182 -*/  
183 -INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )  
184 -{  
185 - return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;  
186 -}  
187 -  
188 -/*  
189 --------------------------------------------------------------------------------  
190 -Takes an abstract floating-point value having sign `zSign', exponent `zExp',  
191 -and significand `zSig', and returns the proper single-precision floating-  
192 -point value corresponding to the abstract input. Ordinarily, the abstract  
193 -value is simply rounded and packed into the single-precision format, with  
194 -the inexact exception raised if the abstract input cannot be represented  
195 -exactly. If the abstract value is too large, however, the overflow and  
196 -inexact exceptions are raised and an infinity or maximal finite value is  
197 -returned. If the abstract value is too small, the input value is rounded to  
198 -a subnormal number, and the underflow and inexact exceptions are raised if  
199 -the abstract input cannot be represented exactly as a subnormal single-  
200 -precision floating-point number.  
201 - The input significand `zSig' has its binary point between bits 30  
202 -and 29, which is 7 bits to the left of the usual location. This shifted  
203 -significand must be normalized or smaller. If `zSig' is not normalized,  
204 -`zExp' must be 0; in that case, the result returned is a subnormal number,  
205 -and it must not require rounding. In the usual case that `zSig' is  
206 -normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.  
207 -The handling of underflow and overflow follows the IEC/IEEE Standard for  
208 -Binary Floating-point Arithmetic.  
209 --------------------------------------------------------------------------------  
210 -*/  
211 -static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )  
212 -{  
213 - int8 roundingMode;  
214 - flag roundNearestEven;  
215 - int8 roundIncrement, roundBits;  
216 - flag isTiny;  
217 -  
218 - roundingMode = float_rounding_mode;  
219 - roundNearestEven = ( roundingMode == float_round_nearest_even );  
220 - roundIncrement = 0x40;  
221 - if ( ! roundNearestEven ) {  
222 - if ( roundingMode == float_round_to_zero ) {  
223 - roundIncrement = 0;  
224 - }  
225 - else {  
226 - roundIncrement = 0x7F;  
227 - if ( zSign ) {  
228 - if ( roundingMode == float_round_up ) roundIncrement = 0;  
229 - }  
230 - else {  
231 - if ( roundingMode == float_round_down ) roundIncrement = 0;  
232 - }  
233 - }  
234 - }  
235 - roundBits = zSig & 0x7F;  
236 - if ( 0xFD <= (bits16) zExp ) {  
237 - if ( ( 0xFD < zExp )  
238 - || ( ( zExp == 0xFD )  
239 - && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )  
240 - ) {  
241 - float_raise( float_flag_overflow | float_flag_inexact );  
242 - return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );  
243 - }  
244 - if ( zExp < 0 ) {  
245 - isTiny =  
246 - ( float_detect_tininess == float_tininess_before_rounding )  
247 - || ( zExp < -1 )  
248 - || ( zSig + roundIncrement < 0x80000000 );  
249 - shift32RightJamming( zSig, - zExp, &zSig );  
250 - zExp = 0;  
251 - roundBits = zSig & 0x7F;  
252 - if ( isTiny && roundBits ) float_raise( float_flag_underflow );  
253 - }  
254 - }  
255 - if ( roundBits ) float_exception_flags |= float_flag_inexact;  
256 - zSig = ( zSig + roundIncrement )>>7;  
257 - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );  
258 - if ( zSig == 0 ) zExp = 0;  
259 - return packFloat32( zSign, zExp, zSig );  
260 -  
261 -}  
262 -  
263 -/*  
264 --------------------------------------------------------------------------------  
265 -Takes an abstract floating-point value having sign `zSign', exponent `zExp',  
266 -and significand `zSig', and returns the proper single-precision floating-  
267 -point value corresponding to the abstract input. This routine is just like  
268 -`roundAndPackFloat32' except that `zSig' does not have to be normalized in  
269 -any way. In all cases, `zExp' must be 1 less than the ``true'' floating-  
270 -point exponent.  
271 --------------------------------------------------------------------------------  
272 -*/  
273 -static float32  
274 - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )  
275 -{  
276 - int8 shiftCount;  
277 -  
278 - shiftCount = countLeadingZeros32( zSig ) - 1;  
279 - return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );  
280 -  
281 -}  
282 -  
283 -/*  
284 --------------------------------------------------------------------------------  
285 -Returns the fraction bits of the double-precision floating-point value `a'.  
286 --------------------------------------------------------------------------------  
287 -*/  
288 -INLINE bits64 extractFloat64Frac( float64 a )  
289 -{  
290 -  
291 - return a & LIT64( 0x000FFFFFFFFFFFFF );  
292 -  
293 -}  
294 -  
295 -/*  
296 --------------------------------------------------------------------------------  
297 -Returns the exponent bits of the double-precision floating-point value `a'.  
298 --------------------------------------------------------------------------------  
299 -*/  
300 -INLINE int16 extractFloat64Exp( float64 a )  
301 -{  
302 -  
303 - return ( a>>52 ) & 0x7FF;  
304 -  
305 -}  
306 -  
307 -/*  
308 --------------------------------------------------------------------------------  
309 -Returns the sign bit of the double-precision floating-point value `a'.  
310 --------------------------------------------------------------------------------  
311 -*/  
312 -INLINE flag extractFloat64Sign( float64 a )  
313 -{  
314 -  
315 - return a>>63;  
316 -  
317 -}  
318 -  
319 -/*  
320 --------------------------------------------------------------------------------  
321 -Normalizes the subnormal double-precision floating-point value represented  
322 -by the denormalized significand `aSig'. The normalized exponent and  
323 -significand are stored at the locations pointed to by `zExpPtr' and  
324 -`zSigPtr', respectively.  
325 --------------------------------------------------------------------------------  
326 -*/  
327 -static void  
328 - normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )  
329 -{  
330 - int8 shiftCount;  
331 -  
332 - shiftCount = countLeadingZeros64( aSig ) - 11;  
333 - *zSigPtr = aSig<<shiftCount;  
334 - *zExpPtr = 1 - shiftCount;  
335 -  
336 -}  
337 -  
338 -/*  
339 --------------------------------------------------------------------------------  
340 -Packs the sign `zSign', exponent `zExp', and significand `zSig' into a  
341 -double-precision floating-point value, returning the result. After being  
342 -shifted into the proper positions, the three fields are simply added  
343 -together to form the result. This means that any integer portion of `zSig'  
344 -will be added into the exponent. Since a properly normalized significand  
345 -will have an integer portion equal to 1, the `zExp' input should be 1 less  
346 -than the desired result exponent whenever `zSig' is a complete, normalized  
347 -significand.  
348 --------------------------------------------------------------------------------  
349 -*/  
350 -INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )  
351 -{  
352 -  
353 - return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;  
354 -  
355 -}  
356 -  
357 -/*  
358 --------------------------------------------------------------------------------  
359 -Takes an abstract floating-point value having sign `zSign', exponent `zExp',  
360 -and significand `zSig', and returns the proper double-precision floating-  
361 -point value corresponding to the abstract input. Ordinarily, the abstract  
362 -value is simply rounded and packed into the double-precision format, with  
363 -the inexact exception raised if the abstract input cannot be represented  
364 -exactly. If the abstract value is too large, however, the overflow and  
365 -inexact exceptions are raised and an infinity or maximal finite value is  
366 -returned. If the abstract value is too small, the input value is rounded to  
367 -a subnormal number, and the underflow and inexact exceptions are raised if  
368 -the abstract input cannot be represented exactly as a subnormal double-  
369 -precision floating-point number.  
370 - The input significand `zSig' has its binary point between bits 62  
371 -and 61, which is 10 bits to the left of the usual location. This shifted  
372 -significand must be normalized or smaller. If `zSig' is not normalized,  
373 -`zExp' must be 0; in that case, the result returned is a subnormal number,  
374 -and it must not require rounding. In the usual case that `zSig' is  
375 -normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.  
376 -The handling of underflow and overflow follows the IEC/IEEE Standard for  
377 -Binary Floating-point Arithmetic.  
378 --------------------------------------------------------------------------------  
379 -*/  
380 -static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )  
381 -{  
382 - int8 roundingMode;  
383 - flag roundNearestEven;  
384 - int16 roundIncrement, roundBits;  
385 - flag isTiny;  
386 -  
387 - roundingMode = float_rounding_mode;  
388 - roundNearestEven = ( roundingMode == float_round_nearest_even );  
389 - roundIncrement = 0x200;  
390 - if ( ! roundNearestEven ) {  
391 - if ( roundingMode == float_round_to_zero ) {  
392 - roundIncrement = 0;  
393 - }  
394 - else {  
395 - roundIncrement = 0x3FF;  
396 - if ( zSign ) {  
397 - if ( roundingMode == float_round_up ) roundIncrement = 0;  
398 - }  
399 - else {  
400 - if ( roundingMode == float_round_down ) roundIncrement = 0;  
401 - }  
402 - }  
403 - }  
404 - roundBits = zSig & 0x3FF;  
405 - if ( 0x7FD <= (bits16) zExp ) {  
406 - if ( ( 0x7FD < zExp )  
407 - || ( ( zExp == 0x7FD )  
408 - && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )  
409 - ) {  
410 - //register int lr = __builtin_return_address(0);  
411 - //printk("roundAndPackFloat64 called from 0x%08x\n",lr);  
412 - float_raise( float_flag_overflow | float_flag_inexact );  
413 - return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );  
414 - }  
415 - if ( zExp < 0 ) {  
416 - isTiny =  
417 - ( float_detect_tininess == float_tininess_before_rounding )  
418 - || ( zExp < -1 )  
419 - || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );  
420 - shift64RightJamming( zSig, - zExp, &zSig );  
421 - zExp = 0;  
422 - roundBits = zSig & 0x3FF;  
423 - if ( isTiny && roundBits ) float_raise( float_flag_underflow );  
424 - }  
425 - }  
426 - if ( roundBits ) float_exception_flags |= float_flag_inexact;  
427 - zSig = ( zSig + roundIncrement )>>10;  
428 - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );  
429 - if ( zSig == 0 ) zExp = 0;  
430 - return packFloat64( zSign, zExp, zSig );  
431 -  
432 -}  
433 -  
434 -/*  
435 --------------------------------------------------------------------------------  
436 -Takes an abstract floating-point value having sign `zSign', exponent `zExp',  
437 -and significand `zSig', and returns the proper double-precision floating-  
438 -point value corresponding to the abstract input. This routine is just like  
439 -`roundAndPackFloat64' except that `zSig' does not have to be normalized in  
440 -any way. In all cases, `zExp' must be 1 less than the ``true'' floating-  
441 -point exponent.  
442 --------------------------------------------------------------------------------  
443 -*/  
444 -static float64  
445 - normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )  
446 -{  
447 - int8 shiftCount;  
448 -  
449 - shiftCount = countLeadingZeros64( zSig ) - 1;  
450 - return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );  
451 -  
452 -}  
453 -  
454 -#ifdef FLOATX80  
455 -  
456 -/*  
457 --------------------------------------------------------------------------------  
458 -Returns the fraction bits of the extended double-precision floating-point  
459 -value `a'.  
460 --------------------------------------------------------------------------------  
461 -*/  
462 -INLINE bits64 extractFloatx80Frac( floatx80 a )  
463 -{  
464 -  
465 - return a.low;  
466 -  
467 -}  
468 -  
469 -/*  
470 --------------------------------------------------------------------------------  
471 -Returns the exponent bits of the extended double-precision floating-point  
472 -value `a'.  
473 --------------------------------------------------------------------------------  
474 -*/  
475 -INLINE int32 extractFloatx80Exp( floatx80 a )  
476 -{  
477 -  
478 - return a.high & 0x7FFF;  
479 -  
480 -}  
481 -  
482 -/*  
483 --------------------------------------------------------------------------------  
484 -Returns the sign bit of the extended double-precision floating-point value  
485 -`a'.  
486 --------------------------------------------------------------------------------  
487 -*/  
488 -INLINE flag extractFloatx80Sign( floatx80 a )  
489 -{  
490 -  
491 - return a.high>>15;  
492 -  
493 -}  
494 -  
495 -/*  
496 --------------------------------------------------------------------------------  
497 -Normalizes the subnormal extended double-precision floating-point value  
498 -represented by the denormalized significand `aSig'. The normalized exponent  
499 -and significand are stored at the locations pointed to by `zExpPtr' and  
500 -`zSigPtr', respectively.  
501 --------------------------------------------------------------------------------  
502 -*/  
503 -static void  
504 - normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )  
505 -{  
506 - int8 shiftCount;  
507 -  
508 - shiftCount = countLeadingZeros64( aSig );  
509 - *zSigPtr = aSig<<shiftCount;  
510 - *zExpPtr = 1 - shiftCount;  
511 -  
512 -}  
513 -  
514 -/*  
515 --------------------------------------------------------------------------------  
516 -Packs the sign `zSign', exponent `zExp', and significand `zSig' into an  
517 -extended double-precision floating-point value, returning the result.  
518 --------------------------------------------------------------------------------  
519 -*/  
520 -INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )  
521 -{  
522 - floatx80 z;  
523 -  
524 - z.low = zSig;  
525 - z.high = ( ( (bits16) zSign )<<15 ) + zExp;  
526 - return z;  
527 -  
528 -}  
529 -  
530 -/*  
531 --------------------------------------------------------------------------------  
532 -Takes an abstract floating-point value having sign `zSign', exponent `zExp',  
533 -and extended significand formed by the concatenation of `zSig0' and `zSig1',  
534 -and returns the proper extended double-precision floating-point value  
535 -corresponding to the abstract input. Ordinarily, the abstract value is  
536 -rounded and packed into the extended double-precision format, with the  
537 -inexact exception raised if the abstract input cannot be represented  
538 -exactly. If the abstract value is too large, however, the overflow and  
539 -inexact exceptions are raised and an infinity or maximal finite value is  
540 -returned. If the abstract value is too small, the input value is rounded to  
541 -a subnormal number, and the underflow and inexact exceptions are raised if  
542 -the abstract input cannot be represented exactly as a subnormal extended  
543 -double-precision floating-point number.  
544 - If `roundingPrecision' is 32 or 64, the result is rounded to the same  
545 -number of bits as single or double precision, respectively. Otherwise, the  
546 -result is rounded to the full precision of the extended double-precision  
547 -format.  
548 - The input significand must be normalized or smaller. If the input  
549 -significand is not normalized, `zExp' must be 0; in that case, the result  
550 -returned is a subnormal number, and it must not require rounding. The  
551 -handling of underflow and overflow follows the IEC/IEEE Standard for Binary  
552 -Floating-point Arithmetic.  
553 --------------------------------------------------------------------------------  
554 -*/  
555 -static floatx80  
556 - roundAndPackFloatx80(  
557 - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1  
558 - )  
559 -{  
560 - int8 roundingMode;  
561 - flag roundNearestEven, increment, isTiny;  
562 - int64 roundIncrement, roundMask, roundBits;  
563 -  
564 - roundingMode = float_rounding_mode;  
565 - roundNearestEven = ( roundingMode == float_round_nearest_even );  
566 - if ( roundingPrecision == 80 ) goto precision80;  
567 - if ( roundingPrecision == 64 ) {  
568 - roundIncrement = LIT64( 0x0000000000000400 );  
569 - roundMask = LIT64( 0x00000000000007FF );  
570 - }  
571 - else if ( roundingPrecision == 32 ) {  
572 - roundIncrement = LIT64( 0x0000008000000000 );  
573 - roundMask = LIT64( 0x000000FFFFFFFFFF );  
574 - }  
575 - else {  
576 - goto precision80;  
577 - }  
578 - zSig0 |= ( zSig1 != 0 );  
579 - if ( ! roundNearestEven ) {  
580 - if ( roundingMode == float_round_to_zero ) {  
581 - roundIncrement = 0;  
582 - }  
583 - else {  
584 - roundIncrement = roundMask;  
585 - if ( zSign ) {  
586 - if ( roundingMode == float_round_up ) roundIncrement = 0;  
587 - }  
588 - else {  
589 - if ( roundingMode == float_round_down ) roundIncrement = 0;  
590 - }  
591 - }  
592 - }  
593 - roundBits = zSig0 & roundMask;  
594 - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  
595 - if ( ( 0x7FFE < zExp )  
596 - || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )  
597 - ) {  
598 - goto overflow;  
599 - }  
600 - if ( zExp <= 0 ) {  
601 - isTiny =  
602 - ( float_detect_tininess == float_tininess_before_rounding )  
603 - || ( zExp < 0 )  
604 - || ( zSig0 <= zSig0 + roundIncrement );  
605 - shift64RightJamming( zSig0, 1 - zExp, &zSig0 );  
606 - zExp = 0;  
607 - roundBits = zSig0 & roundMask;  
608 - if ( isTiny && roundBits ) float_raise( float_flag_underflow );  
609 - if ( roundBits ) float_exception_flags |= float_flag_inexact;  
610 - zSig0 += roundIncrement;  
611 - if ( (sbits64) zSig0 < 0 ) zExp = 1;  
612 - roundIncrement = roundMask + 1;  
613 - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {  
614 - roundMask |= roundIncrement;  
615 - }  
616 - zSig0 &= ~ roundMask;  
617 - return packFloatx80( zSign, zExp, zSig0 );  
618 - }  
619 - }  
620 - if ( roundBits ) float_exception_flags |= float_flag_inexact;  
621 - zSig0 += roundIncrement;  
622 - if ( zSig0 < roundIncrement ) {  
623 - ++zExp;  
624 - zSig0 = LIT64( 0x8000000000000000 );  
625 - }  
626 - roundIncrement = roundMask + 1;  
627 - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {  
628 - roundMask |= roundIncrement;  
629 - }  
630 - zSig0 &= ~ roundMask;  
631 - if ( zSig0 == 0 ) zExp = 0;  
632 - return packFloatx80( zSign, zExp, zSig0 );  
633 - precision80:  
634 - increment = ( (sbits64) zSig1 < 0 );  
635 - if ( ! roundNearestEven ) {  
636 - if ( roundingMode == float_round_to_zero ) {  
637 - increment = 0;  
638 - }  
639 - else {  
640 - if ( zSign ) {  
641 - increment = ( roundingMode == float_round_down ) && zSig1;  
642 - }  
643 - else {  
644 - increment = ( roundingMode == float_round_up ) && zSig1;  
645 - }  
646 - }  
647 - }  
648 - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  
649 - if ( ( 0x7FFE < zExp )  
650 - || ( ( zExp == 0x7FFE )  
651 - && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )  
652 - && increment  
653 - )  
654 - ) {  
655 - roundMask = 0;  
656 - overflow:  
657 - float_raise( float_flag_overflow | float_flag_inexact );  
658 - if ( ( roundingMode == float_round_to_zero )  
659 - || ( zSign && ( roundingMode == float_round_up ) )  
660 - || ( ! zSign && ( roundingMode == float_round_down ) )  
661 - ) {  
662 - return packFloatx80( zSign, 0x7FFE, ~ roundMask );  
663 - }  
664 - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
665 - }  
666 - if ( zExp <= 0 ) {  
667 - isTiny =  
668 - ( float_detect_tininess == float_tininess_before_rounding )  
669 - || ( zExp < 0 )  
670 - || ! increment  
671 - || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );  
672 - shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );  
673 - zExp = 0;  
674 - if ( isTiny && zSig1 ) float_raise( float_flag_underflow );  
675 - if ( zSig1 ) float_exception_flags |= float_flag_inexact;  
676 - if ( roundNearestEven ) {  
677 - increment = ( (sbits64) zSig1 < 0 );  
678 - }  
679 - else {  
680 - if ( zSign ) {  
681 - increment = ( roundingMode == float_round_down ) && zSig1;  
682 - }  
683 - else {  
684 - increment = ( roundingMode == float_round_up ) && zSig1;  
685 - }  
686 - }  
687 - if ( increment ) {  
688 - ++zSig0;  
689 - zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );  
690 - if ( (sbits64) zSig0 < 0 ) zExp = 1;  
691 - }  
692 - return packFloatx80( zSign, zExp, zSig0 );  
693 - }  
694 - }  
695 - if ( zSig1 ) float_exception_flags |= float_flag_inexact;  
696 - if ( increment ) {  
697 - ++zSig0;  
698 - if ( zSig0 == 0 ) {  
699 - ++zExp;  
700 - zSig0 = LIT64( 0x8000000000000000 );  
701 - }  
702 - else {  
703 - zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );  
704 - }  
705 - }  
706 - else {  
707 - if ( zSig0 == 0 ) zExp = 0;  
708 - }  
709 -  
710 - return packFloatx80( zSign, zExp, zSig0 );  
711 -}  
712 -  
713 -/*  
714 --------------------------------------------------------------------------------  
715 -Takes an abstract floating-point value having sign `zSign', exponent  
716 -`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',  
717 -and returns the proper extended double-precision floating-point value  
718 -corresponding to the abstract input. This routine is just like  
719 -`roundAndPackFloatx80' except that the input significand does not have to be  
720 -normalized.  
721 --------------------------------------------------------------------------------  
722 -*/  
723 -static floatx80  
724 - normalizeRoundAndPackFloatx80(  
725 - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1  
726 - )  
727 -{  
728 - int8 shiftCount;  
729 -  
730 - if ( zSig0 == 0 ) {  
731 - zSig0 = zSig1;  
732 - zSig1 = 0;  
733 - zExp -= 64;  
734 - }  
735 - shiftCount = countLeadingZeros64( zSig0 );  
736 - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );  
737 - zExp -= shiftCount;  
738 - return  
739 - roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );  
740 -  
741 -}  
742 -  
743 -#endif  
744 -  
745 -/*  
746 --------------------------------------------------------------------------------  
747 -Returns the result of converting the 32-bit two's complement integer `a' to  
748 -the single-precision floating-point format. The conversion is performed  
749 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
750 --------------------------------------------------------------------------------  
751 -*/  
752 -float32 int32_to_float32( int32 a )  
753 -{  
754 - flag zSign;  
755 -  
756 - if ( a == 0 ) return 0;  
757 - if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );  
758 - zSign = ( a < 0 );  
759 - return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );  
760 -  
761 -}  
762 -  
763 -/*  
764 --------------------------------------------------------------------------------  
765 -Returns the result of converting the 32-bit two's complement integer `a' to  
766 -the double-precision floating-point format. The conversion is performed  
767 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
768 --------------------------------------------------------------------------------  
769 -*/  
770 -float64 int32_to_float64( int32 a )  
771 -{  
772 - flag aSign;  
773 - uint32 absA;  
774 - int8 shiftCount;  
775 - bits64 zSig;  
776 -  
777 - if ( a == 0 ) return 0;  
778 - aSign = ( a < 0 );  
779 - absA = aSign ? - a : a;  
780 - shiftCount = countLeadingZeros32( absA ) + 21;  
781 - zSig = absA;  
782 - return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );  
783 -  
784 -}  
785 -  
786 -#ifdef FLOATX80  
787 -  
788 -/*  
789 --------------------------------------------------------------------------------  
790 -Returns the result of converting the 32-bit two's complement integer `a'  
791 -to the extended double-precision floating-point format. The conversion  
792 -is performed according to the IEC/IEEE Standard for Binary Floating-point  
793 -Arithmetic.  
794 --------------------------------------------------------------------------------  
795 -*/  
796 -floatx80 int32_to_floatx80( int32 a )  
797 -{  
798 - flag zSign;  
799 - uint32 absA;  
800 - int8 shiftCount;  
801 - bits64 zSig;  
802 -  
803 - if ( a == 0 ) return packFloatx80( 0, 0, 0 );  
804 - zSign = ( a < 0 );  
805 - absA = zSign ? - a : a;  
806 - shiftCount = countLeadingZeros32( absA ) + 32;  
807 - zSig = absA;  
808 - return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );  
809 -  
810 -}  
811 -  
812 -#endif  
813 -  
814 -/*  
815 --------------------------------------------------------------------------------  
816 -Returns the result of converting the single-precision floating-point value  
817 -`a' to the 32-bit two's complement integer format. The conversion is  
818 -performed according to the IEC/IEEE Standard for Binary Floating-point  
819 -Arithmetic---which means in particular that the conversion is rounded  
820 -according to the current rounding mode. If `a' is a NaN, the largest  
821 -positive integer is returned. Otherwise, if the conversion overflows, the  
822 -largest integer with the same sign as `a' is returned.  
823 --------------------------------------------------------------------------------  
824 -*/  
825 -int32 float32_to_int32( float32 a )  
826 -{  
827 - flag aSign;  
828 - int16 aExp, shiftCount;  
829 - bits32 aSig;  
830 - bits64 zSig;  
831 -  
832 - aSig = extractFloat32Frac( a );  
833 - aExp = extractFloat32Exp( a );  
834 - aSign = extractFloat32Sign( a );  
835 - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;  
836 - if ( aExp ) aSig |= 0x00800000;  
837 - shiftCount = 0xAF - aExp;  
838 - zSig = aSig;  
839 - zSig <<= 32;  
840 - if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );  
841 - return roundAndPackInt32( aSign, zSig );  
842 -  
843 -}  
844 -  
845 -/*  
846 --------------------------------------------------------------------------------  
847 -Returns the result of converting the single-precision floating-point value  
848 -`a' to the 32-bit two's complement integer format. The conversion is  
849 -performed according to the IEC/IEEE Standard for Binary Floating-point  
850 -Arithmetic, except that the conversion is always rounded toward zero. If  
851 -`a' is a NaN, the largest positive integer is returned. Otherwise, if the  
852 -conversion overflows, the largest integer with the same sign as `a' is  
853 -returned.  
854 --------------------------------------------------------------------------------  
855 -*/  
856 -int32 float32_to_int32_round_to_zero( float32 a )  
857 -{  
858 - flag aSign;  
859 - int16 aExp, shiftCount;  
860 - bits32 aSig;  
861 - int32 z;  
862 -  
863 - aSig = extractFloat32Frac( a );  
864 - aExp = extractFloat32Exp( a );  
865 - aSign = extractFloat32Sign( a );  
866 - shiftCount = aExp - 0x9E;  
867 - if ( 0 <= shiftCount ) {  
868 - if ( a == 0xCF000000 ) return 0x80000000;  
869 - float_raise( float_flag_invalid );  
870 - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;  
871 - return 0x80000000;  
872 - }  
873 - else if ( aExp <= 0x7E ) {  
874 - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;  
875 - return 0;  
876 - }  
877 - aSig = ( aSig | 0x00800000 )<<8;  
878 - z = aSig>>( - shiftCount );  
879 - if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {  
880 - float_exception_flags |= float_flag_inexact;  
881 - }  
882 - return aSign ? - z : z;  
883 -  
884 -}  
885 -  
886 -/*  
887 --------------------------------------------------------------------------------  
888 -Returns the result of converting the single-precision floating-point value  
889 -`a' to the double-precision floating-point format. The conversion is  
890 -performed according to the IEC/IEEE Standard for Binary Floating-point  
891 -Arithmetic.  
892 --------------------------------------------------------------------------------  
893 -*/  
894 -float64 float32_to_float64( float32 a )  
895 -{  
896 - flag aSign;  
897 - int16 aExp;  
898 - bits32 aSig;  
899 -  
900 - aSig = extractFloat32Frac( a );  
901 - aExp = extractFloat32Exp( a );  
902 - aSign = extractFloat32Sign( a );  
903 - if ( aExp == 0xFF ) {  
904 - if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );  
905 - return packFloat64( aSign, 0x7FF, 0 );  
906 - }  
907 - if ( aExp == 0 ) {  
908 - if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );  
909 - normalizeFloat32Subnormal( aSig, &aExp, &aSig );  
910 - --aExp;  
911 - }  
912 - return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );  
913 -  
914 -}  
915 -  
916 -#ifdef FLOATX80  
917 -  
918 -/*  
919 --------------------------------------------------------------------------------  
920 -Returns the result of converting the single-precision floating-point value  
921 -`a' to the extended double-precision floating-point format. The conversion  
922 -is performed according to the IEC/IEEE Standard for Binary Floating-point  
923 -Arithmetic.  
924 --------------------------------------------------------------------------------  
925 -*/  
926 -floatx80 float32_to_floatx80( float32 a )  
927 -{  
928 - flag aSign;  
929 - int16 aExp;  
930 - bits32 aSig;  
931 -  
932 - aSig = extractFloat32Frac( a );  
933 - aExp = extractFloat32Exp( a );  
934 - aSign = extractFloat32Sign( a );  
935 - if ( aExp == 0xFF ) {  
936 - if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );  
937 - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
938 - }  
939 - if ( aExp == 0 ) {  
940 - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );  
941 - normalizeFloat32Subnormal( aSig, &aExp, &aSig );  
942 - }  
943 - aSig |= 0x00800000;  
944 - return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );  
945 -  
946 -}  
947 -  
948 -#endif  
949 -  
950 -/*  
951 --------------------------------------------------------------------------------  
952 -Rounds the single-precision floating-point value `a' to an integer, and  
953 -returns the result as a single-precision floating-point value. The  
954 -operation is performed according to the IEC/IEEE Standard for Binary  
955 -Floating-point Arithmetic.  
956 --------------------------------------------------------------------------------  
957 -*/  
958 -float32 float32_round_to_int( float32 a )  
959 -{  
960 - flag aSign;  
961 - int16 aExp;  
962 - bits32 lastBitMask, roundBitsMask;  
963 - int8 roundingMode;  
964 - float32 z;  
965 -  
966 - aExp = extractFloat32Exp( a );  
967 - if ( 0x96 <= aExp ) {  
968 - if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {  
969 - return propagateFloat32NaN( a, a );  
970 - }  
971 - return a;  
972 - }  
973 - if ( aExp <= 0x7E ) {  
974 - if ( (bits32) ( a<<1 ) == 0 ) return a;  
975 - float_exception_flags |= float_flag_inexact;  
976 - aSign = extractFloat32Sign( a );  
977 - switch ( float_rounding_mode ) {  
978 - case float_round_nearest_even:  
979 - if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {  
980 - return packFloat32( aSign, 0x7F, 0 );  
981 - }  
982 - break;  
983 - case float_round_down:  
984 - return aSign ? 0xBF800000 : 0;  
985 - case float_round_up:  
986 - return aSign ? 0x80000000 : 0x3F800000;  
987 - }  
988 - return packFloat32( aSign, 0, 0 );  
989 - }  
990 - lastBitMask = 1;  
991 - lastBitMask <<= 0x96 - aExp;  
992 - roundBitsMask = lastBitMask - 1;  
993 - z = a;  
994 - roundingMode = float_rounding_mode;  
995 - if ( roundingMode == float_round_nearest_even ) {  
996 - z += lastBitMask>>1;  
997 - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;  
998 - }  
999 - else if ( roundingMode != float_round_to_zero ) {  
1000 - if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {  
1001 - z += roundBitsMask;  
1002 - }  
1003 - }  
1004 - z &= ~ roundBitsMask;  
1005 - if ( z != a ) float_exception_flags |= float_flag_inexact;  
1006 - return z;  
1007 -  
1008 -}  
1009 -  
1010 -/*  
1011 --------------------------------------------------------------------------------  
1012 -Returns the result of adding the absolute values of the single-precision  
1013 -floating-point values `a' and `b'. If `zSign' is true, the sum is negated  
1014 -before being returned. `zSign' is ignored if the result is a NaN. The  
1015 -addition is performed according to the IEC/IEEE Standard for Binary  
1016 -Floating-point Arithmetic.  
1017 --------------------------------------------------------------------------------  
1018 -*/  
1019 -static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )  
1020 -{  
1021 - int16 aExp, bExp, zExp;  
1022 - bits32 aSig, bSig, zSig;  
1023 - int16 expDiff;  
1024 -  
1025 - aSig = extractFloat32Frac( a );  
1026 - aExp = extractFloat32Exp( a );  
1027 - bSig = extractFloat32Frac( b );  
1028 - bExp = extractFloat32Exp( b );  
1029 - expDiff = aExp - bExp;  
1030 - aSig <<= 6;  
1031 - bSig <<= 6;  
1032 - if ( 0 < expDiff ) {  
1033 - if ( aExp == 0xFF ) {  
1034 - if ( aSig ) return propagateFloat32NaN( a, b );  
1035 - return a;  
1036 - }  
1037 - if ( bExp == 0 ) {  
1038 - --expDiff;  
1039 - }  
1040 - else {  
1041 - bSig |= 0x20000000;  
1042 - }  
1043 - shift32RightJamming( bSig, expDiff, &bSig );  
1044 - zExp = aExp;  
1045 - }  
1046 - else if ( expDiff < 0 ) {  
1047 - if ( bExp == 0xFF ) {  
1048 - if ( bSig ) return propagateFloat32NaN( a, b );  
1049 - return packFloat32( zSign, 0xFF, 0 );  
1050 - }  
1051 - if ( aExp == 0 ) {  
1052 - ++expDiff;  
1053 - }  
1054 - else {  
1055 - aSig |= 0x20000000;  
1056 - }  
1057 - shift32RightJamming( aSig, - expDiff, &aSig );  
1058 - zExp = bExp;  
1059 - }  
1060 - else {  
1061 - if ( aExp == 0xFF ) {  
1062 - if ( aSig | bSig ) return propagateFloat32NaN( a, b );  
1063 - return a;  
1064 - }  
1065 - if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );  
1066 - zSig = 0x40000000 + aSig + bSig;  
1067 - zExp = aExp;  
1068 - goto roundAndPack;  
1069 - }  
1070 - aSig |= 0x20000000;  
1071 - zSig = ( aSig + bSig )<<1;  
1072 - --zExp;  
1073 - if ( (sbits32) zSig < 0 ) {  
1074 - zSig = aSig + bSig;  
1075 - ++zExp;  
1076 - }  
1077 - roundAndPack:  
1078 - return roundAndPackFloat32( zSign, zExp, zSig );  
1079 -  
1080 -}  
1081 -  
1082 -/*  
1083 --------------------------------------------------------------------------------  
1084 -Returns the result of subtracting the absolute values of the single-  
1085 -precision floating-point values `a' and `b'. If `zSign' is true, the  
1086 -difference is negated before being returned. `zSign' is ignored if the  
1087 -result is a NaN. The subtraction is performed according to the IEC/IEEE  
1088 -Standard for Binary Floating-point Arithmetic.  
1089 --------------------------------------------------------------------------------  
1090 -*/  
1091 -static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )  
1092 -{  
1093 - int16 aExp, bExp, zExp;  
1094 - bits32 aSig, bSig, zSig;  
1095 - int16 expDiff;  
1096 -  
1097 - aSig = extractFloat32Frac( a );  
1098 - aExp = extractFloat32Exp( a );  
1099 - bSig = extractFloat32Frac( b );  
1100 - bExp = extractFloat32Exp( b );  
1101 - expDiff = aExp - bExp;  
1102 - aSig <<= 7;  
1103 - bSig <<= 7;  
1104 - if ( 0 < expDiff ) goto aExpBigger;  
1105 - if ( expDiff < 0 ) goto bExpBigger;  
1106 - if ( aExp == 0xFF ) {  
1107 - if ( aSig | bSig ) return propagateFloat32NaN( a, b );  
1108 - float_raise( float_flag_invalid );  
1109 - return float32_default_nan;  
1110 - }  
1111 - if ( aExp == 0 ) {  
1112 - aExp = 1;  
1113 - bExp = 1;  
1114 - }  
1115 - if ( bSig < aSig ) goto aBigger;  
1116 - if ( aSig < bSig ) goto bBigger;  
1117 - return packFloat32( float_rounding_mode == float_round_down, 0, 0 );  
1118 - bExpBigger:  
1119 - if ( bExp == 0xFF ) {  
1120 - if ( bSig ) return propagateFloat32NaN( a, b );  
1121 - return packFloat32( zSign ^ 1, 0xFF, 0 );  
1122 - }  
1123 - if ( aExp == 0 ) {  
1124 - ++expDiff;  
1125 - }  
1126 - else {  
1127 - aSig |= 0x40000000;  
1128 - }  
1129 - shift32RightJamming( aSig, - expDiff, &aSig );  
1130 - bSig |= 0x40000000;  
1131 - bBigger:  
1132 - zSig = bSig - aSig;  
1133 - zExp = bExp;  
1134 - zSign ^= 1;  
1135 - goto normalizeRoundAndPack;  
1136 - aExpBigger:  
1137 - if ( aExp == 0xFF ) {  
1138 - if ( aSig ) return propagateFloat32NaN( a, b );  
1139 - return a;  
1140 - }  
1141 - if ( bExp == 0 ) {  
1142 - --expDiff;  
1143 - }  
1144 - else {  
1145 - bSig |= 0x40000000;  
1146 - }  
1147 - shift32RightJamming( bSig, expDiff, &bSig );  
1148 - aSig |= 0x40000000;  
1149 - aBigger:  
1150 - zSig = aSig - bSig;  
1151 - zExp = aExp;  
1152 - normalizeRoundAndPack:  
1153 - --zExp;  
1154 - return normalizeRoundAndPackFloat32( zSign, zExp, zSig );  
1155 -  
1156 -}  
1157 -  
1158 -/*  
1159 --------------------------------------------------------------------------------  
1160 -Returns the result of adding the single-precision floating-point values `a'  
1161 -and `b'. The operation is performed according to the IEC/IEEE Standard for  
1162 -Binary Floating-point Arithmetic.  
1163 --------------------------------------------------------------------------------  
1164 -*/  
1165 -float32 float32_add( float32 a, float32 b )  
1166 -{  
1167 - flag aSign, bSign;  
1168 -  
1169 - aSign = extractFloat32Sign( a );  
1170 - bSign = extractFloat32Sign( b );  
1171 - if ( aSign == bSign ) {  
1172 - return addFloat32Sigs( a, b, aSign );  
1173 - }  
1174 - else {  
1175 - return subFloat32Sigs( a, b, aSign );  
1176 - }  
1177 -  
1178 -}  
1179 -  
1180 -/*  
1181 --------------------------------------------------------------------------------  
1182 -Returns the result of subtracting the single-precision floating-point values  
1183 -`a' and `b'. The operation is performed according to the IEC/IEEE Standard  
1184 -for Binary Floating-point Arithmetic.  
1185 --------------------------------------------------------------------------------  
1186 -*/  
1187 -float32 float32_sub( float32 a, float32 b )  
1188 -{  
1189 - flag aSign, bSign;  
1190 -  
1191 - aSign = extractFloat32Sign( a );  
1192 - bSign = extractFloat32Sign( b );  
1193 - if ( aSign == bSign ) {  
1194 - return subFloat32Sigs( a, b, aSign );  
1195 - }  
1196 - else {  
1197 - return addFloat32Sigs( a, b, aSign );  
1198 - }  
1199 -  
1200 -}  
1201 -  
1202 -/*  
1203 --------------------------------------------------------------------------------  
1204 -Returns the result of multiplying the single-precision floating-point values  
1205 -`a' and `b'. The operation is performed according to the IEC/IEEE Standard  
1206 -for Binary Floating-point Arithmetic.  
1207 --------------------------------------------------------------------------------  
1208 -*/  
1209 -float32 float32_mul( float32 a, float32 b )  
1210 -{  
1211 - flag aSign, bSign, zSign;  
1212 - int16 aExp, bExp, zExp;  
1213 - bits32 aSig, bSig;  
1214 - bits64 zSig64;  
1215 - bits32 zSig;  
1216 -  
1217 - aSig = extractFloat32Frac( a );  
1218 - aExp = extractFloat32Exp( a );  
1219 - aSign = extractFloat32Sign( a );  
1220 - bSig = extractFloat32Frac( b );  
1221 - bExp = extractFloat32Exp( b );  
1222 - bSign = extractFloat32Sign( b );  
1223 - zSign = aSign ^ bSign;  
1224 - if ( aExp == 0xFF ) {  
1225 - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {  
1226 - return propagateFloat32NaN( a, b );  
1227 - }  
1228 - if ( ( bExp | bSig ) == 0 ) {  
1229 - float_raise( float_flag_invalid );  
1230 - return float32_default_nan;  
1231 - }  
1232 - return packFloat32( zSign, 0xFF, 0 );  
1233 - }  
1234 - if ( bExp == 0xFF ) {  
1235 - if ( bSig ) return propagateFloat32NaN( a, b );  
1236 - if ( ( aExp | aSig ) == 0 ) {  
1237 - float_raise( float_flag_invalid );  
1238 - return float32_default_nan;  
1239 - }  
1240 - return packFloat32( zSign, 0xFF, 0 );  
1241 - }  
1242 - if ( aExp == 0 ) {  
1243 - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );  
1244 - normalizeFloat32Subnormal( aSig, &aExp, &aSig );  
1245 - }  
1246 - if ( bExp == 0 ) {  
1247 - if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );  
1248 - normalizeFloat32Subnormal( bSig, &bExp, &bSig );  
1249 - }  
1250 - zExp = aExp + bExp - 0x7F;  
1251 - aSig = ( aSig | 0x00800000 )<<7;  
1252 - bSig = ( bSig | 0x00800000 )<<8;  
1253 - shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );  
1254 - zSig = zSig64;  
1255 - if ( 0 <= (sbits32) ( zSig<<1 ) ) {  
1256 - zSig <<= 1;  
1257 - --zExp;  
1258 - }  
1259 - return roundAndPackFloat32( zSign, zExp, zSig );  
1260 -  
1261 -}  
1262 -  
1263 -/*  
1264 --------------------------------------------------------------------------------  
1265 -Returns the result of dividing the single-precision floating-point value `a'  
1266 -by the corresponding value `b'. The operation is performed according to the  
1267 -IEC/IEEE Standard for Binary Floating-point Arithmetic.  
1268 --------------------------------------------------------------------------------  
1269 -*/  
1270 -float32 float32_div( float32 a, float32 b )  
1271 -{  
1272 - flag aSign, bSign, zSign;  
1273 - int16 aExp, bExp, zExp;  
1274 - bits32 aSig, bSig, zSig;  
1275 -  
1276 - aSig = extractFloat32Frac( a );  
1277 - aExp = extractFloat32Exp( a );  
1278 - aSign = extractFloat32Sign( a );  
1279 - bSig = extractFloat32Frac( b );  
1280 - bExp = extractFloat32Exp( b );  
1281 - bSign = extractFloat32Sign( b );  
1282 - zSign = aSign ^ bSign;  
1283 - if ( aExp == 0xFF ) {  
1284 - if ( aSig ) return propagateFloat32NaN( a, b );  
1285 - if ( bExp == 0xFF ) {  
1286 - if ( bSig ) return propagateFloat32NaN( a, b );  
1287 - float_raise( float_flag_invalid );  
1288 - return float32_default_nan;  
1289 - }  
1290 - return packFloat32( zSign, 0xFF, 0 );  
1291 - }  
1292 - if ( bExp == 0xFF ) {  
1293 - if ( bSig ) return propagateFloat32NaN( a, b );  
1294 - return packFloat32( zSign, 0, 0 );  
1295 - }  
1296 - if ( bExp == 0 ) {  
1297 - if ( bSig == 0 ) {  
1298 - if ( ( aExp | aSig ) == 0 ) {  
1299 - float_raise( float_flag_invalid );  
1300 - return float32_default_nan;  
1301 - }  
1302 - float_raise( float_flag_divbyzero );  
1303 - return packFloat32( zSign, 0xFF, 0 );  
1304 - }  
1305 - normalizeFloat32Subnormal( bSig, &bExp, &bSig );  
1306 - }  
1307 - if ( aExp == 0 ) {  
1308 - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );  
1309 - normalizeFloat32Subnormal( aSig, &aExp, &aSig );  
1310 - }  
1311 - zExp = aExp - bExp + 0x7D;  
1312 - aSig = ( aSig | 0x00800000 )<<7;  
1313 - bSig = ( bSig | 0x00800000 )<<8;  
1314 - if ( bSig <= ( aSig + aSig ) ) {  
1315 - aSig >>= 1;  
1316 - ++zExp;  
1317 - }  
1318 - zSig = ( ( (bits64) aSig )<<32 ) / bSig;  
1319 - if ( ( zSig & 0x3F ) == 0 ) {  
1320 - zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );  
1321 - }  
1322 - return roundAndPackFloat32( zSign, zExp, zSig );  
1323 -  
1324 -}  
1325 -  
1326 -/*  
1327 --------------------------------------------------------------------------------  
1328 -Returns the remainder of the single-precision floating-point value `a'  
1329 -with respect to the corresponding value `b'. The operation is performed  
1330 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
1331 --------------------------------------------------------------------------------  
1332 -*/  
1333 -float32 float32_rem( float32 a, float32 b )  
1334 -{  
1335 - flag aSign, bSign, zSign;  
1336 - int16 aExp, bExp, expDiff;  
1337 - bits32 aSig, bSig;  
1338 - bits32 q;  
1339 - bits64 aSig64, bSig64, q64;  
1340 - bits32 alternateASig;  
1341 - sbits32 sigMean;  
1342 -  
1343 - aSig = extractFloat32Frac( a );  
1344 - aExp = extractFloat32Exp( a );  
1345 - aSign = extractFloat32Sign( a );  
1346 - bSig = extractFloat32Frac( b );  
1347 - bExp = extractFloat32Exp( b );  
1348 - bSign = extractFloat32Sign( b );  
1349 - if ( aExp == 0xFF ) {  
1350 - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {  
1351 - return propagateFloat32NaN( a, b );  
1352 - }  
1353 - float_raise( float_flag_invalid );  
1354 - return float32_default_nan;  
1355 - }  
1356 - if ( bExp == 0xFF ) {  
1357 - if ( bSig ) return propagateFloat32NaN( a, b );  
1358 - return a;  
1359 - }  
1360 - if ( bExp == 0 ) {  
1361 - if ( bSig == 0 ) {  
1362 - float_raise( float_flag_invalid );  
1363 - return float32_default_nan;  
1364 - }  
1365 - normalizeFloat32Subnormal( bSig, &bExp, &bSig );  
1366 - }  
1367 - if ( aExp == 0 ) {  
1368 - if ( aSig == 0 ) return a;  
1369 - normalizeFloat32Subnormal( aSig, &aExp, &aSig );  
1370 - }  
1371 - expDiff = aExp - bExp;  
1372 - aSig |= 0x00800000;  
1373 - bSig |= 0x00800000;  
1374 - if ( expDiff < 32 ) {  
1375 - aSig <<= 8;  
1376 - bSig <<= 8;  
1377 - if ( expDiff < 0 ) {  
1378 - if ( expDiff < -1 ) return a;  
1379 - aSig >>= 1;  
1380 - }  
1381 - q = ( bSig <= aSig );  
1382 - if ( q ) aSig -= bSig;  
1383 - if ( 0 < expDiff ) {  
1384 - q = ( ( (bits64) aSig )<<32 ) / bSig;  
1385 - q >>= 32 - expDiff;  
1386 - bSig >>= 2;  
1387 - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;  
1388 - }  
1389 - else {  
1390 - aSig >>= 2;  
1391 - bSig >>= 2;  
1392 - }  
1393 - }  
1394 - else {  
1395 - if ( bSig <= aSig ) aSig -= bSig;  
1396 - aSig64 = ( (bits64) aSig )<<40;  
1397 - bSig64 = ( (bits64) bSig )<<40;  
1398 - expDiff -= 64;  
1399 - while ( 0 < expDiff ) {  
1400 - q64 = estimateDiv128To64( aSig64, 0, bSig64 );  
1401 - q64 = ( 2 < q64 ) ? q64 - 2 : 0;  
1402 - aSig64 = - ( ( bSig * q64 )<<38 );  
1403 - expDiff -= 62;  
1404 - }  
1405 - expDiff += 64;  
1406 - q64 = estimateDiv128To64( aSig64, 0, bSig64 );  
1407 - q64 = ( 2 < q64 ) ? q64 - 2 : 0;  
1408 - q = q64>>( 64 - expDiff );  
1409 - bSig <<= 6;  
1410 - aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;  
1411 - }  
1412 - do {  
1413 - alternateASig = aSig;  
1414 - ++q;  
1415 - aSig -= bSig;  
1416 - } while ( 0 <= (sbits32) aSig );  
1417 - sigMean = aSig + alternateASig;  
1418 - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {  
1419 - aSig = alternateASig;  
1420 - }  
1421 - zSign = ( (sbits32) aSig < 0 );  
1422 - if ( zSign ) aSig = - aSig;  
1423 - return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );  
1424 -  
1425 -}  
1426 -  
1427 -/*  
1428 --------------------------------------------------------------------------------  
1429 -Returns the square root of the single-precision floating-point value `a'.  
1430 -The operation is performed according to the IEC/IEEE Standard for Binary  
1431 -Floating-point Arithmetic.  
1432 --------------------------------------------------------------------------------  
1433 -*/  
1434 -float32 float32_sqrt( float32 a )  
1435 -{  
1436 - flag aSign;  
1437 - int16 aExp, zExp;  
1438 - bits32 aSig, zSig;  
1439 - bits64 rem, term;  
1440 -  
1441 - aSig = extractFloat32Frac( a );  
1442 - aExp = extractFloat32Exp( a );  
1443 - aSign = extractFloat32Sign( a );  
1444 - if ( aExp == 0xFF ) {  
1445 - if ( aSig ) return propagateFloat32NaN( a, 0 );  
1446 - if ( ! aSign ) return a;  
1447 - float_raise( float_flag_invalid );  
1448 - return float32_default_nan;  
1449 - }  
1450 - if ( aSign ) {  
1451 - if ( ( aExp | aSig ) == 0 ) return a;  
1452 - float_raise( float_flag_invalid );  
1453 - return float32_default_nan;  
1454 - }  
1455 - if ( aExp == 0 ) {  
1456 - if ( aSig == 0 ) return 0;  
1457 - normalizeFloat32Subnormal( aSig, &aExp, &aSig );  
1458 - }  
1459 - zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;  
1460 - aSig = ( aSig | 0x00800000 )<<8;  
1461 - zSig = estimateSqrt32( aExp, aSig ) + 2;  
1462 - if ( ( zSig & 0x7F ) <= 5 ) {  
1463 - if ( zSig < 2 ) {  
1464 - zSig = 0xFFFFFFFF;  
1465 - }  
1466 - else {  
1467 - aSig >>= aExp & 1;  
1468 - term = ( (bits64) zSig ) * zSig;  
1469 - rem = ( ( (bits64) aSig )<<32 ) - term;  
1470 - while ( (sbits64) rem < 0 ) {  
1471 - --zSig;  
1472 - rem += ( ( (bits64) zSig )<<1 ) | 1;  
1473 - }  
1474 - zSig |= ( rem != 0 );  
1475 - }  
1476 - }  
1477 - shift32RightJamming( zSig, 1, &zSig );  
1478 - return roundAndPackFloat32( 0, zExp, zSig );  
1479 -  
1480 -}  
1481 -  
1482 -/*  
1483 --------------------------------------------------------------------------------  
1484 -Returns 1 if the single-precision floating-point value `a' is equal to the  
1485 -corresponding value `b', and 0 otherwise. The comparison is performed  
1486 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
1487 --------------------------------------------------------------------------------  
1488 -*/  
1489 -flag float32_eq( float32 a, float32 b )  
1490 -{  
1491 -  
1492 - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )  
1493 - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )  
1494 - ) {  
1495 - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {  
1496 - float_raise( float_flag_invalid );  
1497 - }  
1498 - return 0;  
1499 - }  
1500 - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );  
1501 -  
1502 -}  
1503 -  
1504 -/*  
1505 --------------------------------------------------------------------------------  
1506 -Returns 1 if the single-precision floating-point value `a' is less than or  
1507 -equal to the corresponding value `b', and 0 otherwise. The comparison is  
1508 -performed according to the IEC/IEEE Standard for Binary Floating-point  
1509 -Arithmetic.  
1510 --------------------------------------------------------------------------------  
1511 -*/  
1512 -flag float32_le( float32 a, float32 b )  
1513 -{  
1514 - flag aSign, bSign;  
1515 -  
1516 - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )  
1517 - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )  
1518 - ) {  
1519 - float_raise( float_flag_invalid );  
1520 - return 0;  
1521 - }  
1522 - aSign = extractFloat32Sign( a );  
1523 - bSign = extractFloat32Sign( b );  
1524 - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );  
1525 - return ( a == b ) || ( aSign ^ ( a < b ) );  
1526 -  
1527 -}  
1528 -  
1529 -/*  
1530 --------------------------------------------------------------------------------  
1531 -Returns 1 if the single-precision floating-point value `a' is less than  
1532 -the corresponding value `b', and 0 otherwise. The comparison is performed  
1533 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
1534 --------------------------------------------------------------------------------  
1535 -*/  
1536 -flag float32_lt( float32 a, float32 b )  
1537 -{  
1538 - flag aSign, bSign;  
1539 -  
1540 - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )  
1541 - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )  
1542 - ) {  
1543 - float_raise( float_flag_invalid );  
1544 - return 0;  
1545 - }  
1546 - aSign = extractFloat32Sign( a );  
1547 - bSign = extractFloat32Sign( b );  
1548 - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );  
1549 - return ( a != b ) && ( aSign ^ ( a < b ) );  
1550 -  
1551 -}  
1552 -  
1553 -/*  
1554 --------------------------------------------------------------------------------  
1555 -Returns 1 if the single-precision floating-point value `a' is equal to the  
1556 -corresponding value `b', and 0 otherwise. The invalid exception is raised  
1557 -if either operand is a NaN. Otherwise, the comparison is performed  
1558 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
1559 --------------------------------------------------------------------------------  
1560 -*/  
1561 -flag float32_eq_signaling( float32 a, float32 b )  
1562 -{  
1563 -  
1564 - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )  
1565 - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )  
1566 - ) {  
1567 - float_raise( float_flag_invalid );  
1568 - return 0;  
1569 - }  
1570 - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );  
1571 -  
1572 -}  
1573 -  
1574 -/*  
1575 --------------------------------------------------------------------------------  
1576 -Returns 1 if the single-precision floating-point value `a' is less than or  
1577 -equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not  
1578 -cause an exception. Otherwise, the comparison is performed according to the  
1579 -IEC/IEEE Standard for Binary Floating-point Arithmetic.  
1580 --------------------------------------------------------------------------------  
1581 -*/  
1582 -flag float32_le_quiet( float32 a, float32 b )  
1583 -{  
1584 - flag aSign, bSign;  
1585 - //int16 aExp, bExp;  
1586 -  
1587 - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )  
1588 - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )  
1589 - ) {  
1590 - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {  
1591 - float_raise( float_flag_invalid );  
1592 - }  
1593 - return 0;  
1594 - }  
1595 - aSign = extractFloat32Sign( a );  
1596 - bSign = extractFloat32Sign( b );  
1597 - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );  
1598 - return ( a == b ) || ( aSign ^ ( a < b ) );  
1599 -  
1600 -}  
1601 -  
1602 -/*  
1603 --------------------------------------------------------------------------------  
1604 -Returns 1 if the single-precision floating-point value `a' is less than  
1605 -the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an  
1606 -exception. Otherwise, the comparison is performed according to the IEC/IEEE  
1607 -Standard for Binary Floating-point Arithmetic.  
1608 --------------------------------------------------------------------------------  
1609 -*/  
1610 -flag float32_lt_quiet( float32 a, float32 b )  
1611 -{  
1612 - flag aSign, bSign;  
1613 -  
1614 - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )  
1615 - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )  
1616 - ) {  
1617 - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {  
1618 - float_raise( float_flag_invalid );  
1619 - }  
1620 - return 0;  
1621 - }  
1622 - aSign = extractFloat32Sign( a );  
1623 - bSign = extractFloat32Sign( b );  
1624 - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );  
1625 - return ( a != b ) && ( aSign ^ ( a < b ) );  
1626 -  
1627 -}  
1628 -  
1629 -/*  
1630 --------------------------------------------------------------------------------  
1631 -Returns the result of converting the double-precision floating-point value  
1632 -`a' to the 32-bit two's complement integer format. The conversion is  
1633 -performed according to the IEC/IEEE Standard for Binary Floating-point  
1634 -Arithmetic---which means in particular that the conversion is rounded  
1635 -according to the current rounding mode. If `a' is a NaN, the largest  
1636 -positive integer is returned. Otherwise, if the conversion overflows, the  
1637 -largest integer with the same sign as `a' is returned.  
1638 --------------------------------------------------------------------------------  
1639 -*/  
1640 -int32 float64_to_int32( float64 a )  
1641 -{  
1642 - flag aSign;  
1643 - int16 aExp, shiftCount;  
1644 - bits64 aSig;  
1645 -  
1646 - aSig = extractFloat64Frac( a );  
1647 - aExp = extractFloat64Exp( a );  
1648 - aSign = extractFloat64Sign( a );  
1649 - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;  
1650 - if ( aExp ) aSig |= LIT64( 0x0010000000000000 );  
1651 - shiftCount = 0x42C - aExp;  
1652 - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );  
1653 - return roundAndPackInt32( aSign, aSig );  
1654 -  
1655 -}  
1656 -  
1657 -/*  
1658 --------------------------------------------------------------------------------  
1659 -Returns the result of converting the double-precision floating-point value  
1660 -`a' to the 32-bit two's complement integer format. The conversion is  
1661 -performed according to the IEC/IEEE Standard for Binary Floating-point  
1662 -Arithmetic, except that the conversion is always rounded toward zero. If  
1663 -`a' is a NaN, the largest positive integer is returned. Otherwise, if the  
1664 -conversion overflows, the largest integer with the same sign as `a' is  
1665 -returned.  
1666 --------------------------------------------------------------------------------  
1667 -*/  
1668 -int32 float64_to_int32_round_to_zero( float64 a )  
1669 -{  
1670 - flag aSign;  
1671 - int16 aExp, shiftCount;  
1672 - bits64 aSig, savedASig;  
1673 - int32 z;  
1674 -  
1675 - aSig = extractFloat64Frac( a );  
1676 - aExp = extractFloat64Exp( a );  
1677 - aSign = extractFloat64Sign( a );  
1678 - shiftCount = 0x433 - aExp;  
1679 - if ( shiftCount < 21 ) {  
1680 - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;  
1681 - goto invalid;  
1682 - }  
1683 - else if ( 52 < shiftCount ) {  
1684 - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;  
1685 - return 0;  
1686 - }  
1687 - aSig |= LIT64( 0x0010000000000000 );  
1688 - savedASig = aSig;  
1689 - aSig >>= shiftCount;  
1690 - z = aSig;  
1691 - if ( aSign ) z = - z;  
1692 - if ( ( z < 0 ) ^ aSign ) {  
1693 - invalid:  
1694 - float_exception_flags |= float_flag_invalid;  
1695 - return aSign ? 0x80000000 : 0x7FFFFFFF;  
1696 - }  
1697 - if ( ( aSig<<shiftCount ) != savedASig ) {  
1698 - float_exception_flags |= float_flag_inexact;  
1699 - }  
1700 - return z;  
1701 -  
1702 -}  
1703 -  
1704 -/*  
1705 --------------------------------------------------------------------------------  
1706 -Returns the result of converting the double-precision floating-point value  
1707 -`a' to the 32-bit two's complement unsigned integer format. The conversion  
1708 -is performed according to the IEC/IEEE Standard for Binary Floating-point  
1709 -Arithmetic---which means in particular that the conversion is rounded  
1710 -according to the current rounding mode. If `a' is a NaN, the largest  
1711 -positive integer is returned. Otherwise, if the conversion overflows, the  
1712 -largest positive integer is returned.  
1713 --------------------------------------------------------------------------------  
1714 -*/  
1715 -int32 float64_to_uint32( float64 a )  
1716 -{  
1717 - flag aSign;  
1718 - int16 aExp, shiftCount;  
1719 - bits64 aSig;  
1720 -  
1721 - aSig = extractFloat64Frac( a );  
1722 - aExp = extractFloat64Exp( a );  
1723 - aSign = 0; //extractFloat64Sign( a );  
1724 - //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;  
1725 - if ( aExp ) aSig |= LIT64( 0x0010000000000000 );  
1726 - shiftCount = 0x42C - aExp;  
1727 - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );  
1728 - return roundAndPackInt32( aSign, aSig );  
1729 -}  
1730 -  
1731 -/*  
1732 --------------------------------------------------------------------------------  
1733 -Returns the result of converting the double-precision floating-point value  
1734 -`a' to the 32-bit two's complement integer format. The conversion is  
1735 -performed according to the IEC/IEEE Standard for Binary Floating-point  
1736 -Arithmetic, except that the conversion is always rounded toward zero. If  
1737 -`a' is a NaN, the largest positive integer is returned. Otherwise, if the  
1738 -conversion overflows, the largest positive integer is returned.  
1739 --------------------------------------------------------------------------------  
1740 -*/  
1741 -int32 float64_to_uint32_round_to_zero( float64 a )  
1742 -{  
1743 - flag aSign;  
1744 - int16 aExp, shiftCount;  
1745 - bits64 aSig, savedASig;  
1746 - int32 z;  
1747 -  
1748 - aSig = extractFloat64Frac( a );  
1749 - aExp = extractFloat64Exp( a );  
1750 - aSign = extractFloat64Sign( a );  
1751 - shiftCount = 0x433 - aExp;  
1752 - if ( shiftCount < 21 ) {  
1753 - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;  
1754 - goto invalid;  
1755 - }  
1756 - else if ( 52 < shiftCount ) {  
1757 - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;  
1758 - return 0;  
1759 - }  
1760 - aSig |= LIT64( 0x0010000000000000 );  
1761 - savedASig = aSig;  
1762 - aSig >>= shiftCount;  
1763 - z = aSig;  
1764 - if ( aSign ) z = - z;  
1765 - if ( ( z < 0 ) ^ aSign ) {  
1766 - invalid:  
1767 - float_exception_flags |= float_flag_invalid;  
1768 - return aSign ? 0x80000000 : 0x7FFFFFFF;  
1769 - }  
1770 - if ( ( aSig<<shiftCount ) != savedASig ) {  
1771 - float_exception_flags |= float_flag_inexact;  
1772 - }  
1773 - return z;  
1774 -}  
1775 -  
1776 -/*  
1777 --------------------------------------------------------------------------------  
1778 -Returns the result of converting the double-precision floating-point value  
1779 -`a' to the single-precision floating-point format. The conversion is  
1780 -performed according to the IEC/IEEE Standard for Binary Floating-point  
1781 -Arithmetic.  
1782 --------------------------------------------------------------------------------  
1783 -*/  
1784 -float32 float64_to_float32( float64 a )  
1785 -{  
1786 - flag aSign;  
1787 - int16 aExp;  
1788 - bits64 aSig;  
1789 - bits32 zSig;  
1790 -  
1791 - aSig = extractFloat64Frac( a );  
1792 - aExp = extractFloat64Exp( a );  
1793 - aSign = extractFloat64Sign( a );  
1794 - if ( aExp == 0x7FF ) {  
1795 - if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );  
1796 - return packFloat32( aSign, 0xFF, 0 );  
1797 - }  
1798 - shift64RightJamming( aSig, 22, &aSig );  
1799 - zSig = aSig;  
1800 - if ( aExp || zSig ) {  
1801 - zSig |= 0x40000000;  
1802 - aExp -= 0x381;  
1803 - }  
1804 - return roundAndPackFloat32( aSign, aExp, zSig );  
1805 -  
1806 -}  
1807 -  
1808 -#ifdef FLOATX80  
1809 -  
1810 -/*  
1811 --------------------------------------------------------------------------------  
1812 -Returns the result of converting the double-precision floating-point value  
1813 -`a' to the extended double-precision floating-point format. The conversion  
1814 -is performed according to the IEC/IEEE Standard for Binary Floating-point  
1815 -Arithmetic.  
1816 --------------------------------------------------------------------------------  
1817 -*/  
1818 -floatx80 float64_to_floatx80( float64 a )  
1819 -{  
1820 - flag aSign;  
1821 - int16 aExp;  
1822 - bits64 aSig;  
1823 -  
1824 - aSig = extractFloat64Frac( a );  
1825 - aExp = extractFloat64Exp( a );  
1826 - aSign = extractFloat64Sign( a );  
1827 - if ( aExp == 0x7FF ) {  
1828 - if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );  
1829 - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
1830 - }  
1831 - if ( aExp == 0 ) {  
1832 - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );  
1833 - normalizeFloat64Subnormal( aSig, &aExp, &aSig );  
1834 - }  
1835 - return  
1836 - packFloatx80(  
1837 - aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );  
1838 -  
1839 -}  
1840 -  
1841 -#endif  
1842 -  
1843 -/*  
1844 --------------------------------------------------------------------------------  
1845 -Rounds the double-precision floating-point value `a' to an integer, and  
1846 -returns the result as a double-precision floating-point value. The  
1847 -operation is performed according to the IEC/IEEE Standard for Binary  
1848 -Floating-point Arithmetic.  
1849 --------------------------------------------------------------------------------  
1850 -*/  
1851 -float64 float64_round_to_int( float64 a )  
1852 -{  
1853 - flag aSign;  
1854 - int16 aExp;  
1855 - bits64 lastBitMask, roundBitsMask;  
1856 - int8 roundingMode;  
1857 - float64 z;  
1858 -  
1859 - aExp = extractFloat64Exp( a );  
1860 - if ( 0x433 <= aExp ) {  
1861 - if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {  
1862 - return propagateFloat64NaN( a, a );  
1863 - }  
1864 - return a;  
1865 - }  
1866 - if ( aExp <= 0x3FE ) {  
1867 - if ( (bits64) ( a<<1 ) == 0 ) return a;  
1868 - float_exception_flags |= float_flag_inexact;  
1869 - aSign = extractFloat64Sign( a );  
1870 - switch ( float_rounding_mode ) {  
1871 - case float_round_nearest_even:  
1872 - if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {  
1873 - return packFloat64( aSign, 0x3FF, 0 );  
1874 - }  
1875 - break;  
1876 - case float_round_down:  
1877 - return aSign ? LIT64( 0xBFF0000000000000 ) : 0;  
1878 - case float_round_up:  
1879 - return  
1880 - aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );  
1881 - }  
1882 - return packFloat64( aSign, 0, 0 );  
1883 - }  
1884 - lastBitMask = 1;  
1885 - lastBitMask <<= 0x433 - aExp;  
1886 - roundBitsMask = lastBitMask - 1;  
1887 - z = a;  
1888 - roundingMode = float_rounding_mode;  
1889 - if ( roundingMode == float_round_nearest_even ) {  
1890 - z += lastBitMask>>1;  
1891 - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;  
1892 - }  
1893 - else if ( roundingMode != float_round_to_zero ) {  
1894 - if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {  
1895 - z += roundBitsMask;  
1896 - }  
1897 - }  
1898 - z &= ~ roundBitsMask;  
1899 - if ( z != a ) float_exception_flags |= float_flag_inexact;  
1900 - return z;  
1901 -  
1902 -}  
1903 -  
1904 -/*  
1905 --------------------------------------------------------------------------------  
1906 -Returns the result of adding the absolute values of the double-precision  
1907 -floating-point values `a' and `b'. If `zSign' is true, the sum is negated  
1908 -before being returned. `zSign' is ignored if the result is a NaN. The  
1909 -addition is performed according to the IEC/IEEE Standard for Binary  
1910 -Floating-point Arithmetic.  
1911 --------------------------------------------------------------------------------  
1912 -*/  
1913 -static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )  
1914 -{  
1915 - int16 aExp, bExp, zExp;  
1916 - bits64 aSig, bSig, zSig;  
1917 - int16 expDiff;  
1918 -  
1919 - aSig = extractFloat64Frac( a );  
1920 - aExp = extractFloat64Exp( a );  
1921 - bSig = extractFloat64Frac( b );  
1922 - bExp = extractFloat64Exp( b );  
1923 - expDiff = aExp - bExp;  
1924 - aSig <<= 9;  
1925 - bSig <<= 9;  
1926 - if ( 0 < expDiff ) {  
1927 - if ( aExp == 0x7FF ) {  
1928 - if ( aSig ) return propagateFloat64NaN( a, b );  
1929 - return a;  
1930 - }  
1931 - if ( bExp == 0 ) {  
1932 - --expDiff;  
1933 - }  
1934 - else {  
1935 - bSig |= LIT64( 0x2000000000000000 );  
1936 - }  
1937 - shift64RightJamming( bSig, expDiff, &bSig );  
1938 - zExp = aExp;  
1939 - }  
1940 - else if ( expDiff < 0 ) {  
1941 - if ( bExp == 0x7FF ) {  
1942 - if ( bSig ) return propagateFloat64NaN( a, b );  
1943 - return packFloat64( zSign, 0x7FF, 0 );  
1944 - }  
1945 - if ( aExp == 0 ) {  
1946 - ++expDiff;  
1947 - }  
1948 - else {  
1949 - aSig |= LIT64( 0x2000000000000000 );  
1950 - }  
1951 - shift64RightJamming( aSig, - expDiff, &aSig );  
1952 - zExp = bExp;  
1953 - }  
1954 - else {  
1955 - if ( aExp == 0x7FF ) {  
1956 - if ( aSig | bSig ) return propagateFloat64NaN( a, b );  
1957 - return a;  
1958 - }  
1959 - if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );  
1960 - zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;  
1961 - zExp = aExp;  
1962 - goto roundAndPack;  
1963 - }  
1964 - aSig |= LIT64( 0x2000000000000000 );  
1965 - zSig = ( aSig + bSig )<<1;  
1966 - --zExp;  
1967 - if ( (sbits64) zSig < 0 ) {  
1968 - zSig = aSig + bSig;  
1969 - ++zExp;  
1970 - }  
1971 - roundAndPack:  
1972 - return roundAndPackFloat64( zSign, zExp, zSig );  
1973 -  
1974 -}  
1975 -  
1976 -/*  
1977 --------------------------------------------------------------------------------  
1978 -Returns the result of subtracting the absolute values of the double-  
1979 -precision floating-point values `a' and `b'. If `zSign' is true, the  
1980 -difference is negated before being returned. `zSign' is ignored if the  
1981 -result is a NaN. The subtraction is performed according to the IEC/IEEE  
1982 -Standard for Binary Floating-point Arithmetic.  
1983 --------------------------------------------------------------------------------  
1984 -*/  
1985 -static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )  
1986 -{  
1987 - int16 aExp, bExp, zExp;  
1988 - bits64 aSig, bSig, zSig;  
1989 - int16 expDiff;  
1990 -  
1991 - aSig = extractFloat64Frac( a );  
1992 - aExp = extractFloat64Exp( a );  
1993 - bSig = extractFloat64Frac( b );  
1994 - bExp = extractFloat64Exp( b );  
1995 - expDiff = aExp - bExp;  
1996 - aSig <<= 10;  
1997 - bSig <<= 10;  
1998 - if ( 0 < expDiff ) goto aExpBigger;  
1999 - if ( expDiff < 0 ) goto bExpBigger;  
2000 - if ( aExp == 0x7FF ) {  
2001 - if ( aSig | bSig ) return propagateFloat64NaN( a, b );  
2002 - float_raise( float_flag_invalid );  
2003 - return float64_default_nan;  
2004 - }  
2005 - if ( aExp == 0 ) {  
2006 - aExp = 1;  
2007 - bExp = 1;  
2008 - }  
2009 - if ( bSig < aSig ) goto aBigger;  
2010 - if ( aSig < bSig ) goto bBigger;  
2011 - return packFloat64( float_rounding_mode == float_round_down, 0, 0 );  
2012 - bExpBigger:  
2013 - if ( bExp == 0x7FF ) {  
2014 - if ( bSig ) return propagateFloat64NaN( a, b );  
2015 - return packFloat64( zSign ^ 1, 0x7FF, 0 );  
2016 - }  
2017 - if ( aExp == 0 ) {  
2018 - ++expDiff;  
2019 - }  
2020 - else {  
2021 - aSig |= LIT64( 0x4000000000000000 );  
2022 - }  
2023 - shift64RightJamming( aSig, - expDiff, &aSig );  
2024 - bSig |= LIT64( 0x4000000000000000 );  
2025 - bBigger:  
2026 - zSig = bSig - aSig;  
2027 - zExp = bExp;  
2028 - zSign ^= 1;  
2029 - goto normalizeRoundAndPack;  
2030 - aExpBigger:  
2031 - if ( aExp == 0x7FF ) {  
2032 - if ( aSig ) return propagateFloat64NaN( a, b );  
2033 - return a;  
2034 - }  
2035 - if ( bExp == 0 ) {  
2036 - --expDiff;  
2037 - }  
2038 - else {  
2039 - bSig |= LIT64( 0x4000000000000000 );  
2040 - }  
2041 - shift64RightJamming( bSig, expDiff, &bSig );  
2042 - aSig |= LIT64( 0x4000000000000000 );  
2043 - aBigger:  
2044 - zSig = aSig - bSig;  
2045 - zExp = aExp;  
2046 - normalizeRoundAndPack:  
2047 - --zExp;  
2048 - return normalizeRoundAndPackFloat64( zSign, zExp, zSig );  
2049 -  
2050 -}  
2051 -  
2052 -/*  
2053 --------------------------------------------------------------------------------  
2054 -Returns the result of adding the double-precision floating-point values `a'  
2055 -and `b'. The operation is performed according to the IEC/IEEE Standard for  
2056 -Binary Floating-point Arithmetic.  
2057 --------------------------------------------------------------------------------  
2058 -*/  
2059 -float64 float64_add( float64 a, float64 b )  
2060 -{  
2061 - flag aSign, bSign;  
2062 -  
2063 - aSign = extractFloat64Sign( a );  
2064 - bSign = extractFloat64Sign( b );  
2065 - if ( aSign == bSign ) {  
2066 - return addFloat64Sigs( a, b, aSign );  
2067 - }  
2068 - else {  
2069 - return subFloat64Sigs( a, b, aSign );  
2070 - }  
2071 -  
2072 -}  
2073 -  
2074 -/*  
2075 --------------------------------------------------------------------------------  
2076 -Returns the result of subtracting the double-precision floating-point values  
2077 -`a' and `b'. The operation is performed according to the IEC/IEEE Standard  
2078 -for Binary Floating-point Arithmetic.  
2079 --------------------------------------------------------------------------------  
2080 -*/  
2081 -float64 float64_sub( float64 a, float64 b )  
2082 -{  
2083 - flag aSign, bSign;  
2084 -  
2085 - aSign = extractFloat64Sign( a );  
2086 - bSign = extractFloat64Sign( b );  
2087 - if ( aSign == bSign ) {  
2088 - return subFloat64Sigs( a, b, aSign );  
2089 - }  
2090 - else {  
2091 - return addFloat64Sigs( a, b, aSign );  
2092 - }  
2093 -  
2094 -}  
2095 -  
2096 -/*  
2097 --------------------------------------------------------------------------------  
2098 -Returns the result of multiplying the double-precision floating-point values  
2099 -`a' and `b'. The operation is performed according to the IEC/IEEE Standard  
2100 -for Binary Floating-point Arithmetic.  
2101 --------------------------------------------------------------------------------  
2102 -*/  
2103 -float64 float64_mul( float64 a, float64 b )  
2104 -{  
2105 - flag aSign, bSign, zSign;  
2106 - int16 aExp, bExp, zExp;  
2107 - bits64 aSig, bSig, zSig0, zSig1;  
2108 -  
2109 - aSig = extractFloat64Frac( a );  
2110 - aExp = extractFloat64Exp( a );  
2111 - aSign = extractFloat64Sign( a );  
2112 - bSig = extractFloat64Frac( b );  
2113 - bExp = extractFloat64Exp( b );  
2114 - bSign = extractFloat64Sign( b );  
2115 - zSign = aSign ^ bSign;  
2116 - if ( aExp == 0x7FF ) {  
2117 - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {  
2118 - return propagateFloat64NaN( a, b );  
2119 - }  
2120 - if ( ( bExp | bSig ) == 0 ) {  
2121 - float_raise( float_flag_invalid );  
2122 - return float64_default_nan;  
2123 - }  
2124 - return packFloat64( zSign, 0x7FF, 0 );  
2125 - }  
2126 - if ( bExp == 0x7FF ) {  
2127 - if ( bSig ) return propagateFloat64NaN( a, b );  
2128 - if ( ( aExp | aSig ) == 0 ) {  
2129 - float_raise( float_flag_invalid );  
2130 - return float64_default_nan;  
2131 - }  
2132 - return packFloat64( zSign, 0x7FF, 0 );  
2133 - }  
2134 - if ( aExp == 0 ) {  
2135 - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );  
2136 - normalizeFloat64Subnormal( aSig, &aExp, &aSig );  
2137 - }  
2138 - if ( bExp == 0 ) {  
2139 - if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );  
2140 - normalizeFloat64Subnormal( bSig, &bExp, &bSig );  
2141 - }  
2142 - zExp = aExp + bExp - 0x3FF;  
2143 - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;  
2144 - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;  
2145 - mul64To128( aSig, bSig, &zSig0, &zSig1 );  
2146 - zSig0 |= ( zSig1 != 0 );  
2147 - if ( 0 <= (sbits64) ( zSig0<<1 ) ) {  
2148 - zSig0 <<= 1;  
2149 - --zExp;  
2150 - }  
2151 - return roundAndPackFloat64( zSign, zExp, zSig0 );  
2152 -  
2153 -}  
2154 -  
2155 -/*  
2156 --------------------------------------------------------------------------------  
2157 -Returns the result of dividing the double-precision floating-point value `a'  
2158 -by the corresponding value `b'. The operation is performed according to  
2159 -the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2160 --------------------------------------------------------------------------------  
2161 -*/  
2162 -float64 float64_div( float64 a, float64 b )  
2163 -{  
2164 - flag aSign, bSign, zSign;  
2165 - int16 aExp, bExp, zExp;  
2166 - bits64 aSig, bSig, zSig;  
2167 - bits64 rem0, rem1;  
2168 - bits64 term0, term1;  
2169 -  
2170 - aSig = extractFloat64Frac( a );  
2171 - aExp = extractFloat64Exp( a );  
2172 - aSign = extractFloat64Sign( a );  
2173 - bSig = extractFloat64Frac( b );  
2174 - bExp = extractFloat64Exp( b );  
2175 - bSign = extractFloat64Sign( b );  
2176 - zSign = aSign ^ bSign;  
2177 - if ( aExp == 0x7FF ) {  
2178 - if ( aSig ) return propagateFloat64NaN( a, b );  
2179 - if ( bExp == 0x7FF ) {  
2180 - if ( bSig ) return propagateFloat64NaN( a, b );  
2181 - float_raise( float_flag_invalid );  
2182 - return float64_default_nan;  
2183 - }  
2184 - return packFloat64( zSign, 0x7FF, 0 );  
2185 - }  
2186 - if ( bExp == 0x7FF ) {  
2187 - if ( bSig ) return propagateFloat64NaN( a, b );  
2188 - return packFloat64( zSign, 0, 0 );  
2189 - }  
2190 - if ( bExp == 0 ) {  
2191 - if ( bSig == 0 ) {  
2192 - if ( ( aExp | aSig ) == 0 ) {  
2193 - float_raise( float_flag_invalid );  
2194 - return float64_default_nan;  
2195 - }  
2196 - float_raise( float_flag_divbyzero );  
2197 - return packFloat64( zSign, 0x7FF, 0 );  
2198 - }  
2199 - normalizeFloat64Subnormal( bSig, &bExp, &bSig );  
2200 - }  
2201 - if ( aExp == 0 ) {  
2202 - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );  
2203 - normalizeFloat64Subnormal( aSig, &aExp, &aSig );  
2204 - }  
2205 - zExp = aExp - bExp + 0x3FD;  
2206 - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;  
2207 - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;  
2208 - if ( bSig <= ( aSig + aSig ) ) {  
2209 - aSig >>= 1;  
2210 - ++zExp;  
2211 - }  
2212 - zSig = estimateDiv128To64( aSig, 0, bSig );  
2213 - if ( ( zSig & 0x1FF ) <= 2 ) {  
2214 - mul64To128( bSig, zSig, &term0, &term1 );  
2215 - sub128( aSig, 0, term0, term1, &rem0, &rem1 );  
2216 - while ( (sbits64) rem0 < 0 ) {  
2217 - --zSig;  
2218 - add128( rem0, rem1, 0, bSig, &rem0, &rem1 );  
2219 - }  
2220 - zSig |= ( rem1 != 0 );  
2221 - }  
2222 - return roundAndPackFloat64( zSign, zExp, zSig );  
2223 -  
2224 -}  
2225 -  
2226 -/*  
2227 --------------------------------------------------------------------------------  
2228 -Returns the remainder of the double-precision floating-point value `a'  
2229 -with respect to the corresponding value `b'. The operation is performed  
2230 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2231 --------------------------------------------------------------------------------  
2232 -*/  
2233 -float64 float64_rem( float64 a, float64 b )  
2234 -{  
2235 - flag aSign, bSign, zSign;  
2236 - int16 aExp, bExp, expDiff;  
2237 - bits64 aSig, bSig;  
2238 - bits64 q, alternateASig;  
2239 - sbits64 sigMean;  
2240 -  
2241 - aSig = extractFloat64Frac( a );  
2242 - aExp = extractFloat64Exp( a );  
2243 - aSign = extractFloat64Sign( a );  
2244 - bSig = extractFloat64Frac( b );  
2245 - bExp = extractFloat64Exp( b );  
2246 - bSign = extractFloat64Sign( b );  
2247 - if ( aExp == 0x7FF ) {  
2248 - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {  
2249 - return propagateFloat64NaN( a, b );  
2250 - }  
2251 - float_raise( float_flag_invalid );  
2252 - return float64_default_nan;  
2253 - }  
2254 - if ( bExp == 0x7FF ) {  
2255 - if ( bSig ) return propagateFloat64NaN( a, b );  
2256 - return a;  
2257 - }  
2258 - if ( bExp == 0 ) {  
2259 - if ( bSig == 0 ) {  
2260 - float_raise( float_flag_invalid );  
2261 - return float64_default_nan;  
2262 - }  
2263 - normalizeFloat64Subnormal( bSig, &bExp, &bSig );  
2264 - }  
2265 - if ( aExp == 0 ) {  
2266 - if ( aSig == 0 ) return a;  
2267 - normalizeFloat64Subnormal( aSig, &aExp, &aSig );  
2268 - }  
2269 - expDiff = aExp - bExp;  
2270 - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;  
2271 - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;  
2272 - if ( expDiff < 0 ) {  
2273 - if ( expDiff < -1 ) return a;  
2274 - aSig >>= 1;  
2275 - }  
2276 - q = ( bSig <= aSig );  
2277 - if ( q ) aSig -= bSig;  
2278 - expDiff -= 64;  
2279 - while ( 0 < expDiff ) {  
2280 - q = estimateDiv128To64( aSig, 0, bSig );  
2281 - q = ( 2 < q ) ? q - 2 : 0;  
2282 - aSig = - ( ( bSig>>2 ) * q );  
2283 - expDiff -= 62;  
2284 - }  
2285 - expDiff += 64;  
2286 - if ( 0 < expDiff ) {  
2287 - q = estimateDiv128To64( aSig, 0, bSig );  
2288 - q = ( 2 < q ) ? q - 2 : 0;  
2289 - q >>= 64 - expDiff;  
2290 - bSig >>= 2;  
2291 - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;  
2292 - }  
2293 - else {  
2294 - aSig >>= 2;  
2295 - bSig >>= 2;  
2296 - }  
2297 - do {  
2298 - alternateASig = aSig;  
2299 - ++q;  
2300 - aSig -= bSig;  
2301 - } while ( 0 <= (sbits64) aSig );  
2302 - sigMean = aSig + alternateASig;  
2303 - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {  
2304 - aSig = alternateASig;  
2305 - }  
2306 - zSign = ( (sbits64) aSig < 0 );  
2307 - if ( zSign ) aSig = - aSig;  
2308 - return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );  
2309 -  
2310 -}  
2311 -  
2312 -/*  
2313 --------------------------------------------------------------------------------  
2314 -Returns the square root of the double-precision floating-point value `a'.  
2315 -The operation is performed according to the IEC/IEEE Standard for Binary  
2316 -Floating-point Arithmetic.  
2317 --------------------------------------------------------------------------------  
2318 -*/  
2319 -float64 float64_sqrt( float64 a )  
2320 -{  
2321 - flag aSign;  
2322 - int16 aExp, zExp;  
2323 - bits64 aSig, zSig;  
2324 - bits64 rem0, rem1, term0, term1; //, shiftedRem;  
2325 - //float64 z;  
2326 -  
2327 - aSig = extractFloat64Frac( a );  
2328 - aExp = extractFloat64Exp( a );  
2329 - aSign = extractFloat64Sign( a );  
2330 - if ( aExp == 0x7FF ) {  
2331 - if ( aSig ) return propagateFloat64NaN( a, a );  
2332 - if ( ! aSign ) return a;  
2333 - float_raise( float_flag_invalid );  
2334 - return float64_default_nan;  
2335 - }  
2336 - if ( aSign ) {  
2337 - if ( ( aExp | aSig ) == 0 ) return a;  
2338 - float_raise( float_flag_invalid );  
2339 - return float64_default_nan;  
2340 - }  
2341 - if ( aExp == 0 ) {  
2342 - if ( aSig == 0 ) return 0;  
2343 - normalizeFloat64Subnormal( aSig, &aExp, &aSig );  
2344 - }  
2345 - zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;  
2346 - aSig |= LIT64( 0x0010000000000000 );  
2347 - zSig = estimateSqrt32( aExp, aSig>>21 );  
2348 - zSig <<= 31;  
2349 - aSig <<= 9 - ( aExp & 1 );  
2350 - zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;  
2351 - if ( ( zSig & 0x3FF ) <= 5 ) {  
2352 - if ( zSig < 2 ) {  
2353 - zSig = LIT64( 0xFFFFFFFFFFFFFFFF );  
2354 - }  
2355 - else {  
2356 - aSig <<= 2;  
2357 - mul64To128( zSig, zSig, &term0, &term1 );  
2358 - sub128( aSig, 0, term0, term1, &rem0, &rem1 );  
2359 - while ( (sbits64) rem0 < 0 ) {  
2360 - --zSig;  
2361 - shortShift128Left( 0, zSig, 1, &term0, &term1 );  
2362 - term1 |= 1;  
2363 - add128( rem0, rem1, term0, term1, &rem0, &rem1 );  
2364 - }  
2365 - zSig |= ( ( rem0 | rem1 ) != 0 );  
2366 - }  
2367 - }  
2368 - shift64RightJamming( zSig, 1, &zSig );  
2369 - return roundAndPackFloat64( 0, zExp, zSig );  
2370 -  
2371 -}  
2372 -  
2373 -/*  
2374 --------------------------------------------------------------------------------  
2375 -Returns 1 if the double-precision floating-point value `a' is equal to the  
2376 -corresponding value `b', and 0 otherwise. The comparison is performed  
2377 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2378 --------------------------------------------------------------------------------  
2379 -*/  
2380 -flag float64_eq( float64 a, float64 b )  
2381 -{  
2382 -  
2383 - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )  
2384 - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )  
2385 - ) {  
2386 - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {  
2387 - float_raise( float_flag_invalid );  
2388 - }  
2389 - return 0;  
2390 - }  
2391 - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );  
2392 -  
2393 -}  
2394 -  
2395 -/*  
2396 --------------------------------------------------------------------------------  
2397 -Returns 1 if the double-precision floating-point value `a' is less than or  
2398 -equal to the corresponding value `b', and 0 otherwise. The comparison is  
2399 -performed according to the IEC/IEEE Standard for Binary Floating-point  
2400 -Arithmetic.  
2401 --------------------------------------------------------------------------------  
2402 -*/  
2403 -flag float64_le( float64 a, float64 b )  
2404 -{  
2405 - flag aSign, bSign;  
2406 -  
2407 - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )  
2408 - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )  
2409 - ) {  
2410 - float_raise( float_flag_invalid );  
2411 - return 0;  
2412 - }  
2413 - aSign = extractFloat64Sign( a );  
2414 - bSign = extractFloat64Sign( b );  
2415 - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );  
2416 - return ( a == b ) || ( aSign ^ ( a < b ) );  
2417 -  
2418 -}  
2419 -  
2420 -/*  
2421 --------------------------------------------------------------------------------  
2422 -Returns 1 if the double-precision floating-point value `a' is less than  
2423 -the corresponding value `b', and 0 otherwise. The comparison is performed  
2424 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2425 --------------------------------------------------------------------------------  
2426 -*/  
2427 -flag float64_lt( float64 a, float64 b )  
2428 -{  
2429 - flag aSign, bSign;  
2430 -  
2431 - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )  
2432 - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )  
2433 - ) {  
2434 - float_raise( float_flag_invalid );  
2435 - return 0;  
2436 - }  
2437 - aSign = extractFloat64Sign( a );  
2438 - bSign = extractFloat64Sign( b );  
2439 - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );  
2440 - return ( a != b ) && ( aSign ^ ( a < b ) );  
2441 -  
2442 -}  
2443 -  
2444 -/*  
2445 --------------------------------------------------------------------------------  
2446 -Returns 1 if the double-precision floating-point value `a' is equal to the  
2447 -corresponding value `b', and 0 otherwise. The invalid exception is raised  
2448 -if either operand is a NaN. Otherwise, the comparison is performed  
2449 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2450 --------------------------------------------------------------------------------  
2451 -*/  
2452 -flag float64_eq_signaling( float64 a, float64 b )  
2453 -{  
2454 -  
2455 - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )  
2456 - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )  
2457 - ) {  
2458 - float_raise( float_flag_invalid );  
2459 - return 0;  
2460 - }  
2461 - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );  
2462 -  
2463 -}  
2464 -  
2465 -/*  
2466 --------------------------------------------------------------------------------  
2467 -Returns 1 if the double-precision floating-point value `a' is less than or  
2468 -equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not  
2469 -cause an exception. Otherwise, the comparison is performed according to the  
2470 -IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2471 --------------------------------------------------------------------------------  
2472 -*/  
2473 -flag float64_le_quiet( float64 a, float64 b )  
2474 -{  
2475 - flag aSign, bSign;  
2476 - //int16 aExp, bExp;  
2477 -  
2478 - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )  
2479 - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )  
2480 - ) {  
2481 - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {  
2482 - float_raise( float_flag_invalid );  
2483 - }  
2484 - return 0;  
2485 - }  
2486 - aSign = extractFloat64Sign( a );  
2487 - bSign = extractFloat64Sign( b );  
2488 - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );  
2489 - return ( a == b ) || ( aSign ^ ( a < b ) );  
2490 -  
2491 -}  
2492 -  
2493 -/*  
2494 --------------------------------------------------------------------------------  
2495 -Returns 1 if the double-precision floating-point value `a' is less than  
2496 -the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an  
2497 -exception. Otherwise, the comparison is performed according to the IEC/IEEE  
2498 -Standard for Binary Floating-point Arithmetic.  
2499 --------------------------------------------------------------------------------  
2500 -*/  
2501 -flag float64_lt_quiet( float64 a, float64 b )  
2502 -{  
2503 - flag aSign, bSign;  
2504 -  
2505 - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )  
2506 - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )  
2507 - ) {  
2508 - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {  
2509 - float_raise( float_flag_invalid );  
2510 - }  
2511 - return 0;  
2512 - }  
2513 - aSign = extractFloat64Sign( a );  
2514 - bSign = extractFloat64Sign( b );  
2515 - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );  
2516 - return ( a != b ) && ( aSign ^ ( a < b ) );  
2517 -  
2518 -}  
2519 -  
2520 -#ifdef FLOATX80  
2521 -  
2522 -/*  
2523 --------------------------------------------------------------------------------  
2524 -Returns the result of converting the extended double-precision floating-  
2525 -point value `a' to the 32-bit two's complement integer format. The  
2526 -conversion is performed according to the IEC/IEEE Standard for Binary  
2527 -Floating-point Arithmetic---which means in particular that the conversion  
2528 -is rounded according to the current rounding mode. If `a' is a NaN, the  
2529 -largest positive integer is returned. Otherwise, if the conversion  
2530 -overflows, the largest integer with the same sign as `a' is returned.  
2531 --------------------------------------------------------------------------------  
2532 -*/  
2533 -int32 floatx80_to_int32( floatx80 a )  
2534 -{  
2535 - flag aSign;  
2536 - int32 aExp, shiftCount;  
2537 - bits64 aSig;  
2538 -  
2539 - aSig = extractFloatx80Frac( a );  
2540 - aExp = extractFloatx80Exp( a );  
2541 - aSign = extractFloatx80Sign( a );  
2542 - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;  
2543 - shiftCount = 0x4037 - aExp;  
2544 - if ( shiftCount <= 0 ) shiftCount = 1;  
2545 - shift64RightJamming( aSig, shiftCount, &aSig );  
2546 - return roundAndPackInt32( aSign, aSig );  
2547 -  
2548 -}  
2549 -  
2550 -/*  
2551 --------------------------------------------------------------------------------  
2552 -Returns the result of converting the extended double-precision floating-  
2553 -point value `a' to the 32-bit two's complement integer format. The  
2554 -conversion is performed according to the IEC/IEEE Standard for Binary  
2555 -Floating-point Arithmetic, except that the conversion is always rounded  
2556 -toward zero. If `a' is a NaN, the largest positive integer is returned.  
2557 -Otherwise, if the conversion overflows, the largest integer with the same  
2558 -sign as `a' is returned.  
2559 --------------------------------------------------------------------------------  
2560 -*/  
2561 -int32 floatx80_to_int32_round_to_zero( floatx80 a )  
2562 -{  
2563 - flag aSign;  
2564 - int32 aExp, shiftCount;  
2565 - bits64 aSig, savedASig;  
2566 - int32 z;  
2567 -  
2568 - aSig = extractFloatx80Frac( a );  
2569 - aExp = extractFloatx80Exp( a );  
2570 - aSign = extractFloatx80Sign( a );  
2571 - shiftCount = 0x403E - aExp;  
2572 - if ( shiftCount < 32 ) {  
2573 - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;  
2574 - goto invalid;  
2575 - }  
2576 - else if ( 63 < shiftCount ) {  
2577 - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;  
2578 - return 0;  
2579 - }  
2580 - savedASig = aSig;  
2581 - aSig >>= shiftCount;  
2582 - z = aSig;  
2583 - if ( aSign ) z = - z;  
2584 - if ( ( z < 0 ) ^ aSign ) {  
2585 - invalid:  
2586 - float_exception_flags |= float_flag_invalid;  
2587 - return aSign ? 0x80000000 : 0x7FFFFFFF;  
2588 - }  
2589 - if ( ( aSig<<shiftCount ) != savedASig ) {  
2590 - float_exception_flags |= float_flag_inexact;  
2591 - }  
2592 - return z;  
2593 -  
2594 -}  
2595 -  
2596 -/*  
2597 --------------------------------------------------------------------------------  
2598 -Returns the result of converting the extended double-precision floating-  
2599 -point value `a' to the single-precision floating-point format. The  
2600 -conversion is performed according to the IEC/IEEE Standard for Binary  
2601 -Floating-point Arithmetic.  
2602 --------------------------------------------------------------------------------  
2603 -*/  
2604 -float32 floatx80_to_float32( floatx80 a )  
2605 -{  
2606 - flag aSign;  
2607 - int32 aExp;  
2608 - bits64 aSig;  
2609 -  
2610 - aSig = extractFloatx80Frac( a );  
2611 - aExp = extractFloatx80Exp( a );  
2612 - aSign = extractFloatx80Sign( a );  
2613 - if ( aExp == 0x7FFF ) {  
2614 - if ( (bits64) ( aSig<<1 ) ) {  
2615 - return commonNaNToFloat32( floatx80ToCommonNaN( a ) );  
2616 - }  
2617 - return packFloat32( aSign, 0xFF, 0 );  
2618 - }  
2619 - shift64RightJamming( aSig, 33, &aSig );  
2620 - if ( aExp || aSig ) aExp -= 0x3F81;  
2621 - return roundAndPackFloat32( aSign, aExp, aSig );  
2622 -  
2623 -}  
2624 -  
2625 -/*  
2626 --------------------------------------------------------------------------------  
2627 -Returns the result of converting the extended double-precision floating-  
2628 -point value `a' to the double-precision floating-point format. The  
2629 -conversion is performed according to the IEC/IEEE Standard for Binary  
2630 -Floating-point Arithmetic.  
2631 --------------------------------------------------------------------------------  
2632 -*/  
2633 -float64 floatx80_to_float64( floatx80 a )  
2634 -{  
2635 - flag aSign;  
2636 - int32 aExp;  
2637 - bits64 aSig, zSig;  
2638 -  
2639 - aSig = extractFloatx80Frac( a );  
2640 - aExp = extractFloatx80Exp( a );  
2641 - aSign = extractFloatx80Sign( a );  
2642 - if ( aExp == 0x7FFF ) {  
2643 - if ( (bits64) ( aSig<<1 ) ) {  
2644 - return commonNaNToFloat64( floatx80ToCommonNaN( a ) );  
2645 - }  
2646 - return packFloat64( aSign, 0x7FF, 0 );  
2647 - }  
2648 - shift64RightJamming( aSig, 1, &zSig );  
2649 - if ( aExp || aSig ) aExp -= 0x3C01;  
2650 - return roundAndPackFloat64( aSign, aExp, zSig );  
2651 -  
2652 -}  
2653 -  
2654 -/*  
2655 --------------------------------------------------------------------------------  
2656 -Rounds the extended double-precision floating-point value `a' to an integer,  
2657 -and returns the result as an extended quadruple-precision floating-point  
2658 -value. The operation is performed according to the IEC/IEEE Standard for  
2659 -Binary Floating-point Arithmetic.  
2660 --------------------------------------------------------------------------------  
2661 -*/  
2662 -floatx80 floatx80_round_to_int( floatx80 a )  
2663 -{  
2664 - flag aSign;  
2665 - int32 aExp;  
2666 - bits64 lastBitMask, roundBitsMask;  
2667 - int8 roundingMode;  
2668 - floatx80 z;  
2669 -  
2670 - aExp = extractFloatx80Exp( a );  
2671 - if ( 0x403E <= aExp ) {  
2672 - if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {  
2673 - return propagateFloatx80NaN( a, a );  
2674 - }  
2675 - return a;  
2676 - }  
2677 - if ( aExp <= 0x3FFE ) {  
2678 - if ( ( aExp == 0 )  
2679 - && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {  
2680 - return a;  
2681 - }  
2682 - float_exception_flags |= float_flag_inexact;  
2683 - aSign = extractFloatx80Sign( a );  
2684 - switch ( float_rounding_mode ) {  
2685 - case float_round_nearest_even:  
2686 - if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )  
2687 - ) {  
2688 - return  
2689 - packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );  
2690 - }  
2691 - break;  
2692 - case float_round_down:  
2693 - return  
2694 - aSign ?  
2695 - packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )  
2696 - : packFloatx80( 0, 0, 0 );  
2697 - case float_round_up:  
2698 - return  
2699 - aSign ? packFloatx80( 1, 0, 0 )  
2700 - : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );  
2701 - }  
2702 - return packFloatx80( aSign, 0, 0 );  
2703 - }  
2704 - lastBitMask = 1;  
2705 - lastBitMask <<= 0x403E - aExp;  
2706 - roundBitsMask = lastBitMask - 1;  
2707 - z = a;  
2708 - roundingMode = float_rounding_mode;  
2709 - if ( roundingMode == float_round_nearest_even ) {  
2710 - z.low += lastBitMask>>1;  
2711 - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;  
2712 - }  
2713 - else if ( roundingMode != float_round_to_zero ) {  
2714 - if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {  
2715 - z.low += roundBitsMask;  
2716 - }  
2717 - }  
2718 - z.low &= ~ roundBitsMask;  
2719 - if ( z.low == 0 ) {  
2720 - ++z.high;  
2721 - z.low = LIT64( 0x8000000000000000 );  
2722 - }  
2723 - if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;  
2724 - return z;  
2725 -  
2726 -}  
2727 -  
2728 -/*  
2729 --------------------------------------------------------------------------------  
2730 -Returns the result of adding the absolute values of the extended double-  
2731 -precision floating-point values `a' and `b'. If `zSign' is true, the sum is  
2732 -negated before being returned. `zSign' is ignored if the result is a NaN.  
2733 -The addition is performed according to the IEC/IEEE Standard for Binary  
2734 -Floating-point Arithmetic.  
2735 --------------------------------------------------------------------------------  
2736 -*/  
2737 -static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )  
2738 -{  
2739 - int32 aExp, bExp, zExp;  
2740 - bits64 aSig, bSig, zSig0, zSig1;  
2741 - int32 expDiff;  
2742 -  
2743 - aSig = extractFloatx80Frac( a );  
2744 - aExp = extractFloatx80Exp( a );  
2745 - bSig = extractFloatx80Frac( b );  
2746 - bExp = extractFloatx80Exp( b );  
2747 - expDiff = aExp - bExp;  
2748 - if ( 0 < expDiff ) {  
2749 - if ( aExp == 0x7FFF ) {  
2750 - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );  
2751 - return a;  
2752 - }  
2753 - if ( bExp == 0 ) --expDiff;  
2754 - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );  
2755 - zExp = aExp;  
2756 - }  
2757 - else if ( expDiff < 0 ) {  
2758 - if ( bExp == 0x7FFF ) {  
2759 - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );  
2760 - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
2761 - }  
2762 - if ( aExp == 0 ) ++expDiff;  
2763 - shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );  
2764 - zExp = bExp;  
2765 - }  
2766 - else {  
2767 - if ( aExp == 0x7FFF ) {  
2768 - if ( (bits64) ( ( aSig | bSig )<<1 ) ) {  
2769 - return propagateFloatx80NaN( a, b );  
2770 - }  
2771 - return a;  
2772 - }  
2773 - zSig1 = 0;  
2774 - zSig0 = aSig + bSig;  
2775 - if ( aExp == 0 ) {  
2776 - normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );  
2777 - goto roundAndPack;  
2778 - }  
2779 - zExp = aExp;  
2780 - goto shiftRight1;  
2781 - }  
2782 -  
2783 - zSig0 = aSig + bSig;  
2784 -  
2785 - if ( (sbits64) zSig0 < 0 ) goto roundAndPack;  
2786 - shiftRight1:  
2787 - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );  
2788 - zSig0 |= LIT64( 0x8000000000000000 );  
2789 - ++zExp;  
2790 - roundAndPack:  
2791 - return  
2792 - roundAndPackFloatx80(  
2793 - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );  
2794 -  
2795 -}  
2796 -  
2797 -/*  
2798 --------------------------------------------------------------------------------  
2799 -Returns the result of subtracting the absolute values of the extended  
2800 -double-precision floating-point values `a' and `b'. If `zSign' is true,  
2801 -the difference is negated before being returned. `zSign' is ignored if the  
2802 -result is a NaN. The subtraction is performed according to the IEC/IEEE  
2803 -Standard for Binary Floating-point Arithmetic.  
2804 --------------------------------------------------------------------------------  
2805 -*/  
2806 -static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )  
2807 -{  
2808 - int32 aExp, bExp, zExp;  
2809 - bits64 aSig, bSig, zSig0, zSig1;  
2810 - int32 expDiff;  
2811 - floatx80 z;  
2812 -  
2813 - aSig = extractFloatx80Frac( a );  
2814 - aExp = extractFloatx80Exp( a );  
2815 - bSig = extractFloatx80Frac( b );  
2816 - bExp = extractFloatx80Exp( b );  
2817 - expDiff = aExp - bExp;  
2818 - if ( 0 < expDiff ) goto aExpBigger;  
2819 - if ( expDiff < 0 ) goto bExpBigger;  
2820 - if ( aExp == 0x7FFF ) {  
2821 - if ( (bits64) ( ( aSig | bSig )<<1 ) ) {  
2822 - return propagateFloatx80NaN( a, b );  
2823 - }  
2824 - float_raise( float_flag_invalid );  
2825 - z.low = floatx80_default_nan_low;  
2826 - z.high = floatx80_default_nan_high;  
2827 - return z;  
2828 - }  
2829 - if ( aExp == 0 ) {  
2830 - aExp = 1;  
2831 - bExp = 1;  
2832 - }  
2833 - zSig1 = 0;  
2834 - if ( bSig < aSig ) goto aBigger;  
2835 - if ( aSig < bSig ) goto bBigger;  
2836 - return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );  
2837 - bExpBigger:  
2838 - if ( bExp == 0x7FFF ) {  
2839 - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );  
2840 - return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );  
2841 - }  
2842 - if ( aExp == 0 ) ++expDiff;  
2843 - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );  
2844 - bBigger:  
2845 - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );  
2846 - zExp = bExp;  
2847 - zSign ^= 1;  
2848 - goto normalizeRoundAndPack;  
2849 - aExpBigger:  
2850 - if ( aExp == 0x7FFF ) {  
2851 - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );  
2852 - return a;  
2853 - }  
2854 - if ( bExp == 0 ) --expDiff;  
2855 - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );  
2856 - aBigger:  
2857 - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );  
2858 - zExp = aExp;  
2859 - normalizeRoundAndPack:  
2860 - return  
2861 - normalizeRoundAndPackFloatx80(  
2862 - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );  
2863 -  
2864 -}  
2865 -  
2866 -/*  
2867 --------------------------------------------------------------------------------  
2868 -Returns the result of adding the extended double-precision floating-point  
2869 -values `a' and `b'. The operation is performed according to the IEC/IEEE  
2870 -Standard for Binary Floating-point Arithmetic.  
2871 --------------------------------------------------------------------------------  
2872 -*/  
2873 -floatx80 floatx80_add( floatx80 a, floatx80 b )  
2874 -{  
2875 - flag aSign, bSign;  
2876 -  
2877 - aSign = extractFloatx80Sign( a );  
2878 - bSign = extractFloatx80Sign( b );  
2879 - if ( aSign == bSign ) {  
2880 - return addFloatx80Sigs( a, b, aSign );  
2881 - }  
2882 - else {  
2883 - return subFloatx80Sigs( a, b, aSign );  
2884 - }  
2885 -  
2886 -}  
2887 -  
2888 -/*  
2889 --------------------------------------------------------------------------------  
2890 -Returns the result of subtracting the extended double-precision floating-  
2891 -point values `a' and `b'. The operation is performed according to the  
2892 -IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2893 --------------------------------------------------------------------------------  
2894 -*/  
2895 -floatx80 floatx80_sub( floatx80 a, floatx80 b )  
2896 -{  
2897 - flag aSign, bSign;  
2898 -  
2899 - aSign = extractFloatx80Sign( a );  
2900 - bSign = extractFloatx80Sign( b );  
2901 - if ( aSign == bSign ) {  
2902 - return subFloatx80Sigs( a, b, aSign );  
2903 - }  
2904 - else {  
2905 - return addFloatx80Sigs( a, b, aSign );  
2906 - }  
2907 -  
2908 -}  
2909 -  
2910 -/*  
2911 --------------------------------------------------------------------------------  
2912 -Returns the result of multiplying the extended double-precision floating-  
2913 -point values `a' and `b'. The operation is performed according to the  
2914 -IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2915 --------------------------------------------------------------------------------  
2916 -*/  
2917 -floatx80 floatx80_mul( floatx80 a, floatx80 b )  
2918 -{  
2919 - flag aSign, bSign, zSign;  
2920 - int32 aExp, bExp, zExp;  
2921 - bits64 aSig, bSig, zSig0, zSig1;  
2922 - floatx80 z;  
2923 -  
2924 - aSig = extractFloatx80Frac( a );  
2925 - aExp = extractFloatx80Exp( a );  
2926 - aSign = extractFloatx80Sign( a );  
2927 - bSig = extractFloatx80Frac( b );  
2928 - bExp = extractFloatx80Exp( b );  
2929 - bSign = extractFloatx80Sign( b );  
2930 - zSign = aSign ^ bSign;  
2931 - if ( aExp == 0x7FFF ) {  
2932 - if ( (bits64) ( aSig<<1 )  
2933 - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {  
2934 - return propagateFloatx80NaN( a, b );  
2935 - }  
2936 - if ( ( bExp | bSig ) == 0 ) goto invalid;  
2937 - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
2938 - }  
2939 - if ( bExp == 0x7FFF ) {  
2940 - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );  
2941 - if ( ( aExp | aSig ) == 0 ) {  
2942 - invalid:  
2943 - float_raise( float_flag_invalid );  
2944 - z.low = floatx80_default_nan_low;  
2945 - z.high = floatx80_default_nan_high;  
2946 - return z;  
2947 - }  
2948 - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
2949 - }  
2950 - if ( aExp == 0 ) {  
2951 - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );  
2952 - normalizeFloatx80Subnormal( aSig, &aExp, &aSig );  
2953 - }  
2954 - if ( bExp == 0 ) {  
2955 - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );  
2956 - normalizeFloatx80Subnormal( bSig, &bExp, &bSig );  
2957 - }  
2958 - zExp = aExp + bExp - 0x3FFE;  
2959 - mul64To128( aSig, bSig, &zSig0, &zSig1 );  
2960 - if ( 0 < (sbits64) zSig0 ) {  
2961 - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );  
2962 - --zExp;  
2963 - }  
2964 - return  
2965 - roundAndPackFloatx80(  
2966 - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );  
2967 -  
2968 -}  
2969 -  
2970 -/*  
2971 --------------------------------------------------------------------------------  
2972 -Returns the result of dividing the extended double-precision floating-point  
2973 -value `a' by the corresponding value `b'. The operation is performed  
2974 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
2975 --------------------------------------------------------------------------------  
2976 -*/  
2977 -floatx80 floatx80_div( floatx80 a, floatx80 b )  
2978 -{  
2979 - flag aSign, bSign, zSign;  
2980 - int32 aExp, bExp, zExp;  
2981 - bits64 aSig, bSig, zSig0, zSig1;  
2982 - bits64 rem0, rem1, rem2, term0, term1, term2;  
2983 - floatx80 z;  
2984 -  
2985 - aSig = extractFloatx80Frac( a );  
2986 - aExp = extractFloatx80Exp( a );  
2987 - aSign = extractFloatx80Sign( a );  
2988 - bSig = extractFloatx80Frac( b );  
2989 - bExp = extractFloatx80Exp( b );  
2990 - bSign = extractFloatx80Sign( b );  
2991 - zSign = aSign ^ bSign;  
2992 - if ( aExp == 0x7FFF ) {  
2993 - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );  
2994 - if ( bExp == 0x7FFF ) {  
2995 - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );  
2996 - goto invalid;  
2997 - }  
2998 - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
2999 - }  
3000 - if ( bExp == 0x7FFF ) {  
3001 - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );  
3002 - return packFloatx80( zSign, 0, 0 );  
3003 - }  
3004 - if ( bExp == 0 ) {  
3005 - if ( bSig == 0 ) {  
3006 - if ( ( aExp | aSig ) == 0 ) {  
3007 - invalid:  
3008 - float_raise( float_flag_invalid );  
3009 - z.low = floatx80_default_nan_low;  
3010 - z.high = floatx80_default_nan_high;  
3011 - return z;  
3012 - }  
3013 - float_raise( float_flag_divbyzero );  
3014 - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );  
3015 - }  
3016 - normalizeFloatx80Subnormal( bSig, &bExp, &bSig );  
3017 - }  
3018 - if ( aExp == 0 ) {  
3019 - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );  
3020 - normalizeFloatx80Subnormal( aSig, &aExp, &aSig );  
3021 - }  
3022 - zExp = aExp - bExp + 0x3FFE;  
3023 - rem1 = 0;  
3024 - if ( bSig <= aSig ) {  
3025 - shift128Right( aSig, 0, 1, &aSig, &rem1 );  
3026 - ++zExp;  
3027 - }  
3028 - zSig0 = estimateDiv128To64( aSig, rem1, bSig );  
3029 - mul64To128( bSig, zSig0, &term0, &term1 );  
3030 - sub128( aSig, rem1, term0, term1, &rem0, &rem1 );  
3031 - while ( (sbits64) rem0 < 0 ) {  
3032 - --zSig0;  
3033 - add128( rem0, rem1, 0, bSig, &rem0, &rem1 );  
3034 - }  
3035 - zSig1 = estimateDiv128To64( rem1, 0, bSig );  
3036 - if ( (bits64) ( zSig1<<1 ) <= 8 ) {  
3037 - mul64To128( bSig, zSig1, &term1, &term2 );  
3038 - sub128( rem1, 0, term1, term2, &rem1, &rem2 );  
3039 - while ( (sbits64) rem1 < 0 ) {  
3040 - --zSig1;  
3041 - add128( rem1, rem2, 0, bSig, &rem1, &rem2 );  
3042 - }  
3043 - zSig1 |= ( ( rem1 | rem2 ) != 0 );  
3044 - }  
3045 - return  
3046 - roundAndPackFloatx80(  
3047 - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );  
3048 -  
3049 -}  
3050 -  
3051 -/*  
3052 --------------------------------------------------------------------------------  
3053 -Returns the remainder of the extended double-precision floating-point value  
3054 -`a' with respect to the corresponding value `b'. The operation is performed  
3055 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
3056 --------------------------------------------------------------------------------  
3057 -*/  
3058 -floatx80 floatx80_rem( floatx80 a, floatx80 b )  
3059 -{  
3060 - flag aSign, bSign, zSign;  
3061 - int32 aExp, bExp, expDiff;  
3062 - bits64 aSig0, aSig1, bSig;  
3063 - bits64 q, term0, term1, alternateASig0, alternateASig1;  
3064 - floatx80 z;  
3065 -  
3066 - aSig0 = extractFloatx80Frac( a );  
3067 - aExp = extractFloatx80Exp( a );  
3068 - aSign = extractFloatx80Sign( a );  
3069 - bSig = extractFloatx80Frac( b );  
3070 - bExp = extractFloatx80Exp( b );  
3071 - bSign = extractFloatx80Sign( b );  
3072 - if ( aExp == 0x7FFF ) {  
3073 - if ( (bits64) ( aSig0<<1 )  
3074 - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {  
3075 - return propagateFloatx80NaN( a, b );  
3076 - }  
3077 - goto invalid;  
3078 - }  
3079 - if ( bExp == 0x7FFF ) {  
3080 - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );  
3081 - return a;  
3082 - }  
3083 - if ( bExp == 0 ) {  
3084 - if ( bSig == 0 ) {  
3085 - invalid:  
3086 - float_raise( float_flag_invalid );  
3087 - z.low = floatx80_default_nan_low;  
3088 - z.high = floatx80_default_nan_high;  
3089 - return z;  
3090 - }  
3091 - normalizeFloatx80Subnormal( bSig, &bExp, &bSig );  
3092 - }  
3093 - if ( aExp == 0 ) {  
3094 - if ( (bits64) ( aSig0<<1 ) == 0 ) return a;  
3095 - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );  
3096 - }  
3097 - bSig |= LIT64( 0x8000000000000000 );  
3098 - zSign = aSign;  
3099 - expDiff = aExp - bExp;  
3100 - aSig1 = 0;  
3101 - if ( expDiff < 0 ) {  
3102 - if ( expDiff < -1 ) return a;  
3103 - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );  
3104 - expDiff = 0;  
3105 - }  
3106 - q = ( bSig <= aSig0 );  
3107 - if ( q ) aSig0 -= bSig;  
3108 - expDiff -= 64;  
3109 - while ( 0 < expDiff ) {  
3110 - q = estimateDiv128To64( aSig0, aSig1, bSig );  
3111 - q = ( 2 < q ) ? q - 2 : 0;  
3112 - mul64To128( bSig, q, &term0, &term1 );  
3113 - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );  
3114 - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );  
3115 - expDiff -= 62;  
3116 - }  
3117 - expDiff += 64;  
3118 - if ( 0 < expDiff ) {  
3119 - q = estimateDiv128To64( aSig0, aSig1, bSig );  
3120 - q = ( 2 < q ) ? q - 2 : 0;  
3121 - q >>= 64 - expDiff;  
3122 - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );  
3123 - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );  
3124 - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );  
3125 - while ( le128( term0, term1, aSig0, aSig1 ) ) {  
3126 - ++q;  
3127 - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );  
3128 - }  
3129 - }  
3130 - else {  
3131 - term1 = 0;  
3132 - term0 = bSig;  
3133 - }  
3134 - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );  
3135 - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )  
3136 - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )  
3137 - && ( q & 1 ) )  
3138 - ) {  
3139 - aSig0 = alternateASig0;  
3140 - aSig1 = alternateASig1;  
3141 - zSign = ! zSign;  
3142 - }  
3143 - return  
3144 - normalizeRoundAndPackFloatx80(  
3145 - 80, zSign, bExp + expDiff, aSig0, aSig1 );  
3146 -  
3147 -}  
3148 -  
3149 -/*  
3150 --------------------------------------------------------------------------------  
3151 -Returns the square root of the extended double-precision floating-point  
3152 -value `a'. The operation is performed according to the IEC/IEEE Standard  
3153 -for Binary Floating-point Arithmetic.  
3154 --------------------------------------------------------------------------------  
3155 -*/  
3156 -floatx80 floatx80_sqrt( floatx80 a )  
3157 -{  
3158 - flag aSign;  
3159 - int32 aExp, zExp;  
3160 - bits64 aSig0, aSig1, zSig0, zSig1;  
3161 - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;  
3162 - bits64 shiftedRem0, shiftedRem1;  
3163 - floatx80 z;  
3164 -  
3165 - aSig0 = extractFloatx80Frac( a );  
3166 - aExp = extractFloatx80Exp( a );  
3167 - aSign = extractFloatx80Sign( a );  
3168 - if ( aExp == 0x7FFF ) {  
3169 - if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );  
3170 - if ( ! aSign ) return a;  
3171 - goto invalid;  
3172 - }  
3173 - if ( aSign ) {  
3174 - if ( ( aExp | aSig0 ) == 0 ) return a;  
3175 - invalid:  
3176 - float_raise( float_flag_invalid );  
3177 - z.low = floatx80_default_nan_low;  
3178 - z.high = floatx80_default_nan_high;  
3179 - return z;  
3180 - }  
3181 - if ( aExp == 0 ) {  
3182 - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );  
3183 - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );  
3184 - }  
3185 - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;  
3186 - zSig0 = estimateSqrt32( aExp, aSig0>>32 );  
3187 - zSig0 <<= 31;  
3188 - aSig1 = 0;  
3189 - shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );  
3190 - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;  
3191 - if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );  
3192 - shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );  
3193 - mul64To128( zSig0, zSig0, &term0, &term1 );  
3194 - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );  
3195 - while ( (sbits64) rem0 < 0 ) {  
3196 - --zSig0;  
3197 - shortShift128Left( 0, zSig0, 1, &term0, &term1 );  
3198 - term1 |= 1;  
3199 - add128( rem0, rem1, term0, term1, &rem0, &rem1 );  
3200 - }  
3201 - shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );  
3202 - zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );  
3203 - if ( (bits64) ( zSig1<<1 ) <= 10 ) {  
3204 - if ( zSig1 == 0 ) zSig1 = 1;  
3205 - mul64To128( zSig0, zSig1, &term1, &term2 );  
3206 - shortShift128Left( term1, term2, 1, &term1, &term2 );  
3207 - sub128( rem1, 0, term1, term2, &rem1, &rem2 );  
3208 - mul64To128( zSig1, zSig1, &term2, &term3 );  
3209 - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );  
3210 - while ( (sbits64) rem1 < 0 ) {  
3211 - --zSig1;  
3212 - shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );  
3213 - term3 |= 1;  
3214 - add192(  
3215 - rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );  
3216 - }  
3217 - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );  
3218 - }  
3219 - return  
3220 - roundAndPackFloatx80(  
3221 - floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );  
3222 -  
3223 -}  
3224 -  
3225 -/*  
3226 --------------------------------------------------------------------------------  
3227 -Returns 1 if the extended double-precision floating-point value `a' is  
3228 -equal to the corresponding value `b', and 0 otherwise. The comparison is  
3229 -performed according to the IEC/IEEE Standard for Binary Floating-point  
3230 -Arithmetic.  
3231 --------------------------------------------------------------------------------  
3232 -*/  
3233 -flag floatx80_eq( floatx80 a, floatx80 b )  
3234 -{  
3235 -  
3236 - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )  
3237 - && (bits64) ( extractFloatx80Frac( a )<<1 ) )  
3238 - || ( ( extractFloatx80Exp( b ) == 0x7FFF )  
3239 - && (bits64) ( extractFloatx80Frac( b )<<1 ) )  
3240 - ) {  
3241 - if ( floatx80_is_signaling_nan( a )  
3242 - || floatx80_is_signaling_nan( b ) ) {  
3243 - float_raise( float_flag_invalid );  
3244 - }  
3245 - return 0;  
3246 - }  
3247 - return  
3248 - ( a.low == b.low )  
3249 - && ( ( a.high == b.high )  
3250 - || ( ( a.low == 0 )  
3251 - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )  
3252 - );  
3253 -  
3254 -}  
3255 -  
3256 -/*  
3257 --------------------------------------------------------------------------------  
3258 -Returns 1 if the extended double-precision floating-point value `a' is  
3259 -less than or equal to the corresponding value `b', and 0 otherwise. The  
3260 -comparison is performed according to the IEC/IEEE Standard for Binary  
3261 -Floating-point Arithmetic.  
3262 --------------------------------------------------------------------------------  
3263 -*/  
3264 -flag floatx80_le( floatx80 a, floatx80 b )  
3265 -{  
3266 - flag aSign, bSign;  
3267 -  
3268 - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )  
3269 - && (bits64) ( extractFloatx80Frac( a )<<1 ) )  
3270 - || ( ( extractFloatx80Exp( b ) == 0x7FFF )  
3271 - && (bits64) ( extractFloatx80Frac( b )<<1 ) )  
3272 - ) {  
3273 - float_raise( float_flag_invalid );  
3274 - return 0;  
3275 - }  
3276 - aSign = extractFloatx80Sign( a );  
3277 - bSign = extractFloatx80Sign( b );  
3278 - if ( aSign != bSign ) {  
3279 - return  
3280 - aSign  
3281 - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  
3282 - == 0 );  
3283 - }  
3284 - return  
3285 - aSign ? le128( b.high, b.low, a.high, a.low )  
3286 - : le128( a.high, a.low, b.high, b.low );  
3287 -  
3288 -}  
3289 -  
3290 -/*  
3291 --------------------------------------------------------------------------------  
3292 -Returns 1 if the extended double-precision floating-point value `a' is  
3293 -less than the corresponding value `b', and 0 otherwise. The comparison  
3294 -is performed according to the IEC/IEEE Standard for Binary Floating-point  
3295 -Arithmetic.  
3296 --------------------------------------------------------------------------------  
3297 -*/  
3298 -flag floatx80_lt( floatx80 a, floatx80 b )  
3299 -{  
3300 - flag aSign, bSign;  
3301 -  
3302 - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )  
3303 - && (bits64) ( extractFloatx80Frac( a )<<1 ) )  
3304 - || ( ( extractFloatx80Exp( b ) == 0x7FFF )  
3305 - && (bits64) ( extractFloatx80Frac( b )<<1 ) )  
3306 - ) {  
3307 - float_raise( float_flag_invalid );  
3308 - return 0;  
3309 - }  
3310 - aSign = extractFloatx80Sign( a );  
3311 - bSign = extractFloatx80Sign( b );  
3312 - if ( aSign != bSign ) {  
3313 - return  
3314 - aSign  
3315 - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  
3316 - != 0 );  
3317 - }  
3318 - return  
3319 - aSign ? lt128( b.high, b.low, a.high, a.low )  
3320 - : lt128( a.high, a.low, b.high, b.low );  
3321 -  
3322 -}  
3323 -  
3324 -/*  
3325 --------------------------------------------------------------------------------  
3326 -Returns 1 if the extended double-precision floating-point value `a' is equal  
3327 -to the corresponding value `b', and 0 otherwise. The invalid exception is  
3328 -raised if either operand is a NaN. Otherwise, the comparison is performed  
3329 -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
3330 --------------------------------------------------------------------------------  
3331 -*/  
3332 -flag floatx80_eq_signaling( floatx80 a, floatx80 b )  
3333 -{  
3334 -  
3335 - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )  
3336 - && (bits64) ( extractFloatx80Frac( a )<<1 ) )  
3337 - || ( ( extractFloatx80Exp( b ) == 0x7FFF )  
3338 - && (bits64) ( extractFloatx80Frac( b )<<1 ) )  
3339 - ) {  
3340 - float_raise( float_flag_invalid );  
3341 - return 0;  
3342 - }  
3343 - return  
3344 - ( a.low == b.low )  
3345 - && ( ( a.high == b.high )  
3346 - || ( ( a.low == 0 )  
3347 - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )  
3348 - );  
3349 -  
3350 -}  
3351 -  
3352 -/*  
3353 --------------------------------------------------------------------------------  
3354 -Returns 1 if the extended double-precision floating-point value `a' is less  
3355 -than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs  
3356 -do not cause an exception. Otherwise, the comparison is performed according  
3357 -to the IEC/IEEE Standard for Binary Floating-point Arithmetic.  
3358 --------------------------------------------------------------------------------  
3359 -*/  
3360 -flag floatx80_le_quiet( floatx80 a, floatx80 b )  
3361 -{  
3362 - flag aSign, bSign;  
3363 -  
3364 - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )  
3365 - && (bits64) ( extractFloatx80Frac( a )<<1 ) )  
3366 - || ( ( extractFloatx80Exp( b ) == 0x7FFF )  
3367 - && (bits64) ( extractFloatx80Frac( b )<<1 ) )  
3368 - ) {  
3369 - if ( floatx80_is_signaling_nan( a )  
3370 - || floatx80_is_signaling_nan( b ) ) {  
3371 - float_raise( float_flag_invalid );  
3372 - }  
3373 - return 0;  
3374 - }  
3375 - aSign = extractFloatx80Sign( a );  
3376 - bSign = extractFloatx80Sign( b );  
3377 - if ( aSign != bSign ) {  
3378 - return  
3379 - aSign  
3380 - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  
3381 - == 0 );  
3382 - }  
3383 - return  
3384 - aSign ? le128( b.high, b.low, a.high, a.low )  
3385 - : le128( a.high, a.low, b.high, b.low );  
3386 -  
3387 -}  
3388 -  
3389 -/*  
3390 --------------------------------------------------------------------------------  
3391 -Returns 1 if the extended double-precision floating-point value `a' is less  
3392 -than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause  
3393 -an exception. Otherwise, the comparison is performed according to the  
3394 -IEC/IEEE Standard for Binary Floating-point Arithmetic.  
3395 --------------------------------------------------------------------------------  
3396 -*/  
3397 -flag floatx80_lt_quiet( floatx80 a, floatx80 b )  
3398 -{  
3399 - flag aSign, bSign;  
3400 -  
3401 - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )  
3402 - && (bits64) ( extractFloatx80Frac( a )<<1 ) )  
3403 - || ( ( extractFloatx80Exp( b ) == 0x7FFF )  
3404 - && (bits64) ( extractFloatx80Frac( b )<<1 ) )  
3405 - ) {  
3406 - if ( floatx80_is_signaling_nan( a )  
3407 - || floatx80_is_signaling_nan( b ) ) {  
3408 - float_raise( float_flag_invalid );  
3409 - }  
3410 - return 0;  
3411 - }  
3412 - aSign = extractFloatx80Sign( a );  
3413 - bSign = extractFloatx80Sign( b );  
3414 - if ( aSign != bSign ) {  
3415 - return  
3416 - aSign  
3417 - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  
3418 - != 0 );  
3419 - }  
3420 - return  
3421 - aSign ? lt128( b.high, b.low, a.high, a.low )  
3422 - : lt128( a.high, a.low, b.high, b.low );  
3423 -  
3424 -}  
3425 -  
3426 -#endif  
3427 -  
target-arm/nwfpe/softfloat.h deleted 100644 → 0
1 -  
2 -/*  
3 -===============================================================================  
4 -  
5 -This C header file is part of the SoftFloat IEC/IEEE Floating-point  
6 -Arithmetic Package, Release 2.  
7 -  
8 -Written by John R. Hauser. This work was made possible in part by the  
9 -International Computer Science Institute, located at Suite 600, 1947 Center  
10 -Street, Berkeley, California 94704. Funding was partially provided by the  
11 -National Science Foundation under grant MIP-9311980. The original version  
12 -of this code was written as part of a project to build a fixed-point vector  
13 -processor in collaboration with the University of California at Berkeley,  
14 -overseen by Profs. Nelson Morgan and John Wawrzynek. More information  
15 -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/  
16 -arithmetic/softfloat.html'.  
17 -  
18 -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort  
19 -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT  
20 -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO  
21 -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY  
22 -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.  
23 -  
24 -Derivative works are acceptable, even for commercial purposes, so long as  
25 -(1) they include prominent notice that the work is derivative, and (2) they  
26 -include prominent notice akin to these three paragraphs for those parts of  
27 -this code that are retained.  
28 -  
29 -===============================================================================  
30 -*/  
31 -  
32 -#ifndef __SOFTFLOAT_H__  
33 -#define __SOFTFLOAT_H__  
34 -  
35 -/*  
36 --------------------------------------------------------------------------------  
37 -The macro `FLOATX80' must be defined to enable the extended double-precision  
38 -floating-point format `floatx80'. If this macro is not defined, the  
39 -`floatx80' type will not be defined, and none of the functions that either  
40 -input or output the `floatx80' type will be defined.  
41 --------------------------------------------------------------------------------  
42 -*/  
43 -#define FLOATX80  
44 -  
45 -/*  
46 --------------------------------------------------------------------------------  
47 -Software IEC/IEEE floating-point types.  
48 --------------------------------------------------------------------------------  
49 -*/  
50 -typedef unsigned long int float32;  
51 -typedef unsigned long long float64;  
52 -typedef struct {  
53 - unsigned short high;  
54 - unsigned long long low;  
55 -} floatx80;  
56 -  
57 -/*  
58 --------------------------------------------------------------------------------  
59 -Software IEC/IEEE floating-point underflow tininess-detection mode.  
60 --------------------------------------------------------------------------------  
61 -*/  
62 -extern signed char float_detect_tininess;  
63 -enum {  
64 - float_tininess_after_rounding = 0,  
65 - float_tininess_before_rounding = 1  
66 -};  
67 -  
68 -/*  
69 --------------------------------------------------------------------------------  
70 -Software IEC/IEEE floating-point rounding mode.  
71 --------------------------------------------------------------------------------  
72 -*/  
73 -extern signed char float_rounding_mode;  
74 -enum {  
75 - float_round_nearest_even = 0,  
76 - float_round_to_zero = 1,  
77 - float_round_down = 2,  
78 - float_round_up = 3  
79 -};  
80 -  
81 -/*  
82 --------------------------------------------------------------------------------  
83 -Software IEC/IEEE floating-point exception flags.  
84 --------------------------------------------------------------------------------  
85 -extern signed char float_exception_flags;  
86 -enum {  
87 - float_flag_inexact = 1,  
88 - float_flag_underflow = 2,  
89 - float_flag_overflow = 4,  
90 - float_flag_divbyzero = 8,  
91 - float_flag_invalid = 16  
92 -};  
93 -  
94 -ScottB: November 4, 1998  
95 -Changed the enumeration to match the bit order in the FPA11.  
96 -*/  
97 -  
98 -extern signed char float_exception_flags;  
99 -enum {  
100 - float_flag_invalid = 1,  
101 - float_flag_divbyzero = 2,  
102 - float_flag_overflow = 4,  
103 - float_flag_underflow = 8,  
104 - float_flag_inexact = 16  
105 -};  
106 -  
107 -/*  
108 --------------------------------------------------------------------------------  
109 -Routine to raise any or all of the software IEC/IEEE floating-point  
110 -exception flags.  
111 --------------------------------------------------------------------------------  
112 -*/  
113 -void float_raise( signed char );  
114 -  
115 -/*  
116 --------------------------------------------------------------------------------  
117 -Software IEC/IEEE integer-to-floating-point conversion routines.  
118 --------------------------------------------------------------------------------  
119 -*/  
120 -float32 int32_to_float32( signed int );  
121 -float64 int32_to_float64( signed int );  
122 -#ifdef FLOATX80  
123 -floatx80 int32_to_floatx80( signed int );  
124 -#endif  
125 -  
126 -/*  
127 --------------------------------------------------------------------------------  
128 -Software IEC/IEEE single-precision conversion routines.  
129 --------------------------------------------------------------------------------  
130 -*/  
131 -signed int float32_to_int32( float32 );  
132 -signed int float32_to_int32_round_to_zero( float32 );  
133 -float64 float32_to_float64( float32 );  
134 -#ifdef FLOATX80  
135 -floatx80 float32_to_floatx80( float32 );  
136 -#endif  
137 -  
138 -/*  
139 --------------------------------------------------------------------------------  
140 -Software IEC/IEEE single-precision operations.  
141 --------------------------------------------------------------------------------  
142 -*/  
143 -float32 float32_round_to_int( float32 );  
144 -float32 float32_add( float32, float32 );  
145 -float32 float32_sub( float32, float32 );  
146 -float32 float32_mul( float32, float32 );  
147 -float32 float32_div( float32, float32 );  
148 -float32 float32_rem( float32, float32 );  
149 -float32 float32_sqrt( float32 );  
150 -char float32_eq( float32, float32 );  
151 -char float32_le( float32, float32 );  
152 -char float32_lt( float32, float32 );  
153 -char float32_eq_signaling( float32, float32 );  
154 -char float32_le_quiet( float32, float32 );  
155 -char float32_lt_quiet( float32, float32 );  
156 -char float32_is_signaling_nan( float32 );  
157 -  
158 -/*  
159 --------------------------------------------------------------------------------  
160 -Software IEC/IEEE double-precision conversion routines.  
161 --------------------------------------------------------------------------------  
162 -*/  
163 -signed int float64_to_int32( float64 );  
164 -signed int float64_to_int32_round_to_zero( float64 );  
165 -float32 float64_to_float32( float64 );  
166 -#ifdef FLOATX80  
167 -floatx80 float64_to_floatx80( float64 );  
168 -#endif  
169 -  
170 -/*  
171 --------------------------------------------------------------------------------  
172 -Software IEC/IEEE double-precision operations.  
173 --------------------------------------------------------------------------------  
174 -*/  
175 -float64 float64_round_to_int( float64 );  
176 -float64 float64_add( float64, float64 );  
177 -float64 float64_sub( float64, float64 );  
178 -float64 float64_mul( float64, float64 );  
179 -float64 float64_div( float64, float64 );  
180 -float64 float64_rem( float64, float64 );  
181 -float64 float64_sqrt( float64 );  
182 -char float64_eq( float64, float64 );  
183 -char float64_le( float64, float64 );  
184 -char float64_lt( float64, float64 );  
185 -char float64_eq_signaling( float64, float64 );  
186 -char float64_le_quiet( float64, float64 );  
187 -char float64_lt_quiet( float64, float64 );  
188 -char float64_is_signaling_nan( float64 );  
189 -  
190 -#ifdef FLOATX80  
191 -  
192 -/*  
193 --------------------------------------------------------------------------------  
194 -Software IEC/IEEE extended double-precision conversion routines.  
195 --------------------------------------------------------------------------------  
196 -*/  
197 -signed int floatx80_to_int32( floatx80 );  
198 -signed int floatx80_to_int32_round_to_zero( floatx80 );  
199 -float32 floatx80_to_float32( floatx80 );  
200 -float64 floatx80_to_float64( floatx80 );  
201 -  
202 -/*  
203 --------------------------------------------------------------------------------  
204 -Software IEC/IEEE extended double-precision rounding precision. Valid  
205 -values are 32, 64, and 80.  
206 --------------------------------------------------------------------------------  
207 -*/  
208 -extern signed char floatx80_rounding_precision;  
209 -  
210 -/*  
211 --------------------------------------------------------------------------------  
212 -Software IEC/IEEE extended double-precision operations.  
213 --------------------------------------------------------------------------------  
214 -*/  
215 -floatx80 floatx80_round_to_int( floatx80 );  
216 -floatx80 floatx80_add( floatx80, floatx80 );  
217 -floatx80 floatx80_sub( floatx80, floatx80 );  
218 -floatx80 floatx80_mul( floatx80, floatx80 );  
219 -floatx80 floatx80_div( floatx80, floatx80 );  
220 -floatx80 floatx80_rem( floatx80, floatx80 );  
221 -floatx80 floatx80_sqrt( floatx80 );  
222 -char floatx80_eq( floatx80, floatx80 );  
223 -char floatx80_le( floatx80, floatx80 );  
224 -char floatx80_lt( floatx80, floatx80 );  
225 -char floatx80_eq_signaling( floatx80, floatx80 );  
226 -char floatx80_le_quiet( floatx80, floatx80 );  
227 -char floatx80_lt_quiet( floatx80, floatx80 );  
228 -char floatx80_is_signaling_nan( floatx80 );  
229 -  
230 -#endif  
231 -  
232 -#endif