Commit 20495218834824723ef81306c6e1fd27fc3ae560

Authored by bellard
1 parent 158142c2

use the generic soft float code


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1333 c046a42c-6fe2-441c-8c8c-71466251a162
target-arm/nwfpe/ARM-gcc.h deleted 100644 → 0
1   -/*
2   --------------------------------------------------------------------------------
3   -The macro `BITS64' can be defined to indicate that 64-bit integer types are
4   -supported by the compiler.
5   --------------------------------------------------------------------------------
6   -*/
7   -#define BITS64
8   -
9   -/*
10   --------------------------------------------------------------------------------
11   -Each of the following `typedef's defines the most convenient type that holds
12   -integers of at least as many bits as specified. For example, `uint8' should
13   -be the most convenient type that can hold unsigned integers of as many as
14   -8 bits. The `flag' type must be able to hold either a 0 or 1. For most
15   -implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
16   -to the same as `int'.
17   --------------------------------------------------------------------------------
18   -*/
19   -typedef char flag;
20   -typedef unsigned char uint8;
21   -typedef signed char int8;
22   -typedef int uint16;
23   -typedef int int16;
24   -typedef unsigned int uint32;
25   -typedef signed int int32;
26   -#ifdef BITS64
27   -typedef unsigned long long int bits64;
28   -typedef signed long long int sbits64;
29   -#endif
30   -
31   -/*
32   --------------------------------------------------------------------------------
33   -Each of the following `typedef's defines a type that holds integers
34   -of _exactly_ the number of bits specified. For instance, for most
35   -implementation of C, `bits16' and `sbits16' should be `typedef'ed to
36   -`unsigned short int' and `signed short int' (or `short int'), respectively.
37   --------------------------------------------------------------------------------
38   -*/
39   -typedef unsigned char bits8;
40   -typedef signed char sbits8;
41   -typedef unsigned short int bits16;
42   -typedef signed short int sbits16;
43   -typedef unsigned int bits32;
44   -typedef signed int sbits32;
45   -#ifdef BITS64
46   -typedef unsigned long long int uint64;
47   -typedef signed long long int int64;
48   -#endif
49   -
50   -#ifdef BITS64
51   -/*
52   --------------------------------------------------------------------------------
53   -The `LIT64' macro takes as its argument a textual integer literal and if
54   -necessary ``marks'' the literal as having a 64-bit integer type. For
55   -example, the Gnu C Compiler (`gcc') requires that 64-bit literals be
56   -appended with the letters `LL' standing for `long long', which is `gcc's
57   -name for the 64-bit integer type. Some compilers may allow `LIT64' to be
58   -defined as the identity macro: `#define LIT64( a ) a'.
59   --------------------------------------------------------------------------------
60   -*/
61   -#define LIT64( a ) a##LL
62   -#endif
63   -
64   -/*
65   --------------------------------------------------------------------------------
66   -The macro `INLINE' can be used before functions that should be inlined. If
67   -a compiler does not support explicit inlining, this macro should be defined
68   -to be `static'.
69   --------------------------------------------------------------------------------
70   -*/
71   -#define INLINE extern __inline__
72   -
73   -
74   -/* For use as a GCC soft-float library we need some special function names. */
75   -
76   -#ifdef __LIBFLOAT__
77   -
78   -/* Some 32-bit ops can be mapped straight across by just changing the name. */
79   -#define float32_add __addsf3
80   -#define float32_sub __subsf3
81   -#define float32_mul __mulsf3
82   -#define float32_div __divsf3
83   -#define int32_to_float32 __floatsisf
84   -#define float32_to_int32_round_to_zero __fixsfsi
85   -#define float32_to_uint32_round_to_zero __fixunssfsi
86   -
87   -/* These ones go through the glue code. To avoid namespace pollution
88   - we rename the internal functions too. */
89   -#define float32_eq ___float32_eq
90   -#define float32_le ___float32_le
91   -#define float32_lt ___float32_lt
92   -
93   -/* All the 64-bit ops have to go through the glue, so we pull the same
94   - trick. */
95   -#define float64_add ___float64_add
96   -#define float64_sub ___float64_sub
97   -#define float64_mul ___float64_mul
98   -#define float64_div ___float64_div
99   -#define int32_to_float64 ___int32_to_float64
100   -#define float64_to_int32_round_to_zero ___float64_to_int32_round_to_zero
101   -#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero
102   -#define float64_to_float32 ___float64_to_float32
103   -#define float32_to_float64 ___float32_to_float64
104   -#define float64_eq ___float64_eq
105   -#define float64_le ___float64_le
106   -#define float64_lt ___float64_lt
107   -
108   -#if 0
109   -#define float64_add __adddf3
110   -#define float64_sub __subdf3
111   -#define float64_mul __muldf3
112   -#define float64_div __divdf3
113   -#define int32_to_float64 __floatsidf
114   -#define float64_to_int32_round_to_zero __fixdfsi
115   -#define float64_to_uint32_round_to_zero __fixunsdfsi
116   -#define float64_to_float32 __truncdfsf2
117   -#define float32_to_float64 __extendsfdf2
118   -#endif
119   -
120   -#endif
target-arm/nwfpe/double_cpdo.c
... ... @@ -53,7 +53,7 @@ unsigned int DoubleCPDO(const unsigned int opcode)
53 53 switch (fpa11->fType[Fm])
54 54 {
55 55 case typeSingle:
56   - rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle);
  56 + rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle, &fpa11->fp_status);
57 57 break;
58 58  
59 59 case typeDouble:
... ... @@ -79,7 +79,7 @@ unsigned int DoubleCPDO(const unsigned int opcode)
79 79 switch (fpa11->fType[Fn])
80 80 {
81 81 case typeSingle:
82   - rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle);
  82 + rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
83 83 break;
84 84  
85 85 case typeDouble:
... ... @@ -96,30 +96,30 @@ unsigned int DoubleCPDO(const unsigned int opcode)
96 96 {
97 97 /* dyadic opcodes */
98 98 case ADF_CODE:
99   - fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm);
  99 + fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm, &fpa11->fp_status);
100 100 break;
101 101  
102 102 case MUF_CODE:
103 103 case FML_CODE:
104   - fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm);
  104 + fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm, &fpa11->fp_status);
105 105 break;
106 106  
107 107 case SUF_CODE:
108   - fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm);
  108 + fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm, &fpa11->fp_status);
109 109 break;
110 110  
111 111 case RSF_CODE:
112   - fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn);
  112 + fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn, &fpa11->fp_status);
113 113 break;
114 114  
115 115 case DVF_CODE:
116 116 case FDV_CODE:
117   - fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm);
  117 + fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm, &fpa11->fp_status);
118 118 break;
119 119  
120 120 case RDF_CODE:
121 121 case FRD_CODE:
122   - fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn);
  122 + fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn, &fpa11->fp_status);
123 123 break;
124 124  
125 125 #if 0
... ... @@ -133,7 +133,7 @@ unsigned int DoubleCPDO(const unsigned int opcode)
133 133 #endif
134 134  
135 135 case RMF_CODE:
136   - fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm);
  136 + fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm, &fpa11->fp_status);
137 137 break;
138 138  
139 139 #if 0
... ... @@ -173,11 +173,11 @@ unsigned int DoubleCPDO(const unsigned int opcode)
173 173  
174 174 case RND_CODE:
175 175 case URD_CODE:
176   - fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm);
  176 + fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm, &fpa11->fp_status);
177 177 break;
178 178  
179 179 case SQT_CODE:
180   - fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm);
  180 + fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm, &fpa11->fp_status);
181 181 break;
182 182  
183 183 #if 0
... ...
target-arm/nwfpe/extended_cpdo.c
... ... @@ -53,11 +53,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
53 53 switch (fpa11->fType[Fm])
54 54 {
55 55 case typeSingle:
56   - rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle);
  56 + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle, &fpa11->fp_status);
57 57 break;
58 58  
59 59 case typeDouble:
60   - rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble);
  60 + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble, &fpa11->fp_status);
61 61 break;
62 62  
63 63 case typeExtended:
... ... @@ -74,11 +74,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
74 74 switch (fpa11->fType[Fn])
75 75 {
76 76 case typeSingle:
77   - rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
  77 + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
78 78 break;
79 79  
80 80 case typeDouble:
81   - rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
  81 + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
82 82 break;
83 83  
84 84 case typeExtended:
... ... @@ -94,30 +94,30 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
94 94 {
95 95 /* dyadic opcodes */
96 96 case ADF_CODE:
97   - fpa11->fpreg[Fd].fExtended = floatx80_add(rFn,rFm);
  97 + fpa11->fpreg[Fd].fExtended = floatx80_add(rFn,rFm, &fpa11->fp_status);
98 98 break;
99 99  
100 100 case MUF_CODE:
101 101 case FML_CODE:
102   - fpa11->fpreg[Fd].fExtended = floatx80_mul(rFn,rFm);
  102 + fpa11->fpreg[Fd].fExtended = floatx80_mul(rFn,rFm, &fpa11->fp_status);
103 103 break;
104 104  
105 105 case SUF_CODE:
106   - fpa11->fpreg[Fd].fExtended = floatx80_sub(rFn,rFm);
  106 + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFn,rFm, &fpa11->fp_status);
107 107 break;
108 108  
109 109 case RSF_CODE:
110   - fpa11->fpreg[Fd].fExtended = floatx80_sub(rFm,rFn);
  110 + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFm,rFn, &fpa11->fp_status);
111 111 break;
112 112  
113 113 case DVF_CODE:
114 114 case FDV_CODE:
115   - fpa11->fpreg[Fd].fExtended = floatx80_div(rFn,rFm);
  115 + fpa11->fpreg[Fd].fExtended = floatx80_div(rFn,rFm, &fpa11->fp_status);
116 116 break;
117 117  
118 118 case RDF_CODE:
119 119 case FRD_CODE:
120   - fpa11->fpreg[Fd].fExtended = floatx80_div(rFm,rFn);
  120 + fpa11->fpreg[Fd].fExtended = floatx80_div(rFm,rFn, &fpa11->fp_status);
121 121 break;
122 122  
123 123 #if 0
... ... @@ -131,7 +131,7 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
131 131 #endif
132 132  
133 133 case RMF_CODE:
134   - fpa11->fpreg[Fd].fExtended = floatx80_rem(rFn,rFm);
  134 + fpa11->fpreg[Fd].fExtended = floatx80_rem(rFn,rFm, &fpa11->fp_status);
135 135 break;
136 136  
137 137 #if 0
... ... @@ -157,11 +157,11 @@ unsigned int ExtendedCPDO(const unsigned int opcode)
157 157  
158 158 case RND_CODE:
159 159 case URD_CODE:
160   - fpa11->fpreg[Fd].fExtended = floatx80_round_to_int(rFm);
  160 + fpa11->fpreg[Fd].fExtended = floatx80_round_to_int(rFm, &fpa11->fp_status);
161 161 break;
162 162  
163 163 case SQT_CODE:
164   - fpa11->fpreg[Fd].fExtended = floatx80_sqrt(rFm);
  164 + fpa11->fpreg[Fd].fExtended = floatx80_sqrt(rFm, &fpa11->fp_status);
165 165 break;
166 166  
167 167 #if 0
... ...
target-arm/nwfpe/fpa11.c
... ... @@ -61,74 +61,79 @@ void resetFPA11(void)
61 61  
62 62 void SetRoundingMode(const unsigned int opcode)
63 63 {
64   -#if MAINTAIN_FPCR
  64 + int rounding_mode;
65 65 FPA11 *fpa11 = GET_FPA11();
  66 +
  67 +#if MAINTAIN_FPCR
66 68 fpa11->fpcr &= ~MASK_ROUNDING_MODE;
67 69 #endif
68 70 switch (opcode & MASK_ROUNDING_MODE)
69 71 {
70 72 default:
71 73 case ROUND_TO_NEAREST:
72   - float_rounding_mode = float_round_nearest_even;
  74 + rounding_mode = float_round_nearest_even;
73 75 #if MAINTAIN_FPCR
74 76 fpa11->fpcr |= ROUND_TO_NEAREST;
75 77 #endif
76 78 break;
77 79  
78 80 case ROUND_TO_PLUS_INFINITY:
79   - float_rounding_mode = float_round_up;
  81 + rounding_mode = float_round_up;
80 82 #if MAINTAIN_FPCR
81 83 fpa11->fpcr |= ROUND_TO_PLUS_INFINITY;
82 84 #endif
83 85 break;
84 86  
85 87 case ROUND_TO_MINUS_INFINITY:
86   - float_rounding_mode = float_round_down;
  88 + rounding_mode = float_round_down;
87 89 #if MAINTAIN_FPCR
88 90 fpa11->fpcr |= ROUND_TO_MINUS_INFINITY;
89 91 #endif
90 92 break;
91 93  
92 94 case ROUND_TO_ZERO:
93   - float_rounding_mode = float_round_to_zero;
  95 + rounding_mode = float_round_to_zero;
94 96 #if MAINTAIN_FPCR
95 97 fpa11->fpcr |= ROUND_TO_ZERO;
96 98 #endif
97 99 break;
98 100 }
  101 + set_float_rounding_mode(rounding_mode, &fpa11->fp_status);
99 102 }
100 103  
101 104 void SetRoundingPrecision(const unsigned int opcode)
102 105 {
103   -#if MAINTAIN_FPCR
  106 + int rounding_precision;
104 107 FPA11 *fpa11 = GET_FPA11();
  108 +#if MAINTAIN_FPCR
105 109 fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
106 110 #endif
107 111 switch (opcode & MASK_ROUNDING_PRECISION)
108 112 {
109 113 case ROUND_SINGLE:
110   - floatx80_rounding_precision = 32;
  114 + rounding_precision = 32;
111 115 #if MAINTAIN_FPCR
112 116 fpa11->fpcr |= ROUND_SINGLE;
113 117 #endif
114 118 break;
115 119  
116 120 case ROUND_DOUBLE:
117   - floatx80_rounding_precision = 64;
  121 + rounding_precision = 64;
118 122 #if MAINTAIN_FPCR
119 123 fpa11->fpcr |= ROUND_DOUBLE;
120 124 #endif
121 125 break;
122 126  
123 127 case ROUND_EXTENDED:
124   - floatx80_rounding_precision = 80;
  128 + rounding_precision = 80;
125 129 #if MAINTAIN_FPCR
126 130 fpa11->fpcr |= ROUND_EXTENDED;
127 131 #endif
128 132 break;
129 133  
130   - default: floatx80_rounding_precision = 80;
  134 + default: rounding_precision = 80;
131 135 }
  136 + set_floatx80_rounding_precision(rounding_precision, &fpa11->fp_status);
132 137 }
133 138  
134 139 /* Emulate the instruction in the opcode. */
... ...
target-arm/nwfpe/fpa11.h
... ... @@ -83,6 +83,7 @@ typedef struct tagFPA11 {
83 83 so we can use it to detect whether this
84 84 instance of the emulator needs to be
85 85 initialised. */
  86 + float_status fp_status; /* QEMU float emulator status */
86 87 } FPA11;
87 88  
88 89 extern FPA11* qemufpa;
... ...
target-arm/nwfpe/fpa11_cpdo.c
... ... @@ -80,10 +80,10 @@ unsigned int EmulateCPDO(const unsigned int opcode)
80 80 {
81 81 if (typeDouble == nType)
82 82 fpa11->fpreg[Fd].fSingle =
83   - float64_to_float32(fpa11->fpreg[Fd].fDouble);
  83 + float64_to_float32(fpa11->fpreg[Fd].fDouble, &fpa11->fp_status);
84 84 else
85 85 fpa11->fpreg[Fd].fSingle =
86   - floatx80_to_float32(fpa11->fpreg[Fd].fExtended);
  86 + floatx80_to_float32(fpa11->fpreg[Fd].fExtended, &fpa11->fp_status);
87 87 }
88 88 break;
89 89  
... ... @@ -91,10 +91,10 @@ unsigned int EmulateCPDO(const unsigned int opcode)
91 91 {
92 92 if (typeSingle == nType)
93 93 fpa11->fpreg[Fd].fDouble =
94   - float32_to_float64(fpa11->fpreg[Fd].fSingle);
  94 + float32_to_float64(fpa11->fpreg[Fd].fSingle, &fpa11->fp_status);
95 95 else
96 96 fpa11->fpreg[Fd].fDouble =
97   - floatx80_to_float64(fpa11->fpreg[Fd].fExtended);
  97 + floatx80_to_float64(fpa11->fpreg[Fd].fExtended, &fpa11->fp_status);
98 98 }
99 99 break;
100 100  
... ... @@ -102,10 +102,10 @@ unsigned int EmulateCPDO(const unsigned int opcode)
102 102 {
103 103 if (typeSingle == nType)
104 104 fpa11->fpreg[Fd].fExtended =
105   - float32_to_floatx80(fpa11->fpreg[Fd].fSingle);
  105 + float32_to_floatx80(fpa11->fpreg[Fd].fSingle, &fpa11->fp_status);
106 106 else
107 107 fpa11->fpreg[Fd].fExtended =
108   - float64_to_floatx80(fpa11->fpreg[Fd].fDouble);
  108 + float64_to_floatx80(fpa11->fpreg[Fd].fDouble, &fpa11->fp_status);
109 109 }
110 110 break;
111 111 }
... ...
target-arm/nwfpe/fpa11_cpdt.c
... ... @@ -106,11 +106,11 @@ void storeSingle(const unsigned int Fn,unsigned int *pMem)
106 106 switch (fpa11->fType[Fn])
107 107 {
108 108 case typeDouble:
109   - val = float64_to_float32(fpa11->fpreg[Fn].fDouble);
  109 + val = float64_to_float32(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
110 110 break;
111 111  
112 112 case typeExtended:
113   - val = floatx80_to_float32(fpa11->fpreg[Fn].fExtended);
  113 + val = floatx80_to_float32(fpa11->fpreg[Fn].fExtended, &fpa11->fp_status);
114 114 break;
115 115  
116 116 default: val = fpa11->fpreg[Fn].fSingle;
... ... @@ -129,11 +129,11 @@ void storeDouble(const unsigned int Fn,unsigned int *pMem)
129 129 switch (fpa11->fType[Fn])
130 130 {
131 131 case typeSingle:
132   - val = float32_to_float64(fpa11->fpreg[Fn].fSingle);
  132 + val = float32_to_float64(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
133 133 break;
134 134  
135 135 case typeExtended:
136   - val = floatx80_to_float64(fpa11->fpreg[Fn].fExtended);
  136 + val = floatx80_to_float64(fpa11->fpreg[Fn].fExtended, &fpa11->fp_status);
137 137 break;
138 138  
139 139 default: val = fpa11->fpreg[Fn].fDouble;
... ... @@ -157,11 +157,11 @@ void storeExtended(const unsigned int Fn,unsigned int *pMem)
157 157 switch (fpa11->fType[Fn])
158 158 {
159 159 case typeSingle:
160   - val = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
  160 + val = float32_to_floatx80(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
161 161 break;
162 162  
163 163 case typeDouble:
164   - val = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
  164 + val = float64_to_floatx80(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
165 165 break;
166 166  
167 167 default: val = fpa11->fpreg[Fn].fExtended;
... ...
target-arm/nwfpe/fpa11_cprt.c
... ... @@ -21,7 +21,6 @@
21 21 */
22 22  
23 23 #include "fpa11.h"
24   -#include "milieu.h"
25 24 #include "softfloat.h"
26 25 #include "fpopcode.h"
27 26 #include "fpa11.inl"
... ... @@ -89,7 +88,7 @@ unsigned int PerformFLT(const unsigned int opcode)
89 88 {
90 89 fpa11->fType[getFn(opcode)] = typeSingle;
91 90 fpa11->fpreg[getFn(opcode)].fSingle =
92   - int32_to_float32(readRegister(getRd(opcode)));
  91 + int32_to_float32(readRegister(getRd(opcode)), &fpa11->fp_status);
93 92 }
94 93 break;
95 94  
... ... @@ -97,7 +96,7 @@ unsigned int PerformFLT(const unsigned int opcode)
97 96 {
98 97 fpa11->fType[getFn(opcode)] = typeDouble;
99 98 fpa11->fpreg[getFn(opcode)].fDouble =
100   - int32_to_float64(readRegister(getRd(opcode)));
  99 + int32_to_float64(readRegister(getRd(opcode)), &fpa11->fp_status);
101 100 }
102 101 break;
103 102  
... ... @@ -105,7 +104,7 @@ unsigned int PerformFLT(const unsigned int opcode)
105 104 {
106 105 fpa11->fType[getFn(opcode)] = typeExtended;
107 106 fpa11->fpreg[getFn(opcode)].fExtended =
108   - int32_to_floatx80(readRegister(getRd(opcode)));
  107 + int32_to_floatx80(readRegister(getRd(opcode)), &fpa11->fp_status);
109 108 }
110 109 break;
111 110  
... ... @@ -128,7 +127,7 @@ unsigned int PerformFIX(const unsigned int opcode)
128 127 case typeSingle:
129 128 {
130 129 writeRegister(getRd(opcode),
131   - float32_to_int32(fpa11->fpreg[Fn].fSingle));
  130 + float32_to_int32(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status));
132 131 }
133 132 break;
134 133  
... ... @@ -136,14 +135,14 @@ unsigned int PerformFIX(const unsigned int opcode)
136 135 {
137 136 //printf("F%d is 0x%llx\n",Fn,fpa11->fpreg[Fn].fDouble);
138 137 writeRegister(getRd(opcode),
139   - float64_to_int32(fpa11->fpreg[Fn].fDouble));
  138 + float64_to_int32(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status));
140 139 }
141 140 break;
142 141  
143 142 case typeExtended:
144 143 {
145 144 writeRegister(getRd(opcode),
146   - floatx80_to_int32(fpa11->fpreg[Fn].fExtended));
  145 + floatx80_to_int32(fpa11->fpreg[Fn].fExtended, &fpa11->fp_status));
147 146 }
148 147 break;
149 148  
... ... @@ -157,22 +156,23 @@ unsigned int PerformFIX(const unsigned int opcode)
157 156 static unsigned int __inline__
158 157 PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
159 158 {
  159 + FPA11 *fpa11 = GET_FPA11();
160 160 unsigned int flags = 0;
161 161  
162 162 /* test for less than condition */
163   - if (floatx80_lt(Fn,Fm))
  163 + if (floatx80_lt(Fn,Fm, &fpa11->fp_status))
164 164 {
165 165 flags |= CC_NEGATIVE;
166 166 }
167 167  
168 168 /* test for equal condition */
169   - if (floatx80_eq(Fn,Fm))
  169 + if (floatx80_eq(Fn,Fm, &fpa11->fp_status))
170 170 {
171 171 flags |= CC_ZERO;
172 172 }
173 173  
174 174 /* test for greater than or equal condition */
175   - if (floatx80_lt(Fm,Fn))
  175 + if (floatx80_lt(Fm,Fn, &fpa11->fp_status))
176 176 {
177 177 flags |= CC_CARRY;
178 178 }
... ... @@ -208,14 +208,14 @@ static unsigned int PerformComparison(const unsigned int opcode)
208 208 //printk("single.\n");
209 209 if (float32_is_nan(fpa11->fpreg[Fn].fSingle))
210 210 goto unordered;
211   - rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
  211 + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle, &fpa11->fp_status);
212 212 break;
213 213  
214 214 case typeDouble:
215 215 //printk("double.\n");
216 216 if (float64_is_nan(fpa11->fpreg[Fn].fDouble))
217 217 goto unordered;
218   - rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
  218 + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble, &fpa11->fp_status);
219 219 break;
220 220  
221 221 case typeExtended:
... ... @@ -244,14 +244,14 @@ static unsigned int PerformComparison(const unsigned int opcode)
244 244 //printk("single.\n");
245 245 if (float32_is_nan(fpa11->fpreg[Fm].fSingle))
246 246 goto unordered;
247   - rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle);
  247 + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle, &fpa11->fp_status);
248 248 break;
249 249  
250 250 case typeDouble:
251 251 //printk("double.\n");
252 252 if (float64_is_nan(fpa11->fpreg[Fm].fDouble))
253 253 goto unordered;
254   - rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble);
  254 + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble, &fpa11->fp_status);
255 255 break;
256 256  
257 257 case typeExtended:
... ... @@ -283,7 +283,7 @@ static unsigned int PerformComparison(const unsigned int opcode)
283 283  
284 284 if (BIT_AC & readFPSR()) flags |= CC_CARRY;
285 285  
286   - if (e_flag) float_raise(float_flag_invalid);
  286 + if (e_flag) float_raise(float_flag_invalid, &fpa11->fp_status);
287 287  
288 288 writeConditionCodes(flags);
289 289 return 1;
... ...
target-arm/nwfpe/fpopcode.c
... ... @@ -27,14 +27,14 @@
27 27 //#include "fpmodule.inl"
28 28  
29 29 const floatx80 floatx80Constant[] = {
30   - { 0x0000, 0x0000000000000000ULL}, /* extended 0.0 */
31   - { 0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */
32   - { 0x4000, 0x8000000000000000ULL}, /* extended 2.0 */
33   - { 0x4000, 0xc000000000000000ULL}, /* extended 3.0 */
34   - { 0x4001, 0x8000000000000000ULL}, /* extended 4.0 */
35   - { 0x4001, 0xa000000000000000ULL}, /* extended 5.0 */
36   - { 0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */
37   - { 0x4002, 0xa000000000000000ULL} /* extended 10.0 */
  30 + { 0x0000000000000000ULL, 0x0000}, /* extended 0.0 */
  31 + { 0x8000000000000000ULL, 0x3fff}, /* extended 1.0 */
  32 + { 0x8000000000000000ULL, 0x4000}, /* extended 2.0 */
  33 + { 0xc000000000000000ULL, 0x4000}, /* extended 3.0 */
  34 + { 0x8000000000000000ULL, 0x4001}, /* extended 4.0 */
  35 + { 0xa000000000000000ULL, 0x4001}, /* extended 5.0 */
  36 + { 0x8000000000000000ULL, 0x3ffe}, /* extended 0.5 */
  37 + { 0xa000000000000000ULL, 0x4002} /* extended 10.0 */
38 38 };
39 39  
40 40 const float64 float64Constant[] = {
... ...
target-arm/nwfpe/milieu.h deleted 100644 → 0
1   -
2   -/*
3   -===============================================================================
4   -
5   -This C header file is part of the SoftFloat IEC/IEEE Floating-point
6   -Arithmetic Package, Release 2.
7   -
8   -Written by John R. Hauser. This work was made possible in part by the
9   -International Computer Science Institute, located at Suite 600, 1947 Center
10   -Street, Berkeley, California 94704. Funding was partially provided by the
11   -National Science Foundation under grant MIP-9311980. The original version
12   -of this code was written as part of a project to build a fixed-point vector
13   -processor in collaboration with the University of California at Berkeley,
14   -overseen by Profs. Nelson Morgan and John Wawrzynek. More information
15   -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16   -arithmetic/softfloat.html'.
17   -
18   -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
19   -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20   -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
21   -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22   -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23   -
24   -Derivative works are acceptable, even for commercial purposes, so long as
25   -(1) they include prominent notice that the work is derivative, and (2) they
26   -include prominent notice akin to these three paragraphs for those parts of
27   -this code that are retained.
28   -
29   -===============================================================================
30   -*/
31   -
32   -/*
33   --------------------------------------------------------------------------------
34   -Include common integer types and flags.
35   --------------------------------------------------------------------------------
36   -*/
37   -#include "ARM-gcc.h"
38   -
39   -/*
40   --------------------------------------------------------------------------------
41   -Symbolic Boolean literals.
42   --------------------------------------------------------------------------------
43   -*/
44   -enum {
45   - FALSE = 0,
46   - TRUE = 1
47   -};
48   -
target-arm/nwfpe/single_cpdo.c
... ... @@ -76,30 +76,30 @@ unsigned int SingleCPDO(const unsigned int opcode)
76 76 {
77 77 /* dyadic opcodes */
78 78 case ADF_CODE:
79   - fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm);
  79 + fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm, &fpa11->fp_status);
80 80 break;
81 81  
82 82 case MUF_CODE:
83 83 case FML_CODE:
84   - fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm);
  84 + fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm, &fpa11->fp_status);
85 85 break;
86 86  
87 87 case SUF_CODE:
88   - fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm);
  88 + fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm, &fpa11->fp_status);
89 89 break;
90 90  
91 91 case RSF_CODE:
92   - fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn);
  92 + fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn, &fpa11->fp_status);
93 93 break;
94 94  
95 95 case DVF_CODE:
96 96 case FDV_CODE:
97   - fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm);
  97 + fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm, &fpa11->fp_status);
98 98 break;
99 99  
100 100 case RDF_CODE:
101 101 case FRD_CODE:
102   - fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn);
  102 + fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn, &fpa11->fp_status);
103 103 break;
104 104  
105 105 #if 0
... ... @@ -113,7 +113,7 @@ unsigned int SingleCPDO(const unsigned int opcode)
113 113 #endif
114 114  
115 115 case RMF_CODE:
116   - fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm);
  116 + fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm, &fpa11->fp_status);
117 117 break;
118 118  
119 119 #if 0
... ... @@ -139,11 +139,11 @@ unsigned int SingleCPDO(const unsigned int opcode)
139 139  
140 140 case RND_CODE:
141 141 case URD_CODE:
142   - fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm);
  142 + fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm, &fpa11->fp_status);
143 143 break;
144 144  
145 145 case SQT_CODE:
146   - fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm);
  146 + fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm, &fpa11->fp_status);
147 147 break;
148 148  
149 149 #if 0
... ...
target-arm/nwfpe/softfloat-macros deleted 100644 → 0
1   -
2   -/*
3   -===============================================================================
4   -
5   -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6   -Arithmetic Package, Release 2.
7   -
8   -Written by John R. Hauser. This work was made possible in part by the
9   -International Computer Science Institute, located at Suite 600, 1947 Center
10   -Street, Berkeley, California 94704. Funding was partially provided by the
11   -National Science Foundation under grant MIP-9311980. The original version
12   -of this code was written as part of a project to build a fixed-point vector
13   -processor in collaboration with the University of California at Berkeley,
14   -overseen by Profs. Nelson Morgan and John Wawrzynek. More information
15   -is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16   -arithmetic/softfloat.html'.
17   -
18   -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
19   -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20   -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
21   -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22   -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23   -
24   -Derivative works are acceptable, even for commercial purposes, so long as
25   -(1) they include prominent notice that the work is derivative, and (2) they
26   -include prominent notice akin to these three paragraphs for those parts of
27   -this code that are retained.
28   -
29   -===============================================================================
30   -*/
31   -
32   -/*
33   --------------------------------------------------------------------------------
34   -Shifts `a' right by the number of bits given in `count'. If any nonzero
35   -bits are shifted off, they are ``jammed'' into the least significant bit of
36   -the result by setting the least significant bit to 1. The value of `count'
37   -can be arbitrarily large; in particular, if `count' is greater than 32, the
38   -result will be either 0 or 1, depending on whether `a' is zero or nonzero.
39   -The result is stored in the location pointed to by `zPtr'.
40   --------------------------------------------------------------------------------
41   -*/
42   -INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
43   -{
44   - bits32 z;
45   - if ( count == 0 ) {
46   - z = a;
47   - }
48   - else if ( count < 32 ) {
49   - z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
50   - }
51   - else {
52   - z = ( a != 0 );
53   - }
54   - *zPtr = z;
55   -}
56   -
57   -/*
58   --------------------------------------------------------------------------------
59   -Shifts `a' right by the number of bits given in `count'. If any nonzero
60   -bits are shifted off, they are ``jammed'' into the least significant bit of
61   -the result by setting the least significant bit to 1. The value of `count'
62   -can be arbitrarily large; in particular, if `count' is greater than 64, the
63   -result will be either 0 or 1, depending on whether `a' is zero or nonzero.
64   -The result is stored in the location pointed to by `zPtr'.
65   --------------------------------------------------------------------------------
66   -*/
67   -INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
68   -{
69   - bits64 z;
70   -
71   -// __asm__("@shift64RightJamming -- start");
72   - if ( count == 0 ) {
73   - z = a;
74   - }
75   - else if ( count < 64 ) {
76   - z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
77   - }
78   - else {
79   - z = ( a != 0 );
80   - }
81   -// __asm__("@shift64RightJamming -- end");
82   - *zPtr = z;
83   -}
84   -
85   -/*
86   --------------------------------------------------------------------------------
87   -Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
88   -_plus_ the number of bits given in `count'. The shifted result is at most
89   -64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
90   -bits shifted off form a second 64-bit result as follows: The _last_ bit
91   -shifted off is the most-significant bit of the extra result, and the other
92   -63 bits of the extra result are all zero if and only if _all_but_the_last_
93   -bits shifted off were all zero. This extra result is stored in the location
94   -pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
95   - (This routine makes more sense if `a0' and `a1' are considered to form a
96   -fixed-point value with binary point between `a0' and `a1'. This fixed-point
97   -value is shifted right by the number of bits given in `count', and the
98   -integer part of the result is returned at the location pointed to by
99   -`z0Ptr'. The fractional part of the result may be slightly corrupted as
100   -described above, and is returned at the location pointed to by `z1Ptr'.)
101   --------------------------------------------------------------------------------
102   -*/
103   -INLINE void
104   - shift64ExtraRightJamming(
105   - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
106   -{
107   - bits64 z0, z1;
108   - int8 negCount = ( - count ) & 63;
109   -
110   - if ( count == 0 ) {
111   - z1 = a1;
112   - z0 = a0;
113   - }
114   - else if ( count < 64 ) {
115   - z1 = ( a0<<negCount ) | ( a1 != 0 );
116   - z0 = a0>>count;
117   - }
118   - else {
119   - if ( count == 64 ) {
120   - z1 = a0 | ( a1 != 0 );
121   - }
122   - else {
123   - z1 = ( ( a0 | a1 ) != 0 );
124   - }
125   - z0 = 0;
126   - }
127   - *z1Ptr = z1;
128   - *z0Ptr = z0;
129   -
130   -}
131   -
132   -/*
133   --------------------------------------------------------------------------------
134   -Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
135   -number of bits given in `count'. Any bits shifted off are lost. The value
136   -of `count' can be arbitrarily large; in particular, if `count' is greater
137   -than 128, the result will be 0. The result is broken into two 64-bit pieces
138   -which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
139   --------------------------------------------------------------------------------
140   -*/
141   -INLINE void
142   - shift128Right(
143   - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
144   -{
145   - bits64 z0, z1;
146   - int8 negCount = ( - count ) & 63;
147   -
148   - if ( count == 0 ) {
149   - z1 = a1;
150   - z0 = a0;
151   - }
152   - else if ( count < 64 ) {
153   - z1 = ( a0<<negCount ) | ( a1>>count );
154   - z0 = a0>>count;
155   - }
156   - else {
157   - z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
158   - z0 = 0;
159   - }
160   - *z1Ptr = z1;
161   - *z0Ptr = z0;
162   -
163   -}
164   -
165   -/*
166   --------------------------------------------------------------------------------
167   -Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
168   -number of bits given in `count'. If any nonzero bits are shifted off, they
169   -are ``jammed'' into the least significant bit of the result by setting the
170   -least significant bit to 1. The value of `count' can be arbitrarily large;
171   -in particular, if `count' is greater than 128, the result will be either 0
172   -or 1, depending on whether the concatenation of `a0' and `a1' is zero or
173   -nonzero. The result is broken into two 64-bit pieces which are stored at
174   -the locations pointed to by `z0Ptr' and `z1Ptr'.
175   --------------------------------------------------------------------------------
176   -*/
177   -INLINE void
178   - shift128RightJamming(
179   - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
180   -{
181   - bits64 z0, z1;
182   - int8 negCount = ( - count ) & 63;
183   -
184   - if ( count == 0 ) {
185   - z1 = a1;
186   - z0 = a0;
187   - }
188   - else if ( count < 64 ) {
189   - z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
190   - z0 = a0>>count;
191   - }
192   - else {
193   - if ( count == 64 ) {
194   - z1 = a0 | ( a1 != 0 );
195   - }
196   - else if ( count < 128 ) {
197   - z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
198   - }
199   - else {
200   - z1 = ( ( a0 | a1 ) != 0 );
201   - }
202   - z0 = 0;
203   - }
204   - *z1Ptr = z1;
205   - *z0Ptr = z0;
206   -
207   -}
208   -
209   -/*
210   --------------------------------------------------------------------------------
211   -Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
212   -by 64 _plus_ the number of bits given in `count'. The shifted result is
213   -at most 128 nonzero bits; these are broken into two 64-bit pieces which are
214   -stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
215   -off form a third 64-bit result as follows: The _last_ bit shifted off is
216   -the most-significant bit of the extra result, and the other 63 bits of the
217   -extra result are all zero if and only if _all_but_the_last_ bits shifted off
218   -were all zero. This extra result is stored in the location pointed to by
219   -`z2Ptr'. The value of `count' can be arbitrarily large.
220   - (This routine makes more sense if `a0', `a1', and `a2' are considered
221   -to form a fixed-point value with binary point between `a1' and `a2'. This
222   -fixed-point value is shifted right by the number of bits given in `count',
223   -and the integer part of the result is returned at the locations pointed to
224   -by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
225   -corrupted as described above, and is returned at the location pointed to by
226   -`z2Ptr'.)
227   --------------------------------------------------------------------------------
228   -*/
229   -INLINE void
230   - shift128ExtraRightJamming(
231   - bits64 a0,
232   - bits64 a1,
233   - bits64 a2,
234   - int16 count,
235   - bits64 *z0Ptr,
236   - bits64 *z1Ptr,
237   - bits64 *z2Ptr
238   - )
239   -{
240   - bits64 z0, z1, z2;
241   - int8 negCount = ( - count ) & 63;
242   -
243   - if ( count == 0 ) {
244   - z2 = a2;
245   - z1 = a1;
246   - z0 = a0;
247   - }
248   - else {
249   - if ( count < 64 ) {
250   - z2 = a1<<negCount;
251   - z1 = ( a0<<negCount ) | ( a1>>count );
252   - z0 = a0>>count;
253   - }
254   - else {
255   - if ( count == 64 ) {
256   - z2 = a1;
257   - z1 = a0;
258   - }
259   - else {
260   - a2 |= a1;
261   - if ( count < 128 ) {
262   - z2 = a0<<negCount;
263   - z1 = a0>>( count & 63 );
264   - }
265   - else {
266   - z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
267   - z1 = 0;
268   - }
269   - }
270   - z0 = 0;
271   - }
272   - z2 |= ( a2 != 0 );
273   - }
274   - *z2Ptr = z2;
275   - *z1Ptr = z1;
276   - *z0Ptr = z0;
277   -
278   -}
279   -
280   -/*
281   --------------------------------------------------------------------------------
282   -Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
283   -number of bits given in `count'. Any bits shifted off are lost. The value
284   -of `count' must be less than 64. The result is broken into two 64-bit
285   -pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
286   --------------------------------------------------------------------------------
287   -*/
288   -INLINE void
289   - shortShift128Left(
290   - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
291   -{
292   -
293   - *z1Ptr = a1<<count;
294   - *z0Ptr =
295   - ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
296   -
297   -}
298   -
299   -/*
300   --------------------------------------------------------------------------------
301   -Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
302   -by the number of bits given in `count'. Any bits shifted off are lost.
303   -The value of `count' must be less than 64. The result is broken into three
304   -64-bit pieces which are stored at the locations pointed to by `z0Ptr',
305   -`z1Ptr', and `z2Ptr'.
306   --------------------------------------------------------------------------------
307   -*/
308   -INLINE void
309   - shortShift192Left(
310   - bits64 a0,
311   - bits64 a1,
312   - bits64 a2,
313   - int16 count,
314   - bits64 *z0Ptr,
315   - bits64 *z1Ptr,
316   - bits64 *z2Ptr
317   - )
318   -{
319   - bits64 z0, z1, z2;
320   - int8 negCount;
321   -
322   - z2 = a2<<count;
323   - z1 = a1<<count;
324   - z0 = a0<<count;
325   - if ( 0 < count ) {
326   - negCount = ( ( - count ) & 63 );
327   - z1 |= a2>>negCount;
328   - z0 |= a1>>negCount;
329   - }
330   - *z2Ptr = z2;
331   - *z1Ptr = z1;
332   - *z0Ptr = z0;
333   -
334   -}
335   -
336   -/*
337   --------------------------------------------------------------------------------
338   -Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
339   -value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
340   -any carry out is lost. The result is broken into two 64-bit pieces which
341   -are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
342   --------------------------------------------------------------------------------
343   -*/
344   -INLINE void
345   - add128(
346   - bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
347   -{
348   - bits64 z1;
349   -
350   - z1 = a1 + b1;
351   - *z1Ptr = z1;
352   - *z0Ptr = a0 + b0 + ( z1 < a1 );
353   -
354   -}
355   -
356   -/*
357   --------------------------------------------------------------------------------
358   -Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
359   -192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
360   -modulo 2^192, so any carry out is lost. The result is broken into three
361   -64-bit pieces which are stored at the locations pointed to by `z0Ptr',
362   -`z1Ptr', and `z2Ptr'.
363   --------------------------------------------------------------------------------
364   -*/
365   -INLINE void
366   - add192(
367   - bits64 a0,
368   - bits64 a1,
369   - bits64 a2,
370   - bits64 b0,
371   - bits64 b1,
372   - bits64 b2,
373   - bits64 *z0Ptr,
374   - bits64 *z1Ptr,
375   - bits64 *z2Ptr
376   - )
377   -{
378   - bits64 z0, z1, z2;
379   - int8 carry0, carry1;
380   -
381   - z2 = a2 + b2;
382   - carry1 = ( z2 < a2 );
383   - z1 = a1 + b1;
384   - carry0 = ( z1 < a1 );
385   - z0 = a0 + b0;
386   - z1 += carry1;
387   - z0 += ( z1 < carry1 );
388   - z0 += carry0;
389   - *z2Ptr = z2;
390   - *z1Ptr = z1;
391   - *z0Ptr = z0;
392   -
393   -}
394   -
395   -/*
396   --------------------------------------------------------------------------------
397   -Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
398   -128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
399   -2^128, so any borrow out (carry out) is lost. The result is broken into two
400   -64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
401   -`z1Ptr'.
402   --------------------------------------------------------------------------------
403   -*/
404   -INLINE void
405   - sub128(
406   - bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
407   -{
408   -
409   - *z1Ptr = a1 - b1;
410   - *z0Ptr = a0 - b0 - ( a1 < b1 );
411   -
412   -}
413   -
414   -/*
415   --------------------------------------------------------------------------------
416   -Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
417   -from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
418   -Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
419   -result is broken into three 64-bit pieces which are stored at the locations
420   -pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
421   --------------------------------------------------------------------------------
422   -*/
423   -INLINE void
424   - sub192(
425   - bits64 a0,
426   - bits64 a1,
427   - bits64 a2,
428   - bits64 b0,
429   - bits64 b1,
430   - bits64 b2,
431   - bits64 *z0Ptr,
432   - bits64 *z1Ptr,
433   - bits64 *z2Ptr
434   - )
435   -{
436   - bits64 z0, z1, z2;
437   - int8 borrow0, borrow1;
438   -
439   - z2 = a2 - b2;
440   - borrow1 = ( a2 < b2 );
441   - z1 = a1 - b1;
442   - borrow0 = ( a1 < b1 );
443   - z0 = a0 - b0;
444   - z0 -= ( z1 < borrow1 );
445   - z1 -= borrow1;
446   - z0 -= borrow0;
447   - *z2Ptr = z2;
448   - *z1Ptr = z1;
449   - *z0Ptr = z0;
450   -
451   -}
452   -
453   -/*
454   --------------------------------------------------------------------------------
455   -Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
456   -into two 64-bit pieces which are stored at the locations pointed to by
457   -`z0Ptr' and `z1Ptr'.
458   --------------------------------------------------------------------------------
459   -*/
460   -INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
461   -{
462   - bits32 aHigh, aLow, bHigh, bLow;
463   - bits64 z0, zMiddleA, zMiddleB, z1;
464   -
465   - aLow = a;
466   - aHigh = a>>32;
467   - bLow = b;
468   - bHigh = b>>32;
469   - z1 = ( (bits64) aLow ) * bLow;
470   - zMiddleA = ( (bits64) aLow ) * bHigh;
471   - zMiddleB = ( (bits64) aHigh ) * bLow;
472   - z0 = ( (bits64) aHigh ) * bHigh;
473   - zMiddleA += zMiddleB;
474   - z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
475   - zMiddleA <<= 32;
476   - z1 += zMiddleA;
477   - z0 += ( z1 < zMiddleA );
478   - *z1Ptr = z1;
479   - *z0Ptr = z0;
480   -
481   -}
482   -
483   -/*
484   --------------------------------------------------------------------------------
485   -Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
486   -obtain a 192-bit product. The product is broken into three 64-bit pieces
487   -which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
488   -`z2Ptr'.
489   --------------------------------------------------------------------------------
490   -*/
491   -INLINE void
492   - mul128By64To192(
493   - bits64 a0,
494   - bits64 a1,
495   - bits64 b,
496   - bits64 *z0Ptr,
497   - bits64 *z1Ptr,
498   - bits64 *z2Ptr
499   - )
500   -{
501   - bits64 z0, z1, z2, more1;
502   -
503   - mul64To128( a1, b, &z1, &z2 );
504   - mul64To128( a0, b, &z0, &more1 );
505   - add128( z0, more1, 0, z1, &z0, &z1 );
506   - *z2Ptr = z2;
507   - *z1Ptr = z1;
508   - *z0Ptr = z0;
509   -
510   -}
511   -
512   -/*
513   --------------------------------------------------------------------------------
514   -Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
515   -128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
516   -product. The product is broken into four 64-bit pieces which are stored at
517   -the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
518   --------------------------------------------------------------------------------
519   -*/
520   -INLINE void
521   - mul128To256(
522   - bits64 a0,
523   - bits64 a1,
524   - bits64 b0,
525   - bits64 b1,
526   - bits64 *z0Ptr,
527   - bits64 *z1Ptr,
528   - bits64 *z2Ptr,
529   - bits64 *z3Ptr
530   - )
531   -{
532   - bits64 z0, z1, z2, z3;
533   - bits64 more1, more2;
534   -
535   - mul64To128( a1, b1, &z2, &z3 );
536   - mul64To128( a1, b0, &z1, &more2 );
537   - add128( z1, more2, 0, z2, &z1, &z2 );
538   - mul64To128( a0, b0, &z0, &more1 );
539   - add128( z0, more1, 0, z1, &z0, &z1 );
540   - mul64To128( a0, b1, &more1, &more2 );
541   - add128( more1, more2, 0, z2, &more1, &z2 );
542   - add128( z0, z1, 0, more1, &z0, &z1 );
543   - *z3Ptr = z3;
544   - *z2Ptr = z2;
545   - *z1Ptr = z1;
546   - *z0Ptr = z0;
547   -
548   -}
549   -
550   -/*
551   --------------------------------------------------------------------------------
552   -Returns an approximation to the 64-bit integer quotient obtained by dividing
553   -`b' into the 128-bit value formed by concatenating `a0' and `a1'. The
554   -divisor `b' must be at least 2^63. If q is the exact quotient truncated
555   -toward zero, the approximation returned lies between q and q + 2 inclusive.
556   -If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
557   -unsigned integer is returned.
558   --------------------------------------------------------------------------------
559   -*/
560   -static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
561   -{
562   - bits64 b0, b1;
563   - bits64 rem0, rem1, term0, term1;
564   - bits64 z;
565   - if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
566   - b0 = b>>32;
567   - z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
568   - mul64To128( b, z, &term0, &term1 );
569   - sub128( a0, a1, term0, term1, &rem0, &rem1 );
570   - while ( ( (sbits64) rem0 ) < 0 ) {
571   - z -= LIT64( 0x100000000 );
572   - b1 = b<<32;
573   - add128( rem0, rem1, b0, b1, &rem0, &rem1 );
574   - }
575   - rem0 = ( rem0<<32 ) | ( rem1>>32 );
576   - z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
577   - return z;
578   -
579   -}
580   -
581   -/*
582   --------------------------------------------------------------------------------
583   -Returns an approximation to the square root of the 32-bit significand given
584   -by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
585   -`aExp' (the least significant bit) is 1, the integer returned approximates
586   -2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
587   -is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
588   -case, the approximation returned lies strictly within +/-2 of the exact
589   -value.
590   --------------------------------------------------------------------------------
591   -*/
592   -static bits32 estimateSqrt32( int16 aExp, bits32 a )
593   -{
594   - static const bits16 sqrtOddAdjustments[] = {
595   - 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
596   - 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
597   - };
598   - static const bits16 sqrtEvenAdjustments[] = {
599   - 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
600   - 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
601   - };
602   - int8 index;
603   - bits32 z;
604   -
605   - index = ( a>>27 ) & 15;
606   - if ( aExp & 1 ) {
607   - z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
608   - z = ( ( a / z )<<14 ) + ( z<<15 );
609   - a >>= 1;
610   - }
611   - else {
612   - z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
613   - z = a / z + z;
614   - z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
615   - if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
616   - }
617   - return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
618   -
619   -}
620   -
621   -/*
622   --------------------------------------------------------------------------------
623   -Returns the number of leading 0 bits before the most-significant 1 bit
624   -of `a'. If `a' is zero, 32 is returned.
625   --------------------------------------------------------------------------------
626   -*/
627   -static int8 countLeadingZeros32( bits32 a )
628   -{
629   - static const int8 countLeadingZerosHigh[] = {
630   - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
631   - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
632   - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
633   - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
634   - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
635   - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
636   - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
637   - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
638   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
639   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
640   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
641   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
642   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
643   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645   - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
646   - };
647   - int8 shiftCount;
648   -
649   - shiftCount = 0;
650   - if ( a < 0x10000 ) {
651   - shiftCount += 16;
652   - a <<= 16;
653   - }
654   - if ( a < 0x1000000 ) {
655   - shiftCount += 8;
656   - a <<= 8;
657   - }
658   - shiftCount += countLeadingZerosHigh[ a>>24 ];
659   - return shiftCount;
660   -
661   -}
662   -
663   -/*
664   --------------------------------------------------------------------------------
665   -Returns the number of leading 0 bits before the most-significant 1 bit
666   -of `a'. If `a' is zero, 64 is returned.
667   --------------------------------------------------------------------------------
668   -*/
669   -static int8 countLeadingZeros64( bits64 a )
670   -{
671   - int8 shiftCount;
672   -
673   - shiftCount = 0;
674   - if ( a < ( (bits64) 1 )<<32 ) {
675   - shiftCount += 32;
676   - }
677   - else {
678   - a >>= 32;
679   - }
680   - shiftCount += countLeadingZeros32( a );
681   - return shiftCount;
682   -
683   -}
684   -
685   -/*
686   --------------------------------------------------------------------------------
687   -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
688   -is equal to the 128-bit value formed by concatenating `b0' and `b1'.
689   -Otherwise, returns 0.
690   --------------------------------------------------------------------------------
691   -*/
692   -INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
693   -{
694   -
695   - return ( a0 == b0 ) && ( a1 == b1 );
696   -
697   -}
698   -
699   -/*
700   --------------------------------------------------------------------------------
701   -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
702   -than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
703   -Otherwise, returns 0.
704   --------------------------------------------------------------------------------
705   -*/
706   -INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
707   -{
708   -
709   - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
710   -
711   -}
712   -
713   -/*
714   --------------------------------------------------------------------------------
715   -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
716   -than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
717   -returns 0.
718   --------------------------------------------------------------------------------
719   -*/
720   -INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
721   -{
722   -
723   - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
724   -
725   -}
726   -
727   -/*
728   --------------------------------------------------------------------------------
729   -Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
730   -not equal to the 128-bit value formed by concatenating `b0' and `b1'.
731   -Otherwise, returns 0.
732   --------------------------------------------------------------------------------
733   -*/
734   -INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
735   -{
736   -
737   - return ( a0 != b0 ) || ( a1 != b1 );
738   -
739   -}
740   -
target-arm/nwfpe/softfloat-specialize deleted 100644 → 0
1   -
2   -/*
3   -===============================================================================
4   -
5   -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
6   -Arithmetic Package, Release 2.
7   -
8   -Written by John R. Hauser. This work was made possible in part by the
9   -International Computer Science Institute, located at Suite 600, 1947 Center
10   -Street, Berkeley, California 94704. Funding was partially provided by the
11   -National Science Foundation under grant MIP-9311980. The original version
12   -of this code was written as part of a project to build a fixed-point vector
13   -processor in collaboration with the University of California at Berkeley,
14   -overseen by Profs. Nelson Morgan and John Wawrzynek. More information
15   -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16   -arithmetic/softfloat.html'.
17   -
18   -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
19   -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20   -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
21   -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22   -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23   -
24   -Derivative works are acceptable, even for commercial purposes, so long as
25   -(1) they include prominent notice that the work is derivative, and (2) they
26   -include prominent notice akin to these three paragraphs for those parts of
27   -this code that are retained.
28   -
29   -===============================================================================
30   -*/
31   -
32   -/*
33   --------------------------------------------------------------------------------
34   -Underflow tininess-detection mode, statically initialized to default value.
35   -(The declaration in `softfloat.h' must match the `int8' type here.)
36   --------------------------------------------------------------------------------
37   -*/
38   -int8 float_detect_tininess = float_tininess_after_rounding;
39   -
40   -/*
41   --------------------------------------------------------------------------------
42   -Raises the exceptions specified by `flags'. Floating-point traps can be
43   -defined here if desired. It is currently not possible for such a trap to
44   -substitute a result value. If traps are not implemented, this routine
45   -should be simply `float_exception_flags |= flags;'.
46   -
47   -ScottB: November 4, 1998
48   -Moved this function out of softfloat-specialize into fpmodule.c.
49   -This effectively isolates all the changes required for integrating with the
50   -Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying
51   -fpmodule.c to integrate with the NetBSD kernel (I hope!).
52   --------------------------------------------------------------------------------
53   -*/
54   -void float_raise( int8 flags )
55   -{
56   - float_exception_flags |= flags;
57   -}
58   -
59   -/*
60   --------------------------------------------------------------------------------
61   -Internal canonical NaN format.
62   --------------------------------------------------------------------------------
63   -*/
64   -typedef struct {
65   - flag sign;
66   - bits64 high, low;
67   -} commonNaNT;
68   -
69   -/*
70   --------------------------------------------------------------------------------
71   -The pattern for a default generated single-precision NaN.
72   --------------------------------------------------------------------------------
73   -*/
74   -#define float32_default_nan 0xFFFFFFFF
75   -
76   -/*
77   --------------------------------------------------------------------------------
78   -Returns 1 if the single-precision floating-point value `a' is a NaN;
79   -otherwise returns 0.
80   --------------------------------------------------------------------------------
81   -*/
82   -flag float32_is_nan( float32 a )
83   -{
84   -
85   - return ( 0xFF000000 < (bits32) ( a<<1 ) );
86   -
87   -}
88   -
89   -/*
90   --------------------------------------------------------------------------------
91   -Returns 1 if the single-precision floating-point value `a' is a signaling
92   -NaN; otherwise returns 0.
93   --------------------------------------------------------------------------------
94   -*/
95   -flag float32_is_signaling_nan( float32 a )
96   -{
97   -
98   - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
99   -
100   -}
101   -
102   -/*
103   --------------------------------------------------------------------------------
104   -Returns the result of converting the single-precision floating-point NaN
105   -`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
106   -exception is raised.
107   --------------------------------------------------------------------------------
108   -*/
109   -static commonNaNT float32ToCommonNaN( float32 a )
110   -{
111   - commonNaNT z;
112   -
113   - if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
114   - z.sign = a>>31;
115   - z.low = 0;
116   - z.high = ( (bits64) a )<<41;
117   - return z;
118   -
119   -}
120   -
121   -/*
122   --------------------------------------------------------------------------------
123   -Returns the result of converting the canonical NaN `a' to the single-
124   -precision floating-point format.
125   --------------------------------------------------------------------------------
126   -*/
127   -static float32 commonNaNToFloat32( commonNaNT a )
128   -{
129   -
130   - return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
131   -
132   -}
133   -
134   -/*
135   --------------------------------------------------------------------------------
136   -Takes two single-precision floating-point values `a' and `b', one of which
137   -is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
138   -signaling NaN, the invalid exception is raised.
139   --------------------------------------------------------------------------------
140   -*/
141   -static float32 propagateFloat32NaN( float32 a, float32 b )
142   -{
143   - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
144   -
145   - aIsNaN = float32_is_nan( a );
146   - aIsSignalingNaN = float32_is_signaling_nan( a );
147   - bIsNaN = float32_is_nan( b );
148   - bIsSignalingNaN = float32_is_signaling_nan( b );
149   - a |= 0x00400000;
150   - b |= 0x00400000;
151   - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
152   - if ( aIsNaN ) {
153   - return ( aIsSignalingNaN & bIsNaN ) ? b : a;
154   - }
155   - else {
156   - return b;
157   - }
158   -
159   -}
160   -
161   -/*
162   --------------------------------------------------------------------------------
163   -The pattern for a default generated double-precision NaN.
164   --------------------------------------------------------------------------------
165   -*/
166   -#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
167   -
168   -/*
169   --------------------------------------------------------------------------------
170   -Returns 1 if the double-precision floating-point value `a' is a NaN;
171   -otherwise returns 0.
172   --------------------------------------------------------------------------------
173   -*/
174   -flag float64_is_nan( float64 a )
175   -{
176   -
177   - return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
178   -
179   -}
180   -
181   -/*
182   --------------------------------------------------------------------------------
183   -Returns 1 if the double-precision floating-point value `a' is a signaling
184   -NaN; otherwise returns 0.
185   --------------------------------------------------------------------------------
186   -*/
187   -flag float64_is_signaling_nan( float64 a )
188   -{
189   -
190   - return
191   - ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
192   - && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
193   -
194   -}
195   -
196   -/*
197   --------------------------------------------------------------------------------
198   -Returns the result of converting the double-precision floating-point NaN
199   -`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
200   -exception is raised.
201   --------------------------------------------------------------------------------
202   -*/
203   -static commonNaNT float64ToCommonNaN( float64 a )
204   -{
205   - commonNaNT z;
206   -
207   - if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
208   - z.sign = a>>63;
209   - z.low = 0;
210   - z.high = a<<12;
211   - return z;
212   -
213   -}
214   -
215   -/*
216   --------------------------------------------------------------------------------
217   -Returns the result of converting the canonical NaN `a' to the double-
218   -precision floating-point format.
219   --------------------------------------------------------------------------------
220   -*/
221   -static float64 commonNaNToFloat64( commonNaNT a )
222   -{
223   -
224   - return
225   - ( ( (bits64) a.sign )<<63 )
226   - | LIT64( 0x7FF8000000000000 )
227   - | ( a.high>>12 );
228   -
229   -}
230   -
231   -/*
232   --------------------------------------------------------------------------------
233   -Takes two double-precision floating-point values `a' and `b', one of which
234   -is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
235   -signaling NaN, the invalid exception is raised.
236   --------------------------------------------------------------------------------
237   -*/
238   -static float64 propagateFloat64NaN( float64 a, float64 b )
239   -{
240   - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
241   -
242   - aIsNaN = float64_is_nan( a );
243   - aIsSignalingNaN = float64_is_signaling_nan( a );
244   - bIsNaN = float64_is_nan( b );
245   - bIsSignalingNaN = float64_is_signaling_nan( b );
246   - a |= LIT64( 0x0008000000000000 );
247   - b |= LIT64( 0x0008000000000000 );
248   - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
249   - if ( aIsNaN ) {
250   - return ( aIsSignalingNaN & bIsNaN ) ? b : a;
251   - }
252   - else {
253   - return b;
254   - }
255   -
256   -}
257   -
258   -#ifdef FLOATX80
259   -
260   -/*
261   --------------------------------------------------------------------------------
262   -The pattern for a default generated extended double-precision NaN. The
263   -`high' and `low' values hold the most- and least-significant bits,
264   -respectively.
265   --------------------------------------------------------------------------------
266   -*/
267   -#define floatx80_default_nan_high 0xFFFF
268   -#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
269   -
270   -/*
271   --------------------------------------------------------------------------------
272   -Returns 1 if the extended double-precision floating-point value `a' is a
273   -NaN; otherwise returns 0.
274   --------------------------------------------------------------------------------
275   -*/
276   -flag floatx80_is_nan( floatx80 a )
277   -{
278   -
279   - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
280   -
281   -}
282   -
283   -/*
284   --------------------------------------------------------------------------------
285   -Returns 1 if the extended double-precision floating-point value `a' is a
286   -signaling NaN; otherwise returns 0.
287   --------------------------------------------------------------------------------
288   -*/
289   -flag floatx80_is_signaling_nan( floatx80 a )
290   -{
291   - //register int lr;
292   - bits64 aLow;
293   -
294   - //__asm__("mov %0, lr" : : "g" (lr));
295   - //fp_printk("floatx80_is_signalling_nan() called from 0x%08x\n",lr);
296   - aLow = a.low & ~ LIT64( 0x4000000000000000 );
297   - return
298   - ( ( a.high & 0x7FFF ) == 0x7FFF )
299   - && (bits64) ( aLow<<1 )
300   - && ( a.low == aLow );
301   -
302   -}
303   -
304   -/*
305   --------------------------------------------------------------------------------
306   -Returns the result of converting the extended double-precision floating-
307   -point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
308   -invalid exception is raised.
309   --------------------------------------------------------------------------------
310   -*/
311   -static commonNaNT floatx80ToCommonNaN( floatx80 a )
312   -{
313   - commonNaNT z;
314   -
315   - if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
316   - z.sign = a.high>>15;
317   - z.low = 0;
318   - z.high = a.low<<1;
319   - return z;
320   -
321   -}
322   -
323   -/*
324   --------------------------------------------------------------------------------
325   -Returns the result of converting the canonical NaN `a' to the extended
326   -double-precision floating-point format.
327   --------------------------------------------------------------------------------
328   -*/
329   -static floatx80 commonNaNToFloatx80( commonNaNT a )
330   -{
331   - floatx80 z;
332   -
333   - z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
334   - z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
335   - return z;
336   -
337   -}
338   -
339   -/*
340   --------------------------------------------------------------------------------
341   -Takes two extended double-precision floating-point values `a' and `b', one
342   -of which is a NaN, and returns the appropriate NaN result. If either `a' or
343   -`b' is a signaling NaN, the invalid exception is raised.
344   --------------------------------------------------------------------------------
345   -*/
346   -static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
347   -{
348   - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
349   -
350   - aIsNaN = floatx80_is_nan( a );
351   - aIsSignalingNaN = floatx80_is_signaling_nan( a );
352   - bIsNaN = floatx80_is_nan( b );
353   - bIsSignalingNaN = floatx80_is_signaling_nan( b );
354   - a.low |= LIT64( 0xC000000000000000 );
355   - b.low |= LIT64( 0xC000000000000000 );
356   - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
357   - if ( aIsNaN ) {
358   - return ( aIsSignalingNaN & bIsNaN ) ? b : a;
359   - }
360   - else {
361   - return b;
362   - }
363   -
364   -}
365   -
366   -#endif
target-arm/nwfpe/softfloat.c deleted 100644 → 0
1   -/*
2   -===============================================================================
3   -
4   -This C source file is part of the SoftFloat IEC/IEEE Floating-point
5   -Arithmetic Package, Release 2.
6   -
7   -Written by John R. Hauser. This work was made possible in part by the
8   -International Computer Science Institute, located at Suite 600, 1947 Center
9   -Street, Berkeley, California 94704. Funding was partially provided by the
10   -National Science Foundation under grant MIP-9311980. The original version
11   -of this code was written as part of a project to build a fixed-point vector
12   -processor in collaboration with the University of California at Berkeley,
13   -overseen by Profs. Nelson Morgan and John Wawrzynek. More information
14   -is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
15   -arithmetic/softfloat.html'.
16   -
17   -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
18   -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
19   -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
20   -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
21   -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
22   -
23   -Derivative works are acceptable, even for commercial purposes, so long as
24   -(1) they include prominent notice that the work is derivative, and (2) they
25   -include prominent notice akin to these three paragraphs for those parts of
26   -this code that are retained.
27   -
28   -===============================================================================
29   -*/
30   -
31   -#include "fpa11.h"
32   -#include "milieu.h"
33   -#include "softfloat.h"
34   -
35   -/*
36   --------------------------------------------------------------------------------
37   -Floating-point rounding mode, extended double-precision rounding precision,
38   -and exception flags.
39   --------------------------------------------------------------------------------
40   -*/
41   -int8 float_rounding_mode = float_round_nearest_even;
42   -int8 floatx80_rounding_precision = 80;
43   -int8 float_exception_flags;
44   -
45   -/*
46   --------------------------------------------------------------------------------
47   -Primitive arithmetic functions, including multi-word arithmetic, and
48   -division and square root approximations. (Can be specialized to target if
49   -desired.)
50   --------------------------------------------------------------------------------
51   -*/
52   -#include "softfloat-macros"
53   -
54   -/*
55   --------------------------------------------------------------------------------
56   -Functions and definitions to determine: (1) whether tininess for underflow
57   -is detected before or after rounding by default, (2) what (if anything)
58   -happens when exceptions are raised, (3) how signaling NaNs are distinguished
59   -from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
60   -are propagated from function inputs to output. These details are target-
61   -specific.
62   --------------------------------------------------------------------------------
63   -*/
64   -#include "softfloat-specialize"
65   -
66   -/*
67   --------------------------------------------------------------------------------
68   -Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
69   -and 7, and returns the properly rounded 32-bit integer corresponding to the
70   -input. If `zSign' is nonzero, the input is negated before being converted
71   -to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
72   -input is simply rounded to an integer, with the inexact exception raised if
73   -the input cannot be represented exactly as an integer. If the fixed-point
74   -input is too large, however, the invalid exception is raised and the largest
75   -positive or negative integer is returned.
76   --------------------------------------------------------------------------------
77   -*/
78   -static int32 roundAndPackInt32( flag zSign, bits64 absZ )
79   -{
80   - int8 roundingMode;
81   - flag roundNearestEven;
82   - int8 roundIncrement, roundBits;
83   - int32 z;
84   -
85   - roundingMode = float_rounding_mode;
86   - roundNearestEven = ( roundingMode == float_round_nearest_even );
87   - roundIncrement = 0x40;
88   - if ( ! roundNearestEven ) {
89   - if ( roundingMode == float_round_to_zero ) {
90   - roundIncrement = 0;
91   - }
92   - else {
93   - roundIncrement = 0x7F;
94   - if ( zSign ) {
95   - if ( roundingMode == float_round_up ) roundIncrement = 0;
96   - }
97   - else {
98   - if ( roundingMode == float_round_down ) roundIncrement = 0;
99   - }
100   - }
101   - }
102   - roundBits = absZ & 0x7F;
103   - absZ = ( absZ + roundIncrement )>>7;
104   - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
105   - z = absZ;
106   - if ( zSign ) z = - z;
107   - if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
108   - float_exception_flags |= float_flag_invalid;
109   - return zSign ? 0x80000000 : 0x7FFFFFFF;
110   - }
111   - if ( roundBits ) float_exception_flags |= float_flag_inexact;
112   - return z;
113   -
114   -}
115   -
116   -/*
117   --------------------------------------------------------------------------------
118   -Returns the fraction bits of the single-precision floating-point value `a'.
119   --------------------------------------------------------------------------------
120   -*/
121   -INLINE bits32 extractFloat32Frac( float32 a )
122   -{
123   -
124   - return a & 0x007FFFFF;
125   -
126   -}
127   -
128   -/*
129   --------------------------------------------------------------------------------
130   -Returns the exponent bits of the single-precision floating-point value `a'.
131   --------------------------------------------------------------------------------
132   -*/
133   -INLINE int16 extractFloat32Exp( float32 a )
134   -{
135   -
136   - return ( a>>23 ) & 0xFF;
137   -
138   -}
139   -
140   -/*
141   --------------------------------------------------------------------------------
142   -Returns the sign bit of the single-precision floating-point value `a'.
143   --------------------------------------------------------------------------------
144   -*/
145   -INLINE flag extractFloat32Sign( float32 a )
146   -{
147   -
148   - return a>>31;
149   -
150   -}
151   -
152   -/*
153   --------------------------------------------------------------------------------
154   -Normalizes the subnormal single-precision floating-point value represented
155   -by the denormalized significand `aSig'. The normalized exponent and
156   -significand are stored at the locations pointed to by `zExpPtr' and
157   -`zSigPtr', respectively.
158   --------------------------------------------------------------------------------
159   -*/
160   -static void
161   - normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
162   -{
163   - int8 shiftCount;
164   -
165   - shiftCount = countLeadingZeros32( aSig ) - 8;
166   - *zSigPtr = aSig<<shiftCount;
167   - *zExpPtr = 1 - shiftCount;
168   -
169   -}
170   -
171   -/*
172   --------------------------------------------------------------------------------
173   -Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
174   -single-precision floating-point value, returning the result. After being
175   -shifted into the proper positions, the three fields are simply added
176   -together to form the result. This means that any integer portion of `zSig'
177   -will be added into the exponent. Since a properly normalized significand
178   -will have an integer portion equal to 1, the `zExp' input should be 1 less
179   -than the desired result exponent whenever `zSig' is a complete, normalized
180   -significand.
181   --------------------------------------------------------------------------------
182   -*/
183   -INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
184   -{
185   - return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
186   -}
187   -
188   -/*
189   --------------------------------------------------------------------------------
190   -Takes an abstract floating-point value having sign `zSign', exponent `zExp',
191   -and significand `zSig', and returns the proper single-precision floating-
192   -point value corresponding to the abstract input. Ordinarily, the abstract
193   -value is simply rounded and packed into the single-precision format, with
194   -the inexact exception raised if the abstract input cannot be represented
195   -exactly. If the abstract value is too large, however, the overflow and
196   -inexact exceptions are raised and an infinity or maximal finite value is
197   -returned. If the abstract value is too small, the input value is rounded to
198   -a subnormal number, and the underflow and inexact exceptions are raised if
199   -the abstract input cannot be represented exactly as a subnormal single-
200   -precision floating-point number.
201   - The input significand `zSig' has its binary point between bits 30
202   -and 29, which is 7 bits to the left of the usual location. This shifted
203   -significand must be normalized or smaller. If `zSig' is not normalized,
204   -`zExp' must be 0; in that case, the result returned is a subnormal number,
205   -and it must not require rounding. In the usual case that `zSig' is
206   -normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
207   -The handling of underflow and overflow follows the IEC/IEEE Standard for
208   -Binary Floating-point Arithmetic.
209   --------------------------------------------------------------------------------
210   -*/
211   -static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
212   -{
213   - int8 roundingMode;
214   - flag roundNearestEven;
215   - int8 roundIncrement, roundBits;
216   - flag isTiny;
217   -
218   - roundingMode = float_rounding_mode;
219   - roundNearestEven = ( roundingMode == float_round_nearest_even );
220   - roundIncrement = 0x40;
221   - if ( ! roundNearestEven ) {
222   - if ( roundingMode == float_round_to_zero ) {
223   - roundIncrement = 0;
224   - }
225   - else {
226   - roundIncrement = 0x7F;
227   - if ( zSign ) {
228   - if ( roundingMode == float_round_up ) roundIncrement = 0;
229   - }
230   - else {
231   - if ( roundingMode == float_round_down ) roundIncrement = 0;
232   - }
233   - }
234   - }
235   - roundBits = zSig & 0x7F;
236   - if ( 0xFD <= (bits16) zExp ) {
237   - if ( ( 0xFD < zExp )
238   - || ( ( zExp == 0xFD )
239   - && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
240   - ) {
241   - float_raise( float_flag_overflow | float_flag_inexact );
242   - return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
243   - }
244   - if ( zExp < 0 ) {
245   - isTiny =
246   - ( float_detect_tininess == float_tininess_before_rounding )
247   - || ( zExp < -1 )
248   - || ( zSig + roundIncrement < 0x80000000 );
249   - shift32RightJamming( zSig, - zExp, &zSig );
250   - zExp = 0;
251   - roundBits = zSig & 0x7F;
252   - if ( isTiny && roundBits ) float_raise( float_flag_underflow );
253   - }
254   - }
255   - if ( roundBits ) float_exception_flags |= float_flag_inexact;
256   - zSig = ( zSig + roundIncrement )>>7;
257   - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
258   - if ( zSig == 0 ) zExp = 0;
259   - return packFloat32( zSign, zExp, zSig );
260   -
261   -}
262   -
263   -/*
264   --------------------------------------------------------------------------------
265   -Takes an abstract floating-point value having sign `zSign', exponent `zExp',
266   -and significand `zSig', and returns the proper single-precision floating-
267   -point value corresponding to the abstract input. This routine is just like
268   -`roundAndPackFloat32' except that `zSig' does not have to be normalized in
269   -any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
270   -point exponent.
271   --------------------------------------------------------------------------------
272   -*/
273   -static float32
274   - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
275   -{
276   - int8 shiftCount;
277   -
278   - shiftCount = countLeadingZeros32( zSig ) - 1;
279   - return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
280   -
281   -}
282   -
283   -/*
284   --------------------------------------------------------------------------------
285   -Returns the fraction bits of the double-precision floating-point value `a'.
286   --------------------------------------------------------------------------------
287   -*/
288   -INLINE bits64 extractFloat64Frac( float64 a )
289   -{
290   -
291   - return a & LIT64( 0x000FFFFFFFFFFFFF );
292   -
293   -}
294   -
295   -/*
296   --------------------------------------------------------------------------------
297   -Returns the exponent bits of the double-precision floating-point value `a'.
298   --------------------------------------------------------------------------------
299   -*/
300   -INLINE int16 extractFloat64Exp( float64 a )
301   -{
302   -
303   - return ( a>>52 ) & 0x7FF;
304   -
305   -}
306   -
307   -/*
308   --------------------------------------------------------------------------------
309   -Returns the sign bit of the double-precision floating-point value `a'.
310   --------------------------------------------------------------------------------
311   -*/
312   -INLINE flag extractFloat64Sign( float64 a )
313   -{
314   -
315   - return a>>63;
316   -
317   -}
318   -
319   -/*
320   --------------------------------------------------------------------------------
321   -Normalizes the subnormal double-precision floating-point value represented
322   -by the denormalized significand `aSig'. The normalized exponent and
323   -significand are stored at the locations pointed to by `zExpPtr' and
324   -`zSigPtr', respectively.
325   --------------------------------------------------------------------------------
326   -*/
327   -static void
328   - normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
329   -{
330   - int8 shiftCount;
331   -
332   - shiftCount = countLeadingZeros64( aSig ) - 11;
333   - *zSigPtr = aSig<<shiftCount;
334   - *zExpPtr = 1 - shiftCount;
335   -
336   -}
337   -
338   -/*
339   --------------------------------------------------------------------------------
340   -Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
341   -double-precision floating-point value, returning the result. After being
342   -shifted into the proper positions, the three fields are simply added
343   -together to form the result. This means that any integer portion of `zSig'
344   -will be added into the exponent. Since a properly normalized significand
345   -will have an integer portion equal to 1, the `zExp' input should be 1 less
346   -than the desired result exponent whenever `zSig' is a complete, normalized
347   -significand.
348   --------------------------------------------------------------------------------
349   -*/
350   -INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
351   -{
352   -
353   - return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
354   -
355   -}
356   -
357   -/*
358   --------------------------------------------------------------------------------
359   -Takes an abstract floating-point value having sign `zSign', exponent `zExp',
360   -and significand `zSig', and returns the proper double-precision floating-
361   -point value corresponding to the abstract input. Ordinarily, the abstract
362   -value is simply rounded and packed into the double-precision format, with
363   -the inexact exception raised if the abstract input cannot be represented
364   -exactly. If the abstract value is too large, however, the overflow and
365   -inexact exceptions are raised and an infinity or maximal finite value is
366   -returned. If the abstract value is too small, the input value is rounded to
367   -a subnormal number, and the underflow and inexact exceptions are raised if
368   -the abstract input cannot be represented exactly as a subnormal double-
369   -precision floating-point number.
370   - The input significand `zSig' has its binary point between bits 62
371   -and 61, which is 10 bits to the left of the usual location. This shifted
372   -significand must be normalized or smaller. If `zSig' is not normalized,
373   -`zExp' must be 0; in that case, the result returned is a subnormal number,
374   -and it must not require rounding. In the usual case that `zSig' is
375   -normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
376   -The handling of underflow and overflow follows the IEC/IEEE Standard for
377   -Binary Floating-point Arithmetic.
378   --------------------------------------------------------------------------------
379   -*/
380   -static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
381   -{
382   - int8 roundingMode;
383   - flag roundNearestEven;
384   - int16 roundIncrement, roundBits;
385   - flag isTiny;
386   -
387   - roundingMode = float_rounding_mode;
388   - roundNearestEven = ( roundingMode == float_round_nearest_even );
389   - roundIncrement = 0x200;
390   - if ( ! roundNearestEven ) {
391   - if ( roundingMode == float_round_to_zero ) {
392   - roundIncrement = 0;
393   - }
394   - else {
395   - roundIncrement = 0x3FF;
396   - if ( zSign ) {
397   - if ( roundingMode == float_round_up ) roundIncrement = 0;
398   - }
399   - else {
400   - if ( roundingMode == float_round_down ) roundIncrement = 0;
401   - }
402   - }
403   - }
404   - roundBits = zSig & 0x3FF;
405   - if ( 0x7FD <= (bits16) zExp ) {
406   - if ( ( 0x7FD < zExp )
407   - || ( ( zExp == 0x7FD )
408   - && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
409   - ) {
410   - //register int lr = __builtin_return_address(0);
411   - //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
412   - float_raise( float_flag_overflow | float_flag_inexact );
413   - return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
414   - }
415   - if ( zExp < 0 ) {
416   - isTiny =
417   - ( float_detect_tininess == float_tininess_before_rounding )
418   - || ( zExp < -1 )
419   - || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
420   - shift64RightJamming( zSig, - zExp, &zSig );
421   - zExp = 0;
422   - roundBits = zSig & 0x3FF;
423   - if ( isTiny && roundBits ) float_raise( float_flag_underflow );
424   - }
425   - }
426   - if ( roundBits ) float_exception_flags |= float_flag_inexact;
427   - zSig = ( zSig + roundIncrement )>>10;
428   - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
429   - if ( zSig == 0 ) zExp = 0;
430   - return packFloat64( zSign, zExp, zSig );
431   -
432   -}
433   -
434   -/*
435   --------------------------------------------------------------------------------
436   -Takes an abstract floating-point value having sign `zSign', exponent `zExp',
437   -and significand `zSig', and returns the proper double-precision floating-
438   -point value corresponding to the abstract input. This routine is just like
439   -`roundAndPackFloat64' except that `zSig' does not have to be normalized in
440   -any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
441   -point exponent.
442   --------------------------------------------------------------------------------
443   -*/
444   -static float64
445   - normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
446   -{
447   - int8 shiftCount;
448   -
449   - shiftCount = countLeadingZeros64( zSig ) - 1;
450   - return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
451   -
452   -}
453   -
454   -#ifdef FLOATX80
455   -
456   -/*
457   --------------------------------------------------------------------------------
458   -Returns the fraction bits of the extended double-precision floating-point
459   -value `a'.
460   --------------------------------------------------------------------------------
461   -*/
462   -INLINE bits64 extractFloatx80Frac( floatx80 a )
463   -{
464   -
465   - return a.low;
466   -
467   -}
468   -
469   -/*
470   --------------------------------------------------------------------------------
471   -Returns the exponent bits of the extended double-precision floating-point
472   -value `a'.
473   --------------------------------------------------------------------------------
474   -*/
475   -INLINE int32 extractFloatx80Exp( floatx80 a )
476   -{
477   -
478   - return a.high & 0x7FFF;
479   -
480   -}
481   -
482   -/*
483   --------------------------------------------------------------------------------
484   -Returns the sign bit of the extended double-precision floating-point value
485   -`a'.
486   --------------------------------------------------------------------------------
487   -*/
488   -INLINE flag extractFloatx80Sign( floatx80 a )
489   -{
490   -
491   - return a.high>>15;
492   -
493   -}
494   -
495   -/*
496   --------------------------------------------------------------------------------
497   -Normalizes the subnormal extended double-precision floating-point value
498   -represented by the denormalized significand `aSig'. The normalized exponent
499   -and significand are stored at the locations pointed to by `zExpPtr' and
500   -`zSigPtr', respectively.
501   --------------------------------------------------------------------------------
502   -*/
503   -static void
504   - normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
505   -{
506   - int8 shiftCount;
507   -
508   - shiftCount = countLeadingZeros64( aSig );
509   - *zSigPtr = aSig<<shiftCount;
510   - *zExpPtr = 1 - shiftCount;
511   -
512   -}
513   -
514   -/*
515   --------------------------------------------------------------------------------
516   -Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
517   -extended double-precision floating-point value, returning the result.
518   --------------------------------------------------------------------------------
519   -*/
520   -INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
521   -{
522   - floatx80 z;
523   -
524   - z.low = zSig;
525   - z.high = ( ( (bits16) zSign )<<15 ) + zExp;
526   - return z;
527   -
528   -}
529   -
530   -/*
531   --------------------------------------------------------------------------------
532   -Takes an abstract floating-point value having sign `zSign', exponent `zExp',
533   -and extended significand formed by the concatenation of `zSig0' and `zSig1',
534   -and returns the proper extended double-precision floating-point value
535   -corresponding to the abstract input. Ordinarily, the abstract value is
536   -rounded and packed into the extended double-precision format, with the
537   -inexact exception raised if the abstract input cannot be represented
538   -exactly. If the abstract value is too large, however, the overflow and
539   -inexact exceptions are raised and an infinity or maximal finite value is
540   -returned. If the abstract value is too small, the input value is rounded to
541   -a subnormal number, and the underflow and inexact exceptions are raised if
542   -the abstract input cannot be represented exactly as a subnormal extended
543   -double-precision floating-point number.
544   - If `roundingPrecision' is 32 or 64, the result is rounded to the same
545   -number of bits as single or double precision, respectively. Otherwise, the
546   -result is rounded to the full precision of the extended double-precision
547   -format.
548   - The input significand must be normalized or smaller. If the input
549   -significand is not normalized, `zExp' must be 0; in that case, the result
550   -returned is a subnormal number, and it must not require rounding. The
551   -handling of underflow and overflow follows the IEC/IEEE Standard for Binary
552   -Floating-point Arithmetic.
553   --------------------------------------------------------------------------------
554   -*/
555   -static floatx80
556   - roundAndPackFloatx80(
557   - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
558   - )
559   -{
560   - int8 roundingMode;
561   - flag roundNearestEven, increment, isTiny;
562   - int64 roundIncrement, roundMask, roundBits;
563   -
564   - roundingMode = float_rounding_mode;
565   - roundNearestEven = ( roundingMode == float_round_nearest_even );
566   - if ( roundingPrecision == 80 ) goto precision80;
567   - if ( roundingPrecision == 64 ) {
568   - roundIncrement = LIT64( 0x0000000000000400 );
569   - roundMask = LIT64( 0x00000000000007FF );
570   - }
571   - else if ( roundingPrecision == 32 ) {
572   - roundIncrement = LIT64( 0x0000008000000000 );
573   - roundMask = LIT64( 0x000000FFFFFFFFFF );
574   - }
575   - else {
576   - goto precision80;
577   - }
578   - zSig0 |= ( zSig1 != 0 );
579   - if ( ! roundNearestEven ) {
580   - if ( roundingMode == float_round_to_zero ) {
581   - roundIncrement = 0;
582   - }
583   - else {
584   - roundIncrement = roundMask;
585   - if ( zSign ) {
586   - if ( roundingMode == float_round_up ) roundIncrement = 0;
587   - }
588   - else {
589   - if ( roundingMode == float_round_down ) roundIncrement = 0;
590   - }
591   - }
592   - }
593   - roundBits = zSig0 & roundMask;
594   - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
595   - if ( ( 0x7FFE < zExp )
596   - || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
597   - ) {
598   - goto overflow;
599   - }
600   - if ( zExp <= 0 ) {
601   - isTiny =
602   - ( float_detect_tininess == float_tininess_before_rounding )
603   - || ( zExp < 0 )
604   - || ( zSig0 <= zSig0 + roundIncrement );
605   - shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
606   - zExp = 0;
607   - roundBits = zSig0 & roundMask;
608   - if ( isTiny && roundBits ) float_raise( float_flag_underflow );
609   - if ( roundBits ) float_exception_flags |= float_flag_inexact;
610   - zSig0 += roundIncrement;
611   - if ( (sbits64) zSig0 < 0 ) zExp = 1;
612   - roundIncrement = roundMask + 1;
613   - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
614   - roundMask |= roundIncrement;
615   - }
616   - zSig0 &= ~ roundMask;
617   - return packFloatx80( zSign, zExp, zSig0 );
618   - }
619   - }
620   - if ( roundBits ) float_exception_flags |= float_flag_inexact;
621   - zSig0 += roundIncrement;
622   - if ( zSig0 < roundIncrement ) {
623   - ++zExp;
624   - zSig0 = LIT64( 0x8000000000000000 );
625   - }
626   - roundIncrement = roundMask + 1;
627   - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
628   - roundMask |= roundIncrement;
629   - }
630   - zSig0 &= ~ roundMask;
631   - if ( zSig0 == 0 ) zExp = 0;
632   - return packFloatx80( zSign, zExp, zSig0 );
633   - precision80:
634   - increment = ( (sbits64) zSig1 < 0 );
635   - if ( ! roundNearestEven ) {
636   - if ( roundingMode == float_round_to_zero ) {
637   - increment = 0;
638   - }
639   - else {
640   - if ( zSign ) {
641   - increment = ( roundingMode == float_round_down ) && zSig1;
642   - }
643   - else {
644   - increment = ( roundingMode == float_round_up ) && zSig1;
645   - }
646   - }
647   - }
648   - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
649   - if ( ( 0x7FFE < zExp )
650   - || ( ( zExp == 0x7FFE )
651   - && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
652   - && increment
653   - )
654   - ) {
655   - roundMask = 0;
656   - overflow:
657   - float_raise( float_flag_overflow | float_flag_inexact );
658   - if ( ( roundingMode == float_round_to_zero )
659   - || ( zSign && ( roundingMode == float_round_up ) )
660   - || ( ! zSign && ( roundingMode == float_round_down ) )
661   - ) {
662   - return packFloatx80( zSign, 0x7FFE, ~ roundMask );
663   - }
664   - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
665   - }
666   - if ( zExp <= 0 ) {
667   - isTiny =
668   - ( float_detect_tininess == float_tininess_before_rounding )
669   - || ( zExp < 0 )
670   - || ! increment
671   - || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
672   - shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
673   - zExp = 0;
674   - if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
675   - if ( zSig1 ) float_exception_flags |= float_flag_inexact;
676   - if ( roundNearestEven ) {
677   - increment = ( (sbits64) zSig1 < 0 );
678   - }
679   - else {
680   - if ( zSign ) {
681   - increment = ( roundingMode == float_round_down ) && zSig1;
682   - }
683   - else {
684   - increment = ( roundingMode == float_round_up ) && zSig1;
685   - }
686   - }
687   - if ( increment ) {
688   - ++zSig0;
689   - zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
690   - if ( (sbits64) zSig0 < 0 ) zExp = 1;
691   - }
692   - return packFloatx80( zSign, zExp, zSig0 );
693   - }
694   - }
695   - if ( zSig1 ) float_exception_flags |= float_flag_inexact;
696   - if ( increment ) {
697   - ++zSig0;
698   - if ( zSig0 == 0 ) {
699   - ++zExp;
700   - zSig0 = LIT64( 0x8000000000000000 );
701   - }
702   - else {
703   - zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
704   - }
705   - }
706   - else {
707   - if ( zSig0 == 0 ) zExp = 0;
708   - }
709   -
710   - return packFloatx80( zSign, zExp, zSig0 );
711   -}
712   -
713   -/*
714   --------------------------------------------------------------------------------
715   -Takes an abstract floating-point value having sign `zSign', exponent
716   -`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
717   -and returns the proper extended double-precision floating-point value
718   -corresponding to the abstract input. This routine is just like
719   -`roundAndPackFloatx80' except that the input significand does not have to be
720   -normalized.
721   --------------------------------------------------------------------------------
722   -*/
723   -static floatx80
724   - normalizeRoundAndPackFloatx80(
725   - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
726   - )
727   -{
728   - int8 shiftCount;
729   -
730   - if ( zSig0 == 0 ) {
731   - zSig0 = zSig1;
732   - zSig1 = 0;
733   - zExp -= 64;
734   - }
735   - shiftCount = countLeadingZeros64( zSig0 );
736   - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
737   - zExp -= shiftCount;
738   - return
739   - roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
740   -
741   -}
742   -
743   -#endif
744   -
745   -/*
746   --------------------------------------------------------------------------------
747   -Returns the result of converting the 32-bit two's complement integer `a' to
748   -the single-precision floating-point format. The conversion is performed
749   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
750   --------------------------------------------------------------------------------
751   -*/
752   -float32 int32_to_float32( int32 a )
753   -{
754   - flag zSign;
755   -
756   - if ( a == 0 ) return 0;
757   - if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
758   - zSign = ( a < 0 );
759   - return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
760   -
761   -}
762   -
763   -/*
764   --------------------------------------------------------------------------------
765   -Returns the result of converting the 32-bit two's complement integer `a' to
766   -the double-precision floating-point format. The conversion is performed
767   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
768   --------------------------------------------------------------------------------
769   -*/
770   -float64 int32_to_float64( int32 a )
771   -{
772   - flag aSign;
773   - uint32 absA;
774   - int8 shiftCount;
775   - bits64 zSig;
776   -
777   - if ( a == 0 ) return 0;
778   - aSign = ( a < 0 );
779   - absA = aSign ? - a : a;
780   - shiftCount = countLeadingZeros32( absA ) + 21;
781   - zSig = absA;
782   - return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
783   -
784   -}
785   -
786   -#ifdef FLOATX80
787   -
788   -/*
789   --------------------------------------------------------------------------------
790   -Returns the result of converting the 32-bit two's complement integer `a'
791   -to the extended double-precision floating-point format. The conversion
792   -is performed according to the IEC/IEEE Standard for Binary Floating-point
793   -Arithmetic.
794   --------------------------------------------------------------------------------
795   -*/
796   -floatx80 int32_to_floatx80( int32 a )
797   -{
798   - flag zSign;
799   - uint32 absA;
800   - int8 shiftCount;
801   - bits64 zSig;
802   -
803   - if ( a == 0 ) return packFloatx80( 0, 0, 0 );
804   - zSign = ( a < 0 );
805   - absA = zSign ? - a : a;
806   - shiftCount = countLeadingZeros32( absA ) + 32;
807   - zSig = absA;
808   - return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
809   -
810   -}
811   -
812   -#endif
813   -
814   -/*
815   --------------------------------------------------------------------------------
816   -Returns the result of converting the single-precision floating-point value
817   -`a' to the 32-bit two's complement integer format. The conversion is
818   -performed according to the IEC/IEEE Standard for Binary Floating-point
819   -Arithmetic---which means in particular that the conversion is rounded
820   -according to the current rounding mode. If `a' is a NaN, the largest
821   -positive integer is returned. Otherwise, if the conversion overflows, the
822   -largest integer with the same sign as `a' is returned.
823   --------------------------------------------------------------------------------
824   -*/
825   -int32 float32_to_int32( float32 a )
826   -{
827   - flag aSign;
828   - int16 aExp, shiftCount;
829   - bits32 aSig;
830   - bits64 zSig;
831   -
832   - aSig = extractFloat32Frac( a );
833   - aExp = extractFloat32Exp( a );
834   - aSign = extractFloat32Sign( a );
835   - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
836   - if ( aExp ) aSig |= 0x00800000;
837   - shiftCount = 0xAF - aExp;
838   - zSig = aSig;
839   - zSig <<= 32;
840   - if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
841   - return roundAndPackInt32( aSign, zSig );
842   -
843   -}
844   -
845   -/*
846   --------------------------------------------------------------------------------
847   -Returns the result of converting the single-precision floating-point value
848   -`a' to the 32-bit two's complement integer format. The conversion is
849   -performed according to the IEC/IEEE Standard for Binary Floating-point
850   -Arithmetic, except that the conversion is always rounded toward zero. If
851   -`a' is a NaN, the largest positive integer is returned. Otherwise, if the
852   -conversion overflows, the largest integer with the same sign as `a' is
853   -returned.
854   --------------------------------------------------------------------------------
855   -*/
856   -int32 float32_to_int32_round_to_zero( float32 a )
857   -{
858   - flag aSign;
859   - int16 aExp, shiftCount;
860   - bits32 aSig;
861   - int32 z;
862   -
863   - aSig = extractFloat32Frac( a );
864   - aExp = extractFloat32Exp( a );
865   - aSign = extractFloat32Sign( a );
866   - shiftCount = aExp - 0x9E;
867   - if ( 0 <= shiftCount ) {
868   - if ( a == 0xCF000000 ) return 0x80000000;
869   - float_raise( float_flag_invalid );
870   - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
871   - return 0x80000000;
872   - }
873   - else if ( aExp <= 0x7E ) {
874   - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
875   - return 0;
876   - }
877   - aSig = ( aSig | 0x00800000 )<<8;
878   - z = aSig>>( - shiftCount );
879   - if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
880   - float_exception_flags |= float_flag_inexact;
881   - }
882   - return aSign ? - z : z;
883   -
884   -}
885   -
886   -/*
887   --------------------------------------------------------------------------------
888   -Returns the result of converting the single-precision floating-point value
889   -`a' to the double-precision floating-point format. The conversion is
890   -performed according to the IEC/IEEE Standard for Binary Floating-point
891   -Arithmetic.
892   --------------------------------------------------------------------------------
893   -*/
894   -float64 float32_to_float64( float32 a )
895   -{
896   - flag aSign;
897   - int16 aExp;
898   - bits32 aSig;
899   -
900   - aSig = extractFloat32Frac( a );
901   - aExp = extractFloat32Exp( a );
902   - aSign = extractFloat32Sign( a );
903   - if ( aExp == 0xFF ) {
904   - if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
905   - return packFloat64( aSign, 0x7FF, 0 );
906   - }
907   - if ( aExp == 0 ) {
908   - if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
909   - normalizeFloat32Subnormal( aSig, &aExp, &aSig );
910   - --aExp;
911   - }
912   - return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
913   -
914   -}
915   -
916   -#ifdef FLOATX80
917   -
918   -/*
919   --------------------------------------------------------------------------------
920   -Returns the result of converting the single-precision floating-point value
921   -`a' to the extended double-precision floating-point format. The conversion
922   -is performed according to the IEC/IEEE Standard for Binary Floating-point
923   -Arithmetic.
924   --------------------------------------------------------------------------------
925   -*/
926   -floatx80 float32_to_floatx80( float32 a )
927   -{
928   - flag aSign;
929   - int16 aExp;
930   - bits32 aSig;
931   -
932   - aSig = extractFloat32Frac( a );
933   - aExp = extractFloat32Exp( a );
934   - aSign = extractFloat32Sign( a );
935   - if ( aExp == 0xFF ) {
936   - if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
937   - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
938   - }
939   - if ( aExp == 0 ) {
940   - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
941   - normalizeFloat32Subnormal( aSig, &aExp, &aSig );
942   - }
943   - aSig |= 0x00800000;
944   - return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
945   -
946   -}
947   -
948   -#endif
949   -
950   -/*
951   --------------------------------------------------------------------------------
952   -Rounds the single-precision floating-point value `a' to an integer, and
953   -returns the result as a single-precision floating-point value. The
954   -operation is performed according to the IEC/IEEE Standard for Binary
955   -Floating-point Arithmetic.
956   --------------------------------------------------------------------------------
957   -*/
958   -float32 float32_round_to_int( float32 a )
959   -{
960   - flag aSign;
961   - int16 aExp;
962   - bits32 lastBitMask, roundBitsMask;
963   - int8 roundingMode;
964   - float32 z;
965   -
966   - aExp = extractFloat32Exp( a );
967   - if ( 0x96 <= aExp ) {
968   - if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
969   - return propagateFloat32NaN( a, a );
970   - }
971   - return a;
972   - }
973   - if ( aExp <= 0x7E ) {
974   - if ( (bits32) ( a<<1 ) == 0 ) return a;
975   - float_exception_flags |= float_flag_inexact;
976   - aSign = extractFloat32Sign( a );
977   - switch ( float_rounding_mode ) {
978   - case float_round_nearest_even:
979   - if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
980   - return packFloat32( aSign, 0x7F, 0 );
981   - }
982   - break;
983   - case float_round_down:
984   - return aSign ? 0xBF800000 : 0;
985   - case float_round_up:
986   - return aSign ? 0x80000000 : 0x3F800000;
987   - }
988   - return packFloat32( aSign, 0, 0 );
989   - }
990   - lastBitMask = 1;
991   - lastBitMask <<= 0x96 - aExp;
992   - roundBitsMask = lastBitMask - 1;
993   - z = a;
994   - roundingMode = float_rounding_mode;
995   - if ( roundingMode == float_round_nearest_even ) {
996   - z += lastBitMask>>1;
997   - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
998   - }
999   - else if ( roundingMode != float_round_to_zero ) {
1000   - if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1001   - z += roundBitsMask;
1002   - }
1003   - }
1004   - z &= ~ roundBitsMask;
1005   - if ( z != a ) float_exception_flags |= float_flag_inexact;
1006   - return z;
1007   -
1008   -}
1009   -
1010   -/*
1011   --------------------------------------------------------------------------------
1012   -Returns the result of adding the absolute values of the single-precision
1013   -floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1014   -before being returned. `zSign' is ignored if the result is a NaN. The
1015   -addition is performed according to the IEC/IEEE Standard for Binary
1016   -Floating-point Arithmetic.
1017   --------------------------------------------------------------------------------
1018   -*/
1019   -static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1020   -{
1021   - int16 aExp, bExp, zExp;
1022   - bits32 aSig, bSig, zSig;
1023   - int16 expDiff;
1024   -
1025   - aSig = extractFloat32Frac( a );
1026   - aExp = extractFloat32Exp( a );
1027   - bSig = extractFloat32Frac( b );
1028   - bExp = extractFloat32Exp( b );
1029   - expDiff = aExp - bExp;
1030   - aSig <<= 6;
1031   - bSig <<= 6;
1032   - if ( 0 < expDiff ) {
1033   - if ( aExp == 0xFF ) {
1034   - if ( aSig ) return propagateFloat32NaN( a, b );
1035   - return a;
1036   - }
1037   - if ( bExp == 0 ) {
1038   - --expDiff;
1039   - }
1040   - else {
1041   - bSig |= 0x20000000;
1042   - }
1043   - shift32RightJamming( bSig, expDiff, &bSig );
1044   - zExp = aExp;
1045   - }
1046   - else if ( expDiff < 0 ) {
1047   - if ( bExp == 0xFF ) {
1048   - if ( bSig ) return propagateFloat32NaN( a, b );
1049   - return packFloat32( zSign, 0xFF, 0 );
1050   - }
1051   - if ( aExp == 0 ) {
1052   - ++expDiff;
1053   - }
1054   - else {
1055   - aSig |= 0x20000000;
1056   - }
1057   - shift32RightJamming( aSig, - expDiff, &aSig );
1058   - zExp = bExp;
1059   - }
1060   - else {
1061   - if ( aExp == 0xFF ) {
1062   - if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1063   - return a;
1064   - }
1065   - if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1066   - zSig = 0x40000000 + aSig + bSig;
1067   - zExp = aExp;
1068   - goto roundAndPack;
1069   - }
1070   - aSig |= 0x20000000;
1071   - zSig = ( aSig + bSig )<<1;
1072   - --zExp;
1073   - if ( (sbits32) zSig < 0 ) {
1074   - zSig = aSig + bSig;
1075   - ++zExp;
1076   - }
1077   - roundAndPack:
1078   - return roundAndPackFloat32( zSign, zExp, zSig );
1079   -
1080   -}
1081   -
1082   -/*
1083   --------------------------------------------------------------------------------
1084   -Returns the result of subtracting the absolute values of the single-
1085   -precision floating-point values `a' and `b'. If `zSign' is true, the
1086   -difference is negated before being returned. `zSign' is ignored if the
1087   -result is a NaN. The subtraction is performed according to the IEC/IEEE
1088   -Standard for Binary Floating-point Arithmetic.
1089   --------------------------------------------------------------------------------
1090   -*/
1091   -static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1092   -{
1093   - int16 aExp, bExp, zExp;
1094   - bits32 aSig, bSig, zSig;
1095   - int16 expDiff;
1096   -
1097   - aSig = extractFloat32Frac( a );
1098   - aExp = extractFloat32Exp( a );
1099   - bSig = extractFloat32Frac( b );
1100   - bExp = extractFloat32Exp( b );
1101   - expDiff = aExp - bExp;
1102   - aSig <<= 7;
1103   - bSig <<= 7;
1104   - if ( 0 < expDiff ) goto aExpBigger;
1105   - if ( expDiff < 0 ) goto bExpBigger;
1106   - if ( aExp == 0xFF ) {
1107   - if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1108   - float_raise( float_flag_invalid );
1109   - return float32_default_nan;
1110   - }
1111   - if ( aExp == 0 ) {
1112   - aExp = 1;
1113   - bExp = 1;
1114   - }
1115   - if ( bSig < aSig ) goto aBigger;
1116   - if ( aSig < bSig ) goto bBigger;
1117   - return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
1118   - bExpBigger:
1119   - if ( bExp == 0xFF ) {
1120   - if ( bSig ) return propagateFloat32NaN( a, b );
1121   - return packFloat32( zSign ^ 1, 0xFF, 0 );
1122   - }
1123   - if ( aExp == 0 ) {
1124   - ++expDiff;
1125   - }
1126   - else {
1127   - aSig |= 0x40000000;
1128   - }
1129   - shift32RightJamming( aSig, - expDiff, &aSig );
1130   - bSig |= 0x40000000;
1131   - bBigger:
1132   - zSig = bSig - aSig;
1133   - zExp = bExp;
1134   - zSign ^= 1;
1135   - goto normalizeRoundAndPack;
1136   - aExpBigger:
1137   - if ( aExp == 0xFF ) {
1138   - if ( aSig ) return propagateFloat32NaN( a, b );
1139   - return a;
1140   - }
1141   - if ( bExp == 0 ) {
1142   - --expDiff;
1143   - }
1144   - else {
1145   - bSig |= 0x40000000;
1146   - }
1147   - shift32RightJamming( bSig, expDiff, &bSig );
1148   - aSig |= 0x40000000;
1149   - aBigger:
1150   - zSig = aSig - bSig;
1151   - zExp = aExp;
1152   - normalizeRoundAndPack:
1153   - --zExp;
1154   - return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1155   -
1156   -}
1157   -
1158   -/*
1159   --------------------------------------------------------------------------------
1160   -Returns the result of adding the single-precision floating-point values `a'
1161   -and `b'. The operation is performed according to the IEC/IEEE Standard for
1162   -Binary Floating-point Arithmetic.
1163   --------------------------------------------------------------------------------
1164   -*/
1165   -float32 float32_add( float32 a, float32 b )
1166   -{
1167   - flag aSign, bSign;
1168   -
1169   - aSign = extractFloat32Sign( a );
1170   - bSign = extractFloat32Sign( b );
1171   - if ( aSign == bSign ) {
1172   - return addFloat32Sigs( a, b, aSign );
1173   - }
1174   - else {
1175   - return subFloat32Sigs( a, b, aSign );
1176   - }
1177   -
1178   -}
1179   -
1180   -/*
1181   --------------------------------------------------------------------------------
1182   -Returns the result of subtracting the single-precision floating-point values
1183   -`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1184   -for Binary Floating-point Arithmetic.
1185   --------------------------------------------------------------------------------
1186   -*/
1187   -float32 float32_sub( float32 a, float32 b )
1188   -{
1189   - flag aSign, bSign;
1190   -
1191   - aSign = extractFloat32Sign( a );
1192   - bSign = extractFloat32Sign( b );
1193   - if ( aSign == bSign ) {
1194   - return subFloat32Sigs( a, b, aSign );
1195   - }
1196   - else {
1197   - return addFloat32Sigs( a, b, aSign );
1198   - }
1199   -
1200   -}
1201   -
1202   -/*
1203   --------------------------------------------------------------------------------
1204   -Returns the result of multiplying the single-precision floating-point values
1205   -`a' and `b'. The operation is performed according to the IEC/IEEE Standard
1206   -for Binary Floating-point Arithmetic.
1207   --------------------------------------------------------------------------------
1208   -*/
1209   -float32 float32_mul( float32 a, float32 b )
1210   -{
1211   - flag aSign, bSign, zSign;
1212   - int16 aExp, bExp, zExp;
1213   - bits32 aSig, bSig;
1214   - bits64 zSig64;
1215   - bits32 zSig;
1216   -
1217   - aSig = extractFloat32Frac( a );
1218   - aExp = extractFloat32Exp( a );
1219   - aSign = extractFloat32Sign( a );
1220   - bSig = extractFloat32Frac( b );
1221   - bExp = extractFloat32Exp( b );
1222   - bSign = extractFloat32Sign( b );
1223   - zSign = aSign ^ bSign;
1224   - if ( aExp == 0xFF ) {
1225   - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1226   - return propagateFloat32NaN( a, b );
1227   - }
1228   - if ( ( bExp | bSig ) == 0 ) {
1229   - float_raise( float_flag_invalid );
1230   - return float32_default_nan;
1231   - }
1232   - return packFloat32( zSign, 0xFF, 0 );
1233   - }
1234   - if ( bExp == 0xFF ) {
1235   - if ( bSig ) return propagateFloat32NaN( a, b );
1236   - if ( ( aExp | aSig ) == 0 ) {
1237   - float_raise( float_flag_invalid );
1238   - return float32_default_nan;
1239   - }
1240   - return packFloat32( zSign, 0xFF, 0 );
1241   - }
1242   - if ( aExp == 0 ) {
1243   - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1244   - normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1245   - }
1246   - if ( bExp == 0 ) {
1247   - if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1248   - normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1249   - }
1250   - zExp = aExp + bExp - 0x7F;
1251   - aSig = ( aSig | 0x00800000 )<<7;
1252   - bSig = ( bSig | 0x00800000 )<<8;
1253   - shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1254   - zSig = zSig64;
1255   - if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1256   - zSig <<= 1;
1257   - --zExp;
1258   - }
1259   - return roundAndPackFloat32( zSign, zExp, zSig );
1260   -
1261   -}
1262   -
1263   -/*
1264   --------------------------------------------------------------------------------
1265   -Returns the result of dividing the single-precision floating-point value `a'
1266   -by the corresponding value `b'. The operation is performed according to the
1267   -IEC/IEEE Standard for Binary Floating-point Arithmetic.
1268   --------------------------------------------------------------------------------
1269   -*/
1270   -float32 float32_div( float32 a, float32 b )
1271   -{
1272   - flag aSign, bSign, zSign;
1273   - int16 aExp, bExp, zExp;
1274   - bits32 aSig, bSig, zSig;
1275   -
1276   - aSig = extractFloat32Frac( a );
1277   - aExp = extractFloat32Exp( a );
1278   - aSign = extractFloat32Sign( a );
1279   - bSig = extractFloat32Frac( b );
1280   - bExp = extractFloat32Exp( b );
1281   - bSign = extractFloat32Sign( b );
1282   - zSign = aSign ^ bSign;
1283   - if ( aExp == 0xFF ) {
1284   - if ( aSig ) return propagateFloat32NaN( a, b );
1285   - if ( bExp == 0xFF ) {
1286   - if ( bSig ) return propagateFloat32NaN( a, b );
1287   - float_raise( float_flag_invalid );
1288   - return float32_default_nan;
1289   - }
1290   - return packFloat32( zSign, 0xFF, 0 );
1291   - }
1292   - if ( bExp == 0xFF ) {
1293   - if ( bSig ) return propagateFloat32NaN( a, b );
1294   - return packFloat32( zSign, 0, 0 );
1295   - }
1296   - if ( bExp == 0 ) {
1297   - if ( bSig == 0 ) {
1298   - if ( ( aExp | aSig ) == 0 ) {
1299   - float_raise( float_flag_invalid );
1300   - return float32_default_nan;
1301   - }
1302   - float_raise( float_flag_divbyzero );
1303   - return packFloat32( zSign, 0xFF, 0 );
1304   - }
1305   - normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1306   - }
1307   - if ( aExp == 0 ) {
1308   - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1309   - normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1310   - }
1311   - zExp = aExp - bExp + 0x7D;
1312   - aSig = ( aSig | 0x00800000 )<<7;
1313   - bSig = ( bSig | 0x00800000 )<<8;
1314   - if ( bSig <= ( aSig + aSig ) ) {
1315   - aSig >>= 1;
1316   - ++zExp;
1317   - }
1318   - zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1319   - if ( ( zSig & 0x3F ) == 0 ) {
1320   - zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
1321   - }
1322   - return roundAndPackFloat32( zSign, zExp, zSig );
1323   -
1324   -}
1325   -
1326   -/*
1327   --------------------------------------------------------------------------------
1328   -Returns the remainder of the single-precision floating-point value `a'
1329   -with respect to the corresponding value `b'. The operation is performed
1330   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1331   --------------------------------------------------------------------------------
1332   -*/
1333   -float32 float32_rem( float32 a, float32 b )
1334   -{
1335   - flag aSign, bSign, zSign;
1336   - int16 aExp, bExp, expDiff;
1337   - bits32 aSig, bSig;
1338   - bits32 q;
1339   - bits64 aSig64, bSig64, q64;
1340   - bits32 alternateASig;
1341   - sbits32 sigMean;
1342   -
1343   - aSig = extractFloat32Frac( a );
1344   - aExp = extractFloat32Exp( a );
1345   - aSign = extractFloat32Sign( a );
1346   - bSig = extractFloat32Frac( b );
1347   - bExp = extractFloat32Exp( b );
1348   - bSign = extractFloat32Sign( b );
1349   - if ( aExp == 0xFF ) {
1350   - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1351   - return propagateFloat32NaN( a, b );
1352   - }
1353   - float_raise( float_flag_invalid );
1354   - return float32_default_nan;
1355   - }
1356   - if ( bExp == 0xFF ) {
1357   - if ( bSig ) return propagateFloat32NaN( a, b );
1358   - return a;
1359   - }
1360   - if ( bExp == 0 ) {
1361   - if ( bSig == 0 ) {
1362   - float_raise( float_flag_invalid );
1363   - return float32_default_nan;
1364   - }
1365   - normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1366   - }
1367   - if ( aExp == 0 ) {
1368   - if ( aSig == 0 ) return a;
1369   - normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1370   - }
1371   - expDiff = aExp - bExp;
1372   - aSig |= 0x00800000;
1373   - bSig |= 0x00800000;
1374   - if ( expDiff < 32 ) {
1375   - aSig <<= 8;
1376   - bSig <<= 8;
1377   - if ( expDiff < 0 ) {
1378   - if ( expDiff < -1 ) return a;
1379   - aSig >>= 1;
1380   - }
1381   - q = ( bSig <= aSig );
1382   - if ( q ) aSig -= bSig;
1383   - if ( 0 < expDiff ) {
1384   - q = ( ( (bits64) aSig )<<32 ) / bSig;
1385   - q >>= 32 - expDiff;
1386   - bSig >>= 2;
1387   - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1388   - }
1389   - else {
1390   - aSig >>= 2;
1391   - bSig >>= 2;
1392   - }
1393   - }
1394   - else {
1395   - if ( bSig <= aSig ) aSig -= bSig;
1396   - aSig64 = ( (bits64) aSig )<<40;
1397   - bSig64 = ( (bits64) bSig )<<40;
1398   - expDiff -= 64;
1399   - while ( 0 < expDiff ) {
1400   - q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1401   - q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1402   - aSig64 = - ( ( bSig * q64 )<<38 );
1403   - expDiff -= 62;
1404   - }
1405   - expDiff += 64;
1406   - q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1407   - q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1408   - q = q64>>( 64 - expDiff );
1409   - bSig <<= 6;
1410   - aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1411   - }
1412   - do {
1413   - alternateASig = aSig;
1414   - ++q;
1415   - aSig -= bSig;
1416   - } while ( 0 <= (sbits32) aSig );
1417   - sigMean = aSig + alternateASig;
1418   - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1419   - aSig = alternateASig;
1420   - }
1421   - zSign = ( (sbits32) aSig < 0 );
1422   - if ( zSign ) aSig = - aSig;
1423   - return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1424   -
1425   -}
1426   -
1427   -/*
1428   --------------------------------------------------------------------------------
1429   -Returns the square root of the single-precision floating-point value `a'.
1430   -The operation is performed according to the IEC/IEEE Standard for Binary
1431   -Floating-point Arithmetic.
1432   --------------------------------------------------------------------------------
1433   -*/
1434   -float32 float32_sqrt( float32 a )
1435   -{
1436   - flag aSign;
1437   - int16 aExp, zExp;
1438   - bits32 aSig, zSig;
1439   - bits64 rem, term;
1440   -
1441   - aSig = extractFloat32Frac( a );
1442   - aExp = extractFloat32Exp( a );
1443   - aSign = extractFloat32Sign( a );
1444   - if ( aExp == 0xFF ) {
1445   - if ( aSig ) return propagateFloat32NaN( a, 0 );
1446   - if ( ! aSign ) return a;
1447   - float_raise( float_flag_invalid );
1448   - return float32_default_nan;
1449   - }
1450   - if ( aSign ) {
1451   - if ( ( aExp | aSig ) == 0 ) return a;
1452   - float_raise( float_flag_invalid );
1453   - return float32_default_nan;
1454   - }
1455   - if ( aExp == 0 ) {
1456   - if ( aSig == 0 ) return 0;
1457   - normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1458   - }
1459   - zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1460   - aSig = ( aSig | 0x00800000 )<<8;
1461   - zSig = estimateSqrt32( aExp, aSig ) + 2;
1462   - if ( ( zSig & 0x7F ) <= 5 ) {
1463   - if ( zSig < 2 ) {
1464   - zSig = 0xFFFFFFFF;
1465   - }
1466   - else {
1467   - aSig >>= aExp & 1;
1468   - term = ( (bits64) zSig ) * zSig;
1469   - rem = ( ( (bits64) aSig )<<32 ) - term;
1470   - while ( (sbits64) rem < 0 ) {
1471   - --zSig;
1472   - rem += ( ( (bits64) zSig )<<1 ) | 1;
1473   - }
1474   - zSig |= ( rem != 0 );
1475   - }
1476   - }
1477   - shift32RightJamming( zSig, 1, &zSig );
1478   - return roundAndPackFloat32( 0, zExp, zSig );
1479   -
1480   -}
1481   -
1482   -/*
1483   --------------------------------------------------------------------------------
1484   -Returns 1 if the single-precision floating-point value `a' is equal to the
1485   -corresponding value `b', and 0 otherwise. The comparison is performed
1486   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1487   --------------------------------------------------------------------------------
1488   -*/
1489   -flag float32_eq( float32 a, float32 b )
1490   -{
1491   -
1492   - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1493   - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1494   - ) {
1495   - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1496   - float_raise( float_flag_invalid );
1497   - }
1498   - return 0;
1499   - }
1500   - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1501   -
1502   -}
1503   -
1504   -/*
1505   --------------------------------------------------------------------------------
1506   -Returns 1 if the single-precision floating-point value `a' is less than or
1507   -equal to the corresponding value `b', and 0 otherwise. The comparison is
1508   -performed according to the IEC/IEEE Standard for Binary Floating-point
1509   -Arithmetic.
1510   --------------------------------------------------------------------------------
1511   -*/
1512   -flag float32_le( float32 a, float32 b )
1513   -{
1514   - flag aSign, bSign;
1515   -
1516   - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1517   - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1518   - ) {
1519   - float_raise( float_flag_invalid );
1520   - return 0;
1521   - }
1522   - aSign = extractFloat32Sign( a );
1523   - bSign = extractFloat32Sign( b );
1524   - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1525   - return ( a == b ) || ( aSign ^ ( a < b ) );
1526   -
1527   -}
1528   -
1529   -/*
1530   --------------------------------------------------------------------------------
1531   -Returns 1 if the single-precision floating-point value `a' is less than
1532   -the corresponding value `b', and 0 otherwise. The comparison is performed
1533   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1534   --------------------------------------------------------------------------------
1535   -*/
1536   -flag float32_lt( float32 a, float32 b )
1537   -{
1538   - flag aSign, bSign;
1539   -
1540   - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1541   - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1542   - ) {
1543   - float_raise( float_flag_invalid );
1544   - return 0;
1545   - }
1546   - aSign = extractFloat32Sign( a );
1547   - bSign = extractFloat32Sign( b );
1548   - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1549   - return ( a != b ) && ( aSign ^ ( a < b ) );
1550   -
1551   -}
1552   -
1553   -/*
1554   --------------------------------------------------------------------------------
1555   -Returns 1 if the single-precision floating-point value `a' is equal to the
1556   -corresponding value `b', and 0 otherwise. The invalid exception is raised
1557   -if either operand is a NaN. Otherwise, the comparison is performed
1558   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
1559   --------------------------------------------------------------------------------
1560   -*/
1561   -flag float32_eq_signaling( float32 a, float32 b )
1562   -{
1563   -
1564   - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1565   - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1566   - ) {
1567   - float_raise( float_flag_invalid );
1568   - return 0;
1569   - }
1570   - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1571   -
1572   -}
1573   -
1574   -/*
1575   --------------------------------------------------------------------------------
1576   -Returns 1 if the single-precision floating-point value `a' is less than or
1577   -equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
1578   -cause an exception. Otherwise, the comparison is performed according to the
1579   -IEC/IEEE Standard for Binary Floating-point Arithmetic.
1580   --------------------------------------------------------------------------------
1581   -*/
1582   -flag float32_le_quiet( float32 a, float32 b )
1583   -{
1584   - flag aSign, bSign;
1585   - //int16 aExp, bExp;
1586   -
1587   - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1588   - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1589   - ) {
1590   - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1591   - float_raise( float_flag_invalid );
1592   - }
1593   - return 0;
1594   - }
1595   - aSign = extractFloat32Sign( a );
1596   - bSign = extractFloat32Sign( b );
1597   - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1598   - return ( a == b ) || ( aSign ^ ( a < b ) );
1599   -
1600   -}
1601   -
1602   -/*
1603   --------------------------------------------------------------------------------
1604   -Returns 1 if the single-precision floating-point value `a' is less than
1605   -the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
1606   -exception. Otherwise, the comparison is performed according to the IEC/IEEE
1607   -Standard for Binary Floating-point Arithmetic.
1608   --------------------------------------------------------------------------------
1609   -*/
1610   -flag float32_lt_quiet( float32 a, float32 b )
1611   -{
1612   - flag aSign, bSign;
1613   -
1614   - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1615   - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1616   - ) {
1617   - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1618   - float_raise( float_flag_invalid );
1619   - }
1620   - return 0;
1621   - }
1622   - aSign = extractFloat32Sign( a );
1623   - bSign = extractFloat32Sign( b );
1624   - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1625   - return ( a != b ) && ( aSign ^ ( a < b ) );
1626   -
1627   -}
1628   -
1629   -/*
1630   --------------------------------------------------------------------------------
1631   -Returns the result of converting the double-precision floating-point value
1632   -`a' to the 32-bit two's complement integer format. The conversion is
1633   -performed according to the IEC/IEEE Standard for Binary Floating-point
1634   -Arithmetic---which means in particular that the conversion is rounded
1635   -according to the current rounding mode. If `a' is a NaN, the largest
1636   -positive integer is returned. Otherwise, if the conversion overflows, the
1637   -largest integer with the same sign as `a' is returned.
1638   --------------------------------------------------------------------------------
1639   -*/
1640   -int32 float64_to_int32( float64 a )
1641   -{
1642   - flag aSign;
1643   - int16 aExp, shiftCount;
1644   - bits64 aSig;
1645   -
1646   - aSig = extractFloat64Frac( a );
1647   - aExp = extractFloat64Exp( a );
1648   - aSign = extractFloat64Sign( a );
1649   - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1650   - if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1651   - shiftCount = 0x42C - aExp;
1652   - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
1653   - return roundAndPackInt32( aSign, aSig );
1654   -
1655   -}
1656   -
1657   -/*
1658   --------------------------------------------------------------------------------
1659   -Returns the result of converting the double-precision floating-point value
1660   -`a' to the 32-bit two's complement integer format. The conversion is
1661   -performed according to the IEC/IEEE Standard for Binary Floating-point
1662   -Arithmetic, except that the conversion is always rounded toward zero. If
1663   -`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1664   -conversion overflows, the largest integer with the same sign as `a' is
1665   -returned.
1666   --------------------------------------------------------------------------------
1667   -*/
1668   -int32 float64_to_int32_round_to_zero( float64 a )
1669   -{
1670   - flag aSign;
1671   - int16 aExp, shiftCount;
1672   - bits64 aSig, savedASig;
1673   - int32 z;
1674   -
1675   - aSig = extractFloat64Frac( a );
1676   - aExp = extractFloat64Exp( a );
1677   - aSign = extractFloat64Sign( a );
1678   - shiftCount = 0x433 - aExp;
1679   - if ( shiftCount < 21 ) {
1680   - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1681   - goto invalid;
1682   - }
1683   - else if ( 52 < shiftCount ) {
1684   - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
1685   - return 0;
1686   - }
1687   - aSig |= LIT64( 0x0010000000000000 );
1688   - savedASig = aSig;
1689   - aSig >>= shiftCount;
1690   - z = aSig;
1691   - if ( aSign ) z = - z;
1692   - if ( ( z < 0 ) ^ aSign ) {
1693   - invalid:
1694   - float_exception_flags |= float_flag_invalid;
1695   - return aSign ? 0x80000000 : 0x7FFFFFFF;
1696   - }
1697   - if ( ( aSig<<shiftCount ) != savedASig ) {
1698   - float_exception_flags |= float_flag_inexact;
1699   - }
1700   - return z;
1701   -
1702   -}
1703   -
1704   -/*
1705   --------------------------------------------------------------------------------
1706   -Returns the result of converting the double-precision floating-point value
1707   -`a' to the 32-bit two's complement unsigned integer format. The conversion
1708   -is performed according to the IEC/IEEE Standard for Binary Floating-point
1709   -Arithmetic---which means in particular that the conversion is rounded
1710   -according to the current rounding mode. If `a' is a NaN, the largest
1711   -positive integer is returned. Otherwise, if the conversion overflows, the
1712   -largest positive integer is returned.
1713   --------------------------------------------------------------------------------
1714   -*/
1715   -int32 float64_to_uint32( float64 a )
1716   -{
1717   - flag aSign;
1718   - int16 aExp, shiftCount;
1719   - bits64 aSig;
1720   -
1721   - aSig = extractFloat64Frac( a );
1722   - aExp = extractFloat64Exp( a );
1723   - aSign = 0; //extractFloat64Sign( a );
1724   - //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1725   - if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
1726   - shiftCount = 0x42C - aExp;
1727   - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
1728   - return roundAndPackInt32( aSign, aSig );
1729   -}
1730   -
1731   -/*
1732   --------------------------------------------------------------------------------
1733   -Returns the result of converting the double-precision floating-point value
1734   -`a' to the 32-bit two's complement integer format. The conversion is
1735   -performed according to the IEC/IEEE Standard for Binary Floating-point
1736   -Arithmetic, except that the conversion is always rounded toward zero. If
1737   -`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1738   -conversion overflows, the largest positive integer is returned.
1739   --------------------------------------------------------------------------------
1740   -*/
1741   -int32 float64_to_uint32_round_to_zero( float64 a )
1742   -{
1743   - flag aSign;
1744   - int16 aExp, shiftCount;
1745   - bits64 aSig, savedASig;
1746   - int32 z;
1747   -
1748   - aSig = extractFloat64Frac( a );
1749   - aExp = extractFloat64Exp( a );
1750   - aSign = extractFloat64Sign( a );
1751   - shiftCount = 0x433 - aExp;
1752   - if ( shiftCount < 21 ) {
1753   - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
1754   - goto invalid;
1755   - }
1756   - else if ( 52 < shiftCount ) {
1757   - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
1758   - return 0;
1759   - }
1760   - aSig |= LIT64( 0x0010000000000000 );
1761   - savedASig = aSig;
1762   - aSig >>= shiftCount;
1763   - z = aSig;
1764   - if ( aSign ) z = - z;
1765   - if ( ( z < 0 ) ^ aSign ) {
1766   - invalid:
1767   - float_exception_flags |= float_flag_invalid;
1768   - return aSign ? 0x80000000 : 0x7FFFFFFF;
1769   - }
1770   - if ( ( aSig<<shiftCount ) != savedASig ) {
1771   - float_exception_flags |= float_flag_inexact;
1772   - }
1773   - return z;
1774   -}
1775   -
1776   -/*
1777   --------------------------------------------------------------------------------
1778   -Returns the result of converting the double-precision floating-point value
1779   -`a' to the single-precision floating-point format. The conversion is
1780   -performed according to the IEC/IEEE Standard for Binary Floating-point
1781   -Arithmetic.
1782   --------------------------------------------------------------------------------
1783   -*/
1784   -float32 float64_to_float32( float64 a )
1785   -{
1786   - flag aSign;
1787   - int16 aExp;
1788   - bits64 aSig;
1789   - bits32 zSig;
1790   -
1791   - aSig = extractFloat64Frac( a );
1792   - aExp = extractFloat64Exp( a );
1793   - aSign = extractFloat64Sign( a );
1794   - if ( aExp == 0x7FF ) {
1795   - if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
1796   - return packFloat32( aSign, 0xFF, 0 );
1797   - }
1798   - shift64RightJamming( aSig, 22, &aSig );
1799   - zSig = aSig;
1800   - if ( aExp || zSig ) {
1801   - zSig |= 0x40000000;
1802   - aExp -= 0x381;
1803   - }
1804   - return roundAndPackFloat32( aSign, aExp, zSig );
1805   -
1806   -}
1807   -
1808   -#ifdef FLOATX80
1809   -
1810   -/*
1811   --------------------------------------------------------------------------------
1812   -Returns the result of converting the double-precision floating-point value
1813   -`a' to the extended double-precision floating-point format. The conversion
1814   -is performed according to the IEC/IEEE Standard for Binary Floating-point
1815   -Arithmetic.
1816   --------------------------------------------------------------------------------
1817   -*/
1818   -floatx80 float64_to_floatx80( float64 a )
1819   -{
1820   - flag aSign;
1821   - int16 aExp;
1822   - bits64 aSig;
1823   -
1824   - aSig = extractFloat64Frac( a );
1825   - aExp = extractFloat64Exp( a );
1826   - aSign = extractFloat64Sign( a );
1827   - if ( aExp == 0x7FF ) {
1828   - if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
1829   - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1830   - }
1831   - if ( aExp == 0 ) {
1832   - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1833   - normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1834   - }
1835   - return
1836   - packFloatx80(
1837   - aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1838   -
1839   -}
1840   -
1841   -#endif
1842   -
1843   -/*
1844   --------------------------------------------------------------------------------
1845   -Rounds the double-precision floating-point value `a' to an integer, and
1846   -returns the result as a double-precision floating-point value. The
1847   -operation is performed according to the IEC/IEEE Standard for Binary
1848   -Floating-point Arithmetic.
1849   --------------------------------------------------------------------------------
1850   -*/
1851   -float64 float64_round_to_int( float64 a )
1852   -{
1853   - flag aSign;
1854   - int16 aExp;
1855   - bits64 lastBitMask, roundBitsMask;
1856   - int8 roundingMode;
1857   - float64 z;
1858   -
1859   - aExp = extractFloat64Exp( a );
1860   - if ( 0x433 <= aExp ) {
1861   - if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
1862   - return propagateFloat64NaN( a, a );
1863   - }
1864   - return a;
1865   - }
1866   - if ( aExp <= 0x3FE ) {
1867   - if ( (bits64) ( a<<1 ) == 0 ) return a;
1868   - float_exception_flags |= float_flag_inexact;
1869   - aSign = extractFloat64Sign( a );
1870   - switch ( float_rounding_mode ) {
1871   - case float_round_nearest_even:
1872   - if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
1873   - return packFloat64( aSign, 0x3FF, 0 );
1874   - }
1875   - break;
1876   - case float_round_down:
1877   - return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
1878   - case float_round_up:
1879   - return
1880   - aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
1881   - }
1882   - return packFloat64( aSign, 0, 0 );
1883   - }
1884   - lastBitMask = 1;
1885   - lastBitMask <<= 0x433 - aExp;
1886   - roundBitsMask = lastBitMask - 1;
1887   - z = a;
1888   - roundingMode = float_rounding_mode;
1889   - if ( roundingMode == float_round_nearest_even ) {
1890   - z += lastBitMask>>1;
1891   - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1892   - }
1893   - else if ( roundingMode != float_round_to_zero ) {
1894   - if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1895   - z += roundBitsMask;
1896   - }
1897   - }
1898   - z &= ~ roundBitsMask;
1899   - if ( z != a ) float_exception_flags |= float_flag_inexact;
1900   - return z;
1901   -
1902   -}
1903   -
1904   -/*
1905   --------------------------------------------------------------------------------
1906   -Returns the result of adding the absolute values of the double-precision
1907   -floating-point values `a' and `b'. If `zSign' is true, the sum is negated
1908   -before being returned. `zSign' is ignored if the result is a NaN. The
1909   -addition is performed according to the IEC/IEEE Standard for Binary
1910   -Floating-point Arithmetic.
1911   --------------------------------------------------------------------------------
1912   -*/
1913   -static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1914   -{
1915   - int16 aExp, bExp, zExp;
1916   - bits64 aSig, bSig, zSig;
1917   - int16 expDiff;
1918   -
1919   - aSig = extractFloat64Frac( a );
1920   - aExp = extractFloat64Exp( a );
1921   - bSig = extractFloat64Frac( b );
1922   - bExp = extractFloat64Exp( b );
1923   - expDiff = aExp - bExp;
1924   - aSig <<= 9;
1925   - bSig <<= 9;
1926   - if ( 0 < expDiff ) {
1927   - if ( aExp == 0x7FF ) {
1928   - if ( aSig ) return propagateFloat64NaN( a, b );
1929   - return a;
1930   - }
1931   - if ( bExp == 0 ) {
1932   - --expDiff;
1933   - }
1934   - else {
1935   - bSig |= LIT64( 0x2000000000000000 );
1936   - }
1937   - shift64RightJamming( bSig, expDiff, &bSig );
1938   - zExp = aExp;
1939   - }
1940   - else if ( expDiff < 0 ) {
1941   - if ( bExp == 0x7FF ) {
1942   - if ( bSig ) return propagateFloat64NaN( a, b );
1943   - return packFloat64( zSign, 0x7FF, 0 );
1944   - }
1945   - if ( aExp == 0 ) {
1946   - ++expDiff;
1947   - }
1948   - else {
1949   - aSig |= LIT64( 0x2000000000000000 );
1950   - }
1951   - shift64RightJamming( aSig, - expDiff, &aSig );
1952   - zExp = bExp;
1953   - }
1954   - else {
1955   - if ( aExp == 0x7FF ) {
1956   - if ( aSig | bSig ) return propagateFloat64NaN( a, b );
1957   - return a;
1958   - }
1959   - if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
1960   - zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
1961   - zExp = aExp;
1962   - goto roundAndPack;
1963   - }
1964   - aSig |= LIT64( 0x2000000000000000 );
1965   - zSig = ( aSig + bSig )<<1;
1966   - --zExp;
1967   - if ( (sbits64) zSig < 0 ) {
1968   - zSig = aSig + bSig;
1969   - ++zExp;
1970   - }
1971   - roundAndPack:
1972   - return roundAndPackFloat64( zSign, zExp, zSig );
1973   -
1974   -}
1975   -
1976   -/*
1977   --------------------------------------------------------------------------------
1978   -Returns the result of subtracting the absolute values of the double-
1979   -precision floating-point values `a' and `b'. If `zSign' is true, the
1980   -difference is negated before being returned. `zSign' is ignored if the
1981   -result is a NaN. The subtraction is performed according to the IEC/IEEE
1982   -Standard for Binary Floating-point Arithmetic.
1983   --------------------------------------------------------------------------------
1984   -*/
1985   -static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1986   -{
1987   - int16 aExp, bExp, zExp;
1988   - bits64 aSig, bSig, zSig;
1989   - int16 expDiff;
1990   -
1991   - aSig = extractFloat64Frac( a );
1992   - aExp = extractFloat64Exp( a );
1993   - bSig = extractFloat64Frac( b );
1994   - bExp = extractFloat64Exp( b );
1995   - expDiff = aExp - bExp;
1996   - aSig <<= 10;
1997   - bSig <<= 10;
1998   - if ( 0 < expDiff ) goto aExpBigger;
1999   - if ( expDiff < 0 ) goto bExpBigger;
2000   - if ( aExp == 0x7FF ) {
2001   - if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2002   - float_raise( float_flag_invalid );
2003   - return float64_default_nan;
2004   - }
2005   - if ( aExp == 0 ) {
2006   - aExp = 1;
2007   - bExp = 1;
2008   - }
2009   - if ( bSig < aSig ) goto aBigger;
2010   - if ( aSig < bSig ) goto bBigger;
2011   - return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
2012   - bExpBigger:
2013   - if ( bExp == 0x7FF ) {
2014   - if ( bSig ) return propagateFloat64NaN( a, b );
2015   - return packFloat64( zSign ^ 1, 0x7FF, 0 );
2016   - }
2017   - if ( aExp == 0 ) {
2018   - ++expDiff;
2019   - }
2020   - else {
2021   - aSig |= LIT64( 0x4000000000000000 );
2022   - }
2023   - shift64RightJamming( aSig, - expDiff, &aSig );
2024   - bSig |= LIT64( 0x4000000000000000 );
2025   - bBigger:
2026   - zSig = bSig - aSig;
2027   - zExp = bExp;
2028   - zSign ^= 1;
2029   - goto normalizeRoundAndPack;
2030   - aExpBigger:
2031   - if ( aExp == 0x7FF ) {
2032   - if ( aSig ) return propagateFloat64NaN( a, b );
2033   - return a;
2034   - }
2035   - if ( bExp == 0 ) {
2036   - --expDiff;
2037   - }
2038   - else {
2039   - bSig |= LIT64( 0x4000000000000000 );
2040   - }
2041   - shift64RightJamming( bSig, expDiff, &bSig );
2042   - aSig |= LIT64( 0x4000000000000000 );
2043   - aBigger:
2044   - zSig = aSig - bSig;
2045   - zExp = aExp;
2046   - normalizeRoundAndPack:
2047   - --zExp;
2048   - return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2049   -
2050   -}
2051   -
2052   -/*
2053   --------------------------------------------------------------------------------
2054   -Returns the result of adding the double-precision floating-point values `a'
2055   -and `b'. The operation is performed according to the IEC/IEEE Standard for
2056   -Binary Floating-point Arithmetic.
2057   --------------------------------------------------------------------------------
2058   -*/
2059   -float64 float64_add( float64 a, float64 b )
2060   -{
2061   - flag aSign, bSign;
2062   -
2063   - aSign = extractFloat64Sign( a );
2064   - bSign = extractFloat64Sign( b );
2065   - if ( aSign == bSign ) {
2066   - return addFloat64Sigs( a, b, aSign );
2067   - }
2068   - else {
2069   - return subFloat64Sigs( a, b, aSign );
2070   - }
2071   -
2072   -}
2073   -
2074   -/*
2075   --------------------------------------------------------------------------------
2076   -Returns the result of subtracting the double-precision floating-point values
2077   -`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2078   -for Binary Floating-point Arithmetic.
2079   --------------------------------------------------------------------------------
2080   -*/
2081   -float64 float64_sub( float64 a, float64 b )
2082   -{
2083   - flag aSign, bSign;
2084   -
2085   - aSign = extractFloat64Sign( a );
2086   - bSign = extractFloat64Sign( b );
2087   - if ( aSign == bSign ) {
2088   - return subFloat64Sigs( a, b, aSign );
2089   - }
2090   - else {
2091   - return addFloat64Sigs( a, b, aSign );
2092   - }
2093   -
2094   -}
2095   -
2096   -/*
2097   --------------------------------------------------------------------------------
2098   -Returns the result of multiplying the double-precision floating-point values
2099   -`a' and `b'. The operation is performed according to the IEC/IEEE Standard
2100   -for Binary Floating-point Arithmetic.
2101   --------------------------------------------------------------------------------
2102   -*/
2103   -float64 float64_mul( float64 a, float64 b )
2104   -{
2105   - flag aSign, bSign, zSign;
2106   - int16 aExp, bExp, zExp;
2107   - bits64 aSig, bSig, zSig0, zSig1;
2108   -
2109   - aSig = extractFloat64Frac( a );
2110   - aExp = extractFloat64Exp( a );
2111   - aSign = extractFloat64Sign( a );
2112   - bSig = extractFloat64Frac( b );
2113   - bExp = extractFloat64Exp( b );
2114   - bSign = extractFloat64Sign( b );
2115   - zSign = aSign ^ bSign;
2116   - if ( aExp == 0x7FF ) {
2117   - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2118   - return propagateFloat64NaN( a, b );
2119   - }
2120   - if ( ( bExp | bSig ) == 0 ) {
2121   - float_raise( float_flag_invalid );
2122   - return float64_default_nan;
2123   - }
2124   - return packFloat64( zSign, 0x7FF, 0 );
2125   - }
2126   - if ( bExp == 0x7FF ) {
2127   - if ( bSig ) return propagateFloat64NaN( a, b );
2128   - if ( ( aExp | aSig ) == 0 ) {
2129   - float_raise( float_flag_invalid );
2130   - return float64_default_nan;
2131   - }
2132   - return packFloat64( zSign, 0x7FF, 0 );
2133   - }
2134   - if ( aExp == 0 ) {
2135   - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2136   - normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2137   - }
2138   - if ( bExp == 0 ) {
2139   - if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2140   - normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2141   - }
2142   - zExp = aExp + bExp - 0x3FF;
2143   - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2144   - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2145   - mul64To128( aSig, bSig, &zSig0, &zSig1 );
2146   - zSig0 |= ( zSig1 != 0 );
2147   - if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2148   - zSig0 <<= 1;
2149   - --zExp;
2150   - }
2151   - return roundAndPackFloat64( zSign, zExp, zSig0 );
2152   -
2153   -}
2154   -
2155   -/*
2156   --------------------------------------------------------------------------------
2157   -Returns the result of dividing the double-precision floating-point value `a'
2158   -by the corresponding value `b'. The operation is performed according to
2159   -the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2160   --------------------------------------------------------------------------------
2161   -*/
2162   -float64 float64_div( float64 a, float64 b )
2163   -{
2164   - flag aSign, bSign, zSign;
2165   - int16 aExp, bExp, zExp;
2166   - bits64 aSig, bSig, zSig;
2167   - bits64 rem0, rem1;
2168   - bits64 term0, term1;
2169   -
2170   - aSig = extractFloat64Frac( a );
2171   - aExp = extractFloat64Exp( a );
2172   - aSign = extractFloat64Sign( a );
2173   - bSig = extractFloat64Frac( b );
2174   - bExp = extractFloat64Exp( b );
2175   - bSign = extractFloat64Sign( b );
2176   - zSign = aSign ^ bSign;
2177   - if ( aExp == 0x7FF ) {
2178   - if ( aSig ) return propagateFloat64NaN( a, b );
2179   - if ( bExp == 0x7FF ) {
2180   - if ( bSig ) return propagateFloat64NaN( a, b );
2181   - float_raise( float_flag_invalid );
2182   - return float64_default_nan;
2183   - }
2184   - return packFloat64( zSign, 0x7FF, 0 );
2185   - }
2186   - if ( bExp == 0x7FF ) {
2187   - if ( bSig ) return propagateFloat64NaN( a, b );
2188   - return packFloat64( zSign, 0, 0 );
2189   - }
2190   - if ( bExp == 0 ) {
2191   - if ( bSig == 0 ) {
2192   - if ( ( aExp | aSig ) == 0 ) {
2193   - float_raise( float_flag_invalid );
2194   - return float64_default_nan;
2195   - }
2196   - float_raise( float_flag_divbyzero );
2197   - return packFloat64( zSign, 0x7FF, 0 );
2198   - }
2199   - normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2200   - }
2201   - if ( aExp == 0 ) {
2202   - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2203   - normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2204   - }
2205   - zExp = aExp - bExp + 0x3FD;
2206   - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2207   - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2208   - if ( bSig <= ( aSig + aSig ) ) {
2209   - aSig >>= 1;
2210   - ++zExp;
2211   - }
2212   - zSig = estimateDiv128To64( aSig, 0, bSig );
2213   - if ( ( zSig & 0x1FF ) <= 2 ) {
2214   - mul64To128( bSig, zSig, &term0, &term1 );
2215   - sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2216   - while ( (sbits64) rem0 < 0 ) {
2217   - --zSig;
2218   - add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2219   - }
2220   - zSig |= ( rem1 != 0 );
2221   - }
2222   - return roundAndPackFloat64( zSign, zExp, zSig );
2223   -
2224   -}
2225   -
2226   -/*
2227   --------------------------------------------------------------------------------
2228   -Returns the remainder of the double-precision floating-point value `a'
2229   -with respect to the corresponding value `b'. The operation is performed
2230   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2231   --------------------------------------------------------------------------------
2232   -*/
2233   -float64 float64_rem( float64 a, float64 b )
2234   -{
2235   - flag aSign, bSign, zSign;
2236   - int16 aExp, bExp, expDiff;
2237   - bits64 aSig, bSig;
2238   - bits64 q, alternateASig;
2239   - sbits64 sigMean;
2240   -
2241   - aSig = extractFloat64Frac( a );
2242   - aExp = extractFloat64Exp( a );
2243   - aSign = extractFloat64Sign( a );
2244   - bSig = extractFloat64Frac( b );
2245   - bExp = extractFloat64Exp( b );
2246   - bSign = extractFloat64Sign( b );
2247   - if ( aExp == 0x7FF ) {
2248   - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2249   - return propagateFloat64NaN( a, b );
2250   - }
2251   - float_raise( float_flag_invalid );
2252   - return float64_default_nan;
2253   - }
2254   - if ( bExp == 0x7FF ) {
2255   - if ( bSig ) return propagateFloat64NaN( a, b );
2256   - return a;
2257   - }
2258   - if ( bExp == 0 ) {
2259   - if ( bSig == 0 ) {
2260   - float_raise( float_flag_invalid );
2261   - return float64_default_nan;
2262   - }
2263   - normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2264   - }
2265   - if ( aExp == 0 ) {
2266   - if ( aSig == 0 ) return a;
2267   - normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2268   - }
2269   - expDiff = aExp - bExp;
2270   - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2271   - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2272   - if ( expDiff < 0 ) {
2273   - if ( expDiff < -1 ) return a;
2274   - aSig >>= 1;
2275   - }
2276   - q = ( bSig <= aSig );
2277   - if ( q ) aSig -= bSig;
2278   - expDiff -= 64;
2279   - while ( 0 < expDiff ) {
2280   - q = estimateDiv128To64( aSig, 0, bSig );
2281   - q = ( 2 < q ) ? q - 2 : 0;
2282   - aSig = - ( ( bSig>>2 ) * q );
2283   - expDiff -= 62;
2284   - }
2285   - expDiff += 64;
2286   - if ( 0 < expDiff ) {
2287   - q = estimateDiv128To64( aSig, 0, bSig );
2288   - q = ( 2 < q ) ? q - 2 : 0;
2289   - q >>= 64 - expDiff;
2290   - bSig >>= 2;
2291   - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2292   - }
2293   - else {
2294   - aSig >>= 2;
2295   - bSig >>= 2;
2296   - }
2297   - do {
2298   - alternateASig = aSig;
2299   - ++q;
2300   - aSig -= bSig;
2301   - } while ( 0 <= (sbits64) aSig );
2302   - sigMean = aSig + alternateASig;
2303   - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2304   - aSig = alternateASig;
2305   - }
2306   - zSign = ( (sbits64) aSig < 0 );
2307   - if ( zSign ) aSig = - aSig;
2308   - return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
2309   -
2310   -}
2311   -
2312   -/*
2313   --------------------------------------------------------------------------------
2314   -Returns the square root of the double-precision floating-point value `a'.
2315   -The operation is performed according to the IEC/IEEE Standard for Binary
2316   -Floating-point Arithmetic.
2317   --------------------------------------------------------------------------------
2318   -*/
2319   -float64 float64_sqrt( float64 a )
2320   -{
2321   - flag aSign;
2322   - int16 aExp, zExp;
2323   - bits64 aSig, zSig;
2324   - bits64 rem0, rem1, term0, term1; //, shiftedRem;
2325   - //float64 z;
2326   -
2327   - aSig = extractFloat64Frac( a );
2328   - aExp = extractFloat64Exp( a );
2329   - aSign = extractFloat64Sign( a );
2330   - if ( aExp == 0x7FF ) {
2331   - if ( aSig ) return propagateFloat64NaN( a, a );
2332   - if ( ! aSign ) return a;
2333   - float_raise( float_flag_invalid );
2334   - return float64_default_nan;
2335   - }
2336   - if ( aSign ) {
2337   - if ( ( aExp | aSig ) == 0 ) return a;
2338   - float_raise( float_flag_invalid );
2339   - return float64_default_nan;
2340   - }
2341   - if ( aExp == 0 ) {
2342   - if ( aSig == 0 ) return 0;
2343   - normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2344   - }
2345   - zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2346   - aSig |= LIT64( 0x0010000000000000 );
2347   - zSig = estimateSqrt32( aExp, aSig>>21 );
2348   - zSig <<= 31;
2349   - aSig <<= 9 - ( aExp & 1 );
2350   - zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
2351   - if ( ( zSig & 0x3FF ) <= 5 ) {
2352   - if ( zSig < 2 ) {
2353   - zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
2354   - }
2355   - else {
2356   - aSig <<= 2;
2357   - mul64To128( zSig, zSig, &term0, &term1 );
2358   - sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2359   - while ( (sbits64) rem0 < 0 ) {
2360   - --zSig;
2361   - shortShift128Left( 0, zSig, 1, &term0, &term1 );
2362   - term1 |= 1;
2363   - add128( rem0, rem1, term0, term1, &rem0, &rem1 );
2364   - }
2365   - zSig |= ( ( rem0 | rem1 ) != 0 );
2366   - }
2367   - }
2368   - shift64RightJamming( zSig, 1, &zSig );
2369   - return roundAndPackFloat64( 0, zExp, zSig );
2370   -
2371   -}
2372   -
2373   -/*
2374   --------------------------------------------------------------------------------
2375   -Returns 1 if the double-precision floating-point value `a' is equal to the
2376   -corresponding value `b', and 0 otherwise. The comparison is performed
2377   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2378   --------------------------------------------------------------------------------
2379   -*/
2380   -flag float64_eq( float64 a, float64 b )
2381   -{
2382   -
2383   - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2384   - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2385   - ) {
2386   - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2387   - float_raise( float_flag_invalid );
2388   - }
2389   - return 0;
2390   - }
2391   - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2392   -
2393   -}
2394   -
2395   -/*
2396   --------------------------------------------------------------------------------
2397   -Returns 1 if the double-precision floating-point value `a' is less than or
2398   -equal to the corresponding value `b', and 0 otherwise. The comparison is
2399   -performed according to the IEC/IEEE Standard for Binary Floating-point
2400   -Arithmetic.
2401   --------------------------------------------------------------------------------
2402   -*/
2403   -flag float64_le( float64 a, float64 b )
2404   -{
2405   - flag aSign, bSign;
2406   -
2407   - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2408   - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2409   - ) {
2410   - float_raise( float_flag_invalid );
2411   - return 0;
2412   - }
2413   - aSign = extractFloat64Sign( a );
2414   - bSign = extractFloat64Sign( b );
2415   - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2416   - return ( a == b ) || ( aSign ^ ( a < b ) );
2417   -
2418   -}
2419   -
2420   -/*
2421   --------------------------------------------------------------------------------
2422   -Returns 1 if the double-precision floating-point value `a' is less than
2423   -the corresponding value `b', and 0 otherwise. The comparison is performed
2424   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2425   --------------------------------------------------------------------------------
2426   -*/
2427   -flag float64_lt( float64 a, float64 b )
2428   -{
2429   - flag aSign, bSign;
2430   -
2431   - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2432   - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2433   - ) {
2434   - float_raise( float_flag_invalid );
2435   - return 0;
2436   - }
2437   - aSign = extractFloat64Sign( a );
2438   - bSign = extractFloat64Sign( b );
2439   - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2440   - return ( a != b ) && ( aSign ^ ( a < b ) );
2441   -
2442   -}
2443   -
2444   -/*
2445   --------------------------------------------------------------------------------
2446   -Returns 1 if the double-precision floating-point value `a' is equal to the
2447   -corresponding value `b', and 0 otherwise. The invalid exception is raised
2448   -if either operand is a NaN. Otherwise, the comparison is performed
2449   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2450   --------------------------------------------------------------------------------
2451   -*/
2452   -flag float64_eq_signaling( float64 a, float64 b )
2453   -{
2454   -
2455   - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2456   - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2457   - ) {
2458   - float_raise( float_flag_invalid );
2459   - return 0;
2460   - }
2461   - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2462   -
2463   -}
2464   -
2465   -/*
2466   --------------------------------------------------------------------------------
2467   -Returns 1 if the double-precision floating-point value `a' is less than or
2468   -equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2469   -cause an exception. Otherwise, the comparison is performed according to the
2470   -IEC/IEEE Standard for Binary Floating-point Arithmetic.
2471   --------------------------------------------------------------------------------
2472   -*/
2473   -flag float64_le_quiet( float64 a, float64 b )
2474   -{
2475   - flag aSign, bSign;
2476   - //int16 aExp, bExp;
2477   -
2478   - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2479   - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2480   - ) {
2481   - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2482   - float_raise( float_flag_invalid );
2483   - }
2484   - return 0;
2485   - }
2486   - aSign = extractFloat64Sign( a );
2487   - bSign = extractFloat64Sign( b );
2488   - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2489   - return ( a == b ) || ( aSign ^ ( a < b ) );
2490   -
2491   -}
2492   -
2493   -/*
2494   --------------------------------------------------------------------------------
2495   -Returns 1 if the double-precision floating-point value `a' is less than
2496   -the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2497   -exception. Otherwise, the comparison is performed according to the IEC/IEEE
2498   -Standard for Binary Floating-point Arithmetic.
2499   --------------------------------------------------------------------------------
2500   -*/
2501   -flag float64_lt_quiet( float64 a, float64 b )
2502   -{
2503   - flag aSign, bSign;
2504   -
2505   - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2506   - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2507   - ) {
2508   - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2509   - float_raise( float_flag_invalid );
2510   - }
2511   - return 0;
2512   - }
2513   - aSign = extractFloat64Sign( a );
2514   - bSign = extractFloat64Sign( b );
2515   - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2516   - return ( a != b ) && ( aSign ^ ( a < b ) );
2517   -
2518   -}
2519   -
2520   -#ifdef FLOATX80
2521   -
2522   -/*
2523   --------------------------------------------------------------------------------
2524   -Returns the result of converting the extended double-precision floating-
2525   -point value `a' to the 32-bit two's complement integer format. The
2526   -conversion is performed according to the IEC/IEEE Standard for Binary
2527   -Floating-point Arithmetic---which means in particular that the conversion
2528   -is rounded according to the current rounding mode. If `a' is a NaN, the
2529   -largest positive integer is returned. Otherwise, if the conversion
2530   -overflows, the largest integer with the same sign as `a' is returned.
2531   --------------------------------------------------------------------------------
2532   -*/
2533   -int32 floatx80_to_int32( floatx80 a )
2534   -{
2535   - flag aSign;
2536   - int32 aExp, shiftCount;
2537   - bits64 aSig;
2538   -
2539   - aSig = extractFloatx80Frac( a );
2540   - aExp = extractFloatx80Exp( a );
2541   - aSign = extractFloatx80Sign( a );
2542   - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2543   - shiftCount = 0x4037 - aExp;
2544   - if ( shiftCount <= 0 ) shiftCount = 1;
2545   - shift64RightJamming( aSig, shiftCount, &aSig );
2546   - return roundAndPackInt32( aSign, aSig );
2547   -
2548   -}
2549   -
2550   -/*
2551   --------------------------------------------------------------------------------
2552   -Returns the result of converting the extended double-precision floating-
2553   -point value `a' to the 32-bit two's complement integer format. The
2554   -conversion is performed according to the IEC/IEEE Standard for Binary
2555   -Floating-point Arithmetic, except that the conversion is always rounded
2556   -toward zero. If `a' is a NaN, the largest positive integer is returned.
2557   -Otherwise, if the conversion overflows, the largest integer with the same
2558   -sign as `a' is returned.
2559   --------------------------------------------------------------------------------
2560   -*/
2561   -int32 floatx80_to_int32_round_to_zero( floatx80 a )
2562   -{
2563   - flag aSign;
2564   - int32 aExp, shiftCount;
2565   - bits64 aSig, savedASig;
2566   - int32 z;
2567   -
2568   - aSig = extractFloatx80Frac( a );
2569   - aExp = extractFloatx80Exp( a );
2570   - aSign = extractFloatx80Sign( a );
2571   - shiftCount = 0x403E - aExp;
2572   - if ( shiftCount < 32 ) {
2573   - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
2574   - goto invalid;
2575   - }
2576   - else if ( 63 < shiftCount ) {
2577   - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
2578   - return 0;
2579   - }
2580   - savedASig = aSig;
2581   - aSig >>= shiftCount;
2582   - z = aSig;
2583   - if ( aSign ) z = - z;
2584   - if ( ( z < 0 ) ^ aSign ) {
2585   - invalid:
2586   - float_exception_flags |= float_flag_invalid;
2587   - return aSign ? 0x80000000 : 0x7FFFFFFF;
2588   - }
2589   - if ( ( aSig<<shiftCount ) != savedASig ) {
2590   - float_exception_flags |= float_flag_inexact;
2591   - }
2592   - return z;
2593   -
2594   -}
2595   -
2596   -/*
2597   --------------------------------------------------------------------------------
2598   -Returns the result of converting the extended double-precision floating-
2599   -point value `a' to the single-precision floating-point format. The
2600   -conversion is performed according to the IEC/IEEE Standard for Binary
2601   -Floating-point Arithmetic.
2602   --------------------------------------------------------------------------------
2603   -*/
2604   -float32 floatx80_to_float32( floatx80 a )
2605   -{
2606   - flag aSign;
2607   - int32 aExp;
2608   - bits64 aSig;
2609   -
2610   - aSig = extractFloatx80Frac( a );
2611   - aExp = extractFloatx80Exp( a );
2612   - aSign = extractFloatx80Sign( a );
2613   - if ( aExp == 0x7FFF ) {
2614   - if ( (bits64) ( aSig<<1 ) ) {
2615   - return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
2616   - }
2617   - return packFloat32( aSign, 0xFF, 0 );
2618   - }
2619   - shift64RightJamming( aSig, 33, &aSig );
2620   - if ( aExp || aSig ) aExp -= 0x3F81;
2621   - return roundAndPackFloat32( aSign, aExp, aSig );
2622   -
2623   -}
2624   -
2625   -/*
2626   --------------------------------------------------------------------------------
2627   -Returns the result of converting the extended double-precision floating-
2628   -point value `a' to the double-precision floating-point format. The
2629   -conversion is performed according to the IEC/IEEE Standard for Binary
2630   -Floating-point Arithmetic.
2631   --------------------------------------------------------------------------------
2632   -*/
2633   -float64 floatx80_to_float64( floatx80 a )
2634   -{
2635   - flag aSign;
2636   - int32 aExp;
2637   - bits64 aSig, zSig;
2638   -
2639   - aSig = extractFloatx80Frac( a );
2640   - aExp = extractFloatx80Exp( a );
2641   - aSign = extractFloatx80Sign( a );
2642   - if ( aExp == 0x7FFF ) {
2643   - if ( (bits64) ( aSig<<1 ) ) {
2644   - return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
2645   - }
2646   - return packFloat64( aSign, 0x7FF, 0 );
2647   - }
2648   - shift64RightJamming( aSig, 1, &zSig );
2649   - if ( aExp || aSig ) aExp -= 0x3C01;
2650   - return roundAndPackFloat64( aSign, aExp, zSig );
2651   -
2652   -}
2653   -
2654   -/*
2655   --------------------------------------------------------------------------------
2656   -Rounds the extended double-precision floating-point value `a' to an integer,
2657   -and returns the result as an extended quadruple-precision floating-point
2658   -value. The operation is performed according to the IEC/IEEE Standard for
2659   -Binary Floating-point Arithmetic.
2660   --------------------------------------------------------------------------------
2661   -*/
2662   -floatx80 floatx80_round_to_int( floatx80 a )
2663   -{
2664   - flag aSign;
2665   - int32 aExp;
2666   - bits64 lastBitMask, roundBitsMask;
2667   - int8 roundingMode;
2668   - floatx80 z;
2669   -
2670   - aExp = extractFloatx80Exp( a );
2671   - if ( 0x403E <= aExp ) {
2672   - if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
2673   - return propagateFloatx80NaN( a, a );
2674   - }
2675   - return a;
2676   - }
2677   - if ( aExp <= 0x3FFE ) {
2678   - if ( ( aExp == 0 )
2679   - && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2680   - return a;
2681   - }
2682   - float_exception_flags |= float_flag_inexact;
2683   - aSign = extractFloatx80Sign( a );
2684   - switch ( float_rounding_mode ) {
2685   - case float_round_nearest_even:
2686   - if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
2687   - ) {
2688   - return
2689   - packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
2690   - }
2691   - break;
2692   - case float_round_down:
2693   - return
2694   - aSign ?
2695   - packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
2696   - : packFloatx80( 0, 0, 0 );
2697   - case float_round_up:
2698   - return
2699   - aSign ? packFloatx80( 1, 0, 0 )
2700   - : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
2701   - }
2702   - return packFloatx80( aSign, 0, 0 );
2703   - }
2704   - lastBitMask = 1;
2705   - lastBitMask <<= 0x403E - aExp;
2706   - roundBitsMask = lastBitMask - 1;
2707   - z = a;
2708   - roundingMode = float_rounding_mode;
2709   - if ( roundingMode == float_round_nearest_even ) {
2710   - z.low += lastBitMask>>1;
2711   - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
2712   - }
2713   - else if ( roundingMode != float_round_to_zero ) {
2714   - if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2715   - z.low += roundBitsMask;
2716   - }
2717   - }
2718   - z.low &= ~ roundBitsMask;
2719   - if ( z.low == 0 ) {
2720   - ++z.high;
2721   - z.low = LIT64( 0x8000000000000000 );
2722   - }
2723   - if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
2724   - return z;
2725   -
2726   -}
2727   -
2728   -/*
2729   --------------------------------------------------------------------------------
2730   -Returns the result of adding the absolute values of the extended double-
2731   -precision floating-point values `a' and `b'. If `zSign' is true, the sum is
2732   -negated before being returned. `zSign' is ignored if the result is a NaN.
2733   -The addition is performed according to the IEC/IEEE Standard for Binary
2734   -Floating-point Arithmetic.
2735   --------------------------------------------------------------------------------
2736   -*/
2737   -static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2738   -{
2739   - int32 aExp, bExp, zExp;
2740   - bits64 aSig, bSig, zSig0, zSig1;
2741   - int32 expDiff;
2742   -
2743   - aSig = extractFloatx80Frac( a );
2744   - aExp = extractFloatx80Exp( a );
2745   - bSig = extractFloatx80Frac( b );
2746   - bExp = extractFloatx80Exp( b );
2747   - expDiff = aExp - bExp;
2748   - if ( 0 < expDiff ) {
2749   - if ( aExp == 0x7FFF ) {
2750   - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2751   - return a;
2752   - }
2753   - if ( bExp == 0 ) --expDiff;
2754   - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2755   - zExp = aExp;
2756   - }
2757   - else if ( expDiff < 0 ) {
2758   - if ( bExp == 0x7FFF ) {
2759   - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2760   - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2761   - }
2762   - if ( aExp == 0 ) ++expDiff;
2763   - shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2764   - zExp = bExp;
2765   - }
2766   - else {
2767   - if ( aExp == 0x7FFF ) {
2768   - if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2769   - return propagateFloatx80NaN( a, b );
2770   - }
2771   - return a;
2772   - }
2773   - zSig1 = 0;
2774   - zSig0 = aSig + bSig;
2775   - if ( aExp == 0 ) {
2776   - normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2777   - goto roundAndPack;
2778   - }
2779   - zExp = aExp;
2780   - goto shiftRight1;
2781   - }
2782   -
2783   - zSig0 = aSig + bSig;
2784   -
2785   - if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
2786   - shiftRight1:
2787   - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2788   - zSig0 |= LIT64( 0x8000000000000000 );
2789   - ++zExp;
2790   - roundAndPack:
2791   - return
2792   - roundAndPackFloatx80(
2793   - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2794   -
2795   -}
2796   -
2797   -/*
2798   --------------------------------------------------------------------------------
2799   -Returns the result of subtracting the absolute values of the extended
2800   -double-precision floating-point values `a' and `b'. If `zSign' is true,
2801   -the difference is negated before being returned. `zSign' is ignored if the
2802   -result is a NaN. The subtraction is performed according to the IEC/IEEE
2803   -Standard for Binary Floating-point Arithmetic.
2804   --------------------------------------------------------------------------------
2805   -*/
2806   -static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2807   -{
2808   - int32 aExp, bExp, zExp;
2809   - bits64 aSig, bSig, zSig0, zSig1;
2810   - int32 expDiff;
2811   - floatx80 z;
2812   -
2813   - aSig = extractFloatx80Frac( a );
2814   - aExp = extractFloatx80Exp( a );
2815   - bSig = extractFloatx80Frac( b );
2816   - bExp = extractFloatx80Exp( b );
2817   - expDiff = aExp - bExp;
2818   - if ( 0 < expDiff ) goto aExpBigger;
2819   - if ( expDiff < 0 ) goto bExpBigger;
2820   - if ( aExp == 0x7FFF ) {
2821   - if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
2822   - return propagateFloatx80NaN( a, b );
2823   - }
2824   - float_raise( float_flag_invalid );
2825   - z.low = floatx80_default_nan_low;
2826   - z.high = floatx80_default_nan_high;
2827   - return z;
2828   - }
2829   - if ( aExp == 0 ) {
2830   - aExp = 1;
2831   - bExp = 1;
2832   - }
2833   - zSig1 = 0;
2834   - if ( bSig < aSig ) goto aBigger;
2835   - if ( aSig < bSig ) goto bBigger;
2836   - return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
2837   - bExpBigger:
2838   - if ( bExp == 0x7FFF ) {
2839   - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2840   - return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
2841   - }
2842   - if ( aExp == 0 ) ++expDiff;
2843   - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2844   - bBigger:
2845   - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2846   - zExp = bExp;
2847   - zSign ^= 1;
2848   - goto normalizeRoundAndPack;
2849   - aExpBigger:
2850   - if ( aExp == 0x7FFF ) {
2851   - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2852   - return a;
2853   - }
2854   - if ( bExp == 0 ) --expDiff;
2855   - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2856   - aBigger:
2857   - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2858   - zExp = aExp;
2859   - normalizeRoundAndPack:
2860   - return
2861   - normalizeRoundAndPackFloatx80(
2862   - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2863   -
2864   -}
2865   -
2866   -/*
2867   --------------------------------------------------------------------------------
2868   -Returns the result of adding the extended double-precision floating-point
2869   -values `a' and `b'. The operation is performed according to the IEC/IEEE
2870   -Standard for Binary Floating-point Arithmetic.
2871   --------------------------------------------------------------------------------
2872   -*/
2873   -floatx80 floatx80_add( floatx80 a, floatx80 b )
2874   -{
2875   - flag aSign, bSign;
2876   -
2877   - aSign = extractFloatx80Sign( a );
2878   - bSign = extractFloatx80Sign( b );
2879   - if ( aSign == bSign ) {
2880   - return addFloatx80Sigs( a, b, aSign );
2881   - }
2882   - else {
2883   - return subFloatx80Sigs( a, b, aSign );
2884   - }
2885   -
2886   -}
2887   -
2888   -/*
2889   --------------------------------------------------------------------------------
2890   -Returns the result of subtracting the extended double-precision floating-
2891   -point values `a' and `b'. The operation is performed according to the
2892   -IEC/IEEE Standard for Binary Floating-point Arithmetic.
2893   --------------------------------------------------------------------------------
2894   -*/
2895   -floatx80 floatx80_sub( floatx80 a, floatx80 b )
2896   -{
2897   - flag aSign, bSign;
2898   -
2899   - aSign = extractFloatx80Sign( a );
2900   - bSign = extractFloatx80Sign( b );
2901   - if ( aSign == bSign ) {
2902   - return subFloatx80Sigs( a, b, aSign );
2903   - }
2904   - else {
2905   - return addFloatx80Sigs( a, b, aSign );
2906   - }
2907   -
2908   -}
2909   -
2910   -/*
2911   --------------------------------------------------------------------------------
2912   -Returns the result of multiplying the extended double-precision floating-
2913   -point values `a' and `b'. The operation is performed according to the
2914   -IEC/IEEE Standard for Binary Floating-point Arithmetic.
2915   --------------------------------------------------------------------------------
2916   -*/
2917   -floatx80 floatx80_mul( floatx80 a, floatx80 b )
2918   -{
2919   - flag aSign, bSign, zSign;
2920   - int32 aExp, bExp, zExp;
2921   - bits64 aSig, bSig, zSig0, zSig1;
2922   - floatx80 z;
2923   -
2924   - aSig = extractFloatx80Frac( a );
2925   - aExp = extractFloatx80Exp( a );
2926   - aSign = extractFloatx80Sign( a );
2927   - bSig = extractFloatx80Frac( b );
2928   - bExp = extractFloatx80Exp( b );
2929   - bSign = extractFloatx80Sign( b );
2930   - zSign = aSign ^ bSign;
2931   - if ( aExp == 0x7FFF ) {
2932   - if ( (bits64) ( aSig<<1 )
2933   - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
2934   - return propagateFloatx80NaN( a, b );
2935   - }
2936   - if ( ( bExp | bSig ) == 0 ) goto invalid;
2937   - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2938   - }
2939   - if ( bExp == 0x7FFF ) {
2940   - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2941   - if ( ( aExp | aSig ) == 0 ) {
2942   - invalid:
2943   - float_raise( float_flag_invalid );
2944   - z.low = floatx80_default_nan_low;
2945   - z.high = floatx80_default_nan_high;
2946   - return z;
2947   - }
2948   - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2949   - }
2950   - if ( aExp == 0 ) {
2951   - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2952   - normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2953   - }
2954   - if ( bExp == 0 ) {
2955   - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2956   - normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2957   - }
2958   - zExp = aExp + bExp - 0x3FFE;
2959   - mul64To128( aSig, bSig, &zSig0, &zSig1 );
2960   - if ( 0 < (sbits64) zSig0 ) {
2961   - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2962   - --zExp;
2963   - }
2964   - return
2965   - roundAndPackFloatx80(
2966   - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2967   -
2968   -}
2969   -
2970   -/*
2971   --------------------------------------------------------------------------------
2972   -Returns the result of dividing the extended double-precision floating-point
2973   -value `a' by the corresponding value `b'. The operation is performed
2974   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
2975   --------------------------------------------------------------------------------
2976   -*/
2977   -floatx80 floatx80_div( floatx80 a, floatx80 b )
2978   -{
2979   - flag aSign, bSign, zSign;
2980   - int32 aExp, bExp, zExp;
2981   - bits64 aSig, bSig, zSig0, zSig1;
2982   - bits64 rem0, rem1, rem2, term0, term1, term2;
2983   - floatx80 z;
2984   -
2985   - aSig = extractFloatx80Frac( a );
2986   - aExp = extractFloatx80Exp( a );
2987   - aSign = extractFloatx80Sign( a );
2988   - bSig = extractFloatx80Frac( b );
2989   - bExp = extractFloatx80Exp( b );
2990   - bSign = extractFloatx80Sign( b );
2991   - zSign = aSign ^ bSign;
2992   - if ( aExp == 0x7FFF ) {
2993   - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2994   - if ( bExp == 0x7FFF ) {
2995   - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2996   - goto invalid;
2997   - }
2998   - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2999   - }
3000   - if ( bExp == 0x7FFF ) {
3001   - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3002   - return packFloatx80( zSign, 0, 0 );
3003   - }
3004   - if ( bExp == 0 ) {
3005   - if ( bSig == 0 ) {
3006   - if ( ( aExp | aSig ) == 0 ) {
3007   - invalid:
3008   - float_raise( float_flag_invalid );
3009   - z.low = floatx80_default_nan_low;
3010   - z.high = floatx80_default_nan_high;
3011   - return z;
3012   - }
3013   - float_raise( float_flag_divbyzero );
3014   - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3015   - }
3016   - normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3017   - }
3018   - if ( aExp == 0 ) {
3019   - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3020   - normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3021   - }
3022   - zExp = aExp - bExp + 0x3FFE;
3023   - rem1 = 0;
3024   - if ( bSig <= aSig ) {
3025   - shift128Right( aSig, 0, 1, &aSig, &rem1 );
3026   - ++zExp;
3027   - }
3028   - zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3029   - mul64To128( bSig, zSig0, &term0, &term1 );
3030   - sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3031   - while ( (sbits64) rem0 < 0 ) {
3032   - --zSig0;
3033   - add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3034   - }
3035   - zSig1 = estimateDiv128To64( rem1, 0, bSig );
3036   - if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3037   - mul64To128( bSig, zSig1, &term1, &term2 );
3038   - sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3039   - while ( (sbits64) rem1 < 0 ) {
3040   - --zSig1;
3041   - add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3042   - }
3043   - zSig1 |= ( ( rem1 | rem2 ) != 0 );
3044   - }
3045   - return
3046   - roundAndPackFloatx80(
3047   - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3048   -
3049   -}
3050   -
3051   -/*
3052   --------------------------------------------------------------------------------
3053   -Returns the remainder of the extended double-precision floating-point value
3054   -`a' with respect to the corresponding value `b'. The operation is performed
3055   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3056   --------------------------------------------------------------------------------
3057   -*/
3058   -floatx80 floatx80_rem( floatx80 a, floatx80 b )
3059   -{
3060   - flag aSign, bSign, zSign;
3061   - int32 aExp, bExp, expDiff;
3062   - bits64 aSig0, aSig1, bSig;
3063   - bits64 q, term0, term1, alternateASig0, alternateASig1;
3064   - floatx80 z;
3065   -
3066   - aSig0 = extractFloatx80Frac( a );
3067   - aExp = extractFloatx80Exp( a );
3068   - aSign = extractFloatx80Sign( a );
3069   - bSig = extractFloatx80Frac( b );
3070   - bExp = extractFloatx80Exp( b );
3071   - bSign = extractFloatx80Sign( b );
3072   - if ( aExp == 0x7FFF ) {
3073   - if ( (bits64) ( aSig0<<1 )
3074   - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3075   - return propagateFloatx80NaN( a, b );
3076   - }
3077   - goto invalid;
3078   - }
3079   - if ( bExp == 0x7FFF ) {
3080   - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3081   - return a;
3082   - }
3083   - if ( bExp == 0 ) {
3084   - if ( bSig == 0 ) {
3085   - invalid:
3086   - float_raise( float_flag_invalid );
3087   - z.low = floatx80_default_nan_low;
3088   - z.high = floatx80_default_nan_high;
3089   - return z;
3090   - }
3091   - normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3092   - }
3093   - if ( aExp == 0 ) {
3094   - if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3095   - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3096   - }
3097   - bSig |= LIT64( 0x8000000000000000 );
3098   - zSign = aSign;
3099   - expDiff = aExp - bExp;
3100   - aSig1 = 0;
3101   - if ( expDiff < 0 ) {
3102   - if ( expDiff < -1 ) return a;
3103   - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3104   - expDiff = 0;
3105   - }
3106   - q = ( bSig <= aSig0 );
3107   - if ( q ) aSig0 -= bSig;
3108   - expDiff -= 64;
3109   - while ( 0 < expDiff ) {
3110   - q = estimateDiv128To64( aSig0, aSig1, bSig );
3111   - q = ( 2 < q ) ? q - 2 : 0;
3112   - mul64To128( bSig, q, &term0, &term1 );
3113   - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3114   - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3115   - expDiff -= 62;
3116   - }
3117   - expDiff += 64;
3118   - if ( 0 < expDiff ) {
3119   - q = estimateDiv128To64( aSig0, aSig1, bSig );
3120   - q = ( 2 < q ) ? q - 2 : 0;
3121   - q >>= 64 - expDiff;
3122   - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3123   - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3124   - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3125   - while ( le128( term0, term1, aSig0, aSig1 ) ) {
3126   - ++q;
3127   - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3128   - }
3129   - }
3130   - else {
3131   - term1 = 0;
3132   - term0 = bSig;
3133   - }
3134   - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3135   - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3136   - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3137   - && ( q & 1 ) )
3138   - ) {
3139   - aSig0 = alternateASig0;
3140   - aSig1 = alternateASig1;
3141   - zSign = ! zSign;
3142   - }
3143   - return
3144   - normalizeRoundAndPackFloatx80(
3145   - 80, zSign, bExp + expDiff, aSig0, aSig1 );
3146   -
3147   -}
3148   -
3149   -/*
3150   --------------------------------------------------------------------------------
3151   -Returns the square root of the extended double-precision floating-point
3152   -value `a'. The operation is performed according to the IEC/IEEE Standard
3153   -for Binary Floating-point Arithmetic.
3154   --------------------------------------------------------------------------------
3155   -*/
3156   -floatx80 floatx80_sqrt( floatx80 a )
3157   -{
3158   - flag aSign;
3159   - int32 aExp, zExp;
3160   - bits64 aSig0, aSig1, zSig0, zSig1;
3161   - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3162   - bits64 shiftedRem0, shiftedRem1;
3163   - floatx80 z;
3164   -
3165   - aSig0 = extractFloatx80Frac( a );
3166   - aExp = extractFloatx80Exp( a );
3167   - aSign = extractFloatx80Sign( a );
3168   - if ( aExp == 0x7FFF ) {
3169   - if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
3170   - if ( ! aSign ) return a;
3171   - goto invalid;
3172   - }
3173   - if ( aSign ) {
3174   - if ( ( aExp | aSig0 ) == 0 ) return a;
3175   - invalid:
3176   - float_raise( float_flag_invalid );
3177   - z.low = floatx80_default_nan_low;
3178   - z.high = floatx80_default_nan_high;
3179   - return z;
3180   - }
3181   - if ( aExp == 0 ) {
3182   - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3183   - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3184   - }
3185   - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3186   - zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3187   - zSig0 <<= 31;
3188   - aSig1 = 0;
3189   - shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
3190   - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
3191   - if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
3192   - shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
3193   - mul64To128( zSig0, zSig0, &term0, &term1 );
3194   - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3195   - while ( (sbits64) rem0 < 0 ) {
3196   - --zSig0;
3197   - shortShift128Left( 0, zSig0, 1, &term0, &term1 );
3198   - term1 |= 1;
3199   - add128( rem0, rem1, term0, term1, &rem0, &rem1 );
3200   - }
3201   - shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
3202   - zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
3203   - if ( (bits64) ( zSig1<<1 ) <= 10 ) {
3204   - if ( zSig1 == 0 ) zSig1 = 1;
3205   - mul64To128( zSig0, zSig1, &term1, &term2 );
3206   - shortShift128Left( term1, term2, 1, &term1, &term2 );
3207   - sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3208   - mul64To128( zSig1, zSig1, &term2, &term3 );
3209   - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3210   - while ( (sbits64) rem1 < 0 ) {
3211   - --zSig1;
3212   - shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
3213   - term3 |= 1;
3214   - add192(
3215   - rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
3216   - }
3217   - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3218   - }
3219   - return
3220   - roundAndPackFloatx80(
3221   - floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
3222   -
3223   -}
3224   -
3225   -/*
3226   --------------------------------------------------------------------------------
3227   -Returns 1 if the extended double-precision floating-point value `a' is
3228   -equal to the corresponding value `b', and 0 otherwise. The comparison is
3229   -performed according to the IEC/IEEE Standard for Binary Floating-point
3230   -Arithmetic.
3231   --------------------------------------------------------------------------------
3232   -*/
3233   -flag floatx80_eq( floatx80 a, floatx80 b )
3234   -{
3235   -
3236   - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3237   - && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3238   - || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3239   - && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3240   - ) {
3241   - if ( floatx80_is_signaling_nan( a )
3242   - || floatx80_is_signaling_nan( b ) ) {
3243   - float_raise( float_flag_invalid );
3244   - }
3245   - return 0;
3246   - }
3247   - return
3248   - ( a.low == b.low )
3249   - && ( ( a.high == b.high )
3250   - || ( ( a.low == 0 )
3251   - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3252   - );
3253   -
3254   -}
3255   -
3256   -/*
3257   --------------------------------------------------------------------------------
3258   -Returns 1 if the extended double-precision floating-point value `a' is
3259   -less than or equal to the corresponding value `b', and 0 otherwise. The
3260   -comparison is performed according to the IEC/IEEE Standard for Binary
3261   -Floating-point Arithmetic.
3262   --------------------------------------------------------------------------------
3263   -*/
3264   -flag floatx80_le( floatx80 a, floatx80 b )
3265   -{
3266   - flag aSign, bSign;
3267   -
3268   - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3269   - && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3270   - || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3271   - && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3272   - ) {
3273   - float_raise( float_flag_invalid );
3274   - return 0;
3275   - }
3276   - aSign = extractFloatx80Sign( a );
3277   - bSign = extractFloatx80Sign( b );
3278   - if ( aSign != bSign ) {
3279   - return
3280   - aSign
3281   - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3282   - == 0 );
3283   - }
3284   - return
3285   - aSign ? le128( b.high, b.low, a.high, a.low )
3286   - : le128( a.high, a.low, b.high, b.low );
3287   -
3288   -}
3289   -
3290   -/*
3291   --------------------------------------------------------------------------------
3292   -Returns 1 if the extended double-precision floating-point value `a' is
3293   -less than the corresponding value `b', and 0 otherwise. The comparison
3294   -is performed according to the IEC/IEEE Standard for Binary Floating-point
3295   -Arithmetic.
3296   --------------------------------------------------------------------------------
3297   -*/
3298   -flag floatx80_lt( floatx80 a, floatx80 b )
3299   -{
3300   - flag aSign, bSign;
3301   -
3302   - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3303   - && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3304   - || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3305   - && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3306   - ) {
3307   - float_raise( float_flag_invalid );
3308   - return 0;
3309   - }
3310   - aSign = extractFloatx80Sign( a );
3311   - bSign = extractFloatx80Sign( b );
3312   - if ( aSign != bSign ) {
3313   - return
3314   - aSign
3315   - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3316   - != 0 );
3317   - }
3318   - return
3319   - aSign ? lt128( b.high, b.low, a.high, a.low )
3320   - : lt128( a.high, a.low, b.high, b.low );
3321   -
3322   -}
3323   -
3324   -/*
3325   --------------------------------------------------------------------------------
3326   -Returns 1 if the extended double-precision floating-point value `a' is equal
3327   -to the corresponding value `b', and 0 otherwise. The invalid exception is
3328   -raised if either operand is a NaN. Otherwise, the comparison is performed
3329   -according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3330   --------------------------------------------------------------------------------
3331   -*/
3332   -flag floatx80_eq_signaling( floatx80 a, floatx80 b )
3333   -{
3334   -
3335   - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3336   - && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3337   - || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3338   - && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3339   - ) {
3340   - float_raise( float_flag_invalid );
3341   - return 0;
3342   - }
3343   - return
3344   - ( a.low == b.low )
3345   - && ( ( a.high == b.high )
3346   - || ( ( a.low == 0 )
3347   - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3348   - );
3349   -
3350   -}
3351   -
3352   -/*
3353   --------------------------------------------------------------------------------
3354   -Returns 1 if the extended double-precision floating-point value `a' is less
3355   -than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
3356   -do not cause an exception. Otherwise, the comparison is performed according
3357   -to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
3358   --------------------------------------------------------------------------------
3359   -*/
3360   -flag floatx80_le_quiet( floatx80 a, floatx80 b )
3361   -{
3362   - flag aSign, bSign;
3363   -
3364   - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3365   - && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3366   - || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3367   - && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3368   - ) {
3369   - if ( floatx80_is_signaling_nan( a )
3370   - || floatx80_is_signaling_nan( b ) ) {
3371   - float_raise( float_flag_invalid );
3372   - }
3373   - return 0;
3374   - }
3375   - aSign = extractFloatx80Sign( a );
3376   - bSign = extractFloatx80Sign( b );
3377   - if ( aSign != bSign ) {
3378   - return
3379   - aSign
3380   - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3381   - == 0 );
3382   - }
3383   - return
3384   - aSign ? le128( b.high, b.low, a.high, a.low )
3385   - : le128( a.high, a.low, b.high, b.low );
3386   -
3387   -}
3388   -
3389   -/*
3390   --------------------------------------------------------------------------------
3391   -Returns 1 if the extended double-precision floating-point value `a' is less
3392   -than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
3393   -an exception. Otherwise, the comparison is performed according to the
3394   -IEC/IEEE Standard for Binary Floating-point Arithmetic.
3395   --------------------------------------------------------------------------------
3396   -*/
3397   -flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3398   -{
3399   - flag aSign, bSign;
3400   -
3401   - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3402   - && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3403   - || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3404   - && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3405   - ) {
3406   - if ( floatx80_is_signaling_nan( a )
3407   - || floatx80_is_signaling_nan( b ) ) {
3408   - float_raise( float_flag_invalid );
3409   - }
3410   - return 0;
3411   - }
3412   - aSign = extractFloatx80Sign( a );
3413   - bSign = extractFloatx80Sign( b );
3414   - if ( aSign != bSign ) {
3415   - return
3416   - aSign
3417   - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3418   - != 0 );
3419   - }
3420   - return
3421   - aSign ? lt128( b.high, b.low, a.high, a.low )
3422   - : lt128( a.high, a.low, b.high, b.low );
3423   -
3424   -}
3425   -
3426   -#endif
3427   -
target-arm/nwfpe/softfloat.h deleted 100644 → 0
1   -
2   -/*
3   -===============================================================================
4   -
5   -This C header file is part of the SoftFloat IEC/IEEE Floating-point
6   -Arithmetic Package, Release 2.
7   -
8   -Written by John R. Hauser. This work was made possible in part by the
9   -International Computer Science Institute, located at Suite 600, 1947 Center
10   -Street, Berkeley, California 94704. Funding was partially provided by the
11   -National Science Foundation under grant MIP-9311980. The original version
12   -of this code was written as part of a project to build a fixed-point vector
13   -processor in collaboration with the University of California at Berkeley,
14   -overseen by Profs. Nelson Morgan and John Wawrzynek. More information
15   -is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
16   -arithmetic/softfloat.html'.
17   -
18   -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
19   -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
20   -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
21   -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
22   -AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
23   -
24   -Derivative works are acceptable, even for commercial purposes, so long as
25   -(1) they include prominent notice that the work is derivative, and (2) they
26   -include prominent notice akin to these three paragraphs for those parts of
27   -this code that are retained.
28   -
29   -===============================================================================
30   -*/
31   -
32   -#ifndef __SOFTFLOAT_H__
33   -#define __SOFTFLOAT_H__
34   -
35   -/*
36   --------------------------------------------------------------------------------
37   -The macro `FLOATX80' must be defined to enable the extended double-precision
38   -floating-point format `floatx80'. If this macro is not defined, the
39   -`floatx80' type will not be defined, and none of the functions that either
40   -input or output the `floatx80' type will be defined.
41   --------------------------------------------------------------------------------
42   -*/
43   -#define FLOATX80
44   -
45   -/*
46   --------------------------------------------------------------------------------
47   -Software IEC/IEEE floating-point types.
48   --------------------------------------------------------------------------------
49   -*/
50   -typedef unsigned long int float32;
51   -typedef unsigned long long float64;
52   -typedef struct {
53   - unsigned short high;
54   - unsigned long long low;
55   -} floatx80;
56   -
57   -/*
58   --------------------------------------------------------------------------------
59   -Software IEC/IEEE floating-point underflow tininess-detection mode.
60   --------------------------------------------------------------------------------
61   -*/
62   -extern signed char float_detect_tininess;
63   -enum {
64   - float_tininess_after_rounding = 0,
65   - float_tininess_before_rounding = 1
66   -};
67   -
68   -/*
69   --------------------------------------------------------------------------------
70   -Software IEC/IEEE floating-point rounding mode.
71   --------------------------------------------------------------------------------
72   -*/
73   -extern signed char float_rounding_mode;
74   -enum {
75   - float_round_nearest_even = 0,
76   - float_round_to_zero = 1,
77   - float_round_down = 2,
78   - float_round_up = 3
79   -};
80   -
81   -/*
82   --------------------------------------------------------------------------------
83   -Software IEC/IEEE floating-point exception flags.
84   --------------------------------------------------------------------------------
85   -extern signed char float_exception_flags;
86   -enum {
87   - float_flag_inexact = 1,
88   - float_flag_underflow = 2,
89   - float_flag_overflow = 4,
90   - float_flag_divbyzero = 8,
91   - float_flag_invalid = 16
92   -};
93   -
94   -ScottB: November 4, 1998
95   -Changed the enumeration to match the bit order in the FPA11.
96   -*/
97   -
98   -extern signed char float_exception_flags;
99   -enum {
100   - float_flag_invalid = 1,
101   - float_flag_divbyzero = 2,
102   - float_flag_overflow = 4,
103   - float_flag_underflow = 8,
104   - float_flag_inexact = 16
105   -};
106   -
107   -/*
108   --------------------------------------------------------------------------------
109   -Routine to raise any or all of the software IEC/IEEE floating-point
110   -exception flags.
111   --------------------------------------------------------------------------------
112   -*/
113   -void float_raise( signed char );
114   -
115   -/*
116   --------------------------------------------------------------------------------
117   -Software IEC/IEEE integer-to-floating-point conversion routines.
118   --------------------------------------------------------------------------------
119   -*/
120   -float32 int32_to_float32( signed int );
121   -float64 int32_to_float64( signed int );
122   -#ifdef FLOATX80
123   -floatx80 int32_to_floatx80( signed int );
124   -#endif
125   -
126   -/*
127   --------------------------------------------------------------------------------
128   -Software IEC/IEEE single-precision conversion routines.
129   --------------------------------------------------------------------------------
130   -*/
131   -signed int float32_to_int32( float32 );
132   -signed int float32_to_int32_round_to_zero( float32 );
133   -float64 float32_to_float64( float32 );
134   -#ifdef FLOATX80
135   -floatx80 float32_to_floatx80( float32 );
136   -#endif
137   -
138   -/*
139   --------------------------------------------------------------------------------
140   -Software IEC/IEEE single-precision operations.
141   --------------------------------------------------------------------------------
142   -*/
143   -float32 float32_round_to_int( float32 );
144   -float32 float32_add( float32, float32 );
145   -float32 float32_sub( float32, float32 );
146   -float32 float32_mul( float32, float32 );
147   -float32 float32_div( float32, float32 );
148   -float32 float32_rem( float32, float32 );
149   -float32 float32_sqrt( float32 );
150   -char float32_eq( float32, float32 );
151   -char float32_le( float32, float32 );
152   -char float32_lt( float32, float32 );
153   -char float32_eq_signaling( float32, float32 );
154   -char float32_le_quiet( float32, float32 );
155   -char float32_lt_quiet( float32, float32 );
156   -char float32_is_signaling_nan( float32 );
157   -
158   -/*
159   --------------------------------------------------------------------------------
160   -Software IEC/IEEE double-precision conversion routines.
161   --------------------------------------------------------------------------------
162   -*/
163   -signed int float64_to_int32( float64 );
164   -signed int float64_to_int32_round_to_zero( float64 );
165   -float32 float64_to_float32( float64 );
166   -#ifdef FLOATX80
167   -floatx80 float64_to_floatx80( float64 );
168   -#endif
169   -
170   -/*
171   --------------------------------------------------------------------------------
172   -Software IEC/IEEE double-precision operations.
173   --------------------------------------------------------------------------------
174   -*/
175   -float64 float64_round_to_int( float64 );
176   -float64 float64_add( float64, float64 );
177   -float64 float64_sub( float64, float64 );
178   -float64 float64_mul( float64, float64 );
179   -float64 float64_div( float64, float64 );
180   -float64 float64_rem( float64, float64 );
181   -float64 float64_sqrt( float64 );
182   -char float64_eq( float64, float64 );
183   -char float64_le( float64, float64 );
184   -char float64_lt( float64, float64 );
185   -char float64_eq_signaling( float64, float64 );
186   -char float64_le_quiet( float64, float64 );
187   -char float64_lt_quiet( float64, float64 );
188   -char float64_is_signaling_nan( float64 );
189   -
190   -#ifdef FLOATX80
191   -
192   -/*
193   --------------------------------------------------------------------------------
194   -Software IEC/IEEE extended double-precision conversion routines.
195   --------------------------------------------------------------------------------
196   -*/
197   -signed int floatx80_to_int32( floatx80 );
198   -signed int floatx80_to_int32_round_to_zero( floatx80 );
199   -float32 floatx80_to_float32( floatx80 );
200   -float64 floatx80_to_float64( floatx80 );
201   -
202   -/*
203   --------------------------------------------------------------------------------
204   -Software IEC/IEEE extended double-precision rounding precision. Valid
205   -values are 32, 64, and 80.
206   --------------------------------------------------------------------------------
207   -*/
208   -extern signed char floatx80_rounding_precision;
209   -
210   -/*
211   --------------------------------------------------------------------------------
212   -Software IEC/IEEE extended double-precision operations.
213   --------------------------------------------------------------------------------
214   -*/
215   -floatx80 floatx80_round_to_int( floatx80 );
216   -floatx80 floatx80_add( floatx80, floatx80 );
217   -floatx80 floatx80_sub( floatx80, floatx80 );
218   -floatx80 floatx80_mul( floatx80, floatx80 );
219   -floatx80 floatx80_div( floatx80, floatx80 );
220   -floatx80 floatx80_rem( floatx80, floatx80 );
221   -floatx80 floatx80_sqrt( floatx80 );
222   -char floatx80_eq( floatx80, floatx80 );
223   -char floatx80_le( floatx80, floatx80 );
224   -char floatx80_lt( floatx80, floatx80 );
225   -char floatx80_eq_signaling( floatx80, floatx80 );
226   -char floatx80_le_quiet( floatx80, floatx80 );
227   -char floatx80_lt_quiet( floatx80, floatx80 );
228   -char floatx80_is_signaling_nan( floatx80 );
229   -
230   -#endif
231   -
232   -#endif