Commit 00406dff19893a4fb9fb582792a249b770eb1d11

Authored by bellard
1 parent 69de927c

added arm nwfpe support (initial patch by Ulrich Hecht)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@609 c046a42c-6fe2-441c-8c8c-71466251a162
target-arm/nwfpe/ARM-gcc.h 0 → 100644
  1 +/*
  2 +-------------------------------------------------------------------------------
  3 +The macro `BITS64' can be defined to indicate that 64-bit integer types are
  4 +supported by the compiler.
  5 +-------------------------------------------------------------------------------
  6 +*/
  7 +#define BITS64
  8 +
  9 +/*
  10 +-------------------------------------------------------------------------------
  11 +Each of the following `typedef's defines the most convenient type that holds
  12 +integers of at least as many bits as specified. For example, `uint8' should
  13 +be the most convenient type that can hold unsigned integers of as many as
  14 +8 bits. The `flag' type must be able to hold either a 0 or 1. For most
  15 +implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
  16 +to the same as `int'.
  17 +-------------------------------------------------------------------------------
  18 +*/
  19 +typedef char flag;
  20 +typedef unsigned char uint8;
  21 +typedef signed char int8;
  22 +typedef int uint16;
  23 +typedef int int16;
  24 +typedef unsigned int uint32;
  25 +typedef signed int int32;
  26 +#ifdef BITS64
  27 +typedef unsigned long long int bits64;
  28 +typedef signed long long int sbits64;
  29 +#endif
  30 +
  31 +/*
  32 +-------------------------------------------------------------------------------
  33 +Each of the following `typedef's defines a type that holds integers
  34 +of _exactly_ the number of bits specified. For instance, for most
  35 +implementations of C, `bits16' and `sbits16' should be `typedef'ed to
  36 +`unsigned short int' and `signed short int' (or `short int'), respectively.
  37 +-------------------------------------------------------------------------------
  38 +*/
  39 +typedef unsigned char bits8;
  40 +typedef signed char sbits8;
  41 +typedef unsigned short int bits16;
  42 +typedef signed short int sbits16;
  43 +typedef unsigned int bits32;
  44 +typedef signed int sbits32;
  45 +#ifdef BITS64
  46 +typedef unsigned long long int uint64;
  47 +typedef signed long long int int64;
  48 +#endif
  49 +
  50 +#ifdef BITS64
  51 +/*
  52 +-------------------------------------------------------------------------------
  53 +The `LIT64' macro takes as its argument a textual integer literal and if
  54 +necessary ``marks'' the literal as having a 64-bit integer type. For
  55 +example, the Gnu C Compiler (`gcc') requires that 64-bit literals be
  56 +appended with the letters `LL' standing for `long long', which is `gcc's
  57 +name for the 64-bit integer type. Some compilers may allow `LIT64' to be
  58 +defined as the identity macro: `#define LIT64( a ) a'.
  59 +-------------------------------------------------------------------------------
  60 +*/
  61 +#define LIT64( a ) a##LL
  62 +#endif
  63 +
  64 +/*
  65 +-------------------------------------------------------------------------------
  66 +The macro `INLINE' can be used before functions that should be inlined. If
  67 +a compiler does not support explicit inlining, this macro should be defined
  68 +to be `static'.
  69 +-------------------------------------------------------------------------------
  70 +*/
  71 +#define INLINE extern __inline__
  72 +
  73 +
  74 +/* For use as a GCC soft-float library we need some special function names. */
  75 +
  76 +#ifdef __LIBFLOAT__
  77 +
  78 +/* Some 32-bit ops can be mapped straight across by just changing the name. */
  79 +#define float32_add __addsf3
  80 +#define float32_sub __subsf3
  81 +#define float32_mul __mulsf3
  82 +#define float32_div __divsf3
  83 +#define int32_to_float32 __floatsisf
  84 +#define float32_to_int32_round_to_zero __fixsfsi
  85 +#define float32_to_uint32_round_to_zero __fixunssfsi
  86 +
  87 +/* These ones go through the glue code. To avoid namespace pollution
  88 + we rename the internal functions too. */
  89 +#define float32_eq ___float32_eq
  90 +#define float32_le ___float32_le
  91 +#define float32_lt ___float32_lt
  92 +
  93 +/* All the 64-bit ops have to go through the glue, so we pull the same
  94 + trick. */
  95 +#define float64_add ___float64_add
  96 +#define float64_sub ___float64_sub
  97 +#define float64_mul ___float64_mul
  98 +#define float64_div ___float64_div
  99 +#define int32_to_float64 ___int32_to_float64
  100 +#define float64_to_int32_round_to_zero ___float64_to_int32_round_to_zero
  101 +#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero
  102 +#define float64_to_float32 ___float64_to_float32
  103 +#define float32_to_float64 ___float32_to_float64
  104 +#define float64_eq ___float64_eq
  105 +#define float64_le ___float64_le
  106 +#define float64_lt ___float64_lt
  107 +
  108 +#if 0
  109 +#define float64_add __adddf3
  110 +#define float64_sub __subdf3
  111 +#define float64_mul __muldf3
  112 +#define float64_div __divdf3
  113 +#define int32_to_float64 __floatsidf
  114 +#define float64_to_int32_round_to_zero __fixdfsi
  115 +#define float64_to_uint32_round_to_zero __fixunsdfsi
  116 +#define float64_to_float32 __truncdfsf2
  117 +#define float32_to_float64 __extendsfdf2
  118 +#endif
  119 +
  120 +#endif
... ...
target-arm/nwfpe/double_cpdo.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#include "fpa11.h"
  23 +#include "softfloat.h"
  24 +#include "fpopcode.h"
  25 +
  26 +float64 float64_exp(float64 Fm);
  27 +float64 float64_ln(float64 Fm);
  28 +float64 float64_sin(float64 rFm);
  29 +float64 float64_cos(float64 rFm);
  30 +float64 float64_arcsin(float64 rFm);
  31 +float64 float64_arctan(float64 rFm);
  32 +float64 float64_log(float64 rFm);
  33 +float64 float64_tan(float64 rFm);
  34 +float64 float64_arccos(float64 rFm);
  35 +float64 float64_pow(float64 rFn,float64 rFm);
  36 +float64 float64_pol(float64 rFn,float64 rFm);
  37 +
  38 +unsigned int DoubleCPDO(const unsigned int opcode)
  39 +{
  40 + FPA11 *fpa11 = GET_FPA11();
  41 + float64 rFm, rFn;
  42 + unsigned int Fd, Fm, Fn, nRc = 1;
  43 +
  44 + //printk("DoubleCPDO(0x%08x)\n",opcode);
  45 +
  46 + Fm = getFm(opcode);
  47 + if (CONSTANT_FM(opcode))
  48 + {
  49 + rFm = getDoubleConstant(Fm);
  50 + }
  51 + else
  52 + {
  53 + switch (fpa11->fType[Fm])
  54 + {
  55 + case typeSingle:
  56 + rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle);
  57 + break;
  58 +
  59 + case typeDouble:
  60 + rFm = fpa11->fpreg[Fm].fDouble;
  61 + break;
  62 +
  63 + case typeExtended:
  64 + // !! patb
  65 + //printk("not implemented! why not?\n");
  66 + //!! ScottB
  67 + // should never get here, if extended involved
  68 + // then other operand should be promoted then
  69 + // ExtendedCPDO called.
  70 + break;
  71 +
  72 + default: return 0;
  73 + }
  74 + }
  75 +
  76 + if (!MONADIC_INSTRUCTION(opcode))
  77 + {
  78 + Fn = getFn(opcode);
  79 + switch (fpa11->fType[Fn])
  80 + {
  81 + case typeSingle:
  82 + rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle);
  83 + break;
  84 +
  85 + case typeDouble:
  86 + rFn = fpa11->fpreg[Fn].fDouble;
  87 + break;
  88 +
  89 + default: return 0;
  90 + }
  91 + }
  92 +
  93 + Fd = getFd(opcode);
  94 + /* !! this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */
  95 + switch (opcode & MASK_ARITHMETIC_OPCODE)
  96 + {
  97 + /* dyadic opcodes */
  98 + case ADF_CODE:
  99 + fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm);
  100 + break;
  101 +
  102 + case MUF_CODE:
  103 + case FML_CODE:
  104 + fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm);
  105 + break;
  106 +
  107 + case SUF_CODE:
  108 + fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm);
  109 + break;
  110 +
  111 + case RSF_CODE:
  112 + fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn);
  113 + break;
  114 +
  115 + case DVF_CODE:
  116 + case FDV_CODE:
  117 + fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm);
  118 + break;
  119 +
  120 + case RDF_CODE:
  121 + case FRD_CODE:
  122 + fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn);
  123 + break;
  124 +
  125 +#if 0
  126 + case POW_CODE:
  127 + fpa11->fpreg[Fd].fDouble = float64_pow(rFn,rFm);
  128 + break;
  129 +
  130 + case RPW_CODE:
  131 + fpa11->fpreg[Fd].fDouble = float64_pow(rFm,rFn);
  132 + break;
  133 +#endif
  134 +
  135 + case RMF_CODE:
  136 + fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm);
  137 + break;
  138 +
  139 +#if 0
  140 + case POL_CODE:
  141 + fpa11->fpreg[Fd].fDouble = float64_pol(rFn,rFm);
  142 + break;
  143 +#endif
  144 +
  145 + /* monadic opcodes */
  146 + case MVF_CODE:
  147 + fpa11->fpreg[Fd].fDouble = rFm;
  148 + break;
  149 +
  150 + case MNF_CODE:
  151 + {
  152 + unsigned int *p = (unsigned int*)&rFm;
  153 + p[1] ^= 0x80000000;
  154 + fpa11->fpreg[Fd].fDouble = rFm;
  155 + }
  156 + break;
  157 +
  158 + case ABS_CODE:
  159 + {
  160 + unsigned int *p = (unsigned int*)&rFm;
  161 + p[1] &= 0x7fffffff;
  162 + fpa11->fpreg[Fd].fDouble = rFm;
  163 + }
  164 + break;
  165 +
  166 + case RND_CODE:
  167 + case URD_CODE:
  168 + fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm);
  169 + break;
  170 +
  171 + case SQT_CODE:
  172 + fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm);
  173 + break;
  174 +
  175 +#if 0
  176 + case LOG_CODE:
  177 + fpa11->fpreg[Fd].fDouble = float64_log(rFm);
  178 + break;
  179 +
  180 + case LGN_CODE:
  181 + fpa11->fpreg[Fd].fDouble = float64_ln(rFm);
  182 + break;
  183 +
  184 + case EXP_CODE:
  185 + fpa11->fpreg[Fd].fDouble = float64_exp(rFm);
  186 + break;
  187 +
  188 + case SIN_CODE:
  189 + fpa11->fpreg[Fd].fDouble = float64_sin(rFm);
  190 + break;
  191 +
  192 + case COS_CODE:
  193 + fpa11->fpreg[Fd].fDouble = float64_cos(rFm);
  194 + break;
  195 +
  196 + case TAN_CODE:
  197 + fpa11->fpreg[Fd].fDouble = float64_tan(rFm);
  198 + break;
  199 +
  200 + case ASN_CODE:
  201 + fpa11->fpreg[Fd].fDouble = float64_arcsin(rFm);
  202 + break;
  203 +
  204 + case ACS_CODE:
  205 + fpa11->fpreg[Fd].fDouble = float64_arccos(rFm);
  206 + break;
  207 +
  208 + case ATN_CODE:
  209 + fpa11->fpreg[Fd].fDouble = float64_arctan(rFm);
  210 + break;
  211 +#endif
  212 +
  213 + case NRM_CODE:
  214 + break;
  215 +
  216 + default:
  217 + {
  218 + nRc = 0;
  219 + }
  220 + }
  221 +
  222 + if (0 != nRc) fpa11->fType[Fd] = typeDouble;
  223 + return nRc;
  224 +}
  225 +
  226 +#if 0
  227 +float64 float64_exp(float64 rFm)
  228 +{
  229 + return rFm;
  230 +//series
  231 +}
  232 +
  233 +float64 float64_ln(float64 rFm)
  234 +{
  235 + return rFm;
  236 +//series
  237 +}
  238 +
  239 +float64 float64_sin(float64 rFm)
  240 +{
  241 + return rFm;
  242 +//series
  243 +}
  244 +
  245 +float64 float64_cos(float64 rFm)
  246 +{
  247 + return rFm;
  248 + //series
  249 +}
  250 +
  251 +#if 0
  252 +float64 float64_arcsin(float64 rFm)
  253 +{
  254 +//series
  255 +}
  256 +
  257 +float64 float64_arctan(float64 rFm)
  258 +{
  259 + //series
  260 +}
  261 +#endif
  262 +
  263 +float64 float64_log(float64 rFm)
  264 +{
  265 + return float64_div(float64_ln(rFm),getDoubleConstant(7));
  266 +}
  267 +
  268 +float64 float64_tan(float64 rFm)
  269 +{
  270 + return float64_div(float64_sin(rFm),float64_cos(rFm));
  271 +}
  272 +
  273 +float64 float64_arccos(float64 rFm)
  274 +{
  275 +return rFm;
  276 + //return float64_sub(halfPi,float64_arcsin(rFm));
  277 +}
  278 +
  279 +float64 float64_pow(float64 rFn,float64 rFm)
  280 +{
  281 + return float64_exp(float64_mul(rFm,float64_ln(rFn)));
  282 +}
  283 +
  284 +float64 float64_pol(float64 rFn,float64 rFm)
  285 +{
  286 + return float64_arctan(float64_div(rFn,rFm));
  287 +}
  288 +#endif
... ...
target-arm/nwfpe/extended_cpdo.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#include "fpa11.h"
  23 +#include "softfloat.h"
  24 +#include "fpopcode.h"
  25 +
  26 +floatx80 floatx80_exp(floatx80 Fm);
  27 +floatx80 floatx80_ln(floatx80 Fm);
  28 +floatx80 floatx80_sin(floatx80 rFm);
  29 +floatx80 floatx80_cos(floatx80 rFm);
  30 +floatx80 floatx80_arcsin(floatx80 rFm);
  31 +floatx80 floatx80_arctan(floatx80 rFm);
  32 +floatx80 floatx80_log(floatx80 rFm);
  33 +floatx80 floatx80_tan(floatx80 rFm);
  34 +floatx80 floatx80_arccos(floatx80 rFm);
  35 +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm);
  36 +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm);
  37 +
  38 +unsigned int ExtendedCPDO(const unsigned int opcode)
  39 +{
  40 + FPA11 *fpa11 = GET_FPA11();
  41 + floatx80 rFm, rFn;
  42 + unsigned int Fd, Fm, Fn, nRc = 1;
  43 +
  44 + //printk("ExtendedCPDO(0x%08x)\n",opcode);
  45 +
  46 + Fm = getFm(opcode);
  47 + if (CONSTANT_FM(opcode))
  48 + {
  49 + rFm = getExtendedConstant(Fm);
  50 + }
  51 + else
  52 + {
  53 + switch (fpa11->fType[Fm])
  54 + {
  55 + case typeSingle:
  56 + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle);
  57 + break;
  58 +
  59 + case typeDouble:
  60 + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble);
  61 + break;
  62 +
  63 + case typeExtended:
  64 + rFm = fpa11->fpreg[Fm].fExtended;
  65 + break;
  66 +
  67 + default: return 0;
  68 + }
  69 + }
  70 +
  71 + if (!MONADIC_INSTRUCTION(opcode))
  72 + {
  73 + Fn = getFn(opcode);
  74 + switch (fpa11->fType[Fn])
  75 + {
  76 + case typeSingle:
  77 + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
  78 + break;
  79 +
  80 + case typeDouble:
  81 + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
  82 + break;
  83 +
  84 + case typeExtended:
  85 + rFn = fpa11->fpreg[Fn].fExtended;
  86 + break;
  87 +
  88 + default: return 0;
  89 + }
  90 + }
  91 +
  92 + Fd = getFd(opcode);
  93 + switch (opcode & MASK_ARITHMETIC_OPCODE)
  94 + {
  95 + /* dyadic opcodes */
  96 + case ADF_CODE:
  97 + fpa11->fpreg[Fd].fExtended = floatx80_add(rFn,rFm);
  98 + break;
  99 +
  100 + case MUF_CODE:
  101 + case FML_CODE:
  102 + fpa11->fpreg[Fd].fExtended = floatx80_mul(rFn,rFm);
  103 + break;
  104 +
  105 + case SUF_CODE:
  106 + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFn,rFm);
  107 + break;
  108 +
  109 + case RSF_CODE:
  110 + fpa11->fpreg[Fd].fExtended = floatx80_sub(rFm,rFn);
  111 + break;
  112 +
  113 + case DVF_CODE:
  114 + case FDV_CODE:
  115 + fpa11->fpreg[Fd].fExtended = floatx80_div(rFn,rFm);
  116 + break;
  117 +
  118 + case RDF_CODE:
  119 + case FRD_CODE:
  120 + fpa11->fpreg[Fd].fExtended = floatx80_div(rFm,rFn);
  121 + break;
  122 +
  123 +#if 0
  124 + case POW_CODE:
  125 + fpa11->fpreg[Fd].fExtended = floatx80_pow(rFn,rFm);
  126 + break;
  127 +
  128 + case RPW_CODE:
  129 + fpa11->fpreg[Fd].fExtended = floatx80_pow(rFm,rFn);
  130 + break;
  131 +#endif
  132 +
  133 + case RMF_CODE:
  134 + fpa11->fpreg[Fd].fExtended = floatx80_rem(rFn,rFm);
  135 + break;
  136 +
  137 +#if 0
  138 + case POL_CODE:
  139 + fpa11->fpreg[Fd].fExtended = floatx80_pol(rFn,rFm);
  140 + break;
  141 +#endif
  142 +
  143 + /* monadic opcodes */
  144 + case MVF_CODE:
  145 + fpa11->fpreg[Fd].fExtended = rFm;
  146 + break;
  147 +
  148 + case MNF_CODE:
  149 + rFm.high ^= 0x8000;
  150 + fpa11->fpreg[Fd].fExtended = rFm;
  151 + break;
  152 +
  153 + case ABS_CODE:
  154 + rFm.high &= 0x7fff;
  155 + fpa11->fpreg[Fd].fExtended = rFm;
  156 + break;
  157 +
  158 + case RND_CODE:
  159 + case URD_CODE:
  160 + fpa11->fpreg[Fd].fExtended = floatx80_round_to_int(rFm);
  161 + break;
  162 +
  163 + case SQT_CODE:
  164 + fpa11->fpreg[Fd].fExtended = floatx80_sqrt(rFm);
  165 + break;
  166 +
  167 +#if 0
  168 + case LOG_CODE:
  169 + fpa11->fpreg[Fd].fExtended = floatx80_log(rFm);
  170 + break;
  171 +
  172 + case LGN_CODE:
  173 + fpa11->fpreg[Fd].fExtended = floatx80_ln(rFm);
  174 + break;
  175 +
  176 + case EXP_CODE:
  177 + fpa11->fpreg[Fd].fExtended = floatx80_exp(rFm);
  178 + break;
  179 +
  180 + case SIN_CODE:
  181 + fpa11->fpreg[Fd].fExtended = floatx80_sin(rFm);
  182 + break;
  183 +
  184 + case COS_CODE:
  185 + fpa11->fpreg[Fd].fExtended = floatx80_cos(rFm);
  186 + break;
  187 +
  188 + case TAN_CODE:
  189 + fpa11->fpreg[Fd].fExtended = floatx80_tan(rFm);
  190 + break;
  191 +
  192 + case ASN_CODE:
  193 + fpa11->fpreg[Fd].fExtended = floatx80_arcsin(rFm);
  194 + break;
  195 +
  196 + case ACS_CODE:
  197 + fpa11->fpreg[Fd].fExtended = floatx80_arccos(rFm);
  198 + break;
  199 +
  200 + case ATN_CODE:
  201 + fpa11->fpreg[Fd].fExtended = floatx80_arctan(rFm);
  202 + break;
  203 +#endif
  204 +
  205 + case NRM_CODE:
  206 + break;
  207 +
  208 + default:
  209 + {
  210 + nRc = 0;
  211 + }
  212 + }
  213 +
  214 + if (0 != nRc) fpa11->fType[Fd] = typeExtended;
  215 + return nRc;
  216 +}
  217 +
  218 +#if 0
  219 +floatx80 floatx80_exp(floatx80 Fm)
  220 +{
  221 +//series
  222 +}
  223 +
  224 +floatx80 floatx80_ln(floatx80 Fm)
  225 +{
  226 +//series
  227 +}
  228 +
  229 +floatx80 floatx80_sin(floatx80 rFm)
  230 +{
  231 +//series
  232 +}
  233 +
  234 +floatx80 floatx80_cos(floatx80 rFm)
  235 +{
  236 +//series
  237 +}
  238 +
  239 +floatx80 floatx80_arcsin(floatx80 rFm)
  240 +{
  241 +//series
  242 +}
  243 +
  244 +floatx80 floatx80_arctan(floatx80 rFm)
  245 +{
  246 + //series
  247 +}
  248 +
  249 +floatx80 floatx80_log(floatx80 rFm)
  250 +{
  251 + return floatx80_div(floatx80_ln(rFm),getExtendedConstant(7));
  252 +}
  253 +
  254 +floatx80 floatx80_tan(floatx80 rFm)
  255 +{
  256 + return floatx80_div(floatx80_sin(rFm),floatx80_cos(rFm));
  257 +}
  258 +
  259 +floatx80 floatx80_arccos(floatx80 rFm)
  260 +{
  261 + //return floatx80_sub(halfPi,floatx80_arcsin(rFm));
  262 +}
  263 +
  264 +floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm)
  265 +{
  266 + return floatx80_exp(floatx80_mul(rFm,floatx80_ln(rFn)));
  267 +}
  268 +
  269 +floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm)
  270 +{
  271 + return floatx80_arctan(floatx80_div(rFn,rFm));
  272 +}
  273 +#endif
... ...
target-arm/nwfpe/fpa11.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#include "fpa11.h"
  23 +
  24 +#include "fpopcode.h"
  25 +
  26 +//#include "fpmodule.h"
  27 +//#include "fpmodule.inl"
  28 +
  29 +//#include <asm/system.h>
  30 +
  31 +#include <stdio.h>
  32 +
  33 +/* forward declarations */
  34 +unsigned int EmulateCPDO(const unsigned int);
  35 +unsigned int EmulateCPDT(const unsigned int);
  36 +unsigned int EmulateCPRT(const unsigned int);
  37 +
  38 +FPA11* qemufpa=0;
  39 +unsigned int* user_registers=0;
  40 +
  41 +/* Reset the FPA11 chip. Called to initialize and reset the emulator. */
  42 +void resetFPA11(void)
  43 +{
  44 + int i;
  45 + FPA11 *fpa11 = GET_FPA11();
  46 +
  47 + /* initialize the register type array */
  48 + for (i=0;i<=7;i++)
  49 + {
  50 + fpa11->fType[i] = typeNone;
  51 + }
  52 +
  53 + /* FPSR: set system id to FP_EMULATOR, set AC, clear all other bits */
  54 + fpa11->fpsr = FP_EMULATOR | BIT_AC;
  55 +
  56 + /* FPCR: set SB, AB and DA bits, clear all others */
  57 +#if MAINTAIN_FPCR
  58 + fpa11->fpcr = MASK_RESET;
  59 +#endif
  60 +}
  61 +
  62 +void SetRoundingMode(const unsigned int opcode)
  63 +{
  64 +#if MAINTAIN_FPCR
  65 + FPA11 *fpa11 = GET_FPA11();
  66 + fpa11->fpcr &= ~MASK_ROUNDING_MODE;
  67 +#endif
  68 + switch (opcode & MASK_ROUNDING_MODE)
  69 + {
  70 + default:
  71 + case ROUND_TO_NEAREST:
  72 + float_rounding_mode = float_round_nearest_even;
  73 +#if MAINTAIN_FPCR
  74 + fpa11->fpcr |= ROUND_TO_NEAREST;
  75 +#endif
  76 + break;
  77 +
  78 + case ROUND_TO_PLUS_INFINITY:
  79 + float_rounding_mode = float_round_up;
  80 +#if MAINTAIN_FPCR
  81 + fpa11->fpcr |= ROUND_TO_PLUS_INFINITY;
  82 +#endif
  83 + break;
  84 +
  85 + case ROUND_TO_MINUS_INFINITY:
  86 + float_rounding_mode = float_round_down;
  87 +#if MAINTAIN_FPCR
  88 + fpa11->fpcr |= ROUND_TO_MINUS_INFINITY;
  89 +#endif
  90 + break;
  91 +
  92 + case ROUND_TO_ZERO:
  93 + float_rounding_mode = float_round_to_zero;
  94 +#if MAINTAIN_FPCR
  95 + fpa11->fpcr |= ROUND_TO_ZERO;
  96 +#endif
  97 + break;
  98 + }
  99 +}
  100 +
  101 +void SetRoundingPrecision(const unsigned int opcode)
  102 +{
  103 +#if MAINTAIN_FPCR
  104 + FPA11 *fpa11 = GET_FPA11();
  105 + fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
  106 +#endif
  107 + switch (opcode & MASK_ROUNDING_PRECISION)
  108 + {
  109 + case ROUND_SINGLE:
  110 + floatx80_rounding_precision = 32;
  111 +#if MAINTAIN_FPCR
  112 + fpa11->fpcr |= ROUND_SINGLE;
  113 +#endif
  114 + break;
  115 +
  116 + case ROUND_DOUBLE:
  117 + floatx80_rounding_precision = 64;
  118 +#if MAINTAIN_FPCR
  119 + fpa11->fpcr |= ROUND_DOUBLE;
  120 +#endif
  121 + break;
  122 +
  123 + case ROUND_EXTENDED:
  124 + floatx80_rounding_precision = 80;
  125 +#if MAINTAIN_FPCR
  126 + fpa11->fpcr |= ROUND_EXTENDED;
  127 +#endif
  128 + break;
  129 +
  130 + default: floatx80_rounding_precision = 80;
  131 + }
  132 +}
  133 +
  134 +/* Emulate the instruction in the opcode. */
  135 +unsigned int EmulateAll(unsigned int opcode, FPA11* qfpa, unsigned int* qregs)
  136 +{
  137 + unsigned int nRc = 0;
  138 +// unsigned long flags;
  139 + FPA11 *fpa11;
  140 +// save_flags(flags); sti();
  141 +
  142 + qemufpa=qfpa;
  143 + user_registers=qregs;
  144 +
  145 +#if 0
  146 + fprintf(stderr,"emulating FP insn 0x%08x, PC=0x%08x\n",
  147 + opcode, qregs[REG_PC]);
  148 +#endif
  149 + fpa11 = GET_FPA11();
  150 +
  151 + if (fpa11->initflag == 0) /* good place for __builtin_expect */
  152 + {
  153 + resetFPA11();
  154 + SetRoundingMode(ROUND_TO_NEAREST);
  155 + SetRoundingPrecision(ROUND_EXTENDED);
  156 + fpa11->initflag = 1;
  157 + }
  158 +
  159 + if (TEST_OPCODE(opcode,MASK_CPRT))
  160 + {
  161 + //fprintf(stderr,"emulating CPRT\n");
  162 + /* Emulate conversion opcodes. */
  163 + /* Emulate register transfer opcodes. */
  164 + /* Emulate comparison opcodes. */
  165 + nRc = EmulateCPRT(opcode);
  166 + }
  167 + else if (TEST_OPCODE(opcode,MASK_CPDO))
  168 + {
  169 + //fprintf(stderr,"emulating CPDO\n");
  170 + /* Emulate monadic arithmetic opcodes. */
  171 + /* Emulate dyadic arithmetic opcodes. */
  172 + nRc = EmulateCPDO(opcode);
  173 + }
  174 + else if (TEST_OPCODE(opcode,MASK_CPDT))
  175 + {
  176 + //fprintf(stderr,"emulating CPDT\n");
  177 + /* Emulate load/store opcodes. */
  178 + /* Emulate load/store multiple opcodes. */
  179 + nRc = EmulateCPDT(opcode);
  180 + }
  181 + else
  182 + {
  183 + /* Invalid instruction detected. Return FALSE. */
  184 + nRc = 0;
  185 + }
  186 +
  187 +// restore_flags(flags);
  188 +
  189 + //printf("returning %d\n",nRc);
  190 + return(nRc);
  191 +}
  192 +
  193 +#if 0
  194 +unsigned int EmulateAll1(unsigned int opcode)
  195 +{
  196 + switch ((opcode >> 24) & 0xf)
  197 + {
  198 + case 0xc:
  199 + case 0xd:
  200 + if ((opcode >> 20) & 0x1)
  201 + {
  202 + switch ((opcode >> 8) & 0xf)
  203 + {
  204 + case 0x1: return PerformLDF(opcode); break;
  205 + case 0x2: return PerformLFM(opcode); break;
  206 + default: return 0;
  207 + }
  208 + }
  209 + else
  210 + {
  211 + switch ((opcode >> 8) & 0xf)
  212 + {
  213 + case 0x1: return PerformSTF(opcode); break;
  214 + case 0x2: return PerformSFM(opcode); break;
  215 + default: return 0;
  216 + }
  217 + }
  218 + break;
  219 +
  220 + case 0xe:
  221 + if (opcode & 0x10)
  222 + return EmulateCPDO(opcode);
  223 + else
  224 + return EmulateCPRT(opcode);
  225 + break;
  226 +
  227 + default: return 0;
  228 + }
  229 +}
  230 +#endif
  231 +
... ...
target-arm/nwfpe/fpa11.h 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.com, 1998-1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#ifndef __FPA11_H__
  23 +#define __FPA11_H__
  24 +
  25 +#define GET_FPA11() (qemufpa)
  26 +
  27 +/*
  28 + * The process's registers are always at the very top of the 8K
  29 + * stack+task struct. Use the same method as 'current' uses to
  30 + * reach them.
  31 + */
  32 +extern unsigned int *user_registers;
  33 +
  34 +#define GET_USERREG() (user_registers)
  35 +
  36 +/* Need task_struct */
  37 +//#include <linux/sched.h>
  38 +
  39 +/* includes */
  40 +#include "fpsr.h" /* FP control and status register definitions */
  41 +#include "softfloat.h"
  42 +
  43 +#define typeNone 0x00
  44 +#define typeSingle 0x01
  45 +#define typeDouble 0x02
  46 +#define typeExtended 0x03
  47 +
  48 +/*
  49 + * This must be no more and no less than 12 bytes.
  50 + */
  51 +typedef union tagFPREG {
  52 + floatx80 fExtended;
  53 + float64 fDouble;
  54 + float32 fSingle;
  55 +} FPREG;
  56 +
  57 +/*
  58 + * FPA11 device model.
  59 + *
  60 + * This structure is exported to user space. Do not re-order.
  61 + * Only add new stuff to the end, and do not change the size of
  62 + * any element. Elements of this structure are used by user
  63 + * space, and must match struct user_fp in include/asm-arm/user.h.
  64 + * We include the byte offsets below for documentation purposes.
  65 + *
  66 + * The size of this structure and FPREG are checked by fpmodule.c
  67 + * on initialisation. If the rules have been broken, NWFPE will
  68 + * not initialise.
  69 + */
  70 +typedef struct tagFPA11 {
  71 +/* 0 */ FPREG fpreg[8]; /* 8 floating point registers */
  72 +/* 96 */ FPSR fpsr; /* floating point status register */
  73 +/* 100 */ FPCR fpcr; /* floating point control register */
  74 +/* 104 */ unsigned char fType[8]; /* type of floating point value held in
  75 + floating point registers. One of none
  76 + single, double or extended. */
  77 +/* 112 */ int initflag; /* this is special. The kernel guarantees
  78 + to set it to 0 when a thread is launched,
  79 + so we can use it to detect whether this
  80 + instance of the emulator needs to be
  81 + initialised. */
  82 +} FPA11;
  83 +
  84 +extern FPA11* qemufpa;
  85 +
  86 +extern void resetFPA11(void);
  87 +extern void SetRoundingMode(const unsigned int);
  88 +extern void SetRoundingPrecision(const unsigned int);
  89 +
  90 +#define get_user(x,y) ((x)=*(y))
  91 +#define put_user(x,y) (*(y)=(x))
  92 +static inline unsigned int readRegister(unsigned int reg)
  93 +{
  94 + return (user_registers[(reg)]);
  95 +}
  96 +
  97 +static inline void writeRegister(unsigned int x, unsigned int y)
  98 +{
  99 +#if 0
  100 + printf("writing %d to r%d\n",y,x);
  101 +#endif
  102 + user_registers[(x)]=(y);
  103 +}
  104 +
  105 +static inline void writeConditionCodes(unsigned int x)
  106 +{
  107 +#if 0
  108 +unsigned int y;
  109 +unsigned int ZF;
  110 + printf("setting flags to %x from %x\n",x,user_registers[16]);
  111 +#endif
  112 + user_registers[16]=(x); // cpsr
  113 + user_registers[17]=(x>>29)&1; // cf
  114 + user_registers[18]=(x<<3)&(1<<31); // vf
  115 + user_registers[19]=x&(1<<31); // nzf
  116 + if(!(x&(1<<30))) user_registers[19]++; // nzf must be non-zero for zf to be cleared
  117 +
  118 +#if 0
  119 + ZF = (user_registers[19] == 0);
  120 + y=user_registers[16] | (user_registers[19] & 0x80000000) | (ZF << 30) |
  121 + (user_registers[17] << 29) | ((user_registers[18] & 0x80000000) >> 3);
  122 + if(y != x)
  123 + printf("GODDAM SHIIIIIIIIIIIIIIIIT! %x %x nzf %x zf %x\n",x,y,user_registers[19],ZF);
  124 +#endif
  125 +}
  126 +
  127 +#define REG_PC 15
  128 +
  129 +unsigned int EmulateAll(unsigned int opcode, FPA11* qfpa, unsigned int* qregs);
  130 +
  131 +#endif
... ...
target-arm/nwfpe/fpa11.inl 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#include "fpa11.h"
  23 +
  24 +/* Read and write floating point status register */
  25 +extern __inline__ unsigned int readFPSR(void)
  26 +{
  27 + FPA11 *fpa11 = GET_FPA11();
  28 + return(fpa11->fpsr);
  29 +}
  30 +
  31 +extern __inline__ void writeFPSR(FPSR reg)
  32 +{
  33 + FPA11 *fpa11 = GET_FPA11();
  34 + /* the sysid byte in the status register is readonly */
  35 + fpa11->fpsr = (fpa11->fpsr & MASK_SYSID) | (reg & ~MASK_SYSID);
  36 +}
  37 +
  38 +/* Read and write floating point control register */
  39 +extern __inline__ FPCR readFPCR(void)
  40 +{
  41 + FPA11 *fpa11 = GET_FPA11();
  42 + /* clear SB, AB and DA bits before returning FPCR */
  43 + return(fpa11->fpcr & ~MASK_RFC);
  44 +}
  45 +
  46 +extern __inline__ void writeFPCR(FPCR reg)
  47 +{
  48 + FPA11 *fpa11 = GET_FPA11();
  49 + fpa11->fpcr &= ~MASK_WFC; /* clear SB, AB and DA bits */
  50 + fpa11->fpcr |= (reg & MASK_WFC); /* write SB, AB and DA bits */
  51 +}
... ...
target-arm/nwfpe/fpa11_cpdo.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#include "fpa11.h"
  23 +#include "fpopcode.h"
  24 +
  25 +unsigned int SingleCPDO(const unsigned int opcode);
  26 +unsigned int DoubleCPDO(const unsigned int opcode);
  27 +unsigned int ExtendedCPDO(const unsigned int opcode);
  28 +
  29 +unsigned int EmulateCPDO(const unsigned int opcode)
  30 +{
  31 + FPA11 *fpa11 = GET_FPA11();
  32 + unsigned int Fd, nType, nDest, nRc = 1;
  33 +
  34 + //printk("EmulateCPDO(0x%08x)\n",opcode);
  35 +
  36 + /* Get the destination size. If not valid let Linux perform
  37 + an invalid instruction trap. */
  38 + nDest = getDestinationSize(opcode);
  39 + if (typeNone == nDest) return 0;
  40 +
  41 + SetRoundingMode(opcode);
  42 +
  43 + /* Compare the size of the operands in Fn and Fm.
  44 + Choose the largest size and perform operations in that size,
  45 + in order to make use of all the precision of the operands.
  46 + If Fm is a constant, we just grab a constant of a size
  47 + matching the size of the operand in Fn. */
  48 + if (MONADIC_INSTRUCTION(opcode))
  49 + nType = nDest;
  50 + else
  51 + nType = fpa11->fType[getFn(opcode)];
  52 +
  53 + if (!CONSTANT_FM(opcode))
  54 + {
  55 + register unsigned int Fm = getFm(opcode);
  56 + if (nType < fpa11->fType[Fm])
  57 + {
  58 + nType = fpa11->fType[Fm];
  59 + }
  60 + }
  61 +
  62 + switch (nType)
  63 + {
  64 + case typeSingle : nRc = SingleCPDO(opcode); break;
  65 + case typeDouble : nRc = DoubleCPDO(opcode); break;
  66 + case typeExtended : nRc = ExtendedCPDO(opcode); break;
  67 + default : nRc = 0;
  68 + }
  69 +
  70 + /* If the operation succeeded, check to see if the result in the
  71 + destination register is the correct size. If not force it
  72 + to be. */
  73 + Fd = getFd(opcode);
  74 + nType = fpa11->fType[Fd];
  75 + if ((0 != nRc) && (nDest != nType))
  76 + {
  77 + switch (nDest)
  78 + {
  79 + case typeSingle:
  80 + {
  81 + if (typeDouble == nType)
  82 + fpa11->fpreg[Fd].fSingle =
  83 + float64_to_float32(fpa11->fpreg[Fd].fDouble);
  84 + else
  85 + fpa11->fpreg[Fd].fSingle =
  86 + floatx80_to_float32(fpa11->fpreg[Fd].fExtended);
  87 + }
  88 + break;
  89 +
  90 + case typeDouble:
  91 + {
  92 + if (typeSingle == nType)
  93 + fpa11->fpreg[Fd].fDouble =
  94 + float32_to_float64(fpa11->fpreg[Fd].fSingle);
  95 + else
  96 + fpa11->fpreg[Fd].fDouble =
  97 + floatx80_to_float64(fpa11->fpreg[Fd].fExtended);
  98 + }
  99 + break;
  100 +
  101 + case typeExtended:
  102 + {
  103 + if (typeSingle == nType)
  104 + fpa11->fpreg[Fd].fExtended =
  105 + float32_to_floatx80(fpa11->fpreg[Fd].fSingle);
  106 + else
  107 + fpa11->fpreg[Fd].fExtended =
  108 + float64_to_floatx80(fpa11->fpreg[Fd].fDouble);
  109 + }
  110 + break;
  111 + }
  112 +
  113 + fpa11->fType[Fd] = nDest;
  114 + }
  115 +
  116 + return nRc;
  117 +}
... ...
target-arm/nwfpe/fpa11_cpdt.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.com, 1998-1999
  4 + (c) Philip Blundell, 1998
  5 +
  6 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  7 +
  8 + This program is free software; you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation; either version 2 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program; if not, write to the Free Software
  20 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21 +*/
  22 +
  23 +#include "fpa11.h"
  24 +#include "softfloat.h"
  25 +#include "fpopcode.h"
  26 +//#include "fpmodule.h"
  27 +//#include "fpmodule.inl"
  28 +
  29 +//#include <asm/uaccess.h>
  30 +
  31 +static inline
  32 +void loadSingle(const unsigned int Fn,const unsigned int *pMem)
  33 +{
  34 + FPA11 *fpa11 = GET_FPA11();
  35 + fpa11->fType[Fn] = typeSingle;
  36 + get_user(fpa11->fpreg[Fn].fSingle, pMem);
  37 +}
  38 +
  39 +static inline
  40 +void loadDouble(const unsigned int Fn,const unsigned int *pMem)
  41 +{
  42 + FPA11 *fpa11 = GET_FPA11();
  43 + unsigned int *p;
  44 + p = (unsigned int*)&fpa11->fpreg[Fn].fDouble;
  45 + fpa11->fType[Fn] = typeDouble;
  46 + get_user(p[0], &pMem[1]);
  47 + get_user(p[1], &pMem[0]); /* sign & exponent */
  48 +}
  49 +
  50 +static inline
  51 +void loadExtended(const unsigned int Fn,const unsigned int *pMem)
  52 +{
  53 + FPA11 *fpa11 = GET_FPA11();
  54 + unsigned int *p;
  55 + p = (unsigned int*)&fpa11->fpreg[Fn].fExtended;
  56 + fpa11->fType[Fn] = typeExtended;
  57 + get_user(p[0], &pMem[0]); /* sign & exponent */
  58 + get_user(p[1], &pMem[2]); /* ls bits */
  59 + get_user(p[2], &pMem[1]); /* ms bits */
  60 +}
  61 +
  62 +static inline
  63 +void loadMultiple(const unsigned int Fn,const unsigned int *pMem)
  64 +{
  65 + FPA11 *fpa11 = GET_FPA11();
  66 + register unsigned int *p;
  67 + unsigned long x;
  68 +
  69 + p = (unsigned int*)&(fpa11->fpreg[Fn]);
  70 + get_user(x, &pMem[0]);
  71 + fpa11->fType[Fn] = (x >> 14) & 0x00000003;
  72 +
  73 + switch (fpa11->fType[Fn])
  74 + {
  75 + case typeSingle:
  76 + case typeDouble:
  77 + {
  78 + get_user(p[0], &pMem[2]); /* Single */
  79 + get_user(p[1], &pMem[1]); /* double msw */
  80 + p[2] = 0; /* empty */
  81 + }
  82 + break;
  83 +
  84 + case typeExtended:
  85 + {
  86 + get_user(p[1], &pMem[2]);
  87 + get_user(p[2], &pMem[1]); /* msw */
  88 + p[0] = (x & 0x80003fff);
  89 + }
  90 + break;
  91 + }
  92 +}
  93 +
  94 +static inline
  95 +void storeSingle(const unsigned int Fn,unsigned int *pMem)
  96 +{
  97 + FPA11 *fpa11 = GET_FPA11();
  98 + float32 val;
  99 + register unsigned int *p = (unsigned int*)&val;
  100 +
  101 + switch (fpa11->fType[Fn])
  102 + {
  103 + case typeDouble:
  104 + val = float64_to_float32(fpa11->fpreg[Fn].fDouble);
  105 + break;
  106 +
  107 + case typeExtended:
  108 + val = floatx80_to_float32(fpa11->fpreg[Fn].fExtended);
  109 + break;
  110 +
  111 + default: val = fpa11->fpreg[Fn].fSingle;
  112 + }
  113 +
  114 + put_user(p[0], pMem);
  115 +}
  116 +
  117 +static inline
  118 +void storeDouble(const unsigned int Fn,unsigned int *pMem)
  119 +{
  120 + FPA11 *fpa11 = GET_FPA11();
  121 + float64 val;
  122 + register unsigned int *p = (unsigned int*)&val;
  123 +
  124 + switch (fpa11->fType[Fn])
  125 + {
  126 + case typeSingle:
  127 + val = float32_to_float64(fpa11->fpreg[Fn].fSingle);
  128 + break;
  129 +
  130 + case typeExtended:
  131 + val = floatx80_to_float64(fpa11->fpreg[Fn].fExtended);
  132 + break;
  133 +
  134 + default: val = fpa11->fpreg[Fn].fDouble;
  135 + }
  136 + put_user(p[1], &pMem[0]); /* msw */
  137 + put_user(p[0], &pMem[1]); /* lsw */
  138 +}
  139 +
  140 +static inline
  141 +void storeExtended(const unsigned int Fn,unsigned int *pMem)
  142 +{
  143 + FPA11 *fpa11 = GET_FPA11();
  144 + floatx80 val;
  145 + register unsigned int *p = (unsigned int*)&val;
  146 +
  147 + switch (fpa11->fType[Fn])
  148 + {
  149 + case typeSingle:
  150 + val = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
  151 + break;
  152 +
  153 + case typeDouble:
  154 + val = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
  155 + break;
  156 +
  157 + default: val = fpa11->fpreg[Fn].fExtended;
  158 + }
  159 +
  160 + put_user(p[0], &pMem[0]); /* sign & exp */
  161 + put_user(p[1], &pMem[2]);
  162 + put_user(p[2], &pMem[1]); /* msw */
  163 +}
  164 +
  165 +static inline
  166 +void storeMultiple(const unsigned int Fn,unsigned int *pMem)
  167 +{
  168 + FPA11 *fpa11 = GET_FPA11();
  169 + register unsigned int nType, *p;
  170 +
  171 + p = (unsigned int*)&(fpa11->fpreg[Fn]);
  172 + nType = fpa11->fType[Fn];
  173 +
  174 + switch (nType)
  175 + {
  176 + case typeSingle:
  177 + case typeDouble:
  178 + {
  179 + put_user(p[0], &pMem[2]); /* single */
  180 + put_user(p[1], &pMem[1]); /* double msw */
  181 + put_user(nType << 14, &pMem[0]);
  182 + }
  183 + break;
  184 +
  185 + case typeExtended:
  186 + {
  187 + put_user(p[2], &pMem[1]); /* msw */
  188 + put_user(p[1], &pMem[2]);
  189 + put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]);
  190 + }
  191 + break;
  192 + }
  193 +}
  194 +
  195 +unsigned int PerformLDF(const unsigned int opcode)
  196 +{
  197 + unsigned int *pBase, *pAddress, *pFinal, nRc = 1,
  198 + write_back = WRITE_BACK(opcode);
  199 +
  200 + //printk("PerformLDF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
  201 +
  202 + pBase = (unsigned int*)readRegister(getRn(opcode));
  203 + if (REG_PC == getRn(opcode))
  204 + {
  205 + pBase += 2;
  206 + write_back = 0;
  207 + }
  208 +
  209 + pFinal = pBase;
  210 + if (BIT_UP_SET(opcode))
  211 + pFinal += getOffset(opcode);
  212 + else
  213 + pFinal -= getOffset(opcode);
  214 +
  215 + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
  216 +
  217 + switch (opcode & MASK_TRANSFER_LENGTH)
  218 + {
  219 + case TRANSFER_SINGLE : loadSingle(getFd(opcode),pAddress); break;
  220 + case TRANSFER_DOUBLE : loadDouble(getFd(opcode),pAddress); break;
  221 + case TRANSFER_EXTENDED: loadExtended(getFd(opcode),pAddress); break;
  222 + default: nRc = 0;
  223 + }
  224 +
  225 + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
  226 + return nRc;
  227 +}
  228 +
  229 +unsigned int PerformSTF(const unsigned int opcode)
  230 +{
  231 + unsigned int *pBase, *pAddress, *pFinal, nRc = 1,
  232 + write_back = WRITE_BACK(opcode);
  233 +
  234 + //printk("PerformSTF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
  235 + SetRoundingMode(ROUND_TO_NEAREST);
  236 +
  237 + pBase = (unsigned int*)readRegister(getRn(opcode));
  238 + if (REG_PC == getRn(opcode))
  239 + {
  240 + pBase += 2;
  241 + write_back = 0;
  242 + }
  243 +
  244 + pFinal = pBase;
  245 + if (BIT_UP_SET(opcode))
  246 + pFinal += getOffset(opcode);
  247 + else
  248 + pFinal -= getOffset(opcode);
  249 +
  250 + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
  251 +
  252 + switch (opcode & MASK_TRANSFER_LENGTH)
  253 + {
  254 + case TRANSFER_SINGLE : storeSingle(getFd(opcode),pAddress); break;
  255 + case TRANSFER_DOUBLE : storeDouble(getFd(opcode),pAddress); break;
  256 + case TRANSFER_EXTENDED: storeExtended(getFd(opcode),pAddress); break;
  257 + default: nRc = 0;
  258 + }
  259 +
  260 + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
  261 + return nRc;
  262 +}
  263 +
  264 +unsigned int PerformLFM(const unsigned int opcode)
  265 +{
  266 + unsigned int i, Fd, *pBase, *pAddress, *pFinal,
  267 + write_back = WRITE_BACK(opcode);
  268 +
  269 + pBase = (unsigned int*)readRegister(getRn(opcode));
  270 + if (REG_PC == getRn(opcode))
  271 + {
  272 + pBase += 2;
  273 + write_back = 0;
  274 + }
  275 +
  276 + pFinal = pBase;
  277 + if (BIT_UP_SET(opcode))
  278 + pFinal += getOffset(opcode);
  279 + else
  280 + pFinal -= getOffset(opcode);
  281 +
  282 + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
  283 +
  284 + Fd = getFd(opcode);
  285 + for (i=getRegisterCount(opcode);i>0;i--)
  286 + {
  287 + loadMultiple(Fd,pAddress);
  288 + pAddress += 3; Fd++;
  289 + if (Fd == 8) Fd = 0;
  290 + }
  291 +
  292 + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
  293 + return 1;
  294 +}
  295 +
  296 +unsigned int PerformSFM(const unsigned int opcode)
  297 +{
  298 + unsigned int i, Fd, *pBase, *pAddress, *pFinal,
  299 + write_back = WRITE_BACK(opcode);
  300 +
  301 + pBase = (unsigned int*)readRegister(getRn(opcode));
  302 + if (REG_PC == getRn(opcode))
  303 + {
  304 + pBase += 2;
  305 + write_back = 0;
  306 + }
  307 +
  308 + pFinal = pBase;
  309 + if (BIT_UP_SET(opcode))
  310 + pFinal += getOffset(opcode);
  311 + else
  312 + pFinal -= getOffset(opcode);
  313 +
  314 + if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
  315 +
  316 + Fd = getFd(opcode);
  317 + for (i=getRegisterCount(opcode);i>0;i--)
  318 + {
  319 + storeMultiple(Fd,pAddress);
  320 + pAddress += 3; Fd++;
  321 + if (Fd == 8) Fd = 0;
  322 + }
  323 +
  324 + if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
  325 + return 1;
  326 +}
  327 +
  328 +#if 1
/*
 * Dispatch a CPDT (coprocessor data transfer) instruction to the matching
 * LDF / LFM / STF / SFM handler, tested in that order.
 *
 * Returns the handler's result, or 0 when the opcode matches none of them.
 */
unsigned int EmulateCPDT(const unsigned int opcode)
{
    if (LDF_OP(opcode))
        return PerformLDF(opcode);

    if (LFM_OP(opcode))
        return PerformLFM(opcode);

    if (STF_OP(opcode))
        return PerformSTF(opcode);

    if (SFM_OP(opcode))
        return PerformSFM(opcode);

    return 0;
}
  358 +#endif
... ...
target-arm/nwfpe/fpa11_cprt.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 + (c) Philip Blundell, 1999
  5 +
  6 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  7 +
  8 + This program is free software; you can redistribute it and/or modify
  9 + it under the terms of the GNU General Public License as published by
  10 + the Free Software Foundation; either version 2 of the License, or
  11 + (at your option) any later version.
  12 +
  13 + This program is distributed in the hope that it will be useful,
  14 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 + GNU General Public License for more details.
  17 +
  18 + You should have received a copy of the GNU General Public License
  19 + along with this program; if not, write to the Free Software
  20 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21 +*/
  22 +
  23 +#include "fpa11.h"
  24 +#include "milieu.h"
  25 +#include "softfloat.h"
  26 +#include "fpopcode.h"
  27 +#include "fpa11.inl"
  28 +//#include "fpmodule.h"
  29 +//#include "fpmodule.inl"
  30 +
  31 +extern flag floatx80_is_nan(floatx80);
  32 +extern flag float64_is_nan( float64);
  33 +extern flag float32_is_nan( float32);
  34 +
  35 +void SetRoundingMode(const unsigned int opcode);
  36 +
  37 +unsigned int PerformFLT(const unsigned int opcode);
  38 +unsigned int PerformFIX(const unsigned int opcode);
  39 +
  40 +static unsigned int
  41 +PerformComparison(const unsigned int opcode);
  42 +
  43 +unsigned int EmulateCPRT(const unsigned int opcode)
  44 +{
  45 + unsigned int nRc = 1;
  46 +
  47 + //printk("EmulateCPRT(0x%08x)\n",opcode);
  48 +
  49 + if (opcode & 0x800000)
  50 + {
  51 + /* This is some variant of a comparison (PerformComparison will
  52 + sort out which one). Since most of the other CPRT
  53 + instructions are oddball cases of some sort or other it makes
  54 + sense to pull this out into a fast path. */
  55 + return PerformComparison(opcode);
  56 + }
  57 +
  58 + /* Hint to GCC that we'd like a jump table rather than a load of CMPs */
  59 + switch ((opcode & 0x700000) >> 20)
  60 + {
  61 + case FLT_CODE >> 20: nRc = PerformFLT(opcode); break;
  62 + case FIX_CODE >> 20: nRc = PerformFIX(opcode); break;
  63 +
  64 + case WFS_CODE >> 20: writeFPSR(readRegister(getRd(opcode))); break;
  65 + case RFS_CODE >> 20: writeRegister(getRd(opcode),readFPSR()); break;
  66 +
  67 +#if 0 /* We currently have no use for the FPCR, so there's no point
  68 + in emulating it. */
  69 + case WFC_CODE >> 20: writeFPCR(readRegister(getRd(opcode)));
  70 + case RFC_CODE >> 20: writeRegister(getRd(opcode),readFPCR()); break;
  71 +#endif
  72 +
  73 + default: nRc = 0;
  74 + }
  75 +
  76 + return nRc;
  77 +}
  78 +
  79 +unsigned int PerformFLT(const unsigned int opcode)
  80 +{
  81 + FPA11 *fpa11 = GET_FPA11();
  82 +
  83 + unsigned int nRc = 1;
  84 + SetRoundingMode(opcode);
  85 +
  86 + switch (opcode & MASK_ROUNDING_PRECISION)
  87 + {
  88 + case ROUND_SINGLE:
  89 + {
  90 + fpa11->fType[getFn(opcode)] = typeSingle;
  91 + fpa11->fpreg[getFn(opcode)].fSingle =
  92 + int32_to_float32(readRegister(getRd(opcode)));
  93 + }
  94 + break;
  95 +
  96 + case ROUND_DOUBLE:
  97 + {
  98 + fpa11->fType[getFn(opcode)] = typeDouble;
  99 + fpa11->fpreg[getFn(opcode)].fDouble =
  100 + int32_to_float64(readRegister(getRd(opcode)));
  101 + }
  102 + break;
  103 +
  104 + case ROUND_EXTENDED:
  105 + {
  106 + fpa11->fType[getFn(opcode)] = typeExtended;
  107 + fpa11->fpreg[getFn(opcode)].fExtended =
  108 + int32_to_floatx80(readRegister(getRd(opcode)));
  109 + }
  110 + break;
  111 +
  112 + default: nRc = 0;
  113 + }
  114 +
  115 + return nRc;
  116 +}
  117 +
  118 +unsigned int PerformFIX(const unsigned int opcode)
  119 +{
  120 + FPA11 *fpa11 = GET_FPA11();
  121 + unsigned int nRc = 1;
  122 + unsigned int Fn = getFm(opcode);
  123 +
  124 + SetRoundingMode(opcode);
  125 +
  126 + switch (fpa11->fType[Fn])
  127 + {
  128 + case typeSingle:
  129 + {
  130 + writeRegister(getRd(opcode),
  131 + float32_to_int32(fpa11->fpreg[Fn].fSingle));
  132 + }
  133 + break;
  134 +
  135 + case typeDouble:
  136 + {
  137 + //printf("F%d is 0x%llx\n",Fn,fpa11->fpreg[Fn].fDouble);
  138 + writeRegister(getRd(opcode),
  139 + float64_to_int32(fpa11->fpreg[Fn].fDouble));
  140 + }
  141 + break;
  142 +
  143 + case typeExtended:
  144 + {
  145 + writeRegister(getRd(opcode),
  146 + floatx80_to_int32(fpa11->fpreg[Fn].fExtended));
  147 + }
  148 + break;
  149 +
  150 + default: nRc = 0;
  151 + }
  152 +
  153 + return nRc;
  154 +}
  155 +
  156 +
  157 +static unsigned int __inline__
  158 +PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
  159 +{
  160 + unsigned int flags = 0;
  161 +
  162 + /* test for less than condition */
  163 + if (floatx80_lt(Fn,Fm))
  164 + {
  165 + flags |= CC_NEGATIVE;
  166 + }
  167 +
  168 + /* test for equal condition */
  169 + if (floatx80_eq(Fn,Fm))
  170 + {
  171 + flags |= CC_ZERO;
  172 + }
  173 +
  174 + /* test for greater than or equal condition */
  175 + if (floatx80_lt(Fm,Fn))
  176 + {
  177 + flags |= CC_CARRY;
  178 + }
  179 +
  180 + writeConditionCodes(flags);
  181 + return 1;
  182 +}
  183 +
  184 +/* This instruction sets the flags N, Z, C, V in the FPSR. */
  185 +
  186 +static unsigned int PerformComparison(const unsigned int opcode)
  187 +{
  188 + FPA11 *fpa11 = GET_FPA11();
  189 + unsigned int Fn, Fm;
  190 + floatx80 rFn, rFm;
  191 + int e_flag = opcode & 0x400000; /* 1 if CxFE */
  192 + int n_flag = opcode & 0x200000; /* 1 if CNxx */
  193 + unsigned int flags = 0;
  194 +
  195 + //printk("PerformComparison(0x%08x)\n",opcode);
  196 +
  197 + Fn = getFn(opcode);
  198 + Fm = getFm(opcode);
  199 +
  200 + /* Check for unordered condition and convert all operands to 80-bit
  201 + format.
  202 + ?? Might be some mileage in avoiding this conversion if possible.
  203 + Eg, if both operands are 32-bit, detect this and do a 32-bit
  204 + comparison (cheaper than an 80-bit one). */
  205 + switch (fpa11->fType[Fn])
  206 + {
  207 + case typeSingle:
  208 + //printk("single.\n");
  209 + if (float32_is_nan(fpa11->fpreg[Fn].fSingle))
  210 + goto unordered;
  211 + rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
  212 + break;
  213 +
  214 + case typeDouble:
  215 + //printk("double.\n");
  216 + if (float64_is_nan(fpa11->fpreg[Fn].fDouble))
  217 + goto unordered;
  218 + rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
  219 + break;
  220 +
  221 + case typeExtended:
  222 + //printk("extended.\n");
  223 + if (floatx80_is_nan(fpa11->fpreg[Fn].fExtended))
  224 + goto unordered;
  225 + rFn = fpa11->fpreg[Fn].fExtended;
  226 + break;
  227 +
  228 + default: return 0;
  229 + }
  230 +
  231 + if (CONSTANT_FM(opcode))
  232 + {
  233 + //printk("Fm is a constant: #%d.\n",Fm);
  234 + rFm = getExtendedConstant(Fm);
  235 + if (floatx80_is_nan(rFm))
  236 + goto unordered;
  237 + }
  238 + else
  239 + {
  240 + //printk("Fm = r%d which contains a ",Fm);
  241 + switch (fpa11->fType[Fm])
  242 + {
  243 + case typeSingle:
  244 + //printk("single.\n");
  245 + if (float32_is_nan(fpa11->fpreg[Fm].fSingle))
  246 + goto unordered;
  247 + rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle);
  248 + break;
  249 +
  250 + case typeDouble:
  251 + //printk("double.\n");
  252 + if (float64_is_nan(fpa11->fpreg[Fm].fDouble))
  253 + goto unordered;
  254 + rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble);
  255 + break;
  256 +
  257 + case typeExtended:
  258 + //printk("extended.\n");
  259 + if (floatx80_is_nan(fpa11->fpreg[Fm].fExtended))
  260 + goto unordered;
  261 + rFm = fpa11->fpreg[Fm].fExtended;
  262 + break;
  263 +
  264 + default: return 0;
  265 + }
  266 + }
  267 +
  268 + if (n_flag)
  269 + {
  270 + rFm.high ^= 0x8000;
  271 + }
  272 +
  273 + return PerformComparisonOperation(rFn,rFm);
  274 +
  275 + unordered:
  276 + /* ?? The FPA data sheet is pretty vague about this, in particular
  277 + about whether the non-E comparisons can ever raise exceptions.
  278 + This implementation is based on a combination of what it says in
  279 + the data sheet, observation of how the Acorn emulator actually
  280 + behaves (and how programs expect it to) and guesswork. */
  281 + flags |= CC_OVERFLOW;
  282 + flags &= ~(CC_ZERO | CC_NEGATIVE);
  283 +
  284 + if (BIT_AC & readFPSR()) flags |= CC_CARRY;
  285 +
  286 + if (e_flag) float_raise(float_flag_invalid);
  287 +
  288 + writeConditionCodes(flags);
  289 + return 1;
  290 +}
... ...
target-arm/nwfpe/fpopcode.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#include "fpa11.h"
  23 +#include "softfloat.h"
  24 +#include "fpopcode.h"
  25 +#include "fpsr.h"
  26 +//#include "fpmodule.h"
  27 +//#include "fpmodule.inl"
  28 +
  29 +const floatx80 floatx80Constant[] = {
  30 + { 0x0000, 0x0000000000000000ULL}, /* extended 0.0 */
  31 + { 0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */
  32 + { 0x4000, 0x8000000000000000ULL}, /* extended 2.0 */
  33 + { 0x4000, 0xc000000000000000ULL}, /* extended 3.0 */
  34 + { 0x4001, 0x8000000000000000ULL}, /* extended 4.0 */
  35 + { 0x4001, 0xa000000000000000ULL}, /* extended 5.0 */
  36 + { 0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */
  37 + { 0x4002, 0xa000000000000000ULL} /* extended 10.0 */
  38 +};
  39 +
  40 +const float64 float64Constant[] = {
  41 + 0x0000000000000000ULL, /* double 0.0 */
  42 + 0x3ff0000000000000ULL, /* double 1.0 */
  43 + 0x4000000000000000ULL, /* double 2.0 */
  44 + 0x4008000000000000ULL, /* double 3.0 */
  45 + 0x4010000000000000ULL, /* double 4.0 */
  46 + 0x4014000000000000ULL, /* double 5.0 */
  47 + 0x3fe0000000000000ULL, /* double 0.5 */
  48 + 0x4024000000000000ULL /* double 10.0 */
  49 +};
  50 +
  51 +const float32 float32Constant[] = {
  52 + 0x00000000, /* single 0.0 */
  53 + 0x3f800000, /* single 1.0 */
  54 + 0x40000000, /* single 2.0 */
  55 + 0x40400000, /* single 3.0 */
  56 + 0x40800000, /* single 4.0 */
  57 + 0x40a00000, /* single 5.0 */
  58 + 0x3f000000, /* single 0.5 */
  59 + 0x41200000 /* single 10.0 */
  60 +};
  61 +
  62 +unsigned int getTransferLength(const unsigned int opcode)
  63 +{
  64 + unsigned int nRc;
  65 +
  66 + switch (opcode & MASK_TRANSFER_LENGTH)
  67 + {
  68 + case 0x00000000: nRc = 1; break; /* single precision */
  69 + case 0x00008000: nRc = 2; break; /* double precision */
  70 + case 0x00400000: nRc = 3; break; /* extended precision */
  71 + default: nRc = 0;
  72 + }
  73 +
  74 + return(nRc);
  75 +}
  76 +
  77 +unsigned int getRegisterCount(const unsigned int opcode)
  78 +{
  79 + unsigned int nRc;
  80 +
  81 + switch (opcode & MASK_REGISTER_COUNT)
  82 + {
  83 + case 0x00000000: nRc = 4; break;
  84 + case 0x00008000: nRc = 1; break;
  85 + case 0x00400000: nRc = 2; break;
  86 + case 0x00408000: nRc = 3; break;
  87 + default: nRc = 0;
  88 + }
  89 +
  90 + return(nRc);
  91 +}
  92 +
  93 +unsigned int getRoundingPrecision(const unsigned int opcode)
  94 +{
  95 + unsigned int nRc;
  96 +
  97 + switch (opcode & MASK_ROUNDING_PRECISION)
  98 + {
  99 + case 0x00000000: nRc = 1; break;
  100 + case 0x00000080: nRc = 2; break;
  101 + case 0x00080000: nRc = 3; break;
  102 + default: nRc = 0;
  103 + }
  104 +
  105 + return(nRc);
  106 +}
  107 +
  108 +unsigned int getDestinationSize(const unsigned int opcode)
  109 +{
  110 + unsigned int nRc;
  111 +
  112 + switch (opcode & MASK_DESTINATION_SIZE)
  113 + {
  114 + case 0x00000000: nRc = typeSingle; break;
  115 + case 0x00000080: nRc = typeDouble; break;
  116 + case 0x00080000: nRc = typeExtended; break;
  117 + default: nRc = typeNone;
  118 + }
  119 +
  120 + return(nRc);
  121 +}
  122 +
/*
 * Condition code truth table.
 * Row:  the 4-bit condition field of an instruction (EQ, NE, ... AL, NV).
 * Bit:  the 4-bit flag nibble NZCV (N = 8, Z = 4, C = 2, V = 1); bit k of a
 *       row is set when the condition holds for flag nibble k.
 */
static const unsigned short aCC[16] = {
    0xF0F0,  /* EQ: Z set */
    0x0F0F,  /* NE: Z clear */
    0xCCCC,  /* CS: C set */
    0x3333,  /* CC: C clear */
    0xFF00,  /* MI: N set */
    0x00FF,  /* PL: N clear */
    0xAAAA,  /* VS: V set */
    0x5555,  /* VC: V clear */
    0x0C0C,  /* HI: C set and Z clear */
    0xF3F3,  /* LS: C clear or Z set */
    0xAA55,  /* GE: N == V */
    0x55AA,  /* LT: N != V */
    0x0A05,  /* GT: Z clear and N == V */
    0xF5FA,  /* LE: Z set or N != V */
    0xFFFF,  /* AL: always */
    0        /* NV: never */
};

/*
 * Decide whether an instruction's condition is satisfied.
 *
 * opcode  instruction word; only the top four bits (condition) are used
 * ccodes  status word; only the top four bits (N, Z, C, V) are used
 *
 * Returns 1 when the condition holds for those flags, 0 otherwise.
 */
unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes)
{
    const unsigned int cond = opcode >> 28;
    const unsigned int nzcv = ccodes >> 28;

    return (aCC[cond] & (1u << nzcv)) != 0;
}
... ...
target-arm/nwfpe/fpopcode.h 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#ifndef __FPOPCODE_H__
  23 +#define __FPOPCODE_H__
  24 +
  25 +/*
  26 +ARM Floating Point Instruction Classes
  27 +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
  28 +|c o n d|1 1 0 P|U|u|W|L| Rn |v| Fd |0|0|0|1| o f f s e t | CPDT
  29 +|c o n d|1 1 0 P|U|w|W|L| Rn |x| Fd |0|0|0|1| o f f s e t | CPDT
  30 +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
  31 +|c o n d|1 1 1 0|a|b|c|d|e| Fn |j| Fd |0|0|0|1|f|g|h|0|i| Fm | CPDO
  32 +|c o n d|1 1 1 0|a|b|c|L|e| Fn | Rd |0|0|0|1|f|g|h|1|i| Fm | CPRT
  33 +|c o n d|1 1 1 0|a|b|c|1|e| Fn |1|1|1|1|0|0|0|1|f|g|h|1|i| Fm | comparisons
  34 +| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
  35 +
  36 +CPDT data transfer instructions
  37 + LDF, STF, LFM, SFM
  38 +
  39 +CPDO dyadic arithmetic instructions
  40 + ADF, MUF, SUF, RSF, DVF, RDF,
  41 + POW, RPW, RMF, FML, FDV, FRD, POL
  42 +
  43 +CPDO monadic arithmetic instructions
  44 + MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP,
  45 + SIN, COS, TAN, ASN, ACS, ATN, URD, NRM
  46 +
  47 +CPRT joint arithmetic/data transfer instructions
  48 + FIX (arithmetic followed by load/store)
  49 + FLT (load/store followed by arithmetic)
  50 + CMF, CNF CMFE, CNFE (comparisons)
  51 + WFS, RFS (write/read floating point status register)
  52 + WFC, RFC (write/read floating point control register)
  53 +
  54 +cond condition codes
  55 +P pre/post index bit: 0 = postindex, 1 = preindex
  56 +U up/down bit: 0 = stack grows down, 1 = stack grows up
  57 +W write back bit: 1 = update base register (Rn)
  58 +L load/store bit: 0 = store, 1 = load
  59 +Rn base register
  60 +Rd destination/source register
  61 +Fd floating point destination register
  62 +Fn floating point source register
  63 +Fm floating point source register or floating point constant
  64 +
  65 +uv transfer length (TABLE 1)
  66 +wx register count (TABLE 2)
  67 +abcd arithmetic opcode (TABLES 3 & 4)
  68 +ef destination size (rounding precision) (TABLE 5)
  69 +gh rounding mode (TABLE 6)
  70 +j dyadic/monadic bit: 0 = dyadic, 1 = monadic
  71 +i constant bit: 1 = constant (TABLE 6)
  72 +*/
  73 +
  74 +/*
  75 +TABLE 1
  76 ++-------------------------+---+---+---------+---------+
  77 +| Precision | u | v | FPSR.EP | length |
  78 ++-------------------------+---+---+---------+---------+
  79 +| Single | 0 | 0 | x | 1 word |
  80 +| Double | 0 | 1 | x | 2 words |
  81 +| Extended | 1 | 0 | x | 3 words |
  82 +| Packed decimal | 1 | 1 | 0 | 3 words |
  83 +| Expanded packed decimal | 1 | 1 | 1 | 4 words |
  84 ++-------------------------+---+---+---------+---------+
  85 +Note: x = don't care
  86 +*/
  87 +
  88 +/*
  89 +TABLE 2
  90 ++---+---+---------------------------------+
  91 +| w | x | Number of registers to transfer |
  92 ++---+---+---------------------------------+
  93 +| 0 | 1 | 1 |
  94 +| 1 | 0 | 2 |
  95 +| 1 | 1 | 3 |
  96 +| 0 | 0 | 4 |
  97 ++---+---+---------------------------------+
  98 +*/
  99 +
  100 +/*
  101 +TABLE 3: Dyadic Floating Point Opcodes
  102 ++---+---+---+---+----------+-----------------------+-----------------------+
  103 +| a | b | c | d | Mnemonic | Description | Operation |
  104 ++---+---+---+---+----------+-----------------------+-----------------------+
  105 +| 0 | 0 | 0 | 0 | ADF | Add | Fd := Fn + Fm |
  106 +| 0 | 0 | 0 | 1 | MUF | Multiply | Fd := Fn * Fm |
  107 +| 0 | 0 | 1 | 0 | SUF | Subtract | Fd := Fn - Fm |
  108 +| 0 | 0 | 1 | 1 | RSF | Reverse subtract | Fd := Fm - Fn |
  109 +| 0 | 1 | 0 | 0 | DVF | Divide | Fd := Fn / Fm |
  110 +| 0 | 1 | 0 | 1 | RDF | Reverse divide | Fd := Fm / Fn |
  111 +| 0 | 1 | 1 | 0 | POW | Power | Fd := Fn ^ Fm |
  112 +| 0 | 1 | 1 | 1 | RPW | Reverse power | Fd := Fm ^ Fn |
  113 +| 1 | 0 | 0 | 0 | RMF | Remainder | Fd := IEEE rem(Fn/Fm) |
  114 +| 1 | 0 | 0 | 1 | FML | Fast Multiply | Fd := Fn * Fm |
  115 +| 1 | 0 | 1 | 0 | FDV | Fast Divide | Fd := Fn / Fm |
  116 +| 1 | 0 | 1 | 1 | FRD | Fast reverse divide | Fd := Fm / Fn |
  117 +| 1 | 1 | 0 | 0 | POL | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm) |
  118 +| 1 | 1 | 0 | 1 | | undefined instruction | trap |
  119 +| 1 | 1 | 1 | 0 | | undefined instruction | trap |
  120 +| 1 | 1 | 1 | 1 | | undefined instruction | trap |
  121 ++---+---+---+---+----------+-----------------------+-----------------------+
  122 +Note: POW, RPW, POL are deprecated, and are available for backwards
  123 + compatibility only.
  124 +*/
  125 +
  126 +/*
  127 +TABLE 4: Monadic Floating Point Opcodes
  128 ++---+---+---+---+----------+-----------------------+-----------------------+
  129 +| a | b | c | d | Mnemonic | Description | Operation |
  130 ++---+---+---+---+----------+-----------------------+-----------------------+
  131 +| 0 | 0 | 0 | 0 | MVF | Move | Fd := Fm |
  132 +| 0 | 0 | 0 | 1 | MNF | Move negated | Fd := - Fm |
  133 +| 0 | 0 | 1 | 0 | ABS | Absolute value | Fd := abs(Fm) |
  134 +| 0 | 0 | 1 | 1 | RND | Round to integer | Fd := int(Fm) |
  135 +| 0 | 1 | 0 | 0 | SQT | Square root | Fd := sqrt(Fm) |
  136 +| 0 | 1 | 0 | 1 | LOG | Log base 10 | Fd := log10(Fm) |
  137 +| 0 | 1 | 1 | 0 | LGN | Log base e | Fd := ln(Fm) |
  138 +| 0 | 1 | 1 | 1 | EXP | Exponent | Fd := e ^ Fm |
  139 +| 1 | 0 | 0 | 0 | SIN | Sine | Fd := sin(Fm) |
  140 +| 1 | 0 | 0 | 1 | COS | Cosine | Fd := cos(Fm) |
  141 +| 1 | 0 | 1 | 0 | TAN | Tangent | Fd := tan(Fm) |
  142 +| 1 | 0 | 1 | 1 | ASN | Arc Sine | Fd := arcsin(Fm) |
  143 +| 1 | 1 | 0 | 0 | ACS | Arc Cosine | Fd := arccos(Fm) |
  144 +| 1 | 1 | 0 | 1 | ATN | Arc Tangent | Fd := arctan(Fm) |
  145 +| 1 | 1 | 1 | 0 | URD | Unnormalized round | Fd := int(Fm) |
  146 +| 1 | 1 | 1 | 1 | NRM | Normalize | Fd := norm(Fm) |
  147 ++---+---+---+---+----------+-----------------------+-----------------------+
  148 +Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are
  149 + available for backwards compatibility only.
  150 +*/
  151 +
  152 +/*
  153 +TABLE 5
  154 ++-------------------------+---+---+
  155 +| Rounding Precision | e | f |
  156 ++-------------------------+---+---+
  157 +| IEEE Single precision | 0 | 0 |
  158 +| IEEE Double precision | 0 | 1 |
  159 +| IEEE Extended precision | 1 | 0 |
  160 +| undefined (trap) | 1 | 1 |
  161 ++-------------------------+---+---+
  162 +*/
  163 +
  164 +/*
  165 +TABLE 6
  166 ++---------------------------------+---+---+
  167 +| Rounding Mode | g | h |
  168 ++---------------------------------+---+---+
  169 +| Round to nearest (default) | 0 | 0 |
  170 +| Round toward plus infinity | 0 | 1 |
  171 +| Round toward negative infinity | 1 | 0 |
  172 +| Round toward zero | 1 | 1 |
  173 ++---------------------------------+---+---+
  174 +*/
  175 +
  176 +/*
  177 +===
  178 +=== Definitions for load and store instructions
  179 +===
  180 +*/
  181 +
/* Single-bit fields of a load/store (CPDT) opcode. */
#define BIT_PREINDEX    0x01000000  /* P: 1 = preindex, 0 = postindex */
#define BIT_UP          0x00800000  /* U: 1 = up, 0 = down */
#define BIT_WRITE_BACK  0x00200000  /* W: 1 = update base register (Rn) */
#define BIT_LOAD        0x00100000  /* L: 1 = load, 0 = store */

/* masks for load/store */
#define MASK_CPDT             0x0c000000  /* data transfer opcode */
#define MASK_OFFSET           0x000000ff
#define MASK_TRANSFER_LENGTH  0x00408000
#define MASK_REGISTER_COUNT   MASK_TRANSFER_LENGTH  /* same two bits */
#define MASK_COPROCESSOR      0x00000f00

/* Values of (opcode & MASK_TRANSFER_LENGTH) for each transfer length. */
#define TRANSFER_SINGLE    0x00000000
#define TRANSFER_DOUBLE    0x00008000
#define TRANSFER_EXTENDED  0x00400000
#define TRANSFER_PACKED    MASK_TRANSFER_LENGTH

/*
 * Every function-like macro below parenthesizes its argument so that a
 * compound expression (for example `a | b') is masked as a whole.
 */

/* Get the coprocessor number from the opcode. */
#define getCoprocessorNumber(opcode)  (((opcode) & MASK_COPROCESSOR) >> 8)

/* Get the offset from the opcode. */
#define getOffset(opcode)  ((opcode) & MASK_OFFSET)

/* True when every bit of `mask' is set in `opcode'. */
#define TEST_OPCODE(opcode,mask)  (((opcode) & (mask)) == (mask))

/* Tests for specific data transfer load/store opcodes. */
#define LOAD_OP(opcode)   TEST_OPCODE((opcode), MASK_CPDT | BIT_LOAD)
#define STORE_OP(opcode)  (((opcode) & (MASK_CPDT | BIT_LOAD)) == MASK_CPDT)

/* LDF/STF are matched on coprocessor number 1, LFM/SFM on number 2. */
#define LDF_OP(opcode)  (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
#define LFM_OP(opcode)  (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
#define STF_OP(opcode)  (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
#define SFM_OP(opcode)  (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 2))

/* Predicates on the individual P, U, W and L bits. */
#define PREINDEXED(opcode)    (((opcode) & BIT_PREINDEX) != 0)
#define POSTINDEXED(opcode)   (((opcode) & BIT_PREINDEX) == 0)
#define BIT_UP_SET(opcode)    (((opcode) & BIT_UP) != 0)
/* There is no BIT_DOWN: "down" is simply the U bit being clear. */
#define BIT_UP_CLEAR(opcode)  (((opcode) & BIT_UP) == 0)
#define WRITE_BACK(opcode)    (((opcode) & BIT_WRITE_BACK) != 0)
#define LOAD(opcode)          (((opcode) & BIT_LOAD) != 0)
#define STORE(opcode)         (((opcode) & BIT_LOAD) == 0)
  225 +
  226 +/*
  227 +===
  228 +=== Definitions for arithmetic instructions
  229 +===
  230 +*/
/* bit masks */
#define BIT_MONADIC   0x00008000  /* j: 1 = monadic, 0 = dyadic */
#define BIT_CONSTANT  0x00000008  /* i: 1 = constant */

/*
 * The argument is parenthesized so that a compound expression is masked
 * as a whole rather than only its last operand.
 */
#define CONSTANT_FM(opcode)          (((opcode) & BIT_CONSTANT) != 0)
#define MONADIC_INSTRUCTION(opcode)  (((opcode) & BIT_MONADIC) != 0)

/* instruction identification masks */
#define MASK_CPDO               0x0e000000  /* arithmetic opcode */
#define MASK_ARITHMETIC_OPCODE  0x00f08000  /* abcd field plus the j bit */
#define MASK_DESTINATION_SIZE   0x00080080

/*
 * dyadic arithmetic opcodes: values of (opcode & MASK_ARITHMETIC_OPCODE)
 * with BIT_MONADIC clear.
 */
#define ADF_CODE  0x00000000
#define MUF_CODE  0x00100000
#define SUF_CODE  0x00200000
#define RSF_CODE  0x00300000
#define DVF_CODE  0x00400000
#define RDF_CODE  0x00500000
#define POW_CODE  0x00600000
#define RPW_CODE  0x00700000
#define RMF_CODE  0x00800000
#define FML_CODE  0x00900000
#define FDV_CODE  0x00a00000
#define FRD_CODE  0x00b00000
#define POL_CODE  0x00c00000
/* 0x00d00000 is an invalid dyadic arithmetic opcode */
/* 0x00e00000 is an invalid dyadic arithmetic opcode */
/* 0x00f00000 is an invalid dyadic arithmetic opcode */

/*
 * monadic arithmetic opcodes: the same four-bit field with BIT_MONADIC
 * set, so each value is the matching dyadic slot ORed with 0x00008000.
 */
#define MVF_CODE  0x00008000
#define MNF_CODE  0x00108000
#define ABS_CODE  0x00208000
#define RND_CODE  0x00308000
#define SQT_CODE  0x00408000
#define LOG_CODE  0x00508000
#define LGN_CODE  0x00608000
#define EXP_CODE  0x00708000
#define SIN_CODE  0x00808000
#define COS_CODE  0x00908000
#define TAN_CODE  0x00a08000
#define ASN_CODE  0x00b08000
#define ACS_CODE  0x00c08000
#define ATN_CODE  0x00d08000
#define URD_CODE  0x00e08000
#define NRM_CODE  0x00f08000
  278 +
  279 +/*
  280 +===
  281 +=== Definitions for register transfer and comparison instructions
  282 +===
  283 +*/
  284 +
#define MASK_CPRT       0x0e000010    /* register transfer opcode */

/*
 * Register transfer / comparison selectors.  Each *_CODE is a value of
 * (opcode & MASK_CPRT_CODE), written here as the four-bit field value
 * shifted up to bit 20.
 */
#define MASK_CPRT_CODE  (0xf << 20)
#define FLT_CODE        (0x0 << 20)
#define FIX_CODE        (0x1 << 20)
#define WFS_CODE        (0x2 << 20)
#define RFS_CODE        (0x3 << 20)
#define WFC_CODE        (0x4 << 20)
#define RFC_CODE        (0x5 << 20)
#define CMF_CODE        (0x9 << 20)
#define CNF_CODE        (0xb << 20)
#define CMFE_CODE       (0xd << 20)
#define CNFE_CODE       (0xf << 20)
  297 +
  298 +/*
  299 +===
  300 +=== Common definitions
  301 +===
  302 +*/
  303 +
/* register masks */
#define MASK_Rd  0x0000f000
#define MASK_Rn  0x000f0000
#define MASK_Fd  0x00007000
#define MASK_Fm  0x00000007
#define MASK_Fn  0x00070000

/* condition code masks */
#define CC_MASK      0xf0000000
#define CC_NEGATIVE  0x80000000
#define CC_ZERO      0x40000000
#define CC_CARRY     0x20000000
#define CC_OVERFLOW  0x10000000
#define CC_EQ        0x00000000
#define CC_NE        0x10000000
#define CC_CS        0x20000000
#define CC_HS        CC_CS
#define CC_CC        0x30000000
#define CC_LO        CC_CC
#define CC_MI        0x40000000
#define CC_PL        0x50000000
#define CC_VS        0x60000000
#define CC_VC        0x70000000
#define CC_HI        0x80000000
#define CC_LS        0x90000000
#define CC_GE        0xa0000000
#define CC_LT        0xb0000000
#define CC_GT        0xc0000000
#define CC_LE        0xd0000000
#define CC_AL        0xe0000000
#define CC_NV        0xf0000000

/* rounding masks/values */
#define MASK_ROUNDING_MODE       0x00000060
#define ROUND_TO_NEAREST         0x00000000
#define ROUND_TO_PLUS_INFINITY   0x00000020
#define ROUND_TO_MINUS_INFINITY  0x00000040
#define ROUND_TO_ZERO            0x00000060

#define MASK_ROUNDING_PRECISION  0x00080080
#define ROUND_SINGLE             0x00000000
#define ROUND_DOUBLE             0x00000080
#define ROUND_EXTENDED           0x00080000

/*
 * Field extractors.  The argument is parenthesized in every macro so that
 * a compound expression is masked/shifted as a whole.
 */

/* Get the condition code from the opcode. */
#define getCondition(opcode)  ((opcode) >> 28)

/* Get the base register from the opcode. */
#define getRn(opcode)  (((opcode) & MASK_Rn) >> 16)

/* Get the destination floating point register from the opcode. */
#define getFd(opcode)  (((opcode) & MASK_Fd) >> 12)

/* Get the first source floating point register from the opcode. */
#define getFn(opcode)  (((opcode) & MASK_Fn) >> 16)

/* Get the second source floating point register from the opcode. */
#define getFm(opcode)  ((opcode) & MASK_Fm)

/* Get the destination register from the opcode. */
#define getRd(opcode)  (((opcode) & MASK_Rd) >> 12)

/* Get the rounding mode (0..3, the g/h bits) from the opcode. */
#define getRoundingMode(opcode)  (((opcode) & MASK_ROUNDING_MODE) >> 5)
  368 +
  369 +static inline const floatx80 getExtendedConstant(const unsigned int nIndex)
  370 +{
  371 + extern const floatx80 floatx80Constant[];
  372 + return floatx80Constant[nIndex];
  373 +}
  374 +
  375 +static inline const float64 getDoubleConstant(const unsigned int nIndex)
  376 +{
  377 + extern const float64 float64Constant[];
  378 + return float64Constant[nIndex];
  379 +}
  380 +
  381 +static inline const float32 getSingleConstant(const unsigned int nIndex)
  382 +{
  383 + extern const float32 float32Constant[];
  384 + return float32Constant[nIndex];
  385 +}
  386 +
  387 +extern unsigned int getRegisterCount(const unsigned int opcode);
  388 +extern unsigned int getDestinationSize(const unsigned int opcode);
  389 +
  390 +#endif
... ...
target-arm/nwfpe/fpsr.h 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.com, 1998-1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
#ifndef __FPSR_H__
#define __FPSR_H__

/*
The FPSR is a 32 bit register consisting of 4 parts, each exactly
one byte.

	SYSTEM ID
	EXCEPTION TRAP ENABLE BYTE
	SYSTEM CONTROL BYTE
	CUMULATIVE EXCEPTION FLAGS BYTE

The FPCR is a 32 bit register consisting of bit flags.
*/

typedef unsigned int FPSR;  /* type for floating point status register */
typedef unsigned int FPCR;  /* type for floating point control register */

/* SYSTEM ID
------------
Note: the system id byte is read only */

#define MASK_SYSID      0xff000000
#define BIT_HARDWARE    0x80000000
#define FP_EMULATOR     0x01000000  /* System ID for emulator */
#define FP_ACCELERATOR  0x81000000  /* System ID for FPA11 */

/* EXCEPTION TRAP ENABLE BYTE
----------------------------- */

#define MASK_TRAP_ENABLE         0x00ff0000
#define MASK_TRAP_ENABLE_STRICT  0x001f0000  /* the five enable bits below */
#define BIT_IXE  0x00100000  /* inexact exception enable */
#define BIT_UFE  0x00080000  /* underflow exception enable */
#define BIT_OFE  0x00040000  /* overflow exception enable */
#define BIT_DZE  0x00020000  /* divide by zero exception enable */
#define BIT_IOE  0x00010000  /* invalid operation exception enable */

/* SYSTEM CONTROL BYTE
---------------------- */

#define MASK_SYSTEM_CONTROL  0x0000ff00
#define MASK_TRAP_STRICT     0x00001f00  /* the five control bits below */

#define BIT_AC  0x00001000  /* use alternative C-flag definition
                               for compares */
#define BIT_EP  0x00000800  /* use expanded packed decimal format */
#define BIT_SO  0x00000400  /* select synchronous operation of FPA */
#define BIT_NE  0x00000200  /* NaN exception bit */
#define BIT_ND  0x00000100  /* no denormalized numbers bit */

/* CUMULATIVE EXCEPTION FLAGS BYTE
---------------------------------- */

#define MASK_EXCEPTION_FLAGS         0x000000ff
#define MASK_EXCEPTION_FLAGS_STRICT  0x0000001f  /* the five flags below */

#define BIT_IXC  0x00000010  /* inexact exception flag */
#define BIT_UFC  0x00000008  /* underflow exception flag */
#define BIT_OFC  0x00000004  /* overflow exception flag */
#define BIT_DZC  0x00000002  /* divide by zero exception flag */
#define BIT_IOC  0x00000001  /* invalid operation exception flag */

/* Floating Point Control Register
----------------------------------*/

#define BIT_RU  0x80000000  /* rounded up bit */
#define BIT_IE  0x10000000  /* inexact bit */
#define BIT_MO  0x08000000  /* mantissa overflow bit */
#define BIT_EO  0x04000000  /* exponent overflow bit */
#define BIT_SB  0x00000800  /* store bounce */
#define BIT_AB  0x00000400  /* arithmetic bounce */
#define BIT_RE  0x00000200  /* rounding exception */
#define BIT_DA  0x00000100  /* disable FPA */

#define MASK_OP     0x00f08010  /* AU operation code */
#define MASK_PR     0x00080080  /* AU precision */
#define MASK_S1     0x00070000  /* AU source register 1 */
#define MASK_S2     0x00000007  /* AU source register 2 */
#define MASK_DS     0x00007000  /* AU destination register */
#define MASK_RM     0x00000060  /* AU rounding mode */
#define MASK_ALU    0x9cfff2ff  /* only ALU can write these bits */
#define MASK_RESET  0x00000d00  /* bits set on reset, all others cleared */
#define MASK_WFC    MASK_RESET
/* Parenthesized so the complement stays one unit wherever it expands. */
#define MASK_RFC    (~MASK_RESET)

#endif
... ...
target-arm/nwfpe/milieu.h 0 → 100644
  1 +
  2 +/*
  3 +===============================================================================
  4 +
  5 +This C header file is part of the SoftFloat IEC/IEEE Floating-point
  6 +Arithmetic Package, Release 2.
  7 +
  8 +Written by John R. Hauser. This work was made possible in part by the
  9 +International Computer Science Institute, located at Suite 600, 1947 Center
  10 +Street, Berkeley, California 94704. Funding was partially provided by the
  11 +National Science Foundation under grant MIP-9311980. The original version
  12 +of this code was written as part of a project to build a fixed-point vector
  13 +processor in collaboration with the University of California at Berkeley,
  14 +overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  15 +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  16 +arithmetic/softfloat.html'.
  17 +
  18 +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  19 +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  20 +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  21 +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  22 +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  23 +
  24 +Derivative works are acceptable, even for commercial purposes, so long as
  25 +(1) they include prominent notice that the work is derivative, and (2) they
  26 +include prominent notice akin to these three paragraphs for those parts of
  27 +this code that are retained.
  28 +
  29 +===============================================================================
  30 +*/
  31 +
  32 +/*
  33 +-------------------------------------------------------------------------------
  34 +Include common integer types and flags.
  35 +-------------------------------------------------------------------------------
  36 +*/
  37 +#include "ARM-gcc.h"
  38 +
  39 +/*
  40 +-------------------------------------------------------------------------------
  41 +Symbolic Boolean literals.
  42 +-------------------------------------------------------------------------------
  43 +*/
/* FALSE is 0 and TRUE is 1; the enumerators take their default values. */
enum { FALSE, TRUE };
  48 +
... ...
target-arm/nwfpe/single_cpdo.c 0 → 100644
  1 +/*
  2 + NetWinder Floating Point Emulator
  3 + (c) Rebel.COM, 1998,1999
  4 +
  5 + Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
  6 +
  7 + This program is free software; you can redistribute it and/or modify
  8 + it under the terms of the GNU General Public License as published by
  9 + the Free Software Foundation; either version 2 of the License, or
  10 + (at your option) any later version.
  11 +
  12 + This program is distributed in the hope that it will be useful,
  13 + but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 + GNU General Public License for more details.
  16 +
  17 + You should have received a copy of the GNU General Public License
  18 + along with this program; if not, write to the Free Software
  19 + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20 +*/
  21 +
  22 +#include "fpa11.h"
  23 +#include "softfloat.h"
  24 +#include "fpopcode.h"
  25 +
  26 +float32 float32_exp(float32 Fm);
  27 +float32 float32_ln(float32 Fm);
  28 +float32 float32_sin(float32 rFm);
  29 +float32 float32_cos(float32 rFm);
  30 +float32 float32_arcsin(float32 rFm);
  31 +float32 float32_arctan(float32 rFm);
  32 +float32 float32_log(float32 rFm);
  33 +float32 float32_tan(float32 rFm);
  34 +float32 float32_arccos(float32 rFm);
  35 +float32 float32_pow(float32 rFn,float32 rFm);
  36 +float32 float32_pol(float32 rFn,float32 rFm);
  37 +
  38 +unsigned int SingleCPDO(const unsigned int opcode)
  39 +{
  40 + FPA11 *fpa11 = GET_FPA11();
  41 + float32 rFm, rFn;
  42 + unsigned int Fd, Fm, Fn, nRc = 1;
  43 +
  44 + Fm = getFm(opcode);
  45 + if (CONSTANT_FM(opcode))
  46 + {
  47 + rFm = getSingleConstant(Fm);
  48 + }
  49 + else
  50 + {
  51 + switch (fpa11->fType[Fm])
  52 + {
  53 + case typeSingle:
  54 + rFm = fpa11->fpreg[Fm].fSingle;
  55 + break;
  56 +
  57 + default: return 0;
  58 + }
  59 + }
  60 +
  61 + if (!MONADIC_INSTRUCTION(opcode))
  62 + {
  63 + Fn = getFn(opcode);
  64 + switch (fpa11->fType[Fn])
  65 + {
  66 + case typeSingle:
  67 + rFn = fpa11->fpreg[Fn].fSingle;
  68 + break;
  69 +
  70 + default: return 0;
  71 + }
  72 + }
  73 +
  74 + Fd = getFd(opcode);
  75 + switch (opcode & MASK_ARITHMETIC_OPCODE)
  76 + {
  77 + /* dyadic opcodes */
  78 + case ADF_CODE:
  79 + fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm);
  80 + break;
  81 +
  82 + case MUF_CODE:
  83 + case FML_CODE:
  84 + fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm);
  85 + break;
  86 +
  87 + case SUF_CODE:
  88 + fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm);
  89 + break;
  90 +
  91 + case RSF_CODE:
  92 + fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn);
  93 + break;
  94 +
  95 + case DVF_CODE:
  96 + case FDV_CODE:
  97 + fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm);
  98 + break;
  99 +
  100 + case RDF_CODE:
  101 + case FRD_CODE:
  102 + fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn);
  103 + break;
  104 +
  105 +#if 0
  106 + case POW_CODE:
  107 + fpa11->fpreg[Fd].fSingle = float32_pow(rFn,rFm);
  108 + break;
  109 +
  110 + case RPW_CODE:
  111 + fpa11->fpreg[Fd].fSingle = float32_pow(rFm,rFn);
  112 + break;
  113 +#endif
  114 +
  115 + case RMF_CODE:
  116 + fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm);
  117 + break;
  118 +
  119 +#if 0
  120 + case POL_CODE:
  121 + fpa11->fpreg[Fd].fSingle = float32_pol(rFn,rFm);
  122 + break;
  123 +#endif
  124 +
  125 + /* monadic opcodes */
  126 + case MVF_CODE:
  127 + fpa11->fpreg[Fd].fSingle = rFm;
  128 + break;
  129 +
  130 + case MNF_CODE:
  131 + rFm ^= 0x80000000;
  132 + fpa11->fpreg[Fd].fSingle = rFm;
  133 + break;
  134 +
  135 + case ABS_CODE:
  136 + rFm &= 0x7fffffff;
  137 + fpa11->fpreg[Fd].fSingle = rFm;
  138 + break;
  139 +
  140 + case RND_CODE:
  141 + case URD_CODE:
  142 + fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm);
  143 + break;
  144 +
  145 + case SQT_CODE:
  146 + fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm);
  147 + break;
  148 +
  149 +#if 0
  150 + case LOG_CODE:
  151 + fpa11->fpreg[Fd].fSingle = float32_log(rFm);
  152 + break;
  153 +
  154 + case LGN_CODE:
  155 + fpa11->fpreg[Fd].fSingle = float32_ln(rFm);
  156 + break;
  157 +
  158 + case EXP_CODE:
  159 + fpa11->fpreg[Fd].fSingle = float32_exp(rFm);
  160 + break;
  161 +
  162 + case SIN_CODE:
  163 + fpa11->fpreg[Fd].fSingle = float32_sin(rFm);
  164 + break;
  165 +
  166 + case COS_CODE:
  167 + fpa11->fpreg[Fd].fSingle = float32_cos(rFm);
  168 + break;
  169 +
  170 + case TAN_CODE:
  171 + fpa11->fpreg[Fd].fSingle = float32_tan(rFm);
  172 + break;
  173 +
  174 + case ASN_CODE:
  175 + fpa11->fpreg[Fd].fSingle = float32_arcsin(rFm);
  176 + break;
  177 +
  178 + case ACS_CODE:
  179 + fpa11->fpreg[Fd].fSingle = float32_arccos(rFm);
  180 + break;
  181 +
  182 + case ATN_CODE:
  183 + fpa11->fpreg[Fd].fSingle = float32_arctan(rFm);
  184 + break;
  185 +#endif
  186 +
  187 + case NRM_CODE:
  188 + break;
  189 +
  190 + default:
  191 + {
  192 + nRc = 0;
  193 + }
  194 + }
  195 +
  196 + if (0 != nRc) fpa11->fType[Fd] = typeSingle;
  197 + return nRc;
  198 +}
  199 +
#if 0
/*
 * Disabled placeholders for the transcendental operations.  Everything in
 * this #if 0 region is excluded from compilation.  Several bodies are
 * empty apart from a "series" marker and return no value, so the region
 * cannot simply be enabled as it stands.
 */

/* e ^ Fm -- not implemented. */
float32 float32_exp(float32 Fm)
{
//series
}

/* Natural logarithm of Fm -- not implemented. */
float32 float32_ln(float32 Fm)
{
//series
}

/* Sine of rFm -- not implemented. */
float32 float32_sin(float32 rFm)
{
//series
}

/* Cosine of rFm -- not implemented. */
float32 float32_cos(float32 rFm)
{
//series
}

/* Arc sine of rFm -- not implemented. */
float32 float32_arcsin(float32 rFm)
{
//series
}

/* Arc tangent of rFm -- not implemented. */
float32 float32_arctan(float32 rFm)
{
 //series
}

/* Arc cosine of rFm -- not implemented; the intended formula is commented out. */
float32 float32_arccos(float32 rFm)
{
 //return float32_sub(halfPi,float32_arcsin(rFm));
}

/*
 * Base-10 logarithm, computed as float32_ln(rFm) divided by constant
 * table entry 7.
 * NOTE(review): log10(x) = ln(x) / ln(10), so this is only right if
 * entry 7 holds ln(10) rather than 10.0 -- confirm against the table.
 */
float32 float32_log(float32 rFm)
{
 return float32_div(float32_ln(rFm),getSingleConstant(7));
}

/* Tangent as sin(rFm) / cos(rFm). */
float32 float32_tan(float32 rFm)
{
 return float32_div(float32_sin(rFm),float32_cos(rFm));
}

/* rFn ^ rFm, computed as exp(rFm * ln(rFn)). */
float32 float32_pow(float32 rFn,float32 rFm)
{
 return float32_exp(float32_mul(rFm,float32_ln(rFn)));
}

/*
 * Polar angle, computed as arctan(rFn / rFm).
 * NOTE(review): the opcode table describes POL as arctan2(Fn,Fm); a
 * one-argument arctan of the quotient cannot tell opposite quadrants
 * apart -- confirm the intended behavior before enabling.
 */
float32 float32_pol(float32 rFn,float32 rFm)
{
 return float32_arctan(float32_div(rFn,rFm));
}
#endif
... ...
target-arm/nwfpe/softfloat-macros 0 → 100644
  1 +
  2 +/*
  3 +===============================================================================
  4 +
  5 +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
  6 +Arithmetic Package, Release 2.
  7 +
  8 +Written by John R. Hauser. This work was made possible in part by the
  9 +International Computer Science Institute, located at Suite 600, 1947 Center
  10 +Street, Berkeley, California 94704. Funding was partially provided by the
  11 +National Science Foundation under grant MIP-9311980. The original version
  12 +of this code was written as part of a project to build a fixed-point vector
  13 +processor in collaboration with the University of California at Berkeley,
  14 +overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  15 +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  16 +arithmetic/softfloat.html'.
  17 +
  18 +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  19 +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  20 +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  21 +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  22 +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  23 +
  24 +Derivative works are acceptable, even for commercial purposes, so long as
  25 +(1) they include prominent notice that the work is derivative, and (2) they
  26 +include prominent notice akin to these three paragraphs for those parts of
  27 +this code that are retained.
  28 +
  29 +===============================================================================
  30 +*/
  31 +
  32 +/*
  33 +-------------------------------------------------------------------------------
  34 +Shifts `a' right by the number of bits given in `count'. If any nonzero
  35 +bits are shifted off, they are ``jammed'' into the least significant bit of
  36 +the result by setting the least significant bit to 1. The value of `count'
  37 +can be arbitrarily large; in particular, if `count' is greater than 32, the
  38 +result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  39 +The result is stored in the location pointed to by `zPtr'.
  40 +-------------------------------------------------------------------------------
  41 +*/
  42 +INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
  43 +{
  44 + bits32 z;
  45 + if ( count == 0 ) {
  46 + z = a;
  47 + }
  48 + else if ( count < 32 ) {
  49 + z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
  50 + }
  51 + else {
  52 + z = ( a != 0 );
  53 + }
  54 + *zPtr = z;
  55 +}
  56 +
  57 +/*
  58 +-------------------------------------------------------------------------------
  59 +Shifts `a' right by the number of bits given in `count'. If any nonzero
  60 +bits are shifted off, they are ``jammed'' into the least significant bit of
  61 +the result by setting the least significant bit to 1. The value of `count'
  62 +can be arbitrarily large; in particular, if `count' is greater than 64, the
  63 +result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  64 +The result is stored in the location pointed to by `zPtr'.
  65 +-------------------------------------------------------------------------------
  66 +*/
  67 +INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
  68 +{
  69 + bits64 z;
  70 +
  71 +// __asm__("@shift64RightJamming -- start");
  72 + if ( count == 0 ) {
  73 + z = a;
  74 + }
  75 + else if ( count < 64 ) {
  76 + z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
  77 + }
  78 + else {
  79 + z = ( a != 0 );
  80 + }
  81 +// __asm__("@shift64RightJamming -- end");
  82 + *zPtr = z;
  83 +}
  84 +
  85 +/*
  86 +-------------------------------------------------------------------------------
  87 +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  88 +_plus_ the number of bits given in `count'. The shifted result is at most
  89 +64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
  90 +bits shifted off form a second 64-bit result as follows: The _last_ bit
  91 +shifted off is the most-significant bit of the extra result, and the other
  92 +63 bits of the extra result are all zero if and only if _all_but_the_last_
  93 +bits shifted off were all zero. This extra result is stored in the location
  94 +pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
  95 + (This routine makes more sense if `a0' and `a1' are considered to form a
  96 +fixed-point value with binary point between `a0' and `a1'. This fixed-point
  97 +value is shifted right by the number of bits given in `count', and the
  98 +integer part of the result is returned at the location pointed to by
  99 +`z0Ptr'. The fractional part of the result may be slightly corrupted as
  100 +described above, and is returned at the location pointed to by `z1Ptr'.)
  101 +-------------------------------------------------------------------------------
  102 +*/
  103 +INLINE void
  104 + shift64ExtraRightJamming(
  105 + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  106 +{
  107 + bits64 z0, z1;
  108 + int8 negCount = ( - count ) & 63;
  109 +
  110 + if ( count == 0 ) {
  111 + z1 = a1;
  112 + z0 = a0;
  113 + }
  114 + else if ( count < 64 ) {
  115 + z1 = ( a0<<negCount ) | ( a1 != 0 );
  116 + z0 = a0>>count;
  117 + }
  118 + else {
  119 + if ( count == 64 ) {
  120 + z1 = a0 | ( a1 != 0 );
  121 + }
  122 + else {
  123 + z1 = ( ( a0 | a1 ) != 0 );
  124 + }
  125 + z0 = 0;
  126 + }
  127 + *z1Ptr = z1;
  128 + *z0Ptr = z0;
  129 +
  130 +}
  131 +
  132 +/*
  133 +-------------------------------------------------------------------------------
  134 +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  135 +number of bits given in `count'. Any bits shifted off are lost. The value
  136 +of `count' can be arbitrarily large; in particular, if `count' is greater
  137 +than 128, the result will be 0. The result is broken into two 64-bit pieces
  138 +which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  139 +-------------------------------------------------------------------------------
  140 +*/
  141 +INLINE void
  142 + shift128Right(
  143 + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  144 +{
  145 + bits64 z0, z1;
  146 + int8 negCount = ( - count ) & 63;
  147 +
  148 + if ( count == 0 ) {
  149 + z1 = a1;
  150 + z0 = a0;
  151 + }
  152 + else if ( count < 64 ) {
  153 + z1 = ( a0<<negCount ) | ( a1>>count );
  154 + z0 = a0>>count;
  155 + }
  156 + else {
  157 + z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
  158 + z0 = 0;
  159 + }
  160 + *z1Ptr = z1;
  161 + *z0Ptr = z0;
  162 +
  163 +}
  164 +
  165 +/*
  166 +-------------------------------------------------------------------------------
  167 +Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
  168 +number of bits given in `count'. If any nonzero bits are shifted off, they
  169 +are ``jammed'' into the least significant bit of the result by setting the
  170 +least significant bit to 1. The value of `count' can be arbitrarily large;
  171 +in particular, if `count' is greater than 128, the result will be either 0
  172 +or 1, depending on whether the concatenation of `a0' and `a1' is zero or
  173 +nonzero. The result is broken into two 64-bit pieces which are stored at
  174 +the locations pointed to by `z0Ptr' and `z1Ptr'.
  175 +-------------------------------------------------------------------------------
  176 +*/
  177 +INLINE void
  178 + shift128RightJamming(
  179 + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  180 +{
  181 + bits64 z0, z1;
  182 + int8 negCount = ( - count ) & 63;
  183 +
  184 + if ( count == 0 ) {
  185 + z1 = a1;
  186 + z0 = a0;
  187 + }
  188 + else if ( count < 64 ) {
  189 + z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
  190 + z0 = a0>>count;
  191 + }
  192 + else {
  193 + if ( count == 64 ) {
  194 + z1 = a0 | ( a1 != 0 );
  195 + }
  196 + else if ( count < 128 ) {
  197 + z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
  198 + }
  199 + else {
  200 + z1 = ( ( a0 | a1 ) != 0 );
  201 + }
  202 + z0 = 0;
  203 + }
  204 + *z1Ptr = z1;
  205 + *z0Ptr = z0;
  206 +
  207 +}
  208 +
  209 +/*
  210 +-------------------------------------------------------------------------------
  211 +Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
  212 +by 64 _plus_ the number of bits given in `count'. The shifted result is
  213 +at most 128 nonzero bits; these are broken into two 64-bit pieces which are
  214 +stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
  215 +off form a third 64-bit result as follows: The _last_ bit shifted off is
  216 +the most-significant bit of the extra result, and the other 63 bits of the
  217 +extra result are all zero if and only if _all_but_the_last_ bits shifted off
  218 +were all zero. This extra result is stored in the location pointed to by
  219 +`z2Ptr'. The value of `count' can be arbitrarily large.
  220 + (This routine makes more sense if `a0', `a1', and `a2' are considered
  221 +to form a fixed-point value with binary point between `a1' and `a2'. This
  222 +fixed-point value is shifted right by the number of bits given in `count',
  223 +and the integer part of the result is returned at the locations pointed to
  224 +by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
  225 +corrupted as described above, and is returned at the location pointed to by
  226 +`z2Ptr'.)
  227 +-------------------------------------------------------------------------------
  228 +*/
  229 +INLINE void
  230 + shift128ExtraRightJamming(
  231 + bits64 a0,
  232 + bits64 a1,
  233 + bits64 a2,
  234 + int16 count,
  235 + bits64 *z0Ptr,
  236 + bits64 *z1Ptr,
  237 + bits64 *z2Ptr
  238 + )
  239 +{
  240 + bits64 z0, z1, z2;
  241 + int8 negCount = ( - count ) & 63;
  242 +
  243 + if ( count == 0 ) {
  244 + z2 = a2;
  245 + z1 = a1;
  246 + z0 = a0;
  247 + }
  248 + else {
  249 + if ( count < 64 ) {
  250 + z2 = a1<<negCount;
  251 + z1 = ( a0<<negCount ) | ( a1>>count );
  252 + z0 = a0>>count;
  253 + }
  254 + else {
  255 + if ( count == 64 ) {
  256 + z2 = a1;
  257 + z1 = a0;
  258 + }
  259 + else {
  260 + a2 |= a1;
  261 + if ( count < 128 ) {
  262 + z2 = a0<<negCount;
  263 + z1 = a0>>( count & 63 );
  264 + }
  265 + else {
  266 + z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
  267 + z1 = 0;
  268 + }
  269 + }
  270 + z0 = 0;
  271 + }
  272 + z2 |= ( a2 != 0 );
  273 + }
  274 + *z2Ptr = z2;
  275 + *z1Ptr = z1;
  276 + *z0Ptr = z0;
  277 +
  278 +}
  279 +
  280 +/*
  281 +-------------------------------------------------------------------------------
  282 +Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
  283 +number of bits given in `count'. Any bits shifted off are lost. The value
  284 +of `count' must be less than 64. The result is broken into two 64-bit
  285 +pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  286 +-------------------------------------------------------------------------------
  287 +*/
  288 +INLINE void
  289 + shortShift128Left(
  290 + bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
  291 +{
  292 +
  293 + *z1Ptr = a1<<count;
  294 + *z0Ptr =
  295 + ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
  296 +
  297 +}
  298 +
  299 +/*
  300 +-------------------------------------------------------------------------------
  301 +Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
  302 +by the number of bits given in `count'. Any bits shifted off are lost.
  303 +The value of `count' must be less than 64. The result is broken into three
  304 +64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  305 +`z1Ptr', and `z2Ptr'.
  306 +-------------------------------------------------------------------------------
  307 +*/
  308 +INLINE void
  309 + shortShift192Left(
  310 + bits64 a0,
  311 + bits64 a1,
  312 + bits64 a2,
  313 + int16 count,
  314 + bits64 *z0Ptr,
  315 + bits64 *z1Ptr,
  316 + bits64 *z2Ptr
  317 + )
  318 +{
  319 + bits64 z0, z1, z2;
  320 + int8 negCount;
  321 +
  322 + z2 = a2<<count;
  323 + z1 = a1<<count;
  324 + z0 = a0<<count;
  325 + if ( 0 < count ) {
  326 + negCount = ( ( - count ) & 63 );
  327 + z1 |= a2>>negCount;
  328 + z0 |= a1>>negCount;
  329 + }
  330 + *z2Ptr = z2;
  331 + *z1Ptr = z1;
  332 + *z0Ptr = z0;
  333 +
  334 +}
  335 +
  336 +/*
  337 +-------------------------------------------------------------------------------
  338 +Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
  339 +value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
  340 +any carry out is lost. The result is broken into two 64-bit pieces which
  341 +are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  342 +-------------------------------------------------------------------------------
  343 +*/
  344 +INLINE void
  345 + add128(
  346 + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
  347 +{
  348 + bits64 z1;
  349 +
  350 + z1 = a1 + b1;
  351 + *z1Ptr = z1;
  352 + *z0Ptr = a0 + b0 + ( z1 < a1 );
  353 +
  354 +}
  355 +
  356 +/*
  357 +-------------------------------------------------------------------------------
  358 +Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
  359 +192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
  360 +modulo 2^192, so any carry out is lost. The result is broken into three
  361 +64-bit pieces which are stored at the locations pointed to by `z0Ptr',
  362 +`z1Ptr', and `z2Ptr'.
  363 +-------------------------------------------------------------------------------
  364 +*/
  365 +INLINE void
  366 + add192(
  367 + bits64 a0,
  368 + bits64 a1,
  369 + bits64 a2,
  370 + bits64 b0,
  371 + bits64 b1,
  372 + bits64 b2,
  373 + bits64 *z0Ptr,
  374 + bits64 *z1Ptr,
  375 + bits64 *z2Ptr
  376 + )
  377 +{
  378 + bits64 z0, z1, z2;
  379 + int8 carry0, carry1;
  380 +
  381 + z2 = a2 + b2;
  382 + carry1 = ( z2 < a2 );
  383 + z1 = a1 + b1;
  384 + carry0 = ( z1 < a1 );
  385 + z0 = a0 + b0;
  386 + z1 += carry1;
  387 + z0 += ( z1 < carry1 );
  388 + z0 += carry0;
  389 + *z2Ptr = z2;
  390 + *z1Ptr = z1;
  391 + *z0Ptr = z0;
  392 +
  393 +}
  394 +
  395 +/*
  396 +-------------------------------------------------------------------------------
  397 +Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
  398 +128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
  399 +2^128, so any borrow out (carry out) is lost. The result is broken into two
  400 +64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
  401 +`z1Ptr'.
  402 +-------------------------------------------------------------------------------
  403 +*/
  404 +INLINE void
  405 + sub128(
  406 + bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
  407 +{
  408 +
  409 + *z1Ptr = a1 - b1;
  410 + *z0Ptr = a0 - b0 - ( a1 < b1 );
  411 +
  412 +}
  413 +
  414 +/*
  415 +-------------------------------------------------------------------------------
  416 +Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
  417 +from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
  418 +Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
  419 +result is broken into three 64-bit pieces which are stored at the locations
  420 +pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
  421 +-------------------------------------------------------------------------------
  422 +*/
  423 +INLINE void
  424 + sub192(
  425 + bits64 a0,
  426 + bits64 a1,
  427 + bits64 a2,
  428 + bits64 b0,
  429 + bits64 b1,
  430 + bits64 b2,
  431 + bits64 *z0Ptr,
  432 + bits64 *z1Ptr,
  433 + bits64 *z2Ptr
  434 + )
  435 +{
  436 + bits64 z0, z1, z2;
  437 + int8 borrow0, borrow1;
  438 +
  439 + z2 = a2 - b2;
  440 + borrow1 = ( a2 < b2 );
  441 + z1 = a1 - b1;
  442 + borrow0 = ( a1 < b1 );
  443 + z0 = a0 - b0;
  444 + z0 -= ( z1 < borrow1 );
  445 + z1 -= borrow1;
  446 + z0 -= borrow0;
  447 + *z2Ptr = z2;
  448 + *z1Ptr = z1;
  449 + *z0Ptr = z0;
  450 +
  451 +}
  452 +
  453 +/*
  454 +-------------------------------------------------------------------------------
  455 +Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
  456 +into two 64-bit pieces which are stored at the locations pointed to by
  457 +`z0Ptr' and `z1Ptr'.
  458 +-------------------------------------------------------------------------------
  459 +*/
  460 +INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
  461 +{
  462 + bits32 aHigh, aLow, bHigh, bLow;
  463 + bits64 z0, zMiddleA, zMiddleB, z1;
  464 +
  465 + aLow = a;
  466 + aHigh = a>>32;
  467 + bLow = b;
  468 + bHigh = b>>32;
  469 + z1 = ( (bits64) aLow ) * bLow;
  470 + zMiddleA = ( (bits64) aLow ) * bHigh;
  471 + zMiddleB = ( (bits64) aHigh ) * bLow;
  472 + z0 = ( (bits64) aHigh ) * bHigh;
  473 + zMiddleA += zMiddleB;
  474 + z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
  475 + zMiddleA <<= 32;
  476 + z1 += zMiddleA;
  477 + z0 += ( z1 < zMiddleA );
  478 + *z1Ptr = z1;
  479 + *z0Ptr = z0;
  480 +
  481 +}
  482 +
  483 +/*
  484 +-------------------------------------------------------------------------------
  485 +Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
  486 +obtain a 192-bit product. The product is broken into three 64-bit pieces
  487 +which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
  488 +`z2Ptr'.
  489 +-------------------------------------------------------------------------------
  490 +*/
  491 +INLINE void
  492 + mul128By64To192(
  493 + bits64 a0,
  494 + bits64 a1,
  495 + bits64 b,
  496 + bits64 *z0Ptr,
  497 + bits64 *z1Ptr,
  498 + bits64 *z2Ptr
  499 + )
  500 +{
  501 + bits64 z0, z1, z2, more1;
  502 +
  503 + mul64To128( a1, b, &z1, &z2 );
  504 + mul64To128( a0, b, &z0, &more1 );
  505 + add128( z0, more1, 0, z1, &z0, &z1 );
  506 + *z2Ptr = z2;
  507 + *z1Ptr = z1;
  508 + *z0Ptr = z0;
  509 +
  510 +}
  511 +
  512 +/*
  513 +-------------------------------------------------------------------------------
  514 +Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
  515 +128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
  516 +product. The product is broken into four 64-bit pieces which are stored at
  517 +the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
  518 +-------------------------------------------------------------------------------
  519 +*/
  520 +INLINE void
  521 + mul128To256(
  522 + bits64 a0,
  523 + bits64 a1,
  524 + bits64 b0,
  525 + bits64 b1,
  526 + bits64 *z0Ptr,
  527 + bits64 *z1Ptr,
  528 + bits64 *z2Ptr,
  529 + bits64 *z3Ptr
  530 + )
  531 +{
  532 + bits64 z0, z1, z2, z3;
  533 + bits64 more1, more2;
  534 +
  535 + mul64To128( a1, b1, &z2, &z3 );
  536 + mul64To128( a1, b0, &z1, &more2 );
  537 + add128( z1, more2, 0, z2, &z1, &z2 );
  538 + mul64To128( a0, b0, &z0, &more1 );
  539 + add128( z0, more1, 0, z1, &z0, &z1 );
  540 + mul64To128( a0, b1, &more1, &more2 );
  541 + add128( more1, more2, 0, z2, &more1, &z2 );
  542 + add128( z0, z1, 0, more1, &z0, &z1 );
  543 + *z3Ptr = z3;
  544 + *z2Ptr = z2;
  545 + *z1Ptr = z1;
  546 + *z0Ptr = z0;
  547 +
  548 +}
  549 +
  550 +/*
  551 +-------------------------------------------------------------------------------
  552 +Returns an approximation to the 64-bit integer quotient obtained by dividing
  553 +`b' into the 128-bit value formed by concatenating `a0' and `a1'. The
  554 +divisor `b' must be at least 2^63. If q is the exact quotient truncated
  555 +toward zero, the approximation returned lies between q and q + 2 inclusive.
  556 +If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
  557 +unsigned integer is returned.
  558 +-------------------------------------------------------------------------------
  559 +*/
  560 +static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
  561 +{
  562 + bits64 b0, b1;
  563 + bits64 rem0, rem1, term0, term1;
  564 + bits64 z;
  565 + if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
  566 + b0 = b>>32;
  567 + z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
  568 + mul64To128( b, z, &term0, &term1 );
  569 + sub128( a0, a1, term0, term1, &rem0, &rem1 );
  570 + while ( ( (sbits64) rem0 ) < 0 ) {
  571 + z -= LIT64( 0x100000000 );
  572 + b1 = b<<32;
  573 + add128( rem0, rem1, b0, b1, &rem0, &rem1 );
  574 + }
  575 + rem0 = ( rem0<<32 ) | ( rem1>>32 );
  576 + z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
  577 + return z;
  578 +
  579 +}
  580 +
  581 +/*
  582 +-------------------------------------------------------------------------------
  583 +Returns an approximation to the square root of the 32-bit significand given
  584 +by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
  585 +`aExp' (the least significant bit) is 1, the integer returned approximates
  586 +2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
  587 +is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
  588 +case, the approximation returned lies strictly within +/-2 of the exact
  589 +value.
  590 +-------------------------------------------------------------------------------
  591 +*/
  592 +static bits32 estimateSqrt32( int16 aExp, bits32 a )
  593 +{
  594 + static const bits16 sqrtOddAdjustments[] = {
  595 + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
  596 + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
  597 + };
  598 + static const bits16 sqrtEvenAdjustments[] = {
  599 + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
  600 + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
  601 + };
  602 + int8 index;
  603 + bits32 z;
  604 +
  605 + index = ( a>>27 ) & 15;
  606 + if ( aExp & 1 ) {
  607 + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
  608 + z = ( ( a / z )<<14 ) + ( z<<15 );
  609 + a >>= 1;
  610 + }
  611 + else {
  612 + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
  613 + z = a / z + z;
  614 + z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
  615 + if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
  616 + }
  617 + return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
  618 +
  619 +}
  620 +
  621 +/*
  622 +-------------------------------------------------------------------------------
  623 +Returns the number of leading 0 bits before the most-significant 1 bit
  624 +of `a'. If `a' is zero, 32 is returned.
  625 +-------------------------------------------------------------------------------
  626 +*/
  627 +static int8 countLeadingZeros32( bits32 a )
  628 +{
  629 + static const int8 countLeadingZerosHigh[] = {
  630 + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  631 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  632 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  633 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  634 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  635 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  636 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  637 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  638 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  639 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  640 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  641 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  642 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  643 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  644 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  645 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  646 + };
  647 + int8 shiftCount;
  648 +
  649 + shiftCount = 0;
  650 + if ( a < 0x10000 ) {
  651 + shiftCount += 16;
  652 + a <<= 16;
  653 + }
  654 + if ( a < 0x1000000 ) {
  655 + shiftCount += 8;
  656 + a <<= 8;
  657 + }
  658 + shiftCount += countLeadingZerosHigh[ a>>24 ];
  659 + return shiftCount;
  660 +
  661 +}
  662 +
  663 +/*
  664 +-------------------------------------------------------------------------------
  665 +Returns the number of leading 0 bits before the most-significant 1 bit
  666 +of `a'. If `a' is zero, 64 is returned.
  667 +-------------------------------------------------------------------------------
  668 +*/
  669 +static int8 countLeadingZeros64( bits64 a )
  670 +{
  671 + int8 shiftCount;
  672 +
  673 + shiftCount = 0;
  674 + if ( a < ( (bits64) 1 )<<32 ) {
  675 + shiftCount += 32;
  676 + }
  677 + else {
  678 + a >>= 32;
  679 + }
  680 + shiftCount += countLeadingZeros32( a );
  681 + return shiftCount;
  682 +
  683 +}
  684 +
  685 +/*
  686 +-------------------------------------------------------------------------------
  687 +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
  688 +is equal to the 128-bit value formed by concatenating `b0' and `b1'.
  689 +Otherwise, returns 0.
  690 +-------------------------------------------------------------------------------
  691 +*/
  692 +INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  693 +{
  694 +
  695 + return ( a0 == b0 ) && ( a1 == b1 );
  696 +
  697 +}
  698 +
  699 +/*
  700 +-------------------------------------------------------------------------------
  701 +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  702 +than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
  703 +Otherwise, returns 0.
  704 +-------------------------------------------------------------------------------
  705 +*/
  706 +INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  707 +{
  708 +
  709 + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
  710 +
  711 +}
  712 +
  713 +/*
  714 +-------------------------------------------------------------------------------
  715 +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
  716 +than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
  717 +returns 0.
  718 +-------------------------------------------------------------------------------
  719 +*/
  720 +INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  721 +{
  722 +
  723 + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
  724 +
  725 +}
  726 +
  727 +/*
  728 +-------------------------------------------------------------------------------
  729 +Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
  730 +not equal to the 128-bit value formed by concatenating `b0' and `b1'.
  731 +Otherwise, returns 0.
  732 +-------------------------------------------------------------------------------
  733 +*/
  734 +INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
  735 +{
  736 +
  737 + return ( a0 != b0 ) || ( a1 != b1 );
  738 +
  739 +}
  740 +
... ...
target-arm/nwfpe/softfloat-specialize 0 → 100644
  1 +
  2 +/*
  3 +===============================================================================
  4 +
  5 +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
  6 +Arithmetic Package, Release 2.
  7 +
  8 +Written by John R. Hauser. This work was made possible in part by the
  9 +International Computer Science Institute, located at Suite 600, 1947 Center
  10 +Street, Berkeley, California 94704. Funding was partially provided by the
  11 +National Science Foundation under grant MIP-9311980. The original version
  12 +of this code was written as part of a project to build a fixed-point vector
  13 +processor in collaboration with the University of California at Berkeley,
  14 +overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  15 +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  16 +arithmetic/softfloat.html'.
  17 +
  18 +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  19 +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  20 +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  21 +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  22 +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  23 +
  24 +Derivative works are acceptable, even for commercial purposes, so long as
  25 +(1) they include prominent notice that the work is derivative, and (2) they
  26 +include prominent notice akin to these three paragraphs for those parts of
  27 +this code that are retained.
  28 +
  29 +===============================================================================
  30 +*/
  31 +
  32 +/*
  33 +-------------------------------------------------------------------------------
  34 +Underflow tininess-detection mode, statically initialized to default value.
  35 +(The declaration in `softfloat.h' must match the `int8' type here.)
  36 +-------------------------------------------------------------------------------
  37 +*/
  38 +int8 float_detect_tininess = float_tininess_after_rounding;
  39 +
  40 +/*
  41 +-------------------------------------------------------------------------------
  42 +Raises the exceptions specified by `flags'. Floating-point traps can be
  43 +defined here if desired. It is currently not possible for such a trap to
  44 +substitute a result value. If traps are not implemented, this routine
  45 +should be simply `float_exception_flags |= flags;'.
  46 +
  47 +ScottB: November 4, 1998
  48 +Moved this function out of softfloat-specialize into fpmodule.c.
  49 +This effectively isolates all the changes required for integrating with the
  50 +Linux kernel into fpmodule.c. Porting to NetBSD should only require modifying
  51 +fpmodule.c to integrate with the NetBSD kernel (I hope!).
  52 +-------------------------------------------------------------------------------
  53 +*/
  54 +void float_raise( int8 flags )
  55 +{
  56 + float_exception_flags |= flags;
  57 +}
  58 +
  59 +/*
  60 +-------------------------------------------------------------------------------
  61 +Internal canonical NaN format.
  62 +-------------------------------------------------------------------------------
  63 +*/
  64 +typedef struct {
  65 + flag sign;
  66 + bits64 high, low;
  67 +} commonNaNT;
  68 +
/*
-------------------------------------------------------------------------------
Bit pattern of the single-precision NaN produced when a NaN result has to be
generated by default.
-------------------------------------------------------------------------------
*/
#define float32_default_nan 0xFFFFFFFF
  75 +
  76 +/*
  77 +-------------------------------------------------------------------------------
  78 +Returns 1 if the single-precision floating-point value `a' is a NaN;
  79 +otherwise returns 0.
  80 +-------------------------------------------------------------------------------
  81 +*/
  82 +flag float32_is_nan( float32 a )
  83 +{
  84 +
  85 + return ( 0xFF000000 < (bits32) ( a<<1 ) );
  86 +
  87 +}
  88 +
  89 +/*
  90 +-------------------------------------------------------------------------------
  91 +Returns 1 if the single-precision floating-point value `a' is a signaling
  92 +NaN; otherwise returns 0.
  93 +-------------------------------------------------------------------------------
  94 +*/
  95 +flag float32_is_signaling_nan( float32 a )
  96 +{
  97 +
  98 + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
  99 +
  100 +}
  101 +
  102 +/*
  103 +-------------------------------------------------------------------------------
  104 +Returns the result of converting the single-precision floating-point NaN
  105 +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
  106 +exception is raised.
  107 +-------------------------------------------------------------------------------
  108 +*/
  109 +static commonNaNT float32ToCommonNaN( float32 a )
  110 +{
  111 + commonNaNT z;
  112 +
  113 + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
  114 + z.sign = a>>31;
  115 + z.low = 0;
  116 + z.high = ( (bits64) a )<<41;
  117 + return z;
  118 +
  119 +}
  120 +
  121 +/*
  122 +-------------------------------------------------------------------------------
  123 +Returns the result of converting the canonical NaN `a' to the single-
  124 +precision floating-point format.
  125 +-------------------------------------------------------------------------------
  126 +*/
  127 +static float32 commonNaNToFloat32( commonNaNT a )
  128 +{
  129 +
  130 + return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
  131 +
  132 +}
  133 +
  134 +/*
  135 +-------------------------------------------------------------------------------
  136 +Takes two single-precision floating-point values `a' and `b', one of which
  137 +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
  138 +signaling NaN, the invalid exception is raised.
  139 +-------------------------------------------------------------------------------
  140 +*/
  141 +static float32 propagateFloat32NaN( float32 a, float32 b )
  142 +{
  143 + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
  144 +
  145 + aIsNaN = float32_is_nan( a );
  146 + aIsSignalingNaN = float32_is_signaling_nan( a );
  147 + bIsNaN = float32_is_nan( b );
  148 + bIsSignalingNaN = float32_is_signaling_nan( b );
  149 + a |= 0x00400000;
  150 + b |= 0x00400000;
  151 + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
  152 + if ( aIsNaN ) {
  153 + return ( aIsSignalingNaN & bIsNaN ) ? b : a;
  154 + }
  155 + else {
  156 + return b;
  157 + }
  158 +
  159 +}
  160 +
/*
-------------------------------------------------------------------------------
Bit pattern of the double-precision NaN produced when a NaN result has to be
generated by default.
-------------------------------------------------------------------------------
*/
#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
  167 +
  168 +/*
  169 +-------------------------------------------------------------------------------
  170 +Returns 1 if the double-precision floating-point value `a' is a NaN;
  171 +otherwise returns 0.
  172 +-------------------------------------------------------------------------------
  173 +*/
  174 +flag float64_is_nan( float64 a )
  175 +{
  176 +
  177 + return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
  178 +
  179 +}
  180 +
  181 +/*
  182 +-------------------------------------------------------------------------------
  183 +Returns 1 if the double-precision floating-point value `a' is a signaling
  184 +NaN; otherwise returns 0.
  185 +-------------------------------------------------------------------------------
  186 +*/
  187 +flag float64_is_signaling_nan( float64 a )
  188 +{
  189 +
  190 + return
  191 + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
  192 + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
  193 +
  194 +}
  195 +
  196 +/*
  197 +-------------------------------------------------------------------------------
  198 +Returns the result of converting the double-precision floating-point NaN
  199 +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
  200 +exception is raised.
  201 +-------------------------------------------------------------------------------
  202 +*/
  203 +static commonNaNT float64ToCommonNaN( float64 a )
  204 +{
  205 + commonNaNT z;
  206 +
  207 + if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
  208 + z.sign = a>>63;
  209 + z.low = 0;
  210 + z.high = a<<12;
  211 + return z;
  212 +
  213 +}
  214 +
  215 +/*
  216 +-------------------------------------------------------------------------------
  217 +Returns the result of converting the canonical NaN `a' to the double-
  218 +precision floating-point format.
  219 +-------------------------------------------------------------------------------
  220 +*/
  221 +static float64 commonNaNToFloat64( commonNaNT a )
  222 +{
  223 +
  224 + return
  225 + ( ( (bits64) a.sign )<<63 )
  226 + | LIT64( 0x7FF8000000000000 )
  227 + | ( a.high>>12 );
  228 +
  229 +}
  230 +
  231 +/*
  232 +-------------------------------------------------------------------------------
  233 +Takes two double-precision floating-point values `a' and `b', one of which
  234 +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
  235 +signaling NaN, the invalid exception is raised.
  236 +-------------------------------------------------------------------------------
  237 +*/
  238 +static float64 propagateFloat64NaN( float64 a, float64 b )
  239 +{
  240 + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
  241 +
  242 + aIsNaN = float64_is_nan( a );
  243 + aIsSignalingNaN = float64_is_signaling_nan( a );
  244 + bIsNaN = float64_is_nan( b );
  245 + bIsSignalingNaN = float64_is_signaling_nan( b );
  246 + a |= LIT64( 0x0008000000000000 );
  247 + b |= LIT64( 0x0008000000000000 );
  248 + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
  249 + if ( aIsNaN ) {
  250 + return ( aIsSignalingNaN & bIsNaN ) ? b : a;
  251 + }
  252 + else {
  253 + return b;
  254 + }
  255 +
  256 +}
  257 +
  258 +#ifdef FLOATX80
  259 +
/*
-------------------------------------------------------------------------------
Bit pattern of the extended double-precision NaN produced when a NaN result
has to be generated by default.  The `high' value holds the most-significant
bits and the `low' value the least-significant bits.
-------------------------------------------------------------------------------
*/
#define floatx80_default_nan_high 0xFFFF
#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
  269 +
  270 +/*
  271 +-------------------------------------------------------------------------------
  272 +Returns 1 if the extended double-precision floating-point value `a' is a
  273 +NaN; otherwise returns 0.
  274 +-------------------------------------------------------------------------------
  275 +*/
  276 +flag floatx80_is_nan( floatx80 a )
  277 +{
  278 +
  279 + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
  280 +
  281 +}
  282 +
  283 +/*
  284 +-------------------------------------------------------------------------------
  285 +Returns 1 if the extended double-precision floating-point value `a' is a
  286 +signaling NaN; otherwise returns 0.
  287 +-------------------------------------------------------------------------------
  288 +*/
  289 +flag floatx80_is_signaling_nan( floatx80 a )
  290 +{
  291 + //register int lr;
  292 + bits64 aLow;
  293 +
  294 + //__asm__("mov %0, lr" : : "g" (lr));
  295 + //fp_printk("floatx80_is_signalling_nan() called from 0x%08x\n",lr);
  296 + aLow = a.low & ~ LIT64( 0x4000000000000000 );
  297 + return
  298 + ( ( a.high & 0x7FFF ) == 0x7FFF )
  299 + && (bits64) ( aLow<<1 )
  300 + && ( a.low == aLow );
  301 +
  302 +}
  303 +
  304 +/*
  305 +-------------------------------------------------------------------------------
  306 +Returns the result of converting the extended double-precision floating-
  307 +point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
  308 +invalid exception is raised.
  309 +-------------------------------------------------------------------------------
  310 +*/
  311 +static commonNaNT floatx80ToCommonNaN( floatx80 a )
  312 +{
  313 + commonNaNT z;
  314 +
  315 + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
  316 + z.sign = a.high>>15;
  317 + z.low = 0;
  318 + z.high = a.low<<1;
  319 + return z;
  320 +
  321 +}
  322 +
  323 +/*
  324 +-------------------------------------------------------------------------------
  325 +Returns the result of converting the canonical NaN `a' to the extended
  326 +double-precision floating-point format.
  327 +-------------------------------------------------------------------------------
  328 +*/
  329 +static floatx80 commonNaNToFloatx80( commonNaNT a )
  330 +{
  331 + floatx80 z;
  332 +
  333 + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
  334 + z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
  335 + return z;
  336 +
  337 +}
  338 +
  339 +/*
  340 +-------------------------------------------------------------------------------
  341 +Takes two extended double-precision floating-point values `a' and `b', one
  342 +of which is a NaN, and returns the appropriate NaN result. If either `a' or
  343 +`b' is a signaling NaN, the invalid exception is raised.
  344 +-------------------------------------------------------------------------------
  345 +*/
  346 +static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
  347 +{
  348 + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
  349 +
  350 + aIsNaN = floatx80_is_nan( a );
  351 + aIsSignalingNaN = floatx80_is_signaling_nan( a );
  352 + bIsNaN = floatx80_is_nan( b );
  353 + bIsSignalingNaN = floatx80_is_signaling_nan( b );
  354 + a.low |= LIT64( 0xC000000000000000 );
  355 + b.low |= LIT64( 0xC000000000000000 );
  356 + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
  357 + if ( aIsNaN ) {
  358 + return ( aIsSignalingNaN & bIsNaN ) ? b : a;
  359 + }
  360 + else {
  361 + return b;
  362 + }
  363 +
  364 +}
  365 +
  366 +#endif
... ...
target-arm/nwfpe/softfloat.c 0 → 100644
  1 +/*
  2 +===============================================================================
  3 +
  4 +This C source file is part of the SoftFloat IEC/IEEE Floating-point
  5 +Arithmetic Package, Release 2.
  6 +
  7 +Written by John R. Hauser. This work was made possible in part by the
  8 +International Computer Science Institute, located at Suite 600, 1947 Center
  9 +Street, Berkeley, California 94704. Funding was partially provided by the
  10 +National Science Foundation under grant MIP-9311980. The original version
  11 +of this code was written as part of a project to build a fixed-point vector
  12 +processor in collaboration with the University of California at Berkeley,
  13 +overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  14 +is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  15 +arithmetic/softfloat.html'.
  16 +
  17 +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  18 +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  19 +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  20 +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  21 +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  22 +
  23 +Derivative works are acceptable, even for commercial purposes, so long as
  24 +(1) they include prominent notice that the work is derivative, and (2) they
  25 +include prominent notice akin to these three paragraphs for those parts of
  26 +this code that are retained.
  27 +
  28 +===============================================================================
  29 +*/
  30 +
  31 +#include "fpa11.h"
  32 +#include "milieu.h"
  33 +#include "softfloat.h"
  34 +
  35 +/*
  36 +-------------------------------------------------------------------------------
  37 +Floating-point rounding mode, extended double-precision rounding precision,
  38 +and exception flags.
  39 +-------------------------------------------------------------------------------
  40 +*/
  41 +int8 float_rounding_mode = float_round_nearest_even; /* current rounding mode; read by the roundAndPack* routines in this file */
  42 +int8 floatx80_rounding_precision = 80; /* starts at 80; roundAndPackFloatx80 handles 32 and 64 specially and treats anything else as 80 */
  43 +int8 float_exception_flags; /* file-scope object, so it starts at zero; exception bits are OR-ed in as they occur */
  44 +
  45 +/*
  46 +-------------------------------------------------------------------------------
  47 +Primitive arithmetic functions, including multi-word arithmetic, and
  48 +division and square root approximations. (Can be specialized to target if
  49 +desired.)
  50 +-------------------------------------------------------------------------------
  51 +*/
  52 +#include "softfloat-macros"
  53 +
  54 +/*
  55 +-------------------------------------------------------------------------------
  56 +Functions and definitions to determine: (1) whether tininess for underflow
  57 +is detected before or after rounding by default, (2) what (if anything)
  58 +happens when exceptions are raised, (3) how signaling NaNs are distinguished
  59 +from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
  60 +are propagated from function inputs to output. These details are target-
  61 +specific.
  62 +-------------------------------------------------------------------------------
  63 +*/
  64 +#include "softfloat-specialize"
  65 +
  66 +/*
  67 +-------------------------------------------------------------------------------
  68 +Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
  69 +and 7, and returns the properly rounded 32-bit integer corresponding to the
  70 +input. If `zSign' is nonzero, the input is negated before being converted
  71 +to an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point
  72 +input is simply rounded to an integer, with the inexact exception raised if
  73 +the input cannot be represented exactly as an integer. If the fixed-point
  74 +input is too large, however, the invalid exception is raised and the largest
  75 +positive or negative integer is returned.
  76 +-------------------------------------------------------------------------------
  77 +*/
  78 +static int32 roundAndPackInt32( flag zSign, bits64 absZ )
  79 +{
  80 + int8 roundingMode;
  81 + flag roundNearestEven;
  82 + int8 roundIncrement, roundBits;
  83 + int32 z;
  84 +
  85 + roundingMode = float_rounding_mode;
  86 + roundNearestEven = ( roundingMode == float_round_nearest_even );
  87 + roundIncrement = 0x40;
  88 + if ( ! roundNearestEven ) {
  89 + if ( roundingMode == float_round_to_zero ) {
  90 + roundIncrement = 0;
  91 + }
  92 + else {
  93 + roundIncrement = 0x7F;
  94 + if ( zSign ) {
  95 + if ( roundingMode == float_round_up ) roundIncrement = 0;
  96 + }
  97 + else {
  98 + if ( roundingMode == float_round_down ) roundIncrement = 0;
  99 + }
  100 + }
  101 + }
  102 + roundBits = absZ & 0x7F;
  103 + absZ = ( absZ + roundIncrement )>>7;
  104 + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
  105 + z = absZ;
  106 + if ( zSign ) z = - z;
  107 + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
  108 + float_exception_flags |= float_flag_invalid;
  109 + return zSign ? 0x80000000 : 0x7FFFFFFF;
  110 + }
  111 + if ( roundBits ) float_exception_flags |= float_flag_inexact;
  112 + return z;
  113 +
  114 +}
  115 +
  116 +/*
  117 +-------------------------------------------------------------------------------
  118 +Returns the fraction bits of the single-precision floating-point value `a'.
  119 +-------------------------------------------------------------------------------
  120 +*/
  121 +INLINE bits32 extractFloat32Frac( float32 a )
  122 +{
  123 +
  124 + return a & 0x007FFFFF;
  125 +
  126 +}
  127 +
  128 +/*
  129 +-------------------------------------------------------------------------------
  130 +Returns the exponent bits of the single-precision floating-point value `a'.
  131 +-------------------------------------------------------------------------------
  132 +*/
  133 +INLINE int16 extractFloat32Exp( float32 a )
  134 +{
  135 +
  136 + return ( a>>23 ) & 0xFF;
  137 +
  138 +}
  139 +
  140 +/*
  141 +-------------------------------------------------------------------------------
  142 +Returns the sign bit of the single-precision floating-point value `a'.
  143 +-------------------------------------------------------------------------------
  144 +*/
  145 +INLINE flag extractFloat32Sign( float32 a )
  146 +{
  147 +
  148 + return a>>31;
  149 +
  150 +}
  151 +
  152 +/*
  153 +-------------------------------------------------------------------------------
  154 +Normalizes the subnormal single-precision floating-point value represented
  155 +by the denormalized significand `aSig'. The normalized exponent and
  156 +significand are stored at the locations pointed to by `zExpPtr' and
  157 +`zSigPtr', respectively.
  158 +-------------------------------------------------------------------------------
  159 +*/
  160 +static void
  161 + normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
  162 +{
  163 + int8 shiftCount;
  164 +
  165 + shiftCount = countLeadingZeros32( aSig ) - 8;
  166 + *zSigPtr = aSig<<shiftCount;
  167 + *zExpPtr = 1 - shiftCount;
  168 +
  169 +}
  170 +
  171 +/*
  172 +-------------------------------------------------------------------------------
  173 +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
  174 +single-precision floating-point value, returning the result. After being
  175 +shifted into the proper positions, the three fields are simply added
  176 +together to form the result. This means that any integer portion of `zSig'
  177 +will be added into the exponent. Since a properly normalized significand
  178 +will have an integer portion equal to 1, the `zExp' input should be 1 less
  179 +than the desired result exponent whenever `zSig' is a complete, normalized
  180 +significand.
  181 +-------------------------------------------------------------------------------
  182 +*/
  183 +INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
  184 +{
  185 + return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
  186 +}
  187 +
  188 +/*
  189 +-------------------------------------------------------------------------------
  190 +Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  191 +and significand `zSig', and returns the proper single-precision floating-
  192 +point value corresponding to the abstract input. Ordinarily, the abstract
  193 +value is simply rounded and packed into the single-precision format, with
  194 +the inexact exception raised if the abstract input cannot be represented
  195 +exactly. If the abstract value is too large, however, the overflow and
  196 +inexact exceptions are raised and an infinity or maximal finite value is
  197 +returned. If the abstract value is too small, the input value is rounded to
  198 +a subnormal number, and the underflow and inexact exceptions are raised if
  199 +the abstract input cannot be represented exactly as a subnormal single-
  200 +precision floating-point number.
  201 + The input significand `zSig' has its binary point between bits 30
  202 +and 29, which is 7 bits to the left of the usual location. This shifted
  203 +significand must be normalized or smaller. If `zSig' is not normalized,
  204 +`zExp' must be 0; in that case, the result returned is a subnormal number,
  205 +and it must not require rounding. In the usual case that `zSig' is
  206 +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
  207 +The handling of underflow and overflow follows the IEC/IEEE Standard for
  208 +Binary Floating-point Arithmetic.
  209 +-------------------------------------------------------------------------------
  210 +*/
  211 +static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
  212 +{
  213 + int8 roundingMode;
  214 + flag roundNearestEven;
  215 + int8 roundIncrement, roundBits;
  216 + flag isTiny;
  217 +
  218 + roundingMode = float_rounding_mode;
  219 + roundNearestEven = ( roundingMode == float_round_nearest_even );
  220 + roundIncrement = 0x40;
  221 + if ( ! roundNearestEven ) {
  222 + if ( roundingMode == float_round_to_zero ) {
  223 + roundIncrement = 0;
  224 + }
  225 + else {
  226 + roundIncrement = 0x7F;
  227 + if ( zSign ) {
  228 + if ( roundingMode == float_round_up ) roundIncrement = 0;
  229 + }
  230 + else {
  231 + if ( roundingMode == float_round_down ) roundIncrement = 0;
  232 + }
  233 + }
  234 + }
  235 + roundBits = zSig & 0x7F;
  236 + if ( 0xFD <= (bits16) zExp ) {
  237 + if ( ( 0xFD < zExp )
  238 + || ( ( zExp == 0xFD )
  239 + && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
  240 + ) {
  241 + float_raise( float_flag_overflow | float_flag_inexact );
  242 + return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
  243 + }
  244 + if ( zExp < 0 ) {
  245 + isTiny =
  246 + ( float_detect_tininess == float_tininess_before_rounding )
  247 + || ( zExp < -1 )
  248 + || ( zSig + roundIncrement < 0x80000000 );
  249 + shift32RightJamming( zSig, - zExp, &zSig );
  250 + zExp = 0;
  251 + roundBits = zSig & 0x7F;
  252 + if ( isTiny && roundBits ) float_raise( float_flag_underflow );
  253 + }
  254 + }
  255 + if ( roundBits ) float_exception_flags |= float_flag_inexact;
  256 + zSig = ( zSig + roundIncrement )>>7;
  257 + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
  258 + if ( zSig == 0 ) zExp = 0;
  259 + return packFloat32( zSign, zExp, zSig );
  260 +
  261 +}
  262 +
  263 +/*
  264 +-------------------------------------------------------------------------------
  265 +Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  266 +and significand `zSig', and returns the proper single-precision floating-
  267 +point value corresponding to the abstract input. This routine is just like
  268 +`roundAndPackFloat32' except that `zSig' does not have to be normalized in
  269 +any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
  270 +point exponent.
  271 +-------------------------------------------------------------------------------
  272 +*/
  273 +static float32
  274 + normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
  275 +{
  276 + int8 shiftCount;
  277 +
  278 + shiftCount = countLeadingZeros32( zSig ) - 1;
  279 + return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
  280 +
  281 +}
  282 +
  283 +/*
  284 +-------------------------------------------------------------------------------
  285 +Returns the fraction bits of the double-precision floating-point value `a'.
  286 +-------------------------------------------------------------------------------
  287 +*/
  288 +INLINE bits64 extractFloat64Frac( float64 a )
  289 +{
  290 +
  291 + return a & LIT64( 0x000FFFFFFFFFFFFF );
  292 +
  293 +}
  294 +
  295 +/*
  296 +-------------------------------------------------------------------------------
  297 +Returns the exponent bits of the double-precision floating-point value `a'.
  298 +-------------------------------------------------------------------------------
  299 +*/
  300 +INLINE int16 extractFloat64Exp( float64 a )
  301 +{
  302 +
  303 + return ( a>>52 ) & 0x7FF;
  304 +
  305 +}
  306 +
  307 +/*
  308 +-------------------------------------------------------------------------------
  309 +Returns the sign bit of the double-precision floating-point value `a'.
  310 +-------------------------------------------------------------------------------
  311 +*/
  312 +INLINE flag extractFloat64Sign( float64 a )
  313 +{
  314 +
  315 + return a>>63;
  316 +
  317 +}
  318 +
  319 +/*
  320 +-------------------------------------------------------------------------------
  321 +Normalizes the subnormal double-precision floating-point value represented
  322 +by the denormalized significand `aSig'. The normalized exponent and
  323 +significand are stored at the locations pointed to by `zExpPtr' and
  324 +`zSigPtr', respectively.
  325 +-------------------------------------------------------------------------------
  326 +*/
  327 +static void
  328 + normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
  329 +{
  330 + int8 shiftCount;
  331 +
  332 + shiftCount = countLeadingZeros64( aSig ) - 11;
  333 + *zSigPtr = aSig<<shiftCount;
  334 + *zExpPtr = 1 - shiftCount;
  335 +
  336 +}
  337 +
  338 +/*
  339 +-------------------------------------------------------------------------------
  340 +Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
  341 +double-precision floating-point value, returning the result. After being
  342 +shifted into the proper positions, the three fields are simply added
  343 +together to form the result. This means that any integer portion of `zSig'
  344 +will be added into the exponent. Since a properly normalized significand
  345 +will have an integer portion equal to 1, the `zExp' input should be 1 less
  346 +than the desired result exponent whenever `zSig' is a complete, normalized
  347 +significand.
  348 +-------------------------------------------------------------------------------
  349 +*/
  350 +INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
  351 +{
  352 +
  353 + return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
  354 +
  355 +}
  356 +
  357 +/*
  358 +-------------------------------------------------------------------------------
  359 +Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  360 +and significand `zSig', and returns the proper double-precision floating-
  361 +point value corresponding to the abstract input. Ordinarily, the abstract
  362 +value is simply rounded and packed into the double-precision format, with
  363 +the inexact exception raised if the abstract input cannot be represented
  364 +exactly. If the abstract value is too large, however, the overflow and
  365 +inexact exceptions are raised and an infinity or maximal finite value is
  366 +returned. If the abstract value is too small, the input value is rounded to
  367 +a subnormal number, and the underflow and inexact exceptions are raised if
  368 +the abstract input cannot be represented exactly as a subnormal double-
  369 +precision floating-point number.
  370 + The input significand `zSig' has its binary point between bits 62
  371 +and 61, which is 10 bits to the left of the usual location. This shifted
  372 +significand must be normalized or smaller. If `zSig' is not normalized,
  373 +`zExp' must be 0; in that case, the result returned is a subnormal number,
  374 +and it must not require rounding. In the usual case that `zSig' is
  375 +normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
  376 +The handling of underflow and overflow follows the IEC/IEEE Standard for
  377 +Binary Floating-point Arithmetic.
  378 +-------------------------------------------------------------------------------
  379 +*/
  380 +static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
  381 +{
  382 + int8 roundingMode;
  383 + flag roundNearestEven;
  384 + int16 roundIncrement, roundBits;
  385 + flag isTiny;
  386 +
  387 + roundingMode = float_rounding_mode;
  388 + roundNearestEven = ( roundingMode == float_round_nearest_even );
  389 + roundIncrement = 0x200;
  390 + if ( ! roundNearestEven ) {
  391 + if ( roundingMode == float_round_to_zero ) {
  392 + roundIncrement = 0;
  393 + }
  394 + else {
  395 + roundIncrement = 0x3FF;
  396 + if ( zSign ) {
  397 + if ( roundingMode == float_round_up ) roundIncrement = 0;
  398 + }
  399 + else {
  400 + if ( roundingMode == float_round_down ) roundIncrement = 0;
  401 + }
  402 + }
  403 + }
  404 + roundBits = zSig & 0x3FF;
  405 + if ( 0x7FD <= (bits16) zExp ) {
  406 + if ( ( 0x7FD < zExp )
  407 + || ( ( zExp == 0x7FD )
  408 + && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
  409 + ) {
  410 + //register int lr = __builtin_return_address(0);
  411 + //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
  412 + float_raise( float_flag_overflow | float_flag_inexact );
  413 + return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
  414 + }
  415 + if ( zExp < 0 ) {
  416 + isTiny =
  417 + ( float_detect_tininess == float_tininess_before_rounding )
  418 + || ( zExp < -1 )
  419 + || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
  420 + shift64RightJamming( zSig, - zExp, &zSig );
  421 + zExp = 0;
  422 + roundBits = zSig & 0x3FF;
  423 + if ( isTiny && roundBits ) float_raise( float_flag_underflow );
  424 + }
  425 + }
  426 + if ( roundBits ) float_exception_flags |= float_flag_inexact;
  427 + zSig = ( zSig + roundIncrement )>>10;
  428 + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
  429 + if ( zSig == 0 ) zExp = 0;
  430 + return packFloat64( zSign, zExp, zSig );
  431 +
  432 +}
  433 +
  434 +/*
  435 +-------------------------------------------------------------------------------
  436 +Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  437 +and significand `zSig', and returns the proper double-precision floating-
  438 +point value corresponding to the abstract input. This routine is just like
  439 +`roundAndPackFloat64' except that `zSig' does not have to be normalized in
  440 +any way. In all cases, `zExp' must be 1 less than the ``true'' floating-
  441 +point exponent.
  442 +-------------------------------------------------------------------------------
  443 +*/
  444 +static float64
  445 + normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
  446 +{
  447 + int8 shiftCount;
  448 +
  449 + shiftCount = countLeadingZeros64( zSig ) - 1;
  450 + return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
  451 +
  452 +}
  453 +
  454 +#ifdef FLOATX80
  455 +
  456 +/*
  457 +-------------------------------------------------------------------------------
  458 +Returns the fraction bits of the extended double-precision floating-point
  459 +value `a'.
  460 +-------------------------------------------------------------------------------
  461 +*/
  462 +INLINE bits64 extractFloatx80Frac( floatx80 a )
  463 +{
  464 +
  465 + return a.low;
  466 +
  467 +}
  468 +
  469 +/*
  470 +-------------------------------------------------------------------------------
  471 +Returns the exponent bits of the extended double-precision floating-point
  472 +value `a'.
  473 +-------------------------------------------------------------------------------
  474 +*/
  475 +INLINE int32 extractFloatx80Exp( floatx80 a )
  476 +{
  477 +
  478 + return a.high & 0x7FFF;
  479 +
  480 +}
  481 +
  482 +/*
  483 +-------------------------------------------------------------------------------
  484 +Returns the sign bit of the extended double-precision floating-point value
  485 +`a'.
  486 +-------------------------------------------------------------------------------
  487 +*/
  488 +INLINE flag extractFloatx80Sign( floatx80 a )
  489 +{
  490 +
  491 + return a.high>>15;
  492 +
  493 +}
  494 +
  495 +/*
  496 +-------------------------------------------------------------------------------
  497 +Normalizes the subnormal extended double-precision floating-point value
  498 +represented by the denormalized significand `aSig'. The normalized exponent
  499 +and significand are stored at the locations pointed to by `zExpPtr' and
  500 +`zSigPtr', respectively.
  501 +-------------------------------------------------------------------------------
  502 +*/
  503 +static void
  504 + normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
  505 +{
  506 + int8 shiftCount;
  507 +
  508 + shiftCount = countLeadingZeros64( aSig );
  509 + *zSigPtr = aSig<<shiftCount;
  510 + *zExpPtr = 1 - shiftCount;
  511 +
  512 +}
  513 +
  514 +/*
  515 +-------------------------------------------------------------------------------
  516 +Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
  517 +extended double-precision floating-point value, returning the result.
  518 +-------------------------------------------------------------------------------
  519 +*/
  520 +INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
  521 +{
  522 + floatx80 z;
  523 +
  524 + z.low = zSig;
  525 + z.high = ( ( (bits16) zSign )<<15 ) + zExp;
  526 + return z;
  527 +
  528 +}
  529 +
  530 +/*
  531 +-------------------------------------------------------------------------------
  532 +Takes an abstract floating-point value having sign `zSign', exponent `zExp',
  533 +and extended significand formed by the concatenation of `zSig0' and `zSig1',
  534 +and returns the proper extended double-precision floating-point value
  535 +corresponding to the abstract input. Ordinarily, the abstract value is
  536 +rounded and packed into the extended double-precision format, with the
  537 +inexact exception raised if the abstract input cannot be represented
  538 +exactly. If the abstract value is too large, however, the overflow and
  539 +inexact exceptions are raised and an infinity or maximal finite value is
  540 +returned. If the abstract value is too small, the input value is rounded to
  541 +a subnormal number, and the underflow and inexact exceptions are raised if
  542 +the abstract input cannot be represented exactly as a subnormal extended
  543 +double-precision floating-point number.
  544 + If `roundingPrecision' is 32 or 64, the result is rounded to the same
  545 +number of bits as single or double precision, respectively. Otherwise, the
  546 +result is rounded to the full precision of the extended double-precision
  547 +format.
  548 + The input significand must be normalized or smaller. If the input
  549 +significand is not normalized, `zExp' must be 0; in that case, the result
  550 +returned is a subnormal number, and it must not require rounding. The
  551 +handling of underflow and overflow follows the IEC/IEEE Standard for Binary
  552 +Floating-point Arithmetic.
  553 +-------------------------------------------------------------------------------
  554 +*/
  555 +static floatx80
  556 + roundAndPackFloatx80(
  557 + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
  558 + )
  559 +{
    /*
     * Layout of this function: a reduced-precision path (roundingPrecision
     * 32 or 64) that rounds inside zSig0 using a mask/increment pair, and a
     * full-precision path (label `precision80') that rounds zSig0 according
     * to zSig1.  Both paths share the `overflow' label, which lives inside
     * the full-precision path and is reached from the reduced path by goto.
     */
  560 + int8 roundingMode;
  561 + flag roundNearestEven, increment, isTiny;
  562 + int64 roundIncrement, roundMask, roundBits;
  563 +
  564 + roundingMode = float_rounding_mode;
  565 + roundNearestEven = ( roundingMode == float_round_nearest_even );
  566 + if ( roundingPrecision == 80 ) goto precision80;
    /* roundMask covers the zSig0 bits that are discarded; roundIncrement is
       half of that range (the nearest-even default). */
  567 + if ( roundingPrecision == 64 ) {
  568 + roundIncrement = LIT64( 0x0000000000000400 );
  569 + roundMask = LIT64( 0x00000000000007FF );
  570 + }
  571 + else if ( roundingPrecision == 32 ) {
  572 + roundIncrement = LIT64( 0x0000008000000000 );
  573 + roundMask = LIT64( 0x000000FFFFFFFFFF );
  574 + }
  575 + else {
    /* Any other precision value is treated as full 80-bit precision. */
  576 + goto precision80;
  577 + }
    /* Fold zSig1 into bit 0 of zSig0 as a sticky bit: any nonzero low word
       keeps the value from looking exact. */
  578 + zSig0 |= ( zSig1 != 0 );
  579 + if ( ! roundNearestEven ) {
  580 + if ( roundingMode == float_round_to_zero ) {
  581 + roundIncrement = 0;
  582 + }
  583 + else {
    /* Directed rounding: add the full mask so any discarded bit carries,
       unless the direction is toward zero for this sign. */
  584 + roundIncrement = roundMask;
  585 + if ( zSign ) {
  586 + if ( roundingMode == float_round_up ) roundIncrement = 0;
  587 + }
  588 + else {
  589 + if ( roundingMode == float_round_down ) roundIncrement = 0;
  590 + }
  591 + }
  592 + }
  593 + roundBits = zSig0 & roundMask;
    /* Presumably bits32 is an unsigned 32-bit type, in which case this one
       comparison is true both for zExp <= 0 (zExp - 1 wraps to a large
       value) and for zExp >= 0x7FFE -- TODO confirm against the typedef. */
  594 + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
    /* zSig0 + roundIncrement < zSig0 detects a carry out of 64 bits. */
  595 + if ( ( 0x7FFE < zExp )
  596 + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
  597 + ) {
  598 + goto overflow;
  599 + }
  600 + if ( zExp <= 0 ) {
    /* Subnormal result: shift the significand right by 1 - zExp (with
       jamming) and round at exponent 0. */
  601 + isTiny =
  602 + ( float_detect_tininess == float_tininess_before_rounding )
  603 + || ( zExp < 0 )
  604 + || ( zSig0 <= zSig0 + roundIncrement );
  605 + shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
  606 + zExp = 0;
  607 + roundBits = zSig0 & roundMask;
  608 + if ( isTiny && roundBits ) float_raise( float_flag_underflow );
  609 + if ( roundBits ) float_exception_flags |= float_flag_inexact;
  610 + zSig0 += roundIncrement;
    /* Rounding carried into bit 63, so the result has exponent 1. */
  611 + if ( (sbits64) zSig0 < 0 ) zExp = 1;
    /* Exact tie in nearest-even mode: widen the mask by one bit so the
       lowest retained bit is cleared as well. */
  612 + roundIncrement = roundMask + 1;
  613 + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
  614 + roundMask |= roundIncrement;
  615 + }
  616 + zSig0 &= ~ roundMask;
  617 + return packFloatx80( zSign, zExp, zSig0 );
  618 + }
  619 + }
  620 + if ( roundBits ) float_exception_flags |= float_flag_inexact;
  621 + zSig0 += roundIncrement;
    /* The addition wrapped past 64 bits: bump the exponent and restart the
       significand at 0x8000000000000000. */
  622 + if ( zSig0 < roundIncrement ) {
  623 + ++zExp;
  624 + zSig0 = LIT64( 0x8000000000000000 );
  625 + }
    /* Same tie handling as in the subnormal branch above. */
  626 + roundIncrement = roundMask + 1;
  627 + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
  628 + roundMask |= roundIncrement;
  629 + }
  630 + zSig0 &= ~ roundMask;
  631 + if ( zSig0 == 0 ) zExp = 0;
  632 + return packFloatx80( zSign, zExp, zSig0 );
  633 + precision80:
    /* Full precision: in nearest-even mode round up when the top bit of
       zSig1 is set; in the directed modes round up only when rounding away
       from zero for this sign and zSig1 is nonzero. */
  634 + increment = ( (sbits64) zSig1 < 0 );
  635 + if ( ! roundNearestEven ) {
  636 + if ( roundingMode == float_round_to_zero ) {
  637 + increment = 0;
  638 + }
  639 + else {
  640 + if ( zSign ) {
  641 + increment = ( roundingMode == float_round_down ) && zSig1;
  642 + }
  643 + else {
  644 + increment = ( roundingMode == float_round_up ) && zSig1;
  645 + }
  646 + }
  647 + }
  648 + if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
  649 + if ( ( 0x7FFE < zExp )
  650 + || ( ( zExp == 0x7FFE )
  651 + && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
  652 + && increment
  653 + )
  654 + ) {
    /* roundMask = 0 makes ~roundMask all ones for this path.  When the
       label is entered by goto from the reduced-precision path, roundMask
       still holds that path's mask. */
  655 + roundMask = 0;
  656 + overflow:
  657 + float_raise( float_flag_overflow | float_flag_inexact );
    /* Modes that round toward zero for this sign return exponent 0x7FFE
       with significand ~roundMask; all others return exponent 0x7FFF with
       significand 0x8000000000000000. */
  658 + if ( ( roundingMode == float_round_to_zero )
  659 + || ( zSign && ( roundingMode == float_round_up ) )
  660 + || ( ! zSign && ( roundingMode == float_round_down ) )
  661 + ) {
  662 + return packFloatx80( zSign, 0x7FFE, ~ roundMask );
  663 + }
  664 + return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  665 + }
  666 + if ( zExp <= 0 ) {
    /* Subnormal result: shift the 128-bit significand right by 1 - zExp,
       then recompute the increment from the shifted zSig1. */
  667 + isTiny =
  668 + ( float_detect_tininess == float_tininess_before_rounding )
  669 + || ( zExp < 0 )
  670 + || ! increment
  671 + || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
  672 + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
  673 + zExp = 0;
  674 + if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
  675 + if ( zSig1 ) float_exception_flags |= float_flag_inexact;
  676 + if ( roundNearestEven ) {
  677 + increment = ( (sbits64) zSig1 < 0 );
  678 + }
  679 + else {
  680 + if ( zSign ) {
  681 + increment = ( roundingMode == float_round_down ) && zSig1;
  682 + }
  683 + else {
  684 + increment = ( roundingMode == float_round_up ) && zSig1;
  685 + }
  686 + }
  687 + if ( increment ) {
  688 + ++zSig0;
    /* zSig1 + zSig1 == 0 holds only for zSig1 == 0 or 0x8000000000000000;
       with nearest-even and increment set that is an exact tie, so the
       lowest bit of zSig0 is cleared. */
  689 + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
  690 + if ( (sbits64) zSig0 < 0 ) zExp = 1;
  691 + }
  692 + return packFloatx80( zSign, zExp, zSig0 );
  693 + }
  694 + }
  695 + if ( zSig1 ) float_exception_flags |= float_flag_inexact;
  696 + if ( increment ) {
  697 + ++zSig0;
    /* zSig0 wrapped to zero: bump the exponent and restart the significand
       at 0x8000000000000000. */
  698 + if ( zSig0 == 0 ) {
  699 + ++zExp;
  700 + zSig0 = LIT64( 0x8000000000000000 );
  701 + }
  702 + else {
    /* Exact tie in nearest-even mode clears the lowest bit of zSig0. */
  703 + zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
  704 + }
  705 + }
  706 + else {
  707 + if ( zSig0 == 0 ) zExp = 0;
  708 + }
  709 +
  710 + return packFloatx80( zSign, zExp, zSig0 );
  711 +}
  712 +
  713 +/*
  714 +-------------------------------------------------------------------------------
  715 +Takes an abstract floating-point value having sign `zSign', exponent
  716 +`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
  717 +and returns the proper extended double-precision floating-point value
  718 +corresponding to the abstract input. This routine is just like
  719 +`roundAndPackFloatx80' except that the input significand does not have to be
  720 +normalized.
  721 +-------------------------------------------------------------------------------
  722 +*/
  723 +static floatx80
  724 + normalizeRoundAndPackFloatx80(
  725 + int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
  726 + )
  727 +{
  728 + int8 shiftCount;
  729 +
  730 + if ( zSig0 == 0 ) {
  731 + zSig0 = zSig1;
  732 + zSig1 = 0;
  733 + zExp -= 64;
  734 + }
  735 + shiftCount = countLeadingZeros64( zSig0 );
  736 + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
  737 + zExp -= shiftCount;
  738 + return
  739 + roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
  740 +
  741 +}
  742 +
  743 +#endif
  744 +
  745 +/*
  746 +-------------------------------------------------------------------------------
  747 +Returns the result of converting the 32-bit two's complement integer `a' to
  748 +the single-precision floating-point format. The conversion is performed
  749 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  750 +-------------------------------------------------------------------------------
  751 +*/
  752 +float32 int32_to_float32( int32 a )
  753 +{
  754 + flag zSign;
  755 +
  756 + if ( a == 0 ) return 0;
  757 + if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
  758 + zSign = ( a < 0 );
  759 + return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
  760 +
  761 +}
  762 +
  763 +/*
  764 +-------------------------------------------------------------------------------
  765 +Returns the result of converting the 32-bit two's complement integer `a' to
  766 +the double-precision floating-point format. The conversion is performed
  767 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  768 +-------------------------------------------------------------------------------
  769 +*/
  770 +float64 int32_to_float64( int32 a )
  771 +{
  772 + flag aSign;
  773 + uint32 absA;
  774 + int8 shiftCount;
  775 + bits64 zSig;
  776 +
  777 + if ( a == 0 ) return 0;
  778 + aSign = ( a < 0 );
  779 + absA = aSign ? - a : a;
  780 + shiftCount = countLeadingZeros32( absA ) + 21;
  781 + zSig = absA;
  782 + return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
  783 +
  784 +}
  785 +
  786 +#ifdef FLOATX80
  787 +
  788 +/*
  789 +-------------------------------------------------------------------------------
  790 +Returns the result of converting the 32-bit two's complement integer `a'
  791 +to the extended double-precision floating-point format. The conversion
  792 +is performed according to the IEC/IEEE Standard for Binary Floating-point
  793 +Arithmetic.
  794 +-------------------------------------------------------------------------------
  795 +*/
  796 +floatx80 int32_to_floatx80( int32 a )
  797 +{
  798 + flag zSign;
  799 + uint32 absA;
  800 + int8 shiftCount;
  801 + bits64 zSig;
  802 +
  803 + if ( a == 0 ) return packFloatx80( 0, 0, 0 );
  804 + zSign = ( a < 0 );
  805 + absA = zSign ? - a : a;
  806 + shiftCount = countLeadingZeros32( absA ) + 32;
  807 + zSig = absA;
  808 + return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
  809 +
  810 +}
  811 +
  812 +#endif
  813 +
  814 +/*
  815 +-------------------------------------------------------------------------------
  816 +Returns the result of converting the single-precision floating-point value
  817 +`a' to the 32-bit two's complement integer format. The conversion is
  818 +performed according to the IEC/IEEE Standard for Binary Floating-point
  819 +Arithmetic---which means in particular that the conversion is rounded
  820 +according to the current rounding mode. If `a' is a NaN, the largest
  821 +positive integer is returned. Otherwise, if the conversion overflows, the
  822 +largest integer with the same sign as `a' is returned.
  823 +-------------------------------------------------------------------------------
  824 +*/
  825 +int32 float32_to_int32( float32 a )
  826 +{
  827 + flag aSign;
  828 + int16 aExp, shiftCount;
  829 + bits32 aSig;
  830 + bits64 zSig;
  831 +
  832 + aSig = extractFloat32Frac( a );
  833 + aExp = extractFloat32Exp( a );
  834 + aSign = extractFloat32Sign( a );
  835 + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
  836 + if ( aExp ) aSig |= 0x00800000;
  837 + shiftCount = 0xAF - aExp;
  838 + zSig = aSig;
  839 + zSig <<= 32;
  840 + if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
  841 + return roundAndPackInt32( aSign, zSig );
  842 +
  843 +}
  844 +
  845 +/*
  846 +-------------------------------------------------------------------------------
  847 +Returns the result of converting the single-precision floating-point value
  848 +`a' to the 32-bit two's complement integer format. The conversion is
  849 +performed according to the IEC/IEEE Standard for Binary Floating-point
  850 +Arithmetic, except that the conversion is always rounded toward zero. If
  851 +`a' is a NaN, the largest positive integer is returned. Otherwise, if the
  852 +conversion overflows, the largest integer with the same sign as `a' is
  853 +returned.
  854 +-------------------------------------------------------------------------------
  855 +*/
  856 +int32 float32_to_int32_round_to_zero( float32 a )
  857 +{
  858 + flag aSign;
  859 + int16 aExp, shiftCount;
  860 + bits32 aSig;
  861 + int32 z;
  862 +
  863 + aSig = extractFloat32Frac( a );
  864 + aExp = extractFloat32Exp( a );
  865 + aSign = extractFloat32Sign( a );
  866 + shiftCount = aExp - 0x9E;
  867 + if ( 0 <= shiftCount ) {
  868 + if ( a == 0xCF000000 ) return 0x80000000;
  869 + float_raise( float_flag_invalid );
  870 + if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
  871 + return 0x80000000;
  872 + }
  873 + else if ( aExp <= 0x7E ) {
  874 + if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
  875 + return 0;
  876 + }
  877 + aSig = ( aSig | 0x00800000 )<<8;
  878 + z = aSig>>( - shiftCount );
  879 + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
  880 + float_exception_flags |= float_flag_inexact;
  881 + }
  882 + return aSign ? - z : z;
  883 +
  884 +}
  885 +
  886 +/*
  887 +-------------------------------------------------------------------------------
  888 +Returns the result of converting the single-precision floating-point value
  889 +`a' to the double-precision floating-point format. The conversion is
  890 +performed according to the IEC/IEEE Standard for Binary Floating-point
  891 +Arithmetic.
  892 +-------------------------------------------------------------------------------
  893 +*/
  894 +float64 float32_to_float64( float32 a )
  895 +{
  896 + flag aSign;
  897 + int16 aExp;
  898 + bits32 aSig;
  899 +
  900 + aSig = extractFloat32Frac( a );
  901 + aExp = extractFloat32Exp( a );
  902 + aSign = extractFloat32Sign( a );
  903 + if ( aExp == 0xFF ) {
  904 + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
  905 + return packFloat64( aSign, 0x7FF, 0 );
  906 + }
  907 + if ( aExp == 0 ) {
  908 + if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
  909 + normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  910 + --aExp;
  911 + }
  912 + return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
  913 +
  914 +}
  915 +
  916 +#ifdef FLOATX80
  917 +
  918 +/*
  919 +-------------------------------------------------------------------------------
  920 +Returns the result of converting the single-precision floating-point value
  921 +`a' to the extended double-precision floating-point format. The conversion
  922 +is performed according to the IEC/IEEE Standard for Binary Floating-point
  923 +Arithmetic.
  924 +-------------------------------------------------------------------------------
  925 +*/
  926 +floatx80 float32_to_floatx80( float32 a )
  927 +{
  928 + flag aSign;
  929 + int16 aExp;
  930 + bits32 aSig;
  931 +
  932 + aSig = extractFloat32Frac( a );
  933 + aExp = extractFloat32Exp( a );
  934 + aSign = extractFloat32Sign( a );
  935 + if ( aExp == 0xFF ) {
  936 + if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
  937 + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  938 + }
  939 + if ( aExp == 0 ) {
  940 + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
  941 + normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  942 + }
  943 + aSig |= 0x00800000;
  944 + return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
  945 +
  946 +}
  947 +
  948 +#endif
  949 +
  950 +/*
  951 +-------------------------------------------------------------------------------
  952 +Rounds the single-precision floating-point value `a' to an integer, and
  953 +returns the result as a single-precision floating-point value. The
  954 +operation is performed according to the IEC/IEEE Standard for Binary
  955 +Floating-point Arithmetic.
  956 +-------------------------------------------------------------------------------
  957 +*/
  958 +float32 float32_round_to_int( float32 a )
  959 +{
  960 + flag aSign;
  961 + int16 aExp;
  962 + bits32 lastBitMask, roundBitsMask;
  963 + int8 roundingMode;
  964 + float32 z;
  965 +
  966 + aExp = extractFloat32Exp( a );
  967 + if ( 0x96 <= aExp ) {
  968 + if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
  969 + return propagateFloat32NaN( a, a );
  970 + }
  971 + return a;
  972 + }
  973 + if ( aExp <= 0x7E ) {
  974 + if ( (bits32) ( a<<1 ) == 0 ) return a;
  975 + float_exception_flags |= float_flag_inexact;
  976 + aSign = extractFloat32Sign( a );
  977 + switch ( float_rounding_mode ) {
  978 + case float_round_nearest_even:
  979 + if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
  980 + return packFloat32( aSign, 0x7F, 0 );
  981 + }
  982 + break;
  983 + case float_round_down:
  984 + return aSign ? 0xBF800000 : 0;
  985 + case float_round_up:
  986 + return aSign ? 0x80000000 : 0x3F800000;
  987 + }
  988 + return packFloat32( aSign, 0, 0 );
  989 + }
  990 + lastBitMask = 1;
  991 + lastBitMask <<= 0x96 - aExp;
  992 + roundBitsMask = lastBitMask - 1;
  993 + z = a;
  994 + roundingMode = float_rounding_mode;
  995 + if ( roundingMode == float_round_nearest_even ) {
  996 + z += lastBitMask>>1;
  997 + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
  998 + }
  999 + else if ( roundingMode != float_round_to_zero ) {
  1000 + if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
  1001 + z += roundBitsMask;
  1002 + }
  1003 + }
  1004 + z &= ~ roundBitsMask;
  1005 + if ( z != a ) float_exception_flags |= float_flag_inexact;
  1006 + return z;
  1007 +
  1008 +}
  1009 +
  1010 +/*
  1011 +-------------------------------------------------------------------------------
  1012 +Returns the result of adding the absolute values of the single-precision
  1013 +floating-point values `a' and `b'. If `zSign' is true, the sum is negated
  1014 +before being returned. `zSign' is ignored if the result is a NaN. The
  1015 +addition is performed according to the IEC/IEEE Standard for Binary
  1016 +Floating-point Arithmetic.
  1017 +-------------------------------------------------------------------------------
  1018 +*/
  1019 +static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
  1020 +{
  1021 + int16 aExp, bExp, zExp;
  1022 + bits32 aSig, bSig, zSig;
  1023 + int16 expDiff;
  1024 +
  1025 + aSig = extractFloat32Frac( a );
  1026 + aExp = extractFloat32Exp( a );
  1027 + bSig = extractFloat32Frac( b );
  1028 + bExp = extractFloat32Exp( b );
  1029 + expDiff = aExp - bExp;
  1030 + aSig <<= 6;
  1031 + bSig <<= 6;
  1032 + if ( 0 < expDiff ) {
  1033 + if ( aExp == 0xFF ) {
  1034 + if ( aSig ) return propagateFloat32NaN( a, b );
  1035 + return a;
  1036 + }
  1037 + if ( bExp == 0 ) {
  1038 + --expDiff;
  1039 + }
  1040 + else {
  1041 + bSig |= 0x20000000;
  1042 + }
  1043 + shift32RightJamming( bSig, expDiff, &bSig );
  1044 + zExp = aExp;
  1045 + }
  1046 + else if ( expDiff < 0 ) {
  1047 + if ( bExp == 0xFF ) {
  1048 + if ( bSig ) return propagateFloat32NaN( a, b );
  1049 + return packFloat32( zSign, 0xFF, 0 );
  1050 + }
  1051 + if ( aExp == 0 ) {
  1052 + ++expDiff;
  1053 + }
  1054 + else {
  1055 + aSig |= 0x20000000;
  1056 + }
  1057 + shift32RightJamming( aSig, - expDiff, &aSig );
  1058 + zExp = bExp;
  1059 + }
  1060 + else {
  1061 + if ( aExp == 0xFF ) {
  1062 + if ( aSig | bSig ) return propagateFloat32NaN( a, b );
  1063 + return a;
  1064 + }
  1065 + if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
  1066 + zSig = 0x40000000 + aSig + bSig;
  1067 + zExp = aExp;
  1068 + goto roundAndPack;
  1069 + }
  1070 + aSig |= 0x20000000;
  1071 + zSig = ( aSig + bSig )<<1;
  1072 + --zExp;
  1073 + if ( (sbits32) zSig < 0 ) {
  1074 + zSig = aSig + bSig;
  1075 + ++zExp;
  1076 + }
  1077 + roundAndPack:
  1078 + return roundAndPackFloat32( zSign, zExp, zSig );
  1079 +
  1080 +}
  1081 +
  1082 +/*
  1083 +-------------------------------------------------------------------------------
  1084 +Returns the result of subtracting the absolute values of the single-
  1085 +precision floating-point values `a' and `b'. If `zSign' is true, the
  1086 +difference is negated before being returned. `zSign' is ignored if the
  1087 +result is a NaN. The subtraction is performed according to the IEC/IEEE
  1088 +Standard for Binary Floating-point Arithmetic.
  1089 +-------------------------------------------------------------------------------
  1090 +*/
  1091 +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
  1092 +{
  1093 + int16 aExp, bExp, zExp;
  1094 + bits32 aSig, bSig, zSig;
  1095 + int16 expDiff;
  1096 +
  1097 + aSig = extractFloat32Frac( a );
  1098 + aExp = extractFloat32Exp( a );
  1099 + bSig = extractFloat32Frac( b );
  1100 + bExp = extractFloat32Exp( b );
  1101 + expDiff = aExp - bExp;
  1102 + aSig <<= 7;
  1103 + bSig <<= 7;
  1104 + if ( 0 < expDiff ) goto aExpBigger;
  1105 + if ( expDiff < 0 ) goto bExpBigger;
  1106 + if ( aExp == 0xFF ) {
  1107 + if ( aSig | bSig ) return propagateFloat32NaN( a, b );
  1108 + float_raise( float_flag_invalid );
  1109 + return float32_default_nan;
  1110 + }
  1111 + if ( aExp == 0 ) {
  1112 + aExp = 1;
  1113 + bExp = 1;
  1114 + }
  1115 + if ( bSig < aSig ) goto aBigger;
  1116 + if ( aSig < bSig ) goto bBigger;
  1117 + return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
  1118 + bExpBigger:
  1119 + if ( bExp == 0xFF ) {
  1120 + if ( bSig ) return propagateFloat32NaN( a, b );
  1121 + return packFloat32( zSign ^ 1, 0xFF, 0 );
  1122 + }
  1123 + if ( aExp == 0 ) {
  1124 + ++expDiff;
  1125 + }
  1126 + else {
  1127 + aSig |= 0x40000000;
  1128 + }
  1129 + shift32RightJamming( aSig, - expDiff, &aSig );
  1130 + bSig |= 0x40000000;
  1131 + bBigger:
  1132 + zSig = bSig - aSig;
  1133 + zExp = bExp;
  1134 + zSign ^= 1;
  1135 + goto normalizeRoundAndPack;
  1136 + aExpBigger:
  1137 + if ( aExp == 0xFF ) {
  1138 + if ( aSig ) return propagateFloat32NaN( a, b );
  1139 + return a;
  1140 + }
  1141 + if ( bExp == 0 ) {
  1142 + --expDiff;
  1143 + }
  1144 + else {
  1145 + bSig |= 0x40000000;
  1146 + }
  1147 + shift32RightJamming( bSig, expDiff, &bSig );
  1148 + aSig |= 0x40000000;
  1149 + aBigger:
  1150 + zSig = aSig - bSig;
  1151 + zExp = aExp;
  1152 + normalizeRoundAndPack:
  1153 + --zExp;
  1154 + return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
  1155 +
  1156 +}
  1157 +
  1158 +/*
  1159 +-------------------------------------------------------------------------------
  1160 +Returns the result of adding the single-precision floating-point values `a'
  1161 +and `b'. The operation is performed according to the IEC/IEEE Standard for
  1162 +Binary Floating-point Arithmetic.
  1163 +-------------------------------------------------------------------------------
  1164 +*/
  1165 +float32 float32_add( float32 a, float32 b )
  1166 +{
  1167 + flag aSign, bSign;
  1168 +
  1169 + aSign = extractFloat32Sign( a );
  1170 + bSign = extractFloat32Sign( b );
  1171 + if ( aSign == bSign ) {
  1172 + return addFloat32Sigs( a, b, aSign );
  1173 + }
  1174 + else {
  1175 + return subFloat32Sigs( a, b, aSign );
  1176 + }
  1177 +
  1178 +}
  1179 +
  1180 +/*
  1181 +-------------------------------------------------------------------------------
  1182 +Returns the result of subtracting the single-precision floating-point values
  1183 +`a' and `b'. The operation is performed according to the IEC/IEEE Standard
  1184 +for Binary Floating-point Arithmetic.
  1185 +-------------------------------------------------------------------------------
  1186 +*/
  1187 +float32 float32_sub( float32 a, float32 b )
  1188 +{
  1189 + flag aSign, bSign;
  1190 +
  1191 + aSign = extractFloat32Sign( a );
  1192 + bSign = extractFloat32Sign( b );
  1193 + if ( aSign == bSign ) {
  1194 + return subFloat32Sigs( a, b, aSign );
  1195 + }
  1196 + else {
  1197 + return addFloat32Sigs( a, b, aSign );
  1198 + }
  1199 +
  1200 +}
  1201 +
  1202 +/*
  1203 +-------------------------------------------------------------------------------
  1204 +Returns the result of multiplying the single-precision floating-point values
  1205 +`a' and `b'. The operation is performed according to the IEC/IEEE Standard
  1206 +for Binary Floating-point Arithmetic.
  1207 +-------------------------------------------------------------------------------
  1208 +*/
  1209 +float32 float32_mul( float32 a, float32 b )
  1210 +{
  1211 + flag aSign, bSign, zSign;
  1212 + int16 aExp, bExp, zExp;
  1213 + bits32 aSig, bSig;
  1214 + bits64 zSig64;
  1215 + bits32 zSig;
  1216 +
  1217 + aSig = extractFloat32Frac( a );
  1218 + aExp = extractFloat32Exp( a );
  1219 + aSign = extractFloat32Sign( a );
  1220 + bSig = extractFloat32Frac( b );
  1221 + bExp = extractFloat32Exp( b );
  1222 + bSign = extractFloat32Sign( b );
  1223 + zSign = aSign ^ bSign;
  1224 + if ( aExp == 0xFF ) {
  1225 + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
  1226 + return propagateFloat32NaN( a, b );
  1227 + }
  1228 + if ( ( bExp | bSig ) == 0 ) {
  1229 + float_raise( float_flag_invalid );
  1230 + return float32_default_nan;
  1231 + }
  1232 + return packFloat32( zSign, 0xFF, 0 );
  1233 + }
  1234 + if ( bExp == 0xFF ) {
  1235 + if ( bSig ) return propagateFloat32NaN( a, b );
  1236 + if ( ( aExp | aSig ) == 0 ) {
  1237 + float_raise( float_flag_invalid );
  1238 + return float32_default_nan;
  1239 + }
  1240 + return packFloat32( zSign, 0xFF, 0 );
  1241 + }
  1242 + if ( aExp == 0 ) {
  1243 + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
  1244 + normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  1245 + }
  1246 + if ( bExp == 0 ) {
  1247 + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
  1248 + normalizeFloat32Subnormal( bSig, &bExp, &bSig );
  1249 + }
  1250 + zExp = aExp + bExp - 0x7F;
  1251 + aSig = ( aSig | 0x00800000 )<<7;
  1252 + bSig = ( bSig | 0x00800000 )<<8;
  1253 + shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
  1254 + zSig = zSig64;
  1255 + if ( 0 <= (sbits32) ( zSig<<1 ) ) {
  1256 + zSig <<= 1;
  1257 + --zExp;
  1258 + }
  1259 + return roundAndPackFloat32( zSign, zExp, zSig );
  1260 +
  1261 +}
  1262 +
  1263 +/*
  1264 +-------------------------------------------------------------------------------
  1265 +Returns the result of dividing the single-precision floating-point value `a'
  1266 +by the corresponding value `b'. The operation is performed according to the
  1267 +IEC/IEEE Standard for Binary Floating-point Arithmetic.
  1268 +-------------------------------------------------------------------------------
  1269 +*/
  1270 +float32 float32_div( float32 a, float32 b )
  1271 +{
  1272 + flag aSign, bSign, zSign;
  1273 + int16 aExp, bExp, zExp;
  1274 + bits32 aSig, bSig, zSig;
  1275 +
  1276 + aSig = extractFloat32Frac( a );
  1277 + aExp = extractFloat32Exp( a );
  1278 + aSign = extractFloat32Sign( a );
  1279 + bSig = extractFloat32Frac( b );
  1280 + bExp = extractFloat32Exp( b );
  1281 + bSign = extractFloat32Sign( b );
  1282 + zSign = aSign ^ bSign;
  1283 + if ( aExp == 0xFF ) {
  1284 + if ( aSig ) return propagateFloat32NaN( a, b );
  1285 + if ( bExp == 0xFF ) {
  1286 + if ( bSig ) return propagateFloat32NaN( a, b );
  1287 + float_raise( float_flag_invalid );
  1288 + return float32_default_nan;
  1289 + }
  1290 + return packFloat32( zSign, 0xFF, 0 );
  1291 + }
  1292 + if ( bExp == 0xFF ) {
  1293 + if ( bSig ) return propagateFloat32NaN( a, b );
  1294 + return packFloat32( zSign, 0, 0 );
  1295 + }
  1296 + if ( bExp == 0 ) {
  1297 + if ( bSig == 0 ) {
  1298 + if ( ( aExp | aSig ) == 0 ) {
  1299 + float_raise( float_flag_invalid );
  1300 + return float32_default_nan;
  1301 + }
  1302 + float_raise( float_flag_divbyzero );
  1303 + return packFloat32( zSign, 0xFF, 0 );
  1304 + }
  1305 + normalizeFloat32Subnormal( bSig, &bExp, &bSig );
  1306 + }
  1307 + if ( aExp == 0 ) {
  1308 + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
  1309 + normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  1310 + }
  1311 + zExp = aExp - bExp + 0x7D;
  1312 + aSig = ( aSig | 0x00800000 )<<7;
  1313 + bSig = ( bSig | 0x00800000 )<<8;
  1314 + if ( bSig <= ( aSig + aSig ) ) {
  1315 + aSig >>= 1;
  1316 + ++zExp;
  1317 + }
  1318 + zSig = ( ( (bits64) aSig )<<32 ) / bSig;
  1319 + if ( ( zSig & 0x3F ) == 0 ) {
  1320 + zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
  1321 + }
  1322 + return roundAndPackFloat32( zSign, zExp, zSig );
  1323 +
  1324 +}
  1325 +
  1326 +/*
  1327 +-------------------------------------------------------------------------------
  1328 +Returns the remainder of the single-precision floating-point value `a'
  1329 +with respect to the corresponding value `b'. The operation is performed
  1330 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  1331 +-------------------------------------------------------------------------------
  1332 +*/
  1333 +float32 float32_rem( float32 a, float32 b )
  1334 +{
  1335 + flag aSign, bSign, zSign;
  1336 + int16 aExp, bExp, expDiff;
  1337 + bits32 aSig, bSig;
  1338 + bits32 q;
  1339 + bits64 aSig64, bSig64, q64;
  1340 + bits32 alternateASig;
  1341 + sbits32 sigMean;
  1342 +
  1343 + aSig = extractFloat32Frac( a );
  1344 + aExp = extractFloat32Exp( a );
  1345 + aSign = extractFloat32Sign( a );
  1346 + bSig = extractFloat32Frac( b );
  1347 + bExp = extractFloat32Exp( b );
  1348 + bSign = extractFloat32Sign( b );
  1349 + if ( aExp == 0xFF ) {
  1350 + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
  1351 + return propagateFloat32NaN( a, b );
  1352 + }
  1353 + float_raise( float_flag_invalid );
  1354 + return float32_default_nan;
  1355 + }
  1356 + if ( bExp == 0xFF ) {
  1357 + if ( bSig ) return propagateFloat32NaN( a, b );
  1358 + return a;
  1359 + }
  1360 + if ( bExp == 0 ) {
  1361 + if ( bSig == 0 ) {
  1362 + float_raise( float_flag_invalid );
  1363 + return float32_default_nan;
  1364 + }
  1365 + normalizeFloat32Subnormal( bSig, &bExp, &bSig );
  1366 + }
  1367 + if ( aExp == 0 ) {
  1368 + if ( aSig == 0 ) return a;
  1369 + normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  1370 + }
  1371 + expDiff = aExp - bExp;
  1372 + aSig |= 0x00800000;
  1373 + bSig |= 0x00800000;
  1374 + if ( expDiff < 32 ) {
  1375 + aSig <<= 8;
  1376 + bSig <<= 8;
  1377 + if ( expDiff < 0 ) {
  1378 + if ( expDiff < -1 ) return a;
  1379 + aSig >>= 1;
  1380 + }
  1381 + q = ( bSig <= aSig );
  1382 + if ( q ) aSig -= bSig;
  1383 + if ( 0 < expDiff ) {
  1384 + q = ( ( (bits64) aSig )<<32 ) / bSig;
  1385 + q >>= 32 - expDiff;
  1386 + bSig >>= 2;
  1387 + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
  1388 + }
  1389 + else {
  1390 + aSig >>= 2;
  1391 + bSig >>= 2;
  1392 + }
  1393 + }
  1394 + else {
  1395 + if ( bSig <= aSig ) aSig -= bSig;
  1396 + aSig64 = ( (bits64) aSig )<<40;
  1397 + bSig64 = ( (bits64) bSig )<<40;
  1398 + expDiff -= 64;
  1399 + while ( 0 < expDiff ) {
  1400 + q64 = estimateDiv128To64( aSig64, 0, bSig64 );
  1401 + q64 = ( 2 < q64 ) ? q64 - 2 : 0;
  1402 + aSig64 = - ( ( bSig * q64 )<<38 );
  1403 + expDiff -= 62;
  1404 + }
  1405 + expDiff += 64;
  1406 + q64 = estimateDiv128To64( aSig64, 0, bSig64 );
  1407 + q64 = ( 2 < q64 ) ? q64 - 2 : 0;
  1408 + q = q64>>( 64 - expDiff );
  1409 + bSig <<= 6;
  1410 + aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
  1411 + }
  1412 + do {
  1413 + alternateASig = aSig;
  1414 + ++q;
  1415 + aSig -= bSig;
  1416 + } while ( 0 <= (sbits32) aSig );
  1417 + sigMean = aSig + alternateASig;
  1418 + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
  1419 + aSig = alternateASig;
  1420 + }
  1421 + zSign = ( (sbits32) aSig < 0 );
  1422 + if ( zSign ) aSig = - aSig;
  1423 + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
  1424 +
  1425 +}
  1426 +
  1427 +/*
  1428 +-------------------------------------------------------------------------------
  1429 +Returns the square root of the single-precision floating-point value `a'.
  1430 +The operation is performed according to the IEC/IEEE Standard for Binary
  1431 +Floating-point Arithmetic.
  1432 +-------------------------------------------------------------------------------
  1433 +*/
  1434 +float32 float32_sqrt( float32 a )
  1435 +{
  1436 + flag aSign;
  1437 + int16 aExp, zExp;
  1438 + bits32 aSig, zSig;
  1439 + bits64 rem, term;
  1440 +
  1441 + aSig = extractFloat32Frac( a );
  1442 + aExp = extractFloat32Exp( a );
  1443 + aSign = extractFloat32Sign( a );
  1444 + if ( aExp == 0xFF ) {
  1445 + if ( aSig ) return propagateFloat32NaN( a, 0 );
  1446 + if ( ! aSign ) return a;
  1447 + float_raise( float_flag_invalid );
  1448 + return float32_default_nan;
  1449 + }
  1450 + if ( aSign ) {
  1451 + if ( ( aExp | aSig ) == 0 ) return a;
  1452 + float_raise( float_flag_invalid );
  1453 + return float32_default_nan;
  1454 + }
  1455 + if ( aExp == 0 ) {
  1456 + if ( aSig == 0 ) return 0;
  1457 + normalizeFloat32Subnormal( aSig, &aExp, &aSig );
  1458 + }
  1459 + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
  1460 + aSig = ( aSig | 0x00800000 )<<8;
  1461 + zSig = estimateSqrt32( aExp, aSig ) + 2;
  1462 + if ( ( zSig & 0x7F ) <= 5 ) {
  1463 + if ( zSig < 2 ) {
  1464 + zSig = 0xFFFFFFFF;
  1465 + }
  1466 + else {
  1467 + aSig >>= aExp & 1;
  1468 + term = ( (bits64) zSig ) * zSig;
  1469 + rem = ( ( (bits64) aSig )<<32 ) - term;
  1470 + while ( (sbits64) rem < 0 ) {
  1471 + --zSig;
  1472 + rem += ( ( (bits64) zSig )<<1 ) | 1;
  1473 + }
  1474 + zSig |= ( rem != 0 );
  1475 + }
  1476 + }
  1477 + shift32RightJamming( zSig, 1, &zSig );
  1478 + return roundAndPackFloat32( 0, zExp, zSig );
  1479 +
  1480 +}
  1481 +
  1482 +/*
  1483 +-------------------------------------------------------------------------------
  1484 +Returns 1 if the single-precision floating-point value `a' is equal to the
  1485 +corresponding value `b', and 0 otherwise. The comparison is performed
  1486 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  1487 +-------------------------------------------------------------------------------
  1488 +*/
  1489 +flag float32_eq( float32 a, float32 b )
  1490 +{
  1491 +
  1492 + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  1493 + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  1494 + ) {
  1495 + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
  1496 + float_raise( float_flag_invalid );
  1497 + }
  1498 + return 0;
  1499 + }
  1500 + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
  1501 +
  1502 +}
  1503 +
  1504 +/*
  1505 +-------------------------------------------------------------------------------
  1506 +Returns 1 if the single-precision floating-point value `a' is less than or
  1507 +equal to the corresponding value `b', and 0 otherwise. The comparison is
  1508 +performed according to the IEC/IEEE Standard for Binary Floating-point
  1509 +Arithmetic.
  1510 +-------------------------------------------------------------------------------
  1511 +*/
  1512 +flag float32_le( float32 a, float32 b )
  1513 +{
  1514 + flag aSign, bSign;
  1515 +
  1516 + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  1517 + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  1518 + ) {
  1519 + float_raise( float_flag_invalid );
  1520 + return 0;
  1521 + }
  1522 + aSign = extractFloat32Sign( a );
  1523 + bSign = extractFloat32Sign( b );
  1524 + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
  1525 + return ( a == b ) || ( aSign ^ ( a < b ) );
  1526 +
  1527 +}
  1528 +
  1529 +/*
  1530 +-------------------------------------------------------------------------------
  1531 +Returns 1 if the single-precision floating-point value `a' is less than
  1532 +the corresponding value `b', and 0 otherwise. The comparison is performed
  1533 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  1534 +-------------------------------------------------------------------------------
  1535 +*/
  1536 +flag float32_lt( float32 a, float32 b )
  1537 +{
  1538 + flag aSign, bSign;
  1539 +
  1540 + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  1541 + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  1542 + ) {
  1543 + float_raise( float_flag_invalid );
  1544 + return 0;
  1545 + }
  1546 + aSign = extractFloat32Sign( a );
  1547 + bSign = extractFloat32Sign( b );
  1548 + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
  1549 + return ( a != b ) && ( aSign ^ ( a < b ) );
  1550 +
  1551 +}
  1552 +
  1553 +/*
  1554 +-------------------------------------------------------------------------------
  1555 +Returns 1 if the single-precision floating-point value `a' is equal to the
  1556 +corresponding value `b', and 0 otherwise. The invalid exception is raised
  1557 +if either operand is a NaN. Otherwise, the comparison is performed
  1558 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  1559 +-------------------------------------------------------------------------------
  1560 +*/
  1561 +flag float32_eq_signaling( float32 a, float32 b )
  1562 +{
  1563 +
  1564 + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  1565 + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  1566 + ) {
  1567 + float_raise( float_flag_invalid );
  1568 + return 0;
  1569 + }
  1570 + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
  1571 +
  1572 +}
  1573 +
  1574 +/*
  1575 +-------------------------------------------------------------------------------
  1576 +Returns 1 if the single-precision floating-point value `a' is less than or
  1577 +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
  1578 +cause an exception. Otherwise, the comparison is performed according to the
  1579 +IEC/IEEE Standard for Binary Floating-point Arithmetic.
  1580 +-------------------------------------------------------------------------------
  1581 +*/
  1582 +flag float32_le_quiet( float32 a, float32 b )
  1583 +{
  1584 + flag aSign, bSign;
  1585 + //int16 aExp, bExp;
  1586 +
  1587 + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  1588 + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  1589 + ) {
  1590 + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
  1591 + float_raise( float_flag_invalid );
  1592 + }
  1593 + return 0;
  1594 + }
  1595 + aSign = extractFloat32Sign( a );
  1596 + bSign = extractFloat32Sign( b );
  1597 + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
  1598 + return ( a == b ) || ( aSign ^ ( a < b ) );
  1599 +
  1600 +}
  1601 +
  1602 +/*
  1603 +-------------------------------------------------------------------------------
  1604 +Returns 1 if the single-precision floating-point value `a' is less than
  1605 +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
  1606 +exception. Otherwise, the comparison is performed according to the IEC/IEEE
  1607 +Standard for Binary Floating-point Arithmetic.
  1608 +-------------------------------------------------------------------------------
  1609 +*/
  1610 +flag float32_lt_quiet( float32 a, float32 b )
  1611 +{
  1612 + flag aSign, bSign;
  1613 +
  1614 + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
  1615 + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
  1616 + ) {
  1617 + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
  1618 + float_raise( float_flag_invalid );
  1619 + }
  1620 + return 0;
  1621 + }
  1622 + aSign = extractFloat32Sign( a );
  1623 + bSign = extractFloat32Sign( b );
  1624 + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
  1625 + return ( a != b ) && ( aSign ^ ( a < b ) );
  1626 +
  1627 +}
  1628 +
  1629 +/*
  1630 +-------------------------------------------------------------------------------
  1631 +Returns the result of converting the double-precision floating-point value
  1632 +`a' to the 32-bit two's complement integer format. The conversion is
  1633 +performed according to the IEC/IEEE Standard for Binary Floating-point
  1634 +Arithmetic---which means in particular that the conversion is rounded
  1635 +according to the current rounding mode. If `a' is a NaN, the largest
  1636 +positive integer is returned. Otherwise, if the conversion overflows, the
  1637 +largest integer with the same sign as `a' is returned.
  1638 +-------------------------------------------------------------------------------
  1639 +*/
  1640 +int32 float64_to_int32( float64 a )
  1641 +{
  1642 + flag aSign;
  1643 + int16 aExp, shiftCount;
  1644 + bits64 aSig;
  1645 +
  1646 + aSig = extractFloat64Frac( a );
  1647 + aExp = extractFloat64Exp( a );
  1648 + aSign = extractFloat64Sign( a );
  1649 + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
  1650 + if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
  1651 + shiftCount = 0x42C - aExp;
  1652 + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
  1653 + return roundAndPackInt32( aSign, aSig );
  1654 +
  1655 +}
  1656 +
  1657 +/*
  1658 +-------------------------------------------------------------------------------
  1659 +Returns the result of converting the double-precision floating-point value
  1660 +`a' to the 32-bit two's complement integer format. The conversion is
  1661 +performed according to the IEC/IEEE Standard for Binary Floating-point
  1662 +Arithmetic, except that the conversion is always rounded toward zero. If
  1663 +`a' is a NaN, the largest positive integer is returned. Otherwise, if the
  1664 +conversion overflows, the largest integer with the same sign as `a' is
  1665 +returned.
  1666 +-------------------------------------------------------------------------------
  1667 +*/
  1668 +int32 float64_to_int32_round_to_zero( float64 a )
  1669 +{
  1670 + flag aSign;
  1671 + int16 aExp, shiftCount;
  1672 + bits64 aSig, savedASig;
  1673 + int32 z;
  1674 +
  1675 + aSig = extractFloat64Frac( a );
  1676 + aExp = extractFloat64Exp( a );
  1677 + aSign = extractFloat64Sign( a );
  1678 + shiftCount = 0x433 - aExp;
  1679 + if ( shiftCount < 21 ) {
  1680 + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
  1681 + goto invalid;
  1682 + }
  1683 + else if ( 52 < shiftCount ) {
  1684 + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
  1685 + return 0;
  1686 + }
  1687 + aSig |= LIT64( 0x0010000000000000 );
  1688 + savedASig = aSig;
  1689 + aSig >>= shiftCount;
  1690 + z = aSig;
  1691 + if ( aSign ) z = - z;
  1692 + if ( ( z < 0 ) ^ aSign ) {
  1693 + invalid:
  1694 + float_exception_flags |= float_flag_invalid;
  1695 + return aSign ? 0x80000000 : 0x7FFFFFFF;
  1696 + }
  1697 + if ( ( aSig<<shiftCount ) != savedASig ) {
  1698 + float_exception_flags |= float_flag_inexact;
  1699 + }
  1700 + return z;
  1701 +
  1702 +}
  1703 +
  /*
  -------------------------------------------------------------------------------
  Returns the result of converting the double-precision floating-point value
  `a' to a 32-bit unsigned integer.  The sign of `a' is ignored: only the
  magnitude of `a' is converted, so a negative input yields the same result as
  its absolute value.  The conversion is performed according to the IEC/IEEE
  Standard for Binary Floating-point Arithmetic---which means in particular
  that the conversion is rounded according to the current rounding mode.  If
  `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
  conversion overflows, the largest positive integer is returned.  The result
  is produced by `roundAndPackInt32' and is returned as an `int32'.
  -------------------------------------------------------------------------------
  */
  1715 +int32 float64_to_uint32( float64 a )
  1716 +{
  1717 + flag aSign;
  1718 + int16 aExp, shiftCount;
  1719 + bits64 aSig;
  1720 +
  1721 + aSig = extractFloat64Frac( a );
  1722 + aExp = extractFloat64Exp( a );
  1723 + aSign = 0; //extractFloat64Sign( a );
  1724 + //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
  1725 + if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
  1726 + shiftCount = 0x42C - aExp;
  1727 + if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
  1728 + return roundAndPackInt32( aSign, aSig );
  1729 +}
  1730 +
  /*
  -------------------------------------------------------------------------------
  Returns the result of converting the double-precision floating-point value
  `a' to the 32-bit two's complement integer format.  The conversion is
  performed according to the IEC/IEEE Standard for Binary Floating-point
  Arithmetic, except that the conversion is always rounded toward zero.  If
  `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
  conversion overflows, the largest integer with the same sign as `a' is
  returned.  Despite its name, this routine performs the same signed
  conversion as `float64_to_int32_round_to_zero'.
  -------------------------------------------------------------------------------
  */
  1741 +int32 float64_to_uint32_round_to_zero( float64 a )
  1742 +{
  1743 + flag aSign;
  1744 + int16 aExp, shiftCount;
  1745 + bits64 aSig, savedASig;
  1746 + int32 z;
  1747 +
  1748 + aSig = extractFloat64Frac( a );
  1749 + aExp = extractFloat64Exp( a );
  1750 + aSign = extractFloat64Sign( a );
  1751 + shiftCount = 0x433 - aExp;
  1752 + if ( shiftCount < 21 ) {
  1753 + if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
  1754 + goto invalid;
  1755 + }
  1756 + else if ( 52 < shiftCount ) {
  1757 + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
  1758 + return 0;
  1759 + }
  1760 + aSig |= LIT64( 0x0010000000000000 );
  1761 + savedASig = aSig;
  1762 + aSig >>= shiftCount;
  1763 + z = aSig;
  1764 + if ( aSign ) z = - z;
  1765 + if ( ( z < 0 ) ^ aSign ) {
  1766 + invalid:
  1767 + float_exception_flags |= float_flag_invalid;
  1768 + return aSign ? 0x80000000 : 0x7FFFFFFF;
  1769 + }
  1770 + if ( ( aSig<<shiftCount ) != savedASig ) {
  1771 + float_exception_flags |= float_flag_inexact;
  1772 + }
  1773 + return z;
  1774 +}
  1775 +
  1776 +/*
  1777 +-------------------------------------------------------------------------------
  1778 +Returns the result of converting the double-precision floating-point value
  1779 +`a' to the single-precision floating-point format. The conversion is
  1780 +performed according to the IEC/IEEE Standard for Binary Floating-point
  1781 +Arithmetic.
  1782 +-------------------------------------------------------------------------------
  1783 +*/
  1784 +float32 float64_to_float32( float64 a )
  1785 +{
  1786 + flag aSign;
  1787 + int16 aExp;
  1788 + bits64 aSig;
  1789 + bits32 zSig;
  1790 +
  1791 + aSig = extractFloat64Frac( a );
  1792 + aExp = extractFloat64Exp( a );
  1793 + aSign = extractFloat64Sign( a );
  1794 + if ( aExp == 0x7FF ) {
  1795 + if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
  1796 + return packFloat32( aSign, 0xFF, 0 );
  1797 + }
  1798 + shift64RightJamming( aSig, 22, &aSig );
  1799 + zSig = aSig;
  1800 + if ( aExp || zSig ) {
  1801 + zSig |= 0x40000000;
  1802 + aExp -= 0x381;
  1803 + }
  1804 + return roundAndPackFloat32( aSign, aExp, zSig );
  1805 +
  1806 +}
  1807 +
  1808 +#ifdef FLOATX80
  1809 +
  1810 +/*
  1811 +-------------------------------------------------------------------------------
  1812 +Returns the result of converting the double-precision floating-point value
  1813 +`a' to the extended double-precision floating-point format. The conversion
  1814 +is performed according to the IEC/IEEE Standard for Binary Floating-point
  1815 +Arithmetic.
  1816 +-------------------------------------------------------------------------------
  1817 +*/
  1818 +floatx80 float64_to_floatx80( float64 a )
  1819 +{
  1820 + flag aSign;
  1821 + int16 aExp;
  1822 + bits64 aSig;
  1823 +
  1824 + aSig = extractFloat64Frac( a );
  1825 + aExp = extractFloat64Exp( a );
  1826 + aSign = extractFloat64Sign( a );
  1827 + if ( aExp == 0x7FF ) {
  1828 + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
  1829 + return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
  1830 + }
  1831 + if ( aExp == 0 ) {
  1832 + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
  1833 + normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  1834 + }
  1835 + return
  1836 + packFloatx80(
  1837 + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
  1838 +
  1839 +}
  1840 +
  1841 +#endif
  1842 +
  1843 +/*
  1844 +-------------------------------------------------------------------------------
  1845 +Rounds the double-precision floating-point value `a' to an integer, and
  1846 +returns the result as a double-precision floating-point value. The
  1847 +operation is performed according to the IEC/IEEE Standard for Binary
  1848 +Floating-point Arithmetic.
  1849 +-------------------------------------------------------------------------------
  1850 +*/
  1851 +float64 float64_round_to_int( float64 a )
  1852 +{
  1853 + flag aSign;
  1854 + int16 aExp;
  1855 + bits64 lastBitMask, roundBitsMask;
  1856 + int8 roundingMode;
  1857 + float64 z;
  1858 +
  1859 + aExp = extractFloat64Exp( a );
  1860 + if ( 0x433 <= aExp ) {
  1861 + if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
  1862 + return propagateFloat64NaN( a, a );
  1863 + }
  1864 + return a;
  1865 + }
  1866 + if ( aExp <= 0x3FE ) {
  1867 + if ( (bits64) ( a<<1 ) == 0 ) return a;
  1868 + float_exception_flags |= float_flag_inexact;
  1869 + aSign = extractFloat64Sign( a );
  1870 + switch ( float_rounding_mode ) {
  1871 + case float_round_nearest_even:
  1872 + if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
  1873 + return packFloat64( aSign, 0x3FF, 0 );
  1874 + }
  1875 + break;
  1876 + case float_round_down:
  1877 + return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
  1878 + case float_round_up:
  1879 + return
  1880 + aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
  1881 + }
  1882 + return packFloat64( aSign, 0, 0 );
  1883 + }
  1884 + lastBitMask = 1;
  1885 + lastBitMask <<= 0x433 - aExp;
  1886 + roundBitsMask = lastBitMask - 1;
  1887 + z = a;
  1888 + roundingMode = float_rounding_mode;
  1889 + if ( roundingMode == float_round_nearest_even ) {
  1890 + z += lastBitMask>>1;
  1891 + if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
  1892 + }
  1893 + else if ( roundingMode != float_round_to_zero ) {
  1894 + if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
  1895 + z += roundBitsMask;
  1896 + }
  1897 + }
  1898 + z &= ~ roundBitsMask;
  1899 + if ( z != a ) float_exception_flags |= float_flag_inexact;
  1900 + return z;
  1901 +
  1902 +}
  1903 +
  1904 +/*
  1905 +-------------------------------------------------------------------------------
  1906 +Returns the result of adding the absolute values of the double-precision
  1907 +floating-point values `a' and `b'. If `zSign' is true, the sum is negated
  1908 +before being returned. `zSign' is ignored if the result is a NaN. The
  1909 +addition is performed according to the IEC/IEEE Standard for Binary
  1910 +Floating-point Arithmetic.
  1911 +-------------------------------------------------------------------------------
  1912 +*/
  1913 +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
  1914 +{
  1915 + int16 aExp, bExp, zExp;
  1916 + bits64 aSig, bSig, zSig;
  1917 + int16 expDiff;
  1918 +
  1919 + aSig = extractFloat64Frac( a );
  1920 + aExp = extractFloat64Exp( a );
  1921 + bSig = extractFloat64Frac( b );
  1922 + bExp = extractFloat64Exp( b );
  1923 + expDiff = aExp - bExp;
  1924 + aSig <<= 9;
  1925 + bSig <<= 9;
  1926 + if ( 0 < expDiff ) {
  1927 + if ( aExp == 0x7FF ) {
  1928 + if ( aSig ) return propagateFloat64NaN( a, b );
  1929 + return a;
  1930 + }
  1931 + if ( bExp == 0 ) {
  1932 + --expDiff;
  1933 + }
  1934 + else {
  1935 + bSig |= LIT64( 0x2000000000000000 );
  1936 + }
  1937 + shift64RightJamming( bSig, expDiff, &bSig );
  1938 + zExp = aExp;
  1939 + }
  1940 + else if ( expDiff < 0 ) {
  1941 + if ( bExp == 0x7FF ) {
  1942 + if ( bSig ) return propagateFloat64NaN( a, b );
  1943 + return packFloat64( zSign, 0x7FF, 0 );
  1944 + }
  1945 + if ( aExp == 0 ) {
  1946 + ++expDiff;
  1947 + }
  1948 + else {
  1949 + aSig |= LIT64( 0x2000000000000000 );
  1950 + }
  1951 + shift64RightJamming( aSig, - expDiff, &aSig );
  1952 + zExp = bExp;
  1953 + }
  1954 + else {
  1955 + if ( aExp == 0x7FF ) {
  1956 + if ( aSig | bSig ) return propagateFloat64NaN( a, b );
  1957 + return a;
  1958 + }
  1959 + if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
  1960 + zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
  1961 + zExp = aExp;
  1962 + goto roundAndPack;
  1963 + }
  1964 + aSig |= LIT64( 0x2000000000000000 );
  1965 + zSig = ( aSig + bSig )<<1;
  1966 + --zExp;
  1967 + if ( (sbits64) zSig < 0 ) {
  1968 + zSig = aSig + bSig;
  1969 + ++zExp;
  1970 + }
  1971 + roundAndPack:
  1972 + return roundAndPackFloat64( zSign, zExp, zSig );
  1973 +
  1974 +}
  1975 +
  1976 +/*
  1977 +-------------------------------------------------------------------------------
  1978 +Returns the result of subtracting the absolute values of the double-
  1979 +precision floating-point values `a' and `b'. If `zSign' is true, the
  1980 +difference is negated before being returned. `zSign' is ignored if the
  1981 +result is a NaN. The subtraction is performed according to the IEC/IEEE
  1982 +Standard for Binary Floating-point Arithmetic.
  1983 +-------------------------------------------------------------------------------
  1984 +*/
  1985 +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
  1986 +{
  1987 + int16 aExp, bExp, zExp;
  1988 + bits64 aSig, bSig, zSig;
  1989 + int16 expDiff;
  1990 +
  1991 + aSig = extractFloat64Frac( a );
  1992 + aExp = extractFloat64Exp( a );
  1993 + bSig = extractFloat64Frac( b );
  1994 + bExp = extractFloat64Exp( b );
  1995 + expDiff = aExp - bExp;
  1996 + aSig <<= 10;
  1997 + bSig <<= 10;
  1998 + if ( 0 < expDiff ) goto aExpBigger;
  1999 + if ( expDiff < 0 ) goto bExpBigger;
  2000 + if ( aExp == 0x7FF ) {
  2001 + if ( aSig | bSig ) return propagateFloat64NaN( a, b );
  2002 + float_raise( float_flag_invalid );
  2003 + return float64_default_nan;
  2004 + }
  2005 + if ( aExp == 0 ) {
  2006 + aExp = 1;
  2007 + bExp = 1;
  2008 + }
  2009 + if ( bSig < aSig ) goto aBigger;
  2010 + if ( aSig < bSig ) goto bBigger;
  2011 + return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
  2012 + bExpBigger:
  2013 + if ( bExp == 0x7FF ) {
  2014 + if ( bSig ) return propagateFloat64NaN( a, b );
  2015 + return packFloat64( zSign ^ 1, 0x7FF, 0 );
  2016 + }
  2017 + if ( aExp == 0 ) {
  2018 + ++expDiff;
  2019 + }
  2020 + else {
  2021 + aSig |= LIT64( 0x4000000000000000 );
  2022 + }
  2023 + shift64RightJamming( aSig, - expDiff, &aSig );
  2024 + bSig |= LIT64( 0x4000000000000000 );
  2025 + bBigger:
  2026 + zSig = bSig - aSig;
  2027 + zExp = bExp;
  2028 + zSign ^= 1;
  2029 + goto normalizeRoundAndPack;
  2030 + aExpBigger:
  2031 + if ( aExp == 0x7FF ) {
  2032 + if ( aSig ) return propagateFloat64NaN( a, b );
  2033 + return a;
  2034 + }
  2035 + if ( bExp == 0 ) {
  2036 + --expDiff;
  2037 + }
  2038 + else {
  2039 + bSig |= LIT64( 0x4000000000000000 );
  2040 + }
  2041 + shift64RightJamming( bSig, expDiff, &bSig );
  2042 + aSig |= LIT64( 0x4000000000000000 );
  2043 + aBigger:
  2044 + zSig = aSig - bSig;
  2045 + zExp = aExp;
  2046 + normalizeRoundAndPack:
  2047 + --zExp;
  2048 + return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
  2049 +
  2050 +}
  2051 +
  2052 +/*
  2053 +-------------------------------------------------------------------------------
  2054 +Returns the result of adding the double-precision floating-point values `a'
  2055 +and `b'. The operation is performed according to the IEC/IEEE Standard for
  2056 +Binary Floating-point Arithmetic.
  2057 +-------------------------------------------------------------------------------
  2058 +*/
  2059 +float64 float64_add( float64 a, float64 b )
  2060 +{
  2061 + flag aSign, bSign;
  2062 +
  2063 + aSign = extractFloat64Sign( a );
  2064 + bSign = extractFloat64Sign( b );
  2065 + if ( aSign == bSign ) {
  2066 + return addFloat64Sigs( a, b, aSign );
  2067 + }
  2068 + else {
  2069 + return subFloat64Sigs( a, b, aSign );
  2070 + }
  2071 +
  2072 +}
  2073 +
  2074 +/*
  2075 +-------------------------------------------------------------------------------
  2076 +Returns the result of subtracting the double-precision floating-point values
  2077 +`a' and `b'. The operation is performed according to the IEC/IEEE Standard
  2078 +for Binary Floating-point Arithmetic.
  2079 +-------------------------------------------------------------------------------
  2080 +*/
  2081 +float64 float64_sub( float64 a, float64 b )
  2082 +{
  2083 + flag aSign, bSign;
  2084 +
  2085 + aSign = extractFloat64Sign( a );
  2086 + bSign = extractFloat64Sign( b );
  2087 + if ( aSign == bSign ) {
  2088 + return subFloat64Sigs( a, b, aSign );
  2089 + }
  2090 + else {
  2091 + return addFloat64Sigs( a, b, aSign );
  2092 + }
  2093 +
  2094 +}
  2095 +
  2096 +/*
  2097 +-------------------------------------------------------------------------------
  2098 +Returns the result of multiplying the double-precision floating-point values
  2099 +`a' and `b'. The operation is performed according to the IEC/IEEE Standard
  2100 +for Binary Floating-point Arithmetic.
  2101 +-------------------------------------------------------------------------------
  2102 +*/
  2103 +float64 float64_mul( float64 a, float64 b )
  2104 +{
  2105 + flag aSign, bSign, zSign;
  2106 + int16 aExp, bExp, zExp;
  2107 + bits64 aSig, bSig, zSig0, zSig1;
  2108 +
  2109 + aSig = extractFloat64Frac( a );
  2110 + aExp = extractFloat64Exp( a );
  2111 + aSign = extractFloat64Sign( a );
  2112 + bSig = extractFloat64Frac( b );
  2113 + bExp = extractFloat64Exp( b );
  2114 + bSign = extractFloat64Sign( b );
  2115 + zSign = aSign ^ bSign;
  2116 + if ( aExp == 0x7FF ) {
  2117 + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
  2118 + return propagateFloat64NaN( a, b );
  2119 + }
  2120 + if ( ( bExp | bSig ) == 0 ) {
  2121 + float_raise( float_flag_invalid );
  2122 + return float64_default_nan;
  2123 + }
  2124 + return packFloat64( zSign, 0x7FF, 0 );
  2125 + }
  2126 + if ( bExp == 0x7FF ) {
  2127 + if ( bSig ) return propagateFloat64NaN( a, b );
  2128 + if ( ( aExp | aSig ) == 0 ) {
  2129 + float_raise( float_flag_invalid );
  2130 + return float64_default_nan;
  2131 + }
  2132 + return packFloat64( zSign, 0x7FF, 0 );
  2133 + }
  2134 + if ( aExp == 0 ) {
  2135 + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
  2136 + normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  2137 + }
  2138 + if ( bExp == 0 ) {
  2139 + if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
  2140 + normalizeFloat64Subnormal( bSig, &bExp, &bSig );
  2141 + }
  2142 + zExp = aExp + bExp - 0x3FF;
  2143 + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
  2144 + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
  2145 + mul64To128( aSig, bSig, &zSig0, &zSig1 );
  2146 + zSig0 |= ( zSig1 != 0 );
  2147 + if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
  2148 + zSig0 <<= 1;
  2149 + --zExp;
  2150 + }
  2151 + return roundAndPackFloat64( zSign, zExp, zSig0 );
  2152 +
  2153 +}
  2154 +
  2155 +/*
  2156 +-------------------------------------------------------------------------------
  2157 +Returns the result of dividing the double-precision floating-point value `a'
  2158 +by the corresponding value `b'. The operation is performed according to
  2159 +the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2160 +-------------------------------------------------------------------------------
  2161 +*/
  2162 +float64 float64_div( float64 a, float64 b )
  2163 +{
  2164 + flag aSign, bSign, zSign;
  2165 + int16 aExp, bExp, zExp;
  2166 + bits64 aSig, bSig, zSig;
  2167 + bits64 rem0, rem1;
  2168 + bits64 term0, term1;
  2169 +
  2170 + aSig = extractFloat64Frac( a );
  2171 + aExp = extractFloat64Exp( a );
  2172 + aSign = extractFloat64Sign( a );
  2173 + bSig = extractFloat64Frac( b );
  2174 + bExp = extractFloat64Exp( b );
  2175 + bSign = extractFloat64Sign( b );
  2176 + zSign = aSign ^ bSign;
  2177 + if ( aExp == 0x7FF ) {
  2178 + if ( aSig ) return propagateFloat64NaN( a, b );
  2179 + if ( bExp == 0x7FF ) {
  2180 + if ( bSig ) return propagateFloat64NaN( a, b );
  2181 + float_raise( float_flag_invalid );
  2182 + return float64_default_nan;
  2183 + }
  2184 + return packFloat64( zSign, 0x7FF, 0 );
  2185 + }
  2186 + if ( bExp == 0x7FF ) {
  2187 + if ( bSig ) return propagateFloat64NaN( a, b );
  2188 + return packFloat64( zSign, 0, 0 );
  2189 + }
  2190 + if ( bExp == 0 ) {
  2191 + if ( bSig == 0 ) {
  2192 + if ( ( aExp | aSig ) == 0 ) {
  2193 + float_raise( float_flag_invalid );
  2194 + return float64_default_nan;
  2195 + }
  2196 + float_raise( float_flag_divbyzero );
  2197 + return packFloat64( zSign, 0x7FF, 0 );
  2198 + }
  2199 + normalizeFloat64Subnormal( bSig, &bExp, &bSig );
  2200 + }
  2201 + if ( aExp == 0 ) {
  2202 + if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
  2203 + normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  2204 + }
  2205 + zExp = aExp - bExp + 0x3FD;
  2206 + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
  2207 + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
  2208 + if ( bSig <= ( aSig + aSig ) ) {
  2209 + aSig >>= 1;
  2210 + ++zExp;
  2211 + }
  2212 + zSig = estimateDiv128To64( aSig, 0, bSig );
  2213 + if ( ( zSig & 0x1FF ) <= 2 ) {
  2214 + mul64To128( bSig, zSig, &term0, &term1 );
  2215 + sub128( aSig, 0, term0, term1, &rem0, &rem1 );
  2216 + while ( (sbits64) rem0 < 0 ) {
  2217 + --zSig;
  2218 + add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
  2219 + }
  2220 + zSig |= ( rem1 != 0 );
  2221 + }
  2222 + return roundAndPackFloat64( zSign, zExp, zSig );
  2223 +
  2224 +}
  2225 +
  2226 +/*
  2227 +-------------------------------------------------------------------------------
  2228 +Returns the remainder of the double-precision floating-point value `a'
  2229 +with respect to the corresponding value `b'. The operation is performed
  2230 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2231 +-------------------------------------------------------------------------------
  2232 +*/
  2233 +float64 float64_rem( float64 a, float64 b )
  2234 +{
  2235 + flag aSign, bSign, zSign;
  2236 + int16 aExp, bExp, expDiff;
  2237 + bits64 aSig, bSig;
  2238 + bits64 q, alternateASig;
  2239 + sbits64 sigMean;
  2240 +
  2241 + aSig = extractFloat64Frac( a );
  2242 + aExp = extractFloat64Exp( a );
  2243 + aSign = extractFloat64Sign( a );
  2244 + bSig = extractFloat64Frac( b );
  2245 + bExp = extractFloat64Exp( b );
  2246 + bSign = extractFloat64Sign( b );
  2247 + if ( aExp == 0x7FF ) {
  2248 + if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
  2249 + return propagateFloat64NaN( a, b );
  2250 + }
  2251 + float_raise( float_flag_invalid );
  2252 + return float64_default_nan;
  2253 + }
  2254 + if ( bExp == 0x7FF ) {
  2255 + if ( bSig ) return propagateFloat64NaN( a, b );
  2256 + return a;
  2257 + }
  2258 + if ( bExp == 0 ) {
  2259 + if ( bSig == 0 ) {
  2260 + float_raise( float_flag_invalid );
  2261 + return float64_default_nan;
  2262 + }
  2263 + normalizeFloat64Subnormal( bSig, &bExp, &bSig );
  2264 + }
  2265 + if ( aExp == 0 ) {
  2266 + if ( aSig == 0 ) return a;
  2267 + normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  2268 + }
  2269 + expDiff = aExp - bExp;
  2270 + aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
  2271 + bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
  2272 + if ( expDiff < 0 ) {
  2273 + if ( expDiff < -1 ) return a;
  2274 + aSig >>= 1;
  2275 + }
  2276 + q = ( bSig <= aSig );
  2277 + if ( q ) aSig -= bSig;
  2278 + expDiff -= 64;
  2279 + while ( 0 < expDiff ) {
  2280 + q = estimateDiv128To64( aSig, 0, bSig );
  2281 + q = ( 2 < q ) ? q - 2 : 0;
  2282 + aSig = - ( ( bSig>>2 ) * q );
  2283 + expDiff -= 62;
  2284 + }
  2285 + expDiff += 64;
  2286 + if ( 0 < expDiff ) {
  2287 + q = estimateDiv128To64( aSig, 0, bSig );
  2288 + q = ( 2 < q ) ? q - 2 : 0;
  2289 + q >>= 64 - expDiff;
  2290 + bSig >>= 2;
  2291 + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
  2292 + }
  2293 + else {
  2294 + aSig >>= 2;
  2295 + bSig >>= 2;
  2296 + }
  2297 + do {
  2298 + alternateASig = aSig;
  2299 + ++q;
  2300 + aSig -= bSig;
  2301 + } while ( 0 <= (sbits64) aSig );
  2302 + sigMean = aSig + alternateASig;
  2303 + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
  2304 + aSig = alternateASig;
  2305 + }
  2306 + zSign = ( (sbits64) aSig < 0 );
  2307 + if ( zSign ) aSig = - aSig;
  2308 + return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
  2309 +
  2310 +}
  2311 +
  2312 +/*
  2313 +-------------------------------------------------------------------------------
  2314 +Returns the square root of the double-precision floating-point value `a'.
  2315 +The operation is performed according to the IEC/IEEE Standard for Binary
  2316 +Floating-point Arithmetic.
  2317 +-------------------------------------------------------------------------------
  2318 +*/
  2319 +float64 float64_sqrt( float64 a )
  2320 +{
  2321 + flag aSign;
  2322 + int16 aExp, zExp;
  2323 + bits64 aSig, zSig;
  2324 + bits64 rem0, rem1, term0, term1; //, shiftedRem;
  2325 + //float64 z;
  2326 +
  2327 + aSig = extractFloat64Frac( a );
  2328 + aExp = extractFloat64Exp( a );
  2329 + aSign = extractFloat64Sign( a );
  2330 + if ( aExp == 0x7FF ) {
  2331 + if ( aSig ) return propagateFloat64NaN( a, a );
  2332 + if ( ! aSign ) return a;
  2333 + float_raise( float_flag_invalid );
  2334 + return float64_default_nan;
  2335 + }
  2336 + if ( aSign ) {
  2337 + if ( ( aExp | aSig ) == 0 ) return a;
  2338 + float_raise( float_flag_invalid );
  2339 + return float64_default_nan;
  2340 + }
  2341 + if ( aExp == 0 ) {
  2342 + if ( aSig == 0 ) return 0;
  2343 + normalizeFloat64Subnormal( aSig, &aExp, &aSig );
  2344 + }
  2345 + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
  2346 + aSig |= LIT64( 0x0010000000000000 );
  2347 + zSig = estimateSqrt32( aExp, aSig>>21 );
  2348 + zSig <<= 31;
  2349 + aSig <<= 9 - ( aExp & 1 );
  2350 + zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
  2351 + if ( ( zSig & 0x3FF ) <= 5 ) {
  2352 + if ( zSig < 2 ) {
  2353 + zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
  2354 + }
  2355 + else {
  2356 + aSig <<= 2;
  2357 + mul64To128( zSig, zSig, &term0, &term1 );
  2358 + sub128( aSig, 0, term0, term1, &rem0, &rem1 );
  2359 + while ( (sbits64) rem0 < 0 ) {
  2360 + --zSig;
  2361 + shortShift128Left( 0, zSig, 1, &term0, &term1 );
  2362 + term1 |= 1;
  2363 + add128( rem0, rem1, term0, term1, &rem0, &rem1 );
  2364 + }
  2365 + zSig |= ( ( rem0 | rem1 ) != 0 );
  2366 + }
  2367 + }
  2368 + shift64RightJamming( zSig, 1, &zSig );
  2369 + return roundAndPackFloat64( 0, zExp, zSig );
  2370 +
  2371 +}
  2372 +
  2373 +/*
  2374 +-------------------------------------------------------------------------------
  2375 +Returns 1 if the double-precision floating-point value `a' is equal to the
  2376 +corresponding value `b', and 0 otherwise. The comparison is performed
  2377 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2378 +-------------------------------------------------------------------------------
  2379 +*/
  2380 +flag float64_eq( float64 a, float64 b )
  2381 +{
  2382 +
  2383 + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  2384 + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  2385 + ) {
  2386 + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
  2387 + float_raise( float_flag_invalid );
  2388 + }
  2389 + return 0;
  2390 + }
  2391 + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
  2392 +
  2393 +}
  2394 +
  2395 +/*
  2396 +-------------------------------------------------------------------------------
  2397 +Returns 1 if the double-precision floating-point value `a' is less than or
  2398 +equal to the corresponding value `b', and 0 otherwise. The comparison is
  2399 +performed according to the IEC/IEEE Standard for Binary Floating-point
  2400 +Arithmetic.
  2401 +-------------------------------------------------------------------------------
  2402 +*/
  2403 +flag float64_le( float64 a, float64 b )
  2404 +{
  2405 + flag aSign, bSign;
  2406 +
  2407 + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  2408 + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  2409 + ) {
  2410 + float_raise( float_flag_invalid );
  2411 + return 0;
  2412 + }
  2413 + aSign = extractFloat64Sign( a );
  2414 + bSign = extractFloat64Sign( b );
  2415 + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
  2416 + return ( a == b ) || ( aSign ^ ( a < b ) );
  2417 +
  2418 +}
  2419 +
  2420 +/*
  2421 +-------------------------------------------------------------------------------
  2422 +Returns 1 if the double-precision floating-point value `a' is less than
  2423 +the corresponding value `b', and 0 otherwise. The comparison is performed
  2424 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2425 +-------------------------------------------------------------------------------
  2426 +*/
  2427 +flag float64_lt( float64 a, float64 b )
  2428 +{
  2429 + flag aSign, bSign;
  2430 +
  2431 + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  2432 + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  2433 + ) {
  2434 + float_raise( float_flag_invalid );
  2435 + return 0;
  2436 + }
  2437 + aSign = extractFloat64Sign( a );
  2438 + bSign = extractFloat64Sign( b );
  2439 + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
  2440 + return ( a != b ) && ( aSign ^ ( a < b ) );
  2441 +
  2442 +}
  2443 +
  2444 +/*
  2445 +-------------------------------------------------------------------------------
  2446 +Returns 1 if the double-precision floating-point value `a' is equal to the
  2447 +corresponding value `b', and 0 otherwise. The invalid exception is raised
  2448 +if either operand is a NaN. Otherwise, the comparison is performed
  2449 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2450 +-------------------------------------------------------------------------------
  2451 +*/
  2452 +flag float64_eq_signaling( float64 a, float64 b )
  2453 +{
  2454 +
  2455 + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  2456 + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  2457 + ) {
  2458 + float_raise( float_flag_invalid );
  2459 + return 0;
  2460 + }
  2461 + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
  2462 +
  2463 +}
  2464 +
  2465 +/*
  2466 +-------------------------------------------------------------------------------
  2467 +Returns 1 if the double-precision floating-point value `a' is less than or
  2468 +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
  2469 +cause an exception. Otherwise, the comparison is performed according to the
  2470 +IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2471 +-------------------------------------------------------------------------------
  2472 +*/
  2473 +flag float64_le_quiet( float64 a, float64 b )
  2474 +{
  2475 + flag aSign, bSign;
  2476 + //int16 aExp, bExp;
  2477 +
  2478 + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  2479 + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  2480 + ) {
  2481 + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
  2482 + float_raise( float_flag_invalid );
  2483 + }
  2484 + return 0;
  2485 + }
  2486 + aSign = extractFloat64Sign( a );
  2487 + bSign = extractFloat64Sign( b );
  2488 + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
  2489 + return ( a == b ) || ( aSign ^ ( a < b ) );
  2490 +
  2491 +}
  2492 +
  2493 +/*
  2494 +-------------------------------------------------------------------------------
  2495 +Returns 1 if the double-precision floating-point value `a' is less than
  2496 +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
  2497 +exception. Otherwise, the comparison is performed according to the IEC/IEEE
  2498 +Standard for Binary Floating-point Arithmetic.
  2499 +-------------------------------------------------------------------------------
  2500 +*/
  2501 +flag float64_lt_quiet( float64 a, float64 b )
  2502 +{
  2503 + flag aSign, bSign;
  2504 +
  2505 + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
  2506 + || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
  2507 + ) {
  2508 + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
  2509 + float_raise( float_flag_invalid );
  2510 + }
  2511 + return 0;
  2512 + }
  2513 + aSign = extractFloat64Sign( a );
  2514 + bSign = extractFloat64Sign( b );
  2515 + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
  2516 + return ( a != b ) && ( aSign ^ ( a < b ) );
  2517 +
  2518 +}
  2519 +
  2520 +#ifdef FLOATX80
  2521 +
  2522 +/*
  2523 +-------------------------------------------------------------------------------
  2524 +Returns the result of converting the extended double-precision floating-
  2525 +point value `a' to the 32-bit two's complement integer format. The
  2526 +conversion is performed according to the IEC/IEEE Standard for Binary
  2527 +Floating-point Arithmetic---which means in particular that the conversion
  2528 +is rounded according to the current rounding mode. If `a' is a NaN, the
  2529 +largest positive integer is returned. Otherwise, if the conversion
  2530 +overflows, the largest integer with the same sign as `a' is returned.
  2531 +-------------------------------------------------------------------------------
  2532 +*/
  2533 +int32 floatx80_to_int32( floatx80 a )
  2534 +{
  2535 + flag aSign;
  2536 + int32 aExp, shiftCount;
  2537 + bits64 aSig;
  2538 +
  2539 + aSig = extractFloatx80Frac( a );
  2540 + aExp = extractFloatx80Exp( a );
  2541 + aSign = extractFloatx80Sign( a );
  2542 + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
  2543 + shiftCount = 0x4037 - aExp;
  2544 + if ( shiftCount <= 0 ) shiftCount = 1;
  2545 + shift64RightJamming( aSig, shiftCount, &aSig );
  2546 + return roundAndPackInt32( aSign, aSig );
  2547 +
  2548 +}
  2549 +
  2550 +/*
  2551 +-------------------------------------------------------------------------------
  2552 +Returns the result of converting the extended double-precision floating-
  2553 +point value `a' to the 32-bit two's complement integer format. The
  2554 +conversion is performed according to the IEC/IEEE Standard for Binary
  2555 +Floating-point Arithmetic, except that the conversion is always rounded
  2556 +toward zero. If `a' is a NaN, the largest positive integer is returned.
  2557 +Otherwise, if the conversion overflows, the largest integer with the same
  2558 +sign as `a' is returned.
  2559 +-------------------------------------------------------------------------------
  2560 +*/
  2561 +int32 floatx80_to_int32_round_to_zero( floatx80 a )
  2562 +{
  2563 + flag aSign;
  2564 + int32 aExp, shiftCount;
  2565 + bits64 aSig, savedASig;
  2566 + int32 z;
  2567 +
  2568 + aSig = extractFloatx80Frac( a );
  2569 + aExp = extractFloatx80Exp( a );
  2570 + aSign = extractFloatx80Sign( a );
  2571 + shiftCount = 0x403E - aExp;
  2572 + if ( shiftCount < 32 ) {
  2573 + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
  2574 + goto invalid;
  2575 + }
  2576 + else if ( 63 < shiftCount ) {
  2577 + if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
  2578 + return 0;
  2579 + }
  2580 + savedASig = aSig;
  2581 + aSig >>= shiftCount;
  2582 + z = aSig;
  2583 + if ( aSign ) z = - z;
  2584 + if ( ( z < 0 ) ^ aSign ) {
  2585 + invalid:
  2586 + float_exception_flags |= float_flag_invalid;
  2587 + return aSign ? 0x80000000 : 0x7FFFFFFF;
  2588 + }
  2589 + if ( ( aSig<<shiftCount ) != savedASig ) {
  2590 + float_exception_flags |= float_flag_inexact;
  2591 + }
  2592 + return z;
  2593 +
  2594 +}
  2595 +
  2596 +/*
  2597 +-------------------------------------------------------------------------------
  2598 +Returns the result of converting the extended double-precision floating-
  2599 +point value `a' to the single-precision floating-point format. The
  2600 +conversion is performed according to the IEC/IEEE Standard for Binary
  2601 +Floating-point Arithmetic.
  2602 +-------------------------------------------------------------------------------
  2603 +*/
  2604 +float32 floatx80_to_float32( floatx80 a )
  2605 +{
  2606 + flag aSign;
  2607 + int32 aExp;
  2608 + bits64 aSig;
  2609 +
  2610 + aSig = extractFloatx80Frac( a );
  2611 + aExp = extractFloatx80Exp( a );
  2612 + aSign = extractFloatx80Sign( a );
  2613 + if ( aExp == 0x7FFF ) {
  2614 + if ( (bits64) ( aSig<<1 ) ) {
  2615 + return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
  2616 + }
  2617 + return packFloat32( aSign, 0xFF, 0 );
  2618 + }
  2619 + shift64RightJamming( aSig, 33, &aSig );
  2620 + if ( aExp || aSig ) aExp -= 0x3F81;
  2621 + return roundAndPackFloat32( aSign, aExp, aSig );
  2622 +
  2623 +}
  2624 +
  2625 +/*
  2626 +-------------------------------------------------------------------------------
  2627 +Returns the result of converting the extended double-precision floating-
  2628 +point value `a' to the double-precision floating-point format. The
  2629 +conversion is performed according to the IEC/IEEE Standard for Binary
  2630 +Floating-point Arithmetic.
  2631 +-------------------------------------------------------------------------------
  2632 +*/
  2633 +float64 floatx80_to_float64( floatx80 a )
  2634 +{
  2635 + flag aSign;
  2636 + int32 aExp;
  2637 + bits64 aSig, zSig;
  2638 +
  2639 + aSig = extractFloatx80Frac( a );
  2640 + aExp = extractFloatx80Exp( a );
  2641 + aSign = extractFloatx80Sign( a );
  2642 + if ( aExp == 0x7FFF ) {
  2643 + if ( (bits64) ( aSig<<1 ) ) {
  2644 + return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
  2645 + }
  2646 + return packFloat64( aSign, 0x7FF, 0 );
  2647 + }
  2648 + shift64RightJamming( aSig, 1, &zSig );
  2649 + if ( aExp || aSig ) aExp -= 0x3C01;
  2650 + return roundAndPackFloat64( aSign, aExp, zSig );
  2651 +
  2652 +}
  2653 +
/*
-------------------------------------------------------------------------------
Rounds the extended double-precision floating-point value `a' to an integer,
and returns the result as an extended double-precision floating-point
value.  The rounding direction is taken from `float_rounding_mode'.  The
inexact flag is set whenever the returned value differs from `a'.  The
operation is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_round_to_int( floatx80 a )
{
    flag aSign;
    int32 aExp;
    bits64 lastBitMask, roundBitsMask;
    int8 roundingMode;
    floatx80 z;

    aExp = extractFloatx80Exp( a );
    /* Exponent 0x403E or larger: the units bit would be bit 0 of the   */
    /* significand or lower (see lastBitMask below), so nothing can be  */
    /* rounded away.  NaNs go through the usual NaN propagation; all    */
    /* other such values are returned unchanged.                        */
    if ( 0x403E <= aExp ) {
        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN( a, a );
        }
        return a;
    }
    /* Exponent 0x3FFE or smaller: the result is chosen directly from   */
    /* zero and the value packed with exponent 0x3FFF and significand   */
    /* 0x8000000000000000, with the appropriate sign.                   */
    if ( aExp <= 0x3FFE ) {
        /* Zero exponent with no bits set below bit 63 of the           */
        /* significand: returned as is, without raising inexact.        */
        if (    ( aExp == 0 )
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
            return a;
        }
        float_exception_flags |= float_flag_inexact;
        aSign = extractFloatx80Sign( a );
        switch ( float_rounding_mode ) {
         case float_round_nearest_even:
            /* Only exponent 0x3FFE with some bit set below bit 63     */
            /* rounds away from zero; everything else falls through to */
            /* the signed zero below.                                  */
            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        /* Remaining cases (including any other rounding mode) yield a */
        /* zero carrying the sign of `a'.                              */
        return packFloatx80( aSign, 0, 0 );
    }
    /* General case: lastBitMask selects the lowest significand bit    */
    /* that is kept, roundBitsMask selects all the bits below it.      */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    roundingMode = float_rounding_mode;
    if ( roundingMode == float_round_nearest_even ) {
        /* Add half of the last kept bit; if the discarded bits are    */
        /* then all zero (an exact tie) clear the last kept bit so     */
        /* that the result is even.                                    */
        z.low += lastBitMask>>1;
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
    }
    else if ( roundingMode != float_round_to_zero ) {
        /* Directed rounding: bump the magnitude when the sign of the  */
        /* operand and the "round up" selection differ.                */
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
            z.low += roundBitsMask;
        }
    }
    z.low &= ~ roundBitsMask;
    /* A significand of zero here means the addition above wrapped     */
    /* around: step to the next exponent with only bit 63 set.         */
    if ( z.low == 0 ) {
        ++z.high;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
    return z;

}
  2727 +
  2728 +/*
  2729 +-------------------------------------------------------------------------------
  2730 +Returns the result of adding the absolute values of the extended double-
  2731 +precision floating-point values `a' and `b'. If `zSign' is true, the sum is
  2732 +negated before being returned. `zSign' is ignored if the result is a NaN.
  2733 +The addition is performed according to the IEC/IEEE Standard for Binary
  2734 +Floating-point Arithmetic.
  2735 +-------------------------------------------------------------------------------
  2736 +*/
static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
{
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent.  With the maximum exponent it  */
        /* is a NaN (propagated) or an infinity (returned as is).      */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
            return a;
        }
        /* A zero exponent field in `b' is aligned as though it were 1. */
        if ( bExp == 0 ) --expDiff;
        /* Align `b' to `a'; zSig1 receives the second output word of   */
        /* the shift (presumably the bits shifted out, per the helper's */
        /* name -- confirm against its definition).                     */
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent.  A NaN is propagated; for an    */
        /* infinite `b' the result is an infinity with sign `zSign'.    */
        if ( bExp == 0x7FFF ) {
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        /* A zero exponent field in `a' is aligned as though it were 1. */
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents: no alignment is needed. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            /* Both exponent fields are zero: normalize the sum and go */
            /* straight to rounding.                                   */
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        /* Nonzero equal exponents always take the shift-right path     */
        /* (presumably because both operands have bit 63 set, so the    */
        /* 64-bit sum has wrapped).                                     */
        zExp = aExp;
        goto shiftRight1;
    }

    zSig0 = aSig + bSig;

    /* Bit 63 of the sum set: the sum is used as is.  Otherwise fall   */
    /* through, shift right by one and force bit 63 on.                */
    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );

}
  2796 +
  2797 +/*
  2798 +-------------------------------------------------------------------------------
  2799 +Returns the result of subtracting the absolute values of the extended
  2800 +double-precision floating-point values `a' and `b'. If `zSign' is true,
  2801 +the difference is negated before being returned. `zSign' is ignored if the
  2802 +result is a NaN. The subtraction is performed according to the IEC/IEEE
  2803 +Standard for Binary Floating-point Arithmetic.
  2804 +-------------------------------------------------------------------------------
  2805 +*/
static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
{
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    int32 expDiff;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents from here to the `bExpBigger' label. */
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN( a, b );
        }
        /* Neither operand is a NaN, so both are infinities: raise     */
        /* invalid and return the default NaN.                         */
        float_raise( float_flag_invalid );
        z.low = floatx80_default_nan_low;
        z.high = floatx80_default_nan_high;
        return z;
    }
    /* Zero exponent fields are treated as exponent 1. */
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Equal significands: the result is a zero whose sign bit is set  */
    /* only when the rounding mode is float_round_down.                */
    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
 bExpBigger:
    /* `b' has the larger exponent.  A NaN is propagated; for an       */
    /* infinite `b' the result is an infinity with the sign opposite   */
    /* to `zSign'.                                                     */
    if ( bExp == 0x7FFF ) {
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    /* A zero exponent field in `a' is aligned as though it were 1. */
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    /* Compute b - a as a 128-bit difference and flip the result sign. */
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    /* `a' has the larger exponent.  With the maximum exponent it is a */
    /* NaN (propagated) or an infinity (returned as is).               */
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
        return a;
    }
    /* A zero exponent field in `b' is aligned as though it were 1. */
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    /* Compute a - b as a 128-bit difference; the sign stays `zSign'. */
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return
        normalizeRoundAndPackFloatx80(
            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );

}
  2865 +
  2866 +/*
  2867 +-------------------------------------------------------------------------------
  2868 +Returns the result of adding the extended double-precision floating-point
  2869 +values `a' and `b'. The operation is performed according to the IEC/IEEE
  2870 +Standard for Binary Floating-point Arithmetic.
  2871 +-------------------------------------------------------------------------------
  2872 +*/
  2873 +floatx80 floatx80_add( floatx80 a, floatx80 b )
  2874 +{
  2875 + flag aSign, bSign;
  2876 +
  2877 + aSign = extractFloatx80Sign( a );
  2878 + bSign = extractFloatx80Sign( b );
  2879 + if ( aSign == bSign ) {
  2880 + return addFloatx80Sigs( a, b, aSign );
  2881 + }
  2882 + else {
  2883 + return subFloatx80Sigs( a, b, aSign );
  2884 + }
  2885 +
  2886 +}
  2887 +
  2888 +/*
  2889 +-------------------------------------------------------------------------------
  2890 +Returns the result of subtracting the extended double-precision floating-
  2891 +point values `a' and `b'. The operation is performed according to the
  2892 +IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2893 +-------------------------------------------------------------------------------
  2894 +*/
  2895 +floatx80 floatx80_sub( floatx80 a, floatx80 b )
  2896 +{
  2897 + flag aSign, bSign;
  2898 +
  2899 + aSign = extractFloatx80Sign( a );
  2900 + bSign = extractFloatx80Sign( b );
  2901 + if ( aSign == bSign ) {
  2902 + return subFloatx80Sigs( a, b, aSign );
  2903 + }
  2904 + else {
  2905 + return addFloatx80Sigs( a, b, aSign );
  2906 + }
  2907 +
  2908 +}
  2909 +
  2910 +/*
  2911 +-------------------------------------------------------------------------------
  2912 +Returns the result of multiplying the extended double-precision floating-
  2913 +point values `a' and `b'. The operation is performed according to the
  2914 +IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2915 +-------------------------------------------------------------------------------
  2916 +*/
floatx80 floatx80_mul( floatx80 a, floatx80 b )
{
    flag aSign, bSign, zSign;
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    /* The sign of the product is the XOR of the operand signs. */
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* `a' is a NaN or an infinity.  Any NaN operand is propagated. */
        if (    (bits64) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN( a, b );
        }
        /* Infinity times zero is invalid; otherwise signed infinity. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( bExp == 0x7FFF ) {
        /* `b' is a NaN or an infinity and `a' is finite. */
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            /* Zero times infinity: raise invalid, return default NaN. */
            float_raise( float_flag_invalid );
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            return z;
        }
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    /* A zero operand gives a signed zero; an operand with a zero      */
    /* exponent field and nonzero significand is normalized first.     */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    /* Full 128-bit product of the two 64-bit significands. */
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    /* If bit 63 of the high word is clear, shift the product left by  */
    /* one and compensate in the exponent.                             */
    if ( 0 < (sbits64) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return
        roundAndPackFloatx80(
            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );

}
  2969 +
  2970 +/*
  2971 +-------------------------------------------------------------------------------
  2972 +Returns the result of dividing the extended double-precision floating-point
  2973 +value `a' by the corresponding value `b'. The operation is performed
  2974 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  2975 +-------------------------------------------------------------------------------
  2976 +*/
floatx80 floatx80_div( floatx80 a, floatx80 b )
{
    flag aSign, bSign, zSign;
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    bits64 rem0, rem1, rem2, term0, term1, term2;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    /* The sign of the quotient is the XOR of the operand signs. */
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* `a' is a NaN or an infinity. */
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
        if ( bExp == 0x7FFF ) {
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
            /* Infinity divided by infinity is invalid. */
            goto invalid;
        }
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( bExp == 0x7FFF ) {
        /* Finite `a' divided by an infinity is a signed zero. */
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Division by zero: 0/0 is invalid, anything else raises  */
            /* divide-by-zero and returns a signed infinity.           */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise( float_flag_invalid );
                z.low = floatx80_default_nan_low;
                z.high = floatx80_default_nan_high;
                return z;
            }
            float_raise( float_flag_divbyzero );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* If the dividend significand is not smaller than the divisor's,  */
    /* halve it (as a 128-bit value) and compensate in the exponent.   */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High quotient word: start from the estimate, then step it down  */
    /* while the 128-bit remainder is negative.                        */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (sbits64) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low quotient word: the estimate is corrected exactly only when  */
    /* its bits below bit 63 are at most 8 after doubling (presumably  */
    /* the only range in which the estimate's error could affect       */
    /* rounding -- confirm against estimateDiv128To64).                */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (sbits64) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        /* Fold any nonzero remainder into the lowest bit. */
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return
        roundAndPackFloatx80(
            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );

}
  3050 +
  3051 +/*
  3052 +-------------------------------------------------------------------------------
  3053 +Returns the remainder of the extended double-precision floating-point value
  3054 +`a' with respect to the corresponding value `b'. The operation is performed
  3055 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  3056 +-------------------------------------------------------------------------------
  3057 +*/
  3058 +floatx80 floatx80_rem( floatx80 a, floatx80 b )
  3059 +{
  3060 + flag aSign, bSign, zSign;
  3061 + int32 aExp, bExp, expDiff;
  3062 + bits64 aSig0, aSig1, bSig;
  3063 + bits64 q, term0, term1, alternateASig0, alternateASig1;
  3064 + floatx80 z;
  3065 +
  3066 + aSig0 = extractFloatx80Frac( a );
  3067 + aExp = extractFloatx80Exp( a );
  3068 + aSign = extractFloatx80Sign( a );
  3069 + bSig = extractFloatx80Frac( b );
  3070 + bExp = extractFloatx80Exp( b );
  3071 + bSign = extractFloatx80Sign( b );
  3072 + if ( aExp == 0x7FFF ) {
  3073 + if ( (bits64) ( aSig0<<1 )
  3074 + || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
  3075 + return propagateFloatx80NaN( a, b );
  3076 + }
  3077 + goto invalid;
  3078 + }
  3079 + if ( bExp == 0x7FFF ) {
  3080 + if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
  3081 + return a;
  3082 + }
  3083 + if ( bExp == 0 ) {
  3084 + if ( bSig == 0 ) {
  3085 + invalid:
  3086 + float_raise( float_flag_invalid );
  3087 + z.low = floatx80_default_nan_low;
  3088 + z.high = floatx80_default_nan_high;
  3089 + return z;
  3090 + }
  3091 + normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
  3092 + }
  3093 + if ( aExp == 0 ) {
  3094 + if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
  3095 + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
  3096 + }
  3097 + bSig |= LIT64( 0x8000000000000000 );
  3098 + zSign = aSign;
  3099 + expDiff = aExp - bExp;
  3100 + aSig1 = 0;
  3101 + if ( expDiff < 0 ) {
  3102 + if ( expDiff < -1 ) return a;
  3103 + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
  3104 + expDiff = 0;
  3105 + }
  3106 + q = ( bSig <= aSig0 );
  3107 + if ( q ) aSig0 -= bSig;
  3108 + expDiff -= 64;
  3109 + while ( 0 < expDiff ) {
  3110 + q = estimateDiv128To64( aSig0, aSig1, bSig );
  3111 + q = ( 2 < q ) ? q - 2 : 0;
  3112 + mul64To128( bSig, q, &term0, &term1 );
  3113 + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
  3114 + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
  3115 + expDiff -= 62;
  3116 + }
  3117 + expDiff += 64;
  3118 + if ( 0 < expDiff ) {
  3119 + q = estimateDiv128To64( aSig0, aSig1, bSig );
  3120 + q = ( 2 < q ) ? q - 2 : 0;
  3121 + q >>= 64 - expDiff;
  3122 + mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
  3123 + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
  3124 + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
  3125 + while ( le128( term0, term1, aSig0, aSig1 ) ) {
  3126 + ++q;
  3127 + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
  3128 + }
  3129 + }
  3130 + else {
  3131 + term1 = 0;
  3132 + term0 = bSig;
  3133 + }
  3134 + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
  3135 + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
  3136 + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
  3137 + && ( q & 1 ) )
  3138 + ) {
  3139 + aSig0 = alternateASig0;
  3140 + aSig1 = alternateASig1;
  3141 + zSign = ! zSign;
  3142 + }
  3143 + return
  3144 + normalizeRoundAndPackFloatx80(
  3145 + 80, zSign, bExp + expDiff, aSig0, aSig1 );
  3146 +
  3147 +}
  3148 +
  3149 +/*
  3150 +-------------------------------------------------------------------------------
  3151 +Returns the square root of the extended double-precision floating-point
  3152 +value `a'. The operation is performed according to the IEC/IEEE Standard
  3153 +for Binary Floating-point Arithmetic.
  3154 +-------------------------------------------------------------------------------
  3155 +*/
floatx80 floatx80_sqrt( floatx80 a )
{
    flag aSign;
    int32 aExp, zExp;
    bits64 aSig0, aSig1, zSig0, zSig1;
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
    bits64 shiftedRem0, shiftedRem1;
    floatx80 z;

    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaNs are propagated, +infinity is returned as is, and       */
        /* -infinity is invalid.                                       */
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* Negative zero is returned as is; any other negative operand */
        /* raises invalid and returns the default NaN.                 */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise( float_flag_invalid );
        z.low = floatx80_default_nan_low;
        z.high = floatx80_default_nan_high;
        return z;
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Halve the unbiased exponent and re-apply the bias 0x3FFF. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial estimate from the top 32 significand bits, then one     */
    /* refinement step using a 128-by-64-bit division estimate.        */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    zSig0 <<= 31;
    aSig1 = 0;
    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
    /* If bit 63 of the estimate is clear, saturate it to all ones. */
    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
    /* rem = a - zSig0^2; while it is negative, decrement zSig0 and    */
    /* add back 2*zSig0 + 1 (using the decremented zSig0).             */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (sbits64) rem0 < 0 ) {
        --zSig0;
        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
        term1 |= 1;
        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
    }
    /* Estimate the low result word from the remainder. */
    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
    /* The estimate is corrected exactly only when its bits below bit  */
    /* 63 are at most 10 after doubling (presumably the only range in  */
    /* which the estimate's error could affect rounding -- confirm     */
    /* against estimateDiv128To64).                                    */
    if ( (bits64) ( zSig1<<1 ) <= 10 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        /* Subtract 2*zSig0*zSig1 and zSig1^2 from the remainder,      */
        /* carried out as a 192-bit quantity.                          */
        mul64To128( zSig0, zSig1, &term1, &term2 );
        shortShift128Left( term1, term2, 1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        /* While the remainder is negative, decrement zSig1 and add    */
        /* back twice the 128-bit root (zSig0:zSig1) plus one.         */
        while ( (sbits64) rem1 < 0 ) {
            --zSig1;
            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
            term3 |= 1;
            add192(
                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any nonzero remainder into the lowest bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    return
        roundAndPackFloatx80(
            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );

}
  3224 +
  3225 +/*
  3226 +-------------------------------------------------------------------------------
  3227 +Returns 1 if the extended double-precision floating-point value `a' is
  3228 +equal to the corresponding value `b', and 0 otherwise. The comparison is
  3229 +performed according to the IEC/IEEE Standard for Binary Floating-point
  3230 +Arithmetic.
  3231 +-------------------------------------------------------------------------------
  3232 +*/
  3233 +flag floatx80_eq( floatx80 a, floatx80 b )
  3234 +{
  3235 +
  3236 + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
  3237 + && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  3238 + || ( ( extractFloatx80Exp( b ) == 0x7FFF )
  3239 + && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  3240 + ) {
  3241 + if ( floatx80_is_signaling_nan( a )
  3242 + || floatx80_is_signaling_nan( b ) ) {
  3243 + float_raise( float_flag_invalid );
  3244 + }
  3245 + return 0;
  3246 + }
  3247 + return
  3248 + ( a.low == b.low )
  3249 + && ( ( a.high == b.high )
  3250 + || ( ( a.low == 0 )
  3251 + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
  3252 + );
  3253 +
  3254 +}
  3255 +
  3256 +/*
  3257 +-------------------------------------------------------------------------------
  3258 +Returns 1 if the extended double-precision floating-point value `a' is
  3259 +less than or equal to the corresponding value `b', and 0 otherwise. The
  3260 +comparison is performed according to the IEC/IEEE Standard for Binary
  3261 +Floating-point Arithmetic.
  3262 +-------------------------------------------------------------------------------
  3263 +*/
  3264 +flag floatx80_le( floatx80 a, floatx80 b )
  3265 +{
  3266 + flag aSign, bSign;
  3267 +
  3268 + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
  3269 + && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  3270 + || ( ( extractFloatx80Exp( b ) == 0x7FFF )
  3271 + && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  3272 + ) {
  3273 + float_raise( float_flag_invalid );
  3274 + return 0;
  3275 + }
  3276 + aSign = extractFloatx80Sign( a );
  3277 + bSign = extractFloatx80Sign( b );
  3278 + if ( aSign != bSign ) {
  3279 + return
  3280 + aSign
  3281 + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  3282 + == 0 );
  3283 + }
  3284 + return
  3285 + aSign ? le128( b.high, b.low, a.high, a.low )
  3286 + : le128( a.high, a.low, b.high, b.low );
  3287 +
  3288 +}
  3289 +
  3290 +/*
  3291 +-------------------------------------------------------------------------------
  3292 +Returns 1 if the extended double-precision floating-point value `a' is
  3293 +less than the corresponding value `b', and 0 otherwise. The comparison
  3294 +is performed according to the IEC/IEEE Standard for Binary Floating-point
  3295 +Arithmetic.
  3296 +-------------------------------------------------------------------------------
  3297 +*/
  3298 +flag floatx80_lt( floatx80 a, floatx80 b )
  3299 +{
  3300 + flag aSign, bSign;
  3301 +
  3302 + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
  3303 + && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  3304 + || ( ( extractFloatx80Exp( b ) == 0x7FFF )
  3305 + && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  3306 + ) {
  3307 + float_raise( float_flag_invalid );
  3308 + return 0;
  3309 + }
  3310 + aSign = extractFloatx80Sign( a );
  3311 + bSign = extractFloatx80Sign( b );
  3312 + if ( aSign != bSign ) {
  3313 + return
  3314 + aSign
  3315 + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  3316 + != 0 );
  3317 + }
  3318 + return
  3319 + aSign ? lt128( b.high, b.low, a.high, a.low )
  3320 + : lt128( a.high, a.low, b.high, b.low );
  3321 +
  3322 +}
  3323 +
  3324 +/*
  3325 +-------------------------------------------------------------------------------
  3326 +Returns 1 if the extended double-precision floating-point value `a' is equal
  3327 +to the corresponding value `b', and 0 otherwise. The invalid exception is
  3328 +raised if either operand is a NaN. Otherwise, the comparison is performed
  3329 +according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  3330 +-------------------------------------------------------------------------------
  3331 +*/
  3332 +flag floatx80_eq_signaling( floatx80 a, floatx80 b )
  3333 +{
  3334 +
  3335 + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
  3336 + && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  3337 + || ( ( extractFloatx80Exp( b ) == 0x7FFF )
  3338 + && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  3339 + ) {
  3340 + float_raise( float_flag_invalid );
  3341 + return 0;
  3342 + }
  3343 + return
  3344 + ( a.low == b.low )
  3345 + && ( ( a.high == b.high )
  3346 + || ( ( a.low == 0 )
  3347 + && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
  3348 + );
  3349 +
  3350 +}
  3351 +
  3352 +/*
  3353 +-------------------------------------------------------------------------------
  3354 +Returns 1 if the extended double-precision floating-point value `a' is less
  3355 +than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
  3356 +do not cause an exception. Otherwise, the comparison is performed according
  3357 +to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
  3358 +-------------------------------------------------------------------------------
  3359 +*/
  3360 +flag floatx80_le_quiet( floatx80 a, floatx80 b )
  3361 +{
  3362 + flag aSign, bSign;
  3363 +
  3364 + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
  3365 + && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  3366 + || ( ( extractFloatx80Exp( b ) == 0x7FFF )
  3367 + && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  3368 + ) {
  3369 + if ( floatx80_is_signaling_nan( a )
  3370 + || floatx80_is_signaling_nan( b ) ) {
  3371 + float_raise( float_flag_invalid );
  3372 + }
  3373 + return 0;
  3374 + }
  3375 + aSign = extractFloatx80Sign( a );
  3376 + bSign = extractFloatx80Sign( b );
  3377 + if ( aSign != bSign ) {
  3378 + return
  3379 + aSign
  3380 + || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  3381 + == 0 );
  3382 + }
  3383 + return
  3384 + aSign ? le128( b.high, b.low, a.high, a.low )
  3385 + : le128( a.high, a.low, b.high, b.low );
  3386 +
  3387 +}
  3388 +
  3389 +/*
  3390 +-------------------------------------------------------------------------------
  3391 +Returns 1 if the extended double-precision floating-point value `a' is less
  3392 +than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
  3393 +an exception. Otherwise, the comparison is performed according to the
  3394 +IEC/IEEE Standard for Binary Floating-point Arithmetic.
  3395 +-------------------------------------------------------------------------------
  3396 +*/
  3397 +flag floatx80_lt_quiet( floatx80 a, floatx80 b )
  3398 +{
  3399 + flag aSign, bSign;
  3400 +
  3401 + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
  3402 + && (bits64) ( extractFloatx80Frac( a )<<1 ) )
  3403 + || ( ( extractFloatx80Exp( b ) == 0x7FFF )
  3404 + && (bits64) ( extractFloatx80Frac( b )<<1 ) )
  3405 + ) {
  3406 + if ( floatx80_is_signaling_nan( a )
  3407 + || floatx80_is_signaling_nan( b ) ) {
  3408 + float_raise( float_flag_invalid );
  3409 + }
  3410 + return 0;
  3411 + }
  3412 + aSign = extractFloatx80Sign( a );
  3413 + bSign = extractFloatx80Sign( b );
  3414 + if ( aSign != bSign ) {
  3415 + return
  3416 + aSign
  3417 + && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
  3418 + != 0 );
  3419 + }
  3420 + return
  3421 + aSign ? lt128( b.high, b.low, a.high, a.low )
  3422 + : lt128( a.high, a.low, b.high, b.low );
  3423 +
  3424 +}
  3425 +
  3426 +#endif
  3427 +
... ...
target-arm/nwfpe/softfloat.h 0 → 100644
  1 +
  2 +/*
  3 +===============================================================================
  4 +
  5 +This C header file is part of the SoftFloat IEC/IEEE Floating-point
  6 +Arithmetic Package, Release 2.
  7 +
  8 +Written by John R. Hauser. This work was made possible in part by the
  9 +International Computer Science Institute, located at Suite 600, 1947 Center
  10 +Street, Berkeley, California 94704. Funding was partially provided by the
  11 +National Science Foundation under grant MIP-9311980. The original version
  12 +of this code was written as part of a project to build a fixed-point vector
  13 +processor in collaboration with the University of California at Berkeley,
  14 +overseen by Profs. Nelson Morgan and John Wawrzynek. More information
  15 +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  16 +arithmetic/softfloat.html'.
  17 +
  18 +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
  19 +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  20 +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
  21 +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  22 +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  23 +
  24 +Derivative works are acceptable, even for commercial purposes, so long as
  25 +(1) they include prominent notice that the work is derivative, and (2) they
  26 +include prominent notice akin to these three paragraphs for those parts of
  27 +this code that are retained.
  28 +
  29 +===============================================================================
  30 +*/
  31 +
  32 +#ifndef __SOFTFLOAT_H__
  33 +#define __SOFTFLOAT_H__
  34 +
  35 +/*
  36 +-------------------------------------------------------------------------------
  37 +The macro `FLOATX80' must be defined to enable the extended double-precision
  38 +floating-point format `floatx80'. If this macro is not defined, the
  39 +`floatx80' type will not be defined, and none of the functions that either
  40 +input or output the `floatx80' type will be defined.
  41 +-------------------------------------------------------------------------------
  42 +*/
  43 +#define FLOATX80
  44 +
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point types.

`float32' holds a single-precision bit pattern and `float64' a
double-precision bit pattern.  `float32' is based on `unsigned int' rather
than `unsigned long int': the latter is 64 bits wide on LP64 hosts, which
would make the type twice as large as the format it represents.

`floatx80' is split into a 16-bit `high' word and a 64-bit `low' word
(presumably sign/exponent and significand respectively -- confirm against
the extractFloatx80* helpers).
-------------------------------------------------------------------------------
*/
typedef unsigned int float32;
typedef unsigned long long float64;
typedef struct {
    unsigned short high;
    unsigned long long low;
} floatx80;
  56 +
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point underflow tininess-detection mode.  The
enumeration names the two detection modes; the mode variable itself is only
declared here and is defined elsewhere.
-------------------------------------------------------------------------------
*/
enum {
    float_tininess_after_rounding  = 0,
    float_tininess_before_rounding = 1
};
extern signed char float_detect_tininess;
  67 +
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point rounding mode.  The enumeration names the
four rounding modes; the mode variable itself is only declared here and is
defined elsewhere.
-------------------------------------------------------------------------------
*/
enum {
    float_round_nearest_even = 0,
    float_round_to_zero      = 1,
    float_round_down         = 2,
    float_round_up           = 3
};
extern signed char float_rounding_mode;
  80 +
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point exception flags.

Each flag occupies its own bit.  The bit assignment below was changed to
match the bit order in the FPA11 (ScottB: November 4, 1998).  For reference,
the assignment before that change was:

    float_flag_inexact   =  1
    float_flag_underflow =  2
    float_flag_overflow  =  4
    float_flag_divbyzero =  8
    float_flag_invalid   = 16
-------------------------------------------------------------------------------
*/
enum {
    float_flag_invalid   = 0x01,
    float_flag_divbyzero = 0x02,
    float_flag_overflow  = 0x04,
    float_flag_underflow = 0x08,
    float_flag_inexact   = 0x10
};
extern signed char float_exception_flags;
  106 +
/*
-------------------------------------------------------------------------------
Raises any or all of the software IEC/IEEE floating-point exception flags.
`flags' is a combination of the float_flag_* values.
-------------------------------------------------------------------------------
*/
void float_raise( signed char flags );
  114 +
  115 +/*
  116 +-------------------------------------------------------------------------------
  117 +Software IEC/IEEE integer-to-floating-point conversion routines.
  118 +-------------------------------------------------------------------------------
  119 +*/
  120 +float32 int32_to_float32( signed int );
  121 +float64 int32_to_float64( signed int );
  122 +#ifdef FLOATX80
  123 +floatx80 int32_to_floatx80( signed int );
  124 +#endif
  125 +
  126 +/*
  127 +-------------------------------------------------------------------------------
  128 +Software IEC/IEEE single-precision conversion routines.
  129 +-------------------------------------------------------------------------------
  130 +*/
  131 +signed int float32_to_int32( float32 );
  132 +signed int float32_to_int32_round_to_zero( float32 );
  133 +float64 float32_to_float64( float32 );
  134 +#ifdef FLOATX80
  135 +floatx80 float32_to_floatx80( float32 );
  136 +#endif
  137 +
  138 +/*
  139 +-------------------------------------------------------------------------------
  140 +Software IEC/IEEE single-precision operations.
  141 +-------------------------------------------------------------------------------
  142 +*/
  143 +float32 float32_round_to_int( float32 );
  144 +float32 float32_add( float32, float32 );
  145 +float32 float32_sub( float32, float32 );
  146 +float32 float32_mul( float32, float32 );
  147 +float32 float32_div( float32, float32 );
  148 +float32 float32_rem( float32, float32 );
  149 +float32 float32_sqrt( float32 );
  150 +char float32_eq( float32, float32 );
  151 +char float32_le( float32, float32 );
  152 +char float32_lt( float32, float32 );
  153 +char float32_eq_signaling( float32, float32 );
  154 +char float32_le_quiet( float32, float32 );
  155 +char float32_lt_quiet( float32, float32 );
  156 +char float32_is_signaling_nan( float32 );
  157 +
  158 +/*
  159 +-------------------------------------------------------------------------------
  160 +Software IEC/IEEE double-precision conversion routines.
  161 +-------------------------------------------------------------------------------
  162 +*/
  163 +signed int float64_to_int32( float64 );
  164 +signed int float64_to_int32_round_to_zero( float64 );
  165 +float32 float64_to_float32( float64 );
  166 +#ifdef FLOATX80
  167 +floatx80 float64_to_floatx80( float64 );
  168 +#endif
  169 +
  170 +/*
  171 +-------------------------------------------------------------------------------
  172 +Software IEC/IEEE double-precision operations.
  173 +-------------------------------------------------------------------------------
  174 +*/
  175 +float64 float64_round_to_int( float64 );
  176 +float64 float64_add( float64, float64 );
  177 +float64 float64_sub( float64, float64 );
  178 +float64 float64_mul( float64, float64 );
  179 +float64 float64_div( float64, float64 );
  180 +float64 float64_rem( float64, float64 );
  181 +float64 float64_sqrt( float64 );
  182 +char float64_eq( float64, float64 );
  183 +char float64_le( float64, float64 );
  184 +char float64_lt( float64, float64 );
  185 +char float64_eq_signaling( float64, float64 );
  186 +char float64_le_quiet( float64, float64 );
  187 +char float64_lt_quiet( float64, float64 );
  188 +char float64_is_signaling_nan( float64 );
  189 +
  190 +#ifdef FLOATX80
  191 +
  192 +/*
  193 +-------------------------------------------------------------------------------
  194 +Software IEC/IEEE extended double-precision conversion routines.
  195 +-------------------------------------------------------------------------------
  196 +*/
  197 +signed int floatx80_to_int32( floatx80 );
  198 +signed int floatx80_to_int32_round_to_zero( floatx80 );
  199 +float32 floatx80_to_float32( floatx80 );
  200 +float64 floatx80_to_float64( floatx80 );
  201 +
/*
-------------------------------------------------------------------------------
Software IEC/IEEE extended double-precision rounding precision.  The only
valid values are 32, 64, and 80.  The variable is declared here and defined
elsewhere.
-------------------------------------------------------------------------------
*/
extern signed char floatx80_rounding_precision;
  209 +
  210 +/*
  211 +-------------------------------------------------------------------------------
  212 +Software IEC/IEEE extended double-precision operations.
  213 +-------------------------------------------------------------------------------
  214 +*/
  215 +floatx80 floatx80_round_to_int( floatx80 );
  216 +floatx80 floatx80_add( floatx80, floatx80 );
  217 +floatx80 floatx80_sub( floatx80, floatx80 );
  218 +floatx80 floatx80_mul( floatx80, floatx80 );
  219 +floatx80 floatx80_div( floatx80, floatx80 );
  220 +floatx80 floatx80_rem( floatx80, floatx80 );
  221 +floatx80 floatx80_sqrt( floatx80 );
  222 +char floatx80_eq( floatx80, floatx80 );
  223 +char floatx80_le( floatx80, floatx80 );
  224 +char floatx80_lt( floatx80, floatx80 );
  225 +char floatx80_eq_signaling( floatx80, floatx80 );
  226 +char floatx80_le_quiet( floatx80, floatx80 );
  227 +char floatx80_lt_quiet( floatx80, floatx80 );
  228 +char floatx80_is_signaling_nan( floatx80 );
  229 +
  230 +#endif
  231 +
  232 +#endif
... ...