Commit 7524c84d82bd9d66ba14bac7da0456d02e47ea66

Authored by ths
1 parent 2d0e944d

Fix guest x86/amd64 helper_fprem/helper_fprem1, by Julian Seward.


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2588 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 70 additions and 27 deletions
target-i386/helper.c
... ... @@ -3139,30 +3139,51 @@ void helper_fprem1(void)
3139 3139 CPU86_LDouble dblq, fpsrcop, fptemp;
3140 3140 CPU86_LDoubleU fpsrcop1, fptemp1;
3141 3141 int expdif;
3142   - int q;
  3142 + signed long long int q;
  3143 +
  3144 + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
  3145 + ST0 = 0.0 / 0.0; /* NaN */
  3146 + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
  3147 + return;
  3148 + }
3143 3149  
3144 3150 fpsrcop = ST0;
3145 3151 fptemp = ST1;
3146 3152 fpsrcop1.d = fpsrcop;
3147 3153 fptemp1.d = fptemp;
3148 3154 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
  3155 +
  3156 + if (expdif < 0) {
  3157 + /* optimisation? taken from the AMD docs */
  3158 + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
  3159 + /* ST0 is unchanged */
  3160 + return;
  3161 + }
  3162 +
3149 3163 if (expdif < 53) {
3150 3164 dblq = fpsrcop / fptemp;
3151   - dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
3152   - ST0 = fpsrcop - fptemp*dblq;
3153   - q = (int)dblq; /* cutting off top bits is assumed here */
  3165 + /* round dblq towards nearest integer */
  3166 + dblq = rint(dblq);
  3167 + ST0 = fpsrcop - fptemp * dblq;
  3168 +
  3169 + /* dblq is already integral here; store its magnitude |dblq| in q */
  3170 + if (dblq < 0.0)
  3171 + q = (signed long long int)(-dblq);
  3172 + else
  3173 + q = (signed long long int)dblq;
  3174 +
3154 3175 env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
3155   - /* (C0,C1,C3) <-- (q2,q1,q0) */
3156   - env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
3157   - env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
3158   - env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
  3176 + /* (C0,C3,C1) <-- (q2,q1,q0) */
  3177 + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
  3178 + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
  3179 + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
3159 3180 } else {
3160 3181 env->fpus |= 0x400; /* C2 <-- 1 */
3161   - fptemp = pow(2.0, expdif-50);
  3182 + fptemp = pow(2.0, expdif - 50);
3162 3183 fpsrcop = (ST0 / ST1) / fptemp;
3163   - /* fpsrcop = integer obtained by rounding to the nearest */
3164   - fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)?
3165   - floor(fpsrcop): ceil(fpsrcop);
  3184 + /* fpsrcop = integer obtained by chopping */
  3185 + fpsrcop = (fpsrcop < 0.0) ?
  3186 + -(floor(fabs(fpsrcop))) : floor(fpsrcop);
3166 3187 ST0 -= (ST1 * fpsrcop * fptemp);
3167 3188 }
3168 3189 }
... ... @@ -3172,30 +3193,52 @@ void helper_fprem(void)
3172 3193 CPU86_LDouble dblq, fpsrcop, fptemp;
3173 3194 CPU86_LDoubleU fpsrcop1, fptemp1;
3174 3195 int expdif;
3175   - int q;
3176   -
3177   - fpsrcop = ST0;
3178   - fptemp = ST1;
  3196 + signed long long int q;
  3197 +
  3198 + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) {
  3199 + ST0 = 0.0 / 0.0; /* NaN */
  3200 + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
  3201 + return;
  3202 + }
  3203 +
  3204 + fpsrcop = (CPU86_LDouble)ST0;
  3205 + fptemp = (CPU86_LDouble)ST1;
3179 3206 fpsrcop1.d = fpsrcop;
3180 3207 fptemp1.d = fptemp;
3181 3208 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
  3209 +
  3210 + if (expdif < 0) {
  3211 + /* optimisation? taken from the AMD docs */
  3212 + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
  3213 + /* ST0 is unchanged */
  3214 + return;
  3215 + }
  3216 +
3182 3217 if ( expdif < 53 ) {
3183   - dblq = fpsrcop / fptemp;
3184   - dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
3185   - ST0 = fpsrcop - fptemp*dblq;
3186   - q = (int)dblq; /* cutting off top bits is assumed here */
  3218 + dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/;
  3219 + /* round dblq towards zero */
  3220 + dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
  3221 + ST0 = fpsrcop/*ST0*/ - fptemp * dblq;
  3222 +
  3223 + /* dblq is already integral here; store its magnitude |dblq| in q */
  3224 + if (dblq < 0.0)
  3225 + q = (signed long long int)(-dblq);
  3226 + else
  3227 + q = (signed long long int)dblq;
  3228 +
3187 3229 env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
3188   - /* (C0,C1,C3) <-- (q2,q1,q0) */
3189   - env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */
3190   - env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */
3191   - env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */
  3230 + /* (C0,C3,C1) <-- (q2,q1,q0) */
  3231 + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
  3232 + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
  3233 + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
3192 3234 } else {
  3235 + int N = 32 + (expdif % 32); /* as per AMD docs */
3193 3236 env->fpus |= 0x400; /* C2 <-- 1 */
3194   - fptemp = pow(2.0, expdif-50);
  3237 + fptemp = pow(2.0, (double)(expdif - N));
3195 3238 fpsrcop = (ST0 / ST1) / fptemp;
3196 3239 /* fpsrcop = integer obtained by chopping */
3197   - fpsrcop = (fpsrcop < 0.0)?
3198   - -(floor(fabs(fpsrcop))): floor(fpsrcop);
  3240 + fpsrcop = (fpsrcop < 0.0) ?
  3241 + -(floor(fabs(fpsrcop))) : floor(fpsrcop);
3199 3242 ST0 -= (ST1 * fpsrcop * fptemp);
3200 3243 }
3201 3244 }
... ...