Commit 7524c84d82bd9d66ba14bac7da0456d02e47ea66
1 parent: 2d0e944d
Fix guest x86/amd64 helper_fprem/helper_fprem1, by Julian Seward.
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2588 c046a42c-6fe2-441c-8c8c-71466251a162
Showing 1 changed file with 70 additions and 27 deletions
target-i386/helper.c
| @@ -3139,30 +3139,51 @@ void helper_fprem1(void) | @@ -3139,30 +3139,51 @@ void helper_fprem1(void) | ||
| 3139 | CPU86_LDouble dblq, fpsrcop, fptemp; | 3139 | CPU86_LDouble dblq, fpsrcop, fptemp; |
| 3140 | CPU86_LDoubleU fpsrcop1, fptemp1; | 3140 | CPU86_LDoubleU fpsrcop1, fptemp1; |
| 3141 | int expdif; | 3141 | int expdif; |
| 3142 | - int q; | 3142 | + signed long long int q; |
| 3143 | + | ||
| 3144 | + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { | ||
| 3145 | + ST0 = 0.0 / 0.0; /* NaN */ | ||
| 3146 | + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ | ||
| 3147 | + return; | ||
| 3148 | + } | ||
| 3143 | 3149 | ||
| 3144 | fpsrcop = ST0; | 3150 | fpsrcop = ST0; |
| 3145 | fptemp = ST1; | 3151 | fptemp = ST1; |
| 3146 | fpsrcop1.d = fpsrcop; | 3152 | fpsrcop1.d = fpsrcop; |
| 3147 | fptemp1.d = fptemp; | 3153 | fptemp1.d = fptemp; |
| 3148 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); | 3154 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); |
| 3155 | + | ||
| 3156 | + if (expdif < 0) { | ||
| 3157 | + /* optimisation? taken from the AMD docs */ | ||
| 3158 | + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ | ||
| 3159 | + /* ST0 is unchanged */ | ||
| 3160 | + return; | ||
| 3161 | + } | ||
| 3162 | + | ||
| 3149 | if (expdif < 53) { | 3163 | if (expdif < 53) { |
| 3150 | dblq = fpsrcop / fptemp; | 3164 | dblq = fpsrcop / fptemp; |
| 3151 | - dblq = (dblq < 0.0)? ceil(dblq): floor(dblq); | ||
| 3152 | - ST0 = fpsrcop - fptemp*dblq; | ||
| 3153 | - q = (int)dblq; /* cutting off top bits is assumed here */ | 3165 | + /* round dblq towards nearest integer */ |
| 3166 | + dblq = rint(dblq); | ||
| 3167 | + ST0 = fpsrcop - fptemp * dblq; | ||
| 3168 | + | ||
| 3169 | + /* convert dblq to q by truncating towards zero */ | ||
| 3170 | + if (dblq < 0.0) | ||
| 3171 | + q = (signed long long int)(-dblq); | ||
| 3172 | + else | ||
| 3173 | + q = (signed long long int)dblq; | ||
| 3174 | + | ||
| 3154 | env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ | 3175 | env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ |
| 3155 | - /* (C0,C1,C3) <-- (q2,q1,q0) */ | ||
| 3156 | - env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */ | ||
| 3157 | - env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */ | ||
| 3158 | - env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */ | 3176 | + /* (C0,C3,C1) <-- (q2,q1,q0) */ |
| 3177 | + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ | ||
| 3178 | + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ | ||
| 3179 | + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ | ||
| 3159 | } else { | 3180 | } else { |
| 3160 | env->fpus |= 0x400; /* C2 <-- 1 */ | 3181 | env->fpus |= 0x400; /* C2 <-- 1 */ |
| 3161 | - fptemp = pow(2.0, expdif-50); | 3182 | + fptemp = pow(2.0, expdif - 50); |
| 3162 | fpsrcop = (ST0 / ST1) / fptemp; | 3183 | fpsrcop = (ST0 / ST1) / fptemp; |
| 3163 | - /* fpsrcop = integer obtained by rounding to the nearest */ | ||
| 3164 | - fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)? | ||
| 3165 | - floor(fpsrcop): ceil(fpsrcop); | 3184 | + /* fpsrcop = integer obtained by chopping */ |
| 3185 | + fpsrcop = (fpsrcop < 0.0) ? | ||
| 3186 | + -(floor(fabs(fpsrcop))) : floor(fpsrcop); | ||
| 3166 | ST0 -= (ST1 * fpsrcop * fptemp); | 3187 | ST0 -= (ST1 * fpsrcop * fptemp); |
| 3167 | } | 3188 | } |
| 3168 | } | 3189 | } |
| @@ -3172,30 +3193,52 @@ void helper_fprem(void) | @@ -3172,30 +3193,52 @@ void helper_fprem(void) | ||
| 3172 | CPU86_LDouble dblq, fpsrcop, fptemp; | 3193 | CPU86_LDouble dblq, fpsrcop, fptemp; |
| 3173 | CPU86_LDoubleU fpsrcop1, fptemp1; | 3194 | CPU86_LDoubleU fpsrcop1, fptemp1; |
| 3174 | int expdif; | 3195 | int expdif; |
| 3175 | - int q; | ||
| 3176 | - | ||
| 3177 | - fpsrcop = ST0; | ||
| 3178 | - fptemp = ST1; | 3196 | + signed long long int q; |
| 3197 | + | ||
| 3198 | + if (isinf(ST0) || isnan(ST0) || isnan(ST1) || (ST1 == 0.0)) { | ||
| 3199 | + ST0 = 0.0 / 0.0; /* NaN */ | ||
| 3200 | + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ | ||
| 3201 | + return; | ||
| 3202 | + } | ||
| 3203 | + | ||
| 3204 | + fpsrcop = (CPU86_LDouble)ST0; | ||
| 3205 | + fptemp = (CPU86_LDouble)ST1; | ||
| 3179 | fpsrcop1.d = fpsrcop; | 3206 | fpsrcop1.d = fpsrcop; |
| 3180 | fptemp1.d = fptemp; | 3207 | fptemp1.d = fptemp; |
| 3181 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); | 3208 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); |
| 3209 | + | ||
| 3210 | + if (expdif < 0) { | ||
| 3211 | + /* optimisation? taken from the AMD docs */ | ||
| 3212 | + env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ | ||
| 3213 | + /* ST0 is unchanged */ | ||
| 3214 | + return; | ||
| 3215 | + } | ||
| 3216 | + | ||
| 3182 | if ( expdif < 53 ) { | 3217 | if ( expdif < 53 ) { |
| 3183 | - dblq = fpsrcop / fptemp; | ||
| 3184 | - dblq = (dblq < 0.0)? ceil(dblq): floor(dblq); | ||
| 3185 | - ST0 = fpsrcop - fptemp*dblq; | ||
| 3186 | - q = (int)dblq; /* cutting off top bits is assumed here */ | 3218 | + dblq = fpsrcop/*ST0*/ / fptemp/*ST1*/; |
| 3219 | + /* round dblq towards zero */ | ||
| 3220 | + dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq); | ||
| 3221 | + ST0 = fpsrcop/*ST0*/ - fptemp * dblq; | ||
| 3222 | + | ||
| 3223 | + /* convert dblq to q by truncating towards zero */ | ||
| 3224 | + if (dblq < 0.0) | ||
| 3225 | + q = (signed long long int)(-dblq); | ||
| 3226 | + else | ||
| 3227 | + q = (signed long long int)dblq; | ||
| 3228 | + | ||
| 3187 | env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ | 3229 | env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */ |
| 3188 | - /* (C0,C1,C3) <-- (q2,q1,q0) */ | ||
| 3189 | - env->fpus |= (q&0x4) << 6; /* (C0) <-- q2 */ | ||
| 3190 | - env->fpus |= (q&0x2) << 8; /* (C1) <-- q1 */ | ||
| 3191 | - env->fpus |= (q&0x1) << 14; /* (C3) <-- q0 */ | 3230 | + /* (C0,C3,C1) <-- (q2,q1,q0) */ |
| 3231 | + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ | ||
| 3232 | + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ | ||
| 3233 | + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ | ||
| 3192 | } else { | 3234 | } else { |
| 3235 | + int N = 32 + (expdif % 32); /* as per AMD docs */ | ||
| 3193 | env->fpus |= 0x400; /* C2 <-- 1 */ | 3236 | env->fpus |= 0x400; /* C2 <-- 1 */ |
| 3194 | - fptemp = pow(2.0, expdif-50); | 3237 | + fptemp = pow(2.0, (double)(expdif - N)); |
| 3195 | fpsrcop = (ST0 / ST1) / fptemp; | 3238 | fpsrcop = (ST0 / ST1) / fptemp; |
| 3196 | /* fpsrcop = integer obtained by chopping */ | 3239 | /* fpsrcop = integer obtained by chopping */ |
| 3197 | - fpsrcop = (fpsrcop < 0.0)? | ||
| 3198 | - -(floor(fabs(fpsrcop))): floor(fpsrcop); | 3240 | + fpsrcop = (fpsrcop < 0.0) ? |
| 3241 | + -(floor(fabs(fpsrcop))) : floor(fpsrcop); | ||
| 3199 | ST0 -= (ST1 * fpsrcop * fptemp); | 3242 | ST0 -= (ST1 * fpsrcop * fptemp); |
| 3200 | } | 3243 | } |
| 3201 | } | 3244 | } |