target/i386: fix IEEE SSE floating-point exception raising

The SSE instruction implementations all fail to raise the expected
IEEE floating-point exceptions because they do nothing to convert the
exception state from the softfloat machinery into the exception flags
in MXCSR.

Fix this by adding such conversions. Unlike for x87, emulated SSE
floating-point operations might be optimized using hardware floating
point on the host, and so a different approach is taken that is
compatible with such optimizations. The required invariant is that
all exceptions set in env->sse_status (other than "denormal operand",
for which the SSE semantics are different from those in the softfloat
code) are ones that are set in the MXCSR; the emulated MXCSR is
updated lazily when code reads MXCSR, while when code sets MXCSR, the
exceptions in env->sse_status are set accordingly.

A few instructions do not raise all the exceptions that would be
raised by the softfloat code, and those instructions are made to save
and restore the softfloat exception state accordingly.

Nothing is done about "denormal operand"; setting that (only for the
case when input denormals are *not* flushed to zero, the opposite of
the logic in the softfloat code for such an exception) will require
custom code for relevant instructions, or else architecture-specific
conditionals in the softfloat code for when to set such an exception
together with custom code for various SSE conversion and rounding
instructions that do not set that exception.

Nothing is done about trapping exceptions (for which there is minimal
and largely broken support in QEMU's emulation in the x87 case and no
support at all in the SSE case).

Backports commit 418b0f93d12a1589d5031405de857844f32e9ccc from qemu
This commit is contained in:
Joseph Myers 2021-02-25 23:21:30 -05:00 committed by Lioncash
parent fd5b0dd456
commit cf54c51869
6 changed files with 55 additions and 12 deletions

View file

@ -1991,6 +1991,7 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env)
/* fpu_helper.c */ /* fpu_helper.c */
void update_fp_status(CPUX86State *env); void update_fp_status(CPUX86State *env);
void update_mxcsr_status(CPUX86State *env); void update_mxcsr_status(CPUX86State *env);
void update_mxcsr_from_sse_status(CPUX86State *env);
static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr) static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
{ {

View file

@ -2494,6 +2494,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
/*
 * Store the MXCSR portion of the save area starting at guest address ptr.
 *
 * env->mxcsr is refreshed from the pending softfloat exception flags
 * first, so the value written includes exceptions raised since MXCSR
 * was last synchronised.  ra is passed through to the guest store
 * helpers unchanged.
 */
static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    const target_ulong mxcsr_addr = ptr + XO(legacy.mxcsr);
    const target_ulong mask_addr = ptr + XO(legacy.mxcsr_mask);

    /* Must come before the store: MXCSR exception bits are updated lazily. */
    update_mxcsr_from_sse_status(env);

    cpu_stl_data_ra(env, mxcsr_addr, env->mxcsr, ra);
    /* The MXCSR_MASK field is always written as 0x0000ffff. */
    cpu_stl_data_ra(env, mask_addr, 0x0000ffff, ra);
}
@ -2924,6 +2925,14 @@ void update_mxcsr_status(CPUX86State *env)
} }
set_float_rounding_mode(rnd_type, &env->sse_status); set_float_rounding_mode(rnd_type, &env->sse_status);
/* Set exception flags. */
set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
(mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
(mxcsr & FPUS_OE ? float_flag_overflow : 0) |
(mxcsr & FPUS_UE ? float_flag_underflow : 0) |
(mxcsr & FPUS_PE ? float_flag_inexact : 0),
&env->sse_status);
/* set denormals are zero */ /* set denormals are zero */
set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
@ -2931,6 +2940,32 @@ void update_mxcsr_status(CPUX86State *env)
set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
} }
/*
 * Merge the exception flags accumulated in env->sse_status into the
 * exception bits of env->mxcsr.
 *
 * Bits are only ever ORed into env->mxcsr; nothing is cleared here.
 * Nothing is done unless tcg_enabled() reports true.
 */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    uint8_t sf_flags;

    if (!tcg_enabled()) {
        return;
    }

    sf_flags = get_float_exception_flags(&env->sse_status);

    /*
     * float_flag_input_denormal is deliberately not translated: softfloat
     * raises it only when an input denormal is flushed to zero, whereas
     * the MXCSR denormal flag is raised only when the input is NOT
     * flushed, i.e. the two have opposite meanings.
     */
    if (sf_flags & float_flag_invalid) {
        env->mxcsr |= FPUS_IE;
    }
    if (sf_flags & float_flag_divbyzero) {
        env->mxcsr |= FPUS_ZE;
    }
    if (sf_flags & float_flag_overflow) {
        env->mxcsr |= FPUS_OE;
    }
    if (sf_flags & float_flag_underflow) {
        env->mxcsr |= FPUS_UE;
    }
    if (sf_flags & float_flag_inexact) {
        env->mxcsr |= FPUS_PE;
    }
    /* An output denormal is reported as both underflow and inexact. */
    if (sf_flags & float_flag_output_denormal) {
        env->mxcsr |= FPUS_UE | FPUS_PE;
    }
}
/*
 * Helper entry point (declared with DEF_HELPER_1(update_mxcsr, void, env))
 * that brings env->mxcsr up to date with the exception flags pending in
 * env->sse_status.  It simply forwards to update_mxcsr_from_sse_status().
 * The translator emits a call to it before loading env->mxcsr for a store
 * to guest memory.
 */
void helper_update_mxcsr(CPUX86State *env)
{
update_mxcsr_from_sse_status(env);
}
void helper_ldmxcsr(CPUX86State *env, uint32_t val) void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{ {
cpu_set_mxcsr(env, val); cpu_set_mxcsr(env, val);

View file

@ -350,6 +350,7 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
for(i = 0; i < 8; i++) { for(i = 0; i < 8; i++) {
fptag |= ((!env->fptags[i]) << i); fptag |= ((!env->fptags[i]) << i);
} }
update_mxcsr_from_sse_status(env);
cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n", cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n",
env->fpuc, env->fpuc,
(env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11, (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11,

View file

@ -209,6 +209,7 @@ DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl)
/* MMX/SSE */ /* MMX/SSE */
DEF_HELPER_2(ldmxcsr, void, env, i32) DEF_HELPER_2(ldmxcsr, void, env, i32)
DEF_HELPER_1(update_mxcsr, void, env)
DEF_HELPER_1(enter_mmx, void, env) DEF_HELPER_1(enter_mmx, void, env)
DEF_HELPER_1(emms, void, env) DEF_HELPER_1(emms, void, env)
DEF_HELPER_3(movq, void, env, ptr, ptr) DEF_HELPER_3(movq, void, env, ptr, ptr)

View file

@ -843,6 +843,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s)
void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{ {
uint8_t old_flags = get_float_exception_flags(&env->sse_status);
d->ZMM_S(0) = float32_div(float32_one, d->ZMM_S(0) = float32_div(float32_one,
float32_sqrt(s->ZMM_S(0), &env->sse_status), float32_sqrt(s->ZMM_S(0), &env->sse_status),
&env->sse_status); &env->sse_status);
@ -855,26 +856,33 @@ void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
d->ZMM_S(3) = float32_div(float32_one, d->ZMM_S(3) = float32_div(float32_one,
float32_sqrt(s->ZMM_S(3), &env->sse_status), float32_sqrt(s->ZMM_S(3), &env->sse_status),
&env->sse_status); &env->sse_status);
set_float_exception_flags(old_flags, &env->sse_status);
} }
/*
 * Reciprocal square root of single-precision lane 0 of s, written to
 * lane 0 of d; the other lanes of d are not touched.
 *
 * The softfloat exception flags are snapshotted before the computation
 * and put back afterwards, so whatever float32_sqrt()/float32_div()
 * raise does not persist in env->sse_status.
 */
void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
    const uint8_t saved_flags = get_float_exception_flags(&env->sse_status);

    d->ZMM_S(0) = float32_div(float32_one,
                              float32_sqrt(s->ZMM_S(0), &env->sse_status),
                              &env->sse_status);

    /* Discard any flags raised above. */
    set_float_exception_flags(saved_flags, &env->sse_status);
}
/*
 * Reciprocal (1 / x) of each of the four single-precision lanes of s,
 * written to the matching lanes of d.
 *
 * The softfloat exception flags are saved on entry and restored on exit,
 * so flags raised by float32_div() do not persist in env->sse_status.
 */
void helper_rcpps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
    const uint8_t saved_flags = get_float_exception_flags(&env->sse_status);
    int lane;

    /* Lanes are processed in ascending order, 0 through 3. */
    for (lane = 0; lane < 4; lane++) {
        d->ZMM_S(lane) = float32_div(float32_one, s->ZMM_S(lane),
                                     &env->sse_status);
    }

    /* Discard any flags raised above. */
    set_float_exception_flags(saved_flags, &env->sse_status);
}
/*
 * Reciprocal (1 / x) of single-precision lane 0 of s, written to lane 0
 * of d; the other lanes of d are not touched.
 *
 * The softfloat exception flags are saved on entry and restored on exit,
 * so flags raised by float32_div() do not persist in env->sse_status.
 */
void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s)
{
    const uint8_t saved_flags = get_float_exception_flags(&env->sse_status);

    d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status);

    /* Discard any flags raised above. */
    set_float_exception_flags(saved_flags, &env->sse_status);
}
static inline uint64_t helper_extrq(uint64_t src, int shift, int len) static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
@ -1763,6 +1771,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode) uint32_t mode)
{ {
uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode; signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode;
@ -1788,19 +1797,18 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status); d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status);
d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status); d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status);
#if 0 /* TODO */ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
if (mode & (1 << 3)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) & set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact, ~float_flag_inexact,
&env->sse_status); &env->sse_status);
} }
#endif
env->sse_status.float_rounding_mode = prev_rounding_mode; env->sse_status.float_rounding_mode = prev_rounding_mode;
} }
void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode) uint32_t mode)
{ {
uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode; signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode;
@ -1824,19 +1832,18 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status);
d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status); d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status);
#if 0 /* TODO */ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
if (mode & (1 << 3)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) & set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact, ~float_flag_inexact,
&env->sse_status); &env->sse_status);
} }
#endif
env->sse_status.float_rounding_mode = prev_rounding_mode; env->sse_status.float_rounding_mode = prev_rounding_mode;
} }
void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode) uint32_t mode)
{ {
uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode; signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode;
@ -1859,19 +1866,18 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status);
#if 0 /* TODO */ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
if (mode & (1 << 3)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) & set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact, ~float_flag_inexact,
&env->sse_status); &env->sse_status);
} }
#endif
env->sse_status.float_rounding_mode = prev_rounding_mode; env->sse_status.float_rounding_mode = prev_rounding_mode;
} }
void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
uint32_t mode) uint32_t mode)
{ {
uint8_t old_flags = get_float_exception_flags(&env->sse_status);
signed char prev_rounding_mode; signed char prev_rounding_mode;
prev_rounding_mode = env->sse_status.float_rounding_mode; prev_rounding_mode = env->sse_status.float_rounding_mode;
@ -1894,13 +1900,11 @@ void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status);
#if 0 /* TODO */ if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
if (mode & (1 << 3)) {
set_float_exception_flags(get_float_exception_flags(&env->sse_status) & set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
~float_flag_inexact, ~float_flag_inexact,
&env->sse_status); &env->sse_status);
} }
#endif
env->sse_status.float_rounding_mode = prev_rounding_mode; env->sse_status.float_rounding_mode = prev_rounding_mode;
} }

View file

@ -8708,6 +8708,7 @@ case 0x101:
gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
break; break;
} }
gen_helper_update_mxcsr(tcg_ctx, tcg_ctx->cpu_env);
gen_lea_modrm(env, s, modrm); gen_lea_modrm(env, s, modrm);
tcg_gen_ld32u_tl(tcg_ctx, s->T0, cpu_env, offsetof(CPUX86State, mxcsr)); tcg_gen_ld32u_tl(tcg_ctx, s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
gen_op_st_v(s, MO_32, s->T0, s->A0); gen_op_st_v(s, MO_32, s->T0, s->A0);