target-i386: Add XSAVE extension

This includes XSAVE, XRSTOR, XGETBV, XSETBV, which are all related,
as well as the associate cpuid bits.

Backports commit 19dc85dba23c0db1ca932c62e453c37e00761628 from qemu
This commit is contained in:
Richard Henderson 2018-02-20 12:46:01 -05:00 committed by Lioncash
parent 6657c0c54a
commit 6c5b6a0e7f
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
5 changed files with 227 additions and 12 deletions

View file

@ -297,14 +297,14 @@ static const char *cpuid_6_feature_name[] = {
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \
CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
/* missing:
CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
CPUID_EXT_RDRAND */
CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX,
CPUID_EXT_F16C, CPUID_EXT_RDRAND */
#ifdef TARGET_X86_64
#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
@ -2210,10 +2210,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx = (cpu->apic_id << 24) |
8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
*ecx = env->features[FEAT_1_ECX];
if ((*ecx & CPUID_EXT_XSAVE) && (env->cr[4] & CR4_OSXSAVE_MASK)) {
*ecx |= CPUID_EXT_OSXSAVE;
}
*edx = env->features[FEAT_1_EDX];
if (cs->nr_cores * cs->nr_threads > 1) {
*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
*edx |= 1 << 28; /* HTT bit */
*edx |= CPUID_HT;
}
break;
case 2:
@ -2321,12 +2324,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0xA:
/* Architectural Performance Monitoring Leaf */
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
break;
case 0xD: {
/* Processor Extended State */
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
break;
}
case 0x80000000:

View file

@ -1219,6 +1219,45 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr)
}
}
static uint64_t get_xinuse(CPUX86State *env)
{
/* We don't track XINUSE. We could calculate it here, but it's
probably less work to simply indicate all components in use. */
return -1;
}
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
uintptr_t ra = GETPC();
uint64_t old_bv, new_bv;
/* The OS must have enabled XSAVE. */
if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
raise_exception_ra(env, EXCP06_ILLOP, ra);
}
/* The operand must be 64 byte aligned. */
if (ptr & 63) {
raise_exception_ra(env, EXCP0D_GPF, ra);
}
/* Never save anything not enabled by XCR0. */
rfbm &= env->xcr0;
if (rfbm & XSTATE_FP) {
do_xsave_fpu(env, ptr, ra);
}
if (rfbm & XSTATE_SSE) {
do_xsave_mxcsr(env, ptr, ra);
do_xsave_sse(env, ptr, ra);
}
/* Update the XSTATE_BV field. */
old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
new_bv = (old_bv & ~rfbm) | (get_xinuse(env) & rfbm);
cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
}
static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
int i, fpus, fptag;
@ -1288,6 +1327,112 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
}
}
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
uintptr_t ra = GETPC();
uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
rfbm &= env->xcr0;
/* The OS must have enabled XSAVE. */
if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
raise_exception_ra(env, EXCP06_ILLOP, ra);
}
/* The operand must be 64 byte aligned. */
if (ptr & 63) {
raise_exception_ra(env, EXCP0D_GPF, ra);
}
xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
if ((int64_t)xstate_bv < 0) {
/* FIXME: Compact form. */
raise_exception_ra(env, EXCP0D_GPF, ra);
}
/* Standard form. */
/* The XSTATE field must not set bits not present in XCR0. */
if (xstate_bv & ~env->xcr0) {
raise_exception_ra(env, EXCP0D_GPF, ra);
}
/* The XCOMP field must be zero. */
xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
if (xcomp_bv0 || xcomp_bv1) {
raise_exception_ra(env, EXCP0D_GPF, ra);
}
if (rfbm & XSTATE_FP) {
if (xstate_bv & XSTATE_FP) {
do_xrstor_fpu(env, ptr, ra);
} else {
helper_fninit(env);
memset(env->fpregs, 0, sizeof(env->fpregs));
}
}
if (rfbm & XSTATE_SSE) {
/* Note that the standard form of XRSTOR loads MXCSR from memory
whether or not the XSTATE_BV bit is set. */
do_xrstor_mxcsr(env, ptr, ra);
if (xstate_bv & XSTATE_SSE) {
do_xrstor_sse(env, ptr, ra);
} else {
/* ??? When AVX is implemented, we may have to be more
selective in the clearing. */
memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
}
}
}
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
/* The OS must have enabled XSAVE. */
if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
raise_exception_ra(env, EXCP06_ILLOP, GETPC());
}
switch (ecx) {
case 0:
return env->xcr0;
case 1:
/* FIXME: #GP if !CPUID.(EAX=0DH,ECX=1):EAX.XG1[bit 2]. */
return env->xcr0 & get_xinuse(env);
}
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
uint32_t dummy, ena_lo, ena_hi;
uint64_t ena;
/* The OS must have enabled XSAVE. */
if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
raise_exception_ra(env, EXCP06_ILLOP, GETPC());
}
/* Only XCR0 is defined at present; the FPU may not be disabled. */
if (ecx != 0 || (mask & XSTATE_FP) == 0) {
goto do_gpf;
}
/* Disallow enabling unimplemented features. */
cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
ena = ((uint64_t)ena_hi << 32) | ena_lo;
if (mask & ~ena) {
goto do_gpf;
}
env->xcr0 = mask;
return;
do_gpf:
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
{
CPU_LDoubleU temp;

View file

@ -460,6 +460,7 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
{
X86CPU *cpu = x86_env_get_cpu(env);
uint32_t hflags;
#if defined(DEBUG_MMU)
printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
@ -469,24 +470,27 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
CR4_SMEP_MASK | CR4_SMAP_MASK)) {
tlb_flush(CPU(cpu), 1);
}
/* Clear bits we're going to recompute. */
hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK);
/* SSE handling */
if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) {
new_cr4 &= ~CR4_OSFXSR_MASK;
}
env->hflags &= ~HF_OSFXSR_MASK;
if (new_cr4 & CR4_OSFXSR_MASK) {
env->hflags |= HF_OSFXSR_MASK;
hflags |= HF_OSFXSR_MASK;
}
if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) {
new_cr4 &= ~CR4_SMAP_MASK;
}
env->hflags &= ~HF_SMAP_MASK;
if (new_cr4 & CR4_SMAP_MASK) {
env->hflags |= HF_SMAP_MASK;
hflags |= HF_SMAP_MASK;
}
env->cr[4] = new_cr4;
env->hflags = hflags;
}
#if defined(CONFIG_USER_ONLY)

View file

@ -189,6 +189,10 @@ DEF_HELPER_3(fsave, void, env, tl, int)
DEF_HELPER_3(frstor, void, env, tl, int)
DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64)
DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64)
DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32)
DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
DEF_HELPER_FLAGS_1(clz_x86, TCG_CALL_NO_RWG_SE, tl, tl)

View file

@ -7735,6 +7735,36 @@ case 0x101:
gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
break;
case 0xd0: /* xgetbv */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (s->prefix & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
tcg_gen_trunc_tl_i32(tcg_ctx, cpu_tmp2_i32, *cpu_regs[R_ECX]);
gen_helper_xgetbv(tcg_ctx, cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
tcg_gen_extr_i64_tl(tcg_ctx, *cpu_regs[R_EAX], *cpu_regs[R_EDX], cpu_tmp1_i64);
break;
case 0xd1: /* xsetbv */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (s->prefix & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
if (s->cpl != 0) {
gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
break;
}
tcg_gen_concat_tl_i64(tcg_ctx, cpu_tmp1_i64, *cpu_regs[R_EAX],
*cpu_regs[R_EDX]);
tcg_gen_trunc_tl_i32(tcg_ctx, cpu_tmp2_i32, *cpu_regs[R_ECX]);
gen_helper_xsetbv(tcg_ctx, cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
/* End TB because translation flags may change. */
gen_jmp_im(s, s->pc - pc_start);
gen_eob(s);
break;
case 0xd8: /* VMRUN */
if (!(s->flags & HF_SVME_MASK) || !s->pe) {
goto illegal_op;
@ -8243,6 +8273,30 @@ case 0x101:
gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
break;
CASE_MEM_OP(4): /* xsave */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (prefixes & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
gen_lea_modrm(env, s, modrm);
tcg_gen_concat_tl_i64(tcg_ctx, cpu_tmp1_i64, *cpu_regs[R_EAX],
*cpu_regs[R_EDX]);
gen_helper_xsave(tcg_ctx, cpu_env, cpu_A0, cpu_tmp1_i64);
break;
CASE_MEM_OP(5): /* xrstor */
if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
|| (prefixes & (PREFIX_LOCK | PREFIX_DATA
| PREFIX_REPZ | PREFIX_REPNZ))) {
goto illegal_op;
}
gen_lea_modrm(env, s, modrm);
tcg_gen_concat_tl_i64(tcg_ctx, cpu_tmp1_i64, *cpu_regs[R_EAX],
*cpu_regs[R_EDX]);
gen_helper_xrstor(tcg_ctx, cpu_env, cpu_A0, cpu_tmp1_i64);
break;
CASE_MEM_OP(6): /* clwb */
if (prefixes & PREFIX_LOCK) {
goto illegal_op;