diff --git a/qemu/target-i386/cpu.c b/qemu/target-i386/cpu.c
index 51ad9c99..7c0a54bb 100644
--- a/qemu/target-i386/cpu.c
+++ b/qemu/target-i386/cpu.c
@@ -297,14 +297,14 @@ static const char *cpuid_6_feature_name[] = {
 #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
           CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
           CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+          CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */   \
           CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
           /* missing:
           CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
           CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
           CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
-          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
-          CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
-          CPUID_EXT_RDRAND */
+          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX,
+          CPUID_EXT_F16C, CPUID_EXT_RDRAND */
 
 #ifdef TARGET_X86_64
 #define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
@@ -2210,10 +2210,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         *ebx = (cpu->apic_id << 24) |
                8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
         *ecx = env->features[FEAT_1_ECX];
+        if ((*ecx & CPUID_EXT_XSAVE) && (env->cr[4] & CR4_OSXSAVE_MASK)) {
+            *ecx |= CPUID_EXT_OSXSAVE;
+        }
         *edx = env->features[FEAT_1_EDX];
         if (cs->nr_cores * cs->nr_threads > 1) {
             *ebx |= (cs->nr_cores * cs->nr_threads) << 16;
-            *edx |= 1 << 28; /* HTT bit */
+            *edx |= CPUID_HT;
         }
         break;
     case 2:
@@ -2321,12 +2324,27 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0xA:
         /* Architectural Performance Monitoring Leaf */
         *eax = 0;
         *ebx = 0;
         *ecx = 0;
         *edx = 0;
         break;
     case 0xD: {
+        /* Processor Extended State */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
+            break;
+        }
+        if (count == 0) {
+            /* Only x87 and SSE state are implemented: the 512-byte
+               legacy region plus the 64-byte XSAVE header.  */
+            *eax = XSTATE_FP | XSTATE_SSE;
+            *ebx = 0x240;
+            *ecx = 0x240;
+        }
         break;
     }
     case 0x80000000:
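
Note on the leaf-1 change: CPUID_EXT_OSXSAVE is now computed from CR4.OSXSAVE at query time rather than stored in the feature word. For reference, the guest-side probe this enables looks roughly like the sketch below (illustrative, not part of the patch; bits 26 and 27 are the architectural XSAVE/OSXSAVE positions in CPUID.1:ECX):

    #include <stdbool.h>
    #include <stdint.h>

    static bool xsave_usable(void)
    {
        uint32_t eax, ebx, ecx, edx;

        /* CPUID leaf 1: feature flags are returned in ECX/EDX. */
        __asm__ volatile("cpuid"
                         : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                         : "a"(1), "c"(0));
        return (ecx & (1u << 26))    /* XSAVE: implemented by the CPU */
            && (ecx & (1u << 27));   /* OSXSAVE: enabled by the OS */
    }
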
diff --git a/qemu/target-i386/fpu_helper.c b/qemu/target-i386/fpu_helper.c
index 13a31008..d7f34776 100644
--- a/qemu/target-i386/fpu_helper.c
+++ b/qemu/target-i386/fpu_helper.c
@@ -1219,6 +1219,45 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr)
     }
 }
 
+static uint64_t get_xinuse(CPUX86State *env)
+{
+    /* We don't track XINUSE.  We could calculate it here, but it's
+       probably less work to simply indicate all components in use.  */
+    return -1;
+}
+
+void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+    uintptr_t ra = GETPC();
+    uint64_t old_bv, new_bv;
+
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, ra);
+    }
+
+    /* The operand must be 64 byte aligned.  */
+    if (ptr & 63) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    /* Never save anything not enabled by XCR0.  */
+    rfbm &= env->xcr0;
+
+    if (rfbm & XSTATE_FP) {
+        do_xsave_fpu(env, ptr, ra);
+    }
+    if (rfbm & XSTATE_SSE) {
+        do_xsave_mxcsr(env, ptr, ra);
+        do_xsave_sse(env, ptr, ra);
+    }
+
+    /* Update the XSTATE_BV field.  */
+    old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+    new_bv = (old_bv & ~rfbm) | (get_xinuse(env) & rfbm);
+    cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
+}
+
 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
 {
     int i, fpus, fptag;
@@ -1288,6 +1327,112 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
     }
 }
 
+void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+    uintptr_t ra = GETPC();
+    uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
+
+    rfbm &= env->xcr0;
+
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, ra);
+    }
+
+    /* The operand must be 64 byte aligned.  */
+    if (ptr & 63) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
+
+    if ((int64_t)xstate_bv < 0) {
+        /* FIXME: Compact form.  */
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    /* Standard form.  */
+
+    /* The XSTATE_BV field must not set bits not present in XCR0.  */
+    if (xstate_bv & ~env->xcr0) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    /* The XCOMP_BV field must be zero.  */
+    xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
+    xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
+    if (xcomp_bv0 || xcomp_bv1) {
+        raise_exception_ra(env, EXCP0D_GPF, ra);
+    }
+
+    if (rfbm & XSTATE_FP) {
+        if (xstate_bv & XSTATE_FP) {
+            do_xrstor_fpu(env, ptr, ra);
+        } else {
+            helper_fninit(env);
+            memset(env->fpregs, 0, sizeof(env->fpregs));
+        }
+    }
+    if (rfbm & XSTATE_SSE) {
+        /* Note that the standard form of XRSTOR loads MXCSR from memory
+           whether or not the XSTATE_BV bit is set.  */
+        do_xrstor_mxcsr(env, ptr, ra);
+        if (xstate_bv & XSTATE_SSE) {
+            do_xrstor_sse(env, ptr, ra);
+        } else {
+            /* ??? When AVX is implemented, we may have to be more
+               selective in the clearing.  */
+            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
+        }
+    }
+}
+
+uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
+{
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
+    }
+
+    switch (ecx) {
+    case 0:
+        return env->xcr0;
+    case 1:
+        /* FIXME: #GP if !CPUID.(EAX=0DH,ECX=1):EAX.XG1[bit 2].  */
+        return env->xcr0 & get_xinuse(env);
+    }
+    raise_exception_ra(env, EXCP0D_GPF, GETPC());
+}
+
+void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
+{
+    uint32_t dummy, ena_lo, ena_hi;
+    uint64_t ena;
+
+    /* The OS must have enabled XSAVE.  */
+    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
+    }
+
+    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
+    if (ecx != 0 || (mask & XSTATE_FP) == 0) {
+        goto do_gpf;
+    }
+
+    /* Disallow enabling unimplemented features.  */
+    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
+    ena = ((uint64_t)ena_hi << 32) | ena_lo;
+    if (mask & ~ena) {
+        goto do_gpf;
+    }
+
+    env->xcr0 = mask;
+    return;
+
+ do_gpf:
+    raise_exception_ra(env, EXCP0D_GPF, GETPC());
+}
+
 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
 {
     CPU_LDoubleU temp;
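
The ptr + 512/520/528 offsets in the helpers above index the XSAVE header that follows the 512-byte legacy FXSAVE image. As a reading aid, the standard-format layout the helpers assume is sketched below (the struct and field names are illustrative, not from the patch):

    #include <stdint.h>

    struct xsave_area {
        uint8_t  legacy[512];   /* FXSAVE image: x87, MXCSR, XMM state */
        uint64_t xstate_bv;     /* offset 512: components with live state */
        uint64_t xcomp_bv;      /* offset 520: must be 0 in standard form */
        uint64_t reserved[6];   /* offset 528: must be 0 */
        /* extended components (e.g. AVX) would start at offset 576 */
    };

Since get_xinuse() reports every component as in use, helper_xsave effectively forces the requested rfbm bits on in xstate_bv and leaves the other bits untouched.
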
diff --git a/qemu/target-i386/helper.c b/qemu/target-i386/helper.c
index 584ae2a8..8ad69e35 100644
--- a/qemu/target-i386/helper.c
+++ b/qemu/target-i386/helper.c
@@ -460,6 +460,7 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
 void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
 {
     X86CPU *cpu = x86_env_get_cpu(env);
+    uint32_t hflags;
 
 #if defined(DEBUG_MMU)
     printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
@@ -469,24 +470,27 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
                     CR4_SMEP_MASK | CR4_SMAP_MASK)) {
         tlb_flush(CPU(cpu), 1);
     }
+
+    /* Clear bits we're going to recompute.  */
+    hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK);
+
     /* SSE handling */
     if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) {
         new_cr4 &= ~CR4_OSFXSR_MASK;
     }
-    env->hflags &= ~HF_OSFXSR_MASK;
     if (new_cr4 & CR4_OSFXSR_MASK) {
-        env->hflags |= HF_OSFXSR_MASK;
+        hflags |= HF_OSFXSR_MASK;
     }
 
     if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) {
         new_cr4 &= ~CR4_SMAP_MASK;
     }
-    env->hflags &= ~HF_SMAP_MASK;
     if (new_cr4 & CR4_SMAP_MASK) {
-        env->hflags |= HF_SMAP_MASK;
+        hflags |= HF_SMAP_MASK;
     }
 
     env->cr[4] = new_cr4;
+    env->hflags = hflags;
 }
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/qemu/target-i386/helper.h b/qemu/target-i386/helper.h
index 644837fb..72aa35d0 100644
--- a/qemu/target-i386/helper.h
+++ b/qemu/target-i386/helper.h
@@ -189,6 +189,10 @@ DEF_HELPER_3(fsave, void, env, tl, int)
 DEF_HELPER_3(frstor, void, env, tl, int)
 DEF_HELPER_FLAGS_2(fxsave, TCG_CALL_NO_WG, void, env, tl)
 DEF_HELPER_FLAGS_2(fxrstor, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_3(xsave, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_3(xrstor, TCG_CALL_NO_WG, void, env, tl, i64)
+DEF_HELPER_FLAGS_2(xgetbv, TCG_CALL_NO_WG, i64, env, i32)
+DEF_HELPER_FLAGS_3(xsetbv, TCG_CALL_NO_WG, void, env, i32, i64)
 
 DEF_HELPER_FLAGS_1(clz_x86, TCG_CALL_NO_RWG_SE, tl, tl)
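
Together with the helpers, this gates the feature the way hardware does: XGETBV/XSETBV raise #UD until the OS sets CR4.OSXSAVE, and XSETBV raises #GP unless ECX is 0, x87 stays enabled, and the new mask is within what CPUID leaf 0xD advertises. A guest kernel would exercise the path roughly as follows (an illustrative ring-0 sketch, not part of the patch):

    #include <stdint.h>

    static void guest_enable_xsave(void)
    {
        unsigned long cr4;
        uint32_t lo = 0x3, hi = 0;   /* XCR0 = x87 | SSE */

        __asm__ volatile("mov %%cr4, %0" : "=r"(cr4));
        cr4 |= 1ul << 18;            /* CR4.OSXSAVE */
        __asm__ volatile("mov %0, %%cr4" : : "r"(cr4));

        /* XSETBV: ECX selects XCR0, EDX:EAX holds the new value. */
        __asm__ volatile("xsetbv" : : "c"(0), "a"(lo), "d"(hi));
    }
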
diff --git a/qemu/target-i386/translate.c b/qemu/target-i386/translate.c
index 2c60687e..68634d0b 100644
--- a/qemu/target-i386/translate.c
+++ b/qemu/target-i386/translate.c
@@ -7735,6 +7735,36 @@ case 0x101:
             gen_op_st_v(s, CODE64(s) + MO_32, cpu_T0, cpu_A0);
             break;
 
+        case 0xd0: /* xgetbv */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            tcg_gen_trunc_tl_i32(tcg_ctx, cpu_tmp2_i32, *cpu_regs[R_ECX]);
+            gen_helper_xgetbv(tcg_ctx, cpu_tmp1_i64, cpu_env, cpu_tmp2_i32);
+            tcg_gen_extr_i64_tl(tcg_ctx, *cpu_regs[R_EAX], *cpu_regs[R_EDX], cpu_tmp1_i64);
+            break;
+
+        case 0xd1: /* xsetbv */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
+                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            if (s->cpl != 0) {
+                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
+                break;
+            }
+            tcg_gen_concat_tl_i64(tcg_ctx, cpu_tmp1_i64, *cpu_regs[R_EAX],
+                                  *cpu_regs[R_EDX]);
+            tcg_gen_trunc_tl_i32(tcg_ctx, cpu_tmp2_i32, *cpu_regs[R_ECX]);
+            gen_helper_xsetbv(tcg_ctx, cpu_env, cpu_tmp2_i32, cpu_tmp1_i64);
+            /* End TB because translation flags may change.  */
+            gen_jmp_im(s, s->pc - s->cs_base);
+            gen_eob(s);
+            break;
+
         case 0xd8: /* VMRUN */
             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
                 goto illegal_op;
@@ -8243,6 +8273,30 @@ case 0x101:
             gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
             break;
 
+        CASE_MEM_OP(4): /* xsave */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+                                | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_concat_tl_i64(tcg_ctx, cpu_tmp1_i64, *cpu_regs[R_EAX],
+                                  *cpu_regs[R_EDX]);
+            gen_helper_xsave(tcg_ctx, cpu_env, cpu_A0, cpu_tmp1_i64);
+            break;
+
+        CASE_MEM_OP(5): /* xrstor */
+            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
+                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
+                                | PREFIX_REPZ | PREFIX_REPNZ))) {
+                goto illegal_op;
+            }
+            gen_lea_modrm(env, s, modrm);
+            tcg_gen_concat_tl_i64(tcg_ctx, cpu_tmp1_i64, *cpu_regs[R_EAX],
+                                  *cpu_regs[R_EDX]);
+            gen_helper_xrstor(tcg_ctx, cpu_env, cpu_A0, cpu_tmp1_i64);
+            break;
+
         CASE_MEM_OP(6): /* clwb */
             if (prefixes & PREFIX_LOCK) {
                 goto illegal_op;
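
XGETBV/XSETBV decode in the 0F 01 group (modrm bytes 0xd0/0xd1) and XSAVE/XRSTOR in the 0F AE group as memory-only /4 and /5, which is what CASE_MEM_OP matches. A minimal guest-side smoke test for the new paths could look like this (illustrative only; the buffer must be 64-byte aligned, and keeping it zeroed lets the XCOMP_BV check in helper_xrstor pass):

    #include <stdint.h>

    static uint8_t area[576] __attribute__((aligned(64)));  /* BSS: zeroed */

    static void xsave_roundtrip(void)
    {
        uint32_t lo = 0x3, hi = 0;   /* RFBM = x87 | SSE in EDX:EAX */

        __asm__ volatile("xsave (%0)"
                         : : "r"(area), "a"(lo), "d"(hi) : "memory");
        __asm__ volatile("xrstor (%0)"
                         : : "r"(area), "a"(lo), "d"(hi) : "memory");
    }
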