mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-10 21:15:35 +00:00
tcg/i386: Handle ctz and clz opcodes
Backports commit bbf25f90ba802a286fd72be9175a860ae5fec726 from qemu
This commit is contained in:
parent
73ab332185
commit
246d891668
|
@ -93,8 +93,8 @@ extern bool have_bmi1;
|
|||
#define TCG_TARGET_HAS_eqv_i32 0
|
||||
#define TCG_TARGET_HAS_nand_i32 0
|
||||
#define TCG_TARGET_HAS_nor_i32 0
|
||||
#define TCG_TARGET_HAS_clz_i32 0
|
||||
#define TCG_TARGET_HAS_ctz_i32 0
|
||||
#define TCG_TARGET_HAS_clz_i32 1
|
||||
#define TCG_TARGET_HAS_ctz_i32 1
|
||||
#define TCG_TARGET_HAS_deposit_i32 1
|
||||
#define TCG_TARGET_HAS_extract_i32 1
|
||||
#define TCG_TARGET_HAS_sextract_i32 1
|
||||
|
@ -127,8 +127,8 @@ extern bool have_bmi1;
|
|||
#define TCG_TARGET_HAS_eqv_i64 0
|
||||
#define TCG_TARGET_HAS_nand_i64 0
|
||||
#define TCG_TARGET_HAS_nor_i64 0
|
||||
#define TCG_TARGET_HAS_clz_i64 0
|
||||
#define TCG_TARGET_HAS_ctz_i64 0
|
||||
#define TCG_TARGET_HAS_clz_i64 1
|
||||
#define TCG_TARGET_HAS_ctz_i64 1
|
||||
#define TCG_TARGET_HAS_deposit_i64 1
|
||||
#define TCG_TARGET_HAS_extract_i64 1
|
||||
#define TCG_TARGET_HAS_sextract_i64 0
|
||||
|
|
|
@ -98,6 +98,7 @@ static const int tcg_target_call_oarg_regs[] = {
|
|||
#define TCG_CT_CONST_S32 0x100
|
||||
#define TCG_CT_CONST_U32 0x200
|
||||
#define TCG_CT_CONST_I32 0x400
|
||||
#define TCG_CT_CONST_WSZ 0x800
|
||||
|
||||
/* Registers used with L constraint, which are the first argument
|
||||
registers on x86_64, and two random call clobbered registers on
|
||||
|
@ -147,6 +148,11 @@ static bool have_bmi2;
|
|||
#else
|
||||
static bool have_bmi2 = 0;
|
||||
#endif
|
||||
/*
 * have_lzcnt tells the code generator whether LZCNT may be emitted.
 * Without cpuid.h support (CONFIG_CPUID_H) or without a bit_LZCNT
 * definition there is nothing to probe, so it collapses to the
 * compile-time constant 0; otherwise it is a file-local flag.
 */
#if !defined(CONFIG_CPUID_H) || !defined(bit_LZCNT)
# define have_lzcnt 0
#else
static bool have_lzcnt;
#endif
|
||||
|
||||
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
|
||||
intptr_t value, intptr_t addend)
|
||||
|
@ -221,6 +227,10 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
|
|||
tcg_regset_set32(ct->u.regs, 0, 0xff);
|
||||
}
|
||||
break;
|
||||
case 'W':
|
||||
/* With TZCNT/LZCNT, we can have operand-size as an input. */
|
||||
ct->ct |= TCG_CT_CONST_WSZ;
|
||||
break;
|
||||
|
||||
/* qemu_ld/st address constraint */
|
||||
case 'L':
|
||||
|
@ -267,6 +277,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||
if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
|
||||
return 1;
|
||||
}
|
||||
if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -300,6 +313,8 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
|
||||
#define OPC_ANDN (0xf2 | P_EXT38)
|
||||
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
|
||||
#define OPC_BSF (0xbc | P_EXT)
|
||||
#define OPC_BSR (0xbd | P_EXT)
|
||||
#define OPC_BSWAP (0xc8 | P_EXT)
|
||||
#define OPC_CALL_Jz (0xe8)
|
||||
#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
|
||||
|
@ -314,6 +329,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||
#define OPC_JMP_long (0xe9)
|
||||
#define OPC_JMP_short (0xeb)
|
||||
#define OPC_LEA (0x8d)
|
||||
#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
|
||||
#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
|
||||
#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
|
||||
#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
|
||||
|
@ -340,6 +356,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
|
|||
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
|
||||
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
|
||||
#define OPC_TESTL (0x85)
|
||||
#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
|
||||
#define OPC_XCHG_ax_r32 (0x90)
|
||||
|
||||
#define OPC_GRP3_Ev (0xf7)
|
||||
|
@ -444,6 +461,11 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
|
|||
if (opc & P_ADDR32) {
|
||||
tcg_out8(s, 0x67);
|
||||
}
|
||||
if (opc & P_SIMDF3) {
|
||||
tcg_out8(s, 0xf3);
|
||||
} else if (opc & P_SIMDF2) {
|
||||
tcg_out8(s, 0xf2);
|
||||
}
|
||||
|
||||
rex = 0;
|
||||
rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
|
||||
|
@ -478,6 +500,11 @@ static void tcg_out_opc(TCGContext *s, int opc)
|
|||
if (opc & P_DATA16) {
|
||||
tcg_out8(s, 0x66);
|
||||
}
|
||||
if (opc & P_SIMDF3) {
|
||||
tcg_out8(s, 0xf3);
|
||||
} else if (opc & P_SIMDF2) {
|
||||
tcg_out8(s, 0xf2);
|
||||
}
|
||||
if (opc & (P_EXT | P_EXT38)) {
|
||||
tcg_out8(s, 0x0f);
|
||||
if (opc & P_EXT38) {
|
||||
|
@ -1106,13 +1133,11 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
|
|||
}
|
||||
#endif
|
||||
|
||||
static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
|
||||
TCGArg c1, TCGArg c2, int const_c2,
|
||||
TCGArg v1)
|
||||
static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
|
||||
TCGReg dest, TCGReg v1)
|
||||
{
|
||||
tcg_out_cmp(s, c1, c2, const_c2, 0);
|
||||
if (have_cmov) {
|
||||
tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
|
||||
tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
|
||||
} else {
|
||||
TCGLabel *over = gen_new_label(s);
|
||||
tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
|
||||
|
@ -1121,16 +1146,64 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Emit a 32-bit movcond: compare c1 against c2, then conditionally
 * move v1 into dest according to cond.
 *
 * s         code generation context
 * cond      condition handed to tcg_out_cmov
 * dest      destination register of the conditional move
 * c1, c2    comparison operands; const_c2 is forwarded to tcg_out_cmp
 *           together with c2
 * v1        source register of the conditional move
 */
static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
                              TCGReg c1, TCGArg c2, int const_c2,
                              TCGReg v1)
{
    /* 32-bit operation: no P_REXW flag on either instruction. */
    const int rexw = 0;

    tcg_out_cmp(s, c1, c2, const_c2, rexw);
    tcg_out_cmov(s, cond, rexw, dest, v1);
}
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
|
||||
TCGArg c1, TCGArg c2, int const_c2,
|
||||
TCGArg v1)
|
||||
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
|
||||
TCGReg c1, TCGArg c2, int const_c2,
|
||||
TCGReg v1)
|
||||
{
|
||||
tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
|
||||
tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
|
||||
tcg_out_cmov(s, cond, P_REXW, dest, v1);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
|
||||
TCGArg arg2, bool const_a2)
|
||||
{
|
||||
if (const_a2) {
|
||||
tcg_debug_assert(have_bmi1);
|
||||
tcg_debug_assert(arg2 == (rexw ? 64 : 32));
|
||||
tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
|
||||
} else {
|
||||
tcg_debug_assert(dest != arg2);
|
||||
tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
|
||||
tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
|
||||
TCGArg arg2, bool const_a2)
|
||||
{
|
||||
if (have_lzcnt) {
|
||||
tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
|
||||
if (const_a2) {
|
||||
tcg_debug_assert(arg2 == (rexw ? 64 : 32));
|
||||
} else {
|
||||
tcg_debug_assert(dest != arg2);
|
||||
tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
|
||||
}
|
||||
} else {
|
||||
tcg_debug_assert(!const_a2);
|
||||
tcg_debug_assert(dest != arg1);
|
||||
tcg_debug_assert(dest != arg2);
|
||||
|
||||
/* Recall that the output of BSR is the index not the count. */
|
||||
tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
|
||||
tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);
|
||||
|
||||
/* Since we have destroyed the flags from BSR, we have to re-test. */
|
||||
tcg_out_cmp(s, arg1, 0, 1, rexw);
|
||||
tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
|
||||
{
|
||||
intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
|
||||
|
@ -2096,6 +2169,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||
}
|
||||
break;
|
||||
|
||||
OP_32_64(ctz):
|
||||
tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
|
||||
break;
|
||||
OP_32_64(clz):
|
||||
tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
|
||||
break;
|
||||
|
||||
case INDEX_op_brcond_i32:
|
||||
tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(s, args[3]), 0);
|
||||
break;
|
||||
|
@ -2451,6 +2531,25 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
|
|||
return &arith2;
|
||||
}
|
||||
|
||||
case INDEX_op_ctz_i32:
|
||||
case INDEX_op_ctz_i64:
|
||||
{
|
||||
static const TCGTargetOpDef ctz[2] = {
|
||||
{ 0, { "&r", "r", "r" } },
|
||||
{ 0, { "&r", "r", "rW" } },
|
||||
};
|
||||
return &ctz[have_bmi1];
|
||||
}
|
||||
case INDEX_op_clz_i32:
|
||||
case INDEX_op_clz_i64:
|
||||
{
|
||||
static const TCGTargetOpDef clz[2] = {
|
||||
{ 0, { "&r", "r", "r" } },
|
||||
{ 0, { "&r", "r", "rW" } },
|
||||
};
|
||||
return &clz[have_lzcnt];
|
||||
}
|
||||
|
||||
case INDEX_op_qemu_ld_i32:
|
||||
return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
|
||||
case INDEX_op_qemu_st_i32:
|
||||
|
@ -2618,6 +2717,16 @@ static void tcg_target_init(TCGContext *s)
|
|||
}
|
||||
#endif
|
||||
|
||||
// TODO: MSVC-compatible equivalent
|
||||
#ifndef have_lzcnt
|
||||
max = __get_cpuid_max(0x8000000, 0);
|
||||
if (max >= 1) {
|
||||
__cpuid(0x80000001, a, b, c, d);
|
||||
/* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
|
||||
have_lzcnt = (c & bit_LZCNT) != 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
tcg_regset_set32(s->tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
|
||||
tcg_regset_set32(s->tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
|
||||
|
|
Loading…
Reference in a new issue