tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP

Make TCG_TARGET_HAS_MEMORY_BSWAP true exactly when movbe is available;
otherwise leave byte-swapping of memory accesses to generic code.

Backports d2ef1b83a7a2047e0e36d7b62b3a5d151ab958f5
This commit is contained in:
Richard Henderson 2021-03-03 19:38:26 -05:00 committed by Lioncash
parent 1a3abaa81a
commit a90476c897
18 changed files with 63 additions and 75 deletions

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_aarch64 #define have_bmi1 have_bmi1_aarch64
#define have_bmi2 have_bmi2_aarch64 #define have_bmi2 have_bmi2_aarch64
#define have_popcnt have_popcnt_aarch64 #define have_popcnt have_popcnt_aarch64
#define have_movbe have_movbe_aarch64
#define hcr_write hcr_write_aarch64 #define hcr_write hcr_write_aarch64
#define helper_access_check_cp_reg helper_access_check_cp_reg_aarch64 #define helper_access_check_cp_reg helper_access_check_cp_reg_aarch64
#define helper_add_saturate helper_add_saturate_aarch64 #define helper_add_saturate helper_add_saturate_aarch64

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_aarch64eb #define have_bmi1 have_bmi1_aarch64eb
#define have_bmi2 have_bmi2_aarch64eb #define have_bmi2 have_bmi2_aarch64eb
#define have_popcnt have_popcnt_aarch64eb #define have_popcnt have_popcnt_aarch64eb
#define have_movbe have_movbe_aarch64eb
#define hcr_write hcr_write_aarch64eb #define hcr_write hcr_write_aarch64eb
#define helper_access_check_cp_reg helper_access_check_cp_reg_aarch64eb #define helper_access_check_cp_reg helper_access_check_cp_reg_aarch64eb
#define helper_add_saturate helper_add_saturate_aarch64eb #define helper_add_saturate helper_add_saturate_aarch64eb

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_arm #define have_bmi1 have_bmi1_arm
#define have_bmi2 have_bmi2_arm #define have_bmi2 have_bmi2_arm
#define have_popcnt have_popcnt_arm #define have_popcnt have_popcnt_arm
#define have_movbe have_movbe_arm
#define hcr_write hcr_write_arm #define hcr_write hcr_write_arm
#define helper_access_check_cp_reg helper_access_check_cp_reg_arm #define helper_access_check_cp_reg helper_access_check_cp_reg_arm
#define helper_add_saturate helper_add_saturate_arm #define helper_add_saturate helper_add_saturate_arm

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_armeb #define have_bmi1 have_bmi1_armeb
#define have_bmi2 have_bmi2_armeb #define have_bmi2 have_bmi2_armeb
#define have_popcnt have_popcnt_armeb #define have_popcnt have_popcnt_armeb
#define have_movbe have_movbe_armeb
#define hcr_write hcr_write_armeb #define hcr_write hcr_write_armeb
#define helper_access_check_cp_reg helper_access_check_cp_reg_armeb #define helper_access_check_cp_reg helper_access_check_cp_reg_armeb
#define helper_add_saturate helper_add_saturate_armeb #define helper_add_saturate helper_add_saturate_armeb

View file

@ -890,6 +890,7 @@ symbols = (
'have_bmi1', 'have_bmi1',
'have_bmi2', 'have_bmi2',
'have_popcnt', 'have_popcnt',
'have_movbe',
'hcr_write', 'hcr_write',
'helper_access_check_cp_reg', 'helper_access_check_cp_reg',
'helper_add_saturate', 'helper_add_saturate',

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_m68k #define have_bmi1 have_bmi1_m68k
#define have_bmi2 have_bmi2_m68k #define have_bmi2 have_bmi2_m68k
#define have_popcnt have_popcnt_m68k #define have_popcnt have_popcnt_m68k
#define have_movbe have_movbe_m68k
#define hcr_write hcr_write_m68k #define hcr_write hcr_write_m68k
#define helper_access_check_cp_reg helper_access_check_cp_reg_m68k #define helper_access_check_cp_reg helper_access_check_cp_reg_m68k
#define helper_add_saturate helper_add_saturate_m68k #define helper_add_saturate helper_add_saturate_m68k

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_mips #define have_bmi1 have_bmi1_mips
#define have_bmi2 have_bmi2_mips #define have_bmi2 have_bmi2_mips
#define have_popcnt have_popcnt_mips #define have_popcnt have_popcnt_mips
#define have_movbe have_movbe_mips
#define hcr_write hcr_write_mips #define hcr_write hcr_write_mips
#define helper_access_check_cp_reg helper_access_check_cp_reg_mips #define helper_access_check_cp_reg helper_access_check_cp_reg_mips
#define helper_add_saturate helper_add_saturate_mips #define helper_add_saturate helper_add_saturate_mips

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_mips64 #define have_bmi1 have_bmi1_mips64
#define have_bmi2 have_bmi2_mips64 #define have_bmi2 have_bmi2_mips64
#define have_popcnt have_popcnt_mips64 #define have_popcnt have_popcnt_mips64
#define have_movbe have_movbe_mips64
#define hcr_write hcr_write_mips64 #define hcr_write hcr_write_mips64
#define helper_access_check_cp_reg helper_access_check_cp_reg_mips64 #define helper_access_check_cp_reg helper_access_check_cp_reg_mips64
#define helper_add_saturate helper_add_saturate_mips64 #define helper_add_saturate helper_add_saturate_mips64

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_mips64el #define have_bmi1 have_bmi1_mips64el
#define have_bmi2 have_bmi2_mips64el #define have_bmi2 have_bmi2_mips64el
#define have_popcnt have_popcnt_mips64el #define have_popcnt have_popcnt_mips64el
#define have_movbe have_movbe_mips64el
#define hcr_write hcr_write_mips64el #define hcr_write hcr_write_mips64el
#define helper_access_check_cp_reg helper_access_check_cp_reg_mips64el #define helper_access_check_cp_reg helper_access_check_cp_reg_mips64el
#define helper_add_saturate helper_add_saturate_mips64el #define helper_add_saturate helper_add_saturate_mips64el

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_mipsel #define have_bmi1 have_bmi1_mipsel
#define have_bmi2 have_bmi2_mipsel #define have_bmi2 have_bmi2_mipsel
#define have_popcnt have_popcnt_mipsel #define have_popcnt have_popcnt_mipsel
#define have_movbe have_movbe_mipsel
#define hcr_write hcr_write_mipsel #define hcr_write hcr_write_mipsel
#define helper_access_check_cp_reg helper_access_check_cp_reg_mipsel #define helper_access_check_cp_reg helper_access_check_cp_reg_mipsel
#define helper_add_saturate helper_add_saturate_mipsel #define helper_add_saturate helper_add_saturate_mipsel

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_powerpc #define have_bmi1 have_bmi1_powerpc
#define have_bmi2 have_bmi2_powerpc #define have_bmi2 have_bmi2_powerpc
#define have_popcnt have_popcnt_powerpc #define have_popcnt have_popcnt_powerpc
#define have_movbe have_movbe_powerpc
#define hcr_write hcr_write_powerpc #define hcr_write hcr_write_powerpc
#define helper_access_check_cp_reg helper_access_check_cp_reg_powerpc #define helper_access_check_cp_reg helper_access_check_cp_reg_powerpc
#define helper_add_saturate helper_add_saturate_powerpc #define helper_add_saturate helper_add_saturate_powerpc

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_riscv32 #define have_bmi1 have_bmi1_riscv32
#define have_bmi2 have_bmi2_riscv32 #define have_bmi2 have_bmi2_riscv32
#define have_popcnt have_popcnt_riscv32 #define have_popcnt have_popcnt_riscv32
#define have_movbe have_movbe_riscv32
#define hcr_write hcr_write_riscv32 #define hcr_write hcr_write_riscv32
#define helper_access_check_cp_reg helper_access_check_cp_reg_riscv32 #define helper_access_check_cp_reg helper_access_check_cp_reg_riscv32
#define helper_add_saturate helper_add_saturate_riscv32 #define helper_add_saturate helper_add_saturate_riscv32

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_riscv64 #define have_bmi1 have_bmi1_riscv64
#define have_bmi2 have_bmi2_riscv64 #define have_bmi2 have_bmi2_riscv64
#define have_popcnt have_popcnt_riscv64 #define have_popcnt have_popcnt_riscv64
#define have_movbe have_movbe_riscv64
#define hcr_write hcr_write_riscv64 #define hcr_write hcr_write_riscv64
#define helper_access_check_cp_reg helper_access_check_cp_reg_riscv64 #define helper_access_check_cp_reg helper_access_check_cp_reg_riscv64
#define helper_add_saturate helper_add_saturate_riscv64 #define helper_add_saturate helper_add_saturate_riscv64

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_sparc #define have_bmi1 have_bmi1_sparc
#define have_bmi2 have_bmi2_sparc #define have_bmi2 have_bmi2_sparc
#define have_popcnt have_popcnt_sparc #define have_popcnt have_popcnt_sparc
#define have_movbe have_movbe_sparc
#define hcr_write hcr_write_sparc #define hcr_write hcr_write_sparc
#define helper_access_check_cp_reg helper_access_check_cp_reg_sparc #define helper_access_check_cp_reg helper_access_check_cp_reg_sparc
#define helper_add_saturate helper_add_saturate_sparc #define helper_add_saturate helper_add_saturate_sparc

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_sparc64 #define have_bmi1 have_bmi1_sparc64
#define have_bmi2 have_bmi2_sparc64 #define have_bmi2 have_bmi2_sparc64
#define have_popcnt have_popcnt_sparc64 #define have_popcnt have_popcnt_sparc64
#define have_movbe have_movbe_sparc64
#define hcr_write hcr_write_sparc64 #define hcr_write hcr_write_sparc64
#define helper_access_check_cp_reg helper_access_check_cp_reg_sparc64 #define helper_access_check_cp_reg helper_access_check_cp_reg_sparc64
#define helper_add_saturate helper_add_saturate_sparc64 #define helper_add_saturate helper_add_saturate_sparc64

View file

@ -101,6 +101,7 @@ extern bool have_bmi1;
extern bool have_popcnt; extern bool have_popcnt;
extern bool have_avx1; extern bool have_avx1;
extern bool have_avx2; extern bool have_avx2;
extern bool have_movbe;
// UNICORN FIXME: // UNICORN FIXME:
// Taken from cpuid.h in mainline qemu. // Taken from cpuid.h in mainline qemu.
@ -262,7 +263,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1 #define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
#ifdef CONFIG_SOFTMMU #ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_LDST_LABELS

View file

@ -163,13 +163,12 @@ bool have_bmi1;
bool have_popcnt; bool have_popcnt;
bool have_avx1; bool have_avx1;
bool have_avx2; bool have_avx2;
bool have_movbe;
#ifdef CONFIG_CPUID_H #ifdef CONFIG_CPUID_H
static bool have_movbe;
static bool have_bmi2; static bool have_bmi2;
static bool have_lzcnt; static bool have_lzcnt;
#else #else
# define have_movbe 0
# define have_bmi2 0 # define have_bmi2 0
# define have_lzcnt 0 # define have_lzcnt 0
#endif #endif
@ -1954,13 +1953,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs, TCGReg base, int index, intptr_t ofs,
int seg, bool is64, MemOp memop) int seg, bool is64, MemOp memop)
{ {
const MemOp real_bswap = memop & MO_BSWAP; bool use_movbe = false;
MemOp bswap = real_bswap;
int rexw = is64 * P_REXW; int rexw = is64 * P_REXW;
int movop = OPC_MOVL_GvEv; int movop = OPC_MOVL_GvEv;
if (s->have_movbe && real_bswap) { /* Do big-endian loads with movbe. */
bswap = 0; if (memop & MO_BSWAP) {
tcg_debug_assert(have_movbe);
use_movbe = true;
movop = OPC_MOVBE_GyMy; movop = OPC_MOVBE_GyMy;
} }
@ -1974,23 +1974,28 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
base, index, 0, ofs); base, index, 0, ofs);
break; break;
case MO_UW: case MO_UW:
tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, if (use_movbe) {
base, index, 0, ofs); /* There is no extending movbe; only low 16-bits are modified. */
if (real_bswap) { if (datalo != base && datalo != index) {
tcg_out_rolw_8(s, datalo); /* XOR breaks dependency chains. */
} tgen_arithr(s, ARITH_XOR, datalo, datalo);
break;
case MO_SW:
if (real_bswap) {
if (s->have_movbe) {
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, index, 0, ofs); datalo, base, index, 0, ofs);
} else { } else {
tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
base, index, 0, ofs); datalo, base, index, 0, ofs);
tcg_out_rolw_8(s, datalo); tcg_out_ext16u(s, datalo, datalo);
} }
tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo); } else {
tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
base, index, 0, ofs);
}
break;
case MO_SW:
if (use_movbe) {
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, index, 0, ofs);
tcg_out_ext16s(s, datalo, datalo, rexw);
} else { } else {
tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg, tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
datalo, base, index, 0, ofs); datalo, base, index, 0, ofs);
@ -1998,18 +2003,12 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
break; break;
case MO_UL: case MO_UL:
tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
break; break;
#if TCG_TARGET_REG_BITS == 64 #if TCG_TARGET_REG_BITS == 64
case MO_SL: case MO_SL:
if (real_bswap) { if (use_movbe) {
tcg_out_modrm_sib_offset(s, movop + seg, datalo, tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo,
base, index, 0, ofs); base, index, 0, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
tcg_out_ext32s(s, datalo, datalo); tcg_out_ext32s(s, datalo, datalo);
} else { } else {
tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
@ -2021,12 +2020,9 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_REG_BITS == 64) {
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
base, index, 0, ofs); base, index, 0, ofs);
if (bswap) {
tcg_out_bswap64(s, datalo);
}
} else { } else {
if (real_bswap) { if (use_movbe) {
int t = datalo; TCGReg t = datalo;
datalo = datahi; datalo = datahi;
datahi = t; datahi = t;
} }
@ -2041,14 +2037,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_modrm_sib_offset(s, movop + seg, datalo, tcg_out_modrm_sib_offset(s, movop + seg, datalo,
base, index, 0, ofs); base, index, 0, ofs);
} }
if (bswap) {
tcg_out_bswap32(s, datalo);
tcg_out_bswap32(s, datahi);
}
} }
break; break;
default: default:
tcg_abort(); g_assert_not_reached();
} }
} }
@ -2114,24 +2106,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs, TCGReg base, int index, intptr_t ofs,
int seg, MemOp memop) int seg, MemOp memop)
{ {
/* ??? Ideally we wouldn't need a scratch register. For user-only,
we could perform the bswap twice to restore the original value
instead of moving to the scratch. But as it is, the L constraint
means that TCG_REG_L0 is definitely free here. */
const TCGReg scratch = TCG_REG_L0; const TCGReg scratch = TCG_REG_L0;
const MemOp real_bswap = memop & MO_BSWAP; bool use_movbe = false;
MemOp bswap = real_bswap;
int movop = OPC_MOVL_EvGv; int movop = OPC_MOVL_EvGv;
if (s->have_movbe && real_bswap) { /*
bswap = 0; * Do big-endian stores with movbe or softmmu.
* User-only without movbe will have its swapping done generically.
*/
if (memop & MO_BSWAP) {
tcg_debug_assert(have_movbe);
use_movbe = true;
movop = OPC_MOVBE_MyGy; movop = OPC_MOVBE_MyGy;
} }
switch (memop & MO_SIZE) { switch (memop & MO_SIZE) {
case MO_8: case MO_8:
/* In 32-bit mode, 8-bit stores can only happen from [abcd]x. /*
Use the scratch register if necessary. */ * In 32-bit mode, 8-bit stores can only happen from [abcd]x.
* TODO: Adjust constraints such that this is forced,
* then we won't need a scratch at all for user-only.
*/
if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) { if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
datalo = scratch; datalo = scratch;
@ -2140,43 +2135,19 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
datalo, base, index, 0, ofs); datalo, base, index, 0, ofs);
break; break;
case MO_16: case MO_16:
if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo, tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
base, index, 0, ofs); base, index, 0, ofs);
break; break;
case MO_32: case MO_32:
if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs); tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
break; break;
case MO_64: case MO_64:
if (TCG_TARGET_REG_BITS == 64) { if (TCG_TARGET_REG_BITS == 64) {
if (bswap) {
tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
base, index, 0, ofs); base, index, 0, ofs);
} else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
base, index, 0, ofs);
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
base, index, 0, ofs + 4);
} else { } else {
if (real_bswap) { if (use_movbe) {
int t = datalo; TCGReg t = datalo;
datalo = datahi; datalo = datahi;
datahi = t; datahi = t;
} }
@ -2187,7 +2158,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
} }
break; break;
default: default:
tcg_abort(); g_assert_not_reached();
} }
} }

View file

@ -884,6 +884,7 @@
#define have_bmi1 have_bmi1_x86_64 #define have_bmi1 have_bmi1_x86_64
#define have_bmi2 have_bmi2_x86_64 #define have_bmi2 have_bmi2_x86_64
#define have_popcnt have_popcnt_x86_64 #define have_popcnt have_popcnt_x86_64
#define have_movbe have_movbe_x86_64
#define hcr_write hcr_write_x86_64 #define hcr_write hcr_write_x86_64
#define helper_access_check_cp_reg helper_access_check_cp_reg_x86_64 #define helper_access_check_cp_reg helper_access_check_cp_reg_x86_64
#define helper_add_saturate helper_add_saturate_x86_64 #define helper_add_saturate helper_add_saturate_x86_64