tcg/i386: use softmmu fast path for unaligned accesses

Softmmu unaligned loads/stores currently go through the slow
path for two reasons:
  - to support unaligned accesses on hosts with strict alignment
  - to correctly handle accesses crossing pages

Only the second reason applies to x86. Compilers avoid unaligned
accesses, but they are not uncommon. We would therefore like them to
take the fast path when they do not cross a page boundary.

For that we can use the fact that two adjacent TLB entries cannot
contain the same page. Accessing the TLB entry that corresponds to the
first byte, but comparing its content with the page address of the last
byte, therefore ensures that the access does not cross a page boundary.
We can do this check without adding any instruction to the TLB code (it
only grows by one byte) by using the LEA instruction to combine the
existing move with the size addition.

On an x86-64 host, this gives a 3% boot time improvement for a powerpc
guest and 4% for an x86-64 guest.

Backports commit 8cc580f6a0d8c0e2f590c1472cf5cd8e51761760 from qemu
Aurelien Jarno 2018-02-15 10:30:28 -05:00 committed by Lioncash
parent ea2ee48d9c
commit 11cfddad05

@@ -1274,7 +1274,7 @@ static void * const qemu_st_helpers[16] = {
    First argument register is clobbered. */

 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                                    int mem_index, TCGMemOp s_bits,
+                                    int mem_index, TCGMemOp opc,
                                     tcg_insn_unit **label_ptr, int which)
 {
     const TCGReg r0 = TCG_REG_L0;
@@ -1282,6 +1282,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     TCGType ttype = TCG_TYPE_I32;
     TCGType htype = TCG_TYPE_I32;
     int trexw = 0, hrexw = 0;
+    int s_mask = (1 << (opc & MO_SIZE)) - 1;
+    bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;

     if (TCG_TARGET_REG_BITS == 64) {
         if (TARGET_LONG_BITS == 64) {
@@ -1295,13 +1297,19 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     }

     tcg_out_mov(s, htype, r0, addrlo);
-    tcg_out_mov(s, ttype, r1, addrlo);
+    if (aligned) {
+        tcg_out_mov(s, ttype, r1, addrlo);
+    } else {
+        /* For unaligned access check that we don't cross pages using
+           the page address of the last byte. */
+        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
+    }

     tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

     tgen_arithi(s, ARITH_AND + trexw, r1,
-                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+                TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
     tgen_arithi(s, ARITH_AND + hrexw, r0,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
@@ -1649,7 +1657,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    TCGMemOp s_bits;
     tcg_insn_unit *label_ptr[2];
 #endif
@@ -1662,9 +1669,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 #if defined(CONFIG_SOFTMMU)
     mem_index = get_mmuidx(oi);
-    s_bits = opc & MO_SIZE;
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));

     /* TLB Hit. */
@@ -1791,7 +1797,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    TCGMemOp s_bits;
     tcg_insn_unit *label_ptr[2];
 #endif
@@ -1804,9 +1809,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 #if defined(CONFIG_SOFTMMU)
     mem_index = get_mmuidx(oi);
-    s_bits = opc & MO_SIZE;
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));

     /* TLB Hit. */
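The hunks above drop the precomputed s_bits and hand the full memop to
tcg_out_tlb_load(), which then derives the size mask and the alignment
requirement itself. A rough, self-contained sketch of that decode
follows; the MO_* values here are illustrative stand-ins, not the exact
tcg.h definitions (in particular, real QEMU also handles ALIGNED_ONLY
targets), and only the derivation of s_mask and aligned matters.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the MO_* flags from tcg.h. */
enum {
    MO_8     = 0,
    MO_16    = 1,
    MO_32    = 2,
    MO_64    = 3,
    MO_SIZE  = 3,       /* mask covering the size field */
    MO_ALIGN = 1 << 4,  /* the access must be aligned */
    MO_AMASK = 1 << 4,  /* mask covering the alignment field */
};

int main(void)
{
    int opcs[] = { MO_32, MO_32 | MO_ALIGN, MO_8 };

    for (int i = 0; i < 3; i++) {
        int opc = opcs[i];
        int s_mask = (1 << (opc & MO_SIZE)) - 1;
        /* A single-byte access can never cross a page, so it keeps the
         * cheaper mov even when unaligned accesses are allowed. */
        bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;

        printf("opc %#x: s_mask=%d -> %s\n", opc, s_mask,
               aligned ? "mov + low-bit mask" : "lea + page mask");
    }
    return 0;
}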