Mirror of https://github.com/yuzu-emu/unicorn.git, synced 2025-02-25 21:06:54 +00:00
tcg/arm: Improve tlb load for armv7
Use UBFX to avoid limitation on CPU_TLB_BITS. Since we're dropping the initial shift, we need to replace the page masking. We can use MOVW+BIC to do this without shifting. The result is the same size as the armv6 path with one less conditional instruction.

Backports commit 647ab96aaf5defeb138e48d610f7f633c587b40d from qemu
This commit is contained in:
parent b3fd6a8c8c
commit e4d05c2567
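The key idea on the v7 path is that the page-number compare and the alignment check collapse into one unconditional CMP: BIC with page_align_mask clears exactly the in-page offset bits above the alignment bits, so the result equals the page-aligned TLB comparator only if the address is in the right page and sufficiently aligned. The armv6 path instead shifts, ANDs with CPU_TLB_SIZE - 1 (an immediate that has to fit ARM's 8-bit rotated form, which is the CPU_TLB_BITS limitation that UBFX side-steps by taking the field width directly), and finishes with a TST plus a conditional CMPEQ. Below is a minimal standalone sketch of the equivalence, assuming 4 KiB pages (TARGET_PAGE_BITS = 12) and a 4-byte access (a_bits = 2); the helper names are illustrative, not part of the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed example parameters: 4 KiB target pages, 4-byte (word) accesses. */
#define TARGET_PAGE_BITS 12
#define TARGET_PAGE_MASK ((uint32_t)-1 << TARGET_PAGE_BITS)
#define A_BITS 2

/* Old (pre-patch) check: tmp = addrlo >> TARGET_PAGE_BITS was compared
 * against the TLB comparator with "cmpeq r0, tmp, lsl #TARGET_PAGE_BITS",
 * guarded by a separate "tst addrlo, #s_mask" alignment test. */
static int old_style_hit(uint32_t addrlo, uint32_t tlb_comparator)
{
    uint32_t tmp = addrlo >> TARGET_PAGE_BITS;
    int aligned = (addrlo & ((1u << A_BITS) - 1)) == 0;
    return aligned && (tlb_comparator == (tmp << TARGET_PAGE_BITS));
}

/* New v7 check: BIC clears the in-page offset bits above the alignment
 * bits, so one CMP tests both the page number and the alignment. */
static int new_style_hit(uint32_t addrlo, uint32_t tlb_comparator)
{
    uint32_t page_align_mask = ~(TARGET_PAGE_MASK | ((1u << A_BITS) - 1));
    uint32_t tmp = addrlo & ~page_align_mask;   /* bic tmp, addrlo, tmp */
    return tlb_comparator == tmp;
}

int main(void)
{
    /* The comparator stored in a valid TLB entry is page-aligned. */
    uint32_t comparator = 0x00012000;
    uint32_t probes[] = { 0x00012000, 0x00012344, 0x00012345, 0x00099000 };

    for (unsigned i = 0; i < sizeof(probes) / sizeof(probes[0]); i++) {
        uint32_t addrlo = probes[i];
        assert(old_style_hit(addrlo, comparator) ==
               new_style_hit(addrlo, comparator));
        printf("addr %#010x -> %s\n", (unsigned)addrlo,
               new_style_hit(addrlo, comparator) ? "hit" : "miss (slow path)");
    }
    return 0;
}

Running the sketch, both checks agree on every probe; in the generated code the single CMP replaces the TST + conditional CMPEQ pair, which is the "one less conditional instruction" from the commit message.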
@@ -1182,18 +1182,33 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
-    /* Should generate something like the following:
-     *   shr    tmp, addrlo, #TARGET_PAGE_BITS                    (1)
+    /* V7 generates the following:
+     *   ubfx   r0, addrlo, #TARGET_PAGE_BITS, #CPU_TLB_BITS
+     *   add    r2, env, #high
+     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
+     *   ldr    r0, [r2, #cmp]
+     *   ldr    r2, [r2, #add]
+     *   movw   tmp, #page_align_mask
+     *   bic    tmp, addrlo, tmp
+     *   cmp    r0, tmp
+     *
+     * Otherwise we generate:
+     *   shr    tmp, addrlo, #TARGET_PAGE_BITS
      *   add    r2, env, #high
-     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                      (2)
-     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS               (3)
-     *   ldr    r0, [r2, #cmp]                                    (4)
+     *   and    r0, tmp, #(CPU_TLB_SIZE - 1)
+     *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS
+     *   ldr    r0, [r2, #cmp]
+     *   ldr    r2, [r2, #add]
      *   tst    addrlo, #s_mask
-     *   ldr    r2, [r2, #add]                                    (5)
      *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
      */
-    tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
-                    0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+    if (use_armv7_instructions) {
+        tcg_out_extract(s, COND_AL, TCG_REG_R0, addrlo,
+                        TARGET_PAGE_BITS, CPU_TLB_BITS);
+    } else {
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
+                        0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+    }
 
     /* We checked that the offset is contained within 16 bits above.  */
     if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
@@ -1204,8 +1219,10 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         cmp_off &= 0xff;
     }
 
-    tcg_out_dat_imm(s, COND_AL, ARITH_AND,
-                    TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
+    if (!use_armv7_instructions) {
+        tcg_out_dat_imm(s, COND_AL, ARITH_AND,
+                        TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
+    }
     tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
                     TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
 
@@ -1221,24 +1238,41 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         }
     }
 
+    /* Load the tlb addend.  */
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
+
     /* Check alignment.  We don't support inline unaligned acceses,
        but we can easily support overalignment checks.  */
    if (a_bits < s_bits) {
        a_bits = s_bits;
    }
-    if (a_bits) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
-    }
 
-    /* Load the tlb addend.  */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
-
-    tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
-                    TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+    if (use_armv7_instructions) {
+        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
+        int rot = encode_imm(mask);
+
+        if (rot >= 0) {
+            tcg_out_dat_imm(s, COND_AL, ARITH_BIC, TCG_REG_TMP, addrlo,
+                            rotl(mask, rot) | (rot << 7));
+        } else {
+            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+                            addrlo, TCG_REG_TMP, 0);
+        }
+        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R0, TCG_REG_TMP, 0);
+    } else {
+        if (a_bits) {
+            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
+                            (1 << a_bits) - 1);
+        }
+        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
+                        0, TCG_REG_R0, TCG_REG_TMP,
+                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+    }
 
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
-                        TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
+        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R1, addrhi, 0);
     }
 
     return TCG_REG_R2;
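A detail worth spelling out in the last hunk is the immediate handling for the BIC. encode_imm(mask) asks whether the mask fits ARM's data-processing immediate format, an 8-bit value rotated right by an even amount; if so, rotl(mask, rot) | (rot << 7) assembles the operand-2 field (payload in bits 7:0, rotation/2 in bits 11:8). Otherwise the mask is materialised into a register (the "movw tmp, #page_align_mask" of the comment block) and a register-form BIC is used. Below is a small self-contained sketch of that decision; encode_rot_imm is a simplified stand-in for the backend's encode_imm/rotl helpers, and the mask values are examples assuming 4 KiB pages.

#include <stdint.h>
#include <stdio.h>

/* Rotate a 32-bit value left by n (0 <= n < 32). */
static uint32_t rotl32(uint32_t val, int n)
{
    return n ? (val << n) | (val >> (32 - n)) : val;
}

/* Simplified stand-in for the backend's encode_imm(): an ARM ALU immediate
 * is an 8-bit value rotated right by an even amount.  Return the even
 * left-rotation that exposes the 8-bit payload, or -1 if none exists. */
static int encode_rot_imm(uint32_t imm)
{
    for (int rot = 0; rot < 32; rot += 2) {
        if ((rotl32(imm, rot) & ~0xffu) == 0) {
            return rot;
        }
    }
    return -1;
}

int main(void)
{
    /* page_align_mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1)) for
     * assumed 4 KiB pages and two different alignment requirements. */
    uint32_t masks[] = { 0xffc /* a_bits = 2 */, 0xff0 /* a_bits = 4 */ };

    for (unsigned i = 0; i < sizeof(masks) / sizeof(masks[0]); i++) {
        uint32_t mask = masks[i];
        int rot = encode_rot_imm(mask);

        if (rot >= 0) {
            /* Operand-2 field as built in the patch: 8-bit payload in
             * bits 7:0, rotation/2 in bits 11:8 (i.e. rot << 7). */
            uint32_t op2 = rotl32(mask, rot) | ((uint32_t)rot << 7);
            printf("mask %#06x: BIC immediate, operand2 = %#06x\n",
                   (unsigned)mask, (unsigned)op2);
        } else {
            printf("mask %#06x: not encodable, fall back to MOVW + register BIC\n",
                   (unsigned)mask);
        }
    }
    return 0;
}

With 4 KiB pages, any alignment requirement below 16 bytes gives a mask with more than eight significant bits, so the MOVW fallback is the path that usually runs, matching the "movw tmp, #page_align_mask" line in the comment block; only stricter alignments such as a_bits = 4 fold the mask into the BIC immediate.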