cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not take tlb_lock
must use atomic_read to avoid undefined behaviour (UB).

This completes the conversion to tlb_lock. This conversion results on
average in no performance loss, as the following experiments (run on an
Intel i7-6700K CPU @ 4.00GHz) show.

1. aarch64 bootup+shutdown test:

- Before:
 Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):

       7487.087786      task-clock (msec)    #  0.998 CPUs utilized      ( +- 0.12% )
     31,574,905,303     cycles               #  4.217 GHz                ( +- 0.12% )
     57,097,908,812     instructions         #  1.81  insns per cycle    ( +- 0.08% )
     10,255,415,367     branches             #  1369.747 M/sec           ( +- 0.08% )
        173,278,962     branch-misses        #  1.69% of all branches    ( +- 0.18% )

        7.504481349 seconds time elapsed                                 ( +- 0.14% )

- After:
 Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):

       7462.441328      task-clock (msec)    #  0.998 CPUs utilized      ( +- 0.07% )
     31,478,476,520     cycles               #  4.218 GHz                ( +- 0.07% )
     57,017,330,084     instructions         #  1.81  insns per cycle    ( +- 0.05% )
     10,251,929,667     branches             #  1373.804 M/sec           ( +- 0.05% )
        173,023,787     branch-misses        #  1.69% of all branches    ( +- 0.11% )

        7.474970463 seconds time elapsed                                 ( +- 0.07% )

2. SPEC06int:

[ASCII chart: "SPEC06int (test set)", Y axis: Speedup over master; series
tlb-lock-v2 (mutex) and tlb-lock-v3 (spinlock) across 400.perlbench,
401.bzip2, 403.gcc, 429.mcf, 445.gobmk, 456.hmmer, 458.sjeng,
462.libquantum, 464.h264ref, 471.omnetpp, 473.astar, 483.xalancbmk and
geomean; rendered version at the png link below.]

  png: https://imgur.com/a/BHzpPTW

Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e. a spinlock
  and a single lock acquisition in tlb_set_page_with_attrs.

Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
This commit is contained in:
parent af6c47e192
commit 1677898a09
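For context on the fix itself, here is a minimal standalone sketch (not
unicorn's code; all names are illustrative) of the pattern this commit
adopts: one thread updates an addr_write-like field under a lock while
another reads it with no lock held. The C standard makes that data race
undefined behaviour unless the unlocked read is atomic; a relaxed load
suffices, since the TLB protocol tolerates stale values and only torn
reads and the formal UB must be avoided.

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for CPUTLBEntry.addr_write (hypothetical). */
    static uint64_t addr_write;
    static pthread_mutex_t tlb_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Writer: updates only under tlb_lock, as tlb_set_page_with_attrs
     * does. The store is still atomic so that unlocked readers never
     * observe a torn value. */
    static void *vcpu_writer(void *arg)
    {
        for (uint64_t v = 1; v <= 1000000; v++) {
            pthread_mutex_lock(&tlb_lock);
            __atomic_store_n(&addr_write, v, __ATOMIC_RELAXED);
            pthread_mutex_unlock(&tlb_lock);
        }
        return NULL;
    }

    /* Reader: takes no lock, so a plain read would be UB; a relaxed
     * atomic load (essentially what QEMU's atomic_read() does on
     * GCC/Clang) is the fix this commit applies. */
    static void *fast_path_reader(void *arg)
    {
        uint64_t last = 0;
        for (int i = 0; i < 1000000; i++) {
            last = __atomic_load_n(&addr_write, __ATOMIC_RELAXED);
        }
        printf("last value observed: %llu\n", (unsigned long long)last);
        return NULL;
    }

    int main(void)
    {
        pthread_t w, r;
        pthread_create(&w, NULL, vcpu_writer, NULL);
        pthread_create(&r, NULL, fast_path_reader, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
    }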
@@ -128,7 +128,7 @@ void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
     uintptr_t addr;
 
     if (tlb_is_dirty_ram(tlb_entry)) {
-        addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
+        addr = (tlb_addr_write(tlb_entry) & TARGET_PAGE_MASK) + tlb_entry->addend;
         if ((addr - start) < length) {
             tlb_entry->addr_write |= TLB_NOTDIRTY;
         }
@@ -363,7 +363,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
 
 static void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
 {
-    if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
+    if (tlb_addr_write(tlb_entry) == (vaddr | TLB_NOTDIRTY)) {
         tlb_entry->addr_write = vaddr;
     }
 }
@@ -393,7 +393,7 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
 
 static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
 {
-    return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
+    return (tlb_addr_write(tlbe) & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0;
 }
 
 static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
@@ -424,7 +424,7 @@ void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
 {
     if (tlb_hit_page(tlb_entry->addr_read, addr) ||
-        tlb_hit_page(tlb_entry->addr_write, addr) ||
+        tlb_hit_page(tlb_addr_write(tlb_entry), addr) ||
         tlb_hit_page(tlb_entry->addr_code, addr)) {
         memset(tlb_entry, -1, sizeof(*tlb_entry));
     }
@@ -536,7 +536,14 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
     size_t vidx;
     for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
         CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
-        target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+        target_ulong cmp;
+
+        /* elt_ofs might correspond to .addr_write, so use atomic_read */
+#if TCG_OVERSIZED_GUEST
+        cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+#else
+        cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
+#endif
 
         if (cmp == page) {
             /* Found entry in victim tlb, swap tlb and iotlb. */
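The hunk above deserves a note: victim_tlb_hit selects which CPUTLBEntry
field to compare (addr_read, addr_write or addr_code) by byte offset, so
the atomic load has to go through a cast rather than a plain field
access. A standalone toy model of that shape, with simplified types and
illustrative names:

    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t target_ulong;   /* assumption: 64-bit guest words */

    typedef struct {
        target_ulong addr_read;
        target_ulong addr_write;
        target_ulong addr_code;
    } Entry;

    /* Same shape as the diff: compute the field's address from elt_ofs,
     * then load it with a relaxed atomic (QEMU's atomic_read) instead of
     * a plain dereference, since elt_ofs may name the racy .addr_write. */
    static target_ulong read_entry_field(const Entry *e, size_t elt_ofs)
    {
        return __atomic_load_n(
            (const target_ulong *)((uintptr_t)e + elt_ofs), __ATOMIC_RELAXED);
    }

    /* Usage: read_entry_field(vtlb, offsetof(Entry, addr_write)) */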
@@ -569,7 +576,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int size, int mmu_idx,
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
 
-    if (!tlb_hit(entry->addr_write, addr)) {
+    if (!tlb_hit(tlb_addr_write(entry), addr)) {
         /* TLB entry is for a different page */
         if (!VICTIM_TLB_HIT(addr_write, addr)) {
             tlb_fill(ENV_GET_CPU(env), addr, size, MMU_DATA_STORE,
@@ -586,7 +593,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     size_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr = tlbe->addr_write;
+    target_ulong tlb_addr = tlb_addr_write(tlbe);
     TCGMemOp mop = get_memop(oi);
     int a_bits = get_alignment_bits(mop);
     int s_bits = mop & MO_SIZE;
@@ -616,7 +623,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
             tlb_fill(ENV_GET_CPU(env), addr, 1 << s_bits, MMU_DATA_STORE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = tlbe->addr_write;
+        tlb_addr = tlb_addr_write(tlbe);
     }
 
     /* Check notdirty */
@@ -508,7 +508,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr = entry->addr_write;
+    target_ulong tlb_addr = tlb_addr_write(entry);
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
     struct hook *hook;
@@ -580,7 +580,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
             tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = entry->addr_write;
+        tlb_addr = tlb_addr_write(entry);
     }
 
     /* Handle an IO access. */
@@ -618,7 +618,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
            cannot evict the first. */
         page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
         entry2 = tlb_entry(env, mmu_idx, page2);
-        if (!tlb_hit_page(entry2->addr_write, page2)
+        if (!tlb_hit_page(tlb_addr_write(entry2), page2)
             && !VICTIM_TLB_HIT(addr_write, page2)) {
             tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
@@ -653,7 +653,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr = entry->addr_write;
+    target_ulong tlb_addr = tlb_addr_write(entry);
     unsigned a_bits = get_alignment_bits(get_memop(oi));
     uintptr_t haddr;
     struct hook *hook;
@@ -725,7 +725,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
             tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
         }
-        tlb_addr = entry->addr_write;
+        tlb_addr = tlb_addr_write(entry);
     }
 
     /* Handle an IO access. */
@@ -763,7 +763,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
            cannot evict the first. */
         page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
         entry2 = tlb_entry(env, mmu_idx, page2);
-        if (!tlb_hit_page(entry2->addr_write, page2)
+        if (!tlb_hit_page(tlb_addr_write(entry2), page2)
             && !VICTIM_TLB_HIT(addr_write, page2)) {
             tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
                      mmu_idx, retaddr);
@@ -163,6 +163,15 @@
 /* The memory helpers for tcg-generated code need tcg_target_long etc. */
 #include "tcg.h"
 
+static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
+{
+#if TCG_OVERSIZED_GUEST
+    return entry->addr_write;
+#else
+    return atomic_read(&entry->addr_write);
+#endif
+}
+
 /* Find the TLB index corresponding to the mmu_idx + address pair. */
 static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
                                   target_ulong addr)
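Why the TCG_OVERSIZED_GUEST branch keeps a plain read: when target_ulong
is wider than the host word (a 64-bit guest on a 32-bit host), QEMU's
atomic_read() cannot be used on a type wider than the host pointer, and
QEMU disables multi-threaded TCG for such configurations anyway, so there
is no concurrent writer to race with. A standalone approximation of the
helper under that reading, with the scaffolding types filled in for
illustration only:

    #include <stdint.h>

    typedef uint64_t target_ulong;   /* assumption: 64-bit guest words */

    typedef struct CPUTLBEntry {
        target_ulong addr_read;
        target_ulong addr_write;
        target_ulong addr_code;
        uintptr_t addend;
    } CPUTLBEntry;

    static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
    {
    #if TCG_OVERSIZED_GUEST
        /* No multi-threaded TCG here, so a plain read cannot race. */
        return entry->addr_write;
    #else
        /* Relaxed atomic load: tear-free, imposes no memory ordering. */
        return __atomic_load_n(&entry->addr_write, __ATOMIC_RELAXED);
    #endif
    }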
@@ -484,7 +493,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
         tlb_addr = tlbentry->addr_read;
         break;
     case 1:
-        tlb_addr = tlbentry->addr_write;
+        tlb_addr = tlb_addr_write(tlbentry);
         break;
     case 2:
         tlb_addr = tlbentry->addr_code;
@@ -153,7 +153,7 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
     addr = ptr;
     mmu_idx = CPU_MMU_INDEX;
     entry = tlb_entry(env, mmu_idx, addr);
-    if (unlikely(entry->addr_write !=
+    if (unlikely(tlb_addr_write(entry) !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
         oi = make_memop_idx(SHIFT, mmu_idx);
         glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,