mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-25 20:21:00 +00:00
1677898a09
Updates can come from other threads, so readers that do not take tlb_lock must use atomic_read to avoid undefined behaviour (UB). This completes the conversion to tlb_lock. This conversion results on average in no performance loss, as the following experiments (run on an Intel i7-6700K CPU @ 4.00GHz) show. 1. aarch64 bootup+shutdown test: - Before: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% ) 31,574,905,303 cycles # 4.217 GHz ( +- 0.12% ) 57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% ) 10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% ) 173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% ) 7.504481349 seconds time elapsed ( +- 0.14% ) - After: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) 31,478,476,520 cycles # 4.218 GHz ( +- 0.07% ) 57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% ) 10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% ) 173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% ) 7.474970463 seconds time elapsed ( +- 0.07% ) 2. 
SPEC06int: SPEC06int (test set) [Y axis: Speedup over master] 1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+ | | 1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+ | +++ | +++ tlb-lock-v3 (spinl|ck) | | +++ | | +++ +++ | | | 1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+ | ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### | 1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+ | *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # | 0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+ | * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # | | * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # | 0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+ | * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # | 0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+ | * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | | * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | 0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+ | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+ 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean png: https://imgur.com/a/BHzpPTW Notes: - tlb-lock-v2 corresponds to an implementation with a mutex. - tlb-lock-v3 corresponds to the current implementation, i.e. 
a spinlock and a single lock acquisition in tlb_set_page_with_attrs. Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
236 lines
6.3 KiB
C
236 lines
6.3 KiB
C
/*
|
|
* Software MMU support
|
|
*
|
|
* Generate inline load/store functions for one MMU mode and data
|
|
* size.
|
|
*
|
|
* Generate a store function as well as signed and unsigned loads. For
|
|
* 32 and 64 bit cases, also generate floating point functions with
|
|
* the same size.
|
|
*
|
|
* Not used directly but included from cpu_ldst.h.
|
|
*
|
|
* Copyright (c) 2003 Fabrice Bellard
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
/*
 * Per-size parameters, keyed on DATA_SIZE (access width in bytes), which
 * must already be defined when this template is included:
 *
 *   SUFFIX     - size letter pasted into the generated function names
 *   USUFFIX    - size letter(s) used in the unsigned load names
 *   DATA_TYPE  - unsigned integer type of exactly DATA_SIZE bytes
 *   DATA_STYPE - signed counterpart (only defined for 1 and 2 bytes)
 *   SHIFT      - log2(DATA_SIZE)
 *
 * Any other DATA_SIZE is rejected at compile time.
 */
#if DATA_SIZE == 1
#define SUFFIX b
#define USUFFIX ub
#define DATA_TYPE uint8_t
#define DATA_STYPE int8_t
#define SHIFT 0
#elif DATA_SIZE == 2
#define SUFFIX w
#define USUFFIX uw
#define DATA_TYPE uint16_t
#define DATA_STYPE int16_t
#define SHIFT 1
#elif DATA_SIZE == 4
#define SUFFIX l
#define USUFFIX l
#define DATA_TYPE uint32_t
#define SHIFT 2
#elif DATA_SIZE == 8
#define SUFFIX q
#define USUFFIX q
#define DATA_TYPE uint64_t
#define SHIFT 3
#else
#error unsupported data size
#endif
|
|
|
|
/*
 * RES_TYPE is the type used for unsigned load results and for the value
 * argument of stores: 32 bits for every size except the 8-byte accesses,
 * which need the full 64 bits.
 */
#if DATA_SIZE != 8
#define RES_TYPE uint32_t
#else
#define RES_TYPE uint64_t
#endif
|
|
|
|
/*
 * Access-flavour parameters:
 *
 *   ADDR_READ  - CPUTLBEntry field compared against the address on loads
 *   MMUSUFFIX  - tail of the out-of-line helper name
 *   URETSUFFIX - size part of the helper name used for unsigned loads
 *   SRETSUFFIX - size part of the helper name used for signed loads
 *
 * When SOFTMMU_CODE_ACCESS is defined the "_cmmu" helpers and the
 * addr_code field are used, and both helper suffixes collapse to the
 * plain SUFFIX; otherwise the "_mmu" helpers and addr_read are used.
 */
#ifndef SOFTMMU_CODE_ACCESS
#define ADDR_READ addr_read
#define MMUSUFFIX _mmu
#define URETSUFFIX USUFFIX
#define SRETSUFFIX glue(s, SUFFIX)
#else
#define ADDR_READ addr_code
#define MMUSUFFIX _cmmu
#define URETSUFFIX SUFFIX
#define SRETSUFFIX SUFFIX
#endif
|
|
|
|
/* generic load/store macros */
|
|
|
|
static inline RES_TYPE
|
|
glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
|
|
target_ulong ptr,
|
|
uintptr_t retaddr)
|
|
{
|
|
CPUTLBEntry *entry;
|
|
RES_TYPE res;
|
|
target_ulong addr;
|
|
int mmu_idx;
|
|
TCGMemOpIdx oi;
|
|
|
|
addr = ptr;
|
|
mmu_idx = CPU_MMU_INDEX;
|
|
entry = tlb_entry(env, mmu_idx, addr);
|
|
if (unlikely(entry->ADDR_READ !=
|
|
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
|
|
oi = make_memop_idx(SHIFT, mmu_idx);
|
|
res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr,
|
|
oi, retaddr);
|
|
} else {
|
|
uintptr_t hostaddr = (uintptr_t)(addr + entry->addend);
|
|
res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
|
|
}
|
|
return res;
|
|
}
|
|
|
|
static inline RES_TYPE
|
|
glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
|
|
{
|
|
return glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(env, ptr, 0);
|
|
}
|
|
|
|
#if DATA_SIZE <= 2
|
|
static inline int
|
|
glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
|
|
target_ulong ptr,
|
|
uintptr_t retaddr)
|
|
{
|
|
CPUTLBEntry *entry;
|
|
int res;
|
|
target_ulong addr;
|
|
int mmu_idx;
|
|
TCGMemOpIdx oi;
|
|
|
|
addr = ptr;
|
|
mmu_idx = CPU_MMU_INDEX;
|
|
entry = tlb_entry(env, mmu_idx, addr);
|
|
if (unlikely(entry->ADDR_READ !=
|
|
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
|
|
oi = make_memop_idx(SHIFT, mmu_idx);
|
|
res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX),
|
|
MMUSUFFIX)(env, addr, oi, retaddr);
|
|
} else {
|
|
uintptr_t hostaddr = (uintptr_t)(addr + entry->addend);
|
|
res = glue(glue(lds, SUFFIX), _raw)(hostaddr);
|
|
}
|
|
return res;
|
|
}
|
|
static inline int
|
|
glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
|
|
{
|
|
return glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(env, ptr, 0);
|
|
}
|
|
#endif
|
|
|
|
#ifndef SOFTMMU_CODE_ACCESS
|
|
|
|
/* generic store macro */
|
|
|
|
static inline void
|
|
glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
|
|
target_ulong ptr,
|
|
RES_TYPE v, uintptr_t retaddr)
|
|
{
|
|
CPUTLBEntry *entry;
|
|
target_ulong addr;
|
|
int mmu_idx;
|
|
TCGMemOpIdx oi;
|
|
|
|
addr = ptr;
|
|
mmu_idx = CPU_MMU_INDEX;
|
|
entry = tlb_entry(env, mmu_idx, addr);
|
|
if (unlikely(tlb_addr_write(entry) !=
|
|
(addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
|
|
oi = make_memop_idx(SHIFT, mmu_idx);
|
|
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
|
|
retaddr);
|
|
} else {
|
|
uintptr_t hostaddr = (uintptr_t)(addr + entry->addend);
|
|
glue(glue(st, SUFFIX), _raw)(hostaddr, v);
|
|
}
|
|
}
|
|
|
|
static inline void
|
|
glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
|
|
RES_TYPE v)
|
|
{
|
|
glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(env, ptr, v, 0);
|
|
}
|
|
|
|
#if DATA_SIZE == 8
|
|
static inline float64 glue(cpu_ldfq, MEMSUFFIX)(CPUArchState *env,
|
|
target_ulong ptr)
|
|
{
|
|
union {
|
|
float64 d;
|
|
uint64_t i;
|
|
} u;
|
|
u.i = glue(cpu_ldq, MEMSUFFIX)(env, ptr);
|
|
return u.d;
|
|
}
|
|
|
|
static inline void glue(cpu_stfq, MEMSUFFIX)(CPUArchState *env,
|
|
target_ulong ptr, float64 v)
|
|
{
|
|
union {
|
|
float64 d;
|
|
uint64_t i;
|
|
} u;
|
|
u.d = v;
|
|
glue(cpu_stq, MEMSUFFIX)(env, ptr, u.i);
|
|
}
|
|
#endif /* DATA_SIZE == 8 */
|
|
|
|
#if DATA_SIZE == 4
|
|
static inline float32 glue(cpu_ldfl, MEMSUFFIX)(CPUArchState *env,
|
|
target_ulong ptr)
|
|
{
|
|
union {
|
|
float32 f;
|
|
uint32_t i;
|
|
} u;
|
|
u.i = glue(cpu_ldl, MEMSUFFIX)(env, ptr);
|
|
return u.f;
|
|
}
|
|
|
|
static inline void glue(cpu_stfl, MEMSUFFIX)(CPUArchState *env,
|
|
target_ulong ptr, float32 v)
|
|
{
|
|
union {
|
|
float32 f;
|
|
uint32_t i;
|
|
} u;
|
|
u.f = v;
|
|
glue(cpu_stl, MEMSUFFIX)(env, ptr, u.i);
|
|
}
|
|
#endif /* DATA_SIZE == 4 */
|
|
|
|
#endif /* !SOFTMMU_CODE_ACCESS */
|
|
|
|
/*
 * Drop every macro this template defined, plus the DATA_SIZE input, so
 * the file can be included again with a different size.
 */

/* Input parameter. */
#undef DATA_SIZE

/* Size-dependent parameters. */
#undef SUFFIX
#undef USUFFIX
#undef DATA_TYPE
#undef DATA_STYPE
#undef RES_TYPE
#undef SHIFT

/* Access-flavour parameters. */
#undef ADDR_READ
#undef MMUSUFFIX
#undef URETSUFFIX
#undef SRETSUFFIX
|
|
|