unicorn/qemu/include/exec/cpu_ldst.h

295 lines
11 KiB
C
Raw Normal View History

2015-08-21 07:04:50 +00:00
/*
* Software MMU support
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
/*
* Generate inline load/store functions for all MMU modes (typically
* at least _user and _kernel) as well as _data versions, for all data
* sizes.
*
* Used by target op helpers.
*
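* The syntax for the accessors is:
*
* load: cpu_ld{sign}{size}{end}_{mmusuffix}(env, ptr)
*       cpu_ld{sign}{size}{end}_{mmusuffix}_ra(env, ptr, retaddr)
*       cpu_ld{sign}{size}{end}_mmuidx_ra(env, ptr, mmu_idx, retaddr)
*
* store: cpu_st{size}{end}_{mmusuffix}(env, ptr, val)
*        cpu_st{size}{end}_{mmusuffix}_ra(env, ptr, val, retaddr)
*        cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmu_idx, retaddr)
*
* sign is:
* (empty): for 32 and 64 bit sizes
* u : unsigned
* s : signed
*
* size is:
* b: 8 bits
* w: 16 bits
* l: 32 bits
* q: 64 bits
*
* end is:
* (empty): target-native byte order (chosen by TARGET_WORDS_BIGENDIAN),
*          and for 8 bit accesses
* _be : big-endian
* _le : little-endian
*
* mmusuffix is one of the generic suffixes "data" or "code", or
* (for softmmu configs) a target-specific MMU mode suffix as defined
* in target cpu.h. In this header the {end} and _ra forms are declared
* for "data" only; the "code" loads exist in the plain form only.
*/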
#ifndef CPU_LDST_H
#define CPU_LDST_H
#if defined(CONFIG_USER_ONLY)
/* sparc32plus has a 64-bit long but a 32-bit address space;
 * this can produce bad results with g2h() and h2g().
 */
/*
 * abi_ptr: unsigned integer type used for guest addresses in user-only
 * builds.  It is 32 bits wide when TARGET_VIRT_ADDR_SPACE_BITS is at most 32
 * and 64 bits wide otherwise.
 *
 * TARGET_ABI_FMT_ptr is the printf conversion matching abi_ptr (lower-case
 * hex, no "0x" prefix).  Both branches use the <inttypes.h> macros so the
 * conversion matches the fixed-width type exactly on every host.
 */
#if TARGET_VIRT_ADDR_SPACE_BITS <= 32
typedef uint32_t abi_ptr;
#define TARGET_ABI_FMT_ptr "%" PRIx32
#else
typedef uint64_t abi_ptr;
#define TARGET_ABI_FMT_ptr "%" PRIx64
#endif
/*
 * g2h: turn guest address @x into a host pointer by adding GUEST_BASE.
 *
 * @x is first converted to abi_ptr, so any bits above the guest pointer
 * width are discarded before the offset is applied.  The sum is formed in
 * uintptr_t rather than unsigned long so the host pointer is not truncated
 * on hosts whose long is narrower than a pointer (e.g. LLP64).
 *
 * All direct uses of g2h and h2g need to go away for usermode softmmu.
 */
#define g2h(x) ((void *)((uintptr_t)(abi_ptr)(x) + GUEST_BASE))
/*
 * h2g_valid: non-zero when host address @x maps to a representable guest
 * address.
 *
 * If HOST_LONG_BITS does not exceed TARGET_VIRT_ADDR_SPACE_BITS every host
 * address is accepted.  Otherwise the offset of @x from GUEST_BASE must be
 * below 2^TARGET_VIRT_ADDR_SPACE_BITS and, when RESERVED_VA is non-zero,
 * below RESERVED_VA as well.
 *
 * The offset and the limit are computed in uintptr_t (not unsigned long) so
 * that neither the pointer conversion nor the shift loses bits on hosts
 * where long is narrower than a pointer.  @x is evaluated once.
 */
#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS
#define h2g_valid(x) 1
#else
#define h2g_valid(x) ({ \
    uintptr_t h2g_valid_off_ = (uintptr_t)(x) - GUEST_BASE; \
    (h2g_valid_off_ < ((uintptr_t)1 << TARGET_VIRT_ADDR_SPACE_BITS)) && \
        (!RESERVED_VA || (h2g_valid_off_ < RESERVED_VA)); \
})
#endif
/*
 * h2g_nocheck: turn host address @x into a guest address by subtracting
 * GUEST_BASE and narrowing the result to abi_ptr.  No range check is made;
 * use h2g() when the address must be validated.
 *
 * The subtraction is done in uintptr_t so the host address is not truncated
 * on hosts where unsigned long is narrower than a pointer.  @x is evaluated
 * exactly once.
 */
#define h2g_nocheck(x) ((abi_ptr)((uintptr_t)(x) - GUEST_BASE))
/*
 * h2g: turn host address @x into a guest address (abi_ptr), asserting first
 * that it fits the target address space.
 *
 * @x is captured once as a uintptr_t and that copy is handed to both
 * h2g_valid() and h2g_nocheck(), so an argument with side effects is
 * evaluated exactly once regardless of how those macros expand.
 */
#define h2g(x) ({ \
    uintptr_t h2g_host_ = (uintptr_t)(x); \
    /* Check if given address fits target address space */ \
    assert(h2g_valid(h2g_host_)); \
    h2g_nocheck(h2g_host_); \
})
#else
/*
 * Builds without CONFIG_USER_ONLY: a guest pointer is a target_ulong and is
 * printed with the TARGET_ABI_FMT_lx conversion.
 */
typedef target_ulong abi_ptr;
#define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx
#endif
/* The memory helpers for tcg-generated code need tcg_target_long etc. */
#include "tcg.h"
cputlb: read CPUTLBEntry.addr_write atomically Updates can come from other threads, so readers that do not take tlb_lock must use atomic_read to avoid undefined behaviour (UB). This completes the conversion to tlb_lock. This conversion results on average in no performance loss, as the following experiments (run on an Intel i7-6700K CPU @ 4.00GHz) show. 1. aarch64 bootup+shutdown test: - Before: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% ) 31,574,905,303 cycles # 4.217 GHz ( +- 0.12% ) 57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% ) 10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% ) 173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% ) 7.504481349 seconds time elapsed ( +- 0.14% ) - After: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) 31,478,476,520 cycles # 4.218 GHz ( +- 0.07% ) 57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% ) 10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% ) 173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% ) 7.474970463 seconds time elapsed ( +- 0.07% ) 2. 
SPEC06int: SPEC06int (test set) [Y axis: Speedup over master] 1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+ | | 1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+ | +++ | +++ tlb-lock-v3 (spinl|ck) | | +++ | | +++ +++ | | | 1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+ | ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### | 1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+ | *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # | 0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+ | * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # | | * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # | 0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+ | * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # | 0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+ | * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | | * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | 0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+ | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+ 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean png: https://imgur.com/a/BHzpPTW Notes: - tlb-lock-v2 corresponds to an implementation with a mutex. - tlb-lock-v3 corresponds to the current implementation, i.e. 
a spinlock and a single lock acquisition in tlb_set_page_with_attrs. Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
{
#if TCG_OVERSIZED_GUEST
return entry->addr_write;
#else
return qatomic_read(&entry->addr_write);
cputlb: read CPUTLBEntry.addr_write atomically Updates can come from other threads, so readers that do not take tlb_lock must use atomic_read to avoid undefined behaviour (UB). This completes the conversion to tlb_lock. This conversion results on average in no performance loss, as the following experiments (run on an Intel i7-6700K CPU @ 4.00GHz) show. 1. aarch64 bootup+shutdown test: - Before: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% ) 31,574,905,303 cycles # 4.217 GHz ( +- 0.12% ) 57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% ) 10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% ) 173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% ) 7.504481349 seconds time elapsed ( +- 0.14% ) - After: Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs): 7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% ) 31,478,476,520 cycles # 4.218 GHz ( +- 0.07% ) 57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% ) 10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% ) 173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% ) 7.474970463 seconds time elapsed ( +- 0.07% ) 2. 
SPEC06int: SPEC06int (test set) [Y axis: Speedup over master] 1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+ | | 1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+ | +++ | +++ tlb-lock-v3 (spinl|ck) | | +++ | | +++ +++ | | | 1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+ | ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### | 1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+ | *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # | 0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+ | * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # | | * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # | 0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+ | * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # | 0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+ | * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | | * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # | 0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+ | * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # | 0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+ 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean png: https://imgur.com/a/BHzpPTW Notes: - tlb-lock-v2 corresponds to an implementation with a mutex. - tlb-lock-v3 corresponds to the current implementation, i.e. 
a spinlock and a single lock acquisition in tlb_set_page_with_attrs. Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
#endif
}
/*
 * tlb_index:
 * @env: CPU state (not referenced by this implementation)
 * @mmu_idx: MMU index (not referenced by this implementation)
 * @addr: guest virtual address
 *
 * Return the TLB slot number for @addr: its page number, reduced modulo
 * CPU_TLB_SIZE by masking with CPU_TLB_SIZE - 1.
 */
static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
                                  target_ulong addr)
{
    const target_ulong page_number = addr >> TARGET_PAGE_BITS;

    return page_number & (CPU_TLB_SIZE - 1);
}
/*
 * tlb_entry:
 * @env: CPU state owning the TLB tables
 * @mmu_idx: MMU index selecting which table in env->tlb_table to use
 * @addr: guest virtual address
 *
 * Return a pointer to the TLB entry for the (@mmu_idx, @addr) pair.  The
 * slot within the selected table is whatever tlb_index() reports.
 */
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
                                     target_ulong addr)
{
    const uintptr_t slot = tlb_index(env, mmu_idx, addr);

    return &env->tlb_table[mmu_idx][slot];
}
/*
 * Guest memory accessors that take an explicit MMU index.
 *
 * Names follow cpu_{ld,st}{sign}{size}{end}_mmuidx_ra: "ub"/"sb" are
 * unsigned/signed 8-bit, "uw"/"sw" unsigned/signed 16-bit, "l" 32-bit and
 * "q" 64-bit; "_be"/"_le" select the byte order.  Signed loads return int,
 * 64-bit loads return uint64_t and all other loads return uint32_t.
 *
 * @env: CPU state
 * @addr: guest address to access
 * @val: value to store (stores only)
 * @mmu_idx: MMU index to use for the access
 * @ra / @retaddr: NOTE(review): presumably the host return address of the
 *                 calling helper, used to unwind if the access faults --
 *                 confirm against the implementation.
 */
uint32_t cpu_ldub_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                            int mmu_idx, uintptr_t ra);
int cpu_ldsb_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                       int mmu_idx, uintptr_t ra);

/* Big-endian loads. */
uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                               int mmu_idx, uintptr_t ra);
int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                          int mmu_idx, uintptr_t ra);
uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra);
uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra);

/* Little-endian loads. */
uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                               int mmu_idx, uintptr_t ra);
int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                          int mmu_idx, uintptr_t ra);
uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra);
uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
                              int mmu_idx, uintptr_t ra);

/* Byte store. */
void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
                       int mmu_idx, uintptr_t retaddr);

/* Big-endian stores. */
void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr);
void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr);
void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
                          int mmu_idx, uintptr_t retaddr);

/* Little-endian stores. */
void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr);
void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
                          int mmu_idx, uintptr_t retaddr);
void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
                          int mmu_idx, uintptr_t retaddr);
/*
 * "data" guest memory accessors: same naming scheme as the _mmuidx_ra
 * family but without an mmu_idx argument.  Each accessor comes in two
 * forms, a plain one and an _ra one that takes an extra uintptr_t ra.
 * NOTE(review): ra is presumably the host return address used to unwind on
 * a fault -- confirm against the implementation.
 *
 * Signed loads return int, 64-bit loads return uint64_t, all other loads
 * return uint32_t.
 */
/* Loads, plain form. */
uint32_t cpu_ldub_data(CPUArchState *env, abi_ptr ptr);
int cpu_ldsb_data(CPUArchState *env, abi_ptr ptr);
uint32_t cpu_lduw_be_data(CPUArchState *env, abi_ptr ptr);
int cpu_ldsw_be_data(CPUArchState *env, abi_ptr ptr);
uint32_t cpu_ldl_be_data(CPUArchState *env, abi_ptr ptr);
uint64_t cpu_ldq_be_data(CPUArchState *env, abi_ptr ptr);
uint32_t cpu_lduw_le_data(CPUArchState *env, abi_ptr ptr);
int cpu_ldsw_le_data(CPUArchState *env, abi_ptr ptr);
uint32_t cpu_ldl_le_data(CPUArchState *env, abi_ptr ptr);
uint64_t cpu_ldq_le_data(CPUArchState *env, abi_ptr ptr);
/* Loads, _ra form. */
uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr ptr, uintptr_t ra);
/* Stores, plain form. */
void cpu_stb_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
void cpu_stw_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
void cpu_stl_be_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
void cpu_stq_be_data(CPUArchState *env, abi_ptr ptr, uint64_t val);
void cpu_stw_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
void cpu_stl_le_data(CPUArchState *env, abi_ptr ptr, uint32_t val);
void cpu_stq_le_data(CPUArchState *env, abi_ptr ptr, uint64_t val);
/* Stores, _ra form. */
void cpu_stb_data_ra(CPUArchState *env, abi_ptr ptr,
uint32_t val, uintptr_t ra);
void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr ptr,
uint32_t val, uintptr_t ra);
void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr ptr,
uint32_t val, uintptr_t ra);
void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr ptr,
uint64_t val, uintptr_t ra);
void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr ptr,
uint32_t val, uintptr_t ra);
void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr ptr,
uint32_t val, uintptr_t ra);
void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr ptr,
uint64_t val, uintptr_t ra);
/*
 * Target-native aliases: the accessor names without a _be/_le infix expand
 * to the big-endian variants when TARGET_WORDS_BIGENDIAN is defined and to
 * the little-endian variants otherwise.  Only the 16/32/64-bit accessors
 * are aliased; the 8-bit ones have no endian-specific variants.
 */
#ifdef TARGET_WORDS_BIGENDIAN
/* Big-endian target: native names map to the _be accessors. */
# define cpu_lduw_data cpu_lduw_be_data
# define cpu_ldsw_data cpu_ldsw_be_data
# define cpu_ldl_data cpu_ldl_be_data
# define cpu_ldq_data cpu_ldq_be_data
# define cpu_lduw_data_ra cpu_lduw_be_data_ra
# define cpu_ldsw_data_ra cpu_ldsw_be_data_ra
# define cpu_ldl_data_ra cpu_ldl_be_data_ra
# define cpu_ldq_data_ra cpu_ldq_be_data_ra
# define cpu_lduw_mmuidx_ra cpu_lduw_be_mmuidx_ra
# define cpu_ldsw_mmuidx_ra cpu_ldsw_be_mmuidx_ra
# define cpu_ldl_mmuidx_ra cpu_ldl_be_mmuidx_ra
# define cpu_ldq_mmuidx_ra cpu_ldq_be_mmuidx_ra
# define cpu_stw_data cpu_stw_be_data
# define cpu_stl_data cpu_stl_be_data
# define cpu_stq_data cpu_stq_be_data
# define cpu_stw_data_ra cpu_stw_be_data_ra
# define cpu_stl_data_ra cpu_stl_be_data_ra
# define cpu_stq_data_ra cpu_stq_be_data_ra
# define cpu_stw_mmuidx_ra cpu_stw_be_mmuidx_ra
# define cpu_stl_mmuidx_ra cpu_stl_be_mmuidx_ra
# define cpu_stq_mmuidx_ra cpu_stq_be_mmuidx_ra
#else
/* Little-endian target: native names map to the _le accessors. */
# define cpu_lduw_data cpu_lduw_le_data
# define cpu_ldsw_data cpu_ldsw_le_data
# define cpu_ldl_data cpu_ldl_le_data
# define cpu_ldq_data cpu_ldq_le_data
# define cpu_lduw_data_ra cpu_lduw_le_data_ra
# define cpu_ldsw_data_ra cpu_ldsw_le_data_ra
# define cpu_ldl_data_ra cpu_ldl_le_data_ra
# define cpu_ldq_data_ra cpu_ldq_le_data_ra
# define cpu_lduw_mmuidx_ra cpu_lduw_le_mmuidx_ra
# define cpu_ldsw_mmuidx_ra cpu_ldsw_le_mmuidx_ra
# define cpu_ldl_mmuidx_ra cpu_ldl_le_mmuidx_ra
# define cpu_ldq_mmuidx_ra cpu_ldq_le_mmuidx_ra
# define cpu_stw_data cpu_stw_le_data
# define cpu_stl_data cpu_stl_le_data
# define cpu_stq_data cpu_stq_le_data
# define cpu_stw_data_ra cpu_stw_le_data_ra
# define cpu_stl_data_ra cpu_stl_le_data_ra
# define cpu_stq_data_ra cpu_stq_le_data_ra
# define cpu_stw_mmuidx_ra cpu_stw_le_mmuidx_ra
# define cpu_stl_mmuidx_ra cpu_stl_le_mmuidx_ra
# define cpu_stq_mmuidx_ra cpu_stq_le_mmuidx_ra
#endif
/*
 * "code" loads: unsigned 8/16/32/64-bit reads from guest address @addr.
 * Only unsigned forms are declared out of line; the signed 8- and 16-bit
 * forms are inline wrappers defined in this header.
 * NOTE(review): the "code" suffix presumably means the access is treated as
 * an instruction fetch -- confirm against the implementation.
 */
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr);
/*
 * cpu_ldsb_code:
 * @env: CPU state
 * @addr: guest address to read
 *
 * Signed 8-bit "code" load: fetch the byte with cpu_ldub_code() and
 * reinterpret it as int8_t so the returned int is sign-extended.
 */
static inline int cpu_ldsb_code(CPUArchState *env, abi_ptr addr)
{
    const int8_t signed_byte = (int8_t)cpu_ldub_code(env, addr);

    return signed_byte;
}
/*
 * cpu_ldsw_code:
 * @env: CPU state
 * @addr: guest address to read
 *
 * Signed 16-bit "code" load: fetch the halfword with cpu_lduw_code() and
 * reinterpret it as int16_t so the returned int is sign-extended.
 */
static inline int cpu_ldsw_code(CPUArchState *env, abi_ptr addr)
{
    const int16_t signed_half = (int16_t)cpu_lduw_code(env, addr);

    return signed_half;
}
2015-08-21 07:04:50 +00:00
/**
* tlb_vaddr_to_host:
* @env: CPUArchState
* @addr: guest virtual address to look up
* @access_type: 0 for read, 1 for write, 2 for execute
* @mmu_idx: MMU index to use for lookup
*
* Look up the specified guest virtual index in the TCG softmmu TLB.
* If we can translate a host virtual address suitable for direct RAM
* access, without causing a guest exception, then return it.
* Otherwise (TLB entry is for an I/O access, guest software
* TLB fill required, etc) return NULL.
2015-08-21 07:04:50 +00:00
*/
#ifdef CONFIG_USER_ONLY
static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
MMUAccessType access_type, int mmu_idx)
2015-08-21 07:04:50 +00:00
{
return g2h(addr);
}
#else
void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
MMUAccessType access_type, int mmu_idx);
#endif
2015-08-21 07:04:50 +00:00
#endif /* CPU_LDST_H */