2015-08-21 07:04:50 +00:00
|
|
|
/*
|
|
|
|
* Software MMU support
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Generate inline load/store functions for all MMU modes (typically
|
|
|
|
* at least _user and _kernel) as well as _data versions, for all data
|
|
|
|
* sizes.
|
|
|
|
*
|
|
|
|
* Used by target op helpers.
|
|
|
|
*
|
|
|
|
* MMU mode suffixes are defined in target cpu.h.
|
|
|
|
*/
|
|
|
|
#ifndef CPU_LDST_H
|
|
|
|
#define CPU_LDST_H
|
|
|
|
|
|
|
|
#if defined(CONFIG_USER_ONLY)
|
|
|
|
/*
 * g2h(x): turn guest address x into a host pointer.  x is cast to
 * target_ulong, then to unsigned long, and GUEST_BASE is added before the
 * final cast to void *.
 *
 * All direct uses of g2h and h2g need to go away for usermode softmmu.
 */
|
|
|
|
#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + GUEST_BASE))
|
|
|
|
|
|
|
|
/*
 * h2g_valid(x): non-zero when host address x maps back into the guest
 * address space.
 *
 * When the host long is no wider than the guest virtual address space the
 * macro is the constant 1 (and x is not evaluated).  Otherwise the offset
 * x - GUEST_BASE must be below 1 << TARGET_VIRT_ADDR_SPACE_BITS and, when
 * RESERVED_VA is non-zero, also below RESERVED_VA.
 */
#if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS
|
|
|
|
#define h2g_valid(x) 1
|
|
|
|
#else
|
|
|
|
#define h2g_valid(x) ({ \
|
|
|
|
unsigned long __guest = (unsigned long)(x) - GUEST_BASE; \
|
|
|
|
(__guest < (1ul << TARGET_VIRT_ADDR_SPACE_BITS)) && \
|
|
|
|
(!RESERVED_VA || (__guest < RESERVED_VA)); \
|
|
|
|
})
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * h2g_nocheck(x): convert host address x to a guest address by subtracting
 * GUEST_BASE and casting the result to abi_ulong.  No range check is done.
 */
#define h2g_nocheck(x) ({ \
|
|
|
|
unsigned long __ret = (unsigned long)(x) - GUEST_BASE; \
|
|
|
|
(abi_ulong)__ret; \
|
|
|
|
})
|
|
|
|
|
|
|
|
/*
 * h2g(x): checked host-to-guest conversion.  Asserts h2g_valid(x) and then
 * yields h2g_nocheck(x).  x is expanded in both sub-macros, so it should be
 * free of side effects.
 */
#define h2g(x) ({ \
|
|
|
|
/* Check if given address fits target address space */ \
|
|
|
|
assert(h2g_valid(x)); \
|
|
|
|
h2g_nocheck(x); \
|
|
|
|
})
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(CONFIG_USER_ONLY)
|
|
|
|
|
2019-04-22 11:08:32 +00:00
|
|
|
/* In user-only mode we provide only the _code and _data accessors. */
|
|
|
|
|
|
|
|
/*
 * _data accessors: include the user-only template once per DATA_SIZE
 * (1, 2, 4 and 8) with MEMSUFFIX set to _data.
 */
#define MEMSUFFIX _data
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
|
|
|
|
/*
 * _code accessors: same four template inclusions as for _data, but with
 * MEMSUFFIX set to _code and CODE_ACCESS defined while the template is
 * included.
 */
#define MEMSUFFIX _code
|
|
|
|
#define CODE_ACCESS
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_useronly_template.h"
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#undef CODE_ACCESS
|
2015-08-21 07:04:50 +00:00
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
/*
 * Alias each ldul_<mode> name to the matching ldl_<mode> accessor.
 *
 * XXX: find something cleaner.
 * Furthermore, this is false for 64-bit targets.
 */
|
|
|
|
#define ldul_user ldl_user
|
|
|
|
#define ldul_kernel ldl_kernel
|
|
|
|
#define ldul_hypv ldl_hypv
|
|
|
|
#define ldul_executive ldl_executive
|
|
|
|
#define ldul_supervisor ldl_supervisor
|
|
|
|
|
|
|
|
/* The memory helpers for tcg-generated code need tcg_target_long etc. */
|
|
|
|
#include "tcg.h"
|
|
|
|
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+
| +++ | +++ tlb-lock-v3 (spinl|ck) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
|
|
|
|
{
|
|
|
|
#if TCG_OVERSIZED_GUEST
|
|
|
|
return entry->addr_write;
|
|
|
|
#else
|
|
|
|
return atomic_read(&entry->addr_write);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2018-10-23 19:02:42 +00:00
|
|
|
/* Find the TLB index corresponding to the mmu_idx + address pair. */
|
|
|
|
static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
|
|
|
|
target_ulong addr)
|
|
|
|
{
|
|
|
|
return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
|
|
|
|
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
|
|
|
|
target_ulong addr)
|
|
|
|
{
|
|
|
|
return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
/*
 * MMU mode 0: include the softmmu template once per DATA_SIZE (1, 2, 4
 * and 8) with CPU_MMU_INDEX fixed at 0 and MEMSUFFIX set to
 * MMU_MODE0_SUFFIX.  Both macros are #undef'd once the four inclusions
 * are done.
 */
#define CPU_MMU_INDEX 0
|
|
|
|
#define MEMSUFFIX MMU_MODE0_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
|
|
|
|
/*
 * MMU mode 1: template included for DATA_SIZE 1, 2, 4 and 8 with
 * CPU_MMU_INDEX 1 and MEMSUFFIX MMU_MODE1_SUFFIX.
 */
#define CPU_MMU_INDEX 1
|
|
|
|
#define MEMSUFFIX MMU_MODE1_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
|
|
|
|
/*
 * MMU mode 2 (only when NB_MMU_MODES >= 3): template included for
 * DATA_SIZE 1, 2, 4 and 8 with CPU_MMU_INDEX 2 and MMU_MODE2_SUFFIX.
 */
#if (NB_MMU_MODES >= 3)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 2
|
|
|
|
#define MEMSUFFIX MMU_MODE2_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 3) */
|
|
|
|
|
|
|
|
/*
 * MMU mode 3 (only when NB_MMU_MODES >= 4): template included for
 * DATA_SIZE 1, 2, 4 and 8 with CPU_MMU_INDEX 3 and MMU_MODE3_SUFFIX.
 */
#if (NB_MMU_MODES >= 4)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 3
|
|
|
|
#define MEMSUFFIX MMU_MODE3_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 4) */
|
|
|
|
|
|
|
|
/*
 * MMU mode 4 (only when NB_MMU_MODES >= 5): template included for
 * DATA_SIZE 1, 2, 4 and 8 with CPU_MMU_INDEX 4 and MMU_MODE4_SUFFIX.
 */
#if (NB_MMU_MODES >= 5)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 4
|
|
|
|
#define MEMSUFFIX MMU_MODE4_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 5) */
|
|
|
|
|
|
|
|
/*
 * MMU mode 5 (only when NB_MMU_MODES >= 6): template included for
 * DATA_SIZE 1, 2, 4 and 8 with CPU_MMU_INDEX 5 and MMU_MODE5_SUFFIX.
 */
#if (NB_MMU_MODES >= 6)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 5
|
|
|
|
#define MEMSUFFIX MMU_MODE5_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 6) */
|
|
|
|
|
2018-02-12 16:20:21 +00:00
|
|
|
/*
 * MMU mode 6: instantiated only when NB_MMU_MODES >= 7 and
 * MMU_MODE6_SUFFIX is defined.  Template included for DATA_SIZE 1, 2, 4
 * and 8 with CPU_MMU_INDEX 6.
 */
#if (NB_MMU_MODES >= 7) && defined(MMU_MODE6_SUFFIX)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 6
|
|
|
|
#define MEMSUFFIX MMU_MODE6_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 7) */
|
|
|
|
|
2018-02-13 13:34:47 +00:00
|
|
|
/*
 * MMU mode 7: instantiated only when NB_MMU_MODES >= 8 and
 * MMU_MODE7_SUFFIX is defined.  Template included for DATA_SIZE 1, 2, 4
 * and 8 with CPU_MMU_INDEX 7.
 */
#if (NB_MMU_MODES >= 8) && defined(MMU_MODE7_SUFFIX)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 7
|
|
|
|
#define MEMSUFFIX MMU_MODE7_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 8) */
|
|
|
|
|
|
|
|
/*
 * MMU mode 8: instantiated only when NB_MMU_MODES >= 9 and
 * MMU_MODE8_SUFFIX is defined.  Template included for DATA_SIZE 1, 2, 4
 * and 8 with CPU_MMU_INDEX 8.
 */
#if (NB_MMU_MODES >= 9) && defined(MMU_MODE8_SUFFIX)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 8
|
|
|
|
#define MEMSUFFIX MMU_MODE8_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 9) */
|
|
|
|
|
|
|
|
/*
 * MMU mode 9: instantiated only when NB_MMU_MODES >= 10 and
 * MMU_MODE9_SUFFIX is defined.  Template included for DATA_SIZE 1, 2, 4
 * and 8 with CPU_MMU_INDEX 9.
 */
#if (NB_MMU_MODES >= 10) && defined(MMU_MODE9_SUFFIX)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 9
|
|
|
|
#define MEMSUFFIX MMU_MODE9_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 10) */
|
|
|
|
|
|
|
|
/*
 * MMU mode 10: instantiated only when NB_MMU_MODES >= 11 and
 * MMU_MODE10_SUFFIX is defined.  Template included for DATA_SIZE 1, 2, 4
 * and 8 with CPU_MMU_INDEX 10.
 */
#if (NB_MMU_MODES >= 11) && defined(MMU_MODE10_SUFFIX)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 10
|
|
|
|
#define MEMSUFFIX MMU_MODE10_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 11) */
|
|
|
|
|
|
|
|
/*
 * MMU mode 11: instantiated only when NB_MMU_MODES >= 12 and
 * MMU_MODE11_SUFFIX is defined.  Template included for DATA_SIZE 1, 2, 4
 * and 8 with CPU_MMU_INDEX 11.
 */
#if (NB_MMU_MODES >= 12) && defined(MMU_MODE11_SUFFIX)
|
|
|
|
|
|
|
|
#define CPU_MMU_INDEX 11
|
|
|
|
#define MEMSUFFIX MMU_MODE11_SUFFIX
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#endif /* (NB_MMU_MODES >= 12) */
|
|
|
|
|
|
|
|
/*
 * This header only instantiates accessors for MMU indexes 0 through 11;
 * fail the build for any configuration that asks for more.
 */
#if (NB_MMU_MODES > 12)
|
|
|
|
#error "NB_MMU_MODES > 12 is not supported for now"
|
|
|
|
#endif /* (NB_MMU_MODES > 12) */
|
2015-08-21 07:04:50 +00:00
|
|
|
|
|
|
|
/* These accesses are slower; they must be as rare as possible. */
|
2018-02-15 16:53:57 +00:00
|
|
|
/*
 * _data accessors: CPU_MMU_INDEX is not a constant here but the result of
 * cpu_mmu_index(env, false), evaluated wherever the template expands it.
 * The template is then included for DATA_SIZE 1, 2, 4 and 8.
 */
#define CPU_MMU_INDEX (cpu_mmu_index(env, false))
|
2015-08-21 07:04:50 +00:00
|
|
|
#define MEMSUFFIX _data
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
|
2018-02-15 16:53:57 +00:00
|
|
|
/*
 * _code accessors: CPU_MMU_INDEX expands to cpu_mmu_index(env, true), and
 * SOFTMMU_CODE_ACCESS is defined while the template is included for
 * DATA_SIZE 1, 2, 4 and 8.
 */
#define CPU_MMU_INDEX (cpu_mmu_index(env, true))
|
2015-08-21 07:04:50 +00:00
|
|
|
#define MEMSUFFIX _code
|
|
|
|
#define SOFTMMU_CODE_ACCESS
|
|
|
|
|
|
|
|
#define DATA_SIZE 1
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 2
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 4
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#define DATA_SIZE 8
|
|
|
|
#include "exec/cpu_ldst_template.h"
|
|
|
|
|
|
|
|
#undef CPU_MMU_INDEX
|
|
|
|
#undef MEMSUFFIX
|
|
|
|
#undef SOFTMMU_CODE_ACCESS
|
|
|
|
|
2018-02-13 20:00:43 +00:00
|
|
|
#endif /* defined(CONFIG_USER_ONLY) */
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
/**
|
|
|
|
* tlb_vaddr_to_host:
|
|
|
|
* @env: CPUArchState
|
|
|
|
* @addr: guest virtual address to look up
|
|
|
|
* @access_type: 0 for read, 1 for write, 2 for execute
|
|
|
|
* @mmu_idx: MMU index to use for lookup
|
|
|
|
*
|
|
|
|
* Look up the specified guest virtual index in the TCG softmmu TLB.
|
|
|
|
* If the TLB contains a host virtual address suitable for direct RAM
|
|
|
|
* access, then return it. Otherwise (TLB miss, TLB entry is for an
|
|
|
|
* I/O access, etc) return NULL.
|
|
|
|
*
|
|
|
|
* This is the equivalent of the initial fast-path code used by
|
|
|
|
* TCG backends for guest load and store accesses.
|
|
|
|
*/
|
|
|
|
static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
|
|
|
|
int access_type, int mmu_idx)
|
|
|
|
{
|
2018-02-13 20:00:43 +00:00
|
|
|
#if defined(CONFIG_USER_ONLY)
|
2018-03-01 13:56:30 +00:00
|
|
|
return g2h(addr);
|
2018-02-13 20:00:43 +00:00
|
|
|
#else
|
2018-10-23 19:02:42 +00:00
|
|
|
CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr);
|
2015-08-21 07:04:50 +00:00
|
|
|
target_ulong tlb_addr;
|
|
|
|
uintptr_t haddr;
|
|
|
|
|
|
|
|
switch (access_type) {
|
|
|
|
case 0:
|
|
|
|
tlb_addr = tlbentry->addr_read;
|
|
|
|
break;
|
|
|
|
case 1:
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+
| +++ | +++ tlb-lock-v3 (spinl|ck) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
tlb_addr = tlb_addr_write(tlbentry);
|
2015-08-21 07:04:50 +00:00
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
tlb_addr = tlbentry->addr_code;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
|
2018-07-03 23:21:22 +00:00
|
|
|
if (!tlb_hit(tlb_addr, addr)) {
|
2015-08-21 07:04:50 +00:00
|
|
|
/* TLB entry is for a different page */
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tlb_addr & ~TARGET_PAGE_MASK) {
|
|
|
|
/* IO access */
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2018-10-23 19:02:42 +00:00
|
|
|
haddr = (uintptr_t)(addr + tlbentry->addend);
|
2015-08-21 07:04:50 +00:00
|
|
|
return (void *)haddr;
|
|
|
|
#endif /* defined(CONFIG_USER_ONLY) */
|
2018-02-13 20:00:43 +00:00
|
|
|
}
|
2015-08-21 07:04:50 +00:00
|
|
|
|
|
|
|
#endif /* CPU_LDST_H */
|