softmmu: support up to 12 MMU modes

At 8k per TLB (for 64-bit host or target), 8 or more modes
make the TLBs bigger than 64k, and some RISC TCG backends do
not like that. On the affected hosts, cut the TLB size in
half---there is still a measurable speedup on PPC with the
next patch.

Backports commit 1de29aef17a7d70dbc04a7fe51e18942e3ebe313 from qemu
This commit is contained in:
Paolo Bonzini 2018-02-13 08:34:47 -05:00 committed by Lioncash
parent b34c233c2f
commit 96e7e32972
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
2 changed files with 131 additions and 8 deletions

View file

@ -27,6 +27,7 @@
#include "unicorn/platform.h"
#include "qemu/osdep.h"
#include "qemu/queue.h"
#include "tcg-target.h"
#ifndef CONFIG_USER_ONLY
#include "exec/hwaddr.h"
#endif
@ -70,8 +71,6 @@ typedef uint64_t target_ulong;
#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
#if !defined(CONFIG_USER_ONLY)
#define CPU_TLB_BITS 8
#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
/* use a fully associative victim tlb of 8 entries */
#define CPU_VTLB_SIZE 8
@ -81,6 +80,38 @@ typedef uint64_t target_ulong;
#define CPU_TLB_ENTRY_BITS 5
#endif
/* TCG_TARGET_TLB_DISPLACEMENT_BITS is used in CPU_TLB_BITS to ensure that
* the TLB is not unnecessarily small, but still small enough for the
* TLB lookup instruction sequence used by the TCG target.
*
* TCG will have to generate an operand as large as the distance between
* env and the tlb_table[NB_MMU_MODES - 1][0].addend. For simplicity,
* the TCG targets just round everything up to the next power of two, and
* count bits. This works because: 1) the size of each TLB is a largish
* power of two, 2) and because the limit of the displacement is really close
* to a power of two, 3) the offset of tlb_table[0][0] inside env is smaller
* than the size of a TLB.
*
* For example, the maximum displacement 0xFFF0 on PPC and MIPS, but TCG
* just says "the displacement is 16 bits". TCG_TARGET_TLB_DISPLACEMENT_BITS
* then ensures that tlb_table at least 0x8000 bytes large ("not unnecessarily
* small": 2^15). The operand then will come up smaller than 0xFFF0 without
* any particular care, because the TLB for a single MMU mode is larger than
* 0x10000-0xFFF0=16 bytes. In the end, the maximum value of the operand
* could be something like 0xC000 (the offset of the last TLB table) plus
* 0x18 (the offset of the addend field in each TLB entry) plus the offset
* of tlb_table inside env (which is non-trivial but not huge).
*/
#define CPU_TLB_BITS \
MIN(8, \
TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
(NB_MMU_MODES <= 1 ? 0 : \
NB_MMU_MODES <= 2 ? 1 : \
NB_MMU_MODES <= 4 ? 2 : \
NB_MMU_MODES <= 8 ? 3 : 4))
#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
typedef struct CPUTLBEntry {
/* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not

View file

@ -309,12 +309,104 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 7) */
#if (NB_MMU_MODES > 7)
/* Note that supporting NB_MMU_MODES == 9 would require
* changes to at least the ARM TCG backend.
*/
#error "NB_MMU_MODES > 7 is not supported for now"
#endif /* (NB_MMU_MODES > 7) */
#if (NB_MMU_MODES >= 8) && defined(MMU_MODE7_SUFFIX)
#define CPU_MMU_INDEX 7
#define MEMSUFFIX MMU_MODE7_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 8) */
#if (NB_MMU_MODES >= 9) && defined(MMU_MODE8_SUFFIX)
#define CPU_MMU_INDEX 8
#define MEMSUFFIX MMU_MODE8_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 9) */
#if (NB_MMU_MODES >= 10) && defined(MMU_MODE9_SUFFIX)
#define CPU_MMU_INDEX 9
#define MEMSUFFIX MMU_MODE9_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 10) */
#if (NB_MMU_MODES >= 11) && defined(MMU_MODE10_SUFFIX)
#define CPU_MMU_INDEX 10
#define MEMSUFFIX MMU_MODE10_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 11) */
#if (NB_MMU_MODES >= 12) && defined(MMU_MODE11_SUFFIX)
#define CPU_MMU_INDEX 11
#define MEMSUFFIX MMU_MODE11_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 12) */
#if (NB_MMU_MODES > 12)
#error "NB_MMU_MODES > 12 is not supported for now"
#endif /* (NB_MMU_MODES > 12) */
/* these access are slower, they must be as rare as possible */
#define CPU_MMU_INDEX (cpu_mmu_index(env))