translate-all: use a binary search tree to track TBs in TBContext
This is a prerequisite for supporting multiple TCG contexts, since we will have threads generating code in separate regions of code_gen_buffer. For this we need a new field (.size) in struct tb_tc to keep track of the size of the translated code. This field uses a size_t to avoid adding a hole to the struct, although really an unsigned int would have been enough.

The comparison function we use is optimized for the common case: insertions. Profiling shows that upon booting debian-arm, 98% of comparisons are between existing tb's (i.e. a->size and b->size are both !0), which happens during insertions (and removals, but those are rare). The remaining cases are lookups. From reading the glib sources we see that the first key is always the lookup key. The code does not assume this to always be the case, because this behaviour is not guaranteed in the glib docs; we do, however, embed this knowledge in the code as a branch hint for the compiler.

Note that tb_free does not free space in the code_gen_buffer anymore, since we cannot easily know whether the tb is the last one inserted in code_gen_buffer. The next patch in this series renames tb_free to tb_remove to reflect this.

Performance-wise, lookups in tb_find_pc are the same as before: O(log n). However, insertions are O(log n) instead of O(1), which results in a small slowdown when booting debian-arm:

Performance counter stats for 'build/arm-softmmu/qemu-system-arm \
    -machine type=virt -nographic -smp 1 -m 4096 \
    -netdev user,id=unet,hostfwd=tcp::2222-:22 \
    -device virtio-net-device,netdev=unet \
    -drive file=img/arm/jessie-arm32.qcow2,id=myblock,index=0,if=none \
    -device virtio-blk-device,drive=myblock \
    -kernel img/arm/aarch32-current-linux-kernel-only.img \
    -append console=ttyAMA0 root=/dev/vda1 \
    -name arm,debug-threads=on -smp 1' (10 runs):

- Before:

      8048.598422      task-clock (msec)      #  0.931 CPUs utilized    ( +- 0.28% )
            16,974      context-switches      #  0.002 M/sec            ( +- 0.12% )
                 0      cpu-migrations        #  0.000 K/sec
            10,125      page-faults           #  0.001 M/sec            ( +- 1.23% )
    35,144,901,879      cycles                #  4.367 GHz              ( +- 0.14% )
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    65,758,252,643      instructions          #  1.87  insns per cycle  ( +- 0.33% )
    10,871,298,668      branches              # 1350.707 M/sec          ( +- 0.41% )
       192,322,212      branch-misses         #  1.77% of all branches  ( +- 0.32% )

       8.640869419 seconds time elapsed                                 ( +- 0.57% )

- After:

      8146.242027      task-clock (msec)      #  0.923 CPUs utilized    ( +- 1.23% )
            17,016      context-switches      #  0.002 M/sec            ( +- 0.40% )
                 0      cpu-migrations        #  0.000 K/sec
            18,769      page-faults           #  0.002 M/sec            ( +- 0.45% )
    35,660,956,120      cycles                #  4.378 GHz              ( +- 1.22% )
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    65,095,366,607      instructions          #  1.83  insns per cycle  ( +- 1.73% )
    10,803,480,261      branches              # 1326.192 M/sec          ( +- 1.95% )
       195,601,289      branch-misses         #  1.81% of all branches  ( +- 0.39% )

       8.828660235 seconds time elapsed                                 ( +- 0.38% )

Backports commit 2ac01d6dafabd4a726254eea98824c798d416ee4 from qemu
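The keying scheme described above is easy to reproduce outside of QEMU. The sketch below is illustrative only and is not part of the commit: it uses stock GLib's GTree (the same g_tree_new/g_tree_insert/g_tree_lookup calls that the backport adds to glib_compat.h), and the names struct range, ptr_cmp_range and range_cmp are invented for the demo. Inserted keys carry a non-zero .size; a lookup key sets .size to 0 so the comparator can match a bare address against each stored [ptr, ptr + size) range.

/*
 * Illustrative sketch only -- not taken from the commit.
 * Build (assuming glib-2.0 is installed):
 *   gcc demo.c $(pkg-config --cflags --libs glib-2.0)
 */
#include <glib.h>
#include <stdio.h>

struct range {
    void *ptr;      /* start of the region (think: translated code) */
    size_t size;    /* != 0 for inserted entries, 0 for lookup keys */
};

/* compare a bare pointer against the [ptr, ptr + size) range of @s */
static gint ptr_cmp_range(const void *ptr, const struct range *s)
{
    if ((const char *)ptr >= (const char *)s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint range_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct range *a = ap;
    const struct range *b = bp;

    if (a->size && b->size) {   /* insertion/removal: order by start address */
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        return 0;
    }
    /* lookup: GLib passes the lookup key first, so assume @a is the point */
    g_assert(a->size == 0);
    return ptr_cmp_range(a->ptr, b);
}

int main(void)
{
    static char buf[256];                  /* stands in for code_gen_buffer */
    struct range r1 = { buf, 64 };         /* two "TBs" carved out of buf */
    struct range r2 = { buf + 64, 128 };
    struct range key = { buf + 100, 0 };   /* an address inside r2's range */
    GTree *tree = g_tree_new(range_cmp);
    struct range *hit;

    g_tree_insert(tree, &r1, &r1);
    g_tree_insert(tree, &r2, &r2);

    hit = g_tree_lookup(tree, &key);       /* finds r2, the containing range */
    printf("hit starts at offset %td, size %zu\n",
           (char *)hit->ptr - buf, hit->size);

    g_tree_destroy(tree);
    return 0;
}

As in the new tb_find_pc below, a miss simply makes g_tree_lookup return NULL; the demo makes the "lookup key comes first" assumption explicit with a g_assert rather than the branch-hint fallback used in the real comparator.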
commit f7c984d21f
parent 35e551dc45
@@ -250,8 +250,6 @@ static int encode_search(TCGContext *tcg_ctx, TranslationBlock *tb, uint8_t *blo
     uint8_t *p = block;
     int i, j, n;
 
-    tb->tc.search = block;
-
     for (i = 0, n = tb->icount; i < n; ++i) {
         target_ulong prev;
 
@@ -287,7 +285,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
     CPUArchState *env = cpu->env_ptr;
-    uint8_t *p = tb->tc.search;
+    uint8_t *p = tb->tc.ptr + tb->tc.size;
     int i, j, num_insns = tb->icount;
 #ifdef CONFIG_PROFILER
     int64_t ti = profile_getclock();
@@ -806,6 +804,48 @@ void free_code_gen_buffer(struct uc_struct *uc)
 }
 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
 
+/* compare a pointer @ptr and a tb_tc @s */
+static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
+{
+    if (ptr >= s->ptr + s->size) {
+        return 1;
+    } else if (ptr < s->ptr) {
+        return -1;
+    }
+    return 0;
+}
+
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
+{
+    const struct tb_tc *a = ap;
+    const struct tb_tc *b = bp;
+
+    /*
+     * When both sizes are set, we know this isn't a lookup.
+     * This is the most likely case: every TB must be inserted; lookups
+     * are a lot less frequent.
+     */
+    if (likely(a->size && b->size)) {
+        if (a->ptr > b->ptr) {
+            return 1;
+        } else if (a->ptr < b->ptr) {
+            return -1;
+        }
+        /* a->ptr == b->ptr should happen only on deletions */
+        g_assert(a->size == b->size);
+        return 0;
+    }
+    /*
+     * All lookups have either .size field set to 0.
+     * From the glib sources we see that @ap is always the lookup key. However
+     * the docs provide no guarantee, so we just mark this case as likely.
+     */
+    if (likely(a->size == 0)) {
+        return ptr_cmp_tb_tc(a->ptr, b);
+    }
+    return ptr_cmp_tb_tc(b->ptr, a);
+}
+
 static inline void code_gen_alloc(struct uc_struct *uc, size_t tb_size)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
@@ -825,12 +865,7 @@ static inline void code_gen_alloc(struct uc_struct *uc, size_t tb_size)
        still haven't deducted the prologue from the buffer size here,
        but that's minimal and won't affect the estimate much. */
-    /* size this conservatively -- realloc later if needed */
-    tcg_ctx->tb_ctx.tbs_size =
-        tcg_ctx->code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8;
-    if (unlikely(!tcg_ctx->tb_ctx.tbs_size)) {
-        tcg_ctx->tb_ctx.tbs_size = 64 * 1024;
-    }
-    tcg_ctx->tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx->tb_ctx.tbs_size);
+    tcg_ctx->tb_ctx.tb_tree = g_tree_new(tb_tc_cmp);
 }
 
 static void tb_htable_init(struct uc_struct *uc)
 
@@ -877,18 +912,11 @@ static TranslationBlock *tb_alloc(struct uc_struct *uc, target_ulong pc)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
     TranslationBlock *tb;
-    TBContext *ctx;
 
     tb = tcg_tb_alloc(tcg_ctx);
     if (unlikely(tb == NULL)) {
         return NULL;
     }
-    ctx = &tcg_ctx->tb_ctx;
-    if (unlikely(ctx->nb_tbs == ctx->tbs_size)) {
-        ctx->tbs_size *= 2;
-        ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size);
-    }
-    ctx->tbs[ctx->nb_tbs++] = tb;
     return tb;
 }
 
@@ -897,16 +925,7 @@ void tb_free(struct uc_struct *uc, TranslationBlock *tb)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
 
-    /* In practice this is mostly used for single use temporary TB
-       Ignore the hard cases and just back up if this TB happens to
-       be the last one generated. */
-    if (tcg_ctx->tb_ctx.nb_tbs > 0 &&
-        tb == tcg_ctx->tb_ctx.tbs[tcg_ctx->tb_ctx.nb_tbs - 1]) {
-        size_t struct_size = ROUND_UP(sizeof(*tb), uc->qemu_icache_linesize);
-
-        tcg_ctx->code_gen_ptr = tb->tc.ptr - struct_size;
-        tcg_ctx->tb_ctx.nb_tbs--;
-    }
+    g_tree_remove(tcg_ctx->tb_ctx.tb_tree, &tb->tc);
 }
 
 static inline void invalidate_page_bitmap(PageDesc *p)
 
@@ -963,11 +982,12 @@ void tb_flush(CPUState *cpu)
     TCGContext *tcg_ctx = uc->tcg_ctx;
 
     if (DEBUG_TB_FLUSH_GATE) {
-        printf("qemu: flush code_size=%td nb_tbs=%d avg_tb_size=%td\n",
-               tcg_ctx->code_gen_ptr - tcg_ctx->code_gen_buffer,
-               tcg_ctx->tb_ctx.nb_tbs, tcg_ctx->tb_ctx.nb_tbs > 0 ?
-               (tcg_ctx->code_gen_ptr - tcg_ctx->code_gen_buffer) /
-               tcg_ctx->tb_ctx.nb_tbs : 0);
+        size_t nb_tbs = g_tree_nnodes(tcg_ctx->tb_ctx.tb_tree);
+
+        printf("qemu: flush code_size=%td nb_tbs=%zu avg_tb_size=%zu\n",
+               tcg_ctx->code_gen_ptr - tcg_ctx->code_gen_buffer, nb_tbs,
+               nb_tbs > 0 ?
+               (size_t)(tcg_ctx->code_gen_ptr - tcg_ctx->code_gen_buffer) / nb_tbs : 0);
     }
     if ((unsigned long)((char*)tcg_ctx->code_gen_ptr - (char*)tcg_ctx->code_gen_buffer)
             > tcg_ctx->code_gen_buffer_size) {
@@ -977,7 +997,10 @@ void tb_flush(CPUState *cpu)
     cpu_tb_jmp_cache_clear(cpu);
     atomic_mb_set(&cpu->tb_flushed, true);
 
-    tcg_ctx->tb_ctx.nb_tbs = 0;
+    /* Increment the refcount first so that destroy acts as a reset */
+    g_tree_ref(tcg_ctx->tb_ctx.tb_tree);
+    g_tree_destroy(tcg_ctx->tb_ctx.tb_tree);
+
     qht_reset_size(&tcg_ctx->tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
     page_flush_tb(uc);
 
@@ -1393,6 +1416,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     if (unlikely(search_size < 0)) {
         goto buffer_overflow;
     }
+    tb->tc.size = gen_code_size;
 
 #ifdef CONFIG_PROFILER
     tcg_ctx.code_time += profile_getclock();
@@ -1464,6 +1488,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      * through the physical hash table and physical page list.
      */
     tb_link_page(cpu->uc, tb, phys_pc, phys_page2);
+    g_tree_insert(tcg_ctx->tb_ctx.tb_tree, &tb->tc, tb);
     return tb;
 }
 
@@ -1720,38 +1745,18 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
 }
 #endif
 
-/* find the TB 'tb' such that tb[0].tc.ptr <= tc_ptr <
-   tb[1].tc.ptr. Return NULL if not found */
+/*
+ * Find the TB 'tb' such that
+ * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
+ * Return NULL if not found.
+ */
 static TranslationBlock *tb_find_pc(struct uc_struct *uc, uintptr_t tc_ptr)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
-    int m_min, m_max, m;
-    uintptr_t v;
-    TranslationBlock *tb;
+    struct tb_tc s = {0};
+    s.ptr = (void *)tc_ptr;
 
-    if (tcg_ctx->tb_ctx.nb_tbs <= 0) {
-        return NULL;
-    }
-    if (tc_ptr < (uintptr_t)tcg_ctx->code_gen_buffer ||
-        tc_ptr >= (uintptr_t)tcg_ctx->code_gen_ptr) {
-        return NULL;
-    }
-    /* binary search (cf Knuth) */
-    m_min = 0;
-    m_max = tcg_ctx->tb_ctx.nb_tbs - 1;
-    while (m_min <= m_max) {
-        m = (m_min + m_max) >> 1;
-        tb = tcg_ctx->tb_ctx.tbs[m];
-        v = (uintptr_t)tb->tc.ptr;
-        if (v == tc_ptr) {
-            return tb;
-        } else if (tc_ptr < v) {
-            m_max = m - 1;
-        } else {
-            m_min = m + 1;
-        }
-    }
-    return tcg_ctx->tb_ctx.tbs[m_max];
+    return g_tree_lookup(tcg_ctx->tb_ctx.tb_tree, &s);
 }
 
 #if !defined(CONFIG_USER_ONLY)
 
qemu/glib_compat.c: 1283 changes (file diff suppressed because it is too large)
@@ -210,10 +210,14 @@ static inline void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
 
+/*
+ * Translation Cache-related fields of a TB.
+ * This struct exists just for convenience; we keep track of TB's in a binary
+ * search tree, and the only fields needed to compare TB's in the tree are
+ * @ptr and @size.
+ * Note: the address of search data can be obtained by adding @size to @ptr.
+ */
 struct tb_tc {
     void *ptr;    /* pointer to the translated code */
-    uint8_t *search;  /* pointer to search data */
+    size_t size;
 };
 
 struct TranslationBlock {
 
@@ -31,10 +31,8 @@ typedef struct TBContext TBContext;
 
 struct TBContext {
 
-    TranslationBlock **tbs;
+    GTree *tb_tree;
     struct qht htable;
-    size_t tbs_size;
-    int nb_tbs;
 
     /* statistics */
     int tb_flush_count;
 
@@ -61,6 +61,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 typedef void* gpointer;
 typedef const void *gconstpointer;
 typedef int gint;
+typedef signed char gint8;
+typedef unsigned char guint8;
+typedef signed short gint16;
+typedef unsigned short guint16;
 typedef uint32_t guint32;
 typedef uint64_t guint64;
 typedef unsigned int guint;
@@ -71,12 +75,24 @@ typedef unsigned long gulong;
 typedef unsigned long gsize;
 typedef signed long gssize;
 
-typedef gint (*GCompareDataFunc)(gconstpointer a,
-                                 gconstpointer b,
-                                 gpointer user_data);
-typedef void (*GFunc)(gpointer data, gpointer user_data);
-typedef gint (*GCompareFunc)(gconstpointer v1, gconstpointer v2);
+typedef gint (*GCompareFunc)(gconstpointer a, gconstpointer b);
+typedef gint (*GCompareDataFunc)(gconstpointer a, gconstpointer b,
+                                 gpointer user_data);
+typedef gboolean (*GEqualFunc)(gconstpointer a, gconstpointer b);
+typedef void (*GDestroyNotify)(gpointer data);
+typedef void (*GFunc)(gpointer data, gpointer user_data);
+typedef guint (*GHashFunc)(gconstpointer key);
+typedef void (*GHFunc)(gpointer key, gpointer value, gpointer user_data);
+typedef void (*GFreeFunc)(gpointer data);
+
+/* Tree traverse orders */
+typedef enum
+{
+  G_IN_ORDER,
+  G_PRE_ORDER,
+  G_POST_ORDER,
+  G_LEVEL_ORDER
+} GTraverseType;
 
 guint g_direct_hash(gconstpointer v);
 gboolean g_direct_equal(gconstpointer v1, gconstpointer v2);
 
@@ -178,6 +194,36 @@ GHashTable *g_hash_table_iter_get_hash_table(GHashTableIter *iter);
 void g_hash_table_iter_remove(GHashTableIter *iter);
 void g_hash_table_iter_steal(GHashTableIter *iter);
 
+/* Tree code */
+typedef struct _GTree GTree;
+
+typedef gboolean (*GTraverseFunc) (gpointer key,
+                                   gpointer value,
+                                   gpointer data);
+
+GTree *g_tree_new(GCompareFunc key_compare_func);
+GTree *g_tree_new_with_data(GCompareDataFunc key_compare_func,
+                            gpointer key_compare_data);
+GTree *g_tree_new_full(GCompareDataFunc key_compare_func,
+                       gpointer key_compare_data,
+                       GDestroyNotify key_destroy_func,
+                       GDestroyNotify value_destroy_func);
+GTree *g_tree_ref(GTree *tree);
+void g_tree_unref(GTree *tree);
+void g_tree_destroy(GTree *tree);
+void g_tree_insert(GTree *tree, gpointer key, gpointer value);
+void g_tree_replace(GTree *tree, gpointer key, gpointer value);
+gboolean g_tree_remove(GTree *tree, gconstpointer key);
+gboolean g_tree_steal(GTree *tree, gconstpointer key);
+gpointer g_tree_lookup(GTree *tree, gconstpointer key);
+gboolean g_tree_lookup_extended(GTree *tree, gconstpointer lookup_key,
+                                gpointer *orig_key, gpointer *value);
+void g_tree_foreach(GTree *tree, GTraverseFunc func, gpointer user_data);
+gpointer g_tree_search(GTree *tree, GCompareFunc search_func, gconstpointer user_data);
+gint g_tree_height(GTree *tree);
+gint g_tree_nnodes(GTree *tree);
+void g_tree_traverse(GTree *tree, GTraverseFunc traverse_func, GTraverseType traverse_type, gpointer user_data);
+
 /* replacement for g_malloc dependency */
 void g_free(gpointer ptr);
 gpointer g_malloc(size_t size);
 
@@ -27,7 +27,7 @@ void arm64_release(void* ctx)
     struct uc_struct* uc = s->uc;
     ARMCPU* cpu = ARM_CPU(uc, uc->cpu);
 
-    g_free(s->tb_ctx.tbs);
+    g_tree_destroy(s->tb_ctx.tb_tree);
     g_free(cpu->cpreg_indexes);
     g_free(cpu->cpreg_values);
     g_free(cpu->cpreg_vmstate_indexes);
 
@@ -29,7 +29,7 @@ void arm_release(void* ctx)
     ARMCPU* cpu = ARM_CPU(uc, uc->cpu);
     CPUArchState *env = &cpu->env;
 
-    g_free(s->tb_ctx.tbs);
+    g_tree_destroy(s->tb_ctx.tb_tree);
     g_free(cpu->cpreg_indexes);
     g_free(cpu->cpreg_values);
     g_free(cpu->cpreg_vmstate_indexes);
 
@@ -58,7 +58,7 @@ void x86_release(void *ctx)
     release_common(ctx);
 
     // arch specific
-    g_free(s->tb_ctx.tbs);
+    g_tree_destroy(s->tb_ctx.tb_tree);
 }
 
 void x86_reg_reset(struct uc_struct *uc)
 
@@ -25,7 +25,7 @@ void m68k_release(void* ctx)
     TCGContext *tcg_ctx = ctx;;
 
     release_common(ctx);
-    g_free(tcg_ctx->tb_ctx.tbs);
+    g_tree_destroy(s->tb_ctx.tb_tree);
 }
 
 void m68k_reg_reset(struct uc_struct *uc)
 
@@ -54,7 +54,7 @@ void mips_release(void *ctx)
     release_common(ctx);
     g_free(cpu->env.tlb);
     g_free(cpu->env.mvp);
-    g_free(tcg_ctx->tb_ctx.tbs);
+    g_tree_destroy(s->tb_ctx.tb_tree);
 }
 
 void mips_reg_reset(struct uc_struct *uc)
 
@@ -35,7 +35,7 @@ void sparc_release(void *ctx)
 {
     TCGContext *tcg_ctx = (TCGContext *) ctx;
     release_common(ctx);
-    g_free(tcg_ctx->tb_ctx.tbs);
+    g_tree_destroy(s->tb_ctx.tb_tree);
 }
 
 void sparc_reg_reset(struct uc_struct *uc)