tcg: define CF_PARALLEL and use it for TB hashing along with CF_COUNT_MASK

This will enable us to decouple code translation from the value
of parallel_cpus at any given time. It will also help us minimize
TB flushes when generating code via EXCP_ATOMIC.

Note that the declaration of parallel_cpus is moved into exec-all.h so
that the "curr_cflags" inline function can be defined there.
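To make the mechanism concrete, here is a hedged, self-contained sketch (toy code, not part of this commit; the toy_* names are invented for illustration) of why folding CF_PARALLEL into the hashed/compared cflags removes the need to flush: serial and parallel translations of the same guest pc coexist in the table, and each lookup only matches TBs generated for its own mode.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CF_PARALLEL  0x100000u     /* mirrors the new cflags bit */
#define CF_HASH_MASK (CF_PARALLEL) /* cflags bits used for hash/compare */

struct toy_tb {
    uint64_t pc;
    uint32_t flags;
    uint32_t cflags;
};

/* Analogue of curr_cflags(): fold the current mode into the lookup mask. */
static uint32_t toy_curr_cflags(bool parallel_cpus)
{
    return parallel_cpus ? CF_PARALLEL : 0;
}

/* Analogue of the tb_cmp()/tb_lookup__cpu_state() change: a TB matches
 * only if it was generated for the mode we are looking up with. */
static bool toy_tb_matches(const struct toy_tb *tb, uint64_t pc,
                           uint32_t flags, uint32_t cf_mask)
{
    return tb->pc == pc && tb->flags == flags &&
           (tb->cflags & CF_HASH_MASK) == cf_mask;
}

int main(void)
{
    struct toy_tb serial   = { 0x1000, 0, 0 };
    struct toy_tb parallel = { 0x1000, 0, CF_PARALLEL };

    /* Same guest pc, two generated TBs: each mode finds only its own,
     * so flipping parallel_cpus never forces a TB flush. */
    printf("%d\n", toy_tb_matches(&serial,   0x1000, 0, toy_curr_cflags(false))); /* 1 */
    printf("%d\n", toy_tb_matches(&parallel, 0x1000, 0, toy_curr_cflags(false))); /* 0 */
    printf("%d\n", toy_tb_matches(&parallel, 0x1000, 0, toy_curr_cflags(true)));  /* 1 */
    return 0;
}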

Backports commit 4e2ca83e71b51577b06b1468e836556912bd5b6e from qemu
Author: Emilio G. Cota, 2018-03-13 14:22:31 -04:00
Committed by: Lioncash
Parent: 6bc05eeee4
Commit: b5961a139b
7 changed files with 74 additions and 34 deletions


@@ -101,7 +101,9 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
     }
     tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
-                     max_cycles | CF_NOCACHE);
+                     max_cycles | CF_NOCACHE
+                     | 0
+                     | curr_cflags(cpu->uc));
     tb->orig_tb = orig_tb;
 
     /* execute the generated code */
     // Unicorn: commented out
@@ -117,6 +119,8 @@ struct tb_desc {
     CPUArchState *env;
     tb_page_addr_t phys_page1;
     uint32_t flags;
+    uint32_t cf_mask;
+    uint32_t trace_vcpu_dstate;
 };
 
 static bool tb_cmp(const void *p, const void *d)
@@ -127,7 +131,9 @@ static bool tb_cmp(const void *p, const void *d)
     if (tb->pc == desc->pc &&
         tb->page_addr[0] == desc->phys_page1 &&
         tb->cs_base == desc->cs_base &&
-        tb->flags == desc->flags) {
+        tb->flags == desc->flags &&
+        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
+        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -146,7 +152,8 @@ static bool tb_cmp(const void *p, const void *d)
 }
 
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags)
+                                   target_ulong cs_base, uint32_t flags,
+                                   uint32_t cf_mask)
 {
     TCGContext *tcg_ctx = cpu->uc->tcg_ctx;
     tb_page_addr_t phys_pc;
@@ -157,10 +164,12 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     desc.env = (CPUArchState *)cpu->env_ptr;
     desc.cs_base = cs_base;
     desc.flags = flags;
+    desc.cf_mask = cf_mask;
+    desc.trace_vcpu_dstate = 0;
     desc.pc = pc;
     phys_pc = get_page_addr_code(desc.env, pc);
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags);
+    h = tb_hash_func(phys_pc, pc, flags, cf_mask, 0);
     return qht_lookup(&tcg_ctx->tb_ctx.htable, tb_cmp, &desc, h);
 }
@@ -208,8 +217,9 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
     target_ulong cs_base, pc;
     uint32_t flags;
     bool acquired_tb_lock = false;
+    uint32_t cf_mask = curr_cflags(cpu->uc);
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
+    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
     if (tb == NULL) {
         /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
          * taken outside tb_lock. As system emulation is currently
@@ -222,10 +232,10 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
         /* There's a chance that our desired tb has been translated while
          * taking the locks so we check again inside the lock.
          */
-        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
+        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
         if (likely(tb == NULL)) {
             /* if no translated code available, then translate it now */
-            tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
+            tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
         }
 
         mmap_unlock();
@@ -466,24 +476,21 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
 static void cpu_exec_step(struct uc_struct *uc, CPUState *cpu)
 {
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
-    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+    uint32_t cflags = 1 | CF_IGNORE_ICOUNT;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
-        mmap_lock();
-        tb = tb_gen_code(cpu, pc, cs_base, flags,
-                         1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
-        tb->orig_tb = NULL;
-        mmap_unlock();
-
-        /* execute the generated code */
-        cpu_tb_exec(cpu, tb);
-
-        tb_phys_invalidate(uc, tb, -1);
-        tb_free(uc, tb);
+        tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags,
+                                  cflags & CF_HASH_MASK);
+        if (tb == NULL) {
+            mmap_lock();
+            //tb_lock();
+            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
+            //tb_unlock();
+            mmap_unlock();
+        }
     } else {
         /* We may have exited due to another problem here, so we need
          * to reset any tb_locks we may have taken but didn't release.


@@ -151,7 +151,7 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env)
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
+    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags(cpu->uc));
     if (tb == NULL) {
         return tcg_ctx->code_gen_epilogue;
     }


@@ -1133,7 +1133,8 @@ void tb_phys_invalidate(struct uc_struct *uc,
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+                     tb->trace_vcpu_dstate);
     qht_remove(&tcg_ctx->tb_ctx.htable, tb, h);
 
     /* remove the TB from the page list */
@@ -1289,7 +1290,8 @@ static void tb_link_page(struct uc_struct *uc,
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+                     tb->trace_vcpu_dstate);
     qht_insert(&tcg_ctx->tb_ctx.htable, tb, h);
 
 #ifdef CONFIG_USER_ONLY
@@ -1604,7 +1606,8 @@ void tb_invalidate_phys_page_range(struct uc_struct *uc, tb_page_addr_t start, t
         /* we generate a block containing just the instruction
            modifying the memory. It will ensure that it cannot modify
            itself */
-        tb_gen_code(uc->cpu, current_pc, current_cs_base, current_flags, 1);
+        tb_gen_code(cpu, current_pc, current_cs_base, current_flags,
+                    1 | curr_cflags(uc));
         cpu_loop_exit_noexc(uc->cpu);
     }
 #endif
@@ -1716,7 +1719,8 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
         /* we generate a block containing just the instruction
            modifying the memory. It will ensure that it cannot modify
            itself */
-        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
+        tb_gen_code(cpu, current_pc, current_cs_base, current_flags,
+                    1 | curr_cflags(uc));
         return true;
     }
 #endif
@@ -1850,6 +1854,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
     }
     cflags = n | CF_LAST_IO;
+    cflags |= curr_cflags(cpu->uc);
     pc = tb->pc;
     cs_base = tb->cs_base;
     flags = tb->flags;


@@ -22,6 +22,7 @@
 #include "qemu-common.h"
 #include "exec/tb-context.h"
+#include "uc_priv.h"
 
 /* allow to see translation results - the slowdown should be negligible, so we leave it */
 #define DEBUG_DISAS
@@ -229,8 +230,15 @@ struct TranslationBlock {
 #define CF_USE_ICOUNT  0x20000
 #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */
 #define CF_INVALID     0x80000 /* TB is stale. Setters must acquire tb_lock */
+#define CF_PARALLEL    0x100000 /* Generate code for a parallel context */
+/* cflags' mask for hashing/comparison */
+#define CF_HASH_MASK (CF_PARALLEL)
+
+    /* Per-vCPU dynamic tracing state used to generate this TB */
+    uint32_t trace_vcpu_dstate;
 
     struct tb_tc tc;
 
     /* original tb when cflags has CF_NOCACHE */
     struct TranslationBlock *orig_tb;
     /* first and second physical page containing code. The lower bit
@@ -265,12 +273,25 @@ struct TranslationBlock {
     uintptr_t jmp_list_first;
 };
 
+/* Hide the atomic_read to make code a little easier on the eyes */
+static inline uint32_t tb_cflags(const TranslationBlock *tb)
+{
+    return atomic_read(&tb->cflags);
+}
+
+/* current cflags for hashing/comparison */
+static inline uint32_t curr_cflags(struct uc_struct *uc)
+{
+    return uc->parallel_cpus ? CF_PARALLEL : 0;
+}
+
 void tb_free(struct uc_struct *uc, TranslationBlock *tb);
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(struct uc_struct *uc,
                         TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags);
+                                   target_ulong cs_base, uint32_t flags,
+                                   uint32_t cf_mask);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);


@@ -48,8 +48,8 @@
  * xxhash32, customized for input variables that are not guaranteed to be
  * contiguous in memory.
  */
-static inline
-uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
+static inline uint32_t
+tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
 {
     uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
     uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
@@ -78,11 +78,17 @@ uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
     v4 *= PRIME32_1;
 
     h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
-    h32 += 20;
+    h32 += 28;
 
     h32 += e * PRIME32_3;
     h32 = rol32(h32, 17) * PRIME32_4;
 
+    h32 += f * PRIME32_3;
+    h32 = rol32(h32, 17) * PRIME32_4;
+
+    h32 += g * PRIME32_3;
+    h32 = rol32(h32, 17) * PRIME32_4;
+
     h32 ^= h32 >> 15;
     h32 *= PRIME32_2;
     h32 ^= h32 >> 13;
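
For orientation, here is the widened hash in one piece — a standalone sketch that reconstructs the rounds elided by the hunk above from stock xxhash32. The PRIME32_* constants, TB_HASH_XX_SEED and rol32 are assumed to match QEMU's tb-hash-xx.h; spacing and layout may differ cosmetically from this tree.

#include <stdint.h>

#define PRIME32_1 0x9E3779B1u
#define PRIME32_2 0x85EBCA77u
#define PRIME32_3 0xC2B2AE3Du
#define PRIME32_4 0x27D4EB2Fu
#define TB_HASH_XX_SEED 1u

static inline uint32_t rol32(uint32_t x, unsigned r)
{
    return (x << r) | (x >> (32 - r));
}

/* Seven 32-bit inputs (a0/b0 split into halves, plus e, f, g), hence the
 * xxhash length term growing from 20 to 28 bytes. f and g carry the new
 * cf_mask and trace_vcpu_dstate values into the TB hash. */
static inline uint32_t
tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
{
    uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
    uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
    uint32_t v3 = TB_HASH_XX_SEED + 0;
    uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1;
    uint32_t a = a0 >> 32, b = (uint32_t)a0;
    uint32_t c = b0 >> 32, d = (uint32_t)b0;
    uint32_t h32;

    /* one xxhash32 round per 32-bit word of the two 64-bit inputs */
    v1 += a * PRIME32_2; v1 = rol32(v1, 13); v1 *= PRIME32_1;
    v2 += b * PRIME32_2; v2 = rol32(v2, 13); v2 *= PRIME32_1;
    v3 += c * PRIME32_2; v3 = rol32(v3, 13); v3 *= PRIME32_1;
    v4 += d * PRIME32_2; v4 = rol32(v4, 13); v4 *= PRIME32_1;

    h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
    h32 += 28;

    /* tail words: flags, cf_mask, trace_vcpu_dstate */
    h32 += e * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4;
    h32 += f * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4;
    h32 += g * PRIME32_3; h32 = rol32(h32, 17) * PRIME32_4;

    /* final avalanche, as in stock xxhash32 */
    h32 ^= h32 >> 15;
    h32 *= PRIME32_2;
    h32 ^= h32 >> 13;
    h32 *= PRIME32_3;
    h32 ^= h32 >> 16;
    return h32;
}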


@@ -58,9 +58,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
 }
 
 static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
+uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
+                      uint32_t cf_mask, uint32_t trace_vcpu_dstate)
 {
-    return tb_hash_func5(phys_pc, pc, flags);
+    return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
 #endif


@@ -19,7 +19,7 @@
 /* Might cause an exception, so have a longjmp destination ready */
 static inline TranslationBlock *
 tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
-                     uint32_t *flags)
+                     uint32_t *flags, uint32_t cf_mask)
 {
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
@@ -32,10 +32,10 @@ tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
         tb->pc == *pc &&
         tb->cs_base == *cs_base &&
         tb->flags == *flags &&
-        !(atomic_read(&tb->cflags) & CF_INVALID))) {
+        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
         return tb;
     }
-    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags);
+    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
     if (tb == NULL) {
         return NULL;
     }