From ee1ddf4a92e76506ead12766ad4af30377eea8da Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 4 May 2019 22:17:43 -0400 Subject: [PATCH] tcg: define CF_PARALLEL and use it for TB hashing along with CF_COUNT_MASK This will enable us to decouple code translation from the value of parallel_cpus at any given time. It will also help us minimize TB flushes when generating code via EXCP_ATOMIC. Note that the declaration of parallel_cpus is brought to exec-all.h to be able to define there the "curr_cflags" inline. Backports commit 4e2ca83e71b51577b06b1468e836556912bd5b6e from qemu --- qemu/accel/tcg/cpu-exec.c | 30 ++++++++++++++++-------------- qemu/accel/tcg/tcg-runtime.c | 2 +- qemu/accel/tcg/translate-all.c | 17 ++++++++++------- qemu/include/exec/exec-all.h | 14 +++++++++++++- qemu/include/exec/tb-hash.h | 5 +++-- qemu/include/exec/tb-lookup.h | 6 +++--- 6 files changed, 46 insertions(+), 28 deletions(-) diff --git a/qemu/accel/tcg/cpu-exec.c b/qemu/accel/tcg/cpu-exec.c index ab4cd6cf..9cfe7141 100644 --- a/qemu/accel/tcg/cpu-exec.c +++ b/qemu/accel/tcg/cpu-exec.c @@ -115,7 +115,8 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, #endif TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, uint32_t flags) + target_ulong cs_base, uint32_t flags, + uint32_t cf_mask) { TCGContext *tcg_ctx = cpu->uc->tcg_ctx; CPUArchState *env = (CPUArchState *)cpu->env_ptr; @@ -126,7 +127,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, /* find translated block using physical mappings */ phys_pc = get_page_addr_code(env, pc); phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_hash_func(phys_pc, pc, flags); + h = tb_hash_func(phys_pc, pc, flags, cf_mask); /* Start at head of the hash entry */ ptb1 = tb_hash_head = &tcg_ctx->tb_ctx.tb_phys_hash[h]; @@ -137,8 +138,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, tb->page_addr[0] == phys_page1 && tb->cs_base == cs_base && tb->flags == flags && - !(tb_cflags(tb) & CF_INVALID)) { - + (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask) { if (tb->page_addr[1] == -1) { /* done, we have a match */ break; @@ -209,8 +209,9 @@ static inline TranslationBlock *tb_find(CPUState *cpu, target_ulong cs_base, pc; uint32_t flags; bool acquired_tb_lock = false; + uint32_t cf_mask = curr_cflags(cpu->uc); - tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags); + tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); if (tb == NULL) { mmap_lock(); //tb_lock(); @@ -219,10 +220,10 @@ static inline TranslationBlock *tb_find(CPUState *cpu, /* There's a chance that our desired tb has been translated while * taking the locks so we check again inside the lock. */ - tb = tb_htable_lookup(cpu, pc, cs_base, flags); + tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask); if (likely(tb == NULL)) { /* if no translated code available, then translate it now */ - tb = tb_gen_code(cpu, pc, cs_base, flags, 0); + tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); } mmap_unlock(); @@ -472,20 +473,21 @@ static void cpu_exec_step(struct uc_struct *uc, CPUState *cpu) TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; + uint32_t cflags = 1 | CF_IGNORE_ICOUNT; cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); if (sigsetjmp(cpu->jmp_env, 0) == 0) { - mmap_lock(); - tb = tb_gen_code(cpu, pc, cs_base, flags, - 1 | CF_NOCACHE | CF_IGNORE_ICOUNT); - tb->orig_tb = NULL; - mmap_unlock(); + tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, + cflags & CF_HASH_MASK); + if (tb == NULL) { + mmap_lock(); + tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); + mmap_unlock(); + } /* execute the generated code */ cpu_tb_exec(cpu, tb); - tb_phys_invalidate(uc, tb, -1); - tb_free(uc, tb); } else { /* We may have exited due to another problem here, so we need * to reset any tb_locks we may have taken but didn't release. diff --git a/qemu/accel/tcg/tcg-runtime.c b/qemu/accel/tcg/tcg-runtime.c index dfeb91b3..3411f0d0 100644 --- a/qemu/accel/tcg/tcg-runtime.c +++ b/qemu/accel/tcg/tcg-runtime.c @@ -151,7 +151,7 @@ void *HELPER(lookup_tb_ptr)(CPUArchState *env) target_ulong cs_base, pc; uint32_t flags; - tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags); + tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags(env->uc)); if (tb == NULL) { return tcg_ctx->code_gen_epilogue; } diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c index dcd940f6..18f51e2c 100644 --- a/qemu/accel/tcg/translate-all.c +++ b/qemu/accel/tcg/translate-all.c @@ -1147,7 +1147,7 @@ void tb_phys_invalidate(struct uc_struct *uc, /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_hash_func(phys_pc, tb->pc, tb->flags); + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK); tb_hash_remove(&tcg_ctx->tb_ctx.tb_phys_hash[h], tb); /* remove the TB from the page list */ @@ -1304,7 +1304,7 @@ static void tb_link_page(struct uc_struct *uc, } /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags); + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK); ptb = &tcg_ctx->tb_ctx.tb_phys_hash[h]; tb->phys_hash_next = *ptb; *ptb = tb; @@ -1658,7 +1658,8 @@ void tb_invalidate_phys_page_range(struct uc_struct *uc, tb_page_addr_t start, t /* we generate a block containing just the instruction modifying the memory. It will ensure that it cannot modify itself */ - tb_gen_code(uc->cpu, current_pc, current_cs_base, current_flags, 1); + tb_gen_code(uc->cpu, current_pc, current_cs_base, current_flags, + 1 | curr_cflags(uc)); cpu_loop_exit_noexc(uc->cpu); } #endif @@ -1714,7 +1715,7 @@ void tb_invalidate_phys_page_fast(struct uc_struct* uc, tb_page_addr_t start, in * TB (because it was modified by this store and the guest CPU has * precise-SMC semantics). */ -static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) +static bool tb_invalidate_phys_page(struct uc_struct *uc, tb_page_addr_t addr, uintptr_t pc) { TranslationBlock *tb; PageDesc *p; @@ -1770,7 +1771,8 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) /* we generate a block containing just the instruction modifying the memory. It will ensure that it cannot modify itself */ - tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1); + tb_gen_code(cpu, current_pc, current_cs_base, current_flags, + 1 | curr_cflags(uc)); return true; } #endif @@ -1903,6 +1905,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) } cflags = n | CF_LAST_IO; + cflags |= curr_cflags(cpu->uc); pc = tb->pc; cs_base = tb->cs_base; flags = tb->flags; @@ -2177,7 +2180,7 @@ static void page_set_flags(struct uc_struct *uc, target_ulong start, target_ulon if (!(p->flags & PAGE_WRITE) && (flags & PAGE_WRITE) && p->first_tb) { - tb_invalidate_phys_page(addr, 0); + tb_invalidate_phys_page(uc, addr, 0); } p->flags = flags; } @@ -2277,7 +2280,7 @@ int page_unprotect(struct uc_struct *uc, target_ulong address, uintptr_t pc) /* and since the content will be modified, we must invalidate the corresponding translated code. */ - current_tb_invalidated |= tb_invalidate_phys_page(addr, pc); + current_tb_invalidated |= tb_invalidate_phys_page(uc, addr, pc); #ifdef CONFIG_USER_ONLY if (DEBUG_TB_CHECK_GATE) { tb_invalidate_check(addr); diff --git a/qemu/include/exec/exec-all.h b/qemu/include/exec/exec-all.h index af92746d..ca26949b 100644 --- a/qemu/include/exec/exec-all.h +++ b/qemu/include/exec/exec-all.h @@ -23,6 +23,8 @@ #include "qemu-common.h" #include "exec/tb-context.h" +#include "uc_priv.h" + /* allow to see translation results - the slowdown should be negligible, so we leave it */ #define DEBUG_DISAS @@ -248,6 +250,9 @@ struct TranslationBlock { #define CF_USE_ICOUNT 0x20000 #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */ #define CF_INVALID 0x80000 /* TB is stale. Setters must acquire tb_lock */ +#define CF_PARALLEL 0x100000 /* Generate code for a parallel context */ +/* cflags' mask for hashing/comparison */ +#define CF_HASH_MASK (CF_PARALLEL) struct tb_tc tc; /* next matching tb for physical address. */ @@ -292,12 +297,19 @@ static inline uint32_t tb_cflags(const TranslationBlock *tb) return atomic_read(&tb->cflags); } +/* current cflags for hashing/comparison */ +static inline uint32_t curr_cflags(struct uc_struct *uc) +{ + return uc->parallel_cpus ? CF_PARALLEL : 0; +} + void tb_free(struct uc_struct *uc, TranslationBlock *tb); void tb_flush(CPUState *cpu); void tb_phys_invalidate(struct uc_struct *uc, TranslationBlock *tb, tb_page_addr_t page_addr); TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, - target_ulong cs_base, uint32_t flags); + target_ulong cs_base, uint32_t flags, + uint32_t cf_mask); void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr); diff --git a/qemu/include/exec/tb-hash.h b/qemu/include/exec/tb-hash.h index dcab7016..3214884e 100644 --- a/qemu/include/exec/tb-hash.h +++ b/qemu/include/exec/tb-hash.h @@ -59,9 +59,10 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc) } static inline -uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags) +uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags, + uint32_t cf_mask) { - return qemu_xxhash7(phys_pc, pc, flags, 0, 0) & (CODE_GEN_PHYS_HASH_SIZE - 1); + return qemu_xxhash7(phys_pc, pc, flags, cf_mask, 0) & (CODE_GEN_PHYS_HASH_SIZE - 1); } #endif diff --git a/qemu/include/exec/tb-lookup.h b/qemu/include/exec/tb-lookup.h index 096d1100..5e6603c6 100644 --- a/qemu/include/exec/tb-lookup.h +++ b/qemu/include/exec/tb-lookup.h @@ -19,7 +19,7 @@ /* Might cause an exception, so have a longjmp destination ready */ static inline TranslationBlock * tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, - uint32_t *flags) + uint32_t *flags, uint32_t cf_mask) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; @@ -32,10 +32,10 @@ tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base, tb->pc == *pc && tb->cs_base == *cs_base && tb->flags == *flags && - !(tb_cflags(tb) & CF_INVALID))) { + (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) { return tb; } - tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags); + tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask); if (tb == NULL) { return NULL; }