diff --git a/include/qemu.h b/include/qemu.h
index 20316539..b76002be 100644
--- a/include/qemu.h
+++ b/include/qemu.h
@@ -28,4 +28,28 @@ typedef struct {
     bool in_use;
 } BounceBuffer;
 
+// Moved here to allow a concrete type in the uc_struct.
+
+/*
+ * We divide code_gen_buffer into equally-sized "regions" that TCG threads
+ * dynamically allocate from as demand dictates. Given appropriate region
+ * sizing, this minimizes flushes even when some TCG threads generate a lot
+ * more code than others.
+ */
+struct tcg_region_state {
+    //QemuMutex lock;
+
+    /* fields set at init time */
+    void *start;
+    void *start_aligned;
+    void *end;
+    size_t n;
+    size_t size; /* size of one region */
+    size_t stride; /* .size + guard size */
+
+    /* fields protected by the lock */
+    size_t current; /* current region index */
+    size_t agg_size_full; /* aggregate size of full regions */
+};
+
 #endif
diff --git a/include/uc_priv.h b/include/uc_priv.h
index 9dd79d4b..bd8f2ca0 100644
--- a/include/uc_priv.h
+++ b/include/uc_priv.h
@@ -178,6 +178,7 @@ struct uc_struct {
 
     // qemu/cpus.c
     bool mttcg_enabled;
+    int tcg_region_inited;
 
     // qemu/exec.c
     MemoryRegion *system_memory;
@@ -226,6 +227,7 @@ struct uc_struct {
     // tcg.c
     void *tcg_ctxs; // actually "TCGContext **tcg_ctxs"
     unsigned int n_tcg_ctxs;
+    struct tcg_region_state region;
 
     /* memory.c */
     unsigned memory_region_transaction_depth;
diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index 06d6ac19..1fa7a425 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_aarch64
 #define tcg_out_call tcg_out_call_aarch64
 #define tcg_out_cmp tcg_out_cmp_aarch64
+#define tcg_code_capacity tcg_code_capacity_aarch64
+#define tcg_code_size tcg_code_size_aarch64
 #define tcg_out_ext16s tcg_out_ext16s_aarch64
 #define tcg_out_ext16u tcg_out_ext16u_aarch64
 #define tcg_out_ext32s tcg_out_ext32s_aarch64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_aarch64
 #define tcg_reg_free tcg_reg_free_aarch64
 #define tcg_reg_sync tcg_reg_sync_aarch64
+#define tcg_region_init tcg_region_init_aarch64
+#define tcg_region_reset_all tcg_region_reset_all_aarch64
 #define tcg_set_frame tcg_set_frame_aarch64
 #define tcg_set_nop tcg_set_nop_aarch64
 #define tcg_swap_cond tcg_swap_cond_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index 5180053a..ad62fec1 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_aarch64eb
 #define tcg_out_call tcg_out_call_aarch64eb
 #define tcg_out_cmp tcg_out_cmp_aarch64eb
+#define tcg_code_capacity tcg_code_capacity_aarch64eb
+#define tcg_code_size tcg_code_size_aarch64eb
 #define tcg_out_ext16s tcg_out_ext16s_aarch64eb
 #define tcg_out_ext16u tcg_out_ext16u_aarch64eb
 #define tcg_out_ext32s tcg_out_ext32s_aarch64eb
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_aarch64eb
 #define tcg_reg_free tcg_reg_free_aarch64eb
 #define tcg_reg_sync tcg_reg_sync_aarch64eb
+#define tcg_region_init tcg_region_init_aarch64eb
+#define tcg_region_reset_all tcg_region_reset_all_aarch64eb
 #define tcg_set_frame tcg_set_frame_aarch64eb
 #define tcg_set_nop tcg_set_nop_aarch64eb
 #define tcg_swap_cond tcg_swap_cond_aarch64eb
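
The tcg_region_state struct added to include/qemu.h above fixes the geometry of the code cache: region i starts i * stride bytes past start_aligned and exposes size usable bytes, with the remaining stride - size bytes reserved for a guard page. A standalone sketch of that layout with made-up numbers (illustrative only, not part of the patch):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Toy model of struct tcg_region_state's geometry: regions are laid
     * out every 'stride' bytes; each exposes 'size' usable bytes followed
     * by one guard page. All numbers below are hypothetical. */
    int main(void)
    {
        const size_t page = 4096;                 /* host page size */
        const size_t size = 15 * page;            /* region.size (usable bytes) */
        const size_t stride = size + page;        /* region.stride = size + guard */
        const uintptr_t start_aligned = 0x100000; /* region.start_aligned */
        const size_t n = 4;                       /* region.n */

        for (size_t i = 0; i < n; i++) {
            uintptr_t start = start_aligned + i * stride;
            uintptr_t guard = start + size;       /* first byte of the guard page */
            assert(start % page == 0 && guard % page == 0);
        }
        return 0;
    }
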
diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c
index 3b47388f..2b3caafc 100644
--- a/qemu/accel/tcg/translate-all.c
+++ b/qemu/accel/tcg/translate-all.c
@@ -586,15 +586,13 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
     TCGContext *tcg_ctx = uc->tcg_ctx;
     void *buf = static_code_gen_buffer;
     void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
-    size_t full_size, size;
+    size_t size;
 
     /* page-align the beginning and end of the buffer */
     buf = QEMU_ALIGN_PTR_UP(buf, uc->qemu_real_host_page_size);
     end = QEMU_ALIGN_PTR_DOWN(end, uc->qemu_real_host_page_size);
 
-    /* Reserve a guard page. */
-    full_size = end - buf;
-    size = full_size - uc->qemu_real_host_page_size;
+    size = end - buf;
 
     /* Honor a command-line option limiting the size of the buffer. */
     if (size > tcg_ctx->code_gen_buffer_size) {
@@ -613,9 +611,6 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
     if (qemu_mprotect_rwx(uc, buf, size)) {
         abort();
     }
-    if (qemu_mprotect_none(uc, buf + size, uc->qemu_real_host_page_size)) {
-        abort();
-    }
 
     // Unicorn: commented out
     //qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
@@ -624,20 +619,13 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 #elif defined(_WIN32)
 static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 {
-    TCGContext *tcg_ctx = uc->tcg_ctx;
-    size_t size = tcg_ctx->code_gen_buffer_size;
-    void *buf1, *buf2;
-
-    /* Perform the allocation in two steps, so that the guard page
-       is reserved but uncommitted. */
-    buf1 = VirtualAlloc(NULL, size + uc->qemu_real_host_page_size,
-                        MEM_RESERVE, PAGE_NOACCESS);
-    if (buf1 != NULL) {
-        buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
-        assert(buf1 == buf2);
-    }
-
-    return buf1;
+    TCGContext *tcg_ctx = uc->tcg_ctx;
+    size_t size = tcg_ctx->code_gen_buffer_size;
+    void *buf;
+
+    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
+                       PAGE_EXECUTE_READWRITE);
+    return buf;
 }
 
 void free_code_gen_buffer(struct uc_struct *uc)
@@ -664,6 +652,7 @@
 static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
+    int prot = PROT_WRITE | PROT_READ | PROT_EXEC;
     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
     uintptr_t start = 0;
     size_t size = tcg_ctx->code_gen_buffer_size;
@@ -698,8 +687,7 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 # endif
 # endif
 
-    buf = mmap((void *)start, size + uc->qemu_real_host_page_size,
-               PROT_NONE, flags, -1, 0);
+    buf = mmap((void *)start, size, prot, flags, -1, 0);
     if (buf == MAP_FAILED) {
         return NULL;
     }
@@ -709,24 +697,23 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
         /* Try again, with the original still mapped, to avoid re-acquiring
            that 256mb crossing.  This time don't specify an address.  */
         size_t size2;
-        void *buf2 = mmap(NULL, size + uc->qemu_real_host_page_size,
-                          PROT_NONE, flags, -1, 0);
+        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
         switch ((int)(buf2 != MAP_FAILED)) {
         case 1:
             if (!cross_256mb(buf2, size)) {
                 /* Success!  Use the new buffer.  */
-                munmap(buf, size + uc->qemu_real_host_page_size);
+                munmap(buf, size);
                 break;
             }
             /* Failure.  Work with what we had.  */
-            munmap(buf2, size + uc->qemu_real_host_page_size);
+            munmap(buf2, size);
             /* fallthru */
         default:
             /* Split the original buffer.  Free the smaller half.  */
             buf2 = split_cross_256mb(buf, size);
             size2 = tcg_ctx->code_gen_buffer_size;
             if (buf == buf2) {
-                munmap(buf + size2 + uc->qemu_real_host_page_size, size - size2);
+                munmap(buf + size2, size - size2);
             } else {
                 munmap(buf, size - size2);
             }
@@ -738,10 +725,6 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
     }
 #endif
 
-    /* Make the final buffer accessible.  The guard page at the end
-       will remain inaccessible with PROT_NONE.  */
-    mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC);
-
     /* Request large pages for the buffer.  */
     // Unicorn: Commented out
     //qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
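
Note the pattern in the hunks above: the buffer is now mapped read/write/execute in one step and no longer ends with a PROT_NONE guard page; tcg_region_init() (added to qemu/tcg/tcg.c below) instead places one guard page at the end of every region via qemu_mprotect_none(). A rough standalone POSIX sketch of that per-region pattern, assuming size is a multiple of the page size (illustrative, not the patch's code):

    #include <stddef.h>
    #include <sys/mman.h>

    /* Map an RWX code buffer, then turn its last page into a guard page,
     * mirroring what tcg_region_init() does with qemu_mprotect_none(). */
    static void *alloc_with_guard(size_t size, size_t page_size)
    {
        void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) {
            return NULL;
        }
        /* any code emission that runs into the last page now faults */
        if (mprotect((char *)buf + size - page_size, page_size, PROT_NONE)) {
            munmap(buf, size);
            return NULL;
        }
        return buf;
    }
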
@@ -948,20 +931,14 @@ static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
 void tb_flush(CPUState *cpu)
 {
     struct uc_struct* uc = cpu->uc;
-    TCGContext *tcg_ctx = uc->tcg_ctx;
 
     if (DEBUG_TB_FLUSH_GATE) {
         size_t nb_tbs = g_tree_nnodes(uc->tb_ctx.tb_tree);
         size_t host_size = 0;
 
         g_tree_foreach(uc->tb_ctx.tb_tree, tb_host_size_iter, &host_size);
-        printf("qemu: flush code_size=%td nb_tbs=%zu avg_tb_size=%zu\n",
-               tcg_ctx->code_gen_ptr - tcg_ctx->code_gen_buffer, nb_tbs,
-               nb_tbs > 0 ? host_size / nb_tbs : 0);
-    }
-    if ((unsigned long)((char*)tcg_ctx->code_gen_ptr - (char*)tcg_ctx->code_gen_buffer)
-            > tcg_ctx->code_gen_buffer_size) {
-        cpu_abort(cpu, "Internal error: code buffer overflow\n");
+        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
+               tcg_code_size(uc), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
     }
 
     cpu_tb_jmp_cache_clear(cpu);
@@ -974,10 +951,10 @@ void tb_flush(CPUState *cpu)
     qht_reset_size(&uc->tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
     page_flush_tb(uc);
 
-    tcg_ctx->code_gen_ptr = tcg_ctx->code_gen_buffer;
+    tcg_region_reset_all(uc);
 
     /* XXX: flush processor icache at this point if cache flush is
        expensive */
-    uc->tb_ctx.tb_flush_count++;
+    atomic_mb_set(&uc->tb_ctx.tb_flush_count, uc->tb_ctx.tb_flush_count + 1);
 }
 
 /*
@@ -1308,9 +1285,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     phys_pc = get_page_addr_code(env, pc);
 
+ buffer_overflow:
     tb = tb_alloc(env->uc, pc);
     if (unlikely(!tb)) {
- buffer_overflow:
         /* flush must be done */
         tb_flush(cpu);
         /* cannot fail at this point */
@@ -1393,9 +1370,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tcg_ctx.search_out_len += search_size;
 #endif
 
-    tcg_ctx->code_gen_ptr = (void *)
-        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
-                 CODE_GEN_ALIGN);
+    atomic_set(&tcg_ctx->code_gen_ptr, (void *)
+        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
+                 CODE_GEN_ALIGN));
 
     /* init jump list */
     assert(((uintptr_t)tb & 3) == 0);
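
The two tb_gen_code hunks above change how the cache degrades when it fills: tcg_tb_alloc() (see qemu/tcg/tcg.c below) first tries to claim a fresh region via tcg_region_alloc(), and only when every region is exhausted does tb_gen_code() fall back to a full tb_flush() and restart at buffer_overflow. A condensed sketch of that control flow (simplified, hypothetical names, not the literal code):

    #include <stdbool.h>
    #include <stddef.h>

    /* Two-stage allocation sketch: when the highwater mark is hit, try to
     * claim a fresh region before giving up. region_alloc() stands in for
     * tcg_region_alloc(); all names here are illustrative. */
    struct ctx {
        char *code_gen_ptr;
        char *code_gen_highwater;
    };

    extern bool region_alloc(struct ctx *s); /* true when no regions remain */

    static void *alloc_tb(struct ctx *s, size_t bytes)
    {
     retry:
        if (s->code_gen_ptr + bytes > s->code_gen_highwater) {
            if (region_alloc(s)) { /* rotates ptr/highwater to a new region */
                return NULL;       /* caller reacts with tb_flush() + retry */
            }
            goto retry;
        }
        return s->code_gen_ptr;
    }
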
diff --git a/qemu/arm.h b/qemu/arm.h
index 83c5130a..a3635c95 100644
--- a/qemu/arm.h
+++ b/qemu/arm.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_arm
 #define tcg_out_call tcg_out_call_arm
 #define tcg_out_cmp tcg_out_cmp_arm
+#define tcg_code_capacity tcg_code_capacity_arm
+#define tcg_code_size tcg_code_size_arm
 #define tcg_out_ext16s tcg_out_ext16s_arm
 #define tcg_out_ext16u tcg_out_ext16u_arm
 #define tcg_out_ext32s tcg_out_ext32s_arm
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_arm
 #define tcg_reg_free tcg_reg_free_arm
 #define tcg_reg_sync tcg_reg_sync_arm
+#define tcg_region_init tcg_region_init_arm
+#define tcg_region_reset_all tcg_region_reset_all_arm
 #define tcg_set_frame tcg_set_frame_arm
 #define tcg_set_nop tcg_set_nop_arm
 #define tcg_swap_cond tcg_swap_cond_arm
diff --git a/qemu/armeb.h b/qemu/armeb.h
index 7d690a41..2bbbeaa3 100644
--- a/qemu/armeb.h
+++ b/qemu/armeb.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_armeb
 #define tcg_out_call tcg_out_call_armeb
 #define tcg_out_cmp tcg_out_cmp_armeb
+#define tcg_code_capacity tcg_code_capacity_armeb
+#define tcg_code_size tcg_code_size_armeb
 #define tcg_out_ext16s tcg_out_ext16s_armeb
 #define tcg_out_ext16u tcg_out_ext16u_armeb
 #define tcg_out_ext32s tcg_out_ext32s_armeb
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_armeb
 #define tcg_reg_free tcg_reg_free_armeb
 #define tcg_reg_sync tcg_reg_sync_armeb
+#define tcg_region_init tcg_region_init_armeb
+#define tcg_region_reset_all tcg_region_reset_all_armeb
 #define tcg_set_frame tcg_set_frame_armeb
 #define tcg_set_nop tcg_set_nop_armeb
 #define tcg_swap_cond tcg_swap_cond_armeb
diff --git a/qemu/cpus.c b/qemu/cpus.c
index e395ef6f..9542c0b2 100644
--- a/qemu/cpus.c
+++ b/qemu/cpus.c
@@ -135,6 +135,19 @@ static void *qemu_tcg_cpu_loop(struct uc_struct *uc)
 
 static int qemu_tcg_init_vcpu(CPUState *cpu)
 {
+    struct uc_struct *uc = cpu->uc;
+
+    /*
+     * Initialize TCG regions--once. Now is a good time, because:
+     * (1) TCG's init context, prologue and target globals have been set up.
+     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
+     *     -accel flag is processed, so the check doesn't work then).
+     */
+    if (!uc->tcg_region_inited) {
+        uc->tcg_region_inited = 1;
+        tcg_region_init(uc);
+    }
+
     return 0;
 }
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index 41b17cd9..e24ef8f2 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -2837,6 +2837,8 @@ symbols = (
     'tcg_out_bswap64',
     'tcg_out_call',
     'tcg_out_cmp',
+    'tcg_code_capacity',
+    'tcg_code_size',
     'tcg_out_ext16s',
     'tcg_out_ext16u',
     'tcg_out_ext32s',
@@ -2888,6 +2890,8 @@
     'tcg_reg_alloc_start',
     'tcg_reg_free',
     'tcg_reg_sync',
+    'tcg_region_init',
+    'tcg_region_reset_all',
     'tcg_set_frame',
     'tcg_set_nop',
     'tcg_swap_cond',
diff --git a/qemu/m68k.h b/qemu/m68k.h
index 887fc35c..e37a9741 100644
--- a/qemu/m68k.h
+++ b/qemu/m68k.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_m68k
 #define tcg_out_call tcg_out_call_m68k
 #define tcg_out_cmp tcg_out_cmp_m68k
+#define tcg_code_capacity tcg_code_capacity_m68k
+#define tcg_code_size tcg_code_size_m68k
 #define tcg_out_ext16s tcg_out_ext16s_m68k
 #define tcg_out_ext16u tcg_out_ext16u_m68k
 #define tcg_out_ext32s tcg_out_ext32s_m68k
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_m68k
 #define tcg_reg_free tcg_reg_free_m68k
 #define tcg_reg_sync tcg_reg_sync_m68k
+#define tcg_region_init tcg_region_init_m68k
+#define tcg_region_reset_all tcg_region_reset_all_m68k
 #define tcg_set_frame tcg_set_frame_m68k
 #define tcg_set_nop tcg_set_nop_m68k
 #define tcg_swap_cond tcg_swap_cond_m68k
diff --git a/qemu/mips.h b/qemu/mips.h
index a9e30d2f..3b891af1 100644
--- a/qemu/mips.h
+++ b/qemu/mips.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mips
 #define tcg_out_call tcg_out_call_mips
 #define tcg_out_cmp tcg_out_cmp_mips
+#define tcg_code_capacity tcg_code_capacity_mips
+#define tcg_code_size tcg_code_size_mips
 #define tcg_out_ext16s tcg_out_ext16s_mips
 #define tcg_out_ext16u tcg_out_ext16u_mips
 #define tcg_out_ext32s tcg_out_ext32s_mips
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mips
 #define tcg_reg_free tcg_reg_free_mips
 #define tcg_reg_sync tcg_reg_sync_mips
+#define tcg_region_init tcg_region_init_mips
+#define tcg_region_reset_all tcg_region_reset_all_mips
 #define tcg_set_frame tcg_set_frame_mips
 #define tcg_set_nop tcg_set_nop_mips
 #define tcg_swap_cond tcg_swap_cond_mips
diff --git a/qemu/mips64.h b/qemu/mips64.h
index 9b564d4d..fd2b23af 100644
--- a/qemu/mips64.h
+++ b/qemu/mips64.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mips64
 #define tcg_out_call tcg_out_call_mips64
 #define tcg_out_cmp tcg_out_cmp_mips64
+#define tcg_code_capacity tcg_code_capacity_mips64
+#define tcg_code_size tcg_code_size_mips64
 #define tcg_out_ext16s tcg_out_ext16s_mips64
 #define tcg_out_ext16u tcg_out_ext16u_mips64
 #define tcg_out_ext32s tcg_out_ext32s_mips64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mips64
 #define tcg_reg_free tcg_reg_free_mips64
 #define tcg_reg_sync tcg_reg_sync_mips64
+#define tcg_region_init tcg_region_init_mips64
+#define tcg_region_reset_all tcg_region_reset_all_mips64
 #define tcg_set_frame tcg_set_frame_mips64
 #define tcg_set_nop tcg_set_nop_mips64
 #define tcg_swap_cond tcg_swap_cond_mips64
diff --git a/qemu/mips64el.h b/qemu/mips64el.h
index f982c765..56cf5b9b 100644
--- a/qemu/mips64el.h
+++ b/qemu/mips64el.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mips64el
 #define tcg_out_call tcg_out_call_mips64el
 #define tcg_out_cmp tcg_out_cmp_mips64el
+#define tcg_code_capacity tcg_code_capacity_mips64el
+#define tcg_code_size tcg_code_size_mips64el
 #define tcg_out_ext16s tcg_out_ext16s_mips64el
 #define tcg_out_ext16u tcg_out_ext16u_mips64el
 #define tcg_out_ext32s tcg_out_ext32s_mips64el
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mips64el
 #define tcg_reg_free tcg_reg_free_mips64el
 #define tcg_reg_sync tcg_reg_sync_mips64el
+#define tcg_region_init tcg_region_init_mips64el
+#define tcg_region_reset_all tcg_region_reset_all_mips64el
 #define tcg_set_frame tcg_set_frame_mips64el
 #define tcg_set_nop tcg_set_nop_mips64el
 #define tcg_swap_cond tcg_swap_cond_mips64el
diff --git a/qemu/mipsel.h b/qemu/mipsel.h
index 16402ce2..232f9dee 100644
--- a/qemu/mipsel.h
+++ b/qemu/mipsel.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mipsel
 #define tcg_out_call tcg_out_call_mipsel
 #define tcg_out_cmp tcg_out_cmp_mipsel
+#define tcg_code_capacity tcg_code_capacity_mipsel
+#define tcg_code_size tcg_code_size_mipsel
 #define tcg_out_ext16s tcg_out_ext16s_mipsel
 #define tcg_out_ext16u tcg_out_ext16u_mipsel
 #define tcg_out_ext32s tcg_out_ext32s_mipsel
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mipsel
 #define tcg_reg_free tcg_reg_free_mipsel
 #define tcg_reg_sync tcg_reg_sync_mipsel
+#define tcg_region_init tcg_region_init_mipsel
+#define tcg_region_reset_all tcg_region_reset_all_mipsel
 #define tcg_set_frame tcg_set_frame_mipsel
 #define tcg_set_nop tcg_set_nop_mipsel
 #define tcg_swap_cond tcg_swap_cond_mipsel
diff --git a/qemu/powerpc.h b/qemu/powerpc.h
index 84365cf3..df55d0a9 100644
--- a/qemu/powerpc.h
+++ b/qemu/powerpc.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_powerpc
 #define tcg_out_call tcg_out_call_powerpc
 #define tcg_out_cmp tcg_out_cmp_powerpc
+#define tcg_code_capacity tcg_code_capacity_powerpc
+#define tcg_code_size tcg_code_size_powerpc
 #define tcg_out_ext16s tcg_out_ext16s_powerpc
 #define tcg_out_ext16u tcg_out_ext16u_powerpc
 #define tcg_out_ext32s tcg_out_ext32s_powerpc
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_powerpc
 #define tcg_reg_free tcg_reg_free_powerpc
 #define tcg_reg_sync tcg_reg_sync_powerpc
+#define tcg_region_init tcg_region_init_powerpc
+#define tcg_region_reset_all tcg_region_reset_all_powerpc
 #define tcg_set_frame tcg_set_frame_powerpc
 #define tcg_set_nop tcg_set_nop_powerpc
 #define tcg_swap_cond tcg_swap_cond_powerpc
diff --git a/qemu/sparc.h b/qemu/sparc.h
index a4924f5d..7d24c55a 100644
--- a/qemu/sparc.h
+++ b/qemu/sparc.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_sparc
 #define tcg_out_call tcg_out_call_sparc
 #define tcg_out_cmp tcg_out_cmp_sparc
+#define tcg_code_capacity tcg_code_capacity_sparc
+#define tcg_code_size tcg_code_size_sparc
 #define tcg_out_ext16s tcg_out_ext16s_sparc
 #define tcg_out_ext16u tcg_out_ext16u_sparc
 #define tcg_out_ext32s tcg_out_ext32s_sparc
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_sparc
 #define tcg_reg_free tcg_reg_free_sparc
 #define tcg_reg_sync tcg_reg_sync_sparc
+#define tcg_region_init tcg_region_init_sparc
+#define tcg_region_reset_all tcg_region_reset_all_sparc
 #define tcg_set_frame tcg_set_frame_sparc
 #define tcg_set_nop tcg_set_nop_sparc
 #define tcg_swap_cond tcg_swap_cond_sparc
diff --git a/qemu/sparc64.h b/qemu/sparc64.h
index 017822f6..b34239e9 100644
--- a/qemu/sparc64.h
+++ b/qemu/sparc64.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_sparc64
 #define tcg_out_call tcg_out_call_sparc64
 #define tcg_out_cmp tcg_out_cmp_sparc64
+#define tcg_code_capacity tcg_code_capacity_sparc64
+#define tcg_code_size tcg_code_size_sparc64
 #define tcg_out_ext16s tcg_out_ext16s_sparc64
 #define tcg_out_ext16u tcg_out_ext16u_sparc64
 #define tcg_out_ext32s tcg_out_ext32s_sparc64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_sparc64
 #define tcg_reg_free tcg_reg_free_sparc64
 #define tcg_reg_sync tcg_reg_sync_sparc64
+#define tcg_region_init tcg_region_init_sparc64
+#define tcg_region_reset_all tcg_region_reset_all_sparc64
 #define tcg_set_frame tcg_set_frame_sparc64
 #define tcg_set_nop tcg_set_nop_sparc64
 #define tcg_swap_cond tcg_swap_cond_sparc64
diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c
index 34f0faae..b7209fd9 100644
--- a/qemu/tcg/tcg.c
+++ b/qemu/tcg/tcg.c
@@ -257,6 +257,205 @@ TCGLabel *gen_new_label(TCGContext *s)
 
 #include "tcg-target.inc.c"
 
+static void tcg_region_bounds(struct uc_struct *uc, size_t curr_region,
+                              void **pstart, void **pend)
+{
+    void *start, *end;
+
+    start = uc->region.start_aligned + curr_region * uc->region.stride;
+    end = start + uc->region.size;
+
+    if (curr_region == 0) {
+        start = uc->region.start;
+    }
+    if (curr_region == uc->region.n - 1) {
+        end = uc->region.end;
+    }
+
+    *pstart = start;
+    *pend = end;
+}
+
+static void tcg_region_assign(struct uc_struct *uc, TCGContext *s,
+                              size_t curr_region)
+{
+    void *start, *end;
+
+    tcg_region_bounds(uc, curr_region, &start, &end);
+
+    s->code_gen_buffer = start;
+    s->code_gen_ptr = start;
+    s->code_gen_buffer_size = end - start;
+    s->code_gen_highwater = end - TCG_HIGHWATER;
+}
+
+static bool tcg_region_alloc__locked(struct uc_struct *uc, TCGContext *s)
+{
+    if (uc->region.current == uc->region.n) {
+        return true;
+    }
+    tcg_region_assign(uc, s, uc->region.current);
+    uc->region.current++;
+    return false;
+}
+
+/*
+ * Request a new region once the one in use has filled up.
+ * Returns true on error.
+ */
+static bool tcg_region_alloc(struct uc_struct *uc, TCGContext *s)
+{
+    bool err;
+    /* read the region size now; alloc__locked will overwrite it on success */
+    size_t size_full = s->code_gen_buffer_size;
+
+    // Unicorn: commented out
+    //qemu_mutex_lock(&region.lock);
+    err = tcg_region_alloc__locked(uc, s);
+    if (!err) {
+        uc->region.agg_size_full += size_full - TCG_HIGHWATER;
+    }
+    //qemu_mutex_unlock(&region.lock);
+    return err;
+}
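+
+/*
+ * Illustration (hypothetical numbers, single-context Unicorn case): with
+ * region.n = 4, successive tcg_region_alloc() calls walk region.current
+ * through 0..3; a fifth call finds region.current == region.n and returns
+ * true. tcg_tb_alloc() turns that into a NULL TranslationBlock, and
+ * tb_gen_code() responds with a full tb_flush(), which in turn calls
+ * tcg_region_reset_all().
+ */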
+
+/*
+ * Perform a context's first region allocation.
+ * This function does _not_ increment region.agg_size_full.
+ */
+static inline bool tcg_region_initial_alloc__locked(struct uc_struct *uc,
+                                                    TCGContext *s)
+{
+    return tcg_region_alloc__locked(uc, s);
+}
+
+/* Call from a safe-work context */
+void tcg_region_reset_all(struct uc_struct *uc)
+{
+    unsigned int i;
+    TCGContext **tcg_ctxs = uc->tcg_ctxs;
+
+    // Unicorn: commented out
+    //qemu_mutex_lock(&region.lock);
+    uc->region.current = 0;
+    uc->region.agg_size_full = 0;
+
+    for (i = 0; i < uc->n_tcg_ctxs; i++) {
+        bool err = tcg_region_initial_alloc__locked(uc, tcg_ctxs[i]);
+
+        g_assert(!err);
+    }
+    // Unicorn: commented out
+    //qemu_mutex_unlock(&region.lock);
+}
+
+/*
+ * Initializes region partitioning.
+ *
+ * Called at init time from the parent thread (i.e. the one calling
+ * tcg_context_init), after the target's TCG globals have been set.
+ */
+void tcg_region_init(struct uc_struct *uc)
+{
+    TCGContext *tcg_init_ctx = uc->tcg_init_ctx;
+
+    void *buf = tcg_init_ctx->code_gen_buffer;
+    void *aligned;
+    size_t size = tcg_init_ctx->code_gen_buffer_size;
+    size_t page_size = uc->qemu_real_host_page_size;
+    size_t region_size;
+    size_t n_regions;
+    size_t i;
+
+    /* We do not yet support multiple TCG contexts, so use one region for now */
+    n_regions = 1;
+
+    /* The first region will be 'aligned - buf' bytes larger than the others */
+    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
+    g_assert(aligned < tcg_init_ctx->code_gen_buffer + size);
+    /*
+     * Make region_size a multiple of page_size, using aligned as the start.
+     * As a result of this we might end up with a few extra pages at the end of
+     * the buffer; we will assign those to the last region.
+     */
+    region_size = (size - (aligned - buf)) / n_regions;
+    region_size = QEMU_ALIGN_DOWN(region_size, page_size);
+
+    /* A region must have at least 2 pages; one code, one guard */
+    g_assert(region_size >= 2 * page_size);
+
+    /* init the region struct */
+    //qemu_mutex_init(&region.lock);
+    uc->region.n = n_regions;
+    uc->region.size = region_size - page_size;
+    uc->region.stride = region_size;
+    uc->region.start = buf;
+    uc->region.start_aligned = aligned;
+    /* page-align the end, since its last page will be a guard page */
+    uc->region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
+    /* account for that last guard page */
+    uc->region.end -= page_size;
+
+    /* set guard pages */
+    for (i = 0; i < uc->region.n; i++) {
+        void *start, *end;
+        int rc;
+
+        tcg_region_bounds(uc, i, &start, &end);
+        rc = qemu_mprotect_none(uc, end, page_size);
+        g_assert(!rc);
+    }
+
+    /* We do not yet support multiple TCG contexts so allocate the region now */
+    {
+        TCGContext *tcg_ctx = uc->tcg_ctx;
+        bool err = tcg_region_initial_alloc__locked(uc, tcg_ctx);
+
+        g_assert(!err);
+    }
+}
+
+/*
+ * Returns the size (in bytes) of all translated code (i.e. from all regions)
+ * currently in the cache.
+ * See also: tcg_code_capacity()
+ * Do not confuse with tcg_current_code_size(); that one applies to a single
+ * TCG context.
+ */
+size_t tcg_code_size(struct uc_struct *uc)
+{
+    unsigned int i;
+    size_t total;
+
+    // Unicorn: commented out
+    //qemu_mutex_lock(&region.lock);
+    total = uc->region.agg_size_full;
+    for (i = 0; i < uc->n_tcg_ctxs; i++) {
+        TCGContext **tcg_ctxs = uc->tcg_ctxs;
+        const TCGContext *s = tcg_ctxs[i];
+        size_t size;
+
+        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
+        g_assert(size <= s->code_gen_buffer_size);
+        total += size;
+    }
+    //qemu_mutex_unlock(&region.lock);
+    return total;
+}
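+
+/*
+ * Numeric check of tcg_region_init() above (hypothetical values): a 32 MB
+ * buffer whose start is already page-aligned, 4 KB pages and n_regions = 1
+ * give region_size = 32 MB, region.size = 32 MB - 4 KB, region.stride =
+ * 32 MB, and region.end = buf + 32 MB - 4 KB: the single region spans the
+ * whole buffer except its one trailing guard page.
+ */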
+
+/*
+ * Returns the code capacity (in bytes) of the entire cache, i.e. including all
+ * regions.
+ * See also: tcg_code_size()
+ */
+size_t tcg_code_capacity(struct uc_struct *uc)
+{
+    size_t guard_size, capacity;
+
+    /* no need for synchronization; these variables are set at init time */
+    guard_size = uc->region.stride - uc->region.size;
+    capacity = uc->region.end + guard_size - uc->region.start;
+    capacity -= uc->region.n * (guard_size + TCG_HIGHWATER);
+    return capacity;
+}
+
 /* pool based memory allocation */
 void *tcg_malloc_internal(TCGContext *s, int size)
 {
@@ -403,13 +602,17 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s)
     TranslationBlock *tb;
     void *next;
 
+ retry:
     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
 
     if (unlikely(next > s->code_gen_highwater)) {
-        return NULL;
+        if (tcg_region_alloc(s->uc, s)) {
+            return NULL;
+        }
+        goto retry;
     }
-    s->code_gen_ptr = next;
+    atomic_set(&s->code_gen_ptr, next);
     s->data_gen_ptr = NULL;
     return tb;
 }
diff --git a/qemu/tcg/tcg.h b/qemu/tcg/tcg.h
index c510bb46..7df4fa3c 100644
--- a/qemu/tcg/tcg.h
+++ b/qemu/tcg/tcg.h
@@ -1142,6 +1142,12 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
 
 void tcg_optimize(TCGContext *s);
 
+void tcg_region_init(struct uc_struct *uc);
+void tcg_region_reset_all(struct uc_struct *uc);
+
+size_t tcg_code_size(struct uc_struct *uc);
+size_t tcg_code_capacity(struct uc_struct *uc);
+
 /* Called with tb_lock held.  */
 static inline void *tcg_malloc(TCGContext *s, int size)
 {
diff --git a/qemu/x86_64.h b/qemu/x86_64.h
index 45dd40e8..0296f0a2 100644
--- a/qemu/x86_64.h
+++ b/qemu/x86_64.h
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_x86_64
 #define tcg_out_call tcg_out_call_x86_64
 #define tcg_out_cmp tcg_out_cmp_x86_64
+#define tcg_code_capacity tcg_code_capacity_x86_64
+#define tcg_code_size tcg_code_size_x86_64
 #define tcg_out_ext16s tcg_out_ext16s_x86_64
 #define tcg_out_ext16u tcg_out_ext16u_x86_64
 #define tcg_out_ext32s tcg_out_ext32s_x86_64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_x86_64
 #define tcg_reg_free tcg_reg_free_x86_64
 #define tcg_reg_sync tcg_reg_sync_x86_64
+#define tcg_region_init tcg_region_init_x86_64
+#define tcg_region_reset_all tcg_region_reset_all_x86_64
 #define tcg_set_frame tcg_set_frame_x86_64
 #define tcg_set_nop tcg_set_nop_x86_64
 #define tcg_swap_cond tcg_swap_cond_x86_64
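
Taken together, tcg_code_size() can never exceed tcg_code_capacity(): capacity pre-subtracts each region's guard page and TCG_HIGHWATER slack, and tcg_tb_alloc() stops emitting at the highwater mark. The capacity formula can be sanity-checked in isolation (hypothetical numbers; assumes a page-aligned buffer that the regions divide exactly):

    #include <assert.h>
    #include <stddef.h>

    /* Re-derivation of tcg_code_capacity() with toy values:
     * capacity = (end + guard - start) - n * (guard + TCG_HIGHWATER). */
    int main(void)
    {
        const size_t page = 4096, highwater = 1024; /* TCG_HIGHWATER is 1024 */
        const size_t n = 4, usable = 15 * page;     /* region.n, region.size */
        const size_t stride = usable + page;        /* region.stride */
        const size_t guard = stride - usable;

        size_t start = 0;                           /* region.start */
        size_t end = start + n * stride - guard;    /* region.end (last guard excluded) */
        size_t capacity = (end + guard - start) - n * (guard + highwater);

        /* equivalently: n regions, each 'usable' minus its highwater slack */
        assert(capacity == n * (usable - highwater));
        return 0;
    }
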