tcg: introduce regions to split code_gen_buffer
This is groundwork for supporting multiple TCG contexts.

The naive solution here is to split code_gen_buffer statically among the
TCG threads; this however results in poor utilization if translation
needs are different across TCG threads.

What we do here is to add an extra layer of indirection, assigning
regions that act just like pages do in virtual memory allocation.
(BTW if you are wondering about the chosen naming, I did not want to
use blocks or pages because those are already heavily used in QEMU).

We use a global lock to serialize allocations as well as statistics
reporting (we now export the size of the used code_gen_buffer with
tcg_code_size()). Note that for the allocator we could just use a
counter and atomic_inc; however, that would complicate the gathering
of tcg_code_size()-like stats. So given that the region operations are
not a fast path, a lock seems the most reasonable choice.

The effectiveness of this approach is clear after seeing some numbers.
I used the bootup+shutdown of debian-arm with '-tb-size 80' as a
benchmark. Note that I'm evaluating this after enabling per-thread TCG
(which is done by a subsequent commit).

* -smp 1, 1 region (entire buffer):
    qemu: flush code_size=83885014 nb_tbs=154739 avg_tb_size=357
    qemu: flush code_size=83884902 nb_tbs=153136 avg_tb_size=363
    qemu: flush code_size=83885014 nb_tbs=152777 avg_tb_size=364
    qemu: flush code_size=83884950 nb_tbs=150057 avg_tb_size=373
    qemu: flush code_size=83884998 nb_tbs=150234 avg_tb_size=373
    qemu: flush code_size=83885014 nb_tbs=154009 avg_tb_size=360
    qemu: flush code_size=83885014 nb_tbs=151007 avg_tb_size=370
    qemu: flush code_size=83885014 nb_tbs=151816 avg_tb_size=367

That is, 8 flushes.

* -smp 8, 32 regions (80/32 MB per region) [i.e. this patch]:
    qemu: flush code_size=76328008 nb_tbs=141040 avg_tb_size=356
    qemu: flush code_size=75366534 nb_tbs=138000 avg_tb_size=361
    qemu: flush code_size=76864546 nb_tbs=140653 avg_tb_size=361
    qemu: flush code_size=76309084 nb_tbs=135945 avg_tb_size=375
    qemu: flush code_size=74581856 nb_tbs=132909 avg_tb_size=375
    qemu: flush code_size=73927256 nb_tbs=135616 avg_tb_size=360
    qemu: flush code_size=78629426 nb_tbs=142896 avg_tb_size=365
    qemu: flush code_size=76667052 nb_tbs=138508 avg_tb_size=368

Again, 8 flushes. Note how buffer utilization is not 100%, but it is
close. Smaller region sizes would yield higher utilization, but we want
region allocation to be rare (it acquires a lock), so we do not want to
go too small.

* -smp 8, static partitioning of 8 regions (10 MB per region):
    qemu: flush code_size=21936504 nb_tbs=40570 avg_tb_size=354
    qemu: flush code_size=11472174 nb_tbs=20633 avg_tb_size=370
    qemu: flush code_size=11603976 nb_tbs=21059 avg_tb_size=365
    qemu: flush code_size=23254872 nb_tbs=41243 avg_tb_size=377
    qemu: flush code_size=28289496 nb_tbs=52057 avg_tb_size=358
    qemu: flush code_size=43605160 nb_tbs=78896 avg_tb_size=367
    qemu: flush code_size=45166552 nb_tbs=82158 avg_tb_size=364
    qemu: flush code_size=63289640 nb_tbs=116494 avg_tb_size=358
    qemu: flush code_size=51389960 nb_tbs=93937 avg_tb_size=362
    qemu: flush code_size=59665928 nb_tbs=107063 avg_tb_size=372
    qemu: flush code_size=38380824 nb_tbs=68597 avg_tb_size=374
    qemu: flush code_size=44884568 nb_tbs=79901 avg_tb_size=376
    qemu: flush code_size=50782632 nb_tbs=90681 avg_tb_size=374
    qemu: flush code_size=39848888 nb_tbs=71433 avg_tb_size=372
    qemu: flush code_size=64708840 nb_tbs=119052 avg_tb_size=359
    qemu: flush code_size=49830008 nb_tbs=90992 avg_tb_size=362
    qemu: flush code_size=68372408 nb_tbs=123442 avg_tb_size=368
    qemu: flush code_size=33555560 nb_tbs=59514 avg_tb_size=378
    qemu: flush code_size=44748344 nb_tbs=80974 avg_tb_size=367
    qemu: flush code_size=37104248 nb_tbs=67609 avg_tb_size=364

That is, 20 flushes. Note how a static partitioning approach uses the
code buffer poorly, leading to many unnecessary flushes.

Backports commit e8feb96fcc6c16eab8923332e86ff4ef0e2ac276 from qemu
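To make the indirection concrete, here is a minimal, self-contained sketch of the scheme (build with: cc toy_regions.c -lpthread). This is not Unicorn's code: ToyContext, ctx_alloc, and all sizes are invented for illustration. Each context bump-allocates inside its current region and, only when that region fills up, takes the next free region under a global lock, which is also where size statistics would be gathered.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* One big code buffer, carved into equally-sized regions. */
enum { BUF_SIZE = 1 << 20, N_REGIONS = 32 };

static struct {
    pthread_mutex_t lock;   /* serializes region hand-out (and stats) */
    char *buf;
    size_t region_size;
    size_t current;         /* index of the next free region */
} region = { .lock = PTHREAD_MUTEX_INITIALIZER };

/* Per-thread translation context: bump-allocates within its region. */
typedef struct {
    char *ptr;              /* next free byte in the current region */
    char *end;              /* end of the current region */
} ToyContext;

/* Hand the next region to a context; returns false when all regions are
 * taken (the real code would then flush all translations and reset). */
static bool region_alloc(ToyContext *ctx)
{
    bool ok = false;

    pthread_mutex_lock(&region.lock);
    if (region.current < N_REGIONS) {
        ctx->ptr = region.buf + region.current * region.region_size;
        ctx->end = ctx->ptr + region.region_size;
        region.current++;
        ok = true;
    }
    pthread_mutex_unlock(&region.lock);
    return ok;
}

/* Bump-allocate; grab a fresh region when the current one fills up.
 * Assumes size is much smaller than one region. */
static void *ctx_alloc(ToyContext *ctx, size_t size)
{
    while ((size_t)(ctx->end - ctx->ptr) < size) {
        if (!region_alloc(ctx)) {
            return NULL;    /* out of regions: time for a flush */
        }
    }
    void *p = ctx->ptr;
    ctx->ptr += size;
    return p;
}

int main(void)
{
    ToyContext ctx;         /* TCG would have one context per thread */
    size_t n = 0;

    region.buf = malloc(BUF_SIZE);
    region.region_size = BUF_SIZE / N_REGIONS;
    if (region.buf == NULL || !region_alloc(&ctx)) {
        return 1;
    }
    while (ctx_alloc(&ctx, 357) != NULL) {  /* ~avg_tb_size from above */
        n++;
    }
    printf("allocated %zu blocks before needing a flush\n", n);
    free(region.buf);
    return 0;
}

The design point the benchmark illustrates falls out of this sketch: threads that translate more simply call region_alloc() more often, so the buffer is shared on demand instead of being split up front.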
parent 72c18027a6
commit f772fd986d
@@ -28,4 +28,28 @@ typedef struct {
     bool in_use;
 } BounceBuffer;
 
+// Moved here to allow a concrete type in the uc_struct.
+
+/*
+ * We divide code_gen_buffer into equally-sized "regions" that TCG threads
+ * dynamically allocate from as demand dictates. Given appropriate region
+ * sizing, this minimizes flushes even when some TCG threads generate a lot
+ * more code than others.
+ */
+struct tcg_region_state {
+    //QemuMutex lock;
+
+    /* fields set at init time */
+    void *start;
+    void *start_aligned;
+    void *end;
+    size_t n;
+    size_t size;   /* size of one region */
+    size_t stride; /* .size + guard size */
+
+    /* fields protected by the lock */
+    size_t current;       /* current region index */
+    size_t agg_size_full; /* aggregate size of full regions */
+};
+
 #endif
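As a worked example of this geometry, the sketch below (assumptions, not values from this diff: a 4 KiB host page, plus the 32-region, 80 MB setup from the commit message) computes the stride and the usable size of one region; the last page of each stride is the guard page:

#include <stdio.h>

int main(void)
{
    size_t page   = 4096;                      /* assumed host page size */
    size_t buf    = 80u * 1024 * 1024;         /* -tb-size 80 */
    size_t n      = 32;                        /* hypothetical region count */
    size_t stride = (buf / n) & ~(page - 1);   /* QEMU_ALIGN_DOWN(buf / n, page) */

    printf("stride = %zu bytes, usable region size = %zu bytes\n",
           stride, stride - page);             /* size = stride - guard page */
    return 0;
}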
@@ -178,6 +178,7 @@ struct uc_struct {
 
     // qemu/cpus.c
     bool mttcg_enabled;
+    int tcg_region_inited;
 
     // qemu/exec.c
     MemoryRegion *system_memory;
@@ -226,6 +227,7 @@ struct uc_struct {
     // tcg.c
     void *tcg_ctxs; // actually "TCGContext **tcg_ctxs"
     unsigned int n_tcg_ctxs;
+    struct tcg_region_state region;
 
     /* memory.c */
     unsigned memory_region_transaction_depth;
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_aarch64
 #define tcg_out_call tcg_out_call_aarch64
 #define tcg_out_cmp tcg_out_cmp_aarch64
+#define tcg_code_capacity tcg_code_capacity_aarch64
+#define tcg_code_size tcg_code_size_aarch64
 #define tcg_out_ext16s tcg_out_ext16s_aarch64
 #define tcg_out_ext16u tcg_out_ext16u_aarch64
 #define tcg_out_ext32s tcg_out_ext32s_aarch64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_aarch64
 #define tcg_reg_free tcg_reg_free_aarch64
 #define tcg_reg_sync tcg_reg_sync_aarch64
+#define tcg_region_init tcg_region_init_aarch64
+#define tcg_region_reset_all tcg_region_reset_all_aarch64
 #define tcg_set_frame tcg_set_frame_aarch64
 #define tcg_set_nop tcg_set_nop_aarch64
 #define tcg_swap_cond tcg_swap_cond_aarch64
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_aarch64eb
 #define tcg_out_call tcg_out_call_aarch64eb
 #define tcg_out_cmp tcg_out_cmp_aarch64eb
+#define tcg_code_capacity tcg_code_capacity_aarch64eb
+#define tcg_code_size tcg_code_size_aarch64eb
 #define tcg_out_ext16s tcg_out_ext16s_aarch64eb
 #define tcg_out_ext16u tcg_out_ext16u_aarch64eb
 #define tcg_out_ext32s tcg_out_ext32s_aarch64eb
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_aarch64eb
 #define tcg_reg_free tcg_reg_free_aarch64eb
 #define tcg_reg_sync tcg_reg_sync_aarch64eb
+#define tcg_region_init tcg_region_init_aarch64eb
+#define tcg_region_reset_all tcg_region_reset_all_aarch64eb
 #define tcg_set_frame tcg_set_frame_aarch64eb
 #define tcg_set_nop tcg_set_nop_aarch64eb
 #define tcg_swap_cond tcg_swap_cond_aarch64eb
@@ -586,15 +586,13 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
     TCGContext *tcg_ctx = uc->tcg_ctx;
     void *buf = static_code_gen_buffer;
     void *end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
-    size_t full_size, size;
+    size_t size;
 
     /* page-align the beginning and end of the buffer */
     buf = QEMU_ALIGN_PTR_UP(buf, uc->qemu_real_host_page_size);
     end = QEMU_ALIGN_PTR_DOWN(end, uc->qemu_real_host_page_size);
 
-    /* Reserve a guard page. */
-    full_size = end - buf;
-    size = full_size - uc->qemu_real_host_page_size;
+    size = end - buf;
 
     /* Honor a command-line option limiting the size of the buffer. */
     if (size > tcg_ctx->code_gen_buffer_size) {
@@ -613,9 +611,6 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
     if (qemu_mprotect_rwx(uc, buf, size)) {
        abort();
     }
-    if (qemu_mprotect_none(uc, buf + size, uc->qemu_real_host_page_size)) {
-        abort();
-    }
     // Unicorn: commented out
     //qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
 
@@ -624,20 +619,10 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 #elif defined(_WIN32)
 static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
     size_t size = tcg_ctx->code_gen_buffer_size;
-    void *buf1, *buf2;
-
-    /* Perform the allocation in two steps, so that the guard page
-       is reserved but uncommitted. */
-    buf1 = VirtualAlloc(NULL, size + uc->qemu_real_host_page_size,
-                        MEM_RESERVE, PAGE_NOACCESS);
-    if (buf1 != NULL) {
-        buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
-        assert(buf1 == buf2);
-    }
-
-    return buf1;
+    void *buf;
+
+    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
+                       PAGE_EXECUTE_READWRITE);
+    return buf;
 }
 
 void free_code_gen_buffer(struct uc_struct *uc)
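In all three allocators (static buffer, Windows, and the mmap path below) the explicit trailing guard page disappears here: after this change, tcg_region_init() in qemu/tcg/tcg.c (further down) places a guard page at the end of every region and protects it with qemu_mprotect_none(), so an overflow is caught at each region boundary rather than only at the end of the whole buffer.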
@@ -664,6 +649,7 @@ void free_code_gen_buffer(struct uc_struct *uc)
 static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
+    int prot = PROT_WRITE | PROT_READ | PROT_EXEC;
     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
     uintptr_t start = 0;
     size_t size = tcg_ctx->code_gen_buffer_size;
@@ -698,8 +684,7 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 # endif
 # endif
 
-    buf = mmap((void *)start, size + uc->qemu_real_host_page_size,
-               PROT_NONE, flags, -1, 0);
+    buf = mmap((void *)start, size, prot, flags, -1, 0);
     if (buf == MAP_FAILED) {
         return NULL;
     }
@@ -709,24 +694,23 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
         /* Try again, with the original still mapped, to avoid re-acquiring
            that 256mb crossing. This time don't specify an address. */
         size_t size2;
-        void *buf2 = mmap(NULL, size + uc->qemu_real_host_page_size,
-                          PROT_NONE, flags, -1, 0);
+        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
         switch ((int)(buf2 != MAP_FAILED)) {
         case 1:
             if (!cross_256mb(buf2, size)) {
                 /* Success! Use the new buffer. */
-                munmap(buf, size + uc->qemu_real_host_page_size);
+                munmap(buf, size);
                 break;
             }
             /* Failure. Work with what we had. */
-            munmap(buf2, size + uc->qemu_real_host_page_size);
+            munmap(buf2, size);
             /* fallthru */
         default:
             /* Split the original buffer. Free the smaller half. */
             buf2 = split_cross_256mb(buf, size);
             size2 = tcg_ctx->code_gen_buffer_size;
             if (buf == buf2) {
-                munmap(buf + size2 + uc->qemu_real_host_page_size, size - size2);
+                munmap(buf + size2, size - size2);
             } else {
                 munmap(buf, size - size2);
             }
@@ -738,10 +722,6 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
     }
 #endif
 
-    /* Make the final buffer accessible. The guard page at the end
-       will remain inaccessible with PROT_NONE. */
-    mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC);
-
     /* Request large pages for the buffer. */
     // Unicorn: Commented out
     //qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
@@ -948,20 +928,14 @@ static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
 void tb_flush(CPUState *cpu)
 {
     struct uc_struct* uc = cpu->uc;
-    TCGContext *tcg_ctx = uc->tcg_ctx;
 
     if (DEBUG_TB_FLUSH_GATE) {
         size_t nb_tbs = g_tree_nnodes(uc->tb_ctx.tb_tree);
         size_t host_size = 0;
 
         g_tree_foreach(uc->tb_ctx.tb_tree, tb_host_size_iter, &host_size);
-        printf("qemu: flush code_size=%td nb_tbs=%zu avg_tb_size=%zu\n",
-               tcg_ctx->code_gen_ptr - tcg_ctx->code_gen_buffer, nb_tbs,
-               nb_tbs > 0 ? host_size / nb_tbs : 0);
-    }
-    if ((unsigned long)((char*)tcg_ctx->code_gen_ptr - (char*)tcg_ctx->code_gen_buffer)
-        > tcg_ctx->code_gen_buffer_size) {
-        cpu_abort(cpu, "Internal error: code buffer overflow\n");
+        printf("qemu: flush code_size=%zu nb_tbs=%zu avg_tb_size=%zu\n",
+               tcg_code_size(uc), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
     }
 
     cpu_tb_jmp_cache_clear(cpu);
@@ -974,10 +948,10 @@ void tb_flush(CPUState *cpu)
     qht_reset_size(&uc->tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
     page_flush_tb(uc);
 
-    tcg_ctx->code_gen_ptr = tcg_ctx->code_gen_buffer;
+    tcg_region_reset_all(uc);
     /* XXX: flush processor icache at this point if cache flush is
        expensive */
-    uc->tb_ctx.tb_flush_count++;
+    atomic_mb_set(&uc->tb_ctx.tb_flush_count, uc->tb_ctx.tb_flush_count + 1);
 }
 
 /*
@@ -1308,9 +1282,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     phys_pc = get_page_addr_code(env, pc);
 
- buffer_overflow:
     tb = tb_alloc(env->uc, pc);
     if (unlikely(!tb)) {
+ buffer_overflow:
         /* flush must be done */
         tb_flush(cpu);
         /* cannot fail at this point */
@@ -1393,9 +1367,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tcg_ctx.search_out_len += search_size;
 #endif
 
-    tcg_ctx->code_gen_ptr = (void *)
+    atomic_set(&tcg_ctx->code_gen_ptr, (void *)
         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
-                 CODE_GEN_ALIGN);
+                 CODE_GEN_ALIGN));
 
     /* init jump list */
     assert(((uintptr_t)tb & 3) == 0);
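code_gen_ptr is now updated with atomic_set() here (matched by the atomic_read() in tcg_code_size() below), so the statistics code can safely read the pointer while a translation is being committed.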
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_arm
 #define tcg_out_call tcg_out_call_arm
 #define tcg_out_cmp tcg_out_cmp_arm
+#define tcg_code_capacity tcg_code_capacity_arm
+#define tcg_code_size tcg_code_size_arm
 #define tcg_out_ext16s tcg_out_ext16s_arm
 #define tcg_out_ext16u tcg_out_ext16u_arm
 #define tcg_out_ext32s tcg_out_ext32s_arm
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_arm
 #define tcg_reg_free tcg_reg_free_arm
 #define tcg_reg_sync tcg_reg_sync_arm
+#define tcg_region_init tcg_region_init_arm
+#define tcg_region_reset_all tcg_region_reset_all_arm
 #define tcg_set_frame tcg_set_frame_arm
 #define tcg_set_nop tcg_set_nop_arm
 #define tcg_swap_cond tcg_swap_cond_arm
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_armeb
 #define tcg_out_call tcg_out_call_armeb
 #define tcg_out_cmp tcg_out_cmp_armeb
+#define tcg_code_capacity tcg_code_capacity_armeb
+#define tcg_code_size tcg_code_size_armeb
 #define tcg_out_ext16s tcg_out_ext16s_armeb
 #define tcg_out_ext16u tcg_out_ext16u_armeb
 #define tcg_out_ext32s tcg_out_ext32s_armeb
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_armeb
 #define tcg_reg_free tcg_reg_free_armeb
 #define tcg_reg_sync tcg_reg_sync_armeb
+#define tcg_region_init tcg_region_init_armeb
+#define tcg_region_reset_all tcg_region_reset_all_armeb
 #define tcg_set_frame tcg_set_frame_armeb
 #define tcg_set_nop tcg_set_nop_armeb
 #define tcg_swap_cond tcg_swap_cond_armeb
qemu/cpus.c (13 changed lines)
@@ -135,6 +135,19 @@ static void *qemu_tcg_cpu_loop(struct uc_struct *uc)
 
 static int qemu_tcg_init_vcpu(CPUState *cpu)
 {
+    struct uc_struct *uc = cpu->uc;
+
+    /*
+     * Initialize TCG regions--once. Now is a good time, because:
+     * (1) TCG's init context, prologue and target globals have been set up.
+     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
+     *     -accel flag is processed, so the check doesn't work then).
+     */
+    if (!uc->tcg_region_inited) {
+        uc->tcg_region_inited = 1;
+        tcg_region_init(uc);
+    }
+
     return 0;
 }
 
@@ -2837,6 +2837,8 @@ symbols = (
     'tcg_out_bswap64',
     'tcg_out_call',
     'tcg_out_cmp',
+    'tcg_code_capacity',
+    'tcg_code_size',
     'tcg_out_ext16s',
     'tcg_out_ext16u',
     'tcg_out_ext32s',
@@ -2888,6 +2890,8 @@ symbols = (
     'tcg_reg_alloc_start',
     'tcg_reg_free',
     'tcg_reg_sync',
+    'tcg_region_init',
+    'tcg_region_reset_all',
     'tcg_set_frame',
     'tcg_set_nop',
     'tcg_swap_cond',
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_m68k
 #define tcg_out_call tcg_out_call_m68k
 #define tcg_out_cmp tcg_out_cmp_m68k
+#define tcg_code_capacity tcg_code_capacity_m68k
+#define tcg_code_size tcg_code_size_m68k
 #define tcg_out_ext16s tcg_out_ext16s_m68k
 #define tcg_out_ext16u tcg_out_ext16u_m68k
 #define tcg_out_ext32s tcg_out_ext32s_m68k
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_m68k
 #define tcg_reg_free tcg_reg_free_m68k
 #define tcg_reg_sync tcg_reg_sync_m68k
+#define tcg_region_init tcg_region_init_m68k
+#define tcg_region_reset_all tcg_region_reset_all_m68k
 #define tcg_set_frame tcg_set_frame_m68k
 #define tcg_set_nop tcg_set_nop_m68k
 #define tcg_swap_cond tcg_swap_cond_m68k
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mips
 #define tcg_out_call tcg_out_call_mips
 #define tcg_out_cmp tcg_out_cmp_mips
+#define tcg_code_capacity tcg_code_capacity_mips
+#define tcg_code_size tcg_code_size_mips
 #define tcg_out_ext16s tcg_out_ext16s_mips
 #define tcg_out_ext16u tcg_out_ext16u_mips
 #define tcg_out_ext32s tcg_out_ext32s_mips
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mips
 #define tcg_reg_free tcg_reg_free_mips
 #define tcg_reg_sync tcg_reg_sync_mips
+#define tcg_region_init tcg_region_init_mips
+#define tcg_region_reset_all tcg_region_reset_all_mips
 #define tcg_set_frame tcg_set_frame_mips
 #define tcg_set_nop tcg_set_nop_mips
 #define tcg_swap_cond tcg_swap_cond_mips
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mips64
 #define tcg_out_call tcg_out_call_mips64
 #define tcg_out_cmp tcg_out_cmp_mips64
+#define tcg_code_capacity tcg_code_capacity_mips64
+#define tcg_code_size tcg_code_size_mips64
 #define tcg_out_ext16s tcg_out_ext16s_mips64
 #define tcg_out_ext16u tcg_out_ext16u_mips64
 #define tcg_out_ext32s tcg_out_ext32s_mips64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mips64
 #define tcg_reg_free tcg_reg_free_mips64
 #define tcg_reg_sync tcg_reg_sync_mips64
+#define tcg_region_init tcg_region_init_mips64
+#define tcg_region_reset_all tcg_region_reset_all_mips64
 #define tcg_set_frame tcg_set_frame_mips64
 #define tcg_set_nop tcg_set_nop_mips64
 #define tcg_swap_cond tcg_swap_cond_mips64
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mips64el
 #define tcg_out_call tcg_out_call_mips64el
 #define tcg_out_cmp tcg_out_cmp_mips64el
+#define tcg_code_capacity tcg_code_capacity_mips64el
+#define tcg_code_size tcg_code_size_mips64el
 #define tcg_out_ext16s tcg_out_ext16s_mips64el
 #define tcg_out_ext16u tcg_out_ext16u_mips64el
 #define tcg_out_ext32s tcg_out_ext32s_mips64el
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mips64el
 #define tcg_reg_free tcg_reg_free_mips64el
 #define tcg_reg_sync tcg_reg_sync_mips64el
+#define tcg_region_init tcg_region_init_mips64el
+#define tcg_region_reset_all tcg_region_reset_all_mips64el
 #define tcg_set_frame tcg_set_frame_mips64el
 #define tcg_set_nop tcg_set_nop_mips64el
 #define tcg_swap_cond tcg_swap_cond_mips64el
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_mipsel
 #define tcg_out_call tcg_out_call_mipsel
 #define tcg_out_cmp tcg_out_cmp_mipsel
+#define tcg_code_capacity tcg_code_capacity_mipsel
+#define tcg_code_size tcg_code_size_mipsel
 #define tcg_out_ext16s tcg_out_ext16s_mipsel
 #define tcg_out_ext16u tcg_out_ext16u_mipsel
 #define tcg_out_ext32s tcg_out_ext32s_mipsel
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_mipsel
 #define tcg_reg_free tcg_reg_free_mipsel
 #define tcg_reg_sync tcg_reg_sync_mipsel
+#define tcg_region_init tcg_region_init_mipsel
+#define tcg_region_reset_all tcg_region_reset_all_mipsel
 #define tcg_set_frame tcg_set_frame_mipsel
 #define tcg_set_nop tcg_set_nop_mipsel
 #define tcg_swap_cond tcg_swap_cond_mipsel
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_powerpc
 #define tcg_out_call tcg_out_call_powerpc
 #define tcg_out_cmp tcg_out_cmp_powerpc
+#define tcg_code_capacity tcg_code_capacity_powerpc
+#define tcg_code_size tcg_code_size_powerpc
 #define tcg_out_ext16s tcg_out_ext16s_powerpc
 #define tcg_out_ext16u tcg_out_ext16u_powerpc
 #define tcg_out_ext32s tcg_out_ext32s_powerpc
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_powerpc
 #define tcg_reg_free tcg_reg_free_powerpc
 #define tcg_reg_sync tcg_reg_sync_powerpc
+#define tcg_region_init tcg_region_init_powerpc
+#define tcg_region_reset_all tcg_region_reset_all_powerpc
 #define tcg_set_frame tcg_set_frame_powerpc
 #define tcg_set_nop tcg_set_nop_powerpc
 #define tcg_swap_cond tcg_swap_cond_powerpc
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_sparc
 #define tcg_out_call tcg_out_call_sparc
 #define tcg_out_cmp tcg_out_cmp_sparc
+#define tcg_code_capacity tcg_code_capacity_sparc
+#define tcg_code_size tcg_code_size_sparc
 #define tcg_out_ext16s tcg_out_ext16s_sparc
 #define tcg_out_ext16u tcg_out_ext16u_sparc
 #define tcg_out_ext32s tcg_out_ext32s_sparc
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_sparc
 #define tcg_reg_free tcg_reg_free_sparc
 #define tcg_reg_sync tcg_reg_sync_sparc
+#define tcg_region_init tcg_region_init_sparc
+#define tcg_region_reset_all tcg_region_reset_all_sparc
 #define tcg_set_frame tcg_set_frame_sparc
 #define tcg_set_nop tcg_set_nop_sparc
 #define tcg_swap_cond tcg_swap_cond_sparc
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_sparc64
 #define tcg_out_call tcg_out_call_sparc64
 #define tcg_out_cmp tcg_out_cmp_sparc64
+#define tcg_code_capacity tcg_code_capacity_sparc64
+#define tcg_code_size tcg_code_size_sparc64
 #define tcg_out_ext16s tcg_out_ext16s_sparc64
 #define tcg_out_ext16u tcg_out_ext16u_sparc64
 #define tcg_out_ext32s tcg_out_ext32s_sparc64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_sparc64
 #define tcg_reg_free tcg_reg_free_sparc64
 #define tcg_reg_sync tcg_reg_sync_sparc64
+#define tcg_region_init tcg_region_init_sparc64
+#define tcg_region_reset_all tcg_region_reset_all_sparc64
 #define tcg_set_frame tcg_set_frame_sparc64
 #define tcg_set_nop tcg_set_nop_sparc64
 #define tcg_swap_cond tcg_swap_cond_sparc64
qemu/tcg/tcg.c (207 changed lines)
@@ -257,6 +257,205 @@ TCGLabel *gen_new_label(TCGContext *s)
 
 #include "tcg-target.inc.c"
 
+static void tcg_region_bounds(struct uc_struct *uc, size_t curr_region, void **pstart, void **pend)
+{
+    void *start, *end;
+
+    start = uc->region.start_aligned + curr_region * uc->region.stride;
+    end = start + uc->region.size;
+
+    if (curr_region == 0) {
+        start = uc->region.start;
+    }
+    if (curr_region == uc->region.n - 1) {
+        end = uc->region.end;
+    }
+
+    *pstart = start;
+    *pend = end;
+}
+
+static void tcg_region_assign(struct uc_struct *uc, TCGContext *s, size_t curr_region)
+{
+    void *start, *end;
+
+    tcg_region_bounds(uc, curr_region, &start, &end);
+
+    s->code_gen_buffer = start;
+    s->code_gen_ptr = start;
+    s->code_gen_buffer_size = end - start;
+    s->code_gen_highwater = end - TCG_HIGHWATER;
+}
+
+static bool tcg_region_alloc__locked(struct uc_struct *uc, TCGContext *s)
+{
+    if (uc->region.current == uc->region.n) {
+        return true;
+    }
+    tcg_region_assign(uc, s, uc->region.current);
+    uc->region.current++;
+    return false;
+}
+
+/*
+ * Request a new region once the one in use has filled up.
+ * Returns true on error.
+ */
+static bool tcg_region_alloc(struct uc_struct *uc, TCGContext *s)
+{
+    bool err;
+    /* read the region size now; alloc__locked will overwrite it on success */
+    size_t size_full = s->code_gen_buffer_size;
+
+    // Unicorn: commented out
+    //qemu_mutex_lock(&region.lock);
+    err = tcg_region_alloc__locked(uc, s);
+    if (!err) {
+        uc->region.agg_size_full += size_full - TCG_HIGHWATER;
+    }
+    //qemu_mutex_unlock(&region.lock);
+    return err;
+}
+
+/*
+ * Perform a context's first region allocation.
+ * This function does _not_ increment region.agg_size_full.
+ */
+static inline bool tcg_region_initial_alloc__locked(struct uc_struct *uc, TCGContext *s)
+{
+    return tcg_region_alloc__locked(uc, s);
+}
+
+/* Call from a safe-work context */
+void tcg_region_reset_all(struct uc_struct *uc)
+{
+    unsigned int i;
+    TCGContext **tcg_ctxs = uc->tcg_ctxs;
+
+    // Unicorn: commented out
+    //qemu_mutex_lock(&region.lock);
+    uc->region.current = 0;
+    uc->region.agg_size_full = 0;
+
+    for (i = 0; i < uc->n_tcg_ctxs; i++) {
+        bool err = tcg_region_initial_alloc__locked(uc, tcg_ctxs[i]);
+
+        g_assert(!err);
+    }
+    // Unicorn: commented out
+    //qemu_mutex_unlock(&region.lock);
+}
+
+/*
+ * Initializes region partitioning.
+ *
+ * Called at init time from the parent thread (i.e. the one calling
+ * tcg_context_init), after the target's TCG globals have been set.
+ */
+void tcg_region_init(struct uc_struct *uc)
+{
+    TCGContext *tcg_init_ctx = uc->tcg_init_ctx;
+
+    void *buf = tcg_init_ctx->code_gen_buffer;
+    void *aligned;
+    size_t size = tcg_init_ctx->code_gen_buffer_size;
+    size_t page_size = uc->qemu_real_host_page_size;
+    size_t region_size;
+    size_t n_regions;
+    size_t i;
+
+    /* We do not yet support multiple TCG contexts, so use one region for now */
+    n_regions = 1;
+
+    /* The first region will be 'aligned - buf' bytes larger than the others */
+    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
+    g_assert(aligned < tcg_init_ctx->code_gen_buffer + size);
+    /*
+     * Make region_size a multiple of page_size, using aligned as the start.
+     * As a result of this we might end up with a few extra pages at the end of
+     * the buffer; we will assign those to the last region.
+     */
+    region_size = (size - (aligned - buf)) / n_regions;
+    region_size = QEMU_ALIGN_DOWN(region_size, page_size);
+
+    /* A region must have at least 2 pages; one code, one guard */
+    g_assert(region_size >= 2 * page_size);
+
+    /* init the region struct */
+    //qemu_mutex_init(&region.lock);
+    uc->region.n = n_regions;
+    uc->region.size = region_size - page_size;
+    uc->region.stride = region_size;
+    uc->region.start = buf;
+    uc->region.start_aligned = aligned;
+    /* page-align the end, since its last page will be a guard page */
+    uc->region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
+    /* account for that last guard page */
+    uc->region.end -= page_size;
+
+    /* set guard pages */
+    for (i = 0; i < uc->region.n; i++) {
+        void *start, *end;
+        int rc;
+
+        tcg_region_bounds(uc, i, &start, &end);
+        rc = qemu_mprotect_none(uc, end, page_size);
+        g_assert(!rc);
+    }
+
+    /* We do not yet support multiple TCG contexts so allocate the region now */
+    {
+        TCGContext *tcg_ctx = uc->tcg_ctx;
+        bool err = tcg_region_initial_alloc__locked(uc, tcg_ctx);
+
+        g_assert(!err);
+    }
+}
+
+/*
+ * Returns the size (in bytes) of all translated code (i.e. from all regions)
+ * currently in the cache.
+ * See also: tcg_code_capacity()
+ * Do not confuse with tcg_current_code_size(); that one applies to a single
+ * TCG context.
+ */
+size_t tcg_code_size(struct uc_struct *uc)
+{
+    unsigned int i;
+    size_t total;
+
+    // Unicorn: commented out
+    //qemu_mutex_lock(&region.lock);
+    total = uc->region.agg_size_full;
+    for (i = 0; i < uc->n_tcg_ctxs; i++) {
+        TCGContext **tcg_ctxs = uc->tcg_ctxs;
+        const TCGContext *s = tcg_ctxs[i];
+        size_t size;
+
+        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
+        g_assert(size <= s->code_gen_buffer_size);
+        total += size;
+    }
+    //qemu_mutex_unlock(&region.lock);
+    return total;
+}
+
+/*
+ * Returns the code capacity (in bytes) of the entire cache, i.e. including all
+ * regions.
+ * See also: tcg_code_size()
+ */
+size_t tcg_code_capacity(struct uc_struct *uc)
+{
+    size_t guard_size, capacity;
+
+    /* no need for synchronization; these variables are set at init time */
+    guard_size = uc->region.stride - uc->region.size;
+    capacity = uc->region.end + guard_size - uc->region.start;
+    capacity -= uc->region.n * (guard_size + TCG_HIGHWATER);
+    return capacity;
+}
+
 /* pool based memory allocation */
 void *tcg_malloc_internal(TCGContext *s, int size)
 {
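For a feel of what tcg_code_capacity() returns, here is a hypothetical computation (assuming a 4 KiB page and TCG_HIGHWATER == 1024, QEMU's customary safety margin; neither value is pinned down by this diff), ignoring the page-alignment of the buffer's two ends:

#include <stdio.h>

int main(void)
{
    size_t page      = 4096;              /* assumed host page size */
    size_t highwater = 1024;              /* assumed TCG_HIGHWATER */
    size_t n_regions = 1;                 /* what this commit configures */
    size_t buf       = 80u * 1024 * 1024; /* -tb-size 80 */
    size_t guard     = page;              /* guard_size = stride - size */

    /* Mirrors tcg_code_capacity(): the whole buffer minus, per region,
     * one guard page and one highwater margin. */
    size_t capacity = buf - n_regions * (guard + highwater);
    printf("usable capacity: %zu of %zu bytes\n", capacity, buf);
    return 0;
}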
@@ -403,13 +602,17 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s)
     TranslationBlock *tb;
     void *next;
 
+ retry:
     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
 
     if (unlikely(next > s->code_gen_highwater)) {
-        return NULL;
+        if (tcg_region_alloc(s->uc, s)) {
+            return NULL;
+        }
+        goto retry;
     }
-    s->code_gen_ptr = next;
+    atomic_set(&s->code_gen_ptr, next);
     s->data_gen_ptr = NULL;
     return tb;
 }
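With this retry loop, hitting the highwater mark no longer means an immediate NULL: the context first asks for a fresh region, and only fails once region.current == region.n, which is what sends tb_gen_code above to its buffer_overflow label and a full tb_flush().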
@@ -1142,6 +1142,12 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
 
 void tcg_optimize(TCGContext *s);
 
+void tcg_region_init(struct uc_struct *uc);
+void tcg_region_reset_all(struct uc_struct *uc);
+
+size_t tcg_code_size(struct uc_struct *uc);
+size_t tcg_code_capacity(struct uc_struct *uc);
+
 /* Called with tb_lock held. */
 static inline void *tcg_malloc(TCGContext *s, int size)
 {
@@ -2831,6 +2831,8 @@
 #define tcg_out_bswap64 tcg_out_bswap64_x86_64
 #define tcg_out_call tcg_out_call_x86_64
 #define tcg_out_cmp tcg_out_cmp_x86_64
+#define tcg_code_capacity tcg_code_capacity_x86_64
+#define tcg_code_size tcg_code_size_x86_64
 #define tcg_out_ext16s tcg_out_ext16s_x86_64
 #define tcg_out_ext16u tcg_out_ext16u_x86_64
 #define tcg_out_ext32s tcg_out_ext32s_x86_64
@@ -2882,6 +2884,8 @@
 #define tcg_reg_alloc_start tcg_reg_alloc_start_x86_64
 #define tcg_reg_free tcg_reg_free_x86_64
 #define tcg_reg_sync tcg_reg_sync_x86_64
+#define tcg_region_init tcg_region_init_x86_64
+#define tcg_region_reset_all tcg_region_reset_all_x86_64
 #define tcg_set_frame tcg_set_frame_x86_64
 #define tcg_set_nop tcg_set_nop_x86_64
 #define tcg_swap_cond tcg_swap_cond_x86_64