unicorn/qemu/cpus.c

251 lines
6.1 KiB
C
Raw Normal View History

2015-08-21 07:04:50 +00:00
/*
* QEMU System Emulator
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* Modified for Unicorn Engine by Nguyen Anh Quynh, 2015 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "qapi/error.h"
2015-08-21 07:04:50 +00:00
#include "sysemu/sysemu.h"
#include "exec/exec-all.h"
#include "tcg.h"
#include "qemu/thread.h"
#include "sysemu/cpus.h"
2015-08-21 07:04:50 +00:00
#include "exec/address-spaces.h" // debug, can be removed later
#include "uc_priv.h"
/* Prototypes for the file-local helpers; their definitions follow below. */
static void *qemu_tcg_cpu_loop(struct uc_struct *uc);
static int qemu_tcg_init_vcpu(CPUState *cpu);
static bool tcg_exec_all(struct uc_struct *uc);
static int tcg_cpu_exec(struct uc_struct *uc, CPUState *cpu);
static bool cpu_can_run(CPUState *cpu);
static void cpu_handle_guest_debug(CPUState *cpu);
/*
 * Default value for the mttcg_enabled setting.
 *
 * Returns: always false.
 */
static bool default_mttcg_enabled(void)
{
    const bool enabled = false;

    return enabled;
}
/*
 * Store the value returned by default_mttcg_enabled() in uc->mttcg_enabled.
 *
 * @uc: engine instance whose mttcg_enabled field is written.
 */
void qemu_tcg_configure(struct uc_struct *uc)
{
    const bool use_mttcg = default_mttcg_enabled();

    uc->mttcg_enabled = use_mttcg;
}
/*
 * Run the vCPU of @uc by delegating to resume_all_vcpus().
 *
 * Returns: 0 when resume_all_vcpus() returns zero, -1 for any non-zero
 * result.
 */
int vm_start(struct uc_struct *uc)
{
    return resume_all_vcpus(uc) ? -1 : 0;
}
/*
 * Report the current value of the vCPU's "stopped" flag.
 *
 * @cpu: vCPU to query.
 *
 * Returns: cpu->stopped.
 */
bool cpu_is_stopped(CPUState *cpu)
{
    const bool is_stopped = cpu->stopped;

    return is_stopped;
}
/*
 * Run @func on @cpu.
 *
 * The callback is invoked right here as a plain, direct call; nothing is
 * queued or deferred.
 *
 * @cpu:  vCPU handed to the callback as its first argument.
 * @func: callback to invoke.
 * @data: opaque pointer handed to the callback as its second argument.
 */
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
    (*func)(cpu, data);
}
int resume_all_vcpus(struct uc_struct *uc)
2015-08-21 07:04:50 +00:00
{
2016-09-23 14:38:21 +00:00
CPUState *cpu = uc->cpu;
// Fix call multiple time (vu).
// We have to check whether this is the second time, then reset all CPU.
if (!cpu->created) {
cpu->created = true;
cpu->halted = 0;
if (qemu_init_vcpu(cpu))
return -1;
2015-08-21 07:04:50 +00:00
}
//qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
2016-09-23 14:38:21 +00:00
cpu_resume(cpu);
qemu_tcg_cpu_loop(uc);
return 0;
2015-08-21 07:04:50 +00:00
}
/*
 * Prepare @cpu for execution.
 *
 * Marks the vCPU as stopped, copies the core/thread counts from smp_cores
 * and smp_threads, installs a default address space when the vCPU has none,
 * and runs the TCG-specific initialisation when TCG is enabled.
 *
 * @cpu: vCPU to initialise.
 *
 * Returns: the result of qemu_tcg_init_vcpu() when TCG is enabled,
 * otherwise 0.
 */
int qemu_init_vcpu(CPUState *cpu)
{
    cpu->stopped = true;
    cpu->nr_threads = smp_threads;
    cpu->nr_cores = smp_cores;

    if (!cpu->as) {
        /*
         * The target cpu did not set up any address space of its own,
         * so hand it the default one.
         */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    if (!tcg_enabled(cpu->uc)) {
        return 0;
    }

    return qemu_tcg_init_vcpu(cpu);
}
static void *qemu_tcg_cpu_loop(struct uc_struct *uc)
2015-08-21 07:04:50 +00:00
{
2016-09-23 14:38:21 +00:00
CPUState *cpu = uc->cpu;
2015-08-21 07:04:50 +00:00
//qemu_tcg_init_cpu_signals();
2016-09-23 14:38:21 +00:00
cpu->created = true;
2015-08-21 07:04:50 +00:00
while (1) {
if (tcg_exec_all(uc))
break;
}
2016-09-23 14:38:21 +00:00
cpu->created = false;
2015-08-21 07:04:50 +00:00
return NULL;
}
/*
 * TCG-specific part of vCPU initialisation: set up the TCG regions the
 * first time this is called for a given engine instance.
 *
 * Region support was backported from QEMU commit
 * e8feb96fcc6c16eab8923332e86ff4ef0e2ac276
 * ("tcg: introduce regions to split code_gen_buffer").
 *
 * @cpu: vCPU being initialised; only cpu->uc is used.
 *
 * Returns: always 0.
 */
static int qemu_tcg_init_vcpu(CPUState *cpu)
{
    struct uc_struct *owner = cpu->uc;

    /* Regions are initialised once per engine; later calls are no-ops. */
    if (owner->tcg_region_inited) {
        return 0;
    }

    /*
     * Initialize TCG regions--once. Now is a good time, because:
     * (1) TCG's init context, prologue and target globals have been set up.
     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
     *     -accel flag is processed, so the check doesn't work then).
     */
    owner->tcg_region_inited = 1;
    tcg_region_init(owner);

    return 0;
}
/*
 * Execute guest code on @cpu by calling cpu_exec().
 *
 * Returns: the value returned by cpu_exec(); tcg_exec_all() compares it
 * against the EXCP_* codes.
 */
static int tcg_cpu_exec(struct uc_struct *uc, CPUState *cpu)
{
    const int excp = cpu_exec(uc, cpu);

    return excp;
}
static bool tcg_exec_all(struct uc_struct* uc)
{
int r;
bool finish = false;
while (!uc->cpu->exit_request) {
2016-09-23 14:38:21 +00:00
CPUState *cpu = uc->cpu;
2015-08-21 07:04:50 +00:00
CPUArchState *env = cpu->env_ptr;
//qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
// (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
if (cpu_can_run(cpu)) {
uc->quit_request = false;
r = tcg_cpu_exec(uc, cpu);
// quit current TB but continue emulating?
if (uc->quit_request) {
// reset stop_request
uc->stop_request = false;
} else if (uc->stop_request) {
2015-08-21 07:04:50 +00:00
finish = true;
break;
}
// save invalid memory access error & quit
if (env->invalid_error) {
uc->invalid_addr = env->invalid_addr;
uc->invalid_error = env->invalid_error;
finish = true;
break;
}
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
}
if (r == EXCP_HLT) {
finish = true;
break;
} else if (r == EXCP_ATOMIC) {
cpu_exec_step_atomic(uc, cpu);
2015-08-21 07:04:50 +00:00
}
} else if (cpu->stop) {
printf(">>> got stopped!!!\n");
2015-08-21 07:04:50 +00:00
break;
}
}
if (uc->cpu && uc->cpu->exit_request) {
atomic_mb_set(&uc->cpu->exit_request, 0);
}
2015-08-21 07:04:50 +00:00
return finish;
}
/*
 * Tell whether @cpu may execute guest code right now.
 *
 * Returns: false when cpu->stop is set or cpu_is_stopped() reports true,
 * otherwise true. cpu_is_stopped() is consulted only when cpu->stop is
 * clear.
 */
static bool cpu_can_run(CPUState *cpu)
{
    return !cpu->stop && !cpu_is_stopped(cpu);
}
/*
 * Handle a debug exception raised by the guest (tcg_exec_all() calls this
 * for EXCP_DEBUG): mark the vCPU as stopped.
 *
 * @cpu: vCPU whose "stopped" flag is set.
 */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    cpu->stopped = true;
}
/*
 * Compiled out with "#if 0": signal setup for the vCPU.
 *
 * Non-Windows variant: installs cpu_signal as the handler for SIG_IPI and
 * unblocks SIG_IPI in the calling thread's signal mask.
 * Windows variant: does nothing.
 *
 * NOTE(review): neither cpu_signal nor SIG_IPI is defined in the visible
 * part of this file -- confirm they still exist before re-enabling this.
 */
#if 0
#ifndef _WIN32
static void qemu_tcg_init_cpu_signals(void)
{
sigset_t set;
struct sigaction sigact;
/* Start from an all-zero sigaction so only sa_handler is set. */
memset(&sigact, 0, sizeof(sigact));
sigact.sa_handler = cpu_signal;
sigaction(SIG_IPI, &sigact, NULL);
/* Build a set containing just SIG_IPI and unblock it. */
sigemptyset(&set);
sigaddset(&set, SIG_IPI);
pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
#else /* _WIN32 */
/* Windows: nothing to set up. */
static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */
#endif