tcg: Save insn data and use it in cpu_restore_state_from_tb

We can now restore state without retranslation.

Backports commit fca8a500d519a56abeaedf8073167a61d3c6b9c4 from qemu
This commit is contained in:
Richard Henderson 2018-02-16 09:45:15 -05:00 committed by Lioncash
parent a7cf761caf
commit 66de6cc37c
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
4 changed files with 146 additions and 50 deletions

View file

@ -198,6 +198,7 @@ struct TranslationBlock {
#define CF_USE_ICOUNT 0x20000
void *tc_ptr; /* pointer to the translated code */
uint8_t *tc_search; /* pointer to search data */
/* next matching tb for physical address. */
struct TranslationBlock *phys_hash_next;
/* first and second physical page containing code. The lower bit

View file

@ -2335,7 +2335,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_insn_unit *gen_code_buf,
long search_pc)
{
int i, oi, oi_next;
int i, oi, oi_next, num_insns;
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
@ -2379,6 +2379,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_out_tb_init(s);
num_insns = -1;
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
@ -2401,6 +2402,10 @@ static inline int tcg_gen_code_common(TCGContext *s,
tcg_reg_alloc_movi(s, args, dead_args, sync_args);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
}
num_insns++;
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@ -2408,7 +2413,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
#else
a = args[i];
#endif
s->gen_opc_data[i] = a;
s->gen_insn_data[num_insns][i] = a;
}
break;
case INDEX_op_discard:
@ -2441,6 +2446,9 @@ static inline int tcg_gen_code_common(TCGContext *s,
#endif
}
tcg_debug_assert(num_insns >= 0);
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
/* Generate TB finalization at the end of block */
tcg_out_tb_finalize(s);
return -1;
@ -2494,24 +2502,26 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
#if 0
TCGContext *s = &tcg_ctx;
int64_t tot;
int64_t tb_count = s->tb_count;
int64_t tb_div_count = tb_count ? tb_count : 1;
int64_t tot = s->interm_time + s->code_time;
tot = s->interm_time + s->code_time;
cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
tot, tot / 2.4e9);
cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
s->tb_count,
s->tb_count1 - s->tb_count,
s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
tb_count, s->tb_count1 - tb_count,
(double)(s->tb_count1 - s->tb_count)
/ (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
(double)s->op_count / tb_div_count, s->op_count_max);
cpu_fprintf(f, "deleted ops/TB %0.2f\n",
s->tb_count ?
(double)s->del_op_count / s->tb_count : 0);
(double)s->del_op_count / tb_div_count);
cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
s->tb_count ?
(double)s->temp_count / s->tb_count : 0,
s->temp_count_max);
(double)s->temp_count / tb_div_count, s->temp_count_max);
cpu_fprintf(f, "avg host code/TB %0.1f\n",
(double)s->code_out_len / tb_div_count);
cpu_fprintf(f, "avg search data/TB %0.1f\n",
(double)s->search_out_len / tb_div_count);
cpu_fprintf(f, "cycles/op %0.1f\n",
s->op_count ? (double)tot / s->op_count : 0);
@ -2519,8 +2529,11 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
s->code_in_len ? (double)tot / s->code_in_len : 0);
cpu_fprintf(f, "cycles/out byte %0.1f\n",
s->code_out_len ? (double)tot / s->code_out_len : 0);
if (tot == 0)
cpu_fprintf(f, "cycles/search byte %0.1f\n",
s->search_out_len ? (double)tot / s->search_out_len : 0);
if (tot == 0) {
tot = 1;
}
cpu_fprintf(f, " gen_interm time %0.1f%%\n",
(double)s->interm_time / tot * 100.0);
cpu_fprintf(f, " gen_code time %0.1f%%\n",

View file

@ -663,6 +663,7 @@ struct TCGContext {
int64_t del_op_count;
int64_t code_in_len;
int64_t code_out_len;
int64_t search_out_len;
int64_t interm_time;
int64_t code_time;
int64_t la_time;
@ -712,7 +713,8 @@ struct TCGContext {
uint16_t gen_opc_icount[OPC_BUF_SIZE];
uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
target_ulong gen_opc_data[TARGET_INSN_START_WORDS];
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
// Unicorn engine variables
struct uc_struct *uc;

View file

@ -156,49 +156,126 @@ void tb_cleanup(struct uc_struct *uc)
tb_clean_internal(uc, V_L1_SHIFT / V_L2_BITS, lp);
}
/* Store VAL at P using the signed LEB128 byte encoding: seven payload
   bits per byte, with bit 7 set on every byte except the last.
   Returns the address just past the final byte written. */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
    for (;;) {
        int group = val & 0x7f;
        int sign_bit_set = (group & 0x40) != 0;

        val >>= 7;

        /* This is the last byte once what remains of VAL is 0 with
           bit 6 of the group clear, or -1 with bit 6 of the group set. */
        if ((val == 0 && !sign_bit_set) || (val == -1 && sign_bit_set)) {
            *p++ = group;
            return p;
        }

        /* More groups follow: flag the continuation bit. */
        *p++ = group | 0x80;
    }
}
/* Read one signed LEB128 value starting at *PP.
   On return, *PP has been advanced past the bytes consumed.
   Returns the decoded value. */
static target_long decode_sleb128(uint8_t **pp)
{
    uint8_t *cursor = *pp;
    target_long result = 0;
    int shift = 0;
    int byte;

    for (;;) {
        byte = *cursor++;
        /* Accumulate the low seven bits at the current position. */
        result |= (target_ulong)(byte & 0x7f) << shift;
        shift += 7;
        if (!(byte & 0x80)) {
            /* Continuation bit clear: this was the last byte. */
            break;
        }
    }

    /* Bit 6 of the last byte carries the sign; fill the bits above
       SHIFT with ones when it is set and there is room left. */
    if ((byte & 0x40) && shift < TARGET_LONG_BITS) {
        result |= -(target_ulong)1 << shift;
    }

    *pp = cursor;
    return result;
}
/* Serialize the per-instruction data gathered while compiling TB.

   The output is written at BLOCK, whose address is also stored in
   tb->tc_search.  Returns the number of bytes written.

   The logical table has one row per guest instruction (tb->icount rows).
   Each row consists of TARGET_INSN_START_WORDS target_ulong values taken
   from gen_insn_data, followed by the matching gen_insn_end_off entry.

   Every value is stored as the sleb128-encoded difference from the same
   column of the previous row.  For the first row the previous values are
   taken to be { tb->pc, 0, ..., 0 }: the first column is seeded with the
   guest pc and all remaining columns, including the end offset, with 0. */
static int encode_search(TCGContext *tcg_ctx, TranslationBlock *tb, uint8_t *block)
{
    uint8_t *out = block;
    int num_insns, insn, word;

    tb->tc_search = block;
    num_insns = tb->icount;

    for (insn = 0; insn < num_insns; ++insn) {
        target_ulong base;

        /* Insn-start words, each relative to the previous row. */
        for (word = 0; word < TARGET_INSN_START_WORDS; ++word) {
            if (insn > 0) {
                base = tcg_ctx->gen_insn_data[insn - 1][word];
            } else if (word == 0) {
                base = tb->pc;
            } else {
                base = 0;
            }
            out = encode_sleb128(out, tcg_ctx->gen_insn_data[insn][word] - base);
        }

        /* End offset of this insn, relative to the previous insn's end. */
        base = (insn > 0 ? tcg_ctx->gen_insn_end_off[insn - 1] : 0);
        out = encode_sleb128(out, tcg_ctx->gen_insn_end_off[insn] - base);
    }

    return out - block;
}
/* The cpu state corresponding to 'searched_pc' is restored. */
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t searched_pc)
{
target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
uintptr_t host_pc = (uintptr_t)tb->tc_ptr;
CPUArchState *env = cpu->env_ptr;
TCGContext *s = cpu->uc->tcg_ctx;
int j;
uintptr_t tc_ptr;
uint8_t *p = tb->tc_search;
int i, j, num_insns = tb->icount;
#ifdef CONFIG_PROFILER
int64_t ti;
int64_t ti = profile_getclock();
#endif
#ifdef CONFIG_PROFILER
ti = profile_getclock();
#endif
tcg_func_start(s);
gen_intermediate_code_pc(env, tb);
/* find opc index corresponding to search_pc */
tc_ptr = (uintptr_t)tb->tc_ptr;
if (searched_pc < tc_ptr)
if (searched_pc < host_pc) {
return -1;
s->tb_next_offset = tb->tb_next_offset;
#ifdef USE_DIRECT_JUMP
s->tb_jmp_offset = tb->tb_jmp_offset;
s->tb_next = NULL;
#else
s->tb_jmp_offset = NULL;
s->tb_next = tb->tb_next;
#endif
j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
searched_pc - tc_ptr);
if (j < 0)
return -1;
/* now find start of instruction before */
while (s->gen_opc_instr_start[j] == 0) {
j--;
}
cpu->icount_decr.u16.low -= s->gen_opc_icount[j];
restore_state_to_opc(env, tb, s->gen_opc_data);
/* Reconstruct the stored insn data while looking for the point at
which the end of the insn exceeds the searched_pc. */
for (i = 0; i < num_insns; ++i) {
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
data[j] += decode_sleb128(&p);
}
host_pc += decode_sleb128(&p);
if (host_pc > searched_pc) {
goto found;
}
}
return -1;
found:
// UNICORN: Commented out
//if (tb->cflags & CF_USE_ICOUNT) {
// assert(use_icount);
// /* Reset the cycle counter to the start of the block. */
// cpu->icount_decr.u16.low += num_insns;
// /* Clear the IO flag. */
// cpu->can_do_io = 0;
//}
cpu->icount_decr.u16.low -= i;
restore_state_to_opc(env, tb, data);
#ifdef CONFIG_PROFILER
s->restore_time += profile_getclock() - ti;
@ -1030,7 +1107,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
TranslationBlock *tb;
tb_page_addr_t phys_pc, phys_page2;
tcg_insn_unit *gen_code_buf;
int gen_code_size;
int gen_code_size, search_size;
#ifdef CONFIG_PROFILER
int64_t ti;
#endif
@ -1095,11 +1172,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#endif
gen_code_size = tcg_gen_code(tcg_ctx, gen_code_buf);
search_size = encode_search(tcg_ctx, tb, (void *)gen_code_buf + gen_code_size);
#ifdef CONFIG_PROFILER
tcg_ctx.code_time += profile_getclock();
tcg_ctx.code_in_len += tb->size;
tcg_ctx.code_out_len += gen_code_size;
tcg_ctx.search_out_len += search_size;
#endif
/* UNICORN: Commented out
@ -1112,8 +1191,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
}
#endif*/
tcg_ctx->code_gen_ptr = (void *)(((uintptr_t)gen_code_buf +
gen_code_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
tcg_ctx->code_gen_ptr = (void *)
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
CODE_GEN_ALIGN);
phys_page2 = -1;
/* check next page if needed */