tcg/optimize: optimize temps tracking

The tcg_temp_info structure uses 24 bytes per temp. Now that we emulate
vector registers on most guests, it's not uncommon to have more than 100
used temps. This means we have initialize more than 2kB at least twice
per TB, often more when there is a few goto_tb.

Instead used a TCGTempSet bit array to track which temps are in used in
the current basic block. This means there are only around 16 bytes to
initialize.

This improves the boot time of a MIPS guest on an x86-64 host by around
7% and moves out tcg_optimize from the the top of the profiler list.

Backports commit 1208d7dd5fddc1fbd98de800d17429b4e5578848 from qemu
This commit is contained in:
Aurelien Jarno 2018-02-10 21:46:58 -05:00 committed by Lioncash
parent 5f67ab74e7
commit b450b79622
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
2 changed files with 36 additions and 13 deletions

View file

@ -53,6 +53,26 @@ static void reset_temp(TCGContext *s, TCGArg temp)
temps[temp].mask = -1;
}
/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(TCGContext *s, int nb_temps)
{
long len = BITS_TO_LONGS(nb_temps) * sizeof(unsigned long);
memset(&s->temps2_used.l, 0, len);
}
/* Initialize and activate a temporary. */
static void init_temp_info(TCGContext *s, TCGArg temp)
{
struct tcg_temp_info *temps = s->temps2;
TCGTempSet *temps_used = &s->temps2_used;
if (!test_bit(temp, temps_used->l)) {
temps[temp].state = TCG_TEMP_UNDEF;
temps[temp].mask = -1;
set_bit(temp, temps_used->l);
}
}
static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
TCGOpcode opc, int nargs)
{
@ -85,18 +105,6 @@ static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
return new_op;
}
/* Reset all temporaries, given that there are NB_TEMPS of them. */
static void reset_all_temps(TCGContext *s, int nb_temps)
{
struct tcg_temp_info *temps = s->temps2;
int i;
for (i = 0; i < nb_temps; i++) {
temps[i].state = TCG_TEMP_UNDEF;
temps[i].mask = -1;
}
}
static int op_bits(TCGContext *s, TCGOpcode op)
{
const TCGOpDef *def = &s->tcg_op_defs[op];
@ -602,12 +610,24 @@ void tcg_optimize(TCGContext *s)
const TCGOpDef *def = &s->tcg_op_defs[opc];
oi_next = op->next;
/* Count the arguments, and initialize the temps that are
going to be used */
if (opc == INDEX_op_call) {
nb_oargs = op->callo;
nb_iargs = op->calli;
for (i = 0; i < nb_oargs + nb_iargs; i++) {
tmp = args[i];
if (tmp != TCG_CALL_DUMMY_ARG) {
init_temp_info(s, tmp);
}
}
} else {
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
for (i = 0; i < nb_oargs + nb_iargs; i++) {
init_temp_info(s, args[i]);
}
}
/* Do copy propagation */
@ -1303,7 +1323,9 @@ void tcg_optimize(TCGContext *s)
if (!(args[nb_oargs + nb_iargs + 1]
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
for (i = 0; i < nb_globals; i++) {
reset_temp(s, i);
if (test_bit(i, s->temps2_used.l)) {
reset_temp(s, i);
}
}
}
goto do_reset_output;

View file

@ -744,6 +744,7 @@ struct TCGContext {
/* qemu/tcg/optimize.c */
struct tcg_temp_info temps2[TCG_MAX_TEMPS];
TCGTempSet temps2_used;
/* qemu/target-m68k/translate.c */
TCGv_i32 cpu_halted;