mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-07-01 10:28:23 +00:00
tcg: Compress dead_temps and mem_temps into a single array
We only need two bits per temporary. Fold the two bytes into one, and reduce the memory and cachelines required during compilation. Backports commit c70fbf0a9938baf3b4f843355a77c17a7e945b98 from qemu
This commit is contained in:
parent
690985a582
commit
e973e89a57
149
qemu/tcg/tcg.c
149
qemu/tcg/tcg.c
|
@ -814,16 +814,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
|
||||||
real_args++;
|
real_args++;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* If stack grows up, then we will be placing successive
|
/* If stack grows up, then we will be placing successive
|
||||||
arguments at lower addresses, which means we need to
|
arguments at lower addresses, which means we need to
|
||||||
reverse the order compared to how we would normally
|
reverse the order compared to how we would normally
|
||||||
treat either big or little-endian. For those arguments
|
treat either big or little-endian. For those arguments
|
||||||
that will wind up in registers, this still works for
|
that will wind up in registers, this still works for
|
||||||
HPPA (the only current STACK_GROWSUP target) since the
|
HPPA (the only current STACK_GROWSUP target) since the
|
||||||
argument registers are *also* allocated in decreasing
|
argument registers are *also* allocated in decreasing
|
||||||
order. If another such target is added, this logic may
|
order. If another such target is added, this logic may
|
||||||
have to get more complicated to differentiate between
|
have to get more complicated to differentiate between
|
||||||
stack arguments and register arguments. */
|
stack arguments and register arguments. */
|
||||||
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
|
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
|
||||||
s->gen_opparam_buf[pi++] = args[i] + 1;
|
s->gen_opparam_buf[pi++] = args[i] + 1;
|
||||||
s->gen_opparam_buf[pi++] = args[i];
|
s->gen_opparam_buf[pi++] = args[i];
|
||||||
|
@ -1359,39 +1359,42 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
|
||||||
|
|
||||||
#ifdef USE_LIVENESS_ANALYSIS
|
#ifdef USE_LIVENESS_ANALYSIS
|
||||||
|
|
||||||
|
#define TS_DEAD 1
|
||||||
|
#define TS_MEM 2
|
||||||
|
|
||||||
/* liveness analysis: end of function: all temps are dead, and globals
|
/* liveness analysis: end of function: all temps are dead, and globals
|
||||||
should be in memory. */
|
should be in memory. */
|
||||||
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
|
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
|
||||||
uint8_t *mem_temps)
|
|
||||||
{
|
{
|
||||||
memset(dead_temps, 1, s->nb_temps);
|
memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
|
||||||
memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* liveness analysis: end of basic block: all temps are dead, globals
|
/* liveness analysis: end of basic block: all temps are dead, globals
|
||||||
and local temps should be in memory. */
|
and local temps should be in memory. */
|
||||||
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
|
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
|
||||||
uint8_t *mem_temps)
|
|
||||||
{
|
{
|
||||||
int i;
|
int i, n;
|
||||||
|
|
||||||
memset(dead_temps, 1, s->nb_temps);
|
tcg_la_func_end(s, temp_state);
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
|
||||||
for(i = s->nb_globals; i < s->nb_temps; i++) {
|
if (s->temps[i].temp_local) {
|
||||||
mem_temps[i] = s->temps[i].temp_local;
|
temp_state[i] |= TS_MEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Unicorn: for brcond, we should refresh liveness states for TCG globals
|
Unicorn: for brcond, we should refresh liveness states for TCG globals
|
||||||
*/
|
*/
|
||||||
static inline void tcg_la_br_end(TCGContext *s, uint8_t *mem_temps)
|
static inline void tcg_la_br_end(TCGContext *s, uint8_t *temp_state)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
for (i = 0; i < s->nb_globals; i++) {
|
||||||
|
temp_state[i] |= TS_MEM;
|
||||||
|
}
|
||||||
for(i = s->nb_globals; i < s->nb_temps; i++) {
|
for(i = s->nb_globals; i < s->nb_temps; i++) {
|
||||||
mem_temps[i] = s->temps[i].temp_local;
|
temp_state[i] = (temp_state[i] & TS_DEAD) | (s->temps[i].temp_local ? TS_MEM : 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1400,12 +1403,12 @@ static inline void tcg_la_br_end(TCGContext *s, uint8_t *mem_temps)
|
||||||
temporaries are removed. */
|
temporaries are removed. */
|
||||||
static void tcg_liveness_analysis(TCGContext *s)
|
static void tcg_liveness_analysis(TCGContext *s)
|
||||||
{
|
{
|
||||||
uint8_t *dead_temps, *mem_temps;
|
uint8_t *temp_state;
|
||||||
int oi, oi_prev;
|
int oi, oi_prev;
|
||||||
|
int nb_globals = s->nb_globals;
|
||||||
|
|
||||||
dead_temps = tcg_malloc(s, s->nb_temps);
|
temp_state = tcg_malloc(s, s->nb_temps);
|
||||||
mem_temps = tcg_malloc(s, s->nb_temps);
|
tcg_la_func_end(s, temp_state);
|
||||||
tcg_la_func_end(s, dead_temps, mem_temps);
|
|
||||||
|
|
||||||
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
|
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
|
||||||
int i, nb_iargs, nb_oargs;
|
int i, nb_iargs, nb_oargs;
|
||||||
|
@ -1434,7 +1437,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
|
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
if (temp_state[arg] != TS_DEAD) {
|
||||||
goto do_not_remove_call;
|
goto do_not_remove_call;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1445,34 +1448,40 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
/* output args are dead */
|
/* output args are dead */
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
if (mem_temps[arg]) {
|
if (temp_state[arg] & TS_MEM) {
|
||||||
arg_life |= SYNC_ARG << i;
|
arg_life |= SYNC_ARG << i;
|
||||||
}
|
}
|
||||||
dead_temps[arg] = 1;
|
temp_state[arg] = TS_DEAD;
|
||||||
mem_temps[arg] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
|
||||||
/* globals should be synced to memory */
|
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
|
||||||
}
|
|
||||||
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
|
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
|
||||||
TCG_CALL_NO_READ_GLOBALS))) {
|
TCG_CALL_NO_READ_GLOBALS))) {
|
||||||
/* globals should go back to memory */
|
/* globals should go back to memory */
|
||||||
memset(dead_temps, 1, s->nb_globals);
|
memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
|
||||||
|
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
||||||
|
/* globals should be synced to memory */
|
||||||
|
for (i = 0; i < nb_globals; i++) {
|
||||||
|
temp_state[i] |= TS_MEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* input args are live */
|
/* record arguments that die in this helper */
|
||||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
dead_temps[arg] = 0;
|
}
|
||||||
|
}
|
||||||
|
/* input arguments are live for preceding opcodes */
|
||||||
|
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||||
|
arg = args[i];
|
||||||
|
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||||
|
temp_state[arg] &= ~TS_DEAD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1482,8 +1491,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
break;
|
break;
|
||||||
case INDEX_op_discard:
|
case INDEX_op_discard:
|
||||||
/* mark the temporary as dead */
|
/* mark the temporary as dead */
|
||||||
dead_temps[args[0]] = 1;
|
temp_state[args[0]] = TS_DEAD;
|
||||||
mem_temps[args[0]] = 0;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case INDEX_op_add2_i32:
|
case INDEX_op_add2_i32:
|
||||||
|
@ -1504,8 +1512,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
the low part. The result can be optimized to a simple
|
the low part. The result can be optimized to a simple
|
||||||
add or sub. This happens often for x86_64 guest when the
|
add or sub. This happens often for x86_64 guest when the
|
||||||
cpu mode is set to 32 bit. */
|
cpu mode is set to 32 bit. */
|
||||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
if (temp_state[args[1]] == TS_DEAD) {
|
||||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
if (temp_state[args[0]] == TS_DEAD) {
|
||||||
goto do_remove;
|
goto do_remove;
|
||||||
}
|
}
|
||||||
/* Replace the opcode and adjust the args in place,
|
/* Replace the opcode and adjust the args in place,
|
||||||
|
@ -1542,8 +1550,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
do_mul2:
|
do_mul2:
|
||||||
nb_iargs = 2;
|
nb_iargs = 2;
|
||||||
nb_oargs = 2;
|
nb_oargs = 2;
|
||||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
if (temp_state[args[1]] == TS_DEAD) {
|
||||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
if (temp_state[args[0]] == TS_DEAD) {
|
||||||
/* Both parts of the operation are dead. */
|
/* Both parts of the operation are dead. */
|
||||||
goto do_remove;
|
goto do_remove;
|
||||||
}
|
}
|
||||||
|
@ -1551,8 +1559,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
op->opc = opc = opc_new;
|
op->opc = opc = opc_new;
|
||||||
args[1] = args[2];
|
args[1] = args[2];
|
||||||
args[2] = args[3];
|
args[2] = args[3];
|
||||||
} else if (have_opc_new2 && dead_temps[args[0]]
|
} else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
|
||||||
&& !mem_temps[args[0]]) {
|
|
||||||
/* The low part of the operation is dead; generate the high. */
|
/* The low part of the operation is dead; generate the high. */
|
||||||
op->opc = opc = opc_new2;
|
op->opc = opc = opc_new2;
|
||||||
args[0] = args[1];
|
args[0] = args[1];
|
||||||
|
@ -1575,8 +1582,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
implies side effects */
|
implies side effects */
|
||||||
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
|
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
if (temp_state[args[i]] != TS_DEAD) {
|
||||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
|
||||||
goto do_not_remove;
|
goto do_not_remove;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1587,14 +1593,13 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
/* output args are dead */
|
/* output args are dead */
|
||||||
for (i = 0; i < nb_oargs; i++) {
|
for (i = 0; i < nb_oargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
if (mem_temps[arg]) {
|
if (temp_state[arg] & TS_MEM) {
|
||||||
arg_life |= SYNC_ARG << i;
|
arg_life |= SYNC_ARG << i;
|
||||||
}
|
}
|
||||||
dead_temps[arg] = 1;
|
temp_state[arg] = TS_DEAD;
|
||||||
mem_temps[arg] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if end of basic block, update */
|
/* if end of basic block, update */
|
||||||
|
@ -1603,27 +1608,33 @@ static void tcg_liveness_analysis(TCGContext *s)
|
||||||
// this causes problem because check_exit_request() inserts
|
// this causes problem because check_exit_request() inserts
|
||||||
// brcond instruction in the middle of the TB,
|
// brcond instruction in the middle of the TB,
|
||||||
// which incorrectly flags end-of-block
|
// which incorrectly flags end-of-block
|
||||||
if (opc != INDEX_op_brcond_i32)
|
if (opc != INDEX_op_brcond_i32) {
|
||||||
tcg_la_bb_end(s, dead_temps, mem_temps);
|
tcg_la_bb_end(s, temp_state);
|
||||||
// Unicorn: we do not touch dead temps for brcond,
|
} else {
|
||||||
// but we should refresh TCG globals In-Memory states,
|
// Unicorn: we do not touch dead temps for brcond,
|
||||||
// otherwise, important CPU states(especially conditional flags) might be forgotten,
|
// but we should refresh TCG globals In-Memory states,
|
||||||
// result in wrongly generated host code that run into wrong branch.
|
// otherwise important CPU state (especially the condition flags) might be forgotten,
|
||||||
// Refer to https://github.com/unicorn-engine/unicorn/issues/287 for further information
|
// resulting in wrongly generated host code that takes the wrong branch.
|
||||||
else
|
// Refer to https://github.com/unicorn-engine/unicorn/issues/287 for further information
|
||||||
tcg_la_br_end(s, mem_temps);
|
tcg_la_br_end(s, temp_state);
|
||||||
|
}
|
||||||
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
||||||
/* globals should be synced to memory */
|
/* globals should be synced to memory */
|
||||||
memset(mem_temps, 1, s->nb_globals);
|
for (i = 0; i < nb_globals; i++) {
|
||||||
|
temp_state[i] |= TS_MEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* input args are live */
|
/* record arguments that die in this opcode */
|
||||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||||
arg = args[i];
|
arg = args[i];
|
||||||
if (dead_temps[arg]) {
|
if (temp_state[arg] & TS_DEAD) {
|
||||||
arg_life |= DEAD_ARG << i;
|
arg_life |= DEAD_ARG << i;
|
||||||
}
|
}
|
||||||
dead_temps[arg] = 0;
|
}
|
||||||
|
/* input arguments are live for preceding opcodes */
|
||||||
|
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||||
|
temp_state[args[i]] &= ~TS_DEAD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in a new issue