mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-02-25 05:26:51 +00:00
tcg: Compress dead_temps and mem_temps into a single array
We only need two bits per temporary. Fold the two bytes into one, and reduce the memory and cachelines required during compilation. Backports commit c70fbf0a9938baf3b4f843355a77c17a7e945b98 from qemu
This commit is contained in:
parent
690985a582
commit
e973e89a57
149
qemu/tcg/tcg.c
149
qemu/tcg/tcg.c
|
@ -814,16 +814,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
|
|||
real_args++;
|
||||
}
|
||||
#endif
|
||||
/* If stack grows up, then we will be placing successive
|
||||
arguments at lower addresses, which means we need to
|
||||
reverse the order compared to how we would normally
|
||||
treat either big or little-endian. For those arguments
|
||||
that will wind up in registers, this still works for
|
||||
HPPA (the only current STACK_GROWSUP target) since the
|
||||
argument registers are *also* allocated in decreasing
|
||||
order. If another such target is added, this logic may
|
||||
have to get more complicated to differentiate between
|
||||
stack arguments and register arguments. */
|
||||
/* If stack grows up, then we will be placing successive
|
||||
arguments at lower addresses, which means we need to
|
||||
reverse the order compared to how we would normally
|
||||
treat either big or little-endian. For those arguments
|
||||
that will wind up in registers, this still works for
|
||||
HPPA (the only current STACK_GROWSUP target) since the
|
||||
argument registers are *also* allocated in decreasing
|
||||
order. If another such target is added, this logic may
|
||||
have to get more complicated to differentiate between
|
||||
stack arguments and register arguments. */
|
||||
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
|
||||
s->gen_opparam_buf[pi++] = args[i] + 1;
|
||||
s->gen_opparam_buf[pi++] = args[i];
|
||||
|
@ -1359,39 +1359,42 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
|
|||
|
||||
#ifdef USE_LIVENESS_ANALYSIS
|
||||
|
||||
#define TS_DEAD 1
|
||||
#define TS_MEM 2
|
||||
|
||||
/* liveness analysis: end of function: all temps are dead, and globals
|
||||
should be in memory. */
|
||||
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
|
||||
uint8_t *mem_temps)
|
||||
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
|
||||
{
|
||||
memset(dead_temps, 1, s->nb_temps);
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
|
||||
memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
|
||||
memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
|
||||
}
|
||||
|
||||
/* liveness analysis: end of basic block: all temps are dead, globals
|
||||
and local temps should be in memory. */
|
||||
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
|
||||
uint8_t *mem_temps)
|
||||
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
|
||||
{
|
||||
int i;
|
||||
int i, n;
|
||||
|
||||
memset(dead_temps, 1, s->nb_temps);
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
for(i = s->nb_globals; i < s->nb_temps; i++) {
|
||||
mem_temps[i] = s->temps[i].temp_local;
|
||||
tcg_la_func_end(s, temp_state);
|
||||
for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
|
||||
if (s->temps[i].temp_local) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Unicorn: for brcond, we should refresh liveness states for TCG globals
|
||||
*/
|
||||
static inline void tcg_la_br_end(TCGContext *s, uint8_t *mem_temps)
|
||||
static inline void tcg_la_br_end(TCGContext *s, uint8_t *temp_state)
|
||||
{
|
||||
int i;
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
for (i = 0; i < s->nb_globals; i++) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
for(i = s->nb_globals; i < s->nb_temps; i++) {
|
||||
mem_temps[i] = s->temps[i].temp_local;
|
||||
temp_state[i] = s->temps[i].temp_local;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1400,12 +1403,12 @@ static inline void tcg_la_br_end(TCGContext *s, uint8_t *mem_temps)
|
|||
temporaries are removed. */
|
||||
static void tcg_liveness_analysis(TCGContext *s)
|
||||
{
|
||||
uint8_t *dead_temps, *mem_temps;
|
||||
uint8_t *temp_state;
|
||||
int oi, oi_prev;
|
||||
int nb_globals = s->nb_globals;
|
||||
|
||||
dead_temps = tcg_malloc(s, s->nb_temps);
|
||||
mem_temps = tcg_malloc(s, s->nb_temps);
|
||||
tcg_la_func_end(s, dead_temps, mem_temps);
|
||||
temp_state = tcg_malloc(s, s->nb_temps);
|
||||
tcg_la_func_end(s, temp_state);
|
||||
|
||||
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
|
||||
int i, nb_iargs, nb_oargs;
|
||||
|
@ -1434,7 +1437,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
||||
if (temp_state[arg] != TS_DEAD) {
|
||||
goto do_not_remove_call;
|
||||
}
|
||||
}
|
||||
|
@ -1445,34 +1448,40 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
/* output args are dead */
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (dead_temps[arg]) {
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
if (mem_temps[arg]) {
|
||||
if (temp_state[arg] & TS_MEM) {
|
||||
arg_life |= SYNC_ARG << i;
|
||||
}
|
||||
dead_temps[arg] = 1;
|
||||
mem_temps[arg] = 0;
|
||||
temp_state[arg] = TS_DEAD;
|
||||
}
|
||||
|
||||
if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
||||
/* globals should be synced to memory */
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
}
|
||||
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
|
||||
TCG_CALL_NO_READ_GLOBALS))) {
|
||||
/* globals should go back to memory */
|
||||
memset(dead_temps, 1, s->nb_globals);
|
||||
memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
|
||||
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
|
||||
/* globals should be synced to memory */
|
||||
for (i = 0; i < nb_globals; i++) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
}
|
||||
|
||||
/* input args are live */
|
||||
/* record arguments that die in this helper */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||
if (dead_temps[arg]) {
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
dead_temps[arg] = 0;
|
||||
}
|
||||
}
|
||||
/* input arguments are live for preceding opcodes */
|
||||
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (arg != TCG_CALL_DUMMY_ARG) {
|
||||
temp_state[arg] &= ~TS_DEAD;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1482,8 +1491,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
break;
|
||||
case INDEX_op_discard:
|
||||
/* mark the temporary as dead */
|
||||
dead_temps[args[0]] = 1;
|
||||
mem_temps[args[0]] = 0;
|
||||
temp_state[args[0]] = TS_DEAD;
|
||||
break;
|
||||
|
||||
case INDEX_op_add2_i32:
|
||||
|
@ -1504,8 +1512,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
the low part. The result can be optimized to a simple
|
||||
add or sub. This happens often for x86_64 guest when the
|
||||
cpu mode is set to 32 bit. */
|
||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
||||
if (temp_state[args[1]] == TS_DEAD) {
|
||||
if (temp_state[args[0]] == TS_DEAD) {
|
||||
goto do_remove;
|
||||
}
|
||||
/* Replace the opcode and adjust the args in place,
|
||||
|
@ -1542,8 +1550,8 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
do_mul2:
|
||||
nb_iargs = 2;
|
||||
nb_oargs = 2;
|
||||
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
|
||||
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
|
||||
if (temp_state[args[1]] == TS_DEAD) {
|
||||
if (temp_state[args[0]] == TS_DEAD) {
|
||||
/* Both parts of the operation are dead. */
|
||||
goto do_remove;
|
||||
}
|
||||
|
@ -1551,8 +1559,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
op->opc = opc = opc_new;
|
||||
args[1] = args[2];
|
||||
args[2] = args[3];
|
||||
} else if (have_opc_new2 && dead_temps[args[0]]
|
||||
&& !mem_temps[args[0]]) {
|
||||
} else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
|
||||
/* The low part of the operation is dead; generate the high. */
|
||||
op->opc = opc = opc_new2;
|
||||
args[0] = args[1];
|
||||
|
@ -1575,8 +1582,7 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
implies side effects */
|
||||
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (!dead_temps[arg] || mem_temps[arg]) {
|
||||
if (temp_state[args[i]] != TS_DEAD) {
|
||||
goto do_not_remove;
|
||||
}
|
||||
}
|
||||
|
@ -1587,14 +1593,13 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
/* output args are dead */
|
||||
for (i = 0; i < nb_oargs; i++) {
|
||||
arg = args[i];
|
||||
if (dead_temps[arg]) {
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
if (mem_temps[arg]) {
|
||||
if (temp_state[arg] & TS_MEM) {
|
||||
arg_life |= SYNC_ARG << i;
|
||||
}
|
||||
dead_temps[arg] = 1;
|
||||
mem_temps[arg] = 0;
|
||||
temp_state[arg] = TS_DEAD;
|
||||
}
|
||||
|
||||
/* if end of basic block, update */
|
||||
|
@ -1603,27 +1608,33 @@ static void tcg_liveness_analysis(TCGContext *s)
|
|||
// this causes problem because check_exit_request() inserts
|
||||
// brcond instruction in the middle of the TB,
|
||||
// which incorrectly flags end-of-block
|
||||
if (opc != INDEX_op_brcond_i32)
|
||||
tcg_la_bb_end(s, dead_temps, mem_temps);
|
||||
// Unicorn: we do not touch dead temps for brcond,
|
||||
// but we should refresh TCG globals In-Memory states,
|
||||
// otherwise, important CPU states(especially conditional flags) might be forgotten,
|
||||
// result in wrongly generated host code that run into wrong branch.
|
||||
// Refer to https://github.com/unicorn-engine/unicorn/issues/287 for further information
|
||||
else
|
||||
tcg_la_br_end(s, mem_temps);
|
||||
if (opc != INDEX_op_brcond_i32) {
|
||||
tcg_la_bb_end(s, temp_state);
|
||||
} else {
|
||||
// Unicorn: we do not touch dead temps for brcond,
|
||||
// but we should refresh TCG globals In-Memory states,
|
||||
// otherwise, important CPU states(especially conditional flags) might be forgotten,
|
||||
// result in wrongly generated host code that run into wrong branch.
|
||||
// Refer to https://github.com/unicorn-engine/unicorn/issues/287 for further information
|
||||
tcg_la_br_end(s, temp_state);
|
||||
}
|
||||
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
|
||||
/* globals should be synced to memory */
|
||||
memset(mem_temps, 1, s->nb_globals);
|
||||
for (i = 0; i < nb_globals; i++) {
|
||||
temp_state[i] |= TS_MEM;
|
||||
}
|
||||
}
|
||||
|
||||
/* input args are live */
|
||||
/* record arguments that die in this opcode */
|
||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||
arg = args[i];
|
||||
if (dead_temps[arg]) {
|
||||
if (temp_state[arg] & TS_DEAD) {
|
||||
arg_life |= DEAD_ARG << i;
|
||||
}
|
||||
dead_temps[arg] = 0;
|
||||
}
|
||||
/* input arguments are live for preceding opcodes */
|
||||
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
|
||||
temp_state[args[i]] &= ~TS_DEAD;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue