tcg: Manually expand INDEX_op_dup_vec

This case is similar to INDEX_op_mov_* in that we need to do
different things depending on the current location of the source.

Backports commit bab1671f0fa928fd678a22f934739f06fd5fd035 from qemu
This commit is contained in:
Richard Henderson 2019-05-16 15:22:27 -04:00 committed by Lioncash
parent 3d20e1678c
commit cf238d3544
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
3 changed files with 118 additions and 10 deletions

View file

@ -2090,10 +2090,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
case INDEX_op_mov_vec:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
case INDEX_op_movi_i64:
case INDEX_op_dupi_vec:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
g_assert_not_reached();
@ -2190,9 +2188,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_not_vec:
tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
break;
case INDEX_op_dup_vec:
tcg_out_dup_vec(s, type, vece, a0, a1);
break;
case INDEX_op_shli_vec:
tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
break;
@ -2236,6 +2231,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
}
}
break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
g_assert_not_reached();
}

View file

@ -2631,10 +2631,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
case INDEX_op_mov_i64:
case INDEX_op_mov_vec:
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
case INDEX_op_movi_i64:
case INDEX_op_dupi_vec:
case INDEX_op_call: /* Always emitted via tcg_out_call. */
default:
tcg_abort();
@ -2823,9 +2821,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_dup_vec:
tcg_out_dup_vec(s, type, vece, a0, a1);
break;
case INDEX_op_x86_shufps_vec:
insn = OPC_SHUFPS;
@ -2867,6 +2862,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out8(s, a2);
break;
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
g_assert_not_reached();
}

View file

@ -2762,6 +2762,9 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
save_globals(s, allocated_regs);
}
/*
* Specialized code generation for INDEX_op_movi_*.
*/
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
tcg_target_ulong val, TCGLifeData arg_life,
TCGRegSet preferred_regs)
@ -2791,6 +2794,9 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
}
/*
* Specialized code generation for INDEX_op_mov_*.
*/
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
const TCGLifeData arg_life = op->life;
@ -2885,6 +2891,108 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
}
}
/*
* Specialized code generation for INDEX_op_dup_vec.
*/
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
const TCGLifeData arg_life = op->life;
TCGRegSet dup_out_regs, dup_in_regs;
TCGTemp *its, *ots;
TCGType itype, vtype;
unsigned vece;
bool ok;
ots = arg_temp(op->args[0]);
its = arg_temp(op->args[1]);
/* ENV should not be modified. */
tcg_debug_assert(!ots->fixed_reg);
itype = its->type;
vece = TCGOP_VECE(op);
vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
if (its->val_type == TEMP_VAL_CONST) {
/* Propagate constant via movi -> dupi. */
tcg_target_ulong val = its->val;
if (IS_DEAD_ARG(1)) {
temp_dead(s, its);
}
tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
return;
}
dup_out_regs = s->tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
dup_in_regs = s->tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
/* Allocate the output register now. */
if (ots->val_type != TEMP_VAL_REG) {
TCGRegSet allocated_regs = s->reserved_regs;
if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
/* Make sure to not spill the input register. */
tcg_regset_set_reg(allocated_regs, its->reg);
}
ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
op->output_pref[0], ots->indirect_base);
ots->val_type = TEMP_VAL_REG;
ots->mem_coherent = 0;
s->reg_to_temp[ots->reg] = ots;
}
switch (its->val_type) {
case TEMP_VAL_REG:
/*
* The dup constriaints must be broad, covering all possible VECE.
* However, tcg_op_dup_vec() gets to see the VECE and we allow it
* to fail, indicating that extra moves are required for that case.
*/
if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
goto done;
}
/* Try again from memory or a vector input register. */
}
if (!its->mem_coherent) {
/*
* The input register is not synced, and so an extra store
* would be required to use memory. Attempt an integer-vector
* register move first. We do not have a TCGRegSet for this.
*/
if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
break;
}
/* Sync the temp back to its slot and load from there. */
temp_sync(s, its, s->reserved_regs, 0, 0);
}
/* fall through */
case TEMP_VAL_MEM:
/* TODO: dup from memory */
tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
break;
default:
g_assert_not_reached();
}
/* We now have a vector input register, so dup must succeed. */
ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
tcg_debug_assert(ok);
done:
if (IS_DEAD_ARG(1)) {
temp_dead(s, its);
}
if (NEED_SYNC_ARG(0)) {
temp_sync(s, ots, s->reserved_regs, 0, 0);
}
if (IS_DEAD_ARG(0)) {
temp_dead(s, ots);
}
}
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
const TCGLifeData arg_life = op->life;
@ -3357,6 +3465,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
case INDEX_op_dupi_vec:
tcg_reg_alloc_movi(s, op);
break;
case INDEX_op_dup_vec:
tcg_reg_alloc_dup(s, op);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);