tcg: Add tcg_reg_alloc_dup2

There are several ways we can expand a vector dup of a 64-bit
element on a 32-bit host.

Backports upstream commit efe86b21ead9b5d256ce90c378e31681c5e243a5.
This commit is contained in:
Richard Henderson 2021-03-04 13:18:55 -05:00 committed by Lioncash
parent 471fc98c49
commit 1b811b8546

View file

@ -3392,6 +3392,98 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
} }
} }
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
const TCGLifeData arg_life = op->life;
TCGTemp *ots, *itsl, *itsh;
TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
/* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
tcg_debug_assert(TCGOP_VECE(op) == MO_64);
ots = arg_temp(op->args[0]);
itsl = arg_temp(op->args[1]);
itsh = arg_temp(op->args[2]);
/* ENV should not be modified. */
tcg_debug_assert(!temp_readonly(ots));
/* Allocate the output register now. */
if (ots->val_type != TEMP_VAL_REG) {
TCGRegSet allocated_regs = s->reserved_regs;
TCGRegSet dup_out_regs =
s->tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
/* Make sure to not spill the input registers. */
if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
tcg_regset_set_reg(allocated_regs, itsl->reg);
}
if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
tcg_regset_set_reg(allocated_regs, itsh->reg);
}
ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
op->output_pref[0], ots->indirect_base);
ots->val_type = TEMP_VAL_REG;
ots->mem_coherent = 0;
s->reg_to_temp[ots->reg] = ots;
}
/* Promote dup2 of immediates to dupi_vec. */
if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
MemOp vece = MO_64;
if (val == dup_const(MO_8, val)) {
vece = MO_8;
} else if (val == dup_const(MO_16, val)) {
vece = MO_16;
} else if (val == dup_const(MO_32, val)) {
vece = MO_32;
}
tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
goto done;
}
/* If the two inputs form one 64-bit value, try dupm_vec. */
if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
if (!itsl->mem_coherent) {
temp_sync(s, itsl, s->reserved_regs, 0, 0);
}
if (!itsh->mem_coherent) {
temp_sync(s, itsh, s->reserved_regs, 0, 0);
}
#ifdef HOST_WORDS_BIGENDIAN
TCGTemp *its = itsh;
#else
TCGTemp *its = itsl;
#endif
if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
its->mem_base->reg, its->mem_offset)) {
goto done;
}
}
/* Fall back to generic expansion. */
return false;
done:
if (IS_DEAD_ARG(1)) {
temp_dead(s, itsl);
}
if (IS_DEAD_ARG(2)) {
temp_dead(s, itsh);
}
if (NEED_SYNC_ARG(0)) {
temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
} else if (IS_DEAD_ARG(0)) {
temp_dead(s, ots);
}
return true;
}
#ifdef TCG_TARGET_STACK_GROWSUP #ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x)) #define STACK_DIR(x) (-(x))
#else #else
@ -3699,6 +3791,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
case INDEX_op_call: case INDEX_op_call:
tcg_reg_alloc_call(s, op); tcg_reg_alloc_call(s, op);
break; break;
case INDEX_op_dup2_vec:
if (tcg_reg_alloc_dup2(s, op)) {
break;
}
/* fall through */
default: default:
/* Sanity check that we've not introduced any unhandled opcodes. */ /* Sanity check that we've not introduced any unhandled opcodes. */
tcg_debug_assert(tcg_op_supported(opc)); tcg_debug_assert(tcg_op_supported(opc));