tcg: Add INDEX_op_dupm_vec

Allow the backend to expand dup from memory directly, instead of
forcing the value into a temp first. This is especially important
if integer/vector register moves do not exist.

Note that officially tcg_out_dupm_vec is allowed to fail.
If it did, we could fix this up relatively easily:

VECE == 32/64:
Load the value into a vector register, then dup.
Both of these must work.

VECE == 8/16:
If the value happens to be at an offset such that an aligned
load would place the desired value in the least significant
end of the register, go ahead and load w/garbage in high bits.

Load the value w/INDEX_op_ld{8,16}_i32.
Attempt a move directly to vector reg, which may fail.
Store the value into the backing store for OTS.
Load the value into the vector reg w/TCG_TYPE_I32, which must work.
Duplicate from the vector reg into itself, which must work.

All of which is well and good, except that all supported
hosts can support dupm for all vece, so all of the failure
paths would be dead code and untestable.

Backports commit 37ee55a081b7863ffab2151068dd1b2f11376914 from qemu
This commit is contained in:
Richard Henderson 2019-05-16 15:37:57 -04:00 committed by Lioncash
parent fd7a67e4a7
commit 66e6bea084
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
24 changed files with 103 additions and 42 deletions

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_aarch64
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_aarch64
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_aarch64
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_aarch64
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_aarch64
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_aarch64
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_aarch64
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_aarch64
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_aarch64
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_aarch64

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_aarch64eb
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_aarch64eb
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_aarch64eb
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_aarch64eb
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_aarch64eb
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_aarch64eb
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_aarch64eb
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_aarch64eb
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_aarch64eb
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_aarch64eb

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_arm
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_arm
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_arm
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_arm
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_arm
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_arm
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_arm
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_arm
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_arm
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_arm

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_armeb
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_armeb
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_armeb
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_armeb
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_armeb
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_armeb
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_armeb
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_armeb
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_armeb
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_armeb

View file

@ -2816,8 +2816,10 @@ symbols = (
'tcg_gen_dup32i_vec',
'tcg_gen_dup64i_vec',
'tcg_gen_dupi_vec',
'tcg_gen_dupm_vec',
'tcg_gen_dup_i32_vec',
'tcg_gen_dup_i64_vec',
'tcg_gen_dup_mem_vec',
'tcg_gen_eqv_i32',
'tcg_gen_eqv_i64',
'tcg_gen_eqv_vec',

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_m68k
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_m68k
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_m68k
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_m68k
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_m68k
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_m68k
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_m68k
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_m68k
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_m68k
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_m68k

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_mips
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_mips
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_mips
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_mips
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_mips
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_mips
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_mips
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_mips
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_mips
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_mips

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_mips64
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_mips64
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_mips64
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_mips64
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_mips64
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_mips64
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_mips64
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_mips64
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_mips64
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_mips64

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_mips64el
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_mips64el
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_mips64el
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_mips64el
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_mips64el
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_mips64el
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_mips64el
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_mips64el
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_mips64el
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_mips64el

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_mipsel
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_mipsel
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_mipsel
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_mipsel
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_mipsel
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_mipsel
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_mipsel
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_mipsel
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_mipsel
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_mipsel

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_powerpc
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_powerpc
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_powerpc
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_powerpc
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_powerpc
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_powerpc
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_powerpc
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_powerpc
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_powerpc
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_powerpc

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_riscv32
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_riscv32
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_riscv32
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_riscv32
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_riscv32
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_riscv32
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_riscv32
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_riscv32
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_riscv32
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_riscv32

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_riscv64
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_riscv64
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_riscv64
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_riscv64
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_riscv64
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_riscv64
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_riscv64
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_riscv64
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_riscv64
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_riscv64

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_sparc
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_sparc
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_sparc
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_sparc
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_sparc
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_sparc
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_sparc
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_sparc
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_sparc
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_sparc

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_sparc64
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_sparc64
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_sparc64
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_sparc64
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_sparc64
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_sparc64
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_sparc64
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_sparc64
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_sparc64
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_sparc64

View file

@ -2173,6 +2173,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_add_vec:
tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
break;
@ -2505,6 +2508,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
return &w_w;
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_dupm_vec:
return &w_r;
case INDEX_op_dup_vec:
return &w_wr;

View file

@ -2857,6 +2857,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_st_vec:
tcg_out_st(s, type, a0, a1, a2);
break;
case INDEX_op_dupm_vec:
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
case INDEX_op_x86_shufps_vec:
insn = OPC_SHUFPS;
@ -3144,6 +3147,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_dupm_vec:
return &x_r;
case INDEX_op_add_vec:

View file

@ -396,6 +396,41 @@ static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
return 0;
}
/*
 * Store the already-replicated vector T_VEC over OPRSZ bytes starting at
 * offset DOFS from cpu_env, using stores of the width selected by TYPE,
 * then clear the remaining bytes up to MAXSZ (if any) with expand_clr.
 */
static void do_dup_store(TCGContext *s, TCGType type, uint32_t dofs, uint32_t oprsz,
                         uint32_t maxsz, TCGv_vec t_vec)
{
    uint32_t pos = 0;

    switch (type) {
    case TCG_TYPE_V256:
        /*
         * ARM SVE permits vector sizes that are not a power of 2 but are
         * always a multiple of 16.  For example, size == 80 is meant to
         * be covered by 2x32-byte stores here followed by 1x16-byte
         * store in the V128 case below.
         */
        while (pos + 32 <= oprsz) {
            tcg_gen_stl_vec(s, t_vec, s->cpu_env, dofs + pos, TCG_TYPE_V256);
            pos += 32;
        }
        /* fallthru */
    case TCG_TYPE_V128:
        /* Also picks up any 16-byte remainder left by the V256 loop. */
        while (pos + 16 <= oprsz) {
            tcg_gen_stl_vec(s, t_vec, s->cpu_env, dofs + pos, TCG_TYPE_V128);
            pos += 16;
        }
        break;
    case TCG_TYPE_V64:
        while (pos < oprsz) {
            tcg_gen_stl_vec(s, t_vec, s->cpu_env, dofs + pos, TCG_TYPE_V64);
            pos += 8;
        }
        break;
    default:
        /* Only the three vector types above are expected here. */
        g_assert_not_reached();
    }

    /* Zero the tail between the operation size and the maximum size. */
    if (maxsz > oprsz) {
        expand_clr(s, dofs + oprsz, maxsz - oprsz);
    }
}
/* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
* Only one of IN_32 or IN_64 may be set;
* IN_C is used if IN_32 and IN_64 are unset.
@ -435,49 +470,11 @@ static void do_dup(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t oprsz,
} else if (in_64) {
tcg_gen_dup_i64_vec(s, vece, t_vec, in_64);
} else {
switch (vece) {
case MO_8:
tcg_gen_dup8i_vec(s, t_vec, in_c);
break;
case MO_16:
tcg_gen_dup16i_vec(s, t_vec, in_c);
break;
case MO_32:
tcg_gen_dup32i_vec(s, t_vec, in_c);
break;
default:
tcg_gen_dup64i_vec(s, t_vec, in_c);
break;
}
tcg_gen_dupi_vec(s, vece, t_vec, in_c);
}
i = 0;
switch (type) {
case TCG_TYPE_V256:
/* Recall that ARM SVE allows vector sizes that are not a
* power of 2, but always a multiple of 16. The intent is
* that e.g. size == 80 would be expanded with 2x32 + 1x16.
*/
for (; i + 32 <= oprsz; i += 32) {
tcg_gen_stl_vec(s, t_vec, s->cpu_env, dofs + i, TCG_TYPE_V256);
}
/* fallthru */
case TCG_TYPE_V128:
for (; i + 16 <= oprsz; i += 16) {
tcg_gen_stl_vec(s, t_vec, s->cpu_env, dofs + i, TCG_TYPE_V128);
}
break;
case TCG_TYPE_V64:
for (; i < oprsz; i += 8) {
tcg_gen_stl_vec(s, t_vec, s->cpu_env, dofs + i, TCG_TYPE_V64);
}
break;
default:
g_assert_not_reached();
}
do_dup_store(s, type, dofs, oprsz, maxsz, t_vec);
tcg_temp_free_vec(s, t_vec);
goto done;
return;
}
/* Otherwise, inline with an integer type, unless "large". */
@ -1451,6 +1448,16 @@ void tcg_gen_gvec_dup_i64(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t
void tcg_gen_gvec_dup_mem(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t oprsz, uint32_t maxsz)
{
if (vece <= MO_64) {
TCGType type = choose_vector_type(0, vece, oprsz, 0);
if (type != 0) {
TCGv_vec t_vec = tcg_temp_new_vec(s, type);
tcg_gen_dup_mem_vec(s, vece, t_vec, s->cpu_env, aofs);
do_dup_store(s, type, dofs, oprsz, maxsz, t_vec);
tcg_temp_free_vec(s, t_vec);
return;
}
}
if (vece <= MO_32) {
TCGv_i32 in = tcg_temp_new_i32(s);
switch (vece) {

View file

@ -279,6 +279,17 @@ void tcg_gen_dup_i32_vec(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_i32 a)
vec_gen_2(s, INDEX_op_dup_vec, type, vece, ri, ai);
}
/*
 * Emit an INDEX_op_dupm_vec op with destination vector R, base pointer B
 * and offset OFS, for element size VECE.  The vector type passed to the
 * op is the base_type of the destination temp.
 */
void tcg_gen_dup_mem_vec(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg dst_arg = tcgv_vec_arg(s, r);
    TCGArg base_arg = tcgv_ptr_arg(s, b);
    /* The op's vector type follows the destination temp. */
    TCGType vec_type = arg_temp(dst_arg)->base_type;

    vec_gen_3(s, INDEX_op_dupm_vec, vec_type, vece, dst_arg, base_arg, ofs);
}
static void vec_gen_ldst(TCGContext *s, TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
TCGArg ri = tcgv_vec_arg(s, r);

View file

@ -967,6 +967,7 @@ void tcg_gen_atomic_umax_fetch_i64(TCGContext *, TCGv_i64, TCGv, TCGv_i64, TCGAr
void tcg_gen_mov_vec(TCGContext *, TCGv_vec, TCGv_vec);
void tcg_gen_dup_i32_vec(TCGContext *, unsigned vece, TCGv_vec, TCGv_i32);
void tcg_gen_dup_i64_vec(TCGContext *, unsigned vece, TCGv_vec, TCGv_i64);
void tcg_gen_dup_mem_vec(TCGContext *, unsigned vece, TCGv_vec, TCGv_ptr, tcg_target_long);
void tcg_gen_dup8i_vec(TCGContext *, TCGv_vec, uint32_t);
void tcg_gen_dup16i_vec(TCGContext *, TCGv_vec, uint32_t);
void tcg_gen_dup32i_vec(TCGContext *, TCGv_vec, uint32_t);

View file

@ -224,6 +224,7 @@ DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(ld_vec, 1, 1, 1, IMPLVEC)
DEF(st_vec, 0, 2, 1, IMPLVEC)
DEF(dupm_vec, 1, 1, 1, IMPLVEC)
DEF(add_vec, 1, 2, 0, IMPLVEC)
DEF(sub_vec, 1, 2, 0, IMPLVEC)

View file

@ -1042,6 +1042,7 @@ bool tcg_op_supported(TCGOpcode op)
case INDEX_op_mov_vec:
case INDEX_op_dup_vec:
case INDEX_op_dupi_vec:
case INDEX_op_dupm_vec:
case INDEX_op_ld_vec:
case INDEX_op_st_vec:
case INDEX_op_add_vec:

View file

@ -1175,7 +1175,7 @@ static inline TCGv_ptr tcg_temp_local_new_ptr(TCGContext *s)
}
// UNICORN: Added
#define TCG_OP_DEFS_TABLE_SIZE 181
#define TCG_OP_DEFS_TABLE_SIZE 182
extern const TCGOpDef tcg_op_defs_org[TCG_OP_DEFS_TABLE_SIZE];
typedef struct TCGTargetOpDef {

View file

@ -2810,8 +2810,10 @@
#define tcg_gen_dup32i_vec tcg_gen_dup32i_vec_x86_64
#define tcg_gen_dup64i_vec tcg_gen_dup64i_vec_x86_64
#define tcg_gen_dupi_vec tcg_gen_dupi_vec_x86_64
#define tcg_gen_dupm_vec tcg_gen_dupm_vec_x86_64
#define tcg_gen_dup_i32_vec tcg_gen_dup_i32_vec_x86_64
#define tcg_gen_dup_i64_vec tcg_gen_dup_i64_vec_x86_64
#define tcg_gen_dup_mem_vec tcg_gen_dup_mem_vec_x86_64
#define tcg_gen_eqv_i32 tcg_gen_eqv_i32_x86_64
#define tcg_gen_eqv_i64 tcg_gen_eqv_i64_x86_64
#define tcg_gen_eqv_vec tcg_gen_eqv_vec_x86_64