target/arm: Convert Neon 2-reg-misc VREV32 and VREV16 to decodetree

Convert the VREV32 and VREV16 insns in the Neon 2-reg-misc group
to decodetree.

Backports commit 8966808205b59d6c196b380b638475bcd1657ef4 from qemu
This commit is contained in:
Peter Maydell 2021-02-25 12:42:43 -05:00 committed by Lioncash
parent db1e503708
commit 88f8111500
4 changed files with 74 additions and 27 deletions

View file

@ -445,6 +445,8 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
&2misc vm=%vm_dp vd=%vd_dp q=1
VREV64 1111 001 11 . 11 .. 00 .... 0 0000 . . 0 .... @2misc
VREV32 1111 001 11 . 11 .. 00 .... 0 0001 . . 0 .... @2misc
VREV16 1111 001 11 . 11 .. 00 .... 0 0010 . . 0 .... @2misc
VPADDL_S 1111 001 11 . 11 .. 00 .... 0 0100 . . 0 .... @2misc
VPADDL_U 1111 001 11 . 11 .. 00 .... 0 0101 . . 0 .... @2misc

View file

@ -3045,7 +3045,7 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
tcg_gen_bswap32_i32(tcg_ctx, tmp[half], tmp[half]);
break;
case 1:
gen_swap_half(s, tmp[half], tmp[half]);
gen_swap_half(tcg_ctx, tmp[half], tmp[half]);
break;
case 2:
break;
@ -3592,3 +3592,59 @@ DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
/*
 * Shared translator for Neon 2-reg-misc operations that can be performed
 * by applying @fn independently to each 32-bit piece of the source register.
 *
 * @s:  disassembly context for the instruction being translated
 * @a:  decoded fields; this function reads vd, vm and q
 * @fn: generator applied in place to each loaded 32-bit value; may be NULL,
 *      in which case the encoding is rejected
 *
 * Returns false when the encoding is rejected (Neon absent, D16-D31 used
 * when they do not exist, no generator supplied, or a register number
 * overlapping the q bit).  Returns true otherwise, including the case where
 * vfp_access_check() fails and no per-element code is generated here.
 */
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int npasses;
    int i;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s)) {
        if ((a->vd | a->vm) & 0x10) {
            return false;
        }
    }

    /* Callers pass NULL for element sizes that have no implementation. */
    if (fn == NULL) {
        return false;
    }

    /* Reject register numbers that have a bit in common with q. */
    if (a->q & (a->vd | a->vm)) {
        return false;
    }

    /*
     * NOTE(review): presumably vfp_access_check() has already arranged for
     * the appropriate exception when it returns false -- confirm.
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    /* Four 32-bit passes when q is set, two otherwise. */
    npasses = a->q ? 4 : 2;
    for (i = 0; i < npasses; i++) {
        TCGv_i32 elt = neon_load_reg(s, a->vm, i);

        fn(tcg_ctx, elt, elt);
        neon_store_reg(s, a->vd, i, elt);
    }

    return true;
}
/*
 * VREV32: pick the per-32-bit generator from the element size and hand the
 * rest of the work to do_2misc().
 *
 *   size 0 -> tcg_gen_bswap32_i32
 *   size 1 -> gen_swap_half
 *   size 2, 3 -> no generator, so do_2misc() rejects the encoding
 */
static bool trans_VREV32(DisasContext *s, arg_2misc *a)
{
    /* Entries not listed are zero-initialised, i.e. NULL. */
    static NeonGenOneOpFn * const rev32_ops[4] = {
        [0] = tcg_gen_bswap32_i32,
        [1] = gen_swap_half,
    };
    NeonGenOneOpFn *op = rev32_ops[a->size];

    return do_2misc(s, a, op);
}
/*
 * VREV16: only size 0 is accepted; it is translated by running gen_rev16
 * over each 32-bit piece via do_2misc().  Any other size returns false.
 */
static bool trans_VREV16(DisasContext *s, arg_2misc *a)
{
    if (a->size == 0) {
        return do_2misc(s, a, gen_rev16);
    }

    return false;
}

View file

@ -362,9 +362,8 @@ static void gen_smul_dual(DisasContext *s, TCGv_i32 a, TCGv_i32 b)
}
/* Byteswap each halfword. */
static void gen_rev16(DisasContext *s, TCGv_i32 dest, TCGv_i32 var)
static void gen_rev16(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
TCGv_i32 mask = tcg_const_i32(tcg_ctx, 0x00ff00ff);
tcg_gen_shri_i32(tcg_ctx, tmp, var, 8);
@ -377,19 +376,17 @@ static void gen_rev16(DisasContext *s, TCGv_i32 dest, TCGv_i32 var)
}
/* Byteswap low halfword and sign extend. */
static void gen_revsh(DisasContext *s, TCGv_i32 dest, TCGv_i32 var)
static void gen_revsh(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
tcg_gen_ext16u_i32(tcg_ctx, var, var);
tcg_gen_bswap16_i32(tcg_ctx, var, var);
tcg_gen_ext16s_i32(tcg_ctx, dest, var);
}
/* Swap low and high halfwords. */
static void gen_swap_half(DisasContext *s, TCGv_i32 dest, TCGv_i32 var)
static void gen_swap_half(TCGContext *s, TCGv_i32 dest, TCGv_i32 var)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
tcg_gen_rotri_i32(tcg_ctx, dest, var, 16);
tcg_gen_rotri_i32(s, dest, var, 16);
}
/* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
@ -5041,6 +5038,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_2RM_AESE: case NEON_2RM_AESMC:
case NEON_2RM_SHA1H:
case NEON_2RM_SHA1SU1:
case NEON_2RM_VREV32:
case NEON_2RM_VREV16:
/* handled by decodetree */
return 1;
case NEON_2RM_VTRN:
@ -5062,16 +5061,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
for (pass = 0; pass < (q ? 4 : 2); pass++) {
tmp = neon_load_reg(s, rm, pass);
switch (op) {
case NEON_2RM_VREV32:
switch (size) {
case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break;
case 1: gen_swap_half(s, tmp, tmp); break;
default: abort();
}
break;
case NEON_2RM_VREV16:
gen_rev16(s, tmp, tmp);
break;
case NEON_2RM_VCLS:
switch (size) {
case 0: gen_helper_neon_cls_s8(tcg_ctx, tmp, tmp); break;
@ -6087,10 +6076,9 @@ static void gen_ext16u_i32(DisasContext *s, TCGv_i32 dest, TCGv_i32 src)
tcg_gen_ext16u_i32(tcg_ctx, dest, src);
}
static void gen_bswap32_i32(DisasContext *s, TCGv_i32 dest, TCGv_i32 src)
static void gen_bswap32_i32(TCGContext *s, TCGv_i32 dest, TCGv_i32 src)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
tcg_gen_bswap32_i32(tcg_ctx, dest, src);
tcg_gen_bswap32_i32(s, dest, src);
}
static void gen_ssat_dectree(DisasContext *s, TCGv_i32 dest, TCGv_env env, TCGv_i32 a, TCGv_i32 b)
@ -6129,9 +6117,8 @@ static void gen_uxtb16_dectree(DisasContext *s, TCGv_i32 dest, TCGv_i32 src)
gen_helper_uxtb16(tcg_ctx, dest, src);
}
static void gen_rbit_dectree(DisasContext* s, TCGv_i32 dest, TCGv_i32 src)
static void gen_rbit_dectree(TCGContext* tcg_ctx, TCGv_i32 dest, TCGv_i32 src)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
gen_helper_rbit(tcg_ctx, dest, src);
}
@ -8250,12 +8237,13 @@ static bool trans_SEL(DisasContext *s, arg_rrr *a)
}
static bool op_rr(DisasContext *s, arg_rr *a,
void (*gen)(DisasContext* s, TCGv_i32, TCGv_i32))
void (*gen)(TCGContext* s, TCGv_i32, TCGv_i32))
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
TCGv_i32 tmp;
tmp = load_reg(s, a->rm);
gen(s, tmp, tmp);
gen(tcg_ctx, tmp, tmp);
store_reg(s, a->rd, tmp);
return true;
}
@ -8308,7 +8296,7 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
t1 = load_reg(s, a->rn);
t2 = load_reg(s, a->rm);
if (m_swap) {
gen_swap_half(s, t2, t2);
gen_swap_half(tcg_ctx, t2, t2);
}
gen_smul_dual(s, t1, t2);
@ -8367,7 +8355,7 @@ static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
t1 = load_reg(s, a->rn);
t2 = load_reg(s, a->rm);
if (m_swap) {
gen_swap_half(s, t2, t2);
gen_swap_half(tcg_ctx, t2, t2);
}
gen_smul_dual(s, t1, t2);

View file

@ -370,6 +370,7 @@ typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t);
/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpFn(TCGContext *t, TCGv_i32, TCGv_i32);
typedef void NeonGenOneOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGContext *t, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);