target/arm: Convert Neon 2-reg-misc VRINT insns to decodetree

Convert the Neon 2-reg-misc VRINT insns to decodetree.
Giving these insns their own do_vrint() function allows us
to set the rounding mode once before processing the vector and
restore it once afterwards, rather than doing both for every element.

Backports commit 128123ea34e9e6afe4842aefcb9cf84b9642ac22 from qemu
This commit is contained in:
Peter Maydell 2021-02-25 13:02:22 -05:00 committed by Lioncash
parent 3eddb77327
commit 7e705fdc8c
3 changed files with 75 additions and 26 deletions

View file

@ -503,11 +503,19 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
SHA1SU1 1111 001 11 . 11 .. 10 .... 0 0111 0 . 0 .... @2misc_q1 SHA1SU1 1111 001 11 . 11 .. 10 .... 0 0111 0 . 0 .... @2misc_q1
SHA256SU0 1111 001 11 . 11 .. 10 .... 0 0111 1 . 0 .... @2misc_q1 SHA256SU0 1111 001 11 . 11 .. 10 .... 0 0111 1 . 0 .... @2misc_q1
VRINTN 1111 001 11 . 11 .. 10 .... 0 1000 . . 0 .... @2misc
VRINTX 1111 001 11 . 11 .. 10 .... 0 1001 . . 0 .... @2misc VRINTX 1111 001 11 . 11 .. 10 .... 0 1001 . . 0 .... @2misc
VRINTA 1111 001 11 . 11 .. 10 .... 0 1010 . . 0 .... @2misc
VRINTZ 1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc
VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0 VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
VRINTM 1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... @2misc
VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0 VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
VRINTP 1111 001 11 . 11 .. 10 .... 0 1111 . . 0 .... @2misc
VRECPE 1111 001 11 . 11 .. 11 .... 0 1000 . . 0 .... @2misc VRECPE 1111 001 11 . 11 .. 11 .... 0 1000 . . 0 .... @2misc
VRSQRTE 1111 001 11 . 11 .. 11 .... 0 1001 . . 0 .... @2misc VRSQRTE 1111 001 11 . 11 .. 11 .... 0 1001 . . 0 .... @2misc
VRECPE_F 1111 001 11 . 11 .. 11 .... 0 1010 . . 0 .... @2misc VRECPE_F 1111 001 11 . 11 .. 11 .... 0 1010 . . 0 .... @2misc

View file

@ -3843,3 +3843,65 @@ DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD)
DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD) DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD)
DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV) DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV)
DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV) DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV)
/*
 * Shared translator for the Neon 2-reg-misc VRINT* insns.
 *
 * Emits code that applies gen_helper_rints to register a->vm one 32-bit
 * pass at a time (4 passes when a->q is set, otherwise 2) and stores each
 * result into a->vd.  The rounding mode derived from @rmode is installed
 * once before the loop and the helper is invoked a second time after it,
 * instead of once per element.
 *
 * @s:     translation context for the insn being decoded
 * @a:     decoded 2-reg-misc fields (vd, vm, size, q are read here)
 * @rmode: FPROUNDING_* value, converted with arm_rmode_to_sf()
 *
 * Returns false when the encoding is rejected by one of the checks below,
 * and true once the insn has been dealt with (including the case where
 * vfp_access_check() fails).
 */
static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_ptr fpstatus;
    TCGv_i32 mode;
    int npasses;
    int i;

    /* Both the Neon and the v8 features are required. */
    if (!(arm_dc_feature(s, ARM_FEATURE_NEON) &&
          arm_dc_feature(s, ARM_FEATURE_V8))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s)) {
        if ((a->vd | a->vm) & 0x10) {
            return false;
        }
    }

    /* Only size == 2 is handled. TODO: FP16 will be the size == 1 case */
    if (a->size != 2) {
        return false;
    }

    /*
     * Reject encodings where q is set and either register number has
     * its low bit set (assuming q is 0 or 1 -- confirm against the
     * @2misc format).
     */
    if (a->q & (a->vd | a->vm)) {
        return false;
    }

    /*
     * Nothing more to emit when the access check fails; the insn still
     * counts as handled.  NOTE(review): presumably vfp_access_check()
     * has already generated the exception -- confirm.
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    npasses = a->q ? 4 : 2;
    fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
    mode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));

    /*
     * 'mode' is both the output and the input of the helper.
     * NOTE(review): this relies on the helper writing back the previous
     * rounding mode, so that the identical call after the loop restores
     * it -- confirm against the helper's definition.
     */
    gen_helper_set_neon_rmode(tcg_ctx, mode, mode, tcg_ctx->cpu_env);

    for (i = 0; i < npasses; i++) {
        TCGv_i32 elt = neon_load_reg(s, a->vm, i);

        gen_helper_rints(tcg_ctx, elt, elt, fpstatus);
        neon_store_reg(s, a->vd, i, elt);
    }

    gen_helper_set_neon_rmode(tcg_ctx, mode, mode, tcg_ctx->cpu_env);

    tcg_temp_free_i32(tcg_ctx, mode);
    tcg_temp_free_ptr(tcg_ctx, fpstatus);

    return true;
}
/*
 * Generate the trans_<NAME>() entry point for one VRINT variant.  Each
 * generated function is a thin wrapper that forwards to do_vrint() with
 * the rounding mode fixed for that insn.
 */
#define DO_VRINT(NAME, ROUNDING)                                \
    static bool trans_##NAME(DisasContext *s, arg_2misc *a)     \
    {                                                           \
        return do_vrint(s, a, ROUNDING);                        \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

View file

@ -5061,6 +5061,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_2RM_VCEQ0_F: case NEON_2RM_VCEQ0_F:
case NEON_2RM_VCLE0_F: case NEON_2RM_VCLE0_F:
case NEON_2RM_VCLT0_F: case NEON_2RM_VCLT0_F:
case NEON_2RM_VRINTN:
case NEON_2RM_VRINTA:
case NEON_2RM_VRINTM:
case NEON_2RM_VRINTP:
case NEON_2RM_VRINTZ:
/* handled by decodetree */ /* handled by decodetree */
return 1; return 1;
case NEON_2RM_VTRN: case NEON_2RM_VTRN:
@ -5095,32 +5100,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
} }
neon_store_reg(s, rm, pass, tmp2); neon_store_reg(s, rm, pass, tmp2);
break; break;
case NEON_2RM_VRINTN:
case NEON_2RM_VRINTA:
case NEON_2RM_VRINTM:
case NEON_2RM_VRINTP:
case NEON_2RM_VRINTZ:
{
TCGv_i32 tcg_rmode;
TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
int rmode;
if (op == NEON_2RM_VRINTZ) {
rmode = FPROUNDING_ZERO;
} else {
rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
}
tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode,
tcg_ctx->cpu_env);
gen_helper_rints(tcg_ctx, tmp, tmp, fpstatus);
gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode,
tcg_ctx->cpu_env);
tcg_temp_free_ptr(tcg_ctx, fpstatus);
tcg_temp_free_i32(tcg_ctx, tcg_rmode);
break;
}
case NEON_2RM_VCVTAU: case NEON_2RM_VCVTAU:
case NEON_2RM_VCVTAS: case NEON_2RM_VCVTAS:
case NEON_2RM_VCVTNU: case NEON_2RM_VCVTNU: