From 7e705fdc8cadc4007f2558b65e2cada7a9f52c5a Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Thu, 25 Feb 2021 13:02:22 -0500
Subject: [PATCH] target/arm: Convert Neon 2-reg-misc VRINT insns to decodetree

Convert the Neon 2-reg-misc VRINT insns to decodetree.
Giving these insns their own do_vrint() function allows us
to change the rounding mode just once at the start and end
rather than doing it for every element in the vector.

Backports commit 128123ea34e9e6afe4842aefcb9cf84b9642ac22 from qemu
---
 qemu/target/arm/neon-dp.decode       |  8 ++++
 qemu/target/arm/translate-neon.inc.c | 62 ++++++++++++++++++++++++++++
 qemu/target/arm/translate.c          | 31 +++-----------
 3 files changed, 75 insertions(+), 26 deletions(-)

diff --git a/qemu/target/arm/neon-dp.decode b/qemu/target/arm/neon-dp.decode
index c9acd00f..e0717c7e 100644
--- a/qemu/target/arm/neon-dp.decode
+++ b/qemu/target/arm/neon-dp.decode
@@ -503,11 +503,19 @@ Vimm_1r          1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
     SHA1SU1          1111 001 11 . 11 .. 10 .... 0 0111 0 . 0 .... @2misc_q1
     SHA256SU0        1111 001 11 . 11 .. 10 .... 0 0111 1 . 0 .... @2misc_q1
 
+    VRINTN           1111 001 11 . 11 .. 10 .... 0 1000 . . 0 .... @2misc
     VRINTX           1111 001 11 . 11 .. 10 .... 0 1001 . . 0 .... @2misc
+    VRINTA           1111 001 11 . 11 .. 10 .... 0 1010 . . 0 .... @2misc
+    VRINTZ           1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc
 
     VCVT_F16_F32     1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
+
+    VRINTM           1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... @2misc
+
     VCVT_F32_F16     1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
 
+    VRINTP           1111 001 11 . 11 .. 10 .... 0 1111 . . 0 .... @2misc
+
     VRECPE           1111 001 11 . 11 .. 11 .... 0 1000 . . 0 .... @2misc
     VRSQRTE          1111 001 11 . 11 .. 11 .... 0 1001 . . 0 .... @2misc
     VRECPE_F         1111 001 11 . 11 .. 11 .... 0 1010 . . 0 .... @2misc
diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c
index e732b5d9..3a96ea57 100644
--- a/qemu/target/arm/translate-neon.inc.c
+++ b/qemu/target/arm/translate-neon.inc.c
@@ -3843,3 +3843,65 @@ DO_FP_CMP0(VCGE0_F, gen_helper_neon_cge_f32, FWD)
 DO_FP_CMP0(VCEQ0_F, gen_helper_neon_ceq_f32, FWD)
 DO_FP_CMP0(VCLE0_F, gen_helper_neon_cge_f32, REV)
 DO_FP_CMP0(VCLT0_F, gen_helper_neon_cgt_f32, REV)
+
+static bool do_vrint(DisasContext *s, arg_2misc *a, int rmode)
+{
+    /*
+     * Handle a VRINT* operation by iterating 32 bits at a time,
+     * with a specified rounding mode in operation.
+     */
+    int pass;
+    TCGv_ptr fpst;
+    TCGv_i32 tcg_rmode;
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !arm_dc_feature(s, ARM_FEATURE_V8)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->size != 2) {
+        /* TODO: FP16 will be the size == 1 case */
+        return false;
+    }
+
+    if ((a->vd | a->vm) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    fpst = get_fpstatus_ptr(tcg_ctx, 1);
+    tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
+    gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        TCGv_i32 tmp = neon_load_reg(s, a->vm, pass);
+        gen_helper_rints(tcg_ctx, tmp, tmp, fpst);
+        neon_store_reg(s, a->vd, pass, tmp);
+    }
+    gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_ctx->cpu_env);
+    tcg_temp_free_i32(tcg_ctx, tcg_rmode);
+    tcg_temp_free_ptr(tcg_ctx, fpst);
+
+    return true;
+}
+
+#define DO_VRINT(INSN, RMODE)                                   \
+    static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
+    {                                                           \
+        return do_vrint(s, a, RMODE);                           \
+    }
+
+DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
+DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
+DO_VRINT(VRINTZ, FPROUNDING_ZERO)
+DO_VRINT(VRINTM, FPROUNDING_NEGINF)
+DO_VRINT(VRINTP, FPROUNDING_POSINF)
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index 8c2fc425..373b6d8d 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -5061,6 +5061,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 case NEON_2RM_VCEQ0_F:
                 case NEON_2RM_VCLE0_F:
                 case NEON_2RM_VCLT0_F:
+                case NEON_2RM_VRINTN:
+                case NEON_2RM_VRINTA:
+                case NEON_2RM_VRINTM:
+                case NEON_2RM_VRINTP:
+                case NEON_2RM_VRINTZ:
                     /* handled by decodetree */
                     return 1;
                 case NEON_2RM_VTRN:
@@ -5095,32 +5100,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                             }
                             neon_store_reg(s, rm, pass, tmp2);
                             break;
-                        case NEON_2RM_VRINTN:
-                        case NEON_2RM_VRINTA:
-                        case NEON_2RM_VRINTM:
-                        case NEON_2RM_VRINTP:
-                        case NEON_2RM_VRINTZ:
-                        {
-                            TCGv_i32 tcg_rmode;
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
-                            int rmode;
-
-                            if (op == NEON_2RM_VRINTZ) {
-                                rmode = FPROUNDING_ZERO;
-                            } else {
-                                rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
-                            }
-
-                            tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
-                            gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode,
-                                                      tcg_ctx->cpu_env);
-                            gen_helper_rints(tcg_ctx, tmp, tmp, fpstatus);
-                            gen_helper_set_neon_rmode(tcg_ctx, tcg_rmode, tcg_rmode,
-                                                      tcg_ctx->cpu_env);
-                            tcg_temp_free_ptr(tcg_ctx, fpstatus);
-                            tcg_temp_free_i32(tcg_ctx, tcg_rmode);
-                            break;
-                        }
                         case NEON_2RM_VCVTAU:
                         case NEON_2RM_VCVTAS:
                         case NEON_2RM_VCVTNU: