From 6301f9acaa642589b30bf2090547f2ec2cd55e8d Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Thu, 25 Feb 2021 12:24:20 -0500
Subject: [PATCH] target/arm: Convert Neon VCVT f16/f32 insns to decodetree

Convert the Neon insns in the 2-reg-misc group which are
VCVT between f32 and f16 to decodetree.

Backports commit 654a517355e249435505ae5ff14a7520410cf7a4 from qemu
---
 qemu/target/arm/neon-dp.decode       |  3 +
 qemu/target/arm/translate-neon.inc.c | 98 ++++++++++++++++++++++++++++
 qemu/target/arm/translate.c          | 65 +-----------------
 3 files changed, 104 insertions(+), 62 deletions(-)

diff --git a/qemu/target/arm/neon-dp.decode b/qemu/target/arm/neon-dp.decode
index 0102aa72..8174f2f9 100644
--- a/qemu/target/arm/neon-dp.decode
+++ b/qemu/target/arm/neon-dp.decode
@@ -461,6 +461,9 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
     VQMOVN_U     1111 001 11 . 11 .. 10 .... 0 0101 1 . 0 .... @2misc_q0
 
     VSHLL        1111 001 11 . 11 .. 10 .... 0 0110 0 . 0 .... @2misc_q0
+
+    VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
+    VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
   ]
 
   # Subgroup for size != 0b11
diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c
index f13ce1f9..de5829c0 100644
--- a/qemu/target/arm/translate-neon.inc.c
+++ b/qemu/target/arm/translate-neon.inc.c
@@ -3396,3 +3396,101 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
     tcg_temp_free_i32(tcg_ctx, rm1);
     return true;
 }
+/* VCVT_F16_F32: run Vm passes 0-3 through the f32->f16 helper, pack into Vd. */
+static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
+{
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    TCGv_ptr fpst;
+    TCGv_i32 ahp, tmp, tmp2, tmp3;
+    /* Return false unless both Neon and aa32_fp16_spconv are available. */
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !dc_isar_feature(aa32_fp16_spconv, s)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+    /* Return false if Vm is odd-numbered or the size field is not 1. */
+    if ((a->vm & 1) || (a->size != 1)) {
+        return false;
+    }
+    /* A failed access check still returns true, but emits no conversion. */
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+    /* NOTE(review): removed translate.c code passed tcg_ctx, not s; confirm. */
+    fpst = get_fpstatus_ptr(s, true);
+    ahp = get_ahp_flag(s);
+    tmp = neon_load_reg(s, a->vm, 0);
+    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
+    tmp2 = neon_load_reg(s, a->vm, 1);
+    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp);
+    tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16); /* pass 1 result to bits 31:16 */
+    tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp); /* merge in the pass 0 result */
+    tcg_temp_free_i32(tcg_ctx, tmp);
+    tmp = neon_load_reg(s, a->vm, 2);
+    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
+    tmp3 = neon_load_reg(s, a->vm, 3); /* read before Vd is written; presumably Vd may overlap Vm - confirm */
+    neon_store_reg(s, a->vd, 0, tmp2);
+    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp);
+    tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16); /* pass 3 result to bits 31:16 */
+    tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp); /* merge in the pass 2 result */
+    neon_store_reg(s, a->vd, 1, tmp3);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+    tcg_temp_free_i32(tcg_ctx, ahp);
+    tcg_temp_free_ptr(tcg_ctx, fpst);
+
+    return true;
+}
+/* VCVT_F32_F16: split Vm passes 0-1 into 16-bit halves, f16->f32 each, to Vd. */
+static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
+{
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    TCGv_ptr fpst;
+    TCGv_i32 ahp, tmp, tmp2, tmp3;
+    /* Return false unless both Neon and aa32_fp16_spconv are available. */
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+        !dc_isar_feature(aa32_fp16_spconv, s)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+    /* Return false if Vd is odd-numbered or the size field is not 1. */
+    if ((a->vd & 1) || (a->size != 1)) {
+        return false;
+    }
+    /* A failed access check still returns true, but emits no conversion. */
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+    /* NOTE(review): removed translate.c code passed tcg_ctx, not s; confirm. */
+    fpst = get_fpstatus_ptr(s, true);
+    ahp = get_ahp_flag(s);
+    tmp3 = tcg_temp_new_i32(tcg_ctx);
+    tmp = neon_load_reg(s, a->vm, 0);
+    tmp2 = neon_load_reg(s, a->vm, 1); /* both words read before Vd is written */
+    tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp); /* low half of word 0 */
+    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
+    neon_store_reg(s, a->vd, 0, tmp3);
+    tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); /* high half of word 0 */
+    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp);
+    neon_store_reg(s, a->vd, 1, tmp);
+    tmp3 = tcg_temp_new_i32(tcg_ctx); /* earlier tmp3 went to neon_store_reg */
+    tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2); /* low half of word 1 */
+    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
+    neon_store_reg(s, a->vd, 2, tmp3);
+    tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16); /* high half of word 1 */
+    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp);
+    neon_store_reg(s, a->vd, 3, tmp2);
+    tcg_temp_free_i32(tcg_ctx, ahp);
+    tcg_temp_free_ptr(tcg_ctx, fpst);
+
+    return true;
+}
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index 0518c574..89597ddc 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -4965,7 +4965,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
     int pass;
     int u;
     int vec_size;
-    TCGv_i32 tmp, tmp2, tmp3;
+    TCGv_i32 tmp, tmp2;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return 1;
@@ -5032,6 +5032,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             case NEON_2RM_VZIP:
             case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
             case NEON_2RM_VSHLL:
+            case NEON_2RM_VCVT_F16_F32:
+            case NEON_2RM_VCVT_F32_F16:
                 /* handled by decodetree */
                 return 1;
             case NEON_2RM_VTRN:
@@ -5047,67 +5049,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     goto elementwise;
                 }
                 break;
-            case NEON_2RM_VCVT_F16_F32:
-            {
-                TCGv_ptr fpst;
-                TCGv_i32 ahp;
-
-                if (!dc_isar_feature(aa32_fp16_spconv, s) ||
-                    q || (rm & 1)) {
-                    return 1;
-                }
-                fpst = get_fpstatus_ptr(tcg_ctx, true);
-                ahp = get_ahp_flag(s);
-                tmp = neon_load_reg(s, rm, 0);
-                gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
-                tmp2 = neon_load_reg(s, rm, 1);
-                gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp);
-                tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16);
-                tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp);
-                tcg_temp_free_i32(tcg_ctx, tmp);
-                tmp = neon_load_reg(s, rm, 2);
-                gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
-                tmp3 = neon_load_reg(s, rm, 3);
-                neon_store_reg(s, rd, 0, tmp2);
-                gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp);
-                tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16);
-                tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp);
-                neon_store_reg(s, rd, 1, tmp3);
-                tcg_temp_free_i32(tcg_ctx, tmp);
-                tcg_temp_free_i32(tcg_ctx, ahp);
-                tcg_temp_free_ptr(tcg_ctx, fpst);
-                break;
-            }
-            case NEON_2RM_VCVT_F32_F16:
-            {
-                TCGv_ptr fpst;
-                TCGv_i32 ahp;
-                if (!dc_isar_feature(aa32_fp16_spconv, s) ||
-                    q || (rd & 1)) {
-                    return 1;
-                }
-                fpst = get_fpstatus_ptr(tcg_ctx, true);
-                ahp = get_ahp_flag(s);
-                tmp3 = tcg_temp_new_i32(tcg_ctx);
-                tmp = neon_load_reg(s, rm, 0);
-                tmp2 = neon_load_reg(s, rm, 1);
-                tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp);
-                gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
-                neon_store_reg(s, rd, 0, tmp3);
-                tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16);
-                gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp);
-                neon_store_reg(s, rd, 1, tmp);
-                tmp3 = tcg_temp_new_i32(tcg_ctx);
-                tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2);
-                gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
-                neon_store_reg(s, rd, 2, tmp3);
-                tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16);
-                gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp);
-                neon_store_reg(s, rd, 3, tmp2);
-                tcg_temp_free_i32(tcg_ctx, ahp);
-                tcg_temp_free_ptr(tcg_ctx, fpst);
-                break;
-            }
             case NEON_2RM_AESE: case NEON_2RM_AESMC:
                 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
                     return 1;