From 90aa9647e0279deb6357c83e1296a23c3a881b2c Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 1 Mar 2021 16:15:03 -0500 Subject: [PATCH] target/arm: Implement VFP fp16 VRINT* Implement the fp16 version of the VFP VRINT* insns. Backports 0a6f4b4cb338665b81ad824d9a6868932461b7f7 --- qemu/aarch64.h | 2 + qemu/aarch64eb.h | 2 + qemu/arm.h | 2 + qemu/armeb.h | 2 + qemu/header_gen.py | 2 + qemu/m68k.h | 2 + qemu/mips.h | 2 + qemu/mips64.h | 2 + qemu/mips64el.h | 2 + qemu/mipsel.h | 2 + qemu/powerpc.h | 2 + qemu/riscv32.h | 2 + qemu/riscv64.h | 2 + qemu/sparc.h | 2 + qemu/sparc64.h | 2 + qemu/target/arm/helper.h | 2 + qemu/target/arm/translate-vfp.inc.c | 102 ++++++++++++++++++++++++++-- qemu/target/arm/vfp-uncond.decode | 6 +- qemu/target/arm/vfp.decode | 3 + qemu/target/arm/vfp_helper.c | 21 ++++++ qemu/x86_64.h | 2 + 21 files changed, 158 insertions(+), 8 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 94e976bb..dc8b9f0c 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_aarch64 #define helper_rintd helper_rintd_aarch64 #define helper_rintd_exact helper_rintd_exact_aarch64 +#define helper_rinth helper_rinth_aarch64 +#define helper_rinth_exact helper_rinth_exact_aarch64 #define helper_rints helper_rints_aarch64 #define helper_rints_exact helper_rints_exact_aarch64 #define helper_ror_cc helper_ror_cc_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 7ba3a9da..3a422b10 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_aarch64eb #define helper_rintd helper_rintd_aarch64eb #define helper_rintd_exact helper_rintd_exact_aarch64eb +#define helper_rinth helper_rinth_aarch64eb +#define helper_rinth_exact helper_rinth_exact_aarch64eb #define helper_rints helper_rints_aarch64eb #define helper_rints_exact helper_rints_exact_aarch64eb #define helper_ror_cc helper_ror_cc_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index 657cd789..85fee87c 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_arm #define helper_rintd helper_rintd_arm #define helper_rintd_exact helper_rintd_exact_arm +#define helper_rinth helper_rinth_arm +#define helper_rinth_exact helper_rinth_exact_arm #define helper_rints helper_rints_arm #define helper_rints_exact helper_rints_exact_arm #define helper_ror_cc helper_ror_cc_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index 7f8ccd1f..b5f80beb 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_armeb #define helper_rintd helper_rintd_armeb #define helper_rintd_exact helper_rintd_exact_armeb +#define helper_rinth helper_rinth_armeb +#define helper_rinth_exact helper_rinth_exact_armeb #define helper_rints helper_rints_armeb #define helper_rints_exact helper_rints_exact_armeb #define helper_ror_cc helper_ror_cc_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 7c21a517..54dbdc4e 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -1813,6 +1813,8 @@ symbols = ( 'helper_ret_stb_mmu', 'helper_rintd', 'helper_rintd_exact', + 'helper_rinth', + 'helper_rinth_exact', 'helper_rints', 'helper_rints_exact', 'helper_ror_cc', diff --git a/qemu/m68k.h b/qemu/m68k.h index 36bd261f..8a394307 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_m68k #define helper_rintd helper_rintd_m68k #define helper_rintd_exact helper_rintd_exact_m68k +#define helper_rinth helper_rinth_m68k +#define helper_rinth_exact helper_rinth_exact_m68k #define helper_rints helper_rints_m68k #define helper_rints_exact helper_rints_exact_m68k #define helper_ror_cc helper_ror_cc_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 9685b1f8..1ba6b8f8 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_mips #define helper_rintd helper_rintd_mips #define helper_rintd_exact helper_rintd_exact_mips +#define helper_rinth helper_rinth_mips +#define helper_rinth_exact helper_rinth_exact_mips #define helper_rints helper_rints_mips #define helper_rints_exact helper_rints_exact_mips #define helper_ror_cc helper_ror_cc_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 12cb768f..5035e1a2 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64 #define helper_rintd helper_rintd_mips64 #define helper_rintd_exact helper_rintd_exact_mips64 +#define helper_rinth helper_rinth_mips64 +#define helper_rinth_exact helper_rinth_exact_mips64 #define helper_rints helper_rints_mips64 #define helper_rints_exact helper_rints_exact_mips64 #define helper_ror_cc helper_ror_cc_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index d92e550a..c39ca800 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64el #define helper_rintd helper_rintd_mips64el #define helper_rintd_exact helper_rintd_exact_mips64el +#define helper_rinth helper_rinth_mips64el +#define helper_rinth_exact helper_rinth_exact_mips64el #define helper_rints helper_rints_mips64el #define helper_rints_exact helper_rints_exact_mips64el #define helper_ror_cc helper_ror_cc_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index f10cbb85..35737e6e 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_mipsel #define helper_rintd helper_rintd_mipsel #define helper_rintd_exact helper_rintd_exact_mipsel +#define helper_rinth helper_rinth_mipsel +#define helper_rinth_exact helper_rinth_exact_mipsel #define helper_rints helper_rints_mipsel #define helper_rints_exact helper_rints_exact_mipsel #define helper_ror_cc helper_ror_cc_mipsel diff --git a/qemu/powerpc.h b/qemu/powerpc.h index 48e0c050..9bdd0ecb 100644 --- a/qemu/powerpc.h +++ b/qemu/powerpc.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_powerpc #define helper_rintd helper_rintd_powerpc #define helper_rintd_exact helper_rintd_exact_powerpc +#define helper_rinth helper_rinth_powerpc +#define helper_rinth_exact helper_rinth_exact_powerpc #define helper_rints helper_rints_powerpc #define helper_rints_exact helper_rints_exact_powerpc #define helper_ror_cc helper_ror_cc_powerpc diff --git a/qemu/riscv32.h b/qemu/riscv32.h index 294ed08a..11529ebb 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv32 #define helper_rintd helper_rintd_riscv32 #define helper_rintd_exact helper_rintd_exact_riscv32 +#define helper_rinth helper_rinth_riscv32 +#define helper_rinth_exact helper_rinth_exact_riscv32 #define helper_rints helper_rints_riscv32 #define helper_rints_exact helper_rints_exact_riscv32 #define helper_ror_cc helper_ror_cc_riscv32 diff --git a/qemu/riscv64.h b/qemu/riscv64.h index 1f478b50..5fcccaf0 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv64 #define helper_rintd helper_rintd_riscv64 #define helper_rintd_exact helper_rintd_exact_riscv64 +#define helper_rinth helper_rinth_riscv64 +#define helper_rinth_exact helper_rinth_exact_riscv64 #define helper_rints helper_rints_riscv64 #define helper_rints_exact helper_rints_exact_riscv64 #define helper_ror_cc helper_ror_cc_riscv64 diff --git a/qemu/sparc.h b/qemu/sparc.h index c5282e65..f1ff004c 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_sparc #define helper_rintd helper_rintd_sparc #define helper_rintd_exact helper_rintd_exact_sparc +#define helper_rinth helper_rinth_sparc +#define helper_rinth_exact helper_rinth_exact_sparc #define helper_rints helper_rints_sparc #define helper_rints_exact helper_rints_exact_sparc #define helper_ror_cc helper_ror_cc_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index dcc4ccda..5f830f4f 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_sparc64 #define helper_rintd helper_rintd_sparc64 #define helper_rintd_exact helper_rintd_exact_sparc64 +#define helper_rinth helper_rinth_sparc64 +#define helper_rinth_exact helper_rinth_exact_sparc64 #define helper_rints helper_rints_sparc64 #define helper_rints_exact helper_rints_exact_sparc64 #define helper_ror_cc helper_ror_cc_sparc64 diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 76bc5964..a40e3445 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -240,8 +240,10 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32) DEF_HELPER_3(sar_cc, i32, env, i32, i32) DEF_HELPER_3(ror_cc, i32, env, i32, i32) +DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr) diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c index 5b9935f0..3092dfeb 100644 --- a/qemu/target/arm/translate-vfp.inc.c +++ b/qemu/target/arm/translate-vfp.inc.c @@ -345,7 +345,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; uint32_t rd, rm; - bool dp = a->dp; + int sz = a->sz; TCGv_ptr fpst; TCGv_i32 tcg_rmode; int rounding = fp_decode_rm[a->rm]; @@ -354,12 +354,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) return false; } - if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) { + if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { + return false; + } + + if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { return false; } /* UNDEF accesses to D16-D31 if they don't exist */ - if (dp && !dc_isar_feature(aa32_simd_r32, s) && + if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && ((a->vm | a->vd) & 0x10)) { return false; } @@ -371,12 +375,17 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) return true; } - fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR); + if (sz == 1) { + fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); + } else { + fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR); + } + tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rounding)); gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); - if (dp) { + if (sz == 3) { TCGv_i64 tcg_op; TCGv_i64 tcg_res; tcg_op = tcg_temp_new_i64(tcg_ctx); @@ -392,7 +401,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) tcg_op = tcg_temp_new_i32(tcg_ctx); tcg_res = tcg_temp_new_i32(tcg_ctx); neon_load_reg32(s, tcg_op, rm); - gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst); + if (sz == 1) { + gen_helper_rinth(tcg_ctx, tcg_res, tcg_op, fpst); + } else { + gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst); + } neon_store_reg32(s, tcg_res, rd); tcg_temp_free_i32(tcg_ctx, tcg_op); tcg_temp_free_i32(tcg_ctx, tcg_res); @@ -2676,6 +2689,30 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) return true; } +static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr fpst; + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(tcg_ctx); + neon_load_reg32(s, tmp, a->vm); + fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); + gen_helper_rinth(tcg_ctx, tmp, tmp, fpst); + neon_store_reg32(s, tmp, a->vd); + tcg_temp_free_ptr(tcg_ctx, fpst); + tcg_temp_free_i32(tcg_ctx, tmp); + return true; +} + static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -2733,6 +2770,35 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) return true; } +static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr fpst; + TCGv_i32 tmp; + TCGv_i32 tcg_rmode; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(tcg_ctx); + neon_load_reg32(s, tmp, a->vm); + fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); + tcg_rmode = tcg_const_i32(tcg_ctx, float_round_to_zero); + gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); + gen_helper_rinth(tcg_ctx, tmp, tmp, fpst); + gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); + neon_store_reg32(s, tmp, a->vd); + tcg_temp_free_ptr(tcg_ctx, fpst); + tcg_temp_free_i32(tcg_ctx, tcg_rmode); + tcg_temp_free_i32(tcg_ctx, tmp); + return true; +} + static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; @@ -2800,6 +2866,30 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) return true; } +static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_ptr fpst; + TCGv_i32 tmp; + + if (!dc_isar_feature(aa32_fp16_arith, s)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = tcg_temp_new_i32(tcg_ctx); + neon_load_reg32(s, tmp, a->vm); + fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); + gen_helper_rinth_exact(tcg_ctx, tmp, tmp, fpst); + neon_store_reg32(s, tmp, a->vd); + tcg_temp_free_ptr(tcg_ctx, fpst); + tcg_temp_free_i32(tcg_ctx, tmp); + return true; +} + static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) { TCGContext *tcg_ctx = s->uc->tcg_ctx; diff --git a/qemu/target/arm/vfp-uncond.decode b/qemu/target/arm/vfp-uncond.decode index 8ba7b170..96155446 100644 --- a/qemu/target/arm/vfp-uncond.decode +++ b/qemu/target/arm/vfp-uncond.decode @@ -60,10 +60,12 @@ VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d +VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \ + vm=%vm_sp vd=%vd_sp sz=1 VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \ - vm=%vm_sp vd=%vd_sp dp=0 + vm=%vm_sp vd=%vd_sp sz=2 VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \ - vm=%vm_dp vd=%vd_dp dp=1 + vm=%vm_dp vd=%vd_dp sz=3 # VCVT float to int with specified rounding mode; Vd is always single-precision VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \ diff --git a/qemu/target/arm/vfp.decode b/qemu/target/arm/vfp.decode index a8f1137b..9a79e99f 100644 --- a/qemu/target/arm/vfp.decode +++ b/qemu/target/arm/vfp.decode @@ -195,12 +195,15 @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \ VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \ vd=%vd_sp vm=%vm_dp +VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd +VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd +VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd diff --git a/qemu/target/arm/vfp_helper.c b/qemu/target/arm/vfp_helper.c index d9749500..ed6dd0de 100644 --- a/qemu/target/arm/vfp_helper.c +++ b/qemu/target/arm/vfp_helper.c @@ -1039,6 +1039,11 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp) } /* ARMv8 round to integral */ +dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status) +{ + return float16_round_to_int(x, fp_status); +} + float32 HELPER(rints_exact)(float32 x, void *fp_status) { return float32_round_to_int(x, fp_status); @@ -1049,6 +1054,22 @@ float64 HELPER(rintd_exact)(float64 x, void *fp_status) return float64_round_to_int(x, fp_status); } +dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status) +{ + int old_flags = get_float_exception_flags(fp_status), new_flags; + float16 ret; + + ret = float16_round_to_int(x, fp_status); + + /* Suppress any inexact exceptions the conversion produced */ + if (!(old_flags & float_flag_inexact)) { + new_flags = get_float_exception_flags(fp_status); + set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); + } + + return ret; +} + float32 HELPER(rints)(float32 x, void *fp_status) { int old_flags = get_float_exception_flags(fp_status), new_flags; diff --git a/qemu/x86_64.h b/qemu/x86_64.h index a5cf5a80..427f705c 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -1807,6 +1807,8 @@ #define helper_ret_stb_mmu helper_ret_stb_mmu_x86_64 #define helper_rintd helper_rintd_x86_64 #define helper_rintd_exact helper_rintd_exact_x86_64 +#define helper_rinth helper_rinth_x86_64 +#define helper_rinth_exact helper_rinth_exact_x86_64 #define helper_rints helper_rints_x86_64 #define helper_rints_exact helper_rints_exact_x86_64 #define helper_ror_cc helper_ror_cc_x86_64