From 6ac2c597abd36ab6b19f31270f846159ddcad3f8 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Sun, 28 Feb 2021 04:39:19 -0500 Subject: [PATCH] target/arm: Implement VFP fp16 for fused-multiply-add Implement VFP fp16 support for fused multiply-add insns VFNMA, VFNMS, VFMA, VFMS. Backports 9886fe2834b064a3cf0675a4659942ed547aed42 --- qemu/aarch64.h | 1 + qemu/aarch64eb.h | 1 + qemu/arm.h | 1 + qemu/armeb.h | 1 + qemu/header_gen.py | 1 + qemu/m68k.h | 1 + qemu/mips.h | 1 + qemu/mips64.h | 1 + qemu/mips64el.h | 1 + qemu/mipsel.h | 1 + qemu/powerpc.h | 1 + qemu/riscv32.h | 1 + qemu/riscv64.h | 1 + qemu/sparc.h | 1 + qemu/sparc64.h | 1 + qemu/target/arm/helper.h | 1 + qemu/target/arm/translate-vfp.inc.c | 65 +++++++++++++++++++++++++++++ qemu/target/arm/vfp.decode | 5 +++ qemu/target/arm/vfp_helper.c | 7 ++++ qemu/x86_64.h | 1 + 20 files changed, 94 insertions(+) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index d4a5210c..6a834979 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_aarch64 #define helper_vfp_mins helper_vfp_mins_aarch64 #define helper_vfp_muladdd helper_vfp_muladdd_aarch64 +#define helper_vfp_muladdh helper_vfp_muladdh_aarch64 #define helper_vfp_muladds helper_vfp_muladds_aarch64 #define helper_vfp_muld helper_vfp_muld_aarch64 #define helper_vfp_mulh helper_vfp_mulh_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 76191a26..c1ab3112 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_aarch64eb #define helper_vfp_mins helper_vfp_mins_aarch64eb #define helper_vfp_muladdd helper_vfp_muladdd_aarch64eb +#define helper_vfp_muladdh helper_vfp_muladdh_aarch64eb #define helper_vfp_muladds helper_vfp_muladds_aarch64eb #define helper_vfp_muld helper_vfp_muld_aarch64eb #define helper_vfp_mulh helper_vfp_mulh_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index 93b3a2ea..f16f9182 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_arm #define helper_vfp_mins helper_vfp_mins_arm #define helper_vfp_muladdd helper_vfp_muladdd_arm +#define helper_vfp_muladdh helper_vfp_muladdh_arm #define helper_vfp_muladds helper_vfp_muladds_arm #define helper_vfp_muld helper_vfp_muld_arm #define helper_vfp_mulh helper_vfp_mulh_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index dfce6422..e29c8bcd 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_armeb #define helper_vfp_mins helper_vfp_mins_armeb #define helper_vfp_muladdd helper_vfp_muladdd_armeb +#define helper_vfp_muladdh helper_vfp_muladdh_armeb #define helper_vfp_muladds helper_vfp_muladds_armeb #define helper_vfp_muld helper_vfp_muld_armeb #define helper_vfp_mulh helper_vfp_mulh_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 6ab842d0..b36069fd 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -1925,6 +1925,7 @@ symbols = ( 'helper_vfp_minnums', 'helper_vfp_mins', 'helper_vfp_muladdd', + 'helper_vfp_muladdh', 'helper_vfp_muladds', 'helper_vfp_muld', 'helper_vfp_mulh', diff --git a/qemu/m68k.h b/qemu/m68k.h index 8d193fc6..d5f1b9a1 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_m68k #define helper_vfp_mins helper_vfp_mins_m68k #define helper_vfp_muladdd helper_vfp_muladdd_m68k +#define helper_vfp_muladdh helper_vfp_muladdh_m68k #define helper_vfp_muladds helper_vfp_muladds_m68k #define helper_vfp_muld helper_vfp_muld_m68k #define helper_vfp_mulh helper_vfp_mulh_m68k diff --git a/qemu/mips.h b/qemu/mips.h index ccdcc18c..64bff2d6 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_mips #define helper_vfp_mins helper_vfp_mins_mips #define helper_vfp_muladdd helper_vfp_muladdd_mips +#define helper_vfp_muladdh helper_vfp_muladdh_mips #define helper_vfp_muladds helper_vfp_muladds_mips #define helper_vfp_muld helper_vfp_muld_mips #define helper_vfp_mulh helper_vfp_mulh_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index e8f9f5b2..75473620 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_mips64 #define helper_vfp_mins helper_vfp_mins_mips64 #define helper_vfp_muladdd helper_vfp_muladdd_mips64 +#define helper_vfp_muladdh helper_vfp_muladdh_mips64 #define helper_vfp_muladds helper_vfp_muladds_mips64 #define helper_vfp_muld helper_vfp_muld_mips64 #define helper_vfp_mulh helper_vfp_mulh_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 1b786ef6..19372167 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_mips64el #define helper_vfp_mins helper_vfp_mins_mips64el #define helper_vfp_muladdd helper_vfp_muladdd_mips64el +#define helper_vfp_muladdh helper_vfp_muladdh_mips64el #define helper_vfp_muladds helper_vfp_muladds_mips64el #define helper_vfp_muld helper_vfp_muld_mips64el #define helper_vfp_mulh helper_vfp_mulh_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index fba1dbc7..2576097d 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_mipsel #define helper_vfp_mins helper_vfp_mins_mipsel #define helper_vfp_muladdd helper_vfp_muladdd_mipsel +#define helper_vfp_muladdh helper_vfp_muladdh_mipsel #define helper_vfp_muladds helper_vfp_muladds_mipsel #define helper_vfp_muld helper_vfp_muld_mipsel #define helper_vfp_mulh helper_vfp_mulh_mipsel diff --git a/qemu/powerpc.h b/qemu/powerpc.h index 828077a1..806e9f61 100644 --- a/qemu/powerpc.h +++ b/qemu/powerpc.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_powerpc #define helper_vfp_mins helper_vfp_mins_powerpc #define helper_vfp_muladdd helper_vfp_muladdd_powerpc +#define helper_vfp_muladdh helper_vfp_muladdh_powerpc #define helper_vfp_muladds helper_vfp_muladds_powerpc #define helper_vfp_muld helper_vfp_muld_powerpc #define helper_vfp_mulh helper_vfp_mulh_powerpc diff --git a/qemu/riscv32.h b/qemu/riscv32.h index 8c53f93f..da04d4dc 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_riscv32 #define helper_vfp_mins helper_vfp_mins_riscv32 #define helper_vfp_muladdd helper_vfp_muladdd_riscv32 +#define helper_vfp_muladdh helper_vfp_muladdh_riscv32 #define helper_vfp_muladds helper_vfp_muladds_riscv32 #define helper_vfp_muld helper_vfp_muld_riscv32 #define helper_vfp_mulh helper_vfp_mulh_riscv32 diff --git a/qemu/riscv64.h b/qemu/riscv64.h index a5f733e3..6aa66a9a 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_riscv64 #define helper_vfp_mins helper_vfp_mins_riscv64 #define helper_vfp_muladdd helper_vfp_muladdd_riscv64 +#define helper_vfp_muladdh helper_vfp_muladdh_riscv64 #define helper_vfp_muladds helper_vfp_muladds_riscv64 #define helper_vfp_muld helper_vfp_muld_riscv64 #define helper_vfp_mulh helper_vfp_mulh_riscv64 diff --git a/qemu/sparc.h b/qemu/sparc.h index 965217a8..6a83e059 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_sparc #define helper_vfp_mins helper_vfp_mins_sparc #define helper_vfp_muladdd helper_vfp_muladdd_sparc +#define helper_vfp_muladdh helper_vfp_muladdh_sparc #define helper_vfp_muladds helper_vfp_muladds_sparc #define helper_vfp_muld helper_vfp_muld_sparc #define helper_vfp_mulh helper_vfp_mulh_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index b2275f80..736b83f1 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_sparc64 #define helper_vfp_mins helper_vfp_mins_sparc64 #define helper_vfp_muladdd helper_vfp_muladdd_sparc64 +#define helper_vfp_muladdh helper_vfp_muladdh_sparc64 #define helper_vfp_muladds helper_vfp_muladds_sparc64 #define helper_vfp_muld helper_vfp_muld_sparc64 #define helper_vfp_mulh helper_vfp_mulh_sparc64 diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 4cc0ce53..3cfa0d7d 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -211,6 +211,7 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32) DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr) DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr) +DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr) DEF_HELPER_3(recps_f32, f32, env, f32, f32) DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32) diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c index fe39e271..afb78076 100644 --- a/qemu/target/arm/translate-vfp.inc.c +++ b/qemu/target/arm/translate-vfp.inc.c @@ -1935,6 +1935,70 @@ static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a) a->vd, a->vn, a->vm, false); } +static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) +{ + /* + * VFNMA : fd = muladd(-fd, fn, fm) + * VFNMS : fd = muladd(-fd, -fn, fm) + * VFMA : fd = muladd( fd, fn, fm) + * VFMS : fd = muladd( fd, -fn, fm) + * + * These are fused multiply-add, and must be done as one floating + * point operation with no rounding between the multiplication and + * addition steps. NB that doing the negations here as separate + * steps is correct : an input NaN should come out with its sign + * bit flipped if it is a negated-input. + */ + TCGv_ptr fpst; + TCGv_i32 vn, vm, vd; + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* + * Present in VFPv4 only, and only with the FP16 extension. + * Note that we can't rely on the SIMDFMAC check alone, because + * in a Neon-no-VFP core that ID register field will be non-zero. + */ + if (!dc_isar_feature(aa32_fp16_arith, s) || + !dc_isar_feature(aa32_simdfmac, s) || + !dc_isar_feature(aa32_fpsp_v2, s)) { + return false; + } + + if (s->vec_len != 0 || s->vec_stride != 0) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + vn = tcg_temp_new_i32(tcg_ctx); + vm = tcg_temp_new_i32(tcg_ctx); + vd = tcg_temp_new_i32(tcg_ctx); + + neon_load_reg32(s, vn, a->vn); + neon_load_reg32(s, vm, a->vm); + if (neg_n) { + /* VFNMS, VFMS */ + gen_helper_vfp_negh(tcg_ctx, vn, vn); + } + neon_load_reg32(s, vd, a->vd); + if (neg_d) { + /* VFNMA, VFNMS */ + gen_helper_vfp_negh(tcg_ctx, vd, vd); + } + fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); + gen_helper_vfp_muladdh(tcg_ctx, vd, vn, vm, vd, fpst); + neon_store_reg32(s, vd, a->vd); + + tcg_temp_free_ptr(tcg_ctx, fpst); + tcg_temp_free_i32(tcg_ctx, vn); + tcg_temp_free_i32(tcg_ctx, vm); + tcg_temp_free_i32(tcg_ctx, vd); + + return true; +} + static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) { /* @@ -2087,6 +2151,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \ MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true) +MAKE_VFM_TRANS_FNS(hp) MAKE_VFM_TRANS_FNS(sp) MAKE_VFM_TRANS_FNS(dp) diff --git a/qemu/target/arm/vfp.decode b/qemu/target/arm/vfp.decode index e5545076..af4829e2 100644 --- a/qemu/target/arm/vfp.decode +++ b/qemu/target/arm/vfp.decode @@ -139,6 +139,11 @@ VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d +VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s +VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s +VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s +VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s + VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s diff --git a/qemu/target/arm/vfp_helper.c b/qemu/target/arm/vfp_helper.c index 38221e28..be9ac188 100644 --- a/qemu/target/arm/vfp_helper.c +++ b/qemu/target/arm/vfp_helper.c @@ -1082,6 +1082,13 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a) } /* VFPv4 fused multiply-accumulate */ +dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b, + dh_ctype_f16 c, void *fpstp) +{ + float_status *fpst = fpstp; + return float16_muladd(a, b, c, 0, fpst); +} + float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp) { float_status *fpst = fpstp; diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 83d25e76..6eb45dca 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -1919,6 +1919,7 @@ #define helper_vfp_minnums helper_vfp_minnums_x86_64 #define helper_vfp_mins helper_vfp_mins_x86_64 #define helper_vfp_muladdd helper_vfp_muladdd_x86_64 +#define helper_vfp_muladdh helper_vfp_muladdh_x86_64 #define helper_vfp_muladds helper_vfp_muladds_x86_64 #define helper_vfp_muld helper_vfp_muld_x86_64 #define helper_vfp_mulh helper_vfp_mulh_x86_64