target/arm: Implement VFP fp16 VRINT*

Implement the fp16 versions of the VFP VRINT* instructions. Backports commit 0a6f4b4cb338665b81ad824d9a6868932461b7f7
2025-11-25 20:46:14 +00:00 · 2021-03-01 16:15:03 -05:00 · 2021-03-01 16:15:03 -05:00 · 90aa9647e0
parent 1c8088b48a
commit 90aa9647e0
21 changed files with 158 additions and 8 deletions
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_aarch64
 #define helper_rintd helper_rintd_aarch64
 #define helper_rintd_exact helper_rintd_exact_aarch64
+#define helper_rinth helper_rinth_aarch64
+#define helper_rinth_exact helper_rinth_exact_aarch64
 #define helper_rints helper_rints_aarch64
 #define helper_rints_exact helper_rints_exact_aarch64
 #define helper_ror_cc helper_ror_cc_aarch64
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_aarch64eb
 #define helper_rintd helper_rintd_aarch64eb
 #define helper_rintd_exact helper_rintd_exact_aarch64eb
+#define helper_rinth helper_rinth_aarch64eb
+#define helper_rinth_exact helper_rinth_exact_aarch64eb
 #define helper_rints helper_rints_aarch64eb
 #define helper_rints_exact helper_rints_exact_aarch64eb
 #define helper_ror_cc helper_ror_cc_aarch64eb
--- a/qemu/arm.h
+++ b/qemu/arm.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_arm
 #define helper_rintd helper_rintd_arm
 #define helper_rintd_exact helper_rintd_exact_arm
+#define helper_rinth helper_rinth_arm
+#define helper_rinth_exact helper_rinth_exact_arm
 #define helper_rints helper_rints_arm
 #define helper_rints_exact helper_rints_exact_arm
 #define helper_ror_cc helper_ror_cc_arm
--- a/qemu/armeb.h
+++ b/qemu/armeb.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_armeb
 #define helper_rintd helper_rintd_armeb
 #define helper_rintd_exact helper_rintd_exact_armeb
+#define helper_rinth helper_rinth_armeb
+#define helper_rinth_exact helper_rinth_exact_armeb
 #define helper_rints helper_rints_armeb
 #define helper_rints_exact helper_rints_exact_armeb
 #define helper_ror_cc helper_ror_cc_armeb
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@ -1813,6 +1813,8 @@ symbols = (
    'helper_ret_stb_mmu',
    'helper_rintd',
    'helper_rintd_exact',
+    'helper_rinth',
+    'helper_rinth_exact',
    'helper_rints',
    'helper_rints_exact',
    'helper_ror_cc',
--- a/qemu/m68k.h
+++ b/qemu/m68k.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_m68k
 #define helper_rintd helper_rintd_m68k
 #define helper_rintd_exact helper_rintd_exact_m68k
+#define helper_rinth helper_rinth_m68k
+#define helper_rinth_exact helper_rinth_exact_m68k
 #define helper_rints helper_rints_m68k
 #define helper_rints_exact helper_rints_exact_m68k
 #define helper_ror_cc helper_ror_cc_m68k
--- a/qemu/mips.h
+++ b/qemu/mips.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_mips
 #define helper_rintd helper_rintd_mips
 #define helper_rintd_exact helper_rintd_exact_mips
+#define helper_rinth helper_rinth_mips
+#define helper_rinth_exact helper_rinth_exact_mips
 #define helper_rints helper_rints_mips
 #define helper_rints_exact helper_rints_exact_mips
 #define helper_ror_cc helper_ror_cc_mips
--- a/qemu/mips64.h
+++ b/qemu/mips64.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64
 #define helper_rintd helper_rintd_mips64
 #define helper_rintd_exact helper_rintd_exact_mips64
+#define helper_rinth helper_rinth_mips64
+#define helper_rinth_exact helper_rinth_exact_mips64
 #define helper_rints helper_rints_mips64
 #define helper_rints_exact helper_rints_exact_mips64
 #define helper_ror_cc helper_ror_cc_mips64
--- a/qemu/mips64el.h
+++ b/qemu/mips64el.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_mips64el
 #define helper_rintd helper_rintd_mips64el
 #define helper_rintd_exact helper_rintd_exact_mips64el
+#define helper_rinth helper_rinth_mips64el
+#define helper_rinth_exact helper_rinth_exact_mips64el
 #define helper_rints helper_rints_mips64el
 #define helper_rints_exact helper_rints_exact_mips64el
 #define helper_ror_cc helper_ror_cc_mips64el
--- a/qemu/mipsel.h
+++ b/qemu/mipsel.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_mipsel
 #define helper_rintd helper_rintd_mipsel
 #define helper_rintd_exact helper_rintd_exact_mipsel
+#define helper_rinth helper_rinth_mipsel
+#define helper_rinth_exact helper_rinth_exact_mipsel
 #define helper_rints helper_rints_mipsel
 #define helper_rints_exact helper_rints_exact_mipsel
 #define helper_ror_cc helper_ror_cc_mipsel
--- a/qemu/powerpc.h
+++ b/qemu/powerpc.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_powerpc
 #define helper_rintd helper_rintd_powerpc
 #define helper_rintd_exact helper_rintd_exact_powerpc
+#define helper_rinth helper_rinth_powerpc
+#define helper_rinth_exact helper_rinth_exact_powerpc
 #define helper_rints helper_rints_powerpc
 #define helper_rints_exact helper_rints_exact_powerpc
 #define helper_ror_cc helper_ror_cc_powerpc
--- a/qemu/riscv32.h
+++ b/qemu/riscv32.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv32
 #define helper_rintd helper_rintd_riscv32
 #define helper_rintd_exact helper_rintd_exact_riscv32
+#define helper_rinth helper_rinth_riscv32
+#define helper_rinth_exact helper_rinth_exact_riscv32
 #define helper_rints helper_rints_riscv32
 #define helper_rints_exact helper_rints_exact_riscv32
 #define helper_ror_cc helper_ror_cc_riscv32
--- a/qemu/riscv64.h
+++ b/qemu/riscv64.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_riscv64
 #define helper_rintd helper_rintd_riscv64
 #define helper_rintd_exact helper_rintd_exact_riscv64
+#define helper_rinth helper_rinth_riscv64
+#define helper_rinth_exact helper_rinth_exact_riscv64
 #define helper_rints helper_rints_riscv64
 #define helper_rints_exact helper_rints_exact_riscv64
 #define helper_ror_cc helper_ror_cc_riscv64
--- a/qemu/sparc.h
+++ b/qemu/sparc.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_sparc
 #define helper_rintd helper_rintd_sparc
 #define helper_rintd_exact helper_rintd_exact_sparc
+#define helper_rinth helper_rinth_sparc
+#define helper_rinth_exact helper_rinth_exact_sparc
 #define helper_rints helper_rints_sparc
 #define helper_rints_exact helper_rints_exact_sparc
 #define helper_ror_cc helper_ror_cc_sparc
--- a/qemu/sparc64.h
+++ b/qemu/sparc64.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_sparc64
 #define helper_rintd helper_rintd_sparc64
 #define helper_rintd_exact helper_rintd_exact_sparc64
+#define helper_rinth helper_rinth_sparc64
+#define helper_rinth_exact helper_rinth_exact_sparc64
 #define helper_rints helper_rints_sparc64
 #define helper_rints_exact helper_rints_exact_sparc64
 #define helper_ror_cc helper_ror_cc_sparc64
--- a/qemu/target/arm/helper.h
+++ b/qemu/target/arm/helper.h
@ -240,8 +240,10 @@ DEF_HELPER_3(shr_cc, i32, env, i32, i32)
 DEF_HELPER_3(sar_cc, i32, env, i32, i32)
 DEF_HELPER_3(ror_cc, i32, env, i32, i32)

+DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr)
 DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr)
 DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)

--- a/qemu/target/arm/translate-vfp.inc.c
+++ b/qemu/target/arm/translate-vfp.inc.c
@ -345,7 +345,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 {
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    uint32_t rd, rm;
-    bool dp = a->dp;
+    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];
@ -354,12 +354,16 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
        return false;
    }

-    if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
+    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+        return false;
+    }
+
+    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
-    if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
+    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }
@ -371,12 +375,17 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
        return true;
    }

-    fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR);
+    if (sz == 1) {
+        fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16);
+    } else {
+        fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR);
+    }
+

    tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst);

-    if (dp) {
+    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64(tcg_ctx);
@ -392,7 +401,11 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
        tcg_op = tcg_temp_new_i32(tcg_ctx);
        tcg_res = tcg_temp_new_i32(tcg_ctx);
        neon_load_reg32(s, tcg_op, rm);
-        gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst);
+        if (sz == 1) {
+            gen_helper_rinth(tcg_ctx, tcg_res, tcg_op, fpst);
+        } else {
+            gen_helper_rints(tcg_ctx, tcg_res, tcg_op, fpst);
+        }
        neon_store_reg32(s, tcg_res, rd);
        tcg_temp_free_i32(tcg_ctx, tcg_op);
        tcg_temp_free_i32(tcg_ctx, tcg_res);
@ -2676,6 +2689,30 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
    return true;
 }

+static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
+{ /* fp16 VRINTR: vd = vm rounded to an integral value by the rinth helper. */
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    TCGv_ptr fpst;
+    TCGv_i32 tmp;
+
+    if (!dc_isar_feature(aa32_fp16_arith, s)) {
+        return false; /* no fp16 arithmetic feature: encoding not accepted */
+    }
+
+    if (!vfp_access_check(s)) {
+        return true; /* NOTE(review): presumably the check emitted the trap */
+    }
+
+    tmp = tcg_temp_new_i32(tcg_ctx);
+    neon_load_reg32(s, tmp, a->vm); /* operand travels in a 32-bit temp */
+    fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); /* fp16 status, not FPST_FPCR */
+    gen_helper_rinth(tcg_ctx, tmp, tmp, fpst); /* no rounding-mode override here */
+    neon_store_reg32(s, tmp, a->vd);
+    tcg_temp_free_ptr(tcg_ctx, fpst);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+    return true;
+}
+
 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
 {
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
@ -2733,6 +2770,35 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
    return true;
 }

+static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
+{ /* fp16 VRINTZ: vd = vm rounded to an integral value, rounding toward zero. */
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    TCGv_ptr fpst;
+    TCGv_i32 tmp;
+    TCGv_i32 tcg_rmode;
+
+    if (!dc_isar_feature(aa32_fp16_arith, s)) {
+        return false; /* no fp16 arithmetic feature: encoding not accepted */
+    }
+
+    if (!vfp_access_check(s)) {
+        return true; /* NOTE(review): presumably the check emitted the trap */
+    }
+
+    tmp = tcg_temp_new_i32(tcg_ctx);
+    neon_load_reg32(s, tmp, a->vm); /* operand travels in a 32-bit temp */
+    fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); /* fp16 status, not FPST_FPCR */
+    tcg_rmode = tcg_const_i32(tcg_ctx, float_round_to_zero);
+    gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); /* force round-to-zero */
+    gen_helper_rinth(tcg_ctx, tmp, tmp, fpst);
+    gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst); /* presumably restores the prior mode returned by the first call - TODO confirm */
+    neon_store_reg32(s, tmp, a->vd);
+    tcg_temp_free_ptr(tcg_ctx, fpst);
+    tcg_temp_free_i32(tcg_ctx, tcg_rmode);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+    return true;
+}
+
 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
 {
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
@ -2800,6 +2866,30 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
    return true;
 }

+static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
+{ /* fp16 VRINTX: vd = vm rounded to an integral value by the rinth_exact helper. */
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    TCGv_ptr fpst;
+    TCGv_i32 tmp;
+
+    if (!dc_isar_feature(aa32_fp16_arith, s)) {
+        return false; /* no fp16 arithmetic feature: encoding not accepted */
+    }
+
+    if (!vfp_access_check(s)) {
+        return true; /* NOTE(review): presumably the check emitted the trap */
+    }
+
+    tmp = tcg_temp_new_i32(tcg_ctx);
+    neon_load_reg32(s, tmp, a->vm); /* operand travels in a 32-bit temp */
+    fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16); /* fp16 status, not FPST_FPCR */
+    gen_helper_rinth_exact(tcg_ctx, tmp, tmp, fpst); /* "exact" variant: helper does not clear inexact */
+    neon_store_reg32(s, tmp, a->vd);
+    tcg_temp_free_ptr(tcg_ctx, fpst);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+    return true;
+}
+
 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
 {
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
--- a/qemu/target/arm/vfp-uncond.decode
+++ b/qemu/target/arm/vfp-uncond.decode
@ -60,10 +60,12 @@ VMINNM_sp   1111 1110 1.00 .... .... 1010 .1.0 ....         @vfp_dnm_s
 VMAXNM_dp   1111 1110 1.00 .... .... 1011 .0.0 ....         @vfp_dnm_d
 VMINNM_dp   1111 1110 1.00 .... .... 1011 .1.0 ....         @vfp_dnm_d

+VRINT       1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
+            vm=%vm_sp vd=%vd_sp sz=1
 VRINT       1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
-            vm=%vm_sp vd=%vd_sp dp=0
+            vm=%vm_sp vd=%vd_sp sz=2
 VRINT       1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
-            vm=%vm_dp vd=%vd_dp dp=1
+            vm=%vm_dp vd=%vd_dp sz=3

 # VCVT float to int with specified rounding mode; Vd is always single-precision
 VCVT        1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \
--- a/qemu/target/arm/vfp.decode
+++ b/qemu/target/arm/vfp.decode
@ -195,12 +195,15 @@ VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
 VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
             vd=%vd_sp vm=%vm_dp

+VRINTR_hp    ---- 1110 1.11 0110 .... 1001 01.0 ....        @vfp_dm_ss
 VRINTR_sp    ---- 1110 1.11 0110 .... 1010 01.0 ....        @vfp_dm_ss
 VRINTR_dp    ---- 1110 1.11 0110 .... 1011 01.0 ....        @vfp_dm_dd

+VRINTZ_hp    ---- 1110 1.11 0110 .... 1001 11.0 ....        @vfp_dm_ss
 VRINTZ_sp    ---- 1110 1.11 0110 .... 1010 11.0 ....        @vfp_dm_ss
 VRINTZ_dp    ---- 1110 1.11 0110 .... 1011 11.0 ....        @vfp_dm_dd

+VRINTX_hp    ---- 1110 1.11 0111 .... 1001 01.0 ....        @vfp_dm_ss
 VRINTX_sp    ---- 1110 1.11 0111 .... 1010 01.0 ....        @vfp_dm_ss
 VRINTX_dp    ---- 1110 1.11 0111 .... 1011 01.0 ....        @vfp_dm_dd

--- a/qemu/target/arm/vfp_helper.c
+++ b/qemu/target/arm/vfp_helper.c
@ -1039,6 +1039,11 @@ float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
 }

 /* ARMv8 round to integral */
+dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
+{ /* fp16 round-to-integral; flags set by the rounding are left untouched. */
+    return float16_round_to_int(x, fp_status);
+}
+
 float32 HELPER(rints_exact)(float32 x, void *fp_status)
 {
    return float32_round_to_int(x, fp_status);
@ -1049,6 +1054,22 @@ float64 HELPER(rintd_exact)(float64 x, void *fp_status)
    return float64_round_to_int(x, fp_status);
 }

+dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
+{ /* fp16 round-to-integral that does not newly raise the inexact flag. */
+    int old_flags = get_float_exception_flags(fp_status), new_flags; /* snapshot taken before rounding */
+    float16 ret;
+
+    ret = float16_round_to_int(x, fp_status);
+
+    /* Suppress any inexact exceptions the conversion produced */
+    if (!(old_flags & float_flag_inexact)) { /* an inexact flag that was already set is kept */
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); /* other flags pass through */
+    }
+
+    return ret;
+}
+
 float32 HELPER(rints)(float32 x, void *fp_status)
 {
    int old_flags = get_float_exception_flags(fp_status), new_flags;
--- a/qemu/x86_64.h
+++ b/qemu/x86_64.h
@ -1807,6 +1807,8 @@
 #define helper_ret_stb_mmu helper_ret_stb_mmu_x86_64
 #define helper_rintd helper_rintd_x86_64
 #define helper_rintd_exact helper_rintd_exact_x86_64
+#define helper_rinth helper_rinth_x86_64
+#define helper_rinth_exact helper_rinth_exact_x86_64
 #define helper_rints helper_rints_x86_64
 #define helper_rints_exact helper_rints_exact_x86_64
 #define helper_ror_cc helper_ror_cc_x86_64