target/arm: Implement fp16 for Neon VMUL, VMLA, VMLS

Convert the Neon floating-point VMUL, VMLA and VMLS to use gvec, and use this to implement fp16 support. Backports commit fc8ae790311882afa3c7816df004daf978c40e9a.
2025-11-04 09:15:11 +00:00 · 2021-03-01 17:55:46 -05:00 · 2021-03-01 17:55:46 -05:00 · b948636c4a
parent 8c6affbca4
commit b948636c4a
1 changed file with 58 additions and 57 deletions
--- a/qemu/target/arm/translate-neon.inc.c
+++ b/qemu/target/arm/translate-neon.inc.c
@@ -2466,70 +2466,71 @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
 }
-/*
+static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
- * Rather than have a float-specific version of do_2scalar just for
+                              gen_helper_gvec_3_ptr *fn)
- * three insns, we wrap a NeonGenTwoSingleOpFn to turn it into
+{
- * a NeonGenTwoOpFn.
+    /* Two registers and a scalar, using gvec */
- */
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
-#define WRAP_FP_FN(WRAPNAME, FUNC)                              \
+    int vec_size = a->q ? 16 : 8;
-    static void WRAPNAME(TCGContext *s, TCGv_i32 rd, TCGv_i32 rn, TCGv_i32 rm) \
+    int rd_ofs = neon_reg_offset(a->vd, 0);
-    {                                                           \
+    int rn_ofs = neon_reg_offset(a->vn, 0);
-        TCGv_ptr fpstatus = fpstatus_ptr(s, FPST_STD);          \
+    int rm_ofs;
-        FUNC(s, rd, rn, rm, fpstatus);                          \
+    int idx;
-        tcg_temp_free_ptr(s, fpstatus);                         \
+    TCGv_ptr fpstatus;
    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }
-WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
+    /* UNDEF accesses to D16-D31 if they don't exist. */
-WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
-WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
+        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }
-static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
+    if (!fn) {
-{
+        /* Bad size (including size == 3, which is a different insn group) */
-    static NeonGenTwoOpFn * const opfn[] = {
+        return false;
-        NULL,
+    }
        NULL, /* TODO: fp16 support */
        gen_VMUL_F_mul,
        NULL,
    };
-    return do_2scalar(s, a, opfn[a->size], NULL);
+    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }
    if (!vfp_access_check(s)) {
        return true;
    }
    /* a->vm is M:Vm, which encodes both register and index */
    idx = extract32(a->vm, a->size + 2, 2);
    a->vm = extract32(a->vm, 0, a->size + 2);
    rm_ofs = neon_reg_offset(a->vm, 0);
    fpstatus = fpstatus_ptr(tcg_ctx, a->size == 1 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, fpstatus,
                       vec_size, vec_size, idx, fn);
    tcg_temp_free_ptr(tcg_ctx, fpstatus);
    return true;
 }
-static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
+#define DO_VMUL_F_2sc(NAME, FUNC)                                       \
-{
+    static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
-    static NeonGenTwoOpFn * const opfn[] = {
+    {                                                                   \
-        NULL,
+        static gen_helper_gvec_3_ptr * const opfn[] = {                 \
-        NULL, /* TODO: fp16 support */
+            NULL,                                                       \
-        gen_VMUL_F_mul,
+            gen_helper_##FUNC##_h,                                      \
-        NULL,
+            gen_helper_##FUNC##_s,                                      \
-    };
+            NULL,                                                       \
-    static NeonGenTwoOpFn * const accfn[] = {
+        };                                                              \
-        NULL,
+        if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
-        NULL, /* TODO: fp16 support */
+            return false;                                               \
-        gen_VMUL_F_add,
+        }                                                               \
-        NULL,
+        return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
-    };
+    }
-    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
-}
+DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
-
+DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
 static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
 {
    static NeonGenTwoOpFn * const opfn[] = {
        NULL,
        NULL, /* TODO: fp16 support */
        gen_VMUL_F_mul,
        NULL,
    };
    static NeonGenTwoOpFn * const accfn[] = {
        NULL,
        NULL, /* TODO: fp16 support */
        gen_VMUL_F_sub,
        NULL,
    };
    return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
 }
 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)