target/arm: Convert Neon fp VMUL, VMLA, VMLS 3-reg-same insns to decodetree

Convert the Neon integer VMUL, VMLA, and VMLS 3-reg-same inssn to decodetree. We don't have a gvec helper for multiply-accumulate, so VMLA and VMLS need a loop function do_3same_fp(). This takes a reads_vd parameter to do_3same_fp() which tells it to load the old value into vd before calling the callback function, in the same way that the do_vfp_3op_sp() and do_vfp_3op_dp() functions in translate-vfp.inc.c work. (The only uses in this patch pass reads_vd == true, but later commits will use reads_vd == false.) This conversion fixes in passing an underdecoding for VMUL Backports commit 8aa71ead912ca0a9c0d29b74e0976f91952f950a from qemu
2026-05-08 09:33:22 +00:00 · 2020-05-15 23:35:19 -04:00 · 2020-05-15 23:35:19 -04:00 · 042df686ca
parent 2527e76926
commit 042df686ca
3 changed files with 86 additions and 16 deletions
--- a/qemu/target/arm/neon-dp.decode
+++ b/qemu/target/arm/neon-dp.decode
@ -180,5 +180,8 @@ VADD_fp_3s       1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
 VSUB_fp_3s       1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
 VPADD_fp_3s      1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
 VABD_fp_3s       1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
+VMLA_fp_3s       1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
+VMLS_fp_3s       1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
+VMUL_fp_3s       1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
 VPMAX_fp_3s      1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
 VPMIN_fp_3s      1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
--- a/qemu/target/arm/translate-neon.inc.c
+++ b/qemu/target/arm/translate-neon.inc.c
@ -1040,6 +1040,56 @@ DO_3SAME_PAIR(VPADD, padd_u)
 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)

+static bool do_3same_fp(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn,
+                        bool reads_vd)
+{
+    /*
+     * FP operations handled elementwise 32 bits at a time.
+     * If reads_vd is true then the old value of Vd will be
+     * loaded before calling the callback function. This is
+     * used for multiply-accumulate type operations.
+     */
+    TCGv_i32 tmp, tmp2;
+    int pass;
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vn | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if ((a->vn | a->vm | a->vd) & a->q) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
+    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+        tmp = neon_load_reg(s, a->vn, pass);
+        tmp2 = neon_load_reg(s, a->vm, pass);
+        if (reads_vd) {
+            TCGv_i32 tmp_rd = neon_load_reg(s, a->vd, pass);
+            fn(tcg_ctx, tmp_rd, tmp, tmp2, fpstatus);
+            neon_store_reg(s, a->vd, pass, tmp_rd);
+            tcg_temp_free_i32(tcg_ctx, tmp);
+        } else {
+            fn(tcg_ctx, tmp, tmp, tmp2, fpstatus);
+            neon_store_reg(s, a->vd, pass, tmp);
+        }
+        tcg_temp_free_i32(tcg_ctx, tmp2);
+    }
+    tcg_temp_free_ptr(tcg_ctx, fpstatus);
+    return true;
+}
+
 /*
 * For all the functions using this macro, size == 1 means fp16,
 * which is an architecture extension we don't implement yet.
@ -1067,6 +1117,38 @@ DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
 DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
 DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
 DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
+DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s)
+
+/*
+ * For all the functions using this macro, size == 1 means fp16,
+ * which is an architecture extension we don't implement yet.
+ */
+#define DO_3S_FP(INSN,FUNC,READS_VD)                                \
+    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
+    {                                                               \
+        if (a->size != 0) {                                         \
+            /* TODO fp16 support */                                 \
+            return false;                                           \
+        }                                                           \
+        return do_3same_fp(s, a, FUNC, READS_VD);                   \
+    }
+
+static void gen_VMLA_fp_3s(TCGContext *s, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+                           TCGv_ptr fpstatus)
+{
+    gen_helper_vfp_muls(s, vn, vn, vm, fpstatus);
+    gen_helper_vfp_adds(s, vd, vd, vn, fpstatus);
+}
+
+static void gen_VMLS_fp_3s(TCGContext *s, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
+                           TCGv_ptr fpstatus)
+{
+    gen_helper_vfp_muls(s, vn, vn, vm, fpstatus);
+    gen_helper_vfp_subs(s, vd, vd, vn, fpstatus);
+}
+
+DO_3S_FP(VMLA, gen_VMLA_fp_3s, true)
+DO_3S_FP(VMLS, gen_VMLS_fp_3s, true)

 static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
 {
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@ -5559,6 +5559,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
        case NEON_3R_VPADD_VQRDMLAH:
        case NEON_3R_VQDMULH_VQRDMULH:
        case NEON_3R_FLOAT_ARITH:
+        case NEON_3R_FLOAT_MULTIPLY:
            /* Already handled by decodetree */
            return 1;
        }
@ -5605,22 +5606,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
        tmp = neon_load_reg(s, rn, pass);
        tmp2 = neon_load_reg(s, rm, pass);
        switch (op) {
-        case NEON_3R_FLOAT_MULTIPLY:
-        {
-            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
-            gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus);
-            if (!u) {
-                tcg_temp_free_i32(tcg_ctx, tmp2);
-                tmp2 = neon_load_reg(s, rd, pass);
-                if (size == 0) {
-                    gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus);
-                } else {
-                    gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus);
-                }
-            }
-            tcg_temp_free_ptr(tcg_ctx, fpstatus);
-            break;
-        }
        case NEON_3R_FLOAT_CMP:
        {
            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);