target/arm: Convert VFP VNMLS to decodetree

Convert the VFP VNMLS instruction to decodetree. Backports commit c54a416cc6d60efbc79dd37aaf0c8918c05b5815 from QEMU.
2025-09-10 10:07:25 +00:00 · 2019-06-13 18:04:30 -04:00 · 2019-06-13 18:04:30 -04:00 · 638b90de31
parent 67ad40ffa4
commit 638b90de31
3 changed files with 48 additions and 23 deletions
--- a/qemu/target/arm/translate-vfp.inc.c
+++ b/qemu/target/arm/translate-vfp.inc.c
@ -1360,3 +1360,45 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_sp *a)
 {
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
 }
+
+static void gen_VNMLS_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn,
+                         TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /*
+     * VNMLS on 32-bit (single-precision) values: vd = (-vd) + (vn * vm).
+     *
+     * The negate-then-add sequence is deliberate.  Folding it into a
+     * subtraction such as (vn * vm) - vd looks equivalent, but it gives
+     * wrong results for NaNs, so the multiply, negate and add helpers
+     * are emitted as three separate steps.
+     */
+    TCGv_i32 product = tcg_temp_new_i32(tcg_ctx);
+
+    /* product = vn * vm */
+    gen_helper_vfp_muls(tcg_ctx, product, vn, vm, fpst);
+    /* vd = -vd */
+    gen_helper_vfp_negs(tcg_ctx, vd, vd);
+    /* vd = vd + product */
+    gen_helper_vfp_adds(tcg_ctx, vd, vd, product, fpst);
+
+    tcg_temp_free_i32(tcg_ctx, product);
+}
+
+/*
+ * Decodetree translate hook for the VNMLS_sp pattern: hands the vd, vn
+ * and vm fields of the decoded instruction, together with gen_VNMLS_sp,
+ * to do_vfp_3op_sp() and returns that function's result.
+ */
+static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
+{
+    /*
+     * NOTE(review): the trailing 'true' presumably tells the helper that
+     * vd is also a source operand (VNMLS reads vd before overwriting it)
+     * -- confirm against do_vfp_3op_sp().
+     */
+    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLS_dp(TCGContext *tcg_ctx, TCGv_i64 vd, TCGv_i64 vn,
+                         TCGv_i64 vm, TCGv_ptr fpst)
+{
+    /*
+     * VNMLS on 64-bit (double-precision) values: vd = (-vd) + (vn * vm).
+     *
+     * The negate-then-add sequence is deliberate.  Folding it into a
+     * subtraction such as (vn * vm) - vd looks equivalent, but it gives
+     * wrong results for NaNs, so the multiply, negate and add helpers
+     * are emitted as three separate steps.
+     */
+    TCGv_i64 product = tcg_temp_new_i64(tcg_ctx);
+
+    /* product = vn * vm */
+    gen_helper_vfp_muld(tcg_ctx, product, vn, vm, fpst);
+    /* vd = -vd */
+    gen_helper_vfp_negd(tcg_ctx, vd, vd);
+    /* vd = vd + product */
+    gen_helper_vfp_addd(tcg_ctx, vd, vd, product, fpst);
+
+    tcg_temp_free_i64(tcg_ctx, product);
+}
+
+/*
+ * Decodetree translate hook for the VNMLS_dp pattern: hands the vd, vn
+ * and vm fields of the decoded instruction, together with gen_VNMLS_dp,
+ * to do_vfp_3op_dp() and returns that function's result.
+ *
+ * The argument type is spelled arg_VNMLS_dp so that it matches the
+ * pattern this hook belongs to, rather than borrowing the _sp name.
+ */
+static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
+{
+    /*
+     * NOTE(review): the trailing 'true' presumably tells the helper that
+     * vd is also a source operand (VNMLS reads vd before overwriting it)
+     * -- confirm against do_vfp_3op_dp().
+     */
+    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
+}
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@ -1449,19 +1449,6 @@ VFP_OP2(div)

 #undef VFP_OP2

-static inline void gen_vfp_F1_mul(DisasContext *s, int dp)
-{
-    TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    /* Like gen_vfp_mul() but put result in F1 */
-    TCGv_ptr fpst = get_fpstatus_ptr(s, 0);
-    if (dp) {
-        gen_helper_vfp_muld(tcg_ctx, s->F1d, s->F0d, s->F1d, fpst);
-    } else {
-        gen_helper_vfp_muls(tcg_ctx, s->F1s, s->F0s, s->F1s, fpst);
-    }
-    tcg_temp_free_ptr(tcg_ctx, fpst);
-}
-
 static inline void gen_vfp_F1_neg(DisasContext *s, int dp)
 {
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
@ -3240,6 +3227,7 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
            switch (op) {
            case 0:
            case 1:
+            case 2:
                /* Already handled by decodetree */
                return 1;
            default:
@ -3425,16 +3413,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
            for (;;) {
                /* Perform the calculation.  */
                switch (op) {
-                case 2: /* VNMLS: -fd + (fn * fm) */
-                    /* Note that it isn't valid to replace (-A + B) with (B - A)
-                     * or similar plausible looking simplifications
-                     * because this will give wrong results for NaNs.
-                     */
-                    gen_vfp_F1_mul(s, dp);
-                    gen_mov_F0_vreg(s, dp, rd);
-                    gen_vfp_neg(s, dp);
-                    gen_vfp_add(s, dp);
-                    break;
                case 3: /* VNMLA: -fd + -(fn * fm) */
                    gen_vfp_mul(s, dp);
                    gen_vfp_F1_neg(s, dp);
--- a/qemu/target/arm/vfp.decode
+++ b/qemu/target/arm/vfp.decode
@ -107,3 +107,8 @@ VMLS_sp      ---- 1110 0.00 .... .... 1010 .1.0 .... \
             vm=%vm_sp vn=%vn_sp vd=%vd_sp
 VMLS_dp      ---- 1110 0.00 .... .... 1011 .1.0 .... \
             vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+# VNMLS, matched in two forms that differ only in bits [11:8]
+# (1010 for VNMLS_sp, 1011 for VNMLS_dp).  Each form takes its vm/vn/vd
+# operands from the correspondingly suffixed %v*_sp or %v*_dp fields.
+VNMLS_sp     ---- 1110 0.01 .... .... 1010 .0.0 .... \
+             vm=%vm_sp vn=%vn_sp vd=%vd_sp
+VNMLS_dp     ---- 1110 0.01 .... .... 1011 .0.0 .... \
+             vm=%vm_dp vn=%vn_dp vd=%vd_dp