From a42ecfe203a41590b7bba2fc8577d5b63e365c90 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Sun, 28 Feb 2021 04:28:16 -0500
Subject: [PATCH] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA,
 VNMUL

Implement fp16 versions of the VFP VMLA, VMLS, VNMLS, VNMLA, VNMUL
instructions. (These are all the remaining ones which we implement
via do_vfp_3op_[hsd]p().)

Backports commit e7cb0ded52c6d7b86585b09935fe7caeb9e38b69 from upstream QEMU.
---
 qemu/aarch64.h                      |  1 +
 qemu/aarch64eb.h                    |  1 +
 qemu/arm.h                          |  1 +
 qemu/armeb.h                        |  1 +
 qemu/header_gen.py                  |  1 +
 qemu/m68k.h                         |  1 +
 qemu/mips.h                         |  1 +
 qemu/mips64.h                       |  1 +
 qemu/mips64el.h                     |  1 +
 qemu/mipsel.h                       |  1 +
 qemu/powerpc.h                      |  1 +
 qemu/riscv32.h                      |  1 +
 qemu/riscv64.h                      |  1 +
 qemu/sparc.h                        |  1 +
 qemu/sparc64.h                      |  1 +
 qemu/target/arm/helper.h            |  1 +
 qemu/target/arm/translate-vfp.inc.c | 84 +++++++++++++++++++++++++++++
 qemu/target/arm/vfp.decode          |  5 ++
 qemu/target/arm/vfp_helper.c        |  5 ++
 qemu/x86_64.h                       |  1 +
 20 files changed, 111 insertions(+)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index e5d688ba..d4a5210c 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_aarch64
 #define helper_vfp_muls helper_vfp_muls_aarch64
 #define helper_vfp_negd helper_vfp_negd_aarch64
+#define helper_vfp_negh helper_vfp_negh_aarch64
 #define helper_vfp_negs helper_vfp_negs_aarch64
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_aarch64
 #define helper_vfp_shtod helper_vfp_shtod_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index f1061b39..76191a26 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_aarch64eb
 #define helper_vfp_muls helper_vfp_muls_aarch64eb
 #define helper_vfp_negd helper_vfp_negd_aarch64eb
+#define helper_vfp_negh helper_vfp_negh_aarch64eb
 #define helper_vfp_negs helper_vfp_negs_aarch64eb
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_aarch64eb
 #define helper_vfp_shtod helper_vfp_shtod_aarch64eb
diff --git a/qemu/arm.h b/qemu/arm.h
index 0aabf703..93b3a2ea 100644
--- a/qemu/arm.h
+++ b/qemu/arm.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_arm
 #define helper_vfp_muls helper_vfp_muls_arm
 #define helper_vfp_negd helper_vfp_negd_arm
+#define helper_vfp_negh helper_vfp_negh_arm
 #define helper_vfp_negs helper_vfp_negs_arm
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_arm
 #define helper_vfp_shtod helper_vfp_shtod_arm
diff --git a/qemu/armeb.h b/qemu/armeb.h
index 3b9feecf..dfce6422 100644
--- a/qemu/armeb.h
+++ b/qemu/armeb.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_armeb
 #define helper_vfp_muls helper_vfp_muls_armeb
 #define helper_vfp_negd helper_vfp_negd_armeb
+#define helper_vfp_negh helper_vfp_negh_armeb
 #define helper_vfp_negs helper_vfp_negs_armeb
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_armeb
 #define helper_vfp_shtod helper_vfp_shtod_armeb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index 7891f329..6ab842d0 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -1930,6 +1930,7 @@ symbols = (
     'helper_vfp_mulh',
     'helper_vfp_muls',
     'helper_vfp_negd',
+    'helper_vfp_negh',
     'helper_vfp_negs',
     'helper_vfp_set_fpscr',
     'helper_vfp_shtod',
diff --git a/qemu/m68k.h b/qemu/m68k.h
index 737fdc41..8d193fc6 100644
--- a/qemu/m68k.h
+++ b/qemu/m68k.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_m68k
 #define helper_vfp_muls helper_vfp_muls_m68k
 #define helper_vfp_negd helper_vfp_negd_m68k
+#define helper_vfp_negh helper_vfp_negh_m68k
 #define helper_vfp_negs helper_vfp_negs_m68k
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_m68k
 #define helper_vfp_shtod helper_vfp_shtod_m68k
diff --git a/qemu/mips.h b/qemu/mips.h
index a953f15f..ccdcc18c 100644
--- a/qemu/mips.h
+++ b/qemu/mips.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_mips
 #define helper_vfp_muls helper_vfp_muls_mips
 #define helper_vfp_negd helper_vfp_negd_mips
+#define helper_vfp_negh helper_vfp_negh_mips
 #define helper_vfp_negs helper_vfp_negs_mips
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_mips
 #define helper_vfp_shtod helper_vfp_shtod_mips
diff --git a/qemu/mips64.h b/qemu/mips64.h
index da9dd880..e8f9f5b2 100644
--- a/qemu/mips64.h
+++ b/qemu/mips64.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_mips64
 #define helper_vfp_muls helper_vfp_muls_mips64
 #define helper_vfp_negd helper_vfp_negd_mips64
+#define helper_vfp_negh helper_vfp_negh_mips64
 #define helper_vfp_negs helper_vfp_negs_mips64
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_mips64
 #define helper_vfp_shtod helper_vfp_shtod_mips64
diff --git a/qemu/mips64el.h b/qemu/mips64el.h
index 4d3a4178..1b786ef6 100644
--- a/qemu/mips64el.h
+++ b/qemu/mips64el.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_mips64el
 #define helper_vfp_muls helper_vfp_muls_mips64el
 #define helper_vfp_negd helper_vfp_negd_mips64el
+#define helper_vfp_negh helper_vfp_negh_mips64el
 #define helper_vfp_negs helper_vfp_negs_mips64el
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_mips64el
 #define helper_vfp_shtod helper_vfp_shtod_mips64el
diff --git a/qemu/mipsel.h b/qemu/mipsel.h
index de9f2a91..fba1dbc7 100644
--- a/qemu/mipsel.h
+++ b/qemu/mipsel.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_mipsel
 #define helper_vfp_muls helper_vfp_muls_mipsel
 #define helper_vfp_negd helper_vfp_negd_mipsel
+#define helper_vfp_negh helper_vfp_negh_mipsel
 #define helper_vfp_negs helper_vfp_negs_mipsel
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_mipsel
 #define helper_vfp_shtod helper_vfp_shtod_mipsel
diff --git a/qemu/powerpc.h b/qemu/powerpc.h
index 17895358..828077a1 100644
--- a/qemu/powerpc.h
+++ b/qemu/powerpc.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_powerpc
 #define helper_vfp_muls helper_vfp_muls_powerpc
 #define helper_vfp_negd helper_vfp_negd_powerpc
+#define helper_vfp_negh helper_vfp_negh_powerpc
 #define helper_vfp_negs helper_vfp_negs_powerpc
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_powerpc
 #define helper_vfp_shtod helper_vfp_shtod_powerpc
diff --git a/qemu/riscv32.h b/qemu/riscv32.h
index 6c1bd640..8c53f93f 100644
--- a/qemu/riscv32.h
+++ b/qemu/riscv32.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_riscv32
 #define helper_vfp_muls helper_vfp_muls_riscv32
 #define helper_vfp_negd helper_vfp_negd_riscv32
+#define helper_vfp_negh helper_vfp_negh_riscv32
 #define helper_vfp_negs helper_vfp_negs_riscv32
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_riscv32
 #define helper_vfp_shtod helper_vfp_shtod_riscv32
diff --git a/qemu/riscv64.h b/qemu/riscv64.h
index 1f54600e..a5f733e3 100644
--- a/qemu/riscv64.h
+++ b/qemu/riscv64.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_riscv64
 #define helper_vfp_muls helper_vfp_muls_riscv64
 #define helper_vfp_negd helper_vfp_negd_riscv64
+#define helper_vfp_negh helper_vfp_negh_riscv64
 #define helper_vfp_negs helper_vfp_negs_riscv64
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_riscv64
 #define helper_vfp_shtod helper_vfp_shtod_riscv64
diff --git a/qemu/sparc.h b/qemu/sparc.h
index ed4ef889..965217a8 100644
--- a/qemu/sparc.h
+++ b/qemu/sparc.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_sparc
 #define helper_vfp_muls helper_vfp_muls_sparc
 #define helper_vfp_negd helper_vfp_negd_sparc
+#define helper_vfp_negh helper_vfp_negh_sparc
 #define helper_vfp_negs helper_vfp_negs_sparc
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_sparc
 #define helper_vfp_shtod helper_vfp_shtod_sparc
diff --git a/qemu/sparc64.h b/qemu/sparc64.h
index dc3f3f3d..b2275f80 100644
--- a/qemu/sparc64.h
+++ b/qemu/sparc64.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_sparc64
 #define helper_vfp_muls helper_vfp_muls_sparc64
 #define helper_vfp_negd helper_vfp_negd_sparc64
+#define helper_vfp_negh helper_vfp_negh_sparc64
 #define helper_vfp_negs helper_vfp_negs_sparc64
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_sparc64
 #define helper_vfp_shtod helper_vfp_shtod_sparc64
diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h
index a288d1db..4cc0ce53 100644
--- a/qemu/target/arm/helper.h
+++ b/qemu/target/arm/helper.h
@@ -123,6 +123,7 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
 DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
 DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
 DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
+DEF_HELPER_1(vfp_negh, f16, f16)
 DEF_HELPER_1(vfp_negs, f32, f32)
 DEF_HELPER_1(vfp_negd, f64, f64)
 DEF_HELPER_1(vfp_abss, f32, f32)
diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c
index 660334b7..909449aa 100644
--- a/qemu/target/arm/translate-vfp.inc.c
+++ b/qemu/target/arm/translate-vfp.inc.c
@@ -1569,6 +1569,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
     return true;
 }
 
+static void gen_VMLA_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* VMLA (fp16): vd = vd + (vn * vm); add input order matters for NaNs */
+    TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
+
+    gen_helper_vfp_mulh(tcg_ctx, tmp, vn, vm, fpst);
+    gen_helper_vfp_addh(tcg_ctx, vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+}
+
+static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VMLA_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /* Note that order of inputs to the add matters for NaNs */
@@ -1599,6 +1614,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
 }
 
+static void gen_VMLS_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /*
+     * VMLS (fp16): vd = vd + -(vn * vm); the product is negated in tmp
+     * before the add. Order of inputs to the add matters for NaNs.
+     */
+    TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
+
+    gen_helper_vfp_mulh(tcg_ctx, tmp, vn, vm, fpst);
+    gen_helper_vfp_negh(tcg_ctx, tmp, tmp);
+    gen_helper_vfp_addh(tcg_ctx, vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+}
+
+static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VMLS_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /*
@@ -1637,6 +1671,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
 }
 
+static void gen_VNMLS_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /*
+     * VNMLS (fp16): vd = -vd + (vn * vm)
+     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+     * plausible-looking simplifications because this will give wrong results
+     * for NaNs.
+     */
+    TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
+
+    gen_helper_vfp_mulh(tcg_ctx, tmp, vn, vm, fpst);
+    gen_helper_vfp_negh(tcg_ctx, vd, vd);
+    gen_helper_vfp_addh(tcg_ctx, vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+}
+
+static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VNMLS_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /*
@@ -1679,6 +1734,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
 }
 
+static void gen_VNMLA_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* VNMLA (fp16): vd = -vd + -(vn * vm) */
+    TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
+
+    gen_helper_vfp_mulh(tcg_ctx, tmp, vn, vm, fpst);
+    gen_helper_vfp_negh(tcg_ctx, tmp, tmp);
+    gen_helper_vfp_negh(tcg_ctx, vd, vd);
+    gen_helper_vfp_addh(tcg_ctx, vd, vd, tmp, fpst);
+    tcg_temp_free_i32(tcg_ctx, tmp);
+}
+
+static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
 static void gen_VNMLA_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /* VNMLA: -fd + -(fn * fm) */
@@ -1728,6 +1800,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
 }
 
+static void gen_VNMUL_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+    /* VNMUL (fp16): vd = -(vn * vm) */
+    gen_helper_vfp_mulh(tcg_ctx, vd, vn, vm, fpst);
+    gen_helper_vfp_negh(tcg_ctx, vd, vd);
+}
+
+static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
+{
+    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
+}
+
 static void gen_VNMUL_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
 {
     /* VNMUL: -(fn * fm) */
diff --git a/qemu/target/arm/vfp.decode b/qemu/target/arm/vfp.decode
index 1ecd5e28..e5545076 100644
--- a/qemu/target/arm/vfp.decode
+++ b/qemu/target/arm/vfp.decode
@@ -103,15 +103,19 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
              vd=%vd_dp p=1 u=0 w=1
 
 # 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
+VMLA_hp      ---- 1110 0.00 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VMLA_sp      ---- 1110 0.00 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VMLA_dp      ---- 1110 0.00 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
+VMLS_hp      ---- 1110 0.00 .... .... 1001 .1.0 ....        @vfp_dnm_s
 VMLS_sp      ---- 1110 0.00 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VMLS_dp      ---- 1110 0.00 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
+VNMLS_hp     ---- 1110 0.01 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VNMLS_sp     ---- 1110 0.01 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VNMLS_dp     ---- 1110 0.01 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
+VNMLA_hp     ---- 1110 0.01 .... .... 1001 .1.0 ....        @vfp_dnm_s
 VNMLA_sp     ---- 1110 0.01 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VNMLA_dp     ---- 1110 0.01 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
@@ -119,6 +123,7 @@ VMUL_hp      ---- 1110 0.10 .... .... 1001 .0.0 ....        @vfp_dnm_s
 VMUL_sp      ---- 1110 0.10 .... .... 1010 .0.0 ....        @vfp_dnm_s
 VMUL_dp      ---- 1110 0.10 .... .... 1011 .0.0 ....        @vfp_dnm_d
 
+VNMUL_hp     ---- 1110 0.10 .... .... 1001 .1.0 ....        @vfp_dnm_s
 VNMUL_sp     ---- 1110 0.10 .... .... 1010 .1.0 ....        @vfp_dnm_s
 VNMUL_dp     ---- 1110 0.10 .... .... 1011 .1.0 ....        @vfp_dnm_d
 
diff --git a/qemu/target/arm/vfp_helper.c b/qemu/target/arm/vfp_helper.c
index 229363de..38221e28 100644
--- a/qemu/target/arm/vfp_helper.c
+++ b/qemu/target/arm/vfp_helper.c
@@ -264,6 +264,11 @@ VFP_BINOP(minnum)
 VFP_BINOP(maxnum)
 #undef VFP_BINOP
 
+dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
+{
+    return float16_chs(a);
+}
+
 float32 VFP_HELPER(neg, s)(float32 a)
 {
     return float32_chs(a);
diff --git a/qemu/x86_64.h b/qemu/x86_64.h
index 9db496d3..83d25e76 100644
--- a/qemu/x86_64.h
+++ b/qemu/x86_64.h
@@ -1924,6 +1924,7 @@
 #define helper_vfp_mulh helper_vfp_mulh_x86_64
 #define helper_vfp_muls helper_vfp_muls_x86_64
 #define helper_vfp_negd helper_vfp_negd_x86_64
+#define helper_vfp_negh helper_vfp_negh_x86_64
 #define helper_vfp_negs helper_vfp_negs_x86_64
 #define helper_vfp_set_fpscr helper_vfp_set_fpscr_x86_64
 #define helper_vfp_shtod helper_vfp_shtod_x86_64