arm/translate-a64: add FP16 x2 ops for simd_indexed

A bunch of the vectorised bitwise operations just operate on larger chunks at a time. We can do the same for the new half-precision operations by introducing some TWOHALFOP helpers, each of which processes both halves of a packed pair of half-precision values in a single call. Hopefully all this hoop jumping will get simpler once we have generically vectorised helpers here. Backports commit 6089030c7322d8f96b54fb9904e53b0f464bb8fe from qemu.
2026-05-08 01:53:38 +00:00 · 2018-03-08 18:06:25 -05:00 · 2018-03-08 18:06:25 -05:00 · 82ffaab7de
parent 38815b2901
commit 82ffaab7de
6 changed files with 106 additions and 6 deletions
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -3722,18 +3722,28 @@
 #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64
 #define helper_advsimd_acge_f16 helper_advsimd_acge_f16_aarch64
 #define helper_advsimd_acgt_f16 helper_advsimd_acgt_f16_aarch64
+#define helper_advsimd_add2h helper_advsimd_add2h_aarch64
 #define helper_advsimd_addh helper_advsimd_addh_aarch64
 #define helper_advsimd_ceq_f16 helper_advsimd_ceq_f16_aarch64
 #define helper_advsimd_cge_f16 helper_advsimd_cge_f16_aarch64
 #define helper_advsimd_cgt_f16 helper_advsimd_cgt_f16_aarch64
+#define helper_advsimd_div2h helper_advsimd_div2h_aarch64
 #define helper_advsimd_divh helper_advsimd_divh_aarch64
+#define helper_advsimd_max2h helper_advsimd_max2h_aarch64
 #define helper_advsimd_maxh helper_advsimd_maxh_aarch64
+#define helper_advsimd_maxnum2h helper_advsimd_maxnum2h_aarch64
 #define helper_advsimd_maxnumh helper_advsimd_maxnumh_aarch64
+#define helper_advsimd_min2h helper_advsimd_min2h_aarch64
 #define helper_advsimd_minh helper_advsimd_minh_aarch64
+#define helper_advsimd_minnum2h helper_advsimd_minnum2h_aarch64
 #define helper_advsimd_minnumh helper_advsimd_minnumh_aarch64
 #define helper_advsimd_muladdh helper_advsimd_muladdh_aarch64
+#define helper_advsimd_muladd2h helper_advsimd_muladd2h_aarch64
+#define helper_advsimd_mul2h helper_advsimd_mul2h_aarch64
 #define helper_advsimd_mulh helper_advsimd_mulh_aarch64
+#define helper_advsimd_mulx2h helper_advsimd_mulx2h_aarch64
 #define helper_advsimd_mulxh helper_advsimd_mulxh_aarch64
+#define helper_advsimd_sub2h helper_advsimd_sub2h_aarch64
 #define helper_advsimd_subh helper_advsimd_subh_aarch64
 #define helper_crc32_64 helper_crc32_64_aarch64
 #define helper_crc32c_64 helper_crc32c_64_aarch64
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -3722,18 +3722,28 @@
 #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64eb
 #define helper_advsimd_acge_f16 helper_advsimd_acge_f16_aarch64eb
 #define helper_advsimd_acgt_f16 helper_advsimd_acgt_f16_aarch64eb
+#define helper_advsimd_add2h helper_advsimd_add2h_aarch64eb
 #define helper_advsimd_addh helper_advsimd_addh_aarch64eb
 #define helper_advsimd_ceq_f16 helper_advsimd_ceq_f16_aarch64eb
 #define helper_advsimd_cge_f16 helper_advsimd_cge_f16_aarch64eb
 #define helper_advsimd_cgt_f16 helper_advsimd_cgt_f16_aarch64eb
+#define helper_advsimd_div2h helper_advsimd_div2h_aarch64eb
 #define helper_advsimd_divh helper_advsimd_divh_aarch64eb
+#define helper_advsimd_max2h helper_advsimd_max2h_aarch64eb
 #define helper_advsimd_maxh helper_advsimd_maxh_aarch64eb
+#define helper_advsimd_maxnum2h helper_advsimd_maxnum2h_aarch64eb
 #define helper_advsimd_maxnumh helper_advsimd_maxnumh_aarch64eb
+#define helper_advsimd_min2h helper_advsimd_min2h_aarch64eb
 #define helper_advsimd_minh helper_advsimd_minh_aarch64eb
+#define helper_advsimd_minnum2h helper_advsimd_minnum2h_aarch64eb
 #define helper_advsimd_minnumh helper_advsimd_minnumh_aarch64eb
 #define helper_advsimd_muladdh helper_advsimd_muladdh_aarch64eb
+#define helper_advsimd_muladd2h helper_advsimd_muladd2h_aarch64eb
+#define helper_advsimd_mul2h helper_advsimd_mul2h_aarch64eb
 #define helper_advsimd_mulh helper_advsimd_mulh_aarch64eb
+#define helper_advsimd_mulx2h helper_advsimd_mulx2h_aarch64eb
 #define helper_advsimd_mulxh helper_advsimd_mulxh_aarch64eb
+#define helper_advsimd_sub2h helper_advsimd_sub2h_aarch64eb
 #define helper_advsimd_subh helper_advsimd_subh_aarch64eb
 #define helper_crc32_64 helper_crc32_64_aarch64eb
 #define helper_crc32c_64 helper_crc32c_64_aarch64eb
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -3742,18 +3742,28 @@ aarch64_symbols = (
    'gen_a64_set_pc_im',
    'helper_advsimd_acge_f16',
    'helper_advsimd_acgt_f16',
+    'helper_advsimd_add2h',
    'helper_advsimd_addh',
    'helper_advsimd_ceq_f16',
    'helper_advsimd_cge_f16',
    'helper_advsimd_cgt_f16',
+    'helper_advsimd_div2h',
    'helper_advsimd_divh',
+    'helper_advsimd_max2h',
    'helper_advsimd_maxh',
+    'helper_advsimd_maxnum2h',
    'helper_advsimd_maxnumh',
+    'helper_advsimd_min2h',
    'helper_advsimd_minh',
+    'helper_advsimd_minnum2h',
    'helper_advsimd_minnumh',
    'helper_advsimd_muladdh',
+    'helper_advsimd_muladd2h',
+    'helper_advsimd_mul2h',
    'helper_advsimd_mulh',
+    'helper_advsimd_mulx2h',
    'helper_advsimd_mulxh',
+    'helper_advsimd_sub2h',
    'helper_advsimd_subh',
    'helper_crc32_64',
    'helper_crc32c_64',
--- a/qemu/target/arm/helper-a64.c
+++ b/qemu/target/arm/helper-a64.c
@@ -675,8 +675,32 @@ ADVSIMD_HALFOP(max)
 ADVSIMD_HALFOP(minnum)
 ADVSIMD_HALFOP(maxnum)

+#define ADVSIMD_TWOHALFOP(name)                                         \
+uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
+{ \
+    float16  a1, a2, b1, b2;                        \
+    uint32_t r1, r2;                                \
+    float_status *fpst = fpstp;                     \
+    a1 = extract32(two_a, 0, 16);                   \
+    a2 = extract32(two_a, 16, 16);                  \
+    b1 = extract32(two_b, 0, 16);                   \
+    b2 = extract32(two_b, 16, 16);                  \
+    r1 = float16_ ## name(a1, b1, fpst);            \
+    r2 = float16_ ## name(a2, b2, fpst);            \
+    return deposit32(r1, 16, 16, r2);               \
+}
+
+ADVSIMD_TWOHALFOP(add)
+ADVSIMD_TWOHALFOP(sub)
+ADVSIMD_TWOHALFOP(mul)
+ADVSIMD_TWOHALFOP(div)
+ADVSIMD_TWOHALFOP(min)
+ADVSIMD_TWOHALFOP(max)
+ADVSIMD_TWOHALFOP(minnum)
+ADVSIMD_TWOHALFOP(maxnum)
+
 /* Data processing - scalar floating-point and advanced SIMD */
-float16 HELPER(advsimd_mulxh)(float16 a, float16 b, void *fpstp)
+static float16 float16_mulx(float16 a, float16 b, void *fpstp)
 {
    float_status *fpst = fpstp;

@@ -692,6 +716,9 @@ float16 HELPER(advsimd_mulxh)(float16 a, float16 b, void *fpstp)
    return float16_mul(a, b, fpst);
 }

+ADVSIMD_HALFOP(mulx)
+ADVSIMD_TWOHALFOP(mulx)
+
 /* fused multiply-accumulate */
 float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
 {
@@ -699,6 +726,23 @@ float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
    return float16_muladd(a, b, c, 0, fpst);
 }

+uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
+                                  uint32_t two_c, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    float16  a1, a2, b1, b2, c1, c2;
+    uint32_t r1, r2;
+    a1 = extract32(two_a, 0, 16);
+    a2 = extract32(two_a, 16, 16);
+    b1 = extract32(two_b, 0, 16);
+    b2 = extract32(two_b, 16, 16);
+    c1 = extract32(two_c, 0, 16);
+    c2 = extract32(two_c, 16, 16);
+    r1 = float16_muladd(a1, b1, c1, 0, fpst);
+    r2 = float16_muladd(a2, b2, c2, 0, fpst);
+    return deposit32(r1, 16, 16, r2);
+}
+
 /*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
--- a/qemu/target/arm/helper-a64.h
+++ b/qemu/target/arm/helper-a64.h
@@ -61,3 +61,13 @@ DEF_HELPER_3(advsimd_acge_f16, i32, f16, f16, ptr)
 DEF_HELPER_3(advsimd_acgt_f16, i32, f16, f16, ptr)
 DEF_HELPER_3(advsimd_mulxh, f16, f16, f16, ptr)
 DEF_HELPER_4(advsimd_muladdh, f16, f16, f16, f16, ptr)
+DEF_HELPER_3(advsimd_add2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_sub2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_mul2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_div2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_max2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_min2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_maxnum2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_minnum2h, i32, i32, i32, ptr)
+DEF_HELPER_3(advsimd_mulx2h, i32, i32, i32, ptr)
+DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, ptr)
--- a/qemu/target/arm/translate-a64.c
+++ b/qemu/target/arm/translate-a64.c
@@ -11574,8 +11574,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
                         * multiply-add */
                        tcg_gen_xori_i32(tcg_ctx, tcg_op, tcg_op, 0x80008000);
                    }
-                    gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, tcg_idx,
-                                               tcg_res, fpst);
+                    if (is_scalar) {
+                        gen_helper_advsimd_muladdh(tcg_ctx, tcg_res, tcg_op, tcg_idx,
+                                                   tcg_res, fpst);
+                    } else {
+                        gen_helper_advsimd_muladd2h(tcg_ctx, tcg_res, tcg_op, tcg_idx,
+                                                    tcg_res, fpst);
+                    }
                    break;
                case 2:
                    if (opcode == 0x5) {
@@ -11594,10 +11599,21 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
                switch (size) {
                case 1:
                    if (u) {
-                        gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op, tcg_idx,
-                                                 fpst);
+                        if (is_scalar) {
+                            gen_helper_advsimd_mulxh(tcg_ctx, tcg_res, tcg_op,
+                                                     tcg_idx, fpst);
+                        } else {
+                            gen_helper_advsimd_mulx2h(tcg_ctx, tcg_res, tcg_op,
+                                                      tcg_idx, fpst);
+                        }
                    } else {
-                        g_assert_not_reached();
+                        if (is_scalar) {
+                            gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op,
+                                                    tcg_idx, fpst);
+                        } else {
+                            gen_helper_advsimd_mul2h(tcg_ctx, tcg_res, tcg_op,
+                                                     tcg_idx, fpst);
+                        }
                    }
                    break;
                case 2: