From 2da89a626cca21812033bbe0d332b69c32396ada Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Fri, 26 Feb 2021 14:20:18 -0500
Subject: [PATCH] target/arm: Merge helper_sve_clr_* and helper_sve_movz_*

---
 qemu/aarch64.h                  |  4 --
 qemu/aarch64eb.h                |  4 --
 qemu/header_gen.py              |  4 --
 qemu/target/arm/helper-sve.h    |  5 ---
 qemu/target/arm/sve_helper.c    | 70 +++++++--------------------------
 qemu/target/arm/translate-sve.c | 57 ++++++++++-----------------
 6 files changed, 35 insertions(+), 109 deletions(-)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index de8b2080..7fd3b118 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -3738,10 +3738,6 @@
 #define helper_sve_brkpas helper_sve_brkpas_aarch64
 #define helper_sve_brkpb helper_sve_brkpb_aarch64
 #define helper_sve_brkpbs helper_sve_brkpbs_aarch64
-#define helper_sve_clr_b helper_sve_clr_b_aarch64
-#define helper_sve_clr_d helper_sve_clr_d_aarch64
-#define helper_sve_clr_h helper_sve_clr_h_aarch64
-#define helper_sve_clr_s helper_sve_clr_s_aarch64
 #define helper_sve_cls_b helper_sve_cls_b_aarch64
 #define helper_sve_cls_d helper_sve_cls_d_aarch64
 #define helper_sve_cls_h helper_sve_cls_h_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index 72efcf19..6d41dcd9 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -3738,10 +3738,6 @@
 #define helper_sve_brkpas helper_sve_brkpas_aarch64eb
 #define helper_sve_brkpb helper_sve_brkpb_aarch64eb
 #define helper_sve_brkpbs helper_sve_brkpbs_aarch64eb
-#define helper_sve_clr_b helper_sve_clr_b_aarch64eb
-#define helper_sve_clr_d helper_sve_clr_d_aarch64eb
-#define helper_sve_clr_h helper_sve_clr_h_aarch64eb
-#define helper_sve_clr_s helper_sve_clr_s_aarch64eb
 #define helper_sve_cls_b helper_sve_cls_b_aarch64eb
 #define helper_sve_cls_d helper_sve_cls_d_aarch64eb
 #define helper_sve_cls_h helper_sve_cls_h_aarch64eb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index 4b28b803..6fc6a03a 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -3878,10 +3878,6 @@ aarch64_symbols = (
     'helper_sve_brkpas',
     'helper_sve_brkpb',
     'helper_sve_brkpbs',
-    'helper_sve_clr_b',
-    'helper_sve_clr_d',
-    'helper_sve_clr_h',
-    'helper_sve_clr_s',
     'helper_sve_cls_b',
     'helper_sve_cls_d',
     'helper_sve_cls_h',
diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h
index 63c4a087..4411c471 100644
--- a/qemu/target/arm/helper-sve.h
+++ b/qemu/target/arm/helper-sve.h
@@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
 DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c
index 5ea0d1a8..e0e3248e 100644
--- a/qemu/target/arm/sve_helper.c
+++ b/qemu/target/arm/sve_helper.c
@@ -955,85 +955,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
     return flags;
 }
 
-/* Store zero into every active element of Zd.  We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.  In particular, logical shift by element size, which is
- * otherwise undefined on the host.
- *
- * For element sizes smaller than uint64_t, we use tables to expand
- * the N bits of the controlling predicate to a byte mask, and clear
- * those bytes.
+/*
+ * Copy Zn into Zd, and store zero into inactive elements.
+ * If inv, store zeros into the active elements.
  */
-void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        d[i] &= ~expand_pred_b(pg[H1(i)]);
-    }
-}
-
-void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        d[i] &= ~expand_pred_h(pg[H1(i)]);
-    }
-}
-
-void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        d[i] &= ~expand_pred_s(pg[H1(i)]);
-    }
-}
-
-void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            d[i] = 0;
-        }
-    }
-}
-
-/* Copy Zn into Zd, and store zero into inactive elements.  */
 void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
 {
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & expand_pred_b(pg[H1(i)]);
+        d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
     }
 }
 
 void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
 {
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & expand_pred_h(pg[H1(i)]);
+        d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
     }
 }
 
 void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
 {
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & expand_pred_s(pg[H1(i)]);
+        d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
     }
 }
 
@@ -1042,8 +1000,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+    uint8_t inv = simd_data(desc);
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1);
+        d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
     }
 }
 
diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c
index f3ce370f..3912eb3f 100644
--- a/qemu/target/arm/translate-sve.c
+++ b/qemu/target/arm/translate-sve.c
@@ -598,39 +598,27 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
  *** SVE Shift by Immediate - Predicated Group
  */
 
-/* Store zero into every active element of Zd.  We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.
+/*
+ * Copy Zn into Zd, storing zeros into inactive elements.
+ * If invert, store zeros into the active elements.
  */
-static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
-{
-    static gen_helper_gvec_2 * const fns[4] = {
-        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
-        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
-    };
-    if (sve_access_check(s)) {
-        TCGContext *tcg_ctx = s->uc->tcg_ctx;
-        unsigned vsz = vec_full_reg_size(s);
-        tcg_gen_gvec_2_ool(tcg_ctx, vec_full_reg_offset(s, rd),
-                           pred_full_reg_offset(s, pg),
-                           vsz, vsz, 0, fns[esz]);
-    }
-    return true;
-}
-
-/* Copy Zn into Zd, storing zeros into inactive elements.  */
-static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
+static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
+                        int esz, bool invert)
 {
     static gen_helper_gvec_3 * const fns[4] = {
         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
     };
-    TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    unsigned vsz = vec_full_reg_size(s);
-    tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd),
-                       vec_full_reg_offset(s, rn),
-                       pred_full_reg_offset(s, pg),
-                       vsz, vsz, 0, fns[esz]);
+
+    if (sve_access_check(s)) {
+        TCGContext *tcg_ctx = s->uc->tcg_ctx;
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd),
+                           vec_full_reg_offset(s, rn),
+                           pred_full_reg_offset(s, pg),
+                           vsz, vsz, invert, fns[esz]);
+    }
+    return true;
 }
 
 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
@@ -675,7 +663,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
     /* Shift by element size is architecturally valid.
        For logical shifts, it is a zeroing operation.  */
     if (a->imm >= (8 << a->esz)) {
-        return do_clr_zp(s, a->rd, a->pg, a->esz);
+        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
     } else {
         return do_zpzi_ool(s, a, fns[a->esz]);
     }
@@ -693,7 +681,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
     /* Shift by element size is architecturally valid.
        For logical shifts, it is a zeroing operation.  */
     if (a->imm >= (8 << a->esz)) {
-        return do_clr_zp(s, a->rd, a->pg, a->esz);
+        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
     } else {
         return do_zpzi_ool(s, a, fns[a->esz]);
    }
@@ -711,7 +699,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
     /* Shift by element size is architecturally valid.
       For arithmetic right shift for division, it is a zeroing operation.  */
     if (a->imm >= (8 << a->esz)) {
-        return do_clr_zp(s, a->rd, a->pg, a->esz);
+        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
     } else {
         return do_zpzi_ool(s, a, fns[a->esz]);
     }
@@ -5170,9 +5158,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
     tcg_temp_free_i64(tcg_ctx, temp);
 
     /* Zero the inactive elements.  */
-    gen_set_label(tcg_ctx, over);
-    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
-    return true;
+    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
 }
 
 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -5962,8 +5948,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
 
 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
 {
-    if (sve_access_check(s)) {
-        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
-    }
-    return true;
+    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
 }
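
Note (illustration only, not part of the diff above): the merged helpers reduce the old clr/movz pair to a single masking identity. simd_data(desc) carries the invert flag; -(uint64_t)flag turns it into an all-zeros or all-ones 64-bit mask, and XOR-ing that with the expanded predicate decides whether the final AND keeps the active or the inactive elements. The standalone sketch below demonstrates the identity for one 64-bit chunk of a byte-element vector; expand_pred_b_sketch is a simplified, hypothetical stand-in for QEMU's table-driven expand_pred_b, not the real implementation.

#include <inttypes.h>
#include <stdio.h>

/*
 * Hypothetical stand-in for QEMU's table-driven expand_pred_b():
 * expand each of the 8 low predicate bits into a full byte of mask.
 */
static uint64_t expand_pred_b_sketch(uint8_t pg)
{
    uint64_t mask = 0;
    for (int i = 0; i < 8; i++) {
        if (pg & (1u << i)) {
            mask |= UINT64_C(0xff) << (i * 8);
        }
    }
    return mask;
}

int main(void)
{
    uint64_t n = UINT64_C(0x1122334455667788); /* one 64-bit chunk of Zn */
    uint8_t pg = 0x05;                         /* bytes 0 and 2 are active */

    for (int invert = 0; invert <= 1; invert++) {
        /* -(uint64_t)flag is all-ones when the flag is set, else all-zeros. */
        uint64_t inv = -(uint64_t)(invert & 1);
        uint64_t d = n & (expand_pred_b_sketch(pg) ^ inv);
        printf("invert=%d -> d = %016" PRIX64 "\n", invert, d);
    }
    return 0;
}

With pg = 0x05, the invert=0 run keeps bytes 0 and 2 of n and zeroes the rest (the MOVPRFX_z / LD1R use of the helper), while the invert=1 run zeroes exactly those active bytes and keeps the others, which is what the translator now requests for shift-by-element-size via do_movz_zpz(..., true).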