tcg: Add load_dest parameter to GVecGen2

We have this same parameter for GVecGen2i, GVecGen3, and GVecGen3i. This will make some SVE2 insns easier to parameterize. Backports commit ac09ae627e9a2c65c8a452b69c3dac33c29d0719 from qemu
2026-05-03 21:53:18 +00:00 · 2020-05-07 10:35:43 -04:00 · 2020-05-07 10:35:43 -04:00 · 0bcd0ca93d
parent f02f71f38f
commit 0bcd0ca93d
2 changed files with 34 additions and 13 deletions
--- a/qemu/tcg/tcg-op-gvec.c
+++ b/qemu/tcg/tcg-op-gvec.c
@ -664,17 +664,22 @@ static void expand_clr(TCGContext *s, uint32_t dofs, uint32_t maxsz)

 /* Expand OPSZ bytes worth of two-operand operations using i32 elements.  */
 static void expand_2_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGContext *, TCGv_i32, TCGv_i32))
+                         bool load_dest, void (*fni)(TCGContext *, TCGv_i32, TCGv_i32))
 {
    TCGv_i32 t0 = tcg_temp_new_i32(s);
+    TCGv_i32 t1 = tcg_temp_new_i32(s);
    uint32_t i;

    for (i = 0; i < oprsz; i += 4) {
        tcg_gen_ld_i32(s, t0, s->cpu_env, aofs + i);
-        fni(s, t0, t0);
-        tcg_gen_st_i32(s, t0, s->cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i32(s, t1, s->cpu_env, dofs + i);
+        }
+        fni(s, t1, t0);
+        tcg_gen_st_i32(s, t1, s->cpu_env, dofs + i);
    }
    tcg_temp_free_i32(s, t0);
+    tcg_temp_free_i32(s, t1);
 }

 static void expand_2i_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@ -794,17 +799,22 @@ static void expand_4_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t b

 /* Expand OPSZ bytes worth of two-operand operations using i64 elements.  */
 static void expand_2_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGContext *, TCGv_i64, TCGv_i64))
+                         bool load_dest, void (*fni)(TCGContext *, TCGv_i64, TCGv_i64))
 {
    TCGv_i64 t0 = tcg_temp_new_i64(s);
+    TCGv_i64 t1 = tcg_temp_new_i64(s);
    uint32_t i;

    for (i = 0; i < oprsz; i += 8) {
        tcg_gen_ld_i64(s, t0, s->cpu_env, aofs + i);
-        fni(s, t0, t0);
-        tcg_gen_st_i64(s, t0, s->cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i64(s, t1, s->cpu_env, dofs + i);
+        }
+        fni(s, t1, t0);
+        tcg_gen_st_i64(s, t1, s->cpu_env, dofs + i);
    }
    tcg_temp_free_i64(s, t0);
+    tcg_temp_free_i64(s, t1);
 }

 static void expand_2i_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@ -925,17 +935,23 @@ static void expand_4_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t b
 /* Expand OPSZ bytes worth of two-operand operations using host vectors.  */
 static void expand_2_vec(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                         uint32_t oprsz, uint32_t tysz, TCGType type,
+                         bool load_dest,
                         void (*fni)(TCGContext *, unsigned, TCGv_vec, TCGv_vec))
 {
    TCGv_vec t0 = tcg_temp_new_vec(s, type);
+    TCGv_vec t1 = tcg_temp_new_vec(s, type);
    uint32_t i;

    for (i = 0; i < oprsz; i += tysz) {
        tcg_gen_ld_vec(s, t0, s->cpu_env, aofs + i);
-        fni(s, vece, t0, t0);
-        tcg_gen_st_vec(s, t0, s->cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_vec(s, t1, s->cpu_env, dofs + i);
+        }
+        fni(s, vece, t1, t0);
+        tcg_gen_st_vec(s, t1, s->cpu_env, dofs + i);
    }
    tcg_temp_free_vec(s, t0);
+    tcg_temp_free_vec(s, t1);
 }

 /* Expand OPSZ bytes worth of two-vector operands and an immediate operand
@ -1089,7 +1105,8 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs,
         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
         */
        some = QEMU_ALIGN_DOWN(oprsz, 32);
-        expand_2_vec(s, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
+        expand_2_vec(s, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+                     g->load_dest, g->fniv);
        if (some == oprsz) {
            break;
        }
@ -1099,17 +1116,19 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs,
        maxsz -= some;
        /* fallthru */
    case TCG_TYPE_V128:
-        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
+        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+                     g->load_dest, g->fniv);
        break;
    case TCG_TYPE_V64:
-        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
+        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+                     g->load_dest, g->fniv);
        break;

    case 0:
        if (g->fni8 && check_size_impl(oprsz, 8)) {
-            expand_2_i64(s, dofs, aofs, oprsz, g->fni8);
+            expand_2_i64(s, dofs, aofs, oprsz, g->load_dest, g->fni8);
        } else if (g->fni4 && check_size_impl(oprsz, 4)) {
-            expand_2_i32(s, dofs, aofs, oprsz, g->fni4);
+            expand_2_i32(s, dofs, aofs, oprsz, g->load_dest, g->fni4);
        } else {
            assert(g->fno != NULL);
            tcg_gen_gvec_2_ool(s, dofs, aofs, oprsz, maxsz, g->data, g->fno);
--- a/qemu/tcg/tcg-op-gvec.h
+++ b/qemu/tcg/tcg-op-gvec.h
@ -109,6 +109,8 @@ typedef struct {
    uint8_t vece;
    /* Prefer i64 to v64.  */
    bool prefer_i64;
+    /* Load dest as a 2nd source operand.  */
+    bool load_dest;
 } GVecGen2;

 typedef struct {