From 0bcd0ca93da7b1b5c564a474c5212edd7e7d12de Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 7 May 2020 10:35:43 -0400
Subject: [PATCH] tcg: Add load_dest parameter to GVecGen2

We have this same parameter for GVecGen2i, GVecGen3, and GVecGen3i.
This will make some SVE2 insns easier to parameterize.

Backports commit ac09ae627e9a2c65c8a452b69c3dac33c29d0719 from qemu
---
 qemu/tcg/tcg-op-gvec.c | 45 ++++++++++++++++++++++++++++++------------
 qemu/tcg/tcg-op-gvec.h |  2 ++
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/qemu/tcg/tcg-op-gvec.c b/qemu/tcg/tcg-op-gvec.c
index 6e1171b6..8f9c7a96 100644
--- a/qemu/tcg/tcg-op-gvec.c
+++ b/qemu/tcg/tcg-op-gvec.c
@@ -664,17 +664,22 @@ static void expand_clr(TCGContext *s, uint32_t dofs, uint32_t maxsz)
 
 /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
 static void expand_2_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGContext *, TCGv_i32, TCGv_i32))
+                         bool load_dest, void (*fni)(TCGContext *, TCGv_i32, TCGv_i32))
 {
     TCGv_i32 t0 = tcg_temp_new_i32(s);
+    TCGv_i32 t1 = tcg_temp_new_i32(s);
     uint32_t i;
 
     for (i = 0; i < oprsz; i += 4) {
         tcg_gen_ld_i32(s, t0, s->cpu_env, aofs + i);
-        fni(s, t0, t0);
-        tcg_gen_st_i32(s, t0, s->cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i32(s, t1, s->cpu_env, dofs + i);
+        }
+        fni(s, t1, t0);
+        tcg_gen_st_i32(s, t1, s->cpu_env, dofs + i);
     }
     tcg_temp_free_i32(s, t0);
+    tcg_temp_free_i32(s, t1);
 }
 
 static void expand_2i_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -794,17 +799,22 @@ static void expand_4_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t b
 
 /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
 static void expand_2_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
-                         void (*fni)(TCGContext *, TCGv_i64, TCGv_i64))
+                         bool load_dest, void (*fni)(TCGContext *, TCGv_i64, TCGv_i64))
 {
     TCGv_i64 t0 = tcg_temp_new_i64(s);
+    TCGv_i64 t1 = tcg_temp_new_i64(s);
     uint32_t i;
 
     for (i = 0; i < oprsz; i += 8) {
         tcg_gen_ld_i64(s, t0, s->cpu_env, aofs + i);
-        fni(s, t0, t0);
-        tcg_gen_st_i64(s, t0, s->cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_i64(s, t1, s->cpu_env, dofs + i);
+        }
+        fni(s, t1, t0);
+        tcg_gen_st_i64(s, t1, s->cpu_env, dofs + i);
     }
     tcg_temp_free_i64(s, t0);
+    tcg_temp_free_i64(s, t1);
 }
 
 static void expand_2i_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz,
@@ -925,17 +935,23 @@ static void expand_4_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t b
 /* Expand OPSZ bytes worth of two-operand operations using host vectors. */
 static void expand_2_vec(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                          uint32_t oprsz, uint32_t tysz, TCGType type,
+                         bool load_dest,
                          void (*fni)(TCGContext *, unsigned, TCGv_vec, TCGv_vec))
 {
     TCGv_vec t0 = tcg_temp_new_vec(s, type);
+    TCGv_vec t1 = tcg_temp_new_vec(s, type);
     uint32_t i;
 
     for (i = 0; i < oprsz; i += tysz) {
         tcg_gen_ld_vec(s, t0, s->cpu_env, aofs + i);
-        fni(s, vece, t0, t0);
-        tcg_gen_st_vec(s, t0, s->cpu_env, dofs + i);
+        if (load_dest) {
+            tcg_gen_ld_vec(s, t1, s->cpu_env, dofs + i);
+        }
+        fni(s, vece, t1, t0);
+        tcg_gen_st_vec(s, t1, s->cpu_env, dofs + i);
     }
     tcg_temp_free_vec(s, t0);
+    tcg_temp_free_vec(s, t1);
 }
 
 /* Expand OPSZ bytes worth of two-vector operands and an immediate operand
@@ -1089,7 +1105,8 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs,
          * that e.g. size == 80 would be expanded with 2x32 + 1x16.
          */
         some = QEMU_ALIGN_DOWN(oprsz, 32);
-        expand_2_vec(s, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv);
+        expand_2_vec(s, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+                     g->load_dest, g->fniv);
         if (some == oprsz) {
             break;
         }
@@ -1099,17 +1116,19 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs,
         maxsz -= some;
         /* fallthru */
     case TCG_TYPE_V128:
-        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv);
+        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+                     g->load_dest, g->fniv);
         break;
     case TCG_TYPE_V64:
-        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv);
+        expand_2_vec(s, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+                     g->load_dest, g->fniv);
         break;
 
     case 0:
         if (g->fni8 && check_size_impl(oprsz, 8)) {
-            expand_2_i64(s, dofs, aofs, oprsz, g->fni8);
+            expand_2_i64(s, dofs, aofs, oprsz, g->load_dest, g->fni8);
         } else if (g->fni4 && check_size_impl(oprsz, 4)) {
-            expand_2_i32(s, dofs, aofs, oprsz, g->fni4);
+            expand_2_i32(s, dofs, aofs, oprsz, g->load_dest, g->fni4);
         } else {
             assert(g->fno != NULL);
             tcg_gen_gvec_2_ool(s, dofs, aofs, oprsz, maxsz, g->data, g->fno);
diff --git a/qemu/tcg/tcg-op-gvec.h b/qemu/tcg/tcg-op-gvec.h
index 24b5ee30..ec8db68b 100644
--- a/qemu/tcg/tcg-op-gvec.h
+++ b/qemu/tcg/tcg-op-gvec.h
@@ -109,6 +109,8 @@ typedef struct {
     uint8_t vece;
     /* Prefer i64 to v64. */
     bool prefer_i64;
+    /* Load dest as a 2nd source operand. */
+    bool load_dest;
 } GVecGen2;
 
 typedef struct {