diff --git a/qemu/tcg/tcg-op-gvec.c b/qemu/tcg/tcg-op-gvec.c index 6e1171b6..8f9c7a96 100644 --- a/qemu/tcg/tcg-op-gvec.c +++ b/qemu/tcg/tcg-op-gvec.c @@ -664,17 +664,22 @@ static void expand_clr(TCGContext *s, uint32_t dofs, uint32_t maxsz) /* Expand OPSZ bytes worth of two-operand operations using i32 elements. */ static void expand_2_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i32, TCGv_i32)) { TCGv_i32 t0 = tcg_temp_new_i32(s); + TCGv_i32 t1 = tcg_temp_new_i32(s); uint32_t i; for (i = 0; i < oprsz; i += 4) { tcg_gen_ld_i32(s, t0, s->cpu_env, aofs + i); - fni(s, t0, t0); - tcg_gen_st_i32(s, t0, s->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i32(s, t1, s->cpu_env, dofs + i); + } + fni(s, t1, t0); + tcg_gen_st_i32(s, t1, s->cpu_env, dofs + i); } tcg_temp_free_i32(s, t0); + tcg_temp_free_i32(s, t1); } static void expand_2i_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -794,17 +799,22 @@ static void expand_4_i32(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t b /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */ static void expand_2_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz, - void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) + bool load_dest, void (*fni)(TCGContext *, TCGv_i64, TCGv_i64)) { TCGv_i64 t0 = tcg_temp_new_i64(s); + TCGv_i64 t1 = tcg_temp_new_i64(s); uint32_t i; for (i = 0; i < oprsz; i += 8) { tcg_gen_ld_i64(s, t0, s->cpu_env, aofs + i); - fni(s, t0, t0); - tcg_gen_st_i64(s, t0, s->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_i64(s, t1, s->cpu_env, dofs + i); + } + fni(s, t1, t0); + tcg_gen_st_i64(s, t1, s->cpu_env, dofs + i); } tcg_temp_free_i64(s, t0); + tcg_temp_free_i64(s, t1); } static void expand_2i_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -925,17 +935,23 @@ static void expand_4_i64(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t b /* Expand OPSZ bytes worth of two-operand operations using host vectors. */ static void expand_2_vec(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, + bool load_dest, void (*fni)(TCGContext *, unsigned, TCGv_vec, TCGv_vec)) { TCGv_vec t0 = tcg_temp_new_vec(s, type); + TCGv_vec t1 = tcg_temp_new_vec(s, type); uint32_t i; for (i = 0; i < oprsz; i += tysz) { tcg_gen_ld_vec(s, t0, s->cpu_env, aofs + i); - fni(s, vece, t0, t0); - tcg_gen_st_vec(s, t0, s->cpu_env, dofs + i); + if (load_dest) { + tcg_gen_ld_vec(s, t1, s->cpu_env, dofs + i); + } + fni(s, vece, t1, t0); + tcg_gen_st_vec(s, t1, s->cpu_env, dofs + i); } tcg_temp_free_vec(s, t0); + tcg_temp_free_vec(s, t1); } /* Expand OPSZ bytes worth of two-vector operands and an immediate operand @@ -1089,7 +1105,8 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs, * that e.g. size == 80 would be expanded with 2x32 + 1x16. */ some = QEMU_ALIGN_DOWN(oprsz, 32); - expand_2_vec(s, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv); + expand_2_vec(s, g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, + g->load_dest, g->fniv); if (some == oprsz) { break; } @@ -1099,17 +1116,19 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs, maxsz -= some; /* fallthru */ case TCG_TYPE_V128: - expand_2_vec(s, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv); + expand_2_vec(s, g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, + g->load_dest, g->fniv); break; case TCG_TYPE_V64: - expand_2_vec(s, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv); + expand_2_vec(s, g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, + g->load_dest, g->fniv); break; case 0: if (g->fni8 && check_size_impl(oprsz, 8)) { - expand_2_i64(s, dofs, aofs, oprsz, g->fni8); + expand_2_i64(s, dofs, aofs, oprsz, g->load_dest, g->fni8); } else if (g->fni4 && check_size_impl(oprsz, 4)) { - expand_2_i32(s, dofs, aofs, oprsz, g->fni4); + expand_2_i32(s, dofs, aofs, oprsz, g->load_dest, g->fni4); } else { assert(g->fno != NULL); tcg_gen_gvec_2_ool(s, dofs, aofs, oprsz, maxsz, g->data, g->fno); diff --git a/qemu/tcg/tcg-op-gvec.h b/qemu/tcg/tcg-op-gvec.h index 24b5ee30..ec8db68b 100644 --- a/qemu/tcg/tcg-op-gvec.h +++ b/qemu/tcg/tcg-op-gvec.h @@ -109,6 +109,8 @@ typedef struct { uint8_t vece; /* Prefer i64 to v64. */ bool prefer_i64; + /* Load dest as a 2nd source operand. */ + bool load_dest; } GVecGen2; typedef struct {