From 9263117d47e608487b202fec22b8dca6710b7356 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 2 Mar 2021 12:46:51 -0500 Subject: [PATCH] target/arm: Simplify do_long_3d and do_2scalar_long In both cases, we can sink the write-back and perform the accumulate into the normal destination temps. Backports 9f1a5f93c2dd345dc6c8fe86ed14bf1485056f6e --- qemu/target/arm/translate-neon.inc.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c index e58d348c..189fee83 100644 --- a/qemu/target/arm/translate-neon.inc.c +++ b/qemu/target/arm/translate-neon.inc.c @@ -2069,17 +2069,14 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a, if (accfn) { tmp = tcg_temp_new_i64(tcg_ctx); read_neon_element64(s, tmp, a->vd, 0, MO_64); - accfn(tcg_ctx, tmp, tmp, rd0); - write_neon_element64(s, tmp, a->vd, 0, MO_64); + accfn(tcg_ctx, rd0, tmp, rd0); read_neon_element64(s, tmp, a->vd, 1, MO_64); - accfn(tcg_ctx, tmp, tmp, rd1); - write_neon_element64(s, tmp, a->vd, 1, MO_64); + accfn(tcg_ctx, rd1, tmp, rd1); tcg_temp_free_i64(tcg_ctx, tmp); - } else { - write_neon_element64(s, rd0, a->vd, 0, MO_64); - write_neon_element64(s, rd1, a->vd, 1, MO_64); } + write_neon_element64(s, rd0, a->vd, 0, MO_64); + write_neon_element64(s, rd1, a->vd, 1, MO_64); tcg_temp_free_i64(tcg_ctx, rd0); tcg_temp_free_i64(tcg_ctx, rd1); @@ -2709,16 +2706,14 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, if (accfn) { TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx); read_neon_element64(s, t64, a->vd, 0, MO_64); - accfn(tcg_ctx, t64, t64, rn0_64); - write_neon_element64(s, t64, a->vd, 0, MO_64); + accfn(tcg_ctx, rn0_64, t64, rn0_64); read_neon_element64(s, t64, a->vd, 1, MO_64); - accfn(tcg_ctx, t64, t64, rn1_64); - write_neon_element64(s, t64, a->vd, 1, MO_64); + accfn(tcg_ctx, rn1_64, t64, rn1_64); tcg_temp_free_i64(tcg_ctx, t64); - } else { - write_neon_element64(s, rn0_64, a->vd, 
0, MO_64); - write_neon_element64(s, rn1_64, a->vd, 1, MO_64); } + + write_neon_element64(s, rn0_64, a->vd, 0, MO_64); + write_neon_element64(s, rn1_64, a->vd, 1, MO_64); tcg_temp_free_i64(tcg_ctx, rn0_64); tcg_temp_free_i64(tcg_ctx, rn1_64); return true;