From 9bbc412c66290685058ac076a176a45834978873 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 10 Nov 2018 09:39:10 -0500 Subject: [PATCH] target/arm: Hoist address increment for vector memory ops This can reduce the number of opcodes required for certain complex forms of load-multiple (e.g. ld4.16b). Backports commit a7d8143aed2268f147cc1abfebc848ed6282a313 from qemu --- qemu/target/arm/translate-a64.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 8f99d627..ac96d11d 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -3087,7 +3087,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) bool is_store = !extract32(insn, 22, 1); bool is_postidx = extract32(insn, 23, 1); bool is_q = extract32(insn, 30, 1); - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; int ebytes = 1 << size; int elements = (is_q ? 128 : 64) / (8 << size); @@ -3152,6 +3152,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(tcg_ctx); tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); for (r = 0; r < rpt; r++) { int e; @@ -3176,7 +3177,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) clear_vec_high(s, is_q, tt); } } - tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes); + tcg_gen_add_i64(tcg_ctx, tcg_addr, tcg_addr, tcg_ebytes); tt = (tt + 1) % 32; } } @@ -3190,6 +3191,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ctx, tcg_ebytes); tcg_temp_free_i64(tcg_ctx, tcg_addr); } @@ -3233,7 +3235,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) bool replicate = false; int index = is_q << 3 | S << 2 | size; int ebytes, xs; - TCGv_i64 tcg_addr, tcg_rn; + TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes; switch (scale) { case 3: @@ -3286,6 +3288,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg_sp(s, rn); tcg_addr = tcg_temp_new_i64(tcg_ctx); tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn); + tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes); for (xs = 0; xs < selem; xs++) { if (replicate) { @@ -3328,7 +3331,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) do_vec_st(s, rt, index, tcg_addr, scale); } } - tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes); + tcg_gen_add_i64(tcg_ctx, tcg_addr, tcg_addr, tcg_ebytes); rt = (rt + 1) % 32; } @@ -3340,6 +3343,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm)); } } + tcg_temp_free_i64(tcg_ctx, tcg_ebytes); tcg_temp_free_i64(tcg_ctx, tcg_addr); }