mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-11 12:55:29 +00:00
target/arm: Hoist address increment for vector memory ops
Materializing the per-element address increment once, as a constant temporary outside the loop, can reduce the number of opcodes required for certain complex forms of load-multiple (e.g. ld4.16b). Backports commit a7d8143aed2268f147cc1abfebc848ed6282a313 from qemu.
This commit is contained in:
parent
d60fe610bb
commit
9bbc412c66
|
@ -3087,7 +3087,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
|
||||||
bool is_store = !extract32(insn, 22, 1);
|
bool is_store = !extract32(insn, 22, 1);
|
||||||
bool is_postidx = extract32(insn, 23, 1);
|
bool is_postidx = extract32(insn, 23, 1);
|
||||||
bool is_q = extract32(insn, 30, 1);
|
bool is_q = extract32(insn, 30, 1);
|
||||||
TCGv_i64 tcg_addr, tcg_rn;
|
TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
|
||||||
|
|
||||||
int ebytes = 1 << size;
|
int ebytes = 1 << size;
|
||||||
int elements = (is_q ? 128 : 64) / (8 << size);
|
int elements = (is_q ? 128 : 64) / (8 << size);
|
||||||
|
@ -3152,6 +3152,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
|
||||||
tcg_rn = cpu_reg_sp(s, rn);
|
tcg_rn = cpu_reg_sp(s, rn);
|
||||||
tcg_addr = tcg_temp_new_i64(tcg_ctx);
|
tcg_addr = tcg_temp_new_i64(tcg_ctx);
|
||||||
tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);
|
tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);
|
||||||
|
tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes);
|
||||||
|
|
||||||
for (r = 0; r < rpt; r++) {
|
for (r = 0; r < rpt; r++) {
|
||||||
int e;
|
int e;
|
||||||
|
@ -3176,7 +3177,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
|
||||||
clear_vec_high(s, is_q, tt);
|
clear_vec_high(s, is_q, tt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes);
|
tcg_gen_add_i64(tcg_ctx, tcg_addr, tcg_addr, tcg_ebytes);
|
||||||
tt = (tt + 1) % 32;
|
tt = (tt + 1) % 32;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3190,6 +3191,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
|
||||||
tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
|
tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
tcg_temp_free_i64(tcg_ctx, tcg_ebytes);
|
||||||
tcg_temp_free_i64(tcg_ctx, tcg_addr);
|
tcg_temp_free_i64(tcg_ctx, tcg_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3233,7 +3235,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
|
||||||
bool replicate = false;
|
bool replicate = false;
|
||||||
int index = is_q << 3 | S << 2 | size;
|
int index = is_q << 3 | S << 2 | size;
|
||||||
int ebytes, xs;
|
int ebytes, xs;
|
||||||
TCGv_i64 tcg_addr, tcg_rn;
|
TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
|
||||||
|
|
||||||
switch (scale) {
|
switch (scale) {
|
||||||
case 3:
|
case 3:
|
||||||
|
@ -3286,6 +3288,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
|
||||||
tcg_rn = cpu_reg_sp(s, rn);
|
tcg_rn = cpu_reg_sp(s, rn);
|
||||||
tcg_addr = tcg_temp_new_i64(tcg_ctx);
|
tcg_addr = tcg_temp_new_i64(tcg_ctx);
|
||||||
tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);
|
tcg_gen_mov_i64(tcg_ctx, tcg_addr, tcg_rn);
|
||||||
|
tcg_ebytes = tcg_const_i64(tcg_ctx, ebytes);
|
||||||
|
|
||||||
for (xs = 0; xs < selem; xs++) {
|
for (xs = 0; xs < selem; xs++) {
|
||||||
if (replicate) {
|
if (replicate) {
|
||||||
|
@ -3328,7 +3331,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
|
||||||
do_vec_st(s, rt, index, tcg_addr, scale);
|
do_vec_st(s, rt, index, tcg_addr, scale);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tcg_gen_addi_i64(tcg_ctx, tcg_addr, tcg_addr, ebytes);
|
tcg_gen_add_i64(tcg_ctx, tcg_addr, tcg_addr, tcg_ebytes);
|
||||||
rt = (rt + 1) % 32;
|
rt = (rt + 1) % 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3340,6 +3343,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
|
||||||
tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
|
tcg_gen_add_i64(tcg_ctx, tcg_rn, tcg_rn, cpu_reg(s, rm));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
tcg_temp_free_i64(tcg_ctx, tcg_ebytes);
|
||||||
tcg_temp_free_i64(tcg_ctx, tcg_addr);
|
tcg_temp_free_i64(tcg_ctx, tcg_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue