mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-22 20:41:04 +00:00
target/arm: Simplify SMMLA, SMMLAR, SMMLS, SMMLSR
All of the inputs to these instructions are 32 bits wide. Rather than extending each input to 64 bits and then extracting the high 32 bits of the output, use tcg_gen_muls2_i32 and other 32-bit generator functions. Backports commit 5f8cd06ebcf57420be8fea4574de2e074de46709 from QEMU.
This commit is contained in:
parent
4a1cc16eef
commit
93c016a3e7
|
@ -384,36 +384,6 @@ static void gen_revsh(DisasContext *s, TCGv_i32 var)
|
||||||
tcg_gen_ext16s_i32(tcg_ctx, var, var);
|
tcg_gen_ext16s_i32(tcg_ctx, var, var);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return (b << 32) + a. Mark inputs as dead */
|
|
||||||
static TCGv_i64 gen_addq_msw(DisasContext *s, TCGv_i64 a, TCGv_i32 b)
|
|
||||||
{
|
|
||||||
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
|
||||||
TCGv_i64 tmp64 = tcg_temp_new_i64(tcg_ctx);
|
|
||||||
|
|
||||||
tcg_gen_extu_i32_i64(tcg_ctx, tmp64, b);
|
|
||||||
tcg_temp_free_i32(tcg_ctx, b);
|
|
||||||
tcg_gen_shli_i64(tcg_ctx, tmp64, tmp64, 32);
|
|
||||||
tcg_gen_add_i64(tcg_ctx, a, tmp64, a);
|
|
||||||
|
|
||||||
tcg_temp_free_i64(tcg_ctx, tmp64);
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return (b << 32) - a. Mark inputs as dead. */
|
|
||||||
static TCGv_i64 gen_subq_msw(DisasContext *s, TCGv_i64 a, TCGv_i32 b)
|
|
||||||
{
|
|
||||||
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
|
||||||
TCGv_i64 tmp64 = tcg_temp_new_i64(tcg_ctx);
|
|
||||||
|
|
||||||
tcg_gen_extu_i32_i64(tcg_ctx, tmp64, b);
|
|
||||||
tcg_temp_free_i32(tcg_ctx, b);
|
|
||||||
tcg_gen_shli_i64(tcg_ctx, tmp64, tmp64, 32);
|
|
||||||
tcg_gen_sub_i64(tcg_ctx, a, tmp64, a);
|
|
||||||
|
|
||||||
tcg_temp_free_i64(tcg_ctx, tmp64);
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 32x32->64 multiply. Marks inputs as dead. */
|
/* 32x32->64 multiply. Marks inputs as dead. */
|
||||||
static TCGv_i64 gen_mulu_i64_i32(DisasContext *s, TCGv_i32 a, TCGv_i32 b)
|
static TCGv_i64 gen_mulu_i64_i32(DisasContext *s, TCGv_i32 a, TCGv_i32 b)
|
||||||
{
|
{
|
||||||
|
@ -9040,23 +9010,27 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
|
||||||
(SMMUL, SMMLA, SMMLS) */
|
(SMMUL, SMMLA, SMMLS) */
|
||||||
tmp = load_reg(s, rm);
|
tmp = load_reg(s, rm);
|
||||||
tmp2 = load_reg(s, rs);
|
tmp2 = load_reg(s, rs);
|
||||||
tmp64 = gen_muls_i64_i32(s, tmp, tmp2);
|
tcg_gen_muls2_i32(tcg_ctx, tmp2, tmp, tmp, tmp2);
|
||||||
|
|
||||||
if (rd != 15) {
|
if (rd != 15) {
|
||||||
tmp = load_reg(s, rd);
|
tmp3 = load_reg(s, rd);
|
||||||
if (insn & (1 << 6)) {
|
if (insn & (1 << 6)) {
|
||||||
tmp64 = gen_subq_msw(s, tmp64, tmp);
|
tcg_gen_sub_i32(tcg_ctx, tmp, tmp, tmp3);
|
||||||
} else {
|
} else {
|
||||||
tmp64 = gen_addq_msw(s, tmp64, tmp);
|
tcg_gen_add_i32(tcg_ctx, tmp, tmp, tmp3);
|
||||||
}
|
}
|
||||||
|
tcg_temp_free_i32(tcg_ctx, tmp3);
|
||||||
}
|
}
|
||||||
if (insn & (1 << 5)) {
|
if (insn & (1 << 5)) {
|
||||||
tcg_gen_addi_i64(tcg_ctx, tmp64, tmp64, 0x80000000u);
|
/*
|
||||||
|
* Adding 0x80000000 to the 64-bit quantity
|
||||||
|
* means that we have carry in to the high
|
||||||
|
* word when the low word has the high bit set.
|
||||||
|
*/
|
||||||
|
tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 31);
|
||||||
|
tcg_gen_add_i32(tcg_ctx, tmp, tmp, tmp2);
|
||||||
}
|
}
|
||||||
tcg_gen_shri_i64(tcg_ctx, tmp64, tmp64, 32);
|
tcg_temp_free_i32(tcg_ctx, tmp2);
|
||||||
tmp = tcg_temp_new_i32(tcg_ctx);
|
|
||||||
tcg_gen_extrl_i64_i32(tcg_ctx, tmp, tmp64);
|
|
||||||
tcg_temp_free_i64(tcg_ctx, tmp64);
|
|
||||||
store_reg(s, rn, tmp);
|
store_reg(s, rn, tmp);
|
||||||
break;
|
break;
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -10278,22 +10252,26 @@ static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
|
case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
|
||||||
tmp64 = gen_muls_i64_i32(s, tmp, tmp2);
|
tcg_gen_muls2_i32(tcg_ctx, tmp2, tmp, tmp, tmp2);
|
||||||
if (rs != 15) {
|
if (rs != 15) {
|
||||||
tmp = load_reg(s, rs);
|
tmp3 = load_reg(s, rs);
|
||||||
if (insn & (1 << 20)) {
|
if (insn & (1 << 20)) {
|
||||||
tmp64 = gen_addq_msw(s, tmp64, tmp);
|
tcg_gen_add_i32(tcg_ctx, tmp, tmp, tmp3);
|
||||||
} else {
|
} else {
|
||||||
tmp64 = gen_subq_msw(s, tmp64, tmp);
|
tcg_gen_sub_i32(tcg_ctx, tmp, tmp, tmp3);
|
||||||
}
|
}
|
||||||
|
tcg_temp_free_i32(tcg_ctx, tmp3);
|
||||||
}
|
}
|
||||||
if (insn & (1 << 4)) {
|
if (insn & (1 << 4)) {
|
||||||
tcg_gen_addi_i64(tcg_ctx, tmp64, tmp64, 0x80000000u);
|
/*
|
||||||
|
* Adding 0x80000000 to the 64-bit quantity
|
||||||
|
* means that we have carry in to the high
|
||||||
|
* word when the low word has the high bit set.
|
||||||
|
*/
|
||||||
|
tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 31);
|
||||||
|
tcg_gen_add_i32(tcg_ctx, tmp, tmp, tmp2);
|
||||||
}
|
}
|
||||||
tcg_gen_shri_i64(tcg_ctx, tmp64, tmp64, 32);
|
tcg_temp_free_i32(tcg_ctx, tmp2);
|
||||||
tmp = tcg_temp_new_i32(tcg_ctx);
|
|
||||||
tcg_gen_extrl_i64_i32(tcg_ctx, tmp, tmp64);
|
|
||||||
tcg_temp_free_i64(tcg_ctx, tmp64);
|
|
||||||
break;
|
break;
|
||||||
case 7: /* Unsigned sum of absolute differences. */
|
case 7: /* Unsigned sum of absolute differences. */
|
||||||
gen_helper_usad8(tcg_ctx, tmp, tmp, tmp2);
|
gen_helper_usad8(tcg_ctx, tmp, tmp, tmp2);
|
||||||
|
|
Loading…
Reference in a new issue