mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-10 22:45:36 +00:00
target/arm: Use gvec for VSHR, VSHL
Backports commit 1dc8425e551be1371d657e94367f37130cd7aede from qemu
This commit is contained in:
parent
7790ca1ccb
commit
b5877f1dfb
|
@ -6355,7 +6355,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||||
break;
|
break;
|
||||||
case NEON_3R_VMUL:
|
case NEON_3R_VMUL:
|
||||||
/* VMUL.P8; other cases already eliminated. */
|
/* VMUL.P8; other cases already eliminated. */
|
||||||
gen_helper_neon_mul_p8(s, tmp, tmp, tmp2);
|
gen_helper_neon_mul_p8(tcg_ctx, tmp, tmp, tmp2);
|
||||||
break;
|
break;
|
||||||
case NEON_3R_VPMAX:
|
case NEON_3R_VPMAX:
|
||||||
GEN_NEON_INTEGER_OP(pmax);
|
GEN_NEON_INTEGER_OP(pmax);
|
||||||
|
@ -6537,8 +6537,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||||
size--;
|
size--;
|
||||||
}
|
}
|
||||||
shift = (insn >> 16) & ((1 << (3 + size)) - 1);
|
shift = (insn >> 16) & ((1 << (3 + size)) - 1);
|
||||||
/* To avoid excessive duplication of ops we implement shift
|
|
||||||
by immediate using the variable shift operations. */
|
|
||||||
if (op < 8) {
|
if (op < 8) {
|
||||||
/* Shift by immediate:
|
/* Shift by immediate:
|
||||||
VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
|
VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
|
||||||
|
@ -6550,37 +6548,62 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
/* Right shifts are encoded as N - shift, where N is the
|
/* Right shifts are encoded as N - shift, where N is the
|
||||||
element size in bits. */
|
element size in bits. */
|
||||||
if (op <= 4)
|
if (op <= 4) {
|
||||||
shift = shift - (1 << (size + 3));
|
shift = shift - (1 << (size + 3));
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (op) {
|
||||||
|
case 0: /* VSHR */
|
||||||
|
/* Right shift comes here negative. */
|
||||||
|
shift = -shift;
|
||||||
|
/* Shifts larger than the element size are architecturally
|
||||||
|
* valid. Unsigned results in all zeros; signed results
|
||||||
|
* in all sign bits.
|
||||||
|
*/
|
||||||
|
if (!u) {
|
||||||
|
tcg_gen_gvec_sari(tcg_ctx, size, rd_ofs, rm_ofs,
|
||||||
|
MIN(shift, (8 << size) - 1),
|
||||||
|
vec_size, vec_size);
|
||||||
|
} else if (shift >= 8 << size) {
|
||||||
|
tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0);
|
||||||
|
} else {
|
||||||
|
tcg_gen_gvec_shri(tcg_ctx, size, rd_ofs, rm_ofs, shift,
|
||||||
|
vec_size, vec_size);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case 5: /* VSHL, VSLI */
|
||||||
|
if (!u) { /* VSHL */
|
||||||
|
/* Shifts larger than the element size are
|
||||||
|
* architecturally valid and result in zero.
|
||||||
|
*/
|
||||||
|
if (shift >= 8 << size) {
|
||||||
|
tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0);
|
||||||
|
} else {
|
||||||
|
tcg_gen_gvec_shli(tcg_ctx, size, rd_ofs, rm_ofs, shift,
|
||||||
|
vec_size, vec_size);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (size == 3) {
|
if (size == 3) {
|
||||||
count = q + 1;
|
count = q + 1;
|
||||||
} else {
|
} else {
|
||||||
count = q ? 4: 2;
|
count = q ? 4: 2;
|
||||||
}
|
}
|
||||||
switch (size) {
|
|
||||||
case 0:
|
/* To avoid excessive duplication of ops we implement shift
|
||||||
imm = (uint8_t) shift;
|
* by immediate using the variable shift operations.
|
||||||
imm |= imm << 8;
|
*/
|
||||||
imm |= imm << 16;
|
imm = dup_const(size, shift);
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
imm = (uint16_t) shift;
|
|
||||||
imm |= imm << 16;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
imm = shift;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (pass = 0; pass < count; pass++) {
|
for (pass = 0; pass < count; pass++) {
|
||||||
if (size == 3) {
|
if (size == 3) {
|
||||||
neon_load_reg64(s, s->V0, rm + pass);
|
neon_load_reg64(s, s->V0, rm + pass);
|
||||||
tcg_gen_movi_i64(tcg_ctx, s->V1, imm);
|
tcg_gen_movi_i64(tcg_ctx, s->V1, imm);
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case 0: /* VSHR */
|
|
||||||
case 1: /* VSRA */
|
case 1: /* VSRA */
|
||||||
if (u)
|
if (u)
|
||||||
gen_helper_neon_shl_u64(tcg_ctx, s->V0, s->V0, s->V1);
|
gen_helper_neon_shl_u64(tcg_ctx, s->V0, s->V0, s->V1);
|
||||||
|
@ -6611,6 +6634,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||||
s->V0, s->V1);
|
s->V0, s->V1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
if (op == 1 || op == 3) {
|
if (op == 1 || op == 3) {
|
||||||
/* Accumulate. */
|
/* Accumulate. */
|
||||||
|
@ -6639,7 +6664,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||||
tmp2 = tcg_temp_new_i32(tcg_ctx);
|
tmp2 = tcg_temp_new_i32(tcg_ctx);
|
||||||
tcg_gen_movi_i32(tcg_ctx, tmp2, imm);
|
tcg_gen_movi_i32(tcg_ctx, tmp2, imm);
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case 0: /* VSHR */
|
|
||||||
case 1: /* VSRA */
|
case 1: /* VSRA */
|
||||||
GEN_NEON_INTEGER_OP(shl);
|
GEN_NEON_INTEGER_OP(shl);
|
||||||
break;
|
break;
|
||||||
|
@ -6677,6 +6701,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||||
case 7: /* VQSHL */
|
case 7: /* VQSHL */
|
||||||
GEN_NEON_INTEGER_OP_ENV(qshl);
|
GEN_NEON_INTEGER_OP_ENV(qshl);
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
tcg_temp_free_i32(tcg_ctx, tmp2);
|
tcg_temp_free_i32(tcg_ctx, tmp2);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue