mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-11 08:15:40 +00:00
target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree
Convert the Neon floating point VFMA and VFMS insns to decodetree. These are the last insns in the 3-reg-same group, so we can remove all the support/loop code from the old decoder. Backports commit e95485f85657be21135c17a9226e297c21e73360 from qemu.
This commit is contained in:
parent
82484db863
commit
7b2fb5bc63
|
@ -174,6 +174,9 @@ SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
|
|||
SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
|
||||
vm=%vm_dp vn=%vn_dp vd=%vd_dp
|
||||
|
||||
VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
|
||||
VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
|
||||
|
||||
VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
|
||||
|
||||
VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
|
||||
|
|
|
@ -1226,6 +1226,47 @@ static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
|
|||
return do_3same(s, a, gen_VRSQRTS_fp_3s);
|
||||
}
|
||||
|
||||
/*
 * Code generator callback for the Neon VFMA 3-reg-same insn; handed to
 * do_3same_fp() by trans_VFMA_fp_3s().
 *
 * Emits one call to the vfp_muladds helper with vd as the output and
 * vn, vm, vd as the three inputs, so vd is both read and overwritten.
 * fpstatus is forwarded to the helper unchanged.
 *
 * NOTE(review): presumably the helper computes (vn * vm) + vd as a fused
 * multiply-add -- confirm against the helper's definition.
 */
static void gen_VFMA_fp_3s(TCGContext *s, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
|
||||
TCGv_ptr fpstatus)
|
||||
{
|
||||
gen_helper_vfp_muladds(s, vd, vn, vm, vd, fpstatus);
|
||||
}
|
||||
|
||||
/*
 * Translate function for the VFMA_fp_3s decode pattern.
 *
 * Returns false without generating code when:
 *   - the aa32_simdfmac ISA feature is not present, or
 *   - a->size is non-zero (fp16 is not implemented yet).
 * Otherwise returns the result of do_3same_fp() using gen_VFMA_fp_3s as
 * the per-element generator.
 *
 * NOTE(review): the final 'true' argument to do_3same_fp() presumably
 * indicates that the destination register is also an input (the
 * accumulator) -- confirm against do_3same_fp()'s signature.
 */
static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
|
||||
{
|
||||
if (!dc_isar_feature(aa32_simdfmac, s)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (a->size != 0) {
|
||||
/* TODO fp16 support */
|
||||
return false;
|
||||
}
|
||||
|
||||
return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
|
||||
}
|
||||
|
||||
/*
 * Code generator callback for the Neon VFMS 3-reg-same insn; handed to
 * do_3same_fp() by trans_VFMS_fp_3s().
 *
 * First passes vn as both arguments of the vfp_negs helper, i.e. vn is
 * negated in place and the caller's vn value is clobbered. It then emits
 * the same vfp_muladds call as the VFMA generator: vd is the output and
 * vn (now negated), vm, vd are the inputs. fpstatus is forwarded to the
 * muladds helper unchanged.
 *
 * NOTE(review): presumably this yields (-vn * vm) + vd as a fused
 * operation -- confirm against the helper definitions.
 */
static void gen_VFMS_fp_3s(TCGContext *s, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
|
||||
TCGv_ptr fpstatus)
|
||||
{
|
||||
gen_helper_vfp_negs(s, vn, vn);
|
||||
gen_helper_vfp_muladds(s, vd, vn, vm, vd, fpstatus);
|
||||
}
|
||||
|
||||
/*
 * Translate function for the VFMS_fp_3s decode pattern.
 *
 * Returns false without generating code when:
 *   - the aa32_simdfmac ISA feature is not present, or
 *   - a->size is non-zero (fp16 is not implemented yet).
 * Otherwise returns the result of do_3same_fp() using gen_VFMS_fp_3s as
 * the per-element generator.
 *
 * NOTE(review): the final 'true' argument to do_3same_fp() presumably
 * indicates that the destination register is also an input (the
 * accumulator) -- confirm against do_3same_fp()'s signature.
 */
static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
|
||||
{
|
||||
if (!dc_isar_feature(aa32_simdfmac, s)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (a->size != 0) {
|
||||
/* TODO fp16 support */
|
||||
return false;
|
||||
}
|
||||
|
||||
return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
|
||||
}
|
||||
|
||||
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
|
||||
{
|
||||
/* FP operations handled pairwise 32 bits at a time */
|
||||
|
|
|
@ -3518,78 +3518,6 @@ static void gen_neon_narrow_op(DisasContext *s, int op, int u, int size,
|
|||
}
|
||||
}
|
||||
|
||||
/* Symbolic constants for op fields for Neon 3-register same-length.
|
||||
* The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
|
||||
* table A7-9.
|
||||
*/
|
||||
#define NEON_3R_VHADD 0
|
||||
#define NEON_3R_VQADD 1
|
||||
#define NEON_3R_VRHADD 2
|
||||
#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
|
||||
#define NEON_3R_VHSUB 4
|
||||
#define NEON_3R_VQSUB 5
|
||||
#define NEON_3R_VCGT 6
|
||||
#define NEON_3R_VCGE 7
|
||||
#define NEON_3R_VSHL 8
|
||||
#define NEON_3R_VQSHL 9
|
||||
#define NEON_3R_VRSHL 10
|
||||
#define NEON_3R_VQRSHL 11
|
||||
#define NEON_3R_VMAX 12
|
||||
#define NEON_3R_VMIN 13
|
||||
#define NEON_3R_VABD 14
|
||||
#define NEON_3R_VABA 15
|
||||
#define NEON_3R_VADD_VSUB 16
|
||||
#define NEON_3R_VTST_VCEQ 17
|
||||
#define NEON_3R_VML 18 /* VMLA, VMLS */
|
||||
#define NEON_3R_VMUL 19
|
||||
#define NEON_3R_VPMAX 20
|
||||
#define NEON_3R_VPMIN 21
|
||||
#define NEON_3R_VQDMULH_VQRDMULH 22
|
||||
#define NEON_3R_VPADD_VQRDMLAH 23
|
||||
#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
|
||||
#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
|
||||
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
|
||||
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
|
||||
#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
|
||||
#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
|
||||
#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
|
||||
#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
|
||||
|
||||
static const uint8_t neon_3r_sizes[] = {
|
||||
[NEON_3R_VHADD] = 0x7,
|
||||
[NEON_3R_VQADD] = 0xf,
|
||||
[NEON_3R_VRHADD] = 0x7,
|
||||
[NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
|
||||
[NEON_3R_VHSUB] = 0x7,
|
||||
[NEON_3R_VQSUB] = 0xf,
|
||||
[NEON_3R_VCGT] = 0x7,
|
||||
[NEON_3R_VCGE] = 0x7,
|
||||
[NEON_3R_VSHL] = 0xf,
|
||||
[NEON_3R_VQSHL] = 0xf,
|
||||
[NEON_3R_VRSHL] = 0xf,
|
||||
[NEON_3R_VQRSHL] = 0xf,
|
||||
[NEON_3R_VMAX] = 0x7,
|
||||
[NEON_3R_VMIN] = 0x7,
|
||||
[NEON_3R_VABD] = 0x7,
|
||||
[NEON_3R_VABA] = 0x7,
|
||||
[NEON_3R_VADD_VSUB] = 0xf,
|
||||
[NEON_3R_VTST_VCEQ] = 0x7,
|
||||
[NEON_3R_VML] = 0x7,
|
||||
[NEON_3R_VMUL] = 0x7,
|
||||
[NEON_3R_VPMAX] = 0x7,
|
||||
[NEON_3R_VPMIN] = 0x7,
|
||||
[NEON_3R_VQDMULH_VQRDMULH] = 0x6,
|
||||
[NEON_3R_VPADD_VQRDMLAH] = 0x7,
|
||||
[NEON_3R_SHA] = 0xf, /* size field encodes op type */
|
||||
[NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
|
||||
[NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
|
||||
[NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
|
||||
[NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
|
||||
[NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
|
||||
[NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
|
||||
[NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
|
||||
};
|
||||
|
||||
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
|
||||
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
|
||||
* table A7-13.
|
||||
|
@ -5509,108 +5437,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
|||
rm_ofs = neon_reg_offset(rm, 0);
|
||||
|
||||
if ((insn & (1 << 23)) == 0) {
|
||||
/* Three register same length. */
|
||||
op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
|
||||
/* Catch invalid op and bad size combinations: UNDEF */
|
||||
if ((neon_3r_sizes[op] & (1 << size)) == 0) {
|
||||
/* Three register same length: handled by decodetree */
|
||||
return 1;
|
||||
}
|
||||
/* All insns of this form UNDEF for either this condition or the
|
||||
* superset of cases "Q==1"; we catch the latter later.
|
||||
*/
|
||||
if (q && ((rd | rn | rm) & 1)) {
|
||||
return 1;
|
||||
}
|
||||
switch (op) {
|
||||
case NEON_3R_VFM_VQRDMLSH:
|
||||
if (!u) {
|
||||
/* VFM, VFMS */
|
||||
if (size == 1) {
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
/* VQRDMLSH : handled by decodetree */
|
||||
return 1;
|
||||
|
||||
case NEON_3R_VADD_VSUB:
|
||||
case NEON_3R_LOGIC:
|
||||
case NEON_3R_VMAX:
|
||||
case NEON_3R_VMIN:
|
||||
case NEON_3R_VTST_VCEQ:
|
||||
case NEON_3R_VCGT:
|
||||
case NEON_3R_VCGE:
|
||||
case NEON_3R_VQADD:
|
||||
case NEON_3R_VQSUB:
|
||||
case NEON_3R_VMUL:
|
||||
case NEON_3R_VML:
|
||||
case NEON_3R_VSHL:
|
||||
case NEON_3R_SHA:
|
||||
case NEON_3R_VHADD:
|
||||
case NEON_3R_VRHADD:
|
||||
case NEON_3R_VHSUB:
|
||||
case NEON_3R_VABD:
|
||||
case NEON_3R_VABA:
|
||||
case NEON_3R_VQSHL:
|
||||
case NEON_3R_VRSHL:
|
||||
case NEON_3R_VQRSHL:
|
||||
case NEON_3R_VPMAX:
|
||||
case NEON_3R_VPMIN:
|
||||
case NEON_3R_VPADD_VQRDMLAH:
|
||||
case NEON_3R_VQDMULH_VQRDMULH:
|
||||
case NEON_3R_FLOAT_ARITH:
|
||||
case NEON_3R_FLOAT_MULTIPLY:
|
||||
case NEON_3R_FLOAT_CMP:
|
||||
case NEON_3R_FLOAT_ACMP:
|
||||
case NEON_3R_FLOAT_MINMAX:
|
||||
case NEON_3R_FLOAT_MISC:
|
||||
/* Already handled by decodetree */
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (size == 3) {
|
||||
/* 64-bit element instructions: handled by decodetree */
|
||||
return 1;
|
||||
}
|
||||
switch (op) {
|
||||
case NEON_3R_VFM_VQRDMLSH:
|
||||
if (!dc_isar_feature(aa32_simdfmac, s)) {
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (pass = 0; pass < (q ? 4 : 2); pass++) {
|
||||
|
||||
/* Elementwise. */
|
||||
tmp = neon_load_reg(s, rn, pass);
|
||||
tmp2 = neon_load_reg(s, rm, pass);
|
||||
switch (op) {
|
||||
case NEON_3R_VFM_VQRDMLSH:
|
||||
{
|
||||
/* VFMA, VFMS: fused multiply-add */
|
||||
TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
|
||||
TCGv_i32 tmp3 = neon_load_reg(s, rd, pass);
|
||||
if (size) {
|
||||
/* VFMS */
|
||||
gen_helper_vfp_negs(tcg_ctx, tmp, tmp);
|
||||
}
|
||||
gen_helper_vfp_muladds(tcg_ctx, tmp, tmp, tmp2, tmp3, fpstatus);
|
||||
tcg_temp_free_i32(tcg_ctx, tmp3);
|
||||
tcg_temp_free_ptr(tcg_ctx, fpstatus);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
tcg_temp_free_i32(tcg_ctx, tmp2);
|
||||
|
||||
neon_store_reg(s, rd, pass, tmp);
|
||||
|
||||
} /* for pass */
|
||||
/* End of 3 register same size operations. */
|
||||
} else if (insn & (1 << 4)) {
|
||||
if ((insn & 0x00380080) != 0) {
|
||||
/* Two registers and shift. */
|
||||
|
|
Loading…
Reference in a new issue