target/arm: Convert Neon 2-reg-scalar float multiplies to decodetree

Convert the float versions of VMLA, VMLS and VMUL in the Neon
2-reg-scalar group to decodetree.

Backports commit 85ac9aef9a5418de3168df569e21258e853840a2 from qemu
This commit is contained in:
Peter Maydell 2020-06-17 00:09:29 -04:00 committed by Lioncash
parent bf1b0374b9
commit 2e8ae1130e
3 changed files with 71 additions and 34 deletions

View file

@ -478,9 +478,12 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
&2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
]
}

View file

@ -2511,3 +2511,68 @@ static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
}
/*
 * The float helpers take an extra fp-status pointer argument
 * (the NeonGenTwoSingleOpFn shape), whereas do_2scalar() is handed
 * NeonGenTwoOpFn callbacks.  Instead of writing a float-only copy of
 * do_2scalar for just three insns, WRAP_FP_FN(WRAPNAME, FUNC) defines a
 * static adapter WRAPNAME with the NeonGenTwoOpFn-style signature: it
 * obtains an fp-status pointer, calls FUNC with it, then frees the
 * pointer again.
 */
#define WRAP_FP_FN(WRAPNAME, FUNC)                                      \
    static void WRAPNAME(TCGContext *s, TCGv_i32 rd,                    \
                         TCGv_i32 rn, TCGv_i32 rm)                      \
    {                                                                   \
        TCGv_ptr fpst = get_fpstatus_ptr(s, 1);                         \
                                                                        \
        FUNC(s, rd, rn, rm, fpst);                                      \
        tcg_temp_free_ptr(s, fpst);                                     \
    }

/* Adapters around the float multiply, add and subtract helpers. */
WRAP_FP_FN(gen_VMUL_F_mul, gen_helper_vfp_muls)
WRAP_FP_FN(gen_VMUL_F_add, gen_helper_vfp_adds)
WRAP_FP_FN(gen_VMUL_F_sub, gen_helper_vfp_subs)
/*
 * Neon 2-reg-scalar VMUL, float version.
 *
 * The multiply callback is looked up by a->size in a four-entry table.
 * Only entry 2 is populated; all other entries are NULL (entry 1 is
 * where fp16 would go once supported).  No accumulate callback is
 * passed.  The result of do_2scalar() is returned unchanged.
 */
static bool trans_VMUL_F_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const mul_by_size[4] = {
        /* [1] left NULL.  TODO: fp16 support */
        [2] = gen_VMUL_F_mul,
    };
    NeonGenTwoOpFn *mulfn = mul_by_size[a->size];

    return do_2scalar(s, a, mulfn, NULL);
}
/*
 * Neon 2-reg-scalar VMLA, float version.
 *
 * Both the multiply callback and the accumulate (add) callback are
 * looked up by a->size in four-entry tables.  Only entry 2 is populated
 * in each; all other entries are NULL (entry 1 is where fp16 would go
 * once supported).  The result of do_2scalar() is returned unchanged.
 */
static bool trans_VMLA_F_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const mul_by_size[4] = {
        /* [1] left NULL.  TODO: fp16 support */
        [2] = gen_VMUL_F_mul,
    };
    static NeonGenTwoOpFn * const add_by_size[4] = {
        /* [1] left NULL.  TODO: fp16 support */
        [2] = gen_VMUL_F_add,
    };
    NeonGenTwoOpFn *mulfn = mul_by_size[a->size];
    NeonGenTwoOpFn *addfn = add_by_size[a->size];

    return do_2scalar(s, a, mulfn, addfn);
}
/*
 * Neon 2-reg-scalar VMLS, float version.
 *
 * Both the multiply callback and the accumulate (subtract) callback are
 * looked up by a->size in four-entry tables.  Only entry 2 is populated
 * in each; all other entries are NULL (entry 1 is where fp16 would go
 * once supported).  The result of do_2scalar() is returned unchanged.
 */
static bool trans_VMLS_F_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenTwoOpFn * const mul_by_size[4] = {
        /* [1] left NULL.  TODO: fp16 support */
        [2] = gen_VMUL_F_mul,
    };
    static NeonGenTwoOpFn * const sub_by_size[4] = {
        /* [1] left NULL.  TODO: fp16 support */
        [2] = gen_VMUL_F_sub,
    };
    NeonGenTwoOpFn *mulfn = mul_by_size[a->size];
    NeonGenTwoOpFn *subfn = sub_by_size[a->size];

    return do_2scalar(s, a, mulfn, subfn);
}

View file

@ -5307,15 +5307,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case 0: /* Integer VMLA scalar */
case 4: /* Integer VMLS scalar */
case 8: /* Integer VMUL scalar */
return 1; /* handled by decodetree */
case 1: /* Float VMLA scalar */
case 5: /* Floating point VMLS scalar */
case 9: /* Floating point VMUL scalar */
if (size == 1) {
return 1;
}
/* fall through */
return 1; /* handled by decodetree */
case 12: /* VQDMULH scalar */
case 13: /* VQRDMULH scalar */
if (u && ((rd | rn) & 1)) {
@ -5332,41 +5328,14 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
} else {
gen_helper_neon_qdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2);
}
} else if (op == 13) {
} else {
if (size == 1) {
gen_helper_neon_qrdmulh_s16(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2);
} else {
gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2);
}
} else {
TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus);
tcg_temp_free_ptr(tcg_ctx, fpstatus);
}
tcg_temp_free_i32(tcg_ctx, tmp2);
if (op < 8) {
/* Accumulate. */
tmp2 = neon_load_reg(s, rd, pass);
switch (op) {
case 1:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus);
tcg_temp_free_ptr(tcg_ctx, fpstatus);
break;
}
case 5:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus);
tcg_temp_free_ptr(tcg_ctx, fpstatus);
break;
}
default:
abort();
}
tcg_temp_free_i32(tcg_ctx, tmp2);
}
neon_store_reg(s, rd, pass, tmp);
}
break;