target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL

Implement fp16 versions of the VFP VMLA, VMLS, VNMLS, VNMLA, VNMUL
instructions. (These are all the remaining ones which we implement
via do_vfp_3op_[hsd]p().)

Backports commit e7cb0ded52c6d7b86585b09935fe7caeb9e38b69
This commit is contained in:
Peter Maydell 2021-02-28 04:28:16 -05:00 committed by Lioncash
parent eae621098d
commit a42ecfe203
20 changed files with 111 additions and 0 deletions

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_aarch64
#define helper_vfp_muls helper_vfp_muls_aarch64
#define helper_vfp_negd helper_vfp_negd_aarch64
#define helper_vfp_negh helper_vfp_negh_aarch64
#define helper_vfp_negs helper_vfp_negs_aarch64
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_aarch64
#define helper_vfp_shtod helper_vfp_shtod_aarch64

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_aarch64eb
#define helper_vfp_muls helper_vfp_muls_aarch64eb
#define helper_vfp_negd helper_vfp_negd_aarch64eb
#define helper_vfp_negh helper_vfp_negh_aarch64eb
#define helper_vfp_negs helper_vfp_negs_aarch64eb
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_aarch64eb
#define helper_vfp_shtod helper_vfp_shtod_aarch64eb

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_arm
#define helper_vfp_muls helper_vfp_muls_arm
#define helper_vfp_negd helper_vfp_negd_arm
#define helper_vfp_negh helper_vfp_negh_arm
#define helper_vfp_negs helper_vfp_negs_arm
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_arm
#define helper_vfp_shtod helper_vfp_shtod_arm

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_armeb
#define helper_vfp_muls helper_vfp_muls_armeb
#define helper_vfp_negd helper_vfp_negd_armeb
#define helper_vfp_negh helper_vfp_negh_armeb
#define helper_vfp_negs helper_vfp_negs_armeb
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_armeb
#define helper_vfp_shtod helper_vfp_shtod_armeb

View file

@ -1930,6 +1930,7 @@ symbols = (
'helper_vfp_mulh',
'helper_vfp_muls',
'helper_vfp_negd',
'helper_vfp_negh',
'helper_vfp_negs',
'helper_vfp_set_fpscr',
'helper_vfp_shtod',

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_m68k
#define helper_vfp_muls helper_vfp_muls_m68k
#define helper_vfp_negd helper_vfp_negd_m68k
#define helper_vfp_negh helper_vfp_negh_m68k
#define helper_vfp_negs helper_vfp_negs_m68k
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_m68k
#define helper_vfp_shtod helper_vfp_shtod_m68k

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_mips
#define helper_vfp_muls helper_vfp_muls_mips
#define helper_vfp_negd helper_vfp_negd_mips
#define helper_vfp_negh helper_vfp_negh_mips
#define helper_vfp_negs helper_vfp_negs_mips
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_mips
#define helper_vfp_shtod helper_vfp_shtod_mips

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_mips64
#define helper_vfp_muls helper_vfp_muls_mips64
#define helper_vfp_negd helper_vfp_negd_mips64
#define helper_vfp_negh helper_vfp_negh_mips64
#define helper_vfp_negs helper_vfp_negs_mips64
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_mips64
#define helper_vfp_shtod helper_vfp_shtod_mips64

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_mips64el
#define helper_vfp_muls helper_vfp_muls_mips64el
#define helper_vfp_negd helper_vfp_negd_mips64el
#define helper_vfp_negh helper_vfp_negh_mips64el
#define helper_vfp_negs helper_vfp_negs_mips64el
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_mips64el
#define helper_vfp_shtod helper_vfp_shtod_mips64el

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_mipsel
#define helper_vfp_muls helper_vfp_muls_mipsel
#define helper_vfp_negd helper_vfp_negd_mipsel
#define helper_vfp_negh helper_vfp_negh_mipsel
#define helper_vfp_negs helper_vfp_negs_mipsel
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_mipsel
#define helper_vfp_shtod helper_vfp_shtod_mipsel

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_powerpc
#define helper_vfp_muls helper_vfp_muls_powerpc
#define helper_vfp_negd helper_vfp_negd_powerpc
#define helper_vfp_negh helper_vfp_negh_powerpc
#define helper_vfp_negs helper_vfp_negs_powerpc
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_powerpc
#define helper_vfp_shtod helper_vfp_shtod_powerpc

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_riscv32
#define helper_vfp_muls helper_vfp_muls_riscv32
#define helper_vfp_negd helper_vfp_negd_riscv32
#define helper_vfp_negh helper_vfp_negh_riscv32
#define helper_vfp_negs helper_vfp_negs_riscv32
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_riscv32
#define helper_vfp_shtod helper_vfp_shtod_riscv32

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_riscv64
#define helper_vfp_muls helper_vfp_muls_riscv64
#define helper_vfp_negd helper_vfp_negd_riscv64
#define helper_vfp_negh helper_vfp_negh_riscv64
#define helper_vfp_negs helper_vfp_negs_riscv64
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_riscv64
#define helper_vfp_shtod helper_vfp_shtod_riscv64

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_sparc
#define helper_vfp_muls helper_vfp_muls_sparc
#define helper_vfp_negd helper_vfp_negd_sparc
#define helper_vfp_negh helper_vfp_negh_sparc
#define helper_vfp_negs helper_vfp_negs_sparc
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_sparc
#define helper_vfp_shtod helper_vfp_shtod_sparc

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_sparc64
#define helper_vfp_muls helper_vfp_muls_sparc64
#define helper_vfp_negd helper_vfp_negd_sparc64
#define helper_vfp_negh helper_vfp_negh_sparc64
#define helper_vfp_negs helper_vfp_negs_sparc64
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_sparc64
#define helper_vfp_shtod helper_vfp_shtod_sparc64

View file

@ -123,6 +123,7 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
DEF_HELPER_1(vfp_negh, f16, f16)
DEF_HELPER_1(vfp_negs, f32, f32)
DEF_HELPER_1(vfp_negd, f64, f64)
DEF_HELPER_1(vfp_abss, f32, f32)

View file

@ -1569,6 +1569,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
return true;
}
static void gen_VMLA_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * fp16 VMLA: vd = vd + (vn * vm)
     * The product goes into a scratch temp and is then added with vd as
     * the first addend; the order of inputs to the add matters for NaNs.
     */
    TCGv_i32 product = tcg_temp_new_i32(tcg_ctx);

    gen_helper_vfp_mulh(tcg_ctx, product, vn, vm, fpst);
    gen_helper_vfp_addh(tcg_ctx, vd, vd, product, fpst);

    tcg_temp_free_i32(tcg_ctx, product);
}
/*
 * Translate entry point for the fp16 VMLA pattern: hands the decoded
 * vd/vn/vm fields to do_vfp_3op_hp() with gen_VMLA_hp as the emitter and
 * returns its result unchanged.
 * NOTE(review): the final 'true' presumably tells do_vfp_3op_hp() that the
 * emitter reads vd as an input - confirm against its definition.
 */
static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp,
                         a->vd, a->vn, a->vm,
                         true);
}
static void gen_VMLA_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* Note that order of inputs to the add matters for NaNs */
@ -1599,6 +1614,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VMLS_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * fp16 VMLS: vd = vd + -(vn * vm)
     * The product is computed and negated in a scratch temp, then added
     * with vd as the first addend; the order of inputs to the add matters
     * for NaNs.
     */
    TCGv_i32 neg_product = tcg_temp_new_i32(tcg_ctx);

    gen_helper_vfp_mulh(tcg_ctx, neg_product, vn, vm, fpst);
    gen_helper_vfp_negh(tcg_ctx, neg_product, neg_product);
    gen_helper_vfp_addh(tcg_ctx, vd, vd, neg_product, fpst);

    tcg_temp_free_i32(tcg_ctx, neg_product);
}
/*
 * Translate entry point for the fp16 VMLS pattern: hands the decoded
 * vd/vn/vm fields to do_vfp_3op_hp() with gen_VMLS_hp as the emitter and
 * returns its result unchanged.
 * NOTE(review): the final 'true' presumably tells do_vfp_3op_hp() that the
 * emitter reads vd as an input - confirm against its definition.
 */
static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp,
                         a->vd, a->vn, a->vm,
                         true);
}
static void gen_VMLS_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@ -1637,6 +1671,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * fp16 VNMLS: -fd + (fn * fm)
     * Do not "simplify" (-A + B) into (B - A) or anything similar, however
     * plausible it looks: such rewrites give wrong results for NaNs.
     * vd is negated in place before it is used as the first addend.
     */
    TCGv_i32 product = tcg_temp_new_i32(tcg_ctx);

    gen_helper_vfp_mulh(tcg_ctx, product, vn, vm, fpst);
    gen_helper_vfp_negh(tcg_ctx, vd, vd);
    gen_helper_vfp_addh(tcg_ctx, vd, vd, product, fpst);

    tcg_temp_free_i32(tcg_ctx, product);
}
/*
 * Translate entry point for the fp16 VNMLS pattern: hands the decoded
 * vd/vn/vm fields to do_vfp_3op_hp() with gen_VNMLS_hp as the emitter and
 * returns its result unchanged.
 * NOTE(review): the final 'true' presumably tells do_vfp_3op_hp() that the
 * emitter reads vd as an input - confirm against its definition.
 */
static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp,
                         a->vd, a->vn, a->vm,
                         true);
}
static void gen_VNMLS_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@ -1679,6 +1734,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * fp16 VNMLA: -fd + -(fn * fm)
     * Both the product (in a scratch temp) and vd (in place) are negated
     * before the add; vd remains the first addend.
     */
    TCGv_i32 neg_product = tcg_temp_new_i32(tcg_ctx);

    gen_helper_vfp_mulh(tcg_ctx, neg_product, vn, vm, fpst);
    gen_helper_vfp_negh(tcg_ctx, neg_product, neg_product);
    gen_helper_vfp_negh(tcg_ctx, vd, vd);
    gen_helper_vfp_addh(tcg_ctx, vd, vd, neg_product, fpst);

    tcg_temp_free_i32(tcg_ctx, neg_product);
}
/*
 * Translate entry point for the fp16 VNMLA pattern: hands the decoded
 * vd/vn/vm fields to do_vfp_3op_hp() with gen_VNMLA_hp as the emitter and
 * returns its result unchanged.
 * NOTE(review): the final 'true' presumably tells do_vfp_3op_hp() that the
 * emitter reads vd as an input - confirm against its definition.
 */
static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp,
                         a->vd, a->vn, a->vm,
                         true);
}
static void gen_VNMLA_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMLA: -fd + -(fn * fm) */
@ -1728,6 +1800,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}
static void gen_VNMUL_hp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * fp16 VNMUL: -(fn * fm)
     * No scratch temp is needed: the product is written straight into vd
     * and then negated in place. The previous value of vd is never read.
     */
    gen_helper_vfp_mulh(tcg_ctx, vd, vn, vm, fpst);

    gen_helper_vfp_negh(tcg_ctx, vd, vd);
}
/*
 * Translate entry point for the fp16 VNMUL pattern: hands the decoded
 * vd/vn/vm fields to do_vfp_3op_hp() with gen_VNMUL_hp as the emitter and
 * returns its result unchanged.
 * NOTE(review): the final 'false' presumably tells do_vfp_3op_hp() that the
 * emitter does not read vd as an input - confirm against its definition.
 */
static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp,
                         a->vd, a->vn, a->vm,
                         false);
}
static void gen_VNMUL_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */

View file

@ -103,15 +103,19 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=1 u=0 w=1
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
# Within each operation, bits [11:8] distinguish the three variants listed
# here: 1001 for _hp, 1010 for _sp and 1011 for _dp.
# The _hp patterns use the same @vfp_dnm_s format as the _sp patterns.
VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
@ -119,6 +123,7 @@ VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d

View file

@ -264,6 +264,11 @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP
/*
 * fp16 negate helper: returns float16_chs() applied to the operand.
 * It takes only the value itself; no status pointer is passed in.
 */
dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
{
    dh_ctype_f16 negated = float16_chs(a);

    return negated;
}
float32 VFP_HELPER(neg, s)(float32 a)
{
return float32_chs(a);

View file

@ -1924,6 +1924,7 @@
#define helper_vfp_mulh helper_vfp_mulh_x86_64
#define helper_vfp_muls helper_vfp_muls_x86_64
#define helper_vfp_negd helper_vfp_negd_x86_64
#define helper_vfp_negh helper_vfp_negh_x86_64
#define helper_vfp_negs helper_vfp_negs_x86_64
#define helper_vfp_set_fpscr helper_vfp_set_fpscr_x86_64
#define helper_vfp_shtod helper_vfp_shtod_x86_64