target/arm: Implement fp16 for Neon VRSQRTS

Convert the Neon VRSQRTS insn to use a gvec helper,
and use that helper to implement the fp16 case.

As with VRECPS, we adjust the phrasing of the new implementation
slightly so that the fp32 version parallels the fp16 one.

Backports 40fde72dda2da8d55b820fa6c5efd85814be2023
This commit is contained in:
Peter Maydell 2021-03-01 17:20:18 -05:00 committed by Lioncash
parent f4ebbba9fd
commit b08ea84374
20 changed files with 66 additions and 36 deletions

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64
#define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64
#define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_aarch64
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_aarch64
#define helper_gvec_sar8i helper_gvec_sar8i_aarch64
#define helper_gvec_sar8v helper_gvec_sar8v_aarch64
#define helper_gvec_sar16i helper_gvec_sar16i_aarch64

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64eb
#define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64eb
#define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64eb
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_aarch64eb
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_aarch64eb
#define helper_gvec_sar8i helper_gvec_sar8i_aarch64eb
#define helper_gvec_sar8v helper_gvec_sar8v_aarch64eb
#define helper_gvec_sar16i helper_gvec_sar16i_aarch64eb

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_arm
#define helper_gvec_rotr32v helper_gvec_rotr32v_arm
#define helper_gvec_rotr64v helper_gvec_rotr64v_arm
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_arm
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_arm
#define helper_gvec_sar8i helper_gvec_sar8i_arm
#define helper_gvec_sar8v helper_gvec_sar8v_arm
#define helper_gvec_sar16i helper_gvec_sar16i_arm

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_armeb
#define helper_gvec_rotr32v helper_gvec_rotr32v_armeb
#define helper_gvec_rotr64v helper_gvec_rotr64v_armeb
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_armeb
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_armeb
#define helper_gvec_sar8i helper_gvec_sar8i_armeb
#define helper_gvec_sar8v helper_gvec_sar8v_armeb
#define helper_gvec_sar16i helper_gvec_sar16i_armeb

View file

@ -1399,6 +1399,8 @@ symbols = (
'helper_gvec_rotr16v',
'helper_gvec_rotr32v',
'helper_gvec_rotr64v',
'helper_gvec_rsqrts_nf_h',
'helper_gvec_rsqrts_nf_s',
'helper_gvec_sar8i',
'helper_gvec_sar8v',
'helper_gvec_sar16i',

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_m68k
#define helper_gvec_rotr32v helper_gvec_rotr32v_m68k
#define helper_gvec_rotr64v helper_gvec_rotr64v_m68k
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_m68k
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_m68k
#define helper_gvec_sar8i helper_gvec_sar8i_m68k
#define helper_gvec_sar8v helper_gvec_sar8v_m68k
#define helper_gvec_sar16i helper_gvec_sar16i_m68k

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_mips
#define helper_gvec_rotr32v helper_gvec_rotr32v_mips
#define helper_gvec_rotr64v helper_gvec_rotr64v_mips
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips
#define helper_gvec_sar8i helper_gvec_sar8i_mips
#define helper_gvec_sar8v helper_gvec_sar8v_mips
#define helper_gvec_sar16i helper_gvec_sar16i_mips

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64
#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64
#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips64
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips64
#define helper_gvec_sar8i helper_gvec_sar8i_mips64
#define helper_gvec_sar8v helper_gvec_sar8v_mips64
#define helper_gvec_sar16i helper_gvec_sar16i_mips64

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64el
#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64el
#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64el
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips64el
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips64el
#define helper_gvec_sar8i helper_gvec_sar8i_mips64el
#define helper_gvec_sar8v helper_gvec_sar8v_mips64el
#define helper_gvec_sar16i helper_gvec_sar16i_mips64el

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_mipsel
#define helper_gvec_rotr32v helper_gvec_rotr32v_mipsel
#define helper_gvec_rotr64v helper_gvec_rotr64v_mipsel
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mipsel
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mipsel
#define helper_gvec_sar8i helper_gvec_sar8i_mipsel
#define helper_gvec_sar8v helper_gvec_sar8v_mipsel
#define helper_gvec_sar16i helper_gvec_sar16i_mipsel

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_powerpc
#define helper_gvec_rotr32v helper_gvec_rotr32v_powerpc
#define helper_gvec_rotr64v helper_gvec_rotr64v_powerpc
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_powerpc
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_powerpc
#define helper_gvec_sar8i helper_gvec_sar8i_powerpc
#define helper_gvec_sar8v helper_gvec_sar8v_powerpc
#define helper_gvec_sar16i helper_gvec_sar16i_powerpc

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv32
#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv32
#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv32
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_riscv32
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_riscv32
#define helper_gvec_sar8i helper_gvec_sar8i_riscv32
#define helper_gvec_sar8v helper_gvec_sar8v_riscv32
#define helper_gvec_sar16i helper_gvec_sar16i_riscv32

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv64
#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv64
#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv64
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_riscv64
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_riscv64
#define helper_gvec_sar8i helper_gvec_sar8i_riscv64
#define helper_gvec_sar8v helper_gvec_sar8v_riscv64
#define helper_gvec_sar16i helper_gvec_sar16i_riscv64

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc
#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc
#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_sparc
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_sparc
#define helper_gvec_sar8i helper_gvec_sar8i_sparc
#define helper_gvec_sar8v helper_gvec_sar8v_sparc
#define helper_gvec_sar16i helper_gvec_sar16i_sparc

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc64
#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc64
#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc64
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_sparc64
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_sparc64
#define helper_gvec_sar8i helper_gvec_sar8i_sparc64
#define helper_gvec_sar8v helper_gvec_sar8v_sparc64
#define helper_gvec_sar16i helper_gvec_sar16i_sparc64

View file

@ -223,7 +223,6 @@ DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr)
DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32)
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
@ -674,6 +673,9 @@ DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i3
DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

View file

@ -1093,6 +1093,7 @@ DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
@ -1131,26 +1132,6 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
return do_3same(s, a, gen_VMINNM_fp32_3s);
}
/*
 * NOTE(review): WRAP_ENV_FN is defined outside this view; presumably it
 * generates gen_VRSQRTS_tramp as a wrapper around gen_helper_rsqrts_f32
 * that supplies the env argument -- confirm against the macro.
 */
WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)

/*
 * Emit VRSQRTS as a three-operand gvec expansion.
 *
 * The only expander provided is the .fni4 callback, which is the
 * trampoline declared above.  @vece is accepted for signature
 * compatibility but is not used here.
 */
static void gen_VRSQRTS_fp_3s(TCGContext *s, unsigned vece, uint32_t rd_ofs,
                              uint32_t rn_ofs, uint32_t rm_ofs,
                              uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen3 vrsqrts_op = {
        .fni4 = gen_VRSQRTS_tramp,
    };

    tcg_gen_gvec_3(s, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &vrsqrts_op);
}
/*
 * Translate the 3-same floating-point VRSQRTS encoding.
 *
 * Only a->size == 0 is accepted; any other size makes the function
 * return false without emitting code.  Otherwise the result of
 * do_3same() with gen_VRSQRTS_fp_3s is returned.
 */
static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
{
    if (a->size == 0) {
        return do_3same(s, a, gen_VRSQRTS_fp_3s);
    }

    /* TODO fp16 support */
    return false;
}
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
{
/* FP operations handled pairwise 32 bits at a time */

View file

@ -826,6 +826,33 @@ static float32 float32_recps_nf(float32 op1, float32 op2, float_status *stat)
return float32_sub(float32_two, float32_mul(op1, op2, stat), stat);
}
/*
 * Reciprocal square-root step. AArch32 non-fused semantics: the
 * multiply, subtract and divide are three separate softfloat
 * operations, each of which is passed @stat.
 *
 * Both operands first go through float16_squash_input_denormal().
 * If one of them is then an infinity while the other is a zero, the
 * constant 1.5 is returned directly; otherwise the result is
 * (3 - op1 * op2) / 2.
 */
static float16 float16_rsqrts_nf(float16 op1, float16 op2, float_status *stat)
{
    float16 n = float16_squash_input_denormal(op1, stat);
    float16 m = float16_squash_input_denormal(op2, stat);
    float16 product, diff;

    /* infinity * zero, in either operand order, yields 1.5 */
    if (float16_is_infinity(n) && float16_is_zero(m)) {
        return float16_one_point_five;
    }
    if (float16_is_infinity(m) && float16_is_zero(n)) {
        return float16_one_point_five;
    }

    product = float16_mul(n, m, stat);
    diff = float16_sub(float16_three, product, stat);
    return float16_div(diff, float16_two, stat);
}
/*
 * Single-precision reciprocal square-root step, non-fused: the
 * multiply, subtract and divide are three separate softfloat
 * operations, each of which is passed @stat.
 *
 * Both operands first go through float32_squash_input_denormal().
 * If one of them is then an infinity while the other is a zero, the
 * constant 1.5 is returned directly; otherwise the result is
 * (3 - op1 * op2) / 2.
 */
static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat)
{
    float32 n = float32_squash_input_denormal(op1, stat);
    float32 m = float32_squash_input_denormal(op2, stat);
    float32 product, diff;

    /* infinity * zero, in either operand order, yields 1.5 */
    if (float32_is_infinity(n) && float32_is_zero(m)) {
        return float32_one_point_five;
    }
    if (float32_is_infinity(m) && float32_is_zero(n)) {
        return float32_one_point_five;
    }

    product = float32_mul(n, m, stat);
    diff = float32_sub(float32_three, product, stat);
    return float32_div(diff, float32_two, stat);
}
#define DO_3OP(NAME, FUNC, TYPE) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \
@ -886,6 +913,9 @@ DO_3OP(gvec_fminnum_s, float32_minnum, float32)
DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
#ifdef TARGET_AARCH64
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)

View file

@ -538,21 +538,6 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
return r;
}
/*
 * Scalar single-precision reciprocal square-root step helper.
 *
 * Uses env->vfp.standard_fp_status for all arithmetic and flag updates.
 * When one operand is an infinity and the other is zero or denormal,
 * the result is the constant 1.5; in that case the input-denormal flag
 * is raised if neither operand is an exact zero.  Otherwise the result
 * is (3 - a * b) / 2, computed as separate multiply, subtract and
 * divide operations.
 */
float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b)
{
    float_status *fpst = &env->vfp.standard_fp_status;
    float32 prod, diff;

    if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
        (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
        /* Neither operand is an exact zero: report input denormal. */
        if (!float32_is_zero(a) && !float32_is_zero(b)) {
            float_raise(float_flag_input_denormal, fpst);
        }
        return float32_one_point_five;
    }

    prod = float32_mul(a, b, fpst);
    diff = float32_sub(float32_three, prod, fpst);
    return float32_div(diff, float32_two, fpst);
}
/* NEON helpers. */
/*

View file

@ -1393,6 +1393,8 @@
#define helper_gvec_rotr16v helper_gvec_rotr16v_x86_64
#define helper_gvec_rotr32v helper_gvec_rotr32v_x86_64
#define helper_gvec_rotr64v helper_gvec_rotr64v_x86_64
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_x86_64
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_x86_64
#define helper_gvec_sar8i helper_gvec_sar8i_x86_64
#define helper_gvec_sar8v helper_gvec_sar8v_x86_64
#define helper_gvec_sar16i helper_gvec_sar16i_x86_64