mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2024-12-31 22:55:40 +00:00
target/arm: Implement fp16 for Neon VRSQRTS
Convert the Neon VRSQRTS insn to use a gvec helper, and use this to implement the fp16 case. As with VRECPS, we adjust the phrasing of the new implementation slightly so that the fp32 version parallels the fp16 one. Backports commit 40fde72dda2da8d55b820fa6c5efd85814be2023.
This commit is contained in:
parent
f4ebbba9fd
commit
b08ea84374
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_aarch64
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_aarch64
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_aarch64
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_aarch64
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_aarch64
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_aarch64eb
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_aarch64eb
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_aarch64eb
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_aarch64eb
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_aarch64eb
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_aarch64eb
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_aarch64eb
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_aarch64eb
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_arm
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_arm
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_arm
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_arm
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_arm
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_arm
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_arm
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_arm
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_armeb
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_armeb
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_armeb
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_armeb
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_armeb
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_armeb
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_armeb
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_armeb
|
||||
|
|
|
@ -1399,6 +1399,8 @@ symbols = (
|
|||
'helper_gvec_rotr16v',
|
||||
'helper_gvec_rotr32v',
|
||||
'helper_gvec_rotr64v',
|
||||
'helper_gvec_rsqrts_nf_h',
|
||||
'helper_gvec_rsqrts_nf_s',
|
||||
'helper_gvec_sar8i',
|
||||
'helper_gvec_sar8v',
|
||||
'helper_gvec_sar16i',
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_m68k
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_m68k
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_m68k
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_m68k
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_m68k
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_m68k
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_m68k
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_m68k
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_mips
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_mips
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_mips
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_mips
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_mips
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_mips
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips64
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips64
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_mips64
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_mips64
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_mips64
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_mips64el
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_mips64el
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_mips64el
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mips64el
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mips64el
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_mips64el
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_mips64el
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_mips64el
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_mipsel
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_mipsel
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_mipsel
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_mipsel
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_mipsel
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_mipsel
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_mipsel
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_mipsel
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_powerpc
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_powerpc
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_powerpc
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_powerpc
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_powerpc
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_powerpc
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_powerpc
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_powerpc
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv32
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv32
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv32
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_riscv32
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_riscv32
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_riscv32
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_riscv32
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_riscv32
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_riscv64
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_riscv64
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_riscv64
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_riscv64
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_riscv64
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_riscv64
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_riscv64
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_riscv64
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_sparc
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_sparc
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_sparc
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_sparc
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_sparc
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_sparc64
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_sparc64
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_sparc64
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_sparc64
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_sparc64
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_sparc64
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_sparc64
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_sparc64
|
||||
|
|
|
@ -223,7 +223,6 @@ DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
|
|||
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
|
||||
DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr)
|
||||
|
||||
DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32)
|
||||
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
|
||||
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
|
||||
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
|
||||
|
@ -674,6 +673,9 @@ DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i3
|
|||
DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
|
||||
|
||||
|
|
|
@ -1093,6 +1093,7 @@ DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
|
|||
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
|
||||
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
|
||||
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
|
||||
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
|
||||
|
||||
WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
|
||||
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
|
||||
|
@ -1131,26 +1132,6 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
|
|||
return do_3same(s, a, gen_VMINNM_fp32_3s);
|
||||
}
|
||||
|
||||
WRAP_ENV_FN(gen_VRSQRTS_tramp, gen_helper_rsqrts_f32)
|
||||
|
||||
/*
 * Expander passed to do_3same() by trans_VRSQRTS_fp_3s().
 * Builds a GVecGen3 whose only populated callback is
 * .fni4 = gen_VRSQRTS_tramp and forwards the register offsets and sizes
 * to tcg_gen_gvec_3(). The vece argument is not referenced in the body.
 */
static void gen_VRSQRTS_fp_3s(TCGContext *s, unsigned vece, uint32_t rd_ofs,
|
||||
uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t oprsz, uint32_t maxsz)
|
||||
{
|
||||
static const GVecGen3 ops = { .fni4 = gen_VRSQRTS_tramp };
|
||||
tcg_gen_gvec_3(s, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops);
|
||||
}
|
||||
|
||||
/*
 * Translate entry point for the 3-same floating-point VRSQRTS form.
 * Only a->size == 0 is accepted; any other size returns false because
 * fp16 is not handled by this implementation. Accepted encodings are
 * delegated to do_3same() with gen_VRSQRTS_fp_3s as the expander, and
 * its result is returned unchanged.
 */
static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
|
||||
{
|
||||
if (a->size != 0) {
|
||||
/* TODO fp16 support */
|
||||
return false;
|
||||
}
|
||||

||||
return do_3same(s, a, gen_VRSQRTS_fp_3s);
|
||||
}
|
||||
|
||||
static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
|
||||
{
|
||||
/* FP operations handled pairwise 32 bits at a time */
|
||||
|
|
|
@ -826,6 +826,33 @@ static float32 float32_recps_nf(float32 op1, float32 op2, float_status *stat)
|
|||
return float32_sub(float32_two, float32_mul(op1, op2, stat), stat);
|
||||
}
|
||||
|
||||
/*
 * Reciprocal square-root step, AArch32 non-fused semantics (float16).
 *
 * Both operands are first passed through float16_squash_input_denormal().
 * If one operand is then an infinity and the other a zero (in either
 * order), 1.5 is returned directly. Otherwise the result is
 * (3 - op1 * op2) / 2, computed as a separate multiply, subtract and
 * divide, each using the float_status supplied in stat.
 */
|
||||
static float16 float16_rsqrts_nf(float16 op1, float16 op2, float_status *stat)
|
||||
{
|
||||
op1 = float16_squash_input_denormal(op1, stat);
|
||||
op2 = float16_squash_input_denormal(op2, stat);
|
||||

||||
/* Infinity paired with zero: return 1.5 without doing the multiply. */
if ((float16_is_infinity(op1) && float16_is_zero(op2)) ||
|
||||
(float16_is_infinity(op2) && float16_is_zero(op1))) {
|
||||
return float16_one_point_five;
|
||||
}
|
||||
op1 = float16_sub(float16_three, float16_mul(op1, op2, stat), stat);
|
||||
return float16_div(op1, float16_two, stat);
|
||||
}
|
||||
|
||||
/*
 * Single-precision counterpart of float16_rsqrts_nf(), with the same
 * structure.
 *
 * Both operands are first passed through float32_squash_input_denormal().
 * If one operand is then an infinity and the other a zero (in either
 * order), 1.5 is returned directly. Otherwise the result is
 * (3 - op1 * op2) / 2, computed as a separate multiply, subtract and
 * divide, each using the float_status supplied in stat.
 */
static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat)
|
||||
{
|
||||
op1 = float32_squash_input_denormal(op1, stat);
|
||||
op2 = float32_squash_input_denormal(op2, stat);
|
||||

||||
/* Infinity paired with zero: return 1.5 without doing the multiply. */
if ((float32_is_infinity(op1) && float32_is_zero(op2)) ||
|
||||
(float32_is_infinity(op2) && float32_is_zero(op1))) {
|
||||
return float32_one_point_five;
|
||||
}
|
||||
op1 = float32_sub(float32_three, float32_mul(op1, op2, stat), stat);
|
||||
return float32_div(op1, float32_two, stat);
|
||||
}
|
||||
|
||||
#define DO_3OP(NAME, FUNC, TYPE) \
|
||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
|
||||
{ \
|
||||
|
@ -886,6 +913,9 @@ DO_3OP(gvec_fminnum_s, float32_minnum, float32)
|
|||
DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
|
||||
DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
|
||||
|
||||
DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
|
||||
DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
|
||||
|
||||
#ifdef TARGET_AARCH64
|
||||
|
||||
DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
|
||||
|
|
|
@ -538,21 +538,6 @@ uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
|
|||
return r;
|
||||
}
|
||||
|
||||
/*
 * Scalar float32 reciprocal square-root step helper.
 *
 * All arithmetic uses env->vfp.standard_fp_status. When one operand is an
 * infinity and the other is zero or denormal, the result is 1.5; in that
 * case float_flag_input_denormal is raised unless one of the operands is
 * an exact zero. Otherwise the result is (3 - a * b) / 2, computed as a
 * separate multiply, subtract and divide.
 */
float32 HELPER(rsqrts_f32)(CPUARMState *env, float32 a, float32 b)
|
||||
{
|
||||
float_status *s = &env->vfp.standard_fp_status;
|
||||
float32 product;
|
||||
if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
|
||||
(float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
|
||||
/* Neither operand is an exact zero, so the non-infinite one is denormal. */
if (!(float32_is_zero(a) || float32_is_zero(b))) {
|
||||
float_raise(float_flag_input_denormal, s);
|
||||
}
|
||||
return float32_one_point_five;
|
||||
}
|
||||
product = float32_mul(a, b, s);
|
||||
return float32_div(float32_sub(float32_three, product, s), float32_two, s);
|
||||
}
|
||||
|
||||
/* NEON helpers. */
|
||||
|
||||
/*
|
||||
|
|
|
@ -1393,6 +1393,8 @@
|
|||
#define helper_gvec_rotr16v helper_gvec_rotr16v_x86_64
|
||||
#define helper_gvec_rotr32v helper_gvec_rotr32v_x86_64
|
||||
#define helper_gvec_rotr64v helper_gvec_rotr64v_x86_64
|
||||
#define helper_gvec_rsqrts_nf_h helper_gvec_rsqrts_nf_h_x86_64
|
||||
#define helper_gvec_rsqrts_nf_s helper_gvec_rsqrts_nf_s_x86_64
|
||||
#define helper_gvec_sar8i helper_gvec_sar8i_x86_64
|
||||
#define helper_gvec_sar8v helper_gvec_sar8v_x86_64
|
||||
#define helper_gvec_sar16i helper_gvec_sar16i_x86_64
|
||||
|
|
Loading…
Reference in a new issue