arm/translate-a64: add FP16 FR[ECP/SQRT]S to simd_three_reg_same_fp16

Because some of the constants used here will also be needed
elsewhere (specifically for the upcoming SVE support), we move them out
to softfloat.h.

Backports commit 026e2d6ef74000afb9049f46add4b94f594c8fb3 from qemu
This commit is contained in:
Alex Bennée 2018-03-08 15:44:29 -05:00 committed by Lioncash
parent a02b9b81a9
commit 4b2577537b
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
7 changed files with 61 additions and 5 deletions

View file

@ -3750,8 +3750,10 @@
/*
 * Symbol aliases: each generic helper name is redirected to the same name
 * with an "_aarch64" suffix appended.
 */
#define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64
#define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64
#define helper_rbit64 helper_rbit64_aarch64
#define helper_recpsf_f16 helper_recpsf_f16_aarch64
#define helper_recpsf_f32 helper_recpsf_f32_aarch64
#define helper_recpsf_f64 helper_recpsf_f64_aarch64
#define helper_rsqrtsf_f16 helper_rsqrtsf_f16_aarch64
#define helper_rsqrtsf_f32 helper_rsqrtsf_f32_aarch64
#define helper_rsqrtsf_f64 helper_rsqrtsf_f64_aarch64
#define helper_sdiv64 helper_sdiv64_aarch64

View file

@ -3750,8 +3750,10 @@
/*
 * Symbol aliases: each generic helper name is redirected to the same name
 * with an "_aarch64eb" suffix appended.
 */
#define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64eb
#define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64eb
#define helper_rbit64 helper_rbit64_aarch64eb
#define helper_recpsf_f16 helper_recpsf_f16_aarch64eb
#define helper_recpsf_f32 helper_recpsf_f32_aarch64eb
#define helper_recpsf_f64 helper_recpsf_f64_aarch64eb
#define helper_rsqrtsf_f16 helper_rsqrtsf_f16_aarch64eb
#define helper_rsqrtsf_f32 helper_rsqrtsf_f32_aarch64eb
#define helper_rsqrtsf_f64 helper_rsqrtsf_f64_aarch64eb
#define helper_sdiv64 helper_sdiv64_aarch64eb

View file

@ -3770,8 +3770,10 @@ aarch64_symbols = (
'helper_paired_cmpxchg64_be',
'helper_paired_cmpxchg64_le',
'helper_rbit64',
'helper_recpsf_f16',
'helper_recpsf_f32',
'helper_recpsf_f64',
'helper_rsqrtsf_f16',
'helper_rsqrtsf_f32',
'helper_rsqrtsf_f64',
'helper_sdiv64',

View file

@ -313,8 +313,11 @@ static inline float16 float16_set_sign(float16 a, int sign)
}
/*
 * Commonly used half-precision constants, given as raw bit patterns:
 *   0x0000 = 0.0, 0x3800 = 0.5, 0x3c00 = 1.0, 0x3e00 = 1.5,
 *   0x4000 = 2.0, 0x4200 = 3.0, 0x7c00 = +infinity.
 * NOTE(review): float16_one is listed twice with the same replacement
 * list; harmless to the preprocessor, but probably only one line is meant
 * to remain - confirm.
 */
#define float16_zero make_float16(0)
#define float16_one make_float16(0x3c00)
#define float16_half make_float16(0x3800)
#define float16_one make_float16(0x3c00)
#define float16_one_point_five make_float16(0x3e00)
#define float16_two make_float16(0x4000)
#define float16_three make_float16(0x4200)
#define float16_infinity make_float16(0x7c00)
/*----------------------------------------------------------------------------
@ -422,11 +425,13 @@ static inline float32 float32_set_sign(float32 a, int sign)
}
/*
 * Commonly used single-precision constants, given as raw bit patterns:
 *   0x00000000 = 0.0, 0x3f000000 = 0.5, 0x3f800000 = 1.0,
 *   0x3fc00000 = 1.5, 0x40000000 = 2.0, 0x40400000 = 3.0,
 *   0x7f800000 = +infinity.
 * NOTE(review): float32_one is listed twice with the same replacement
 * list; harmless to the preprocessor, but probably only one line is meant
 * to remain - confirm.
 */
#define float32_zero make_float32(0)
#define float32_one make_float32(0x3f800000)
#define float32_half make_float32(0x3f000000)
#define float32_one make_float32(0x3f800000)
#define float32_one_point_five make_float32(0x3fc00000)
#define float32_two make_float32(0x40000000)
#define float32_three make_float32(0x40400000)
#define float32_infinity make_float32(0x7f800000)
/*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN.
*----------------------------------------------------------------------------*/
@ -533,9 +538,12 @@ static inline float64 float64_set_sign(float64 a, int sign)
}
/*
 * Commonly used double-precision constants, given as raw bit patterns:
 *   0x0000000000000000 = 0.0, 0x3fe0000000000000 = 0.5,
 *   0x3ff0000000000000 = 1.0, 0x3ff8000000000000 = 1.5,
 *   0x4000000000000000 = 2.0, 0x4008000000000000 = 3.0,
 *   0x3fe62e42fefa39ef = ln(2), 0x7ff0000000000000 = +infinity.
 * NOTE(review): float64_one and float64_ln2 are each listed twice with the
 * same replacement list; harmless to the preprocessor, but probably only
 * one line of each is meant to remain - confirm.
 */
#define float64_zero make_float64(0)
#define float64_one make_float64(0x3ff0000000000000LL)
#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
#define float64_half make_float64(0x3fe0000000000000LL)
#define float64_one make_float64(0x3ff0000000000000LL)
#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
#define float64_two make_float64(0x4000000000000000ULL)
#define float64_three make_float64(0x4008000000000000ULL)
#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
#define float64_infinity make_float64(0x7ff0000000000000LL)
/*----------------------------------------------------------------------------

View file

@ -184,6 +184,10 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
* versions, these do a fully fused multiply-add or
* multiply-add-and-halve.
*/
/*
 * Constants 2.0, 3.0 and 1.5 as half- and single-precision bit patterns.
 * The float16 ones are the addend / special-case results used by the
 * recpsf_f16 and rsqrtsf_f16 helpers in this file.
 */
#define float16_two make_float16(0x4000)
#define float16_three make_float16(0x4200)
#define float16_one_point_five make_float16(0x3e00)
#define float32_two make_float32(0x40000000)
#define float32_three make_float32(0x40400000)
#define float32_one_point_five make_float32(0x3fc00000)
@ -192,6 +196,21 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
#define float64_three make_float64(0x4008000000000000ULL)
#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
/*
 * Half-precision reciprocal step helper.
 *
 * Both operands are passed through float16_squash_input_denormal() and the
 * sign of the first one is then flipped.  When one operand is an infinity
 * and the other is a zero (in either order) the result is the constant 2.0.
 * Otherwise the result is float16_muladd(-a, b, 2.0), i.e. 2.0 - a * b.
 *
 * fpstp points at the float_status used for the squash and muladd calls.
 */
float16 HELPER(recpsf_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *status = fpstp;
    float16 neg_a;
    float16 rhs;

    neg_a = float16_squash_input_denormal(a, status);
    rhs = float16_squash_input_denormal(b, status);
    neg_a = float16_chs(neg_a);

    /* infinity paired with zero would be an invalid product: return 2.0 */
    if (float16_is_infinity(neg_a) && float16_is_zero(rhs)) {
        return float16_two;
    }
    if (float16_is_infinity(rhs) && float16_is_zero(neg_a)) {
        return float16_two;
    }

    return float16_muladd(neg_a, rhs, float16_two, 0, status);
}
float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
float_status *fpst = fpstp;
@ -222,6 +241,21 @@ float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_two, 0, fpst);
}
/*
 * Half-precision reciprocal square-root step helper.
 *
 * Both operands are passed through float16_squash_input_denormal() and the
 * sign of the first one is then flipped.  When one operand is an infinity
 * and the other is a zero (in either order) the result is the constant 1.5.
 * Otherwise the result is float16_muladd(-a, b, 3.0) with the
 * float_muladd_halve_result flag, i.e. (3.0 - a * b) / 2.
 *
 * fpstp points at the float_status used for the squash and muladd calls.
 */
float16 HELPER(rsqrtsf_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *status = fpstp;
    float16 neg_a;
    float16 rhs;

    neg_a = float16_squash_input_denormal(a, status);
    rhs = float16_squash_input_denormal(b, status);
    neg_a = float16_chs(neg_a);

    /* infinity paired with zero would be an invalid product: return 1.5 */
    if (float16_is_infinity(neg_a) && float16_is_zero(rhs)) {
        return float16_one_point_five;
    }
    if (float16_is_infinity(rhs) && float16_is_zero(neg_a)) {
        return float16_one_point_five;
    }

    return float16_muladd(neg_a, rhs, float16_three,
                          float_muladd_halve_result, status);
}
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
float_status *fpst = fpstp;

View file

@ -29,8 +29,10 @@ DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
DEF_HELPER_FLAGS_3(neon_cge_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
/*
 * Reciprocal-step (recpsf) and reciprocal-square-root-step (rsqrtsf)
 * helpers, one per element width (f16, f32, f64).  Each entry lists one
 * type followed by two operand types of the same width and a pointer;
 * presumably the first is the return type and the pointer is the
 * float_status argument - confirm against the DEF_HELPER_FLAGS_3 definition.
 */
DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)

View file

@ -10455,6 +10455,9 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x6: /* FMAX */
gen_helper_advsimd_maxh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x7: /* FRECPS */
gen_helper_recpsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x8: /* FMINNM */
gen_helper_advsimd_minnumh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
break;
@ -10471,6 +10474,9 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0xe: /* FMIN */
gen_helper_advsimd_minh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0xf: /* FRSQRTS */
gen_helper_rsqrtsf_f16(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
break;
case 0x13: /* FMUL */
gen_helper_advsimd_mulh(tcg_ctx, tcg_res, tcg_op1, tcg_op2, fpst);
break;