softfloat: Use post test for floatN_mul

The existing f{32,64}_addsub_post test, which checks for zero
inputs, is identical to f{32,64}_mul_fast_test, so we can
eliminate the fast_test/fast_op hooks in favor of reusing the
same post hook.

This means we have one fewer test along the fast path for multiply.

Backports commit b240c9c497b9880ac0ba29465907d5ebecd48083 from qemu
This commit is contained in:
Richard Henderson 2020-05-21 17:23:42 -04:00 committed by Lioncash
parent c675454b27
commit 6530d6342f
3 changed files with 19 additions and 56 deletions

View file

@ -1698,7 +1698,7 @@ void tb_invalidate_phys_page_fast(struct uc_struct* uc, tb_page_addr_t start, in
unsigned long b;
nr = start & ~TARGET_PAGE_MASK;
b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
b = p->code_bitmap[BIT_WORD(nr)] >> ((nr & (BITS_PER_LONG - 1)) & 0x1f);
if (b & ((1 << len) - 1)) {
goto do_invalidate;
}

View file

@ -340,12 +340,10 @@ static inline bool f64_is_inf(union_float64 a)
return float64_is_infinity(a.s);
}
/* Note: @fast_test and @post can be NULL */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
hard_f32_op2_fn hard, soft_f32_op2_fn soft,
f32_check_fn pre, f32_check_fn post,
f32_check_fn fast_test, soft_f32_op2_fn fast_op)
f32_check_fn pre, f32_check_fn post)
{
union_float32 ua, ub, ur;
@ -360,17 +358,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
if (unlikely(!pre(ua, ub))) {
goto soft;
}
if (fast_test && fast_test(ua, ub)) {
return fast_op(ua.s, ub.s, s);
}
ur.h = hard(ua.h, ub.h);
if (unlikely(f32_is_inf(ur))) {
s->float_exception_flags |= float_flag_overflow;
} else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
if (post == NULL || post(ua, ub)) {
goto soft;
}
} else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
goto soft;
}
return ur.s;
@ -381,8 +374,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
hard_f64_op2_fn hard, soft_f64_op2_fn soft,
f64_check_fn pre, f64_check_fn post,
f64_check_fn fast_test, soft_f64_op2_fn fast_op)
f64_check_fn pre, f64_check_fn post)
{
union_float64 ua, ub, ur;
@ -397,17 +389,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
if (unlikely(!pre(ua, ub))) {
goto soft;
}
if (fast_test && fast_test(ua, ub)) {
return fast_op(ua.s, ub.s, s);
}
ur.h = hard(ua.h, ub.h);
if (unlikely(f64_is_inf(ur))) {
s->float_exception_flags |= float_flag_overflow;
} else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
if (post == NULL || post(ua, ub)) {
goto soft;
}
} else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
goto soft;
}
return ur.s;
@ -1116,7 +1103,7 @@ static double hard_f64_sub(double a, double b)
return a - b;
}
static bool f32_addsub_post(union_float32 a, union_float32 b)
static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
if (QEMU_HARDFLOAT_2F32_USE_FP) {
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@ -1124,7 +1111,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b)
return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}
static bool f64_addsub_post(union_float64 a, union_float64 b)
static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
if (QEMU_HARDFLOAT_2F64_USE_FP) {
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@ -1137,14 +1124,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s,
hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
return float32_gen2(a, b, s, hard, soft,
f32_is_zon2, f32_addsub_post, NULL, NULL);
f32_is_zon2, f32_addsubmul_post);
}
static float64 float64_addsub(float64 a, float64 b, float_status *s,
hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
return float64_gen2(a, b, s, hard, soft,
f64_is_zon2, f64_addsub_post, NULL, NULL);
f64_is_zon2, f64_addsubmul_post);
}
float32 QEMU_FLATTEN
@ -1259,42 +1246,18 @@ static double hard_f64_mul(double a, double b)
return a * b;
}
static bool f32_mul_fast_test(union_float32 a, union_float32 b)
{
return float32_is_zero(a.s) || float32_is_zero(b.s);
}
static bool f64_mul_fast_test(union_float64 a, union_float64 b)
{
return float64_is_zero(a.s) || float64_is_zero(b.s);
}
static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
{
bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
return float32_set_sign(float32_zero, signbit);
}
static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
{
bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
return float64_set_sign(float64_zero, signbit);
}
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
f32_is_zon2, f32_addsubmul_post);
}
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
f64_is_zon2, f64_addsubmul_post);
}
/*
@ -1813,14 +1776,14 @@ float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
f32_div_pre, f32_div_post, NULL, NULL);
f32_div_pre, f32_div_post);
}
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
f64_div_pre, f64_div_post, NULL, NULL);
f64_div_pre, f64_div_post);
}
/*
@ -5407,7 +5370,7 @@ int32_t floatx80_to_int32(floatx80 a, float_status *status)
if (floatx80_invalid_encoding(a)) {
float_raise(float_flag_invalid, status);
return 1 << 31;
return (int32_t)(1U << 31);
}
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );
@ -5439,7 +5402,7 @@ int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
if (floatx80_invalid_encoding(a)) {
float_raise(float_flag_invalid, status);
return 1 << 31;
return (int32_t)(1U << 31);
}
aSig = extractFloatx80Frac( a );
aExp = extractFloatx80Exp( a );

View file

@ -738,11 +738,11 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
*/
if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
/* VFP coprocessor: cp10 & cp11 [23:20] */
mask |= (1 << 31) | (1 << 30) | (0xf << 20);
mask |= (1U << 31) | (1 << 30) | (0xf << 20);
if (!arm_feature(env, ARM_FEATURE_NEON)) {
/* ASEDIS [31] bit is RAO/WI */
value |= (1 << 31);
value |= (1U << 31);
}
/* VFPv3 and upwards with NEON implement 32 double precision