From 6530d6342fcf041f37e54fb14f2f6e3d6c07290e Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 21 May 2020 17:23:42 -0400
Subject: [PATCH] softfloat: Use post test for floatN_mul

The existing f{32,64}_addsub_post test, which checks for zero inputs,
is identical to f{32,64}_mul_fast_test. Which means we can eliminate
the fast_test/fast_op hooks in favor of reusing the same post hook.

This means we have one fewer test along the fast path for multiply.

Backports commit b240c9c497b9880ac0ba29465907d5ebecd48083 from qemu
---
 qemu/accel/tcg/translate-all.c |  2 +-
 qemu/fpu/softfloat.c           | 69 ++++++++--------------------------
 qemu/target/arm/helper.c       |  4 +-
 3 files changed, 19 insertions(+), 56 deletions(-)

diff --git a/qemu/accel/tcg/translate-all.c b/qemu/accel/tcg/translate-all.c
index 3beb4bb3..82b8f80a 100644
--- a/qemu/accel/tcg/translate-all.c
+++ b/qemu/accel/tcg/translate-all.c
@@ -1698,7 +1698,7 @@ void tb_invalidate_phys_page_fast(struct uc_struct* uc, tb_page_addr_t start, in
         unsigned long b;
 
         nr = start & ~TARGET_PAGE_MASK;
-        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
+        b = p->code_bitmap[BIT_WORD(nr)] >> ((nr & (BITS_PER_LONG - 1)) & 0x1f);
         if (b & ((1 << len) - 1)) {
             goto do_invalidate;
         }
diff --git a/qemu/fpu/softfloat.c b/qemu/fpu/softfloat.c
index 5a81781c..6621bd91 100644
--- a/qemu/fpu/softfloat.c
+++ b/qemu/fpu/softfloat.c
@@ -340,12 +340,10 @@ static inline bool f64_is_inf(union_float64 a)
     return float64_is_infinity(a.s);
 }
 
-/* Note: @fast_test and @post can be NULL */
 static inline float32
 float32_gen2(float32 xa, float32 xb, float_status *s,
              hard_f32_op2_fn hard, soft_f32_op2_fn soft,
-             f32_check_fn pre, f32_check_fn post,
-             f32_check_fn fast_test, soft_f32_op2_fn fast_op)
+             f32_check_fn pre, f32_check_fn post)
 {
     union_float32 ua, ub, ur;
 
@@ -360,17 +358,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
     if (unlikely(!pre(ua, ub))) {
         goto soft;
     }
-    if (fast_test && fast_test(ua, ub)) {
-        return fast_op(ua.s, ub.s, s);
-    }
 
     ur.h = hard(ua.h, ub.h);
     if (unlikely(f32_is_inf(ur))) {
         s->float_exception_flags |= float_flag_overflow;
-    } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
-        if (post == NULL || post(ua, ub)) {
-            goto soft;
-        }
+    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
+        goto soft;
     }
     return ur.s;
 
@@ -381,8 +374,7 @@
 static inline float64
 float64_gen2(float64 xa, float64 xb, float_status *s,
              hard_f64_op2_fn hard, soft_f64_op2_fn soft,
-             f64_check_fn pre, f64_check_fn post,
-             f64_check_fn fast_test, soft_f64_op2_fn fast_op)
+             f64_check_fn pre, f64_check_fn post)
 {
     union_float64 ua, ub, ur;
 
@@ -397,17 +389,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
     if (unlikely(!pre(ua, ub))) {
         goto soft;
     }
-    if (fast_test && fast_test(ua, ub)) {
-        return fast_op(ua.s, ub.s, s);
-    }
 
     ur.h = hard(ua.h, ub.h);
     if (unlikely(f64_is_inf(ur))) {
         s->float_exception_flags |= float_flag_overflow;
-    } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
-        if (post == NULL || post(ua, ub)) {
-            goto soft;
-        }
+    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
+        goto soft;
     }
     return ur.s;
 
@@ -1116,7 +1103,7 @@ static double hard_f64_sub(double a, double b)
     return a - b;
 }
 
-static bool f32_addsub_post(union_float32 a, union_float32 b)
+static bool f32_addsubmul_post(union_float32 a, union_float32 b)
 {
     if (QEMU_HARDFLOAT_2F32_USE_FP) {
         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@@ -1124,7 +1111,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b)
     return !(float32_is_zero(a.s) && float32_is_zero(b.s));
 }
 
-static bool f64_addsub_post(union_float64 a, union_float64 b)
+static bool f64_addsubmul_post(union_float64 a, union_float64 b)
 {
     if (QEMU_HARDFLOAT_2F64_USE_FP) {
         return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
@@ -1137,14 +1124,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s,
                               hard_f32_op2_fn hard, soft_f32_op2_fn soft)
 {
     return float32_gen2(a, b, s, hard, soft,
-                        f32_is_zon2, f32_addsub_post, NULL, NULL);
+                        f32_is_zon2, f32_addsubmul_post);
 }
 
 static float64 float64_addsub(float64 a, float64 b, float_status *s,
                               hard_f64_op2_fn hard, soft_f64_op2_fn soft)
 {
     return float64_gen2(a, b, s, hard, soft,
-                        f64_is_zon2, f64_addsub_post, NULL, NULL);
+                        f64_is_zon2, f64_addsubmul_post);
 }
 
 float32 QEMU_FLATTEN
@@ -1259,42 +1246,18 @@ static double hard_f64_mul(double a, double b)
     return a * b;
 }
 
-static bool f32_mul_fast_test(union_float32 a, union_float32 b)
-{
-    return float32_is_zero(a.s) || float32_is_zero(b.s);
-}
-
-static bool f64_mul_fast_test(union_float64 a, union_float64 b)
-{
-    return float64_is_zero(a.s) || float64_is_zero(b.s);
-}
-
-static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
-{
-    bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
-
-    return float32_set_sign(float32_zero, signbit);
-}
-
-static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
-{
-    bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
-
-    return float64_set_sign(float64_zero, signbit);
-}
-
 float32 QEMU_FLATTEN
 float32_mul(float32 a, float32 b, float_status *s)
 {
     return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
-                        f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
+                        f32_is_zon2, f32_addsubmul_post);
 }
 
 float64 QEMU_FLATTEN
 float64_mul(float64 a, float64 b, float_status *s)
 {
     return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
-                        f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
+                        f64_is_zon2, f64_addsubmul_post);
 }
 
 /*
@@ -1813,14 +1776,14 @@ float32 QEMU_FLATTEN
 float32_div(float32 a, float32 b, float_status *s)
 {
     return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
-                        f32_div_pre, f32_div_post, NULL, NULL);
+                        f32_div_pre, f32_div_post);
 }
 
 float64 QEMU_FLATTEN
 float64_div(float64 a, float64 b, float_status *s)
 {
     return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
-                        f64_div_pre, f64_div_post, NULL, NULL);
+                        f64_div_pre, f64_div_post);
 }
 
 /*
@@ -5407,7 +5370,7 @@ int32_t floatx80_to_int32(floatx80 a, float_status *status)
 
     if (floatx80_invalid_encoding(a)) {
         float_raise(float_flag_invalid, status);
-        return 1 << 31;
+        return (int32_t)(1U << 31);
     }
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
@@ -5439,7 +5402,7 @@ int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
 
     if (floatx80_invalid_encoding(a)) {
         float_raise(float_flag_invalid, status);
-        return 1 << 31;
+        return (int32_t)(1U << 31);
     }
     aSig = extractFloatx80Frac( a );
     aExp = extractFloatx80Exp( a );
diff --git a/qemu/target/arm/helper.c b/qemu/target/arm/helper.c
index 9357f8ac..7f8ad302 100644
--- a/qemu/target/arm/helper.c
+++ b/qemu/target/arm/helper.c
@@ -738,11 +738,11 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
      */
     if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
         /* VFP coprocessor: cp10 & cp11 [23:20] */
-        mask |= (1 << 31) | (1 << 30) | (0xf << 20);
+        mask |= (1U << 31) | (1 << 30) | (0xf << 20);
 
         if (!arm_feature(env, ARM_FEATURE_NEON)) {
             /* ASEDIS [31] bit is RAO/WI */
-            value |= (1 << 31);
+            value |= (1U << 31);
         }
 
         /* VFPv3 and upwards with NEON implement 32 double precision