mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2024-12-23 05:35:32 +00:00
softfloat: Use post test for floatN_mul
The existing f{32,64}_addsub_post test, which checks for zero inputs, is identical to f{32,64}_mul_fast_test, which means we can eliminate the fast_test/fast_op hooks in favor of reusing the same post hook. This means we have one fewer test along the fast path for multiply. Backports commit b240c9c497b9880ac0ba29465907d5ebecd48083 from qemu.
This commit is contained in:
parent
c675454b27
commit
6530d6342f
|
@ -1698,7 +1698,7 @@ void tb_invalidate_phys_page_fast(struct uc_struct* uc, tb_page_addr_t start, in
|
||||||
unsigned long b;
|
unsigned long b;
|
||||||
|
|
||||||
nr = start & ~TARGET_PAGE_MASK;
|
nr = start & ~TARGET_PAGE_MASK;
|
||||||
b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
|
b = p->code_bitmap[BIT_WORD(nr)] >> ((nr & (BITS_PER_LONG - 1)) & 0x1f);
|
||||||
if (b & ((1 << len) - 1)) {
|
if (b & ((1 << len) - 1)) {
|
||||||
goto do_invalidate;
|
goto do_invalidate;
|
||||||
}
|
}
|
||||||
|
|
|
@ -340,12 +340,10 @@ static inline bool f64_is_inf(union_float64 a)
|
||||||
return float64_is_infinity(a.s);
|
return float64_is_infinity(a.s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Note: @fast_test and @post can be NULL */
|
|
||||||
static inline float32
|
static inline float32
|
||||||
float32_gen2(float32 xa, float32 xb, float_status *s,
|
float32_gen2(float32 xa, float32 xb, float_status *s,
|
||||||
hard_f32_op2_fn hard, soft_f32_op2_fn soft,
|
hard_f32_op2_fn hard, soft_f32_op2_fn soft,
|
||||||
f32_check_fn pre, f32_check_fn post,
|
f32_check_fn pre, f32_check_fn post)
|
||||||
f32_check_fn fast_test, soft_f32_op2_fn fast_op)
|
|
||||||
{
|
{
|
||||||
union_float32 ua, ub, ur;
|
union_float32 ua, ub, ur;
|
||||||
|
|
||||||
|
@ -360,17 +358,12 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
|
||||||
if (unlikely(!pre(ua, ub))) {
|
if (unlikely(!pre(ua, ub))) {
|
||||||
goto soft;
|
goto soft;
|
||||||
}
|
}
|
||||||
if (fast_test && fast_test(ua, ub)) {
|
|
||||||
return fast_op(ua.s, ub.s, s);
|
|
||||||
}
|
|
||||||
|
|
||||||
ur.h = hard(ua.h, ub.h);
|
ur.h = hard(ua.h, ub.h);
|
||||||
if (unlikely(f32_is_inf(ur))) {
|
if (unlikely(f32_is_inf(ur))) {
|
||||||
s->float_exception_flags |= float_flag_overflow;
|
s->float_exception_flags |= float_flag_overflow;
|
||||||
} else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
|
} else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
|
||||||
if (post == NULL || post(ua, ub)) {
|
goto soft;
|
||||||
goto soft;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return ur.s;
|
return ur.s;
|
||||||
|
|
||||||
|
@ -381,8 +374,7 @@ float32_gen2(float32 xa, float32 xb, float_status *s,
|
||||||
static inline float64
|
static inline float64
|
||||||
float64_gen2(float64 xa, float64 xb, float_status *s,
|
float64_gen2(float64 xa, float64 xb, float_status *s,
|
||||||
hard_f64_op2_fn hard, soft_f64_op2_fn soft,
|
hard_f64_op2_fn hard, soft_f64_op2_fn soft,
|
||||||
f64_check_fn pre, f64_check_fn post,
|
f64_check_fn pre, f64_check_fn post)
|
||||||
f64_check_fn fast_test, soft_f64_op2_fn fast_op)
|
|
||||||
{
|
{
|
||||||
union_float64 ua, ub, ur;
|
union_float64 ua, ub, ur;
|
||||||
|
|
||||||
|
@ -397,17 +389,12 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
|
||||||
if (unlikely(!pre(ua, ub))) {
|
if (unlikely(!pre(ua, ub))) {
|
||||||
goto soft;
|
goto soft;
|
||||||
}
|
}
|
||||||
if (fast_test && fast_test(ua, ub)) {
|
|
||||||
return fast_op(ua.s, ub.s, s);
|
|
||||||
}
|
|
||||||
|
|
||||||
ur.h = hard(ua.h, ub.h);
|
ur.h = hard(ua.h, ub.h);
|
||||||
if (unlikely(f64_is_inf(ur))) {
|
if (unlikely(f64_is_inf(ur))) {
|
||||||
s->float_exception_flags |= float_flag_overflow;
|
s->float_exception_flags |= float_flag_overflow;
|
||||||
} else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
|
} else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
|
||||||
if (post == NULL || post(ua, ub)) {
|
goto soft;
|
||||||
goto soft;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return ur.s;
|
return ur.s;
|
||||||
|
|
||||||
|
@ -1116,7 +1103,7 @@ static double hard_f64_sub(double a, double b)
|
||||||
return a - b;
|
return a - b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f32_addsub_post(union_float32 a, union_float32 b)
|
static bool f32_addsubmul_post(union_float32 a, union_float32 b)
|
||||||
{
|
{
|
||||||
if (QEMU_HARDFLOAT_2F32_USE_FP) {
|
if (QEMU_HARDFLOAT_2F32_USE_FP) {
|
||||||
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
|
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
|
||||||
|
@ -1124,7 +1111,7 @@ static bool f32_addsub_post(union_float32 a, union_float32 b)
|
||||||
return !(float32_is_zero(a.s) && float32_is_zero(b.s));
|
return !(float32_is_zero(a.s) && float32_is_zero(b.s));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f64_addsub_post(union_float64 a, union_float64 b)
|
static bool f64_addsubmul_post(union_float64 a, union_float64 b)
|
||||||
{
|
{
|
||||||
if (QEMU_HARDFLOAT_2F64_USE_FP) {
|
if (QEMU_HARDFLOAT_2F64_USE_FP) {
|
||||||
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
|
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
|
||||||
|
@ -1137,14 +1124,14 @@ static float32 float32_addsub(float32 a, float32 b, float_status *s,
|
||||||
hard_f32_op2_fn hard, soft_f32_op2_fn soft)
|
hard_f32_op2_fn hard, soft_f32_op2_fn soft)
|
||||||
{
|
{
|
||||||
return float32_gen2(a, b, s, hard, soft,
|
return float32_gen2(a, b, s, hard, soft,
|
||||||
f32_is_zon2, f32_addsub_post, NULL, NULL);
|
f32_is_zon2, f32_addsubmul_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
static float64 float64_addsub(float64 a, float64 b, float_status *s,
|
static float64 float64_addsub(float64 a, float64 b, float_status *s,
|
||||||
hard_f64_op2_fn hard, soft_f64_op2_fn soft)
|
hard_f64_op2_fn hard, soft_f64_op2_fn soft)
|
||||||
{
|
{
|
||||||
return float64_gen2(a, b, s, hard, soft,
|
return float64_gen2(a, b, s, hard, soft,
|
||||||
f64_is_zon2, f64_addsub_post, NULL, NULL);
|
f64_is_zon2, f64_addsubmul_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
float32 QEMU_FLATTEN
|
float32 QEMU_FLATTEN
|
||||||
|
@ -1259,42 +1246,18 @@ static double hard_f64_mul(double a, double b)
|
||||||
return a * b;
|
return a * b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f32_mul_fast_test(union_float32 a, union_float32 b)
|
|
||||||
{
|
|
||||||
return float32_is_zero(a.s) || float32_is_zero(b.s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool f64_mul_fast_test(union_float64 a, union_float64 b)
|
|
||||||
{
|
|
||||||
return float64_is_zero(a.s) || float64_is_zero(b.s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
|
|
||||||
{
|
|
||||||
bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
|
|
||||||
|
|
||||||
return float32_set_sign(float32_zero, signbit);
|
|
||||||
}
|
|
||||||
|
|
||||||
static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
|
|
||||||
{
|
|
||||||
bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
|
|
||||||
|
|
||||||
return float64_set_sign(float64_zero, signbit);
|
|
||||||
}
|
|
||||||
|
|
||||||
float32 QEMU_FLATTEN
|
float32 QEMU_FLATTEN
|
||||||
float32_mul(float32 a, float32 b, float_status *s)
|
float32_mul(float32 a, float32 b, float_status *s)
|
||||||
{
|
{
|
||||||
return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
|
return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
|
||||||
f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
|
f32_is_zon2, f32_addsubmul_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
float64 QEMU_FLATTEN
|
float64 QEMU_FLATTEN
|
||||||
float64_mul(float64 a, float64 b, float_status *s)
|
float64_mul(float64 a, float64 b, float_status *s)
|
||||||
{
|
{
|
||||||
return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
|
return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
|
||||||
f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
|
f64_is_zon2, f64_addsubmul_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1813,14 +1776,14 @@ float32 QEMU_FLATTEN
|
||||||
float32_div(float32 a, float32 b, float_status *s)
|
float32_div(float32 a, float32 b, float_status *s)
|
||||||
{
|
{
|
||||||
return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
|
return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
|
||||||
f32_div_pre, f32_div_post, NULL, NULL);
|
f32_div_pre, f32_div_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
float64 QEMU_FLATTEN
|
float64 QEMU_FLATTEN
|
||||||
float64_div(float64 a, float64 b, float_status *s)
|
float64_div(float64 a, float64 b, float_status *s)
|
||||||
{
|
{
|
||||||
return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
|
return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
|
||||||
f64_div_pre, f64_div_post, NULL, NULL);
|
f64_div_pre, f64_div_post);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -5407,7 +5370,7 @@ int32_t floatx80_to_int32(floatx80 a, float_status *status)
|
||||||
|
|
||||||
if (floatx80_invalid_encoding(a)) {
|
if (floatx80_invalid_encoding(a)) {
|
||||||
float_raise(float_flag_invalid, status);
|
float_raise(float_flag_invalid, status);
|
||||||
return 1 << 31;
|
return (int32_t)(1U << 31);
|
||||||
}
|
}
|
||||||
aSig = extractFloatx80Frac( a );
|
aSig = extractFloatx80Frac( a );
|
||||||
aExp = extractFloatx80Exp( a );
|
aExp = extractFloatx80Exp( a );
|
||||||
|
@ -5439,7 +5402,7 @@ int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
|
||||||
|
|
||||||
if (floatx80_invalid_encoding(a)) {
|
if (floatx80_invalid_encoding(a)) {
|
||||||
float_raise(float_flag_invalid, status);
|
float_raise(float_flag_invalid, status);
|
||||||
return 1 << 31;
|
return (int32_t)(1U << 31);
|
||||||
}
|
}
|
||||||
aSig = extractFloatx80Frac( a );
|
aSig = extractFloatx80Frac( a );
|
||||||
aExp = extractFloatx80Exp( a );
|
aExp = extractFloatx80Exp( a );
|
||||||
|
|
|
@ -738,11 +738,11 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
|
||||||
*/
|
*/
|
||||||
if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
|
if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
|
||||||
/* VFP coprocessor: cp10 & cp11 [23:20] */
|
/* VFP coprocessor: cp10 & cp11 [23:20] */
|
||||||
mask |= (1 << 31) | (1 << 30) | (0xf << 20);
|
mask |= (1U << 31) | (1 << 30) | (0xf << 20);
|
||||||
|
|
||||||
if (!arm_feature(env, ARM_FEATURE_NEON)) {
|
if (!arm_feature(env, ARM_FEATURE_NEON)) {
|
||||||
/* ASEDIS [31] bit is RAO/WI */
|
/* ASEDIS [31] bit is RAO/WI */
|
||||||
value |= (1 << 31);
|
value |= (1U << 31);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* VFPv3 and upwards with NEON implement 32 double precision
|
/* VFPv3 and upwards with NEON implement 32 double precision
|
||||||
|
|
Loading…
Reference in a new issue