From 21fa932514f69558f37b35e0eb207d3102f265b8 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 5 Aug 2018 02:54:21 -0300 Subject: [PATCH] More accurate impl of FMINNM/FMAXNM, add vector variants (#296) * More accurate impl of FMINNM/FMAXNM, add vector variants * Optimize for the 0 case when op1 != op2 * Address PR feedback --- AOpCodeTable.cs | 6 +- Instruction/AInstEmitSimdArithmetic.cs | 116 ++++------ Instruction/ASoftFloat.cs | 284 ++++++++++++++++++++++++- Instruction/AVectorHelper.cs | 80 ------- 4 files changed, 324 insertions(+), 162 deletions(-) diff --git a/AOpCodeTable.cs b/AOpCodeTable.cs index aa8b1be..5ea38b0 100644 --- a/AOpCodeTable.cs +++ b/AOpCodeTable.cs @@ -267,11 +267,13 @@ namespace ChocolArm64 SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", AInstEmit.Fdiv_V, typeof(AOpCodeSimdReg)); SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fmadd_S, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx010010xxxxxxxxxx", AInstEmit.Fmax_S, typeof(AOpCodeSimdReg)); - SetA64("0x0011100x1xxxxx111101xxxxxxxxxx", AInstEmit.Fmax_V, typeof(AOpCodeSimdReg)); + SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", AInstEmit.Fmax_V, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx011010xxxxxxxxxx", AInstEmit.Fmaxnm_S, typeof(AOpCodeSimdReg)); + SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", AInstEmit.Fmaxnm_V, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx010110xxxxxxxxxx", AInstEmit.Fmin_S, typeof(AOpCodeSimdReg)); - SetA64("0x0011101x1xxxxx111101xxxxxxxxxx", AInstEmit.Fmin_V, typeof(AOpCodeSimdReg)); + SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fmin_V, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg)); + SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg)); SetA64("010111111<0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg)); SetA64("0x0011111< { - if (Op.Size == 0) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF)); - } - else if (Op.Size == 1) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max)); - } - else - { - throw new InvalidOperationException(); - } + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max)); }); } public static void Fmax_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - EmitVectorBinaryOpF(Context, () => { - if (Op.Size == 0) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF)); - } - else if (Op.Size == 1) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max)); - } - else - { - throw new InvalidOperationException(); - } - }); - } - - public static void Fmin_S(AILEmitterCtx Context) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - EmitScalarBinaryOpF(Context, () => - { - if (Op.Size == 0) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF)); - } - else if (Op.Size == 1) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min)); - } - else - { - throw new InvalidOperationException(); - } - }); - } - - public static void Fmin_V(AILEmitterCtx Context) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - EmitVectorBinaryOpF(Context, () => - { - if (SizeF == 0) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF)); - } - else if (SizeF == 1) - { - AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min)); - } - else - { - throw new InvalidOperationException(); - } + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max)); }); } public static void Fmaxnm_S(AILEmitterCtx Context) { - Fmax_S(Context); + EmitScalarBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MaxNum)); + }); + } + + public static void Fmaxnm_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MaxNum)); + }); + } + + public static void Fmin_S(AILEmitterCtx Context) + { + EmitScalarBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min)); + }); + } + + public static void Fmin_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min)); + }); } public static void Fminnm_S(AILEmitterCtx Context) { - Fmin_S(Context); + EmitScalarBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MinNum)); + }); + } + + public static void Fminnm_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpF(Context, () => + { + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MinNum)); + }); } public static void Fmla_Se(AILEmitterCtx Context) diff --git a/Instruction/ASoftFloat.cs b/Instruction/ASoftFloat.cs index 27f4f7f..8afa400 100644 --- a/Instruction/ASoftFloat.cs +++ b/Instruction/ASoftFloat.cs @@ -79,7 +79,7 @@ namespace ChocolArm64.Instruction if (scaled == 0) { // Zero -> Infinity - return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000)); + return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7FF0000000000000)); } // Denormal @@ -94,7 +94,7 @@ namespace ChocolArm64.Instruction if (x_sign != 0) { // Negative -> NaN - return BitConverter.Int64BitsToDouble((long)0x7ff8000000000000); + return BitConverter.Int64BitsToDouble((long)0x7FF8000000000000); } if (x_exp == 0x7ff && scaled == 0) @@ -153,7 +153,7 @@ namespace ChocolArm64.Instruction if (scaled == 0) { // Zero -> Infinity - return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000)); + return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7FF0000000000000)); } // Denormal @@ -208,8 +208,8 @@ namespace ChocolArm64.Instruction ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF; ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF; - bool inf1 = op1_other == 0x7ff0000000000000; - bool inf2 = op2_other == 0x7ff0000000000000; + bool inf1 = op1_other == 0x7FF0000000000000; + bool inf2 = op2_other == 0x7FF0000000000000; bool zero1 = op1_other == 0; bool zero2 = op2_other == 0; @@ -220,7 +220,7 @@ namespace ChocolArm64.Instruction else if (inf1 || inf2) { // Infinity - return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign))); + return BitConverter.Int64BitsToDouble((long)(0x7FF0000000000000 | (op1_sign ^ op2_sign))); } return 2.0 + op1 * op2; @@ -261,5 +261,277 @@ namespace ChocolArm64.Instruction uint new_exp = (uint)((exponent + 127) & 0xFF) << 23; return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | new_exp | (x_mantissa << 13))); } + + public static float MaxNum(float op1, float op2) + { + uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); + uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); + + if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + { + op1 = float.NegativeInfinity; + } + else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) + { + op2 = float.NegativeInfinity; + } + + return Max(op1, op2); + } + + public static double MaxNum(double op1, double op2) + { + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + { + op1 = double.NegativeInfinity; + } + else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) + { + op2 = double.NegativeInfinity; + } + + return Max(op1, op2); + } + + public static float Max(float op1, float op2) + { + // Fast path + if (op1 > op2) + { + return op1; + } + + if (op1 < op2 || (op1 == op2 && op2 != 0)) + { + return op2; + } + + uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); + uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); + + // Handle NaN cases + if (ProcessNaNs(op1_bits, op2_bits, out uint op_bits)) + { + return BitConverter.Int32BitsToSingle((int)op_bits); + } + + // Return the most positive zero + if ((op1_bits & op2_bits) == 0x80000000u) + { + return BitConverter.Int32BitsToSingle(int.MinValue); + } + + return 0; + } + + public static double Max(double op1, double op2) + { + // Fast path + if (op1 > op2) + { + return op1; + } + + if (op1 < op2 || (op1 == op2 && op2 != 0)) + { + return op2; + } + + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + // Handle NaN cases + if (ProcessNaNs(op1_bits, op2_bits, out ulong op_bits)) + { + return BitConverter.Int64BitsToDouble((long)op_bits); + } + + // Return the most positive zero + if ((op1_bits & op2_bits) == 0x8000000000000000ul) + { + return BitConverter.Int64BitsToDouble(long.MinValue); + } + + return 0; + } + + public static float MinNum(float op1, float op2) + { + uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); + uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); + + if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + { + op1 = float.PositiveInfinity; + } + else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) + { + op2 = float.PositiveInfinity; + } + + return Max(op1, op2); + } + + public static double MinNum(double op1, double op2) + { + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + { + op1 = double.PositiveInfinity; + } + else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) + { + op2 = double.PositiveInfinity; + } + + return Min(op1, op2); + } + + public static float Min(float op1, float op2) + { + // Fast path + if (op1 < op2) + { + return op1; + } + + if (op1 > op2 || (op1 == op2 && op2 != 0)) + { + return op2; + } + + uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); + uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); + + // Handle NaN cases + if (ProcessNaNs(op1_bits, op2_bits, out uint op_bits)) + { + return BitConverter.Int32BitsToSingle((int)op_bits); + } + + // Return the most negative zero + if ((op1_bits | op2_bits) == 0x80000000u) + { + return BitConverter.Int32BitsToSingle(int.MinValue); + } + + return 0; + } + + public static double Min(double op1, double op2) + { + // Fast path + if (op1 < op2) + { + return op1; + } + + if (op1 > op2 || (op1 == op2 && op2 != 0)) + { + return op2; + } + + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + // Handle NaN cases + if (ProcessNaNs(op1_bits, op2_bits, out ulong op_bits)) + { + return BitConverter.Int64BitsToDouble((long)op_bits); + } + + // Return the most negative zero + if ((op1_bits | op2_bits) == 0x8000000000000000ul) + { + return BitConverter.Int64BitsToDouble(long.MinValue); + } + + return 0; + } + + private static bool ProcessNaNs(uint op1_bits, uint op2_bits, out uint op_bits) + { + if (IsSNaN(op1_bits)) + { + op_bits = op1_bits | (1u << 22); // op1 is SNaN, return QNaN op1 + } + else if (IsSNaN(op2_bits)) + { + op_bits = op2_bits | (1u << 22); // op2 is SNaN, return QNaN op2 + } + else if (IsQNaN(op1_bits)) + { + op_bits = op1_bits; // op1 is QNaN, return QNaN op1 + } + else if (IsQNaN(op2_bits)) + { + op_bits = op2_bits; // op2 is QNaN, return QNaN op2 + } + else + { + op_bits = 0; + + return false; + } + + return true; + } + + private static bool ProcessNaNs(ulong op1_bits, ulong op2_bits, out ulong op_bits) + { + if (IsSNaN(op1_bits)) + { + op_bits = op1_bits | (1ul << 51); // op1 is SNaN, return QNaN op1 + } + else if (IsSNaN(op2_bits)) + { + op_bits = op2_bits | (1ul << 51); // op2 is SNaN, return QNaN op2 + } + else if (IsQNaN(op1_bits)) + { + op_bits = op1_bits; // op1 is QNaN, return QNaN op1 + } + else if (IsQNaN(op2_bits)) + { + op_bits = op2_bits; // op2 is QNaN, return QNaN op2 + } + else + { + op_bits = 0; + + return false; + } + + return true; + } + + private static bool IsQNaN(uint op_bits) + { + return (op_bits & 0x007FFFFF) != 0 && + (op_bits & 0x7FC00000) == 0x7FC00000; + } + + private static bool IsQNaN(ulong op_bits) + { + return (op_bits & 0x000FFFFFFFFFFFFF) != 0 && + (op_bits & 0x7FF8000000000000) == 0x7FF8000000000000; + } + + private static bool IsSNaN(uint op_bits) + { + return (op_bits & 0x007FFFFF) != 0 && + (op_bits & 0x7FC00000) == 0x7F800000; + } + + private static bool IsSNaN(ulong op_bits) + { + return (op_bits & 0x000FFFFFFFFFFFFF) != 0 && + (op_bits & 0x7FF8000000000000) == 0x7FF0000000000000; + } } } \ No newline at end of file diff --git a/Instruction/AVectorHelper.cs b/Instruction/AVectorHelper.cs index a0f887b..b2d5374 100644 --- a/Instruction/AVectorHelper.cs +++ b/Instruction/AVectorHelper.cs @@ -93,86 +93,6 @@ namespace ChocolArm64.Instruction Value < ulong.MinValue ? ulong.MinValue : (ulong)Value; } - public static double Max(double LHS, double RHS) - { - if (LHS == 0.0 && RHS == 0.0) - { - if (BitConverter.DoubleToInt64Bits(LHS) < 0 && - BitConverter.DoubleToInt64Bits(RHS) < 0) - return -0.0; - - return 0.0; - } - - if (LHS > RHS) - return LHS; - - if (double.IsNaN(LHS)) - return LHS; - - return RHS; - } - - public static float MaxF(float LHS, float RHS) - { - if (LHS == 0.0 && RHS == 0.0) - { - if (BitConverter.SingleToInt32Bits(LHS) < 0 && - BitConverter.SingleToInt32Bits(RHS) < 0) - return -0.0f; - - return 0.0f; - } - - if (LHS > RHS) - return LHS; - - if (float.IsNaN(LHS)) - return LHS; - - return RHS; - } - - public static double Min(double LHS, double RHS) - { - if (LHS == 0.0 && RHS == 0.0) - { - if (BitConverter.DoubleToInt64Bits(LHS) < 0 || - BitConverter.DoubleToInt64Bits(RHS) < 0) - return -0.0; - - return 0.0; - } - - if (LHS < RHS) - return LHS; - - if (double.IsNaN(LHS)) - return LHS; - - return RHS; - } - - public static float MinF(float LHS, float RHS) - { - if (LHS == 0.0 && RHS == 0.0) - { - if (BitConverter.SingleToInt32Bits(LHS) < 0 || - BitConverter.SingleToInt32Bits(RHS) < 0) - return -0.0f; - - return 0.0f; - } - - if (LHS < RHS) - return LHS; - - if (float.IsNaN(LHS)) - return LHS; - - return RHS; - } - public static double Round(double Value, int Fpcr) { switch ((ARoundMode)((Fpcr >> 22) & 3))