From 9533b338ac83df8b82e1a4bf479f4b6715582dbd Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 14 Oct 2018 04:35:16 +0200 Subject: [PATCH] Add Fmls_Se, Fmulx_Se/Ve, Smov_S Inst.; Opt. Clz/Clz_V, Cnt_V, Shl_V, S/Ushr_V, S/Usra_V Inst.; Add 11 Tests. Some fixes. (#449) * Update AOpCodeTable.cs * Update AInstEmitSimdMove.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdShift.cs * Update ASoftFallback.cs * Update ASoftFloat.cs * Update AOpCodeSimdRegElemF.cs * Update CpuTestSimdIns.cs * Update CpuTestSimdRegElem.cs * Create CpuTestSimdRegElemF.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Superseded Fmul_Se Test. Nit. * Address PR feedback. * Address PR feedback. * Update AInstEmitSimdArithmetic.cs * Update ASoftFallback.cs * Update AInstEmitAlu.cs * Update AInstEmitSimdShift.cs --- AOpCodeTable.cs | 14 ++- Decoder/AOpCodeSimdRegElemF.cs | 27 +++-- Instruction/AInstEmitAlu.cs | 14 ++- Instruction/AInstEmitSimdArithmetic.cs | 91 +++++++++++++---- Instruction/AInstEmitSimdMove.cs | 14 ++- Instruction/AInstEmitSimdShift.cs | 131 +++++++++++++++++++++++-- Instruction/ASoftFallback.cs | 19 ++-- Instruction/ASoftFloat.cs | 16 +-- 8 files changed, 268 insertions(+), 58 deletions(-) diff --git a/AOpCodeTable.cs b/AOpCodeTable.cs index 3002571..4449329 100644 --- a/AOpCodeTable.cs +++ b/AOpCodeTable.cs @@ -284,11 +284,12 @@ namespace ChocolArm64 SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg)); SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fminp_V, typeof(AOpCodeSimdReg)); - SetA64("010111111<0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg)); - SetA64("0x0011111<00111110011101<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmls_V, typeof(AOpCodeSimdReg)); - SetA64("0x0011111<00111111011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V, typeof(AOpCodeSimdReg)); - SetA64("0x0011111<00111110011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_V, typeof(AOpCodeSimdReg)); + SetA64("0>10111111011101<100000111110xxxxxxxxxx", AInstEmit.Fneg_V, typeof(AOpCodeSimd)); SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fnmadd_S, typeof(AOpCodeSimdReg)); @@ -401,6 +404,7 @@ namespace ChocolArm64 SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg)); + SetA64("0x001110000xxxxx001011xxxxxxxxxx", AInstEmit.Smov_S, typeof(AOpCodeSimdIns)); SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg)); SetA64("01011110xx100000011110xxxxxxxxxx", AInstEmit.Sqabs_S, typeof(AOpCodeSimd)); SetA64("0>001110<<100000011110xxxxxxxxxx", AInstEmit.Sqabs_V, typeof(AOpCodeSimd)); diff --git a/Decoder/AOpCodeSimdRegElemF.cs b/Decoder/AOpCodeSimdRegElemF.cs index e61d709..e0670de 100644 --- a/Decoder/AOpCodeSimdRegElemF.cs +++ b/Decoder/AOpCodeSimdRegElemF.cs @@ -8,15 +8,26 @@ namespace ChocolArm64.Decoder public AOpCodeSimdRegElemF(AInst Inst, long Position, int OpCode) : base(Inst, Position, OpCode) { - if ((Size & 1) != 0) + switch ((OpCode >> 21) & 3) // sz:L { - Index = (OpCode >> 11) & 1; - } - else - { - Index = (OpCode >> 21) & 1 | - (OpCode >> 10) & 2; + case 0: // H:0 + Index = (OpCode >> 10) & 2; // 0, 2 + + break; + + case 1: // H:1 + Index = (OpCode >> 10) & 2; + Index++; // 1, 3 + + break; + + case 2: // H + Index = (OpCode >> 11) & 1; // 0, 1 + + break; + + default: Emitter = AInstEmit.Und; return; } } } -} \ No newline at end of file +} diff --git a/Instruction/AInstEmitAlu.cs b/Instruction/AInstEmitAlu.cs index 490387e..4551346 100644 --- a/Instruction/AInstEmitAlu.cs +++ b/Instruction/AInstEmitAlu.cs @@ -4,6 +4,7 @@ using ChocolArm64.Translation; using System; using System.Reflection; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitAluHelper; @@ -117,9 +118,18 @@ namespace ChocolArm64.Instruction Context.EmitLdintzr(Op.Rn); - Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64); + if (Lzcnt.IsSupported) + { + Type TValue = Op.RegisterSize == ARegisterSize.Int32 ? typeof(uint) : typeof(ulong); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { TValue })); + } + else + { + Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + } Context.EmitStintzr(Op.Rd); } diff --git a/Instruction/AInstEmitSimdArithmetic.cs b/Instruction/AInstEmitSimdArithmetic.cs index d11a0b8..7ba08f5 100644 --- a/Instruction/AInstEmitSimdArithmetic.cs +++ b/Instruction/AInstEmitSimdArithmetic.cs @@ -82,20 +82,6 @@ namespace ChocolArm64.Instruction } public static void Cls_V(AILEmitterCtx Context) - { - MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingSigns)); - - EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo)); - } - - public static void Clz_V(AILEmitterCtx Context) - { - MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingZeros)); - - EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo)); - } - - private static void EmitCountLeadingBits(AILEmitterCtx Context, Action Emit) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; @@ -110,7 +96,44 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I4(ESize); - Emit(); + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingSigns)); + + EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + } + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + + public static void Clz_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; + + int ESize = 8 << Op.Size; + + for (int Index = 0; Index < Elems; Index++) + { + EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); + + if (Lzcnt.IsSupported && ESize == 32) + { + Context.Emit(OpCodes.Conv_U4); + + Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) })); + + Context.Emit(OpCodes.Conv_U8); + } + else + { + Context.EmitLdc_I4(ESize); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + } EmitVectorInsert(Context, Op.Rd, Index, Op.Size); } @@ -131,11 +154,14 @@ namespace ChocolArm64.Instruction { EmitVectorExtractZx(Context, Op.Rn, Index, 0); - Context.Emit(OpCodes.Conv_U4); - - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); - - Context.Emit(OpCodes.Conv_U8); + if (Popcnt.IsSupported) + { + Context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) })); + } + else + { + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); + } EmitVectorInsert(Context, Op.Rd, Index, 0); } @@ -440,6 +466,15 @@ namespace ChocolArm64.Instruction }); } + public static void Fmls_Se(AILEmitterCtx Context) + { + EmitScalarTernaryOpByElemF(Context, () => + { + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + }); + } + public static void Fmls_V(AILEmitterCtx Context) { EmitVectorTernaryOpF(Context, () => @@ -554,6 +589,14 @@ namespace ChocolArm64.Instruction }); } + public static void Fmulx_Se(AILEmitterCtx Context) + { + EmitScalarBinaryOpByElemF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + public static void Fmulx_V(AILEmitterCtx Context) { EmitVectorBinaryOpF(Context, () => @@ -562,6 +605,14 @@ namespace ChocolArm64.Instruction }); } + public static void Fmulx_Ve(AILEmitterCtx Context) + { + EmitVectorBinaryOpByElemF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + public static void Fneg_S(AILEmitterCtx Context) { EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg)); diff --git a/Instruction/AInstEmitSimdMove.cs b/Instruction/AInstEmitSimdMove.cs index 94097f4..6001f48 100644 --- a/Instruction/AInstEmitSimdMove.cs +++ b/Instruction/AInstEmitSimdMove.cs @@ -249,6 +249,17 @@ namespace ChocolArm64.Instruction EmitVectorImmUnaryOp(Context, () => Context.Emit(OpCodes.Not)); } + public static void Smov_S(AILEmitterCtx Context) + { + AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; + + EmitVectorExtractSx(Context, Op.Rn, Op.DstIndex, Op.Size); + + EmitIntZeroUpperIfNeeded(Context); + + Context.EmitStintzr(Op.Rd); + } + public static void Tbl_V(AILEmitterCtx Context) { AOpCodeSimdTbl Op = (AOpCodeSimdTbl)Context.CurrOp; @@ -421,7 +432,8 @@ namespace ChocolArm64.Instruction private static void EmitIntZeroUpperIfNeeded(AILEmitterCtx Context) { - if (Context.CurrOp.RegisterSize == ARegisterSize.Int32) + if (Context.CurrOp.RegisterSize == ARegisterSize.Int32 || + Context.CurrOp.RegisterSize == ARegisterSize.SIMD64) { Context.Emit(OpCodes.Conv_U4); Context.Emit(OpCodes.Conv_U8); diff --git a/Instruction/AInstEmitSimdShift.cs b/Instruction/AInstEmitSimdShift.cs index 127abf1..8918c0e 100644 --- a/Instruction/AInstEmitSimdShift.cs +++ b/Instruction/AInstEmitSimdShift.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitSimdHelper; @@ -31,12 +32,32 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - EmitVectorUnaryOpZx(Context, () => + if (AOptimizations.UseSse2 && Op.Size > 0) { + Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + Context.EmitLdc_I4(GetImmShl(Op)); - Context.Emit(OpCodes.Shl); - }); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), Types)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorUnaryOpZx(Context, () => + { + Context.EmitLdc_I4(GetImmShl(Op)); + + Context.Emit(OpCodes.Shl); + }); + } } public static void Shll_V(AILEmitterCtx Context) @@ -167,7 +188,30 @@ namespace ChocolArm64.Instruction public static void Sshr_V(AILEmitterCtx Context) { - EmitShrImmOp(Context, ShrImmFlags.VectorSx); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0 + && Op.Size < 3) + { + Type[] Types = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), Types)); + + EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitShrImmOp(Context, ShrImmFlags.VectorSx); + } } public static void Ssra_S(AILEmitterCtx Context) @@ -177,7 +221,33 @@ namespace ChocolArm64.Instruction public static void Ssra_V(AILEmitterCtx Context) { - EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0 + && Op.Size < 3) + { + Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; + + EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size); + EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); + + EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); + } } public static void Uqrshrn_S(AILEmitterCtx Context) @@ -239,7 +309,29 @@ namespace ChocolArm64.Instruction public static void Ushr_V(AILEmitterCtx Context) { - EmitShrImmOp(Context, ShrImmFlags.VectorZx); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0) + { + Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), Types)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitShrImmOp(Context, ShrImmFlags.VectorZx); + } } public static void Usra_S(AILEmitterCtx Context) @@ -249,7 +341,32 @@ namespace ChocolArm64.Instruction public static void Usra_V(AILEmitterCtx Context) { - EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0) + { + Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; + + EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size); + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); + } } private static void EmitVectorShl(AILEmitterCtx Context, bool Signed) diff --git a/Instruction/ASoftFallback.cs b/Instruction/ASoftFallback.cs index a7bc108..3c5c5c4 100644 --- a/Instruction/ASoftFallback.cs +++ b/Instruction/ASoftFallback.cs @@ -386,7 +386,7 @@ namespace ChocolArm64.Instruction #endregion #region "Count" - public static ulong CountLeadingSigns(ulong Value, int Size) + public static ulong CountLeadingSigns(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). { Value ^= Value >> 1; @@ -405,9 +405,9 @@ namespace ChocolArm64.Instruction private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; - public static ulong CountLeadingZeros(ulong Value, int Size) + public static ulong CountLeadingZeros(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). { - if (Value == 0) + if (Value == 0ul) { return (ulong)Size; } @@ -426,12 +426,17 @@ namespace ChocolArm64.Instruction return (ulong)Count; } - public static uint CountSetBits8(uint Value) + public static ulong CountSetBits8(ulong Value) // "Size" is 8 (SIMD&FP Inst.). { - Value = ((Value >> 1) & 0x55) + (Value & 0x55); - Value = ((Value >> 2) & 0x33) + (Value & 0x33); + if (Value == 0xfful) + { + return 8ul; + } - return (Value >> 4) + (Value & 0x0f); + Value = ((Value >> 1) & 0x55ul) + (Value & 0x55ul); + Value = ((Value >> 2) & 0x33ul) + (Value & 0x33ul); + + return (Value >> 4) + (Value & 0x0ful); } #endregion diff --git a/Instruction/ASoftFloat.cs b/Instruction/ASoftFloat.cs index 7412c97..2d9a9f0 100644 --- a/Instruction/ASoftFloat.cs +++ b/Instruction/ASoftFloat.cs @@ -365,8 +365,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMaxNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -430,8 +430,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMinNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -1091,8 +1091,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMaxNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -1156,8 +1156,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMinNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) {