From 4d69286a9c5af031d4299357e720a967694f265e Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 11 Sep 2022 12:44:27 -0300 Subject: [PATCH] Implement VRINT (vector) Arm32 NEON instructions (#3691) --- ARMeilleure/Decoders/OpCodeTable.cs | 6 +- ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 54 ++++++++++++ ARMeilleure/Instructions/InstName.cs | 4 + Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs | 82 ++++++++++++++++++- 4 files changed, 144 insertions(+), 2 deletions(-) diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 09df31b0b..c5f86712f 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -791,7 +791,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCode32AluUx.Create); SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCode32AluUx.Create); SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, OpCode32AluUx.Create); - + // VFP SetVfp("<<<<11101x110000xxxx101x11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); @@ -959,6 +959,10 @@ namespace ARMeilleure.Decoders SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, OpCode32SimdReg.Create); SetA32("111100111x11xx00xxxx000<>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create); SetA32("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create); SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create); diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index ec1ead486..b06ddd5e7 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -323,6 +323,60 @@ namespace ARMeilleure.Instructions } } + // VRINTA (vector). + public static void Vrinta_V(ArmEmitterContext context) + { + EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m)); + } + + // VRINTM (vector). + public static void Vrintm_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity))); + }); + } + else + { + EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m)); + } + } + + // VRINTN (vector). + public static void Vrintn_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + }); + } + else + { + EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m)); + } + } + + // VRINTP (vector). + public static void Vrintp_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity))); + }); + } + else + { + EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m)); + } + } + // VRINTZ (floating-point). public static void Vrint_Z(ArmEmitterContext context) { diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index f2c95ae98..f7022f8ef 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -636,6 +636,10 @@ namespace ARMeilleure.Instructions Vrev, Vrhadd, Vrint, + Vrinta, + Vrintm, + Vrintn, + Vrintp, Vrintx, Vrshr, Vrshrn, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs index 395f24643..78d5c3cc2 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs @@ -13,6 +13,16 @@ namespace Ryujinx.Tests.Cpu #if SimdCvt32 #region "ValueSource (Opcodes)" + private static uint[] _Vrint_AMNP_V_F32_() + { + return new uint[] + { + 0xf3ba0500u, // VRINTA.F32 Q0, Q0 + 0xf3ba0680u, // VRINTM.F32 Q0, Q0 + 0xf3ba0400u, // VRINTN.F32 Q0, Q0 + 0xf3ba0780u // VRINTP.F32 Q0, Q0 + }; + } #endregion #region "ValueSource (Types)" @@ -64,6 +74,47 @@ namespace Ryujinx.Tests.Cpu } } + private static IEnumerable _2S_F_() + { + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x8080000080800000ul; // -Min Normal + yield return 0x807FFFFF807FFFFFul; // -Max Subnormal + yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0080000000800000ul; // +Min Normal + yield return 0x007FFFFF007FFFFFul; // +Max Subnormal + yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFF800000FF800000ul; // -Infinity + yield return 0x7F8000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (rnd1 << 32) | rnd1; + yield return (rnd2 << 32) | rnd2; + } + } + private static IEnumerable _1D_F_() { yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) @@ -224,6 +275,35 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] [Explicit] + public void Vrint_AMNP_V_F32([ValueSource(nameof(_Vrint_AMNP_V_F32_))] uint opcode, + [Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_2S_F_))] ulong d0, + [ValueSource(nameof(_2S_F_))] ulong d1, + [ValueSource(nameof(_2S_F_))] ulong d2, + [ValueSource(nameof(_2S_F_))] ulong d3, + [Values] bool q) + { + if (q) + { + opcode |= 1 << 6; + + rd >>= 1; rd <<= 1; + rm >>= 1; rm <<= 1; + } + + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + + V128 v0 = MakeVectorE0E1(d0, d1); + V128 v1 = MakeVectorE0E1(d2, d3); + + SingleOpcode(opcode, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("VRINTX.F , ")] public void Vrintx_S([Values(0u, 1u)] uint rd, [Values(0u, 1u)] uint rm, @@ -253,7 +333,7 @@ namespace Ryujinx.Tests.Cpu } opcode |= ((size & 3) << 8); - + int fpscr = (int)rMode << (int)Fpcr.RMode; SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, fpscr: fpscr);