From 729ff5337cab8d1fd4cc66d7792d410172f25f62 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 13 Sep 2022 03:24:09 -0300 Subject: [PATCH] Fix increment on Arm32 NEON VLDn/VSTn instructions with regs > 1 (#3695) * Fix increment on Arm32 NEON VLDn/VSTn instructions with regs > 1 * PPTC version bump * PR feedback --- ARMeilleure/Decoders/OpCode32SimdMemPair.cs | 7 +- ARMeilleure/Decoders/OpCodeTable.cs | 64 ++++++++++++------- .../Instructions/InstEmitSimdMemory32.cs | 8 +-- ARMeilleure/Translation/PTC/Ptc.cs | 2 +- Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs | 29 ++++++--- 5 files changed, 71 insertions(+), 39 deletions(-) diff --git a/ARMeilleure/Decoders/OpCode32SimdMemPair.cs b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs index 4cee05e82..2da0dea3c 100644 --- a/ARMeilleure/Decoders/OpCode32SimdMemPair.cs +++ b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs @@ -1,11 +1,10 @@ using ARMeilleure.State; -using System; namespace ARMeilleure.Decoders { class OpCode32SimdMemPair : OpCode32, IOpCode32Simd { - private static int[] RegsMap = + private static int[] _regsMap = { 1, 1, 4, 2, 1, 1, 3, 1, @@ -40,9 +39,9 @@ namespace ARMeilleure.Decoders WBack = Rm != RegisterAlias.Aarch32Pc; RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp; - Regs = RegsMap[(opCode >> 8) & 0xf]; + Regs = _regsMap[(opCode >> 8) & 0xf]; - Increment = Math.Min(Regs, ((opCode >> 8) & 0x1) + 1); + Increment = ((opCode >> 8) & 0x1) + 1; } } } diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index c5f86712f..b222a21fe 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -888,18 +888,31 @@ namespace ARMeilleure.Decoders SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create); SetA32("111100100x10xxxxxxxx1100xxx1xxxx", InstName.Vfms, InstEmit32.Vfms_V, OpCode32SimdReg.Create); SetA32("1111001x0x<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create); + SetA32("111101000x10xxxxxxxx000x<>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create); SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create); SetA32("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create); - SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create); - SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 1. - SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 2. - SetA32("111101000x00xxxxxxxx0110xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 3. + SetA32("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create); + SetA32("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create); + SetA32("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create); + SetA32("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create); + SetA32("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create); // Regs = 1. + SetA32("111101000x00xxxxxxxx1010xx< 1 ? op.Increment : 1; int eBytes = 1 << op.Size; Operand n = context.Copy(GetIntA32(context, op.Rn)); @@ -144,7 +145,7 @@ namespace ARMeilleure.Instructions int elemD = d + reg; for (int i = 0; i < count; i++) { - // Write an element from a double simd register + // Accesses an element from a double simd register, // add ebytes for each element. Operand address = context.Add(n, Const(offset)); int index = ((elemD & 1) << (3 - op.Size)) + elem; @@ -161,7 +162,6 @@ namespace ARMeilleure.Instructions } else { - if (load) { EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size); @@ -173,7 +173,7 @@ namespace ARMeilleure.Instructions } offset += eBytes; - elemD += op.Increment; + elemD += increment; } } } diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 3a950354e..71f69ef7c 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 3683; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 3695; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs index ec2d53a42..b14fdcd55 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs @@ -12,7 +12,7 @@ namespace Ryujinx.Tests.Cpu #if SimdMemory32 private const int RndCntImm = 2; - private uint[] LDSTModes = + private uint[] _ldStModes = { // LD1 0b0111, @@ -96,7 +96,7 @@ namespace Ryujinx.Tests.Cpu [Values(0u, 13u)] uint rn, [Values(1u, 13u, 15u)] uint rm, [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, - [Range(0u, 3u)] uint mode, + [Range(0u, 10u)] uint mode, [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) { var data = GenerateVectorSequence(0x1000); @@ -104,7 +104,13 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0xf4200000u; // VLD4.8 {D0, D1, D2, D3}, [R0], R0 - opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + if (mode > 3 && size == 3) + { + // A size of 3 is only valid for VLD1. + size = 2; + } + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (_ldStModes[mode] << 8); opcode |= ((vd & 0x10) << 18); opcode |= ((vd & 0xf) << 12); @@ -151,17 +157,23 @@ namespace Ryujinx.Tests.Cpu [Values(0u, 13u)] uint rn, [Values(1u, 13u, 15u)] uint rm, [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, - [Range(0u, 3u)] uint mode, + [Range(0u, 10u)] uint mode, [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) { var data = GenerateVectorSequence(0x1000); SetWorkingMemory(0, data); (V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors(); - + uint opcode = 0xf4000000u; // VST4.8 {D0, D1, D2, D3}, [R0], R0 - opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + if (mode > 3 && size == 3) + { + // A size of 3 is only valid for VST1. + size = 2; + } + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (_ldStModes[mode] << 8); opcode |= ((vd & 0x10) << 18); opcode |= ((vd & 0xf) << 12); @@ -183,7 +195,8 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0xec100a00u; // VST4.8 {D0, D1, D2, D3}, [R0], R0 - uint[] vldmModes = { + uint[] vldmModes = + { // Note: 3rd 0 leaves a space for "D". 0b0100, // Increment after. 0b0101, // Increment after. (!) @@ -240,7 +253,7 @@ namespace Ryujinx.Tests.Cpu { opcode |= ((sd & 0x1) << 22); opcode |= ((sd & 0x1e) << 11); - } + } else { opcode |= ((sd & 0x10) << 18);