From 5e0f8e873857ce3ca3f532aff0936beb28e412c8 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 10 Jan 2023 19:16:59 -0300 Subject: [PATCH] Implement JIT Arm64 backend (#4114) * Implement JIT Arm64 backend * PPTC version bump * Address some feedback from Arm64 JIT PR * Address even more PR feedback * Remove unused IsPageAligned function * Sync Qc flag before calls * Fix comment and remove unused enum * Address riperiperi PR feedback * Delete Breakpoint IR instruction that was only implemented for Arm64 --- ARMeilleure/ARMeilleure.csproj | 7 + ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs | 270 +++ ARMeilleure/CodeGen/Arm64/ArmCondition.cs | 47 + ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs | 14 + ARMeilleure/CodeGen/Arm64/ArmShiftType.cs | 11 + ARMeilleure/CodeGen/Arm64/Assembler.cs | 1160 ++++++++++++ .../CodeGen/Arm64/CallingConvention.cs | 96 + ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs | 173 ++ ARMeilleure/CodeGen/Arm64/CodeGenContext.cs | 286 +++ ARMeilleure/CodeGen/Arm64/CodeGenerator.cs | 1576 +++++++++++++++++ .../CodeGen/Arm64/CodeGeneratorIntrinsic.cs | 662 +++++++ ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs | 14 + ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs | 461 +++++ ARMeilleure/CodeGen/Arm64/IntrinsicType.cs | 59 + ARMeilleure/CodeGen/Arm64/PreAllocator.cs | 940 ++++++++++ .../CodeGen/Optimizations/ConstantFolding.cs | 41 + .../CodeGen/Optimizations/Optimizer.cs | 28 +- .../RegisterAllocators/LinearScanAllocator.cs | 75 +- .../RegisterAllocators/LiveInterval.cs | 4 +- .../RegisterAllocators/RegisterMasks.cs | 5 +- ARMeilleure/CodeGen/X86/CodeGenerator.cs | 4 +- ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 2 - .../Instructions/InstEmitSimdArithmetic.cs | 1516 +++++++++++++--- .../Instructions/InstEmitSimdArithmetic32.cs | 229 ++- ARMeilleure/Instructions/InstEmitSimdCmp.cs | 18 +- ARMeilleure/Instructions/InstEmitSimdCmp32.cs | 66 +- ARMeilleure/Instructions/InstEmitSimdCvt.cs | 294 ++- ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 103 +- .../Instructions/InstEmitSimdHelper32Arm64.cs | 366 ++++ .../Instructions/InstEmitSimdHelperArm64.cs | 720 ++++++++ .../Instructions/InstEmitSimdLogical.cs | 54 +- .../Instructions/InstEmitSimdLogical32.cs | 54 +- .../Instructions/InstEmitSimdMove32.cs | 58 +- ARMeilleure/Instructions/InstEmitSimdShift.cs | 517 +++++- ARMeilleure/Instructions/InstEmitSystem.cs | 4 + .../IntermediateRepresentation/Intrinsic.cs | 456 ++++- .../IntermediateRepresentation/Multiplier.cs | 3 +- .../IntermediateRepresentation/Operand.cs | 14 + .../IntermediateRepresentation/OperandType.cs | 14 + ARMeilleure/Memory/ReservedRegion.cs | 2 +- ARMeilleure/Native/JitSupportDarwin.cs | 13 + .../libs/libarmeilleure-jitsupport.dylib | Bin 0 -> 33564 bytes ARMeilleure/Native/macos_jit_support/Makefile | 8 + .../Native/macos_jit_support/support.c | 14 + ARMeilleure/Optimizations.cs | 5 + ARMeilleure/Signal/NativeSignalHandler.cs | 50 +- ARMeilleure/Signal/TestMethods.cs | 8 +- .../Signal/UnixSignalHandlerRegistration.cs | 47 +- ARMeilleure/Translation/ArmEmitterContext.cs | 49 + ARMeilleure/Translation/Cache/JitCache.cs | 26 +- .../Translation/Cache/JitCacheInvalidation.cs | 79 + ARMeilleure/Translation/Compiler.cs | 19 +- ARMeilleure/Translation/PTC/Ptc.cs | 2 +- ARMeilleure/Translation/Translator.cs | 21 +- ARMeilleure/Translation/TranslatorStubs.cs | 6 +- Ryujinx.Cpu/Jit/JitMemoryAllocator.cs | 2 +- Ryujinx.Memory/MemoryAllocationFlags.cs | 14 +- Ryujinx.Memory/MemoryBlock.cs | 70 +- Ryujinx.Memory/MemoryManagement.cs | 12 +- Ryujinx.Memory/MemoryManagementUnix.cs | 31 +- 
Ryujinx.Memory/MemoryManagerUnixHelper.cs | 9 +- 61 files changed, 10266 insertions(+), 642 deletions(-) create mode 100644 ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs create mode 100644 ARMeilleure/CodeGen/Arm64/ArmCondition.cs create mode 100644 ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs create mode 100644 ARMeilleure/CodeGen/Arm64/ArmShiftType.cs create mode 100644 ARMeilleure/CodeGen/Arm64/Assembler.cs create mode 100644 ARMeilleure/CodeGen/Arm64/CallingConvention.cs create mode 100644 ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs create mode 100644 ARMeilleure/CodeGen/Arm64/CodeGenContext.cs create mode 100644 ARMeilleure/CodeGen/Arm64/CodeGenerator.cs create mode 100644 ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs create mode 100644 ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs create mode 100644 ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs create mode 100644 ARMeilleure/CodeGen/Arm64/IntrinsicType.cs create mode 100644 ARMeilleure/CodeGen/Arm64/PreAllocator.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs create mode 100644 ARMeilleure/Native/JitSupportDarwin.cs create mode 100644 ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib create mode 100644 ARMeilleure/Native/macos_jit_support/Makefile create mode 100644 ARMeilleure/Native/macos_jit_support/support.c create mode 100644 ARMeilleure/Translation/Cache/JitCacheInvalidation.cs diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj index bb3f47219..58fd04b38 100644 --- a/ARMeilleure/ARMeilleure.csproj +++ b/ARMeilleure/ARMeilleure.csproj @@ -9,4 +9,11 @@ + + + PreserveNewest + libarmeilleure-jitsupport.dylib + + + diff --git a/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs new file mode 100644 index 000000000..fdd4d0241 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs @@ -0,0 +1,270 @@ +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Collections.Generic; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class Arm64Optimizer + { + private const int MaxConstantUses = 10000; + + public static void RunPass(ControlFlowGraph cfg) + { + var constants = new Dictionary(); + + Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source) + { + // If the constant has many uses, we also force a new constant mov to be added, in order + // to avoid overflow of the counts field (that is limited to 16 bits). + if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses) + { + constant = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, constant, source); + + block.Operations.AddBefore(operation, copyOp); + + constants[source.Value] = constant; + } + + return constant; + } + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + constants.Clear(); + + Operation nextNode; + + for (Operation node = block.Operations.First; node != default; node = nextNode) + { + nextNode = node.ListNext; + + // Insert copies for constants that can't fit on a 32-bit immediate. + // Doing this early unblocks a few optimizations. 
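+                        // Illustrative sketch (hypothetical helper, stating the
+                        // architectural rule rather than quoting the patch): AArch64
+                        // ADD/SUB (immediate) only encodes a 12-bit unsigned value,
+                        // optionally shifted left by 12, so any other constant has
+                        // to be materialized into a register first.
+                        static bool CanEncodeAddImmediate(uint imm)
+                        {
+                            return (imm & ~0xfffu) == 0       // 12-bit form
+                                || (imm & ~0xfff000u) == 0;   // 12-bit form, LSL #12
+                        }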
+ if (node.Instruction == Instruction.Add) + { + Operand src1 = node.GetSource(0); + Operand src2 = node.GetSource(1); + + if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32))) + { + node.SetSource(0, GetConstantCopy(block, node, src1)); + } + + if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32))) + { + node.SetSource(1, GetConstantCopy(block, node, src2)); + } + } + + // Try to fold something like: + // lsl x1, x1, #2 + // add x0, x0, x1 + // ldr x0, [x0] + // add x2, x2, #16 + // ldr x2, [x2] + // Into: + // ldr x0, [x0, x1, lsl #2] + // ldr x2, [x2, #16] + if (IsMemoryLoadOrStore(node.Instruction)) + { + OperandType type; + + if (node.Destination != default) + { + type = node.Destination.Type; + } + else + { + type = node.GetSource(1).Type; + } + + Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type); + + if (memOp != default) + { + node.SetSource(0, memOp); + } + } + } + } + + Optimizer.RemoveUnusedNodes(cfg); + } + + private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type) + { + Operand baseOp = addr; + + // First we check if the address is the result of a local X with immediate + // addition. If that is the case, then the baseOp is X, and the memory operand immediate + // becomes the addition immediate. Otherwise baseOp keeps being the address. + int imm = GetConstOp(ref baseOp, type); + if (imm != 0) + { + return MemoryOp(type, baseOp, default, Multiplier.x1, imm); + } + + // Now we check if the baseOp is the result of a local Y with a local Z addition. + // If that is the case, we now set baseOp to Y and indexOp to Z. We further check + // if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize), + // if that is the case, we set indexOp to W and adjust the scale value of the memory operand + // to match that of the left shift. + // There is one missed case, which is the address being a shift result, but this is + // probably not worth optimizing as it should never happen. + (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type); + + // If baseOp is still equal to address, then there's nothing that can be optimized. + if (baseOp == addr) + { + return default; + } + + return MemoryOp(type, baseOp, indexOp, scale, 0); + } + + private static int GetConstOp(ref Operand baseOp, OperandType accessType) + { + Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add); + + if (operation == default) + { + return 0; + } + + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + Operand constOp; + Operand otherOp; + + if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable) + { + constOp = src1; + otherOp = src2; + } + else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant) + { + constOp = src2; + otherOp = src1; + } + else + { + return 0; + } + + // If we have addition by a constant that we can't encode on the instruction, + // then we can't optimize it further. 
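+            // Worked example of the rule checked below, for a 64-bit access
+            // (scale = 3): the unsigned-offset form of LDR/STR stores
+            // displacement >> 3 in a 12-bit field, so:
+            //   16     -> foldable      (16 >> 3 == 2, and 2 << 3 == 16)
+            //   12     -> not foldable  (not 8-byte aligned; rounds to 8)
+            //   0x8000 -> not foldable  (0x8000 >> 3 == 0x1000, needs 13 bits)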
+ if (ConstTooLong(constOp, accessType)) + { + return 0; + } + + baseOp = otherOp; + + return constOp.AsInt32(); + } + + private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType) + { + Operand indexOp = default; + + Multiplier scale = Multiplier.x1; + + Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add); + + if (addOp == default) + { + return (indexOp, scale); + } + + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable) + { + return (indexOp, scale); + } + + baseOp = src1; + indexOp = src2; + + Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft); + + bool indexOnSrc2 = false; + + if (shlOp == default) + { + shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft); + + indexOnSrc2 = true; + } + + if (shlOp != default) + { + Operand shSrc = shlOp.GetSource(0); + Operand shift = shlOp.GetSource(1); + + int maxShift = Assembler.GetScaleForType(accessType); + + if (shSrc.Kind == OperandKind.LocalVariable && + shift.Kind == OperandKind.Constant && + (shift.Value == 0 || shift.Value == (ulong)maxShift)) + { + scale = shift.Value switch + { + 1 => Multiplier.x2, + 2 => Multiplier.x4, + 3 => Multiplier.x8, + 4 => Multiplier.x16, + _ => Multiplier.x1 + }; + + baseOp = indexOnSrc2 ? src1 : src2; + indexOp = shSrc; + } + } + + return (indexOp, scale); + } + + private static Operation GetAsgOpWithInst(Operand op, Instruction inst) + { + // If we have multiple assignments, folding is not safe + // as the value may be different depending on the + // control flow path. + if (op.AssignmentsCount != 1) + { + return default; + } + + Operation asgOp = op.Assignments[0]; + + if (asgOp.Instruction != inst) + { + return default; + } + + return asgOp; + } + + private static bool IsMemoryLoadOrStore(Instruction inst) + { + return inst == Instruction.Load || inst == Instruction.Store; + } + + private static bool ConstTooLong(Operand constOp, OperandType accessType) + { + if ((uint)constOp.Value != constOp.Value) + { + return true; + } + + return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType); + } + } +} diff --git a/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs new file mode 100644 index 000000000..db27a8104 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/ArmCondition.cs @@ -0,0 +1,47 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +namespace ARMeilleure.CodeGen.Arm64 +{ + enum ArmCondition + { + Eq = 0, + Ne = 1, + GeUn = 2, + LtUn = 3, + Mi = 4, + Pl = 5, + Vs = 6, + Vc = 7, + GtUn = 8, + LeUn = 9, + Ge = 10, + Lt = 11, + Gt = 12, + Le = 13, + Al = 14, + Nv = 15 + } + + static class ComparisonArm64Extensions + { + public static ArmCondition ToArmCondition(this Comparison comp) + { + return comp switch + { + Comparison.Equal => ArmCondition.Eq, + Comparison.NotEqual => ArmCondition.Ne, + Comparison.Greater => ArmCondition.Gt, + Comparison.LessOrEqual => ArmCondition.Le, + Comparison.GreaterUI => ArmCondition.GtUn, + Comparison.LessOrEqualUI => ArmCondition.LeUn, + Comparison.GreaterOrEqual => ArmCondition.Ge, + Comparison.Less => ArmCondition.Lt, + Comparison.GreaterOrEqualUI => ArmCondition.GeUn, + Comparison.LessUI => ArmCondition.LtUn, + + _ => throw new ArgumentException(null, nameof(comp)) + }; + } + } +} diff --git a/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs new file mode 100644 index 000000000..062a6d0b7 --- /dev/null +++ 
b/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.CodeGen.Arm64 +{ + enum ArmExtensionType + { + Uxtb = 0, + Uxth = 1, + Uxtw = 2, + Uxtx = 3, + Sxtb = 4, + Sxth = 5, + Sxtw = 6, + Sxtx = 7 + } +} diff --git a/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs new file mode 100644 index 000000000..d223a1464 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs @@ -0,0 +1,11 @@ + +namespace ARMeilleure.CodeGen.Arm64 +{ + enum ArmShiftType + { + Lsl = 0, + Lsr = 1, + Asr = 2, + Ror = 3 + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/Assembler.cs b/ARMeilleure/CodeGen/Arm64/Assembler.cs new file mode 100644 index 000000000..0ec0be7cb --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/Assembler.cs @@ -0,0 +1,1160 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Diagnostics; +using System.IO; +using static ARMeilleure.IntermediateRepresentation.Operand; + +namespace ARMeilleure.CodeGen.Arm64 +{ + class Assembler + { + public const uint SfFlag = 1u << 31; + + private const int SpRegister = 31; + private const int ZrRegister = 31; + + private readonly Stream _stream; + + public Assembler(Stream stream) + { + _stream = stream; + } + + public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0) + { + WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount); + } + + public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false) + { + WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm); + } + + public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Asr(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 
63 : 31; + shift &= mask; + Sbfm(rd, rn, shift, mask); + } + else + { + Asrv(rd, rn, rm); + } + } + + public void Asrv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm); + } + + public void B(int imm) + { + WriteUInt32(0x14000000u | EncodeSImm26_2(imm)); + } + + public void B(ArmCondition condition, int imm) + { + WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5)); + } + + public void Blr(Operand rn) + { + WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5)); + } + + public void Br(Operand rn) + { + WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5)); + } + + public void Brk() + { + WriteUInt32(0xd4200000u); + } + + public void Cbz(Operand rt, int imm) + { + WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt); + } + + public void Cbnz(Operand rt, int imm) + { + WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt); + } + + public void Clrex(int crm = 15) + { + WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8)); + } + + public void Clz(Operand rd, Operand rn) + { + WriteInstructionAuto(0x5ac01000u, rd, rn); + } + + public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true) + { + Debug.Assert((uint)size < 4); + WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q); + } + + public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount); + } + + public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition) + { + WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm); + } + + public void Cset(Operand rd, ArmCondition condition) + { + var zr = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type); + Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1)); + } + + public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition) + { + WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm); + } + + public void Dmb(uint option) + { + WriteUInt32(0xd50330bfu | (option << 8)); + } + + public void DupScalar(Operand rd, Operand rn, int index, int size) + { + WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn); + } + + public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true) + { + WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q); + } + + public void Extr(Operand rd, Operand rn, Operand rm, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm); + } + + public void FaddScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e202800u, rd, rn, rm); + } + + public void FcvtScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 
1u << 15 : 1u << 22); + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void FdivScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e201800u, rd, rn, rm); + } + + public void Fmov(Operand rd, Operand rn) + { + WriteFPInstructionAuto(0x1e204000u, rd, rn); + } + + public void Fmov(Operand rd, Operand rn, bool topHalf) + { + Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger()); + Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf); + + uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u; + + uint rmode = topHalf ? 1u << 19 : 0u; + uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u; + uint sf = rd.Type == OperandType.I64 || rn.Type == OperandType.I64 ? SfFlag : 0u; + + WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void FmulScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e200800u, rd, rn, rm); + } + + public void FnegScalar(Operand rd, Operand rn) + { + WriteFPInstructionAuto(0x1e214000u, rd, rn); + } + + public void FsubScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e203800u, rd, rn, rm); + } + + public void Ins(Operand rd, Operand rn, int index, int size) + { + WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn); + } + + public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size) + { + uint imm4 = (uint)srcIndex << size; + Debug.Assert((uint)srcIndex < (16u >> size)); + WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn); + } + + public void Ldaxp(Operand rt, Operand rt2, Operand rn) + { + WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2); + } + + public void Ldaxr(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 
3u : 2u) << 30), rt, rn); + } + + public void Ldaxrb(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u, rt, rn); + } + + public void Ldaxrh(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u | (1u << 30), rt, rn); + } + + public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void Ldr(Operand rt, Operand rn) + { + if (rn.Kind == OperandKind.Memory) + { + MemoryOperand memOp = rn.GetMemory(); + + if (memOp.Index != default) + { + Debug.Assert(memOp.Displacement == 0); + Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type)); + LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1); + } + else + { + LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement); + } + } + else + { + LdrRiUn(rt, rn, 0); + } + } + + public void LdrLit(Operand rt, int offset) + { + uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5); + + if (rt.Type == OperandType.I64) + { + instruction |= 1u << 30; + } + + WriteInstruction(instruction, rt); + } + + public void LdrRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift) + { + uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type); + WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift); + } + + public void LdrbRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrbRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrbRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void 
Ldur(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void Lsl(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Ubfm(rd, rn, -shift & mask, mask - shift); + } + else + { + Lslv(rd, rn, rm); + } + } + + public void Lslv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm); + } + + public void Lsr(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Ubfm(rd, rn, shift, mask); + } + else + { + Lsrv(rd, rn, rm); + } + } + + public void Lsrv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm); + } + + public void Madd(Operand rd, Operand rn, Operand rm, Operand ra) + { + WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra); + } + + public void Mul(Operand rd, Operand rn, Operand rm) + { + Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type)); + } + + public void Mov(Operand rd, Operand rn) + { + if (rd.Type.IsInteger()) + { + Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn); + } + else + { + OrrVector(rd, rn, rn); + } + } + + public void MovSp(Operand rd, Operand rn) + { + if (rd.GetRegister().Index == SpRegister || + rn.GetRegister().Index == SpRegister) + { + Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true); + } + else + { + Mov(rd, rn); + } + } + + public void Mov(Operand rd, int imm) + { + Movz(rd, imm, 0); + } + + public void Movz(Operand rd, int imm, int hw) + { + Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw); + WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd); + } + + public void Movk(Operand rd, int imm, int hw) + { + Debug.Assert((hw & (rd.Type == OperandType.I64 ? 
3 : 1)) == hw); + WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd); + } + + public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2) + { + uint instruction = 0xd5300000u; + + instruction |= (op2 & 7) << 5; + instruction |= (crm & 15) << 8; + instruction |= (crn & 15) << 12; + instruction |= (op1 & 7) << 16; + instruction |= (o0 & 1) << 19; + + WriteInstruction(instruction, rt); + } + + public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount); + } + + public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount); + } + + public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true) + { + WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q); + } + + public void Ret(Operand rn) + { + WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5)); + } + + public void Rev(Operand rd, Operand rn) + { + uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u; + WriteInstructionAuto(0x5ac00800u | opc0, rd, rn); + } + + public void Ror(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Extr(rd, rn, rn, shift); + } + else + { + Rorv(rd, rn, rm); + } + } + + public void Rorv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm); + } + + public void Sbfm(Operand rd, Operand rn, int immr, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn); + } + + public void ScvtfScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e220000u; + + if (rn.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteFPInstructionAuto(instruction, rd, rn); + } + + public void Sdiv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm); + } + + public void Smulh(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16(0x9b407c00u, rd, rn, rm); + } + + public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs) + { + WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2); + } + + public void Stlxr(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 
3u : 2u) << 30), rt, rn, rs); + } + + public void Stlxrb(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u, rt, rn, rs); + } + + public void Stlxrh(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs); + } + + public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void Str(Operand rt, Operand rn) + { + if (rn.Kind == OperandKind.Memory) + { + MemoryOperand memOp = rn.GetMemory(); + + if (memOp.Index != default) + { + Debug.Assert(memOp.Displacement == 0); + Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type)); + StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1); + } + else + { + StrRiUn(rt, memOp.BaseAddress, memOp.Displacement); + } + } + else + { + StrRiUn(rt, rn, 0); + } + } + + public void StrRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift) + { + uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type); + WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift); + } + + public void StrbRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrbRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrbRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void Stur(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, 
rn); + } + + public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0) + { + WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount); + } + + public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Sxtb(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 7); + } + + public void Sxth(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 15); + } + + public void Sxtw(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 31); + } + + public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount); + } + + public void Ubfm(Operand rd, Operand rn, int immr, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn); + } + + public void UcvtfScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e230000u; + + if (rn.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteFPInstructionAuto(instruction, rd, rn); + } + + public void Udiv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm); + } + + public void Umov(Operand rd, Operand rn, int index, int size) + { + uint q = size == 3 ? 1u << 30 : 0u; + WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn); + } + + public void Umulh(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16(0x9bc07c00u, rd, rn, rm); + } + + public void Uxtb(Operand rd, Operand rn) + { + Ubfm(rd, rn, 0, 7); + } + + public void Uxth(Operand rd, Operand rn) + { + Ubfm(rd, rn, 0, 15); + } + + private void WriteInstructionAuto( + uint instI, + uint instR, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0, + bool immForm = false) + { + if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm)) + { + Debug.Assert(shiftAmount == 0); + int imm = rm.AsInt32(); + Debug.Assert((uint)imm == rm.Value); + if (imm != 0 && (imm & 0xfff) == 0) + { + instI |= 1 << 22; // sh flag + imm >>= 12; + } + WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn); + } + else + { + instR |= EncodeUImm6(shiftAmount) << 10; + instR |= (uint)shiftType << 22; + + WriteInstructionRm16Auto(instR, rd, rn, rm); + } + } + + private void WriteInstructionAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmExtensionType extensionType, + int shiftAmount = 0) + { + Debug.Assert((uint)shiftAmount <= 4); + + instruction |= (uint)shiftAmount << 10; + instruction |= (uint)extensionType << 13; + + WriteInstructionRm16Auto(instruction, rd, rn, rm); + } + + private void WriteInstructionBitwiseAuto( + uint instI, + uint instR, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0) + { + if (rm.Kind == OperandKind.Constant && rm.Value != 0) + { + Debug.Assert(shiftAmount == 0); + bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR); + Debug.Assert(canEncode); + uint 
instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22); + + WriteInstructionAuto(instruction, rd, rn); + } + else + { + WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount); + } + } + + private void WriteInstructionBitwiseAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + instruction |= EncodeUImm6(shiftAmount) << 10; + instruction |= (uint)shiftType << 22; + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionLdrStrAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmExtensionType extensionType, + bool shift) + { + if (shift) + { + instruction |= 1u << 12; + } + + instruction |= (uint)extensionType << 13; + + if (rd.Type == OperandType.I64) + { + instruction |= 1u << 30; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionAuto(uint instruction, Operand rd) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd); + } + + public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd, rn); + } + + private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd, rn, rm, ra); + } + + public void WriteInstruction(uint instruction, Operand rd) + { + WriteUInt32(instruction | EncodeReg(rd)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16)); + } + + private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn) + { + if (rd.Type == OperandType.FP64) + { + instruction |= 1u << 22; + } + + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm) + { + if (rd.Type == OperandType.FP64) + { + instruction |= 1u << 22; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true) + { + if (q) + { + instruction |= 1u << 30; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16)); + } + + public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm) + { + WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16)); + } + + private static uint 
GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type) + { + uint instruction; + int scale; + + if (type.IsInteger()) + { + instruction = intInst; + + if (type == OperandType.I64) + { + instruction |= SfFlag; + scale = 3; + } + else + { + scale = 2; + } + } + else + { + int opc = type switch + { + OperandType.FP32 => 0, + OperandType.FP64 => 1, + _ => 2 + }; + + instruction = vecInst | ((uint)opc << 30); + scale = 2 + opc; + } + + instruction |= (EncodeSImm7(imm, scale) << 15); + + return instruction; + } + + private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type) + { + uint instruction; + + if (type.IsInteger()) + { + instruction = intInst; + + if (type == OperandType.I64) + { + instruction |= 1 << 30; + } + } + else + { + instruction = vecInst; + + if (type == OperandType.V128) + { + instruction |= 1u << 23; + } + else + { + instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30; + } + } + + return instruction; + } + + private static uint EncodeIndexSizeImm5(int index, int size) + { + Debug.Assert((uint)size < 4); + Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination."); + return ((uint)index << (size + 1)) | (1u << size); + } + + private static uint EncodeSImm7(int value, int scale) + { + uint imm = (uint)(value >> scale) & 0x7f; + Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}."); + return imm; + } + + private static uint EncodeSImm9(int value) + { + uint imm = (uint)value & 0x1ff; + Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeSImm19_2(int value) + { + uint imm = (uint)(value >> 2) & 0x7ffff; + Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeSImm26_2(int value) + { + uint imm = (uint)(value >> 2) & 0x3ffffff; + Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm4(int value) + { + uint imm = (uint)value & 0xf; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm6(int value) + { + uint imm = (uint)value & 0x3f; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm12(int value, OperandType type) + { + return EncodeUImm12(value, GetScaleForType(type)); + } + + private static uint EncodeUImm12(int value, int scale) + { + uint imm = (uint)(value >> scale) & 0xfff; + Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}."); + return imm; + } + + private static uint EncodeUImm16(int value) + { + uint imm = (uint)value & 0xffff; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeReg(Operand reg) + { + if (reg.Kind == OperandKind.Constant && reg.Value == 0) + { + return ZrRegister; + } + + uint regIndex = (uint)reg.GetRegister().Index; + Debug.Assert(reg.Kind == OperandKind.Register); + Debug.Assert(regIndex < 32); + return regIndex; + } + + public static int GetScaleForType(OperandType type) + { + return type switch + { + OperandType.I32 => 2, + OperandType.I64 => 3, + OperandType.FP32 => 2, + OperandType.FP64 => 3, + OperandType.V128 => 4, + _ => throw new ArgumentException($"Invalid type 
{type}.") + }; + } + + private void WriteInt16(short value) + { + WriteUInt16((ushort)value); + } + + private void WriteInt32(int value) + { + WriteUInt32((uint)value); + } + + private void WriteByte(byte value) + { + _stream.WriteByte(value); + } + + private void WriteUInt16(ushort value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + } + + private void WriteUInt32(uint value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + } + } +} diff --git a/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs new file mode 100644 index 000000000..fda8d7867 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/CallingConvention.cs @@ -0,0 +1,96 @@ +using System; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CallingConvention + { + private const int RegistersMask = unchecked((int)0xffffffff); + + // Some of those register have specific roles and can't be used as general purpose registers. + // X18 - Reserved for platform specific usage. + // X29 - Frame pointer. + // X30 - Return address. + // X31 - Not an actual register, in some cases maps to SP, and in others to ZR. + private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31); + + public static int GetIntAvailableRegisters() + { + return RegistersMask & ~ReservedRegsMask; + } + + public static int GetVecAvailableRegisters() + { + return RegistersMask; + } + + public static int GetIntCallerSavedRegisters() + { + return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask; + } + + public static int GetFpCallerSavedRegisters() + { + return GetFpCalleeSavedRegisters() ^ RegistersMask; + } + + public static int GetVecCallerSavedRegisters() + { + return GetVecCalleeSavedRegisters() ^ RegistersMask; + } + + public static int GetIntCalleeSavedRegisters() + { + return 0x1ff80000; // X19 to X28 + } + + public static int GetFpCalleeSavedRegisters() + { + return 0xff00; // D8 to D15 + } + + public static int GetVecCalleeSavedRegisters() + { + return 0; + } + + public static int GetArgumentsOnRegsCount() + { + return 8; + } + + public static int GetIntArgumentRegister(int index) + { + if ((uint)index < (uint)GetArgumentsOnRegsCount()) + { + return index; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static int GetVecArgumentRegister(int index) + { + if ((uint)index < (uint)GetArgumentsOnRegsCount()) + { + return index; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static int GetIntReturnRegister() + { + return 0; + } + + public static int GetIntReturnRegisterHigh() + { + return 1; + } + + public static int GetVecReturnRegister() + { + return 0; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs new file mode 100644 index 000000000..e67d2fdb7 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs @@ -0,0 +1,173 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Numerics; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CodeGenCommon + { + public const int TcAddressRegister = 8; + public const int ReservedRegister = 17; + + public static bool ConstFitsOnSImm7(int value, int scale) + { + return (((value >> scale) << 25) >> (25 - scale)) == value; + } + + public static bool 
ConstFitsOnSImm9(int value) + { + return ((value << 23) >> 23) == value; + } + + public static bool ConstFitsOnUImm12(int value) + { + return (value & 0xfff) == value; + } + + public static bool ConstFitsOnUImm12(int value, OperandType type) + { + int scale = Assembler.GetScaleForType(type); + return (((value >> scale) & 0xfff) << scale) == value; + } + + public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR) + { + ulong value = operand.Value; + + if (operand.Type == OperandType.I32) + { + value |= value << 32; + } + + return TryEncodeBitMask(value, out immN, out immS, out immR); + } + + public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR) + { + // Some special values also can't be encoded: + // 0 can't be encoded because we need to subtract 1 from onesCount (which would became negative if 0). + // A value with all bits set can't be encoded because it is reserved according to the spec, because: + // Any value AND all ones will be equal itself, so it's effectively a no-op. + // Any value OR all ones will be equal all ones, so one can just use MOV. + // Any value XOR all ones will be equal its inverse, so one can just use MVN. + if (value == ulong.MaxValue) + { + immN = 0; + immS = 0; + immR = 0; + + return false; + } + + int bitLength = CountSequence(value); + + if ((value >> bitLength) != 0) + { + bitLength += CountSequence(value >> bitLength); + } + + int bitLengthLog2 = BitOperations.Log2((uint)bitLength); + int bitLengthPow2 = 1 << bitLengthLog2; + + if (bitLengthPow2 < bitLength) + { + bitLengthLog2++; + bitLengthPow2 <<= 1; + } + + int selectedESize = 64; + int repetitions = 1; + int onesCount = BitOperations.PopCount(value); + + if (bitLengthPow2 < 64 && (value >> bitLengthPow2) != 0) + { + for (int eSizeLog2 = bitLengthLog2; eSizeLog2 < 6; eSizeLog2++) + { + bool match = true; + int eSize = 1 << eSizeLog2; + ulong mask = (1UL << eSize) - 1; + ulong eValue = value & mask; + + for (int e = 1; e < 64 / eSize; e++) + { + if (((value >> (e * eSize)) & mask) != eValue) + { + match = false; + break; + } + } + + if (match) + { + selectedESize = eSize; + repetitions = 64 / eSize; + onesCount = BitOperations.PopCount(eValue); + break; + } + } + } + + // Find rotation. We have two cases, one where the highest bit is 0 + // and one where it is 1. + // If it's 1, we just need to count the number of 1 bits on the MSB to find the right rotation. + // If it's 0, we just need to count the number of 0 bits on the LSB to find the left rotation, + // then we can convert it to the right rotation shift by subtracting the value from the element size. + int rotation; + long vHigh = (long)(value << (64 - selectedESize)); + if (vHigh < 0) + { + rotation = BitOperations.LeadingZeroCount(~(ulong)vHigh); + } + else + { + rotation = (selectedESize - BitOperations.TrailingZeroCount(value)) & (selectedESize - 1); + } + + // Reconstruct value and see if it matches. If not, we can't encode. + ulong reconstructed = onesCount == 64 ? ulong.MaxValue : RotateRight((1UL << onesCount) - 1, rotation, selectedESize); + + for (int bit = 32; bit >= selectedESize; bit >>= 1) + { + reconstructed |= reconstructed << bit; + } + + if (reconstructed != value || onesCount == 0) + { + immN = 0; + immS = 0; + immR = 0; + + return false; + } + + immR = rotation; + + // immN indicates that there are no repetitions. + // The MSB of immS indicates the amount of repetitions, and the LSB the number of bits set. 
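+            // Worked example: 0x00FF00FF00FF00FF is the 16-bit element 0x00FF
+            // repeated 4 times (8 contiguous ones, rotation 0), so it encodes as
+            // immN = 0, immR = 0 and immS = 0b100111: the 0b10 prefix selects
+            // 16-bit elements, and the low bits hold onesCount - 1 = 0b0111.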
+            if (repetitions == 1)
+            {
+                immN = 1;
+                immS = 0;
+            }
+            else
+            {
+                immN = 0;
+
+                // Shifts the 0b11110... prefix right to encode the element size in the top bits of immS
+                // (0b0xxxxx = 32-bit elements down to 0b11110x = 2-bit elements).
+                immS = (0xf80 >> BitOperations.Log2((uint)repetitions)) & 0x3f;
+            }
+
+            immS |= onesCount - 1;
+
+            return true;
+        }
+
+        private static int CountSequence(ulong value)
+        {
+            return BitOperations.TrailingZeroCount(value) + BitOperations.TrailingZeroCount(~value);
+        }
+
+        private static ulong RotateRight(ulong bits, int shift, int size)
+        {
+            return (bits >> shift) | ((bits << (size - shift)) & (size == 64 ? ulong.MaxValue : (1UL << size) - 1));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
new file mode 100644
index 000000000..1ddde0c19
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
@@ -0,0 +1,286 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    class CodeGenContext
+    {
+        private const int BccInstLength = 4;
+        private const int CbnzInstLength = 4;
+        private const int LdrLitInstLength = 4;
+
+        private Stream _stream;
+
+        public int StreamOffset => (int)_stream.Length;
+
+        public AllocationResult AllocResult { get; }
+
+        public Assembler Assembler { get; }
+
+        public BasicBlock CurrBlock { get; private set; }
+
+        public bool HasCall { get; }
+
+        public int CallArgsRegionSize { get; }
+        public int FpLrSaveRegionSize { get; }
+
+        private readonly Dictionary<BasicBlock, long> _visitedBlocks;
+        private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;
+
+        private struct ConstantPoolEntry
+        {
+            public readonly int Offset;
+            public readonly Symbol Symbol;
+            public readonly List<(Operand, int)> LdrOffsets;
+
+            public ConstantPoolEntry(int offset, Symbol symbol)
+            {
+                Offset = offset;
+                Symbol = symbol;
+                LdrOffsets = new List<(Operand, int)>();
+            }
+        }
+
+        private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
+
+        private bool _constantPoolWritten;
+        private long _constantPoolOffset;
+
+        private ArmCondition _jNearCondition;
+        private Operand _jNearValue;
+
+        private long _jNearPosition;
+
+        private readonly bool _relocatable;
+
+        public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+        {
+            _stream = new MemoryStream();
+
+            AllocResult = allocResult;
+
+            Assembler = new Assembler(_stream);
+
+            bool hasCall = maxCallArgs >= 0;
+
+            HasCall = hasCall;
+
+            if (maxCallArgs < 0)
+            {
+                maxCallArgs = 0;
+            }
+
+            CallArgsRegionSize = maxCallArgs * 16;
+            FpLrSaveRegionSize = hasCall ? 16 : 0;
+
+            _visitedBlocks = new Dictionary<BasicBlock, long>();
+            _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>>();
+            _constantPool = new Dictionary<ulong, ConstantPoolEntry>();
+
+            _relocatable = relocatable;
+        }
+
+        public void EnterBlock(BasicBlock block)
+        {
+            CurrBlock = block;
+
+            long target = _stream.Position;
+
+            if (_pendingBranches.TryGetValue(block, out var list))
+            {
+                foreach (var tuple in list)
+                {
+                    _stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
+                    WriteBranch(tuple.Condition, target);
+                }
+
+                _stream.Seek(target, SeekOrigin.Begin);
+                _pendingBranches.Remove(block);
+            }
+
+            _visitedBlocks.Add(block, target);
+        }
+
+        public void JumpTo(BasicBlock target)
+        {
+            JumpTo(ArmCondition.Al, target);
+        }
+
+        public void JumpTo(ArmCondition condition, BasicBlock target)
+        {
+            if (_visitedBlocks.TryGetValue(target, out long offset))
+            {
+                WriteBranch(condition, offset);
+            }
+            else
+            {
+                if (!_pendingBranches.TryGetValue(target, out var list))
+                {
+                    list = new List<(ArmCondition, long)>();
+                    _pendingBranches.Add(target, list);
+                }
+
+                list.Add((condition, _stream.Position));
+
+                _stream.Seek(BccInstLength, SeekOrigin.Current);
+            }
+        }
+
+        private void WriteBranch(ArmCondition condition, long to)
+        {
+            int imm = checked((int)(to - _stream.Position));
+
+            if (condition != ArmCondition.Al)
+            {
+                Assembler.B(condition, imm);
+            }
+            else
+            {
+                Assembler.B(imm);
+            }
+        }
+
+        public void JumpToNear(ArmCondition condition)
+        {
+            _jNearCondition = condition;
+            _jNearPosition = _stream.Position;
+
+            _stream.Seek(BccInstLength, SeekOrigin.Current);
+        }
+
+        public void JumpToNearIfNotZero(Operand value)
+        {
+            _jNearValue = value;
+            _jNearPosition = _stream.Position;
+
+            _stream.Seek(CbnzInstLength, SeekOrigin.Current);
+        }
+
+        public void JumpHere()
+        {
+            long currentPosition = _stream.Position;
+            long offset = currentPosition - _jNearPosition;
+
+            _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+
+            if (_jNearValue != default)
+            {
+                Assembler.Cbnz(_jNearValue, checked((int)offset));
+                _jNearValue = default;
+            }
+            else
+            {
+                Assembler.B(_jNearCondition, checked((int)offset));
+            }
+
+            _stream.Seek(currentPosition, SeekOrigin.Begin);
+        }
+
+        // Reserves space for a PC-relative LDR (literal); WriteConstantPool later patches it to point at the pooled value.
+        public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
+        {
+            if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
+            {
+                cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
+                _constantPool.Add(value, cpe);
+            }
+
+            cpe.LdrOffsets.Add((rt, (int)_stream.Position));
+            _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
+        }
+
+        private long WriteConstantPool()
+        {
+            if (_constantPoolWritten)
+            {
+                return _constantPoolOffset;
+            }
+
+            long constantPoolBaseOffset = _stream.Position;
+
+            foreach (ulong value in _constantPool.Keys)
+            {
+                WriteUInt64(value);
+            }
+
+            // Now that the pool base is known, patch each reserved LDR to reference its pool slot.
+            foreach (ConstantPoolEntry cpe in _constantPool.Values)
+            {
+                foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
+                {
+                    _stream.Seek(ldrOffset, SeekOrigin.Begin);
+
+                    int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+                    int pcRelativeOffset = absoluteOffset - ldrOffset;
+
+                    Assembler.LdrLit(rt, pcRelativeOffset);
+                }
+            }
+
+            _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
+
+            _constantPoolOffset = constantPoolBaseOffset;
+            _constantPoolWritten = true;
+
+            return constantPoolBaseOffset;
+        }
+
+        public (byte[], RelocInfo) GetCode()
+        {
+            long constantPoolBaseOffset = WriteConstantPool();
+
+            byte[] code = new byte[_stream.Length];
+
+            long originalPosition = _stream.Position;
+
+            _stream.Seek(0, SeekOrigin.Begin);
+            _stream.Read(code, 0, code.Length);
+            _stream.Seek(originalPosition, SeekOrigin.Begin);
+
+            RelocInfo relocInfo;
+
+            if (_relocatable)
+            {
+                RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
+
+                int index = 0;
+
+                foreach (ConstantPoolEntry cpe in _constantPool.Values)
+                {
+                    if (cpe.Symbol.Type != SymbolType.None)
+                    {
+                        int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+                        relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
+                    }
+                }
+
+                if (index != relocs.Length)
+                {
+                    Array.Resize(ref relocs, index);
+                }
+
+                relocInfo = new RelocInfo(relocs);
+            }
+            else
+            {
+                relocInfo = new RelocInfo(new RelocEntry[0]);
+            }
+
+            return (code, relocInfo);
+        }
+
+        private void WriteUInt64(ulong value)
+        {
+            _stream.WriteByte((byte)(value >> 0));
+            _stream.WriteByte((byte)(value >> 8));
+            _stream.WriteByte((byte)(value >> 16));
+            _stream.WriteByte((byte)(value >> 24));
+            _stream.WriteByte((byte)(value >> 32));
+            _stream.WriteByte((byte)(value >> 40));
+            _stream.WriteByte((byte)(value >> 48));
+            _stream.WriteByte((byte)(value >> 56));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
new file mode 100644
index 000000000..704aa45ac
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
@@ -0,0 +1,1576 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+
+using static ARMeilleure.IntermediateRepresentation.Operand;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+    static class CodeGenerator
+    {
+        private const int DWordScale = 3;
+
+        private const int RegistersCount = 32;
+
+        private const int FpRegister = 29;
+        private const int LrRegister = 30;
+        private const int SpRegister = 31;
+        private const int ZrRegister = 31;
+
+        private enum AccessSize
+        {
+            Byte,
+            Hword,
+            Auto
+        }
+
+        private static Action<CodeGenContext, Operation>[] _instTable;
+
+        static CodeGenerator()
+        {
+            _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+            Add(Instruction.Add, GenerateAdd);
+            Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+            Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+            Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+            Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+            Add(Instruction.BranchIf, GenerateBranchIf);
+            Add(Instruction.ByteSwap, GenerateByteSwap);
+            Add(Instruction.Call, GenerateCall);
+            //Add(Instruction.Clobber, GenerateClobber);
+            Add(Instruction.Compare, GenerateCompare);
+            Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+            Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+            Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+            Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+            Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+            Add(Instruction.ConvertToFP, GenerateConvertToFP);
+            Add(Instruction.ConvertToFPUI, GenerateConvertToFPUI);
+            Add(Instruction.Copy, GenerateCopy);
+            Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+            Add(Instruction.Divide, GenerateDivide);
+            Add(Instruction.DivideUI, GenerateDivideUI);
+            Add(Instruction.Fill, GenerateFill);
+            Add(Instruction.Load, GenerateLoad);
+            Add(Instruction.Load16, GenerateLoad16);
+            Add(Instruction.Load8, GenerateLoad8);
+            Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+            Add(Instruction.Multiply, GenerateMultiply);
+            Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+            Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+            Add(Instruction.Negate, GenerateNegate);
+            Add(Instruction.Return, GenerateReturn);
+            Add(Instruction.RotateRight, GenerateRotateRight);
+            Add(Instruction.ShiftLeft, GenerateShiftLeft);
+            Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+            Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+            Add(Instruction.SignExtend16, GenerateSignExtend16);
+            Add(Instruction.SignExtend32, GenerateSignExtend32);
+            Add(Instruction.SignExtend8, GenerateSignExtend8);
+            Add(Instruction.Spill, GenerateSpill);
+            Add(Instruction.SpillArg, GenerateSpillArg);
+            Add(Instruction.StackAlloc, GenerateStackAlloc);
+            Add(Instruction.Store, GenerateStore);
+            Add(Instruction.Store16, GenerateStore16);
+            Add(Instruction.Store8, GenerateStore8);
+            Add(Instruction.Subtract, GenerateSubtract);
+            Add(Instruction.Tailcall, GenerateTailcall);
+            Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+            Add(Instruction.VectorExtract, GenerateVectorExtract);
+            Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+            Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+            Add(Instruction.VectorInsert, GenerateVectorInsert);
+            Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+            Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+            Add(Instruction.VectorOne, GenerateVectorOne);
+            Add(Instruction.VectorZero, GenerateVectorZero);
+            Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+            Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+            Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+            Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+            Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+
+            static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+            {
+                _instTable[(int)inst] = func;
+            }
+        }
+
+        public static CompiledFunction Generate(CompilerContext cctx)
+        {
+            ControlFlowGraph cfg = cctx.Cfg;
+
+            Logger.StartPass(PassName.Optimization);
+
+            if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+            {
+                if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+                {
+                    Optimizer.RunPass(cfg);
+                }
+
+                BlockPlacement.RunPass(cfg);
+            }
+
+            Arm64Optimizer.RunPass(cfg);
+
+            Logger.EndPass(PassName.Optimization, cfg);
+
+            Logger.StartPass(PassName.PreAllocation);
+
+            StackAllocator stackAlloc = new();
+
+            PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+            Logger.EndPass(PassName.PreAllocation, cfg);
+
+            Logger.StartPass(PassName.RegisterAllocation);
+
+            if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+            {
+                Ssa.Deconstruct(cfg);
+            }
+
+            IRegisterAllocator regAlloc;
+
+            if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+            {
+                regAlloc = new LinearScanAllocator();
+            }
+            else
+            {
+                regAlloc = new HybridAllocator();
+            }
+
+            RegisterMasks regMasks = new(
+                CallingConvention.GetIntAvailableRegisters(),
+                CallingConvention.GetVecAvailableRegisters(),
+                CallingConvention.GetIntCallerSavedRegisters(),
+                CallingConvention.GetVecCallerSavedRegisters(),
+                CallingConvention.GetIntCalleeSavedRegisters(),
+                CallingConvention.GetVecCalleeSavedRegisters(),
+                RegistersCount);
+
+            AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+            Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+            Logger.StartPass(PassName.CodeGeneration);
+
+            //Console.Error.WriteLine(IRDumper.GetDump(cfg));
+
+            bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+            CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+
+            UnwindInfo unwindInfo = WritePrologue(context);
+
+            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+            {
+                context.EnterBlock(block);
+
+                for (Operation node = block.Operations.First; node != default;)
+                {
+                    node = GenerateOperation(context, node);
+                }
+
+                if (block.SuccessorsCount == 0)
+                {
+                    // The only blocks which can have 0 successors are exit blocks.
+                    Operation last = block.Operations.Last;
+
+                    Debug.Assert(last.Instruction == Instruction.Tailcall ||
+                                 last.Instruction == Instruction.Return);
+                }
+                else
+                {
+                    BasicBlock succ = block.GetSuccessor(0);
+
+                    if (succ != block.ListNext)
+                    {
+                        context.JumpTo(succ);
+                    }
+                }
+            }
+
+            (byte[] code, RelocInfo relocInfo) = context.GetCode();
+
+            Logger.EndPass(PassName.CodeGeneration);
+
+            return new CompiledFunction(code, unwindInfo, relocInfo);
+        }
+
+        private static Operation GenerateOperation(CodeGenContext context, Operation operation)
+        {
+            if (operation.Instruction == Instruction.Extended)
+            {
+                CodeGeneratorIntrinsic.GenerateOperation(context, operation);
+            }
+            else
+            {
+                if (IsLoadOrStore(operation) &&
+                    operation.ListNext != default &&
+                    operation.ListNext.Instruction == operation.Instruction &&
+                    TryPairMemoryOp(context, operation, operation.ListNext))
+                {
+                    // Skip the next operation if we managed to pair them.
+                    return operation.ListNext.ListNext;
+                }
+
+                Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+                if (func != null)
+                {
+                    func(context, operation);
+                }
+                else
+                {
+                    throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+                }
+            }
+
+            return operation.ListNext;
+        }
+
+        private static void GenerateAdd(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            // ValidateBinOp(dest, src1, src2);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Add(dest, src1, src2);
+            }
+            else
+            {
+                context.Assembler.FaddScalar(dest, src1, src2);
+            }
+        }
+
+        private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.And(dest, src1, src2);
+        }
+
+        private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Eor(dest, src1, src2);
+            }
+            else
+            {
+                context.Assembler.EorVector(dest, src1, src2);
+            }
+        }
+
+        private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            ValidateUnOp(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Mvn(dest, source);
+        }
+
+        private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Orr(dest, src1, src2); + } + + private static void GenerateBranchIf(CodeGenContext context, Operation operation) + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToArmCondition(); + + GenerateCompareCommon(context, operation); + + context.JumpTo(cond, context.CurrBlock.GetSuccessor(1)); + } + + private static void GenerateByteSwap(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Rev(dest, source); + } + + private static void GenerateCall(CodeGenContext context, Operation operation) + { + context.Assembler.Blr(operation.GetSource(0)); + } + + private static void GenerateCompare(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand comp = operation.GetSource(2); + + Debug.Assert(dest.Type == OperandType.I32); + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToArmCondition(); + + GenerateCompareCommon(context, operation); + + context.Assembler.Cset(dest, cond); + } + + private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation) + { + if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3. + { + Operand actualLow = operation.GetDestination(0); + Operand actualHigh = operation.GetDestination(1); + Operand temp0 = operation.GetDestination(2); + Operand temp1 = operation.GetDestination(3); + Operand address = operation.GetSource(0); + Operand expectedLow = operation.GetSource(1); + Operand expectedHigh = operation.GetSource(2); + Operand desiredLow = operation.GetSource(3); + Operand desiredHigh = operation.GetSource(4); + + GenerateAtomicDcas( + context, + address, + expectedLow, + expectedHigh, + desiredLow, + desiredHigh, + actualLow, + actualHigh, + temp0, + temp1); + } + else + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto); + } + } + + private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation) + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Hword); + } + + private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation) + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Byte); + } + + private static void GenerateCompareCommon(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(src1, src2); + + Debug.Assert(src1.Type.IsInteger()); + + 
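+            // Cmp only sets NZCV; callers consume the flags afterwards (B.cond for BranchIf, CSET for Compare).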
context.Assembler.Cmp(src1, src2); + } + + private static void GenerateConditionalSelect(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src2, src3); + + Debug.Assert(dest.Type.IsInteger()); + Debug.Assert(src1.Type == OperandType.I32); + + context.Assembler.Cmp (src1, Const(src1.Type, 0)); + context.Assembler.Csel(dest, src2, src3, ArmCondition.Ne); + } + + private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64); + + context.Assembler.Mov(dest, Register(source, OperandType.I32)); + } + + private static void GenerateConvertToFP(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + Debug.Assert(dest.Type != source.Type); + Debug.Assert(source.Type != OperandType.V128); + + if (source.Type.IsInteger()) + { + context.Assembler.ScvtfScalar(dest, source); + } + else + { + context.Assembler.FcvtScalar(dest, source); + } + } + + private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + Debug.Assert(dest.Type != source.Type); + Debug.Assert(source.Type.IsInteger()); + + context.Assembler.UcvtfScalar(dest, source); + } + + private static void GenerateCopy(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant); + + // Moves to the same register are useless. 
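+            // Constant sources are handled separately below: relocatable constants load from the literal pool,
+            // anything else is materialized inline with GenerateConstantCopy.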
+ if (dest.Kind == source.Kind && dest.Value == source.Value) + { + return; + } + + if (dest.Kind == OperandKind.Register && source.Kind == OperandKind.Constant) + { + if (source.Relocatable) + { + context.ReserveRelocatableConstant(dest, source.Symbol, source.Value); + } + else + { + GenerateConstantCopy(context, dest, source.Value); + } + } + else + { + context.Assembler.Mov(dest, source); + } + } + + private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Clz(dest, source); + } + + private static void GenerateDivide(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + ValidateBinOp(dest, dividend, divisor); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sdiv(dest, dividend, divisor); + } + else + { + context.Assembler.FdivScalar(dest, dividend, divisor); + } + } + + private static void GenerateDivideUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + ValidateBinOp(dest, dividend, divisor); + + context.Assembler.Udiv(dest, dividend, divisor); + } + + private static void GenerateLoad(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + context.Assembler.Ldr(value, address); + } + + private static void GenerateLoad16(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrhRiUn(value, address, 0); + } + + private static void GenerateLoad8(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrbRiUn(value, address, 0); + } + + private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation) + { + context.Assembler.Dmb(0xf); + } + + private static void GenerateMultiply(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Mul(dest, src1, src2); + } + else + { + context.Assembler.FmulScalar(dest, src1, src2); + } + } + + private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Smulh(dest, src1, src2); + } + + private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Umulh(dest, src1, src2); + } + + private static void GenerateNegate(CodeGenContext context, Operation operation) + { + Operand 
dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + if (dest.Type.IsInteger()) + { + context.Assembler.Neg(dest, source); + } + else + { + context.Assembler.FnegScalar(dest, source); + } + } + + private static void GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset) + { + if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type)) + { + context.Assembler.LdrRiUn(value, address, offset); + } + else if (CodeGenCommon.ConstFitsOnSImm9(offset)) + { + context.Assembler.Ldur(value, address, offset); + } + else + { + Operand tempAddress = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempAddress, (ulong)offset); + context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input. + context.Assembler.LdrRiUn(value, tempAddress, 0); + } + } + + private static void GenerateReturn(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Ret(Register(LrRegister)); + } + + private static void GenerateRotateRight(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Ror(dest, src1, src2); + } + + private static void GenerateShiftLeft(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Lsl(dest, src1, src2); + } + + private static void GenerateShiftRightSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Asr(dest, src1, src2); + } + + private static void GenerateShiftRightUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Lsr(dest, src1, src2); + } + + private static void GenerateSignExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxth(dest, source); + } + + private static void GenerateSignExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxtw(dest, source); + } + + private static void GenerateSignExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxtb(dest, source); + } + + private static void GenerateFill(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize; + + GenerateLoad(context, dest, Register(SpRegister), offs); + } + + private 
static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset) + { + if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type)) + { + context.Assembler.StrRiUn(value, address, offset); + } + else if (CodeGenCommon.ConstFitsOnSImm9(offset)) + { + context.Assembler.Stur(value, address, offset); + } + else + { + Operand tempAddress = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempAddress, (ulong)offset); + context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input. + context.Assembler.StrRiUn(value, tempAddress, 0); + } + } + + private static void GenerateSpill(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize); + } + + private static void GenerateSpillArg(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, 0); + } + + private static void GenerateStackAlloc(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize; + + context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs)); + } + + private static void GenerateStore(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + context.Assembler.Str(value, address); + } + + private static void GenerateStore16(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.StrhRiUn(value, address, 0); + } + + private static void GenerateStore8(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.StrbRiUn(value, address, 0); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset) + { + Operand offset = operation.GetSource(0); + Operand source = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + baseOffset; + + GenerateStore(context, source, Register(SpRegister), offs); + } + + private static void GenerateSubtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + // ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sub(dest, src1, src2); + } + else + { + context.Assembler.FsubScalar(dest, src1, src2); + } + } + + private static void GenerateTailcall(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Br(operation.GetSource(0)); + } + + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + if (dest != default) + { + Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger()); + + OperandType destType = source.Type == OperandType.I64 ? 
OperandType.FP64 : OperandType.FP32; + + context.Assembler.Fmov(Register(dest, destType), source, topHalf: false); + } + } + + private static void GenerateVectorExtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes()); + + if (dest.Type.IsInteger()) + { + context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 3 : 2); + } + else + { + context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 3 : 2); + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 8); + + context.Assembler.Umov(dest, src1, index, 1); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 16); + + context.Assembler.Umov(dest, src1, index, 0); + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + if (src2.Type.IsInteger()) + { + context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2); + } + else + { + context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 
3 : 2); + } + } + + private static void GenerateVectorInsert16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 1); + } + + private static void GenerateVectorInsert8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 0); + } + + private static void GenerateVectorOne(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.CmeqVector(dest, dest, dest, 2); + } + + private static void GenerateVectorZero(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.EorVector(dest, dest, dest); + } + + private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64)); + } + + private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32)); + } + + private static void GenerateZeroExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Uxth(dest, source); + } + + private static void GenerateZeroExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + // We can eliminate the move if source is already 32-bit and the registers are the same. 
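+            // On AArch64, a write to a W register implicitly zeroes the upper 32 bits, so a 32-bit MOV is enough otherwise.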
+            if (dest.Value == source.Value && source.Type == OperandType.I32)
+            {
+                return;
+            }
+
+            context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source);
+        }
+
+        private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            context.Assembler.Uxtb(dest, source);
+        }
+
+        private static UnwindInfo WritePrologue(CodeGenContext context)
+        {
+            List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();
+
+            Operand rsp = Register(SpRegister);
+
+            int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+            int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+            int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+            int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8);
+
+            int offset = 0;
+
+            WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+            WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+
+            int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+            int outArgsSize = context.CallArgsRegionSize;
+
+            if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+            {
+                if (context.HasCall)
+                {
+                    context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize);
+                    context.Assembler.MovSp(Register(FpRegister), rsp);
+                }
+
+                if (outArgsSize != 0)
+                {
+                    context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize));
+                }
+            }
+            else
+            {
+                int frameSize = localSize + outArgsSize;
+                if (frameSize != 0)
+                {
+                    if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+                    {
+                        context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize));
+                    }
+                    else
+                    {
+                        Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+                        GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+                        context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+                    }
+                }
+
+                context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+
+                if (outArgsSize != 0)
+                {
+                    context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize));
+                }
+                else
+                {
+                    context.Assembler.MovSp(Register(FpRegister), Register(SpRegister));
+                }
+            }
+
+            return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+        }
+
+        private static void WritePrologueCalleeSavesPreIndexed(
+            CodeGenContext context,
+            List<UnwindPushEntry> pushEntries,
+            ref int mask,
+            ref int offset,
+            int calleeSaveRegionSize,
+            OperandType type)
+        {
+            if ((BitOperations.PopCount((uint)mask) & 1) != 0)
+            {
+                int reg = BitOperations.TrailingZeroCount(mask);
+
+                pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+                mask &= ~(1 << reg);
+
+                if (offset != 0)
+                {
+                    context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset);
+                }
+                else
+                {
+                    context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize);
+                }
+
+                offset += type.GetSizeInBytes();
+            }
+
+            while (mask != 0)
+            {
+                int reg = BitOperations.TrailingZeroCount(mask);
+
+                pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+                mask &= ~(1 << reg);
+
+                int reg2
= BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2)); + + mask &= ~(1 << reg2); + + if (offset != 0) + { + context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset); + } + else + { + context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize); + } + + offset += type.GetSizeInBytes() * 2; + } + } + + private static void WriteEpilogue(CodeGenContext context) + { + Operand rsp = Register(SpRegister); + + int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize); + int outArgsSize = context.CallArgsRegionSize; + + if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale)) + { + if (outArgsSize != 0) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize)); + } + + if (context.HasCall) + { + context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize); + } + } + else + { + if (context.HasCall) + { + context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize); + } + + int frameSize = localSize + outArgsSize; + if (frameSize != 0) + { + if (CodeGenCommon.ConstFitsOnUImm12(frameSize)) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize)); + } + else + { + Operand tempSize = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempSize, (ulong)frameSize); + context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx); + } + } + } + + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask); + int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask); + + int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8; + int calleeSaveRegionSize = Align16(offset); + + WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64); + WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64); + } + + private static void WriteEpilogueCalleeSavesPostIndexed( + CodeGenContext context, + ref int mask, + ref int offset, + int calleeSaveRegionSize, + OperandType type) + { + while (mask != 0) + { + int reg = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg); + + if (mask != 0) + { + int reg2 = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg2); + + offset -= type.GetSizeInBytes() * 2; + + if (offset != 0) + { + context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + else + { + offset -= type.GetSizeInBytes(); + + if (offset != 0) + { + context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + } + } + + private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value) + { + if (value != 0) + { + int hw = 0; + bool first = true; + + while (value != 0) + { + int valueLow = (ushort)value; + if (valueLow != 0) + { + if (first) + { + context.Assembler.Movz(dest, valueLow, hw); + first = false; + } + 
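+                    // MOVZ seeds the first non-zero halfword (zeroing the rest); MOVK below patches later halfwords in place.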
else + { + context.Assembler.Movk(dest, valueLow, hw); + } + } + + hw++; + value >>= 16; + } + } + else + { + context.Assembler.Mov(dest, Register(ZrRegister, dest.Type)); + } + } + + private static void GenerateAtomicCas( + CodeGenContext context, + Operand address, + Operand expected, + Operand desired, + Operand actual, + Operand result, + AccessSize accessSize) + { + int startOffset = context.StreamOffset; + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Ldaxrb(actual, address); + break; + case AccessSize.Hword: + context.Assembler.Ldaxrh(actual, address); + break; + default: + context.Assembler.Ldaxr(actual, address); + break; + } + + context.Assembler.Cmp(actual, expected); + + context.JumpToNear(ArmCondition.Ne); + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Stlxrb(desired, address, result); + break; + case AccessSize.Hword: + context.Assembler.Stlxrh(desired, address, result); + break; + default: + context.Assembler.Stlxr(desired, address, result); + break; + } + + context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed. + + context.JumpHere(); + + context.Assembler.Clrex(); + } + + private static void GenerateAtomicDcas( + CodeGenContext context, + Operand address, + Operand expectedLow, + Operand expectedHigh, + Operand desiredLow, + Operand desiredHigh, + Operand actualLow, + Operand actualHigh, + Operand temp0, + Operand temp1) + { + int startOffset = context.StreamOffset; + + context.Assembler.Ldaxp(actualLow, actualHigh, address); + context.Assembler.Eor(temp0, actualHigh, expectedHigh); + context.Assembler.Eor(temp1, actualLow, expectedLow); + context.Assembler.Orr(temp0, temp1, temp0); + + context.JumpToNearIfNotZero(temp0); + + Operand result = Register(temp0, OperandType.I32); + + context.Assembler.Stlxp(desiredLow, desiredHigh, address, result); + context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed. 
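+            // Both atomic helpers emit the same exclusive-monitor retry loop; sketched here for the single-register case
+            // (register choices illustrative):
+            //
+            //   loop: ldaxr  w0, [x2]       ; load-acquire exclusive (actual value)
+            //         cmp    w0, w1         ; compare against expected
+            //         b.ne   done           ; mismatch: give up
+            //         stlxr  w3, w4, [x2]   ; try to store desired; w3 = status
+            //         cbnz   w3, loop       ; non-zero status: reservation lost, retry
+            //   done: clrex                 ; drop any stale reservation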
+
+            context.JumpHere();
+
+            context.Assembler.Clrex();
+        }
+
+        // Tries to combine two adjacent loads or stores into a single LDP/STP when the base register, value type and
+        // consecutive offsets allow it.
+        private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
+        {
+            if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
+            {
+                return false;
+            }
+
+            if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
+            {
+                return false;
+            }
+
+            if (op1Base != op2Base)
+            {
+                return false;
+            }
+
+            OperandType valueType = GetMemOpValueType(currentOp);
+
+            if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
+            {
+                return false;
+            }
+
+            if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
+            {
+                return false;
+            }
+
+            if (currentOp.Instruction == Instruction.Load)
+            {
+                context.Assembler.LdpRiUn(currentOp.Destination, nextOp.Destination, op1Base, op1Offset);
+            }
+            else if (currentOp.Instruction == Instruction.Store)
+            {
+                context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
+            }
+            else
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        private static bool IsLoadOrStore(Operation operation)
+        {
+            return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store;
+        }
+
+        private static OperandType GetMemOpValueType(Operation operation)
+        {
+            if (operation.Destination != default)
+            {
+                return operation.Destination.Type;
+            }
+
+            return operation.GetSource(1).Type;
+        }
+
+        private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
+        {
+            baseAddress = default;
+            offset = 0;
+            Operand address = operation.GetSource(0);
+
+            if (address.Kind != OperandKind.Memory)
+            {
+                return false;
+            }
+
+            MemoryOperand memOp = address.GetMemory();
+            Operand baseOp = memOp.BaseAddress;
+
+            // Pairing only supports a plain base register; an unscaled index may stand in for a missing base.
+            if (baseOp == default)
+            {
+                baseOp = memOp.Index;
+
+                if (baseOp == default || memOp.Scale != Multiplier.x1)
+                {
+                    return false;
+                }
+            }
+            else if (memOp.Index != default)
+            {
+                return false;
+            }
+
+            baseAddress = baseOp;
+            offset = memOp.Displacement;
+
+            return true;
+        }
+
+        private static Operand Register(Operand operand, OperandType type = OperandType.I64)
+        {
+            return Register(operand.GetRegister().Index, type);
+        }
+
+        private static Operand Register(int register, OperandType type = OperandType.I64)
+        {
+            return Factory.Register(register, RegisterType.Integer, type);
+        }
+
+        private static int Align16(int value)
+        {
+            return (value + 0xf) & ~0xf;
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateUnOp(Operand dest, Operand source)
+        {
+            // Destination and source aren't forced to be equal.
+            // EnsureSameReg (dest, source);
+            EnsureSameType(dest, source);
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+        {
+            // Destination and source aren't forced to be equal.
+            // EnsureSameReg (dest, src1);
+            EnsureSameType(dest, src1, src2);
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+        {
+            // Destination and source aren't forced to be equal.
+            // EnsureSameReg (dest, src1);
+            EnsureSameType(dest, src1);
+
+            Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+        }
+
+        private static void EnsureSameReg(Operand op1, Operand op2)
+        {
+            Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+            Debug.Assert(op1.Kind == op2.Kind);
+            Debug.Assert(op1.Value == op2.Value);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2)
+        {
Debug.Assert(op1.Type == op2.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + Debug.Assert(op1.Type == op4.Type); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs new file mode 100644 index 000000000..aaa00bb65 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs @@ -0,0 +1,662 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Diagnostics; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CodeGeneratorIntrinsic + { + public static void GenerateOperation(CodeGenContext context, Operation operation) + { + Intrinsic intrin = operation.Intrinsic; + + IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + switch (info.Type) + { + case IntrinsicType.ScalarUnary: + GenerateVectorUnary( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.ScalarUnaryByElem: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorUnaryByElem( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(1).AsInt32(), + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.ScalarBinary: + GenerateVectorBinary( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.ScalarBinaryFPByElem: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(2).AsInt32(), + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.ScalarBinaryRd: + GenerateVectorUnary( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1)); + break; + case IntrinsicType.ScalarBinaryShl: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarBinaryShr: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarFPCompare: + GenerateScalarFPCompare( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + 
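+                // The fixed-point conversion cases below bias the FP size (0 = single, 1 = double) by 2 so it maps to the
+                // element size (2 = word, 3 = doubleword) expected by the shift-immediate encoding.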
case IntrinsicType.ScalarFPConvFixed: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + 0, + ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarFPConvFixedGpr: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateScalarFPConvGpr( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarFPConvGpr: + GenerateScalarFPConvGpr( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.ScalarTernary: + GenerateScalarTernary( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + operation.GetSource(2), + operation.GetSource(0)); + break; + case IntrinsicType.ScalarTernaryFPRdByElem: + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.ScalarTernaryShlRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + case IntrinsicType.ScalarTernaryShrRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + + case IntrinsicType.VectorUnary: + GenerateVectorUnary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.VectorUnaryByElem: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorUnaryByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(1).AsInt32(), + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.VectorBinary: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryBitwise: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + info.Inst, + operation.Destination, + 
operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryByElem: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(2).AsInt32(), + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryFPByElem: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(2).AsInt32(), + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryRd: + GenerateVectorUnary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryShl: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.VectorBinaryShr: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.VectorFPConvFixed: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.VectorInsertByElem: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorInsertByElem( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + (uint)operation.GetSource(1).AsInt32(), + operation.Destination, + operation.GetSource(2)); + break; + case IntrinsicType.VectorLookupTable: + Debug.Assert((uint)(operation.SourcesCount - 2) <= 3); + + for (int i = 1; i < operation.SourcesCount - 1; i++) + { + Register currReg = operation.GetSource(i).GetRegister(); + Register prevReg = operation.GetSource(i - 1).GetRegister(); + + Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector); + } + + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + info.Inst | ((uint)(operation.SourcesCount - 
2) << 13), + operation.Destination, + operation.GetSource(0), + operation.GetSource(operation.SourcesCount - 1)); + break; + case IntrinsicType.VectorTernaryFPRdByElem: + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryRd: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryRdBitwise: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryRdByElem: + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorBinaryByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryShlRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + case IntrinsicType.VectorTernaryShrRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + + case IntrinsicType.GetRegister: + context.Assembler.WriteInstruction(info.Inst, operation.Destination); + break; + case IntrinsicType.SetRegister: + context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0)); + break; + + default: + throw new NotImplementedException(info.Type.ToString()); + } + } + + private static void GenerateScalarFPCompare( + CodeGenContext context, + uint sz, + uint instruction, + Operand dest, + Operand rn, + Operand rm) + { + instruction |= (sz << 22); + + if (rm.Kind == OperandKind.Constant && rm.Value == 0) + { + instruction |= 0b1000; + rm = rn; + } + + context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm); + context.Assembler.Mrs(dest, 1, 3, 4, 2, 0); + } + + private static void GenerateScalarFPConvGpr( + CodeGenContext context, + uint sz, + uint instruction, + Operand rd, + Operand rn) + { + instruction |= (sz << 22); + + if (rd.Type.IsInteger()) + { + context.Assembler.WriteInstructionAuto(instruction, rd, rn); + } + else + { + if (rn.Type == OperandType.I64) + { + instruction |= Assembler.SfFlag; + 
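+                    // SfFlag sets the sf bit (bit 31), selecting the 64-bit GPR form of the conversion.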
} + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + } + + private static void GenerateScalarFPConvGpr( + CodeGenContext context, + uint sz, + uint instruction, + Operand rd, + Operand rn, + uint fBits) + { + Debug.Assert(fBits <= 64); + + instruction |= (sz << 22); + instruction |= (64 - fBits) << 10; + + if (rd.Type.IsInteger()) + { + Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32); + + context.Assembler.WriteInstructionAuto(instruction, rd, rn); + } + else + { + if (rn.Type == OperandType.I64) + { + instruction |= Assembler.SfFlag; + } + else + { + Debug.Assert(fBits <= 32); + } + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + } + + private static void GenerateScalarTernary( + CodeGenContext context, + uint sz, + uint instruction, + Operand rd, + Operand rn, + Operand rm, + Operand ra) + { + instruction |= (sz << 22); + + context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra); + } + + private static void GenerateVectorUnary( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn) + { + instruction |= (q << 30) | (sz << 22); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorUnaryByElem( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + uint srcIndex, + Operand rd, + Operand rn) + { + uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz); + + instruction |= (q << 30) | (imm5 << 16); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorBinary( + CodeGenContext context, + uint q, + uint instruction, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30); + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinary( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30) | (sz << 22); + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinaryByElem( + CodeGenContext context, + uint q, + uint size, + uint instruction, + uint srcIndex, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30) | (size << 22); + + if (size == 2) + { + instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10); + } + else + { + instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9); + } + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinaryFPByElem( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + uint srcIndex, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30) | (sz << 22); + + if (sz != 0) + { + instruction |= (srcIndex & 1) << 11; + } + else + { + instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10); + } + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinaryShlImm( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn, + uint shift) + { + instruction |= (q << 30); + + Debug.Assert(shift >= 0 && shift < (8u << (int)sz)); + + uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz))); + + instruction |= (imm << 16); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorBinaryShrImm( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn, + uint shift) + 
{ + instruction |= (q << 30); + + Debug.Assert(shift > 0 && shift <= (8u << (int)sz)); + + uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift); + + instruction |= (imm << 16); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorInsertByElem( + CodeGenContext context, + uint sz, + uint instruction, + uint srcIndex, + uint dstIndex, + Operand rd, + Operand rn) + { + uint imm4 = srcIndex << (int)sz; + uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz); + + instruction |= imm4 << 11; + instruction |= imm5 << 16; + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs new file mode 100644 index 000000000..8695db903 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.CodeGen.Arm64 +{ + struct IntrinsicInfo + { + public uint Inst { get; } + public IntrinsicType Type { get; } + + public IntrinsicInfo(uint inst, IntrinsicType type) + { + Inst = inst; + Type = type; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs new file mode 100644 index 000000000..53ef152e5 --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs @@ -0,0 +1,461 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class IntrinsicTable + { + private static IntrinsicInfo[] _intrinTable; + + static IntrinsicTable() + { + _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))]; + + Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary)); + 
Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt)); + Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, 
IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel)); + Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, 
IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv)); + Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary)); + 
Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi)); + Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi)); + Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintaS, new 
IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem)); + Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, 
IntrinsicType.VectorMovi)); + Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister)); + Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister)); + Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni)); + Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary)); + 
Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd)); + Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd)); + Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem)); + 
Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, 
IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabdV, new 
IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, 
IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary)); + } + + private static void Add(Intrinsic intrin, IntrinsicInfo info) + { + _intrinTable[(int)intrin] = info; + } + + public static IntrinsicInfo GetInfo(Intrinsic intrin) + { + return _intrinTable[(int)intrin]; + } + } +} \ No newline at end of file diff --git a/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs new file mode 100644 index 000000000..800eca93c --- /dev/null +++ b/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs @@ -0,0 +1,59 @@ +namespace ARMeilleure.CodeGen.Arm64 +{ + enum IntrinsicType + { + ScalarUnary, + ScalarUnaryByElem, + ScalarBinary, + ScalarBinaryByElem, + ScalarBinaryFPByElem, + ScalarBinaryRd, + ScalarBinaryShl, + ScalarBinaryShr, + ScalarFcsel, + ScalarFmovi, + ScalarFPCompare, + ScalarFPCompareCond, + ScalarFPConv, + ScalarFPConvFixed, + ScalarFPConvFixedGpr, + ScalarFPConvGpr, + ScalarTernary, + 
ScalarTernaryFPRdByElem,
+ ScalarTernaryShlRd,
+ ScalarTernaryShrRd,
+
+ VectorUnary,
+ VectorUnaryBitwise,
+ VectorUnaryByElem,
+ VectorBinary,
+ VectorBinaryBitwise,
+ VectorBinaryBitwiseImm,
+ VectorBinaryByElem,
+ VectorBinaryFPByElem,
+ VectorBinaryRd,
+ VectorBinaryShl,
+ VectorBinaryShr,
+ VectorExt,
+ VectorFmovi,
+ VectorFPConvFixed,
+ VectorInsertByElem,
+ VectorLdSt,
+ VectorLdStSs,
+ VectorLookupTable,
+ VectorMovi,
+ VectorMvni,
+ VectorTernaryFPRdByElem,
+ VectorTernaryRd,
+ VectorTernaryRdBitwise,
+ VectorTernaryRdByElem,
+ VectorTernaryShlRd,
+ VectorTernaryShrRd,
+
+ Vector128Unary,
+ Vector128Binary,
+
+ GetRegister,
+ SetRegister
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
new file mode 100644
index 000000000..a7f073946
--- /dev/null
+++ b/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
@@ -0,0 +1,940 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ class PreAllocator
+ {
+ private class ConstantDict
+ {
+ private readonly Dictionary<(ulong, OperandType), Operand> _constants;
+
+ public ConstantDict()
+ {
+ _constants = new Dictionary<(ulong, OperandType), Operand>();
+ }
+
+ public void Add(ulong value, OperandType type, Operand local)
+ {
+ _constants.Add((value, type), local);
+ }
+
+ public bool TryGetValue(ulong value, OperandType type, out Operand local)
+ {
+ return _constants.TryGetValue((value, type), out local);
+ }
+ }
+
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ Span<Operation> buffer = default;
+
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ ConstantDict constants = new ConstantDict();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ HandleConstantRegCopy(constants, block.Operations, node);
+ HandleDestructiveRegCopy(block.Operations, node);
+
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // On Windows, when a struct is returned from the call,
+ // we also need to pass the pointer where the struct
+ // should be written as the first argument.
+ int argsCount = node.SourcesCount - 1;
+
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
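+ // For example (illustrative; the actual register numbers come from CallingConvention):
+ //     res = Call(addr, a, b)
+ // becomes something like:
+ //     copy x0, a; copy x1, b; x0 = Call(addr, x0, x1); copy res, x0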
+ HandleCall(constants, block.Operations, node);
+ break;
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ nextNode = HandleCompareAndSwap(block.Operations, node);
+ break;
+ case Instruction.LoadArgument:
+ nextNode = HandleLoadArgument(cctx, ref buffer, block.Operations, preservedArgs, node);
+ break;
+ case Instruction.Return:
+ HandleReturn(block.Operations, node);
+ break;
+ case Instruction.Tailcall:
+ HandleTailcall(constants, block.Operations, stackAlloc, node, node);
+ break;
+ }
+ }
+ }
+ }
+
+ private static void HandleConstantRegCopy(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to an XMM register.
+ // - Replace the constant use with the XMM register.
+ src1 = AddFloatConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(node, src1.Value))
+ {
+ // Handle integer types.
+ // Most ALU instructions accept a 32-bit immediate on the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operands 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it;
+ // if not, we also add a copy. 64-bit constants are usually not supported.
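+ // For example (illustrative): "add x0, #imm, x1" has no A64 encoding, but since
+ // ADD is commutative it can be rewritten as "add x0, x1, #imm", placing the
+ // immediate on operand 2 so no extra constant copy is needed.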
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+
+ Operand temp = src1;
+
+ src1 = src2;
+ src2 = temp;
+
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddIntConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+
+ src2 = node.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddFloatConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst, src2))
+ {
+ src2 = AddIntConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ }
+
+ if (node.SourcesCount < 3 ||
+ node.Instruction == Instruction.BranchIf ||
+ node.Instruction == Instruction.Compare ||
+ node.Instruction == Instruction.VectorInsert ||
+ node.Instruction == Instruction.VectorInsert16 ||
+ node.Instruction == Instruction.VectorInsert8)
+ {
+ return;
+ }
+
+ for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand src = node.GetSource(srcIndex);
+
+ if (src.Kind == OperandKind.Constant)
+ {
+ if (!src.Type.IsInteger())
+ {
+ src = AddFloatConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ else
+ {
+ src = AddIntConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ }
+ }
+ }
+
+ private static void HandleDestructiveRegCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.Destination == default || node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0);
+
+ if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
+ {
+ bool useNewLocal = false;
+
+ for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ if (node.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is already being used as a source, so we need a new
+ // local to store the temporary value; otherwise the value in
+ // the dest local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+ node.SetSource(0, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+ node.SetSource(0, dest);
+ }
+ }
+ }
+
+ private static void HandleCall(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operation operation = node;
+
+ Operand dest = operation.Destination;
+
+ List<Operand> sources = new List<Operand>
+ {
+ operation.GetSource(0)
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
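+ // Illustratively, for consecutive integer argument registers n and n+1:
+ //     x<n>   <- VectorExtract(source, 0) (low 64 bits)
+ //     x<n+1> <- VectorExtract(source, 1) (high 64 bits)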
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ Operand offset = Const(stackOffset);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, spillOp));
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static void HandleTailcall(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ StackAllocator stackAlloc,
+ Operation node,
+ Operation operation)
+ {
+ List<Operand> sources = new List<Operand>
+ {
+ operation.GetSource(0)
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+                    Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+                    HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
+
+                    sources.Add(argReg);
+                }
+                else
+                {
+                    throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+                }
+            }
+
+            // The target address must be in a return register, since we don't return
+            // anything and it is guaranteed not to be a callee-saved register (which
+            // would be trashed by the epilogue).
+            Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
+
+            Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
+
+            nodes.AddBefore(node, addrCopyOp);
+
+            sources[0] = tcAddress;
+
+            operation.SetSources(sources.ToArray());
+        }
+
+        private static Operation HandleCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
+        {
+            Operand expected = node.GetSource(1);
+
+            if (expected.Type == OperandType.V128)
+            {
+                Operand dest = node.Destination;
+                Operand expectedLow = Local(OperandType.I64);
+                Operand expectedHigh = Local(OperandType.I64);
+                Operand desiredLow = Local(OperandType.I64);
+                Operand desiredHigh = Local(OperandType.I64);
+                Operand actualLow = Local(OperandType.I64);
+                Operand actualHigh = Local(OperandType.I64);
+
+                Operand address = node.GetSource(0);
+                Operand desired = node.GetSource(2);
+
+                void SplitOperand(Operand source, Operand low, Operand high)
+                {
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
+                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
+                }
+
+                SplitOperand(expected, expectedLow, expectedHigh);
+                SplitOperand(desired, desiredLow, desiredHigh);
+
+                Operation operation = node;
+
+                // Update the sources and destinations with the split 64-bit halves of the
+                // whole 128-bit values. We also need two additional registers that will be
+                // used to store temporary information.
+                operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
+                operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
+
+                // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+                // so they can't be used as destination operands.
+                for (int i = 0; i < operation.SourcesCount; i++)
+                {
+                    Operand src = operation.GetSource(i);
+                    node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+                }
+
+                // Assemble the vector with the 64-bit values at the given memory location.
+                node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
+                node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
+            }
+            else
+            {
+                // We need an additional register where the store result will be written.
+                node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
+
+                // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+                // so they can't be used as destination operands.
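+                // The CAS node will be lowered to a load-exclusive/store-exclusive retry loop,
+                // roughly the following (a sketch; register names are illustrative):
+                //
+                //   loop: LDAXR w_actual, [x_address]
+                //         CMP   w_actual, w_expected
+                //         B.NE  done
+                //         STLXR w_status, w_desired, [x_address]
+                //         CBNZ  w_status, loop
+                //   done:
+                //
+                // Every source must stay live across all iterations, which is what the dummy
+                // copies below guarantee: they extend each source's live interval past the node,
+                // so the register allocator can never hand a source register out as a destination.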
+ Operation operation = node; + + for (int i = 0; i < operation.SourcesCount; i++) + { + Operand src = operation.GetSource(i); + node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src)); + } + } + + return node.ListNext; + } + + private static void HandleReturn(IntrusiveList nodes, Operation node) + { + if (node.SourcesCount == 0) + { + return; + } + + Operand source = node.GetSource(0); + + if (source.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1))); + } + else + { + Operand retReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), source.Type); + + Operation retCopyOp = Operation(Instruction.Copy, retReg, source); + + nodes.AddBefore(node, retCopyOp); + } + } + + private static Operation HandleLoadArgument( + CompilerContext cctx, + ref Span buffer, + IntrusiveList nodes, + Operand[] preservedArgs, + Operation node) + { + Operand source = node.GetSource(0); + + Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind."); + + int index = source.AsInt32(); + + int intCount = 0; + int vecCount = 0; + + for (int cIndex = 0; cIndex < index; cIndex++) + { + OperandType argType = cctx.FuncArgTypes[cIndex]; + + if (argType.IsInteger()) + { + intCount++; + } + else if (argType == OperandType.V128) + { + intCount += 2; + } + else + { + vecCount++; + } + } + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount(); + } + else if (source.Type == OperandType.V128) + { + passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount(); + } + else + { + passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount(); + } + + if (passOnReg) + { + Operand dest = node.Destination; + + if (preservedArgs[index] == default) + { + if (dest.Type == OperandType.V128) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand pArg = Local(OperandType.V128); + + Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64); + Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64); + + Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg); + Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1)); + + cctx.Cfg.Entry.Operations.AddFirst(copyH); + cctx.Cfg.Entry.Operations.AddFirst(copyL); + + preservedArgs[index] = pArg; + } + else + { + Operand pArg = Local(dest.Type); + + Operand argReg = dest.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, pArg, argReg); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[index] = pArg; + } + } + + Operation nextNode; + + if (dest.AssignmentsCount == 1) + { + // Let's propagate the argument if we can to avoid copies. 
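+                    // Informal sketch of the rewrite, with pArg0 standing for preservedArgs[index]:
+                    //
+                    //   x = LoadArgument 0          (node gets deleted)
+                    //   y = Add x, z        =>      y = Add pArg0, z
+                    //
+                    // With a single assignment, every use of the destination can read the
+                    // preserved copy directly, so no copy instruction is emitted at all.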
+ Propagate(ref buffer, dest, preservedArgs[index]); + nextNode = node.ListNext; + } + else + { + Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]); + nextNode = nodes.AddBefore(node, argCopyOp); + } + + Delete(nodes, node); + return nextNode; + } + else + { + // TODO: Pass on stack. + return node; + } + } + + private static void Propagate(ref Span buffer, Operand dest, Operand value) + { + ReadOnlySpan uses = dest.GetUses(ref buffer); + + foreach (Operation use in uses) + { + for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++) + { + Operand useSrc = use.GetSource(srcIndex); + + if (useSrc == dest) + { + use.SetSource(srcIndex, value); + } + else if (useSrc.Kind == OperandKind.Memory) + { + MemoryOperand memoryOp = useSrc.GetMemory(); + + Operand baseAddr = memoryOp.BaseAddress; + Operand index = memoryOp.Index; + bool changed = false; + + if (baseAddr == dest) + { + baseAddr = value; + changed = true; + } + + if (index == dest) + { + index = value; + changed = true; + } + + if (changed) + { + use.SetSource(srcIndex, MemoryOp( + useSrc.Type, + baseAddr, + index, + memoryOp.Scale, + memoryOp.Displacement)); + } + } + } + } + } + + private static Operand AddFloatConstantCopy( + ConstantDict constants, + IntrusiveList nodes, + Operation node, + Operand source) + { + Operand temp = Local(source.Type); + + Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source)); + + Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst); + + nodes.AddBefore(node, copyOp); + + return temp; + } + + private static Operand AddIntConstantCopy( + ConstantDict constants, + IntrusiveList nodes, + Operation node, + Operand source) + { + if (constants.TryGetValue(source.Value, source.Type, out Operand temp)) + { + return temp; + } + + temp = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, temp, source); + + nodes.AddBefore(node, copyOp); + + constants.Add(source.Value, source.Type, temp); + + return temp; + } + + private static Operand GetIntConst(Operand value) + { + if (value.Type == OperandType.FP32) + { + return Const(value.AsInt32()); + } + else if (value.Type == OperandType.FP64) + { + return Const(value.AsInt64()); + } + + return value; + } + + private static void Delete(IntrusiveList nodes, Operation node) + { + node.Destination = default; + + for (int index = 0; index < node.SourcesCount; index++) + { + node.SetSource(index, default); + } + + nodes.Remove(node); + } + + private static Operand Gpr(int register, OperandType type) + { + return Register(register, RegisterType.Integer, type); + } + + private static Operand Xmm(int register, OperandType type) + { + return Register(register, RegisterType.Vector, type); + } + + private static bool IsSameOperandDestSrc1(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Extended: + return IsSameOperandDestSrc1(operation.Intrinsic); + case Instruction.VectorInsert: + case Instruction.VectorInsert16: + case Instruction.VectorInsert8: + return true; + } + + return false; + } + + private static bool IsSameOperandDestSrc1(Intrinsic intrinsic) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + return info.Type == IntrinsicType.ScalarBinaryRd || + info.Type == IntrinsicType.ScalarTernaryFPRdByElem || + info.Type == IntrinsicType.ScalarTernaryShlRd || + info.Type == IntrinsicType.ScalarTernaryShrRd || + info.Type == IntrinsicType.VectorBinaryRd || + info.Type == 
IntrinsicType.VectorInsertByElem ||
+                   info.Type == IntrinsicType.VectorTernaryRd ||
+                   info.Type == IntrinsicType.VectorTernaryRdBitwise ||
+                   info.Type == IntrinsicType.VectorTernaryFPRdByElem ||
+                   info.Type == IntrinsicType.VectorTernaryRdByElem ||
+                   info.Type == IntrinsicType.VectorTernaryShlRd ||
+                   info.Type == IntrinsicType.VectorTernaryShrRd;
+        }
+
+        private static bool HasConstSrc1(Operation node, ulong value)
+        {
+            switch (node.Instruction)
+            {
+                case Instruction.Add:
+                case Instruction.BranchIf:
+                case Instruction.Compare:
+                case Instruction.Subtract:
+                    // The immediate encoding of those instructions does not allow Rn to be
+                    // XZR (it will be SP instead), so we can't allow an Rn constant in this case.
+                    return value == 0 && NotConstOrConst0(node.GetSource(1));
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseNot:
+                case Instruction.BitwiseOr:
+                case Instruction.ByteSwap:
+                case Instruction.CountLeadingZeros:
+                case Instruction.Multiply:
+                case Instruction.Negate:
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                    return value == 0;
+                case Instruction.Copy:
+                case Instruction.LoadArgument:
+                case Instruction.Spill:
+                case Instruction.SpillArg:
+                    return true;
+                case Instruction.Extended:
+                    return value == 0;
+            }
+
+            return false;
+        }
+
+        private static bool NotConstOrConst0(Operand operand)
+        {
+            return operand.Kind != OperandKind.Constant || operand.Value == 0;
+        }
+
+        private static bool HasConstSrc2(Instruction inst, Operand operand)
+        {
+            ulong value = operand.Value;
+
+            switch (inst)
+            {
+                case Instruction.Add:
+                case Instruction.BranchIf:
+                case Instruction.Compare:
+                case Instruction.Subtract:
+                    return ConstFitsOnUImm12Sh(value);
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseOr:
+                    return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
+                case Instruction.Multiply:
+                case Instruction.Store:
+                case Instruction.Store16:
+                case Instruction.Store8:
+                    return value == 0;
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                case Instruction.VectorExtract:
+                case Instruction.VectorExtract16:
+                case Instruction.VectorExtract8:
+                    return true;
+                case Instruction.Extended:
+                    // TODO: Check whether the actual intrinsic is supposed to take constants here.
+                    // Right now we only hit this case for fixed-point int <-> FP conversion instructions.
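+                    // Allowing the constant is presumably safe because, for those conversions,
+                    // it is the #fbits immediate, which the instruction encodes directly.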
+ return true; + } + + return false; + } + + private static bool IsCommutative(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + case Instruction.Multiply: + return true; + + case Instruction.BranchIf: + case Instruction.Compare: + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var compType = (Comparison)comp.AsInt32(); + + return compType == Comparison.Equal || compType == Comparison.NotEqual; + } + } + + return false; + } + + private static bool ConstFitsOnUImm12Sh(ulong value) + { + return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0; + } + + private static bool IsIntrinsicWithConst(Operation operation) + { + bool isIntrinsic = IsIntrinsic(operation.Instruction); + + if (isIntrinsic) + { + Intrinsic intrinsic = operation.Intrinsic; + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + // Those have integer inputs that don't support consts. + return info.Type != IntrinsicType.ScalarFPConvGpr && + info.Type != IntrinsicType.ScalarFPConvFixedGpr && + info.Type != IntrinsicType.SetRegister; + } + + return false; + } + + private static bool IsIntrinsic(Instruction inst) + { + return inst == Instruction.Extended; + } + } +} diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs index 0423c2559..c5a22a537 100644 --- a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs +++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs @@ -90,6 +90,47 @@ namespace ARMeilleure.CodeGen.Optimizations } break; + case Instruction.Compare: + if (type == OperandType.I32 && + operation.GetSource(0).Type == type && + operation.GetSource(1).Type == type) + { + switch ((Comparison)operation.GetSource(2).Value) + { + case Comparison.Equal: + EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0); + break; + case Comparison.NotEqual: + EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0); + break; + case Comparison.Greater: + EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0); + break; + case Comparison.LessOrEqual: + EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0); + break; + case Comparison.GreaterUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0); + break; + case Comparison.LessOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0); + break; + case Comparison.GreaterOrEqual: + EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0); + break; + case Comparison.Less: + EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0); + break; + case Comparison.GreaterOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0); + break; + case Comparison.LessUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 
1 : 0); + break; + } + } + break; + case Instruction.Copy: if (type == OperandType.I32) { diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs index 919e996bd..a45bb4551 100644 --- a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs +++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs @@ -44,8 +44,8 @@ namespace ARMeilleure.CodeGen.Optimizations ConstantFolding.RunPass(node); Simplification.RunPass(node); - if (DestIsLocalVar(node)) - { + if (DestIsSingleLocalVar(node)) + { if (IsPropagableCompare(node)) { modified |= PropagateCompare(ref buffer, node); @@ -99,20 +99,6 @@ namespace ARMeilleure.CodeGen.Optimizations while (modified); } - private static Span GetUses(ref Span buffer, Operand operand) - { - ReadOnlySpan uses = operand.Uses; - - if (buffer.Length < uses.Length) - { - buffer = Allocators.Default.AllocateSpan((uint)uses.Length); - } - - uses.CopyTo(buffer); - - return buffer.Slice(0, uses.Length); - } - private static bool PropagateCompare(ref Span buffer, Operation compOp) { // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form @@ -160,7 +146,7 @@ namespace ARMeilleure.CodeGen.Optimizations Comparison compType = (Comparison)comp.AsInt32(); - Span uses = GetUses(ref buffer, dest); + Span uses = dest.GetUses(ref buffer); foreach (Operation use in uses) { @@ -199,7 +185,7 @@ namespace ARMeilleure.CodeGen.Optimizations Operand dest = copyOp.Destination; Operand source = copyOp.GetSource(0); - Span uses = GetUses(ref buffer, dest); + Span uses = dest.GetUses(ref buffer); foreach (Operation use in uses) { @@ -231,12 +217,12 @@ namespace ARMeilleure.CodeGen.Optimizations private static bool IsUnused(Operation node) { - return DestIsLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node); + return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node); } - private static bool DestIsLocalVar(Operation node) + private static bool DestIsSingleLocalVar(Operation node) { - return node.Destination != default && node.Destination.Kind == OperandKind.LocalVariable; + return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable; } private static bool HasSideEffects(Operation node) diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs index d8a40365b..6ea62c28b 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs @@ -17,8 +17,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private const int InstructionGap = 2; private const int InstructionGapMask = InstructionGap - 1; - private const int RegistersCount = 16; - private HashSet _blockEdges; private LiveRange[] _blockRanges; private BitMap[] _blockLiveIn; @@ -59,7 +57,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators void PopulateFreePositions(RegisterType type, out int[] positions, out int count) { - positions = new int[RegistersCount]; + positions = new int[masks.RegistersCount]; count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type)); int mask = masks.GetAvailableRegisters(type); @@ -115,7 +113,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators StackAllocator stackAlloc, RegisterMasks regMasks) { - NumberLocals(cfg); + NumberLocals(cfg, regMasks.RegistersCount); var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count); @@ -134,22 +132,25 @@ 
namespace ARMeilleure.CodeGen.RegisterAllocators { context.Active.Set(index); - if (current.Register.Type == RegisterType.Integer) + if (current.IsFixedAndUsed) { - context.IntUsedRegisters |= 1 << current.Register.Index; - } - else /* if (interval.Register.Type == RegisterType.Vector) */ - { - context.VecUsedRegisters |= 1 << current.Register.Index; + if (current.Register.Type == RegisterType.Integer) + { + context.IntUsedRegisters |= 1 << current.Register.Index; + } + else /* if (interval.Register.Type == RegisterType.Vector) */ + { + context.VecUsedRegisters |= 1 << current.Register.Index; + } } continue; } - AllocateInterval(context, current, index); + AllocateInterval(context, current, index, regMasks.RegistersCount); } - for (int index = RegistersCount * 2; index < _intervals.Count; index++) + for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++) { if (!_intervals[index].IsSpilled) { @@ -163,7 +164,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize); } - private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex) + private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount) { // Check active intervals that already ended. foreach (int iIndex in context.Active) @@ -199,17 +200,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - if (!TryAllocateRegWithoutSpill(context, current, cIndex)) + if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount)) { - AllocateRegWithSpill(context, current, cIndex); + AllocateRegWithSpill(context, current, cIndex, registersCount); } } - private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex) + private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount) { RegisterType regType = current.Local.Type.ToRegisterType(); - Span freePositions = stackalloc int[RegistersCount]; + Span freePositions = stackalloc int[registersCount]; context.GetFreePositions(regType, freePositions, out int freePositionsCount); @@ -278,7 +279,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); } else { @@ -302,12 +303,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return true; } - private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex) + private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount) { RegisterType regType = current.Local.Type.ToRegisterType(); - Span usePositions = stackalloc int[RegistersCount]; - Span blockedPositions = stackalloc int[RegistersCount]; + Span usePositions = stackalloc int[registersCount]; + Span blockedPositions = stackalloc int[registersCount]; context.GetFreePositions(regType, usePositions, out _); context.GetFreePositions(regType, blockedPositions, out _); @@ -386,7 +387,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); Spill(context, current); } @@ -396,7 +397,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators // so we only need to 
split the intervals using the selected register. current.Register = new Register(selectedReg, regType); - SplitAndSpillOverlappingIntervals(context, current); + SplitAndSpillOverlappingIntervals(context, current, registersCount); context.Active.Set(cIndex); } @@ -417,14 +418,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); } else { Spill(context, splitChild); } - SplitAndSpillOverlappingIntervals(context, current); + SplitAndSpillOverlappingIntervals(context, current, registersCount); context.Active.Set(cIndex); } @@ -460,7 +461,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return selected; } - private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current) + private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount) { foreach (int iIndex in context.Active) { @@ -468,7 +469,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (!interval.IsFixed && interval.Register == current.Register) { - SplitAndSpillOverlappingInterval(context, current, interval); + SplitAndSpillOverlappingInterval(context, current, interval, registersCount); context.Active.Clear(iIndex); } @@ -480,7 +481,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current)) { - SplitAndSpillOverlappingInterval(context, current, interval); + SplitAndSpillOverlappingInterval(context, current, interval, registersCount); context.Inactive.Clear(iIndex); } @@ -490,7 +491,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators private void SplitAndSpillOverlappingInterval( AllocationContext context, LiveInterval current, - LiveInterval interval) + LiveInterval interval, + int registersCount) { // If there's a next use after the start of the current interval, // we need to split the spilled interval twice, and re-insert it @@ -522,7 +524,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators splitChild = right; } - InsertInterval(splitChild); + InsertInterval(splitChild, registersCount); } else { @@ -530,13 +532,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } } - private void InsertInterval(LiveInterval interval) + private void InsertInterval(LiveInterval interval, int registersCount) { Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses."); Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval."); Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval."); - int startIndex = RegistersCount * 2; + int startIndex = registersCount * 2; int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null); @@ -790,12 +792,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators return _operationNodes[position / InstructionGap]; } - private void NumberLocals(ControlFlowGraph cfg) + private void NumberLocals(ControlFlowGraph cfg, int registersCount) { _operationNodes = new List<(IntrusiveList, Operation)>(); _intervals = new List(); - for (int index = 0; index < RegistersCount; index++) + for (int index = 0; index < registersCount; index++) { _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer))); _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector))); @@ -1041,6 +1043,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators { LiveInterval 
interval = _intervals[GetOperandId(dest)]; + if (interval.IsFixed) + { + interval.IsFixedAndUsed = true; + } + interval.SetStart(operationPos + 1); interval.AddUsePosition(operationPos + 1); } diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs index 77ad95416..d739ad281 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -27,6 +27,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public Register Register; public bool IsFixed; + public bool IsFixedAndUsed; } private readonly Data* _data; @@ -44,6 +45,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public ref int SpillOffset => ref _data->SpillOffset; public bool IsFixed => _data->IsFixed; + public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed; public bool IsEmpty => FirstRange == default; public bool IsSplit => Children.Count != 0; public bool IsSpilled => SpillOffset != -1; @@ -114,7 +116,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators } else { - FirstRange = new LiveRange(position, position + 1); + FirstRange = new LiveRange(position, position + 1); End = position + 1; } } diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs index 5b11aac20..bc948f95f 100644 --- a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs +++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs @@ -11,6 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators public int VecCallerSavedRegisters { get; } public int IntCalleeSavedRegisters { get; } public int VecCalleeSavedRegisters { get; } + public int RegistersCount { get; } public RegisterMasks( int intAvailableRegisters, @@ -18,7 +19,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators int intCallerSavedRegisters, int vecCallerSavedRegisters, int intCalleeSavedRegisters, - int vecCalleeSavedRegisters) + int vecCalleeSavedRegisters, + int registersCount) { IntAvailableRegisters = intAvailableRegisters; VecAvailableRegisters = vecAvailableRegisters; @@ -26,6 +28,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators VecCallerSavedRegisters = vecCallerSavedRegisters; IntCalleeSavedRegisters = intCalleeSavedRegisters; VecCalleeSavedRegisters = vecCalleeSavedRegisters; + RegistersCount = registersCount; } public int GetAvailableRegisters(RegisterType type) diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index e589da140..8b5a3fc57 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -16,6 +16,7 @@ namespace ARMeilleure.CodeGen.X86 { static class CodeGenerator { + private const int RegistersCount = 16; private const int PageSize = 0x1000; private const int StackGuardSize = 0x2000; @@ -143,7 +144,8 @@ namespace ARMeilleure.CodeGen.X86 CallingConvention.GetIntCallerSavedRegisters(), CallingConvention.GetVecCallerSavedRegisters(), CallingConvention.GetIntCalleeSavedRegisters(), - CallingConvention.GetVecCalleeSavedRegisters()); + CallingConvention.GetVecCalleeSavedRegisters(), + RegistersCount); AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks); diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index 6407a9a7b..8c909ac13 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -5,8 +5,6 @@ namespace ARMeilleure.CodeGen.X86 { static class 
IntrinsicTable { - private const int BadOp = 0; - private static IntrinsicInfo[] _intrinTable; static IntrinsicTable() diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index b91c522ec..3e65db23d 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -21,22 +21,47 @@ namespace ARMeilleure.Instructions { public static void Abs_S(ArmEmitterContext context) { - EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS); + } + else + { + EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } } public static void Abs_V(ArmEmitterContext context) { - EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV); + } + else + { + EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } } public static void Add_S(ArmEmitterContext context) { - EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS); + } + else + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } } public static void Add_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -62,24 +87,42 @@ namespace ARMeilleure.Instructions public static void Addhn_V(ArmEmitterContext context) { - EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false); + } } public static void Addp_S(ArmEmitterContext context) { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; - Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); - Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); + Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); - Operand res = context.Add(ne0, ne1); + Operand res = context.Add(ne0, ne1); - context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + } } public static void Addp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PaddInstruction); } @@ -91,68 +134,89 @@ namespace ARMeilleure.Instructions public static void Addv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, 
op2) => context.Add(op1, op2)); + } } public static void Cls_V(ArmEmitterContext context) { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; - - Operand res = context.VectorZero(); - - int elems = op.GetBytesCount() >> op.Size; - - int eSize = 8 << op.Size; - - for (int index = 0; index < elems; index++) + if (Optimizations.UseAdvSimd) { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - - Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize)); - - res = EmitVectorInsert(context, res, de, index, op.Size); - } - - context.Copy(GetVec(op.Rd), res); - } - - public static void Clz_V(ArmEmitterContext context) - { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; - - int eSize = 8 << op.Size; - - Operand res = eSize switch { - 8 => Clz_V_I8 (context, GetVec(op.Rn)), - 16 => Clz_V_I16(context, GetVec(op.Rn)), - 32 => Clz_V_I32(context, GetVec(op.Rn)), - _ => default - }; - - if (res != default) - { - if (op.RegisterSize == RegisterSize.Simd64) - { - res = context.VectorZeroUpper64(res); - } + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV); } else { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + int elems = op.GetBytesCount() >> op.Size; - res = context.VectorZero(); + int eSize = 8 << op.Size; for (int index = 0; index < elems; index++) { Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); + Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize)); res = EmitVectorInsert(context, res, de, index, op.Size); } - } - context.Copy(GetVec(op.Rd), res); + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Clz_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand res = eSize switch { + 8 => Clz_V_I8 (context, GetVec(op.Rn)), + 16 => Clz_V_I16(context, GetVec(op.Rn)), + 32 => Clz_V_I32(context, GetVec(op.Rn)), + _ => default + }; + + if (res != default) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + } + else + { + int elems = op.GetBytesCount() >> op.Size; + + res = context.VectorZero(); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + + context.Copy(GetVec(op.Rd), res); + } } private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg) @@ -271,36 +335,47 @@ namespace ARMeilleure.Instructions public static void Cnt_V(ArmEmitterContext context) { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; - - Operand res = context.VectorZero(); - - int elems = op.RegisterSize == RegisterSize.Simd128 ? 
16 : 8; - - for (int index = 0; index < elems; index++) + if (Optimizations.UseAdvSimd) { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); - - Operand de; - - if (Optimizations.UsePopCnt) - { - de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne); - } - else - { - de = EmitCountSetBits8(context, ne); - } - - res = EmitVectorInsert(context, res, de, index, 0); + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV); } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; - context.Copy(GetVec(op.Rd), res); + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + Operand de; + + if (Optimizations.UsePopCnt) + { + de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne); + } + else + { + de = EmitCountSetBits8(context, ne); + } + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } } public static void Fabd_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -336,7 +411,11 @@ namespace ARMeilleure.Instructions public static void Fabd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -377,7 +456,11 @@ namespace ARMeilleure.Instructions public static void Fabs_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -405,7 +488,11 @@ namespace ARMeilleure.Instructions public static void Fabs_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -440,7 +527,11 @@ namespace ARMeilleure.Instructions public static void Fadd_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); } @@ -459,7 +550,11 @@ namespace ARMeilleure.Instructions public static void Fadd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); } @@ -478,7 +573,11 @@ namespace ARMeilleure.Instructions public static void Faddp_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS); + } + 
else if (Optimizations.FastFP && Optimizations.UseSse3) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -506,7 +605,11 @@ namespace ARMeilleure.Instructions public static void Faddp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -534,7 +637,11 @@ namespace ARMeilleure.Instructions public static void Fdiv_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); } @@ -553,7 +660,11 @@ namespace ARMeilleure.Instructions public static void Fdiv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd); } @@ -572,7 +683,11 @@ namespace ARMeilleure.Instructions public static void Fmadd_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -607,7 +722,11 @@ namespace ARMeilleure.Instructions public static void Fmax_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -628,7 +747,11 @@ namespace ARMeilleure.Instructions public static void Fmax_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -649,7 +772,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true); } @@ -664,7 +791,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false); } @@ -679,7 +810,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnmp_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + 
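+                // Scalar FMAXNMP reduces the two elements of the single source vector, so the
+                // unary helper fits even though a maximum is computed. The "number" semantics
+                // (IEEE 754 maxNum: if exactly one element is NaN, return the other) come for
+                // free here, while the x86 fallback below has to reconstruct them manually.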
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2ScalarPairwiseOpF(context, (op1, op2) => { @@ -697,7 +832,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnmp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -715,7 +854,11 @@ namespace ARMeilleure.Instructions public static void Fmaxnmv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -733,7 +876,11 @@ namespace ARMeilleure.Instructions public static void Fmaxp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -757,7 +904,11 @@ namespace ARMeilleure.Instructions public static void Fmaxv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -781,7 +932,11 @@ namespace ARMeilleure.Instructions public static void Fmin_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -802,7 +957,11 @@ namespace ARMeilleure.Instructions public static void Fmin_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41ProcessNaNsOpF(context, (op1, op2) => { @@ -823,7 +982,11 @@ namespace ARMeilleure.Instructions public static void Fminnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true); } @@ -838,7 +1001,11 @@ namespace ARMeilleure.Instructions public static void Fminnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false); } @@ -853,7 +1020,11 @@ namespace ARMeilleure.Instructions public static void Fminnmp_S(ArmEmitterContext context) { - if (Optimizations.FastFP && 
Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2ScalarPairwiseOpF(context, (op1, op2) => { @@ -871,7 +1042,11 @@ namespace ARMeilleure.Instructions public static void Fminnmp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -889,7 +1064,11 @@ namespace ARMeilleure.Instructions public static void Fminnmv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -907,7 +1086,11 @@ namespace ARMeilleure.Instructions public static void Fminp_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorPairwiseOpF(context, (op1, op2) => { @@ -931,7 +1114,11 @@ namespace ARMeilleure.Instructions public static void Fminv_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => { @@ -955,15 +1142,26 @@ namespace ARMeilleure.Instructions public static void Fmla_Se(ArmEmitterContext context) // Fused. { - EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe); + } + else + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Fmla_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1006,7 +1204,11 @@ namespace ARMeilleure.Instructions public static void Fmla_Ve(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; @@ -1055,15 +1257,26 @@ namespace ARMeilleure.Instructions public static void Fmls_Se(ArmEmitterContext context) // Fused. 
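+        // AdvSimd note: FMLS (by element) accumulates into the destination register
+        // (Rd = Rd - Rn * Vm[index]), which is why the fast path below goes through the
+        // "Rd" ternary helper that keeps the destination as an implicit source.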
{ - EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe); + } + else + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static void Fmls_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1106,7 +1319,11 @@ namespace ARMeilleure.Instructions public static void Fmls_Ve(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; @@ -1155,7 +1372,11 @@ namespace ARMeilleure.Instructions public static void Fmsub_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1190,7 +1411,11 @@ namespace ARMeilleure.Instructions public static void Fmul_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); } @@ -1209,12 +1434,23 @@ namespace ARMeilleure.Instructions public static void Fmul_Se(ArmEmitterContext context) { - EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe); + } + else + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Fmul_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); } @@ -1233,7 +1469,11 @@ namespace ARMeilleure.Instructions public static void Fmul_Ve(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; @@ -1283,39 +1523,71 @@ namespace ARMeilleure.Instructions public static void Fmulx_S(ArmEmitterContext context) { - EmitScalarBinaryOpF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS); + } + else + { + 
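+                // FMULX only differs from an ordinary multiply when one operand is a zero and
+                // the other an infinity: the result is then +/-2.0 (sign being the XOR of the
+                // operand signs) instead of a NaN. There is no direct SSE equivalent, hence the
+                // soft-float fallback.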
EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fmulx_Se(ArmEmitterContext context) { - EmitScalarBinaryOpByElemF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe); + } + else + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fmulx_V(ArmEmitterContext context) { - EmitVectorBinaryOpF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fmulx_Ve(ArmEmitterContext context) { - EmitVectorBinaryOpByElemF(context, (op1, op2) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); - }); + InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe); + } + else + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } } public static void Fneg_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -1344,7 +1616,11 @@ namespace ARMeilleure.Instructions public static void Fneg_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -1380,7 +1656,11 @@ namespace ARMeilleure.Instructions public static void Fnmadd_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1423,7 +1703,11 @@ namespace ARMeilleure.Instructions public static void Fnmsub_S(ArmEmitterContext context) // Fused. 
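+        // FNMSUB computes Rn * Rm - Ra (the accumulator negated) with a single rounding;
+        // the AdvSimd fast path below maps onto it directly, while the x86 path has to
+        // compose the result from separate operations.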
{ - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1466,7 +1750,14 @@ namespace ARMeilleure.Instructions public static void Fnmul_S(ArmEmitterContext context) { - EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } } public static void Frecpe_S(ArmEmitterContext context) @@ -1475,7 +1766,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true); @@ -1496,7 +1791,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false); @@ -1518,7 +1817,11 @@ namespace ARMeilleure.Instructions public static void Frecps_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1561,7 +1864,11 @@ namespace ARMeilleure.Instructions public static void Frecps_V(ArmEmitterContext context) // Fused. 
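+        // FRECPS computes 2.0 - Rn * Rm, the Newton-Raphson refinement step that pairs
+        // with the FRECPE estimate; each fused step roughly doubles the precision of the
+        // reciprocal approximation.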
{ - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1609,15 +1916,26 @@ namespace ARMeilleure.Instructions public static void Frecpx_S(ArmEmitterContext context) { - EmitScalarUnaryOpF(context, (op1) => + if (Optimizations.UseAdvSimd) { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1); - }); + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpxS); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1); + }); + } } public static void Frinta_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway); } @@ -1632,7 +1950,11 @@ namespace ARMeilleure.Instructions public static void Frinta_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway); } @@ -1647,23 +1969,41 @@ namespace ARMeilleure.Instructions public static void Frinti_S(ArmEmitterContext context) { - EmitScalarUnaryOpF(context, (op1) => + if (Optimizations.UseAdvSimd) { - return EmitRoundByRMode(context, op1); - }); + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } } public static void Frinti_V(ArmEmitterContext context) { - EmitVectorUnaryOpF(context, (op1) => + if (Optimizations.UseAdvSimd) { - return EmitRoundByRMode(context, op1); - }); + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } } public static void Frintm_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); } @@ -1678,7 +2018,11 @@ namespace ARMeilleure.Instructions public static void Frintm_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); } @@ -1693,7 +2037,11 @@ namespace ARMeilleure.Instructions public static void Frintn_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest); } @@ -1708,7 +2056,11 @@ namespace ARMeilleure.Instructions public static void Frintn_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context,
Intrinsic.Arm64FrintnV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest); } @@ -1723,7 +2075,11 @@ namespace ARMeilleure.Instructions public static void Frintp_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); } @@ -1738,7 +2094,11 @@ namespace ARMeilleure.Instructions public static void Frintp_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); } @@ -1753,6 +2113,7 @@ namespace ARMeilleure.Instructions public static void Frintx_S(ArmEmitterContext context) { + // TODO Arm64: Fast path. Should we set host FPCR? EmitScalarUnaryOpF(context, (op1) => { return EmitRoundByRMode(context, op1); @@ -1761,6 +2122,7 @@ namespace ARMeilleure.Instructions public static void Frintx_V(ArmEmitterContext context) { + // TODO Arm64: Fast path. Should we set host FPCR? EmitVectorUnaryOpF(context, (op1) => { return EmitRoundByRMode(context, op1); @@ -1769,7 +2131,11 @@ namespace ARMeilleure.Instructions public static void Frintz_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS); + } + else if (Optimizations.UseSse41) { EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero); } @@ -1784,7 +2150,11 @@ namespace ARMeilleure.Instructions public static void Frintz_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero); } @@ -1803,7 +2173,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true); @@ -1824,7 +2198,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) { Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false); @@ -1846,7 +2224,11 @@ namespace ARMeilleure.Instructions public static void Frsqrts_S(ArmEmitterContext context) // Fused. 
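        // FRSQRTS is the reciprocal-square-root analogue of FRECPS: it returns
        // (3.0 - n * m) / 2.0, so an FRSQRTE estimate x of 1/sqrt(d) is refined with
        // x *= FRSQRTS(d * x, x).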
{ - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1895,7 +2277,11 @@ namespace ARMeilleure.Instructions public static void Frsqrts_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -1949,7 +2335,11 @@ namespace ARMeilleure.Instructions public static void Fsqrt_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); } @@ -1964,7 +2354,11 @@ namespace ARMeilleure.Instructions public static void Fsqrt_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd); } @@ -1979,7 +2373,11 @@ namespace ARMeilleure.Instructions public static void Fsub_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); } @@ -1998,7 +2396,11 @@ namespace ARMeilleure.Instructions public static void Fsub_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); } @@ -2017,7 +2419,11 @@ namespace ARMeilleure.Instructions public static void Mla_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorMul_AddSub(context, AddSub.Add); } @@ -2032,15 +2438,26 @@ namespace ARMeilleure.Instructions public static void Mla_Ve(ArmEmitterContext context) { - EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe); + } + else + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Mls_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorMul_AddSub(context, AddSub.Subtract); } @@ -2055,15 +2472,26 @@ namespace 
ARMeilleure.Instructions public static void Mls_Ve(ArmEmitterContext context) { - EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe); + } + else + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static void Mul_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV); + } + else if (Optimizations.UseSse41) { EmitSse41VectorMul_AddSub(context, AddSub.None); } @@ -2075,17 +2503,35 @@ namespace ARMeilleure.Instructions public static void Mul_Ve(ArmEmitterContext context) { - EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe); + } + else + { + EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Neg_S(ArmEmitterContext context) { - EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS); + } + else + { + EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + } } public static void Neg_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV); + } + else if (Optimizations.UseSse2) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; @@ -2110,7 +2556,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UsePclmulqdq && op.Size == 3) + if (Optimizations.UseAdvSimd && false) // Not supported by all Arm CPUs. 
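        // The 64x64 -> 128-bit carryless multiply (PMULL with the 1Q arrangement) belongs to
        // the optional FEAT_PMULL/Crypto extension rather than baseline AdvSimd, and nothing
        // here probes the host for it yet, hence the hard-disabled condition above. With a
        // capability probe the guard would presumably become something like the sketch below,
        // where SupportsPmull64 is a hypothetical flag that this patch does not define:
        //
        //     if (Optimizations.UseAdvSimd && HardwareCapabilities.SupportsPmull64)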
+ { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV); + } + else if (Optimizations.UsePclmulqdq && op.Size == 3) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2214,33 +2664,65 @@ namespace ARMeilleure.Instructions public static void Raddhn_V(ArmEmitterContext context) { - EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + } } public static void Rsubhn_V(ArmEmitterContext context) { - EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + } } public static void Saba_V(ArmEmitterContext context) { - EmitVectorTernaryOpSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV); + } + else + { + EmitVectorTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Sabal_V(ArmEmitterContext context) { - EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Sabd_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2262,7 +2744,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2293,12 +2779,23 @@ namespace ARMeilleure.Instructions public static void Sadalp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: true, accumulate: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV); + } + else + { + EmitAddLongPairwise(context, signed: true, accumulate: true); + } } public static void Saddl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2328,17 +2825,35 @@ namespace ARMeilleure.Instructions public static void Saddlp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: true, accumulate: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV); + } + else + { + 
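                // Pairwise add-long: adjacent lanes are sign-extended and summed into lanes of
                // twice the width, e.g. for 8H -> 4S: d[i] = sx(n[2 * i]) + sx(n[2 * i + 1]).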
EmitAddLongPairwise(context, signed: true, accumulate: false); + } } public static void Saddlv_V(ArmEmitterContext context) { - EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV); + } + else + { + EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + } } public static void Saddw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2368,7 +2883,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV); + } + else if (Optimizations.UseSse2 && op.Size > 0) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2404,7 +2923,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2442,7 +2965,11 @@ namespace ARMeilleure.Instructions public static void Smax_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2468,7 +2995,11 @@ namespace ARMeilleure.Instructions public static void Smaxp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction); } @@ -2480,12 +3011,23 @@ namespace ARMeilleure.Instructions public static void Smaxv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV); + } + else + { + EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); + } } public static void Smin_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2511,7 +3053,11 @@ namespace ARMeilleure.Instructions public static void Sminp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PminsInstruction); } @@ -2523,14 +3069,25 @@ namespace ARMeilleure.Instructions public static void Sminv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); + if (Optimizations.UseAdvSimd) + { + 
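                // SMINV folds all lanes into a single signed minimum placed in element 0,
                // e.g. for 4S: d = min(min(n[0], n[1]), min(n[2], n[3])); the IR fallback
                // below performs the same reduction lane by lane through EmitMin64Op.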
InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV); + } + else + { + EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); + } } public static void Smlal_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -2566,17 +3123,28 @@ namespace ARMeilleure.Instructions public static void Smlal_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe); + } + else + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Smlsl_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -2612,117 +3180,268 @@ namespace ARMeilleure.Instructions public static void Smlsl_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe); + } + else + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static void Smull_V(ArmEmitterContext context) { - EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Smull_Ve(ArmEmitterContext context) { - EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe); + } + else + { + EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Sqabs_S(ArmEmitterContext context) { - EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS); + } + else + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } } public static void Sqabs_V(ArmEmitterContext context) { - EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV); + } + else + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } } public static void Sqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + if 
(Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + } } public static void Sqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + } } public static void Sqdmulh_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } } public static void Sqdmulh_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } } public static void Sqdmulh_Ve(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe); + } + else + { + EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } } public static void Sqneg_S(ArmEmitterContext context) { - EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS); + } + else + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } } public static void Sqneg_V(ArmEmitterContext context) { - EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV); + } + else + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } } public static void Sqrdmulh_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } } public static void Sqrdmulh_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } } public static void Sqrdmulh_Ve(ArmEmitterContext context) { - 
EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe); + } + else + { + EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } } public static void Sqsub_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + } } public static void Sqsub_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + } } public static void Sqxtn_S(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + } } public static void Sqxtn_V(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + } } public static void Sqxtun_S(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + } } public static void Sqxtun_V(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + } } public static void Srhadd_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2764,7 +3483,11 @@ namespace ARMeilleure.Instructions public static void Ssubl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2794,7 +3517,11 @@ namespace ARMeilleure.Instructions public static void Ssubw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2822,12 +3549,23 @@ 
namespace ARMeilleure.Instructions public static void Sub_S(ArmEmitterContext context) { - EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS); + } + else + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } } public static void Sub_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2853,38 +3591,77 @@ namespace ARMeilleure.Instructions public static void Subhn_V(ArmEmitterContext context) { - EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + } } public static void Suqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + } } public static void Suqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + } } public static void Uaba_V(ArmEmitterContext context) { - EmitVectorTernaryOpZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Uabal_V(ArmEmitterContext context) { - EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } } public static void Uabd_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2906,7 +3683,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -2937,12 +3718,23 @@ namespace ARMeilleure.Instructions public static void Uadalp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: 
false, accumulate: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV); + } + else + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } } public static void Uaddl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -2972,17 +3764,35 @@ namespace ARMeilleure.Instructions public static void Uaddlp_V(ArmEmitterContext context) { - EmitAddLongPairwise(context, signed: false, accumulate: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV); + } + else + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } } public static void Uaddlv_V(ArmEmitterContext context) { - EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV); + } + else + { + EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } } public static void Uaddw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3012,7 +3822,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV); + } + else if (Optimizations.UseSse2 && op.Size > 0) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -3048,7 +3862,11 @@ namespace ARMeilleure.Instructions { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -3079,7 +3897,11 @@ namespace ARMeilleure.Instructions public static void Umax_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3105,7 +3927,11 @@ namespace ARMeilleure.Instructions public static void Umaxp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction); } @@ -3117,12 +3943,23 @@ namespace ARMeilleure.Instructions public static void Umaxv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); + } } public static void Umin_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if 
(Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3148,7 +3985,11 @@ namespace ARMeilleure.Instructions public static void Uminp_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV); + } + else if (Optimizations.UseSsse3) { EmitSsse3VectorPairwiseOp(context, X86PminuInstruction); } @@ -3160,14 +4001,25 @@ namespace ARMeilleure.Instructions public static void Uminv_V(ArmEmitterContext context) { - EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); + } } public static void Umlal_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -3203,17 +4055,28 @@ namespace ARMeilleure.Instructions public static void Umlal_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Add(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe); + } + else + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } } public static void Umlsl_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse41 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV); + } + else if (Optimizations.UseSse41 && op.Size < 2) { Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -3249,57 +4112,124 @@ namespace ARMeilleure.Instructions public static void Umlsl_Ve(ArmEmitterContext context) { - EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + if (Optimizations.UseAdvSimd) { - return context.Subtract(op1, context.Multiply(op2, op3)); - }); + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe); + } + else + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } } public static void Umull_V(ArmEmitterContext context) { - EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void Umull_Ve(ArmEmitterContext context) { - EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe); + } + else + { + EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } } public static void 
Uqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } } public static void Uqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } } public static void Uqsub_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } } public static void Uqsub_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } } public static void Uqxtn_S(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } } public static void Uqxtn_V(ArmEmitterContext context) { - EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } } public static void Urhadd_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; - if (Optimizations.UseSse2 && op.Size < 2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV); + } + else if (Optimizations.UseSse2 && op.Size < 2) { Operand n = GetVec(op.Rn); Operand m = GetVec(op.Rm); @@ -3330,17 +4260,35 @@ namespace ARMeilleure.Instructions public static void Usqadd_S(ArmEmitterContext context) { - EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } } public static void Usqadd_V(ArmEmitterContext context) { - EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } } public static void Usubl_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -3370,7 +4318,11 @@ namespace ARMeilleure.Instructions public static void Usubw_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if 
(Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV); + } + else if (Optimizations.UseSse41) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index 79b376e95..a9994e412 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -2,6 +2,7 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; using System; + using static ARMeilleure.Instructions.InstEmitFlowHelper; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper; @@ -30,7 +31,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarUnaryOpSimd32(context, (m) => { @@ -49,7 +54,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -76,7 +85,11 @@ namespace ARMeilleure.Instructions public static void Vadd_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); } @@ -92,7 +105,11 @@ namespace ARMeilleure.Instructions public static void Vadd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); } @@ -280,7 +297,11 @@ namespace ARMeilleure.Instructions public static void Vfma_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd); } @@ -299,7 +320,11 @@ namespace ARMeilleure.Instructions public static void Vfma_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps); } @@ -314,7 +339,11 @@ namespace ARMeilleure.Instructions public static void Vfms_S(ArmEmitterContext context) // Fused. 
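        // VFMS computes d = d - n * m with a single rounding. The A64 FMSUB used below
        // (a - n * m, with a bound to d) and the x86 VFNMADD231 form (-(n * m) + d) are the
        // same fused operation expressed under different operand conventions.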
{ - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd); } @@ -333,7 +362,11 @@ namespace ARMeilleure.Instructions public static void Vfms_V(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps); } @@ -348,7 +381,11 @@ namespace ARMeilleure.Instructions public static void Vfnma_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd); } @@ -367,7 +404,11 @@ namespace ARMeilleure.Instructions public static void Vfnms_S(ArmEmitterContext context) // Fused. { - if (Optimizations.FastFP && Optimizations.UseFma) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) { EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd); } @@ -419,7 +460,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS); + } + else if (Optimizations.UseSse2) { EmitScalarUnaryOpSimd32(context, (m) => { @@ -445,7 +490,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS); + } + else if (Optimizations.UseSse2) { EmitScalarBinaryOpSimd32(context, (n, m) => { @@ -473,7 +522,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true); } @@ -498,7 +551,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true); } @@ -525,7 +582,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if 
(Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -554,7 +615,11 @@ namespace ARMeilleure.Instructions public static void Vdiv_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); } @@ -573,7 +638,11 @@ namespace ARMeilleure.Instructions public static void Vmaxnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, true, true); } @@ -585,7 +654,11 @@ namespace ARMeilleure.Instructions public static void Vmaxnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, true, false); } @@ -597,7 +670,11 @@ namespace ARMeilleure.Instructions public static void Vminnm_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, false, true); } @@ -609,7 +686,11 @@ namespace ARMeilleure.Instructions public static void Vminnm_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse41) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) { EmitSse41MaxMinNumOpF32(context, false, false); } @@ -621,7 +702,11 @@ namespace ARMeilleure.Instructions public static void Vmax_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd); } @@ -664,7 +749,11 @@ namespace ARMeilleure.Instructions public static void Vmin_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd); } @@ -707,7 +796,11 @@ namespace ARMeilleure.Instructions public static void Vmla_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + 
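                // VMLA is architecturally unfused (the product is rounded before the add),
                // while FMADD applies a single rounding to the fused result; the difference
                // is tolerated here because this path, like the SSE one, sits behind FastFP.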
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); } @@ -730,7 +823,11 @@ namespace ARMeilleure.Instructions public static void Vmla_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); } @@ -786,7 +883,11 @@ namespace ARMeilleure.Instructions public static void Vmls_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); } @@ -809,7 +910,11 @@ namespace ARMeilleure.Instructions public static void Vmls_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); } @@ -865,7 +970,11 @@ namespace ARMeilleure.Instructions public static void Vmul_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); } @@ -884,7 +993,11 @@ namespace ARMeilleure.Instructions public static void Vmul_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); } @@ -975,7 +1088,11 @@ namespace ARMeilleure.Instructions public static void Vpadd_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps); } @@ -1008,7 +1125,11 @@ namespace ARMeilleure.Instructions public static void Vpmax_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps); } @@ -1038,7 +1159,11 @@ namespace ARMeilleure.Instructions public static void 
Vpmin_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps); } @@ -1217,7 +1342,11 @@ namespace ARMeilleure.Instructions { int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) { EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0); } @@ -1237,7 +1366,11 @@ namespace ARMeilleure.Instructions public static void Vrecps(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; bool single = (op.Size & 1) == 0; @@ -1304,7 +1437,11 @@ namespace ARMeilleure.Instructions { int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) { EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0); } @@ -1324,7 +1461,11 @@ namespace ARMeilleure.Instructions public static void Vrsqrts(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; bool single = (op.Size & 1) == 0; @@ -1393,7 +1534,11 @@ namespace ARMeilleure.Instructions public static void Vsqrt_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); } @@ -1408,7 +1553,11 @@ namespace ARMeilleure.Instructions public static void Vsub_S(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); } @@ -1420,7 +1569,11 @@ namespace ARMeilleure.Instructions public static void Vsub_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); } diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs 
b/ARMeilleure/Instructions/InstEmitSimdCmp.cs index 71055155c..c32b64ba1 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -466,12 +466,26 @@ namespace ARMeilleure.Instructions public static void Fcmp_S(ArmEmitterContext context) { - EmitFcmpOrFcmpe(context, signalNaNs: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false); + } + else + { + EmitFcmpOrFcmpe(context, signalNaNs: false); + } } public static void Fcmpe_S(ArmEmitterContext context) { - EmitFcmpOrFcmpe(context, signalNaNs: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true); + } + else + { + EmitFcmpOrFcmpe(context, signalNaNs: true); + } } private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs) diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs index 339d32939..a990e057d 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs @@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions { public static void Vceq_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false); } @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true); } @@ -55,7 +63,11 @@ namespace ARMeilleure.Instructions public static void Vcge_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false); } @@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true); } @@ -95,7 +111,11 @@ namespace ARMeilleure.Instructions public static void Vcgt_V(ArmEmitterContext context) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false); } @@ -118,7 +138,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseAvx) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) { 
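// Same three-tier dispatch as the other comparison emitters in this file: the native
// AdvSimd intrinsic is preferred on Arm64 hosts, the SSE/AVX path covers x86 hosts,
// and the portable soft-float helpers remain the final fallback.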
EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true); } @@ -139,7 +163,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true); } @@ -160,7 +188,11 @@ namespace ARMeilleure.Instructions if (op.F) { - if (Optimizations.FastFP && Optimizations.UseSse2) + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) { EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true); } @@ -247,12 +279,26 @@ namespace ARMeilleure.Instructions public static void Vcmp(ArmEmitterContext context) { - EmitVcmpOrVcmpe(context, false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false); + } + else + { + EmitVcmpOrVcmpe(context, false); + } } public static void Vcmpe(ArmEmitterContext context) { - EmitVcmpOrVcmpe(context, true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true); + } + else + { + EmitVcmpOrVcmpe(context, true); + } } private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs index 7f61cad41..652ad397c 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -164,7 +164,11 @@ namespace ARMeilleure.Instructions public static void Fcvtas_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false); } @@ -176,7 +180,11 @@ namespace ARMeilleure.Instructions public static void Fcvtas_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true); } @@ -188,7 +196,11 @@ namespace ARMeilleure.Instructions public static void Fcvtas_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false); } @@ -200,7 +212,11 @@ namespace ARMeilleure.Instructions public static void Fcvtau_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false); } @@ -212,7 +228,11 @@ namespace ARMeilleure.Instructions public static void Fcvtau_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, 
FPRoundingMode.ToNearestAway, scalar: true); } @@ -224,7 +244,11 @@ namespace ARMeilleure.Instructions public static void Fcvtau_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false); } @@ -240,7 +264,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.UseSse2 && sizeF == 1) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV); + } + else if (Optimizations.UseSse2 && sizeF == 1) { Operand n = GetVec(op.Rn); @@ -296,7 +324,11 @@ namespace ARMeilleure.Instructions public static void Fcvtms_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false); } @@ -308,7 +340,11 @@ namespace ARMeilleure.Instructions public static void Fcvtms_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false); } @@ -320,7 +356,11 @@ namespace ARMeilleure.Instructions public static void Fcvtmu_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false); } @@ -336,7 +376,11 @@ namespace ARMeilleure.Instructions int sizeF = op.Size & 1; - if (Optimizations.UseSse2 && sizeF == 1) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV); + } + else if (Optimizations.UseSse2 && sizeF == 1) { Operand d = GetVec(op.Rd); @@ -405,7 +449,11 @@ namespace ARMeilleure.Instructions public static void Fcvtns_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false); } @@ -417,7 +465,11 @@ namespace ARMeilleure.Instructions public static void Fcvtns_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true); } @@ -429,7 +481,11 @@ namespace ARMeilleure.Instructions public static void Fcvtns_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false); } @@ -441,7 +497,11 @@ namespace ARMeilleure.Instructions public static void Fcvtnu_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + 
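// The intrinsic suffix encodes the operand shape: *S selects the scalar form,
// *V the vector form, and *Gp a form that reads or writes a general-purpose register.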
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true); } @@ -453,7 +513,11 @@ namespace ARMeilleure.Instructions public static void Fcvtnu_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false); } @@ -465,7 +529,11 @@ namespace ARMeilleure.Instructions public static void Fcvtps_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false); } @@ -477,7 +545,11 @@ namespace ARMeilleure.Instructions public static void Fcvtpu_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false); } @@ -489,7 +561,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false); } @@ -501,7 +577,13 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true); } @@ -513,7 +595,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true); } @@ -525,7 +611,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -537,7 +627,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzs_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -549,7 +643,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_Gp(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + 
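// FCVTZU writes to a general-purpose register, so the FToGp helper routes the
// result through SetIntOrZR and picks AddIntrinsicInt or AddIntrinsicLong based
// on the operation's register size.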
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false); } @@ -561,7 +659,13 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits); + } + else if (Optimizations.UseSse41) { EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true); } @@ -573,7 +677,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_S(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true); } @@ -585,7 +693,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -597,7 +709,11 @@ namespace ARMeilleure.Instructions public static void Fcvtzu_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse41) { EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false); } @@ -609,41 +725,59 @@ namespace ARMeilleure.Instructions public static void Scvtf_Gp(ArmEmitterContext context) { - OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - - Operand res = GetIntOrZR(context, op.Rn); - - if (op.RegisterSize == RegisterSize.Int32) + if (Optimizations.UseAdvSimd) { - res = context.SignExtend32(OperandType.I64, res); + InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp); } + else + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - res = EmitFPConvert(context, res, op.Size, signed: true); + Operand res = GetIntOrZR(context, op.Rn); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } + + res = EmitFPConvert(context, res, op.Size, signed: true); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Scvtf_Gp_Fixed(ArmEmitterContext context) { OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - Operand res = GetIntOrZR(context, op.Rn); - - if (op.RegisterSize == RegisterSize.Int32) + if (Optimizations.UseAdvSimd) { - res = context.SignExtend32(OperandType.I64, res); + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits); } + else + { + Operand res = GetIntOrZR(context, op.Rn); - res = EmitFPConvert(context, res, op.Size, signed: true); + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } - res = EmitI2fFBitsMul(context, res, op.FBits); + res = EmitFPConvert(context, res, op.Size, signed: true); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), 
res, 0)); + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Scvtf_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: true); } @@ -655,7 +789,11 @@ namespace ARMeilleure.Instructions public static void Scvtf_S_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: true); } @@ -667,7 +805,11 @@ namespace ARMeilleure.Instructions public static void Scvtf_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: false); } @@ -679,7 +821,11 @@ namespace ARMeilleure.Instructions public static void Scvtf_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2ScvtfOp(context, scalar: false); } @@ -691,31 +837,49 @@ namespace ARMeilleure.Instructions public static void Ucvtf_Gp(ArmEmitterContext context) { - OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp); + } + else + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - Operand res = GetIntOrZR(context, op.Rn); + Operand res = GetIntOrZR(context, op.Rn); - res = EmitFPConvert(context, res, op.Size, signed: false); + res = EmitFPConvert(context, res, op.Size, signed: false); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Ucvtf_Gp_Fixed(ArmEmitterContext context) { OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; - Operand res = GetIntOrZR(context, op.Rn); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits); + } + else + { + Operand res = GetIntOrZR(context, op.Rn); - res = EmitFPConvert(context, res, op.Size, signed: false); + res = EmitFPConvert(context, res, op.Size, signed: false); - res = EmitI2fFBitsMul(context, res, op.FBits); + res = EmitI2fFBitsMul(context, res, op.FBits); - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } public static void Ucvtf_S(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: true); } @@ -727,7 +891,11 @@ namespace ARMeilleure.Instructions public static void Ucvtf_S_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, 
Intrinsic.Arm64UcvtfSFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: true); } @@ -739,7 +907,11 @@ namespace ARMeilleure.Instructions public static void Ucvtf_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: false); } @@ -751,7 +923,11 @@ namespace ARMeilleure.Instructions public static void Ucvtf_V_Fixed(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) { EmitSse2UcvtfOp(context, scalar: false); } diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index f3f239589..5fdc3b5ad 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -59,7 +59,11 @@ namespace ARMeilleure.Instructions if (toInteger) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV); + } + else if (Optimizations.UseSse41) { EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned); } @@ -153,7 +157,28 @@ namespace ARMeilleure.Instructions bool unsigned = (op.Opc2 & 1) == 0; bool roundWithFpscr = op.Opc != 1; - if (!roundWithFpscr && Optimizations.UseSse41) + if (!roundWithFpscr && Optimizations.UseAdvSimd) + { + bool doubleSize = floatSize == OperandType.FP64; + + if (doubleSize) + { + Operand m = GetVecA32(op.Vm >> 1); + + Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize); + + Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble; + + Operand asInteger = context.AddIntrinsicInt(inst, toConvert); + + InsertScalar(context, op.Vd, asInteger); + } + else + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? 
Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS); + } + } + else if (!roundWithFpscr && Optimizations.UseSse41) { EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned); } @@ -231,7 +256,34 @@ namespace ARMeilleure.Instructions bool unsigned = op.Opc == 0; int rm = op.Opc2 & 3; - if (Optimizations.UseSse41) + Intrinsic inst; + + if (Optimizations.UseAdvSimd) + { + if (unsigned) + { + inst = rm switch + { + 0b00 => Intrinsic.Arm64FcvtauS, + 0b01 => Intrinsic.Arm64FcvtnuS, + 0b10 => Intrinsic.Arm64FcvtpuS, + 0b11 => Intrinsic.Arm64FcvtmuS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + } + else + { + inst = rm switch + { + 0b00 => Intrinsic.Arm64FcvtasS, + 0b01 => Intrinsic.Arm64FcvtnsS, + 0b10 => Intrinsic.Arm64FcvtpsS, + 0b11 => Intrinsic.Arm64FcvtmsS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + } + + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst); + } + else if (Optimizations.UseSse41) { EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned); } @@ -338,7 +390,19 @@ namespace ARMeilleure.Instructions int rm = op.Opc2 & 3; - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + Intrinsic inst = rm switch + { + 0b00 => Intrinsic.Arm64FrintaS, + 0b01 => Intrinsic.Arm64FrintnS, + 0b10 => Intrinsic.Arm64FrintpS, + 0b11 => Intrinsic.Arm64FrintmS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst); + } + else if (Optimizations.UseSse41) { EmitScalarUnaryOpSimd32(context, (m) => { @@ -382,12 +446,9 @@ namespace ARMeilleure.Instructions // VRINTA (vector). public static void Vrinta_V(ArmEmitterContext context) { - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) { - EmitVectorUnaryOpSimd32(context, (m) => - { - return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false); - }); + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaV); } + else { @@ -398,7 +459,11 @@ namespace ARMeilleure.Instructions // VRINTM (vector). public static void Vrintm_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmV); + } + else if (Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -414,7 +479,11 @@ namespace ARMeilleure.Instructions // VRINTN (vector). public static void Vrintn_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnV); + } + else if (Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -430,7 +499,11 @@ namespace ARMeilleure.Instructions // VRINTP (vector). 
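// The rm-to-intrinsic switches above repeat the same rounding-mode mapping for VCVT
// and VRINT. A minimal sketch of a shared lookup that could be factored out
// (GetVrintInst is a hypothetical name, not part of this patch; the Intrinsic values
// are the ones the switches above already use):
//
//     private static Intrinsic GetVrintInst(int rm) => rm switch
//     {
//         0b00 => Intrinsic.Arm64FrintaS, // to nearest, ties away from zero
//         0b01 => Intrinsic.Arm64FrintnS, // to nearest, ties to even
//         0b10 => Intrinsic.Arm64FrintpS, // towards +infinity
//         0b11 => Intrinsic.Arm64FrintmS, // towards -infinity
//         _ => throw new ArgumentOutOfRangeException(nameof(rm)),
//     };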
public static void Vrintp_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpV); + } + else if (Optimizations.UseSse2) { EmitVectorUnaryOpSimd32(context, (m) => { @@ -448,7 +521,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS); + } + else if (Optimizations.UseSse2) { EmitScalarUnaryOpSimd32(context, (m) => { diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs new file mode 100644 index 000000000..98236be6d --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs @@ -0,0 +1,366 @@ + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions { + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper32Arm64 + { + // Intrinsic Helpers + + public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV) + { + Debug.Assert(input.Type == OperandType.V128); + + int originalSide = originalV & 1; + int targetSide = targetV & 1; + + if (originalSide == targetSide) + { + return input; + } + + Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128; + + if (targetSide == 1) + { + return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high. + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low. + } + } + + public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + int targetSide = targetV & 1; + Operand idx = Const(targetSide); + + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx); + } + + public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + // Insert from index 0 in value to index in target. + int index = reg & (doubleWidth ? 1 : 3); + + if (doubleWidth) + { + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0)); + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0)); + } + } + + public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth) + { + int index = reg & (doubleWidth ? 1 : 3); + if (index == 0) return target; // Element is already at index 0, so just return the vector directly. + + if (doubleWidth) + { + return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1). 
+ } + else + { + return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index. + } + } + + // Vector Operand Templates + + public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m)); + } + + public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (side == -1) + { + side = op.Vd; + } + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, side); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, side); + } + + Operand res = vectorFunc(n, m); + + if (!op.Q) // Register insert. + { + if (side != op.Vd) + { + res = EmitMoveDoubleWordToSide(context, res, side, op.Vd); + } + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + Operand initialD = d; + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(d, n, m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, initialD, res, op.Vd); + } + + context.Copy(initialD, res); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 
1 : 2; + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m)); + } + + public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + n = EmitExtractScalar(context, n, op.Vn, doubleSize); + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + Operand initialD = d; + + n = EmitExtractScalar(context, n, op.Vn, doubleSize); + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + d = EmitExtractScalar(context, d, op.Vd, doubleSize); + + Operand res = scalarFunc(d, n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize); + + context.Copy(initialD, res); + } + + public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m)); + } + + // Pairwise + + public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat; + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0); + } + + public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool cmpWithZero = (op.Opc & 2) != 0; + + Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS; + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 
1 : 2; + Operand n = GetVecA32(op.Vd >> shift); + Operand m = GetVecA32(op.Vm >> shift); + + n = EmitExtractScalar(context, n, op.Vd, doubleSize); + m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand nzcv = context.AddIntrinsicInt(inst, n, m); + + Operand one = Const(1); + + SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one)); + SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one)); + SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one)); + SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one)); + } + + public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + + Intrinsic inst; + if (zero) + { + inst = cond switch + { + CmpCondition.Equal => Intrinsic.Arm64FcmeqVz, + CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz, + CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz, + CmpCondition.LessThan => Intrinsic.Arm64FcmltVz, + CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz, + _ => throw new InvalidOperationException() + }; + } + else { + inst = cond switch + { + CmpCondition.Equal => Intrinsic.Arm64FcmeqV, + CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV, + CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV, + _ => throw new InvalidOperationException() + }; + } + + inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + + if (zero) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(inst, m); + }); + } + else + { + EmitVectorBinaryOpSimd32(context, (n, m) => + { + return context.AddIntrinsic(inst, n, m); + }); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs new file mode 100644 index 000000000..f0d242ae2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs @@ -0,0 +1,720 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitSimdHelperArm64 + { + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32 + ? 
context.AddIntrinsicInt (inst, n) + : context.AddIntrinsicLong(inst, n)); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + Operand a = GetVec(op.Ra); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m)); + } + + public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift))); + } + + public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + 
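// The element size is packed into spare bits of the Intrinsic value via
// Arm64VSizeShift; the Arm64 code generator reads it back to pick the operand
// arrangement when the instruction is assembled.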
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + } + + public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, n, m); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, d, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32 + ? 
context.AddIntrinsicInt (inst, n, Const(fBits)) + : context.AddIntrinsicLong(inst, n, Const(fBits))); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), 
context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift))); + } + + public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + } + + public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= 
Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n, m); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, d, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index)); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp; + + Operand[] operands = new Operand[op.Size + 1]; + + operands[op.Size] = GetVec(op.Rm); + + for (int index = 0; index < op.Size; index++) + { + operands[index] = GetVec((op.Rn + index) & 0x1F); + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands)); + } + + public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; + + Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS; + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + Operand n = GetVec(op.Rn); + Operand m = cmpWithZero ? 
Const(0) : GetVec(op.Rm); + + Operand nzcv = context.AddIntrinsicInt(inst, n, m); + + Operand one = Const(1); + + SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one)); + SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one)); + SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one)); + SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one)); + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs index 624ae841d..8ca815801 100644 --- a/ARMeilleure/Instructions/InstEmitSimdLogical.cs +++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs @@ -14,7 +14,11 @@ namespace ARMeilleure.Instructions { public static void And_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions public static void Bic_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -98,12 +106,26 @@ namespace ARMeilleure.Instructions public static void Bif_V(ArmEmitterContext context) { - EmitBifBit(context, notRm: true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV); + } + else + { + EmitBifBit(context, notRm: true); + } } public static void Bit_V(ArmEmitterContext context) { - EmitBifBit(context, notRm: false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV); + } + else + { + EmitBifBit(context, notRm: false); + } } private static void EmitBifBit(ArmEmitterContext context, bool notRm) @@ -167,7 +189,11 @@ namespace ARMeilleure.Instructions public static void Bsl_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -200,7 +226,11 @@ namespace ARMeilleure.Instructions public static void Eor_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -249,7 +279,11 @@ namespace ARMeilleure.Instructions public static void Orn_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -280,7 +314,11 @@ namespace ARMeilleure.Instructions public static void Orr_V(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV); + } + else if (Optimizations.UseSse2) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; diff --git 
a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs index dd686d4dd..c2a04778b 100644 --- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs @@ -13,7 +13,11 @@ namespace ARMeilleure.Instructions { public static void Vand_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m)); } @@ -25,7 +29,11 @@ namespace ARMeilleure.Instructions public static void Vbic_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n)); } @@ -73,17 +81,35 @@ namespace ARMeilleure.Instructions public static void Vbif(ArmEmitterContext context) { - EmitBifBit(context, true); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m)); + } + else + { + EmitBifBit(context, true); + } } public static void Vbit(ArmEmitterContext context) { - EmitBifBit(context, false); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m)); + } + else + { + EmitBifBit(context, false); + } } public static void Vbsl(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorTernaryOpSimd32(context, (d, n, m) => { @@ -105,7 +131,11 @@ namespace ARMeilleure.Instructions public static void Veor_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m)); } @@ -117,7 +147,11 @@ namespace ARMeilleure.Instructions public static void Vorn_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { Operand mask = context.VectorOne(); @@ -135,7 +169,11 @@ namespace ARMeilleure.Instructions public static void Vorr_I(ArmEmitterContext context) { - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) { EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m)); } diff --git 
a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs index 7da180fc9..17100eb9c 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -392,7 +392,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; - if (Optimizations.UseSse2) + if (Optimizations.UseAdvSimd) + { + EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V); + } + else if (Optimizations.UseSse2) { EmitVectorShuffleOpSimd32(context, (m, d) => { @@ -461,7 +465,11 @@ namespace ARMeilleure.Instructions { OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V); + } + else if (Optimizations.UseSsse3) { EmitVectorShuffleOpSimd32(context, (m, d) => { @@ -559,6 +567,52 @@ namespace ARMeilleure.Instructions } } + private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + bool overlap = op.Qm == op.Qd; + + Operand d = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); + + Operand dPart = d; + Operand mPart = m; + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0); + mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0); + } + + Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64; + + vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart); + Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart); + + if (!op.Q) // Register insert. 
+ { + resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0)); + + if (overlap) + { + resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0)); + } + else + { + resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0)); + } + } + + context.Copy(d, resD); + if (!overlap) + { + context.Copy(m, resM); + } + } + private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc) { OpCode32Simd op = (OpCode32Simd)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs index cf3b51bd6..19e41119b 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -26,7 +26,15 @@ namespace ARMeilleure.Instructions public static void Rshrn_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift); + } + else if (Optimizations.UseSsse3) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; @@ -80,7 +88,14 @@ namespace ARMeilleure.Instructions int shift = GetImmShl(op); - EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift); + } + else + { + EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } } public static void Shl_V(ArmEmitterContext context) @@ -90,7 +105,11 @@ namespace ARMeilleure.Instructions int shift = GetImmShl(op); int eSize = 8 << op.Size; - if (shift >= eSize) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift); + } + else if (shift >= eSize) { if (op.RegisterSize == RegisterSize.Simd64) { @@ -143,7 +162,11 @@ namespace ARMeilleure.Instructions int shift = 8 << op.Size; - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV); + } + else if (Optimizations.UseSse41) { Operand n = GetVec(op.Rn); @@ -170,7 +193,15 @@ namespace ARMeilleure.Instructions public static void Shrn_V(ArmEmitterContext context) { - if (Optimizations.UseSsse3) + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift); + } + else if (Optimizations.UseSsse3) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; @@ -205,89 +236,259 @@ namespace ARMeilleure.Instructions public static void Sli_S(ArmEmitterContext context) { - EmitSli(context, scalar: true); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift); + } + else + { + EmitSli(context, scalar: true); + } } public static void Sli_V(ArmEmitterContext context) { - EmitSli(context, scalar: false); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift); + 
} + else + { + EmitSli(context, scalar: false); + } } public static void Sqrshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating); + } } public static void Sqrshrn_S(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } } public static void Sqrshrn_V(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } } public static void Sqrshrun_S(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } } public static void Sqrshrun_V(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } } public static void Sqshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating); + } } public static void Sqshrn_S(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } } public static void Sqshrn_V(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift); + } + else + 
{ + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } } public static void Sqshrun_S(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } } public static void Sqshrun_V(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } } public static void Sri_S(ArmEmitterContext context) { - EmitSri(context, scalar: true); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift); + } + else + { + EmitSri(context, scalar: true); + } } public static void Sri_V(ArmEmitterContext context) { - EmitSri(context, scalar: false); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift); + } + else + { + EmitSri(context, scalar: false); + } } public static void Srshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round); + } } public static void Srshr_S(ArmEmitterContext context) { - EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + } } public static void Srshr_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { int shift = GetImmShr(op); int eSize = 8 << op.Size; @@ -325,14 +526,31 @@ namespace ARMeilleure.Instructions public static void Srsra_S(ArmEmitterContext context) { - EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } } public static void Srsra_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if 
(Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { int shift = GetImmShr(op); int eSize = 8 << op.Size; @@ -372,12 +590,26 @@ namespace ARMeilleure.Instructions public static void Sshl_S(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed); + } } public static void Sshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Signed); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed); + } } public static void Sshll_V(ArmEmitterContext context) @@ -386,7 +618,11 @@ namespace ARMeilleure.Instructions int shift = GetImmShl(op); - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift); + } + else if (Optimizations.UseSse41) { Operand n = GetVec(op.Rn); @@ -416,7 +652,18 @@ namespace ARMeilleure.Instructions public static void Sshr_S(ArmEmitterContext context) { - EmitShrImmOp(context, ShrImmFlags.ScalarSx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift); + } + else + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx); + } } public static void Sshr_V(ArmEmitterContext context) @@ -425,7 +672,11 @@ namespace ARMeilleure.Instructions int shift = GetImmShr(op); - if (Optimizations.UseGfni && op.Size == 0) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift); + } + else if (Optimizations.UseGfni && op.Size == 0) { Operand n = GetVec(op.Rn); @@ -478,14 +729,31 @@ namespace ARMeilleure.Instructions public static void Ssra_S(ArmEmitterContext context) { - EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate); + } } public static void Ssra_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { int shift = GetImmShr(op); @@ -515,49 +783,131 @@ namespace ARMeilleure.Instructions public static void Uqrshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating); + } } public static void Uqrshrn_S(ArmEmitterContext context) { - 
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } } public static void Uqrshrn_V(ArmEmitterContext context) { - EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } } public static void Uqshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Saturating); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Saturating); + } } public static void Uqshrn_S(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } } public static void Uqshrn_V(ArmEmitterContext context) { - EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } } public static void Urshl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Round); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Round); + } } public static void Urshr_S(ArmEmitterContext context) { - EmitScalarShrImmOpZx(context, ShrImmFlags.Round); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift); + } + else + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round); + } } public static void Urshr_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) { int shift = GetImmShr(op); int eSize = 8 << op.Size; @@ -593,14 +943,31 @@ namespace ARMeilleure.Instructions public static void Ursra_S(ArmEmitterContext context) { - EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + 
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift); + } + else + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } } public static void Ursra_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) { int shift = GetImmShr(op); int eSize = 8 << op.Size; @@ -638,12 +1005,26 @@ namespace ARMeilleure.Instructions public static void Ushl_S(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.Scalar); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Scalar); + } } public static void Ushl_V(ArmEmitterContext context) { - EmitShlRegOp(context, ShlRegFlags.None); + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.None); + } } public static void Ushll_V(ArmEmitterContext context) @@ -652,7 +1033,11 @@ namespace ARMeilleure.Instructions int shift = GetImmShl(op); - if (Optimizations.UseSse41) + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift); + } + else if (Optimizations.UseSse41) { Operand n = GetVec(op.Rn); @@ -682,14 +1067,31 @@ namespace ARMeilleure.Instructions public static void Ushr_S(ArmEmitterContext context) { - EmitShrImmOp(context, ShrImmFlags.ScalarZx); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift); + } + else + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx); + } } public static void Ushr_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) { int shift = GetImmShr(op); @@ -714,14 +1116,31 @@ namespace ARMeilleure.Instructions public static void Usra_S(ArmEmitterContext context) { - EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift); + } + else + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); + } } public static void Usra_V(ArmEmitterContext context) { OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - if (Optimizations.UseSse2 && op.Size > 0) + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) { int shift = GetImmShr(op); diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs index cc32228c3..1345bbf10 100644 --- a/ARMeilleure/Instructions/InstEmitSystem.cs +++ b/ARMeilleure/Instructions/InstEmitSystem.cs @@ -150,6 +150,8 @@ namespace 
ARMeilleure.Instructions { OpCodeSystem op = (OpCodeSystem)context.CurrOp; + context.SyncQcFlag(); + Operand fpsr = Const(0); for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++) @@ -196,6 +198,8 @@ namespace ARMeilleure.Instructions { OpCodeSystem op = (OpCodeSystem)context.CurrOp; + context.ClearQcFlagIfModified(); + Operand fpsr = GetIntOrZR(context, op.Rt); fpsr = context.ConvertI64ToI32(fpsr); diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs index bc1285be2..a665e4b7a 100644 --- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs +++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs @@ -2,6 +2,8 @@ namespace ARMeilleure.IntermediateRepresentation { enum Intrinsic : ushort { + // X86 (SSE and AVX) + X86Addpd, X86Addps, X86Addsd, @@ -172,6 +174,458 @@ namespace ARMeilleure.IntermediateRepresentation X86Vfnmsub231sd, X86Vfnmsub231ss, X86Xorpd, - X86Xorps + X86Xorps, + + // Arm64 (FP and Advanced SIMD) + + Arm64AbsS, + Arm64AbsV, + Arm64AddhnV, + Arm64AddpS, + Arm64AddpV, + Arm64AddvV, + Arm64AddS, + Arm64AddV, + Arm64AesdV, + Arm64AeseV, + Arm64AesimcV, + Arm64AesmcV, + Arm64AndV, + Arm64BicVi, + Arm64BicV, + Arm64BifV, + Arm64BitV, + Arm64BslV, + Arm64ClsV, + Arm64ClzV, + Arm64CmeqS, + Arm64CmeqV, + Arm64CmeqSz, + Arm64CmeqVz, + Arm64CmgeS, + Arm64CmgeV, + Arm64CmgeSz, + Arm64CmgeVz, + Arm64CmgtS, + Arm64CmgtV, + Arm64CmgtSz, + Arm64CmgtVz, + Arm64CmhiS, + Arm64CmhiV, + Arm64CmhsS, + Arm64CmhsV, + Arm64CmleSz, + Arm64CmleVz, + Arm64CmltSz, + Arm64CmltVz, + Arm64CmtstS, + Arm64CmtstV, + Arm64CntV, + Arm64DupSe, + Arm64DupVe, + Arm64DupGp, + Arm64EorV, + Arm64ExtV, + Arm64FabdS, + Arm64FabdV, + Arm64FabsV, + Arm64FabsS, + Arm64FacgeS, + Arm64FacgeV, + Arm64FacgtS, + Arm64FacgtV, + Arm64FaddpS, + Arm64FaddpV, + Arm64FaddV, + Arm64FaddS, + Arm64FccmpeS, + Arm64FccmpS, + Arm64FcmeqS, + Arm64FcmeqV, + Arm64FcmeqSz, + Arm64FcmeqVz, + Arm64FcmgeS, + Arm64FcmgeV, + Arm64FcmgeSz, + Arm64FcmgeVz, + Arm64FcmgtS, + Arm64FcmgtV, + Arm64FcmgtSz, + Arm64FcmgtVz, + Arm64FcmleSz, + Arm64FcmleVz, + Arm64FcmltSz, + Arm64FcmltVz, + Arm64FcmpeS, + Arm64FcmpS, + Arm64FcselS, + Arm64FcvtasS, + Arm64FcvtasV, + Arm64FcvtasGp, + Arm64FcvtauS, + Arm64FcvtauV, + Arm64FcvtauGp, + Arm64FcvtlV, + Arm64FcvtmsS, + Arm64FcvtmsV, + Arm64FcvtmsGp, + Arm64FcvtmuS, + Arm64FcvtmuV, + Arm64FcvtmuGp, + Arm64FcvtnsS, + Arm64FcvtnsV, + Arm64FcvtnsGp, + Arm64FcvtnuS, + Arm64FcvtnuV, + Arm64FcvtnuGp, + Arm64FcvtnV, + Arm64FcvtpsS, + Arm64FcvtpsV, + Arm64FcvtpsGp, + Arm64FcvtpuS, + Arm64FcvtpuV, + Arm64FcvtpuGp, + Arm64FcvtxnS, + Arm64FcvtxnV, + Arm64FcvtzsSFixed, + Arm64FcvtzsVFixed, + Arm64FcvtzsS, + Arm64FcvtzsV, + Arm64FcvtzsGpFixed, + Arm64FcvtzsGp, + Arm64FcvtzuSFixed, + Arm64FcvtzuVFixed, + Arm64FcvtzuS, + Arm64FcvtzuV, + Arm64FcvtzuGpFixed, + Arm64FcvtzuGp, + Arm64FcvtS, + Arm64FdivV, + Arm64FdivS, + Arm64FmaddS, + Arm64FmaxnmpS, + Arm64FmaxnmpV, + Arm64FmaxnmvV, + Arm64FmaxnmV, + Arm64FmaxnmS, + Arm64FmaxpS, + Arm64FmaxpV, + Arm64FmaxvV, + Arm64FmaxV, + Arm64FmaxS, + Arm64FminnmpS, + Arm64FminnmpV, + Arm64FminnmvV, + Arm64FminnmV, + Arm64FminnmS, + Arm64FminpS, + Arm64FminpV, + Arm64FminvV, + Arm64FminV, + Arm64FminS, + Arm64FmlaSe, + Arm64FmlaVe, + Arm64FmlaV, + Arm64FmlsSe, + Arm64FmlsVe, + Arm64FmlsV, + Arm64FmovVi, + Arm64FmovS, + Arm64FmovGp, + Arm64FmovSi, + Arm64FmsubS, + Arm64FmulxSe, + Arm64FmulxVe, + Arm64FmulxS, + Arm64FmulxV, + Arm64FmulSe, + Arm64FmulVe, + Arm64FmulV, + Arm64FmulS, + Arm64FnegV, + Arm64FnegS, + Arm64FnmaddS, + 
Arm64FnmsubS, + Arm64FnmulS, + Arm64FrecpeS, + Arm64FrecpeV, + Arm64FrecpsS, + Arm64FrecpsV, + Arm64FrecpxS, + Arm64FrintaV, + Arm64FrintaS, + Arm64FrintiV, + Arm64FrintiS, + Arm64FrintmV, + Arm64FrintmS, + Arm64FrintnV, + Arm64FrintnS, + Arm64FrintpV, + Arm64FrintpS, + Arm64FrintxV, + Arm64FrintxS, + Arm64FrintzV, + Arm64FrintzS, + Arm64FrsqrteS, + Arm64FrsqrteV, + Arm64FrsqrtsS, + Arm64FrsqrtsV, + Arm64FsqrtV, + Arm64FsqrtS, + Arm64FsubV, + Arm64FsubS, + Arm64InsVe, + Arm64InsGp, + Arm64Ld1rV, + Arm64Ld1Vms, + Arm64Ld1Vss, + Arm64Ld2rV, + Arm64Ld2Vms, + Arm64Ld2Vss, + Arm64Ld3rV, + Arm64Ld3Vms, + Arm64Ld3Vss, + Arm64Ld4rV, + Arm64Ld4Vms, + Arm64Ld4Vss, + Arm64MlaVe, + Arm64MlaV, + Arm64MlsVe, + Arm64MlsV, + Arm64MoviV, + Arm64MrsFpsr, + Arm64MsrFpsr, + Arm64MulVe, + Arm64MulV, + Arm64MvniV, + Arm64NegS, + Arm64NegV, + Arm64NotV, + Arm64OrnV, + Arm64OrrVi, + Arm64OrrV, + Arm64PmullV, + Arm64PmulV, + Arm64RaddhnV, + Arm64RbitV, + Arm64Rev16V, + Arm64Rev32V, + Arm64Rev64V, + Arm64RshrnV, + Arm64RsubhnV, + Arm64SabalV, + Arm64SabaV, + Arm64SabdlV, + Arm64SabdV, + Arm64SadalpV, + Arm64SaddlpV, + Arm64SaddlvV, + Arm64SaddlV, + Arm64SaddwV, + Arm64ScvtfSFixed, + Arm64ScvtfVFixed, + Arm64ScvtfS, + Arm64ScvtfV, + Arm64ScvtfGpFixed, + Arm64ScvtfGp, + Arm64Sha1cV, + Arm64Sha1hV, + Arm64Sha1mV, + Arm64Sha1pV, + Arm64Sha1su0V, + Arm64Sha1su1V, + Arm64Sha256h2V, + Arm64Sha256hV, + Arm64Sha256su0V, + Arm64Sha256su1V, + Arm64ShaddV, + Arm64ShllV, + Arm64ShlS, + Arm64ShlV, + Arm64ShrnV, + Arm64ShsubV, + Arm64SliS, + Arm64SliV, + Arm64SmaxpV, + Arm64SmaxvV, + Arm64SmaxV, + Arm64SminpV, + Arm64SminvV, + Arm64SminV, + Arm64SmlalVe, + Arm64SmlalV, + Arm64SmlslVe, + Arm64SmlslV, + Arm64SmovV, + Arm64SmullVe, + Arm64SmullV, + Arm64SqabsS, + Arm64SqabsV, + Arm64SqaddS, + Arm64SqaddV, + Arm64SqdmlalSe, + Arm64SqdmlalVe, + Arm64SqdmlalS, + Arm64SqdmlalV, + Arm64SqdmlslSe, + Arm64SqdmlslVe, + Arm64SqdmlslS, + Arm64SqdmlslV, + Arm64SqdmulhSe, + Arm64SqdmulhVe, + Arm64SqdmulhS, + Arm64SqdmulhV, + Arm64SqdmullSe, + Arm64SqdmullVe, + Arm64SqdmullS, + Arm64SqdmullV, + Arm64SqnegS, + Arm64SqnegV, + Arm64SqrdmulhSe, + Arm64SqrdmulhVe, + Arm64SqrdmulhS, + Arm64SqrdmulhV, + Arm64SqrshlS, + Arm64SqrshlV, + Arm64SqrshrnS, + Arm64SqrshrnV, + Arm64SqrshrunS, + Arm64SqrshrunV, + Arm64SqshluS, + Arm64SqshluV, + Arm64SqshlSi, + Arm64SqshlVi, + Arm64SqshlS, + Arm64SqshlV, + Arm64SqshrnS, + Arm64SqshrnV, + Arm64SqshrunS, + Arm64SqshrunV, + Arm64SqsubS, + Arm64SqsubV, + Arm64SqxtnS, + Arm64SqxtnV, + Arm64SqxtunS, + Arm64SqxtunV, + Arm64SrhaddV, + Arm64SriS, + Arm64SriV, + Arm64SrshlS, + Arm64SrshlV, + Arm64SrshrS, + Arm64SrshrV, + Arm64SrsraS, + Arm64SrsraV, + Arm64SshllV, + Arm64SshlS, + Arm64SshlV, + Arm64SshrS, + Arm64SshrV, + Arm64SsraS, + Arm64SsraV, + Arm64SsublV, + Arm64SsubwV, + Arm64St1Vms, + Arm64St1Vss, + Arm64St2Vms, + Arm64St2Vss, + Arm64St3Vms, + Arm64St3Vss, + Arm64St4Vms, + Arm64St4Vss, + Arm64SubhnV, + Arm64SubS, + Arm64SubV, + Arm64SuqaddS, + Arm64SuqaddV, + Arm64TblV, + Arm64TbxV, + Arm64Trn1V, + Arm64Trn2V, + Arm64UabalV, + Arm64UabaV, + Arm64UabdlV, + Arm64UabdV, + Arm64UadalpV, + Arm64UaddlpV, + Arm64UaddlvV, + Arm64UaddlV, + Arm64UaddwV, + Arm64UcvtfSFixed, + Arm64UcvtfVFixed, + Arm64UcvtfS, + Arm64UcvtfV, + Arm64UcvtfGpFixed, + Arm64UcvtfGp, + Arm64UhaddV, + Arm64UhsubV, + Arm64UmaxpV, + Arm64UmaxvV, + Arm64UmaxV, + Arm64UminpV, + Arm64UminvV, + Arm64UminV, + Arm64UmlalVe, + Arm64UmlalV, + Arm64UmlslVe, + Arm64UmlslV, + Arm64UmovV, + Arm64UmullVe, + Arm64UmullV, + Arm64UqaddS, + Arm64UqaddV, + Arm64UqrshlS, + 
Arm64UqrshlV, + Arm64UqrshrnS, + Arm64UqrshrnV, + Arm64UqshlSi, + Arm64UqshlVi, + Arm64UqshlS, + Arm64UqshlV, + Arm64UqshrnS, + Arm64UqshrnV, + Arm64UqsubS, + Arm64UqsubV, + Arm64UqxtnS, + Arm64UqxtnV, + Arm64UrecpeV, + Arm64UrhaddV, + Arm64UrshlS, + Arm64UrshlV, + Arm64UrshrS, + Arm64UrshrV, + Arm64UrsqrteV, + Arm64UrsraS, + Arm64UrsraV, + Arm64UshllV, + Arm64UshlS, + Arm64UshlV, + Arm64UshrS, + Arm64UshrV, + Arm64UsqaddS, + Arm64UsqaddV, + Arm64UsraS, + Arm64UsraV, + Arm64UsublV, + Arm64UsubwV, + Arm64Uzp1V, + Arm64Uzp2V, + Arm64XtnV, + Arm64Zip1V, + Arm64Zip2V, + + Arm64VTypeShift = 13, + Arm64VTypeMask = 1 << Arm64VTypeShift, + Arm64V64 = 0 << Arm64VTypeShift, + Arm64V128 = 1 << Arm64VTypeShift, + + Arm64VSizeShift = 14, + Arm64VSizeMask = 3 << Arm64VSizeShift, + Arm64VFloat = 0 << Arm64VSizeShift, + Arm64VDouble = 1 << Arm64VSizeShift, + Arm64VByte = 0 << Arm64VSizeShift, + Arm64VHWord = 1 << Arm64VSizeShift, + Arm64VWord = 2 << Arm64VSizeShift, + Arm64VDWord = 3 << Arm64VSizeShift } } \ No newline at end of file diff --git a/ARMeilleure/IntermediateRepresentation/Multiplier.cs b/ARMeilleure/IntermediateRepresentation/Multiplier.cs index 23582072b..d6bc7d994 100644 --- a/ARMeilleure/IntermediateRepresentation/Multiplier.cs +++ b/ARMeilleure/IntermediateRepresentation/Multiplier.cs @@ -5,6 +5,7 @@ namespace ARMeilleure.IntermediateRepresentation x1 = 0, x2 = 1, x4 = 2, - x8 = 3 + x8 = 3, + x16 = 4 } } \ No newline at end of file diff --git a/ARMeilleure/IntermediateRepresentation/Operand.cs b/ARMeilleure/IntermediateRepresentation/Operand.cs index 896d3420c..9e8de3ba4 100644 --- a/ARMeilleure/IntermediateRepresentation/Operand.cs +++ b/ARMeilleure/IntermediateRepresentation/Operand.cs @@ -259,6 +259,20 @@ namespace ARMeilleure.IntermediateRepresentation } } + public Span<Operation> GetUses(ref Span<Operation> buffer) + { + ReadOnlySpan<Operation> uses = Uses; + + if (buffer.Length < uses.Length) + { + buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length); + } + + uses.CopyTo(buffer); + + return buffer.Slice(0, uses.Length); + } + private static void New<T>(ref T* data, ref ushort count, ref ushort capacity, ushort initialCapacity) where T : unmanaged { count = 0; diff --git a/ARMeilleure/IntermediateRepresentation/OperandType.cs b/ARMeilleure/IntermediateRepresentation/OperandType.cs index bfdf5130c..81b22cf56 100644 --- a/ARMeilleure/IntermediateRepresentation/OperandType.cs +++ b/ARMeilleure/IntermediateRepresentation/OperandType.cs @@ -47,5 +47,19 @@ namespace ARMeilleure.IntermediateRepresentation throw new InvalidOperationException($"Invalid operand type \"{type}\"."); } + + public static int GetSizeInBytesLog2(this OperandType type) + { + switch (type) + { + case OperandType.FP32: return 2; + case OperandType.FP64: return 3; + case OperandType.I32: return 2; + case OperandType.I64: return 3; + case OperandType.V128: return 4; + } + + throw new InvalidOperationException($"Invalid operand type \"{type}\"."); + } } } \ No newline at end of file diff --git a/ARMeilleure/Memory/ReservedRegion.cs b/ARMeilleure/Memory/ReservedRegion.cs index d63491089..2197afad9 100644 --- a/ARMeilleure/Memory/ReservedRegion.cs +++ b/ARMeilleure/Memory/ReservedRegion.cs @@ -4,7 +4,7 @@ namespace ARMeilleure.Memory { class ReservedRegion { - private const int DefaultGranularity = 65536; // Mapping granularity in Windows. + public const int DefaultGranularity = 65536; // Mapping granularity in Windows. 
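// Minimal sketch of the size rounding that JitCacheInvalidation (added later in this patch)
// performs against this now-public granularity before reserving its code region. The helper
// name is hypothetical; the trick only holds because the granularity is a power of two.
static ulong AlignUpToGranularity(ulong size)
{
    ulong mask = (ulong)ReservedRegion.DefaultGranularity - 1; // 0xFFFF for 64 KiB
    return (size + mask) & ~mask;                              // e.g. 112 bytes -> 65536
}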
public IJitMemoryBlock Block { get; } diff --git a/ARMeilleure/Native/JitSupportDarwin.cs b/ARMeilleure/Native/JitSupportDarwin.cs new file mode 100644 index 000000000..7d6a8634a --- /dev/null +++ b/ARMeilleure/Native/JitSupportDarwin.cs @@ -0,0 +1,13 @@ +using System; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +namespace ARMeilleure.Native +{ + [SupportedOSPlatform("macos")] + public static partial class JitSupportDarwin + { + [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")] + public static partial void Copy(IntPtr dst, IntPtr src, ulong n); + } +} diff --git a/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib b/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib new file mode 100644 index 0000000000000000000000000000000000000000..c65b0a4efb797fdc0280c4edf5557d16142d2347 GIT binary patch literal 33564 zcmeI*UuYaf90%~ZyJ zO&aP&snmZ+Az~CoK}GM37{rHAh>Z$b`chIsQK7{8pb6+hZ5u5{{rzU=uG@pO4+WpT zLuO}xGkd??{p{v(c8;Chj`_Oy9;CdltNY%Ltz9MXJauw0)mD_W z8}*O*delpzhp(cxjVhlv*gt4*PiBWKdvMUoWv%92w&&&?CzJJBQo}SRZl}!Z&C2^e zn{+%uag1`YDd(Kc^CPGIF(DGvUP<+Ss=>nN&zdgNl z=Qkg}^UIA^)4uO-qrMnb&MO4J9kqdTd~Mu9J^p_NkNZcoQOmjf_hAFQ_94-lbF-~< z!(4lZUC$b7*`f*fXQGKJ=b6q6QA6!U9rJy2xt=PIJ9Pb%{>5-N3Irek0SG_<0uX=z 1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV= z5P$##AOHafKmY;|fB*y_a1nvC{WFErJ)y$#UZb%5j5zp5*eIS3hl*#yqDWJlAD}tB z;wp+`)W5piUsqh-yM=PZ3upVk{NM^9>ghLAQN#R87#qG-`_+N{ z)&4$D32xNa6>ND9GS8i-dDg1FqUWu1%=4sqHgifpXF**~k3C~Nx^EU#5%Z-=T zR;TX}sKV=<3zJqXE+S2zPsEg7tA!o2Ro~6cJ#O?QBL9wlIy$odhnW{T zpS$(_OO2-@&sIHjc;uuv-t@-Yk2Sm8&4;#~uwOrL$KMBE?&xfPB7611tEVD`>4tb+ z%i`xp$BxWReOTYM?%jr_rBw^he>Q)q{Fgg^=#hIy;-`+RYb|b!ytugdTIP52&n34v O_VfJwTcb0Z+W!MamVHnF literal 0 HcmV?d00001 diff --git a/ARMeilleure/Native/macos_jit_support/Makefile b/ARMeilleure/Native/macos_jit_support/Makefile new file mode 100644 index 000000000..d6da35d52 --- /dev/null +++ b/ARMeilleure/Native/macos_jit_support/Makefile @@ -0,0 +1,8 @@ +NAME = libarmeilleure-jitsupport.dylib + +all: ${NAME} + +${NAME}: + clang -O3 -dynamiclib support.c -o ${NAME} +clean: + rm -f ${NAME} diff --git a/ARMeilleure/Native/macos_jit_support/support.c b/ARMeilleure/Native/macos_jit_support/support.c new file mode 100644 index 000000000..1b13d9066 --- /dev/null +++ b/ARMeilleure/Native/macos_jit_support/support.c @@ -0,0 +1,14 @@ +#include <stddef.h> +#include <string.h> +#include <pthread.h> + +#include <libkern/OSCacheControl.h> + +void armeilleure_jit_memcpy(void *dst, const void *src, size_t n) { + pthread_jit_write_protect_np(0); + memcpy(dst, src, n); + pthread_jit_write_protect_np(1); + + // Ensure that the instruction cache for this range is invalidated. 
+ sys_icache_invalidate(dst, n); +} diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs index 97defd9a9..0810d96c9 100644 --- a/ARMeilleure/Optimizations.cs +++ b/ARMeilleure/Optimizations.cs @@ -1,4 +1,5 @@ using ARMeilleure.CodeGen.X86; +using System.Runtime.Intrinsics.Arm; namespace ARMeilleure { @@ -9,6 +10,8 @@ namespace ARMeilleure public static bool AllowLcqInFunctionTable { get; set; } = true; public static bool UseUnmanagedDispatchLoop { get; set; } = true; + public static bool UseAdvSimdIfAvailable { get; set; } = true; + public static bool UseSseIfAvailable { get; set; } = true; public static bool UseSse2IfAvailable { get; set; } = true; public static bool UseSse3IfAvailable { get; set; } = true; @@ -30,6 +33,8 @@ namespace ARMeilleure set => HardwareCapabilities.ForceLegacySse = value; } + internal static bool UseAdvSimd => UseAdvSimdIfAvailable && AdvSimd.IsSupported; + internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse; internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2; internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3; diff --git a/ARMeilleure/Signal/NativeSignalHandler.cs b/ARMeilleure/Signal/NativeSignalHandler.cs index 0257f4403..da02f76a8 100644 --- a/ARMeilleure/Signal/NativeSignalHandler.cs +++ b/ARMeilleure/Signal/NativeSignalHandler.cs @@ -1,5 +1,7 @@ using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; using ARMeilleure.Translation; +using ARMeilleure.Translation.Cache; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -69,8 +71,8 @@ namespace ARMeilleure.Signal private const uint EXCEPTION_ACCESS_VIOLATION = 0xc0000005; - private const ulong PageSize = 0x1000; - private const ulong PageMask = PageSize - 1; + private static ulong _pageSize = GetPageSize(); + private static ulong _pageMask = _pageSize - 1; private static IntPtr _handlerConfig; private static IntPtr _signalHandlerPtr; @@ -79,6 +81,19 @@ private static readonly object _lock = new object(); private static bool _initialized; + private static ulong GetPageSize() + { + // TODO: This needs to be based on the current memory manager configuration. + if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + return 1UL << 14; + } + else + { + return 1UL << 12; + } + } + static NativeSignalHandler() { _handlerConfig = Marshal.AllocHGlobal(Unsafe.SizeOf<SignalHandlerConfig>()); @@ -87,7 +102,12 @@ config = new SignalHandlerConfig(); } - public static void InitializeSignalHandler() + public static void InitializeJitCache(IJitMemoryAllocator allocator) + { + JitCache.Initialize(allocator); + } + + public static void InitializeSignalHandler(Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null) { if (_initialized) return; @@ -95,10 +115,9 @@ { if (_initialized) return; - bool unix = OperatingSystem.IsLinux() || OperatingSystem.IsMacOS(); ref SignalHandlerConfig config = ref GetConfigRef(); - if (unix) + if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS()) { // Unix siginfo struct locations. // NOTE: These are incredibly likely to be different between kernel version and architectures. 
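// Minimal sketch of how the generated handler applies _pageMask: the fault address is made
// relative to the tracked region and aligned down to a page boundary before the tracking
// action is invoked. All values are hypothetical; 1UL << 14 is the macOS/Arm64 case above.
ulong pageSize  = 1UL << 14;                       // 16 KiB pages
ulong pageMask  = pageSize - 1;
ulong rangeBase = 0x1000_0000UL;                   // tracked region base (illustrative)
ulong fault     = 0x1000_6123UL;                   // faulting address (illustrative)
ulong offset    = (fault - rangeBase) & ~pageMask; // 0x4000: start of the faulting page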
@@ -108,7 +127,13 @@ namespace ARMeilleure.Signal _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateUnixSignalHandler(_handlerConfig)); - SigAction old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr); + if (customSignalHandlerFactory != null) + { + _signalHandlerPtr = customSignalHandlerFactory(UnixSignalHandlerRegistration.GetSegfaultExceptionHandler().sa_handler, _signalHandlerPtr); + } + + var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr); + config.UnixOldSigaction = (nuint)(ulong)old.sa_handler; config.UnixOldSigaction3Arg = old.sa_flags & 4; } @@ -119,6 +144,11 @@ namespace ARMeilleure.Signal _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateWindowsSignalHandler(_handlerConfig)); + if (customSignalHandlerFactory != null) + { + _signalHandlerPtr = customSignalHandlerFactory(IntPtr.Zero, _signalHandlerPtr); + } + _signalHandlerHandle = WindowsSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr); } @@ -197,7 +227,7 @@ namespace ARMeilleure.Signal // Only call tracking if in range. context.BranchIfFalse(nextLabel, inRange, BasicBlockFrequency.Cold); - Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~PageMask)); + Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~_pageMask)); // Call the tracking action, with the pointer's relative offset to the base address. Operand trackingActionPtr = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 20)); @@ -208,7 +238,7 @@ namespace ARMeilleure.Signal // Tracking action should be non-null to call it, otherwise assume false return. context.BranchIfFalse(skipActionLabel, trackingActionPtr); - Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(PageSize), isWrite, Const(0)); + Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(_pageSize), isWrite, Const(0)); context.Copy(inRegionLocal, result); context.MarkLabel(skipActionLabel); @@ -278,7 +308,7 @@ namespace ARMeilleure.Signal OperandType[] argTypes = new OperandType[] { OperandType.I32, OperandType.I64, OperandType.I64 }; - return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<UnixExceptionHandler>(); + return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<UnixExceptionHandler>(); } private static VectoredExceptionHandler GenerateWindowsSignalHandler(IntPtr signalStructPtr) @@ -332,7 +362,7 @@ namespace ARMeilleure.Signal OperandType[] argTypes = new OperandType[] { OperandType.I64 }; - return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<VectoredExceptionHandler>(); + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<VectoredExceptionHandler>(); } } } diff --git a/ARMeilleure/Signal/TestMethods.cs b/ARMeilleure/Signal/TestMethods.cs index 2d7cef166..e2ecad242 100644 --- a/ARMeilleure/Signal/TestMethods.cs +++ b/ARMeilleure/Signal/TestMethods.cs @@ -1,7 +1,7 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; using System; - +using System.Runtime.InteropServices; using static ARMeilleure.IntermediateRepresentation.Operand.Factory; namespace ARMeilleure.Signal @@ -32,7 +32,7 @@ namespace ARMeilleure.Signal OperandType[] argTypes = new OperandType[] { OperandType.I64 }; - return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugPartialUnmap>(); + return Compiler.Compile(cfg, argTypes, 
OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugPartialUnmap>(); } public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(IntPtr structPtr) @@ -49,7 +49,7 @@ namespace ARMeilleure.Signal OperandType[] argTypes = new OperandType[] { OperandType.I64 }; - return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugThreadLocalMapGetOrReserve>(); + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugThreadLocalMapGetOrReserve>(); } public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop() @@ -78,7 +78,7 @@ namespace ARMeilleure.Signal OperandType[] argTypes = new OperandType[] { OperandType.I64 }; - return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<DebugNativeWriteLoop>(); + return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugNativeWriteLoop>(); } } } diff --git a/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs b/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs index 945a01dae..22009240b 100644 --- a/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs +++ b/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs @@ -3,23 +3,23 @@ using System.Runtime.InteropServices; namespace ARMeilleure.Signal { - [StructLayout(LayoutKind.Sequential, Pack = 1)] - unsafe struct SigSet - { - fixed long sa_mask[16]; - } - - [StructLayout(LayoutKind.Sequential, Pack = 1)] - struct SigAction - { - public IntPtr sa_handler; - public SigSet sa_mask; - public int sa_flags; - public IntPtr sa_restorer; - } - static partial class UnixSignalHandlerRegistration { + [StructLayout(LayoutKind.Sequential, Pack = 1)] + public unsafe struct SigSet + { + fixed long sa_mask[16]; + } + + [StructLayout(LayoutKind.Sequential, Pack = 1)] + public struct SigAction + { + public IntPtr sa_handler; + public SigSet sa_mask; + public int sa_flags; + public IntPtr sa_restorer; + } + private const int SIGSEGV = 11; private const int SIGBUS = 10; private const int SA_SIGINFO = 0x00000004; @@ -27,9 +27,24 @@ namespace ARMeilleure.Signal [LibraryImport("libc", SetLastError = true)] private static partial int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction); + [LibraryImport("libc", SetLastError = true)] + private static partial int sigaction(int signum, IntPtr sigAction, out SigAction oldAction); + [LibraryImport("libc", SetLastError = true)] private static partial int sigemptyset(ref SigSet set); + public static SigAction GetSegfaultExceptionHandler() + { + int result = sigaction(SIGSEGV, IntPtr.Zero, out SigAction old); + + if (result != 0) + { + throw new InvalidOperationException($"Could not get SIGSEGV sigaction. 
Error: {result}"); + } + + return old; + } + public static SigAction RegisterExceptionHandler(IntPtr action) { SigAction sig = new SigAction @@ -49,7 +64,7 @@ namespace ARMeilleure.Signal if (OperatingSystem.IsMacOS()) { - result = sigaction(SIGBUS, ref sig, out SigAction oldb); + result = sigaction(SIGBUS, ref sig, out _); if (result != 0) { diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs index 48254de4e..238f85082 100644 --- a/ARMeilleure/Translation/ArmEmitterContext.cs +++ b/ARMeilleure/Translation/ArmEmitterContext.cs @@ -39,6 +39,8 @@ namespace ARMeilleure.Translation } } + private bool _pendingQcFlagSync; + public OpCode CurrOp { get; set; } public IMemoryManager Memory { get; } @@ -81,6 +83,8 @@ namespace ARMeilleure.Translation public override Operand Call(MethodInfo info, params Operand[] callArgs) { + SyncQcFlag(); + if (!HasPtc) { return base.Call(info, callArgs); @@ -139,6 +143,51 @@ namespace ARMeilleure.Translation _optOpLastFlagSet = null; } + public void SetPendingQcFlagSync() + { + _pendingQcFlagSync = true; + } + + public void SyncQcFlag() + { + if (_pendingQcFlagSync) + { + if (Optimizations.UseAdvSimd) + { + Operand fpsr = AddIntrinsicInt(Intrinsic.Arm64MrsFpsr); + + uint qcFlagMask = (uint)FPSR.Qc; + + Operand qcClearLabel = Label(); + + BranchIfFalse(qcClearLabel, BitwiseAnd(fpsr, Const(qcFlagMask))); + + AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0)); + InstEmitHelper.SetFpFlag(this, FPState.QcFlag, Const(1)); + + MarkLabel(qcClearLabel); + } + + _pendingQcFlagSync = false; + } + } + + public void ClearQcFlag() + { + if (Optimizations.UseAdvSimd) + { + AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0)); + } + } + + public void ClearQcFlagIfModified() + { + if (_pendingQcFlagSync && Optimizations.UseAdvSimd) + { + AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0)); + } + } + public Operand TryGetComparisonResult(Condition condition) { if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet) diff --git a/ARMeilleure/Translation/Cache/JitCache.cs b/ARMeilleure/Translation/Cache/JitCache.cs index 24affa34e..f496a8e9c 100644 --- a/ARMeilleure/Translation/Cache/JitCache.cs +++ b/ARMeilleure/Translation/Cache/JitCache.cs @@ -1,6 +1,7 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.Unwinding; using ARMeilleure.Memory; +using ARMeilleure.Native; using System; using System.Collections.Generic; using System.Diagnostics; @@ -17,6 +18,7 @@ namespace ARMeilleure.Translation.Cache private const int CacheSize = 2047 * 1024 * 1024; private static ReservedRegion _jitRegion; + private static JitCacheInvalidation _jitCacheInvalidator; private static CacheMemoryAllocator _cacheAllocator; @@ -25,8 +27,6 @@ namespace ARMeilleure.Translation.Cache private static readonly object _lock = new object(); private static bool _initialized; - public static IntPtr Base => _jitRegion.Pointer; - public static void Initialize(IJitMemoryAllocator allocator) { if (_initialized) return; @@ -36,6 +36,7 @@ namespace ARMeilleure.Translation.Cache if (_initialized) return; _jitRegion = new ReservedRegion(allocator, CacheSize); + _jitCacheInvalidator = new JitCacheInvalidation(allocator); _cacheAllocator = new CacheMemoryAllocator(CacheSize); @@ -60,11 +61,24 @@ namespace ARMeilleure.Translation.Cache IntPtr funcPtr = _jitRegion.Pointer + funcOffset; - ReprotectAsWritable(funcOffset, code.Length); + if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + unsafe + { + fixed (byte 
*codePtr = code) + { + JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length); + } + } + } + else + { + ReprotectAsWritable(funcOffset, code.Length); + Marshal.Copy(code, 0, funcPtr, code.Length); + ReprotectAsExecutable(funcOffset, code.Length); - Marshal.Copy(code, 0, funcPtr, code.Length); - - ReprotectAsExecutable(funcOffset, code.Length); + _jitCacheInvalidator.Invalidate(funcPtr, (ulong)code.Length); + } Add(funcOffset, code.Length, func.UnwindInfo); diff --git a/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs b/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs new file mode 100644 index 000000000..ec2ae73bb --- /dev/null +++ b/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs @@ -0,0 +1,79 @@ +using ARMeilleure.Memory; +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation.Cache +{ + class JitCacheInvalidation + { + private static int[] _invalidationCode = new int[] + { + unchecked((int)0xd53b0022), // mrs x2, ctr_el0 + unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4 + unchecked((int)0x52800083), // mov w3, #0x4 + unchecked((int)0x12000c45), // and w5, w2, #0xf + unchecked((int)0x1ac42064), // lsl w4, w3, w4 + unchecked((int)0x51000482), // sub w2, w4, #0x1 + unchecked((int)0x8a220002), // bic x2, x0, x2 + unchecked((int)0x1ac52063), // lsl w3, w3, w5 + unchecked((int)0xeb01005f), // cmp x2, x1 + unchecked((int)0x93407c84), // sxtw x4, w4 + unchecked((int)0x540000a2), // b.cs 3c + unchecked((int)0xd50b7b22), // dc cvau, x2 + unchecked((int)0x8b040042), // add x2, x2, x4 + unchecked((int)0xeb02003f), // cmp x1, x2 + unchecked((int)0x54ffffa8), // b.hi 2c + unchecked((int)0xd5033b9f), // dsb ish + unchecked((int)0x51000462), // sub w2, w3, #0x1 + unchecked((int)0x93407c63), // sxtw x3, w3 + unchecked((int)0x8a220000), // bic x0, x0, x2 + unchecked((int)0xeb00003f), // cmp x1, x0 + unchecked((int)0x540000a9), // b.ls 64 + unchecked((int)0xd50b7520), // ic ivau, x0 + unchecked((int)0x8b030000), // add x0, x0, x3 + unchecked((int)0xeb00003f), // cmp x1, x0 + unchecked((int)0x54ffffa8), // b.hi 54 + unchecked((int)0xd5033b9f), // dsb ish + unchecked((int)0xd5033fdf), // isb + unchecked((int)0xd65f03c0), // ret + }; + + private delegate void InvalidateCache(ulong start, ulong end); + + private InvalidateCache _invalidateCache; + private ReservedRegion _invalidateCacheCodeRegion; + + private readonly bool _needsInvalidation; + + public JitCacheInvalidation(IJitMemoryAllocator allocator) + { + // On macOS, a different path is used to write to the JIT cache, which does the invalidation. 
+ if (!OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + ulong size = (ulong)_invalidationCode.Length * sizeof(int); + ulong mask = (ulong)ReservedRegion.DefaultGranularity - 1; + + size = (size + mask) & ~mask; + + _invalidateCacheCodeRegion = new ReservedRegion(allocator, size); + _invalidateCacheCodeRegion.ExpandIfNeeded(size); + + Marshal.Copy(_invalidationCode, 0, _invalidateCacheCodeRegion.Pointer, _invalidationCode.Length); + + _invalidateCacheCodeRegion.Block.MapAsRx(0, size); + + _invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer); + + _needsInvalidation = true; + } + } + + public void Invalidate(IntPtr basePointer, ulong size) + { + if (_needsInvalidation) + { + _invalidateCache((ulong)basePointer, (ulong)basePointer + size); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs index 817bd487e..d4aa5cd96 100644 --- a/ARMeilleure/Translation/Compiler.cs +++ b/ARMeilleure/Translation/Compiler.cs @@ -1,8 +1,9 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.Optimizations; -using ARMeilleure.CodeGen.X86; using ARMeilleure.Diagnostics; using ARMeilleure.IntermediateRepresentation; +using System; +using System.Runtime.InteropServices; namespace ARMeilleure.Translation { @@ -12,7 +13,8 @@ namespace ARMeilleure.Translation ControlFlowGraph cfg, OperandType[] argTypes, OperandType retType, - CompilerOptions options) + CompilerOptions options, + Architecture target) { CompilerContext cctx = new(cfg, argTypes, retType, options); @@ -49,7 +51,18 @@ namespace ARMeilleure.Translation Logger.EndPass(PassName.RegisterToLocal, cfg); } - return CodeGenerator.Generate(cctx); + if (target == Architecture.X64) + { + return CodeGen.X86.CodeGenerator.Generate(cctx); + } + else if (target == Architecture.Arm64) + { + return CodeGen.Arm64.CodeGenerator.Generate(cctx); + } + else + { + throw new NotImplementedException(target.ToString()); + } } } } \ No newline at end of file diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index f99d6e516..6f57e1883 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 4159; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 4114; //! To be incremented manually for each change to the ARMeilleure project.
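The Translator and TranslatorStubs changes below are the consumer side of the QC-flag helpers added to ArmEmitterContext earlier in this patch: the host FPSR is cleared at function entry, and any pending sync is flushed at the end of each block (and before calls, via the Call override). A minimal sketch of the contract an AdvSimd emitter is expected to follow; the emitter itself is hypothetical, though Intrinsic.Arm64SqaddV is among the intrinsics this patch introduces.

public static void EmitSaturatingAddSketch(ArmEmitterContext context, Operand n, Operand m)
{
    // The host sqadd may set FPSR.QC on saturation. Instead of draining FPSR
    // after every saturating instruction, just record that a sync is owed;
    // SyncQcFlag() materializes it into the guest flag at block boundaries.
    Operand res = context.AddIntrinsic(Intrinsic.Arm64SqaddV, n, m);
    context.SetPendingQcFlagSync();
    // ...store res to the destination vector register as usual.
}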
private const string ActualDir = "0"; private const string BackupDir = "1"; diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 77ccdaeab..75c4df23e 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -14,6 +14,7 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; +using System.Runtime.InteropServices; using System.Threading; using static ARMeilleure.IntermediateRepresentation.Operand.Factory; @@ -282,7 +283,7 @@ namespace ARMeilleure.Translation options |= CompilerOptions.Relocatable; } - CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options); + CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options, RuntimeInformation.ProcessArchitecture); if (context.HasPtc && !singleStep) { @@ -359,9 +360,14 @@ namespace ARMeilleure.Translation } } - if (block.Address == context.EntryAddress && !context.HighCq) + if (block.Address == context.EntryAddress) { - EmitRejitCheck(context, out counter); + if (!context.HighCq) + { + EmitRejitCheck(context, out counter); + } + + context.ClearQcFlag(); } context.CurrBlock = block; @@ -386,9 +392,14 @@ namespace ARMeilleure.Translation bool isLastOp = opcIndex == block.OpCodes.Count - 1; - if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address) + if (isLastOp) { - EmitSynchronization(context); + context.SyncQcFlag(); + + if (block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address) + { + EmitSynchronization(context); + } } Operand lblPredicateSkip = default; diff --git a/ARMeilleure/Translation/TranslatorStubs.cs b/ARMeilleure/Translation/TranslatorStubs.cs index 67d2bba8e..6ed84de80 100644 --- a/ARMeilleure/Translation/TranslatorStubs.cs +++ b/ARMeilleure/Translation/TranslatorStubs.cs @@ -171,7 +171,7 @@ namespace ARMeilleure.Translation var retType = OperandType.I64; var argTypes = new[] { OperandType.I64 }; - var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>(); + var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>(); return Marshal.GetFunctionPointerForDelegate(func); } @@ -197,7 +197,7 @@ namespace ARMeilleure.Translation var retType = OperandType.I64; var argTypes = new[] { OperandType.I64 }; - var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>(); + var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>(); return Marshal.GetFunctionPointerForDelegate(func); } @@ -235,7 +235,7 @@ namespace ARMeilleure.Translation var retType = OperandType.None; var argTypes = new[] { OperandType.I64, OperandType.I64 }; - return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<DispatcherFunction>(); + return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>(); } } } diff --git a/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs b/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs index 27bb09ccb..0cf35c17b 100644 --- a/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs +++ b/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs @@ -6,6 +6,6 @@ namespace Ryujinx.Cpu.Jit public class JitMemoryAllocator : IJitMemoryAllocator { public IJitMemoryBlock Allocate(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.None); - public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size,
MemoryAllocationFlags.Reserve); + public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.Jit); } } diff --git a/Ryujinx.Memory/MemoryAllocationFlags.cs b/Ryujinx.Memory/MemoryAllocationFlags.cs index 313f33e5f..6f0ef1aa9 100644 --- a/Ryujinx.Memory/MemoryAllocationFlags.cs +++ b/Ryujinx.Memory/MemoryAllocationFlags.cs @@ -35,6 +35,18 @@ namespace Ryujinx.Memory /// Indicates that the memory block should support mapping views of a mirrorable memory block. /// The block that is to have their views mapped should be created with the <see cref="Mirrorable"/> flag. /// </summary> - ViewCompatible = 1 << 3 + ViewCompatible = 1 << 3, + + /// <summary> + /// If used with the <see cref="Mirrorable"/> flag, indicates that the memory block will only be used as + /// backing storage and will never be accessed directly, so the memory for the block will not be mapped. + /// </summary> + NoMap = 1 << 4, + + /// <summary> + /// Indicates that the memory will be used to store JIT generated code. + /// On some platforms, this requires special flags to be passed that will allow the memory to be executable. + /// </summary> + Jit = 1 << 5 } } diff --git a/Ryujinx.Memory/MemoryBlock.cs b/Ryujinx.Memory/MemoryBlock.cs index 6b9d852de..e1f19c27a 100644 --- a/Ryujinx.Memory/MemoryBlock.cs +++ b/Ryujinx.Memory/MemoryBlock.cs @@ -1,6 +1,6 @@ using System; -using System.Collections.Concurrent; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Threading; namespace Ryujinx.Memory { @@ -13,10 +13,9 @@ namespace Ryujinx.Memory private readonly bool _usesSharedMemory; private readonly bool _isMirror; private readonly bool _viewCompatible; + private readonly bool _forJit; private IntPtr _sharedMemory; private IntPtr _pointer; - private ConcurrentDictionary<MemoryBlock, byte> _viewStorages; - private int _viewCount; /// <summary> /// Pointer to the memory block data.
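The two new flags are meant to compose with the existing ones; a short sketch of the intended combinations follows (the JitMemoryAllocator change above uses the first form, and the Mirrorable plus NoMap pairing is the one the NoMap doc comment describes).

ulong size = 1UL << 20;

// JIT code region: reserved up front, committed lazily, and on macOS/Arm64
// mapped with MAP_JIT so its protection can include execute.
var jitRegion = new MemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.Jit);

// Mirrorable backing store that is only ever reached through mapped views,
// so the block itself never gets a direct mapping.
var backingStore = new MemoryBlock(size, MemoryAllocationFlags.Mirrorable | MemoryAllocationFlags.NoMap);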
@@ -40,24 +39,27 @@ namespace Ryujinx.Memory if (flags.HasFlag(MemoryAllocationFlags.Mirrorable)) { _sharedMemory = MemoryManagement.CreateSharedMemory(size, flags.HasFlag(MemoryAllocationFlags.Reserve)); - _pointer = MemoryManagement.MapSharedMemory(_sharedMemory, size); + + if (!flags.HasFlag(MemoryAllocationFlags.NoMap)) + { + _pointer = MemoryManagement.MapSharedMemory(_sharedMemory, size); + } + _usesSharedMemory = true; } else if (flags.HasFlag(MemoryAllocationFlags.Reserve)) { _viewCompatible = flags.HasFlag(MemoryAllocationFlags.ViewCompatible); - _pointer = MemoryManagement.Reserve(size, _viewCompatible); + _forJit = flags.HasFlag(MemoryAllocationFlags.Jit); + _pointer = MemoryManagement.Reserve(size, _forJit, _viewCompatible); } else { - _pointer = MemoryManagement.Allocate(size); + _forJit = flags.HasFlag(MemoryAllocationFlags.Jit); + _pointer = MemoryManagement.Allocate(size, _forJit); } Size = size; - - _viewStorages = new ConcurrentDictionary<MemoryBlock, byte>(); - _viewStorages.TryAdd(this, 0); - _viewCount = 1; } /// @@ -104,7 +106,7 @@ namespace Ryujinx.Memory /// <exception cref="InvalidMemoryRegionException">Throw when either <paramref name="offset"/> or <paramref name="size"/> are out of range</exception> public bool Commit(ulong offset, ulong size) { - return MemoryManagement.Commit(GetPointerInternal(offset, size), size); + return MemoryManagement.Commit(GetPointerInternal(offset, size), size, _forJit); } /// @@ -138,11 +140,6 @@ namespace Ryujinx.Memory throw new ArgumentException("The source memory block is not mirrorable, and thus cannot be mapped on the current block."); } - if (_viewStorages.TryAdd(srcBlock, 0)) - { - srcBlock.IncrementViewCount(); - } - MemoryManagement.MapView(srcBlock._sharedMemory, srcOffset, GetPointerInternal(dstOffset, size), size, this); } @@ -403,33 +400,16 @@ namespace Ryujinx.Memory { MemoryManagement.Free(ptr, Size); } - - foreach (MemoryBlock viewStorage in _viewStorages.Keys) - { - viewStorage.DecrementViewCount(); - } - - _viewStorages.Clear(); } - } - /// <summary> - /// Increments the number of views that uses this memory block as storage. - /// </summary> - private void IncrementViewCount() - { - Interlocked.Increment(ref _viewCount); - } - - /// <summary> - /// Decrements the number of views that uses this memory block as storage.
- /// </summary> - private void DecrementViewCount() - { - if (Interlocked.Decrement(ref _viewCount) == 0 && _sharedMemory != IntPtr.Zero && !_isMirror) + if (!_isMirror) { - MemoryManagement.DestroySharedMemory(_sharedMemory); - _sharedMemory = IntPtr.Zero; + IntPtr sharedMemory = Interlocked.Exchange(ref _sharedMemory, IntPtr.Zero); + + if (sharedMemory != IntPtr.Zero) + { + MemoryManagement.DestroySharedMemory(sharedMemory); + } } } @@ -453,6 +433,16 @@ namespace Ryujinx.Memory return true; } + public static ulong GetPageSize() + { + if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + return 1UL << 14; + } + + return 1UL << 12; + } + private static void ThrowInvalidMemoryRegionException() => throw new InvalidMemoryRegionException(); } } diff --git a/Ryujinx.Memory/MemoryManagement.cs b/Ryujinx.Memory/MemoryManagement.cs index 7c042eba3..c4b5ac4c9 100644 --- a/Ryujinx.Memory/MemoryManagement.cs +++ b/Ryujinx.Memory/MemoryManagement.cs @@ -4,7 +4,7 @@ namespace Ryujinx.Memory { public static class MemoryManagement { - public static IntPtr Allocate(ulong size) + public static IntPtr Allocate(ulong size, bool forJit) { if (OperatingSystem.IsWindows()) { @@ -12,7 +12,7 @@ namespace Ryujinx.Memory } else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS()) { - return MemoryManagementUnix.Allocate(size); + return MemoryManagementUnix.Allocate(size, forJit); } else { @@ -20,7 +20,7 @@ namespace Ryujinx.Memory } } - public static IntPtr Reserve(ulong size, bool viewCompatible) + public static IntPtr Reserve(ulong size, bool forJit, bool viewCompatible) { if (OperatingSystem.IsWindows()) { @@ -28,7 +28,7 @@ namespace Ryujinx.Memory } else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS()) { - return MemoryManagementUnix.Reserve(size); + return MemoryManagementUnix.Reserve(size, forJit); } else { @@ -36,7 +36,7 @@ namespace Ryujinx.Memory } } - public static bool Commit(IntPtr address, ulong size) + public static bool Commit(IntPtr address, ulong size, bool forJit) { if (OperatingSystem.IsWindows()) { @@ -44,7 +44,7 @@ namespace Ryujinx.Memory } else if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS()) { - return MemoryManagementUnix.Commit(address, size); + return MemoryManagementUnix.Commit(address, size, forJit); } else { diff --git a/Ryujinx.Memory/MemoryManagementUnix.cs b/Ryujinx.Memory/MemoryManagementUnix.cs index df3fcea91..affcff92b 100644 --- a/Ryujinx.Memory/MemoryManagementUnix.cs +++ b/Ryujinx.Memory/MemoryManagementUnix.cs @@ -13,17 +13,17 @@ namespace Ryujinx.Memory { private static readonly ConcurrentDictionary<IntPtr, ulong> _allocations = new ConcurrentDictionary<IntPtr, ulong>(); - public static IntPtr Allocate(ulong size) + public static IntPtr Allocate(ulong size, bool forJit) { - return AllocateInternal(size, MmapProts.PROT_READ | MmapProts.PROT_WRITE); + return AllocateInternal(size, MmapProts.PROT_READ | MmapProts.PROT_WRITE, forJit); } - public static IntPtr Reserve(ulong size) + public static IntPtr Reserve(ulong size, bool forJit) { - return AllocateInternal(size, MmapProts.PROT_NONE); + return AllocateInternal(size, MmapProts.PROT_NONE, forJit); } - private static IntPtr AllocateInternal(ulong size, MmapProts prot, bool shared = false) + private static IntPtr AllocateInternal(ulong size, MmapProts prot, bool forJit, bool shared = false) { MmapFlags flags = MmapFlags.MAP_ANONYMOUS; @@ -41,6 +41,16 @@ namespace Ryujinx.Memory flags |= MmapFlags.MAP_NORESERVE; } + if (OperatingSystem.IsMacOSVersionAtLeast(10, 14) && forJit) + { + flags |=
MmapFlags.MAP_JIT_DARWIN; + + if (prot == (MmapProts.PROT_READ | MmapProts.PROT_WRITE)) + { + prot |= MmapProts.PROT_EXEC; + } + } + IntPtr ptr = mmap(IntPtr.Zero, size, prot, flags, -1, 0); if (ptr == new IntPtr(-1L)) @@ -57,9 +67,16 @@ namespace Ryujinx.Memory return ptr; } - public static bool Commit(IntPtr address, ulong size) + public static bool Commit(IntPtr address, ulong size, bool forJit) { - return mprotect(address, size, MmapProts.PROT_READ | MmapProts.PROT_WRITE) == 0; + MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE; + + if (OperatingSystem.IsMacOSVersionAtLeast(10, 14) && forJit) + { + prot |= MmapProts.PROT_EXEC; + } + + return mprotect(address, size, prot) == 0; } public static bool Decommit(IntPtr address, ulong size) diff --git a/Ryujinx.Memory/MemoryManagerUnixHelper.cs b/Ryujinx.Memory/MemoryManagerUnixHelper.cs index 87a81a79b..204f1ca4d 100644 --- a/Ryujinx.Memory/MemoryManagerUnixHelper.cs +++ b/Ryujinx.Memory/MemoryManagerUnixHelper.cs @@ -22,7 +22,8 @@ namespace Ryujinx.Memory MAP_ANONYMOUS = 4, MAP_NORESERVE = 8, MAP_FIXED = 16, - MAP_UNLOCKED = 32 + MAP_UNLOCKED = 32, + MAP_JIT_DARWIN = 0x800 } [Flags] @@ -45,7 +46,6 @@ namespace Ryujinx.Memory private const int MAP_UNLOCKED_LINUX_GENERIC = 0x80000; private const int MAP_NORESERVE_DARWIN = 0x40; - private const int MAP_JIT_DARWIN = 0x800; private const int MAP_ANONYMOUS_DARWIN = 0x1000; public const int MADV_DONTNEED = 4; @@ -151,10 +151,9 @@ namespace Ryujinx.Memory } } - if (OperatingSystem.IsMacOSVersionAtLeast(10, 14)) + if (flags.HasFlag(MmapFlags.MAP_JIT_DARWIN) && OperatingSystem.IsMacOSVersionAtLeast(10, 14)) { - // Only to be used with the Hardened Runtime. - // result |= MAP_JIT_DARWIN; + result |= (int)MmapFlags.MAP_JIT_DARWIN; } return result;
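Taken together, a JIT reservation on macOS 10.14+ with an Arm64 host now reaches mmap roughly as sketched below. MAP_PRIVATE comes from the non-shared path of AllocateInternal (not shown in this hunk), and the translation to the Darwin constants happens in MemoryManagerUnixHelper.

// Reserve(size, forJit: true): reservation only, no access rights yet.
MmapProts prot = MmapProts.PROT_NONE;
MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS
    | MmapFlags.MAP_NORESERVE | MmapFlags.MAP_JIT_DARWIN;
IntPtr ptr = mmap(IntPtr.Zero, size, prot, flags, -1, 0);

// Commit(ptr, size, forJit: true): pages become RWX in one mprotect call,
// which the MAP_JIT mapping allows under the hardened runtime.
mprotect(ptr, size, MmapProts.PROT_READ | MmapProts.PROT_WRITE | MmapProts.PROT_EXEC);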