diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj
new file mode 100644
index 000000000..9268dcbee
--- /dev/null
+++ b/ARMeilleure/ARMeilleure.csproj
@@ -0,0 +1,20 @@
+
+
+
+ netcoreapp2.1
+ win-x64;osx-x64;linux-x64
+
+
+
+ true
+
+
+
+ true
+
+
+
+
+
+
+
diff --git a/ARMeilleure/CodeGen/CompiledFunction.cs b/ARMeilleure/CodeGen/CompiledFunction.cs
new file mode 100644
index 000000000..61e89c240
--- /dev/null
+++ b/ARMeilleure/CodeGen/CompiledFunction.cs
@@ -0,0 +1,17 @@
+using ARMeilleure.CodeGen.Unwinding;
+
+namespace ARMeilleure.CodeGen
+{
+    // Immutable pair made of a code byte array and the UnwindInfo that goes with it.
+    struct CompiledFunction
+    {
+        // Code bytes, exactly as handed to the constructor.
+        public byte[] Code { get; }
+
+        // Unwind information, exactly as handed to the constructor.
+        public UnwindInfo UnwindInfo { get; }
+
+        // Stores both arguments as-is; no copy of the array is made.
+        public CompiledFunction(byte[] code, UnwindInfo unwindInfo)
+        {
+            UnwindInfo = unwindInfo;
+            Code       = code;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
new file mode 100644
index 000000000..84eedee0e
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -0,0 +1,258 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class ConstantFolding
+ {
+        // Constant folding: when the operation has a destination, at least one source,
+        // and every source is a constant, the operation is replaced with a copy of the
+        // computed constant. Only I32 and I64 destinations are folded, anything else
+        // (and any instruction not listed below) is left untouched.
+        public static void RunPass(Operation operation)
+        {
+            if (operation.Destination == null || operation.SourcesCount == 0)
+            {
+                return;
+            }
+
+            if (!AreAllSourcesConstant(operation))
+            {
+                return;
+            }
+
+            OperandType type = operation.Destination.Type;
+
+            switch (operation.Instruction)
+            {
+                case Instruction.Add:
+                    FoldBinary(operation, type, (x, y) => x + y, (x, y) => x + y);
+                    break;
+
+                case Instruction.BitwiseAnd:
+                    FoldBinary(operation, type, (x, y) => x & y, (x, y) => x & y);
+                    break;
+
+                case Instruction.BitwiseExclusiveOr:
+                    FoldBinary(operation, type, (x, y) => x ^ y, (x, y) => x ^ y);
+                    break;
+
+                case Instruction.BitwiseNot:
+                    FoldUnary(operation, type, (x) => ~x, (x) => ~x);
+                    break;
+
+                case Instruction.BitwiseOr:
+                    FoldBinary(operation, type, (x, y) => x | y, (x, y) => x | y);
+                    break;
+
+                case Instruction.Copy:
+                    FoldUnary(operation, type, (x) => x, (x) => x);
+                    break;
+
+                case Instruction.Divide:
+                    // Dedicated helpers, a plain "x / y" throws for MinValue / -1.
+                    FoldBinary(operation, type, DivideI32, DivideI64);
+                    break;
+
+                case Instruction.DivideUI:
+                    FoldBinary(
+                        operation,
+                        type,
+                        (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0,
+                        (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0);
+                    break;
+
+                case Instruction.Multiply:
+                    FoldBinary(operation, type, (x, y) => x * y, (x, y) => x * y);
+                    break;
+
+                case Instruction.Negate:
+                    FoldUnary(operation, type, (x) => -x, (x) => -x);
+                    break;
+
+                case Instruction.ShiftLeft:
+                    FoldBinary(operation, type, (x, y) => x << y, (x, y) => x << (int)y);
+                    break;
+
+                case Instruction.ShiftRightSI:
+                    FoldBinary(operation, type, (x, y) => x >> y, (x, y) => x >> (int)y);
+                    break;
+
+                case Instruction.ShiftRightUI:
+                    FoldBinary(
+                        operation,
+                        type,
+                        (x, y) => (int)((uint)x >> y),
+                        (x, y) => (long)((ulong)x >> (int)y));
+                    break;
+
+                case Instruction.SignExtend16:
+                    FoldUnary(operation, type, (x) => (short)x, (x) => (short)x);
+                    break;
+
+                case Instruction.SignExtend32:
+                    FoldUnary(operation, type, (x) => x, (x) => (int)x);
+                    break;
+
+                case Instruction.SignExtend8:
+                    FoldUnary(operation, type, (x) => (sbyte)x, (x) => (sbyte)x);
+                    break;
+
+                case Instruction.Subtract:
+                    FoldBinary(operation, type, (x, y) => x - y, (x, y) => x - y);
+                    break;
+            }
+        }
+
+        // Picks the 32-bit or 64-bit evaluator from the destination type.
+        // Does nothing for any type other than I32 and I64.
+        private static void FoldUnary(
+            Operation       operation,
+            OperandType     type,
+            Func<int, int>   op32,
+            Func<long, long> op64)
+        {
+            if (type == OperandType.I32)
+            {
+                EvaluateUnaryI32(operation, op32);
+            }
+            else if (type == OperandType.I64)
+            {
+                EvaluateUnaryI64(operation, op64);
+            }
+        }
+
+        // Two-source counterpart of FoldUnary.
+        private static void FoldBinary(
+            Operation              operation,
+            OperandType            type,
+            Func<int, int, int>    op32,
+            Func<long, long, long> op64)
+        {
+            if (type == OperandType.I32)
+            {
+                EvaluateBinaryI32(operation, op32);
+            }
+            else if (type == OperandType.I64)
+            {
+                EvaluateBinaryI64(operation, op64);
+            }
+        }
+
+        // Signed 32-bit division that never throws: a zero divisor folds to 0
+        // (as before), and int.MinValue / -1 wraps around to int.MinValue.
+        private static int DivideI32(int x, int y)
+        {
+            if (y == 0)
+            {
+                return 0;
+            }
+
+            // "x / -1" raises OverflowException when x is int.MinValue,
+            // a wrapping negation gives the same result for every other x.
+            return y == -1 ? unchecked(-x) : x / y;
+        }
+
+        // Signed 64-bit division that never throws, see DivideI32.
+        private static long DivideI64(long x, long y)
+        {
+            if (y == 0)
+            {
+                return 0;
+            }
+
+            return y == -1 ? unchecked(-x) : x / y;
+        }
+
+        // True when no source operand of the operation is anything other than a
+        // constant. Stops at the first non-constant source.
+        private static bool AreAllSourcesConstant(Operation operation)
+        {
+            int srcIndex = 0;
+
+            while (srcIndex < operation.SourcesCount)
+            {
+                Operand source = operation.GetSource(srcIndex);
+
+                if (source.Kind != OperandKind.Constant)
+                {
+                    return false;
+                }
+
+                srcIndex++;
+            }
+
+            return true;
+        }
+
+        // Reads source 0 as a 32-bit integer, applies op to it, and turns the
+        // operation into a copy of a constant holding the result.
+        private static void EvaluateUnaryI32(Operation operation, Func<int, int> op)
+        {
+            int x = operation.GetSource(0).AsInt32();
+
+            operation.TurnIntoCopy(Const(op(x)));
+        }
+
+        // Reads source 0 as a 64-bit integer, applies op to it, and turns the
+        // operation into a copy of a constant holding the result.
+        private static void EvaluateUnaryI64(Operation operation, Func<long, long> op)
+        {
+            long x = operation.GetSource(0).AsInt64();
+
+            operation.TurnIntoCopy(Const(op(x)));
+        }
+
+        // Reads sources 0 and 1 as 32-bit integers, applies op to them (in that
+        // order), and turns the operation into a copy of a constant holding the result.
+        private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op)
+        {
+            int x = operation.GetSource(0).AsInt32();
+            int y = operation.GetSource(1).AsInt32();
+
+            operation.TurnIntoCopy(Const(op(x, y)));
+        }
+
+        // Reads sources 0 and 1 as 64-bit integers, applies op to them (in that
+        // order), and turns the operation into a copy of a constant holding the result.
+        private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op)
+        {
+            long x = operation.GetSource(0).AsInt64();
+            long y = operation.GetSource(1).AsInt64();
+
+            operation.TurnIntoCopy(Const(op(x, y)));
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
new file mode 100644
index 000000000..c01a8f1e7
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -0,0 +1,126 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Optimizer
+ {
+        // Sweeps every block of the graph, removing unused nodes, folding constants,
+        // simplifying operations and propagating copies. The sweep is repeated for
+        // as long as the previous one removed at least one node.
+        public static void RunPass(ControlFlowGraph cfg)
+        {
+            bool modified;
+
+            do
+            {
+                modified = false;
+
+                foreach (BasicBlock block in cfg.Blocks)
+                {
+                    LinkedListNode<Node> node = block.Operations.First;
+
+                    while (node != null)
+                    {
+                        // Captured up front, the current node may be unlinked below.
+                        LinkedListNode<Node> nextNode = node.Next;
+
+                        bool isUnused = IsUnused(node.Value);
+
+                        // Nodes that are not operations are only candidates for removal.
+                        if (!(node.Value is Operation operation) || isUnused)
+                        {
+                            if (isUnused)
+                            {
+                                RemoveNode(block, node);
+
+                                modified = true;
+                            }
+
+                            node = nextNode;
+
+                            continue;
+                        }
+
+                        ConstantFolding.RunPass(operation);
+
+                        Simplification.RunPass(operation);
+
+                        // Either pass above may have turned the operation into a copy.
+                        if (DestIsLocalVar(operation) && IsPropagableCopy(operation))
+                        {
+                            PropagateCopy(operation);
+
+                            RemoveNode(block, node);
+
+                            modified = true;
+                        }
+
+                        node = nextNode;
+                    }
+                }
+            }
+            while (modified);
+        }
+
+        // Rewrites every node that reads the copy destination so that it reads
+        // the copy source (source 0 of the copy) instead.
+        private static void PropagateCopy(Operation copyOp)
+        {
+            Operand replaced    = copyOp.Destination;
+            Operand replacement = copyOp.GetSource(0);
+
+            // Work on a snapshot of the use list taken before any source is rewritten.
+            Node[] users = replaced.Uses.ToArray();
+
+            for (int userIndex = 0; userIndex < users.Length; userIndex++)
+            {
+                Node user = users[userIndex];
+
+                int srcIndex = 0;
+
+                while (srcIndex < user.SourcesCount)
+                {
+                    if (user.GetSource(srcIndex) == replaced)
+                    {
+                        user.SetSource(srcIndex, replacement);
+                    }
+
+                    srcIndex++;
+                }
+            }
+        }
+
+        // Unlinks the node from the block operation list, then clears all of its
+        // sources and its destination.
+        private static void RemoveNode(BasicBlock block, LinkedListNode<Node> llNode)
+        {
+            // Remove a node from the nodes list, and also remove itself
+            // from all the use lists on the operands that this node uses.
+            block.Operations.Remove(llNode);
+
+            Node node = llNode.Value;
+
+            for (int index = 0; index < node.SourcesCount; index++)
+            {
+                node.SetSource(index, null);
+            }
+
+            // A node that still has readers of its destination must not be removed.
+            Debug.Assert(node.Destination == null || node.Destination.Uses.Count == 0);
+
+            node.Destination = null;
+        }
+
+        // A node is unused when it writes a local variable that nothing reads,
+        // and it is not flagged as having side effects.
+        private static bool IsUnused(Node node)
+        {
+            if (!DestIsLocalVar(node))
+            {
+                return false;
+            }
+
+            if (node.Destination.Uses.Count != 0)
+            {
+                return false;
+            }
+
+            return !HasSideEffects(node);
+        }
+
+        // True when the node has a destination and that destination is a local variable.
+        private static bool DestIsLocalVar(Node node)
+        {
+            if (node.Destination == null)
+            {
+                return false;
+            }
+
+            return node.Destination.Kind == OperandKind.LocalVariable;
+        }
+
+        // Only call operations are treated as having side effects.
+        private static bool HasSideEffects(Node node)
+        {
+            if (node is Operation operation)
+            {
+                return operation.Instruction == Instruction.Call;
+            }
+
+            return false;
+        }
+
+        // A copy can be propagated when destination and source have the same operand type.
+        // Any instruction other than Copy is rejected before the operands are looked at.
+        private static bool IsPropagableCopy(Operation operation)
+        {
+            bool isCopy = operation.Instruction == Instruction.Copy;
+
+            return isCopy && operation.Destination.Type == operation.GetSource(0).Type;
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/ARMeilleure/CodeGen/Optimizations/Simplification.cs
new file mode 100644
index 000000000..cafc025ca
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/Simplification.cs
@@ -0,0 +1,157 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Simplification
+ {
+        // Dispatches the operation to the algebraic simplification that applies
+        // to its instruction. Instructions not listed here are left untouched.
+        public static void RunPass(Operation operation)
+        {
+            Instruction inst = operation.Instruction;
+
+            switch (inst)
+            {
+                // Identity element 0 on either side.
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.Add:
+                    TryEliminateBinaryOpComutative(operation, 0);
+                    return;
+
+                // Identity element 1 on either side.
+                case Instruction.Multiply:
+                    TryEliminateBinaryOpComutative(operation, 1);
+                    return;
+
+                // Identity element 0, right hand side only.
+                case Instruction.Subtract:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                    TryEliminateBinaryOpY(operation, 0);
+                    return;
+
+                // Identity element 1, right hand side only.
+                case Instruction.Divide:
+                    TryEliminateBinaryOpY(operation, 1);
+                    return;
+
+                case Instruction.BitwiseAnd:
+                    TryEliminateBitwiseAnd(operation);
+                    return;
+
+                case Instruction.BitwiseOr:
+                    TryEliminateBitwiseOr(operation);
+                    return;
+
+                case Instruction.ConditionalSelect:
+                    TryEliminateConditionalSelect(operation);
+                    return;
+            }
+        }
+
+        // Replaces a bitwise AND with a copy when one side is a constant that
+        // fully determines the result.
+        private static void TryEliminateBitwiseAnd(Operation operation)
+        {
+            // Try to recognize and optimize those 4 patterns (in order):
+            // x & 0xFFFFFFFF == x,          0xFFFFFFFF & y == y,
+            // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(x, AllOnes(x.Type)))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstEqual(y, AllOnes(y.Type)))
+            {
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(x, 0))
+            {
+                // Reuse the zero operand itself, so that the copy source keeps the
+                // operand type rather than whatever type a new "Const(0)" would get.
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(y, 0))
+            {
+                operation.TurnIntoCopy(y);
+            }
+        }
+
+        // Replaces a bitwise OR with a copy when one side is a constant that
+        // fully determines the result.
+        private static void TryEliminateBitwiseOr(Operation operation)
+        {
+            // Try to recognize and optimize those 4 patterns (in order):
+            // x | 0x00000000 == x,          0x00000000 | y == y,
+            // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(x, 0))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstEqual(y, 0))
+            {
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(x, AllOnes(x.Type)))
+            {
+                // Reuse the all ones operand itself, so that the copy source keeps the
+                // operand type. AllOnes returns a ulong, a constant built from it would
+                // presumably not be I32 even for a 32-bit operation.
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(y, AllOnes(y.Type)))
+            {
+                operation.TurnIntoCopy(y);
+            }
+        }
+
+        // Turns "x op y" into a copy of x when y is an integer constant equal to comparand.
+        // Only the second source is tested, for operations where operand order matters.
+        private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
+        {
+            Operand lhs = operation.GetSource(0);
+            Operand rhs = operation.GetSource(1);
+
+            if (!IsConstEqual(rhs, comparand))
+            {
+                return;
+            }
+
+            operation.TurnIntoCopy(lhs);
+        }
+
+        // Turns "x op y" into a copy of the other operand when either x or y is an
+        // integer constant equal to comparand. The first source is tested first.
+        private static void TryEliminateBinaryOpComutative(Operation operation, ulong comparand)
+        {
+            Operand lhs = operation.GetSource(0);
+            Operand rhs = operation.GetSource(1);
+
+            if (IsConstEqual(lhs, comparand))
+            {
+                operation.TurnIntoCopy(rhs);
+
+                return;
+            }
+
+            if (IsConstEqual(rhs, comparand))
+            {
+                operation.TurnIntoCopy(lhs);
+            }
+        }
+
+        // When the condition (source 0) is a constant, the select is replaced with
+        // a copy of source 1 (non-zero condition) or source 2 (zero condition).
+        private static void TryEliminateConditionalSelect(Operation operation)
+        {
+            Operand cond = operation.GetSource(0);
+
+            if (cond.Kind == OperandKind.Constant)
+            {
+                Operand selected;
+
+                if (cond.Value != 0)
+                {
+                    selected = operation.GetSource(1);
+                }
+                else
+                {
+                    selected = operation.GetSource(2);
+                }
+
+                operation.TurnIntoCopy(selected);
+            }
+        }
+
+        // True only for a constant operand of integer type whose value equals comparand.
+        private static bool IsConstEqual(Operand operand, ulong comparand)
+        {
+            bool isIntegerConstant = operand.Kind == OperandKind.Constant && operand.Type.IsInteger();
+
+            return isIntegerConstant && operand.Value == comparand;
+        }
+
+        // Value with all bits set for the given integer type: 32 set bits for I32,
+        // 64 set bits for I64. Throws ArgumentException for any other type.
+        private static ulong AllOnes(OperandType type)
+        {
+            if (type == OperandType.I32)
+            {
+                return ~0U;
+            }
+
+            if (type == OperandType.I64)
+            {
+                return ~0UL;
+            }
+
+            throw new ArgumentException("Invalid operand type \"" + type + "\".");
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
new file mode 100644
index 000000000..94ac6991b
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    // Immutable triple returned by a register allocation pass.
+    struct AllocationResult
+    {
+        // Value passed as "intUsedRegisters" to the constructor.
+        public int IntUsedRegisters { get; }
+
+        // Value passed as "vecUsedRegisters" to the constructor.
+        public int VecUsedRegisters { get; }
+
+        // Value passed as "spillRegionSize" to the constructor.
+        public int SpillRegionSize { get; }
+
+        // Stores the three values unchanged.
+        public AllocationResult(int intUsedRegisters, int vecUsedRegisters, int spillRegionSize)
+        {
+            SpillRegionSize  = spillRegionSize;
+            VecUsedRegisters = vecUsedRegisters;
+            IntUsedRegisters = intUsedRegisters;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
new file mode 100644
index 000000000..65901e80c
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
@@ -0,0 +1,246 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class CopyResolver
+ {
+        // Collects register to register copies that conceptually all happen at the
+        // same time, and turns them into an ordered list of Copy operations and
+        // XOR swaps.
+        private class ParallelCopy
+        {
+            // One pending copy, Dest <- Source, of a value of the given type.
+            private struct Copy
+            {
+                public Register Dest   { get; }
+                public Register Source { get; }
+
+                public OperandType Type { get; }
+
+                public Copy(Register dest, Register source, OperandType type)
+                {
+                    Dest   = dest;
+                    Source = source;
+                    Type   = type;
+                }
+            }
+
+            private readonly List<Copy> _copies;
+
+            // Number of copies added so far.
+            public int Count => _copies.Count;
+
+            public ParallelCopy()
+            {
+                _copies = new List<Copy>();
+            }
+
+            // Records the copy dest <- source, nothing is emitted until Sequence is called.
+            public void AddCopy(Register dest, Register source, OperandType type)
+            {
+                _copies.Add(new Copy(dest, source, type));
+            }
+
+            // Appends to "sequence" the operations that perform all the recorded copies.
+            public void Sequence(List<Operation> sequence)
+            {
+                // locations: for each original source register, where its value lives right now.
+                // sources:   for each destination register, the register it must be copied from.
+                // types:     for each destination register, the type of the copied value.
+                Dictionary<Register, Register> locations = new Dictionary<Register, Register>();
+                Dictionary<Register, Register> sources   = new Dictionary<Register, Register>();
+
+                Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>();
+
+                Queue<Register> pendingQueue = new Queue<Register>();
+                Queue<Register> readyQueue   = new Queue<Register>();
+
+                foreach (Copy copy in _copies)
+                {
+                    locations[copy.Source] = copy.Source;
+                    sources[copy.Dest]     = copy.Source;
+                    types[copy.Dest]       = copy.Type;
+
+                    pendingQueue.Enqueue(copy.Dest);
+                }
+
+                foreach (Copy copy in _copies)
+                {
+                    // If the destination is not used anywhere, we can assign it immediately.
+                    if (!locations.ContainsKey(copy.Dest))
+                    {
+                        readyQueue.Enqueue(copy.Dest);
+                    }
+                }
+
+                while (pendingQueue.TryDequeue(out Register current))
+                {
+                    Register copyDest;
+                    Register origSource;
+                    Register copySource;
+
+                    // First emit every copy whose destination can be overwritten safely.
+                    while (readyQueue.TryDequeue(out copyDest))
+                    {
+                        origSource = sources[copyDest];
+                        copySource = locations[origSource];
+
+                        OperandType type = types[copyDest];
+
+                        EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+                        locations[origSource] = copyDest;
+
+                        // The value was read from its original register for the first time,
+                        // if that register is itself a destination it can be written now.
+                        if (origSource == copySource && sources.ContainsKey(origSource))
+                        {
+                            readyQueue.Enqueue(origSource);
+                        }
+                    }
+
+                    copyDest   = current;
+                    origSource = sources[copyDest];
+                    copySource = locations[origSource];
+
+                    // Nothing is left to do when the value already lives on the destination.
+                    if (copyDest != copySource)
+                    {
+                        OperandType type = types[copyDest];
+
+                        // The swap is always done on the widest type of the register class.
+                        type = type.IsInteger() ? OperandType.I64 : OperandType.V128;
+
+                        EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+                        locations[origSource] = copyDest;
+
+                        Register swapOther = copySource;
+
+                        if (copyDest != locations[sources[copySource]])
+                        {
+                            // Find the other swap destination register.
+                            // To do that, we search all the pending registers, and pick
+                            // the one where the copy source register is equal to the
+                            // current destination register being processed (copyDest).
+                            foreach (Register pending in pendingQueue)
+                            {
+                                // Is this a copy of pending <- copyDest?
+                                if (copyDest == locations[sources[pending]])
+                                {
+                                    swapOther = pending;
+
+                                    break;
+                                }
+                            }
+                        }
+
+                        // The value that was previously at "copyDest" now lives on
+                        // "copySource" thanks to the swap, now we need to update the
+                        // location for the next copy that is supposed to copy the value
+                        // that used to live on "copyDest".
+                        locations[sources[swapOther]] = copySource;
+                    }
+                }
+            }
+
+            // Appends "x = y".
+            private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
+            {
+                sequence.Add(new Operation(Instruction.Copy, x, y));
+            }
+
+            // Appends the three exclusive-or operations that exchange x and y
+            // without using a temporary.
+            private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
+            {
+                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, y, y, x));
+                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+            }
+        }
+
+        // Fill and spill operations waiting to be emitted by Sequence.
+        // Both queues are created once, by the constructor.
+        private Queue<Operation> _fillQueue;
+        private Queue<Operation> _spillQueue;
+
+        private ParallelCopy _parallelCopy;
+
+        // Set once any fill, spill or register copy has been added.
+        public bool HasCopy { get; private set; }
+
+        public CopyResolver()
+        {
+            _fillQueue  = new Queue<Operation>();
+            _spillQueue = new Queue<Operation>();
+
+            _parallelCopy = new ParallelCopy();
+        }
+
+        // Records the move needed to carry the value of a variable from the
+        // "left" interval to the "right" interval. Both intervals must belong to
+        // the same variable, otherwise ArgumentException is thrown. Nothing is
+        // recorded when both sides already agree on the location.
+        public void AddSplit(LiveInterval left, LiveInterval right)
+        {
+            if (left.Local != right.Local)
+            {
+                throw new ArgumentException("Intervals of different variables are not allowed.");
+            }
+
+            OperandType type = left.Local.Type;
+
+            bool leftSpilled  = left.IsSpilled;
+            bool rightSpilled = right.IsSpilled;
+
+            if (leftSpilled != rightSpilled)
+            {
+                if (leftSpilled)
+                {
+                    // Move from the stack to a register.
+                    AddSplitFill(left, right, type);
+                }
+                else
+                {
+                    // Move from a register to the stack.
+                    AddSplitSpill(left, right, type);
+                }
+
+                return;
+            }
+
+            if (!leftSpilled && left.Register != right.Register)
+            {
+                // Move from one register to another.
+                AddSplitCopy(left, right, type);
+
+                return;
+            }
+
+            if (left.SpillOffset != right.SpillOffset)
+            {
+                // This would be the stack-to-stack move case, but this is not supported.
+                throw new ArgumentException("Both intervals were spilled.");
+            }
+        }
+
+        // Queues a Fill that loads the right interval register from the
+        // spill offset of the left interval.
+        private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
+        {
+            Operation fillOp = new Operation(
+                Instruction.Fill,
+                GetRegister(right.Register, type),
+                new Operand(left.SpillOffset));
+
+            _fillQueue.Enqueue(fillOp);
+
+            HasCopy = true;
+        }
+
+        // Queues a Spill that stores the left interval register at the
+        // spill offset of the right interval. The operation has no destination.
+        private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
+        {
+            Operation spillOp = new Operation(
+                Instruction.Spill,
+                null,
+                new Operand(right.SpillOffset),
+                GetRegister(left.Register, type));
+
+            _spillQueue.Enqueue(spillOp);
+
+            HasCopy = true;
+        }
+
+        // Records a register to register copy, right register <- left register.
+        private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
+        {
+            Register dest   = right.Register;
+            Register source = left.Register;
+
+            _parallelCopy.AddCopy(dest, source, type);
+
+            HasCopy = true;
+        }
+
+        // Returns everything recorded so far as an ordered array of operations:
+        // all spills first, then the register to register copies, then all fills.
+        // The fill and spill queues are emptied by this call.
+        public Operation[] Sequence()
+        {
+            List<Operation> sequence = new List<Operation>();
+
+            while (_spillQueue.TryDequeue(out Operation spillOp))
+            {
+                sequence.Add(spillOp);
+            }
+
+            _parallelCopy.Sequence(sequence);
+
+            while (_fillQueue.TryDequeue(out Operation fillOp))
+            {
+                sequence.Add(fillOp);
+            }
+
+            return sequence.ToArray();
+        }
+
+        // Builds a new operand from the register index, the register type and
+        // the requested operand type.
+        private static Operand GetRegister(Register reg, OperandType type)
+        {
+            Operand operand = new Operand(reg.Index, reg.Type, type);
+
+            return operand;
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
new file mode 100644
index 000000000..9a827420b
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
@@ -0,0 +1,382 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class HybridAllocator : IRegisterAllocator
+ {
+ private const int RegistersCount = 16;
+ private const int MaxIROperands = 4;
+
+        // Per block facts gathered by the first pass of RunPass: whether the block
+        // contains a call, and the bit masks of the integer and vector registers
+        // that are written directly by nodes of the block.
+        private struct BlockInfo
+        {
+            public bool HasCall { get; }
+
+            public int IntFixedRegisters { get; }
+            public int VecFixedRegisters { get; }
+
+            public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters)
+            {
+                VecFixedRegisters = vecFixedRegisters;
+                IntFixedRegisters = intFixedRegisters;
+                HasCall           = hasCall;
+            }
+        }
+
+        // Allocation state kept for one local variable.
+        private class LocalInfo
+        {
+            // Expected total number of references, and the number seen so far.
+            public int Uses     { get; set; }
+            public int UseCount { get; set; }
+
+            public bool PreAllocated { get; set; }
+            public int  Register     { get; set; }
+            public int  SpillOffset  { get; set; }
+
+            // Sequence number of the node for which Temp was last selected.
+            public int Sequence { get; set; }
+
+            public Operand Temp { get; set; }
+
+            public OperandType Type { get; }
+
+            // Lowest and highest block index passed to SetBlockIndex, -1 while unset.
+            private int _first;
+            private int _last;
+
+            // True when every recorded block index is the same
+            // (also true when none was recorded yet).
+            public bool IsBlockLocal => _first == _last;
+
+            public LocalInfo(OperandType type, int uses)
+            {
+                Type = type;
+                Uses = uses;
+
+                _last  = -1;
+                _first = -1;
+            }
+
+            // Widens the [first, last] block index range so that it includes blkIndex.
+            public void SetBlockIndex(int blkIndex)
+            {
+                bool lowersFirst = _first == -1 || blkIndex < _first;
+
+                if (lowersFirst)
+                {
+                    _first = blkIndex;
+                }
+
+                bool raisesLast = _last == -1 || blkIndex > _last;
+
+                if (raisesLast)
+                {
+                    _last = blkIndex;
+                }
+            }
+        }
+
+        // Replaces every local variable operand of the graph with a register operand.
+        // Locals that are only referenced inside one block get a register of their own
+        // while one is free, every other local lives on the stack and goes through a
+        // spill temporary register, with Fill and Spill operations inserted around
+        // the node that uses it.
+        // Returns the masks of integer and vector registers that were used, and the
+        // total size reported by the stack allocator.
+        public AllocationResult RunPass(
+            ControlFlowGraph cfg,
+            StackAllocator   stackAlloc,
+            RegisterMasks    regMasks)
+        {
+            int intUsedRegisters = 0;
+            int vecUsedRegisters = 0;
+
+            int intFreeRegisters = regMasks.IntAvailableRegisters;
+            int vecFreeRegisters = regMasks.VecAvailableRegisters;
+
+            BlockInfo[] blockInfo = new BlockInfo[cfg.Blocks.Count];
+
+            // Indexed by local number minus one, locals are numbered starting at 1.
+            List<LocalInfo> locInfo = new List<LocalInfo>();
+
+            // First pass: number the locals, record the blocks where each local is
+            // referenced, and gather the per block information.
+            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+            {
+                BasicBlock block = cfg.PostOrderBlocks[index];
+
+                int intFixedRegisters = 0;
+                int vecFixedRegisters = 0;
+
+                bool hasCall = false;
+
+                foreach (Node node in block.Operations)
+                {
+                    if (node is Operation operation && operation.Instruction == Instruction.Call)
+                    {
+                        hasCall = true;
+                    }
+
+                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+                    {
+                        Operand source = node.GetSource(srcIndex);
+
+                        if (source.Kind == OperandKind.LocalVariable)
+                        {
+                            locInfo[source.AsInt32() - 1].SetBlockIndex(block.Index);
+                        }
+                    }
+
+                    for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+                    {
+                        Operand dest = node.GetDestination(dstIndex);
+
+                        if (dest.Kind == OperandKind.LocalVariable)
+                        {
+                            LocalInfo info;
+
+                            // A value of zero means that the local was not numbered yet.
+                            if (dest.Value != 0)
+                            {
+                                info = locInfo[dest.AsInt32() - 1];
+                            }
+                            else
+                            {
+                                dest.NumberLocal(locInfo.Count + 1);
+
+                                info = new LocalInfo(dest.Type, UsesCount(dest));
+
+                                locInfo.Add(info);
+                            }
+
+                            info.SetBlockIndex(block.Index);
+                        }
+                        else if (dest.Kind == OperandKind.Register)
+                        {
+                            if (dest.Type.IsInteger())
+                            {
+                                intFixedRegisters |= 1 << dest.GetRegister().Index;
+                            }
+                            else
+                            {
+                                vecFixedRegisters |= 1 << dest.GetRegister().Index;
+                            }
+                        }
+                    }
+                }
+
+                blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
+            }
+
+            // Incremented once per node, used to tell whether a spill temporary
+            // was selected for the node being processed or for an earlier one.
+            int sequence = 0;
+
+            // Second pass: replace the local variable operands.
+            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+            {
+                BasicBlock block = cfg.PostOrderBlocks[index];
+
+                BlockInfo blkInfo = blockInfo[block.Index];
+
+                int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
+                int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
+
+                int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
+                int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;
+
+                // Caller saved registers are the first choice for spill temporaries
+                // on blocks with calls, since they are not handed out to locals there.
+                int intSpillTempRegisters = SelectSpillTemps(
+                    intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
+                    intLocalFreeRegisters);
+                int vecSpillTempRegisters = SelectSpillTemps(
+                    vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
+                    vecLocalFreeRegisters);
+
+                intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
+                vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);
+
+                for (LinkedListNode<Node> llNode = block.Operations.First; llNode != null; llNode = llNode.Next)
+                {
+                    Node node = llNode.Value;
+
+                    // Spill temporaries already taken by the sources of this node.
+                    int intLocalUse = 0;
+                    int vecLocalUse = 0;
+
+                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+                    {
+                        Operand source = node.GetSource(srcIndex);
+
+                        if (source.Kind != OperandKind.LocalVariable)
+                        {
+                            continue;
+                        }
+
+                        LocalInfo info = locInfo[source.AsInt32() - 1];
+
+                        info.UseCount++;
+
+                        Debug.Assert(info.UseCount <= info.Uses);
+
+                        if (info.Register != -1)
+                        {
+                            node.SetSource(srcIndex, Register(info.Register, source.Type.ToRegisterType(), source.Type));
+
+                            // Last reference to the local, its register can be reused.
+                            if (info.UseCount == info.Uses && !info.PreAllocated)
+                            {
+                                if (source.Type.IsInteger())
+                                {
+                                    intLocalFreeRegisters |= 1 << info.Register;
+                                }
+                                else
+                                {
+                                    vecLocalFreeRegisters |= 1 << info.Register;
+                                }
+                            }
+                        }
+                        else
+                        {
+                            Operand temp = info.Temp;
+
+                            // Reuse the temporary when the same local was already seen on this node.
+                            if (temp == null || info.Sequence != sequence)
+                            {
+                                temp = source.Type.IsInteger()
+                                    ? GetSpillTemp(source, intSpillTempRegisters, ref intLocalUse)
+                                    : GetSpillTemp(source, vecSpillTempRegisters, ref vecLocalUse);
+
+                                info.Sequence = sequence;
+                                info.Temp     = temp;
+                            }
+
+                            node.SetSource(srcIndex, temp);
+
+                            // Load the value from the stack right before the node.
+                            Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset));
+
+                            block.Operations.AddBefore(llNode, fillOp);
+                        }
+                    }
+
+                    // Spill temporaries already taken by the destinations of this node.
+                    int intLocalAsg = 0;
+                    int vecLocalAsg = 0;
+
+                    for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+                    {
+                        Operand dest = node.GetDestination(dstIndex);
+
+                        if (dest.Kind != OperandKind.LocalVariable)
+                        {
+                            continue;
+                        }
+
+                        LocalInfo info = locInfo[dest.AsInt32() - 1];
+
+                        // First reference to the local, decide where it is going to live.
+                        if (info.UseCount == 0 && !info.PreAllocated)
+                        {
+                            int mask = dest.Type.IsInteger()
+                                ? intLocalFreeRegisters
+                                : vecLocalFreeRegisters;
+
+                            if (info.IsBlockLocal && mask != 0)
+                            {
+                                int selectedReg = BitUtils.LowestBitSet(mask);
+
+                                info.Register = selectedReg;
+
+                                if (dest.Type.IsInteger())
+                                {
+                                    intLocalFreeRegisters &= ~(1 << selectedReg);
+                                    intUsedRegisters      |=   1 << selectedReg;
+                                }
+                                else
+                                {
+                                    vecLocalFreeRegisters &= ~(1 << selectedReg);
+                                    vecUsedRegisters      |=   1 << selectedReg;
+                                }
+                            }
+                            else
+                            {
+                                info.Register    = -1;
+                                info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes());
+                            }
+                        }
+
+                        info.UseCount++;
+
+                        Debug.Assert(info.UseCount <= info.Uses);
+
+                        if (info.Register != -1)
+                        {
+                            node.SetDestination(dstIndex, Register(info.Register, dest.Type.ToRegisterType(), dest.Type));
+                        }
+                        else
+                        {
+                            Operand temp = info.Temp;
+
+                            if (temp == null || info.Sequence != sequence)
+                            {
+                                temp = dest.Type.IsInteger()
+                                    ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
+                                    : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);
+
+                                info.Sequence = sequence;
+                                info.Temp     = temp;
+                            }
+
+                            node.SetDestination(dstIndex, temp);
+
+                            // Store the value to the stack right after the node. The loop
+                            // then continues from the inserted spill, which is skipped.
+                            Operation spillOp = new Operation(Instruction.Spill, null, Const(info.SpillOffset), temp);
+
+                            llNode = block.Operations.AddAfter(llNode, spillOp);
+                        }
+                    }
+
+                    sequence++;
+
+                    intUsedRegisters |= intLocalAsg | intLocalUse;
+                    vecUsedRegisters |= vecLocalAsg | vecLocalUse;
+                }
+            }
+
+            return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
+        }
+
+        // Builds a mask with up to MaxIROperands registers, taking the lowest set
+        // bits of mask0 first and then, if more are needed, the lowest set bits of mask1.
+        // Asserts (debug builds) that MaxIROperands registers could be selected.
+        private static int SelectSpillTemps(int mask0, int mask1)
+        {
+            int selection = 0;
+            int count     = 0;
+
+            // Moves the lowest set bits of "candidates" into the selection
+            // until the selection is full or no candidate is left.
+            void TakeFrom(int candidates)
+            {
+                while (count < MaxIROperands && candidates != 0)
+                {
+                    int lowestBit = candidates & -candidates;
+
+                    candidates &= ~lowestBit;
+                    selection  |= lowestBit;
+
+                    count++;
+                }
+            }
+
+            TakeFrom(mask0);
+            TakeFrom(mask1);
+
+            Debug.Assert(count == MaxIROperands, "No enough registers for spill temps.");
+
+            return selection;
+        }
+
+        // Selects the lowest register of freeMask that is not in useMask yet, marks
+        // it in useMask, and returns a register operand with the type of the local.
+        private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
+        {
+            int candidates = freeMask & ~useMask;
+
+            int regIndex = BitUtils.LowestBitSet(candidates);
+
+            useMask |= 1 << regIndex;
+
+            return Register(regIndex, local.Type.ToRegisterType(), local.Type);
+        }
+
+        // Total number of references to the local: assignments plus uses.
+        private static int UsesCount(Operand local)
+        {
+            int assignments = local.Assignments.Count;
+            int uses        = local.Uses.Count;
+
+            return assignments + uses;
+        }
+
+        // Enumerates the successors of the block that are not null:
+        // "Next" first, then "Branch".
+        // NOTE(review): nothing in this class calls this method, confirm that it is still needed.
+        private static IEnumerable<BasicBlock> Successors(BasicBlock block)
+        {
+            if (block.Next != null)
+            {
+                yield return block.Next;
+            }
+
+            if (block.Branch != null)
+            {
+                yield return block.Branch;
+            }
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
new file mode 100644
index 000000000..8f236c253
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
@@ -0,0 +1,12 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Contract shared by the register allocators of this namespace
+ // (HybridAllocator and LinearScanAllocator both implement it).
+ interface IRegisterAllocator
+ {
+ // Runs the allocator over the control flow graph, using the stack allocator
+ // and the register masks supplied by the caller, and returns the
+ // resulting AllocationResult.
+ AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks);
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
new file mode 100644
index 000000000..6d5ecc141
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -0,0 +1,1019 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Based on:
+ // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
+ // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
+ class LinearScanAllocator : IRegisterAllocator
+ {
+ // Every operation takes InstructionGap consecutive positions; the mask is used
+ // to round a position down to the first position of its operation.
+ private const int InstructionGap = 2;
+ private const int InstructionGapMask = InstructionGap - 1;
+
+ // Number of registers of each type (integer and vector) that get a fixed interval.
+ private const int RegistersCount = 16;
+
+ // Start positions of all blocks.
+ private HashSet<int> _blockEdges;
+
+ // Position range covered by each block, indexed by block index.
+ private LiveRange[] _blockRanges;
+
+ // Live-in set of each block, indexed by block index.
+ private BitMap[] _blockLiveIn;
+
+ // Intervals to process: the first RegistersCount * 2 entries are the fixed
+ // register intervals, followed by the intervals of the locals (and their splits).
+ private List<LiveInterval> _intervals;
+
+ // Snapshot of _intervals taken right after numbering, before any split is inserted.
+ private LiveInterval[] _parentIntervals;
+
+ // Operation node for each instruction slot (position / InstructionGap).
+ private List<LinkedListNode<Node>> _operationNodes;
+
+ // Total number of positions assigned to operations.
+ private int _operationsCount;
+
+ // Mutable state shared by the allocation routines of a single RunPass call.
+ private class AllocationContext
+ {
+     public RegisterMasks Masks { get; }
+
+     public StackAllocator StackAlloc { get; }
+
+     // Indices (into the intervals list) of the active and inactive intervals.
+     public BitMap Active { get; }
+     public BitMap Inactive { get; }
+
+     // Bit masks of the registers handed out so far, per register type.
+     public int IntUsedRegisters { get; set; }
+     public int VecUsedRegisters { get; set; }
+
+     public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
+     {
+         Masks = masks;
+         StackAlloc = stackAlloc;
+
+         Active = new BitMap(intervalsCount);
+         Inactive = new BitMap(intervalsCount);
+     }
+
+     public void MoveActiveToInactive(int bit) => Transfer(bit, Active, Inactive);
+
+     public void MoveInactiveToActive(int bit) => Transfer(bit, Inactive, Active);
+
+     // Clears "bit" on the origin set and sets it on the target set.
+     private static void Transfer(int bit, BitMap from, BitMap to)
+     {
+         from.Clear(bit);
+
+         to.Set(bit);
+     }
+ }
+
+ // Entry point of the allocator: numbers the locals, builds the live intervals,
+ // assigns a register or a stack slot to every interval, rewrites the operations
+ // to use the assigned registers and finally inserts the copies required by splits.
+ public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
+ {
+     NumberLocals(cfg);
+
+     AllocationContext context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
+
+     BuildIntervals(cfg, context);
+
+     // The list can grow while it is being walked, as splits are inserted into it,
+     // so the count has to be re-read on every iteration.
+     for (int position = 0; position < _intervals.Count; position++)
+     {
+         LiveInterval current = _intervals[position];
+
+         if (current.IsEmpty)
+         {
+             continue;
+         }
+
+         if (!current.IsFixed)
+         {
+             AllocateInterval(context, current, position);
+
+             continue;
+         }
+
+         // Fixed intervals already own their register, just record it as in use.
+         context.Active.Set(position);
+
+         int registerBit = 1 << current.Register.Index;
+
+         if (current.Register.Type == RegisterType.Integer)
+         {
+             context.IntUsedRegisters |= registerBit;
+         }
+         else
+         {
+             // Anything that is not an integer register is a vector register.
+             context.VecUsedRegisters |= registerBit;
+         }
+     }
+
+     // Skip the fixed register intervals at the beginning of the list.
+     int firstLocalIndex = RegistersCount * 2;
+
+     for (int position = firstLocalIndex; position < _intervals.Count; position++)
+     {
+         LiveInterval interval = _intervals[position];
+
+         if (interval.IsSpilled)
+         {
+             continue;
+         }
+
+         ReplaceLocalWithRegister(interval);
+     }
+
+     InsertSplitCopies();
+     InsertSplitCopiesAtEdges(cfg);
+
+     int intUsed = context.IntUsedRegisters;
+     int vecUsed = context.VecUsedRegisters;
+
+     return new AllocationResult(intUsed, vecUsed, context.StackAlloc.TotalSize);
+ }
+
+ // Updates the active and inactive sets for the start position of "current",
+ // then gives "current" a register, spilling other intervals only when no
+ // register can be obtained otherwise.
+ private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
+ {
+     bool EndsBeforeCurrent(LiveInterval interval) => interval.GetEnd() < current.GetStart();
+
+     bool CoversCurrentStart(LiveInterval interval) => interval.Overlaps(current.GetStart());
+
+     // Active intervals either expired, or have a hole at this position.
+     foreach (int iIndex in context.Active)
+     {
+         LiveInterval interval = _intervals[iIndex];
+
+         if (EndsBeforeCurrent(interval))
+         {
+             context.Active.Clear(iIndex);
+         }
+         else if (!CoversCurrentStart(interval))
+         {
+             context.MoveActiveToInactive(iIndex);
+         }
+     }
+
+     // Inactive intervals either expired, or became live again at this position.
+     foreach (int iIndex in context.Inactive)
+     {
+         LiveInterval interval = _intervals[iIndex];
+
+         if (EndsBeforeCurrent(interval))
+         {
+             context.Inactive.Clear(iIndex);
+         }
+         else if (CoversCurrentStart(interval))
+         {
+             context.MoveInactiveToActive(iIndex);
+         }
+     }
+
+     bool allocated = TryAllocateRegWithoutSpill(context, current, cIndex);
+
+     if (!allocated)
+     {
+         AllocateRegWithSpill(context, current, cIndex);
+     }
+ }
+
+ // Tries to give "current" a register without spilling any other interval.
+ // Returns false when no register is free at the start of "current".
+ // Otherwise the register that stays free for the longest is assigned; if it is
+ // free only for part of the lifetime of "current", the interval is split at the
+ // position where the register stops being free. On success the register is
+ // recorded as used and "current" (at index cIndex) is added to the active set.
+ private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ int availableRegisters = context.Masks.GetAvailableRegisters(regType);
+
+ // Position until which each register is free. Registers that are not
+ // available keep the default value of 0.
+ int[] freePositions = new int[RegistersCount];
+
+ for (int index = 0; index < RegistersCount; index++)
+ {
+ if ((availableRegisters & (1 << index)) != 0)
+ {
+ freePositions[index] = int.MaxValue;
+ }
+ }
+
+ // Registers held by active intervals are not free at all.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.Register.Type == regType)
+ {
+ freePositions[interval.Register.Index] = 0;
+ }
+ }
+
+ // Registers held by inactive intervals are free until the position
+ // where that interval overlaps with current, if it does.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.Register.Type == regType)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound && freePositions[interval.Register.Index] > overlapPosition)
+ {
+ freePositions[interval.Register.Index] = overlapPosition;
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(freePositions);
+
+ int selectedNextUse = freePositions[selectedReg];
+
+ // Intervals start and end at odd positions, unless they span an entire
+ // block, in which case they will have ranges at an even position.
+ // When an interval is loaded from the stack to a register, we can only
+ // do the split at an odd position, because otherwise the split interval
+ // that is inserted on the list to be processed may clobber a register
+ // used by the instruction at the same position as the split.
+ // The problem only happens when an interval ends exactly at this instruction,
+ // because otherwise they would interfere, and the register wouldn't be selected.
+ // When the interval is aligned and the above happens, there's no problem as
+ // the instruction that actually has the last use is the one
+ // before that position.
+ selectedNextUse &= ~InstructionGapMask;
+
+ if (selectedNextUse <= current.GetStart())
+ {
+ // The best register is already taken at the start of current.
+ return false;
+ }
+ else if (selectedNextUse < current.GetEnd())
+ {
+ // The register is free only for the first part of current, so the
+ // remaining part is split off.
+ Debug.Assert(selectedNextUse > current.GetStart(), "Trying to split interval at the start.");
+
+ LiveInterval splitChild = current.Split(selectedNextUse);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ // The remaining part is queued so that it gets a register later.
+ InsertInterval(splitChild);
+ }
+ else
+ {
+ // The remaining part has no uses, so it can live on the stack.
+ Spill(context, splitChild);
+ }
+ }
+
+ current.Register = new Register(selectedReg, regType);
+
+ if (regType == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << selectedReg;
+ }
+ else /* if (regType == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << selectedReg;
+ }
+
+ context.Active.Set(cIndex);
+
+ return true;
+ }
+
+ // Gives "current" a register when none is free, by spilling. Depending on the
+ // next use positions, either "current" itself is spilled up to its first use, or
+ // the intervals holding the selected register are split and spilled. When a
+ // fixed interval blocks the selected register before the end of "current",
+ // "current" is additionally split at the blocked position.
+ private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ int availableRegisters = context.Masks.GetAvailableRegisters(regType);
+
+ // usePositions: lowest next use position recorded for each register.
+ // blockedPositions: lowest position where a fixed interval needs each register.
+ // Registers that are not available keep the default value of 0 on both.
+ int[] usePositions = new int[RegistersCount];
+ int[] blockedPositions = new int[RegistersCount];
+
+ for (int index = 0; index < RegistersCount; index++)
+ {
+ if ((availableRegisters & (1 << index)) != 0)
+ {
+ usePositions[index] = int.MaxValue;
+
+ blockedPositions[index] = int.MaxValue;
+ }
+ }
+
+ // Both helpers only ever lower the stored position.
+ void SetUsePosition(int index, int position)
+ {
+ usePositions[index] = Math.Min(usePositions[index], position);
+ }
+
+ // A blocked position also counts as a use position.
+ void SetBlockedPosition(int index, int position)
+ {
+ blockedPositions[index] = Math.Min(blockedPositions[index], position);
+
+ SetUsePosition(index, position);
+ }
+
+ // Non-fixed active intervals: record their next use.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register.Type == regType)
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != -1)
+ {
+ SetUsePosition(interval.Register.Index, nextUse);
+ }
+ }
+ }
+
+ // Non-fixed inactive intervals that overlap current: record their next use.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current))
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != -1)
+ {
+ SetUsePosition(interval.Register.Index, nextUse);
+ }
+ }
+ }
+
+ // Fixed active intervals block their register from position 0.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.IsFixed && interval.Register.Type == regType)
+ {
+ SetBlockedPosition(interval.Register.Index, 0);
+ }
+ }
+
+ // Fixed inactive intervals block their register from the overlap position.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (interval.IsFixed && interval.Register.Type == regType)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound)
+ {
+ SetBlockedPosition(interval.Register.Index, overlapPosition);
+ }
+ }
+ }
+
+ // Pick the register whose next use is the furthest away.
+ int selectedReg = GetHighestValueIndex(usePositions);
+
+ int currentFirstUse = current.FirstUse();
+
+ Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");
+
+ if (usePositions[selectedReg] < currentFirstUse)
+ {
+ // All intervals on inactive and active are being used before current,
+ // so spill the current interval.
+ Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill a interval currently being used.");
+
+ LiveInterval splitChild = current.Split(currentFirstUse);
+
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ // The part starting at the first use is queued to get a register later.
+ InsertInterval(splitChild);
+
+ Spill(context, current);
+ }
+ else if (blockedPositions[selectedReg] > current.GetEnd())
+ {
+ // Spill made the register available for the entire current lifetime,
+ // so we only need to split the intervals using the selected register.
+ current.Register = new Register(selectedReg, regType);
+
+ SplitAndSpillOverlappingIntervals(context, current);
+
+ context.Active.Set(cIndex);
+ }
+ else
+ {
+ // There are conflicts even after spill due to the use of fixed registers
+ // that can't be spilled, so we need to also split current at the point of
+ // the first fixed register use.
+ current.Register = new Register(selectedReg, regType);
+
+ // Round the blocked position down to the start of its instruction.
+ int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;
+
+ Debug.Assert(splitPosition > current.GetStart(), "Trying to split a interval at a invalid position.");
+
+ LiveInterval splitChild = current.Split(splitPosition);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+
+ SplitAndSpillOverlappingIntervals(context, current);
+
+ context.Active.Set(cIndex);
+ }
+ }
+
+ // Returns the index of the first occurrence of the highest value on "array".
+ // The search stops early once int.MaxValue is found, as nothing can beat it.
+ private static int GetHighestValueIndex(int[] array)
+ {
+     int bestIndex = 0;
+     int bestValue = array[0];
+
+     for (int index = 1; bestValue != int.MaxValue && index < array.Length; index++)
+     {
+         int candidate = array[index];
+
+         if (candidate > bestValue)
+         {
+             bestValue = candidate;
+             bestIndex = index;
+         }
+     }
+
+     return bestIndex;
+ }
+
+ // Evicts every non-fixed interval that holds the register just assigned to
+ // "current": all such active intervals, and the inactive ones that overlap with
+ // "current". Evicted intervals are removed from their set.
+ private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
+ {
+     foreach (int iIndex in context.Active)
+     {
+         LiveInterval candidate = _intervals[iIndex];
+
+         bool holdsRegister = !candidate.IsFixed && candidate.Register == current.Register;
+
+         if (!holdsRegister)
+         {
+             continue;
+         }
+
+         SplitAndSpillOverlappingInterval(context, current, candidate);
+
+         context.Active.Clear(iIndex);
+     }
+
+     foreach (int iIndex in context.Inactive)
+     {
+         LiveInterval candidate = _intervals[iIndex];
+
+         bool conflicts =
+             !candidate.IsFixed &&
+             candidate.Register == current.Register &&
+             candidate.Overlaps(current);
+
+         if (!conflicts)
+         {
+             continue;
+         }
+
+         SplitAndSpillOverlappingInterval(context, current, candidate);
+
+         context.Inactive.Clear(iIndex);
+     }
+ }
+
+ // Spills the part of "interval" that starts at the start of "current".
+ // When "interval" is used again after that point, the spilled part ends at the
+ // next use, and the remainder is put back on the list of intervals to process,
+ // so that it gets a new register at that use position.
+ private void SplitAndSpillOverlappingInterval(
+     AllocationContext context,
+     LiveInterval current,
+     LiveInterval interval)
+ {
+     int nextUse = interval.NextUseAfter(current.GetStart());
+
+     // Only split when there is a part of the interval before current.
+     LiveInterval splitChild = interval.GetStart() < current.GetStart()
+         ? interval.Split(current.GetStart())
+         : interval;
+
+     if (nextUse == -1)
+     {
+         // No further uses, the whole remaining part goes to the stack.
+         Spill(context, splitChild);
+
+         return;
+     }
+
+     Debug.Assert(nextUse > current.GetStart(), "Trying to spill a interval currently being used.");
+
+     if (nextUse > splitChild.GetStart())
+     {
+         LiveInterval right = splitChild.Split(nextUse);
+
+         Spill(context, splitChild);
+
+         splitChild = right;
+     }
+
+     InsertInterval(splitChild);
+ }
+
+ // Adds "interval" to the list of intervals to process, at the position given by
+ // a binary search over the part of the list that follows the fixed intervals.
+ private void InsertInterval(LiveInterval interval)
+ {
+     Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses.");
+     Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval.");
+     Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
+
+     int firstLocalIndex = RegistersCount * 2;
+     int localsCount = _intervals.Count - firstLocalIndex;
+
+     int found = _intervals.BinarySearch(firstLocalIndex, localsCount, interval, null);
+
+     // A negative result is the bitwise complement of the insertion index.
+     _intervals.Insert(found < 0 ? ~found : found, interval);
+ }
+
+ // Moves "interval" to the stack. The stack offset of an already spilled sibling
+ // is reused when there is one, which avoids stack-to-stack copies for split
+ // intervals; otherwise a new stack slot is allocated.
+ private void Spill(AllocationContext context, LiveInterval interval)
+ {
+     Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval.");
+     Debug.Assert(interval.UsesCount == 0, "Trying to spill a interval with uses.");
+
+     if (interval.TrySpillWithSiblingOffset())
+     {
+         return;
+     }
+
+     interval.Spill(context.StackAlloc.Allocate(interval.Local.Type));
+ }
+
+ // Inserts the copies needed where an interval was split in the middle of a
+ // block. Splits located at block starts are not handled here. All the splits
+ // at the same position share one CopyResolver, and the resulting operations
+ // are placed right before the operation at the split position.
+ private void InsertSplitCopies()
+ {
+     Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();
+
+     // Returns the resolver of the position, creating it on first request only.
+     CopyResolver GetCopyResolver(int position)
+     {
+         if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
+         {
+             copyResolver = new CopyResolver();
+
+             copyResolvers.Add(position, copyResolver);
+         }
+
+         return copyResolver;
+     }
+
+     foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
+     {
+         LiveInterval previous = interval;
+
+         foreach (LiveInterval splitChild in interval.SplitChilds())
+         {
+             int splitPosition = splitChild.GetStart();
+
+             // A copy is only needed when the child starts exactly where the
+             // previous part ends.
+             if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
+             {
+                 GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
+             }
+
+             previous = splitChild;
+         }
+     }
+
+     foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
+     {
+         CopyResolver copyResolver = kv.Value;
+
+         if (!copyResolver.HasCopy)
+         {
+             continue;
+         }
+
+         int splitPosition = kv.Key;
+
+         LinkedListNode<Node> node = GetOperationNode(splitPosition);
+
+         Operation[] sequence = copyResolver.Sequence();
+
+         // The first operation goes before the node, the others follow it in order.
+         node = node.List.AddBefore(node, sequence[0]);
+
+         for (int index = 1; index < sequence.Length; index++)
+         {
+             node = node.List.AddAfter(node, sequence[index]);
+         }
+     }
+ }
+
+ // Inserts the copies needed on control flow edges: for every interval that is
+ // live into a successor and was split, a copy is added when the part valid at
+ // the end of the block differs from the part valid at the start of the successor.
+ // The copies are placed at the end of the block when it has at most one
+ // successor, at the start of the successor when it has a single predecessor,
+ // and on a new block that splits the edge otherwise.
+ private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
+ {
+     int blocksCount = cfg.Blocks.Count;
+
+     // Blocks with an index past the initial count are treated as blocks that
+     // were created by splitting an edge.
+     bool IsSplitEdgeBlock(BasicBlock block)
+     {
+         return block.Index >= blocksCount;
+     }
+
+     for (LinkedListNode<BasicBlock> node = cfg.Blocks.First; node != null; node = node.Next)
+     {
+         BasicBlock block = node.Value;
+
+         if (IsSplitEdgeBlock(block))
+         {
+             continue;
+         }
+
+         bool hasSingleOrNoSuccessor = block.Next == null || block.Branch == null;
+
+         foreach (BasicBlock successor in Successors(block))
+         {
+             int succIndex = successor.Index;
+
+             // If the current node is a split node, then the actual successor node
+             // (the successor before the split) should be right after it.
+             if (IsSplitEdgeBlock(successor))
+             {
+                 succIndex = Successors(successor).First().Index;
+             }
+
+             CopyResolver copyResolver = new CopyResolver();
+
+             foreach (int iIndex in _blockLiveIn[succIndex])
+             {
+                 LiveInterval interval = _parentIntervals[iIndex];
+
+                 if (!interval.IsSplit)
+                 {
+                     continue;
+                 }
+
+                 // Last position of the block and first position of the successor.
+                 int lEnd = _blockRanges[block.Index].End - 1;
+                 int rStart = _blockRanges[succIndex].Start;
+
+                 LiveInterval left = interval.GetSplitChild(lEnd);
+                 LiveInterval right = interval.GetSplitChild(rStart);
+
+                 if (left != null && right != null && left != right)
+                 {
+                     copyResolver.AddSplit(left, right);
+                 }
+             }
+
+             if (!copyResolver.HasCopy)
+             {
+                 continue;
+             }
+
+             Operation[] sequence = copyResolver.Sequence();
+
+             if (hasSingleOrNoSuccessor)
+             {
+                 foreach (Operation operation in sequence)
+                 {
+                     block.Append(operation);
+                 }
+             }
+             else if (successor.Predecessors.Count == 1)
+             {
+                 LinkedListNode<Node> prependNode = successor.Operations.AddFirst(sequence[0]);
+
+                 for (int index = 1; index < sequence.Length; index++)
+                 {
+                     Operation operation = sequence[index];
+
+                     prependNode = successor.Operations.AddAfter(prependNode, operation);
+                 }
+             }
+             else
+             {
+                 // Split the critical edge.
+                 BasicBlock splitBlock = cfg.SplitEdge(block, successor);
+
+                 foreach (Operation operation in sequence)
+                 {
+                     splitBlock.Append(operation);
+                 }
+             }
+         }
+     }
+ }
+
+ // Rewrites every operation at a use position of "current", so that sources and
+ // destinations that reference the local reference its assigned register instead.
+ private void ReplaceLocalWithRegister(LiveInterval current)
+ {
+     Operand register = GetRegister(current);
+
+     foreach (int usePosition in current.UsePositions())
+     {
+         Node operation = GetOperationNode(usePosition).Value;
+
+         for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+         {
+             if (operation.GetSource(srcIndex) == current.Local)
+             {
+                 operation.SetSource(srcIndex, register);
+             }
+         }
+
+         for (int dstIndex = 0; dstIndex < operation.DestinationsCount; dstIndex++)
+         {
+             if (operation.GetDestination(dstIndex) == current.Local)
+             {
+                 operation.SetDestination(dstIndex, register);
+             }
+         }
+     }
+ }
+
+ // Creates the register operand for the register assigned to "interval",
+ // keeping the type of the local. The interval must not be spilled.
+ private static Operand GetRegister(LiveInterval interval)
+ {
+     Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
+
+     Register assigned = interval.Register;
+
+     return new Operand(assigned.Index, assigned.Type, interval.Local.Type);
+ }
+
+ // Returns the operation node at "position". Each operation takes InstructionGap
+ // positions, so the position is divided to get the index on the nodes list.
+ // The entry is null for the dummy slot of an empty block.
+ private LinkedListNode<Node> GetOperationNode(int position)
+ {
+     return _operationNodes[position / InstructionGap];
+ }
+
+ // Creates the initial intervals list and numbers the locals.
+ // The list starts with one fixed interval per register (integer and vector
+ // interleaved), followed by one interval per local, created the first time the
+ // local is found as a destination. Blocks are visited in reverse post order,
+ // and every operation node is recorded so that it can be found by position.
+ private void NumberLocals(ControlFlowGraph cfg)
+ {
+     _operationNodes = new List<LinkedListNode<Node>>();
+
+     _intervals = new List<LiveInterval>();
+
+     for (int index = 0; index < RegistersCount; index++)
+     {
+         _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
+         _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
+     }
+
+     HashSet<Operand> visited = new HashSet<Operand>();
+
+     _operationsCount = 0;
+
+     for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+     {
+         BasicBlock block = cfg.PostOrderBlocks[index];
+
+         for (LinkedListNode<Node> node = block.Operations.First; node != null; node = node.Next)
+         {
+             _operationNodes.Add(node);
+
+             Node operation = node.Value;
+
+             foreach (Operand dest in Destinations(operation))
+             {
+                 if (dest.Kind == OperandKind.LocalVariable && visited.Add(dest))
+                 {
+                     // The number of the local is the index of its interval.
+                     dest.NumberLocal(_intervals.Count);
+
+                     _intervals.Add(new LiveInterval(dest));
+                 }
+             }
+         }
+
+         _operationsCount += block.Operations.Count * InstructionGap;
+
+         if (block.Operations.Count == 0)
+         {
+             // Pretend we have a dummy instruction on the empty block.
+             _operationNodes.Add(null);
+
+             _operationsCount += InstructionGap;
+         }
+     }
+
+     // Keep the unsplit intervals reachable by their original index.
+     _parentIntervals = _intervals.ToArray();
+ }
+
+ // Builds the live ranges and use positions of all intervals, in three steps:
+ // 1. Per block "gen" (read before being written) and "kill" (written) sets.
+ // 2. Live-in and live-out sets of every block, iterated until nothing changes.
+ // 3. A backwards walk over the operations that creates the ranges. Fixed
+ //    intervals of caller saved registers also get a range at every call.
+ // The block ranges, block start positions and live-in sets are kept on fields,
+ // as they are needed later to insert the split copies.
+ private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
+ {
+     _blockRanges = new LiveRange[cfg.Blocks.Count];
+
+     int mapSize = _intervals.Count;
+
+     BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count];
+     BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];
+
+     // Compute local live sets.
+     foreach (BasicBlock block in cfg.Blocks)
+     {
+         BitMap liveGen = new BitMap(mapSize);
+         BitMap liveKill = new BitMap(mapSize);
+
+         foreach (Node node in block.Operations)
+         {
+             foreach (Operand source in Sources(node))
+             {
+                 int id = GetOperandId(source);
+
+                 // Only values that were not written earlier on this block
+                 // come from outside of it.
+                 if (!liveKill.IsSet(id))
+                 {
+                     liveGen.Set(id);
+                 }
+             }
+
+             foreach (Operand dest in Destinations(node))
+             {
+                 liveKill.Set(GetOperandId(dest));
+             }
+         }
+
+         blkLiveGen[block.Index] = liveGen;
+         blkLiveKill[block.Index] = liveKill;
+     }
+
+     // Compute global live sets.
+     BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count];
+     BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];
+
+     for (int index = 0; index < cfg.Blocks.Count; index++)
+     {
+         blkLiveIn[index] = new BitMap(mapSize);
+         blkLiveOut[index] = new BitMap(mapSize);
+     }
+
+     bool modified;
+
+     do
+     {
+         modified = false;
+
+         for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+         {
+             BasicBlock block = cfg.PostOrderBlocks[index];
+
+             BitMap liveOut = blkLiveOut[block.Index];
+
+             // Live-out is the union of the live-in sets of all successors.
+             foreach (BasicBlock successor in Successors(block))
+             {
+                 if (liveOut.Set(blkLiveIn[successor.Index]))
+                 {
+                     modified = true;
+                 }
+             }
+
+             BitMap liveIn = blkLiveIn[block.Index];
+
+             // Live-in = (live-out - kill) + gen.
+             liveIn.Set(liveOut);
+             liveIn.Clear(blkLiveKill[block.Index]);
+             liveIn.Set(blkLiveGen[block.Index]);
+         }
+     }
+     while (modified);
+
+     _blockLiveIn = blkLiveIn;
+
+     _blockEdges = new HashSet<int>();
+
+     // Compute lifetime intervals.
+     int operationPos = _operationsCount;
+
+     for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+     {
+         BasicBlock block = cfg.PostOrderBlocks[index];
+
+         // We handle empty blocks by pretending they have a dummy instruction,
+         // because otherwise the block would have the same start and end position,
+         // and this is not valid.
+         int instCount = Math.Max(block.Operations.Count, 1);
+
+         int blockStart = operationPos - instCount * InstructionGap;
+         int blockEnd = operationPos;
+
+         _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);
+
+         _blockEdges.Add(blockStart);
+
+         BitMap liveOut = blkLiveOut[block.Index];
+
+         // Values that are live-out start by covering the entire block, the
+         // start is adjusted below if the definition is found on this block.
+         foreach (int id in liveOut)
+         {
+             _intervals[id].AddRange(blockStart, blockEnd);
+         }
+
+         if (block.Operations.Count == 0)
+         {
+             operationPos -= InstructionGap;
+
+             continue;
+         }
+
+         foreach (Node node in BottomOperations(block))
+         {
+             operationPos -= InstructionGap;
+
+             // Destinations are defined at the odd position of the operation.
+             foreach (Operand dest in Destinations(node))
+             {
+                 LiveInterval interval = _intervals[GetOperandId(dest)];
+
+                 interval.SetStart(operationPos + 1);
+                 interval.AddUsePosition(operationPos + 1);
+             }
+
+             // Sources are live from the block start up to this operation, and
+             // are used at its even position.
+             foreach (Operand source in Sources(node))
+             {
+                 LiveInterval interval = _intervals[GetOperandId(source)];
+
+                 interval.AddRange(blockStart, operationPos + 1);
+                 interval.AddUsePosition(operationPos);
+             }
+
+             if (node is Operation operation && operation.Instruction == Instruction.Call)
+             {
+                 AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
+                 AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
+             }
+         }
+     }
+ }
+
+ // For every register set on "mask", adds the range from operationPos + 1 to
+ // operationPos + InstructionGap to the fixed interval of that register.
+ private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
+ {
+     int rangeStart = operationPos + 1;
+     int rangeEnd = operationPos + InstructionGap;
+
+     for (int pending = mask; pending != 0;)
+     {
+         int regIndex = BitUtils.LowestBitSet(pending);
+
+         pending &= ~(1 << regIndex);
+
+         int intervalId = GetRegisterId(new Register(regIndex, regType));
+
+         _intervals[intervalId].AddRange(rangeStart, rangeEnd);
+     }
+ }
+
+ // Maps an operand to the index of its interval: locals use the value returned
+ // by AsInt32 (presumably the number set by NumberLocal), registers use their
+ // register id. Any other operand kind is rejected with an ArgumentException.
+ private static int GetOperandId(Operand operand)
+ {
+     switch (operand.Kind)
+     {
+         case OperandKind.LocalVariable:
+             return operand.AsInt32();
+
+         case OperandKind.Register:
+             return GetRegisterId(operand.GetRegister());
+
+         default:
+             throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
+     }
+ }
+
+ // Index of the fixed interval of a register: the register index on the upper
+ // bits, and the lowest bit set for vector registers.
+ private static int GetRegisterId(Register register)
+ {
+     int shiftedIndex = register.Index << 1;
+     int vectorBit = register.Type == RegisterType.Vector ? 1 : 0;
+
+     return shiftedIndex | vectorBit;
+ }
+
+ // Enumerates the successors of "block": the fall-through block (Next) first,
+ // then the branch target (Branch). Missing successors are skipped.
+ // The result must be a generic sequence, as callers use LINQ (First) on it.
+ private static IEnumerable<BasicBlock> Successors(BasicBlock block)
+ {
+     if (block.Next != null)
+     {
+         yield return block.Next;
+     }
+
+     if (block.Branch != null)
+     {
+         yield return block.Branch;
+     }
+ }
+
+ // Enumerates the operations of "block" from the last one to the first one,
+ // stopping as soon as a phi node is found (the phi node is not returned).
+ private static IEnumerable<Node> BottomOperations(BasicBlock block)
+ {
+     LinkedListNode<Node> node = block.Operations.Last;
+
+     while (node != null && !(node.Value is PhiNode))
+     {
+         yield return node.Value;
+
+         node = node.Previous;
+     }
+ }
+
+ // Enumerates all the destination operands of "node", in index order.
+ private static IEnumerable<Operand> Destinations(Node node)
+ {
+     for (int index = 0; index < node.DestinationsCount; index++)
+     {
+         yield return node.GetDestination(index);
+     }
+ }
+
+ // Enumerates the source operands of "node" that are locals or registers, in
+ // index order. Sources of any other kind are skipped.
+ private static IEnumerable<Operand> Sources(Node node)
+ {
+     for (int index = 0; index < node.SourcesCount; index++)
+     {
+         Operand source = node.GetSource(index);
+
+         if (IsLocalOrRegister(source.Kind))
+         {
+             yield return source;
+         }
+     }
+ }
+
+ // True for the operand kinds that have a live interval: locals and registers.
+ private static bool IsLocalOrRegister(OperandKind kind)
+ {
+     switch (kind)
+     {
+         case OperandKind.LocalVariable:
+         case OperandKind.Register:
+             return true;
+
+         default:
+             return false;
+     }
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
new file mode 100644
index 000000000..18858a768
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -0,0 +1,390 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Lifetime of a local variable, or of a fixed register, as a sorted list of
+ // non-overlapping ranges plus the set of positions where the value is used.
+ // An interval can be split; all the parts produced by splits are registered on
+ // the original (parent) interval, keyed by their start position.
+ // Intervals compare by start position, which is what keeps the list of
+ // intervals to process ordered.
+ class LiveInterval : IComparable<LiveInterval>
+ {
+     // Returned by the lookups when there is no matching position.
+     public const int NotFound = -1;
+
+     // Interval this one was split from, or the interval itself if it is not a split.
+     private LiveInterval _parent;
+
+     private SortedSet<int> _usePositions;
+
+     public int UsesCount => _usePositions.Count;
+
+     // Sorted by start position, searched with BinarySearch.
+     private List<LiveRange> _ranges;
+
+     // Split parts, keyed by start position. Only filled on the parent interval.
+     private SortedList<int, LiveInterval> _childs;
+
+     public bool IsSplit => _childs.Count != 0;
+
+     public Operand Local { get; }
+
+     public Register Register { get; set; }
+
+     // Stack offset of the spilled value, -1 while the interval is not spilled.
+     public int SpillOffset { get; private set; }
+
+     public bool IsSpilled => SpillOffset != -1;
+     public bool IsFixed { get; }
+
+     public bool IsEmpty => _ranges.Count == 0;
+
+     // Creates an empty, non-spilled interval for "local". When "parent" is
+     // omitted the interval is its own parent.
+     public LiveInterval(Operand local = null, LiveInterval parent = null)
+     {
+         Local = local;
+         _parent = parent ?? this;
+
+         _usePositions = new SortedSet<int>();
+
+         _ranges = new List<LiveRange>();
+
+         _childs = new SortedList<int, LiveInterval>();
+
+         SpillOffset = -1;
+     }
+
+     // Creates the fixed interval of a register. It has no local.
+     public LiveInterval(Register register) : this()
+     {
+         IsFixed = true;
+         Register = register;
+     }
+
+     // Moves the start of the first range to "position". On an empty interval,
+     // a range with a length of one position is created instead.
+     public void SetStart(int position)
+     {
+         if (_ranges.Count != 0)
+         {
+             Debug.Assert(position != _ranges[0].End);
+
+             _ranges[0] = new LiveRange(position, _ranges[0].End);
+         }
+         else
+         {
+             _ranges.Add(new LiveRange(position, position + 1));
+         }
+     }
+
+     // Start of the first range. Throws InvalidOperationException if empty.
+     public int GetStart()
+     {
+         if (_ranges.Count == 0)
+         {
+             throw new InvalidOperationException("Empty interval.");
+         }
+
+         return _ranges[0].Start;
+     }
+
+     // Moves the end of the last range to "position". On an empty interval,
+     // a range with a length of one position is created instead.
+     public void SetEnd(int position)
+     {
+         if (_ranges.Count != 0)
+         {
+             int lastIdx = _ranges.Count - 1;
+
+             Debug.Assert(position != _ranges[lastIdx].Start);
+
+             _ranges[lastIdx] = new LiveRange(_ranges[lastIdx].Start, position);
+         }
+         else
+         {
+             _ranges.Add(new LiveRange(position, position + 1));
+         }
+     }
+
+     // End of the last range. Throws InvalidOperationException if empty.
+     public int GetEnd()
+     {
+         if (_ranges.Count == 0)
+         {
+             throw new InvalidOperationException("Empty interval.");
+         }
+
+         return _ranges[_ranges.Count - 1].End;
+     }
+
+     // Adds the range from "start" (inclusive) to "end" (exclusive), merging
+     // it with the existing ranges that it intersects or touches.
+     // Throws ArgumentException if the range is empty or inverted.
+     public void AddRange(int start, int end)
+     {
+         if (start >= end)
+         {
+             throw new ArgumentException("Invalid range start position " + start + ", " + end);
+         }
+
+         int index = _ranges.BinarySearch(new LiveRange(start, end));
+
+         if (index >= 0)
+         {
+             // New range intersects with an existing range, we need to remove
+             // all the intersecting ranges before adding the new one.
+             // We also extend the new range as needed, based on the values of
+             // the existing ranges being removed.
+             int lIndex = index;
+             int rIndex = index;
+
+             while (lIndex > 0 && _ranges[lIndex - 1].End >= start)
+             {
+                 lIndex--;
+             }
+
+             while (rIndex + 1 < _ranges.Count && _ranges[rIndex + 1].Start <= end)
+             {
+                 rIndex++;
+             }
+
+             if (start > _ranges[lIndex].Start)
+             {
+                 start = _ranges[lIndex].Start;
+             }
+
+             if (end < _ranges[rIndex].End)
+             {
+                 end = _ranges[rIndex].End;
+             }
+
+             _ranges.RemoveRange(lIndex, (rIndex - lIndex) + 1);
+
+             InsertRange(lIndex, start, end);
+         }
+         else
+         {
+             // No intersection, the complement is the insertion index.
+             InsertRange(~index, start, end);
+         }
+     }
+
+     private void InsertRange(int index, int start, int end)
+     {
+         // Here we insert a new range on the ranges list.
+         // If possible, we extend an existing range rather than inserting a new one.
+         // We can extend an existing range if any of the following conditions are true:
+         // - The new range starts right after the end of the previous range on the list.
+         // - The new range ends right before the start of the next range on the list.
+         // If both cases are true, we can extend either one. We prefer to extend the
+         // previous range, and then remove the next one, but there's no specific reason
+         // for that, extending either one will do.
+         int? extIndex = null;
+
+         if (index > 0 && _ranges[index - 1].End == start)
+         {
+             start = _ranges[index - 1].Start;
+
+             extIndex = index - 1;
+         }
+
+         if (index < _ranges.Count && _ranges[index].Start == end)
+         {
+             end = _ranges[index].End;
+
+             if (extIndex.HasValue)
+             {
+                 _ranges.RemoveAt(index);
+             }
+             else
+             {
+                 extIndex = index;
+             }
+         }
+
+         if (extIndex.HasValue)
+         {
+             _ranges[extIndex.Value] = new LiveRange(start, end);
+         }
+         else
+         {
+             _ranges.Insert(index, new LiveRange(start, end));
+         }
+     }
+
+     public void AddUsePosition(int position)
+     {
+         _usePositions.Add(position);
+     }
+
+     // True if one of the ranges contains "position".
+     public bool Overlaps(int position)
+     {
+         return _ranges.BinarySearch(new LiveRange(position, position + 1)) >= 0;
+     }
+
+     // True if any range of "other" intersects a range of this interval.
+     public bool Overlaps(LiveInterval other)
+     {
+         foreach (LiveRange range in other._ranges)
+         {
+             if (_ranges.BinarySearch(range) >= 0)
+             {
+                 return true;
+             }
+         }
+
+         return false;
+     }
+
+     // Returns the start of the first range of this interval that intersects
+     // "other", or NotFound when the intervals do not intersect.
+     public int GetOverlapPosition(LiveInterval other)
+     {
+         foreach (LiveRange range in other._ranges)
+         {
+             int overlapIndex = _ranges.BinarySearch(range);
+
+             if (overlapIndex >= 0)
+             {
+                 // It's possible that we have multiple overlaps within a single interval,
+                 // in this case, we pick the one with the lowest start position, since
+                 // we return the first overlap position.
+                 while (overlapIndex > 0 && _ranges[overlapIndex - 1].End > range.Start)
+                 {
+                     overlapIndex--;
+                 }
+
+                 LiveRange overlappingRange = _ranges[overlapIndex];
+
+                 return overlappingRange.Start;
+             }
+         }
+
+         return NotFound;
+     }
+
+     // Split parts registered on this interval, ordered by start position.
+     public IEnumerable<LiveInterval> SplitChilds()
+     {
+         return _childs.Values;
+     }
+
+     // Use positions, in ascending order.
+     public IEnumerable<int> UsePositions()
+     {
+         return _usePositions;
+     }
+
+     // Lowest use position, or NotFound when there are no uses.
+     public int FirstUse()
+     {
+         if (_usePositions.Count == 0)
+         {
+             return NotFound;
+         }
+
+         // The set is sorted, so the minimum is the first use.
+         return _usePositions.Min;
+     }
+
+     // First use position at or after "position", or NotFound if there is none.
+     public int NextUseAfter(int position)
+     {
+         foreach (int usePosition in _usePositions)
+         {
+             if (usePosition >= position)
+             {
+                 return usePosition;
+             }
+         }
+
+         return NotFound;
+     }
+
+     // Splits the interval at "position". This interval keeps the ranges and
+     // uses before the position, and the returned interval gets the ones at or
+     // after it. The new interval is registered as a split child of the parent.
+     public LiveInterval Split(int position)
+     {
+         LiveInterval right = new LiveInterval(Local, _parent);
+
+         int splitIndex = 0;
+
+         for (; splitIndex < _ranges.Count; splitIndex++)
+         {
+             LiveRange range = _ranges[splitIndex];
+
+             if (position > range.Start && position <= range.End)
+             {
+                 // The position is inside of this range, cut it in two.
+                 right._ranges.Add(new LiveRange(position, range.End));
+
+                 range = new LiveRange(range.Start, position);
+
+                 _ranges[splitIndex++] = range;
+
+                 break;
+             }
+
+             if (range.Start >= position)
+             {
+                 break;
+             }
+         }
+
+         // All the remaining ranges are moved to the new interval.
+         if (splitIndex < _ranges.Count)
+         {
+             int count = _ranges.Count - splitIndex;
+
+             right._ranges.AddRange(_ranges.GetRange(splitIndex, count));
+
+             _ranges.RemoveRange(splitIndex, count);
+         }
+
+         foreach (int usePosition in _usePositions.Where(x => x >= position))
+         {
+             right._usePositions.Add(usePosition);
+         }
+
+         _usePositions.RemoveWhere(x => x >= position);
+
+         Debug.Assert(_ranges.Count != 0, "Left interval is empty after split.");
+
+         Debug.Assert(right._ranges.Count != 0, "Right interval is empty after split.");
+
+         AddSplitChild(right);
+
+         return right;
+     }
+
+     private void AddSplitChild(LiveInterval child)
+     {
+         Debug.Assert(!child.IsEmpty, "Trying to insert a empty interval.");
+
+         _parent._childs.Add(child.GetStart(), child);
+     }
+
+     // Returns the part (this interval or one of its split children) that
+     // contains "position", or null if none does.
+     public LiveInterval GetSplitChild(int position)
+     {
+         if (Overlaps(position))
+         {
+             return this;
+         }
+
+         foreach (LiveInterval splitChild in _childs.Values)
+         {
+             if (splitChild.Overlaps(position))
+             {
+                 return splitChild;
+             }
+         }
+
+         return null;
+     }
+
+     // Spills this interval to the stack offset of a split child of the parent
+     // that is already spilled. Returns false when no such child exists.
+     public bool TrySpillWithSiblingOffset()
+     {
+         foreach (LiveInterval splitChild in _parent._childs.Values)
+         {
+             if (splitChild.IsSpilled)
+             {
+                 Spill(splitChild.SpillOffset);
+
+                 return true;
+             }
+         }
+
+         return false;
+     }
+
+     public void Spill(int offset)
+     {
+         SpillOffset = offset;
+     }
+
+     // Orders intervals by start position. When either one is empty, the
+     // number of ranges is compared instead.
+     public int CompareTo(LiveInterval other)
+     {
+         if (_ranges.Count == 0 || other._ranges.Count == 0)
+         {
+             return _ranges.Count.CompareTo(other._ranges.Count);
+         }
+
+         return _ranges[0].Start.CompareTo(other._ranges[0].Start);
+     }
+
+     public override string ToString()
+     {
+         return string.Join("; ", _ranges);
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
new file mode 100644
index 000000000..b5faeffd5
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
@@ -0,0 +1,31 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Range of positions written as [Start, End[ : Start belongs to the range, End does not
+ // (two ranges that merely touch, e.g. [0, 5[ and [5, 9[, are not considered overlapping).
+ struct LiveRange : IComparable<LiveRange>
+ {
+     public int Start { get; }
+     public int End { get; }
+
+     public LiveRange(int start, int end)
+     {
+         Start = start;
+         End = end;
+     }
+
+     // Returns 0 when the two ranges overlap (each one starts before the other ends);
+     // otherwise returns the result of comparing the Start values.
+     // The generic interface is required: CompareTo(LiveRange) does not satisfy IComparable.
+     public int CompareTo(LiveRange other)
+     {
+         bool overlaps = Start < other.End && other.Start < End;
+
+         return overlaps ? 0 : Start.CompareTo(other.Start);
+     }
+
+     // Formats the range as "[Start, End[".
+     public override string ToString() => $"[{Start}, {End}[";
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
new file mode 100644
index 000000000..9652224e5
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Immutable bundle of register masks (available, caller-saved and callee-saved), kept
+ // separately for integer and vector registers. All values are stored exactly as given.
+ struct RegisterMasks
+ {
+     public int IntAvailableRegisters { get; }
+     public int VecAvailableRegisters { get; }
+     public int IntCallerSavedRegisters { get; }
+     public int VecCallerSavedRegisters { get; }
+     public int IntCalleeSavedRegisters { get; }
+     public int VecCalleeSavedRegisters { get; }
+
+     public RegisterMasks(
+         int intAvailableRegisters,
+         int vecAvailableRegisters,
+         int intCallerSavedRegisters,
+         int vecCallerSavedRegisters,
+         int intCalleeSavedRegisters,
+         int vecCalleeSavedRegisters)
+     {
+         // Integer masks first, then vector masks; every property is set exactly once.
+         IntAvailableRegisters = intAvailableRegisters;
+         IntCallerSavedRegisters = intCallerSavedRegisters;
+         IntCalleeSavedRegisters = intCalleeSavedRegisters;
+         VecAvailableRegisters = vecAvailableRegisters;
+         VecCallerSavedRegisters = vecCallerSavedRegisters;
+         VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+     }
+
+     // Returns the "available" mask for the given register type; any type other than
+     // Integer or Vector is rejected with an ArgumentException.
+     public int GetAvailableRegisters(RegisterType type)
+     {
+         switch (type)
+         {
+             case RegisterType.Integer: return IntAvailableRegisters;
+             case RegisterType.Vector: return VecAvailableRegisters;
+         }
+
+         throw new ArgumentException($"Invalid register type \"{type}\".");
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
new file mode 100644
index 000000000..a6233d6ee
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
@@ -0,0 +1,27 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Hands out consecutive offsets: each allocation starts where the previous one ended.
+ class StackAllocator
+ {
+     private int _offset;
+
+     // Sum of all sizes allocated so far.
+     public int TotalSize => _offset;
+
+     // Allocates type.GetSizeInBytes() bytes and returns the offset of that area.
+     public int Allocate(OperandType type) => Allocate(type.GetSizeInBytes());
+
+     // Allocates sizeInBytes bytes and returns the offset the area starts at.
+     public int Allocate(int sizeInBytes)
+     {
+         int allocated = _offset;
+         _offset = allocated + sizeInBytes;
+
+         return allocated;
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
new file mode 100644
index 000000000..4955f1b4a
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ // Unwind data carried by a CompiledFunction: push entries, prologue size, fixed alloc size.
+ struct UnwindInfo
+ {
+     public UnwindPushEntry[] PushEntries { get; }
+     public int PrologueSize { get; }
+     public int FixedAllocSize { get; }
+
+     // Stores the values as given; the pushEntries array is kept by reference, not copied.
+     public UnwindInfo(UnwindPushEntry[] pushEntries, int prologueSize, int fixedAllocSize)
+     {
+         FixedAllocSize = fixedAllocSize;
+         PrologueSize = prologueSize;
+         PushEntries = pushEntries;
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
new file mode 100644
index 000000000..6597e2b4b
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
@@ -0,0 +1,20 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ // One entry of UnwindInfo.PushEntries: a register (Index, Type) plus a StreamEndOffset.
+ // NOTE(review): StreamEndOffset is presumably the code offset just after the push - confirm.
+ struct UnwindPushEntry
+ {
+     public int Index { get; }
+     public RegisterType Type { get; }
+     public int StreamEndOffset { get; }
+
+     public UnwindPushEntry(int index, RegisterType type, int streamEndOffset)
+     {
+         StreamEndOffset = streamEndOffset;
+         Type = type;
+         Index = index;
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
new file mode 100644
index 000000000..c64838945
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -0,0 +1,1358 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class Assembler
+ {
+ private const int BadOp = 0; // Table value meaning "no opcode for this operand form".
+ private const int OpModRMBits = 24; // NOTE(review): presumably the bit position of the ModRM /digit packed into table opcodes such as 0x04000083 - confirm against its users.
+
+ private const byte RexPrefix = 0x40; // Base REX prefix byte.
+ private const byte RexWPrefix = 0x48; // REX prefix with the W bit set.
+ private const byte LockPrefix = 0xf0; // LOCK prefix byte; Cmpxchg16b writes it before the instruction.
+
+ [Flags]
+ private enum InstructionFlags // Encoding attributes stored with each instruction table entry.
+ {
+ None = 0,
+ RegOnly = 1 << 0,
+ Reg8Src = 1 << 1,
+ Reg8Dest = 1 << 2,
+ RexW = 1 << 3,
+ Vex = 1 << 4,
+ // Bits 16-17 form a 2-bit prefix selector (1 = 66, 2 = F3, 3 = F2), not independent flags. PrefixBit is a shift count; its value (16) is numerically equal to Vex.
+ PrefixBit = 16,
+ PrefixMask = 3 << PrefixBit,
+ Prefix66 = 1 << PrefixBit,
+ PrefixF3 = 2 << PrefixBit,
+ PrefixF2 = 3 << PrefixBit
+ }
+
+ // Encoding record for one X86Instruction: one opcode slot per operand form (BadOp when the
+ // instruction has no such form) plus the InstructionFlags shared by all of its forms.
+ private struct InstructionInfo
+ {
+     // Slots in table-column order: RM/R, RM/I8, RM/I32, R/I64, R/RM.
+     public int OpRMR { get; }
+     public int OpRMImm8 { get; }
+     public int OpRMImm32 { get; }
+     public int OpRImm64 { get; }
+     public int OpRRM { get; }
+
+     // Flags column of the table.
+     public InstructionFlags Flags { get; }
+
+     public InstructionInfo(
+         int opRMR, int opRMImm8, int opRMImm32,
+         int opRImm64, int opRRM, InstructionFlags flags)
+     {
+         Flags = flags;
+         OpRRM = opRRM;
+         OpRImm64 = opRImm64;
+         OpRMImm32 = opRMImm32;
+         OpRMImm8 = opRMImm8;
+         OpRMR = opRMR;
+     }
+ }
+
+ private static InstructionInfo[] _instTable; // Indexed by (int)X86Instruction; filled by the static constructor.
+
+ private Stream _stream; // The stream passed to the instance constructor.
+
+ static Assembler() // Fills _instTable with one InstructionInfo per X86Instruction.
+ {
+ _instTable = new InstructionInfo[(int)X86Instruction.Count];
+ // Opcode bytes sit in the low bits of each value; a non-zero top byte (e.g. 0x04000083) is, per x86 encoding, the ModRM /digit (NOTE(review): presumably extracted via OpModRMBits - confirm). BadOp marks a missing form.
+ // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
+ Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None));
+ Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
+ Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None));
+ Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex));
+ Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None));
+ Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly));
+ Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None));
+ Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None));
+ Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex));
+ Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
+ Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
+ Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly));
+ Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); // NOTE(review): F2 0F 2C is the truncating CVTTSD2SI encoding (CVTSD2SI is F2 0F 2D) - confirm this is intended.
+ Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None));
+ Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex));
+ Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None));
+ Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None));
+ Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
+ Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
+ Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));
+ Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex));
+ Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
+ Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66));
+ Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex));
+ Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex));
+ Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None));
+ Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None));
+ Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None));
+ Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None));
+ Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex));
+ Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None));
+ Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3));
+ Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None));
+ Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex));
+ Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex));
+ Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None));
+ Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex));
+ Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
+ Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
+ Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
+ Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
+ }
+
+ // Stores info in the _instTable slot whose index is (int)inst;
+ // used by the static constructor to fill the table.
+ private static void Add(X86Instruction inst, InstructionInfo info)
+     => _instTable[(int)inst] = info;
+
+ // Creates an assembler for the given stream; the reference is kept
+ // in _stream and nothing is written at construction time.
+ public Assembler(Stream stream)
+     => _stream = stream;
+
+ // Emits ADD for dest/source with the given operand type by forwarding
+ // to WriteInstruction with X86Instruction.Add.
+ public void Add(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Add);
+
+ // Emits ADDSD (scalar double-precision add) in three-operand form:
+ // dest, src1, src2 are forwarded to WriteInstruction unchanged.
+ public void Addsd(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Addsd);
+
+ // Emits ADDSS (scalar single-precision add) in three-operand form:
+ // dest, src1, src2 are forwarded to WriteInstruction unchanged.
+ public void Addss(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Addss);
+
+ // Emits AND for dest/source with the given operand type by forwarding
+ // to WriteInstruction with X86Instruction.And.
+ public void And(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.And);
+
+ // Emits BSR (bit scan reverse) for dest/source with the given operand
+ // type by forwarding to WriteInstruction with X86Instruction.Bsr.
+ public void Bsr(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Bsr);
+
+ // Emits BSWAP on dest. No source operand is passed (null) and the
+ // operand type is taken from dest.Type.
+ public void Bswap(Operand dest)
+     => WriteInstruction(dest, null, dest.Type, X86Instruction.Bswap);
+
+ // Emits CALL with dest as the target operand (table opcode FF /2).
+ // No source operand is passed and the operand type is OperandType.None.
+ public void Call(Operand dest)
+     => WriteInstruction(dest, null, OperandType.None, X86Instruction.Call);
+
+ // Emits CDQ, which is the single opcode byte 0x99 with no
+ // prefix and no operands.
+ public void Cdq()
+     => WriteByte(0x99);
+
+ // Emits CMOVcc: the condition value is OR-ed into the table's R/RM opcode (0F 40).
+ public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition)
+ {
+     InstructionInfo cmov = _instTable[(int)X86Instruction.Cmovcc];
+     WriteOpCode(dest, null, source, type, cmov.Flags, cmov.OpRRM | (int)condition, rrm: true);
+ }
+
+ // Emits CMP of src1 against src2 with the given operand type by
+ // forwarding to WriteInstruction with X86Instruction.Cmp.
+ public void Cmp(Operand src1, Operand src2, OperandType type)
+     => WriteInstruction(src1, src2, type, X86Instruction.Cmp);
+
+ public void Cqo() // Emits CQO: the 0x99 opcode preceded by a REX.W prefix.
+ {
+ WriteByte(0x48); // REX.W prefix; same value as the RexWPrefix constant.
+ WriteByte(0x99); // Same opcode byte that Cdq writes without the prefix.
+ }
+
+ public void Cmpxchg16b(MemoryOperand memOp) // Emits LOCK CMPXCHG16B on the memory operand.
+ {
+ WriteByte(LockPrefix);
+ // The LOCK prefix byte has to come before the instruction bytes written below.
+ WriteInstruction(memOp, null, OperandType.None, X86Instruction.Cmpxchg16b);
+ }
+
+ // Emits COMISD (scalar double-precision compare). src1 is passed in the
+ // first operand slot, null in the second and src2 in the third.
+ public void Comisd(Operand src1, Operand src2)
+     => WriteInstruction(src1, null, src2, X86Instruction.Comisd);
+
+ // Emits COMISS (scalar single-precision compare). src1 is passed in the
+ // first operand slot, null in the second and src2 in the third.
+ public void Comiss(Operand src1, Operand src2)
+     => WriteInstruction(src1, null, src2, X86Instruction.Comiss);
+
+ public void Cpuid()
+ {
+ WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid);
+ }
+
+ /// <summary>Encodes <see cref="X86Instruction.Cvtsd2ss"/> in its three operand form.</summary>
+ public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
+
+ /// <summary>Encodes <see cref="X86Instruction.Cvtsi2sd"/>; <paramref name="type"/> is forwarded to the encoder.</summary>
+ public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type);
+
+ /// <summary>Encodes <see cref="X86Instruction.Cvtsi2ss"/>; <paramref name="type"/> is forwarded to the encoder.</summary>
+ public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type);
+
+ /// <summary>Encodes <see cref="X86Instruction.Cvtss2sd"/> in its three operand form.</summary>
+ public void Cvtss2sd(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd);
+
+ /// <summary>Encodes <see cref="X86Instruction.Div"/> with <paramref name="source"/> as the only explicit operand.</summary>
+ public void Div(Operand source)
+     => WriteInstruction(null, source, source.Type, X86Instruction.Div);
+
+ /// <summary>Encodes <see cref="X86Instruction.Divsd"/> in its three operand form.</summary>
+ public void Divsd(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Divsd);
+
+ /// <summary>Encodes <see cref="X86Instruction.Divss"/> in its three operand form.</summary>
+ public void Divss(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Divss);
+
+ /// <summary>Encodes <see cref="X86Instruction.Idiv"/> with <paramref name="source"/> as the only explicit operand.</summary>
+ public void Idiv(Operand source)
+     => WriteInstruction(null, source, source.Type, X86Instruction.Idiv);
+
+ /// <summary>Encodes the single operand multiply, <see cref="X86Instruction.Imul128"/>.</summary>
+ public void Imul(Operand source)
+     => WriteInstruction(null, source, source.Type, X86Instruction.Imul128);
+
+ /// <summary>
+ /// Encodes the two operand form of <see cref="X86Instruction.Imul"/>.
+ /// </summary>
+ /// <exception cref="ArgumentException"><paramref name="source"/> is not a register</exception>
+ public void Imul(Operand dest, Operand source, OperandType type)
+ {
+     bool sourceIsRegister = source.Kind == OperandKind.Register;
+
+     if (!sourceIsRegister)
+     {
+         throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+     }
+
+     WriteInstruction(dest, source, type, X86Instruction.Imul);
+ }
+
+ /// <summary>
+ /// Encodes the immediate form of <see cref="X86Instruction.Imul"/>: the 8-bit immediate
+ /// encoding is preferred, the 32-bit one is used when the constant does not fit in 8 bits.
+ /// </summary>
+ /// <exception cref="ArgumentException">
+ /// <paramref name="src2"/> is not a constant, or the constant fits in none of the available encodings
+ /// </exception>
+ public void Imul(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+     InstructionInfo imulInfo = _instTable[(int)X86Instruction.Imul];
+
+     if (src2.Kind != OperandKind.Constant)
+     {
+         throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\".");
+     }
+
+     if (IsImm8(src2.Value, src2.Type) && imulInfo.OpRMImm8 != BadOp)
+     {
+         WriteOpCode(dest, null, src1, type, imulInfo.Flags, imulInfo.OpRMImm8, rrm: true);
+         WriteByte(src2.AsByte());
+
+         return;
+     }
+
+     if (IsImm32(src2.Value, src2.Type) && imulInfo.OpRMImm32 != BadOp)
+     {
+         WriteOpCode(dest, null, src1, type, imulInfo.Flags, imulInfo.OpRMImm32, rrm: true);
+         WriteInt32(src2.AsInt32());
+
+         return;
+     }
+
+     throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}.");
+ }
+
+ /// <summary>
+ /// Encodes <see cref="X86Instruction.Insertps"/> followed by the immediate byte <paramref name="imm"/>.
+ /// </summary>
+ public void Insertps(Operand dest, Operand src1, Operand src2, byte imm)
+     => WriteInstruction(X86Instruction.Insertps, dest, src1, src2, imm);
+
+ /// <summary>
+ /// Writes a conditional jump, using the 2 byte encoding when the offset fits on a
+ /// signed byte and the 6 byte encoding when it fits on a signed 32-bit integer.
+ /// </summary>
+ /// <param name="condition">Condition code, OR-ed into the opcode byte</param>
+ /// <param name="offset">Offset written verbatim after the opcode</param>
+ /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> does not fit on 32 bits</exception>
+ public void Jcc(X86Condition condition, long offset)
+ {
+     int conditionCode = (int)condition;
+
+     if (ConstFitsOnS8(offset))
+     {
+         // Short form: 0x70 | cc, rel8.
+         WriteByte((byte)(0x70 | conditionCode));
+         WriteByte((byte)offset);
+
+         return;
+     }
+
+     if (!ConstFitsOnS32(offset))
+     {
+         throw new ArgumentOutOfRangeException(nameof(offset));
+     }
+
+     // Near form: 0x0f, 0x80 | cc, rel32.
+     WriteByte(0x0f);
+     WriteByte((byte)(0x80 | conditionCode));
+     WriteInt32((int)offset);
+ }
+
+ /// <summary>
+ /// Writes an unconditional jump, using the 2 byte encoding when the offset fits on a
+ /// signed byte and the 5 byte encoding when it fits on a signed 32-bit integer.
+ /// </summary>
+ /// <param name="offset">Offset written verbatim after the opcode</param>
+ /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> does not fit on 32 bits</exception>
+ public void Jmp(long offset)
+ {
+     if (ConstFitsOnS8(offset))
+     {
+         // Short form: 0xeb, rel8.
+         WriteByte(0xeb);
+         WriteByte((byte)offset);
+
+         return;
+     }
+
+     if (!ConstFitsOnS32(offset))
+     {
+         throw new ArgumentOutOfRangeException(nameof(offset));
+     }
+
+     // Near form: 0xe9, rel32.
+     WriteByte(0xe9);
+     WriteInt32((int)offset);
+ }
+
+ /// <summary>Encodes <see cref="X86Instruction.Lea"/> for the given destination and source.</summary>
+ public void Lea(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Lea);
+
+ /// <summary>Encodes <see cref="X86Instruction.Mov"/> for the given destination and source.</summary>
+ public void Mov(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Mov);
+
+ /// <summary>Encodes <see cref="X86Instruction.Mov16"/>; the operand type passed to the encoder is None.</summary>
+ public void Mov16(Operand dest, Operand source)
+     => WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16);
+
+ /// <summary>Encodes <see cref="X86Instruction.Mov8"/>; the operand type passed to the encoder is None.</summary>
+ public void Mov8(Operand dest, Operand source)
+     => WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8);
+
+ /// <summary>
+ /// Encodes <see cref="X86Instruction.Movd"/>. When the source is an integer or a memory
+ /// operand the table's OpRRM opcode is used (with the rrm operand layout), otherwise
+ /// the OpRMR opcode is used.
+ /// </summary>
+ public void Movd(Operand dest, Operand source)
+ {
+     InstructionInfo movdInfo = _instTable[(int)X86Instruction.Movd];
+
+     bool useRrmForm = source.Type.IsInteger() || source.Kind == OperandKind.Memory;
+
+     if (useRrmForm)
+     {
+         WriteOpCode(dest, null, source, OperandType.None, movdInfo.Flags, movdInfo.OpRRM, rrm: true);
+     }
+     else
+     {
+         WriteOpCode(dest, null, source, OperandType.None, movdInfo.Flags, movdInfo.OpRMR);
+     }
+ }
+
+ /// <summary>Encodes <see cref="X86Instruction.Movdqu"/> for the given destination and source.</summary>
+ public void Movdqu(Operand dest, Operand source)
+     => WriteInstruction(dest, null, source, X86Instruction.Movdqu);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movhlps"/> in its three operand form.</summary>
+ public void Movhlps(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Movhlps);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movlhps"/> in its three operand form.</summary>
+ public void Movlhps(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Movlhps);
+
+ /// <summary>
+ /// Encodes a 64-bit move. When either side is an integer or memory operand, the
+ /// <see cref="X86Instruction.Movd"/> table entry is reused with the RexW flag added;
+ /// otherwise the <see cref="X86Instruction.Movq"/> entry is encoded.
+ /// </summary>
+ public void Movq(Operand dest, Operand source)
+ {
+     InstructionInfo movdInfo = _instTable[(int)X86Instruction.Movd];
+
+     InstructionFlags wideFlags = movdInfo.Flags | InstructionFlags.RexW;
+
+     if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+     {
+         WriteOpCode(dest, null, source, OperandType.None, wideFlags, movdInfo.OpRRM, rrm: true);
+
+         return;
+     }
+
+     if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory)
+     {
+         WriteOpCode(dest, null, source, OperandType.None, wideFlags, movdInfo.OpRMR);
+
+         return;
+     }
+
+     WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq);
+ }
+
+ /// <summary>Encodes <see cref="X86Instruction.Movsd"/> in its three operand form.</summary>
+ public void Movsd(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Movsd);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movss"/> in its three operand form.</summary>
+ public void Movss(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Movss);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movsx16"/> for the given destination and source.</summary>
+ public void Movsx16(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Movsx16);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movsx32"/> for the given destination and source.</summary>
+ public void Movsx32(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Movsx32);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movsx8"/> for the given destination and source.</summary>
+ public void Movsx8(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Movsx8);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movzx16"/> for the given destination and source.</summary>
+ public void Movzx16(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Movzx16);
+
+ /// <summary>Encodes <see cref="X86Instruction.Movzx8"/> for the given destination and source.</summary>
+ public void Movzx8(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Movzx8);
+
+ /// <summary>Encodes the single operand multiply, <see cref="X86Instruction.Mul128"/>.</summary>
+ public void Mul(Operand source)
+     => WriteInstruction(null, source, source.Type, X86Instruction.Mul128);
+
+ /// <summary>Encodes <see cref="X86Instruction.Mulsd"/> in its three operand form.</summary>
+ public void Mulsd(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Mulsd);
+
+ /// <summary>Encodes <see cref="X86Instruction.Mulss"/> in its three operand form.</summary>
+ public void Mulss(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
+
+ /// <summary>Encodes <see cref="X86Instruction.Neg"/> on <paramref name="dest"/>, using the operand's own type.</summary>
+ public void Neg(Operand dest)
+     => WriteInstruction(dest, null, dest.Type, X86Instruction.Neg);
+
+ /// <summary>Encodes <see cref="X86Instruction.Not"/> on <paramref name="dest"/>, using the operand's own type.</summary>
+ public void Not(Operand dest)
+     => WriteInstruction(dest, null, dest.Type, X86Instruction.Not);
+
+ /// <summary>Encodes <see cref="X86Instruction.Or"/> for the given destination and source.</summary>
+ public void Or(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Or);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pcmpeqw"/> in its three operand form.</summary>
+ public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pextrb"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pextrb(Operand dest, Operand source, byte imm)
+     => WriteInstruction(X86Instruction.Pextrb, dest, source, imm);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pextrd"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pextrd(Operand dest, Operand source, byte imm)
+     => WriteInstruction(X86Instruction.Pextrd, dest, source, imm);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pextrq"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pextrq(Operand dest, Operand source, byte imm)
+     => WriteInstruction(X86Instruction.Pextrq, dest, source, imm);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pextrw"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pextrw(Operand dest, Operand source, byte imm)
+     => WriteInstruction(X86Instruction.Pextrw, dest, source, imm);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pinsrb"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
+     => WriteInstruction(X86Instruction.Pinsrb, dest, src1, src2, imm);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pinsrd"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
+     => WriteInstruction(X86Instruction.Pinsrd, dest, src1, src2, imm);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pinsrq"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
+     => WriteInstruction(X86Instruction.Pinsrq, dest, src1, src2, imm);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pinsrw"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
+     => WriteInstruction(X86Instruction.Pinsrw, dest, src1, src2, imm);
+
+ /// <summary>
+ /// Pops into <paramref name="dest"/>. Registers use the compact encoding based on
+ /// opcode 0x58; any other operand kind goes through the <see cref="X86Instruction.Pop"/> table entry.
+ /// </summary>
+ public void Pop(Operand dest)
+ {
+     if (dest.Kind != OperandKind.Register)
+     {
+         WriteInstruction(dest, null, dest.Type, X86Instruction.Pop);
+
+         return;
+     }
+
+     WriteCompactInst(dest, 0x58);
+ }
+
+ /// <summary>Encodes <see cref="X86Instruction.Popcnt"/> for the given destination and source.</summary>
+ public void Popcnt(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Popcnt);
+
+ /// <summary>Encodes <see cref="X86Instruction.Pshufd"/> followed by the immediate byte <paramref name="imm"/>.</summary>
+ public void Pshufd(Operand dest, Operand source, byte imm)
+     => WriteInstruction(X86Instruction.Pshufd, dest, source, imm);
+
+ /// <summary>
+ /// Pushes <paramref name="source"/>. Registers use the compact encoding based on
+ /// opcode 0x50; any other operand kind goes through the <see cref="X86Instruction.Push"/> table entry.
+ /// </summary>
+ public void Push(Operand source)
+ {
+     if (source.Kind != OperandKind.Register)
+     {
+         WriteInstruction(null, source, source.Type, X86Instruction.Push);
+
+         return;
+     }
+
+     WriteCompactInst(source, 0x50);
+ }
+
+ /// <summary>Writes the single opcode byte 0xc3 (RET).</summary>
+ public void Return() => WriteByte(0xc3);
+
+ /// <summary>Encodes <see cref="X86Instruction.Ror"/> through the shift encoder.</summary>
+ public void Ror(Operand dest, Operand source, OperandType type)
+     => WriteShiftInst(dest, source, type, X86Instruction.Ror);
+
+ /// <summary>Encodes <see cref="X86Instruction.Sar"/> through the shift encoder.</summary>
+ public void Sar(Operand dest, Operand source, OperandType type)
+     => WriteShiftInst(dest, source, type, X86Instruction.Sar);
+
+ /// <summary>Encodes <see cref="X86Instruction.Shl"/> through the shift encoder.</summary>
+ public void Shl(Operand dest, Operand source, OperandType type)
+     => WriteShiftInst(dest, source, type, X86Instruction.Shl);
+
+ /// <summary>Encodes <see cref="X86Instruction.Shr"/> through the shift encoder.</summary>
+ public void Shr(Operand dest, Operand source, OperandType type)
+     => WriteShiftInst(dest, source, type, X86Instruction.Shr);
+
+ /// <summary>
+ /// Encodes a conditional set. The condition code is OR-ed into the opcode
+ /// taken from the <see cref="X86Instruction.Setcc"/> table entry.
+ /// </summary>
+ public void Setcc(Operand dest, X86Condition condition)
+ {
+     InstructionInfo setccInfo = _instTable[(int)X86Instruction.Setcc];
+
+     int opCode = setccInfo.OpRRM | (int)condition;
+
+     WriteOpCode(dest, null, null, OperandType.None, setccInfo.Flags, opCode);
+ }
+
+ /// <summary>Encodes <see cref="X86Instruction.Sub"/> for the given destination and source.</summary>
+ public void Sub(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Sub);
+
+ /// <summary>Encodes <see cref="X86Instruction.Subsd"/> in its three operand form.</summary>
+ public void Subsd(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
+
+ /// <summary>Encodes <see cref="X86Instruction.Subss"/> in its three operand form.</summary>
+ public void Subss(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Subss);
+
+ /// <summary>Encodes <see cref="X86Instruction.Test"/> for the two tested operands.</summary>
+ public void Test(Operand src1, Operand src2, OperandType type)
+     => WriteInstruction(src1, src2, type, X86Instruction.Test);
+
+ /// <summary>Encodes <see cref="X86Instruction.Xor"/> for the given destination and source.</summary>
+ public void Xor(Operand dest, Operand source, OperandType type)
+     => WriteInstruction(dest, source, type, X86Instruction.Xor);
+
+ /// <summary>Encodes <see cref="X86Instruction.Xorps"/> in its three operand form.</summary>
+ public void Xorps(Operand dest, Operand src1, Operand src2)
+     => WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
+
+ /// <summary>
+ /// Encodes <paramref name="inst"/> with a destination and a single source.
+ /// </summary>
+ public void WriteInstruction(
+     X86Instruction inst,
+     Operand dest,
+     Operand source,
+     OperandType type = OperandType.None)
+     => WriteInstruction(dest, null, source, inst, type);
+
+ /// <summary>
+ /// Encodes <paramref name="inst"/> with a destination and two sources. When the second
+ /// source is a constant, <paramref name="dest"/> and <paramref name="src1"/> swap places
+ /// before being handed to the encoder.
+ /// </summary>
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
+ {
+     bool hasConstantSource = src2.Kind == OperandKind.Constant;
+
+     Operand first = hasConstantSource ? src1 : dest;
+     Operand second = hasConstantSource ? dest : src1;
+
+     WriteInstruction(first, second, src2, inst);
+ }
+
+ /// <summary>
+ /// Encodes <paramref name="inst"/> with a destination and a single source, then
+ /// appends the immediate byte <paramref name="imm"/>.
+ /// </summary>
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
+ {
+     WriteInstruction(dest, null, source, inst);
+     WriteByte(imm);
+ }
+
+ /// <summary>
+ /// Encodes <paramref name="inst"/> with a destination and three sources. The third
+ /// source is appended as one trailing byte: its byte value shifted left by 4.
+ /// </summary>
+ public void WriteInstruction(
+     X86Instruction inst,
+     Operand dest,
+     Operand src1,
+     Operand src2,
+     Operand src3)
+ {
+     // 3+ operands can only be encoded with the VEX encoding scheme.
+     Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+     WriteInstruction(dest, src1, src2, inst);
+
+     int trailingByte = src3.AsByte() << 4;
+
+     WriteByte((byte)trailingByte);
+ }
+
+ /// <summary>
+ /// Encodes <paramref name="inst"/> with a destination and two sources, then
+ /// appends the immediate byte <paramref name="imm"/>.
+ /// </summary>
+ public void WriteInstruction(
+     X86Instruction inst,
+     Operand dest,
+     Operand src1,
+     Operand src2,
+     byte imm)
+ {
+     WriteInstruction(dest, src1, src2, inst);
+     WriteByte(imm);
+ }
+
+ /// <summary>
+ /// Encodes a shift or rotate instruction. A register shift count must be RCX and is
+ /// then passed to the encoder as a null source; any other source is forwarded unchanged.
+ /// </summary>
+ /// <exception cref="ArgumentException">The shift count is a register other than RCX</exception>
+ private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+     Operand count = source;
+
+     if (count.Kind == OperandKind.Register)
+     {
+         X86Register countReg = (X86Register)count.GetRegister().Index;
+
+         if (countReg != X86Register.Rcx)
+         {
+             throw new ArgumentException($"Invalid shift register \"{countReg}\".");
+         }
+
+         // The count register is implicit, so no source operand is encoded.
+         count = null;
+     }
+
+     WriteInstruction(dest, count, type, inst);
+ }
+
+ /// <summary>
+ /// Encodes a one or two operand instruction, picking the opcode from the instruction
+ /// table according to the kind of <paramref name="source"/> (none, constant, register or other).
+ /// </summary>
+ /// <exception cref="ArgumentNullException">
+ /// <paramref name="source"/> is null and the table entry has neither an OpRRM nor an OpRMR opcode
+ /// </exception>
+ /// <exception cref="ArgumentException">
+ /// The constant cannot be encoded, or the table entry has no opcode for the source kind
+ /// </exception>
+ private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+     InstructionInfo entry = _instTable[(int)inst];
+
+     if (source == null)
+     {
+         if (entry.OpRRM != BadOp)
+         {
+             WriteOpCode(dest, null, source, type, entry.Flags, entry.OpRRM, rrm: true);
+
+             return;
+         }
+
+         if (entry.OpRMR != BadOp)
+         {
+             WriteOpCode(dest, null, source, type, entry.Flags, entry.OpRMR);
+
+             return;
+         }
+
+         throw new ArgumentNullException(nameof(source));
+     }
+
+     if (source.Kind != OperandKind.Constant)
+     {
+         if (source.Kind == OperandKind.Register && entry.OpRMR != BadOp)
+         {
+             WriteOpCode(dest, null, source, type, entry.Flags, entry.OpRMR);
+
+             return;
+         }
+
+         if (entry.OpRRM != BadOp)
+         {
+             WriteOpCode(dest, null, source, type, entry.Flags, entry.OpRRM, rrm: true);
+
+             return;
+         }
+
+         throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+     }
+
+     // From here on the source is an immediate value.
+     ulong immediate = source.Value;
+
+     if (inst == X86Instruction.Mov8)
+     {
+         WriteOpCode(dest, null, null, type, entry.Flags, entry.OpRMImm8);
+         WriteByte((byte)immediate);
+
+         return;
+     }
+
+     if (inst == X86Instruction.Mov16)
+     {
+         // The 16-bit move takes its opcode from the OpRMImm32 slot but writes a 2 byte immediate.
+         WriteOpCode(dest, null, null, type, entry.Flags, entry.OpRMImm32);
+         WriteInt16((short)immediate);
+
+         return;
+     }
+
+     if (IsImm8(immediate, type) && entry.OpRMImm8 != BadOp)
+     {
+         WriteOpCode(dest, null, null, type, entry.Flags, entry.OpRMImm8);
+         WriteByte((byte)immediate);
+
+         return;
+     }
+
+     if (IsImm32(immediate, type) && entry.OpRMImm32 != BadOp)
+     {
+         WriteOpCode(dest, null, null, type, entry.Flags, entry.OpRMImm32);
+         WriteInt32((int)immediate);
+
+         return;
+     }
+
+     if (dest != null && dest.Kind == OperandKind.Register && entry.OpRImm64 != BadOp)
+     {
+         // Register + 64-bit immediate form: optional REX byte, opcode with the low
+         // register bits added to it, then the full 8 byte immediate.
+         int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);
+
+         if (rexPrefix != 0)
+         {
+             WriteByte((byte)rexPrefix);
+         }
+
+         WriteByte((byte)(entry.OpRImm64 + (dest.GetRegister().Index & 0b111)));
+         WriteUInt64(immediate);
+
+         return;
+     }
+
+     throw new ArgumentException($"Failed to encode constant 0x{immediate:X}.");
+ }
+
+ /// <summary>
+ /// Encodes an instruction with a destination and up to two sources, picking the opcode
+ /// from the instruction table according to the kind of <paramref name="src2"/>.
+ /// A constant <paramref name="src2"/> is only accepted when it fits on an unsigned byte.
+ /// </summary>
+ /// <exception cref="ArgumentNullException">
+ /// <paramref name="src2"/> is null and the table entry has neither an OpRRM nor an OpRMR opcode
+ /// </exception>
+ /// <exception cref="ArgumentException">
+ /// The constant cannot be encoded, or the table entry has no opcode for the source kind
+ /// </exception>
+ private void WriteInstruction(
+     Operand dest,
+     Operand src1,
+     Operand src2,
+     X86Instruction inst,
+     OperandType type = OperandType.None)
+ {
+     InstructionInfo entry = _instTable[(int)inst];
+
+     if (src2 == null)
+     {
+         if (entry.OpRRM != BadOp)
+         {
+             WriteOpCode(dest, src1, src2, type, entry.Flags, entry.OpRRM, rrm: true);
+
+             return;
+         }
+
+         if (entry.OpRMR != BadOp)
+         {
+             WriteOpCode(dest, src1, src2, type, entry.Flags, entry.OpRMR);
+
+             return;
+         }
+
+         throw new ArgumentNullException(nameof(src2));
+     }
+
+     if (src2.Kind == OperandKind.Constant)
+     {
+         ulong immediate = src2.Value;
+
+         bool fitsOnByte = (byte)immediate == immediate;
+
+         if (!fitsOnByte || entry.OpRMImm8 == BadOp)
+         {
+             throw new ArgumentException($"Failed to encode constant 0x{immediate:X}.");
+         }
+
+         WriteOpCode(dest, src1, null, type, entry.Flags, entry.OpRMImm8);
+         WriteByte((byte)immediate);
+
+         return;
+     }
+
+     if (src2.Kind == OperandKind.Register && entry.OpRMR != BadOp)
+     {
+         WriteOpCode(dest, src1, src2, type, entry.Flags, entry.OpRMR);
+
+         return;
+     }
+
+     if (entry.OpRRM != BadOp)
+     {
+         WriteOpCode(dest, src1, src2, type, entry.Flags, entry.OpRRM, rrm: true);
+
+         return;
+     }
+
+     throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
+ }
+
+ private void WriteOpCode( // Writes one instruction: prefixes (VEX or legacy + REX), opcode bytes, then ModRM, SIB and displacement.
+ Operand dest, // Register or memory destination; may be null.
+ Operand src1, // Only read on the VEX path, where its register index is stored inverted in the VEX byte; may be null.
+ Operand src2, // Register or memory source; may be null.
+ OperandType type, // Forwarded to GetRexPrefix.
+ InstructionFlags flags, // Selects the prefix byte, VEX eligibility and the RexW, Reg8Dest, Reg8Src and RegOnly handling below.
+ int opCode, // Opcode bytes in the low bits; the bits from OpModRMBits upwards seed ModRM bits 3-5.
+ bool rrm = false) // True: dest goes in ModRM bits 3-5 and src2 in bits 0-2; false: the opposite.
+ {
+ int rexPrefix = GetRexPrefix(dest, src2, type, rrm);
+
+ if ((flags & InstructionFlags.RexW) != 0)
+ {
+ rexPrefix |= RexWPrefix;
+ }
+ // The part of the opcode value above OpModRMBits is placed in ModRM bits 3-5.
+ int modRM = (opCode >> OpModRMBits) << 3;
+
+ MemoryOperand memOp = null;
+
+ if (dest != null)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ int regIndex = dest.GetRegister().Index;
+ // Only the low 3 bits of the index are stored here; GetRexPrefix handles indices >= 8.
+ modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);
+ // NOTE(review): presumably forces a REX prefix so byte registers 4-7 are not decoded as AH/CH/DH/BH; confirm.
+ if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (dest.Kind == OperandKind.Memory)
+ {
+ memOp = dest as MemoryOperand;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\".");
+ }
+ }
+
+ if (src2 != null)
+ {
+ if (src2.Kind == OperandKind.Register)
+ {
+ int regIndex = src2.GetRegister().Index;
+ // The source takes whichever ModRM field the destination did not use.
+ modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);
+
+ if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (src2.Kind == OperandKind.Memory && memOp == null) // At most one memory operand; a second one reaches the throw below.
+ {
+ memOp = src2 as MemoryOperand;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\".");
+ }
+ }
+
+ bool needsSibByte = false;
+ bool needsDisplacement = false;
+
+ int sib = 0;
+
+ if (memOp != null)
+ {
+ // Either source or destination is a memory operand.
+ Register baseReg = memOp.BaseAddress.GetRegister();
+
+ X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);
+ // A base whose low bits equal RSP needs a SIB byte; one whose low bits equal RBP always gets a displacement, even when it is zero.
+ needsSibByte = memOp.Index != null || baseRegLow == X86Register.Rsp;
+ needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ modRM |= 0x40; // mod = 01: an 8-bit displacement follows.
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ modRM |= 0x80; // mod = 10: a 32-bit displacement follows.
+ }
+ }
+
+ if (baseReg.Index >= 8)
+ {
+ rexPrefix |= RexPrefix | (baseReg.Index >> 3); // High bit of the base register goes in REX bit 0.
+ }
+
+ if (needsSibByte)
+ {
+ sib = (int)baseRegLow;
+
+ if (memOp.Index != null)
+ {
+ int indexReg = memOp.Index.GetRegister().Index;
+
+ if (indexReg == (int)X86Register.Rsp)
+ {
+ throw new ArgumentException("Using RSP as index register on the memory operand is not allowed.");
+ }
+
+ if (indexReg >= 8)
+ {
+ rexPrefix |= RexPrefix | (indexReg >> 3) << 1; // High bit of the index register goes in REX bit 1.
+ }
+
+ sib |= (indexReg & 0b111) << 3;
+ }
+ else
+ {
+ sib |= 0b100 << 3; // Index field 100: no index register.
+ }
+
+ sib |= (int)memOp.Scale << 6;
+
+ modRM |= 0b100; // Low ModRM bits 100: a SIB byte follows.
+ }
+ else
+ {
+ modRM |= (int)baseRegLow;
+ }
+ }
+ else
+ {
+ // Source and destination are registers.
+ modRM |= 0xc0;
+ }
+
+ Debug.Assert(opCode != BadOp, "Invalid opcode value.");
+ // VEX is used only when the instruction allows it and the hardware supports it; otherwise legacy prefixes are written.
+ if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+ {
+ int vexByte2 = (int)(flags & InstructionFlags.PrefixMask) >> (int)InstructionFlags.PrefixBit;
+
+ if (src1 != null)
+ {
+ vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3; // First source register index, stored inverted in bits 3-6.
+ }
+ else
+ {
+ vexByte2 |= 0b1111 << 3; // No first source: all four bits set.
+ }
+
+ ushort opCodeHigh = (ushort)(opCode >> 8);
+ // The two-byte form is chosen when REX bits 0, 1 and 3 are all clear and the opcode prefix is 0x0f.
+ if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
+ {
+ // Two-byte form.
+ WriteByte(0xc5);
+
+ vexByte2 |= (~rexPrefix & 4) << 5; // REX bit 2, inverted, moved to bit 7.
+
+ WriteByte((byte)vexByte2);
+ }
+ else
+ {
+ // Three-byte form.
+ WriteByte(0xc4);
+
+ int vexByte1 = (~rexPrefix & 7) << 5; // REX bits 0-2, inverted, moved to bits 5-7.
+ // The low bits of the first VEX byte select the opcode prefix: 0x0f, 0x0f38 or 0x0f3a.
+ switch (opCodeHigh)
+ {
+ case 0xf: vexByte1 |= 1; break;
+ case 0xf38: vexByte1 |= 2; break;
+ case 0xf3a: vexByte1 |= 3; break;
+
+ default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break;
+ }
+
+ vexByte2 |= (rexPrefix & 8) << 4; // REX bit 3 moved to bit 7, not inverted.
+
+ WriteByte((byte)vexByte1);
+ WriteByte((byte)vexByte2);
+ }
+ // The opcode prefix bytes are already implied by the VEX prefix, so only the last opcode byte remains.
+ opCode &= 0xff;
+ }
+ else
+ {
+ switch (flags & InstructionFlags.PrefixMask)
+ {
+ case InstructionFlags.Prefix66: WriteByte(0x66); break;
+ case InstructionFlags.PrefixF2: WriteByte(0xf2); break;
+ case InstructionFlags.PrefixF3: WriteByte(0xf3); break;
+ }
+ // The REX byte, when needed, is written after the 0x66/0xf2/0xf3 prefix.
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+ }
+ // RegOnly instructions carry the low register bits inside the opcode byte and have no ModRM byte.
+ if (dest != null && (flags & InstructionFlags.RegOnly) != 0)
+ {
+ opCode += dest.GetRegister().Index & 7;
+ }
+ // Opcode bytes are written most significant first; zero upper bytes are skipped.
+ if ((opCode & 0xff0000) != 0)
+ {
+ WriteByte((byte)(opCode >> 16));
+ }
+
+ if ((opCode & 0xff00) != 0)
+ {
+ WriteByte((byte)(opCode >> 8));
+ }
+
+ WriteByte((byte)opCode);
+
+ if ((flags & InstructionFlags.RegOnly) == 0)
+ {
+ WriteByte((byte)modRM);
+
+ if (needsSibByte)
+ {
+ WriteByte((byte)sib);
+ }
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ WriteByte((byte)memOp.Displacement);
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ WriteInt32(memOp.Displacement);
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Writes a single byte instruction whose register is encoded in the low 3 bits of the
+ /// opcode. Register indices of 8 and above are preceded by the prefix byte 0x41.
+ /// </summary>
+ /// <param name="operand">Register operand</param>
+ /// <param name="opCode">Base opcode, to which the low 3 bits of the register index are added</param>
+ private void WriteCompactInst(Operand operand, int opCode)
+ {
+     int index = operand.GetRegister().Index;
+
+     bool isExtendedRegister = index >= 8;
+
+     if (isExtendedRegister)
+     {
+         WriteByte(0x41);
+     }
+
+     int lowBits = index & 0b111;
+
+     WriteByte((byte)(opCode + lowBits));
+ }
+
+ /// <summary>
+ /// Computes the REX prefix value for a pair of operands. The result is 0 when the
+ /// type is not 64-bit and neither register operand has an index of 8 or above.
+ /// </summary>
+ /// <param name="dest">Destination operand; only inspected when it is a register</param>
+ /// <param name="source">Source operand; only inspected when it is a register</param>
+ /// <param name="type">Operand type; 64-bit types start from <c>RexWPrefix</c></param>
+ /// <param name="rrm">True when dest uses prefix bit 2 and source bit 0; false for the opposite</param>
+ /// <returns>The prefix value, or 0 when no prefix is required by these operands</returns>
+ private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
+ {
+     int rexPrefix = Is64Bits(type) ? RexWPrefix : 0;
+
+     if (dest != null && dest.Kind == OperandKind.Register)
+     {
+         int destIndex = dest.GetRegister().Index;
+
+         if (destIndex >= 8)
+         {
+             rexPrefix |= RexPrefix | (destIndex >> 3) << (rrm ? 2 : 0);
+         }
+     }
+
+     if (source != null && source.Kind == OperandKind.Register)
+     {
+         int sourceIndex = source.GetRegister().Index;
+
+         if (sourceIndex >= 8)
+         {
+             rexPrefix |= RexPrefix | (sourceIndex >> 3) << (rrm ? 0 : 2);
+         }
+     }
+
+     return rexPrefix;
+ }
+
+ /// <summary>Returns true for the I64 and FP64 operand types.</summary>
+ private static bool Is64Bits(OperandType type)
+ {
+     switch (type)
+     {
+         case OperandType.I64:
+         case OperandType.FP64:
+             return true;
+
+         default:
+             return false;
+     }
+ }
+
+ /// <summary>
+ /// Checks whether an immediate fits on a signed byte. For I32 the low 32 bits are
+ /// sign-extended first; for every other type all 64 bits are treated as signed.
+ /// </summary>
+ private static bool IsImm8(ulong immediate, OperandType type)
+ {
+     long signedValue;
+
+     if (type == OperandType.I32)
+     {
+         signedValue = (int)immediate;
+     }
+     else
+     {
+         signedValue = (long)immediate;
+     }
+
+     return ConstFitsOnS8(signedValue);
+ }
+
+ /// <summary>
+ /// Checks whether an immediate fits on a signed 32-bit integer. For I32 the low 32 bits are
+ /// sign-extended first; for every other type all 64 bits are treated as signed.
+ /// </summary>
+ private static bool IsImm32(ulong immediate, OperandType type)
+ {
+     long signedValue;
+
+     if (type == OperandType.I32)
+     {
+         signedValue = (int)immediate;
+     }
+     else
+     {
+         signedValue = (long)immediate;
+     }
+
+     return ConstFitsOnS32(signedValue);
+ }
+
+ /// <summary>
+ /// Returns the size in bytes (2 or 6) of the conditional jump needed for the offset.
+ /// For negative offsets the instruction size is subtracted before the range check.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">The offset does not fit on 32 bits</exception>
+ public static int GetJccLength(long offset)
+ {
+     const int ShortLength = 2;
+     const int NearLength = 6;
+
+     bool isBackward = offset < 0;
+
+     if (ConstFitsOnS8(isBackward ? offset - ShortLength : offset))
+     {
+         return ShortLength;
+     }
+
+     if (ConstFitsOnS32(isBackward ? offset - NearLength : offset))
+     {
+         return NearLength;
+     }
+
+     throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+
+ /// <summary>
+ /// Returns the size in bytes (2 or 5) of the unconditional jump needed for the offset.
+ /// For negative offsets the instruction size is subtracted before the range check.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">The offset does not fit on 32 bits</exception>
+ public static int GetJmpLength(long offset)
+ {
+     const int ShortLength = 2;
+     const int NearLength = 5;
+
+     bool isBackward = offset < 0;
+
+     if (ConstFitsOnS8(isBackward ? offset - ShortLength : offset))
+     {
+         return ShortLength;
+     }
+
+     if (ConstFitsOnS32(isBackward ? offset - NearLength : offset))
+     {
+         return NearLength;
+     }
+
+     throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+
+ /// <summary>Returns true when the value survives a round trip through a signed byte.</summary>
+ private static bool ConstFitsOnS8(long value) => (sbyte)value == value;
+
+ /// <summary>Returns true when the value survives a round trip through a signed 32-bit integer.</summary>
+ private static bool ConstFitsOnS32(long value) => (int)value == value;
+
+ /// <summary>Writes a signed 16-bit value, least significant byte first.</summary>
+ private void WriteInt16(short value) => WriteUInt16((ushort)value);
+
+ /// <summary>Writes a signed 32-bit value, least significant byte first.</summary>
+ private void WriteInt32(int value) => WriteUInt32((uint)value);
+
+ /// <summary>Writes a single byte to the output stream.</summary>
+ private void WriteByte(byte value) => _stream.WriteByte(value);
+
+ /// <summary>Writes an unsigned 16-bit value, least significant byte first.</summary>
+ private void WriteUInt16(ushort value)
+ {
+     for (int shift = 0; shift < 16; shift += 8)
+     {
+         _stream.WriteByte((byte)(value >> shift));
+     }
+ }
+
+ /// <summary>Writes an unsigned 32-bit value, least significant byte first.</summary>
+ private void WriteUInt32(uint value)
+ {
+     for (int shift = 0; shift < 32; shift += 8)
+     {
+         _stream.WriteByte((byte)(value >> shift));
+     }
+ }
+
+ /// <summary>Writes an unsigned 64-bit value, least significant byte first.</summary>
+ private void WriteUInt64(ulong value)
+ {
+     for (int shift = 0; shift < 64; shift += 8)
+     {
+         _stream.WriteByte((byte)(value >> shift));
+     }
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CallConvName.cs b/ARMeilleure/CodeGen/X86/CallConvName.cs
new file mode 100644
index 000000000..be3676282
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CallConvName.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum CallConvName // Calling conventions distinguished by the x86 code generator.
+ {
+ SystemV, // Returned by CallingConvention.GetCurrentCallConv on every platform that is not Windows.
+ Windows // Returned by CallingConvention.GetCurrentCallConv when the current OS platform is Windows.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CallingConvention.cs b/ARMeilleure/CodeGen/X86/CallingConvention.cs
new file mode 100644
index 000000000..2769fd93e
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CallingConvention.cs
@@ -0,0 +1,159 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ /// <summary>
+ /// Register usage rules of the calling convention selected by <see cref="GetCurrentCallConv"/>.
+ /// Register sets are returned as bit masks where bit N stands for the register with index N.
+ /// </summary>
+ static class CallingConvention
+ {
+     // One bit for each of the 16 registers of a register file.
+     private const int RegistersMask = 0xffff;
+
+     /// <summary>Mask with only the bit of <paramref name="register"/> set.</summary>
+     private static int MaskOf(X86Register register) => 1 << (int)register;
+
+     /// <summary>All integer registers except RSP.</summary>
+     public static int GetIntAvailableRegisters() => RegistersMask & ~MaskOf(X86Register.Rsp);
+
+     /// <summary>All 16 vector registers.</summary>
+     public static int GetVecAvailableRegisters() => RegistersMask;
+
+     /// <summary>
+     /// Integer registers that a call may overwrite: RAX, RCX, RDX and R8-R11 on both
+     /// conventions, plus RSI and RDI when the convention is not Windows.
+     /// </summary>
+     public static int GetIntCallerSavedRegisters()
+     {
+         int mask = MaskOf(X86Register.Rax) |
+                    MaskOf(X86Register.Rcx) |
+                    MaskOf(X86Register.Rdx) |
+                    MaskOf(X86Register.R8) |
+                    MaskOf(X86Register.R9) |
+                    MaskOf(X86Register.R10) |
+                    MaskOf(X86Register.R11);
+
+         if (GetCurrentCallConv() != CallConvName.Windows)
+         {
+             mask |= MaskOf(X86Register.Rsi) | MaskOf(X86Register.Rdi);
+         }
+
+         return mask;
+     }
+
+     /// <summary>
+     /// Vector registers that a call may overwrite: XMM0-XMM5 on Windows, every register otherwise.
+     /// </summary>
+     public static int GetVecCallerSavedRegisters()
+     {
+         if (GetCurrentCallConv() != CallConvName.Windows)
+         {
+             return RegistersMask;
+         }
+
+         return MaskOf(X86Register.Xmm0) |
+                MaskOf(X86Register.Xmm1) |
+                MaskOf(X86Register.Xmm2) |
+                MaskOf(X86Register.Xmm3) |
+                MaskOf(X86Register.Xmm4) |
+                MaskOf(X86Register.Xmm5);
+     }
+
+     /// <summary>Complement of <see cref="GetIntCallerSavedRegisters"/> within the 16 register mask.</summary>
+     public static int GetIntCalleeSavedRegisters() => RegistersMask ^ GetIntCallerSavedRegisters();
+
+     /// <summary>Complement of <see cref="GetVecCallerSavedRegisters"/> within the 16 register mask.</summary>
+     public static int GetVecCalleeSavedRegisters() => RegistersMask ^ GetVecCallerSavedRegisters();
+
+     /// <summary>Always returns 4.</summary>
+     public static int GetArgumentsOnRegsCount() => 4;
+
+     /// <summary>Always returns 6.</summary>
+     public static int GetIntArgumentsOnRegsCount() => 6;
+
+     /// <summary>Always returns 8.</summary>
+     public static int GetVecArgumentsOnRegsCount() => 8;
+
+     /// <summary>
+     /// Returns the register that carries the integer argument at <paramref name="index"/>.
+     /// Windows accepts indices 0-3, the other convention accepts indices 0-5.
+     /// </summary>
+     /// <exception cref="ArgumentOutOfRangeException">No register is assigned to the index</exception>
+     public static X86Register GetIntArgumentRegister(int index)
+     {
+         bool isWindows = GetCurrentCallConv() == CallConvName.Windows;
+
+         switch (index)
+         {
+             case 0: return isWindows ? X86Register.Rcx : X86Register.Rdi;
+             case 1: return isWindows ? X86Register.Rdx : X86Register.Rsi;
+             case 2: return isWindows ? X86Register.R8 : X86Register.Rdx;
+             case 3: return isWindows ? X86Register.R9 : X86Register.Rcx;
+
+             case 4 when !isWindows: return X86Register.R8;
+             case 5 when !isWindows: return X86Register.R9;
+         }
+
+         throw new ArgumentOutOfRangeException(nameof(index));
+     }
+
+     /// <summary>
+     /// Returns the register that carries the vector argument at <paramref name="index"/>,
+     /// counted from XMM0. Windows accepts indices 0-3, the other convention accepts indices 0-7.
+     /// </summary>
+     /// <exception cref="ArgumentOutOfRangeException">No register is assigned to the index</exception>
+     public static X86Register GetVecArgumentRegister(int index)
+     {
+         int count = GetCurrentCallConv() == CallConvName.Windows ? 4 : 8;
+
+         // The unsigned comparison also rejects negative indices.
+         if ((uint)index >= count)
+         {
+             throw new ArgumentOutOfRangeException(nameof(index));
+         }
+
+         return X86Register.Xmm0 + index;
+     }
+
+     /// <summary>Always returns RAX.</summary>
+     public static X86Register GetIntReturnRegister() => X86Register.Rax;
+
+     /// <summary>Always returns RDX.</summary>
+     public static X86Register GetIntReturnRegisterHigh() => X86Register.Rdx;
+
+     /// <summary>Always returns XMM0.</summary>
+     public static X86Register GetVecReturnRegister() => X86Register.Xmm0;
+
+     /// <summary>
+     /// Returns <see cref="CallConvName.Windows"/> when running on Windows and
+     /// <see cref="CallConvName.SystemV"/> on any other platform.
+     /// </summary>
+     public static CallConvName GetCurrentCallConv()
+     {
+         if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+         {
+             return CallConvName.Windows;
+         }
+
+         return CallConvName.SystemV;
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs
new file mode 100644
index 000000000..d719b5164
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs
@@ -0,0 +1,305 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ /// <summary>
+ /// State shared by the x86 code generator while emitting one function: the output stream,
+ /// the assembler writing to it, the register allocation result, the stack region sizes and
+ /// the list of jumps between blocks that are resolved by <see cref="GetCode"/>.
+ /// </summary>
+ class CodeGenContext
+ {
+     // Number of placeholder bytes written to the stream for every pending block jump.
+     // GetCode skips them and writes the real jump encoding instead.
+     private const int ReservedBytesForJump = 1;
+
+     private readonly Stream _stream;
+
+     public int StreamOffset => (int)_stream.Length;
+
+     public AllocationResult AllocResult { get; }
+
+     public Assembler Assembler { get; }
+
+     public BasicBlock CurrBlock { get; private set; }
+
+     public int CallArgsRegionSize { get; }
+     public int XmmSaveRegionSize { get; }
+
+     // Stream position at which each block starts, indexed by BasicBlock.Index.
+     private readonly long[] _blockOffsets;
+
+     // A jump to a block whose final encoding is only known once all code was emitted.
+     private struct Jump
+     {
+         public bool IsConditional { get; }
+
+         public X86Condition Condition { get; }
+
+         public BasicBlock Target { get; }
+
+         // Position of the placeholder bytes on the temporary stream.
+         public long JumpPosition { get; }
+
+         // Offset that will be encoded on the instruction; computed by GetCode.
+         public long RelativeOffset { get; set; }
+
+         // Size in bytes of the final jump instruction; computed by GetCode.
+         public int InstSize { get; set; }
+
+         public Jump(BasicBlock target, long jumpPosition)
+         {
+             IsConditional = false;
+             Condition = 0;
+             Target = target;
+             JumpPosition = jumpPosition;
+
+             RelativeOffset = 0;
+
+             InstSize = 0;
+         }
+
+         public Jump(X86Condition condition, BasicBlock target, long jumpPosition)
+         {
+             IsConditional = true;
+             Condition = condition;
+             Target = target;
+             JumpPosition = jumpPosition;
+
+             RelativeOffset = 0;
+
+             InstSize = 0;
+         }
+     }
+
+     private readonly List<Jump> _jumps;
+
+     // State of the single pending near jump started by JumpToNear and completed by JumpHere.
+     private X86Condition _jNearCondition;
+
+     private long _jNearPosition;
+     private int _jNearLength;
+
+     /// <summary>
+     /// Creates the context for one function.
+     /// </summary>
+     /// <param name="stream">Stream that receives the generated code</param>
+     /// <param name="allocResult">Register allocation result of the function</param>
+     /// <param name="maxCallArgs">Maximum argument count of the calls made by the function, negative when it makes no calls</param>
+     /// <param name="blocksCount">Number of basic blocks of the function</param>
+     public CodeGenContext(Stream stream, AllocationResult allocResult, int maxCallArgs, int blocksCount)
+     {
+         _stream = stream;
+
+         AllocResult = allocResult;
+
+         Assembler = new Assembler(stream);
+
+         CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize);
+         XmmSaveRegionSize = xmmSaveRegionSize;
+
+         _blockOffsets = new long[blocksCount];
+
+         _jumps = new List<Jump>();
+     }
+
+     /// <summary>
+     /// Computes the size of the stack region used for call arguments, padded so that the
+     /// whole frame (callee saved registers, spill region and call arguments) is a multiple of 16 bytes.
+     /// </summary>
+     /// <param name="allocResult">Register allocation result of the function</param>
+     /// <param name="maxCallArgs">Maximum argument count of the calls made by the function, negative when it makes no calls</param>
+     /// <param name="xmmSaveRegionSize">Receives 16 bytes for each used callee saved vector register</param>
+     /// <returns>Size in bytes of the call arguments region, including the alignment padding</returns>
+     private int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
+     {
+         int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
+         int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;
+
+         xmmSaveRegionSize = BitUtils.CountBits(vecMask) * 16;
+
+         // We need to add 8 bytes to the total size, as the call to this
+         // function already pushed 8 bytes (the return address).
+         int calleeSaveRegionSize = BitUtils.CountBits(intMask) * 8 + xmmSaveRegionSize + 8;
+
+         int argsCount = maxCallArgs;
+
+         if (argsCount < 0)
+         {
+             // When the function has no calls, argsCount is -1.
+             // In this case, we don't need to allocate the shadow space.
+             argsCount = 0;
+         }
+         else if (argsCount < 4)
+         {
+             // The ABI mandates that the space for at least 4 arguments
+             // is reserved on the stack (this is called shadow space).
+             argsCount = 4;
+         }
+
+         int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize;
+
+         // TODO: Instead of always multiplying by 16 (the largest possible size of a variable,
+         // since a V128 has 16 bytes), we should calculate the exact size consumed by the
+         // arguments passed to the called functions on the stack.
+         int callArgsAndFrameSize = frameSize + argsCount * 16;
+
+         // Ensure that the Stack Pointer will be aligned to 16 bytes.
+         callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf;
+
+         return callArgsAndFrameSize - frameSize;
+     }
+
+     /// <summary>
+     /// Records the current stream position as the start of <paramref name="block"/>
+     /// and makes it the current block.
+     /// </summary>
+     public void EnterBlock(BasicBlock block)
+     {
+         _blockOffsets[block.Index] = _stream.Position;
+
+         CurrBlock = block;
+     }
+
+     /// <summary>
+     /// Registers an unconditional jump to <paramref name="target"/> at the current position
+     /// and writes the placeholder bytes that <see cref="GetCode"/> later replaces.
+     /// </summary>
+     public void JumpTo(BasicBlock target)
+     {
+         _jumps.Add(new Jump(target, _stream.Position));
+
+         WritePadding(ReservedBytesForJump);
+     }
+
+     /// <summary>
+     /// Registers a conditional jump to <paramref name="target"/> at the current position
+     /// and writes the placeholder bytes that <see cref="GetCode"/> later replaces.
+     /// </summary>
+     public void JumpTo(X86Condition condition, BasicBlock target)
+     {
+         _jumps.Add(new Jump(condition, target, _stream.Position));
+
+         WritePadding(ReservedBytesForJump);
+     }
+
+     /// <summary>
+     /// Starts a short forward conditional jump: remembers the condition and position, and
+     /// skips the bytes of the shortest conditional jump encoding. <see cref="JumpHere"/>
+     /// must be called afterwards to write the instruction.
+     /// </summary>
+     public void JumpToNear(X86Condition condition)
+     {
+         _jNearCondition = condition;
+         _jNearPosition = _stream.Position;
+         _jNearLength = Assembler.GetJccLength(0);
+
+         _stream.Seek(_jNearLength, SeekOrigin.Current);
+     }
+
+     /// <summary>
+     /// Completes the jump started by <see cref="JumpToNear"/>, making it land on the
+     /// current stream position. The stream position is restored afterwards.
+     /// </summary>
+     public void JumpHere()
+     {
+         long currentPosition = _stream.Position;
+
+         _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+
+         long offset = currentPosition - (_jNearPosition + _jNearLength);
+
+         Debug.Assert(_jNearLength == Assembler.GetJccLength(offset), "Relative offset doesn't fit on near jump.");
+
+         Assembler.Jcc(_jNearCondition, offset);
+
+         _stream.Seek(currentPosition, SeekOrigin.Begin);
+     }
+
+     // Writes "size" zero bytes at the current stream position.
+     private void WritePadding(int size)
+     {
+         while (size-- > 0)
+         {
+             _stream.WriteByte(0);
+         }
+     }
+
+     // Reads exactly "count" bytes from the stream. Stream.Read is allowed to return fewer
+     // bytes than requested, so it is called until the buffer is full.
+     private static byte[] ReadBytes(Stream stream, long count)
+     {
+         byte[] buffer = new byte[count];
+
+         int total = 0;
+
+         while (total < buffer.Length)
+         {
+             int read = stream.Read(buffer, total, buffer.Length - total);
+
+             if (read == 0)
+             {
+                 throw new EndOfStreamException();
+             }
+
+             total += read;
+         }
+
+         return buffer;
+     }
+
+     /// <summary>
+     /// Resolves all the registered block jumps and returns the final machine code,
+     /// with every jump placeholder replaced by the actual jump instruction.
+     /// </summary>
+     /// <returns>The generated code</returns>
+     public byte[] GetCode()
+     {
+         // Write jump relative offsets.
+         // The size of a jump depends on its offset, and the offset depends on the size of
+         // the jumps in between, so this repeats until no offset changes anymore.
+         bool modified;
+
+         do
+         {
+             modified = false;
+
+             for (int index = 0; index < _jumps.Count; index++)
+             {
+                 Jump jump = _jumps[index];
+
+                 long jumpTarget = _blockOffsets[jump.Target.Index];
+
+                 long offset = jumpTarget - jump.JumpPosition;
+
+                 if (offset < 0)
+                 {
+                     // Backward jump: account for the growth of the earlier jumps
+                     // located between the target and this jump.
+                     for (int index2 = index - 1; index2 >= 0; index2--)
+                     {
+                         Jump jump2 = _jumps[index2];
+
+                         if (jump2.JumpPosition < jumpTarget)
+                         {
+                             break;
+                         }
+
+                         offset -= jump2.InstSize - ReservedBytesForJump;
+                     }
+                 }
+                 else
+                 {
+                     // Forward jump: account for the growth of the later jumps
+                     // located between this jump and the target.
+                     for (int index2 = index + 1; index2 < _jumps.Count; index2++)
+                     {
+                         Jump jump2 = _jumps[index2];
+
+                         if (jump2.JumpPosition >= jumpTarget)
+                         {
+                             break;
+                         }
+
+                         offset += jump2.InstSize - ReservedBytesForJump;
+                     }
+
+                     offset -= ReservedBytesForJump;
+                 }
+
+                 if (jump.IsConditional)
+                 {
+                     jump.InstSize = Assembler.GetJccLength(offset);
+                 }
+                 else
+                 {
+                     jump.InstSize = Assembler.GetJmpLength(offset);
+                 }
+
+                 // The jump is relative to the next instruction, not the current one.
+                 // Since we didn't know the next instruction address when calculating
+                 // the offset (as the size of the current jump instruction was not known),
+                 // we now need to compensate the offset with the jump instruction size.
+                 // It's also worth to note that:
+                 // - This is only needed for backward jumps.
+                 // - The GetJmpLength and GetJccLength also compensates the offset
+                 // internally when computing the jump instruction size.
+                 if (offset < 0)
+                 {
+                     offset -= jump.InstSize;
+                 }
+
+                 if (jump.RelativeOffset != offset)
+                 {
+                     modified = true;
+                 }
+
+                 jump.RelativeOffset = offset;
+
+                 // Jump is a struct, so the modified copy has to be stored back.
+                 _jumps[index] = jump;
+             }
+         }
+         while (modified);
+
+         // Write the code, ignoring the dummy bytes after jumps, into a new stream.
+         _stream.Seek(0, SeekOrigin.Begin);
+
+         using (MemoryStream codeStream = new MemoryStream())
+         {
+             Assembler assembler = new Assembler(codeStream);
+
+             byte[] buffer;
+
+             for (int index = 0; index < _jumps.Count; index++)
+             {
+                 Jump jump = _jumps[index];
+
+                 // Copy the code located before the jump, then skip the placeholder bytes.
+                 buffer = ReadBytes(_stream, jump.JumpPosition - _stream.Position);
+
+                 _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);
+
+                 codeStream.Write(buffer);
+
+                 if (jump.IsConditional)
+                 {
+                     assembler.Jcc(jump.Condition, jump.RelativeOffset);
+                 }
+                 else
+                 {
+                     assembler.Jmp(jump.RelativeOffset);
+                 }
+             }
+
+             // Copy the code located after the last jump.
+             buffer = ReadBytes(_stream, _stream.Length - _stream.Position);
+
+             codeStream.Write(buffer);
+
+             return codeStream.ToArray();
+         }
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
new file mode 100644
index 000000000..ae24b5631
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -0,0 +1,1661 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenerator
+ {
+ private const int PageSize = 0x1000;
+ private const int StackGuardSize = 0x2000;
+
+ private static Action[] _instTable;
+
+        // Builds the dispatch table that maps each IR instruction to the method
+        // that emits code for it. The table is indexed by the instruction value,
+        // so the registration order below does not matter; entries are grouped
+        // by category for readability.
+        static CodeGenerator()
+        {
+            int instCount = EnumUtils.GetCount(typeof(Instruction));
+
+            _instTable = new Action[instCount];
+
+            // Arithmetic.
+            Add(Instruction.Add, GenerateAdd);
+            Add(Instruction.Subtract, GenerateSubtract);
+            Add(Instruction.Multiply, GenerateMultiply);
+            Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+            Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+            Add(Instruction.Divide, GenerateDivide);
+            Add(Instruction.DivideUI, GenerateDivideUI);
+            Add(Instruction.Negate, GenerateNegate);
+
+            // Bitwise operations, shifts and rotates.
+            Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+            Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+            Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+            Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+            Add(Instruction.ByteSwap, GenerateByteSwap);
+            Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+            Add(Instruction.RotateRight, GenerateRotateRight);
+            Add(Instruction.ShiftLeft, GenerateShiftLeft);
+            Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+            Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+
+            // Control flow.
+            Add(Instruction.Branch, GenerateBranch);
+            Add(Instruction.BranchIfFalse, GenerateBranchIfFalse);
+            Add(Instruction.BranchIfTrue, GenerateBranchIfTrue);
+            Add(Instruction.Call, GenerateCall);
+            Add(Instruction.Return, GenerateReturn);
+
+            // Comparisons and selection.
+            Add(Instruction.CompareEqual, GenerateCompareEqual);
+            Add(Instruction.CompareNotEqual, GenerateCompareNotEqual);
+            Add(Instruction.CompareGreater, GenerateCompareGreater);
+            Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual);
+            Add(Instruction.CompareGreaterOrEqualUI, GenerateCompareGreaterOrEqualUI);
+            Add(Instruction.CompareGreaterUI, GenerateCompareGreaterUI);
+            Add(Instruction.CompareLess, GenerateCompareLess);
+            Add(Instruction.CompareLessOrEqual, GenerateCompareLessOrEqual);
+            Add(Instruction.CompareLessOrEqualUI, GenerateCompareLessOrEqualUI);
+            Add(Instruction.CompareLessUI, GenerateCompareLessUI);
+            Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+
+            // Copies, conversions and extensions.
+            Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+            Add(Instruction.ConvertToFP, GenerateConvertToFP);
+            Add(Instruction.Copy, GenerateCopy);
+            Add(Instruction.SignExtend8, GenerateSignExtend8);
+            Add(Instruction.SignExtend16, GenerateSignExtend16);
+            Add(Instruction.SignExtend32, GenerateSignExtend32);
+            Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+            Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+            Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+
+            // Memory access.
+            Add(Instruction.Load, GenerateLoad);
+            Add(Instruction.Load8, GenerateLoad8);
+            Add(Instruction.Load16, GenerateLoad16);
+            Add(Instruction.Store, GenerateStore);
+            Add(Instruction.Store8, GenerateStore8);
+            Add(Instruction.Store16, GenerateStore16);
+            Add(Instruction.CompareAndSwap128, GenerateCompareAndSwap128);
+
+            // Stack slots and register allocator support.
+            Add(Instruction.Fill, GenerateFill);
+            Add(Instruction.Spill, GenerateSpill);
+            Add(Instruction.SpillArg, GenerateSpillArg);
+            Add(Instruction.StackAlloc, GenerateStackAlloc);
+            Add(Instruction.Clobber, GenerateClobber);
+
+            // Vector operations.
+            Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+            Add(Instruction.VectorExtract, GenerateVectorExtract);
+            Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+            Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+            Add(Instruction.VectorInsert, GenerateVectorInsert);
+            Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+            Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+            Add(Instruction.VectorOne, GenerateVectorOne);
+            Add(Instruction.VectorZero, GenerateVectorZero);
+            Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+            Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+
+            // Miscellaneous.
+            Add(Instruction.CpuId, GenerateCpuId);
+        }
+
+        // Registers "func" as the code emitter for "inst" in the dispatch table,
+        // using the numeric value of the instruction as the table index.
+        private static void Add(Instruction inst, Action func) => _instTable[(int)inst] = func;
+
+        // Compiles the control flow graph held by "cctx" into machine code.
+        // The passes run in this order:
+        //  1. Optimization (only when both the SsaForm and Optimize options are set).
+        //  2. Pre-allocation, which also reports the maximum call argument count.
+        //  3. SSA deconstruction (when SsaForm is set) and register allocation,
+        //     using the linear scan allocator when the Lsra option is set and the
+        //     hybrid allocator otherwise.
+        //  4. Code generation: prologue, then every Operation node of every block.
+        // Returns the generated code together with the prologue unwind info.
+        public static CompiledFunction Generate(CompilerContext cctx)
+        {
+            bool HasOption(CompilerOptions option)
+            {
+                return (cctx.Options & option) != 0;
+            }
+
+            ControlFlowGraph cfg = cctx.Cfg;
+
+            // Optimization.
+            Logger.StartPass(PassName.Optimization);
+
+            if (HasOption(CompilerOptions.SsaForm) && HasOption(CompilerOptions.Optimize))
+            {
+                Optimizer.RunPass(cfg);
+            }
+
+            Logger.EndPass(PassName.Optimization, cfg);
+
+            // Pre-allocation.
+            Logger.StartPass(PassName.PreAllocation);
+
+            StackAllocator stackAlloc = new StackAllocator();
+
+            PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+            Logger.EndPass(PassName.PreAllocation, cfg);
+
+            // Register allocation.
+            Logger.StartPass(PassName.RegisterAllocation);
+
+            if (HasOption(CompilerOptions.SsaForm))
+            {
+                Ssa.Deconstruct(cfg);
+            }
+
+            IRegisterAllocator regAlloc = HasOption(CompilerOptions.Lsra)
+                ? (IRegisterAllocator)new LinearScanAllocator()
+                : new HybridAllocator();
+
+            RegisterMasks regMasks = new RegisterMasks(
+                CallingConvention.GetIntAvailableRegisters(),
+                CallingConvention.GetVecAvailableRegisters(),
+                CallingConvention.GetIntCallerSavedRegisters(),
+                CallingConvention.GetVecCallerSavedRegisters(),
+                CallingConvention.GetIntCalleeSavedRegisters(),
+                CallingConvention.GetVecCalleeSavedRegisters());
+
+            AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+            Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+            // Code generation.
+            Logger.StartPass(PassName.CodeGeneration);
+
+            using (MemoryStream stream = new MemoryStream())
+            {
+                CodeGenContext context = new CodeGenContext(stream, allocResult, maxCallArgs, cfg.Blocks.Count);
+
+                UnwindInfo unwindInfo = WritePrologue(context);
+
+                foreach (BasicBlock block in cfg.Blocks)
+                {
+                    context.EnterBlock(block);
+
+                    foreach (Node node in block.Operations)
+                    {
+                        // Nodes that are not operations produce no code.
+                        if (!(node is Operation operation))
+                        {
+                            continue;
+                        }
+
+                        GenerateOperation(context, operation);
+                    }
+                }
+
+                Logger.EndPass(PassName.CodeGeneration);
+
+                return new CompiledFunction(context.GetCode(), unwindInfo);
+            }
+        }
+
+        // Emits the machine code for a single IR operation.
+        //  - Any instruction other than Instruction.Extended is dispatched through
+        //    the _instTable emitter table.
+        //  - Instruction.Extended operations are IntrinsicOperations; they are
+        //    emitted according to the IntrinsicType that IntrinsicTable.GetInfo
+        //    reports for the intrinsic.
+        // Throws ArgumentException when there is no emitter for the instruction,
+        // for the intrinsic type, or (Comis_ only) for the specific intrinsic.
+        // Failing loudly here avoids silently producing a function with missing
+        // or incomplete code for that operation.
+        private static void GenerateOperation(CodeGenContext context, Operation operation)
+        {
+            if (operation.Instruction != Instruction.Extended)
+            {
+                Action func = _instTable[(int)operation.Instruction];
+
+                if (func == null)
+                {
+                    throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+                }
+
+                func(context, operation);
+
+                return;
+            }
+
+            IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+
+            IntrinsicInfo info = IntrinsicTable.GetInfo(intrinOp.Intrinsic);
+
+            switch (info.Type)
+            {
+                case IntrinsicType.Comis_:
+                {
+                    Operand dest = operation.Destination;
+                    Operand src1 = operation.GetSource(0);
+                    Operand src2 = operation.GetSource(1);
+
+                    // Pick the compare width and the condition that is
+                    // materialized into the destination.
+                    bool isDouble;
+
+                    X86Condition condition;
+
+                    switch (intrinOp.Intrinsic)
+                    {
+                        case Intrinsic.X86Comisdeq:
+                            isDouble = true;
+                            condition = X86Condition.Equal;
+                            break;
+
+                        case Intrinsic.X86Comisdge:
+                            isDouble = true;
+                            condition = X86Condition.AboveOrEqual;
+                            break;
+
+                        case Intrinsic.X86Comisdlt:
+                            isDouble = true;
+                            condition = X86Condition.Below;
+                            break;
+
+                        case Intrinsic.X86Comisseq:
+                            isDouble = false;
+                            condition = X86Condition.Equal;
+                            break;
+
+                        case Intrinsic.X86Comissge:
+                            isDouble = false;
+                            condition = X86Condition.AboveOrEqual;
+                            break;
+
+                        case Intrinsic.X86Comisslt:
+                            isDouble = false;
+                            condition = X86Condition.Below;
+                            break;
+
+                        default:
+                            throw new ArgumentException($"Invalid intrinsic \"{intrinOp.Intrinsic}\".");
+                    }
+
+                    if (isDouble)
+                    {
+                        context.Assembler.Comisd(src1, src2);
+                    }
+                    else
+                    {
+                        context.Assembler.Comiss(src1, src2);
+                    }
+
+                    context.Assembler.Setcc(dest, condition);
+
+                    // Setcc only writes the low byte, so extend it to 32 bits.
+                    context.Assembler.Movzx8(dest, dest, OperandType.I32);
+
+                    break;
+                }
+
+                case IntrinsicType.PopCount:
+                {
+                    Operand dest = operation.Destination;
+                    Operand source = operation.GetSource(0);
+
+                    EnsureSameType(dest, source);
+
+                    Debug.Assert(dest.Type.IsInteger());
+
+                    context.Assembler.Popcnt(dest, source, dest.Type);
+
+                    break;
+                }
+
+                case IntrinsicType.Unary:
+                {
+                    Operand dest = operation.Destination;
+                    Operand source = operation.GetSource(0);
+
+                    EnsureSameType(dest, source);
+
+                    Debug.Assert(!dest.Type.IsInteger());
+
+                    context.Assembler.WriteInstruction(info.Inst, dest, source);
+
+                    break;
+                }
+
+                case IntrinsicType.UnaryToGpr:
+                {
+                    Operand dest = operation.Destination;
+                    Operand source = operation.GetSource(0);
+
+                    Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
+
+                    context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
+
+                    break;
+                }
+
+                case IntrinsicType.Binary:
+                {
+                    Operand dest = operation.Destination;
+                    Operand src1 = operation.GetSource(0);
+                    Operand src2 = operation.GetSource(1);
+
+                    EnsureSameType(dest, src1);
+
+                    // Without VEX encoding the destination must also be the
+                    // first source.
+                    if (!HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        EnsureSameReg(dest, src1);
+                    }
+
+                    Debug.Assert(!dest.Type.IsInteger());
+                    Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
+
+                    context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+
+                    break;
+                }
+
+                case IntrinsicType.BinaryImm:
+                {
+                    Operand dest = operation.Destination;
+                    Operand src1 = operation.GetSource(0);
+                    Operand src2 = operation.GetSource(1);
+
+                    EnsureSameType(dest, src1);
+
+                    if (!HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        EnsureSameReg(dest, src1);
+                    }
+
+                    Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
+
+                    // The second source is passed as an immediate byte.
+                    context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
+
+                    break;
+                }
+
+                case IntrinsicType.Ternary:
+                {
+                    Operand dest = operation.Destination;
+                    Operand src1 = operation.GetSource(0);
+                    Operand src2 = operation.GetSource(1);
+                    Operand src3 = operation.GetSource(2);
+
+                    EnsureSameType(dest, src1, src2, src3);
+
+                    Debug.Assert(!dest.Type.IsInteger());
+
+                    if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3);
+                    }
+                    else
+                    {
+                        EnsureSameReg(dest, src1);
+
+                        // The third source is not passed to the instruction on
+                        // this path; it is expected in register index 0.
+                        Debug.Assert(src3.GetRegister().Index == 0);
+
+                        context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+                    }
+
+                    break;
+                }
+
+                case IntrinsicType.TernaryImm:
+                {
+                    Operand dest = operation.Destination;
+                    Operand src1 = operation.GetSource(0);
+                    Operand src2 = operation.GetSource(1);
+                    Operand src3 = operation.GetSource(2);
+
+                    EnsureSameType(dest, src1, src2);
+
+                    if (!HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        EnsureSameReg(dest, src1);
+                    }
+
+                    Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
+
+                    context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
+
+                    break;
+                }
+
+                default:
+                    throw new ArgumentException($"Invalid intrinsic type \"{info.Type}\".");
+            }
+        }
+
+        // Emits an addition. Integer destinations use Add with the destination
+        // and the second source; FP32 uses Addss and any other type uses Addsd
+        // (expected to be FP64).
+        private static void GenerateAdd(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand left = operation.GetSource(0);
+            Operand right = operation.GetSource(1);
+
+            ValidateBinOp(result, left, right);
+
+            OperandType type = result.Type;
+
+            if (type.IsInteger())
+            {
+                context.Assembler.Add(result, right, type);
+
+                return;
+            }
+
+            if (type == OperandType.FP32)
+            {
+                context.Assembler.Addss(result, left, right);
+            }
+            else
+            {
+                context.Assembler.Addsd(result, left, right);
+            }
+        }
+
+        // Emits a bitwise AND between an integer destination and the second source.
+        private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand left = operation.GetSource(0);
+            Operand right = operation.GetSource(1);
+
+            ValidateBinOp(result, left, right);
+
+            OperandType type = result.Type;
+
+            Debug.Assert(type.IsInteger());
+
+            context.Assembler.And(result, right, type);
+        }
+
+        // Emits a bitwise exclusive OR: Xorps for non-integer destinations,
+        // the integer Xor otherwise.
+        private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand left = operation.GetSource(0);
+            Operand right = operation.GetSource(1);
+
+            ValidateBinOp(result, left, right);
+
+            OperandType type = result.Type;
+
+            if (!type.IsInteger())
+            {
+                context.Assembler.Xorps(result, left, right);
+
+                return;
+            }
+
+            context.Assembler.Xor(result, right, type);
+        }
+
+        // Emits a bitwise NOT applied in place to the integer destination.
+        private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand operand = operation.GetSource(0);
+
+            ValidateUnOp(result, operand);
+
+            Debug.Assert(result.Type.IsInteger());
+
+            context.Assembler.Not(result);
+        }
+
+        // Emits a bitwise OR between an integer destination and the second source.
+        private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand left = operation.GetSource(0);
+            Operand right = operation.GetSource(1);
+
+            ValidateBinOp(result, left, right);
+
+            OperandType type = result.Type;
+
+            Debug.Assert(type.IsInteger());
+
+            context.Assembler.Or(result, right, type);
+        }
+
+        // Emits an unconditional jump to the branch target of the current block.
+        private static void GenerateBranch(CodeGenContext context, Operation operation)
+            => context.JumpTo(context.CurrBlock.Branch);
+
+        // Tests the source against itself and jumps to the branch target of the
+        // current block on the Equal condition (i.e. when the source is zero).
+        private static void GenerateBranchIfFalse(CodeGenContext context, Operation operation)
+        {
+            Operand condition = operation.GetSource(0);
+
+            context.Assembler.Test(condition, condition, condition.Type);
+            context.JumpTo(X86Condition.Equal, context.CurrBlock.Branch);
+        }
+
+        // Tests the source against itself and jumps to the branch target of the
+        // current block on the NotEqual condition (i.e. when the source is non-zero).
+        private static void GenerateBranchIfTrue(CodeGenContext context, Operation operation)
+        {
+            Operand condition = operation.GetSource(0);
+
+            context.Assembler.Test(condition, condition, condition.Type);
+            context.JumpTo(X86Condition.NotEqual, context.CurrBlock.Branch);
+        }
+
+        // Emits a byte swap (Bswap) applied in place to the integer destination.
+        private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand operand = operation.GetSource(0);
+
+            ValidateUnOp(result, operand);
+
+            Debug.Assert(result.Type.IsInteger());
+
+            context.Assembler.Bswap(result);
+        }
+
+        // Emits a call whose target is the first source operand.
+        private static void GenerateCall(CodeGenContext context, Operation operation)
+            => context.Assembler.Call(operation.GetSource(0));
+
+        // Intentionally emits nothing: a clobber only tells the register
+        // allocator that a register is overwritten, so no code is required.
+        private static void GenerateClobber(CodeGenContext context, Operation operation)
+        {
+        }
+
+        // Emits Cmpxchg16b on the memory location addressed by the first source.
+        // Only the address is passed here; the remaining operands are not
+        // referenced by this method.
+        private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation)
+        {
+            Operand address = operation.GetSource(0);
+
+            context.Assembler.Cmpxchg16b(new MemoryOperand(OperandType.I64, address));
+        }
+
+        // Emits a comparison that yields the Equal condition.
+        private static void GenerateCompareEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Equal);
+
+        // Emits a comparison that yields the Greater condition.
+        private static void GenerateCompareGreater(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Greater);
+
+        // Emits a comparison that yields the GreaterOrEqual condition.
+        private static void GenerateCompareGreaterOrEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.GreaterOrEqual);
+
+        // Emits a comparison that yields the AboveOrEqual condition.
+        private static void GenerateCompareGreaterOrEqualUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.AboveOrEqual);
+
+        // Emits a comparison that yields the Above condition.
+        private static void GenerateCompareGreaterUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Above);
+
+        // Emits a comparison that yields the Less condition.
+        private static void GenerateCompareLess(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Less);
+
+        // Emits a comparison that yields the LessOrEqual condition.
+        private static void GenerateCompareLessOrEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.LessOrEqual);
+
+        // Emits a comparison that yields the BelowOrEqual condition.
+        private static void GenerateCompareLessOrEqualUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.BelowOrEqual);
+
+        // Emits a comparison that yields the Below condition.
+        private static void GenerateCompareLessUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Below);
+
+        // Emits a comparison that yields the NotEqual condition.
+        private static void GenerateCompareNotEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.NotEqual);
+
+        // Shared emitter for all comparisons: compares the two sources, sets the
+        // low byte of the I32 destination from "condition", then zero-extends
+        // that byte so the destination holds the result as a 32-bit value.
+        private static void GenerateCompare(CodeGenContext context, Operation operation, X86Condition condition)
+        {
+            Operand result = operation.Destination;
+            Operand left = operation.GetSource(0);
+            Operand right = operation.GetSource(1);
+
+            EnsureSameType(left, right);
+
+            Debug.Assert(result.Type == OperandType.I32);
+
+            Assembler asm = context.Assembler;
+
+            asm.Cmp(left, right, left.Type);
+            asm.Setcc(result, condition);
+            asm.Movzx8(result, result, OperandType.I32);
+        }
+
+        // Emits a conditional select. The destination shares its register with
+        // the third source, so it already holds the value used when the
+        // condition is zero; a conditional move replaces it with the second
+        // source when the first source (the I32 condition) is non-zero.
+        private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand condition = operation.GetSource(0);
+            Operand whenTrue = operation.GetSource(1);
+            Operand whenFalse = operation.GetSource(2);
+
+            EnsureSameReg(result, whenFalse);
+            EnsureSameType(result, whenTrue, whenFalse);
+
+            Debug.Assert(result.Type.IsInteger());
+            Debug.Assert(condition.Type == OperandType.I32);
+
+            context.Assembler.Test(condition, condition, condition.Type);
+            context.Assembler.Cmovcc(result, whenTrue, result.Type, X86Condition.NotEqual);
+        }
+
+        // Converts an I64 source to an I32 destination with a 32-bit move.
+        private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+        {
+            Operand narrow = operation.Destination;
+            Operand wide = operation.GetSource(0);
+
+            Debug.Assert(narrow.Type == OperandType.I32 && wide.Type == OperandType.I64);
+
+            context.Assembler.Mov(narrow, wide, OperandType.I32);
+        }
+
+        // Converts the source to the floating-point type of the destination.
+        //  - Integer source: the destination is cleared with Xorps, then
+        //    Cvtsi2ss (FP32 destination) or Cvtsi2sd (FP64 destination) is emitted.
+        //  - Floating-point source: Cvtsd2ss or Cvtss2sd is emitted, followed by
+        //    GenerateZeroUpper96 / GenerateZeroUpper64 on the destination.
+        private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value = operation.GetSource(0);
+
+            Debug.Assert(result.Type == OperandType.FP32 || result.Type == OperandType.FP64);
+
+            bool toSingle = result.Type == OperandType.FP32;
+
+            // A floating-point source must be of the other precision.
+            Debug.Assert(value.Type.IsInteger() || value.Type == (toSingle ? OperandType.FP64 : OperandType.FP32));
+
+            if (value.Type.IsInteger())
+            {
+                context.Assembler.Xorps(result, result, result);
+
+                if (toSingle)
+                {
+                    context.Assembler.Cvtsi2ss(result, result, value, value.Type);
+                }
+                else
+                {
+                    context.Assembler.Cvtsi2sd(result, result, value, value.Type);
+                }
+            }
+            else if (toSingle)
+            {
+                context.Assembler.Cvtsd2ss(result, result, value);
+
+                GenerateZeroUpper96(context, result, result);
+            }
+            else
+            {
+                context.Assembler.Cvtss2sd(result, result, value);
+
+                GenerateZeroUpper64(context, result, result);
+            }
+        }
+
+        // Copies the source into the destination.
+        //  - Nothing is emitted when both operands have the same kind and value.
+        //  - A zero constant copied into a register is emitted as a 32-bit
+        //    "xor reg, reg".
+        //  - Other integer copies use Mov; non-integer copies use Movdqu.
+        private static void GenerateCopy(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+            Operand value = operation.GetSource(0);
+
+            EnsureSameType(target, value);
+
+            // Constants can only be copied to integer destinations.
+            Debug.Assert(target.Type.IsInteger() || value.Kind != OperandKind.Constant);
+
+            bool sameLocation = target.Kind == value.Kind && target.Value == value.Value;
+
+            if (sameLocation)
+            {
+                // Moves to the same register are useless.
+                return;
+            }
+
+            bool zeroIntoRegister =
+                target.Kind == OperandKind.Register &&
+                value.Kind == OperandKind.Constant &&
+                value.Value == 0;
+
+            if (zeroIntoRegister)
+            {
+                // Assemble "mov reg, 0" as "xor reg, reg" as the latter is more efficient.
+                context.Assembler.Xor(target, target, OperandType.I32);
+
+                return;
+            }
+
+            if (target.Type.IsInteger())
+            {
+                context.Assembler.Mov(target, value, target.Type);
+            }
+            else
+            {
+                context.Assembler.Movdqu(target, value);
+            }
+        }
+
+        // Emits a count of leading zero bits built on top of BSR.
+        private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value = operation.GetSource(0);
+
+            EnsureSameType(result, value);
+
+            Debug.Assert(result.Type.IsInteger());
+
+            context.Assembler.Bsr(result, value, result.Type);
+
+            int bitCount = result.Type == OperandType.I32 ? 32 : 64;
+            int bitMask = bitCount - 1;
+
+            // BSR leaves the destination undefined for a zero input, but it does
+            // set ZF. The expected result in that case is the operand size, so
+            // the jump below skips the fix-up move only for non-zero inputs.
+            // The constant loaded is (size | mask) so that the XOR with the mask
+            // emitted at the end turns it back into the operand size.
+            context.JumpToNear(X86Condition.NotEqual);
+
+            context.Assembler.Mov(result, new Operand(bitCount | bitMask), OperandType.I32);
+
+            context.JumpHere();
+
+            // BSR produces the zero based index of the highest set bit, counted
+            // from the least significant bit. XOR with the mask inverts that
+            // index into the number of zero bits above it.
+            context.Assembler.Xor(result, new Operand(bitMask), OperandType.I32);
+        }
+
+        // Emits the Cpuid instruction; no operands are read from the operation.
+        private static void GenerateCpuId(CodeGenContext context, Operation operation)
+            => context.Assembler.Cpuid();
+
+        // Emits a signed division.
+        //  - Integer: the divisor is the third source; Cdq (I32) or Cqo is
+        //    emitted before Idiv.
+        //  - Floating-point: Divss for FP32, Divsd otherwise (expected FP64),
+        //    using the first two sources.
+        private static void GenerateDivide(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand dividend = operation.GetSource(0);
+            Operand divisor = operation.GetSource(1);
+
+            if (result.Type.IsInteger())
+            {
+                // The integer form takes its divisor from the third source.
+                divisor = operation.GetSource(2);
+
+                EnsureSameType(result, divisor);
+
+                if (divisor.Type == OperandType.I32)
+                {
+                    context.Assembler.Cdq();
+                }
+                else
+                {
+                    context.Assembler.Cqo();
+                }
+
+                context.Assembler.Idiv(divisor);
+
+                return;
+            }
+
+            ValidateBinOp(result, dividend, divisor);
+
+            if (result.Type == OperandType.FP32)
+            {
+                context.Assembler.Divss(result, dividend, divisor);
+            }
+            else
+            {
+                context.Assembler.Divsd(result, dividend, divisor);
+            }
+        }
+
+        // Emits an unsigned integer division: RDX is cleared with a 32-bit XOR,
+        // then Div is emitted with the third source as the divisor.
+        private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+        {
+            Operand divisor = operation.GetSource(2);
+
+            Operand highPart = Register(X86Register.Rdx);
+
+            Debug.Assert(divisor.Type.IsInteger());
+
+            Assembler asm = context.Assembler;
+
+            asm.Xor(highPart, highPart, OperandType.I32);
+            asm.Div(divisor);
+        }
+
+        // Loads the destination from a stack slot. The slot address is RSP plus
+        // the constant offset in the first source plus the size of the call
+        // arguments region.
+        private static void GenerateFill(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+            Operand slot = operation.GetSource(0);
+
+            Debug.Assert(slot.Kind == OperandKind.Constant);
+
+            int displacement = slot.AsInt32() + context.CallArgsRegionSize;
+
+            Operand stackPointer = Register(X86Register.Rsp);
+
+            MemoryOperand memOp = new MemoryOperand(target.Type, stackPointer, null, Multiplier.x1, displacement);
+
+            GenerateLoad(context, memOp, target);
+        }
+
+        // Loads the destination from the address given by the first source,
+        // using the destination type for the memory access.
+        private static void GenerateLoad(CodeGenContext context, Operation operation)
+        {
+            Operand loaded = operation.Destination;
+            Operand location = Memory(operation.GetSource(0), loaded.Type);
+
+            GenerateLoad(context, location, loaded);
+        }
+
+        // Loads a 16-bit value from the address in the first source into the
+        // integer destination, zero-extending it (Movzx16).
+        private static void GenerateLoad16(CodeGenContext context, Operation operation)
+        {
+            Operand loaded = operation.Destination;
+            Operand location = Memory(operation.GetSource(0), loaded.Type);
+
+            Debug.Assert(loaded.Type.IsInteger());
+
+            context.Assembler.Movzx16(loaded, location, loaded.Type);
+        }
+
+        // Loads an 8-bit value from the address in the first source into the
+        // integer destination, zero-extending it (Movzx8).
+        private static void GenerateLoad8(CodeGenContext context, Operation operation)
+        {
+            Operand loaded = operation.Destination;
+            Operand location = Memory(operation.GetSource(0), loaded.Type);
+
+            Debug.Assert(loaded.Type.IsInteger());
+
+            context.Assembler.Movzx8(loaded, location, loaded.Type);
+        }
+
+        // Emits a multiplication.
+        //  - Integer with a constant second source: three-operand Imul.
+        //  - Integer otherwise: two-operand Imul (destination and first source
+        //    must share a register).
+        //  - Floating-point: Mulss for FP32, Mulsd otherwise (expected FP64).
+        private static void GenerateMultiply(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand left = operation.GetSource(0);
+            Operand right = operation.GetSource(1);
+
+            bool byConstant = right.Kind == OperandKind.Constant;
+
+            if (!byConstant)
+            {
+                EnsureSameReg(result, left);
+            }
+
+            EnsureSameType(result, left, right);
+
+            OperandType type = result.Type;
+
+            if (type.IsInteger())
+            {
+                if (byConstant)
+                {
+                    context.Assembler.Imul(result, left, right, type);
+                }
+                else
+                {
+                    context.Assembler.Imul(result, right, type);
+                }
+
+                return;
+            }
+
+            if (type == OperandType.FP32)
+            {
+                context.Assembler.Mulss(result, left, right);
+            }
+            else
+            {
+                context.Assembler.Mulsd(result, left, right);
+            }
+        }
+
+        // Emits the one-operand Imul with the second source (an I64) as its
+        // operand. The other operands are not referenced by this method.
+        private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+        {
+            Operand multiplier = operation.GetSource(1);
+
+            Debug.Assert(multiplier.Type == OperandType.I64);
+
+            context.Assembler.Imul(multiplier);
+        }
+
+        // Emits the one-operand Mul with the second source (an I64) as its
+        // operand. The other operands are not referenced by this method.
+        private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+        {
+            Operand multiplier = operation.GetSource(1);
+
+            Debug.Assert(multiplier.Type == OperandType.I64);
+
+            context.Assembler.Mul(multiplier);
+        }
+
+        // Emits a negation (Neg) applied in place to the integer destination.
+        private static void GenerateNegate(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand operand = operation.GetSource(0);
+
+            ValidateUnOp(result, operand);
+
+            Debug.Assert(result.Type.IsInteger());
+
+            context.Assembler.Neg(result);
+        }
+
+        // Emits the function epilogue followed by the return instruction.
+        private static void GenerateReturn(CodeGenContext context, Operation operation)
+        {
+            // The epilogue has to be written before the return itself.
+            WriteEpilogue(context);
+            context.Assembler.Return();
+        }
+
+        // Emits a rotate right (Ror) of the destination by the second source.
+        private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value = operation.GetSource(0);
+            Operand amount = operation.GetSource(1);
+
+            ValidateShift(result, value, amount);
+
+            context.Assembler.Ror(result, amount, result.Type);
+        }
+
+        // Emits a shift left (Shl) of the destination by the second source.
+        private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value = operation.GetSource(0);
+            Operand amount = operation.GetSource(1);
+
+            ValidateShift(result, value, amount);
+
+            context.Assembler.Shl(result, amount, result.Type);
+        }
+
+        // Emits an arithmetic shift right (Sar) of the destination by the
+        // second source.
+        private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value = operation.GetSource(0);
+            Operand amount = operation.GetSource(1);
+
+            ValidateShift(result, value, amount);
+
+            context.Assembler.Sar(result, amount, result.Type);
+        }
+
+        // Emits a logical shift right (Shr) of the destination by the second
+        // source.
+        private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value = operation.GetSource(0);
+            Operand amount = operation.GetSource(1);
+
+            ValidateShift(result, value, amount);
+
+            context.Assembler.Shr(result, amount, result.Type);
+        }
+
+        // Sign-extends the low 16 bits of the source into the destination
+        // (Movsx16), using the destination type as the result width.
+        private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand value = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && value.Type.IsInteger());
+
+            context.Assembler.Movsx16(extended, value, extended.Type);
+        }
+
+        // Sign-extends the low 32 bits of the source into the destination
+        // (Movsx32), using the destination type as the result width.
+        private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand value = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && value.Type.IsInteger());
+
+            context.Assembler.Movsx32(extended, value, extended.Type);
+        }
+
+        // Sign-extends the low 8 bits of the source into the destination
+        // (Movsx8), using the destination type as the result width.
+        private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand value = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && value.Type.IsInteger());
+
+            context.Assembler.Movsx8(extended, value, extended.Type);
+        }
+
+        // Spills a value to a stack slot located after the call arguments
+        // region (the region size is used as the base offset).
+        private static void GenerateSpill(CodeGenContext context, Operation operation)
+            => GenerateSpill(context, operation, context.CallArgsRegionSize);
+
+        // Spills a value to a stack slot addressed with a base offset of zero.
+        private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+            => GenerateSpill(context, operation, 0);
+
+        // Stores the second source to the stack at RSP + (constant offset from
+        // the first source) + baseOffset.
+        private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+        {
+            Operand slot = operation.GetSource(0);
+            Operand value = operation.GetSource(1);
+
+            Debug.Assert(slot.Kind == OperandKind.Constant);
+
+            int displacement = slot.AsInt32() + baseOffset;
+
+            Operand stackPointer = Register(X86Register.Rsp);
+
+            MemoryOperand memOp = new MemoryOperand(value.Type, stackPointer, null, Multiplier.x1, displacement);
+
+            GenerateStore(context, memOp, value);
+        }
+
+        // Computes the address of a stack slot into the destination with Lea.
+        // The address is RSP plus the constant offset in the first source plus
+        // the size of the call arguments region.
+        private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+        {
+            Operand pointer = operation.Destination;
+            Operand slot = operation.GetSource(0);
+
+            Debug.Assert(slot.Kind == OperandKind.Constant);
+
+            int displacement = slot.AsInt32() + context.CallArgsRegionSize;
+
+            Operand stackPointer = Register(X86Register.Rsp);
+
+            MemoryOperand memOp = new MemoryOperand(OperandType.I64, stackPointer, null, Multiplier.x1, displacement);
+
+            context.Assembler.Lea(pointer, memOp, OperandType.I64);
+        }
+
+        // Stores the second source to the address given by the first source,
+        // using the value type for the memory access.
+        private static void GenerateStore(CodeGenContext context, Operation operation)
+        {
+            Operand stored = operation.GetSource(1);
+            Operand location = Memory(operation.GetSource(0), stored.Type);
+
+            GenerateStore(context, location, stored);
+        }
+
+        // Stores the low 16 bits of the integer second source to the address
+        // given by the first source (Mov16).
+        private static void GenerateStore16(CodeGenContext context, Operation operation)
+        {
+            Operand stored = operation.GetSource(1);
+            Operand location = Memory(operation.GetSource(0), stored.Type);
+
+            Debug.Assert(stored.Type.IsInteger());
+
+            context.Assembler.Mov16(location, stored);
+        }
+
+        // Stores the low 8 bits of the integer second source to the address
+        // given by the first source (Mov8).
+        private static void GenerateStore8(CodeGenContext context, Operation operation)
+        {
+            Operand stored = operation.GetSource(1);
+            Operand location = Memory(operation.GetSource(0), stored.Type);
+
+            Debug.Assert(stored.Type.IsInteger());
+
+            context.Assembler.Mov8(location, stored);
+        }
+
+        // Emits a subtraction. Integer destinations use Sub with the destination
+        // and the second source; FP32 uses Subss and any other type uses Subsd
+        // (expected to be FP64).
+        private static void GenerateSubtract(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand left = operation.GetSource(0);
+            Operand right = operation.GetSource(1);
+
+            ValidateBinOp(result, left, right);
+
+            OperandType type = result.Type;
+
+            if (type.IsInteger())
+            {
+                context.Assembler.Sub(result, right, type);
+
+                return;
+            }
+
+            if (type == OperandType.FP32)
+            {
+                context.Assembler.Subss(result, left, right);
+            }
+            else
+            {
+                context.Assembler.Subsd(result, left, right);
+            }
+        }
+
+        // Moves an integer source into a non-integer destination: Movd for an
+        // I32 source, Movq otherwise (expected I64).
+        private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+        {
+            Operand vector = operation.Destination;
+            Operand scalar = operation.GetSource(0);
+
+            Debug.Assert(!vector.Type.IsInteger() && scalar.Type.IsInteger());
+
+            if (scalar.Type != OperandType.I32)
+            {
+                context.Assembler.Movq(vector, scalar);
+            }
+            else
+            {
+                context.Assembler.Movd(vector, scalar);
+            }
+        }
+
+        // Extracts one element of a V128 source into the destination. The
+        // element size follows the destination type and the element index is
+        // the constant second source.
+        //  - I32 / I64: Pextrd / Pextrq when SSE4.1 is supported. Otherwise the
+        //    wanted element is shuffled into the lowest lane of the source,
+        //    moved out with Movd / Movq, and the source is shuffled back.
+        //  - Other (floating-point) types: the 64-bit half holding the element
+        //    is moved into the destination, then for FP32 a Pshufd selects the
+        //    element inside that half.
+        private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+        {
+            Operand value = operation.Destination;
+            Operand vector = operation.GetSource(0);
+            Operand indexOp = operation.GetSource(1);
+
+            Debug.Assert(vector.Type == OperandType.V128);
+            Debug.Assert(indexOp.Kind == OperandKind.Constant);
+
+            byte index = indexOp.AsByte();
+
+            Assembler asm = context.Assembler;
+
+            if (value.Type == OperandType.I32)
+            {
+                Debug.Assert(index < 4);
+
+                if (HardwareCapabilities.SupportsSse41)
+                {
+                    asm.Pextrd(value, vector, index);
+                }
+                else if (index == 0)
+                {
+                    asm.Movd(value, vector);
+                }
+                else
+                {
+                    // Rotating the identity shuffle by "index" lanes brings the
+                    // wanted lane to position 0; rotating by the complementary
+                    // amount afterwards restores the original lane order.
+                    const int identityShuffle = 0b11_10_01_00;
+
+                    int toLowest = BitUtils.RotateRight(identityShuffle, index * 2, 8);
+                    int restore = BitUtils.RotateRight(identityShuffle, 8 - index * 2, 8);
+
+                    asm.Pshufd(vector, vector, (byte)toLowest);
+                    asm.Movd(value, vector);
+                    asm.Pshufd(vector, vector, (byte)restore);
+                }
+
+                return;
+            }
+
+            if (value.Type == OperandType.I64)
+            {
+                Debug.Assert(index < 2);
+
+                if (HardwareCapabilities.SupportsSse41)
+                {
+                    asm.Pextrq(value, vector, index);
+                }
+                else if (index == 0)
+                {
+                    asm.Movq(value, vector);
+                }
+                else
+                {
+                    // Swaps the two 64-bit halves; applying it twice restores
+                    // the source.
+                    const byte swapHalves = 0b01_00_11_10;
+
+                    asm.Pshufd(vector, vector, swapHalves);
+                    asm.Movq(value, vector);
+                    asm.Pshufd(vector, vector, swapHalves);
+                }
+
+                return;
+            }
+
+            // Floating-point types.
+            Debug.Assert(index < (value.Type == OperandType.FP32 ? 4 : 2));
+
+            bool fromHighHalf = value.Type == OperandType.FP32
+                ? index >= 2
+                : index == 1 && value.Type == OperandType.FP64;
+
+            if (fromHighHalf)
+            {
+                asm.Movhlps(value, value, vector);
+                asm.Movq(value, value);
+            }
+            else
+            {
+                asm.Movq(value, vector);
+            }
+
+            if (value.Type == OperandType.FP32)
+            {
+                // Lane 0 takes element (index & 1) of the half moved above;
+                // the remaining lanes all take element 3.
+                asm.Pshufd(value, value, (byte)(0xfc | (index & 1)));
+            }
+        }
+
+        // Extracts the 16-bit element selected by the constant second source
+        // from the V128 first source into the destination (Pextrw).
+        private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+        {
+            Operand value = operation.Destination;
+            Operand vector = operation.GetSource(0);
+            Operand indexOp = operation.GetSource(1);
+
+            Debug.Assert(vector.Type == OperandType.V128);
+            Debug.Assert(indexOp.Kind == OperandKind.Constant);
+
+            byte element = indexOp.AsByte();
+
+            Debug.Assert(element < 8);
+
+            context.Assembler.Pextrw(value, vector, element);
+        }
+
+        // Copies one byte lane of a V128 vector (source 0) into the destination.
+        // The lane is selected by a constant index (source 1). PEXTRB is used when
+        // SSE 4.1 is available; otherwise the byte is carved out of a 16-bit extract.
+        private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+        {
+            Operand result  = operation.Destination;
+            Operand vector  = operation.GetSource(0);
+            Operand laneArg = operation.GetSource(1);
+
+            Debug.Assert(vector.Type == OperandType.V128);
+            Debug.Assert(laneArg.Kind == OperandKind.Constant);
+
+            byte lane = laneArg.AsByte();
+
+            // A 128-bit vector holds sixteen byte lanes.
+            Debug.Assert(lane < 16);
+
+            if (HardwareCapabilities.SupportsSse41)
+            {
+                context.Assembler.Pextrb(result, vector, lane);
+
+                return;
+            }
+
+            // Fallback: fetch the 16-bit lane that contains the requested byte...
+            context.Assembler.Pextrw(result, vector, (byte)(lane >> 1));
+
+            bool wantsHighByte = (lane & 1) != 0;
+
+            // ...then keep only the requested half of it.
+            if (wantsHighByte)
+            {
+                context.Assembler.Shr(result, new Operand(8), OperandType.I32);
+            }
+            else
+            {
+                context.Assembler.Movzx8(result, result, OperandType.I32);
+            }
+        }
+
+ // Inserts a scalar value (source 1) into one lane of a V128 vector (source 0)
+ // and writes the resulting vector to the destination. The lane is selected by
+ // a constant index (source 2). The lane width follows the type of the value:
+ // I32 and FP32 address 4 lanes, I64 and FP64 address 2 lanes.
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); //Vector
+ Operand src2 = operation.GetSource(1); //Value
+ Operand src3 = operation.GetSource(2); //Index
+
+ // Without VEX encoding, the destination is required to be the same
+ // operand as the source vector.
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ // SSE2-only fallback for integer inserts: the value is written 16 bits at
+ // a time with PINSRW. "words" is the number of 16-bit words in the value.
+ // src2 is rotated by 16 after every insert so that the next word sits in
+ // the low bits; callers pass 2 for I32 and 4 for I64, so the rotations add
+ // up to the full width of src2 and leave it with its original value.
+ void InsertIntSse2(int words)
+ {
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Movdqu(dest, src1);
+ }
+
+ for (int word = 0; word < words; word++)
+ {
+ // Insert lower 16-bits.
+ context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word));
+
+ // Move next word down.
+ context.Assembler.Ror(src2, new Operand(16), src2.Type);
+ }
+ }
+
+ if (src2.Type == OperandType.I32)
+ {
+ Debug.Assert(index < 4);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrd(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(2);
+ }
+ }
+ else if (src2.Type == OperandType.I64)
+ {
+ Debug.Assert(index < 2);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrq(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(4);
+ }
+ }
+ else if (src2.Type == OperandType.FP32)
+ {
+ Debug.Assert(index < 4);
+
+ if (index != 0)
+ {
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ // The lane index is placed in bits 5:4 of the INSERTPS immediate.
+ context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
+ }
+ else
+ {
+ if (src1.GetRegister() == src2.GetRegister())
+ {
+ // Vector and value share a register, so the value is lane 0 of
+ // src1. Start from the identity shuffle and point the target
+ // lane's 2-bit selector at lane 0.
+ int mask = 0b11_10_01_00;
+
+ mask &= ~(0b11 << index * 2);
+
+ context.Assembler.Pshufd(dest, src1, (byte)mask);
+ }
+ else
+ {
+ // mask0 rotates the lanes so the target lane lands in position 0,
+ // mask1 is the opposite rotation that moves it back.
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+ context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0.
+ context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0]
+ context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position.
+
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1.
+ }
+ }
+ }
+ }
+ else
+ {
+ // Lane 0 needs no shuffling: MOVSS replaces the low 32 bits directly.
+ context.Assembler.Movss(dest, src1, src2);
+ }
+ }
+ else /* if (src2.Type == OperandType.FP64) */
+ {
+ Debug.Assert(index < 2);
+
+ if (index != 0)
+ {
+ // Upper lane: MOVLHPS copies the low 64 bits of src2 into the high half.
+ context.Assembler.Movlhps(dest, src1, src2);
+ }
+ else
+ {
+ // Lower lane: MOVSD replaces the low 64 bits.
+ context.Assembler.Movsd(dest, src1, src2);
+ }
+ }
+ }
+
+        // Emits PINSRW to write a 16-bit value (source 1) into one lane of a V128
+        // vector (source 0), producing the destination vector. The lane is
+        // selected by a constant index (source 2).
+        private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            // Without VEX encoding, the destination is required to be the same
+            // operand as the source vector.
+            if (!HardwareCapabilities.SupportsVexEncoding)
+            {
+                EnsureSameReg(dest, src1);
+            }
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            // A 128-bit vector holds eight 16-bit lanes. This mirrors the bound
+            // checked by GenerateVectorExtract16, so an out-of-range index is
+            // caught in debug builds rather than encoded into the instruction.
+            Debug.Assert(index < 8);
+
+            context.Assembler.Pinsrw(dest, src1, src2, index);
+        }
+
+        // Emits PINSRB to write a byte value (source 1) into one lane of a V128
+        // vector (source 0), producing the destination vector. The lane is
+        // selected by a constant index (source 2).
+        private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            // It's not possible to emulate this instruction without
+            // SSE 4.1 support without the use of a temporary register,
+            // so we instead handle that case on the pre-allocator when
+            // SSE 4.1 is not supported on the CPU.
+            Debug.Assert(HardwareCapabilities.SupportsSse41);
+
+            // Without VEX encoding, the destination is required to be the same
+            // operand as the source vector.
+            if (!HardwareCapabilities.SupportsVexEncoding)
+            {
+                EnsureSameReg(dest, src1);
+            }
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            // A 128-bit vector holds sixteen byte lanes. This mirrors the bound
+            // checked by GenerateVectorExtract8, so an out-of-range index is
+            // caught in debug builds rather than encoded into the instruction.
+            Debug.Assert(index < 16);
+
+            context.Assembler.Pinsrb(dest, src1, src2, index);
+        }
+
+        // Fills the (non-integer) destination with all-one bits by comparing the
+        // register against itself for equality with PCMPEQW.
+        private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+
+            bool isInteger = target.Type.IsInteger();
+
+            Debug.Assert(!isInteger);
+
+            context.Assembler.Pcmpeqw(target, target, target);
+        }
+
+        // Clears the (non-integer) destination by XOR-ing the register with
+        // itself using XORPS.
+        private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+
+            bool isInteger = target.Type.IsInteger();
+
+            Debug.Assert(!isInteger);
+
+            context.Assembler.Xorps(target, target, target);
+        }
+
+        // Operation-level wrapper: checks that destination and source are both
+        // V128 and forwards them to GenerateZeroUpper64.
+        private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand vector = operation.GetSource(0);
+
+            Debug.Assert(result.Type == OperandType.V128 && vector.Type == OperandType.V128);
+
+            GenerateZeroUpper64(context, result, vector);
+        }
+
+        // Operation-level wrapper: checks that destination and source are both
+        // V128 and forwards them to GenerateZeroUpper96.
+        private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand vector = operation.GetSource(0);
+
+            Debug.Assert(result.Type == OperandType.V128 && vector.Type == OperandType.V128);
+
+            GenerateZeroUpper96(context, result, vector);
+        }
+
+        // Zero-extends the low 16 bits of the integer source into the integer
+        // destination using a 32-bit MOVZX.
+        private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+
+            Debug.Assert(result.Type.IsInteger() && value.Type.IsInteger());
+
+            context.Assembler.Movzx16(result, value, OperandType.I32);
+        }
+
+        // Zero-extends the low 32 bits of the integer source into the integer
+        // destination. This is emitted as a plain 32-bit MOV.
+        private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+
+            Debug.Assert(result.Type.IsInteger() && value.Type.IsInteger());
+
+            context.Assembler.Mov(result, value, OperandType.I32);
+        }
+
+        // Zero-extends the low 8 bits of the integer source into the integer
+        // destination using a 32-bit MOVZX.
+        private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+
+            Debug.Assert(result.Type.IsInteger() && value.Type.IsInteger());
+
+            context.Assembler.Movzx8(result, value, OperandType.I32);
+        }
+
+        // Emits the move that loads "value" from "address". The instruction is
+        // picked from the type of "value"; any other type trips a debug assertion.
+        private static void GenerateLoad(CodeGenContext context, Operand address, Operand value)
+        {
+            switch (value.Type)
+            {
+                case OperandType.I32:
+                    context.Assembler.Mov(value, address, OperandType.I32);
+                    break;
+
+                case OperandType.I64:
+                    context.Assembler.Mov(value, address, OperandType.I64);
+                    break;
+
+                case OperandType.FP32:
+                    context.Assembler.Movd(value, address);
+                    break;
+
+                case OperandType.FP64:
+                    context.Assembler.Movq(value, address);
+                    break;
+
+                case OperandType.V128:
+                    context.Assembler.Movdqu(value, address);
+                    break;
+
+                default:
+                    Debug.Assert(false);
+                    break;
+            }
+        }
+
+        // Emits the move that stores "value" to "address". The instruction is
+        // picked from the type of "value"; any other type trips a debug assertion.
+        private static void GenerateStore(CodeGenContext context, Operand address, Operand value)
+        {
+            switch (value.Type)
+            {
+                case OperandType.I32:
+                    context.Assembler.Mov(address, value, OperandType.I32);
+                    break;
+
+                case OperandType.I64:
+                    context.Assembler.Mov(address, value, OperandType.I64);
+                    break;
+
+                case OperandType.FP32:
+                    context.Assembler.Movd(address, value);
+                    break;
+
+                case OperandType.FP64:
+                    context.Assembler.Movq(address, value);
+                    break;
+
+                case OperandType.V128:
+                    context.Assembler.Movdqu(address, value);
+                    break;
+
+                default:
+                    Debug.Assert(false);
+                    break;
+            }
+        }
+
+        // Copies the low 64 bits of "source" into "dest" with MOVQ, which leaves
+        // the upper 64 bits of the destination register cleared.
+        private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source)
+            => context.Assembler.Movq(dest, source);
+
+        // Keeps only the low 32 bits of "source" in "dest", clearing the upper 96.
+        private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source)
+        {
+            // Shuffle selectors 11_11_11_00: lane 0 stays in place, and lanes 1-3
+            // all read lane 3.
+            const byte keepLane0 = 0xfc;
+
+            // MOVQ first, so the upper 64 bits (including lane 3) are already zero
+            // when the shuffle replicates lane 3.
+            context.Assembler.Movq(dest, source);
+            context.Assembler.Pshufd(dest, dest, keepLane0);
+        }
+
+        // Debug-only sanity check for unary operations: destination and source
+        // must pass EnsureSameReg and have the same type. Compiles to an empty
+        // method when DEBUG is not defined.
+        private static void ValidateUnOp(Operand dest, Operand source)
+        {
+#if DEBUG
+            EnsureSameReg(dest, source);
+
+            EnsureSameType(dest, source);
+#endif
+        }
+
+        // Debug-only sanity check for binary operations: destination and first
+        // source must pass EnsureSameReg, and all three operands must have the
+        // same type. Compiles to an empty method when DEBUG is not defined.
+        private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+        {
+#if DEBUG
+            EnsureSameReg(dest, src1);
+
+            EnsureSameType(dest, src1, src2);
+#endif
+        }
+
+        // Debug-only sanity check for shift operations: destination and shifted
+        // value must pass EnsureSameReg and have the same integer type, and the
+        // shift amount must be I32. Compiles to an empty method when DEBUG is
+        // not defined.
+        private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+        {
+#if DEBUG
+            EnsureSameReg(dest, src1);
+
+            EnsureSameType(dest, src1);
+
+            Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+#endif
+        }
+
+        // Asserts (debug builds only) that both operands are the same register or
+        // memory operand. The check is skipped for non-integer operands when VEX
+        // encoding is available.
+        private static void EnsureSameReg(Operand op1, Operand op2)
+        {
+            // Same short-circuit order as the skip condition: the operand type is
+            // tested before the hardware capability is queried.
+            if (op1.Type.IsInteger() || !HardwareCapabilities.SupportsVexEncoding)
+            {
+                Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+                Debug.Assert(op1.Kind == op2.Kind);
+                Debug.Assert(op1.Value == op2.Value);
+            }
+        }
+
+        // Asserts (debug builds only) that the two operands have the same type.
+        private static void EnsureSameType(Operand op1, Operand op2)
+        {
+            bool sameType = op1.Type == op2.Type;
+
+            Debug.Assert(sameType);
+        }
+
+        // Asserts (debug builds only) that op2 and op3 both have op1's type.
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+        {
+            bool secondMatches = op1.Type == op2.Type;
+
+            Debug.Assert(secondMatches);
+
+            bool thirdMatches = op1.Type == op3.Type;
+
+            Debug.Assert(thirdMatches);
+        }
+
+        // Asserts (debug builds only) that op2, op3 and op4 all have op1's type.
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+        {
+            bool secondMatches = op1.Type == op2.Type;
+
+            Debug.Assert(secondMatches);
+
+            bool thirdMatches = op1.Type == op3.Type;
+
+            Debug.Assert(thirdMatches);
+
+            bool fourthMatches = op1.Type == op4.Type;
+
+            Debug.Assert(fourthMatches);
+        }
+
+        // Emits the function prologue and returns the unwind information that
+        // describes it. In order, the prologue:
+        //  1. pushes every callee-saved integer register the allocator used;
+        //  2. probes the stack when the reserved area reaches StackGuardSize;
+        //  3. reserves the call-arguments, spill and XMM save regions by
+        //     subtracting their combined size from RSP;
+        //  4. stores every callee-saved vector register the allocator used into
+        //     the reserved area, filling 16-byte slots from its top downwards.
+        // Each register save is recorded as an UnwindPushEntry tagged with the
+        // code stream offset reached right after the saving instruction.
+        private static UnwindInfo WritePrologue(CodeGenContext context)
+        {
+            List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();
+
+            Operand rsp = Register(X86Register.Rsp);
+
+            // Only registers that are both callee-saved and actually used need saving.
+            int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+            while (mask != 0)
+            {
+                int bit = BitUtils.LowestBitSet(mask);
+
+                context.Assembler.Push(Register((X86Register)bit));
+
+                pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Integer, context.StreamOffset));
+
+                mask &= ~(1 << bit);
+            }
+
+            int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+            reservedStackSize += context.XmmSaveRegionSize;
+
+            if (reservedStackSize >= StackGuardSize)
+            {
+                GenerateInlineStackProbe(context, reservedStackSize);
+            }
+
+            if (reservedStackSize != 0)
+            {
+                context.Assembler.Sub(rsp, new Operand(reservedStackSize), OperandType.I64);
+            }
+
+            // Vector saves start at the top of the reserved area and grow down.
+            int offset = reservedStackSize;
+
+            mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            while (mask != 0)
+            {
+                int bit = BitUtils.LowestBitSet(mask);
+
+                offset -= 16;
+
+                MemoryOperand memOp = new MemoryOperand(OperandType.V128, rsp, null, Multiplier.x1, offset);
+
+                context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
+
+                pushEntries.Add(new UnwindPushEntry(bit, RegisterType.Vector, context.StreamOffset));
+
+                mask &= ~(1 << bit);
+            }
+
+            return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset, reservedStackSize);
+        }
+
+        // Emits the function epilogue. It reloads the used callee-saved vector
+        // registers from the reserved stack area, releases that area, and pops the
+        // used callee-saved integer registers from the highest register number to
+        // the lowest.
+        private static void WriteEpilogue(CodeGenContext context)
+        {
+            Operand stackPointer = Register(X86Register.Rsp);
+
+            int frameSize = context.CallArgsRegionSize
+                          + context.AllocResult.SpillRegionSize
+                          + context.XmmSaveRegionSize;
+
+            int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            // Vector slots are 16 bytes each, starting at the top of the frame.
+            for (int slot = frameSize - 16; vecMask != 0; slot -= 16)
+            {
+                int reg = BitUtils.LowestBitSet(vecMask);
+
+                MemoryOperand saveSlot = new MemoryOperand(OperandType.V128, stackPointer, null, Multiplier.x1, slot);
+
+                context.Assembler.Movdqu(Xmm((X86Register)reg), saveSlot);
+
+                vecMask &= ~(1 << reg);
+            }
+
+            if (frameSize != 0)
+            {
+                context.Assembler.Add(stackPointer, new Operand(frameSize), OperandType.I64);
+            }
+
+            int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+            while (intMask != 0)
+            {
+                int reg = BitUtils.HighestBitSet(intMask);
+
+                context.Assembler.Pop(Register((X86Register)reg));
+
+                intMask &= ~(1 << reg);
+            }
+        }
+
+        // Emits one dummy 32-bit read per page for a stack allocation of "size"
+        // bytes. The reads go below RSP at page-sized steps, and the loaded value
+        // lands in the integer return register where this method never uses it.
+        private static void GenerateInlineStackProbe(CodeGenContext context, int size)
+        {
+            // Windows does lazy stack allocation, and there are just 2
+            // guard pages on the end of the stack. So, if the allocation
+            // size we make is greater than this guard size, we must ensure
+            // that the OS will map all pages that we'll use. We do that by
+            // doing a dummy read on those pages, forcing a page fault and
+            // the OS to map them. If they are already mapped, nothing happens.
+            const int pageMask = PageSize - 1;
+
+            // Round the allocation size up to a whole number of pages.
+            size = (size + pageMask) & ~pageMask;
+
+            Operand rsp  = Register(X86Register.Rsp);
+            Operand temp = Register(CallingConvention.GetIntReturnRegister());
+
+            for (int offset = PageSize; offset < size; offset += PageSize)
+            {
+                Operand memOp = new MemoryOperand(OperandType.I32, rsp, null, Multiplier.x1, -offset);
+
+                context.Assembler.Mov(temp, memOp, OperandType.I32);
+            }
+        }
+
+        // Returns "operand" itself when it already is a memory operand; otherwise
+        // wraps it in a new MemoryOperand of the given type.
+        private static MemoryOperand Memory(Operand operand, OperandType type)
+        {
+            if (operand.Kind == OperandKind.Memory)
+            {
+                // Direct cast rather than "as": an operand that reports the Memory
+                // kind but is not a MemoryOperand fails right here, instead of
+                // handing a null reference back to the caller.
+                return (MemoryOperand)operand;
+            }
+
+            return new MemoryOperand(type, operand);
+        }
+
+        // Builds an integer register operand for the given x86 register.
+        // The operand type defaults to I64.
+        private static Operand Register(X86Register register, OperandType type = OperandType.I64)
+            => new Operand((int)register, RegisterType.Integer, type);
+
+        // Builds a V128 vector register operand for the given register index.
+        private static Operand Xmm(X86Register register)
+            => new Operand((int)register, RegisterType.Vector, OperandType.V128);
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
new file mode 100644
index 000000000..7f930d6b9
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    // Exposes the x86 feature flags of the host CPU. The flags are read once, in
+    // the static constructor, by compiling and running a tiny function that
+    // returns the result of the CpuId IR operation as a 64-bit value.
+    // NOTE(review): the bit positions tested below look like CPUID leaf 1 with
+    // ECX in bits 0-31 and EDX in bits 32-63 - confirm against the CpuId emitter.
+    static class HardwareCapabilities
+    {
+        private delegate ulong GetFeatureInfo();
+
+        // Assigned exactly once by the static constructor.
+        private static readonly ulong _featureInfo;
+
+        // Flags held in the low 32 bits.
+        public static bool SupportsSse3      => (_featureInfo & (1UL << 0))  != 0;
+        public static bool SupportsPclmulqdq => (_featureInfo & (1UL << 1))  != 0;
+        public static bool SupportsSsse3     => (_featureInfo & (1UL << 9))  != 0;
+        public static bool SupportsFma       => (_featureInfo & (1UL << 12)) != 0;
+        public static bool SupportsCx16      => (_featureInfo & (1UL << 13)) != 0;
+        public static bool SupportsSse41     => (_featureInfo & (1UL << 19)) != 0;
+        public static bool SupportsSse42     => (_featureInfo & (1UL << 20)) != 0;
+        public static bool SupportsPopcnt    => (_featureInfo & (1UL << 23)) != 0;
+        public static bool SupportsAesni     => (_featureInfo & (1UL << 25)) != 0;
+        public static bool SupportsAvx       => (_featureInfo & (1UL << 28)) != 0;
+        public static bool SupportsF16c      => (_featureInfo & (1UL << 29)) != 0;
+
+        // Flags held in the high 32 bits. The shift amount is parenthesised to
+        // make explicit that "+" binds tighter than "<<".
+        public static bool SupportsSse  => (_featureInfo & (1UL << (32 + 25))) != 0;
+        public static bool SupportsSse2 => (_featureInfo & (1UL << (32 + 26))) != 0;
+
+        // When set, VEX encoding is reported as unsupported even on CPUs with AVX.
+        public static bool ForceLegacySse { get; set; }
+
+        public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
+
+        static HardwareCapabilities()
+        {
+            // Build the IR for a function that just returns the CpuId result.
+            EmitterContext context = new EmitterContext();
+
+            Operand featureInfo = context.CpuId();
+
+            context.Return(featureInfo);
+
+            ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+            // The generated function takes no arguments.
+            OperandType[] argTypes = new OperandType[0];
+
+            // The delegate type has to be supplied explicitly: it cannot be
+            // inferred from the arguments.
+            GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(
+                cfg,
+                argTypes,
+                OperandType.I64,
+                CompilerOptions.HighCq);
+
+            _featureInfo = getFeatureInfo();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
new file mode 100644
index 000000000..b1af352bc
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    // Immutable pair describing how an intrinsic is emitted: the x86 instruction
+    // to use and the operand shape (IntrinsicType) it is emitted with.
+    struct IntrinsicInfo
+    {
+        // The x86 instruction the intrinsic maps to.
+        public X86Instruction Inst { get; }
+
+        // The operand shape of the intrinsic.
+        public IntrinsicType Type { get; }
+
+        public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
+        {
+            this.Inst = inst;
+            this.Type = type;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
new file mode 100644
index 000000000..e225f2542
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    // Lookup table from an IR Intrinsic to the x86 instruction and operand shape
+    // used to emit it. The table is indexed by the integer value of the Intrinsic
+    // and is filled once by the static constructor.
+    static class IntrinsicTable
+    {
+        private static readonly IntrinsicInfo[] _intrinTable;
+
+        static IntrinsicTable()
+        {
+            _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
+            Add(Intrinsic.X86Addpd,      X86Instruction.Addpd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Addps,      X86Instruction.Addps,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Addsd,      X86Instruction.Addsd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Addss,      X86Instruction.Addss,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Andnpd,     X86Instruction.Andnpd,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Andnps,     X86Instruction.Andnps,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Cmppd,      X86Instruction.Cmppd,      IntrinsicType.TernaryImm);
+            Add(Intrinsic.X86Cmpps,      X86Instruction.Cmpps,      IntrinsicType.TernaryImm);
+            Add(Intrinsic.X86Cmpsd,      X86Instruction.Cmpsd,      IntrinsicType.TernaryImm);
+            Add(Intrinsic.X86Cmpss,      X86Instruction.Cmpss,      IntrinsicType.TernaryImm);
+            Add(Intrinsic.X86Comisdeq,   X86Instruction.Comisd,     IntrinsicType.Comis_);
+            Add(Intrinsic.X86Comisdge,   X86Instruction.Comisd,     IntrinsicType.Comis_);
+            Add(Intrinsic.X86Comisdlt,   X86Instruction.Comisd,     IntrinsicType.Comis_);
+            Add(Intrinsic.X86Comisseq,   X86Instruction.Comiss,     IntrinsicType.Comis_);
+            Add(Intrinsic.X86Comissge,   X86Instruction.Comiss,     IntrinsicType.Comis_);
+            Add(Intrinsic.X86Comisslt,   X86Instruction.Comiss,     IntrinsicType.Comis_);
+            Add(Intrinsic.X86Cvtdq2pd,   X86Instruction.Cvtdq2pd,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Cvtdq2ps,   X86Instruction.Cvtdq2ps,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Cvtpd2dq,   X86Instruction.Cvtpd2dq,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Cvtpd2ps,   X86Instruction.Cvtpd2ps,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Cvtps2dq,   X86Instruction.Cvtps2dq,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Cvtps2pd,   X86Instruction.Cvtps2pd,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Cvtsd2si,   X86Instruction.Cvtsd2si,   IntrinsicType.UnaryToGpr);
+            Add(Intrinsic.X86Cvtsd2ss,   X86Instruction.Cvtsd2ss,   IntrinsicType.Binary);
+            Add(Intrinsic.X86Cvtss2sd,   X86Instruction.Cvtss2sd,   IntrinsicType.Binary);
+            Add(Intrinsic.X86Divpd,      X86Instruction.Divpd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Divps,      X86Instruction.Divps,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Divsd,      X86Instruction.Divsd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Divss,      X86Instruction.Divss,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Haddpd,     X86Instruction.Haddpd,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Haddps,     X86Instruction.Haddps,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Maxpd,      X86Instruction.Maxpd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Maxps,      X86Instruction.Maxps,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Maxsd,      X86Instruction.Maxsd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Maxss,      X86Instruction.Maxss,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Minpd,      X86Instruction.Minpd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Minps,      X86Instruction.Minps,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Minsd,      X86Instruction.Minsd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Minss,      X86Instruction.Minss,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Movhlps,    X86Instruction.Movhlps,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Movlhps,    X86Instruction.Movlhps,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Mulpd,      X86Instruction.Mulpd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Mulps,      X86Instruction.Mulps,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Mulsd,      X86Instruction.Mulsd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Mulss,      X86Instruction.Mulss,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Paddb,      X86Instruction.Paddb,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Paddd,      X86Instruction.Paddd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Paddq,      X86Instruction.Paddq,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Paddw,      X86Instruction.Paddw,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Pand,       X86Instruction.Pand,       IntrinsicType.Binary);
+            Add(Intrinsic.X86Pandn,      X86Instruction.Pandn,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Pavgb,      X86Instruction.Pavgb,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Pavgw,      X86Instruction.Pavgw,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Pblendvb,   X86Instruction.Pblendvb,   IntrinsicType.Ternary);
+            Add(Intrinsic.X86Pcmpeqb,    X86Instruction.Pcmpeqb,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pcmpeqd,    X86Instruction.Pcmpeqd,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pcmpeqq,    X86Instruction.Pcmpeqq,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pcmpeqw,    X86Instruction.Pcmpeqw,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pcmpgtb,    X86Instruction.Pcmpgtb,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pcmpgtd,    X86Instruction.Pcmpgtd,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pcmpgtq,    X86Instruction.Pcmpgtq,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pcmpgtw,    X86Instruction.Pcmpgtw,    IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmaxsb,     X86Instruction.Pmaxsb,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmaxsd,     X86Instruction.Pmaxsd,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmaxsw,     X86Instruction.Pmaxsw,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmaxub,     X86Instruction.Pmaxub,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmaxud,     X86Instruction.Pmaxud,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmaxuw,     X86Instruction.Pmaxuw,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pminsb,     X86Instruction.Pminsb,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pminsd,     X86Instruction.Pminsd,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pminsw,     X86Instruction.Pminsw,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pminub,     X86Instruction.Pminub,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pminud,     X86Instruction.Pminud,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pminuw,     X86Instruction.Pminuw,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmovsxbw,   X86Instruction.Pmovsxbw,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Pmovsxdq,   X86Instruction.Pmovsxdq,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Pmovsxwd,   X86Instruction.Pmovsxwd,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Pmovzxbw,   X86Instruction.Pmovzxbw,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Pmovzxdq,   X86Instruction.Pmovzxdq,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Pmovzxwd,   X86Instruction.Pmovzxwd,   IntrinsicType.Unary);
+            Add(Intrinsic.X86Pmulld,     X86Instruction.Pmulld,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pmullw,     X86Instruction.Pmullw,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Popcnt,     X86Instruction.Popcnt,     IntrinsicType.PopCount);
+            Add(Intrinsic.X86Por,        X86Instruction.Por,        IntrinsicType.Binary);
+            Add(Intrinsic.X86Pshufb,     X86Instruction.Pshufb,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Pslld,      X86Instruction.Pslld,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Pslldq,     X86Instruction.Pslldq,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Psllq,      X86Instruction.Psllq,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psllw,      X86Instruction.Psllw,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psrad,      X86Instruction.Psrad,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psraw,      X86Instruction.Psraw,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psrld,      X86Instruction.Psrld,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psrlq,      X86Instruction.Psrlq,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psrldq,     X86Instruction.Psrldq,     IntrinsicType.Binary);
+            Add(Intrinsic.X86Psrlw,      X86Instruction.Psrlw,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psubb,      X86Instruction.Psubb,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psubd,      X86Instruction.Psubd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psubq,      X86Instruction.Psubq,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Psubw,      X86Instruction.Psubw,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpckhbw,  X86Instruction.Punpckhbw,  IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpckhdq,  X86Instruction.Punpckhdq,  IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpckhqdq, X86Instruction.Punpckhqdq, IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpckhwd,  X86Instruction.Punpckhwd,  IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpcklbw,  X86Instruction.Punpcklbw,  IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpckldq,  X86Instruction.Punpckldq,  IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpcklqdq, X86Instruction.Punpcklqdq, IntrinsicType.Binary);
+            Add(Intrinsic.X86Punpcklwd,  X86Instruction.Punpcklwd,  IntrinsicType.Binary);
+            Add(Intrinsic.X86Pxor,       X86Instruction.Pxor,       IntrinsicType.Binary);
+            Add(Intrinsic.X86Rcpps,      X86Instruction.Rcpps,      IntrinsicType.Unary);
+            Add(Intrinsic.X86Rcpss,      X86Instruction.Rcpss,      IntrinsicType.Unary);
+            Add(Intrinsic.X86Roundpd,    X86Instruction.Roundpd,    IntrinsicType.BinaryImm);
+            Add(Intrinsic.X86Roundps,    X86Instruction.Roundps,    IntrinsicType.BinaryImm);
+            Add(Intrinsic.X86Roundsd,    X86Instruction.Roundsd,    IntrinsicType.BinaryImm);
+            Add(Intrinsic.X86Roundss,    X86Instruction.Roundss,    IntrinsicType.BinaryImm);
+            Add(Intrinsic.X86Rsqrtps,    X86Instruction.Rsqrtps,    IntrinsicType.Unary);
+            Add(Intrinsic.X86Rsqrtss,    X86Instruction.Rsqrtss,    IntrinsicType.Unary);
+            Add(Intrinsic.X86Shufpd,     X86Instruction.Shufpd,     IntrinsicType.TernaryImm);
+            Add(Intrinsic.X86Shufps,     X86Instruction.Shufps,     IntrinsicType.TernaryImm);
+            Add(Intrinsic.X86Sqrtpd,     X86Instruction.Sqrtpd,     IntrinsicType.Unary);
+            Add(Intrinsic.X86Sqrtps,     X86Instruction.Sqrtps,     IntrinsicType.Unary);
+            Add(Intrinsic.X86Sqrtsd,     X86Instruction.Sqrtsd,     IntrinsicType.Unary);
+            Add(Intrinsic.X86Sqrtss,     X86Instruction.Sqrtss,     IntrinsicType.Unary);
+            Add(Intrinsic.X86Subpd,      X86Instruction.Subpd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Subps,      X86Instruction.Subps,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Subsd,      X86Instruction.Subsd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Subss,      X86Instruction.Subss,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Unpckhpd,   X86Instruction.Unpckhpd,   IntrinsicType.Binary);
+            Add(Intrinsic.X86Unpckhps,   X86Instruction.Unpckhps,   IntrinsicType.Binary);
+            Add(Intrinsic.X86Unpcklpd,   X86Instruction.Unpcklpd,   IntrinsicType.Binary);
+            Add(Intrinsic.X86Unpcklps,   X86Instruction.Unpcklps,   IntrinsicType.Binary);
+            Add(Intrinsic.X86Xorpd,      X86Instruction.Xorpd,      IntrinsicType.Binary);
+            Add(Intrinsic.X86Xorps,      X86Instruction.Xorps,      IntrinsicType.Binary);
+        }
+
+        // Registers the instruction and operand shape used to emit "intrin".
+        private static void Add(Intrinsic intrin, X86Instruction inst, IntrinsicType type)
+        {
+            _intrinTable[(int)intrin] = new IntrinsicInfo(inst, type);
+        }
+
+        // Returns the table entry for "intrin". Entries never registered by the
+        // static constructor hold the default IntrinsicInfo value.
+        public static IntrinsicInfo GetInfo(Intrinsic intrin)
+        {
+            return _intrinTable[(int)intrin];
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
new file mode 100644
index 000000000..4e9b33e1e
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    // Operand shape of an x86 intrinsic. The per-member notes list the
+    // intrinsics that IntrinsicTable registers with each shape.
+    enum IntrinsicType
+    {
+        // Comisd/Comiss comparisons (eq, ge and lt variants).
+        Comis_,
+
+        // Popcnt.
+        PopCount,
+
+        // Conversions, sign/zero extending moves, reciprocals and square roots.
+        Unary,
+
+        // Cvtsd2si.
+        UnaryToGpr,
+
+        // Most arithmetic, logic, compare, shift and unpack instructions.
+        Binary,
+
+        // Roundpd/Roundps/Roundsd/Roundss.
+        BinaryImm,
+
+        // Pblendvb.
+        Ternary,
+
+        // Cmppd/Cmpps/Cmpsd/Cmpss and Shufpd/Shufps.
+        TernaryImm
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
new file mode 100644
index 000000000..a14901311
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -0,0 +1,1280 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ using LLNode = LinkedListNode;
+
+ static class PreAllocator
+ {
+ // Rewrites every operation of every basic block in the function's CFG so that operand
+ // and calling-convention constraints become explicit IR: constants are copied where
+ // they cannot be used directly, "dest = dest op src" forms are enforced, fixed
+ // registers are assigned, and calls, argument loads and returns are expanded for the
+ // current calling convention.
+ // On return, maxCallArgs holds the largest argument count found on any call (plus one
+ // when the call has a V128 destination), or -1 when the function contains no calls.
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ CallConvName callConv = CallingConvention.GetCurrentCallConv();
+
+ // Cache filled lazily by the LoadArgument/Return handlers: slot i holds the local
+ // that received the incoming argument value copied at the function entry.
+ // NOTE(review): the array is sized by GetArgumentsOnRegsCount() for both conventions,
+ // while the System V handler indexes it by the raw argument index — confirm the size
+ // is sufficient there.
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ foreach (BasicBlock block in cctx.Cfg.Blocks)
+ {
+ LLNode nextNode;
+
+ for (LLNode node = block.Operations.First; node != null; node = nextNode)
+ {
+ // Fetch the successor first: the handlers below insert and delete nodes around
+ // the current one. Nodes inserted after the current node are skipped by this loop.
+ nextNode = node.Next;
+
+ if (!(node.Value is Operation operation))
+ {
+ continue;
+ }
+
+ HandleConstantCopy(node, operation);
+
+ HandleSameDestSrc1Copy(node, operation);
+
+ HandleFixedRegisterCopy(node, operation);
+
+ // The node returned by the handlers below is not used to continue the iteration;
+ // the loop always advances to nextNode.
+ switch (operation.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+ // On windows, when a struct is returned from the call,
+ // we also need to pass the pointer where the struct
+ // should be written on the first argument.
+ // Note that the extra slot is counted for any V128 destination,
+ // regardless of the calling convention in use.
+ int argsCount = operation.SourcesCount - 1;
+
+ if (operation.Destination != null && operation.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ if (callConv == CallConvName.Windows)
+ {
+ node = HandleCallWindowsAbi(stackAlloc, node, operation);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ node = HandleCallSystemVAbi(node, operation);
+ }
+ break;
+
+ case Instruction.ConvertToFPUI:
+ HandleConvertToFPUI(node, operation);
+ break;
+
+ case Instruction.LoadArgument:
+ if (callConv == CallConvName.Windows)
+ {
+ HandleLoadArgumentWindowsAbi(cctx, node, preservedArgs, operation);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ HandleLoadArgumentSystemVAbi(cctx, node, preservedArgs, operation);
+ }
+ break;
+
+ case Instruction.Negate:
+ // Only non-integer negation is rewritten here.
+ if (!operation.GetSource(0).Type.IsInteger())
+ {
+ node = HandleNegate(node, operation);
+ }
+ break;
+
+ case Instruction.Return:
+ if (callConv == CallConvName.Windows)
+ {
+ HandleReturnWindowsAbi(cctx, node, preservedArgs, operation);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ HandleReturnSystemVAbi(node, operation);
+ }
+ break;
+
+ case Instruction.VectorInsert8:
+ // Without SSE 4.1 the byte insert is emulated with 16-bit extract/insert.
+ if (!HardwareCapabilities.SupportsSse41)
+ {
+ node = HandleVectorInsert8(node, operation);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ // Replaces constant operands in the first two source slots with locals whenever the
+ // instruction cannot take the constant directly. The required copies are inserted
+ // immediately before the node. Intrinsic (Extended) operations and operations without
+ // sources are left untouched.
+ private static void HandleConstantCopy(LLNode node, Operation operation)
+ {
+ if (operation.SourcesCount == 0 || IsIntrinsic(operation.Instruction))
+ {
+ return;
+ }
+
+ Instruction inst = operation.Instruction;
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to a XMM register.
+ // - Replace the constant use with the XMM register.
+ src1 = AddXmmCopy(node, src1);
+
+ operation.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(inst))
+ {
+ // Handle integer types.
+ // Most ALU instructions accept a 32-bit immediate on the second operand.
+ // We need to ensure the following:
+ // - If the constant is on operand 1, we need to move it.
+ // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+ // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+ // - If the constant is on operand 2, we check if the instruction supports it,
+ // if not, we also add a copy. 64-bit constants are usually not supported.
+ if (IsCommutative(inst))
+ {
+ src2 = operation.GetSource(1);
+
+ Operand temp = src1;
+
+ src1 = src2;
+ src2 = temp;
+
+ operation.SetSource(0, src1);
+ operation.SetSource(1, src2);
+ }
+
+ // Still constant after the optional swap (both operands were constants,
+ // or the instruction is not commutative): materialize it in a local.
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddCopy(node, src1);
+
+ operation.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (operation.SourcesCount < 2)
+ {
+ return;
+ }
+
+ // Re-read operand 2, it may have been changed by the swap above.
+ src2 = operation.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddXmmCopy(node, src2);
+
+ operation.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst) || IsLongConst(src2))
+ {
+ // Either the instruction takes no constant on operand 2, or the value
+ // does not fit on a signed 32-bit integer.
+ src2 = AddCopy(node, src2);
+
+ operation.SetSource(1, src2);
+ }
+ }
+ }
+
+ // Inserts the copies required by operations whose operands or results must live in
+ // specific registers (CompareAndSwap128, CpuId, integer Divide/DivideUI, the PBLENDVB
+ // intrinsic without VEX, Multiply64HighSI/UI and shifts/rotates by a variable count),
+ // and rewrites the operation's sources and destinations to name those registers.
+ // Returns the last node inserted after the operation, or the node itself when nothing
+ // was inserted after it.
+ private static LLNode HandleFixedRegisterCopy(LLNode node, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ LinkedList nodes = node.List;
+
+ switch (operation.Instruction)
+ {
+ case Instruction.CompareAndSwap128:
+ {
+ // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+ // - The expected value should be in RDX:RAX.
+ // - The new value to be written should be in RCX:RBX.
+ // - The value at the memory location is loaded to RDX:RAX.
+ // Splits a vector into its low (element 0) and high (element 1) halves,
+ // writing them to lr and hr before the node.
+ void SplitOperand(Operand source, Operand lr, Operand hr)
+ {
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
+ }
+
+ Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+ Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+ Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+ SplitOperand(operation.GetSource(1), rax, rdx);
+ SplitOperand(operation.GetSource(2), rbx, rcx);
+
+ // Rebuild the vector result from RDX:RAX after the operation.
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+
+ operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+
+ break;
+ }
+
+ case Instruction.CpuId:
+ {
+ // Handle the many restrictions of the CPU Id instruction:
+ // - EAX controls the information returned by this instruction.
+ // - When EAX is 1, feature information is returned.
+ // - The information is written to registers EAX, EBX, ECX and EDX.
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ Operand eax = Gpr(X86Register.Rax, OperandType.I32);
+ Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
+ Operand ecx = Gpr(X86Register.Rcx, OperandType.I32);
+ Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
+
+ // Value 0x01 = Version, family and feature information.
+ nodes.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
+
+ // Copy results to the destination register.
+ // The values are split into two 32-bit registers, we merge them
+ // into a single 64-bit register: dest = (EDX << 32) | RCX.
+ // rcx names the same register as ecx, typed as I64 for the 64-bit OR.
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
+ node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, dest, dest, Const(32)));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, dest, dest, rcx));
+
+ operation.SetDestinations(new Operand[] { eax, ebx, ecx, edx });
+
+ operation.SetSources(new Operand[] { eax });
+
+ break;
+ }
+
+ case Instruction.Divide:
+ case Instruction.DivideUI:
+ {
+ // Handle the many restrictions of the division instructions:
+ // - The dividend is always in RDX:RAX.
+ // - The result is always in RAX.
+ // - Additionally it also writes the remainder in RDX.
+ // Only integer divisions are affected, other types are left as is.
+ if (dest.Type.IsInteger())
+ {
+ Operand src1 = operation.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
+ nodes.AddBefore(node, new Operation(Instruction.Clobber, rdx));
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+
+ operation.SetSources(new Operand[] { rdx, rax, operation.GetSource(1) });
+
+ // NOTE(review): Destination is assigned after SetDestinations; whether RDX
+ // remains listed as a destination depends on the setter — confirm.
+ operation.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Extended:
+ {
+ IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+
+ // PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
+ if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
+ {
+ Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
+
+ operation.SetSource(2, xmm0);
+ }
+
+ break;
+ }
+
+ case Instruction.Multiply64HighSI:
+ case Instruction.Multiply64HighUI:
+ {
+ // Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
+ // - The multiplicand is always in RAX.
+ // - The lower 64-bits of the result is always in RAX.
+ // - The higher 64-bits of the result is always in RDX.
+ Operand src1 = operation.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
+
+ operation.SetSource(0, rax);
+
+ // Only the high half (RDX) is copied to the original destination.
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rdx));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+
+ break;
+ }
+
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ {
+ // The shift register is always implied to be CL (low 8-bits of RCX or ECX).
+ // Only counts held in a local variable are moved; other operand kinds
+ // (such as constants) are left in place.
+ if (operation.GetSource(1).Kind == OperandKind.LocalVariable)
+ {
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, rcx, operation.GetSource(1)));
+
+ operation.SetSource(1, rcx);
+ }
+
+ break;
+ }
+ }
+
+ return node;
+ }
+
+ // Enforces the "destination is also the first source" form for operations that need
+ // it (see IsSameOperandDestSrc1) by copying the first source into the destination, or
+ // into a fresh local when the destination is also read by another source.
+ // ConditionalSelect gets the same treatment applied to its third source.
+ // Returns the last node inserted after the operation, or the node itself.
+ private static LLNode HandleSameDestSrc1Copy(LLNode node, Operation operation)
+ {
+ if (operation.Destination == null || operation.SourcesCount == 0)
+ {
+ return node;
+ }
+
+ Instruction inst = operation.Instruction;
+
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+
+ LinkedList nodes = node.List;
+
+ // The multiply instruction (that maps to IMUL) is somewhat special, it has
+ // a three operand form where the second source is an immediate value.
+ bool threeOperandForm = inst == Instruction.Multiply && operation.GetSource(1).Kind == OperandKind.Constant;
+
+ if (IsSameOperandDestSrc1(operation) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
+ {
+ bool useNewLocal = false;
+
+ // Check whether dest is read by any source other than the first one.
+ for (int srcIndex = 1; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ if (operation.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is being used as some source already, we need to use a new
+ // local to store the temporary value, otherwise the value on dest
+ // local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, temp, src1));
+
+ operation.SetSource(0, temp);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, temp));
+
+ operation.Destination = temp;
+ }
+ else
+ {
+ // Safe to overwrite dest up front: no other source reads it.
+ nodes.AddBefore(node, new Operation(Instruction.Copy, dest, src1));
+
+ operation.SetSource(0, dest);
+ }
+ }
+ else if (inst == Instruction.ConditionalSelect)
+ {
+ // For ConditionalSelect it is the third source that is tied to the destination.
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ if (src1 == dest || src2 == dest)
+ {
+ // Dest is read by one of the other sources, go through a temporary.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, temp, src3));
+
+ operation.SetSource(2, temp);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, temp));
+
+ operation.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, new Operation(Instruction.Copy, dest, src3));
+
+ operation.SetSource(2, dest);
+ }
+ }
+
+ return node;
+ }
+
+ // Replaces an unsigned integer to floating point conversion with an equivalent
+ // sequence built from signed conversions. The replacement operations are appended
+ // after the original node, which is then deleted.
+ // Returns the last node of the replacement sequence.
+ private static LLNode HandleConvertToFPUI(LLNode node, Operation operation)
+ {
+ // Unsigned integer to FP conversions are not supported on X86.
+ // We need to turn them into signed integer to FP conversions, and
+ // adjust the final result.
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
+
+ LinkedList nodes = node.List;
+
+ // Remember the original node, it is removed once the replacement is in place.
+ LLNode currentNode = node;
+
+ if (source.Type == OperandType.I32)
+ {
+ // For 32-bit integers, we can just zero-extend to 64 bits,
+ // and then use the 64-bit signed conversion instructions.
+ Operand zex = Local(OperandType.I64);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, zex, source));
+ node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, dest, zex));
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+ // For 64-bit integers, we need to do the following:
+ // - Ensure that the integer has the most significant bit clear.
+ // -- This can be done by shifting the value right by 1, that is, dividing by 2.
+ // -- The least significant bit is lost in this case though.
+ // - We can then convert the shifted value with a signed integer instruction.
+ // - The result still needs to be corrected after that.
+ // -- First, we need to multiply the result by 2, as we divided it by 2 before.
+ // --- This can be done efficiently by adding the result to itself.
+ // -- Then, we need to add the least significant bit that was shifted out.
+ // --- We can convert the least significant bit to float, and add it to the result.
+ Operand lsb = Local(OperandType.I64);
+ Operand half = Local(OperandType.I64);
+
+ Operand lsbF = Local(dest.Type);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, lsb, source));
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, half, source));
+
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
+ node = nodes.AddAfter(node, new Operation(Instruction.ShiftRightUI, half, half, Const(1)));
+
+ node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, lsbF, lsb));
+ node = nodes.AddAfter(node, new Operation(Instruction.ConvertToFP, dest, half));
+
+ // dest = dest * 2 + lsb
+ node = nodes.AddAfter(node, new Operation(Instruction.Add, dest, dest, dest));
+ node = nodes.AddAfter(node, new Operation(Instruction.Add, dest, dest, lsbF));
+ }
+
+ Delete(currentNode, operation);
+
+ return node;
+ }
+
+ // Replaces a floating point Negate with an XOR against a sign-bit mask. The
+ // replacement operations are appended after the original node, which is then deleted.
+ // Returns the last node of the replacement sequence.
+ private static LLNode HandleNegate(LLNode node, Operation operation)
+ {
+ // There's no SSE FP negate instruction, so we need to transform that into
+ // a XOR of the value to be negated with a mask with the highest bit set.
+ // This also produces -0 for a negation of the value 0.
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 ||
+ dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
+
+ LinkedList nodes = node.List;
+
+ LLNode currentNode = node;
+
+ Operand res = Local(dest.Type);
+
+ // Build the mask: start from the VectorOne value and shift each element left
+ // so that only its highest bit (31 for FP32, 63 for FP64) can remain set.
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorOne, res));
+
+ if (dest.Type == OperandType.FP32)
+ {
+ node = nodes.AddAfter(node, new IntrinsicOperation(Intrinsic.X86Pslld, res, res, Const(31)));
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ node = nodes.AddAfter(node, new IntrinsicOperation(Intrinsic.X86Psllq, res, res, Const(63)));
+ }
+
+ // The same XORPS intrinsic is used for both FP32 and FP64.
+ node = nodes.AddAfter(node, new IntrinsicOperation(Intrinsic.X86Xorps, res, res, source));
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, res));
+
+ Delete(currentNode, operation);
+
+ return node;
+ }
+
+ // Emulates an 8-bit vector element insertion with 16-bit operations: the 16-bit
+ // element containing the target byte is extracted, the byte is merged into it, and
+ // the element is inserted back. The replacement operations are appended after the
+ // original node, which is then deleted.
+ // Returns the last node of the replacement sequence.
+ private static LLNode HandleVectorInsert8(LLNode node, Operation operation)
+ {
+ // Handle vector insertion, when SSE 4.1 is not supported.
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ Debug.Assert(index < 16);
+
+ LinkedList nodes = node.List;
+
+ LLNode currentNode = node;
+
+ Operand temp1 = Local(OperandType.I32); // 16-bit element being rebuilt
+ Operand temp2 = Local(OperandType.I32); // Byte value being inserted
+
+ node = nodes.AddAfter(node, new Operation(Instruction.Copy, temp2, src2));
+
+ // index >> 1 selects the 16-bit element that holds the target byte.
+ Operation vextOp = new Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1));
+
+ node = nodes.AddAfter(node, vextOp);
+
+ if ((index & 1) != 0)
+ {
+ // Odd index, the byte is the high half: keep the low byte of the element
+ // and OR in the new value shifted to bits 8 and above.
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp1, temp1));
+ node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft, temp2, temp2, Const(8)));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+ else
+ {
+ // Even index, the byte is the low half: keep the high byte of the element
+ // and OR in the new value truncated to 8 bits.
+ node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend8, temp2, temp2));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00)));
+ node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+
+ Operation vinsOp = new Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1));
+
+ node = nodes.AddAfter(node, vinsOp);
+
+ Delete(currentNode, operation);
+
+ return node;
+ }
+
+ // Expands a Call for the Windows calling convention:
+ // - V128 values (arguments and the return value) are passed by address, using space
+ // reserved through the stack allocator.
+ // - The leading arguments are copied to their argument registers, the remaining ones
+ // are written to the stack with SpillArg.
+ // - The result is read back from the return register, or loaded from the return
+ // buffer for V128.
+ // The call's sources are replaced by the call target followed by the registers used.
+ // Returns the last node inserted after the call, or the call node itself.
+ private static LLNode HandleCallWindowsAbi(StackAllocator stackAlloc, LLNode node, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ LinkedList nodes = node.List;
+
+ // Handle struct arguments.
+ int retArgs = 0;
+
+ int stackAllocOffset = 0;
+
+ int AllocateOnStack(int size)
+ {
+ // We assume that the stack allocator is initially empty (TotalSize = 0).
+ // Taking that into account, we can reuse the space allocated for other
+ // calls by keeping track of our own allocated size (stackAllocOffset).
+ // If the space allocated is not big enough, then we just expand it.
+ int offset = stackAllocOffset;
+
+ if (stackAllocOffset + size > stackAlloc.TotalSize)
+ {
+ stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize);
+ }
+
+ stackAllocOffset += size;
+
+ return offset;
+ }
+
+ Operand arg0Reg = null;
+
+ if (dest != null && dest.Type == OperandType.V128)
+ {
+ // A V128 result is returned through memory: the address of a stack slot is
+ // passed on the first integer argument register, shifting the other arguments.
+ int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes());
+
+ arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation allocOp = new Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
+
+ nodes.AddBefore(node, allocOp);
+
+ retArgs = 1;
+ }
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
+
+ // From here on, argsCount is the number of arguments passed on registers.
+ if (argsCount > maxArgs)
+ {
+ argsCount = maxArgs;
+ }
+
+ // Layout: [0] = call target, [1] = return buffer address (if any), then argument registers.
+ Operand[] sources = new Operand[1 + retArgs + argsCount];
+
+ sources[0] = operation.GetSource(0);
+
+ if (arg0Reg != null)
+ {
+ sources[1] = arg0Reg;
+ }
+
+ // Replace every V128 argument by the address of a stack slot holding its value.
+ for (int index = 1; index < operation.SourcesCount; index++)
+ {
+ Operand source = operation.GetSource(index);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand stackAddr = Local(OperandType.I64);
+
+ int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes());
+
+ nodes.AddBefore(node, new Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset)));
+
+ Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, storeOp), storeOp);
+
+ operation.SetSource(index, stackAddr);
+ }
+ }
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ Operand argReg;
+
+ // The register is selected by argument position, for both integer and
+ // vector registers.
+ int argIndex = index + retArgs;
+
+ if (source.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type);
+ }
+
+ Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+ sources[1 + retArgs + index] = argReg;
+ }
+
+ // The remaining arguments (those that are not passed on registers)
+ // should be passed on the stack, we write them to the stack with "SpillArg".
+ for (int index = argsCount; index < operation.SourcesCount - 1; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ // Each argument position takes 8 bytes.
+ Operand offset = new Operand((index + retArgs) * 8);
+
+ Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+ }
+
+ if (dest != null)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // Save the return buffer address on a local before the call,
+ // and load the result from it after the call.
+ Operand retValueAddr = Local(OperandType.I64);
+
+ nodes.AddBefore(node, new Operation(Instruction.Copy, retValueAddr, arg0Reg));
+
+ Operation loadOp = new Operation(Instruction.Load, dest, retValueAddr);
+
+ node = nodes.AddAfter(node, loadOp);
+
+ operation.Destination = null;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+ node = nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources);
+
+ return node;
+ }
+
+ // Expands a Call for the System V calling convention:
+ // - Integer and floating point arguments are copied to the next free integer or
+ // vector argument register, counted independently.
+ // - A V128 argument is split in two halves passed on two integer registers.
+ // - Arguments that do not fit on registers are written to the stack with SpillArg.
+ // - The result is read back from the return register(s).
+ // The call's sources are replaced by the call target followed by the registers used.
+ // Returns the last node inserted after the call, or the call node itself.
+ private static LLNode HandleCallSystemVAbi(LLNode node, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ LinkedList nodes = node.List;
+
+ List sources = new List();
+
+ sources.Add(operation.GetSource(0));
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ // Both halves need an integer register.
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct, we pass each half on a GPR if possible.
+ // NOTE(review): unlike the scalar case below, these two registers are not
+ // added to the call sources — confirm this is intended.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ // Stack arguments are laid out back to back, each one advancing the
+ // offset by the size of its type.
+ Operand offset = new Operand(stackOffset);
+
+ Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+ HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ if (dest != null)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // A V128 result comes back on two integer return registers (low and high
+ // halves), the vector is rebuilt from them after the call.
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = null;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+ node = nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources.ToArray());
+
+ return node;
+ }
+
+ // Expands a LoadArgument for the Windows calling convention. The first time a given
+ // argument is loaded, a copy from its argument register to a new local is inserted at
+ // the very start of the entry block and cached in preservedArgs. The LoadArgument
+ // itself is then replaced by a Copy from that local (or a Load through it for V128).
+ // Arguments that are not passed on registers are currently left untouched.
+ private static void HandleLoadArgumentWindowsAbi(
+ CompilerContext cctx,
+ LLNode node,
+ Operand[] preservedArgs,
+ Operation operation)
+ {
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ // When the function returns a V128, the first argument position is counted as
+ // taken, so all argument indices are shifted by one.
+ int retArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;
+
+ int index = source.AsInt32() + retArgs;
+
+ if (index < CallingConvention.GetArgumentsOnRegsCount())
+ {
+ Operand dest = operation.Destination;
+
+ if (preservedArgs[index] == null)
+ {
+ Operand argReg, pArg;
+
+ if (dest.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type);
+
+ pArg = Local(dest.Type);
+ }
+ else if (dest.Type == OperandType.V128)
+ {
+ // V128 arguments arrive as an address on an integer register.
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64);
+
+ pArg = Local(OperandType.I64);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type);
+
+ pArg = Local(dest.Type);
+ }
+
+ Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+
+ Operation argCopyOp = new Operation(dest.Type == OperandType.V128
+ ? Instruction.Load
+ : Instruction.Copy, dest, preservedArgs[index]);
+
+ node.List.AddBefore(node, argCopyOp);
+
+ Delete(node, operation);
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ }
+ }
+
+        // Expands a LoadArgument for the System V calling convention.
+        //
+        // The registers used by the argument are found by counting how many integer and
+        // vector registers the preceding arguments (cctx.FuncArgTypes) consume. The first
+        // time a given argument is loaded, the copy from its register(s) to a new local is
+        // inserted at the very start of the entry block and cached in preservedArgs; the
+        // LoadArgument itself is then replaced by a Copy from that local.
+        //
+        // cctx:          Compiler context (argument types and entry block).
+        // node:          List node holding the LoadArgument operation.
+        // preservedArgs: Per-argument cache of the locals holding the incoming values.
+        // operation:     The LoadArgument operation; source 0 is the constant argument index.
+        //
+        // Arguments that are not passed on registers are currently left untouched.
+        private static void HandleLoadArgumentSystemVAbi(
+            CompilerContext cctx,
+            LLNode node,
+            Operand[] preservedArgs,
+            Operation operation)
+        {
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+            Operand dest = operation.Destination;
+
+            Debug.Assert(dest != null, "LoadArgument without a destination.");
+
+            int index = source.AsInt32();
+
+            int intCount = 0;
+            int vecCount = 0;
+
+            // Count the registers taken by the arguments that come before this one.
+            for (int cIndex = 0; cIndex < index; cIndex++)
+            {
+                OperandType argType = cctx.FuncArgTypes[cIndex];
+
+                if (argType.IsInteger())
+                {
+                    intCount++;
+                }
+                else if (argType == OperandType.V128)
+                {
+                    intCount += 2;
+                }
+                else
+                {
+                    vecCount++;
+                }
+            }
+
+            // The register class is decided by the type of the value being loaded.
+            // The source operand is only the constant argument index (always an integer),
+            // so it must not be used for this classification.
+            OperandType destType = dest.Type;
+
+            bool passOnReg;
+
+            if (destType.IsInteger())
+            {
+                passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
+            }
+            else if (destType == OperandType.V128)
+            {
+                // Both halves need an integer register.
+                passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
+            }
+            else
+            {
+                passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
+            }
+
+            if (passOnReg)
+            {
+                // NOTE(review): preservedArgs is indexed by the raw argument index here,
+                // confirm that the caller sizes it for the System V register counts.
+                if (preservedArgs[index] == null)
+                {
+                    if (destType == OperandType.V128)
+                    {
+                        // V128 is a struct, we pass each half on a GPR if possible.
+                        Operand pArg = Local(OperandType.V128);
+
+                        Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+                        Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+                        Operation copyL = new Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+                        Operation copyH = new Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+                        // AddFirst prepends, so the high half is added first to end up
+                        // after the low half.
+                        cctx.Cfg.Entry.Operations.AddFirst(copyH);
+                        cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+                        preservedArgs[index] = pArg;
+                    }
+                    else
+                    {
+                        Operand pArg = Local(destType);
+
+                        Operand argReg = destType.IsInteger()
+                            ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), destType)
+                            : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), destType);
+
+                        Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);
+
+                        cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+                        preservedArgs[index] = pArg;
+                    }
+                }
+
+                Operation argCopyOp = new Operation(Instruction.Copy, dest, preservedArgs[index]);
+
+                node.List.AddBefore(node, argCopyOp);
+
+                Delete(node, operation);
+            }
+            else
+            {
+                // TODO: Pass on stack.
+            }
+        }
+
+ // Expands a Return for the Windows calling convention. Scalar values are copied to
+ // the integer or vector return register; a V128 value is stored to the address that
+ // was received on the first integer argument register. The Return operation is left
+ // without sources afterwards. Returns without a value are left untouched.
+ private static void HandleReturnWindowsAbi(
+ CompilerContext cctx,
+ LLNode node,
+ Operand[] preservedArgs,
+ Operation operation)
+ {
+ if (operation.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = operation.GetSource(0);
+
+ Operand retReg;
+
+ if (source.Type.IsInteger())
+ {
+ retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type);
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ // The return buffer address comes on the first integer argument register.
+ // Preserve it on a local at the start of the entry block (once), sharing
+ // slot 0 of the cache with the LoadArgument handler.
+ if (preservedArgs[0] == null)
+ {
+ Operand preservedArg = Local(OperandType.I64);
+
+ Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation copyOp = new Operation(Instruction.Copy, preservedArg, arg0);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[0] = preservedArg;
+ }
+
+ // For V128, "retReg" holds the address where the value is stored.
+ retReg = preservedArgs[0];
+ }
+ else
+ {
+ retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+ }
+
+ if (source.Type == OperandType.V128)
+ {
+ Operation retStoreOp = new Operation(Instruction.Store, null, retReg, source);
+
+ node.List.AddBefore(node, retStoreOp);
+ }
+ else
+ {
+ Operation retCopyOp = new Operation(Instruction.Copy, retReg, source);
+
+ node.List.AddBefore(node, retCopyOp);
+ }
+
+ operation.SetSources(new Operand[0]);
+ }
+
+        // Expands a Return for the System V calling convention by moving the returned
+        // value into the return register(s) right before the Return operation.
+        // A V128 value is split into its low and high halves, each written to one integer
+        // return register; any other value is copied to the integer or vector return
+        // register matching its type. Returns without a value are left untouched, and the
+        // Return operation itself is not modified.
+        private static void HandleReturnSystemVAbi(LLNode node, Operation operation)
+        {
+            bool hasValue = operation.SourcesCount != 0;
+
+            if (!hasValue)
+            {
+                return;
+            }
+
+            Operand value = operation.GetSource(0);
+
+            if (value.Type != OperandType.V128)
+            {
+                Operand scalarReg;
+
+                if (value.Type.IsInteger())
+                {
+                    scalarReg = Gpr(CallingConvention.GetIntReturnRegister(), value.Type);
+                }
+                else
+                {
+                    scalarReg = Xmm(CallingConvention.GetVecReturnRegister(), value.Type);
+                }
+
+                node.List.AddBefore(node, new Operation(Instruction.Copy, scalarReg, value));
+
+                return;
+            }
+
+            Operand lowReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+            Operand highReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+            Operation extractLow = new Operation(Instruction.VectorExtract, lowReg, value, Const(0));
+
+            node.List.AddBefore(node, extractLow);
+
+            Operation extractHigh = new Operation(Instruction.VectorExtract, highReg, value, Const(1));
+
+            node.List.AddBefore(node, extractHigh);
+        }
+
+        // Materializes a non-integer constant on a new local of the same type:
+        // the constant (as integer, see GetIntConst) is first copied to an integer local,
+        // which is then moved to the result with VectorCreateScalar.
+        // Both operations are inserted before the node. Returns the new local.
+        private static Operand AddXmmCopy(LLNode node, Operand source)
+        {
+            Operand result = Local(source.Type);
+
+            Operand rawBits = GetIntConst(source);
+
+            Operand gprTemp = AddCopy(node, rawBits);
+
+            node.List.AddBefore(node, new Operation(Instruction.VectorCreateScalar, result, gprTemp));
+
+            return result;
+        }
+
+        // Inserts, before the node, a Copy of the source to a new local of the same type.
+        // Returns the new local.
+        private static Operand AddCopy(LLNode node, Operand source)
+        {
+            Operand copyTarget = Local(source.Type);
+
+            node.List.AddBefore(node, new Operation(Instruction.Copy, copyTarget, source));
+
+            return copyTarget;
+        }
+
+        // Converts a floating point constant into an integer constant of the same width
+        // (FP32 through AsInt32, FP64 through AsInt64). Operands of any other type are
+        // returned unchanged.
+        private static Operand GetIntConst(Operand value)
+        {
+            switch (value.Type)
+            {
+                case OperandType.FP32:
+                    return Const(value.AsInt32());
+
+                case OperandType.FP64:
+                    return Const(value.AsInt64());
+
+                default:
+                    return value;
+            }
+        }
+
+        // Tells whether the operand's value does NOT fit on a signed 32-bit integer.
+        // I32 operands are read with AsInt32 (and sign-extended), everything else
+        // is read with AsInt64.
+        private static bool IsLongConst(Operand operand)
+        {
+            long wideValue;
+
+            if (operand.Type == OperandType.I32)
+            {
+                wideValue = operand.AsInt32();
+            }
+            else
+            {
+                wideValue = operand.AsInt64();
+            }
+
+            bool fitsOnS32 = ConstFitsOnS32(wideValue);
+
+            return !fitsOnS32;
+        }
+
+        // Tells whether the value can be represented as a signed 32-bit integer,
+        // that is, whether it lies in the [int.MinValue, int.MaxValue] range.
+        private static bool ConstFitsOnS32(long value)
+        {
+            return value >= int.MinValue && value <= int.MaxValue;
+        }
+
+        // Removes an operation from its list after detaching all of its operands:
+        // the destination is cleared first, then every source slot is set to null,
+        // and finally the node is unlinked.
+        private static void Delete(LLNode node, Operation operation)
+        {
+            operation.Destination = null;
+
+            int slot = 0;
+
+            while (slot < operation.SourcesCount)
+            {
+                operation.SetSource(slot, null);
+
+                slot++;
+            }
+
+            node.List.Remove(node);
+        }
+
+        // Creates a register operand of the Integer register class for the given
+        // register, with the given operand type.
+        private static Operand Gpr(X86Register register, OperandType type)
+        {
+            int regIndex = (int)register;
+
+            return Register(regIndex, RegisterType.Integer, type);
+        }
+
+        // Creates a register operand of the Vector register class for the given
+        // register, with the given operand type.
+        private static Operand Xmm(X86Register register, OperandType type)
+        {
+            int regIndex = (int)register;
+
+            return Register(regIndex, RegisterType.Vector, type);
+        }
+
+        // Tells whether the operation must have its destination equal to its first source.
+        // - Add, Multiply and Subtract: always for integer destinations, and for any
+        //   destination when VEX encoding is not supported.
+        // - Bitwise operations, ByteSwap, Negate, rotates and shifts: always.
+        // - Divide: only for non-integer destinations when VEX is not supported.
+        // - Vector inserts: only when VEX is not supported.
+        // - Anything else: decided by IsVexSameOperandDestSrc1.
+        private static bool IsSameOperandDestSrc1(Operation operation)
+        {
+            switch (operation.Instruction)
+            {
+                case Instruction.Add:
+                case Instruction.Multiply:
+                case Instruction.Subtract:
+                {
+                    if (HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        return operation.Destination.Type.IsInteger();
+                    }
+
+                    return true;
+                }
+
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseNot:
+                case Instruction.BitwiseOr:
+                case Instruction.ByteSwap:
+                case Instruction.Negate:
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                {
+                    return true;
+                }
+
+                case Instruction.Divide:
+                {
+                    if (HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        return false;
+                    }
+
+                    return !operation.Destination.Type.IsInteger();
+                }
+
+                case Instruction.VectorInsert:
+                case Instruction.VectorInsert16:
+                case Instruction.VectorInsert8:
+                {
+                    return !HardwareCapabilities.SupportsVexEncoding;
+                }
+
+                default:
+                {
+                    return IsVexSameOperandDestSrc1(operation);
+                }
+            }
+        }
+
+        // Tells whether an intrinsic operation must have its destination equal to its
+        // first source: this is the case, when VEX encoding is not supported, for
+        // intrinsics with two or more sources and a V128 destination.
+        // Always false for non-intrinsic operations.
+        private static bool IsVexSameOperandDestSrc1(Operation operation)
+        {
+            if (!IsIntrinsic(operation.Instruction))
+            {
+                return false;
+            }
+
+            if (HardwareCapabilities.SupportsVexEncoding)
+            {
+                return false;
+            }
+
+            if (operation.SourcesCount < 2)
+            {
+                return false;
+            }
+
+            Operand dest = operation.Destination;
+
+            return dest != null && dest.Type == OperandType.V128;
+        }
+
+        // Tells whether the instruction is allowed to keep an integer constant
+        // as its first source (no copy to a local is inserted for it).
+        private static bool HasConstSrc1(Instruction inst)
+        {
+            switch (inst)
+            {
+                case Instruction.Copy:
+                case Instruction.LoadArgument:
+                case Instruction.Spill:
+                case Instruction.SpillArg:
+                {
+                    return true;
+                }
+
+                default:
+                {
+                    return false;
+                }
+            }
+        }
+
+        // Tells whether the instruction is allowed to keep an integer constant
+        // as its second source (provided the value also fits on 32 bits, which
+        // is checked separately by the caller).
+        private static bool HasConstSrc2(Instruction inst)
+        {
+            switch (inst)
+            {
+                // Arithmetic and logic.
+                case Instruction.Add:
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseOr:
+                case Instruction.Multiply:
+                case Instruction.Subtract:
+
+                // Comparisons.
+                case Instruction.CompareEqual:
+                case Instruction.CompareGreater:
+                case Instruction.CompareGreaterOrEqual:
+                case Instruction.CompareGreaterOrEqualUI:
+                case Instruction.CompareGreaterUI:
+                case Instruction.CompareLess:
+                case Instruction.CompareLessOrEqual:
+                case Instruction.CompareLessOrEqualUI:
+                case Instruction.CompareLessUI:
+                case Instruction.CompareNotEqual:
+
+                // Rotates and shifts.
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+
+                // Vector element extraction.
+                case Instruction.VectorExtract:
+                case Instruction.VectorExtract16:
+                case Instruction.VectorExtract8:
+                {
+                    return true;
+                }
+
+                default:
+                {
+                    return false;
+                }
+            }
+        }
+
+        // Tells whether the first two sources of the instruction can be swapped
+        // without changing its result.
+        private static bool IsCommutative(Instruction inst)
+        {
+            switch (inst)
+            {
+                case Instruction.Add:
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseOr:
+                case Instruction.CompareEqual:
+                case Instruction.CompareNotEqual:
+                case Instruction.Multiply:
+                {
+                    return true;
+                }
+
+                default:
+                {
+                    return false;
+                }
+            }
+        }
+
+        // Tells whether the instruction is an intrinsic, which is represented
+        // on the IR by the Extended instruction.
+        private static bool IsIntrinsic(Instruction inst)
+        {
+            switch (inst)
+            {
+                case Instruction.Extended:
+                    return true;
+
+                default:
+                    return false;
+            }
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Condition.cs b/ARMeilleure/CodeGen/X86/X86Condition.cs
new file mode 100644
index 000000000..a17c6d6c5
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Condition.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Condition // Condition codes; the 0x0-0xf values look like the x86 "cc" encoding used by Jcc/SETcc/CMOVcc (confirm at the use sites).
+ {
+ Overflow = 0x0, // Each odd value is the logical negation of the even value just before it.
+ NotOverflow = 0x1,
+ Below = 0x2, // Below/Above follow the x86 naming for unsigned comparisons.
+ AboveOrEqual = 0x3,
+ Equal = 0x4,
+ NotEqual = 0x5,
+ BelowOrEqual = 0x6,
+ Above = 0x7,
+ Sign = 0x8,
+ NotSign = 0x9,
+ ParityEven = 0xa,
+ ParityOdd = 0xb,
+ Less = 0xc, // Less/Greater follow the x86 naming for signed comparisons.
+ GreaterOrEqual = 0xd,
+ LessOrEqual = 0xe,
+ Greater = 0xf
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
new file mode 100644
index 000000000..10ba891aa
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -0,0 +1,190 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Instruction // Instruction identifiers with implicit values, so the declaration order fixes each numeric value.
+ {
+ Add,
+ Addpd,
+ Addps,
+ Addsd,
+ Addss,
+ And,
+ Andnpd,
+ Andnps,
+ Bsr,
+ Bswap,
+ Call,
+ Cmovcc,
+ Cmp,
+ Cmppd,
+ Cmpps,
+ Cmpsd,
+ Cmpss,
+ Cmpxchg16b,
+ Comisd,
+ Comiss,
+ Cpuid,
+ Cvtdq2pd,
+ Cvtdq2ps,
+ Cvtpd2dq,
+ Cvtpd2ps,
+ Cvtps2dq,
+ Cvtps2pd,
+ Cvtsd2si,
+ Cvtsd2ss,
+ Cvtsi2sd,
+ Cvtsi2ss,
+ Cvtss2sd,
+ Div,
+ Divpd,
+ Divps,
+ Divsd,
+ Divss,
+ Haddpd,
+ Haddps,
+ Idiv,
+ Imul,
+ Imul128,
+ Insertps,
+ Lea,
+ Maxpd,
+ Maxps,
+ Maxsd,
+ Maxss,
+ Minpd,
+ Minps,
+ Minsd,
+ Minss,
+ Mov,
+ Mov16,
+ Mov8,
+ Movd,
+ Movdqu,
+ Movhlps,
+ Movlhps,
+ Movq,
+ Movsd,
+ Movss,
+ Movsx16,
+ Movsx32,
+ Movsx8,
+ Movzx16,
+ Movzx8,
+ Mul128,
+ Mulpd,
+ Mulps,
+ Mulsd,
+ Mulss,
+ Neg,
+ Not,
+ Or,
+ Paddb,
+ Paddd,
+ Paddq,
+ Paddw,
+ Pand,
+ Pandn,
+ Pavgb,
+ Pavgw,
+ Pblendvb,
+ Pcmpeqb,
+ Pcmpeqd,
+ Pcmpeqq,
+ Pcmpeqw,
+ Pcmpgtb,
+ Pcmpgtd,
+ Pcmpgtq,
+ Pcmpgtw,
+ Pextrb,
+ Pextrd,
+ Pextrq,
+ Pextrw,
+ Pinsrb,
+ Pinsrd,
+ Pinsrq,
+ Pinsrw,
+ Pmaxsb,
+ Pmaxsd,
+ Pmaxsw,
+ Pmaxub,
+ Pmaxud,
+ Pmaxuw,
+ Pminsb,
+ Pminsd,
+ Pminsw,
+ Pminub,
+ Pminud,
+ Pminuw,
+ Pmovsxbw,
+ Pmovsxdq,
+ Pmovsxwd,
+ Pmovzxbw,
+ Pmovzxdq,
+ Pmovzxwd,
+ Pmulld,
+ Pmullw,
+ Pop,
+ Popcnt,
+ Por,
+ Pshufb,
+ Pshufd,
+ Pslld,
+ Pslldq,
+ Psllq,
+ Psllw,
+ Psrad,
+ Psraw,
+ Psrld,
+ Psrlq, // NOTE(review): Psrlq precedes Psrldq, the one break in the otherwise alphabetical order.
+ Psrldq,
+ Psrlw,
+ Psubb,
+ Psubd,
+ Psubq,
+ Psubw,
+ Punpckhbw,
+ Punpckhdq,
+ Punpckhqdq,
+ Punpckhwd,
+ Punpcklbw,
+ Punpckldq,
+ Punpcklqdq,
+ Punpcklwd,
+ Push,
+ Pxor,
+ Rcpps,
+ Rcpss,
+ Ror,
+ Roundpd,
+ Roundps,
+ Roundsd,
+ Roundss,
+ Rsqrtps,
+ Rsqrtss,
+ Sar,
+ Setcc,
+ Shl,
+ Shr,
+ Shufpd,
+ Shufps,
+ Sqrtpd,
+ Sqrtps,
+ Sqrtsd,
+ Sqrtss,
+ Sub,
+ Subpd,
+ Subps,
+ Subsd,
+ Subss,
+ Test,
+ Unpckhpd,
+ Unpckhps,
+ Unpcklpd,
+ Unpcklps,
+ Vpblendvb,
+ Xor,
+ Xorpd,
+ Xorps,
+ // Declared last, so its implicit value equals the number of instructions above; keep it last.
+ Count
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Register.cs b/ARMeilleure/CodeGen/X86/X86Register.cs
new file mode 100644
index 000000000..01f63e311
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Register.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Register // Register numbers for both the general purpose and the XMM register sets.
+ {
+ Invalid = -1,
+ // General purpose registers, numbered 0-15.
+ Rax = 0,
+ Rcx = 1,
+ Rdx = 2,
+ Rbx = 3,
+ Rsp = 4,
+ Rbp = 5,
+ Rsi = 6,
+ Rdi = 7,
+ R8 = 8,
+ R9 = 9,
+ R10 = 10,
+ R11 = 11,
+ R12 = 12,
+ R13 = 13,
+ R14 = 14,
+ R15 = 15,
+ // XMM registers reuse the values 0-15, so a bare value does not tell the two register sets apart.
+ Xmm0 = 0,
+ Xmm1 = 1,
+ Xmm2 = 2,
+ Xmm3 = 3,
+ Xmm4 = 4,
+ Xmm5 = 5,
+ Xmm6 = 6,
+ Xmm7 = 7,
+ Xmm8 = 8,
+ Xmm9 = 9,
+ Xmm10 = 10,
+ Xmm11 = 11,
+ Xmm12 = 12,
+ Xmm13 = 13,
+ Xmm14 = 14,
+ Xmm15 = 15
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Common/BitMap.cs b/ARMeilleure/Common/BitMap.cs
new file mode 100644
index 000000000..9dff271b4
--- /dev/null
+++ b/ARMeilleure/Common/BitMap.cs
@@ -0,0 +1,138 @@
+using System.Collections;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Common
+{
+ class BitMap : IEnumerable<int>
+ {
+ private const int IntSize = 32;
+ private const int IntMask = IntSize - 1;
+ // One 32-bit word per IntSize bits; the list only ever grows (see EnsureCapacity).
+ private readonly List<int> _masks;
+ // Creates a map with every bit clear and room for initialCapacity bits, rounded up to whole words.
+ public BitMap(int initialCapacity)
+ {
+ int count = (initialCapacity + IntMask) / IntSize;
+
+ _masks = new List<int>(count);
+
+ while (count-- > 0)
+ {
+ _masks.Add(0);
+ }
+ }
+ // Sets one bit, growing the map if needed. Returns false when the bit was already set.
+ public bool Set(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ int wordMask = 1 << wordBit;
+
+ if ((_masks[wordIndex] & wordMask) != 0)
+ {
+ return false;
+ }
+
+ _masks[wordIndex] |= wordMask;
+
+ return true;
+ }
+ // Clears one bit, growing the map if needed.
+ public void Clear(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ int wordMask = 1 << wordBit;
+
+ _masks[wordIndex] &= ~wordMask;
+ }
+ // Tests one bit. Note that this also grows the map so that the bit index is addressable.
+ public bool IsSet(int bit)
+ {
+ EnsureCapacity(bit + 1);
+
+ int wordIndex = bit / IntSize;
+ int wordBit = bit & IntMask;
+
+ return (_masks[wordIndex] & (1 << wordBit)) != 0;
+ }
+ // Sets every bit that is set in map (union). Returns true when at least one bit changed.
+ public bool Set(BitMap map)
+ {
+ EnsureCapacity(map._masks.Count * IntSize);
+
+ bool modified = false;
+ // Walk only the words that map has: this map may be longer, and indexing map past its end would throw.
+ for (int index = 0; index < map._masks.Count; index++)
+ {
+ int newValue = _masks[index] | map._masks[index];
+
+ if (_masks[index] != newValue)
+ {
+ _masks[index] = newValue;
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+ // Clears every bit that is set in map (difference). Returns true when at least one bit changed.
+ public bool Clear(BitMap map)
+ {
+ EnsureCapacity(map._masks.Count * IntSize);
+
+ bool modified = false;
+ // Walk only the words that map has: this map may be longer, and indexing map past its end would throw.
+ for (int index = 0; index < map._masks.Count; index++)
+ {
+ int newValue = _masks[index] & ~map._masks[index];
+
+ if (_masks[index] != newValue)
+ {
+ _masks[index] = newValue;
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+ // Appends zero words until at least size bits are addressable.
+ private void EnsureCapacity(int size)
+ {
+ while (_masks.Count * IntSize < size)
+ {
+ _masks.Add(0);
+ }
+ }
+ // Yields the index of every set bit in ascending order.
+ public IEnumerator<int> GetEnumerator()
+ {
+ for (int index = 0; index < _masks.Count; index++)
+ {
+ int mask = _masks[index];
+
+ while (mask != 0)
+ {
+ int bit = BitUtils.LowestBitSet(mask);
+
+ mask &= ~(1 << bit);
+
+ yield return index * IntSize + bit;
+ }
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Common/BitUtils.cs b/ARMeilleure/Common/BitUtils.cs
new file mode 100644
index 000000000..55344608c
--- /dev/null
+++ b/ARMeilleure/Common/BitUtils.cs
@@ -0,0 +1,109 @@
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.Common
+{
+ static class BitUtils
+ {
+ private const int DeBruijnSequence = 0x77cb531;
+ // Maps the top five bits of (DeBruijnSequence << n) back to n; filled once by the static constructor.
+ private static readonly int[] DeBruijnLbsLut;
+
+ static BitUtils()
+ {
+ DeBruijnLbsLut = new int[32];
+
+ for (int index = 0; index < DeBruijnLbsLut.Length; index++)
+ {
+ uint lutIndex = (uint)(DeBruijnSequence * (1 << index)) >> 27;
+
+ DeBruijnLbsLut[lutIndex] = index;
+ }
+ }
+ // Index of the least significant set bit of value, or -1 when value is zero.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static int LowestBitSet(int value)
+ {
+ if (value == 0)
+ {
+ return -1;
+ }
+
+ int lsb = value & -value;
+
+ return DeBruijnLbsLut[(uint)(DeBruijnSequence * lsb) >> 27];
+ }
+ // Index of the most significant set bit of value, or -1 when value is zero.
+ public static int HighestBitSet(int value)
+ {
+ if (value == 0)
+ {
+ return -1;
+ }
+
+ for (int bit = 31; bit >= 0; bit--)
+ {
+ if (((value >> bit) & 1) != 0)
+ {
+ return bit;
+ }
+ }
+
+ return -1;
+ }
+
+ private static readonly sbyte[] HbsNibbleLut = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+ // Same as HighestBitSet, but only the low four bits of value are considered.
+ public static int HighestBitSetNibble(int value) => HbsNibbleLut[value & 0b1111];
+ // ORs together copies of bits shifted left by 0, size, 2 * size, ... while the shift stays below 64.
+ public static long Replicate(long bits, int size)
+ {
+ if (size <= 0)
+ {
+ // A non-positive step would never let the loop below reach 64.
+ throw new System.ArgumentOutOfRangeException(nameof(size));
+ }
+
+ long output = 0;
+
+ for (int bit = 0; bit < 64; bit += size)
+ {
+ output |= bits << bit;
+ }
+
+ return output;
+ }
+ // Number of set bits in value; each iteration removes the lowest set bit.
+ public static int CountBits(int value)
+ {
+ int count = 0;
+
+ while (value != 0)
+ {
+ value &= ~(value & -value);
+
+ count++;
+ }
+
+ return count;
+ }
+ // Value with the low "bits" bits set; 64 is special-cased because C# masks a 64-bit shift count to 0..63.
+ public static long FillWithOnes(int bits)
+ {
+ return bits == 64 ? -1L : (1L << bits) - 1;
+ }
+ // Signed overloads forward to the unsigned ones. Bits above "size" are not cleared; callers mask the result.
+ public static int RotateRight(int bits, int shift, int size) => (int)RotateRight((uint)bits, shift, size);
+
+ public static uint RotateRight(uint bits, int shift, int size)
+ {
+ return (bits >> shift) | (bits << (size - shift));
+ }
+
+ public static long RotateRight(long bits, int shift, int size) => (long)RotateRight((ulong)bits, shift, size);
+
+ public static ulong RotateRight(ulong bits, int shift, int size)
+ {
+ return (bits >> shift) | (bits << (size - shift));
+ }
+ }
+}
diff --git a/ARMeilleure/Common/EnumUtils.cs b/ARMeilleure/Common/EnumUtils.cs
new file mode 100644
index 000000000..2a4aa645b
--- /dev/null
+++ b/ARMeilleure/Common/EnumUtils.cs
@@ -0,0 +1,12 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+ static class EnumUtils
+ {
+ // Counts the names declared by the given enum type. Members that share a
+ // numeric value are still counted once per name, because the count comes
+ // from Enum.GetNames.
+ public static int GetCount(Type enumType) => Enum.GetNames(enumType).Length;
+ }
+}
diff --git a/ARMeilleure/Decoders/Block.cs b/ARMeilleure/Decoders/Block.cs
new file mode 100644
index 000000000..3d13c2d5e
--- /dev/null
+++ b/ARMeilleure/Decoders/Block.cs
@@ -0,0 +1,99 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+ class Block
+ {
+ public ulong Address { get; set; }
+ public ulong EndAddress { get; set; }
+
+ public Block Next { get; set; }
+ public Block Branch { get; set; }
+
+ public List<OpCode> OpCodes { get; private set; }
+
+ public Block()
+ {
+ OpCodes = new List<OpCode>();
+ }
+
+ public Block(ulong address) : this()
+ {
+ Address = address;
+ }
+ // Moves the op codes at or after rightBlock.Address, plus this block's end address and successors, into rightBlock.
+ public void Split(Block rightBlock)
+ {
+ int splitIndex = BinarySearch(OpCodes, rightBlock.Address);
+ // The bounds check keeps an empty block on the ArgumentException path below instead of failing on the indexer.
+ if (splitIndex < OpCodes.Count && OpCodes[splitIndex].Address < rightBlock.Address)
+ {
+ splitIndex++;
+ }
+
+ int splitCount = OpCodes.Count - splitIndex;
+
+ if (splitCount <= 0)
+ {
+ throw new ArgumentException("Can't split at right block address.");
+ }
+
+ rightBlock.EndAddress = EndAddress;
+
+ rightBlock.Next = Next;
+ rightBlock.Branch = Branch;
+
+ rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount));
+
+ EndAddress = rightBlock.Address;
+
+ Next = rightBlock;
+ Branch = null;
+
+ OpCodes.RemoveRange(splitIndex, splitCount);
+ }
+ // Returns the index of the op code at address, or the last index probed when there is no exact match (0 for an empty list).
+ private static int BinarySearch(List<OpCode> opCodes, ulong address)
+ {
+ int left = 0;
+ int middle = 0;
+ int right = opCodes.Count - 1;
+
+ while (left <= right)
+ {
+ int size = right - left;
+
+ middle = left + (size >> 1);
+
+ OpCode opCode = opCodes[middle];
+
+ if (address == opCode.Address)
+ {
+ break;
+ }
+
+ if (address < opCode.Address)
+ {
+ right = middle - 1;
+ }
+ else
+ {
+ left = middle + 1;
+ }
+ }
+
+ return middle;
+ }
+ // Last op code of the block, or null when the block is empty.
+ public OpCode GetLastOp()
+ {
+ if (OpCodes.Count > 0)
+ {
+ return OpCodes[OpCodes.Count - 1];
+ }
+
+ return null;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/Condition.cs b/ARMeilleure/Decoders/Condition.cs
new file mode 100644
index 000000000..727f897da
--- /dev/null
+++ b/ARMeilleure/Decoders/Condition.cs
@@ -0,0 +1,32 @@
+namespace ARMeilleure.Decoders
+{
+ enum Condition
+ {
+ Eq = 0,
+ Ne = 1,
+ GeUn = 2,
+ LtUn = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ GtUn = 8,
+ LeUn = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15
+ }
+
+ static class ConditionExtensions
+ {
+ public static Condition Invert(this Condition cond)
+ {
+ // Conditions come in even/odd pairs of opposites, so toggling bit 0 negates one.
+ int toggled = (int)cond ^ 1;
+ return (Condition)toggled;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/DataOp.cs b/ARMeilleure/Decoders/DataOp.cs
new file mode 100644
index 000000000..464d00898
--- /dev/null
+++ b/ARMeilleure/Decoders/DataOp.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ enum DataOp // Operation class exposed through IOpCodeAlu.DataOp.
+ {
+ Adr = 0,
+ Arithmetic = 1,
+ Logical = 2,
+ BitField = 3
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/Decoder.cs b/ARMeilleure/Decoders/Decoder.cs
new file mode 100644
index 000000000..2311e9e96
--- /dev/null
+++ b/ARMeilleure/Decoders/Decoder.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Reflection.Emit;
+
+namespace ARMeilleure.Decoders
+{
+ static class Decoder
+ {
+ // Signature of the generated factories that construct decoded OpCode objects.
+ private delegate object MakeOp(InstDescriptor inst, ulong address, int opCode);
+
+ // Factory cache keyed by the concrete op code type; MakeOpCode fills it lazily.
+ // The generic arguments follow from MakeOpCode, which calls GetOrAdd with a
+ // Type key and CacheOpActivator (Type -> MakeOp) as the value factory.
+ private static readonly ConcurrentDictionary<Type, MakeOp> _opActivators =
+ new ConcurrentDictionary<Type, MakeOp>();
+
+ public static Block[] DecodeBasicBlock(MemoryManager memory, ulong address, ExecutionMode mode)
+ {
+ // Decode a single block starting at address, with no upper address limit.
+ Block[] result = { new Block(address) };
+
+ FillBlock(memory, mode, result[0], ulong.MaxValue);
+ return result;
+ }
+
+ public static Block[] DecodeFunction(MemoryManager memory, ulong address, ExecutionMode mode)
+ {
+ List<Block> blocks = new List<Block>();
+ // Blocks that were created but not decoded yet.
+ Queue<Block> workQueue = new Queue<Block>();
+ // Every block created so far, keyed by its start address.
+ Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
+ // Returns the block registered for blkAddress, creating and queueing it on first use.
+ Block GetBlock(ulong blkAddress)
+ {
+ if (!visited.TryGetValue(blkAddress, out Block block))
+ {
+ block = new Block(blkAddress);
+
+ workQueue.Enqueue(block);
+
+ visited.Add(blkAddress, block);
+ }
+
+ return block;
+ }
+
+ GetBlock(address);
+
+ while (workQueue.TryDequeue(out Block currBlock))
+ {
+ // Check if the current block is inside another block.
+ if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ if (nBlock.Address == currBlock.Address)
+ {
+ throw new InvalidOperationException("Found duplicate block address on the list.");
+ }
+
+ nBlock.Split(currBlock);
+
+ blocks.Insert(nBlkIndex + 1, currBlock);
+
+ continue;
+ }
+
+ // If we have a block after the current one, set the limit address.
+ ulong limitAddress = ulong.MaxValue;
+
+ if (nBlkIndex != blocks.Count)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ int nextIndex = nBlkIndex + 1;
+
+ if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
+ {
+ limitAddress = blocks[nextIndex].Address;
+ }
+ else if (nBlock.Address > currBlock.Address)
+ {
+ limitAddress = blocks[nBlkIndex].Address;
+ }
+ }
+
+ FillBlock(memory, mode, currBlock, limitAddress);
+
+ if (currBlock.OpCodes.Count != 0)
+ {
+ // Set child blocks. "Branch" is the block the branch instruction
+ // points to (when taken), "Next" is the block at the next address,
+ // executed when the branch is not taken. For unconditional branches
+ // or end of executable, Next is null.
+ OpCode lastOp = currBlock.GetLastOp();
+
+ bool isCall = IsCall(lastOp);
+
+ if (lastOp is IOpCodeBImm op && !isCall)
+ {
+ currBlock.Branch = GetBlock((ulong)op.Immediate);
+ }
+ // NOTE(review): the "|| isCall" clause is disabled, so a call gets a Next block only if IsUnconditionalBranch rejects it.
+ if (!IsUnconditionalBranch(lastOp) /*|| isCall*/)
+ {
+ currBlock.Next = GetBlock(currBlock.EndAddress);
+ }
+ }
+
+ // Insert the new block on the list (sorted by address).
+ if (blocks.Count != 0)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
+ }
+ else
+ {
+ blocks.Add(currBlock);
+ }
+ }
+
+ return blocks.ToArray();
+ }
+
+ private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
+ {
+ index = 0;
+ // Blocks are expected to be sorted by address. On success index is the block whose
+ // [Address, EndAddress) range holds address; otherwise it is the last index probed.
+ int left = 0;
+ int right = blocks.Count - 1;
+
+ while (left <= right)
+ {
+ int size = right - left;
+
+ int middle = left + (size >> 1);
+
+ Block block = blocks[middle];
+
+ index = middle;
+
+ if (address >= block.Address && address < block.EndAddress)
+ {
+ return true;
+ }
+
+ if (address < block.Address)
+ {
+ right = middle - 1;
+ }
+ else
+ {
+ left = middle + 1;
+ }
+ }
+
+ return false;
+ }
+
+ private static void FillBlock(
+ MemoryManager memory,
+ ExecutionMode mode,
+ Block block,
+ ulong limitAddress)
+ {
+ // Decodes sequentially from block.Address, appending each op code to the
+ // block, until limitAddress is reached or a branch/exception op code has
+ // been appended. EndAddress ends up just past the last decoded op code.
+ ulong cursor = block.Address;
+
+ while (cursor < limitAddress)
+ {
+ OpCode decoded = DecodeOpCode(memory, cursor, mode);
+
+ block.OpCodes.Add(decoded);
+ cursor += (ulong)decoded.OpCodeSizeInBytes;
+
+ // A branch or exception-generating instruction terminates the block.
+ if (IsBranch(decoded) || IsException(decoded))
+ {
+ break;
+ }
+ }
+
+ block.EndAddress = cursor;
+ }
+
+ // True for OpCodeBImm and OpCodeBReg instances, and for anything that
+ // IsAarch32Branch accepts.
+ private static bool IsBranch(OpCode opCode) =>
+ opCode is OpCodeBImm || opCode is OpCodeBReg ||
+ IsAarch32Branch(opCode);
+
+ // True for OpCodeBImmAl and OpCodeBReg instances, and for anything that
+ // IsAarch32UnconditionalBranch accepts.
+ private static bool IsUnconditionalBranch(OpCode opCode) =>
+ opCode is OpCodeBImmAl || opCode is OpCodeBReg ||
+ IsAarch32UnconditionalBranch(opCode);
+
+ private static bool IsAarch32UnconditionalBranch(OpCode opCode)
+ {
+ // ARM32 encodes a condition on most instructions instead of offering a
+ // dedicated "always" branch as ARM64 does, so a branch only counts as
+ // unconditional when its condition field is Al or above (Al or Nv).
+ if (opCode is OpCode32 op32)
+ {
+ return IsAarch32Branch(op32) && op32.Cond >= Condition.Al;
+ }
+
+ return false;
+ }
+
+ private static bool IsAarch32Branch(OpCode opCode)
+ {
+ // Note: On ARM32, most ALU operations can write to R15 (PC),
+ // so we must consider such operations as potential branches as well.
+ if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc)
+ {
+ return true;
+ }
+
+ // Same thing for memory operations. We have the cases where PC is a target
+ // register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is
+ // a write back to PC (wback == true && Rn == 15), however the latter may
+ // be "undefined" depending on the CPU, so compilers should not produce that.
+ if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult)
+ {
+ int rt, rn;
+
+ bool wBack, isLoad;
+
+ if (opCode is IOpCode32Mem opMem)
+ {
+ rt = opMem.Rt;
+ rn = opMem.Rn;
+ wBack = opMem.WBack;
+ isLoad = opMem.IsLoad;
+
+ // For the dual load, we also need to take into account the
+ // case where Rt2 == 15 (PC).
+ if (rt == 14 && opMem.Instruction.Name == InstName.Ldrd)
+ {
+ rt = RegisterAlias.Aarch32Pc;
+ }
+ }
+ else if (opCode is IOpCode32MemMult opMemMult)
+ {
+ const int pcMask = 1 << RegisterAlias.Aarch32Pc;
+ // Only PC membership matters here, so any other register list is recorded as register 0.
+ rt = (opMemMult.RegisterMask & pcMask) != 0 ? RegisterAlias.Aarch32Pc : 0;
+ rn = opMemMult.Rn;
+ wBack = opMemMult.PostOffset != 0;
+ isLoad = opMemMult.IsLoad;
+ }
+ else
+ {
+ throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder.");
+ }
+
+ if ((rt == RegisterAlias.Aarch32Pc && isLoad) ||
+ (rn == RegisterAlias.Aarch32Pc && wBack))
+ {
+ return true;
+ }
+ }
+
+ // Explicit branch instructions.
+ return opCode is IOpCode32BImm ||
+ opCode is IOpCode32BReg;
+ }
+
+ private static bool IsCall(OpCode opCode)
+ {
+ // TODO (CQ): ARM32 support. Only the Bl and Blr names count as calls for now.
+ InstName name = opCode.Instruction.Name;
+ return name == InstName.Bl || name == InstName.Blr;
+ }
+
+ private static bool IsException(OpCode opCode)
+ {
+ // Brk, Svc and Und are the instruction names treated as exceptions here.
+ InstName name = opCode.Instruction.Name;
+ return name == InstName.Brk || name == InstName.Svc || name == InstName.Und;
+ }
+
+ public static OpCode DecodeOpCode(MemoryManager memory, ulong address, ExecutionMode mode)
+ {
+ // Reads one 32-bit word at address, looks it up in the table that matches
+ // the execution mode, then builds either the specialised op code type the
+ // table returned or, when it returned no type, a plain OpCode.
+
+ int rawWord = memory.ReadInt32((long)address);
+
+ InstDescriptor inst;
+ Type opType;
+
+ switch (mode)
+ {
+ case ExecutionMode.Aarch64:
+ (inst, opType) = OpCodeTable.GetInstA64(rawWord);
+ break;
+
+ case ExecutionMode.Aarch32Arm:
+ (inst, opType) = OpCodeTable.GetInstA32(rawWord);
+ break;
+
+ default: // Any other mode is handled as ExecutionMode.Aarch32Thumb.
+ (inst, opType) = OpCodeTable.GetInstT32(rawWord);
+ break;
+ }
+
+ if (opType == null)
+ {
+ return new OpCode(inst, address, rawWord);
+ }
+
+ return MakeOpCode(inst, opType, address, rawWord);
+ }
+
+ private static OpCode MakeOpCode(InstDescriptor inst, Type type, ulong address, int opCode)
+ {
+ // Fetch the cached factory for this type (generated on first use) and invoke it.
+ MakeOp factory = _opActivators.GetOrAdd(type, CacheOpActivator);
+ return (OpCode)factory(inst, address, opCode);
+ }
+
+ private static MakeOp CacheOpActivator(Type type)
+ {
+ Type[] argTypes = new Type[] { typeof(InstDescriptor), typeof(ulong), typeof(int) };
+ // Builds a dynamic method equivalent to "new type(inst, address, opCode)" for the given op code type.
+ DynamicMethod mthd = new DynamicMethod($"Make{type.Name}", type, argTypes);
+
+ ILGenerator generator = mthd.GetILGenerator();
+ // Push the three arguments in order, call the matching constructor and return the new object.
+ generator.Emit(OpCodes.Ldarg_0);
+ generator.Emit(OpCodes.Ldarg_1);
+ generator.Emit(OpCodes.Ldarg_2);
+ generator.Emit(OpCodes.Newobj, type.GetConstructor(argTypes));
+ generator.Emit(OpCodes.Ret);
+
+ return (MakeOp)mthd.CreateDelegate(typeof(MakeOp));
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/DecoderHelper.cs b/ARMeilleure/Decoders/DecoderHelper.cs
new file mode 100644
index 000000000..3cbd49123
--- /dev/null
+++ b/ARMeilleure/Decoders/DecoderHelper.cs
@@ -0,0 +1,113 @@
+using ARMeilleure.Common;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ static class DecoderHelper
+ {
+ public struct BitMask
+ {
+ public long WMask;
+ public long TMask;
+ public int Pos;
+ public int Shift;
+ public bool IsUndefined;
+ // Value handed back by DecodeBitMask for the encodings it rejects.
+ public static BitMask Invalid => new BitMask { IsUndefined = true };
+ }
+ // Expands the immS (bits 10-15), immR (bits 16-21), N (bit 22) and sf (bit 31) fields into replicated masks.
+ public static BitMask DecodeBitMask(int opCode, bool immediate)
+ {
+ int immS = (opCode >> 10) & 0x3f;
+ int immR = (opCode >> 16) & 0x3f;
+
+ int nBit = (opCode >> 22) & 1;
+ int sfBit = (opCode >> 31) & 1;
+
+ int highBit = BitUtils.HighestBitSet((~immS & 0x3f) | (nBit << 6));
+
+ if (highBit < 1 || (nBit != 0 && sfBit == 0))
+ {
+ return BitMask.Invalid;
+ }
+
+ int elementSize = 1 << highBit;
+ int levels = elementSize - 1;
+
+ int ones = immS & levels;
+ int rotation = immR & levels;
+
+ // The immediate form is rejected when the ones field equals the level mask.
+ if (immediate && ones == levels)
+ {
+ return BitMask.Invalid;
+ }
+
+ long wMask = BitUtils.FillWithOnes(ones + 1);
+ long tMask = BitUtils.FillWithOnes(((ones - rotation) & levels) + 1);
+
+ if (rotation > 0)
+ {
+ long rotated = BitUtils.RotateRight(wMask, rotation, elementSize);
+ wMask = rotated & BitUtils.FillWithOnes(elementSize);
+ }
+
+ return new BitMask
+ {
+ Pos = immS,
+ Shift = immR,
+
+ WMask = BitUtils.Replicate(wMask, elementSize),
+ TMask = BitUtils.Replicate(tMask, elementSize)
+ };
+ }
+ // Expands an 8-bit immediate into float (size 0) or double (size 1) bits; other sizes throw.
+ public static long DecodeImm8Float(long imm, int size)
+ {
+ int expBits, fracBits;
+
+ switch (size)
+ {
+ case 0: expBits = 8; fracBits = 23; break;
+ case 1: expBits = 11; fracBits = 52; break;
+
+ default: throw new ArgumentOutOfRangeException(nameof(size));
+ }
+
+ long sign = (imm >> 7) & 1;
+ long bBit = (imm >> 6) & 1;
+
+ long value = (imm & 0x3f) << (fracBits - 4);
+
+ if (bBit != 0)
+ {
+ value |= ((1L << (expBits - 3)) - 1) << (fracBits + 2);
+ }
+
+ value |= (bBit ^ 1) << (fracBits + expBits - 1);
+ value |= sign << (fracBits + expBits);
+
+ return value;
+ }
+
+ // Low 24 bits of opCode, sign-extended and multiplied by four.
+ // The left shift puts bit 23 in the sign position before shifting back.
+ public static long DecodeImm24_2(int opCode) =>
+ ((long)opCode << 40) >> 38;
+
+ // Low 26 bits of opCode, sign-extended and multiplied by four.
+ // The left shift puts bit 25 in the sign position before shifting back.
+ public static long DecodeImm26_2(int opCode) =>
+ ((long)opCode << 38) >> 36;
+
+ // Bits 5-23 of opCode, sign-extended and multiplied by four.
+ // The final mask clears the two low bits left over from bits 3-4.
+ public static long DecodeImmS19_2(int opCode) =>
+ (((long)opCode << 40) >> 43) & ~3;
+
+ // Bits 5-18 of opCode, sign-extended and multiplied by four.
+ // The final mask clears the two low bits left over from bits 3-4.
+ public static long DecodeImmS14_2(int opCode) =>
+ (((long)opCode << 45) >> 48) & ~3;
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode.cs b/ARMeilleure/Decoders/IOpCode.cs
new file mode 100644
index 000000000..37ba7a4c6
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode.cs
@@ -0,0 +1,17 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode // Read-only view of a decoded instruction; the OpCode class implements it.
+ {
+ ulong Address { get; }
+
+ InstDescriptor Instruction { get; }
+
+ RegisterSize RegisterSize { get; }
+
+ int GetBitsCount(); // OpCode returns 32, 64 or 128 depending on RegisterSize.
+
+ OperandType GetOperandType(); // OpCode returns I32 for RegisterSize.Int32 and I64 otherwise.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32.cs b/ARMeilleure/Decoders/IOpCode32.cs
new file mode 100644
index 000000000..126c10690
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32 : IOpCode // Members shared by the 32-bit op code interfaces.
+ {
+ Condition Cond { get; } // OpCode32 takes this from the top four bits of the instruction word.
+
+ uint GetPc(); // OpCode32 returns the instruction address plus twice the instruction size.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32Alu.cs b/ARMeilleure/Decoders/IOpCode32Alu.cs
new file mode 100644
index 000000000..72aea30ef
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32Alu.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Alu : IOpCode32 // Implemented by OpCode32Alu.
+ {
+ int Rd { get; } // OpCode32Alu reads it from bits 12-15; Decoder treats Rd == PC as a potential branch.
+ int Rn { get; } // OpCode32Alu reads it from bits 16-19.
+
+ bool SetFlags { get; } // OpCode32Alu reads it from bit 20.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32BImm.cs b/ARMeilleure/Decoders/IOpCode32BImm.cs
new file mode 100644
index 000000000..ec7db2c26
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32BImm.cs
@@ -0,0 +1,4 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32BImm : IOpCode32, IOpCodeBImm { } // Adds no members of its own; OpCode32BImm implements it.
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32BReg.cs b/ARMeilleure/Decoders/IOpCode32BReg.cs
new file mode 100644
index 000000000..097ab4275
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32BReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32BReg : IOpCode32 // Decoder.IsAarch32Branch treats every implementer as an explicit branch.
+ {
+ int Rm { get; } // Presumably the register holding the branch target; confirm with an implementer.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32Mem.cs b/ARMeilleure/Decoders/IOpCode32Mem.cs
new file mode 100644
index 000000000..0585ab53a
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32Mem.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32Mem : IOpCode32 // Decoder.IsAarch32Branch inspects these members to spot writes to PC.
+ {
+ int Rt { get; } // A load with Rt == PC is treated as a branch by the decoder.
+ int Rn { get; } // Combined with WBack: write-back to Rn == PC is treated as a branch.
+
+ bool WBack { get; }
+
+ bool IsLoad { get; }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32MemMult.cs b/ARMeilleure/Decoders/IOpCode32MemMult.cs
new file mode 100644
index 000000000..18fd3f6bf
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32MemMult.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCode32MemMult : IOpCode32 // Decoder.IsAarch32Branch inspects these members to spot writes to PC.
+ {
+ int Rn { get; }
+
+ int RegisterMask { get; } // The decoder tests bit RegisterAlias.Aarch32Pc of this mask to see whether PC is transferred.
+
+ int PostOffset { get; } // The decoder treats a non-zero value as base-register write-back.
+
+ bool IsLoad { get; }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAlu.cs b/ARMeilleure/Decoders/IOpCodeAlu.cs
new file mode 100644
index 000000000..b8c28513d
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAlu.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAlu : IOpCode // Base of the IOpCodeAluImm, IOpCodeAluRs and IOpCodeAluRx interfaces.
+ {
+ int Rd { get; } // Presumably the destination register index; confirm with an implementer.
+ int Rn { get; } // Presumably the first source register index; confirm with an implementer.
+
+ DataOp DataOp { get; } // One of Adr, Arithmetic, Logical or BitField.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAluImm.cs b/ARMeilleure/Decoders/IOpCodeAluImm.cs
new file mode 100644
index 000000000..02f4c997b
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAluImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluImm : IOpCodeAlu // ALU op code variant that carries an immediate value.
+ {
+ long Immediate { get; }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAluRs.cs b/ARMeilleure/Decoders/IOpCodeAluRs.cs
new file mode 100644
index 000000000..22540b11a
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAluRs.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluRs : IOpCodeAlu // ALU op code variant with a second register, a shift amount and a shift type.
+ {
+ int Shift { get; }
+ int Rm { get; }
+
+ ShiftType ShiftType { get; }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAluRx.cs b/ARMeilleure/Decoders/IOpCodeAluRx.cs
new file mode 100644
index 000000000..9d16be787
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAluRx.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeAluRx : IOpCodeAlu // ALU op code variant with a second register, a shift amount and an IntType.
+ {
+ int Shift { get; }
+ int Rm { get; }
+
+ IntType IntType { get; } // Presumably selects how Rm is extended (IntType names widths and signedness); confirm.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeBImm.cs b/ARMeilleure/Decoders/IOpCodeBImm.cs
new file mode 100644
index 000000000..958bff28d
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeBImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeBImm : IOpCode // Branch with an immediate target.
+ {
+ long Immediate { get; } // Decoder.DecodeFunction casts this to ulong and uses it as the address of the target block.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeCond.cs b/ARMeilleure/Decoders/IOpCodeCond.cs
new file mode 100644
index 000000000..9808f7c08
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeCond.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeCond : IOpCode // Op code that carries a Condition value.
+ {
+ Condition Cond { get; }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeLit.cs b/ARMeilleure/Decoders/IOpCodeLit.cs
new file mode 100644
index 000000000..74084a457
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeLit.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeLit : IOpCode // Presumably the load-literal family ("Lit"); no implementer is visible here, so confirm.
+ {
+ int Rt { get; }
+ long Immediate { get; }
+ int Size { get; }
+ bool Signed { get; }
+ bool Prefetch { get; }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeSimd.cs b/ARMeilleure/Decoders/IOpCodeSimd.cs
new file mode 100644
index 000000000..056ef045c
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeSimd.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+ interface IOpCodeSimd : IOpCode // SIMD op code view; exposes only a size field.
+ {
+ int Size { get; } // Meaning of the value is not visible here (TODO confirm with an implementer).
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/InstDescriptor.cs b/ARMeilleure/Decoders/InstDescriptor.cs
new file mode 100644
index 000000000..ee2b1c2e4
--- /dev/null
+++ b/ARMeilleure/Decoders/InstDescriptor.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+ struct InstDescriptor // Pairs an instruction name with the delegate that emits code for it.
+ {
+ public static InstDescriptor Undefined => new InstDescriptor(InstName.Und, null); // Und name with no emitter.
+
+ public InstName Name { get; }
+ public InstEmitter Emitter { get; } // Null for Undefined, so check before invoking.
+
+ public InstDescriptor(InstName name, InstEmitter emitter)
+ {
+ Name = name;
+ Emitter = emitter;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/InstEmitter.cs b/ARMeilleure/Decoders/InstEmitter.cs
new file mode 100644
index 000000000..a8b526569
--- /dev/null
+++ b/ARMeilleure/Decoders/InstEmitter.cs
@@ -0,0 +1,6 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.Decoders
+{
+ delegate void InstEmitter(ArmEmitterContext context); // Emitter callback stored in InstDescriptor.Emitter.
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IntType.cs b/ARMeilleure/Decoders/IntType.cs
new file mode 100644
index 000000000..244e96805
--- /dev/null
+++ b/ARMeilleure/Decoders/IntType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+ enum IntType // Values 0-3 are the unsigned widths; adding 4 gives the signed type of the same width.
+ {
+ UInt8 = 0,
+ UInt16 = 1,
+ UInt32 = 2,
+ UInt64 = 3,
+ Int8 = 4,
+ Int16 = 5,
+ Int32 = 6,
+ Int64 = 7
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode.cs b/ARMeilleure/Decoders/OpCode.cs
new file mode 100644
index 000000000..0bfc2456b
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode.cs
@@ -0,0 +1,48 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+ class OpCode : IOpCode
+ {
+ public ulong Address { get; private set; }
+ public int RawOpCode { get; private set; }
+
+ public int OpCodeSizeInBytes { get; protected set; } = 4;
+
+ public InstDescriptor Instruction { get; protected set; }
+
+ public RegisterSize RegisterSize { get; protected set; }
+
+ public OpCode(InstDescriptor inst, ulong address, int opCode)
+ {
+ // Derived op codes may override the 64-bit default set here.
+ RegisterSize = RegisterSize.Int64;
+
+ Instruction = inst;
+
+ RawOpCode = opCode;
+ Address = address;
+ }
+
+ public int GetPairsCount() => GetBitsCount() / 16;
+ public int GetBytesCount() => GetBitsCount() / 8;
+ // Width in bits of the current RegisterSize; throws for a value outside the four known sizes.
+ public int GetBitsCount()
+ {
+ switch (RegisterSize)
+ {
+ case RegisterSize.Int32: return 32;
+ case RegisterSize.Int64:
+ case RegisterSize.Simd64: return 64;
+ case RegisterSize.Simd128: return 128;
+
+ default: throw new InvalidOperationException();
+ }
+ }
+ // I32 only for RegisterSize.Int32; every other register size reports I64.
+ public OperandType GetOperandType()
+ {
+ return RegisterSize != RegisterSize.Int32 ? OperandType.I64 : OperandType.I32;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32.cs b/ARMeilleure/Decoders/OpCode32.cs
new file mode 100644
index 000000000..20927d5e4
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32 : OpCode
+ {
+ public Condition Cond { get; protected set; }
+
+ public OpCode32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ // The condition is held in the top four bits of the instruction word.
+ Cond = (Condition)((uint)opCode >> 28);
+ RegisterSize = RegisterSize.Int32;
+ }
+
+ public uint GetPc()
+ {
+ // For ARMv4 pipeline compatibility, the visible PC is two instructions
+ // ahead of the one being executed.
+ uint ahead = (uint)OpCodeSizeInBytes * 2;
+ return (uint)Address + ahead;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32Alu.cs b/ARMeilleure/Decoders/OpCode32Alu.cs
new file mode 100644
index 000000000..8d03baddb
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32Alu.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+ class OpCode32Alu : OpCode32, IOpCode32Alu
+ {
+ public int Rd { get; private set; }
+ public int Rn { get; private set; }
+
+ public bool SetFlags { get; private set; }
+
+ public OpCode32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+ {
+ // Layout: flag-setting bit at 20, Rn in bits 16-19, Rd in bits 12-15.
+ SetFlags = ((opCode >> 20) & 1) == 1;
+ Rn = (opCode >> 16) & 0xf;
+ Rd = (opCode >> 12) & 0xf;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32AluImm.cs b/ARMeilleure/Decoders/OpCode32AluImm.cs
new file mode 100644
index 000000000..bba03e4d8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32AluImm.cs
@@ -0,0 +1,25 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+    // 32-bit ALU opcode whose operand is an 8-bit value rotated right by twice a 4-bit field.
+    class OpCode32AluImm : OpCode32Alu
+    {
+        public int Immediate { get; private set; }
+
+        public bool IsRotated { get; private set; }
+
+        public OpCode32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int imm8     = opCode & 0xff;
+            int rotField = (opCode >> 8) & 0xf;
+
+            // IsRotated records whether the rotation field (bits 11:8) is non-zero.
+            IsRotated = rotField != 0;
+
+            int rotation = rotField * 2;
+
+            Immediate = BitUtils.RotateRight(imm8, rotation, 32);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/ARMeilleure/Decoders/OpCode32AluRsImm.cs
new file mode 100644
index 000000000..779d6cecf
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32AluRsImm.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+    // 32-bit ALU opcode with a register operand (Rm), a shift type and a 5-bit immediate.
+    class OpCode32AluRsImm : OpCode32Alu
+    {
+        public int Rm  { get; private set; }
+        public int Imm { get; private set; }
+
+        public ShiftType ShiftType { get; private set; }
+
+        public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Field layout: Rm = bits 3:0, ShiftType = bits 6:5, Imm = bits 11:7.
+            Rm        = opCode & 0xf;
+            ShiftType = (ShiftType)((opCode >> 5) & 0b11);
+            Imm       = (opCode >> 7) & 0b11111;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32BImm.cs b/ARMeilleure/Decoders/OpCode32BImm.cs
new file mode 100644
index 000000000..ea6443bc8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32BImm.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+    // 32-bit branch opcode; Immediate is the PC plus DecoderHelper.DecodeImm24_2(opCode).
+    class OpCode32BImm : OpCode32, IOpCode32BImm
+    {
+        public long Immediate { get; private set; }
+
+        public OpCode32BImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // When the condition is never, the instruction is BLX to Thumb mode.
+            bool isBlx = Cond == Condition.Nv;
+
+            uint pc = GetPc();
+
+            if (!isBlx)
+            {
+                pc &= ~3u;
+            }
+
+            long target = pc + DecoderHelper.DecodeImm24_2(opCode);
+
+            if (isBlx)
+            {
+                // Bit 24 of the opcode (H) becomes bit 1 of the target.
+                target |= (long)((opCode >> 23) & 2);
+            }
+
+            Immediate = target;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32BReg.cs b/ARMeilleure/Decoders/OpCode32BReg.cs
new file mode 100644
index 000000000..ffb487070
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32BReg.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+    // 32-bit branch opcode whose register index (Rm) is held in bits 3:0.
+    class OpCode32BReg : OpCode32, IOpCode32BReg
+    {
+        public int Rm { get; private set; }
+
+        public OpCode32BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int RmMask = 0b1111;
+
+            Rm = opCode & RmMask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32Mem.cs b/ARMeilleure/Decoders/OpCode32Mem.cs
new file mode 100644
index 000000000..f4e88d592
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32Mem.cs
@@ -0,0 +1,41 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+    // 32-bit memory opcode: decodes Rt, Rn and the flags held in bits 20, 21, 23 and 24.
+    class OpCode32Mem : OpCode32, IOpCode32Mem
+    {
+        public int Rt { get; private set; }
+        public int Rn { get; private set; }
+
+        public int Immediate { get; protected set; }
+
+        public bool Index        { get; private set; }
+        public bool Add          { get; private set; }
+        public bool WBack        { get; private set; }
+        public bool Unprivileged { get; private set; }
+
+        public bool IsLoad { get; private set; }
+
+        public OpCode32Mem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rt = (opCode >> 12) & 0xf;
+            Rn = (opCode >> 16) & 0xf;
+
+            bool l = ((opCode >> 20) & 1) != 0;
+            bool w = ((opCode >> 21) & 1) != 0;
+            bool u = ((opCode >> 23) & 1) != 0;
+            bool p = ((opCode >> 24) & 1) != 0;
+
+            Index = p;
+            Add   = u;
+
+            // Write-back happens when P is clear or W is set; P clear with W set marks the unprivileged form.
+            WBack        = w || !p;
+            Unprivileged = w && !p;
+
+            // Ldrd counts as a load regardless of bit 20.
+            IsLoad = l || inst.Name == InstName.Ldrd;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32MemImm.cs b/ARMeilleure/Decoders/OpCode32MemImm.cs
new file mode 100644
index 000000000..f79c63197
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32MemImm.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+    // 32-bit memory opcode whose Immediate is the low 12 bits of the opcode.
+    class OpCode32MemImm : OpCode32Mem
+    {
+        public OpCode32MemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int Imm12Mask = (1 << 12) - 1;
+
+            Immediate = opCode & Imm12Mask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32MemImm8.cs b/ARMeilleure/Decoders/OpCode32MemImm8.cs
new file mode 100644
index 000000000..08027fb75
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32MemImm8.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+    // 32-bit memory opcode whose 8-bit Immediate is split across bits 3:0 and 11:8.
+    class OpCode32MemImm8 : OpCode32Mem
+    {
+        public OpCode32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int lowNibble  = opCode & 0xf;
+            int highNibble = (opCode >> 8) & 0xf;
+
+            // Bits 11:8 form the upper half, bits 3:0 the lower half.
+            Immediate = (highNibble << 4) | lowNibble;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32MemMult.cs b/ARMeilleure/Decoders/OpCode32MemMult.cs
new file mode 100644
index 000000000..b61b50ea8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32MemMult.cs
@@ -0,0 +1,47 @@
+namespace ARMeilleure.Decoders
+{
+    // 32-bit multiple-register memory opcode: decodes the 16-bit register mask and the two offsets.
+    class OpCode32MemMult : OpCode32, IOpCode32MemMult
+    {
+        public int Rn { get; private set; }
+
+        public int RegisterMask { get; private set; }
+        public int Offset       { get; private set; }
+        public int PostOffset   { get; private set; }
+
+        public bool IsLoad { get; private set; }
+
+        public OpCode32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rn           = (opCode >> 16) & 0xf;
+            RegisterMask = opCode & 0xffff;
+
+            IsLoad = ((opCode >> 20) & 1) != 0;
+
+            bool w = ((opCode >> 21) & 1) != 0;
+            bool u = ((opCode >> 23) & 1) != 0;
+            bool p = ((opCode >> 24) & 1) != 0;
+
+            // Total size: 4 bytes for every bit set in the register mask.
+            int regsSize = 0;
+
+            for (int mask = RegisterMask; mask != 0; mask >>= 1)
+            {
+                regsSize += (mask & 1) * 4;
+            }
+
+            // Offset starts at minus the total size when U is clear; 4 is added when U equals P.
+            int offset = u ? 0 : -regsSize;
+
+            if (u == p)
+            {
+                offset += 4;
+            }
+
+            Offset = offset;
+
+            // PostOffset is plus or minus the total size when W is set, zero otherwise.
+            PostOffset = !w ? 0 : (u ? regsSize : -regsSize);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAdr.cs b/ARMeilleure/Decoders/OpCodeAdr.cs
new file mode 100644
index 000000000..fc8219f6c
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAdr.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+    // Opcode with a destination register and an immediate assembled from two opcode fields.
+    class OpCodeAdr : OpCode
+    {
+        public int Rd { get; private set; }
+
+        public long Immediate { get; private set; }
+
+        public OpCodeAdr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rd = opCode & 0x1f;
+
+            // Bits 30:29 are OR-ed into the two lowest bits of the immediate.
+            long lowBits  = ((long)opCode >> 29) & 3;
+            long highBits = DecoderHelper.DecodeImmS19_2(opCode);
+
+            Immediate = highBits | lowBits;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAlu.cs b/ARMeilleure/Decoders/OpCodeAlu.cs
new file mode 100644
index 000000000..171662a06
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAlu.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode: decodes Rd, Rn, the data operation kind and the register size.
+    class OpCodeAlu : OpCode, IOpCodeAlu
+    {
+        public int Rd { get; protected set; }
+        public int Rn { get; private   set; }
+
+        public DataOp DataOp { get; private set; }
+
+        public OpCodeAlu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rd     = opCode & 0x1f;
+            Rn     = (opCode >> 5) & 0x1f;
+            DataOp = (DataOp)((opCode >> 24) & 0x3);
+
+            // Bit 31 is the sign bit of the int, so a negative opcode selects 64 bits.
+            if (opCode < 0)
+            {
+                RegisterSize = RegisterSize.Int64;
+            }
+            else
+            {
+                RegisterSize = RegisterSize.Int32;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluBinary.cs b/ARMeilleure/Decoders/OpCodeAluBinary.cs
new file mode 100644
index 000000000..2bdf1d798
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluBinary.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with an additional register (Rm) in bits 20:16.
+    class OpCodeAluBinary : OpCodeAlu
+    {
+        public int Rm { get; private set; }
+
+        public OpCodeAluBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int RegMask = 0b11111;
+
+            Rm = (opCode >> 16) & RegMask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluImm.cs b/ARMeilleure/Decoders/OpCodeAluImm.cs
new file mode 100644
index 000000000..35c83fcc3
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluImm.cs
@@ -0,0 +1,43 @@
+using System;
+
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with an immediate operand, decoded according to DataOp.
+    class OpCodeAluImm : OpCodeAlu, IOpCodeAluImm
+    {
+        public long Immediate { get; private set; }
+
+        // Throws ArgumentException when DataOp is neither Arithmetic nor Logical.
+        public OpCodeAluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            if (DataOp == DataOp.Arithmetic)
+            {
+                // 12-bit value from bits 21:10, shifted left by 12 times the field in bits 23:22.
+                Immediate = (opCode >> 10) & 0xfff;
+
+                int shift = (opCode >> 22) & 3;
+
+                Immediate <<= shift * 12;
+            }
+            else if (DataOp == DataOp.Logical)
+            {
+                var bm = DecoderHelper.DecodeBitMask(opCode, true);
+
+                if (bm.IsUndefined)
+                {
+                    Instruction = InstDescriptor.Undefined;
+
+                    return;
+                }
+
+                Immediate = bm.WMask;
+            }
+            else
+            {
+                // The single-string overload takes a message, so the parameter
+                // name has to go through the (message, paramName) overload.
+                throw new ArgumentException($"Unsupported data operation {DataOp}.", nameof(opCode));
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluRs.cs b/ARMeilleure/Decoders/OpCodeAluRs.cs
new file mode 100644
index 000000000..84fb6ac6d
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluRs.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with a register operand (Rm) shifted by an immediate amount.
+    class OpCodeAluRs : OpCodeAlu, IOpCodeAluRs
+    {
+        public int Shift { get; private set; }
+        public int Rm    { get; private set; }
+
+        public ShiftType ShiftType { get; private set; }
+
+        public OpCodeAluRs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int amount = (opCode >> 10) & 0x3f;
+
+            // A shift amount of GetBitsCount() or more makes the opcode undefined.
+            bool inRange = amount < GetBitsCount();
+
+            if (!inRange)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            Shift     = amount;
+            Rm        = (opCode >> 16) & 0x1f;
+            ShiftType = (ShiftType)((opCode >> 22) & 0x3);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluRx.cs b/ARMeilleure/Decoders/OpCodeAluRx.cs
new file mode 100644
index 000000000..5c8d427e8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluRx.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with a register operand (Rm), an IntType field and a 3-bit shift.
+    class OpCodeAluRx : OpCodeAlu, IOpCodeAluRx
+    {
+        public int Shift { get; private set; }
+        public int Rm    { get; private set; }
+
+        public IntType IntType { get; private set; }
+
+        public OpCodeAluRx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Field layout: Shift = bits 12:10, IntType = bits 15:13, Rm = bits 20:16.
+            int shiftField = (opCode >> 10) & 0b111;
+            int typeField  = (opCode >> 13) & 0b111;
+
+            Rm      = (opCode >> 16) & 0x1f;
+            IntType = (IntType)typeField;
+            Shift   = shiftField;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImm.cs b/ARMeilleure/Decoders/OpCodeBImm.cs
new file mode 100644
index 000000000..2821a6246
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImm.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+    // Base for immediate branch opcodes; this class itself never assigns Immediate.
+    class OpCodeBImm : OpCode, IOpCodeBImm
+    {
+        public long Immediate { get; protected set; }
+
+        public OpCodeBImm(InstDescriptor inst, ulong address, int opCode)
+            : base(inst, address, opCode)
+        {
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmAl.cs b/ARMeilleure/Decoders/OpCodeBImmAl.cs
new file mode 100644
index 000000000..94bcea884
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmAl.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+    // Immediate branch whose target is the opcode address plus DecoderHelper.DecodeImm26_2(opCode).
+    class OpCodeBImmAl : OpCodeBImm
+    {
+        public OpCodeBImmAl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            long displacement = DecoderHelper.DecodeImm26_2(opCode);
+
+            Immediate = (long)address + displacement;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmCmp.cs b/ARMeilleure/Decoders/OpCodeBImmCmp.cs
new file mode 100644
index 000000000..2b7c28341
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmCmp.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    // Immediate branch that also names a register (Rt) and selects a 32- or 64-bit register size.
+    class OpCodeBImmCmp : OpCodeBImm
+    {
+        public int Rt { get; private set; }
+
+        public OpCodeBImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rt = opCode & 0x1f;
+
+            long displacement = DecoderHelper.DecodeImmS19_2(opCode);
+
+            Immediate = (long)address + displacement;
+
+            // Bit 31 set (negative int) selects the 64-bit register size.
+            bool is64Bits = opCode < 0;
+
+            RegisterSize = is64Bits ? RegisterSize.Int64 : RegisterSize.Int32;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmCond.cs b/ARMeilleure/Decoders/OpCodeBImmCond.cs
new file mode 100644
index 000000000..f898821ac
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmCond.cs
@@ -0,0 +1,26 @@
+namespace ARMeilleure.Decoders
+{
+    // Conditional immediate branch; undefined when bit 4 of the opcode is set.
+    class OpCodeBImmCond : OpCodeBImm, IOpCodeCond
+    {
+        public Condition Cond { get; private set; }
+
+        public OpCodeBImmCond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            bool o0Set = (opCode & (1 << 4)) != 0;
+
+            if (o0Set)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            Cond = (Condition)(opCode & 0xf);
+
+            long displacement = DecoderHelper.DecodeImmS19_2(opCode);
+
+            Immediate = (long)address + displacement;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmTest.cs b/ARMeilleure/Decoders/OpCodeBImmTest.cs
new file mode 100644
index 000000000..6687c2e7a
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmTest.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+    // Immediate branch that names a register (Rt) and a 6-bit bit index.
+    class OpCodeBImmTest : OpCodeBImm
+    {
+        public int Rt  { get; private set; }
+        public int Bit { get; private set; }
+
+        public OpCodeBImmTest(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rt = opCode & 0x1f;
+
+            long displacement = DecoderHelper.DecodeImmS14_2(opCode);
+
+            Immediate = (long)address + displacement;
+
+            // Bits 23:19 give the low five bits of the index; bit 31 gives bit 5.
+            int lowBits = (opCode >> 19) & 0x1f;
+            int topBit  = (opCode >> 26) & 0x20;
+
+            Bit = lowBits | topBit;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBReg.cs b/ARMeilleure/Decoders/OpCodeBReg.cs
new file mode 100644
index 000000000..00c51ec71
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBReg.cs
@@ -0,0 +1,26 @@
+namespace ARMeilleure.Decoders
+{
+    // Register branch opcode; Rn (bits 9:5) is decoded only when the fixed fields match.
+    class OpCodeBReg : OpCode
+    {
+        public int Rn { get; private set; }
+
+        public OpCodeBReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int op2 = (opCode >> 16) & 0x1f;
+            int op4 = opCode & 0x1f;
+
+            // op2 must be all ones and op4 all zeros, anything else is undefined.
+            bool isValid = op2 == 0b11111 && op4 == 0b00000;
+
+            if (!isValid)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            Rn = (opCode >> 5) & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBfm.cs b/ARMeilleure/Decoders/OpCodeBfm.cs
new file mode 100644
index 000000000..2ae8edf56
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBfm.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode carrying the masks, position and shift produced by DecoderHelper.DecodeBitMask.
+    class OpCodeBfm : OpCodeAlu
+    {
+        public long WMask { get; private set; }
+        public long TMask { get; private set; }
+        public int  Pos   { get; private set; }
+        public int  Shift { get; private set; }
+
+        public OpCodeBfm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            var bm = DecoderHelper.DecodeBitMask(opCode, false);
+
+            if (!bm.IsUndefined)
+            {
+                WMask = bm.WMask;
+                TMask = bm.TMask;
+                Pos   = bm.Pos;
+                Shift = bm.Shift;
+            }
+            else
+            {
+                // The decoded bit mask is flagged undefined, so the opcode is too.
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCcmp.cs b/ARMeilleure/Decoders/OpCodeCcmp.cs
new file mode 100644
index 000000000..c302f6a32
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCcmp.cs
@@ -0,0 +1,32 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+    // Conditional compare opcode: decodes Nzcv, the condition and a shared Rm/immediate field.
+    class OpCodeCcmp : OpCodeAlu, IOpCodeCond
+    {
+        public    int Nzcv { get; private set; }
+        protected int RmImm;
+
+        public Condition Cond { get; private set; }
+
+        public OpCodeCcmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            bool o3Set = (opCode & (1 << 4)) != 0;
+
+            if (o3Set)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            Nzcv  = opCode & 0xf;
+            Cond  = (Condition)((opCode >> 12) & 0xf);
+            RmImm = (opCode >> 16) & 0x1f;
+
+            // The destination is forced to the zero register alias.
+            Rd = RegisterAlias.Zr;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCcmpImm.cs b/ARMeilleure/Decoders/OpCodeCcmpImm.cs
new file mode 100644
index 000000000..4a2d01f46
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCcmpImm.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+    // Conditional compare whose shared RmImm field is exposed as an immediate.
+    class OpCodeCcmpImm : OpCodeCcmp, IOpCodeAluImm
+    {
+        public long Immediate
+        {
+            get { return RmImm; }
+        }
+
+        public OpCodeCcmpImm(InstDescriptor inst, ulong address, int opCode)
+            : base(inst, address, opCode)
+        {
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCcmpReg.cs b/ARMeilleure/Decoders/OpCodeCcmpReg.cs
new file mode 100644
index 000000000..0e2b922cf
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCcmpReg.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+    // Conditional compare whose shared RmImm field is exposed as a register operand.
+    class OpCodeCcmpReg : OpCodeCcmp, IOpCodeAluRs
+    {
+        public int Rm
+        {
+            get { return RmImm; }
+        }
+
+        // The shift is fixed: LSL by zero.
+        public int Shift
+        {
+            get { return 0; }
+        }
+
+        public ShiftType ShiftType
+        {
+            get { return ShiftType.Lsl; }
+        }
+
+        public OpCodeCcmpReg(InstDescriptor inst, ulong address, int opCode)
+            : base(inst, address, opCode)
+        {
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCsel.cs b/ARMeilleure/Decoders/OpCodeCsel.cs
new file mode 100644
index 000000000..fd07e6fd4
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCsel.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with an additional register (Rm, bits 20:16) and a condition (bits 15:12).
+    class OpCodeCsel : OpCodeAlu, IOpCodeCond
+    {
+        public int Rm { get; private set; }
+
+        public Condition Cond { get; private set; }
+
+        public OpCodeCsel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int condField = (opCode >> 12) & 0b1111;
+            int rmField   = (opCode >> 16) & 0b11111;
+
+            Rm   = rmField;
+            Cond = (Condition)condField;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeException.cs b/ARMeilleure/Decoders/OpCodeException.cs
new file mode 100644
index 000000000..9781c543b
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeException.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+    // Opcode carrying a 16-bit identifier taken from bits 20:5.
+    class OpCodeException : OpCode
+    {
+        public int Id { get; private set; }
+
+        public OpCodeException(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int IdMask = 0xffff;
+
+            Id = (opCode >> 5) & IdMask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMem.cs b/ARMeilleure/Decoders/OpCodeMem.cs
new file mode 100644
index 000000000..5a7ab482a
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMem.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    // Base memory opcode: decodes Rt, Rn and Size; Extend64 is not assigned here.
+    class OpCodeMem : OpCode
+    {
+        public int  Rt       { get; protected set; }
+        public int  Rn       { get; protected set; }
+        public int  Size     { get; protected set; }
+        public bool Extend64 { get; protected set; }
+
+        public OpCodeMem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int RegMask = 0x1f;
+
+            Rt = opCode & RegMask;
+            Rn = (opCode >> 5) & RegMask;
+
+            // Size is the two top bits of the opcode.
+            Size = (opCode >> 30) & 0x3;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemEx.cs b/ARMeilleure/Decoders/OpCodeMemEx.cs
new file mode 100644
index 000000000..5956f3672
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemEx.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+    // Memory opcode with two extra register fields: Rt2 (bits 14:10) and Rs (bits 20:16).
+    class OpCodeMemEx : OpCodeMem
+    {
+        public int Rt2 { get; private set; }
+        public int Rs  { get; private set; }
+
+        public OpCodeMemEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int RegMask = 0b11111;
+
+            Rs  = (opCode >> 16) & RegMask;
+            Rt2 = (opCode >> 10) & RegMask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemImm.cs b/ARMeilleure/Decoders/OpCodeMemImm.cs
new file mode 100644
index 000000000..517434f29
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemImm.cs
@@ -0,0 +1,54 @@
+namespace ARMeilleure.Decoders
+{
+    // Memory opcode with an immediate offset: either 9-bit signed, or 12-bit unsigned shifted left by Size.
+    class OpCodeMemImm : OpCodeMem
+    {
+        public    long Immediate { get; protected set; }
+        public    bool WBack     { get; protected set; }
+        public    bool PostIdx   { get; protected set; }
+        protected bool Unscaled  { get; private   set; }
+
+        private enum MemOp
+        {
+            Unscaled     = 0,
+            PostIndexed  = 1,
+            Unprivileged = 2,
+            PreIndexed   = 3,
+            Unsigned
+        }
+
+        public OpCodeMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Extend64 = ((opCode >> 22) & 3) == 2;
+
+            bool bit24Clear = ((opCode >> 24) & 1) == 0;
+
+            // The type is not valid for the Unsigned Immediate 12-bits encoding,
+            // because the bits 11:10 are used for the larger Immediate offset.
+            MemOp type = MemOp.Unsigned;
+
+            if (bit24Clear)
+            {
+                type = (MemOp)((opCode >> 10) & 3);
+            }
+
+            PostIdx  = type == MemOp.PostIndexed;
+            Unscaled = type == MemOp.Unscaled || type == MemOp.Unprivileged;
+
+            // Unscaled and Unprivileged don't write back,
+            // but they do use the 9-bits Signed Immediate.
+            WBack = bit24Clear && !Unscaled;
+
+            if (bit24Clear)
+            {
+                // 9-bits Signed Immediate.
+                Immediate = (opCode << 11) >> 23;
+            }
+            else
+            {
+                // 12-bits Unsigned Immediate.
+                Immediate = ((opCode >> 10) & 0xfff) << Size;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemLit.cs b/ARMeilleure/Decoders/OpCodeMemLit.cs
new file mode 100644
index 000000000..b80585cb4
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemLit.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+    // Literal memory opcode: Immediate is relative to the opcode address; bits 31:30 pick the access kind.
+    class OpCodeMemLit : OpCode, IOpCodeLit
+    {
+        public int  Rt        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Size      { get; private set; }
+        public bool Signed    { get; private set; }
+        public bool Prefetch  { get; private set; }
+
+        public OpCodeMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rt = opCode & 0x1f;
+
+            long displacement = DecoderHelper.DecodeImmS19_2(opCode);
+
+            Immediate = (long)address + displacement;
+
+            int opc = (opCode >> 30) & 3;
+
+            // opc: 0 = size 2, 1 = size 3, 2 = size 2 signed, 3 = prefetch (size 0).
+            Prefetch = opc == 3;
+            Signed   = opc == 2;
+
+            if (Prefetch)
+            {
+                Size = 0;
+            }
+            else
+            {
+                Size = opc == 1 ? 3 : 2;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemPair.cs b/ARMeilleure/Decoders/OpCodeMemPair.cs
new file mode 100644
index 000000000..ea329a1db
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemPair.cs
@@ -0,0 +1,30 @@
+namespace ARMeilleure.Decoders
+{
+    // Memory opcode with a second register (Rt2) and a 7-bit signed immediate shifted left by Size.
+    class OpCodeMemPair : OpCodeMemImm
+    {
+        public int Rt2 { get; private set; }
+
+        public OpCodeMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int idxField = (opCode >> 23) & 0x3;
+            int opcField = (opCode >> 30) & 0x3;
+
+            Rt2      = (opCode >> 10) & 0x1f;
+            WBack    = (idxField & 1) != 0;
+            PostIdx  = idxField == 1;
+            Extend64 = opcField == 1;
+            Size     = (opcField >> 1) | 2;
+
+            DecodeImm(opCode);
+        }
+
+        // Sign-extends the 7-bit field at bits 21:15 and shifts it left by Size.
+        protected void DecodeImm(int opCode)
+        {
+            long shifted = (long)(opCode >> 15) << 57;
+
+            Immediate = shifted >> (57 - Size);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemReg.cs b/ARMeilleure/Decoders/OpCodeMemReg.cs
new file mode 100644
index 000000000..f5c2f9911
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemReg.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    // Memory opcode with a register (Rm), an IntType field and a shift flag.
+    class OpCodeMemReg : OpCodeMem
+    {
+        public bool Shift { get; private set; }
+        public int  Rm    { get; private set; }
+
+        public IntType IntType { get; private set; }
+
+        public OpCodeMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int typeField = (opCode >> 13) & 0x7;
+            int opcField  = (opCode >> 22) & 0x3;
+
+            Shift    = (opCode & (1 << 12)) != 0;
+            IntType  = (IntType)typeField;
+            Rm       = (opCode >> 16) & 0x1f;
+            Extend64 = opcField == 2;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMov.cs b/ARMeilleure/Decoders/OpCodeMov.cs
new file mode 100644
index 000000000..b65178cff
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMov.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+    // Move opcode with a 16-bit immediate shifted left by 0, 16, 32 or 48 bits.
+    class OpCodeMov : OpCode
+    {
+        public int Rd { get; private set; }
+
+        public long Immediate { get; private set; }
+
+        public int Bit { get; private set; }
+
+        public OpCodeMov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            bool is64Bits  = opCode < 0;
+            bool upperHalf = (opCode & (1 << 22)) != 0;
+
+            // With bit 31 clear, bit 22 (a shift of 32 or 48) is not allowed.
+            if (!is64Bits && upperHalf)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            int position = ((opCode >> 21) & 0x3) << 4;
+
+            long imm16 = (opCode >> 5) & 0xffff;
+
+            Rd        = opCode & 0x1f;
+            Bit       = position;
+            Immediate = imm16 << position;
+
+            RegisterSize = is64Bits ? RegisterSize.Int64 : RegisterSize.Int32;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMul.cs b/ARMeilleure/Decoders/OpCodeMul.cs
new file mode 100644
index 000000000..3eb4dc97c
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMul.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with two extra registers: Ra (bits 14:10) and Rm (bits 20:16).
+    class OpCodeMul : OpCodeAlu
+    {
+        public int Rm { get; private set; }
+        public int Ra { get; private set; }
+
+        public OpCodeMul(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int RegMask = 0b11111;
+
+            Rm = (opCode >> 16) & RegMask;
+            Ra = (opCode >> 10) & RegMask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimd.cs b/ARMeilleure/Decoders/OpCodeSimd.cs
new file mode 100644
index 000000000..a258446c1
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimd.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+    // Base SIMD opcode: decodes Rd, Rn, Opc, Size and the vector register size.
+    class OpCodeSimd : OpCode, IOpCodeSimd
+    {
+        public int Rd   { get; private   set; }
+        public int Rn   { get; private   set; }
+        public int Opc  { get; private   set; }
+        public int Size { get; protected set; }
+
+        public OpCodeSimd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rd   = opCode & 0x1f;
+            Rn   = (opCode >> 5) & 0x1f;
+            Opc  = (opCode >> 15) & 0x3;
+            Size = (opCode >> 22) & 0x3;
+
+            // Bit 30 selects between the Simd128 and Simd64 register sizes.
+            bool q = (opCode & (1 << 30)) != 0;
+
+            RegisterSize = q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdCvt.cs b/ARMeilleure/Decoders/OpCodeSimdCvt.cs
new file mode 100644
index 000000000..15658bb89
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdCvt.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD conversion opcode: FBits is 64 minus the 6-bit scale field; bit 31 picks the integer size.
+    class OpCodeSimdCvt : OpCodeSimd
+    {
+        public int FBits { get; private set; }
+
+        public OpCodeSimdCvt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int scale = (opCode >> 10) & 0x3f;
+
+            FBits = 64 - scale;
+
+            // This replaces the SIMD register size chosen by the base constructor.
+            if (opCode < 0)
+            {
+                RegisterSize = RegisterSize.Int64;
+            }
+            else
+            {
+                RegisterSize = RegisterSize.Int32;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdExt.cs b/ARMeilleure/Decoders/OpCodeSimdExt.cs
new file mode 100644
index 000000000..d585449c1
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdExt.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD register opcode with an extra 4-bit immediate taken from bits 14:11.
+    class OpCodeSimdExt : OpCodeSimdReg
+    {
+        public int Imm4 { get; private set; }
+
+        public OpCodeSimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            const int Imm4Mask = 0b1111;
+
+            Imm4 = (opCode >> 11) & Imm4Mask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdFcond.cs b/ARMeilleure/Decoders/OpCodeSimdFcond.cs
new file mode 100644
index 000000000..9e7a5f3bf
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdFcond.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD register opcode with an Nzcv value (bits 3:0) and a condition (bits 15:12).
+    class OpCodeSimdFcond : OpCodeSimdReg, IOpCodeCond
+    {
+        public int Nzcv { get; private set; }
+
+        public Condition Cond { get; private set; }
+
+        public OpCodeSimdFcond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int condField = (opCode >> 12) & 0b1111;
+
+            Cond = (Condition)condField;
+            Nzcv = opCode & 0b1111;
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdFmov.cs b/ARMeilleure/Decoders/OpCodeSimdFmov.cs
new file mode 100644
index 000000000..61a3f077d
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdFmov.cs
@@ -0,0 +1,33 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD move opcode whose immediate is produced from an 8-bit field by DecoderHelper.DecodeImm8Float.
+    class OpCodeSimdFmov : OpCode, IOpCodeSimd
+    {
+        public int  Rd        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Size      { get; private set; }
+
+        public OpCodeSimdFmov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int type = (opCode >> 22) & 0x3;
+            int imm5 = (opCode >> 5) & 0x1f;
+
+            // Only types 0 and 1 with a zero imm5 field are accepted.
+            bool isValid = imm5 == 0b00000 && type <= 1;
+
+            if (!isValid)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            Size = type;
+            Rd   = opCode & 0x1f;
+
+            long imm8 = (opCode >> 13) & 0xff;
+
+            Immediate = DecoderHelper.DecodeImm8Float(imm8, type);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdImm.cs b/ARMeilleure/Decoders/OpCodeSimdImm.cs
new file mode 100644
index 000000000..ecad906d9
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdImm.cs
@@ -0,0 +1,106 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD opcode with an 8-bit immediate expanded according to cMode (bits 15:12) and op (bit 29).
+    class OpCodeSimdImm : OpCode, IOpCodeSimd
+    {
+        public int  Rd        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Size      { get; private set; }
+
+        public OpCodeSimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rd = opCode & 0x1f;
+
+            int cMode = (opCode >> 12) & 0xf;
+            int op    = (opCode >> 29) & 0x1;
+
+            int modeLow  = cMode & 1;
+            int modeHigh = cMode >> 1;
+
+            long imm;
+
+            // Low five bits come from bits 9:5, high three bits from bits 18:16.
+            imm  = ((uint)opCode >>  5) & 0x1f;
+            imm |= ((uint)opCode >> 11) & 0xe0;
+
+            // modeHigh is a 3-bit value; the branches below cover all eight encodings.
+            if (modeHigh == 0b111)
+            {
+                Size = modeLow != 0 ? op : 3;
+
+                switch (op | (modeLow << 1))
+                {
+                    case 0:
+                        // 64-bits Immediate.
+                        // Transform abcd efgh into abcd efgh abcd efgh ...
+                        imm = (long)((ulong)imm * 0x0101010101010101);
+                        break;
+
+                    case 1:
+                        // 64-bits Immediate.
+                        // Transform abcd efgh into aaaa aaaa bbbb bbbb ...
+                        // First reverse the order of the eight bits.
+                        imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
+                        imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
+                        imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1;
+
+                        // Then place each bit at the top of its own byte...
+                        imm = (long)((ulong)imm * 0x8040201008040201);
+                        imm = (long)((ulong)imm & 0x8080808080808080);
+
+                        // ...and smear it down over the remaining seven bits of that byte.
+                        imm |= imm >> 4;
+                        imm |= imm >> 2;
+                        imm |= imm >> 1;
+                        break;
+
+                    case 2:
+                    case 3:
+                        // Floating point Immediate.
+                        imm = DecoderHelper.DecodeImm8Float(imm, Size);
+                        break;
+                }
+            }
+            else if ((modeHigh & 0b110) == 0b100)
+            {
+                // 16-bits shifted Immediate.
+                Size = 1;
+
+                imm <<= (modeHigh & 1) << 3;
+            }
+            else if ((modeHigh & 0b100) == 0b000)
+            {
+                // 32-bits shifted Immediate.
+                Size = 2;
+
+                imm <<= modeHigh << 3;
+            }
+            else
+            {
+                // Only 0b110 is left: 32-bits shifted Immediate (fill with ones).
+                Size = 2;
+
+                imm = ShlOnes(imm, 8 << modeLow);
+            }
+
+            Immediate = imm;
+
+            RegisterSize = ((opCode >> 30) & 1) != 0
+                ? RegisterSize.Simd128
+                : RegisterSize.Simd64;
+        }
+
+        // Shifts value left by shift bits and sets the vacated low bits to ones.
+        private static long ShlOnes(long value, int shift)
+        {
+            if (shift != 0)
+            {
+                return value << shift | (long)(ulong.MaxValue >> (64 - shift));
+            }
+            else
+            {
+                return value;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdIns.cs b/ARMeilleure/Decoders/OpCodeSimdIns.cs
new file mode 100644
index 000000000..78328adb5
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdIns.cs
@@ -0,0 +1,40 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD opcode that derives an element size and two element indices from imm5 (bits 20:16) and imm4 (bits 14:11).
+    class OpCodeSimdIns : OpCodeSimd
+    {
+        public int SrcIndex { get; private set; }
+        public int DstIndex { get; private set; }
+
+        public OpCodeSimdIns(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int imm4 = (opCode >> 11) & 0xf;
+            int imm5 = (opCode >> 16) & 0x1f;
+
+            // The lowest set bit of imm5 selects the element size, and only its low
+            // four bits name a valid size. Both 0b10000 and 0b00000 therefore have no
+            // size; the latter used to be decoded as a byte access to element 0.
+            if ((imm5 & 0b1111) == 0)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            // Isolate the lowest set bit, then turn it into its log2.
+            Size = imm5 & -imm5;
+
+            switch (Size)
+            {
+                case 1: Size = 0; break;
+                case 2: Size = 1; break;
+                case 4: Size = 2; break;
+                case 8: Size = 3; break;
+            }
+
+            // DstIndex is the part of imm5 above the size bit; SrcIndex drops the low Size bits of imm4.
+            SrcIndex = imm4 >> Size;
+            DstIndex = imm5 >> (Size + 1);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemImm.cs b/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
new file mode 100644
index 000000000..6b9e66d93
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
@@ -0,0 +1,24 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD immediate-offset memory opcode; opcode bit 23 extends Size with a third bit.
+    class OpCodeSimdMemImm : OpCodeMemImm, IOpCodeSimd
+    {
+        public OpCodeSimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Opcode bit 23 becomes bit 2 of Size.
+            int sizeHigh = (opCode >> 21) & 4;
+
+            Size |= sizeHigh;
+
+            bool usesScaledOffset = !(WBack || Unscaled);
+
+            // The base class shifted the offset by the low two Size bits only.
+            if (usesScaledOffset && Size >= 4)
+            {
+                Immediate <<= 4;
+            }
+
+            Extend64 = false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemLit.cs b/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
new file mode 100644
index 000000000..607df1392
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD literal-addressed memory opcode. Immediate holds the instruction address
+    // plus the offset returned by DecoderHelper.DecodeImmS19_2.
+    class OpCodeSimdMemLit : OpCode, IOpCodeSimd, IOpCodeLit
+    {
+        public int Rt { get; private set; }
+        public long Immediate { get; private set; }
+        public int Size { get; private set; }
+        public bool Signed => false;
+        public bool Prefetch => false;
+
+        public OpCodeSimdMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int sizeField = (opCode >> 30) & 3;
+            if (sizeField != 3)
+            {
+                Rt = opCode & 0x1f;
+                Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+                Size = sizeField + 2;
+            }
+            else
+            {
+                // A size field of 3 is rejected; no other property is assigned in that case.
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemMs.cs b/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
new file mode 100644
index 000000000..9fa5ff42c
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
@@ -0,0 +1,46 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD multiple-structure memory opcode: decodes Reps, SElems, the element size and Elems.
+    class OpCodeSimdMemMs : OpCodeMemReg, IOpCodeSimd
+    {
+        public int Reps { get; private set; }
+        public int SElems { get; private set; }
+        public int Elems { get; private set; }
+        public bool WBack { get; private set; }
+
+        public OpCodeSimdMemMs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int reps = 1;
+            int sElems = 1;
+            // Opcode bits 15:12 pick the Reps/SElems combination; any unlisted value is undefined.
+            switch ((opCode >> 12) & 0xf)
+            {
+                case 0b0000: sElems = 4; break;
+                case 0b0010: reps = 4; break;
+                case 0b0100: sElems = 3; break;
+                case 0b0110: reps = 3; break;
+                case 0b0111: break;
+                case 0b1000: sElems = 2; break;
+                case 0b1010: reps = 2; break;
+                default: Instruction = InstDescriptor.Undefined; return;
+            }
+
+            Reps = reps;
+            SElems = sElems;
+            Size = (opCode >> 10) & 3;
+            WBack = (opCode & (1 << 23)) != 0;
+
+            bool q = (opCode & (1 << 30)) != 0;
+            // Size 3 without Q is only accepted when SElems is 1.
+            if (!q && Size == 3 && sElems != 1)
+            {
+                Instruction = InstDescriptor.Undefined;
+                return;
+            }
+
+            Extend64 = false;
+            RegisterSize = q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+            Elems = (GetBitsCount() >> 3) >> Size; // Register bytes divided by 2^Size.
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemPair.cs b/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
new file mode 100644
index 000000000..a4af49d02
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD register-pair memory opcode; Size is opcode bits 31:30 plus 2.
+    class OpCodeSimdMemPair : OpCodeMemPair, IOpCodeSimd
+    {
+        public OpCodeSimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int opc = (opCode >> 30) & 3;
+            Size = opc + 2;
+            Extend64 = false;
+            DecodeImm(opCode); // Kept after the Size update; presumably DecodeImm reads Size - confirm in OpCodeMemPair.
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemReg.cs b/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
new file mode 100644
index 000000000..7b783d63d
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD flavour of OpCodeMemReg: extends the inherited Size and clears Extend64.
+    class OpCodeSimdMemReg : OpCodeMemReg, IOpCodeSimd
+    {
+        public OpCodeSimdMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Size |= ((opCode >> 23) & 1) << 2; // Opcode bit 23 becomes bit 2 of Size.
+            Extend64 = false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemSs.cs b/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
new file mode 100644
index 000000000..302decbcc
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
@@ -0,0 +1,95 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD single-structure memory opcode. Decodes the structure element count,
+    // the element index, the element size and the Replicate flag (scale field 3),
+    // and marks field combinations that are not accepted as undefined.
+    class OpCodeSimdMemSs : OpCodeMemReg, IOpCodeSimd
+    {
+        public int SElems { get; private set; }
+        public int Index { get; private set; }
+        public bool Replicate { get; private set; }
+        public bool WBack { get; private set; }
+
+        public OpCodeSimdMemSs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Raw opcode fields.
+            int size = (opCode >> 10) & 3;
+            int s = (opCode >> 12) & 1;
+            int scale = (opCode >> 14) & 3;
+            int l = (opCode >> 22) & 1;
+            int q = (opCode >> 30) & 1;
+
+            // Element count: opcode bit 13 is the high bit, bit 21 the low bit, plus one.
+            int sElems = (((opCode >> 12) & 2) | ((opCode >> 21) & 1)) + 1;
+
+            // Widest form of the element index (Q:S:size); narrowed below for larger scales.
+            int index = size | (s << 2) | (q << 3);
+
+            bool sizeBit0 = (size & 1) != 0;
+            bool sizeBit1 = (size & 2) != 0;
+
+            // Scale 0 needs no adjustment: the full Q:S:size value is the index.
+            if (scale == 1)
+            {
+                // Scale 1: size bit 0 must be clear; the index drops that bit.
+                if (sizeBit0)
+                {
+                    Instruction = InstDescriptor.Undefined;
+
+                    return;
+                }
+
+                index >>= 1;
+            }
+            else if (scale == 2)
+            {
+                // Scale 2: size bit 1 must be clear, and size bit 0 is only allowed when S is clear.
+                if (sizeBit1 || (sizeBit0 && s != 0))
+                {
+                    Instruction = InstDescriptor.Undefined;
+
+                    return;
+                }
+
+                if (sizeBit0)
+                {
+                    // size == 1 promotes the scale to 3; only Q remains as the index.
+                    index >>= 3;
+                    scale = 3;
+                }
+                else
+                {
+                    index >>= 2;
+                }
+            }
+            else if (scale == 3)
+            {
+                // Scale 3: requires L set and S clear.
+                if (l == 0 || s != 0)
+                {
+                    Instruction = InstDescriptor.Undefined;
+
+                    return;
+                }
+
+                // Replicating form: the final scale is taken from the size field.
+                scale = size;
+                Replicate = true;
+            }
+
+            // Publish the remaining decoded values now that the encoding was accepted.
+            Index = index;
+            SElems = sElems;
+            Size = scale;
+
+            Extend64 = false;
+
+            // Opcode bit 23 is the write-back flag.
+            WBack = ((opCode >> 23) & 1) != 0;
+
+            RegisterSize = q != 0
+                ? RegisterSize.Simd128
+                : RegisterSize.Simd64;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdReg.cs b/ARMeilleure/Decoders/OpCodeSimdReg.cs
new file mode 100644
index 000000000..d076806a6
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdReg.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdReg : OpCodeSimd
+    {
+        public bool Bit3 { get; private set; } // True when opcode bit 3 is set.
+        public int Ra { get; private set; }    // Opcode bits 14:10.
+        public int Rm { get; protected set; }  // Opcode bits 20:16; settable by derived decoders.
+
+        public OpCodeSimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm = (opCode >> 16) & 0b11111;
+            Ra = (opCode >> 10) & 0b11111;
+            Bit3 = (opCode & 0b1000) != 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdRegElem.cs b/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
new file mode 100644
index 000000000..d2f1583d2
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+    // By-element SIMD register opcode; Index is built from opcode bits 11 (H), 21 (L) and 20 (M).
+    class OpCodeSimdRegElem : OpCodeSimdReg
+    {
+        public int Index { get; private set; }
+
+        public OpCodeSimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int h = (opCode >> 11) & 1;
+            int l = (opCode >> 21) & 1;
+            int m = (opCode >> 20) & 1;
+            if (Size == 1)
+            {
+                // Index = H:L:M; Rm is narrowed to its low four bits.
+                Index = (h << 2) | (l << 1) | m;
+                Rm &= 0xf;
+            }
+            else if (Size == 2)
+            {
+                Index = (h << 1) | l; // Index = H:L.
+            }
+            else
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs b/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
new file mode 100644
index 000000000..365b77172
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+    // By-element SIMD register opcode whose element index is built from the
+    // H bit (opcode bit 11) and the two-bit sz:L field (opcode bits 22:21).
+    class OpCodeSimdRegElemF : OpCodeSimdReg
+    {
+        public int Index { get; private set; }
+
+        public OpCodeSimdRegElemF(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int h = (opCode >> 11) & 1;
+            int szL = (opCode >> 21) & 3; // sz:L
+
+            if (szL < 2)
+            {
+                // sz = 0: Index = H:L (0..3).
+                Index = (h << 1) | szL;
+            }
+            else if (szL == 2)
+            {
+                // sz = 1, L = 0: Index = H (0..1).
+                Index = h;
+            }
+            else
+            {
+                // sz = 1, L = 1 is rejected.
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdShImm.cs b/ARMeilleure/Decoders/OpCodeSimdShImm.cs
new file mode 100644
index 000000000..d260c4b3e
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdShImm.cs
@@ -0,0 +1,16 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+    // SIMD shift-by-immediate opcode; Imm is the 7-bit field at opcode bits 22:16.
+    class OpCodeSimdShImm : OpCodeSimd
+    {
+        public int Imm { get; private set; }
+
+        public OpCodeSimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Imm = (opCode & 0x7f0000) >> 16;
+            Size = BitUtils.HighestBitSetNibble(Imm >> 3); // Derived from the top four bits of Imm.
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdTbl.cs b/ARMeilleure/Decoders/OpCodeSimdTbl.cs
new file mode 100644
index 000000000..14fdd6489
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdTbl.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdTbl : OpCodeSimdReg // Size is the two-bit field at opcode bits 14:13, plus one.
+    {
+        public OpCodeSimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Size = 1 + ((opCode & 0x6000) >> 13);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSystem.cs b/ARMeilleure/Decoders/OpCodeSystem.cs
new file mode 100644
index 000000000..cf7c5cc15
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSystem.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSystem : OpCode
+    {
+        public int Rt { get; private set; }  // Opcode bits 4:0.
+        public int Op2 { get; private set; } // Opcode bits 7:5.
+        public int CRm { get; private set; } // Opcode bits 11:8.
+        public int CRn { get; private set; } // Opcode bits 15:12.
+        public int Op1 { get; private set; } // Opcode bits 18:16.
+        public int Op0 { get; private set; } // Opcode bit 19 with bit 1 forced on, so always 2 or 3.
+
+        public OpCodeSystem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Op0 = 2 | ((opCode >> 19) & 1);
+            Op1 = (opCode >> 16) & 7;
+            CRn = (opCode >> 12) & 15;
+            CRm = (opCode >> 8) & 15;
+            Op2 = (opCode >> 5) & 7;
+            Rt = opCode & 31;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16.cs b/ARMeilleure/Decoders/OpCodeT16.cs
new file mode 100644
index 000000000..e7b7aff53
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    // Base class for T16 opcodes: two bytes long, condition fixed to Condition.Al.
+    class OpCodeT16 : OpCode32
+    {
+        public OpCodeT16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Cond = Condition.Al; // Replaces whatever condition the OpCode32 constructor set.
+            OpCodeSizeInBytes = 2;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16AluImm8.cs b/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
new file mode 100644
index 000000000..197d3b091
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+    // T16 ALU opcode with an 8-bit immediate and one register used as both Rd and Rn.
+    class OpCodeT16AluImm8 : OpCodeT16, IOpCode32Alu
+    {
+        // Shared destination/first-operand register, taken from opcode bits 10:8.
+        private int _rdn;
+
+        public int Rd { get { return _rdn; } }
+        public int Rn { get { return _rdn; } }
+        public bool SetFlags { get { return false; } }
+        public int Immediate { get; private set; }
+
+        public OpCodeT16AluImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            _rdn = (opCode >> 8) & 7;
+            Immediate = opCode & 0xff;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16BReg.cs b/ARMeilleure/Decoders/OpCodeT16BReg.cs
new file mode 100644
index 000000000..1fb397591
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16BReg.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeT16BReg : OpCodeT16, IOpCode32BReg
+    {
+        public int Rm { get; private set; } // Four-bit register number from opcode bits 6:3.
+
+        public OpCodeT16BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm = (opCode & 0x78) >> 3;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
new file mode 100644
index 000000000..22c762d62
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -0,0 +1,787 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+ static class OpCodeTable
+ {
+ private const int FastLookupSize = 0x1000;
+
+ // Immutable opcode-table record: a mask/value bit pattern, the instruction
+ // descriptor it maps to, and the Type registered alongside it.
+ private struct InstInfo
+ {
+     public int Mask { get; }
+     public int Value { get; }
+     public InstDescriptor Inst { get; }
+     public Type Type { get; }
+
+     public InstInfo(int mask, int value, InstDescriptor inst, Type type)
+     {
+         Type = type;
+         Inst = inst;
+         Value = value;
+         Mask = mask;
+     }
+ }
+
+ // Instruction entries per instruction set (A32, T32, A64); List needs its element type to compile.
+ private static List<InstInfo> _allInstA32 = new List<InstInfo>();
+ private static List<InstInfo> _allInstT32 = new List<InstInfo>();
+ private static List<InstInfo> _allInstA64 = new List<InstInfo>();
+ private static InstInfo[][] _instA32FastLookup = new InstInfo[FastLookupSize][]; // FastLookupSize outer slots per table.
+ private static InstInfo[][] _instT32FastLookup = new InstInfo[FastLookupSize][];
+ private static InstInfo[][] _instA64FastLookup = new InstInfo[FastLookupSize][];
+
+ static OpCodeTable()
+ {
+#region "OpCode Table (AArch64)"
+ // Base
+ SetA64("x0011010000xxxxx000000xxxxxxxxxx", InstName.Adc, InstEmit.Adc, typeof(OpCodeAluRs));
+ SetA64("x0111010000xxxxx000000xxxxxxxxxx", InstName.Adcs, InstEmit.Adcs, typeof(OpCodeAluRs));
+ SetA64("x00100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluImm));
+ SetA64("00001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRs));
+ SetA64("10001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRs));
+ SetA64("x0001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRx));
+ SetA64("x0001011001xxxxxxxx100xxxxxxxxxx", InstName.Add, InstEmit.Add, typeof(OpCodeAluRx));
+ SetA64("x01100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluImm));
+ SetA64("00101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRs));
+ SetA64("10101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRs));
+ SetA64("x0101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRx));
+ SetA64("x0101011001xxxxxxxx100xxxxxxxxxx", InstName.Adds, InstEmit.Adds, typeof(OpCodeAluRx));
+ SetA64("0xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adr, InstEmit.Adr, typeof(OpCodeAdr));
+ SetA64("1xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adrp, InstEmit.Adrp, typeof(OpCodeAdr));
+ SetA64("0001001000xxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluImm));
+ SetA64("100100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluImm));
+ SetA64("00001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluRs));
+ SetA64("10001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, typeof(OpCodeAluRs));
+ SetA64("0111001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, typeof(OpCodeAluImm));
+ SetA64("111100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, typeof(OpCodeAluImm));
+ SetA64("01101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, typeof(OpCodeAluRs));
+ SetA64("11101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, typeof(OpCodeAluRs));
+ SetA64("x0011010110xxxxx001010xxxxxxxxxx", InstName.Asrv, InstEmit.Asrv, typeof(OpCodeAluRs));
+ SetA64("000101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit.B, typeof(OpCodeBImmAl));
+ SetA64("01010100xxxxxxxxxxxxxxxxxxx0xxxx", InstName.B_Cond, InstEmit.B_Cond, typeof(OpCodeBImmCond));
+ SetA64("00110011000xxxxx0xxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, typeof(OpCodeBfm));
+ SetA64("1011001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, typeof(OpCodeBfm));
+ SetA64("00001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, typeof(OpCodeAluRs));
+ SetA64("10001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, typeof(OpCodeAluRs));
+ SetA64("01101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, typeof(OpCodeAluRs));
+ SetA64("11101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, typeof(OpCodeAluRs));
+ SetA64("100101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit.Bl, typeof(OpCodeBImmAl));
+ SetA64("1101011000111111000000xxxxx00000", InstName.Blr, InstEmit.Blr, typeof(OpCodeBReg));
+ SetA64("1101011000011111000000xxxxx00000", InstName.Br, InstEmit.Br, typeof(OpCodeBReg));
+ SetA64("11010100001xxxxxxxxxxxxxxxx00000", InstName.Brk, InstEmit.Brk, typeof(OpCodeException));
+ SetA64("x0110101xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbnz, InstEmit.Cbnz, typeof(OpCodeBImmCmp));
+ SetA64("x0110100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbz, InstEmit.Cbz, typeof(OpCodeBImmCmp));
+ SetA64("x0111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, typeof(OpCodeCcmpImm));
+ SetA64("x0111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, typeof(OpCodeCcmpReg));
+ SetA64("x1111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, typeof(OpCodeCcmpImm));
+ SetA64("x1111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, typeof(OpCodeCcmpReg));
+ SetA64("11010101000000110011xxxx01011111", InstName.Clrex, InstEmit.Clrex, typeof(OpCodeSystem));
+ SetA64("x101101011000000000101xxxxxxxxxx", InstName.Cls, InstEmit.Cls, typeof(OpCodeAlu));
+ SetA64("x101101011000000000100xxxxxxxxxx", InstName.Clz, InstEmit.Clz, typeof(OpCodeAlu));
+ SetA64("00011010110xxxxx010000xxxxxxxxxx", InstName.Crc32b, InstEmit.Crc32b, typeof(OpCodeAluBinary));
+ SetA64("00011010110xxxxx010001xxxxxxxxxx", InstName.Crc32h, InstEmit.Crc32h, typeof(OpCodeAluBinary));
+ SetA64("00011010110xxxxx010010xxxxxxxxxx", InstName.Crc32w, InstEmit.Crc32w, typeof(OpCodeAluBinary));
+ SetA64("10011010110xxxxx010011xxxxxxxxxx", InstName.Crc32x, InstEmit.Crc32x, typeof(OpCodeAluBinary));
+ SetA64("00011010110xxxxx010100xxxxxxxxxx", InstName.Crc32cb, InstEmit.Crc32cb, typeof(OpCodeAluBinary));
+ SetA64("00011010110xxxxx010101xxxxxxxxxx", InstName.Crc32ch, InstEmit.Crc32ch, typeof(OpCodeAluBinary));
+ SetA64("00011010110xxxxx010110xxxxxxxxxx", InstName.Crc32cw, InstEmit.Crc32cw, typeof(OpCodeAluBinary));
+ SetA64("10011010110xxxxx010111xxxxxxxxxx", InstName.Crc32cx, InstEmit.Crc32cx, typeof(OpCodeAluBinary));
+ SetA64("x0011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csel, InstEmit.Csel, typeof(OpCodeCsel));
+ SetA64("x0011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csinc, InstEmit.Csinc, typeof(OpCodeCsel));
+ SetA64("x1011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csinv, InstEmit.Csinv, typeof(OpCodeCsel));
+ SetA64("x1011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csneg, InstEmit.Csneg, typeof(OpCodeCsel));
+ SetA64("11010101000000110011xxxx10111111", InstName.Dmb, InstEmit.Dmb, typeof(OpCodeSystem));
+ SetA64("11010101000000110011xxxx10011111", InstName.Dsb, InstEmit.Dsb, typeof(OpCodeSystem));
+ SetA64("01001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, typeof(OpCodeAluRs));
+ SetA64("11001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, typeof(OpCodeAluRs));
+ SetA64("0101001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluImm));
+ SetA64("110100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluImm));
+ SetA64("01001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluRs));
+ SetA64("11001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, typeof(OpCodeAluRs));
+ SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, typeof(OpCodeAluRs));
+ SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, typeof(OpCodeAluRs));
+ SetA64("11010101000000110010xxxxxxx11111", InstName.Hint, InstEmit.Hint, typeof(OpCodeSystem));
+ SetA64("11010101000000110011xxxx11011111", InstName.Isb, InstEmit.Isb, typeof(OpCodeSystem));
+ SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", InstName.Ldar, InstEmit.Ldar, typeof(OpCodeMemEx));
+ SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxp, InstEmit.Ldaxp, typeof(OpCodeMemEx));
+ SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxr, InstEmit.Ldaxr, typeof(OpCodeMemEx));
+ SetA64("<<10100xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, typeof(OpCodeMemPair));
+ SetA64("xx111000010xxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeMemImm));
+ SetA64("xx11100101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeMemImm));
+ SetA64("xx111000011xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeMemReg));
+ SetA64("xx011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, typeof(OpCodeMemLit));
+ SetA64("0x1110001x0xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm));
+ SetA64("0x1110011xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm));
+ SetA64("10111000100xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm));
+ SetA64("1011100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemImm));
+ SetA64("0x1110001x1xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemReg));
+ SetA64("10111000101xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, typeof(OpCodeMemReg));
+ SetA64("xx001000010xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxr, InstEmit.Ldxr, typeof(OpCodeMemEx));
+ SetA64("1x001000011xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxp, InstEmit.Ldxp, typeof(OpCodeMemEx));
+ SetA64("x0011010110xxxxx001000xxxxxxxxxx", InstName.Lslv, InstEmit.Lslv, typeof(OpCodeAluRs));
+ SetA64("x0011010110xxxxx001001xxxxxxxxxx", InstName.Lsrv, InstEmit.Lsrv, typeof(OpCodeAluRs));
+ SetA64("x0011011000xxxxx0xxxxxxxxxxxxxxx", InstName.Madd, InstEmit.Madd, typeof(OpCodeMul));
+ SetA64("0111001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, typeof(OpCodeMov));
+ SetA64("111100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, typeof(OpCodeMov));
+ SetA64("0001001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, typeof(OpCodeMov));
+ SetA64("100100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, typeof(OpCodeMov));
+ SetA64("0101001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, typeof(OpCodeMov));
+ SetA64("110100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, typeof(OpCodeMov));
+ SetA64("110101010011xxxxxxxxxxxxxxxxxxxx", InstName.Mrs, InstEmit.Mrs, typeof(OpCodeSystem));
+ SetA64("110101010001xxxxxxxxxxxxxxxxxxxx", InstName.Msr, InstEmit.Msr, typeof(OpCodeSystem));
+ SetA64("x0011011000xxxxx1xxxxxxxxxxxxxxx", InstName.Msub, InstEmit.Msub, typeof(OpCodeMul));
+ SetA64("11010101000000110010000000011111", InstName.Nop, InstEmit.Nop, typeof(OpCodeSystem));
+ SetA64("00101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, typeof(OpCodeAluRs));
+ SetA64("10101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, typeof(OpCodeAluRs));
+ SetA64("0011001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, typeof(OpCodeAluImm));
+ SetA64("101100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, typeof(OpCodeAluImm));
+ SetA64("00101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, typeof(OpCodeAluRs));
+ SetA64("10101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, typeof(OpCodeAluRs));
+ SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, typeof(OpCodeMemImm));
+ SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, typeof(OpCodeMemImm));
+ SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, typeof(OpCodeMemLit));
+ SetA64("x101101011000000000000xxxxxxxxxx", InstName.Rbit, InstEmit.Rbit, typeof(OpCodeAlu));
+ SetA64("1101011001011111000000xxxxx00000", InstName.Ret, InstEmit.Ret, typeof(OpCodeBReg));
+ SetA64("x101101011000000000001xxxxxxxxxx", InstName.Rev16, InstEmit.Rev16, typeof(OpCodeAlu));
+ SetA64("x101101011000000000010xxxxxxxxxx", InstName.Rev32, InstEmit.Rev32, typeof(OpCodeAlu));
+ SetA64("1101101011000000000011xxxxxxxxxx", InstName.Rev64, InstEmit.Rev64, typeof(OpCodeAlu));
+ SetA64("x0011010110xxxxx001011xxxxxxxxxx", InstName.Rorv, InstEmit.Rorv, typeof(OpCodeAluRs));
+ SetA64("x1011010000xxxxx000000xxxxxxxxxx", InstName.Sbc, InstEmit.Sbc, typeof(OpCodeAluRs));
+ SetA64("x1111010000xxxxx000000xxxxxxxxxx", InstName.Sbcs, InstEmit.Sbcs, typeof(OpCodeAluRs));
+ SetA64("00010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, typeof(OpCodeBfm));
+ SetA64("1001001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, typeof(OpCodeBfm));
+ SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, typeof(OpCodeAluBinary));
+ SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, typeof(OpCodeMul));
+ SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, typeof(OpCodeMul));
+ SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, typeof(OpCodeMul));
+ SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, typeof(OpCodeMemEx));
+ SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, typeof(OpCodeMemEx));
+ SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, typeof(OpCodeMemEx));
+ SetA64("x010100xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, typeof(OpCodeMemPair));
+ SetA64("xx111000000xxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeMemImm));
+ SetA64("xx11100100xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeMemImm));
+ SetA64("xx111000001xxxxxxxxx10xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeMemReg));
+ SetA64("1x001000001xxxxx0xxxxxxxxxxxxxxx", InstName.Stxp, InstEmit.Stxp, typeof(OpCodeMemEx));
+ SetA64("xx001000000xxxxx0xxxxxxxxxxxxxxx", InstName.Stxr, InstEmit.Stxr, typeof(OpCodeMemEx));
+ SetA64("x10100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluImm));
+ SetA64("01001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRs));
+ SetA64("11001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRs));
+ SetA64("x1001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRx));
+ SetA64("x1001011001xxxxxxxx100xxxxxxxxxx", InstName.Sub, InstEmit.Sub, typeof(OpCodeAluRx));
+ SetA64("x11100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluImm));
+ SetA64("01101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRs));
+ SetA64("11101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRs));
+ SetA64("x1101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRx));
+ SetA64("x1101011001xxxxxxxx100xxxxxxxxxx", InstName.Subs, InstEmit.Subs, typeof(OpCodeAluRx));
+ SetA64("11010100000xxxxxxxxxxxxxxxx00001", InstName.Svc, InstEmit.Svc, typeof(OpCodeException));
+ SetA64("1101010100001xxxxxxxxxxxxxxxxxxx", InstName.Sys, InstEmit.Sys, typeof(OpCodeSystem));
+ SetA64("x0110111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbnz, InstEmit.Tbnz, typeof(OpCodeBImmTest));
+ SetA64("x0110110xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbz, InstEmit.Tbz, typeof(OpCodeBImmTest));
+ SetA64("01010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, typeof(OpCodeBfm));
+ SetA64("1101001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, typeof(OpCodeBfm));
+ SetA64("x0011010110xxxxx000010xxxxxxxxxx", InstName.Udiv, InstEmit.Udiv, typeof(OpCodeAluBinary));
+ SetA64("10011011101xxxxx0xxxxxxxxxxxxxxx", InstName.Umaddl, InstEmit.Umaddl, typeof(OpCodeMul));
+ SetA64("10011011101xxxxx1xxxxxxxxxxxxxxx", InstName.Umsubl, InstEmit.Umsubl, typeof(OpCodeMul));
+ SetA64("10011011110xxxxx0xxxxxxxxxxxxxxx", InstName.Umulh, InstEmit.Umulh, typeof(OpCodeMul));
+
+ // FP & SIMD
+ SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S, InstEmit.Abs_S, typeof(OpCodeSimd));
+ SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V, InstEmit.Abs_V, typeof(OpCodeSimd));
+ SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S, InstEmit.Add_S, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V, InstEmit.Add_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V, InstEmit.Addhn_V, typeof(OpCodeSimdReg));
+ SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S, InstEmit.Addp_S, typeof(OpCodeSimd));
+ SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V, InstEmit.Addp_V, typeof(OpCodeSimdReg));
+ SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, typeof(OpCodeSimd));
+ SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, typeof(OpCodeSimd));
+ SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V, InstEmit.Aesd_V, typeof(OpCodeSimd));
+ SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, InstEmit.Aese_V, typeof(OpCodeSimd));
+ SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, InstEmit.Aesimc_V, typeof(OpCodeSimd));
+ SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, InstEmit.Aesmc_V, typeof(OpCodeSimd));
+ SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V, InstEmit.And_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V, InstEmit.Bic_V, typeof(OpCodeSimdReg));
+ SetA64("0x10111100000xxx0xx101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, typeof(OpCodeSimdImm));
+ SetA64("0x10111100000xxx10x101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, typeof(OpCodeSimdImm));
+ SetA64("0x101110111xxxxx000111xxxxxxxxxx", InstName.Bif_V, InstEmit.Bif_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110101xxxxx000111xxxxxxxxxx", InstName.Bit_V, InstEmit.Bit_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstName.Bsl_V, InstEmit.Bsl_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<100000010010xxxxxxxxxx", InstName.Cls_V, InstEmit.Cls_V, typeof(OpCodeSimd));
+ SetA64("0x101110<<100000010010xxxxxxxxxx", InstName.Clz_V, InstEmit.Clz_V, typeof(OpCodeSimd));
+ SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, typeof(OpCodeSimdReg));
+ SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, typeof(OpCodeSimd));
+ SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, typeof(OpCodeSimd));
+ SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, typeof(OpCodeSimdReg));
+ SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, typeof(OpCodeSimd));
+ SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, typeof(OpCodeSimd));
+ SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, typeof(OpCodeSimdReg));
+ SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, typeof(OpCodeSimd));
+ SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, typeof(OpCodeSimd));
+ SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S, InstEmit.Cmhi_S, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V, InstEmit.Cmhi_V, typeof(OpCodeSimdReg));
+ SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S, InstEmit.Cmhs_S, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V, InstEmit.Cmhs_V, typeof(OpCodeSimdReg));
+ SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S, InstEmit.Cmle_S, typeof(OpCodeSimd));
+ SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V, InstEmit.Cmle_V, typeof(OpCodeSimd));
+ SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S, InstEmit.Cmlt_S, typeof(OpCodeSimd));
+ SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, InstEmit.Cmlt_V, typeof(OpCodeSimd));
+ SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, InstEmit.Cmtst_S, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, InstEmit.Cmtst_V, typeof(OpCodeSimdReg));
+ SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, typeof(OpCodeSimd));
+ SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, typeof(OpCodeSimdIns));
+ SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, typeof(OpCodeSimdIns));
+ SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, typeof(OpCodeSimdIns));
+ SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, InstEmit.Eor_V, typeof(OpCodeSimdReg));
+ SetA64("0>101110000xxxxx01011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, typeof(OpCodeSimdReg));
+ SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, typeof(OpCodeSimd));
+ SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, typeof(OpCodeSimd));
+ SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, typeof(OpCodeSimdReg));
+ SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, typeof(OpCodeSimd));
+ SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, InstEmit.Faddp_V, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, InstEmit.Fccmp_S, typeof(OpCodeSimdFcond));
+ SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, InstEmit.Fccmpe_S, typeof(OpCodeSimdFcond));
+ SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, typeof(OpCodeSimdReg));
+ SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, typeof(OpCodeSimd));
+ SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, typeof(OpCodeSimdReg));
+ SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, typeof(OpCodeSimd));
+ SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, typeof(OpCodeSimdReg));
+ SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, typeof(OpCodeSimd));
+ SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, typeof(OpCodeSimdReg));
+ SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, typeof(OpCodeSimd));
+ SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, typeof(OpCodeSimdReg));
+ SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, typeof(OpCodeSimd));
+ SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, typeof(OpCodeSimdReg));
+ SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, typeof(OpCodeSimd));
+ SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S, InstEmit.Fcmle_S, typeof(OpCodeSimd));
+ SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V, InstEmit.Fcmle_V, typeof(OpCodeSimd));
+ SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S, InstEmit.Fcmlt_S, typeof(OpCodeSimd));
+ SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, InstEmit.Fcmlt_V, typeof(OpCodeSimd));
+ SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, InstEmit.Fcmp_S, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, InstEmit.Fcmpe_S, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, typeof(OpCodeSimdFcond));
+ SetA64("00011110xx10001xx10000xxxxxxxxxx", InstName.Fcvt_S, InstEmit.Fcvt_S, typeof(OpCodeSimd));
+ SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp, InstEmit.Fcvtas_Gp, typeof(OpCodeSimdCvt));
+ SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp, InstEmit.Fcvtau_Gp, typeof(OpCodeSimdCvt));
+ SetA64("0x0011100x100001011110xxxxxxxxxx", InstName.Fcvtl_V, InstEmit.Fcvtl_V, typeof(OpCodeSimd));
+ SetA64("x00111100x110000000000xxxxxxxxxx", InstName.Fcvtms_Gp, InstEmit.Fcvtms_Gp, typeof(OpCodeSimdCvt));
+ SetA64("x00111100x110001000000xxxxxxxxxx", InstName.Fcvtmu_Gp, InstEmit.Fcvtmu_Gp, typeof(OpCodeSimdCvt));
+ SetA64("0x0011100x100001011010xxxxxxxxxx", InstName.Fcvtn_V, InstEmit.Fcvtn_V, typeof(OpCodeSimd));
+ SetA64("010111100x100001101010xxxxxxxxxx", InstName.Fcvtns_S, InstEmit.Fcvtns_S, typeof(OpCodeSimd));
+ SetA64("0>0011100<100001101010xxxxxxxxxx", InstName.Fcvtns_V, InstEmit.Fcvtns_V, typeof(OpCodeSimd));
+ SetA64("011111100x100001101010xxxxxxxxxx", InstName.Fcvtnu_S, InstEmit.Fcvtnu_S, typeof(OpCodeSimd));
+ SetA64("0>1011100<100001101010xxxxxxxxxx", InstName.Fcvtnu_V, InstEmit.Fcvtnu_V, typeof(OpCodeSimd));
+ SetA64("x00111100x101000000000xxxxxxxxxx", InstName.Fcvtps_Gp, InstEmit.Fcvtps_Gp, typeof(OpCodeSimdCvt));
+ SetA64("x00111100x101001000000xxxxxxxxxx", InstName.Fcvtpu_Gp, InstEmit.Fcvtpu_Gp, typeof(OpCodeSimdCvt));
+ SetA64("x00111100x111000000000xxxxxxxxxx", InstName.Fcvtzs_Gp, InstEmit.Fcvtzs_Gp, typeof(OpCodeSimdCvt));
+ SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstName.Fcvtzs_Gp_Fixed, InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt));
+ SetA64("010111101x100001101110xxxxxxxxxx", InstName.Fcvtzs_S, InstEmit.Fcvtzs_S, typeof(OpCodeSimd));
+ SetA64("0>0011101<100001101110xxxxxxxxxx", InstName.Fcvtzs_V, InstEmit.Fcvtzs_V, typeof(OpCodeSimd));
+ SetA64("0x001111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("0100111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("x00111100x111001000000xxxxxxxxxx", InstName.Fcvtzu_Gp, InstEmit.Fcvtzu_Gp, typeof(OpCodeSimdCvt));
+ SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstName.Fcvtzu_Gp_Fixed, InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt));
+ SetA64("011111101x100001101110xxxxxxxxxx", InstName.Fcvtzu_S, InstEmit.Fcvtzu_S, typeof(OpCodeSimd));
+ SetA64("0>1011101<100001101110xxxxxxxxxx", InstName.Fcvtzu_V, InstEmit.Fcvtzu_V, typeof(OpCodeSimd));
+ SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstName.Fdiv_S, InstEmit.Fdiv_S, typeof(OpCodeSimdReg));
+ SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V, InstEmit.Fdiv_V, typeof(OpCodeSimdReg));
+ SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S, InstEmit.Fmadd_S, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S, InstEmit.Fmax_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, typeof(OpCodeSimdReg));
+ SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, typeof(OpCodeSimdReg));
+ SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, typeof(OpCodeSimdReg));
+ SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, typeof(OpCodeSimdRegElemF));
+ SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, typeof(OpCodeSimdReg));
+ SetA64("0>00111110011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, typeof(OpCodeSimdReg));
+ SetA64("0>00111111011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V, InstEmit.Fmul_V, typeof(OpCodeSimdReg));
+ SetA64("0>00111110011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V, InstEmit.Fmulx_V, typeof(OpCodeSimdReg));
+ SetA64("0>10111111011101<100000111110xxxxxxxxxx", InstName.Fneg_V, InstEmit.Fneg_V, typeof(OpCodeSimd));
+ SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S, InstEmit.Fnmadd_S, typeof(OpCodeSimdReg));
+ SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S, InstEmit.Fnmsub_S, typeof(OpCodeSimdReg));
+ SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S, InstEmit.Fnmul_S, typeof(OpCodeSimdReg));
+ SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S, InstEmit.Frecpe_S, typeof(OpCodeSimd));
+ SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V, InstEmit.Frecpe_V, typeof(OpCodeSimd));
+ SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S, InstEmit.Frecps_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V, InstEmit.Frecps_V, typeof(OpCodeSimdReg));
+ SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S, InstEmit.Frecpx_S, typeof(OpCodeSimd));
+ SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S, InstEmit.Frinta_S, typeof(OpCodeSimd));
+ SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V, InstEmit.Frinta_V, typeof(OpCodeSimd));
+ SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S, InstEmit.Frinti_S, typeof(OpCodeSimd));
+ SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V, InstEmit.Frinti_V, typeof(OpCodeSimd));
+ SetA64("000111100x100101010000xxxxxxxxxx", InstName.Frintm_S, InstEmit.Frintm_S, typeof(OpCodeSimd));
+ SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V, InstEmit.Frintm_V, typeof(OpCodeSimd));
+ SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S, InstEmit.Frintn_S, typeof(OpCodeSimd));
+ SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V, InstEmit.Frintn_V, typeof(OpCodeSimd));
+ SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S, InstEmit.Frintp_S, typeof(OpCodeSimd));
+ SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V, InstEmit.Frintp_V, typeof(OpCodeSimd));
+ SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S, InstEmit.Frintx_S, typeof(OpCodeSimd));
+ SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V, InstEmit.Frintx_V, typeof(OpCodeSimd));
+ SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S, InstEmit.Frintz_S, typeof(OpCodeSimd));
+ SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V, InstEmit.Frintz_V, typeof(OpCodeSimd));
+ SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S, InstEmit.Frsqrte_S, typeof(OpCodeSimd));
+ SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V, InstEmit.Frsqrte_V, typeof(OpCodeSimd));
+ SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S, InstEmit.Frsqrts_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V, InstEmit.Frsqrts_V, typeof(OpCodeSimdReg));
+ SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S, InstEmit.Fsqrt_S, typeof(OpCodeSimd));
+ SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, typeof(OpCodeSimd));
+ SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, typeof(OpCodeSimdReg));
+ SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, typeof(OpCodeSimdReg));
+ SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, InstEmit.Ins_Gp, typeof(OpCodeSimdIns));
+ SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, InstEmit.Ins_V, typeof(OpCodeSimdIns));
+ SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, typeof(OpCodeSimdMemMs));
+ SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, typeof(OpCodeSimdMemMs));
+ SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, typeof(OpCodeSimdMemSs));
+ SetA64("0x00110111xxxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, typeof(OpCodeSimdMemSs));
+ SetA64("xx10110xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, typeof(OpCodeSimdMemPair));
+ SetA64("xx111100x10xxxxxxxxx00xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm));
+ SetA64("xx111100x10xxxxxxxxx01xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm));
+ SetA64("xx111100x10xxxxxxxxx11xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm));
+ SetA64("xx111101x1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemImm));
+ SetA64("xx111100x11xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, typeof(OpCodeSimdMemReg));
+ SetA64("xx011100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, typeof(OpCodeSimdMemLit));
+ SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V, InstEmit.Mla_V, typeof(OpCodeSimdReg));
+ SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, typeof(OpCodeSimdReg));
+ SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
+ SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
+ SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
+ SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, typeof(OpCodeSimdImm));
+ SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, typeof(OpCodeSimdReg));
+ SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm));
+ SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm));
+ SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, typeof(OpCodeSimdImm));
+ SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, typeof(OpCodeSimd));
+ SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, typeof(OpCodeSimd));
+ SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, InstEmit.Not_V, typeof(OpCodeSimd));
+ SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V, InstEmit.Orn_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, InstEmit.Orr_V, typeof(OpCodeSimdReg));
+ SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, typeof(OpCodeSimdImm));
+ SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, typeof(OpCodeSimdImm));
+ SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstName.Raddhn_V, InstEmit.Raddhn_V, typeof(OpCodeSimdReg));
+ SetA64("0x10111001100000010110xxxxxxxxxx", InstName.Rbit_V, InstEmit.Rbit_V, typeof(OpCodeSimd));
+ SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V, InstEmit.Rev16_V, typeof(OpCodeSimd));
+ SetA64("0x1011100x100000000010xxxxxxxxxx", InstName.Rev32_V, InstEmit.Rev32_V, typeof(OpCodeSimd));
+ SetA64("0x001110<<100000000010xxxxxxxxxx", InstName.Rev64_V, InstEmit.Rev64_V, typeof(OpCodeSimd));
+ SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, InstEmit.Rshrn_V, typeof(OpCodeSimdShImm));
+ SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, InstEmit.Sabal_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V, InstEmit.Sabd_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V, InstEmit.Sabdl_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, typeof(OpCodeSimd));
+ SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, typeof(OpCodeSimd));
+ SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd));
+ SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, typeof(OpCodeSimd));
+ SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, InstEmit.Saddw_V, typeof(OpCodeSimdReg));
+ SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt));
+ SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, InstEmit.Scvtf_Gp_Fixed, typeof(OpCodeSimdCvt));
+ SetA64("010111100x100001110110xxxxxxxxxx", InstName.Scvtf_S, InstEmit.Scvtf_S, typeof(OpCodeSimd));
+ SetA64("0>0011100<100001110110xxxxxxxxxx", InstName.Scvtf_V, InstEmit.Scvtf_V, typeof(OpCodeSimd));
+ SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V, InstEmit.Sha1c_V, typeof(OpCodeSimdReg));
+ SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V, InstEmit.Sha1h_V, typeof(OpCodeSimd));
+ SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V, InstEmit.Sha1m_V, typeof(OpCodeSimdReg));
+ SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V, InstEmit.Sha1p_V, typeof(OpCodeSimdReg));
+ SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V, InstEmit.Sha1su0_V, typeof(OpCodeSimdReg));
+ SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V, InstEmit.Sha1su1_V, typeof(OpCodeSimd));
+ SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V, InstEmit.Sha256h_V, typeof(OpCodeSimdReg));
+ SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, InstEmit.Sha256h2_V, typeof(OpCodeSimdReg));
+ SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, InstEmit.Sha256su0_V, typeof(OpCodeSimd));
+ SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, InstEmit.Sha256su1_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, typeof(OpCodeSimdReg));
+ SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, InstEmit.Shl_S, typeof(OpCodeSimdShImm));
+ SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, typeof(OpCodeSimdShImm));
+ SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, typeof(OpCodeSimdShImm));
+ SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, typeof(OpCodeSimd));
+ SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, typeof(OpCodeSimdShImm));
+ SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, typeof(OpCodeSimdReg));
+ SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm));
+ SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm));
+ SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, InstEmit.Smaxp_V, typeof(OpCodeSimdReg));
+ SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, typeof(OpCodeSimd));
+ SetA64("01001110<<110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, typeof(OpCodeSimd));
+ SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V, InstEmit.Smin_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V, InstEmit.Sminp_V, typeof(OpCodeSimdReg));
+ SetA64("000011100x110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, typeof(OpCodeSimd));
+ SetA64("01001110<<110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, typeof(OpCodeSimd));
+ SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V, InstEmit.Smlal_V, typeof(OpCodeSimdReg));
+ SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, typeof(OpCodeSimdReg));
+ SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, InstEmit.Smov_S, typeof(OpCodeSimdIns));
+ SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, typeof(OpCodeSimdReg));
+ SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, typeof(OpCodeSimd));
+ SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V, InstEmit.Sqabs_V, typeof(OpCodeSimd));
+ SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S, InstEmit.Sqadd_S, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V, InstEmit.Sqadd_V, typeof(OpCodeSimdReg));
+ SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, typeof(OpCodeSimdReg));
+ SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, typeof(OpCodeSimdReg));
+ SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, typeof(OpCodeSimdReg));
+ SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S, InstEmit.Sqneg_S, typeof(OpCodeSimd));
+ SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V, InstEmit.Sqneg_V, typeof(OpCodeSimd));
+ SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg));
+ SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, typeof(OpCodeSimdReg));
+ SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, InstEmit.Sqrshl_V, typeof(OpCodeSimdReg));
+ SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, InstEmit.Sqrshrn_S, typeof(OpCodeSimdShImm));
+ SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, InstEmit.Sqrshrn_V, typeof(OpCodeSimdShImm));
+ SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, InstEmit.Sqrshrun_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, InstEmit.Sqrshrun_V, typeof(OpCodeSimdShImm));
+ SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V, InstEmit.Sqshl_V, typeof(OpCodeSimdReg));
+ SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, InstEmit.Sqshrn_S, typeof(OpCodeSimdShImm));
+ SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, InstEmit.Sqshrn_V, typeof(OpCodeSimdShImm));
+ SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, InstEmit.Sqshrun_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V, InstEmit.Sqshrun_V, typeof(OpCodeSimdShImm));
+ SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, InstEmit.Sqsub_S, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, InstEmit.Sqsub_V, typeof(OpCodeSimdReg));
+ SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, InstEmit.Sqxtn_S, typeof(OpCodeSimd));
+ SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V, InstEmit.Sqxtn_V, typeof(OpCodeSimd));
+ SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, typeof(OpCodeSimd));
+ SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, typeof(OpCodeSimd));
+ SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, typeof(OpCodeSimdReg));
+ SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, InstEmit.Srshr_S, typeof(OpCodeSimdShImm));
+ SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, typeof(OpCodeSimdShImm));
+ SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, typeof(OpCodeSimdShImm));
+ SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, typeof(OpCodeSimdShImm));
+ SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm));
+ SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm));
+ SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, typeof(OpCodeSimdReg));
+ SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, typeof(OpCodeSimdShImm));
+ SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, typeof(OpCodeSimdShImm));
+ SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, typeof(OpCodeSimdShImm));
+ SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, typeof(OpCodeSimdShImm));
+ SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, InstEmit.Ssra_S, typeof(OpCodeSimdShImm));
+ SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, typeof(OpCodeSimdShImm));
+ SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, typeof(OpCodeSimdShImm));
+ SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, InstEmit.Ssubl_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, InstEmit.Ssubw_V, typeof(OpCodeSimdReg));
+ SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, typeof(OpCodeSimdMemMs));
+ SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, typeof(OpCodeSimdMemMs));
+ SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, typeof(OpCodeSimdMemSs));
+ SetA64("0x00110110xxxxxxxxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, typeof(OpCodeSimdMemSs));
+ SetA64("xx10110xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, typeof(OpCodeSimdMemPair));
+ SetA64("xx111100x00xxxxxxxxx00xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm));
+ SetA64("xx111100x00xxxxxxxxx01xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm));
+ SetA64("xx111100x00xxxxxxxxx11xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm));
+ SetA64("xx111101x0xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemImm));
+ SetA64("xx111100x01xxxxxxxxx10xxxxxxxxxx", InstName.Str, InstEmit.Str, typeof(OpCodeSimdMemReg));
+ SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S, InstEmit.Sub_S, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V, InstEmit.Sub_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, typeof(OpCodeSimdReg));
+ SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, typeof(OpCodeSimd));
+ SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, typeof(OpCodeSimd));
+ SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, InstEmit.Tbl_V, typeof(OpCodeSimdTbl));
+ SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, InstEmit.Trn1_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, InstEmit.Trn2_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V, InstEmit.Uabdl_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V, InstEmit.Uadalp_V, typeof(OpCodeSimd));
+ SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V, InstEmit.Uaddl_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V, InstEmit.Uaddlp_V, typeof(OpCodeSimd));
+ SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, typeof(OpCodeSimd));
+ SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, typeof(OpCodeSimd));
+ SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V, InstEmit.Uaddw_V, typeof(OpCodeSimdReg));
+ SetA64("x00111100x100011000000xxxxxxxxxx", InstName.Ucvtf_Gp, InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt));
+ SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstName.Ucvtf_Gp_Fixed, InstEmit.Ucvtf_Gp_Fixed, typeof(OpCodeSimdCvt));
+ SetA64("011111100x100001110110xxxxxxxxxx", InstName.Ucvtf_S, InstEmit.Ucvtf_S, typeof(OpCodeSimd));
+ SetA64("0>1011100<100001110110xxxxxxxxxx", InstName.Ucvtf_V, InstEmit.Ucvtf_V, typeof(OpCodeSimd));
+ SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, typeof(OpCodeSimdShImm));
+ SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V, InstEmit.Uhadd_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V, InstEmit.Uhsub_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V, InstEmit.Umax_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V, InstEmit.Umaxp_V, typeof(OpCodeSimdReg));
+ SetA64("001011100x110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, typeof(OpCodeSimd));
+ SetA64("01101110<<110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, typeof(OpCodeSimd));
+ SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V, InstEmit.Umin_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V, InstEmit.Uminp_V, typeof(OpCodeSimdReg));
+ SetA64("001011100x110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, typeof(OpCodeSimd));
+ SetA64("01101110<<110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, typeof(OpCodeSimd));
+ SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V, InstEmit.Umlal_V, typeof(OpCodeSimdReg));
+ SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, InstEmit.Umlal_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, InstEmit.Umlsl_V, typeof(OpCodeSimdReg));
+ SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, InstEmit.Umlsl_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, InstEmit.Umov_S, typeof(OpCodeSimdIns));
+ SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, InstEmit.Umull_V, typeof(OpCodeSimdReg));
+ SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, typeof(OpCodeSimdRegElem));
+ SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, InstEmit.Uqadd_V, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, InstEmit.Uqrshl_V, typeof(OpCodeSimdReg));
+ SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, InstEmit.Uqrshrn_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, InstEmit.Uqrshrn_V, typeof(OpCodeSimdShImm));
+ SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, InstEmit.Uqshl_V, typeof(OpCodeSimdReg));
+ SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, InstEmit.Uqshrn_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, InstEmit.Uqshrn_V, typeof(OpCodeSimdShImm));
+ SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, InstEmit.Uqsub_S, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, InstEmit.Uqsub_V, typeof(OpCodeSimdReg));
+ SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, InstEmit.Uqxtn_S, typeof(OpCodeSimd));
+ SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, InstEmit.Uqxtn_V, typeof(OpCodeSimd));
+ SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, InstEmit.Urhadd_V, typeof(OpCodeSimdReg));
+ SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, InstEmit.Urshl_V, typeof(OpCodeSimdReg));
+ SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S, InstEmit.Urshr_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, typeof(OpCodeSimdShImm));
+ SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, typeof(OpCodeSimdShImm));
+ SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm));
+ SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm));
+ SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, typeof(OpCodeSimdReg));
+ SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, typeof(OpCodeSimdShImm));
+ SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, typeof(OpCodeSimdShImm));
+ SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, typeof(OpCodeSimdShImm));
+ SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, InstEmit.Usqadd_S, typeof(OpCodeSimd));
+ SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, InstEmit.Usqadd_V, typeof(OpCodeSimd));
+ SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, InstEmit.Usra_S, typeof(OpCodeSimdShImm));
+ SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, typeof(OpCodeSimdShImm));
+ SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, typeof(OpCodeSimdShImm));
+ SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, typeof(OpCodeSimdReg));
+ SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, InstEmit.Uzp2_V, typeof(OpCodeSimdReg));
+ SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, InstEmit.Xtn_V, typeof(OpCodeSimd));
+ SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, InstEmit.Zip1_V, typeof(OpCodeSimdReg));
+ SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, InstEmit.Zip2_V, typeof(OpCodeSimdReg));
+#endregion
+
+#region "OpCode Table (AArch32)"
+ // Base
+ SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluImm));
+ SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsImm));
+ SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, typeof(OpCode32BImm));
+ SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, typeof(OpCode32BImm));
+ SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, typeof(OpCode32BImm));
+ SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, typeof(OpCode32BReg));
+ SetT32("xxxxxxxxxxxxxxxx010001110xxxx000", InstName.Bx, InstEmit32.Bx, typeof(OpCodeT16BReg));
+ SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluImm));
+ SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsImm));
+ SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, typeof(OpCode32MemMult));
+ SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemImm));
+ SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemImm));
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemImm8));
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemImm8));
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemImm8));
+ SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemImm8));
+ SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm));
+ SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsImm));
+ SetT32("xxxxxxxxxxxxxxxx00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCodeT16AluImm8));
+ SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, typeof(OpCode32MemMult));
+ SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemImm));
+ SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemImm));
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemImm8));
+ SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemImm8));
+ SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluImm));
+ SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsImm));
+#endregion
+
+ FillFastLookupTable(_instA32FastLookup, _allInstA32);
+ FillFastLookupTable(_instT32FastLookup, _allInstT32);
+ FillFastLookupTable(_instA64FastLookup, _allInstA64);
+ }
+
+        // Fills "table" so that slot i holds, in their original order, every entry of "allInsts"
+        // whose mask/value pair, reduced with ToFastLookupIndex, is compatible with index i.
+        // All FastLookupSize slots are written, including those that end up empty.
+        // NOTE(review): the type arguments of List appear to be missing in this text
+        // (presumably List<InstInfo>) - confirm against the original source.
+        private static void FillFastLookupTable(InstInfo[][] table, List allInsts)
+        {
+            List[] buckets = new List[FastLookupSize];
+
+            for (int slot = 0; slot < buckets.Length; slot++)
+            {
+                buckets[slot] = new List();
+            }
+
+            foreach (InstInfo candidate in allInsts)
+            {
+                // Only the bits that take part in the fast index matter here.
+                int reducedMask  = ToFastLookupIndex(candidate.Mask);
+                int reducedValue = ToFastLookupIndex(candidate.Value);
+
+                for (int slot = 0; slot < buckets.Length; slot++)
+                {
+                    if ((slot & reducedMask) != reducedValue)
+                    {
+                        continue;
+                    }
+
+                    buckets[slot].Add(candidate);
+                }
+            }
+
+            for (int slot = 0; slot < buckets.Length; slot++)
+            {
+                table[slot] = buckets[slot].ToArray();
+            }
+        }
+
+        // Registers an encoding for ExecutionMode.Aarch32Arm: wraps "name" and "emitter" in an
+        // InstDescriptor and forwards everything to Set.
+        private static void SetA32(string encoding, InstName name, InstEmitter emitter, Type type)
+        {
+            InstDescriptor descriptor = new InstDescriptor(name, emitter);
+
+            Set(encoding, ExecutionMode.Aarch32Arm, descriptor, type);
+        }
+
+        // Registers an encoding for ExecutionMode.Aarch32Thumb: wraps "name" and "emitter" in an
+        // InstDescriptor and forwards everything to Set.
+        private static void SetT32(string encoding, InstName name, InstEmitter emitter, Type type)
+        {
+            InstDescriptor descriptor = new InstDescriptor(name, emitter);
+
+            Set(encoding, ExecutionMode.Aarch32Thumb, descriptor, type);
+        }
+
+        // Registers an encoding for ExecutionMode.Aarch64: wraps "name" and "emitter" in an
+        // InstDescriptor and forwards everything to Set.
+        private static void SetA64(string encoding, InstName name, InstEmitter emitter, Type type)
+        {
+            InstDescriptor descriptor = new InstDescriptor(name, emitter);
+
+            Set(encoding, ExecutionMode.Aarch64, descriptor, type);
+        }
+
+        // Parses a textual bit pattern and registers the instruction it describes for "mode".
+        //
+        // The pattern is read most significant bit first (encoding[0] maps to bit Length - 1):
+        //   '0' / '1'  the bit must have exactly that value,
+        //   'x'        the bit is ignored when matching,
+        //   '<' / '>'  the bit belongs to a constrained field (see below).
+        //
+        // Constrained bits cannot be described by a single mask/value pair, so every combination
+        // of them is enumerated and inserted as its own InstInfo, except for the one forbidden
+        // combination: all '<' bits set together with all '>' bits clear.
+        // So "<<" accepts 00, 01 and 10 but not 11, and ">>" accepts 01, 10 and 11 but not 00.
+        // When a pattern mixes both kinds, only the combined case (every '<' bit 1 and every
+        // '>' bit 0 at the same time) is rejected.
+        //
+        // Throws ArgumentException when the pattern contains any other character.
+        private static void Set(string encoding, ExecutionMode mode, InstDescriptor inst, Type type)
+        {
+            int value          = 0; // Bits written as '1'.
+            int ignoredBits    = 0; // Bits written as 'x'.
+            int forbiddenCombo = 0; // Bits written as '<'; '>' bits stay 0 in this value.
+
+            // Bit positions of all '<' and '>' characters, in pattern order.
+            int[] constrainedPos = new int[encoding.Length];
+
+            int constrainedCount = 0;
+
+            for (int index = 0; index < encoding.Length; index++)
+            {
+                int bit = encoding.Length - 1 - index;
+
+                char chr = encoding[index];
+
+                switch (chr)
+                {
+                    case '0':
+                        break;
+
+                    case '1':
+                        value |= 1 << bit;
+                        break;
+
+                    case 'x':
+                        ignoredBits |= 1 << bit;
+                        break;
+
+                    case '>':
+                        constrainedPos[constrainedCount++] = bit;
+                        break;
+
+                    case '<':
+                        constrainedPos[constrainedCount++] = bit;
+
+                        forbiddenCombo |= 1 << bit;
+                        break;
+
+                    default:
+                        // Message first, parameter name second: the single-argument overload
+                        // would treat the parameter name as the message text.
+                        throw new ArgumentException(
+                            $"Invalid character '{chr}' at index {index} in instruction encoding \"{encoding}\".",
+                            nameof(encoding));
+                }
+            }
+
+            // Every bit that is not 'x' takes part in matching, constrained bits included.
+            int mask = ~ignoredBits;
+
+            if (constrainedCount == 0)
+            {
+                // Handled separately: with no constrained bits the only combination is 0, which
+                // equals forbiddenCombo and would be skipped by the loop below.
+                InsertInst(new InstInfo(mask, value, inst, type), mode);
+
+                return;
+            }
+
+            for (int combo = 0; combo < (1 << constrainedCount); combo++)
+            {
+                // Scatter the bits of "combo" to the constrained bit positions.
+                int fieldBits = 0;
+
+                for (int x = 0; x < constrainedCount; x++)
+                {
+                    fieldBits |= ((combo >> x) & 1) << constrainedPos[x];
+                }
+
+                if (fieldBits != forbiddenCombo)
+                {
+                    InsertInst(new InstInfo(mask, value | fieldBits, inst, type), mode);
+                }
+            }
+        }
+
+        // Appends "info" to the instruction list that belongs to "mode".
+        // Any other mode value is ignored without error.
+        private static void InsertInst(InstInfo info, ExecutionMode mode)
+        {
+            if (mode == ExecutionMode.Aarch32Arm)
+            {
+                _allInstA32.Add(info);
+            }
+            else if (mode == ExecutionMode.Aarch32Thumb)
+            {
+                _allInstT32.Add(info);
+            }
+            else if (mode == ExecutionMode.Aarch64)
+            {
+                _allInstA64.Add(info);
+            }
+        }
+
+        // Looks up "opCode" in the A32 fast-lookup table and returns the first matching
+        // descriptor/type pair, or the Und fallback produced by GetInstFromList.
+        public static (InstDescriptor inst, Type type) GetInstA32(int opCode)
+        {
+            InstInfo[] candidates = _instA32FastLookup[ToFastLookupIndex(opCode)];
+
+            return GetInstFromList(candidates, opCode);
+        }
+
+        // Looks up "opCode" in the T32 fast-lookup table and returns the first matching
+        // descriptor/type pair, or the Und fallback produced by GetInstFromList.
+        public static (InstDescriptor inst, Type type) GetInstT32(int opCode)
+        {
+            InstInfo[] candidates = _instT32FastLookup[ToFastLookupIndex(opCode)];
+
+            return GetInstFromList(candidates, opCode);
+        }
+
+        // Looks up "opCode" in the A64 fast-lookup table and returns the first matching
+        // descriptor/type pair, or the Und fallback produced by GetInstFromList.
+        public static (InstDescriptor inst, Type type) GetInstA64(int opCode)
+        {
+            InstInfo[] candidates = _instA64FastLookup[ToFastLookupIndex(opCode)];
+
+            return GetInstFromList(candidates, opCode);
+        }
+
+        // Scans "insts" in order and returns the descriptor and type of the first entry for
+        // which (opCode & Mask) == Value. When nothing matches, a descriptor built from
+        // InstName.Und / InstEmit.Und is returned together with typeof(OpCode).
+        private static (InstDescriptor inst, Type type) GetInstFromList(InstInfo[] insts, int opCode)
+        {
+            for (int position = 0; position < insts.Length; position++)
+            {
+                InstInfo candidate = insts[position];
+
+                if ((opCode & candidate.Mask) != candidate.Value)
+                {
+                    continue;
+                }
+
+                return (candidate.Inst, candidate.Type);
+            }
+
+            InstDescriptor undefined = new InstDescriptor(InstName.Und, InstEmit.Und);
+
+            return (undefined, typeof(OpCode));
+        }
+
+        // Reduces a 32-bit value to a 12-bit fast-lookup index:
+        // bits 10-13 become index bits 0-3 and bits 22-29 become index bits 4-11.
+        private static int ToFastLookupIndex(int value)
+        {
+            int lowNibble = (value >> 10) & 0x00F;
+            int highByte  = (value >> 18) & 0xFF0;
+
+            return lowNibble | highByte;
+        }
+ }
+}
diff --git a/ARMeilleure/Decoders/RegisterSize.cs b/ARMeilleure/Decoders/RegisterSize.cs
new file mode 100644
index 000000000..c9cea03ed
--- /dev/null
+++ b/ARMeilleure/Decoders/RegisterSize.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ // Identifies a register width used by the decoders.
+ // NOTE(review): presumably Int32/Int64 are the 32/64-bit integer widths and Simd64/Simd128
+ // the 64/128-bit vector widths - confirm at the use sites.
+ // Members have no explicit values, so they are numbered 0..3 in declaration order.
+ enum RegisterSize
+ {
+ Int32,
+ Int64,
+ Simd64,
+ Simd128
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/ShiftType.cs b/ARMeilleure/Decoders/ShiftType.cs
new file mode 100644
index 000000000..8583f16ad
--- /dev/null
+++ b/ARMeilleure/Decoders/ShiftType.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+ // Shift operation selector used by the decoders.
+ // NOTE(review): the explicit values 0..3 presumably mirror the 2-bit shift-type field of
+ // the instruction encodings, so they should not be renumbered - confirm at the use sites.
+ enum ShiftType
+ {
+ Lsl = 0, // Logical shift left.
+ Lsr = 1, // Logical shift right.
+ Asr = 2, // Arithmetic shift right.
+ Ror = 3 // Rotate right.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/IRDumper.cs b/ARMeilleure/Diagnostics/IRDumper.cs
new file mode 100644
index 000000000..55d5b493e
--- /dev/null
+++ b/ARMeilleure/Diagnostics/IRDumper.cs
@@ -0,0 +1,168 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace ARMeilleure.Diagnostics
+{
+    // Renders the intermediate representation held by a ControlFlowGraph as text, one line per
+    // block header and one line per node, for diagnostic output.
+    static class IRDumper
+    {
+        // Text added to the current indentation once per nesting level.
+        private const string Indentation = " ";
+
+        // Builds the dump for every block of "cfg", in cfg.Blocks order.
+        //
+        // Each block starts with a header line "block<Index>" followed by its Next and Branch
+        // successors when present, then one line per node: "<destination> = <name> <sources>"
+        // (the "<destination> = " part is omitted when the node has no destination).
+        // Local variables are shown as "%0", "%1", ... numbered in the order in which they are
+        // first printed.
+        public static string GetDump(ControlFlowGraph cfg)
+        {
+            StringBuilder sb = new StringBuilder();
+
+            // Display name assigned to each local variable seen so far.
+            Dictionary<Operand, string> localNames = new Dictionary<Operand, string>();
+
+            string indentation = string.Empty;
+
+            void IncreaseIndentation()
+            {
+                indentation += Indentation;
+            }
+
+            void DecreaseIndentation()
+            {
+                indentation = indentation.Substring(0, indentation.Length - Indentation.Length);
+            }
+
+            void AppendLine(string text)
+            {
+                sb.AppendLine(indentation + text);
+            }
+
+            // Block headers sit one level in; this level is kept for the whole dump.
+            IncreaseIndentation();
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                string blockName = GetBlockName(block);
+
+                if (block.Next != null)
+                {
+                    blockName += $" (next {GetBlockName(block.Next)})";
+                }
+
+                if (block.Branch != null)
+                {
+                    blockName += $" (branch {GetBlockName(block.Branch)})";
+                }
+
+                blockName += ":";
+
+                AppendLine(blockName);
+
+                IncreaseIndentation();
+
+                foreach (Node node in block.Operations)
+                {
+                    string[] sources = new string[node.SourcesCount];
+
+                    // Stays empty for node kinds other than PhiNode and Operation.
+                    string instName = string.Empty;
+
+                    if (node is PhiNode phi)
+                    {
+                        for (int index = 0; index < sources.Length; index++)
+                        {
+                            string phiBlockName = GetBlockName(phi.GetBlock(index));
+
+                            string operName = GetOperandName(phi.GetSource(index), localNames);
+
+                            sources[index] = $"({phiBlockName}: {operName})";
+                        }
+
+                        instName = "Phi";
+                    }
+                    else if (node is Operation operation)
+                    {
+                        for (int index = 0; index < sources.Length; index++)
+                        {
+                            sources[index] = GetOperandName(operation.GetSource(index), localNames);
+                        }
+
+                        instName = operation.Instruction.ToString();
+                    }
+
+                    string allSources = string.Join(", ", sources);
+
+                    string line = instName + " " + allSources;
+
+                    // The destination is named after the sources on purpose: local numbering
+                    // depends on the order in which operands are first printed.
+                    if (node.Destination != null)
+                    {
+                        line = GetOperandName(node.Destination, localNames) + " = " + line;
+                    }
+
+                    AppendLine(line);
+                }
+
+                DecreaseIndentation();
+            }
+
+            return sb.ToString();
+        }
+
+        // Returns the display name of a block: "block" followed by its Index.
+        private static string GetBlockName(BasicBlock block)
+        {
+            return $"block{block.Index}";
+        }
+
+        // Returns "<type> <name>" for an operand, or an empty string for a null operand.
+        //   LocalVariable: "%N"; a new N (the current dictionary count) is recorded in
+        //                  "localNames" the first time the operand is seen.
+        //   Register:      "b", "r" or "v" plus the register index for Flag, Integer and Vector
+        //                  registers; the name is left empty for any other register type.
+        //   Constant:      "0x" plus the value in upper-case hexadecimal.
+        //   Anything else: the lower-cased name of the operand kind.
+        private static string GetOperandName(Operand operand, Dictionary<Operand, string> localNames)
+        {
+            if (operand == null)
+            {
+                return "";
+            }
+
+            string name = string.Empty;
+
+            if (operand.Kind == OperandKind.LocalVariable)
+            {
+                if (!localNames.TryGetValue(operand, out string localName))
+                {
+                    localName = "%" + localNames.Count;
+
+                    localNames.Add(operand, localName);
+                }
+
+                name = localName;
+            }
+            else if (operand.Kind == OperandKind.Register)
+            {
+                Register reg = operand.GetRegister();
+
+                switch (reg.Type)
+                {
+                    case RegisterType.Flag:    name = "b" + reg.Index; break;
+                    case RegisterType.Integer: name = "r" + reg.Index; break;
+                    case RegisterType.Vector:  name = "v" + reg.Index; break;
+                }
+            }
+            else if (operand.Kind == OperandKind.Constant)
+            {
+                name = "0x" + operand.Value.ToString("X");
+            }
+            else
+            {
+                // Invariant casing keeps the dump identical regardless of the current culture
+                // (ToLower() maps 'I' differently under e.g. Turkish cultures).
+                name = operand.Kind.ToString().ToLowerInvariant();
+            }
+
+            return GetTypeName(operand.Type) + " " + name;
+        }
+
+        // Maps an operand type to its short display name.
+        // Throws ArgumentException for a type that has no mapping.
+        private static string GetTypeName(OperandType type)
+        {
+            switch (type)
+            {
+                case OperandType.FP32: return "f32";
+                case OperandType.FP64: return "f64";
+                case OperandType.I32:  return "i32";
+                case OperandType.I64:  return "i64";
+                case OperandType.None: return "none";
+                case OperandType.V128: return "v128";
+            }
+
+            throw new ArgumentException($"Invalid operand type \"{type}\".");
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/Logger.cs b/ARMeilleure/Diagnostics/Logger.cs
new file mode 100644
index 000000000..29d9c79b9
--- /dev/null
+++ b/ARMeilleure/Diagnostics/Logger.cs
@@ -0,0 +1,59 @@
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Diagnostics
+{
+    // Pass timing and IR dumping helper. The bodies of the public methods are compiled only
+    // when the M_DEBUG symbol is defined; without it they do nothing.
+    // State lives in plain static fields with no synchronization, and a single start timestamp
+    // is shared by all passes, so StartPass/EndPass pairs are expected not to overlap.
+    static class Logger
+    {
+        // Stopwatch timestamp captured by the most recent StartPass call.
+        private static long _startTime;
+
+        // Total Stopwatch ticks spent in each pass so far, indexed by (int)PassName.
+        // Only assigned in the static constructor, hence readonly.
+        private static readonly long[] _accumulatedTime;
+
+        static Logger()
+        {
+            _accumulatedTime = new long[(int)PassName.Count];
+        }
+
+        // Announces the start of pass "name" and records the start timestamp.
+        public static void StartPass(PassName name)
+        {
+#if M_DEBUG
+            WriteOutput(name + " pass started...");
+
+            _startTime = Stopwatch.GetTimestamp();
+#endif
+        }
+
+        // Ends pass "name" (see the overload below) and then prints the IR dump of "cfg".
+        public static void EndPass(PassName name, ControlFlowGraph cfg)
+        {
+#if M_DEBUG
+            EndPass(name);
+
+            WriteOutput("IR after " + name + " pass:");
+
+            WriteOutput(IRDumper.GetDump(cfg));
+#endif
+        }
+
+        // Adds the time elapsed since the last StartPass to the total of pass "name" and prints
+        // that total. Note that the printed figure is the accumulated time over all runs of the
+        // pass, not just the run that has ended.
+        public static void EndPass(PassName name)
+        {
+#if M_DEBUG
+            long elapsedTime = Stopwatch.GetTimestamp() - _startTime;
+
+            _accumulatedTime[(int)name] += elapsedTime;
+
+            WriteOutput($"{name} pass ended after {GetMilliseconds(_accumulatedTime[(int)name])} ms...");
+#endif
+        }
+
+        // Converts Stopwatch ticks to whole milliseconds (fraction truncated).
+        private static long GetMilliseconds(long ticks)
+        {
+            return (long)(((double)ticks / Stopwatch.Frequency) * 1000);
+        }
+
+        // Single output sink for this class: writes "text" as a line to the console.
+        private static void WriteOutput(string text)
+        {
+            Console.WriteLine(text);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/PassName.cs b/ARMeilleure/Diagnostics/PassName.cs
new file mode 100644
index 000000000..e37439855
--- /dev/null
+++ b/ARMeilleure/Diagnostics/PassName.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Diagnostics
+{
+ // Names of the passes that Logger can time. Members are numbered in declaration order
+ // and are used as array indices by Logger.
+ enum PassName
+ {
+ Decoding,
+ Translation,
+ RegisterUsage,
+ Dominance,
+ SsaConstruction,
+ Optimization,
+ PreAllocation,
+ RegisterAllocation,
+ CodeGeneration,
+
+ // Not a pass: number of entries above. Logger sizes its accumulator array with it, so it
+ // must stay the last member.
+ Count
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/CryptoHelper.cs b/ARMeilleure/Instructions/CryptoHelper.cs
new file mode 100644
index 000000000..b6b4a62d3
--- /dev/null
+++ b/ARMeilleure/Instructions/CryptoHelper.cs
@@ -0,0 +1,279 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+
+namespace ARMeilleure.Instructions
+{
+ static class CryptoHelper
+ {
+#region "LookUp Tables"
+ // AES forward substitution box: _sBox[b] is the replacement for byte b.
+ // Indexed directly by each state byte in AesSubBytes.
+ private static readonly byte[] _sBox = new byte[]
+ {
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+ };
+
+ // AES inverse substitution box: _invSBox[b] is the replacement for byte b.
+ // Indexed directly by each state byte in AesInvSubBytes.
+ private static readonly byte[] _invSBox = new byte[]
+ {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+ };
+
+ // Multiplication by 0x02 in the AES finite field GF(2^8): _gfMul02[b] = 0x02 * b
+ // (entries from 0x80 upward are reduced, e.g. _gfMul02[0x80] = 0x1b). Used by AesMixColumns.
+ private static readonly byte[] _gfMul02 = new byte[]
+ {
+ 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+ 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+ 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+ 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+ 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+ 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+ 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+ 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+ 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
+ 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
+ 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+ 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
+ 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
+ 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+ 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
+ };
+
+ // Multiplication by 0x03 in the AES finite field GF(2^8): _gfMul03[b] = 0x03 * b.
+ // Used by AesMixColumns.
+ private static readonly byte[] _gfMul03 = new byte[]
+ {
+ 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+ 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+ 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+ 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+ 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+ 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+ 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+ 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+ 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+ 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
+ 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
+ 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+ 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
+ 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
+ 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+ 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
+ };
+
+ // Multiplication by 0x09 in the AES finite field GF(2^8): _gfMul09[b] = 0x09 * b.
+ // Used by AesInvMixColumns.
+ private static readonly byte[] _gfMul09 = new byte[]
+ {
+ 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+ 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+ 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+ 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
+ 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
+ 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+ 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
+ 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
+ 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+ 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
+ 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+ 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+ 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
+ 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
+ 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+ 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
+ };
+
+ // Multiplication by 0x0B in the AES finite field GF(2^8): _gfMul0B[b] = 0x0B * b.
+ // Used by AesInvMixColumns.
+ private static readonly byte[] _gfMul0B = new byte[]
+ {
+ 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+ 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+ 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+ 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
+ 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
+ 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+ 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
+ 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
+ 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+ 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
+ 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
+ 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+ 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+ 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+ 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+ 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
+ };
+
+ // Multiplication by 0x0D in the AES finite field GF(2^8): _gfMul0D[b] = 0x0D * b.
+ // Used by AesInvMixColumns.
+ private static readonly byte[] _gfMul0D = new byte[]
+ {
+ 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+ 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+ 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+ 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
+ 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+ 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+ 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
+ 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
+ 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+ 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+ 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
+ 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+ 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+ 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+ 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+ 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
+ };
+
+ // Multiplication by 0x0E in the AES finite field GF(2^8): _gfMul0E[b] = 0x0E * b.
+ // Used by AesInvMixColumns.
+ private static readonly byte[] _gfMul0E = new byte[]
+ {
+ 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+ 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+ 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+ 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
+ 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+ 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+ 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
+ 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
+ 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+ 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
+ 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+ 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+ 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
+ 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
+ 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+ 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
+ };
+
+ // Byte permutation applied by AesShiftRows: the byte at input position idx is written to
+ // output position _srPerm[idx].
+ private static readonly byte[] _srPerm = new byte[]
+ {
+ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+ };
+
+ // Byte permutation applied by AesInvShiftRows: the byte at input position idx is written
+ // to output position _isrPerm[idx]. It is the inverse of _srPerm.
+ private static readonly byte[] _isrPerm = new byte[]
+ {
+ 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+ };
+#endregion
+
+        // AES InvMixColumns step. The 16 state bytes are processed as four groups of four
+        // consecutive bytes; each group is multiplied, using the GF(2^8) lookup tables, by
+        // the matrix whose rows are (0E 0B 0D 09), (09 0E 0B 0D), (0D 09 0E 0B), (0B 0D 09 0E).
+        public static V128 AesInvMixColumns(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int column = 0; column < 4; column++)
+            {
+                int offset = column * 4;
+
+                // The four bytes of this column, rows 0 to 3.
+                byte s0 = src[offset];
+                byte s1 = src[offset + 1];
+                byte s2 = src[offset + 2];
+                byte s3 = src[offset + 3];
+
+                dst[offset]     = (byte)(_gfMul0E[s0] ^ _gfMul0B[s1] ^ _gfMul0D[s2] ^ _gfMul09[s3]);
+                dst[offset + 1] = (byte)(_gfMul09[s0] ^ _gfMul0E[s1] ^ _gfMul0B[s2] ^ _gfMul0D[s3]);
+                dst[offset + 2] = (byte)(_gfMul0D[s0] ^ _gfMul09[s1] ^ _gfMul0E[s2] ^ _gfMul0B[s3]);
+                dst[offset + 3] = (byte)(_gfMul0B[s0] ^ _gfMul0D[s1] ^ _gfMul09[s2] ^ _gfMul0E[s3]);
+            }
+
+            return new V128(dst);
+        }
+
+        // AES InvShiftRows step: moves each of the 16 state bytes to the position given by the
+        // _isrPerm permutation table.
+        public static V128 AesInvShiftRows(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int from = 0; from < 16; from++)
+            {
+                int to = _isrPerm[from];
+
+                dst[to] = src[from];
+            }
+
+            return new V128(dst);
+        }
+
+        // AES InvSubBytes step: replaces each of the 16 state bytes with its entry in the
+        // inverse S-box table.
+        public static V128 AesInvSubBytes(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int position = 0; position < 16; position++)
+            {
+                byte original = src[position];
+
+                dst[position] = _invSBox[original];
+            }
+
+            return new V128(dst);
+        }
+
+        // AES MixColumns step. The 16 state bytes are processed as four groups of four
+        // consecutive bytes; each group is multiplied, using the GF(2^8) lookup tables, by
+        // the matrix whose rows are (02 03 01 01), (01 02 03 01), (01 01 02 03), (03 01 01 02).
+        public static V128 AesMixColumns(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int column = 0; column < 4; column++)
+            {
+                int offset = column * 4;
+
+                // The four bytes of this column, rows 0 to 3.
+                byte s0 = src[offset];
+                byte s1 = src[offset + 1];
+                byte s2 = src[offset + 2];
+                byte s3 = src[offset + 3];
+
+                dst[offset]     = (byte)(_gfMul02[s0] ^ _gfMul03[s1] ^ s2 ^ s3);
+                dst[offset + 1] = (byte)(s0 ^ _gfMul02[s1] ^ _gfMul03[s2] ^ s3);
+                dst[offset + 2] = (byte)(s0 ^ s1 ^ _gfMul02[s2] ^ _gfMul03[s3]);
+                dst[offset + 3] = (byte)(_gfMul03[s0] ^ s1 ^ s2 ^ _gfMul02[s3]);
+            }
+
+            return new V128(dst);
+        }
+
+        // AES ShiftRows step: moves each of the 16 state bytes to the position given by the
+        // _srPerm permutation table.
+        public static V128 AesShiftRows(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int from = 0; from < 16; from++)
+            {
+                int to = _srPerm[from];
+
+                dst[to] = src[from];
+            }
+
+            return new V128(dst);
+        }
+
+        // AES SubBytes step: replaces each of the 16 state bytes with its entry in the S-box
+        // table.
+        public static V128 AesSubBytes(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            for (int position = 0; position < 16; position++)
+            {
+                byte original = src[position];
+
+                dst[position] = _sBox[original];
+            }
+
+            return new V128(dst);
+        }
+ }
+}
diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs
new file mode 100644
index 000000000..e90e4d77a
--- /dev/null
+++ b/ARMeilleure/Instructions/DelegateTypes.cs
@@ -0,0 +1,78 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ // Delegate types used to wrap managed helper methods so they can be passed to
+ // the emitter's Call operation (e.g. context.Call(new _U32_U32(...), n)).
+ //
+ // Naming scheme: _<Return>_<Arg1>_<Arg2>..., where each part names a type:
+ //   F32 = float,  F64 = double,
+ //   S32 = int,    S64 = long,
+ //   U8  = byte,   U16 = ushort, U32 = uint, U64 = ulong,
+ //   Bool = bool,  V128 = V128,  Void = void,
+ //   MidpointRounding = System.MidpointRounding.
+ // A name with a single part (e.g. _U64, _Void) takes no arguments.
+
+ // Delegates returning double.
+ delegate double _F64_F64(double a1);
+ delegate double _F64_F64_F64(double a1, double a2);
+ delegate double _F64_F64_F64_F64(double a1, double a2, double a3);
+ delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2);
+
+ // Delegates returning float.
+ delegate float _F32_F32(float a1);
+ delegate float _F32_F32_F32(float a1, float a2);
+ delegate float _F32_F32_F32_F32(float a1, float a2, float a3);
+ delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2);
+ delegate float _F32_U16(ushort a1);
+
+ // Delegates returning int.
+ delegate int _S32_F32(float a1);
+ delegate int _S32_F32_F32_Bool(float a1, float a2, bool a3);
+ delegate int _S32_F64(double a1);
+ delegate int _S32_F64_F64_Bool(double a1, double a2, bool a3);
+ delegate int _S32_U64_U16(ulong a1, ushort a2);
+ delegate int _S32_U64_U32(ulong a1, uint a2);
+ delegate int _S32_U64_U64(ulong a1, ulong a2);
+ delegate int _S32_U64_U8(ulong a1, byte a2);
+ delegate int _S32_U64_V128(ulong a1, V128 a2);
+
+ // Delegates returning long.
+ delegate long _S64_F32(float a1);
+ delegate long _S64_F64(double a1);
+ delegate long _S64_S64(long a1);
+ delegate long _S64_S64_S32(long a1, int a2);
+ delegate long _S64_S64_S64(long a1, long a2);
+ delegate long _S64_S64_S64_Bool_S32(long a1, long a2, bool a3, int a4);
+ delegate long _S64_S64_S64_S32(long a1, long a2, int a3);
+ delegate long _S64_U64_S32(ulong a1, int a2);
+ delegate long _S64_U64_S64(ulong a1, long a2);
+
+ // Delegates returning ushort.
+ delegate ushort _U16_F32(float a1);
+ delegate ushort _U16_U64(ulong a1);
+
+ // Delegates returning uint.
+ delegate uint _U32_F32(float a1);
+ delegate uint _U32_F64(double a1);
+ delegate uint _U32_U32(uint a1);
+ delegate uint _U32_U32_U16(uint a1, ushort a2);
+ delegate uint _U32_U32_U32(uint a1, uint a2);
+ delegate uint _U32_U32_U64(uint a1, ulong a2);
+ delegate uint _U32_U32_U8(uint a1, byte a2);
+ delegate uint _U32_U64(ulong a1);
+
+ // Delegates returning ulong.
+ delegate ulong _U64();
+ delegate ulong _U64_F32(float a1);
+ delegate ulong _U64_F64(double a1);
+ delegate ulong _U64_S64_S32(long a1, int a2);
+ delegate ulong _U64_S64_U64(long a1, ulong a2);
+ delegate ulong _U64_U64(ulong a1);
+ delegate ulong _U64_U64_S32(ulong a1, int a2);
+ delegate ulong _U64_U64_S64_S32(ulong a1, long a2, int a3);
+ delegate ulong _U64_U64_U64(ulong a1, ulong a2);
+ delegate ulong _U64_U64_U64_Bool_S32(ulong a1, ulong a2, bool a3, int a4);
+
+ // Delegates returning byte.
+ delegate byte _U8_U64(ulong a1);
+
+ // Delegates returning V128.
+ delegate V128 _V128_U64(ulong a1);
+ delegate V128 _V128_V128(V128 a1);
+ delegate V128 _V128_V128_U32_V128(V128 a1, uint a2, V128 a3);
+ delegate V128 _V128_V128_V128(V128 a1, V128 a2);
+ delegate V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3);
+ delegate V128 _V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4);
+ delegate V128 _V128_V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4, V128 a5);
+
+ // Delegates returning nothing.
+ delegate void _Void();
+ delegate void _Void_U64(ulong a1);
+ delegate void _Void_U64_S32(ulong a1, int a2);
+ delegate void _Void_U64_U16(ulong a1, ushort a2);
+ delegate void _Void_U64_U32(ulong a1, uint a2);
+ delegate void _Void_U64_U64(ulong a1, ulong a2);
+ delegate void _Void_U64_U8(ulong a1, byte a2);
+ delegate void _Void_U64_V128(ulong a1, V128 a2);
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs
new file mode 100644
index 000000000..947c9f70b
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAlu.cs
@@ -0,0 +1,369 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // Integer ALU instruction emitters. Every public instruction method emits
+    // IR for the opcode currently held in context.CurrOp.
+    static partial class InstEmit
+    {
+        public static void Adc(ArmEmitterContext context)
+        {
+            EmitAdc(context, setFlags: false);
+        }
+
+        public static void Adcs(ArmEmitterContext context)
+        {
+            EmitAdc(context, setFlags: true);
+        }
+
+        // Add with carry: Rd = Rn + Rm + C. When setFlags is true, N/Z/C/V are
+        // updated from the result before it is stored.
+        private static void EmitAdc(ArmEmitterContext context, bool setFlags)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand sum = context.Add(lhs, rhs);
+
+            Operand carryIn = GetFlag(PState.CFlag);
+
+            // Widen the carry so it matches the operand type of 64-bit operations.
+            if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+            {
+                carryIn = context.ZeroExtend32(OperandType.I64, carryIn);
+            }
+
+            sum = context.Add(sum, carryIn);
+
+            if (setFlags)
+            {
+                EmitNZFlagsCheck(context, sum);
+
+                EmitAdcsCCheck(context, lhs, sum);
+                EmitAddsVCheck(context, lhs, rhs, sum);
+            }
+
+            SetAluDOrZR(context, sum);
+        }
+
+        public static void Add(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            SetAluD(context, context.Add(lhs, rhs));
+        }
+
+        // Rd = Rn + Rm, updating N/Z/C/V.
+        public static void Adds(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            context.MarkComparison(lhs, rhs);
+
+            Operand sum = context.Add(lhs, rhs);
+
+            EmitNZFlagsCheck(context, sum);
+
+            EmitAddsCCheck(context, lhs, sum);
+            EmitAddsVCheck(context, lhs, rhs, sum);
+
+            SetAluDOrZR(context, sum);
+        }
+
+        public static void And(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            SetAluD(context, context.BitwiseAnd(lhs, rhs));
+        }
+
+        // Rd = Rn & Rm, updating N/Z and clearing C/V.
+        public static void Ands(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand masked = context.BitwiseAnd(lhs, rhs);
+
+            EmitNZFlagsCheck(context, masked);
+            EmitCVFlagsClear(context);
+
+            SetAluDOrZR(context, masked);
+        }
+
+        public static void Asrv(ArmEmitterContext context)
+        {
+            Operand value = GetAluN(context);
+            Operand amount = GetAluMShift(context);
+
+            SetAluDOrZR(context, context.ShiftRightSI(value, amount));
+        }
+
+        public static void Bic(ArmEmitterContext context)
+        {
+            EmitBic(context, setFlags: false);
+        }
+
+        public static void Bics(ArmEmitterContext context)
+        {
+            EmitBic(context, setFlags: true);
+        }
+
+        // Bit clear: Rd = Rn & ~Rm. The flag-setting form updates N/Z, clears
+        // C/V and treats register 31 as the zero register on write-back.
+        private static void EmitBic(ArmEmitterContext context, bool setFlags)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand inverted = context.BitwiseNot(rhs);
+            Operand cleared = context.BitwiseAnd(lhs, inverted);
+
+            if (setFlags)
+            {
+                EmitNZFlagsCheck(context, cleared);
+                EmitCVFlagsClear(context);
+            }
+
+            SetAluD(context, cleared, setFlags);
+        }
+
+        // Count leading sign bits: XORs the value shifted right by one with the
+        // value minus its top bit, counts the leading zeros of that and
+        // subtracts one.
+        public static void Cls(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, op.Rn);
+
+            Operand shifted = context.ShiftRightUI(value, Const(1));
+
+            Operand allButTopBit;
+
+            if (op.RegisterSize == RegisterSize.Int32)
+            {
+                allButTopBit = Const(int.MaxValue);
+            }
+            else
+            {
+                allButTopBit = Const(long.MaxValue);
+            }
+
+            Operand withoutTopBit = context.BitwiseAnd(value, allButTopBit);
+
+            Operand differing = context.BitwiseExclusiveOr(shifted, withoutTopBit);
+
+            Operand count = context.CountLeadingZeros(differing);
+
+            count = context.Subtract(count, Const(count.Type, 1));
+
+            SetAluDOrZR(context, count);
+        }
+
+        public static void Clz(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, op.Rn);
+
+            SetAluDOrZR(context, context.CountLeadingZeros(value));
+        }
+
+        // Rd = Rn ^ ~Rm.
+        public static void Eon(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand inverted = context.BitwiseNot(rhs);
+
+            SetAluD(context, context.BitwiseExclusiveOr(lhs, inverted));
+        }
+
+        public static void Eor(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            SetAluD(context, context.BitwiseExclusiveOr(lhs, rhs));
+        }
+
+        // Extracts a register-sized value from the Rn:Rm pair, starting at bit
+        // op.Shift of Rm. A shift of zero yields Rm unchanged.
+        public static void Extr(ArmEmitterContext context)
+        {
+            OpCodeAluRs op = (OpCodeAluRs)context.CurrOp;
+
+            Operand result = GetIntOrZR(context, op.Rm);
+
+            if (op.Shift != 0 && op.Rn == op.Rm)
+            {
+                // Both halves come from the same register: a plain rotate.
+                result = context.RotateRight(result, Const(op.Shift));
+            }
+            else if (op.Shift != 0)
+            {
+                Operand low = context.ShiftRightUI(result, Const(op.Shift));
+
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                int highShift = op.GetBitsCount() - op.Shift;
+
+                Operand high = context.ShiftLeft(n, Const(highShift));
+
+                result = context.BitwiseOr(low, high);
+            }
+
+            SetAluDOrZR(context, result);
+        }
+
+        public static void Lslv(ArmEmitterContext context)
+        {
+            Operand value = GetAluN(context);
+            Operand amount = GetAluMShift(context);
+
+            SetAluDOrZR(context, context.ShiftLeft(value, amount));
+        }
+
+        public static void Lsrv(ArmEmitterContext context)
+        {
+            Operand value = GetAluN(context);
+            Operand amount = GetAluMShift(context);
+
+            SetAluDOrZR(context, context.ShiftRightUI(value, amount));
+        }
+
+        public static void Sbc(ArmEmitterContext context)
+        {
+            EmitSbc(context, setFlags: false);
+        }
+
+        public static void Sbcs(ArmEmitterContext context)
+        {
+            EmitSbc(context, setFlags: true);
+        }
+
+        // Subtract with carry: Rd = Rn - Rm - (C ^ 1). When setFlags is true,
+        // N/Z/C/V are updated before the result is stored.
+        private static void EmitSbc(ArmEmitterContext context, bool setFlags)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand difference = context.Subtract(lhs, rhs);
+
+            Operand carryIn = GetFlag(PState.CFlag);
+
+            // The borrow is the inverse of the carry flag.
+            Operand borrow = context.BitwiseExclusiveOr(carryIn, Const(1));
+
+            // Widen the borrow so it matches the operand type of 64-bit operations.
+            if (context.CurrOp.RegisterSize == RegisterSize.Int64)
+            {
+                borrow = context.ZeroExtend32(OperandType.I64, borrow);
+            }
+
+            difference = context.Subtract(difference, borrow);
+
+            if (setFlags)
+            {
+                EmitNZFlagsCheck(context, difference);
+
+                EmitSbcsCCheck(context, lhs, rhs);
+                EmitSubsVCheck(context, lhs, rhs, difference);
+            }
+
+            SetAluDOrZR(context, difference);
+        }
+
+        public static void Sub(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            SetAluD(context, context.Subtract(lhs, rhs));
+        }
+
+        // Rd = Rn - Rm, updating N/Z/C/V.
+        public static void Subs(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            context.MarkComparison(lhs, rhs);
+
+            Operand difference = context.Subtract(lhs, rhs);
+
+            EmitNZFlagsCheck(context, difference);
+
+            EmitSubsCCheck(context, lhs, rhs);
+            EmitSubsVCheck(context, lhs, rhs, difference);
+
+            SetAluDOrZR(context, difference);
+        }
+
+        // Rd = Rn | ~Rm.
+        public static void Orn(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand inverted = context.BitwiseNot(rhs);
+
+            SetAluD(context, context.BitwiseOr(lhs, inverted));
+        }
+
+        public static void Orr(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            SetAluD(context, context.BitwiseOr(lhs, rhs));
+        }
+
+        // Bit reversal, delegated to the managed SoftFallback helpers.
+        public static void Rbit(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, op.Rn);
+
+            Operand reversed = op.RegisterSize == RegisterSize.Int32
+                ? context.Call(new _U32_U32(SoftFallback.ReverseBits32), value)
+                : context.Call(new _U64_U64(SoftFallback.ReverseBits64), value);
+
+            SetAluDOrZR(context, reversed);
+        }
+
+        // Byte reversal within 16-bit units, delegated to SoftFallback.
+        public static void Rev16(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, op.Rn);
+
+            Operand reversed = op.RegisterSize == RegisterSize.Int32
+                ? context.Call(new _U32_U32(SoftFallback.ReverseBytes16_32), value)
+                : context.Call(new _U64_U64(SoftFallback.ReverseBytes16_64), value);
+
+            SetAluDOrZR(context, reversed);
+        }
+
+        // Byte reversal within 32-bit units: a plain byte swap for 32-bit
+        // registers, a SoftFallback call for 64-bit registers.
+        public static void Rev32(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, op.Rn);
+
+            Operand reversed = op.RegisterSize == RegisterSize.Int32
+                ? context.ByteSwap(value)
+                : context.Call(new _U64_U64(SoftFallback.ReverseBytes32_64), value);
+
+            SetAluDOrZR(context, reversed);
+        }
+
+        public static void Rev64(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, op.Rn);
+
+            SetAluDOrZR(context, context.ByteSwap(value));
+        }
+
+        public static void Rorv(ArmEmitterContext context)
+        {
+            Operand value = GetAluN(context);
+            Operand amount = GetAluMShift(context);
+
+            SetAluDOrZR(context, context.RotateRight(value, amount));
+        }
+
+        // Loads the variable shift amount from Rm as a 32-bit value masked to
+        // the range [0, register bit count - 1].
+        private static Operand GetAluMShift(ArmEmitterContext context)
+        {
+            IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp;
+
+            Operand amount = GetIntOrZR(context, op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Int64)
+            {
+                amount = context.ConvertI64ToI32(amount);
+            }
+
+            int amountMask = context.CurrOp.GetBitsCount() - 1;
+
+            return context.BitwiseAnd(amount, Const(amountMask));
+        }
+
+        // Clears the C and V flags.
+        private static void EmitCVFlagsClear(ArmEmitterContext context)
+        {
+            SetFlag(context, PState.CFlag, Const(0));
+            SetFlag(context, PState.VFlag, Const(0));
+        }
+
+        // Stores d into Rd without forcing register 31 to be the zero register.
+        public static void SetAluD(ArmEmitterContext context, Operand d) =>
+            SetAluD(context, d, x31IsZR: false);
+
+        // Stores d into Rd, treating register 31 as the zero register.
+        public static void SetAluDOrZR(ArmEmitterContext context, Operand d) =>
+            SetAluD(context, d, x31IsZR: true);
+
+        // Stores d into Rd. The write is dropped when Rd is register 31 and
+        // either x31IsZR is set or the opcode is a shifted-register form.
+        public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR)
+        {
+            IOpCodeAlu op = (IOpCodeAlu)context.CurrOp;
+
+            bool targetsZeroRegister = x31IsZR || op is IOpCodeAluRs;
+
+            if (!targetsZeroRegister || op.Rd != RegisterConsts.ZeroIndex)
+            {
+                SetIntOrSP(context, op.Rd, d);
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
new file mode 100644
index 000000000..79b0abbc3
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -0,0 +1,129 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // 32-bit integer ALU instruction emitters. Every public instruction method
+    // emits IR for the opcode currently held in context.CurrOp.
+    static partial class InstEmit32
+    {
+        // Rd = Rn + M, updating N/Z/C/V when the opcode sets flags.
+        public static void Add(ArmEmitterContext context)
+        {
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+            Operand n = GetAluN(context);
+            // The carry comes from the addition itself, not from the shifter.
+            Operand m = GetAluM(context, setCarry: false);
+
+            Operand res = context.Add(n, m);
+
+            if (op.SetFlags)
+            {
+                EmitNZFlagsCheck(context, res);
+
+                EmitAddsCCheck(context, n, res);
+                EmitAddsVCheck(context, n, m, res);
+            }
+
+            EmitAluStore(context, res);
+        }
+
+        // Computes Rn - M only to update N/Z/C/V; the difference is discarded.
+        public static void Cmp(ArmEmitterContext context)
+        {
+            Operand n = GetAluN(context);
+            // The carry comes from the subtraction itself, not from the shifter.
+            Operand m = GetAluM(context, setCarry: false);
+
+            Operand res = context.Subtract(n, m);
+
+            EmitNZFlagsCheck(context, res);
+
+            // EmitSubsCCheck expects the two subtraction operands (C = Rn >= Rm).
+            EmitSubsCCheck(context, n, m);
+            EmitSubsVCheck(context, n, m, res);
+        }
+
+        // Rd = M, updating N/Z when the opcode sets flags. GetAluM is called with
+        // its default setCarry so the shifter may update C.
+        public static void Mov(ArmEmitterContext context)
+        {
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+            Operand m = GetAluM(context);
+
+            if (op.SetFlags)
+            {
+                EmitNZFlagsCheck(context, m);
+            }
+
+            EmitAluStore(context, m);
+        }
+
+        // Rd = Rn - M, updating N/Z/C/V when the opcode sets flags.
+        public static void Sub(ArmEmitterContext context)
+        {
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+            Operand n = GetAluN(context);
+            // The carry comes from the subtraction itself, not from the shifter.
+            Operand m = GetAluM(context, setCarry: false);
+
+            Operand res = context.Subtract(n, m);
+
+            if (op.SetFlags)
+            {
+                EmitNZFlagsCheck(context, res);
+
+                // EmitSubsCCheck expects the two subtraction operands (C = Rn >= Rm).
+                EmitSubsCCheck(context, n, m);
+                EmitSubsVCheck(context, n, m, res);
+            }
+
+            EmitAluStore(context, res);
+        }
+
+        // Writes an ALU result to Rd. A write to the PC becomes a return from
+        // the translated code with the (aligned) value as the return operand.
+        private static void EmitAluStore(ArmEmitterContext context, Operand value)
+        {
+            IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+            if (op.Rd == RegisterAlias.Aarch32Pc)
+            {
+                if (op.SetFlags)
+                {
+                    // TODO: Load SPSR etc.
+                    // The alignment mask is chosen at run time from the T flag.
+                    Operand isThumb = GetFlag(PState.TFlag);
+
+                    Operand lblThumb = Label();
+
+                    context.BranchIfTrue(lblThumb, isThumb);
+
+                    // T flag clear: clear the two low bits of the target.
+                    context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~3))));
+
+                    context.MarkLabel(lblThumb);
+
+                    // T flag set: clear only the lowest bit of the target.
+                    context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1))));
+                }
+                else
+                {
+                    EmitAluWritePc(context, value);
+                }
+            }
+            else
+            {
+                SetIntA32(context, op.Rd, value);
+            }
+        }
+
+        // Handles a non-flag-setting ALU write to the PC: stores the context,
+        // then either returns the value with bit 0 cleared (Thumb opcode) or
+        // defers to EmitBxWritePc.
+        private static void EmitAluWritePc(ArmEmitterContext context, Operand value)
+        {
+            context.StoreToContext();
+
+            if (IsThumb(context.CurrOp))
+            {
+                context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1))));
+            }
+            else
+            {
+                EmitBxWritePc(context, value);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 000000000..81d5c9eb3
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // Shared helpers for the integer ALU emitters: N/Z/C/V flag computation and
+    // resolution of the two ALU operands for the opcode in context.CurrOp.
+    static class InstEmitAluHelper
+    {
+        // Sets N to (d < 0, signed) and Z to (d == 0).
+        public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d)
+        {
+            SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0)));
+            SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0)));
+        }
+
+        // Carry-out of an add-with-carry, given the first operand n and the
+        // result d. Reads the current C flag as the carry-in, so it must run
+        // before anything overwrites C.
+        public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d)
+        {
+            // C = (Rd == Rn && CIn) || Rd < Rn
+            Operand cIn = GetFlag(PState.CFlag);
+
+            Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn);
+
+            cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n));
+
+            SetFlag(context, PState.CFlag, cOut);
+        }
+
+        // Carry-out of a plain addition, given the first operand n and the result d.
+        public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d)
+        {
+            // C = Rd < Rn
+            SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n));
+        }
+
+        // Signed overflow of an addition n + m = d.
+        public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+        {
+            // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
+            Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+            vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m)));
+
+            vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+            SetFlag(context, PState.VFlag, vOut);
+        }
+
+        // Carry-out of a subtract-with-carry, given both operands. Reads the
+        // current C flag as the carry-in, so it must run before anything
+        // overwrites C.
+        public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m)
+        {
+            // C = (Rn == Rm && CIn) || Rn > Rm
+            Operand cIn = GetFlag(PState.CFlag);
+
+            Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn);
+
+            cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m));
+
+            SetFlag(context, PState.CFlag, cOut);
+        }
+
+        // Carry-out of a plain subtraction, given both operands (not the result).
+        public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m)
+        {
+            // C = Rn >= Rm
+            SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m));
+        }
+
+        // Signed overflow of a subtraction n - m = d.
+        public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+        {
+            // V = (Rd ^ Rn) & (Rn ^ Rm) < 0
+            Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+            vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m));
+
+            vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+            SetFlag(context, PState.VFlag, vOut);
+        }
+
+        // Loads the first ALU operand (Rn) of the current opcode.
+        // Throws InvalidOperationException for unsupported opcode types.
+        public static Operand GetAluN(ArmEmitterContext context)
+        {
+            if (context.CurrOp is IOpCodeAlu op)
+            {
+                // Logical and shifted-register forms read register 31 as the
+                // zero register; the remaining forms read it as the stack pointer.
+                if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs)
+                {
+                    return GetIntOrZR(context, op.Rn);
+                }
+                else
+                {
+                    return GetIntOrSP(context, op.Rn);
+                }
+            }
+            else if (context.CurrOp is IOpCode32Alu op32)
+            {
+                return GetIntA32(context, op32.Rn);
+            }
+            else
+            {
+                throw InvalidOpCodeType(context.CurrOp);
+            }
+        }
+
+        // Produces the second ALU operand of the current opcode (immediate,
+        // shifted register or extended register).
+        //
+        // setCarry: when true, the 32-bit forms may update the C flag with the
+        // shifter carry-out (only if the opcode itself sets flags). Callers
+        // that compute C themselves pass false.
+        // Throws InvalidOperationException for unsupported opcode types.
+        public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true)
+        {
+            switch (context.CurrOp)
+            {
+                // ARM32.
+                case OpCode32AluImm op:
+                {
+                    // For a rotated immediate, C receives bit 31 of the immediate.
+                    // Honour setCarry so callers that opted out do not get a
+                    // spurious C flag write.
+                    if (setCarry && op.SetFlags && op.IsRotated)
+                    {
+                        SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31));
+                    }
+
+                    return Const(op.Immediate);
+                }
+
+                case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
+
+                case OpCodeT16AluImm8 op: return Const(op.Immediate);
+
+                // ARM64.
+                case IOpCodeAluImm op:
+                {
+                    // Match the constant's type to the operation's operand type.
+                    if (op.GetOperandType() == OperandType.I32)
+                    {
+                        return Const((int)op.Immediate);
+                    }
+                    else
+                    {
+                        return Const(op.Immediate);
+                    }
+                }
+
+                case IOpCodeAluRs op:
+                {
+                    Operand value = GetIntOrZR(context, op.Rm);
+
+                    switch (op.ShiftType)
+                    {
+                        case ShiftType.Lsl: value = context.ShiftLeft (value, Const(op.Shift)); break;
+                        case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break;
+                        case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break;
+                        case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break;
+                    }
+
+                    return value;
+                }
+
+                case IOpCodeAluRx op:
+                {
+                    Operand value = GetExtendedM(context, op.Rm, op.IntType);
+
+                    value = context.ShiftLeft(value, Const(op.Shift));
+
+                    return value;
+                }
+
+                default: throw InvalidOpCodeType(context.CurrOp);
+            }
+        }
+
+        // Builds the exception thrown when CurrOp is not a supported ALU opcode type.
+        private static Exception InvalidOpCodeType(OpCode opCode)
+        {
+            return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+        }
+
+        // ARM32 helpers.
+
+        // Loads Rm and applies the opcode's immediate shift. C is updated only
+        // when both setCarry and op.SetFlags are true.
+        private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry)
+        {
+            Operand m = GetIntA32(context, op.Rm);
+
+            int shift = op.Imm;
+
+            // An immediate of zero has a special meaning for every shift type
+            // other than LSL: LSR/ASR by 32, and ROR becomes RRX (handled below).
+            if (shift == 0)
+            {
+                switch (op.ShiftType)
+                {
+                    case ShiftType.Lsr: shift = 32; break;
+                    case ShiftType.Asr: shift = 32; break;
+                    case ShiftType.Ror: shift = 1; break;
+                }
+            }
+
+            if (shift != 0)
+            {
+                setCarry &= op.SetFlags;
+
+                switch (op.ShiftType)
+                {
+                    case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break;
+                    case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break;
+                    case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break;
+                    case ShiftType.Ror:
+                        if (op.Imm != 0)
+                        {
+                            m = GetRorC(context, m, setCarry, shift);
+                        }
+                        else
+                        {
+                            m = GetRrxC(context, m, setCarry);
+                        }
+                        break;
+                }
+            }
+
+            return m;
+        }
+
+        // Logical shift left by a constant, optionally writing the carry-out to C.
+        private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            if ((uint)shift > 32)
+            {
+                return GetShiftByMoreThan32(context, setCarry);
+            }
+            else if (shift == 32)
+            {
+                // Everything is shifted out; the last bit out is bit 0.
+                if (setCarry)
+                {
+                    SetCarryMLsb(context, m);
+                }
+
+                return Const(0);
+            }
+            else
+            {
+                if (setCarry)
+                {
+                    // The last bit shifted out is bit (32 - shift).
+                    Operand cOut = context.ShiftRightUI(m, Const(32 - shift));
+
+                    cOut = context.BitwiseAnd(cOut, Const(1));
+
+                    SetFlag(context, PState.CFlag, cOut);
+                }
+
+                return context.ShiftLeft(m, Const(shift));
+            }
+        }
+
+        // Logical shift right by a constant, optionally writing the carry-out to C.
+        private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            if ((uint)shift > 32)
+            {
+                return GetShiftByMoreThan32(context, setCarry);
+            }
+            else if (shift == 32)
+            {
+                // Everything is shifted out; the last bit out is bit 31.
+                if (setCarry)
+                {
+                    SetCarryMMsb(context, m);
+                }
+
+                return Const(0);
+            }
+            else
+            {
+                if (setCarry)
+                {
+                    SetCarryMShrOut(context, m, shift);
+                }
+
+                return context.ShiftRightUI(m, Const(shift));
+            }
+        }
+
+        // Result of a logical shift by more than 32 bits: zero, with C cleared
+        // when setCarry is true.
+        private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry)
+        {
+            if (setCarry)
+            {
+                SetFlag(context, PState.CFlag, Const(0));
+            }
+
+            return Const(0);
+        }
+
+        // Arithmetic shift right by a constant, optionally writing the carry-out to C.
+        private static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            if ((uint)shift >= 32)
+            {
+                // Every bit of the result, and the carry-out, is the sign bit.
+                m = context.ShiftRightSI(m, Const(31));
+
+                if (setCarry)
+                {
+                    SetCarryMLsb(context, m);
+                }
+
+                return m;
+            }
+            else
+            {
+                if (setCarry)
+                {
+                    SetCarryMShrOut(context, m, shift);
+                }
+
+                return context.ShiftRightSI(m, Const(shift));
+            }
+        }
+
+        // Rotate right by a constant (taken modulo 32); when setCarry is true,
+        // C receives bit 31 of the rotated value.
+        private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            shift &= 0x1f;
+
+            m = context.RotateRight(m, Const(shift));
+
+            if (setCarry)
+            {
+                SetCarryMMsb(context, m);
+            }
+
+            return m;
+        }
+
+        // Rotate right by one through the carry flag: the old C becomes bit 31
+        // and, when setCarry is true, the old bit 0 becomes the new C.
+        private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry)
+        {
+            // Rotate right by 1 with carry.
+            // Copy the carry-in first, since C may be overwritten just below.
+            Operand cIn = context.Copy(GetFlag(PState.CFlag));
+
+            if (setCarry)
+            {
+                SetCarryMLsb(context, m);
+            }
+
+            m = context.ShiftRightUI(m, Const(1));
+
+            m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31)));
+
+            return m;
+        }
+
+        // C = bit 0 of m.
+        private static void SetCarryMLsb(ArmEmitterContext context, Operand m)
+        {
+            SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1)));
+        }
+
+        // C = bit 31 of m.
+        private static void SetCarryMMsb(ArmEmitterContext context, Operand m)
+        {
+            SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31)));
+        }
+
+        // C = bit (shift - 1) of m, i.e. the last bit shifted out by a right
+        // shift of the given amount.
+        private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift)
+        {
+            Operand cOut = context.ShiftRightUI(m, Const(shift - 1));
+
+            cOut = context.BitwiseAnd(cOut, Const(1));
+
+            SetFlag(context, PState.CFlag, cOut);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitBfm.cs b/ARMeilleure/Instructions/InstEmitBfm.cs
new file mode 100644
index 000000000..8fdbf6cfd
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitBfm.cs
@@ -0,0 +1,196 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // Bit-field move instruction emitters (BFM, SBFM, UBFM). Each entry point
+    // picks a cheaper dedicated sequence when the opcode's Pos/Shift fields
+    // match a simpler operation, and falls back to the generic
+    // rotate-and-mask form otherwise.
+    static partial class InstEmit
+    {
+        // Bit-field move: copies a field from Rn into Rd while keeping every
+        // other bit of Rd unchanged.
+        public static void Bfm(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand d = GetIntOrZR(context, op.Rd);
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            Operand res;
+
+            if (op.Pos < op.Shift)
+            {
+                // BFI.
+                // The low `width` bits of Rn are inserted at bit `shift` of Rd.
+                int shift = op.GetBitsCount() - op.Shift;
+
+                int width = op.Pos + 1;
+
+                long mask = (long)(ulong.MaxValue >> (64 - width));
+
+                res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift));
+
+                res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift))));
+            }
+            else
+            {
+                // BFXIL.
+                // `width` bits of Rn starting at bit `shift` replace the low bits of Rd.
+                int shift = op.Shift;
+
+                int width = op.Pos - shift + 1;
+
+                long mask = (long)(ulong.MaxValue >> (64 - width));
+
+                res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask));
+
+                res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask)));
+            }
+
+            SetIntOrZR(context, op.Rd, res);
+        }
+
+        // Signed bit-field move.
+        public static void Sbfm(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            int bitsCount = op.GetBitsCount();
+
+            if (op.Pos + 1 == bitsCount)
+            {
+                // The field reaches the top bit: an arithmetic shift right.
+                EmitSbfmShift(context);
+            }
+            else if (op.Pos < op.Shift)
+            {
+                EmitSbfiz(context);
+            }
+            else if (op.Pos == 7 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n));
+            }
+            else if (op.Pos == 15 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n));
+            }
+            else if (op.Pos == 31 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n));
+            }
+            else
+            {
+                // Generic form: replicate bit Pos of Rn into every bit, keep
+                // only the bits outside TMask, then merge in the extracted field.
+                Operand res = GetIntOrZR(context, op.Rn);
+
+                res = context.ShiftLeft (res, Const(bitsCount - 1 - op.Pos));
+                res = context.ShiftRightSI(res, Const(bitsCount - 1));
+                res = context.BitwiseAnd (res, Const(res.Type, ~op.TMask));
+
+                Operand n2 = GetBfmN(context);
+
+                SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2));
+            }
+        }
+
+        // Unsigned bit-field move.
+        public static void Ubfm(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            if (op.Pos + 1 == op.GetBitsCount())
+            {
+                // The field reaches the top bit: a logical shift right.
+                EmitUbfmShift(context);
+            }
+            else if (op.Pos + 1 == op.Shift)
+            {
+                // Plain left shift. This must be tested before the
+                // `Pos < Shift` case below, which also matches it; the
+                // insert-in-zero path produces the same value but emits an
+                // extra right shift by zero.
+                EmitBfmLsl(context);
+            }
+            else if (op.Pos < op.Shift)
+            {
+                EmitUbfiz(context);
+            }
+            else if (op.Pos == 7 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff)));
+            }
+            else if (op.Pos == 15 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff)));
+            }
+            else
+            {
+                SetIntOrZR(context, op.Rd, GetBfmN(context));
+            }
+        }
+
+        private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true);
+        private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false);
+
+        // Bit-field insert in zero: moves the low (Pos + 1) bits of Rn to the
+        // top of the register, then shifts them back down (arithmetically when
+        // signed) so the field ends up at its destination position.
+        private static void EmitBfiz(ArmEmitterContext context, bool signed)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            int width = op.Pos + 1;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            res = context.ShiftLeft(res, Const(op.GetBitsCount() - width));
+
+            res = signed
+                ? context.ShiftRightSI(res, Const(op.Shift - width))
+                : context.ShiftRightUI(res, Const(op.Shift - width));
+
+            SetIntOrZR(context, op.Rd, res);
+        }
+
+        private static void EmitSbfmShift(ArmEmitterContext context)
+        {
+            EmitBfmShift(context, signed: true);
+        }
+
+        private static void EmitUbfmShift(ArmEmitterContext context)
+        {
+            EmitBfmShift(context, signed: false);
+        }
+
+        // Rd = Rn >> Shift, arithmetic when signed and logical otherwise.
+        private static void EmitBfmShift(ArmEmitterContext context, bool signed)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            res = signed
+                ? context.ShiftRightSI(res, Const(op.Shift))
+                : context.ShiftRightUI(res, Const(op.Shift));
+
+            SetIntOrZR(context, op.Rd, res);
+        }
+
+        // Rd = Rn << (register bit count - Shift).
+        private static void EmitBfmLsl(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            int shift = op.GetBitsCount() - op.Shift;
+
+            SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift)));
+        }
+
+        // Generic field extraction: Rn rotated right by Shift, masked with
+        // (WMask & TMask).
+        private static Operand GetBfmN(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            long mask = op.WMask & op.TMask;
+
+            return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitCcmp.cs b/ARMeilleure/Instructions/InstEmitCcmp.cs
new file mode 100644
index 000000000..b1b0a2a1c
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitCcmp.cs
@@ -0,0 +1,61 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // Conditional compare instruction emitters (CCMN / CCMP).
+    static partial class InstEmit
+    {
+        public static void Ccmn(ArmEmitterContext context)
+        {
+            EmitCcmp(context, isNegated: true);
+        }
+
+        public static void Ccmp(ArmEmitterContext context)
+        {
+            EmitCcmp(context, isNegated: false);
+        }
+
+        // When the opcode's condition holds, N/Z/C/V are set from Rn + M
+        // (isNegated) or Rn - M. Otherwise they are loaded from the opcode's
+        // 4-bit Nzcv immediate (bit 3 = N, bit 2 = Z, bit 1 = C, bit 0 = V).
+        private static void EmitCcmp(ArmEmitterContext context, bool isNegated)
+        {
+            OpCodeCcmp op = (OpCodeCcmp)context.CurrOp;
+
+            Operand lblCompare = Label();
+            Operand lblDone = Label();
+
+            EmitCondBranch(context, lblCompare, op.Cond);
+
+            // Condition failed: take the flags straight from the immediate.
+            var nzcv = op.Nzcv;
+
+            SetFlag(context, PState.VFlag, Const(nzcv & 1));
+            SetFlag(context, PState.CFlag, Const((nzcv >> 1) & 1));
+            SetFlag(context, PState.ZFlag, Const((nzcv >> 2) & 1));
+            SetFlag(context, PState.NFlag, Const((nzcv >> 3) & 1));
+
+            context.Branch(lblDone);
+
+            // Condition passed: perform the actual comparison.
+            context.MarkLabel(lblCompare);
+
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand result = isNegated
+                ? context.Add(lhs, rhs)
+                : context.Subtract(lhs, rhs);
+
+            EmitNZFlagsCheck(context, result);
+
+            if (isNegated)
+            {
+                EmitAddsCCheck(context, lhs, result);
+                EmitAddsVCheck(context, lhs, rhs, result);
+            }
+            else
+            {
+                EmitSubsCCheck(context, lhs, rhs);
+                EmitSubsVCheck(context, lhs, rhs, result);
+            }
+
+            context.MarkLabel(lblDone);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitCsel.cs b/ARMeilleure/Instructions/InstEmitCsel.cs
new file mode 100644
index 000000000..60baf0bc2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitCsel.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // Conditional select instruction emitters (CSEL, CSINC, CSINV, CSNEG).
+    static partial class InstEmit
+    {
+        // Transformation applied to Rm before the selection takes place.
+        private enum CselOperation
+        {
+            None,
+            Increment,
+            Invert,
+            Negate
+        }
+
+        public static void Csel(ArmEmitterContext context)
+        {
+            EmitCsel(context, CselOperation.None);
+        }
+
+        public static void Csinc(ArmEmitterContext context)
+        {
+            EmitCsel(context, CselOperation.Increment);
+        }
+
+        public static void Csinv(ArmEmitterContext context)
+        {
+            EmitCsel(context, CselOperation.Invert);
+        }
+
+        public static void Csneg(ArmEmitterContext context)
+        {
+            EmitCsel(context, CselOperation.Negate);
+        }
+
+        // Rd = condition ? Rn : f(Rm), where f is the transformation selected
+        // by cselOp (identity, +1, bitwise not or negation).
+        private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp)
+        {
+            OpCodeCsel op = (OpCodeCsel)context.CurrOp;
+
+            Operand whenTrue = GetIntOrZR(context, op.Rn);
+            Operand whenFalse = GetIntOrZR(context, op.Rm);
+
+            switch (cselOp)
+            {
+                case CselOperation.Increment:
+                    whenFalse = context.Add(whenFalse, Const(whenFalse.Type, 1));
+                    break;
+
+                case CselOperation.Invert:
+                    whenFalse = context.BitwiseNot(whenFalse);
+                    break;
+
+                case CselOperation.Negate:
+                    whenFalse = context.Negate(whenFalse);
+                    break;
+            }
+
+            Operand condition = GetCondTrue(context, op.Cond);
+
+            Operand selected = context.ConditionalSelect(condition, whenTrue, whenFalse);
+
+            SetIntOrZR(context, op.Rd, selected);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitDiv.cs b/ARMeilleure/Instructions/InstEmitDiv.cs
new file mode 100644
index 000000000..0c21dd1ba
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitDiv.cs
@@ -0,0 +1,67 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false); // Signed division, including the INT_MIN / -1 check.
+ public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true); // Unsigned division.
+
+ private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+ {
+ OpCodeAluBinary divOp = (OpCodeAluBinary)context.CurrOp;
+
+ Operand dividend = GetIntOrZR(context, divOp.Rn);
+ Operand divisor = GetIntOrZR(context, divOp.Rm);
+
+ // A zero divisor skips the division: Rd is set to 0 instead.
+ Operand isDivByZero = context.ICompareEqual(divisor, Const(divisor.Type, 0));
+
+ Operand lblZeroDivisor = Label();
+ Operand lblDone = Label();
+
+ context.BranchIfTrue(lblZeroDivisor, isDivByZero);
+
+ if (!unsigned)
+ {
+ // Signed overflow case: Rn == INT_MIN and Rm == -1 yields INT_MIN.
+ bool narrow = divOp.RegisterSize == RegisterSize.Int32;
+
+ Operand minValue = narrow ? Const(int.MinValue) : Const(long.MinValue);
+ Operand negOne = narrow ? Const(-1) : Const(-1L);
+
+ Operand dividendIsMin = context.ICompareEqual(dividend, minValue);
+ Operand divisorIsNegOne = context.ICompareEqual(divisor, negOne);
+
+ Operand lblNoOverflow = Label();
+
+ context.BranchIfFalse(lblNoOverflow, context.BitwiseAnd(dividendIsMin, divisorIsNegOne));
+
+ SetAluDOrZR(context, minValue);
+
+ context.Branch(lblDone);
+
+ context.MarkLabel(lblNoOverflow);
+ }
+
+ // Regular path: the special cases handled above do not apply.
+ Operand quotient = unsigned ? context.DivideUI(dividend, divisor) : context.Divide(dividend, divisor);
+
+ SetAluDOrZR(context, quotient);
+
+ context.Branch(lblDone);
+
+ // Division by zero path.
+ context.MarkLabel(lblZeroDivisor);
+
+ SetAluDOrZR(context, Const(divOp.GetOperandType(), 0));
+
+ context.MarkLabel(lblDone);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitException.cs b/ARMeilleure/Instructions/InstEmitException.cs
new file mode 100644
index 000000000..6f7b6fd51
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitException.cs
@@ -0,0 +1,55 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Brk(ArmEmitterContext context) // Emits the exception call sequence with NativeInterface.Break as the handler.
+ {
+ EmitExceptionCall(context, NativeInterface.Break);
+ }
+
+ public static void Svc(ArmEmitterContext context) // Emits the exception call sequence with NativeInterface.SupervisorCall as the handler.
+ {
+ EmitExceptionCall(context, NativeInterface.SupervisorCall);
+ }
+
+ private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func)
+ {
+ OpCodeException excOp = (OpCodeException)context.CurrOp;
+
+ // Guest state is stored before the handler call and reloaded right after it.
+ context.StoreToContext();
+ context.Call(func, Const(excOp.Address), Const(excOp.Id));
+ context.LoadFromContext();
+
+ // With no following block, return the address of the next instruction.
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(excOp.Address + 4));
+ }
+ }
+
+ public static void Und(ArmEmitterContext context)
+ {
+ OpCode undOp = context.CurrOp;
+
+ Delegate undefinedHandler = new _Void_U64_S32(NativeInterface.Undefined);
+
+ // The handler gets the instruction address and its raw encoding;
+ // guest state is stored before the call and reloaded after it.
+ context.StoreToContext();
+ context.Call(undefinedHandler, Const(undOp.Address), Const(undOp.RawOpCode));
+ context.LoadFromContext();
+
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(undOp.Address + 4));
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlow.cs b/ARMeilleure/Instructions/InstEmitFlow.cs
new file mode 100644
index 000000000..93d36e1b9
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlow.cs
@@ -0,0 +1,159 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ OpCodeBImmAl branchOp = (OpCodeBImmAl)context.CurrOp;
+ // Without a branch block attached, the target address is returned instead of jumping to a label.
+ if (context.CurrBlock.Branch == null)
+ {
+ context.Return(Const(branchOp.Immediate));
+ }
+ else
+ {
+ context.Branch(context.GetLabel((ulong)branchOp.Immediate));
+ }
+ }
+
+ public static void B_Cond(ArmEmitterContext context)
+ {
+ // The branch condition is taken from the decoded opcode.
+ Condition cond = ((OpCodeBImmCond)context.CurrOp).Cond;
+ EmitBranch(context, cond);
+ }
+
+ public static void Bl(ArmEmitterContext context)
+ {
+ OpCodeBImmAl callOp = (OpCodeBImmAl)context.CurrOp;
+ // The link register receives the address of the instruction after this one (Address + 4).
+ Operand linkRegister = GetIntOrZR(context, RegisterAlias.Lr);
+ context.Copy(linkRegister, Const(callOp.Address + 4));
+ EmitCall(context, (ulong)callOp.Immediate);
+ }
+
+ public static void Blr(ArmEmitterContext context)
+ {
+ OpCodeBReg callOp = (OpCodeBReg)context.CurrOp;
+ // Snapshot the target before the link register is overwritten below.
+ Operand target = context.Copy(GetIntOrZR(context, callOp.Rn));
+ Operand linkRegister = GetIntOrZR(context, RegisterAlias.Lr);
+
+ context.Copy(linkRegister, Const(callOp.Address + 4));
+ EmitVirtualCall(context, target);
+ }
+
+ public static void Br(ArmEmitterContext context)
+ {
+ OpCodeBReg jumpOp = (OpCodeBReg)context.CurrOp;
+ // Indirect jump to the address held in Rn.
+ EmitVirtualJump(context, GetIntOrZR(context, jumpOp.Rn));
+ }
+
+ public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true); // Branch emitted with onNotZero set.
+ public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false); // Branch emitted with onNotZero cleared.
+
+ private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmCmp cmpOp = (OpCodeBImmCmp)context.CurrOp;
+ // The value of Rt is what the emitted branch tests.
+ EmitBranch(context, GetIntOrZR(context, cmpOp.Rt), onNotZero);
+ }
+
+ public static void Ret(ArmEmitterContext context) // Returns the link register value with CallFlag OR-ed in.
+ {
+ context.Return(context.BitwiseOr(GetIntOrZR(context, RegisterAlias.Lr), Const(CallFlag))); // NOTE(review): the target is always read from LR and the opcode's own register operand is never consulted - confirm a RET naming another register cannot reach this emitter.
+ }
+
+ public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true); // Bit test branch emitted with onNotZero set.
+ public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false); // Bit test branch emitted with onNotZero cleared.
+
+ private static void EmitTb(ArmEmitterContext context, bool onNotZero)
+ {
+ OpCodeBImmTest testOp = (OpCodeBImmTest)context.CurrOp;
+ // Isolate the tested bit of Rt; the branch is then emitted on that masked value.
+ Operand source = GetIntOrZR(context, testOp.Rt);
+ Operand testedBit = context.BitwiseAnd(source, Const(1L << testOp.Bit));
+ EmitBranch(context, testedBit, onNotZero);
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Condition cond)
+ {
+ OpCodeBImm branchOp = (OpCodeBImm)context.CurrOp;
+
+ if (context.CurrBlock.Branch == null)
+ {
+ // No branch block is attached: both outcomes leave by returning an address.
+ Operand lblBranchTaken = Label();
+
+ EmitCondBranch(context, lblBranchTaken, cond);
+ context.Return(Const(branchOp.Address + 4));
+
+ context.MarkLabel(lblBranchTaken);
+ context.Return(Const(branchOp.Immediate));
+
+ return;
+ }
+
+ EmitCondBranch(context, context.GetLabel((ulong)branchOp.Immediate), cond);
+
+ // Not taken and nothing follows this block: return the next instruction address.
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(branchOp.Address + 4));
+ }
+ }
+
+ private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
+ {
+ OpCodeBImm branchOp = (OpCodeBImm)context.CurrOp;
+
+ // Emits BranchIfTrue on the value when onNotZero is set, BranchIfFalse otherwise.
+ void BranchOnValue(Operand label)
+ {
+ if (onNotZero)
+ {
+ context.BranchIfTrue(label, value);
+ }
+ else
+ {
+ context.BranchIfFalse(label, value);
+ }
+ }
+
+ if (context.CurrBlock.Branch == null)
+ {
+ // No branch block is attached to the current block, so both
+ // outcomes leave by returning an address.
+ Operand lblBranchTaken = Label();
+
+ BranchOnValue(lblBranchTaken);
+
+ context.Return(Const(branchOp.Address + 4));
+
+ context.MarkLabel(lblBranchTaken);
+
+ context.Return(Const(branchOp.Immediate));
+
+ return;
+ }
+
+ BranchOnValue(context.GetLabel((ulong)branchOp.Immediate));
+
+ // Not taken and nothing follows this block: return the next instruction address.
+ if (context.CurrBlock.Next == null)
+ {
+ context.Return(Const(branchOp.Address + 4));
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs
new file mode 100644
index 000000000..27addc78e
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlow32.cs
@@ -0,0 +1,71 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ public static void B(ArmEmitterContext context)
+ {
+ IOpCode32BImm branchOp = (IOpCode32BImm)context.CurrOp;
+ // Without a branch block attached, guest state is stored and the target address is returned.
+ if (context.CurrBlock.Branch == null)
+ {
+ context.StoreToContext();
+ context.Return(Const(branchOp.Immediate));
+ }
+ else
+ {
+ // Otherwise the branch goes straight to the label of the target address.
+ context.Branch(context.GetLabel((ulong)branchOp.Immediate));
+ }
+ }
+
+ public static void Bl(ArmEmitterContext context) // Branch with link; the T flag is left as it is.
+ {
+ Blx(context, x: false);
+ }
+
+ public static void Blx(ArmEmitterContext context) // Branch with link and exchange; the T flag is switched.
+ {
+ Blx(context, x: true);
+ }
+
+ public static void Bx(ArmEmitterContext context)
+ {
+ IOpCode32BReg exchangeOp = (IOpCode32BReg)context.CurrOp;
+ // Guest state is stored first; the value of Rm then becomes the new PC.
+ context.StoreToContext();
+ Operand target = GetIntA32(context, exchangeOp.Rm);
+ EmitBxWritePc(context, target);
+ }
+
+ private static void Blx(ArmEmitterContext context, bool x)
+ {
+ IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+ // Read the PC once; the link value below is derived from it.
+ uint pc = op.GetPc();
+
+ bool isThumb = IsThumb(context.CurrOp);
+ // Link value: PC with bit 0 set for Thumb opcodes, PC - 4 otherwise.
+ uint returnAddress = isThumb
+ ? pc | 1
+ : pc - 4;
+
+ SetIntOrSP(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(returnAddress));
+
+ // If x is true, then this is a branch with link and exchange.
+ // In this case we need to swap the mode between Arm <-> Thumb.
+ if (x)
+ {
+ SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1));
+ }
+
+ InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
new file mode 100644
index 000000000..a8eb21d33
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -0,0 +1,192 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitFlowHelper
+ {
+ public const ulong CallFlag = 1; // Bit OR-ed into the addresses returned by EmitCall and EmitVirtualCallOrJump.
+
+ public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond)
+ {
+ if (cond == Condition.Al) // Condition.Al needs no condition value: plain branch.
+ {
+ context.Branch(target);
+ }
+ else
+ {
+ context.BranchIfTrue(target, GetCondTrue(context, cond));
+ }
+ }
+
+ public static Operand GetCondTrue(ArmEmitterContext context, Condition condition)
+ {
+ Operand cmpResult = context.TryGetComparisonResult(condition);
+ // A non-null result from the context is returned as is, without reading any flag.
+ if (cmpResult != null)
+ {
+ return cmpResult;
+ }
+ // Conditions without a case in the switch below keep this constant 1.
+ Operand value = Const(1);
+ // Flips bit 0 of a flag value (exclusive-or with 1).
+ Operand Inverse(Operand val)
+ {
+ return context.BitwiseExclusiveOr(val, Const(1));
+ }
+
+ switch (condition)
+ {
+ case Condition.Eq: // Z
+ value = GetFlag(PState.ZFlag);
+ break;
+
+ case Condition.Ne: // !Z
+ value = Inverse(GetFlag(PState.ZFlag));
+ break;
+
+ case Condition.GeUn: // C
+ value = GetFlag(PState.CFlag);
+ break;
+
+ case Condition.LtUn: // !C
+ value = Inverse(GetFlag(PState.CFlag));
+ break;
+
+ case Condition.Mi: // N
+ value = GetFlag(PState.NFlag);
+ break;
+
+ case Condition.Pl: // !N
+ value = Inverse(GetFlag(PState.NFlag));
+ break;
+
+ case Condition.Vs: // V
+ value = GetFlag(PState.VFlag);
+ break;
+
+ case Condition.Vc: // !V
+ value = Inverse(GetFlag(PState.VFlag));
+ break;
+
+ case Condition.GtUn: // C && !Z
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseAnd(c, Inverse(z));
+
+ break;
+ }
+
+ case Condition.LeUn: // !C || Z
+ {
+ Operand c = GetFlag(PState.CFlag);
+ Operand z = GetFlag(PState.ZFlag);
+
+ value = context.BitwiseOr(Inverse(c), z);
+
+ break;
+ }
+
+ case Condition.Ge: // N == V
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Lt: // N != V
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.ICompareNotEqual(n, v);
+
+ break;
+ }
+
+ case Condition.Gt: // !Z && N == V
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v));
+
+ break;
+ }
+
+ case Condition.Le: // Z || N != V
+ {
+ Operand n = GetFlag(PState.NFlag);
+ Operand z = GetFlag(PState.ZFlag);
+ Operand v = GetFlag(PState.VFlag);
+
+ value = context.BitwiseOr(z, context.ICompareNotEqual(n, v));
+
+ break;
+ }
+ }
+
+ return value;
+ }
+
+ public static void EmitCall(ArmEmitterContext context, ulong immediate) // Returns the target address with CallFlag OR-ed in.
+ {
+ context.Return(Const(immediate | CallFlag));
+ }
+
+ public static void EmitVirtualCall(ArmEmitterContext context, Operand target) // Forwards to EmitVirtualCallOrJump with isJump: false.
+ {
+ EmitVirtualCallOrJump(context, target, isJump: false);
+ }
+
+ public static void EmitVirtualJump(ArmEmitterContext context, Operand target) // Forwards to EmitVirtualCallOrJump with isJump: true.
+ {
+ EmitVirtualCallOrJump(context, target, isJump: true);
+ }
+
+ private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump) // NOTE(review): isJump is never read, so calls and jumps emit the same code.
+ {
+ context.Return(context.BitwiseOr(target, Const(target.Type, (long)CallFlag))); // Returns the target with CallFlag OR-ed in, using the target's own operand type.
+ }
+
+ private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal)
+ {
+ // Note: retVal is the value returned by the called method, which is
+ // always an Int64 holding the address where execution should go on.
+ // If it is the address of the next instruction, execution continues here;
+ // if it isn't, we keep returning it until we reach the dispatcher.
+ ulong nextAddr = GetNextOpAddress(context.CurrOp);
+
+ if (context.CurrBlock.Next != null)
+ {
+ Operand lblContinue = Label();
+
+ context.BranchIfTrue(lblContinue, context.ICompareEqual(retVal, Const(nextAddr)));
+ // Mismatch: hand the returned address on to the caller instead of dropping it.
+ context.Return(retVal);
+
+ context.MarkLabel(lblContinue);
+ }
+ else
+ {
+ context.Return(retVal);
+ }
+ }
+
+ private static ulong GetNextOpAddress(OpCode op) // Address of the instruction that follows op.
+ {
+ return op.Address + (ulong)op.OpCodeSizeInBytes;
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitHash.cs b/ARMeilleure/Instructions/InstEmitHash.cs
new file mode 100644
index 000000000..0be8458e2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitHash.cs
@@ -0,0 +1,64 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Crc32b(ArmEmitterContext context) // Calls SoftFallback.Crc32b.
+ {
+ EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32b));
+ }
+
+ public static void Crc32h(ArmEmitterContext context) // Calls SoftFallback.Crc32h.
+ {
+ EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32h));
+ }
+
+ public static void Crc32w(ArmEmitterContext context) // Calls SoftFallback.Crc32w.
+ {
+ EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32w));
+ }
+
+ public static void Crc32x(ArmEmitterContext context) // Calls SoftFallback.Crc32x.
+ {
+ EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32x));
+ }
+
+ public static void Crc32cb(ArmEmitterContext context) // Calls SoftFallback.Crc32cb.
+ {
+ EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32cb));
+ }
+
+ public static void Crc32ch(ArmEmitterContext context) // Calls SoftFallback.Crc32ch.
+ {
+ EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32ch));
+ }
+
+ public static void Crc32cw(ArmEmitterContext context) // Calls SoftFallback.Crc32cw.
+ {
+ EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32cw));
+ }
+
+ public static void Crc32cx(ArmEmitterContext context) // Calls SoftFallback.Crc32cx.
+ {
+ EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32cx));
+ }
+
+ private static void EmitCrc32Call(ArmEmitterContext context, Delegate dlg)
+ {
+ OpCodeAluBinary crcOp = (OpCodeAluBinary)context.CurrOp;
+ // Rn and Rm are passed to the delegate; its result is written to Rd.
+ Operand rnValue = GetIntOrZR(context, crcOp.Rn);
+ Operand rmValue = GetIntOrZR(context, crcOp.Rm);
+
+ Operand crcResult = context.Call(dlg, rnValue, rmValue);
+
+ SetIntOrZR(context, crcOp.Rd, crcResult);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs
new file mode 100644
index 000000000..02e104a4f
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitHelper.cs
@@ -0,0 +1,218 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitHelper
+ {
+ public static bool IsThumb(OpCode op) // True when the opcode is an OpCodeT16 instance.
+ {
+ return op is OpCodeT16;
+ }
+
+ public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type)
+ {
+ Operand source = GetIntOrZR(context, rm);
+
+ switch (type)
+ {
+ case IntType.UInt8: return context.ZeroExtend8(source.Type, source);
+ case IntType.UInt16: return context.ZeroExtend16(source.Type, source);
+ case IntType.UInt32: return context.ZeroExtend32(source.Type, source);
+
+ case IntType.Int8: return context.SignExtend8(source.Type, source);
+ case IntType.Int16: return context.SignExtend16(source.Type, source);
+ case IntType.Int32: return context.SignExtend32(source.Type, source);
+ }
+ // Any other type leaves the register value untouched.
+ return source;
+ }
+
+ public static Operand GetIntA32(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex != RegisterAlias.Aarch32Pc)
+ {
+ return GetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex));
+ }
+
+ // Reading the PC yields a constant taken from the current opcode.
+ OpCode32 pcOp = (OpCode32)context.CurrOp;
+ int pcValue = (int)pcOp.GetPc();
+
+ return Const(pcValue);
+ }
+
+ public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ if (regIndex != RegisterAlias.Aarch32Pc)
+ {
+ SetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex), value);
+
+ return;
+ }
+
+ // Writing the PC is a branch: guest state is stored, then the value becomes the new PC.
+ context.StoreToContext();
+ EmitBxWritePc(context, value);
+ }
+
+ public static int GetRegisterAlias(Aarch32Mode mode, int regIndex)
+ {
+ // Registers below 8 are never banked and map to themselves.
+ // Everything else goes through the banked lookup:
+ // registers in the range [8, 12] are banked for the FIQ mode,
+ // and registers 13 and 14 are banked for all modes.
+ // The unsigned compare also routes negative indices to the lookup.
+ bool isBanked = (uint)regIndex >= 8;
+
+ return isBanked
+ ? GetBankedRegisterAlias(mode, regIndex)
+ : regIndex;
+ }
+
+ public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex) // Maps a register index in [8, 14] to its alias for the given mode; other indices throw.
+ {
+ switch (regIndex)
+ {
+ case 8: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R8Fiq
+ : RegisterAlias.R8Usr;
+
+ case 9: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R9Fiq
+ : RegisterAlias.R9Usr;
+
+ case 10: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R10Fiq
+ : RegisterAlias.R10Usr;
+
+ case 11: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R11Fiq
+ : RegisterAlias.R11Usr;
+
+ case 12: return mode == Aarch32Mode.Fiq
+ ? RegisterAlias.R12Fiq
+ : RegisterAlias.R12Usr;
+ // Register 13 has one alias per mode; User and System share SpUsr.
+ case 13:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.System: return RegisterAlias.SpUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.SpFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.SpIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.SpAbt;
+ case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp;
+ case Aarch32Mode.Undefined: return RegisterAlias.SpUnd;
+ // The message comes first and the parameter name second, so ParamName is populated.
+ default: throw new ArgumentException($"Invalid mode \"{mode}\".", nameof(mode));
+ }
+ // Register 14: User, Hypervisor and System share LrUsr.
+ case 14:
+ switch (mode)
+ {
+ case Aarch32Mode.User:
+ case Aarch32Mode.Hypervisor:
+ case Aarch32Mode.System: return RegisterAlias.LrUsr;
+ case Aarch32Mode.Fiq: return RegisterAlias.LrFiq;
+ case Aarch32Mode.Irq: return RegisterAlias.LrIrq;
+ case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc;
+ case Aarch32Mode.Abort: return RegisterAlias.LrAbt;
+ case Aarch32Mode.Undefined: return RegisterAlias.LrUnd;
+
+ default: throw new ArgumentException($"Invalid mode \"{mode}\".", nameof(mode));
+ }
+ // Only indices 8 to 14 have an entry here.
+ default: throw new ArgumentOutOfRangeException(nameof(regIndex));
+ }
+ }
+
+ public static void EmitBxWritePc(ArmEmitterContext context, Operand pc)
+ {
+ Operand mode = context.BitwiseAnd(pc, Const(1));
+ // Bit 0 of the target selects the instruction set: 1 = Thumb, 0 = Arm.
+ SetFlag(context, PState.TFlag, mode);
+
+ Operand lblArmMode = Label();
+ // The Arm path is taken when bit 0 is clear.
+ context.BranchIfFalse(lblArmMode, mode);
+ // Thumb target: only bit 0 is cleared from the returned address.
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~1))));
+
+ context.MarkLabel(lblArmMode);
+ // Arm target: the two low bits are cleared from the returned address.
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~3))));
+ }
+
+ public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex)
+ {
+ if (regIndex != RegisterConsts.ZeroIndex)
+ {
+ return GetIntOrSP(context, regIndex);
+ }
+
+ // The zero register index reads as a constant 0 whose
+ // width follows the operand type of the current opcode.
+ bool is32Bits = context.CurrOp.GetOperandType() == OperandType.I32;
+
+ return is32Bits ? Const(0) : Const(0L);
+ }
+
+ public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ // Writes that target the zero register index are dropped;
+ // every other index is forwarded to SetIntOrSP.
+ if (regIndex != RegisterConsts.ZeroIndex)
+ {
+ SetIntOrSP(context, regIndex, value);
+ }
+ }
+
+ public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex)
+ {
+ Operand fullRegister = Register(regIndex, RegisterType.Integer, OperandType.I64);
+ // Opcodes with a 32-bit register size get the value converted from I64 to I32.
+ if (context.CurrOp.RegisterSize != RegisterSize.Int32)
+ {
+ return fullRegister;
+ }
+
+ return context.ConvertI64ToI32(fullRegister);
+ }
+
+ public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value)
+ {
+ Operand destination = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+ // I32 values are zero-extended to I64 before being copied into the register.
+ Operand widened = value.Type == OperandType.I32
+ ? context.ZeroExtend32(OperandType.I64, value)
+ : value;
+
+ context.Copy(destination, widened);
+ }
+
+ public static Operand GetVec(int regIndex) // V128 operand for the vector register with this index.
+ {
+ return Register(regIndex, RegisterType.Vector, OperandType.V128);
+ }
+
+ public static Operand GetFlag(PState stateFlag) // I32 operand for the flag register identified by stateFlag.
+ {
+ return Register((int)stateFlag, RegisterType.Flag, OperandType.I32);
+ }
+
+ public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value)
+ {
+ Operand flagRegister = GetFlag(stateFlag);
+ // Write the flag, then let the context know that it has been set.
+ context.Copy(flagRegister, value);
+ context.MarkFlagSet(stateFlag);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitMemory.cs b/ARMeilleure/Instructions/InstEmitMemory.cs
new file mode 100644
index 000000000..1d5953fb2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemory.cs
@@ -0,0 +1,177 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Adr(ArmEmitterContext context)
+ {
+ OpCodeAdr adrOp = (OpCodeAdr)context.CurrOp;
+ ulong target = adrOp.Address + (ulong)adrOp.Immediate; // Instruction address plus the immediate.
+ SetIntOrZR(context, adrOp.Rd, Const(target));
+ }
+
+ public static void Adrp(ArmEmitterContext context)
+ {
+ OpCodeAdr adrpOp = (OpCodeAdr)context.CurrOp;
+ // The low 12 bits of the instruction address are cleared; the immediate is shifted left by 12 and added.
+ ulong pageBase = adrpOp.Address & ~0xfffUL;
+ ulong pageOffset = (ulong)adrpOp.Immediate << 12;
+ SetIntOrZR(context, adrpOp.Rd, Const(pageBase + pageOffset));
+ }
+
+ public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false); // Load through the zero-extending path.
+ public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true); // Load through one of the sign-extending paths.
+
+ private static void EmitLdr(ArmEmitterContext context, bool signed)
+ {
+ OpCodeMem memOp = (OpCodeMem)context.CurrOp;
+
+ Operand loadAddress = GetAddress(context);
+ // Unsigned loads zero-extend; signed loads extend to 64 or 32 bits depending on Extend64.
+ if (!signed)
+ {
+ EmitLoadZx(context, loadAddress, memOp.Rt, memOp.Size);
+ }
+ else if (memOp.Extend64)
+ {
+ EmitLoadSx64(context, loadAddress, memOp.Rt, memOp.Size);
+ }
+ else
+ {
+ EmitLoadSx32(context, loadAddress, memOp.Rt, memOp.Size);
+ }
+
+ EmitWBackIfNeeded(context, loadAddress);
+ }
+
+ public static void Ldr_Literal(ArmEmitterContext context)
+ {
+ IOpCodeLit litOp = (IOpCodeLit)context.CurrOp;
+ // Prefetch forms emit nothing.
+ if (litOp.Prefetch)
+ {
+ return;
+ }
+ Operand literalAddress = Const(litOp.Immediate);
+
+ if (!litOp.Signed)
+ {
+ EmitLoadZx(context, literalAddress, litOp.Rt, litOp.Size);
+ return;
+ }
+
+ EmitLoadSx64(context, literalAddress, litOp.Rt, litOp.Size);
+ }
+
+ public static void Ldp(ArmEmitterContext context)
+ {
+ OpCodeMemPair pairOp = (OpCodeMemPair)context.CurrOp;
+ Operand firstAddress = GetAddress(context);
+ // The second element is read at an offset of 1 << Size from the first.
+ Operand secondAddress = context.Add(firstAddress, Const(1L << pairOp.Size));
+
+ LoadElement(pairOp.Rt, firstAddress);
+ LoadElement(pairOp.Rt2, secondAddress);
+
+ EmitWBackIfNeeded(context, firstAddress);
+
+ // Sign-extends to 64 bits when the opcode asks for it, zero-extends otherwise.
+ void LoadElement(int rt, Operand elementAddress)
+ {
+ if (!pairOp.Extend64)
+ {
+ EmitLoadZx(context, elementAddress, rt, pairOp.Size);
+ }
+ else
+ {
+ EmitLoadSx64(context, elementAddress, rt, pairOp.Size);
+ }
+ }
+ }
+
+ public static void Str(ArmEmitterContext context)
+ {
+ OpCodeMem storeOp = (OpCodeMem)context.CurrOp;
+
+ Operand storeAddress = GetAddress(context);
+
+ // Store Rt, then update the base register when the opcode asks for write back.
+ InstEmitMemoryHelper.EmitStore(context, storeAddress, storeOp.Rt, storeOp.Size);
+ EmitWBackIfNeeded(context, storeAddress);
+ }
+
+ public static void Stp(ArmEmitterContext context)
+ {
+ OpCodeMemPair pairOp = (OpCodeMemPair)context.CurrOp;
+
+ Operand firstAddress = GetAddress(context);
+ // The second element is written at an offset of 1 << Size from the first.
+ Operand secondAddress = context.Add(firstAddress, Const(1L << pairOp.Size));
+
+ InstEmitMemoryHelper.EmitStore(context, firstAddress, pairOp.Rt, pairOp.Size);
+ InstEmitMemoryHelper.EmitStore(context, secondAddress, pairOp.Rt2, pairOp.Size);
+
+ EmitWBackIfNeeded(context, firstAddress);
+ }
+
+ private static Operand GetAddress(ArmEmitterContext context)
+ {
+ // Computes the access address for the immediate and register
+ // addressing forms; other opcode types yield null.
+ OpCode current = context.CurrOp;
+
+ if (current is OpCodeMemImm immOp)
+ {
+ Operand baseAddress = context.Copy(GetIntOrSP(context, immOp.Rn));
+
+ // Post-indexed accesses use the base as is;
+ // otherwise the immediate is added up front.
+ if (immOp.PostIdx)
+ {
+ return baseAddress;
+ }
+
+ return context.Add(baseAddress, Const(immOp.Immediate));
+ }
+
+ if (current is OpCodeMemReg regOp)
+ {
+ Operand baseValue = GetIntOrSP(context, regOp.Rn);
+
+ Operand index = GetExtendedM(context, regOp.Rm, regOp.IntType);
+
+ // When requested, the index is shifted left by the access size.
+ if (regOp.Shift)
+ {
+ index = context.ShiftLeft(index, Const(regOp.Size));
+ }
+
+ return context.Add(baseValue, index);
+ }
+
+ // Neither addressing form matched.
+ return null;
+ }
+
+ private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address)
+ {
+ // Only immediate-offset opcodes with write back update the base register.
+ if (!(context.CurrOp is OpCodeMemImm memOp) || !memOp.WBack)
+ {
+ return;
+ }
+ // Post-indexing adds the immediate now; otherwise the address passed in is written back unchanged.
+ Operand newBase = memOp.PostIdx
+ ? context.Add(address, Const(memOp.Immediate))
+ : address;
+ SetIntOrSP(context, memOp.Rn, newBase);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemory32.cs b/ARMeilleure/Instructions/InstEmitMemory32.cs
new file mode 100644
index 000000000..002d2c5c6
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemory32.cs
@@ -0,0 +1,256 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit32
+ {
+ private const int ByteSizeLog2 = 0; // Size exponent passed to the load/store helpers for byte accesses.
+ private const int HWordSizeLog2 = 1; // Size exponent for half-word accesses.
+ private const int WordSizeLog2 = 2; // Size exponent for word accesses.
+ private const int DWordSizeLog2 = 3; // Size exponent for double-word accesses; EmitLoadOrStore splits these into two word accesses.
+
+ [Flags]
+ enum AccessType
+ {
+ Store = 0, // No Load bit: EmitLoadOrStore takes the store path.
+ Signed = 1, // Loads go through EmitLoadSx32 instead of EmitLoadZx.
+ Load = 2, // EmitLoadOrStore takes the load path.
+
+ LoadZx = Load, // Load through EmitLoadZx.
+ LoadSx = Load | Signed, // Load through EmitLoadSx32.
+ }
+
+ public static void Ldm(ArmEmitterContext context)
+ {
+ OpCode32MemMult multOp = (OpCode32MemMult)context.CurrOp;
+
+ Operand baseValue = GetIntA32(context, multOp.Rn);
+
+ Operand firstAddress = context.Add(baseValue, Const(multOp.Offset));
+
+ bool loadsPc = (multOp.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0;
+
+ // Write back needs a non-zero post offset, and is skipped when Rn is the PC and the PC is also loaded.
+ bool skipWriteBack = multOp.PostOffset == 0 || (multOp.Rn == RegisterAlias.Aarch32Pc && loadsPc);
+ if (!skipWriteBack)
+ {
+ SetIntA32(context, multOp.Rn, context.Add(baseValue, Const(multOp.PostOffset)));
+ }
+
+ int remaining = multOp.RegisterMask;
+ int byteOffset = 0;
+ // Registers are loaded from the lowest set mask bit upwards, at consecutive offsets of 4.
+ for (int register = 0; remaining != 0; remaining >>= 1, register++)
+ {
+ if ((remaining & 1) == 0)
+ {
+ continue;
+ }
+
+ Operand address = context.Add(firstAddress, Const(byteOffset));
+ EmitLoadZx(context, address, register, WordSizeLog2);
+ byteOffset += 4;
+ }
+ }
+
+ public static void Ldr(ArmEmitterContext context) // Word load, zero-extending path.
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrb(ArmEmitterContext context) // Byte load, zero-extending path.
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrd(ArmEmitterContext context) // Double-word load, zero-extending path.
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrh(ArmEmitterContext context) // Half-word load, zero-extending path.
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx);
+ }
+
+ public static void Ldrsb(ArmEmitterContext context) // Byte load, sign-extending path.
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Ldrsh(ArmEmitterContext context) // Half-word load, sign-extending path.
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx);
+ }
+
+ public static void Stm(ArmEmitterContext context)
+ {
+ OpCode32MemMult multOp = (OpCode32MemMult)context.CurrOp;
+
+ Operand baseValue = GetIntA32(context, multOp.Rn);
+
+ Operand firstAddress = context.Add(baseValue, Const(multOp.Offset));
+
+ int remaining = multOp.RegisterMask;
+ int byteOffset = 0;
+
+ for (int register = 0; remaining != 0; remaining >>= 1, register++)
+ {
+ if ((remaining & 1) == 0)
+ {
+ continue;
+ }
+
+ Operand address = context.Add(firstAddress, Const(byteOffset));
+ EmitStore(context, address, register, WordSizeLog2);
+
+ // Write back is emitted right after the first store. When Rn is
+ // the first register on the list, memory therefore receives its
+ // value from before the write back. When Rn is on the list but
+ // is not the first one, the stored value varies between CPUs.
+ bool isFirstStore = byteOffset == 0;
+
+ if (isFirstStore && multOp.PostOffset != 0)
+ {
+ SetIntA32(context, multOp.Rn, context.Add(baseValue, Const(multOp.PostOffset)));
+ }
+
+ byteOffset += 4;
+ }
+ }
+
+ public static void Str(ArmEmitterContext context) // Word store.
+ {
+ EmitLoadOrStore(context, WordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strb(ArmEmitterContext context) // Byte store.
+ {
+ EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store);
+ }
+
+ public static void Strd(ArmEmitterContext context) // Double-word store.
+ {
+ EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store);
+ }
+
+ public static void Strh(ArmEmitterContext context) // Half-word store.
+ {
+ EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store);
+ }
+
+ private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+ {
+ OpCode32Mem op = (OpCode32Mem)context.CurrOp;
+ // The base is copied, so the write back below does not change the value used for addressing.
+ Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+ Operand temp = null;
+ // The offset base (Rn +/- immediate) is only needed for indexed accesses or write back.
+ if (op.Index || op.WBack)
+ {
+ temp = op.Add
+ ? context.Add (n, Const(op.Immediate))
+ : context.Subtract(n, Const(op.Immediate));
+ }
+
+ if (op.WBack)
+ {
+ SetIntA32(context, op.Rn, temp);
+ }
+
+ Operand address;
+ // Indexed accesses use the offset base; otherwise the unmodified base is the address.
+ if (op.Index)
+ {
+ address = temp;
+ }
+ else
+ {
+ address = n;
+ }
+
+ if ((accType & AccessType.Load) != 0)
+ {
+ void Load(int rt, int offs, int loadSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ if ((accType & AccessType.Signed) != 0)
+ {
+ EmitLoadSx32(context, addr, rt, loadSize);
+ }
+ else
+ {
+ EmitLoadZx(context, addr, rt, loadSize);
+ }
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+ // Double-word loads are two word loads; the E flag swaps which register gets which word.
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Load(op.Rt, 0, WordSizeLog2);
+ Load(op.Rt | 1, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Load(op.Rt | 1, 0, WordSizeLog2);
+ Load(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Load(op.Rt, 0, size);
+ }
+ }
+ else
+ {
+ void Store(int rt, int offs, int storeSize)
+ {
+ Operand addr = context.Add(address, Const(offs));
+
+ EmitStore(context, addr, rt, storeSize);
+ }
+
+ if (size == DWordSizeLog2)
+ {
+ Operand lblBigEndian = Label();
+ Operand lblEnd = Label();
+ // Double-word stores mirror the load case: two word stores, order picked by the E flag.
+ context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+
+ Store(op.Rt, 0, WordSizeLog2);
+ Store(op.Rt | 1, 4, WordSizeLog2);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBigEndian);
+
+ Store(op.Rt | 1, 0, WordSizeLog2);
+ Store(op.Rt, 4, WordSizeLog2);
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ {
+ Store(op.Rt, 0, size);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
new file mode 100644
index 000000000..bcca7619d
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
@@ -0,0 +1,261 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Ordering and exclusivity attributes of the memory accesses emitted by this file.
+        /// </summary>
+        [Flags]
+        private enum AccessType
+        {
+            None      = 0,
+            Ordered   = 1 << 0,
+            Exclusive = 1 << 1,
+            OrderedEx = Ordered | Exclusive
+        }
+
+        // Emits a call to NativeInterface.ClearExclusive; the returned operand is not used.
+        public static void Clrex(ArmEmitterContext context)
+            => context.Call(new _Void(NativeInterface.ClearExclusive));
+
+        // Dmb and Dsb both forward to the shared EmitBarrier helper.
+        public static void Dmb(ArmEmitterContext context)
+        {
+            EmitBarrier(context);
+        }
+
+        public static void Dsb(ArmEmitterContext context)
+        {
+            EmitBarrier(context);
+        }
+
+        // Single register loads (pair: false).
+        public static void Ldar(ArmEmitterContext context)  => EmitLoadEx(context, AccessType.Ordered,   pair: false);
+        public static void Ldaxr(ArmEmitterContext context) => EmitLoadEx(context, AccessType.OrderedEx, pair: false);
+        public static void Ldxr(ArmEmitterContext context)  => EmitLoadEx(context, AccessType.Exclusive, pair: false);
+
+        // Register pair loads (pair: true).
+        public static void Ldxp(ArmEmitterContext context)  => EmitLoadEx(context, AccessType.Exclusive, pair: true);
+        public static void Ldaxp(ArmEmitterContext context) => EmitLoadEx(context, AccessType.OrderedEx, pair: true);
+
+        // Single register load with the given access attributes.
+        private static void EmitLdr(ArmEmitterContext context, AccessType accType)
+            => EmitLoadEx(context, accType, pair: false);
+
+        // Register pair load with the given access attributes.
+        private static void EmitLdp(ArmEmitterContext context, AccessType accType)
+            => EmitLoadEx(context, accType, pair: true);
+
+        /// <summary>
+        /// Emits an ordered and/or exclusive load for the current <see cref="OpCodeMemEx"/> instruction.
+        /// </summary>
+        /// <param name="context">Emitter context of the instruction being translated</param>
+        /// <param name="accType">Ordering/exclusivity attributes of the access</param>
+        /// <param name="pair">True to load the Rt/Rt2 pair, false to load Rt only</param>
+        /// <exception cref="InvalidOperationException">A pairwise load has a size other than 2 or 3</exception>
+        private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair)
+        {
+            OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+            bool isOrdered   = (accType & AccessType.Ordered)   != 0;
+            bool isExclusive = (accType & AccessType.Exclusive) != 0;
+
+            if (isOrdered)
+            {
+                EmitBarrier(context);
+            }
+
+            Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+            if (!pair)
+            {
+                // 8, 16, 32 or 64-bits (non-pairwise) load.
+                SetIntOrZR(context, op.Rt, EmitLoad(context, address, isExclusive, op.Size));
+
+                return;
+            }
+
+            // Exclusive loads should be atomic, so both registers of a pair are
+            // filled from a single memory read of twice the element size.
+            switch (op.Size)
+            {
+                case 2:
+                {
+                    // Two 32-bits registers: one 64-bits read, split in halves.
+                    Operand packed = EmitLoad(context, address, isExclusive, 3);
+
+                    Operand low  = context.ZeroExtend32(OperandType.I64, context.ConvertI64ToI32(packed));
+                    Operand high = context.ShiftRightUI(packed, Const(32));
+
+                    SetIntOrZR(context, op.Rt,  low);
+                    SetIntOrZR(context, op.Rt2, high);
+
+                    break;
+                }
+
+                case 3:
+                {
+                    // Two 64-bits registers: one 128-bits read, one vector element each.
+                    Operand packed = EmitLoad(context, address, isExclusive, 4);
+
+                    Operand low  = context.VectorExtract(OperandType.I64, packed, 0);
+                    Operand high = context.VectorExtract(OperandType.I64, packed, 1);
+
+                    SetIntOrZR(context, op.Rt,  low);
+                    SetIntOrZR(context, op.Rt2, high);
+
+                    break;
+                }
+
+                default:
+                    throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+            }
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface read method matching the requested size.
+        /// </summary>
+        /// <param name="context">Emitter context of the instruction being translated</param>
+        /// <param name="address">Operand holding the address to read from</param>
+        /// <param name="exclusive">True to call the *Exclusive variant of the read method</param>
+        /// <param name="size">Log2 of the access size in bytes (0 to 4, where 4 is a 128-bits vector)</param>
+        /// <returns>Operand holding the value returned by the call</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the 0 to 4 range</exception>
+        private static Operand EmitLoad(
+            ArmEmitterContext context,
+            Operand           address,
+            bool              exclusive,
+            int               size)
+        {
+            Delegate fallbackMethodDlg;
+
+            // An unsupported size is rejected here instead of passing a null delegate to context.Call.
+            if (exclusive)
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _U8_U64  (NativeInterface.ReadByteExclusive);      break;
+                    case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive);    break;
+                    case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive);    break;
+                    case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive);    break;
+                    case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break;
+
+                    default: throw new ArgumentOutOfRangeException(nameof(size));
+                }
+            }
+            else
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _U8_U64  (NativeInterface.ReadByte);      break;
+                    case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16);    break;
+                    case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32);    break;
+                    case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64);    break;
+                    case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break;
+
+                    default: throw new ArgumentOutOfRangeException(nameof(size));
+                }
+            }
+
+            return context.Call(fallbackMethodDlg, address);
+        }
+
+ // Emitter for the memory prefetch instruction. The body is intentionally
+ // empty: no IR is emitted for it.
+ public static void Pfrm(ArmEmitterContext context)
+ {
+ // Memory Prefetch, execute as no-op.
+ }
+
+        // Single register stores (pair: false).
+        public static void Stlr(ArmEmitterContext context)  => EmitStoreEx(context, AccessType.Ordered,   pair: false);
+        public static void Stlxr(ArmEmitterContext context) => EmitStoreEx(context, AccessType.OrderedEx, pair: false);
+        public static void Stxr(ArmEmitterContext context)  => EmitStoreEx(context, AccessType.Exclusive, pair: false);
+
+        // Register pair stores (pair: true).
+        public static void Stxp(ArmEmitterContext context)  => EmitStoreEx(context, AccessType.Exclusive, pair: true);
+        public static void Stlxp(ArmEmitterContext context) => EmitStoreEx(context, AccessType.OrderedEx, pair: true);
+
+        // Single register store with the given access attributes.
+        private static void EmitStr(ArmEmitterContext context, AccessType accType)
+            => EmitStoreEx(context, accType, pair: false);
+
+        // Register pair store with the given access attributes.
+        private static void EmitStp(ArmEmitterContext context, AccessType accType)
+            => EmitStoreEx(context, accType, pair: true);
+
+        /// <summary>
+        /// Emits an ordered and/or exclusive store for the current <see cref="OpCodeMemEx"/> instruction.
+        /// </summary>
+        /// <param name="context">Emitter context of the instruction being translated</param>
+        /// <param name="accType">Ordering/exclusivity attributes of the access</param>
+        /// <param name="pair">True to store the Rt/Rt2 pair, false to store Rt only</param>
+        /// <exception cref="InvalidOperationException">A pairwise store has a size other than 2 or 3</exception>
+        private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair)
+        {
+            OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+            // Checked in every build configuration (a Debug.Assert is dropped from
+            // release builds), matching the exception thrown on the load path.
+            if (pair && op.Size != 2 && op.Size != 3)
+            {
+                throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
+            }
+
+            bool ordered   = (accType & AccessType.Ordered)   != 0;
+            bool exclusive = (accType & AccessType.Exclusive) != 0;
+
+            if (ordered)
+            {
+                EmitBarrier(context);
+            }
+
+            Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+            Operand t = GetIntOrZR(context, op.Rt);
+
+            Operand s;
+
+            if (pair)
+            {
+                Operand t2 = GetIntOrZR(context, op.Rt2);
+
+                Operand value;
+
+                if (op.Size == 2)
+                {
+                    // Two 32-bits registers packed into one 64-bits value (Rt2 in the high half).
+                    value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32)));
+                }
+                else /* if (op.Size == 3) */
+                {
+                    // Two 64-bits registers packed into one vector (Rt in element 0, Rt2 in element 1).
+                    value = context.VectorInsert(context.VectorZero(), t, 0);
+                    value = context.VectorInsert(value, t2, 1);
+                }
+
+                // The packed value is written with a single store of twice the element size.
+                s = EmitStore(context, address, value, exclusive, op.Size + 1);
+            }
+            else
+            {
+                s = EmitStore(context, address, t, exclusive, op.Size);
+            }
+
+            if (s != null)
+            {
+                // This is only needed for exclusive stores. The function returns 0
+                // when the store is successful, and 1 otherwise.
+                SetIntOrZR(context, op.Rs, s);
+            }
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface write method matching the requested size.
+        /// </summary>
+        /// <param name="context">Emitter context of the instruction being translated</param>
+        /// <param name="address">Operand holding the address to write to</param>
+        /// <param name="value">Operand holding the value to write</param>
+        /// <param name="exclusive">True to call the *Exclusive variant of the write method</param>
+        /// <param name="size">Log2 of the access size in bytes (0 to 4, where 4 is a 128-bits vector)</param>
+        /// <returns>The result of the exclusive write call, or null for a non-exclusive store</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the 0 to 4 range</exception>
+        private static Operand EmitStore(
+            ArmEmitterContext context,
+            Operand           address,
+            Operand           value,
+            bool              exclusive,
+            int               size)
+        {
+            // Sizes below 64-bits are narrowed before being passed to the write method.
+            if (size < 3)
+            {
+                value = context.ConvertI64ToI32(value);
+            }
+
+            Delegate fallbackMethodDlg;
+
+            // An unsupported size is rejected here instead of passing a null delegate to context.Call.
+            if (exclusive)
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _S32_U64_U8  (NativeInterface.WriteByteExclusive);      break;
+                    case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive);    break;
+                    case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive);    break;
+                    case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive);    break;
+                    case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break;
+
+                    default: throw new ArgumentOutOfRangeException(nameof(size));
+                }
+
+                return context.Call(fallbackMethodDlg, address, value);
+            }
+            else
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _Void_U64_U8  (NativeInterface.WriteByte);      break;
+                    case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16);    break;
+                    case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32);    break;
+                    case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64);    break;
+                    case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break;
+
+                    default: throw new ArgumentOutOfRangeException(nameof(size));
+                }
+
+                context.Call(fallbackMethodDlg, address, value);
+
+                return null;
+            }
+        }
+
+ // Shared hook called by Dmb, Dsb and by the ordered load/store paths in this
+ // file. The body is intentionally empty: no IR is emitted.
+ private static void EmitBarrier(ArmEmitterContext context)
+ {
+ // Note: This barrier is most likely not necessary, and probably
+ // doesn't make any difference since we need to do a ton of stuff
+ // (software MMU emulation) to read or write anything anyway.
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
new file mode 100644
index 000000000..0ae5e3f26
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -0,0 +1,512 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static class InstEmitMemoryHelper
+ {
+        // How an integer value is extended after being loaded (see EmitLoad).
+        private enum Extension
+        {
+            Zx   = 0, // No sign extension is applied.
+            Sx32 = 1, // Sign extension with an I32 destination type.
+            Sx64 = 2  // Sign extension with an I64 destination type.
+        }
+
+        // Load into rt without sign extension.
+        public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size)
+            => EmitLoad(context, address, Extension.Zx, rt, size);
+
+        // Load into rt, sign extended with an I32 destination type.
+        public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size)
+            => EmitLoad(context, address, Extension.Sx32, rt, size);
+
+        // Load into rt, sign extended with an I64 destination type.
+        public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size)
+            => EmitLoad(context, address, Extension.Sx64, rt, size);
+
+        /// <summary>
+        /// Emits a load of (1 &lt;&lt; size) bytes from <paramref name="address"/> into register <paramref name="rt"/>.
+        /// Vector destinations are loaded into element 0 of a zeroed vector; integer destinations are
+        /// additionally sign extended when <paramref name="ext"/> requests it.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="size"/> is above 4 for a SIMD instruction, or above 3 otherwise
+        /// </exception>
+        private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size)
+        {
+            bool isSimd = IsSimd(context);
+
+            uint maxSize = isSimd ? 4u : 3u;
+
+            if ((uint)size > maxSize)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if (isSimd)
+            {
+                EmitReadVector(context, address, context.VectorZero(), rt, 0, size);
+
+                return;
+            }
+
+            EmitReadInt(context, address, rt, size);
+
+            // NOTE(review): EmitReadInt writes rt through SetInt (A32-aware), while the read-back
+            // below always uses the GetIntOrZR/SetIntOrZR accessors, even when the current
+            // instruction is an OpCode32 — confirm this is intended for A32 callers.
+            Operand value = GetIntOrZR(context, rt);
+
+            bool signExtend = ext == Extension.Sx32 || ext == Extension.Sx64;
+
+            if (signExtend)
+            {
+                OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32;
+
+                if (size == 0)
+                {
+                    value = context.SignExtend8(destType, value);
+                }
+                else if (size == 1)
+                {
+                    value = context.SignExtend16(destType, value);
+                }
+                else if (size == 2)
+                {
+                    value = context.SignExtend32(destType, value);
+                }
+            }
+
+            SetIntOrZR(context, rt, value);
+        }
+
+        // Loads one element of (1 << size) bytes from address, inserts it at index elem of
+        // the given vector (size 4 replaces the whole vector) and writes the result to vector register rt.
+        public static void EmitLoadSimd(
+            ArmEmitterContext context,
+            Operand           address,
+            Operand           vector,
+            int               rt,
+            int               elem,
+            int               size) => EmitReadVector(context, address, vector, rt, elem, size);
+
+        /// <summary>
+        /// Emits a store of (1 &lt;&lt; size) bytes from register <paramref name="rt"/> to <paramref name="address"/>.
+        /// For SIMD instructions, element 0 of the vector register is stored (or the whole vector for size 4).
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="size"/> is above 4 for a SIMD instruction, or above 3 otherwise
+        /// </exception>
+        public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            bool isSimd = IsSimd(context);
+
+            uint maxSize = isSimd ? 4u : 3u;
+
+            if ((uint)size > maxSize)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if (!isSimd)
+            {
+                EmitWriteInt(context, address, rt, size);
+
+                return;
+            }
+
+            EmitWriteVector(context, address, rt, 0, size);
+        }
+
+        // Stores element elem (of (1 << size) bytes) of vector register rt to address;
+        // size 4 stores the whole vector.
+        public static void EmitStoreSimd(
+            ArmEmitterContext context,
+            Operand           address,
+            int               rt,
+            int               elem,
+            int               size) => EmitWriteVector(context, address, rt, elem, size);
+
+        // True when the current instruction is a SIMD one whose register operand should be
+        // accessed as a vector; the OpCodeSimdMemMs / OpCodeSimdMemSs forms are excluded.
+        private static bool IsSimd(ArmEmitterContext context)
+        {
+            if (!(context.CurrOp is IOpCodeSimd))
+            {
+                return false;
+            }
+
+            return !(context.CurrOp is OpCodeSimdMemMs) &&
+                   !(context.CurrOp is OpCodeSimdMemSs);
+        }
+
+        /// <summary>
+        /// Emits an integer load into register <paramref name="rt"/>, with an inline page table
+        /// lookup as the fast path and a NativeInterface call as the slow path.
+        /// </summary>
+        private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            // Non-zero when the address has bits set outside of the address space, or is
+            // not aligned to the access size (see EmitAddressCheck).
+            Operand needsSlowPath = EmitAddressCheck(context, address, size);
+
+            Operand fastPath = Label();
+            Operand slowPath = Label();
+            Operand done     = Label();
+
+            context.BranchIfFalse(fastPath, needsSlowPath);
+
+            // Slow path, also the target of the fallback branch of the page table lookup.
+            context.MarkLabel(slowPath);
+
+            EmitReadIntFallback(context, address, rt, size);
+
+            context.Branch(done);
+
+            // Fast path, direct load through the translated pointer.
+            context.MarkLabel(fastPath);
+
+            Operand hostAddress = EmitPtPointerLoad(context, address, slowPath);
+
+            Operand loaded = null;
+
+            if (size == 0)
+            {
+                loaded = context.Load8(hostAddress);
+            }
+            else if (size == 1)
+            {
+                loaded = context.Load16(hostAddress);
+            }
+            else if (size == 2)
+            {
+                loaded = context.Load(OperandType.I32, hostAddress);
+            }
+            else if (size == 3)
+            {
+                loaded = context.Load(OperandType.I64, hostAddress);
+            }
+
+            SetInt(context, rt, loaded);
+
+            context.MarkLabel(done);
+        }
+
+        /// <summary>
+        /// Emits a load of one vector element (or of a whole vector for size 4) into vector
+        /// register <paramref name="rt"/>, with an inline page table lookup as the fast path
+        /// and a NativeInterface call as the slow path.
+        /// </summary>
+        private static void EmitReadVector(
+            ArmEmitterContext context,
+            Operand           address,
+            Operand           vector,
+            int               rt,
+            int               elem,
+            int               size)
+        {
+            // Non-zero when the address has bits set outside of the address space, or is
+            // not aligned to the access size (see EmitAddressCheck).
+            Operand needsSlowPath = EmitAddressCheck(context, address, size);
+
+            Operand fastPath = Label();
+            Operand slowPath = Label();
+            Operand done     = Label();
+
+            context.BranchIfFalse(fastPath, needsSlowPath);
+
+            // Slow path, also the target of the fallback branch of the page table lookup.
+            context.MarkLabel(slowPath);
+
+            EmitReadVectorFallback(context, address, vector, rt, elem, size);
+
+            context.Branch(done);
+
+            // Fast path, direct load through the translated pointer.
+            context.MarkLabel(fastPath);
+
+            Operand hostAddress = EmitPtPointerLoad(context, address, slowPath);
+
+            Operand result = null;
+
+            if (size == 0)
+            {
+                result = context.VectorInsert8(vector, context.Load8(hostAddress), elem);
+            }
+            else if (size == 1)
+            {
+                result = context.VectorInsert16(vector, context.Load16(hostAddress), elem);
+            }
+            else if (size == 2)
+            {
+                result = context.VectorInsert(vector, context.Load(OperandType.I32, hostAddress), elem);
+            }
+            else if (size == 3)
+            {
+                result = context.VectorInsert(vector, context.Load(OperandType.I64, hostAddress), elem);
+            }
+            else if (size == 4)
+            {
+                // A full vector load replaces the vector, no element insertion is needed.
+                result = context.Load(OperandType.V128, hostAddress);
+            }
+
+            context.Copy(GetVec(rt), result);
+
+            context.MarkLabel(done);
+        }
+
+        // Builds a vector with the given value at element 0 of an otherwise zeroed vector.
+        private static Operand VectorCreate(ArmEmitterContext context, Operand value)
+            => context.VectorInsert(context.VectorZero(), value, 0);
+
+        /// <summary>
+        /// Emits an integer store of register <paramref name="rt"/>, with an inline page table
+        /// lookup as the fast path and a NativeInterface call as the slow path.
+        /// </summary>
+        private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            // Non-zero when the address has bits set outside of the address space, or is
+            // not aligned to the access size (see EmitAddressCheck).
+            Operand needsSlowPath = EmitAddressCheck(context, address, size);
+
+            Operand fastPath = Label();
+            Operand slowPath = Label();
+            Operand done     = Label();
+
+            context.BranchIfFalse(fastPath, needsSlowPath);
+
+            // Slow path, also the target of the fallback branch of the page table lookup.
+            context.MarkLabel(slowPath);
+
+            EmitWriteIntFallback(context, address, rt, size);
+
+            context.Branch(done);
+
+            // Fast path, direct store through the translated pointer.
+            context.MarkLabel(fastPath);
+
+            Operand hostAddress = EmitPtPointerLoad(context, address, slowPath);
+
+            Operand source = GetInt(context, rt);
+
+            // Sub 64-bits stores take a 32-bits source operand.
+            bool needsNarrowing = size < 3 && source.Type == OperandType.I64;
+
+            if (needsNarrowing)
+            {
+                source = context.ConvertI64ToI32(source);
+            }
+
+            switch (size)
+            {
+                case 0:
+                    context.Store8(hostAddress, source);
+                    break;
+
+                case 1:
+                    context.Store16(hostAddress, source);
+                    break;
+
+                case 2:
+                case 3:
+                    context.Store(hostAddress, source);
+                    break;
+            }
+
+            context.MarkLabel(done);
+        }
+
+        /// <summary>
+        /// Emits a store of one element of vector register <paramref name="rt"/> (or of the whole
+        /// vector for size 4), with an inline page table lookup as the fast path and a
+        /// NativeInterface call as the slow path.
+        /// </summary>
+        private static void EmitWriteVector(
+            ArmEmitterContext context,
+            Operand           address,
+            int               rt,
+            int               elem,
+            int               size)
+        {
+            // Non-zero when the address has bits set outside of the address space, or is
+            // not aligned to the access size (see EmitAddressCheck).
+            Operand needsSlowPath = EmitAddressCheck(context, address, size);
+
+            Operand fastPath = Label();
+            Operand slowPath = Label();
+            Operand done     = Label();
+
+            context.BranchIfFalse(fastPath, needsSlowPath);
+
+            // Slow path, also the target of the fallback branch of the page table lookup.
+            context.MarkLabel(slowPath);
+
+            EmitWriteVectorFallback(context, address, rt, elem, size);
+
+            context.Branch(done);
+
+            // Fast path, direct store through the translated pointer.
+            context.MarkLabel(fastPath);
+
+            Operand hostAddress = EmitPtPointerLoad(context, address, slowPath);
+
+            Operand source = GetVec(rt);
+
+            if (size == 0)
+            {
+                context.Store8(hostAddress, context.VectorExtract8(source, elem));
+            }
+            else if (size == 1)
+            {
+                context.Store16(hostAddress, context.VectorExtract16(source, elem));
+            }
+            else if (size == 2)
+            {
+                context.Store(hostAddress, context.VectorExtract(OperandType.FP32, source, elem));
+            }
+            else if (size == 3)
+            {
+                context.Store(hostAddress, context.VectorExtract(OperandType.FP64, source, elem));
+            }
+            else if (size == 4)
+            {
+                context.Store(hostAddress, source);
+            }
+
+            context.MarkLabel(done);
+        }
+
+        // Emits (address & mask), where mask has all bits at or above the address space size set,
+        // plus the low "size" bits. The result is non-zero when the address falls outside of the
+        // address space or is not aligned to (1 << size) bytes.
+        private static Operand EmitAddressCheck(ArmEmitterContext context, Operand address, int size)
+        {
+            long alignmentBits = (1u << size) - 1;
+
+            long mask = ~(context.Memory.AddressSpaceSize - 1);
+
+            mask |= alignmentBits;
+
+            Operand maskConst = Const(address.Type, mask);
+
+            return context.BitwiseAnd(address, maskConst);
+        }
+
+        /// <summary>
+        /// Emits an inline page table walk for <paramref name="address"/> and returns the final
+        /// table entry plus the offset of the address inside its page.
+        /// </summary>
+        /// <param name="context">Emitter context of the instruction being translated</param>
+        /// <param name="address">Operand holding the guest address</param>
+        /// <param name="lblFallbackPath">
+        /// Label branched to when the final entry has any of the PteFlagsMask bits set
+        /// (only emitted when the memory manager has no write watch support)
+        /// </param>
+        private static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblFallbackPath)
+        {
+            Operand entry = Const(context.Memory.PageTable.ToInt64());
+
+            int shift = MemoryManager.PageBits;
+
+            bool isLastLevel;
+
+            do
+            {
+                Operand index = context.ShiftRightUI(address, Const(shift));
+
+                shift += context.Memory.PtLevelBits;
+
+                isLastLevel = shift >= context.Memory.AddressSpaceBits;
+
+                // The index of the last level is not masked.
+                if (!isLastLevel)
+                {
+                    index = context.BitwiseAnd(index, Const(index.Type, context.Memory.PtLevelMask));
+                }
+
+                // Each table entry is 8 bytes long.
+                Operand entryOffset = context.ShiftLeft(index, Const(3));
+
+                if (entryOffset.Type == OperandType.I32)
+                {
+                    entryOffset = context.ZeroExtend32(OperandType.I64, entryOffset);
+                }
+
+                entry = context.Load(OperandType.I64, context.Add(entry, entryOffset));
+            }
+            while (!isLastLevel);
+
+            if (!context.Memory.HasWriteWatchSupport)
+            {
+                Operand flags = context.BitwiseAnd(entry, Const((long)MemoryManager.PteFlagsMask));
+
+                context.BranchIfTrue(lblFallbackPath, flags);
+            }
+
+            Operand offsetInPage = context.BitwiseAnd(address, Const(address.Type, MemoryManager.PageMask));
+
+            if (offsetInPage.Type == OperandType.I32)
+            {
+                offsetInPage = context.ZeroExtend32(OperandType.I64, offsetInPage);
+            }
+
+            return context.Add(entry, offsetInPage);
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface read method matching <paramref name="size"/>,
+        /// and writes the returned value to register <paramref name="rt"/>.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the 0 to 3 range</exception>
+        private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            Delegate fallbackMethodDlg;
+
+            switch (size)
+            {
+                case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte);   break;
+                case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16); break;
+                case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32); break;
+                case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64); break;
+
+                // Rejected here instead of passing a null delegate to context.Call.
+                default: throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            SetInt(context, rt, context.Call(fallbackMethodDlg, address));
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface read method matching <paramref name="size"/>, inserts the
+        /// returned value at index <paramref name="elem"/> of <paramref name="vector"/> (a size 4 read
+        /// replaces the whole vector), and writes the result to vector register <paramref name="rt"/>.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the 0 to 4 range</exception>
+        private static void EmitReadVectorFallback(
+            ArmEmitterContext context,
+            Operand           address,
+            Operand           vector,
+            int               rt,
+            int               elem,
+            int               size)
+        {
+            Delegate fallbackMethodDlg;
+
+            switch (size)
+            {
+                case 0: fallbackMethodDlg = new _U8_U64  (NativeInterface.ReadByte);      break;
+                case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16);    break;
+                case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32);    break;
+                case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64);    break;
+                case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break;
+
+                // Rejected here instead of passing a null delegate to context.Call.
+                default: throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            Operand value = context.Call(fallbackMethodDlg, address);
+
+            // For size 4 the call already returns the full vector, so nothing is inserted.
+            switch (size)
+            {
+                case 0: value = context.VectorInsert8 (vector, value, elem); break;
+                case 1: value = context.VectorInsert16(vector, value, elem); break;
+                case 2: value = context.VectorInsert  (vector, value, elem); break;
+                case 3: value = context.VectorInsert  (vector, value, elem); break;
+            }
+
+            context.Copy(GetVec(rt), value);
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface write method matching <paramref name="size"/>,
+        /// passing the value of register <paramref name="rt"/>.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the 0 to 3 range</exception>
+        private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            Delegate fallbackMethodDlg;
+
+            switch (size)
+            {
+                case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte);   break;
+                case 1: fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16); break;
+                case 2: fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32); break;
+                case 3: fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64); break;
+
+                // Rejected here instead of passing a null delegate to context.Call.
+                default: throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            Operand value = GetInt(context, rt);
+
+            // Sub 64-bits writes take a 32-bits value operand.
+            if (size < 3 && value.Type == OperandType.I64)
+            {
+                value = context.ConvertI64ToI32(value);
+            }
+
+            context.Call(fallbackMethodDlg, address, value);
+        }
+
+        /// <summary>
+        /// Emits a call to the NativeInterface write method matching <paramref name="size"/>, passing
+        /// element <paramref name="elem"/> of vector register <paramref name="rt"/> (or the whole
+        /// vector for size 4).
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the 0 to 4 range</exception>
+        private static void EmitWriteVectorFallback(
+            ArmEmitterContext context,
+            Operand           address,
+            int               rt,
+            int               elem,
+            int               size)
+        {
+            Delegate fallbackMethodDlg;
+
+            Operand value;
+
+            // The write method and the value passed to it are selected together.
+            switch (size)
+            {
+                case 0:
+                    fallbackMethodDlg = new _Void_U64_U8(NativeInterface.WriteByte);
+                    value = context.VectorExtract8(GetVec(rt), elem);
+                    break;
+
+                case 1:
+                    fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16);
+                    value = context.VectorExtract16(GetVec(rt), elem);
+                    break;
+
+                case 2:
+                    fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32);
+                    value = context.VectorExtract(OperandType.I32, GetVec(rt), elem);
+                    break;
+
+                case 3:
+                    fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64);
+                    value = context.VectorExtract(OperandType.I64, GetVec(rt), elem);
+                    break;
+
+                case 4:
+                    fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128);
+                    value = GetVec(rt);
+                    break;
+
+                // Rejected here instead of passing a null delegate and value to context.Call.
+                default:
+                    throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            context.Call(fallbackMethodDlg, address, value);
+        }
+
+        // Reads integer register rt with the accessor matching the current instruction:
+        // GetIntA32 for OpCode32 instructions, GetIntOrZR otherwise.
+        private static Operand GetInt(ArmEmitterContext context, int rt)
+        {
+            if (context.CurrOp is OpCode32)
+            {
+                return GetIntA32(context, rt);
+            }
+
+            return GetIntOrZR(context, rt);
+        }
+
+        // Writes integer register rt with the accessor matching the current instruction:
+        // SetIntA32 for OpCode32 instructions, SetIntOrZR otherwise.
+        private static void SetInt(ArmEmitterContext context, int rt, Operand value)
+        {
+            bool isA32 = context.CurrOp is OpCode32;
+
+            if (!isA32)
+            {
+                SetIntOrZR(context, rt, value);
+
+                return;
+            }
+
+            SetIntA32(context, rt, value);
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMove.cs b/ARMeilleure/Instructions/InstEmitMove.cs
new file mode 100644
index 000000000..bf051f329
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMove.cs
@@ -0,0 +1,41 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // Clears the 16-bits field starting at bit op.Bit of Rd, then ORs op.Immediate into it.
+        // NOTE(review): presumably op.Immediate is already shifted to op.Bit by the decoder — confirm.
+        public static void Movk(ArmEmitterContext context)
+        {
+            OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+            OperandType type = op.GetOperandType();
+
+            Operand current = GetIntOrZR(context, op.Rd);
+
+            long keepMask = ~(0xffffL << op.Bit);
+
+            Operand cleared = context.BitwiseAnd(current, Const(type, keepMask));
+
+            Operand merged = context.BitwiseOr(cleared, Const(type, op.Immediate));
+
+            SetIntOrZR(context, op.Rd, merged);
+        }
+
+        // Writes the bitwise complement of op.Immediate to Rd.
+        public static void Movn(ArmEmitterContext context)
+        {
+            OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+            Operand inverted = Const(op.GetOperandType(), ~op.Immediate);
+
+            SetIntOrZR(context, op.Rd, inverted);
+        }
+
+        // Writes op.Immediate to Rd.
+        public static void Movz(ArmEmitterContext context)
+        {
+            OpCodeMov op = (OpCodeMov)context.CurrOp;
+
+            Operand immediate = Const(op.GetOperandType(), op.Immediate);
+
+            SetIntOrZR(context, op.Rd, immediate);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMul.cs b/ARMeilleure/Instructions/InstEmitMul.cs
new file mode 100644
index 000000000..65d11b30d
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMul.cs
@@ -0,0 +1,100 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // Rd = Ra + (Rn * Rm).
+        public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true);
+
+        // Rd = Ra - (Rn * Rm).
+        public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false);
+
+        // Multiplies Rn by Rm, then adds the product to (or subtracts it from) Ra.
+        private static void EmitMul(ArmEmitterContext context, bool isAdd)
+        {
+            OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+            Operand addend       = GetIntOrZR(context, op.Ra);
+            Operand multiplicand = GetIntOrZR(context, op.Rn);
+            Operand multiplier   = GetIntOrZR(context, op.Rm);
+
+            Operand product = context.Multiply(multiplicand, multiplier);
+
+            Operand result;
+
+            if (isAdd)
+            {
+                result = context.Add(addend, product);
+            }
+            else
+            {
+                result = context.Subtract(addend, product);
+            }
+
+            SetIntOrZR(context, op.Rd, result);
+        }
+
+        // Long multiplies: the low 32 bits of Rn and Rm are extended before the multiplication.
+        public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd);
+        public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract);
+        public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add);
+        public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract);
+
+        // Selects the extension (Signed or not) and the accumulate operation (Add or Subtract)
+        // used by EmitMull. Subtract is the absence of the Add bit.
+        [Flags]
+        private enum MullFlags
+        {
+            Subtract = 0,
+            Add      = 1,
+            Signed   = 2,
+
+            SignedAdd      = Signed | Add,
+            SignedSubtract = Signed | Subtract
+        }
+
+        // Extends the low 32 bits of Rn and Rm (sign or zero extension, depending on flags),
+        // multiplies them, then adds the product to (or subtracts it from) Ra.
+        private static void EmitMull(ArmEmitterContext context, MullFlags flags)
+        {
+            OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+            bool isSigned = (flags & MullFlags.Signed) != 0;
+            bool isAdd    = (flags & MullFlags.Add)    != 0;
+
+            Operand GetExtendedRegister32(int index)
+            {
+                Operand value = GetIntOrZR(context, index);
+
+                return isSigned
+                    ? context.SignExtend32(value.Type, value)
+                    : context.ZeroExtend32(value.Type, value);
+            }
+
+            Operand addend = GetIntOrZR(context, op.Ra);
+
+            Operand multiplicand = GetExtendedRegister32(op.Rn);
+            Operand multiplier   = GetExtendedRegister32(op.Rm);
+
+            Operand product = context.Multiply(multiplicand, multiplier);
+
+            Operand result = isAdd
+                ? context.Add(addend, product)
+                : context.Subtract(addend, product);
+
+            SetIntOrZR(context, op.Rd, result);
+        }
+
+        // Rd = result of the Multiply64HighSI operation on Rn and Rm.
+        public static void Smulh(ArmEmitterContext context)
+        {
+            OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+            Operand multiplicand = GetIntOrZR(context, op.Rn);
+            Operand multiplier   = GetIntOrZR(context, op.Rm);
+
+            SetIntOrZR(context, op.Rd, context.Multiply64HighSI(multiplicand, multiplier));
+        }
+
+        // Rd = result of the Multiply64HighUI operation on Rn and Rm.
+        public static void Umulh(ArmEmitterContext context)
+        {
+            OpCodeMul op = (OpCodeMul)context.CurrOp;
+
+            Operand multiplicand = GetIntOrZR(context, op.Rn);
+            Operand multiplier   = GetIntOrZR(context, op.Rm);
+
+            SetIntOrZR(context, op.Rd, context.Multiply64HighUI(multiplicand, multiplier));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
new file mode 100644
index 000000000..44659e805
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -0,0 +1,3159 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func;
+
+ static partial class InstEmit
+ {
+        // Scalar absolute value, through the EmitAbs helper.
+        public static void Abs_S(ArmEmitterContext context)
+            => EmitScalarUnaryOpSx(context, (value) => EmitAbs(context, value));
+
+        // Vector absolute value, through the EmitAbs helper.
+        public static void Abs_V(ArmEmitterContext context)
+            => EmitVectorUnaryOpSx(context, (value) => EmitAbs(context, value));
+
+        // Scalar integer addition.
+        public static void Add_S(ArmEmitterContext context)
+            => EmitScalarBinaryOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+        // Vector integer addition. Uses the X86 packed add intrinsic for the element size
+        // when SSE2 is enabled, and a generic per-element emitter otherwise.
+        public static void Add_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhsVec = GetVec(op.Rn);
+            Operand rhsVec = GetVec(op.Rm);
+
+            Intrinsic paddInst = X86PaddInstruction[op.Size];
+
+            Operand sum = context.AddIntrinsic(paddInst, lhsVec, rhsVec);
+
+            // 64-bits operations go through VectorZeroUpper64 before the result is written.
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                sum = context.VectorZeroUpper64(sum);
+            }
+
+            context.Copy(GetVec(op.Rd), sum);
+        }
+
+        // Addition through the high narrow helper, with rounding disabled.
+        public static void Addhn_V(ArmEmitterContext context)
+            => EmitHighNarrow(context, (lhs, rhs) => context.Add(lhs, rhs), round: false);
+
+        // Adds elements 0 and 1 of Rn, and writes the sum to element 0 of an otherwise zeroed Rd.
+        public static void Addp_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand first  = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+            Operand second = EmitVectorExtractZx(context, op.Rn, 1, op.Size);
+
+            Operand sum = context.Add(first, second);
+
+            Operand result = EmitVectorInsert(context, context.VectorZero(), sum, 0, op.Size);
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Integer addition through the pairwise vector helper.
+        public static void Addp_V(ArmEmitterContext context)
+            => EmitVectorPairwiseOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+        // Integer addition through the across-vector helper.
+        public static void Addv_V(ArmEmitterContext context)
+            => EmitVectorAcrossVectorOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+        // Per-element SoftFallback.CountLeadingSigns call, one call per element of Rn.
+        public static void Cls_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int elemCount = op.GetBytesCount() >> op.Size;
+            int elemBits  = 8 << op.Size;
+
+            int index = 0;
+
+            while (index < elemCount)
+            {
+                Operand elem = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+                Operand count = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingSigns), elem, Const(elemBits));
+
+                result = EmitVectorInsert(context, result, count, index, op.Size);
+
+                index++;
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Per-element count of leading zeros. 64-bits elements use the CountLeadingZeros IR
+        // operation, the other sizes call SoftFallback.CountLeadingZeros with the element size.
+        public static void Clz_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int elemCount = op.GetBytesCount() >> op.Size;
+            int elemBits  = 8 << op.Size;
+
+            for (int index = 0; index < elemCount; index++)
+            {
+                Operand elem = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+
+                Operand count = elemBits == 64
+                    ? context.CountLeadingZeros(elem)
+                    : context.Call(new _U64_U64_S32(SoftFallback.CountLeadingZeros), elem, Const(elemBits));
+
+                result = EmitVectorInsert(context, result, count, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Per-byte count of set bits. Uses the X86 popcnt intrinsic when enabled,
+        // and SoftFallback.CountSetBits8 otherwise.
+        public static void Cnt_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            // One element per byte of the register: 16 for 128-bits, 8 otherwise.
+            int byteCount = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+            for (int index = 0; index < byteCount; index++)
+            {
+                Operand elem = EmitVectorExtractZx(context, op.Rn, index, 0);
+
+                Operand count = Optimizations.UsePopCnt
+                    ? context.AddIntrinsicLong(Intrinsic.X86Popcnt, elem)
+                    : context.Call(new _U64_U64(SoftFallback.CountSetBits8), elem);
+
+                result = EmitVectorInsert(context, result, count, index, 0);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar floating-point absolute difference, |Rn - Rm|.
+        public static void Fabd_S(ArmEmitterContext context)
+        {
+            if (!(Optimizations.FastFP && Optimizations.UseSse2))
+            {
+                // Soft float subtraction followed by a managed Abs call.
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitUnaryMathCall(
+                        context,
+                        MathF.Abs,
+                        Math.Abs,
+                        EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs)));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            if (isSingle)
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm));
+
+                // And-not with a -0.0 mask clears the sign bit of the difference.
+                Operand signMask = X86GetScalar(context, -0f);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnps, signMask, diff);
+
+                context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(abs));
+            }
+            else
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm));
+
+                // And-not with a -0.0 mask clears the sign bit of the difference.
+                Operand signMask = X86GetScalar(context, -0d);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, diff);
+
+                context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(abs));
+            }
+        }
+
+        // Vector floating-point absolute difference, |Rn - Rm| for each element.
+        public static void Fabd_V(ArmEmitterContext context)
+        {
+            if (!(Optimizations.FastFP && Optimizations.UseSse2))
+            {
+                // Soft float subtraction followed by a managed Abs call.
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitUnaryMathCall(
+                        context,
+                        MathF.Abs,
+                        Math.Abs,
+                        EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs)));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            if (isSingle)
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm));
+
+                // And-not with a -0.0 mask clears the sign bit of every element.
+                Operand signMask = X86GetAllElements(context, -0f);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnps, signMask, diff);
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    abs = context.VectorZeroUpper64(abs);
+                }
+
+                context.Copy(GetVec(op.Rd), abs);
+            }
+            else
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm));
+
+                // And-not with a -0.0 mask clears the sign bit of every element.
+                Operand signMask = X86GetAllElements(context, -0d);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, diff);
+
+                context.Copy(GetVec(op.Rd), abs);
+            }
+        }
+
+        // Scalar floating-point absolute value of Rn.
+        public static void Fabs_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitScalarUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, value));
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            if (op.Size == 0)
+            {
+                // And-not with a -0.0 mask clears the sign bit.
+                Operand signMask = X86GetScalar(context, -0f);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnps, signMask, GetVec(op.Rn));
+
+                context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(abs));
+            }
+            else
+            {
+                // And-not with a -0.0 mask clears the sign bit.
+                Operand signMask = X86GetScalar(context, -0d);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, GetVec(op.Rn));
+
+                context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(abs));
+            }
+        }
+
+ // Fabs_V: vector absolute value of Rn written to Rd.
+ // The SSE2 path AND-NOTs Rn with a mask built from -0 in every element;
+ // without SSE2 a managed Abs call is emitted per element.
+ public static void Fabs_V(ArmEmitterContext context)
+ {
+     if (Optimizations.UseSse2)
+     {
+         OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+         bool isSingle = (op.Size & 1) == 0;
+
+         Operand signMask = isSingle
+             ? X86GetAllElements(context, -0f)
+             : X86GetAllElements(context, -0d);
+
+         Intrinsic andnInst = isSingle ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd;
+
+         Operand cleared = context.AddIntrinsic(andnInst, signMask, GetVec(op.Rn));
+
+         // Only the single precision form checks for a 64-bit register size.
+         if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+         {
+             cleared = context.VectorZeroUpper64(cleared);
+         }
+
+         context.Copy(GetVec(op.Rd), cleared);
+
+         return;
+     }
+
+     EmitVectorUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, value));
+ }
+
+ // Fadd_S: scalar floating point add.
+ // Without FastFP the soft-float FPAdd routine is called; with FastFP the add is
+ // emitted as ADDSS/ADDSD when SSE2 is enabled, or as a plain IR Add otherwise.
+ public static void Fadd_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+     }
+     else
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Add(lhs, rhs);
+         });
+     }
+ }
+
+ // Fadd_V: vector floating point add.
+ // Without FastFP the soft-float FPAdd routine is called per element; with FastFP
+ // the add is emitted as ADDPS/ADDPD when SSE2 is enabled, or as a plain IR Add otherwise.
+ public static void Fadd_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+     }
+     else
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Add(lhs, rhs);
+         });
+     }
+ }
+
+ // Faddp_S: adds elements 0 and 1 of Rn and writes the sum to element 0 of Rd.
+ // With FastFP and SSE3 a horizontal add (HADDPS/HADDPD) of Rn with itself is used;
+ // otherwise both elements are extracted and summed via the soft-float FPAdd routine,
+ // and the sum is inserted into a zero vector.
+ public static void Faddp_S(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     if (!Optimizations.FastFP || !Optimizations.UseSse3)
+     {
+         OperandType elemType = isSingle ? OperandType.FP32 : OperandType.FP64;
+
+         Operand first  = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+         Operand second = context.VectorExtract(elemType, GetVec(op.Rn), 1);
+
+         Operand sum = EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, first, second);
+
+         Operand packed = context.VectorInsert(context.VectorZero(), sum, 0);
+
+         context.Copy(GetVec(op.Rd), packed);
+
+         return;
+     }
+
+     Intrinsic haddInst = isSingle ? Intrinsic.X86Haddps : Intrinsic.X86Haddpd;
+
+     Operand pairSum = context.AddIntrinsic(haddInst, GetVec(op.Rn), GetVec(op.Rn));
+
+     Operand result = isSingle
+         ? context.VectorZeroUpper96(pairSum)
+         : context.VectorZeroUpper64(pairSum);
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Faddp_V: vector pairwise floating point add.
+ // Uses ADDPS/ADDPD through the pairwise helper when FastFP and SSE2 are enabled,
+ // and the soft-float FPAdd routine otherwise.
+ public static void Faddp_V(ArmEmitterContext context)
+ {
+     bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;
+
+     if (useIntrinsics)
+     {
+         EmitVectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+
+         return;
+     }
+
+     EmitVectorPairwiseOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));
+ }
+
+ // Fdiv_S: scalar floating point divide.
+ // Without FastFP the soft-float FPDiv routine is called; with FastFP the divide is
+ // emitted as DIVSS/DIVSD when SSE2 is enabled, or as a plain IR Divide otherwise.
+ public static void Fdiv_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+     }
+     else
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Divide(lhs, rhs);
+         });
+     }
+ }
+
+ // Fdiv_V: vector floating point divide.
+ // Without FastFP the soft-float FPDiv routine is called per element; with FastFP the
+ // divide is emitted as DIVPS/DIVPD when SSE2 is enabled, or as a plain IR Divide otherwise.
+ public static void Fdiv_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
+     }
+     else
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Divide(lhs, rhs);
+         });
+     }
+ }
+
+ // Fmadd_S: scalar multiply-add, Rd = Ra + Rn * Rm.
+ // The FastFP + SSE2 path emits a separate scalar multiply and add (not fused);
+ // the fallback calls the soft-float FPMulAdd routine.
+ public static void Fmadd_S(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitScalarTernaryRaOpF(context, (addend, factor1, factor2) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, addend, factor1, factor2));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand ra = GetVec(op.Ra);
+     Operand rn = GetVec(op.Rn);
+     Operand rm = GetVec(op.Rm);
+
+     bool isSingle = op.Size == 0;
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulss : Intrinsic.X86Mulsd, rn, rm);
+
+     Operand sum = context.AddIntrinsic(isSingle ? Intrinsic.X86Addss : Intrinsic.X86Addsd, ra, product);
+
+     Operand result = isSingle
+         ? context.VectorZeroUpper96(sum)
+         : context.VectorZeroUpper64(sum);
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Fmax_S: scalar floating point maximum.
+ // MAXSS/MAXSD when FastFP and SSE2 are enabled, soft-float FPMax otherwise.
+ public static void Fmax_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, lhs, rhs));
+
+         return;
+     }
+
+     EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd);
+ }
+
+ // Fmax_V: vector floating point maximum.
+ // MAXPS/MAXPD when FastFP and SSE2 are enabled, soft-float FPMax otherwise.
+ public static void Fmax_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, lhs, rhs));
+
+         return;
+     }
+
+     EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+ }
+
+ // Fmaxnm_S: scalar maximum, always routed through the soft-float FPMaxNum routine.
+ public static void Fmaxnm_S(ArmEmitterContext context)
+ {
+     EmitScalarBinaryOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, lhs, rhs));
+ }
+
+ // Fmaxnm_V: vector maximum, always routed through the soft-float FPMaxNum routine.
+ public static void Fmaxnm_V(ArmEmitterContext context)
+ {
+     EmitVectorBinaryOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, lhs, rhs));
+ }
+
+ // Fmaxp_V: vector pairwise floating point maximum.
+ // MAXPS/MAXPD through the pairwise helper when FastFP and SSE2 are enabled,
+ // soft-float FPMax otherwise.
+ public static void Fmaxp_V(ArmEmitterContext context)
+ {
+     bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;
+
+     if (useIntrinsics)
+     {
+         EmitVectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+
+         return;
+     }
+
+     EmitVectorPairwiseOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, lhs, rhs));
+ }
+
+ // Fmin_S: scalar floating point minimum.
+ // MINSS/MINSD when FastFP and SSE2 are enabled, soft-float FPMin otherwise.
+ public static void Fmin_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, lhs, rhs));
+
+         return;
+     }
+
+     EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd);
+ }
+
+ // Fmin_V: vector floating point minimum.
+ // MINPS/MINPD when FastFP and SSE2 are enabled, soft-float FPMin otherwise.
+ public static void Fmin_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, lhs, rhs));
+
+         return;
+     }
+
+     EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+ }
+
+ // Fminnm_S: scalar minimum, always routed through the soft-float FPMinNum routine.
+ public static void Fminnm_S(ArmEmitterContext context)
+ {
+     EmitScalarBinaryOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, lhs, rhs));
+ }
+
+ // Fminnm_V: vector minimum, always routed through the soft-float FPMinNum routine.
+ public static void Fminnm_V(ArmEmitterContext context)
+ {
+     EmitVectorBinaryOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, lhs, rhs));
+ }
+
+ // Fminp_V: vector pairwise floating point minimum.
+ // MINPS/MINPD through the pairwise helper when FastFP and SSE2 are enabled,
+ // soft-float FPMin otherwise.
+ public static void Fminp_V(ArmEmitterContext context)
+ {
+     bool useIntrinsics = Optimizations.FastFP && Optimizations.UseSse2;
+
+     if (useIntrinsics)
+     {
+         EmitVectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+
+         return;
+     }
+
+     EmitVectorPairwiseOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, lhs, rhs));
+ }
+
+ // Fmla_Se: scalar multiply-accumulate by element, result = op1 + op2 * op3
+ // (operand roles are assigned by EmitScalarTernaryOpByElemF).
+ // With FastFP the product and the sum are emitted as two separate IR operations,
+ // exactly as before. Without FastFP the soft-float FPMulAdd routine is now used,
+ // so the accurate mode behaves like the other multiply-accumulate emitters in this
+ // file (e.g. Fmla_Ve) instead of silently emitting an unfused multiply + add.
+ public static void Fmla_Se(ArmEmitterContext context) // Fused.
+ {
+     if (Optimizations.FastFP)
+     {
+         EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+         {
+             return context.Add(op1, context.Multiply(op2, op3));
+         });
+     }
+     else
+     {
+         EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+         {
+             return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
+         });
+     }
+ }
+
+ // Fmla_V: vector multiply-accumulate, Rd = Rd + Rn * Rm.
+ // The FastFP + SSE2 path emits a separate packed multiply and add (not fused);
+ // the fallback calls the soft-float FPMulAdd routine per element.
+ public static void Fmla_V(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorTernaryOpF(context, (addend, factor1, factor2) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, addend, factor1, factor2));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand rd = GetVec(op.Rd);
+     Operand rn = GetVec(op.Rn);
+     Operand rm = GetVec(op.Rm);
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd, rn, rm);
+
+     Operand sum = context.AddIntrinsic(isSingle ? Intrinsic.X86Addps : Intrinsic.X86Addpd, rd, product);
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         sum = context.VectorZeroUpper64(sum);
+     }
+
+     context.Copy(GetVec(op.Rd), sum);
+ }
+
+ // Fmla_Ve: vector multiply-accumulate by element, Rd = Rd + Rn * Rm[Index].
+ // The FastFP + SSE2 path replicates the selected Rm element with SHUFPS/SHUFPD,
+ // then emits a separate packed multiply and add (not fused); the fallback calls
+ // the soft-float FPMulAdd routine per element.
+ public static void Fmla_Ve(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorTernaryOpByElemF(context, (addend, factor1, factor2) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, addend, factor1, factor2));
+
+         return;
+     }
+
+     OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+     Operand rd = GetVec(op.Rd);
+     Operand rn = GetVec(op.Rn);
+     Operand rm = GetVec(op.Rm);
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     // The index is repeated in every selector field of the shuffle immediate.
+     int selector = isSingle
+         ? (op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6)
+         : (op.Index | op.Index << 1);
+
+     Operand splat = context.AddIntrinsic(isSingle ? Intrinsic.X86Shufps : Intrinsic.X86Shufpd, rm, rm, Const(selector));
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd, rn, splat);
+     Operand sum     = context.AddIntrinsic(isSingle ? Intrinsic.X86Addps : Intrinsic.X86Addpd, rd, product);
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         sum = context.VectorZeroUpper64(sum);
+     }
+
+     context.Copy(GetVec(op.Rd), sum);
+ }
+
+ // Fmls_Se: scalar multiply-subtract by element, result = op1 - op2 * op3
+ // (operand roles are assigned by EmitScalarTernaryOpByElemF).
+ // With FastFP the product and the difference are emitted as two separate IR
+ // operations, exactly as before. Without FastFP the soft-float FPMulSub routine is
+ // now used, so the accurate mode behaves like the other multiply-subtract emitters
+ // in this file (e.g. Fmls_Ve) instead of silently emitting an unfused multiply + subtract.
+ public static void Fmls_Se(ArmEmitterContext context) // Fused.
+ {
+     if (Optimizations.FastFP)
+     {
+         EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+         {
+             return context.Subtract(op1, context.Multiply(op2, op3));
+         });
+     }
+     else
+     {
+         EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+         {
+             return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
+         });
+     }
+ }
+
+ // Fmls_V: vector multiply-subtract, Rd = Rd - Rn * Rm.
+ // The FastFP + SSE2 path emits a separate packed multiply and subtract (not fused);
+ // the fallback calls the soft-float FPMulSub routine per element.
+ public static void Fmls_V(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorTernaryOpF(context, (minuend, factor1, factor2) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, minuend, factor1, factor2));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand rd = GetVec(op.Rd);
+     Operand rn = GetVec(op.Rn);
+     Operand rm = GetVec(op.Rm);
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd, rn, rm);
+
+     Operand difference = context.AddIntrinsic(isSingle ? Intrinsic.X86Subps : Intrinsic.X86Subpd, rd, product);
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         difference = context.VectorZeroUpper64(difference);
+     }
+
+     context.Copy(GetVec(op.Rd), difference);
+ }
+
+ // Fmls_Ve: vector multiply-subtract by element, Rd = Rd - Rn * Rm[Index].
+ // The FastFP + SSE2 path replicates the selected Rm element with SHUFPS/SHUFPD,
+ // then emits a separate packed multiply and subtract (not fused); the fallback
+ // calls the soft-float FPMulSub routine per element.
+ public static void Fmls_Ve(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorTernaryOpByElemF(context, (minuend, factor1, factor2) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, minuend, factor1, factor2));
+
+         return;
+     }
+
+     OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+     Operand rd = GetVec(op.Rd);
+     Operand rn = GetVec(op.Rn);
+     Operand rm = GetVec(op.Rm);
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     // The index is repeated in every selector field of the shuffle immediate.
+     int selector = isSingle
+         ? (op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6)
+         : (op.Index | op.Index << 1);
+
+     Operand splat = context.AddIntrinsic(isSingle ? Intrinsic.X86Shufps : Intrinsic.X86Shufpd, rm, rm, Const(selector));
+
+     Operand product    = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd, rn, splat);
+     Operand difference = context.AddIntrinsic(isSingle ? Intrinsic.X86Subps : Intrinsic.X86Subpd, rd, product);
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         difference = context.VectorZeroUpper64(difference);
+     }
+
+     context.Copy(GetVec(op.Rd), difference);
+ }
+
+ // Fmsub_S: scalar multiply-subtract, Rd = Ra - Rn * Rm.
+ // The FastFP + SSE2 path emits a separate scalar multiply and subtract (not fused);
+ // the fallback calls the soft-float FPMulSub routine.
+ public static void Fmsub_S(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitScalarTernaryRaOpF(context, (minuend, factor1, factor2) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, minuend, factor1, factor2));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand ra = GetVec(op.Ra);
+     Operand rn = GetVec(op.Rn);
+     Operand rm = GetVec(op.Rm);
+
+     bool isSingle = op.Size == 0;
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulss : Intrinsic.X86Mulsd, rn, rm);
+
+     Operand difference = context.AddIntrinsic(isSingle ? Intrinsic.X86Subss : Intrinsic.X86Subsd, ra, product);
+
+     Operand result = isSingle
+         ? context.VectorZeroUpper96(difference)
+         : context.VectorZeroUpper64(difference);
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Fmul_S: scalar floating point multiply.
+ // Without FastFP the soft-float FPMul routine is called; with FastFP the multiply is
+ // emitted as MULSS/MULSD when SSE2 is enabled, or as a plain IR Multiply otherwise.
+ public static void Fmul_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+     }
+     else
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Multiply(lhs, rhs);
+         });
+     }
+ }
+
+ // Fmul_Se: scalar multiply by element, emitted as a plain IR Multiply.
+ public static void Fmul_Se(ArmEmitterContext context)
+ {
+     EmitScalarBinaryOpByElemF(context, (lhs, rhs) =>
+     {
+         return context.Multiply(lhs, rhs);
+     });
+ }
+
+ // Fmul_V: vector floating point multiply.
+ // Without FastFP the soft-float FPMul routine is called per element; with FastFP the
+ // multiply is emitted as MULPS/MULPD when SSE2 is enabled, or as a plain IR Multiply otherwise.
+ public static void Fmul_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+     }
+     else
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Multiply(lhs, rhs);
+         });
+     }
+ }
+
+ // Fmul_Ve: vector multiply by element, Rd = Rn * Rm[Index].
+ // Without FastFP the soft-float FPMul routine is called per element. With FastFP but
+ // no SSE2 a plain IR Multiply is emitted. With both, the selected Rm element is
+ // replicated with SHUFPS/SHUFPD and multiplied with MULPS/MULPD.
+ public static void Fmul_Ve(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitVectorBinaryOpByElemF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, lhs, rhs));
+
+         return;
+     }
+
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpByElemF(context, (lhs, rhs) =>
+         {
+             return context.Multiply(lhs, rhs);
+         });
+
+         return;
+     }
+
+     OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+     Operand rn = GetVec(op.Rn);
+     Operand rm = GetVec(op.Rm);
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     // The index is repeated in every selector field of the shuffle immediate.
+     int selector = isSingle
+         ? (op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6)
+         : (op.Index | op.Index << 1);
+
+     Operand splat = context.AddIntrinsic(isSingle ? Intrinsic.X86Shufps : Intrinsic.X86Shufpd, rm, rm, Const(selector));
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd, rn, splat);
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         product = context.VectorZeroUpper64(product);
+     }
+
+     context.Copy(GetVec(op.Rd), product);
+ }
+
+ // Fmulx_S: scalar multiply, always routed through the soft-float FPMulX routine.
+ public static void Fmulx_S(ArmEmitterContext context)
+ {
+     EmitScalarBinaryOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+ }
+
+ // Fmulx_Se: scalar multiply by element, always routed through the soft-float FPMulX routine.
+ public static void Fmulx_Se(ArmEmitterContext context)
+ {
+     EmitScalarBinaryOpByElemF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+ }
+
+ // Fmulx_V: vector multiply, always routed through the soft-float FPMulX routine.
+ public static void Fmulx_V(ArmEmitterContext context)
+ {
+     EmitVectorBinaryOpF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+ }
+
+ // Fmulx_Ve: vector multiply by element, always routed through the soft-float FPMulX routine.
+ public static void Fmulx_Ve(ArmEmitterContext context)
+ {
+     EmitVectorBinaryOpByElemF(context, (lhs, rhs) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+ }
+
+ // Fneg_S: scalar floating point negate of Rn written to Rd.
+ // The SSE2 path XORs Rn with a scalar -0 mask and passes the result through
+ // VectorZeroUpper96 (Size == 0) or VectorZeroUpper64 (otherwise); without SSE2
+ // a plain IR Negate is emitted.
+ public static void Fneg_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse2)
+     {
+         EmitScalarUnaryOpF(context, (value) =>
+         {
+             return context.Negate(value);
+         });
+
+         return;
+     }
+
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = op.Size == 0;
+
+     Operand signMask = isSingle
+         ? X86GetScalar(context, -0f)
+         : X86GetScalar(context, -0d);
+
+     Intrinsic xorInst = isSingle ? Intrinsic.X86Xorps : Intrinsic.X86Xorpd;
+
+     Operand flipped = context.AddIntrinsic(xorInst, signMask, GetVec(op.Rn));
+
+     Operand result = isSingle
+         ? context.VectorZeroUpper96(flipped)
+         : context.VectorZeroUpper64(flipped);
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Fneg_V: vector floating point negate of Rn written to Rd.
+ // The SSE2 path XORs Rn with a mask built from -0 in every element;
+ // without SSE2 a plain IR Negate is emitted per element.
+ public static void Fneg_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorUnaryOpF(context, (value) =>
+         {
+             return context.Negate(value);
+         });
+
+         return;
+     }
+
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     Operand signMask = isSingle
+         ? X86GetAllElements(context, -0f)
+         : X86GetAllElements(context, -0d);
+
+     Intrinsic xorInst = isSingle ? Intrinsic.X86Xorps : Intrinsic.X86Xorpd;
+
+     Operand flipped = context.AddIntrinsic(xorInst, signMask, GetVec(op.Rn));
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         flipped = context.VectorZeroUpper64(flipped);
+     }
+
+     context.Copy(GetVec(op.Rd), flipped);
+ }
+
+ // Fnmadd_S: scalar negated multiply-add, Rd[0] = (-Rn[0] * Rm[0]) - Ra[0].
+ // Emitted as separate IR Negate, Multiply and Subtract operations; the result is
+ // inserted into element 0 of a zero vector.
+ public static void Fnmadd_S(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     // Bit 0 of Size selects single (0) or double (1) precision.
+     OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     Operand multiplicand = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+     Operand multiplier   = context.VectorExtract(elemType, GetVec(op.Rm), 0);
+     Operand subtrahend   = context.VectorExtract(elemType, GetVec(op.Ra), 0);
+
+     Operand negated = context.Negate(multiplicand);
+     Operand product = context.Multiply(negated, multiplier);
+     Operand result  = context.Subtract(product, subtrahend);
+
+     Operand packed = context.VectorInsert(context.VectorZero(), result, 0);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ // Fnmsub_S: scalar negated multiply-subtract, Rd[0] = Rn[0] * Rm[0] - Ra[0].
+ // Emitted as separate IR Multiply and Subtract operations; the result is
+ // inserted into element 0 of a zero vector.
+ public static void Fnmsub_S(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     // Bit 0 of Size selects single (0) or double (1) precision.
+     OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     Operand multiplicand = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+     Operand multiplier   = context.VectorExtract(elemType, GetVec(op.Rm), 0);
+     Operand subtrahend   = context.VectorExtract(elemType, GetVec(op.Ra), 0);
+
+     Operand product = context.Multiply(multiplicand, multiplier);
+     Operand result  = context.Subtract(product, subtrahend);
+
+     Operand packed = context.VectorInsert(context.VectorZero(), result, 0);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ // Fnmul_S: scalar negated multiply, emitted as an IR Multiply followed by a Negate.
+ public static void Fnmul_S(ArmEmitterContext context)
+ {
+     EmitScalarBinaryOpF(context, (lhs, rhs) =>
+     {
+         Operand product = context.Multiply(lhs, rhs);
+
+         return context.Negate(product);
+     });
+ }
+
+ // Frecpe_S: scalar reciprocal estimate.
+ // RCPSS is used only for single precision with FastFP and SSE enabled;
+ // every other case calls the soft-float FPRecipEstimate routine.
+ public static void Frecpe_S(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     bool useRcp = Optimizations.FastFP && Optimizations.UseSse && isSingle;
+
+     if (!useRcp)
+     {
+         EmitScalarUnaryOpF(context, (value) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, value));
+
+         return;
+     }
+
+     // No double precision intrinsic is supplied (second argument is 0).
+     EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0);
+ }
+
+ // Frecpe_V: vector reciprocal estimate.
+ // RCPPS is used only for single precision with FastFP and SSE enabled;
+ // every other case calls the soft-float FPRecipEstimate routine per element.
+ public static void Frecpe_V(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     bool useRcp = Optimizations.FastFP && Optimizations.UseSse && isSingle;
+
+     if (!useRcp)
+     {
+         EmitVectorUnaryOpF(context, (value) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, value));
+
+         return;
+     }
+
+     // No double precision intrinsic is supplied (second argument is 0).
+     EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0);
+ }
+
+ // Frecps_S: scalar reciprocal step, Rd = 2 - Rn * Rm.
+ // The FastFP + SSE2 path emits a separate scalar multiply and subtract (not fused);
+ // the fallback calls the soft-float FPRecipStepFused routine.
+ public static void Frecps_S(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     Operand two = isSingle
+         ? X86GetScalar(context, 2f)
+         : X86GetScalar(context, 2d);
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulss : Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm));
+
+     Operand step = context.AddIntrinsic(isSingle ? Intrinsic.X86Subss : Intrinsic.X86Subsd, two, product);
+
+     Operand result = isSingle
+         ? context.VectorZeroUpper96(step)
+         : context.VectorZeroUpper64(step);
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Frecps_V: vector reciprocal step, Rd = 2 - Rn * Rm per element.
+ // The FastFP + SSE2 path emits a separate packed multiply and subtract (not fused);
+ // the fallback calls the soft-float FPRecipStepFused routine per element.
+ public static void Frecps_V(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     Operand two = isSingle
+         ? X86GetAllElements(context, 2f)
+         : X86GetAllElements(context, 2d);
+
+     Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm));
+
+     Operand step = context.AddIntrinsic(isSingle ? Intrinsic.X86Subps : Intrinsic.X86Subpd, two, product);
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         step = context.VectorZeroUpper64(step);
+     }
+
+     context.Copy(GetVec(op.Rd), step);
+ }
+
+ // Frecpx_S: scalar operation always routed through the soft-float FPRecpX routine.
+ public static void Frecpx_S(ArmEmitterContext context)
+ {
+     EmitScalarUnaryOpF(context, (value) =>
+         EmitSoftFloatCall(context, SoftFloat32.FPRecpX, SoftFloat64.FPRecpX, value));
+ }
+
+ // Frinta_S: scalar round using MidpointRounding.AwayFromZero via a managed round call.
+ public static void Frinta_S(ArmEmitterContext context)
+ {
+     EmitScalarUnaryOpF(context, (value) =>
+         EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+ }
+
+ // Frinta_V: vector round using MidpointRounding.AwayFromZero via a managed round call.
+ public static void Frinta_V(ArmEmitterContext context)
+ {
+     EmitVectorUnaryOpF(context, (value) =>
+         EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+ }
+
+ // Frinti_S: scalar round, delegated to SoftFallback.RoundF (Size == 0)
+ // or SoftFallback.Round (otherwise).
+ public static void Frinti_S(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = op.Size == 0;
+
+     EmitScalarUnaryOpF(context, (value) => isSingle
+         ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+         : context.Call(new _F64_F64(SoftFallback.Round), value));
+ }
+
+ // Frinti_V: vector round, delegated per element to SoftFallback.RoundF
+ // (single precision) or SoftFallback.Round (double precision).
+ public static void Frinti_V(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // Bit 0 of Size selects single (0) or double (1) precision.
+     bool isSingle = (op.Size & 1) == 0;
+
+     EmitVectorUnaryOpF(context, (value) => isSingle
+         ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+         : context.Call(new _F64_F64(SoftFallback.Round), value));
+ }
+
+ // Frintm_S: scalar round towards minus infinity.
+ // Uses the SSE4.1 round helper when available, a managed Floor call otherwise.
+ public static void Frintm_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitScalarUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+         return;
+     }
+
+     EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+
+ // Frintm_V: vector round towards minus infinity.
+ // Uses the SSE4.1 round helper when available, a managed Floor call otherwise.
+ public static void Frintm_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+         return;
+     }
+
+     EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+ }
+
+ // Frintn_S: scalar round to nearest.
+ // Uses the SSE4.1 round helper when available, otherwise a managed round call
+ // with MidpointRounding.ToEven.
+ public static void Frintn_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitScalarUnaryOpF(context, (value) => EmitRoundMathCall(context, MidpointRounding.ToEven, value));
+
+         return;
+     }
+
+     EmitScalarRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+
+ // Frintn_V: vector round to nearest.
+ // Uses the SSE4.1 round helper when available, otherwise a managed round call
+ // with MidpointRounding.ToEven.
+ public static void Frintn_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorUnaryOpF(context, (value) => EmitRoundMathCall(context, MidpointRounding.ToEven, value));
+
+         return;
+     }
+
+     EmitVectorRoundOpF(context, FPRoundingMode.ToNearest);
+ }
+
+ // Frintp_S: scalar round towards plus infinity.
+ // Uses the SSE4.1 round helper when available, a managed Ceiling call otherwise.
+ public static void Frintp_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitScalarUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+         return;
+     }
+
+     EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+
+ // Frintp_V: vector round towards plus infinity.
+ // Uses the SSE4.1 round helper when available, a managed Ceiling call otherwise.
+ public static void Frintp_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+         return;
+     }
+
+     EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+ }
+
+ // Frintx_S: scalar round, delegated to SoftFallback.RoundF (Size == 0)
+ // or SoftFallback.Round (otherwise).
+ public static void Frintx_S(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = op.Size == 0;
+
+     EmitScalarUnaryOpF(context, (value) => isSingle
+         ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+         : context.Call(new _F64_F64(SoftFallback.Round), value));
+ }
+
+ // Frintx_V: vector round, delegated per element to SoftFallback.RoundF
+ // (single precision) or SoftFallback.Round (double precision).
+ public static void Frintx_V(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // Bit 0 of Size selects single (0) or double (1) precision.
+     bool isSingle = (op.Size & 1) == 0;
+
+     EmitVectorUnaryOpF(context, (value) => isSingle
+         ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+         : context.Call(new _F64_F64(SoftFallback.Round), value));
+ }
+
+ // Frintz_S: scalar round towards zero.
+ // Uses the SSE4.1 round helper when available, a managed Truncate call otherwise.
+ public static void Frintz_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitScalarUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, value));
+
+         return;
+     }
+
+     EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+
+ // Frintz_V: vector round towards zero.
+ // Uses the SSE4.1 round helper when available, a managed Truncate call otherwise.
+ public static void Frintz_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorUnaryOpF(context, (value) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, value));
+
+         return;
+     }
+
+     EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero);
+ }
+
+ // Frsqrte_S: scalar reciprocal square root estimate.
+ // RSQRTSS is used only for single precision with FastFP and SSE enabled;
+ // every other case calls the soft-float FPRSqrtEstimate routine.
+ public static void Frsqrte_S(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     bool useRsqrt = Optimizations.FastFP && Optimizations.UseSse && isSingle;
+
+     if (!useRsqrt)
+     {
+         EmitScalarUnaryOpF(context, (value) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, value));
+
+         return;
+     }
+
+     // No double precision intrinsic is supplied (second argument is 0).
+     EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0);
+ }
+
+ // Frsqrte_V: vector reciprocal square root estimate.
+ // RSQRTPS is used only for single precision with FastFP and SSE enabled;
+ // every other case calls the soft-float FPRSqrtEstimate routine per element.
+ public static void Frsqrte_V(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     bool useRsqrt = Optimizations.FastFP && Optimizations.UseSse && isSingle;
+
+     if (!useRsqrt)
+     {
+         EmitVectorUnaryOpF(context, (value) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, value));
+
+         return;
+     }
+
+     // No double precision intrinsic is supplied (second argument is 0).
+     EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0);
+ }
+
+ // Frsqrts_S: scalar reciprocal square root step, Rd = 0.5 * (3 - Rn * Rm).
+ // The FastFP + SSE2 path emits separate scalar multiply/subtract/multiply
+ // instructions (not fused); the fallback calls the soft-float FPRSqrtStepFused routine.
+ public static void Frsqrts_S(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     Intrinsic mulInst = isSingle ? Intrinsic.X86Mulss : Intrinsic.X86Mulsd;
+     Intrinsic subInst = isSingle ? Intrinsic.X86Subss : Intrinsic.X86Subsd;
+
+     Operand half  = isSingle ? X86GetScalar(context, 0.5f) : X86GetScalar(context, 0.5d);
+     Operand three = isSingle ? X86GetScalar(context, 3f)   : X86GetScalar(context, 3d);
+
+     Operand product = context.AddIntrinsic(mulInst, GetVec(op.Rn), GetVec(op.Rm));
+
+     Operand remainder = context.AddIntrinsic(subInst, three, product);
+     Operand step      = context.AddIntrinsic(mulInst, half, remainder);
+
+     Operand result = isSingle
+         ? context.VectorZeroUpper96(step)
+         : context.VectorZeroUpper64(step);
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Frsqrts_V: vector reciprocal square root step, Rd = 0.5 * (3 - Rn * Rm) per element.
+ // The FastFP + SSE2 path emits separate packed multiply/subtract/multiply
+ // instructions (not fused); the fallback calls the soft-float FPRSqrtStepFused routine.
+ public static void Frsqrts_V(ArmEmitterContext context) // Fused.
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     Intrinsic mulInst = isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd;
+     Intrinsic subInst = isSingle ? Intrinsic.X86Subps : Intrinsic.X86Subpd;
+
+     Operand half  = isSingle ? X86GetAllElements(context, 0.5f) : X86GetAllElements(context, 0.5d);
+     Operand three = isSingle ? X86GetAllElements(context, 3f)   : X86GetAllElements(context, 3d);
+
+     Operand product = context.AddIntrinsic(mulInst, GetVec(op.Rn), GetVec(op.Rm));
+
+     Operand remainder = context.AddIntrinsic(subInst, three, product);
+     Operand step      = context.AddIntrinsic(mulInst, half, remainder);
+
+     // Only the single precision form checks for a 64-bit register size.
+     if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+     {
+         step = context.VectorZeroUpper64(step);
+     }
+
+     context.Copy(GetVec(op.Rd), step);
+ }
+
+ // Fsqrt_S: scalar floating point square root.
+ // SQRTSS/SQRTSD when FastFP and SSE2 are enabled, soft-float FPSqrt otherwise.
+ public static void Fsqrt_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitScalarUnaryOpF(context, (value) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, value));
+
+         return;
+     }
+
+     EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+ }
+
+ // Fsqrt_V: vector floating point square root.
+ // SQRTPS/SQRTPD when FastFP and SSE2 are enabled, soft-float FPSqrt otherwise.
+ public static void Fsqrt_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP || !Optimizations.UseSse2)
+     {
+         EmitVectorUnaryOpF(context, (value) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, value));
+
+         return;
+     }
+
+     EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
+ }
+
+ // Fsub_S: scalar floating point subtract.
+ // Without FastFP the soft-float FPSub routine is called; with FastFP the subtract is
+ // emitted as SUBSS/SUBSD when SSE2 is enabled, or as a plain IR Subtract otherwise.
+ public static void Fsub_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+     }
+     else
+     {
+         EmitScalarBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Subtract(lhs, rhs);
+         });
+     }
+ }
+
+ // Fsub_V: vector floating point subtract.
+ // Without FastFP the soft-float FPSub routine is called per element; with FastFP the
+ // subtract is emitted as SUBPS/SUBPD when SSE2 is enabled, or as a plain IR Subtract otherwise.
+ public static void Fsub_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.FastFP)
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+             EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs));
+
+         return;
+     }
+
+     if (Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+     }
+     else
+     {
+         EmitVectorBinaryOpF(context, (lhs, rhs) =>
+         {
+             return context.Subtract(lhs, rhs);
+         });
+     }
+ }
+
+ // Mla_V: integer vector multiply-accumulate, op1 + op2 * op3.
+ // Uses the SSE4.1 multiply helper in Add mode when available, otherwise
+ // emits IR Multiply and Add through the zero-extending ternary helper.
+ public static void Mla_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorTernaryOpZx(context, (addend, factor1, factor2) =>
+         {
+             Operand product = context.Multiply(factor1, factor2);
+
+             return context.Add(addend, product);
+         });
+
+         return;
+     }
+
+     EmitSse41Mul_AddSub(context, AddSub.Add);
+ }
+
+ // Mla_Ve: by-element form; emits op1 + (op2 * op3) through EmitVectorTernaryOpByElemZx.
+ public static void Mla_Ve(ArmEmitterContext context)
+ {
+     EmitVectorTernaryOpByElemZx(context, (acc, lhs, rhs) => context.Add(acc, context.Multiply(lhs, rhs)));
+ }
+
+ // Mls_V: uses the SSE4.1 multiply helper in AddSub.Subtract mode when available;
+ // otherwise emits op1 - (op2 * op3) through EmitVectorTernaryOpZx.
+ public static void Mls_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorTernaryOpZx(context, (acc, lhs, rhs) => context.Subtract(acc, context.Multiply(lhs, rhs)));
+
+         return;
+     }
+
+     EmitSse41Mul_AddSub(context, AddSub.Subtract);
+ }
+
+ // Mls_Ve: by-element form; emits op1 - (op2 * op3) through EmitVectorTernaryOpByElemZx.
+ public static void Mls_Ve(ArmEmitterContext context)
+ {
+     EmitVectorTernaryOpByElemZx(context, (acc, lhs, rhs) => context.Subtract(acc, context.Multiply(lhs, rhs)));
+ }
+
+ // Mul_V: uses the SSE4.1 multiply helper in AddSub.None mode when available;
+ // otherwise emits an element-wise Multiply through EmitVectorBinaryOpZx.
+ public static void Mul_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+         return;
+     }
+
+     EmitSse41Mul_AddSub(context, AddSub.None);
+ }
+
+ // Mul_Ve: by-element multiply through EmitVectorBinaryOpByElemZx.
+ public static void Mul_Ve(ArmEmitterContext context)
+     => EmitVectorBinaryOpByElemZx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+ // Neg_S: scalar negate through EmitScalarUnaryOpSx.
+ public static void Neg_S(ArmEmitterContext context)
+     => EmitScalarUnaryOpSx(context, (value) => context.Negate(value));
+
+ // Neg_V: with SSE2 the result is computed as (zero vector - Rn) using the
+ // per-size psub intrinsic; otherwise falls back to an element-wise Negate.
+ public static void Neg_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorUnaryOpSx(context, (value) => context.Negate(value));
+
+         return;
+     }
+
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand negated = context.AddIntrinsic(X86PsubInstruction[op.Size], context.VectorZero(), GetVec(op.Rn));
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         negated = context.VectorZeroUpper64(negated);
+     }
+
+     context.Copy(GetVec(op.Rd), negated);
+ }
+
+ // Raddhn_V: high-narrowing add with rounding enabled (see EmitHighNarrow).
+ public static void Raddhn_V(ArmEmitterContext context)
+     => EmitHighNarrow(context, (lhs, rhs) => context.Add(lhs, rhs), round: true);
+
+ // Rsubhn_V: high-narrowing subtract with rounding enabled (see EmitHighNarrow).
+ public static void Rsubhn_V(ArmEmitterContext context)
+     => EmitHighNarrow(context, (lhs, rhs) => context.Subtract(lhs, rhs), round: true);
+
+ // Saba_V: emits op1 + |op2 - op3| per element through EmitVectorTernaryOpSx.
+ public static void Saba_V(ArmEmitterContext context)
+ {
+     EmitVectorTernaryOpSx(context, (acc, lhs, rhs) => context.Add(acc, EmitAbs(context, context.Subtract(lhs, rhs))));
+ }
+
+ // Sabal_V: emits op1 + |op2 - op3| per element through the widening
+ // EmitVectorWidenRnRmTernaryOpSx helper.
+ public static void Sabal_V(ArmEmitterContext context)
+ {
+     EmitVectorWidenRnRmTernaryOpSx(context, (acc, lhs, rhs) => context.Add(acc, EmitAbs(context, context.Subtract(lhs, rhs))));
+ }
+
+ // Sabd_V: emits |op1 - op2| per element.
+ // The accelerated path goes through EmitSse41Sabd, so it must be gated on
+ // Optimizations.UseSse41 (as Uabd_V does for EmitSse41Uabd and Sabdl_V does
+ // for the long form). Gating on UseSse2 alone could select the SSE4.1 helper
+ // on a host that only reports SSE2.
+ public static void Sabd_V(ArmEmitterContext context)
+ {
+     if (Optimizations.UseSse41)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand n = GetVec(op.Rn);
+         Operand m = GetVec(op.Rm);
+
+         EmitSse41Sabd(context, op, n, m, isLong: false);
+     }
+     else
+     {
+         // Generic fallback: subtract, then take the absolute value.
+         EmitVectorBinaryOpSx(context, (op1, op2) =>
+         {
+             return EmitAbs(context, context.Subtract(op1, op2));
+         });
+     }
+ }
+
+ // Sabdl_V: long absolute difference. The SSE4.1 path (element sizes 0 and 1
+ // only) sign-extends both sources with pmovsx and then defers to
+ // EmitSse41Sabd; everything else uses the generic widening helper.
+ public static void Sabdl_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse41 || op.Size >= 2)
+     {
+         EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) => EmitAbs(context, context.Subtract(lhs, rhs)));
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst;
+
+     if (op.Size == 0)
+     {
+         extendInst = Intrinsic.X86Pmovsxbw;
+     }
+     else
+     {
+         extendInst = Intrinsic.X86Pmovsxwd;
+     }
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     EmitSse41Sabd(context, op, lhsVec, rhsVec, isLong: true);
+ }
+
+ // Sadalp_V: pairwise long add, signed, accumulating into the destination.
+ public static void Sadalp_V(ArmEmitterContext context)
+     => EmitAddLongPairwise(context, signed: true, accumulate: true);
+
+ // Saddl_V: long add. With SSE4.1 both sources are sign-extended (pmovsx) and
+ // added with the padd variant for the next larger element size; otherwise the
+ // generic widening helper is used.
+ public static void Saddl_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst = X86PmovsxInstruction[op.Size];
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PaddInstruction[op.Size + 1], lhsVec, rhsVec));
+ }
+
+ // Saddlp_V: pairwise long add, signed, without accumulation.
+ public static void Saddlp_V(ArmEmitterContext context)
+     => EmitAddLongPairwise(context, signed: true, accumulate: false);
+
+ // Saddlv_V: long add across the vector through EmitVectorLongAcrossVectorOpSx.
+ public static void Saddlv_V(ArmEmitterContext context)
+     => EmitVectorLongAcrossVectorOpSx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+ // Saddw_V: wide add. With SSE4.1 only Rm is sign-extended (pmovsx) and then
+ // added to Rn with the padd variant for the next larger element size;
+ // otherwise the generic Rm-widening helper is used.
+ public static void Saddw_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRmBinaryOpSx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand wideVec   = GetVec(op.Rn);
+     Operand narrowVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift Rm right by 8 before extending.
+         narrowVec = context.AddIntrinsic(Intrinsic.X86Psrldq, narrowVec, Const(8));
+     }
+
+     narrowVec = context.AddIntrinsic(X86PmovsxInstruction[op.Size], narrowVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PaddInstruction[op.Size + 1], wideVec, narrowVec));
+ }
+
+ // Shadd_V: halving add. The SSE2 path (element sizes above 0 only) computes
+ // (n & m) + ((n ^ m) >> 1) with an arithmetic shift; the fallback emits
+ // (op1 + op2) >> 1 per element with ShiftRightSI.
+ public static void Shadd_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size <= 0)
+     {
+         EmitVectorBinaryOpSx(context, (lhs, rhs) => context.ShiftRightSI(context.Add(lhs, rhs), Const(1)));
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Operand common  = context.AddIntrinsic(Intrinsic.X86Pand, lhsVec, rhsVec);
+     Operand differs = context.AddIntrinsic(Intrinsic.X86Pxor, lhsVec, rhsVec);
+
+     Intrinsic sarInst;
+
+     if (op.Size == 1)
+     {
+         sarInst = Intrinsic.X86Psraw;
+     }
+     else
+     {
+         sarInst = Intrinsic.X86Psrad;
+     }
+
+     Operand halfDiffers = context.AddIntrinsic(sarInst, differs, Const(1));
+
+     Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], common, halfDiffers);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Shsub_V: halving subtract. The SSE2 path (element sizes 0 and 1 only) adds
+ // a per-element 0x80/0x8000 mask to both sources, averages them with
+ // pavgb/pavgw and subtracts that average from the biased n. The fallback
+ // emits (op1 - op2) >> 1 per element with ShiftRightSI.
+ public static void Shsub_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size >= 2)
+     {
+         EmitVectorBinaryOpSx(context, (lhs, rhs) => context.ShiftRightSI(context.Subtract(lhs, rhs), Const(1)));
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     bool isByte = op.Size == 0;
+
+     uint maskBits = isByte ? 0x80808080u : 0x80008000u;
+
+     Operand bias = X86GetAllElements(context, (int)maskBits);
+
+     Intrinsic paddInst = X86PaddInstruction[op.Size];
+
+     Operand biasedLhs = context.AddIntrinsic(paddInst, lhsVec, bias);
+     Operand biasedRhs = context.AddIntrinsic(paddInst, rhsVec, bias);
+
+     Intrinsic pavgInst = isByte ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+     Operand average = context.AddIntrinsic(pavgInst, biasedLhs, biasedRhs);
+
+     Operand result = context.AddIntrinsic(X86PsubInstruction[op.Size], biasedLhs, average);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Smax_V: element-wise maximum. Uses the per-size pmaxs intrinsic with
+ // SSE4.1, otherwise emits a call to Math.Max (long overload) per element.
+ public static void Smax_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         Delegate maxFn = new _S64_S64_S64(Math.Max);
+
+         EmitVectorBinaryOpSx(context, (lhs, rhs) => context.Call(maxFn, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Operand result = context.AddIntrinsic(X86PmaxsInstruction[op.Size], lhsVec, rhsVec);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Smaxp_V: pairwise maximum; each pair is reduced by a call to Math.Max.
+ public static void Smaxp_V(ArmEmitterContext context)
+ {
+     Delegate maxFn = new _S64_S64_S64(Math.Max);
+
+     EmitVectorPairwiseOpSx(context, (lhs, rhs) =>
+     {
+         return context.Call(maxFn, lhs, rhs);
+     });
+ }
+
+ // Smaxv_V: maximum across the vector; elements are reduced by calls to Math.Max.
+ public static void Smaxv_V(ArmEmitterContext context)
+ {
+     Delegate maxFn = new _S64_S64_S64(Math.Max);
+
+     EmitVectorAcrossVectorOpSx(context, (lhs, rhs) =>
+     {
+         return context.Call(maxFn, lhs, rhs);
+     });
+ }
+
+ // Smin_V: element-wise minimum. Uses the per-size pmins intrinsic with
+ // SSE4.1, otherwise emits a call to Math.Min (long overload) per element.
+ public static void Smin_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         Delegate minFn = new _S64_S64_S64(Math.Min);
+
+         EmitVectorBinaryOpSx(context, (lhs, rhs) => context.Call(minFn, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Operand result = context.AddIntrinsic(X86PminsInstruction[op.Size], lhsVec, rhsVec);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Sminp_V: pairwise minimum; each pair is reduced by a call to Math.Min.
+ public static void Sminp_V(ArmEmitterContext context)
+ {
+     Delegate minFn = new _S64_S64_S64(Math.Min);
+
+     EmitVectorPairwiseOpSx(context, (lhs, rhs) =>
+     {
+         return context.Call(minFn, lhs, rhs);
+     });
+ }
+
+ // Sminv_V: minimum across the vector; elements are reduced by calls to Math.Min.
+ public static void Sminv_V(ArmEmitterContext context)
+ {
+     Delegate minFn = new _S64_S64_S64(Math.Min);
+
+     EmitVectorAcrossVectorOpSx(context, (lhs, rhs) =>
+     {
+         return context.Call(minFn, lhs, rhs);
+     });
+ }
+
+ // Smlal_V: long multiply-accumulate. The SSE4.1 path (element sizes 0 and 1
+ // only) sign-extends both sources, multiplies them with pmullw/pmulld and
+ // adds the product to Rd; the fallback emits op1 + (op2 * op3) through the
+ // generic widening helper.
+ public static void Smlal_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse41 || op.Size >= 2)
+     {
+         EmitVectorWidenRnRmTernaryOpSx(context, (acc, lhs, rhs) => context.Add(acc, context.Multiply(lhs, rhs)));
+
+         return;
+     }
+
+     Operand accVec = GetVec(op.Rd);
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst = X86PmovsxInstruction[op.Size];
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     Intrinsic pmullInst;
+
+     if (op.Size == 0)
+     {
+         pmullInst = Intrinsic.X86Pmullw;
+     }
+     else
+     {
+         pmullInst = Intrinsic.X86Pmulld;
+     }
+
+     Operand product = context.AddIntrinsic(pmullInst, lhsVec, rhsVec);
+
+     context.Copy(accVec, context.AddIntrinsic(X86PaddInstruction[op.Size + 1], accVec, product));
+ }
+
+ // Smlal_Ve: by-element long multiply-accumulate; emits op1 + (op2 * op3)
+ // through EmitVectorWidenTernaryOpByElemSx.
+ public static void Smlal_Ve(ArmEmitterContext context)
+ {
+     EmitVectorWidenTernaryOpByElemSx(context, (acc, lhs, rhs) => context.Add(acc, context.Multiply(lhs, rhs)));
+ }
+
+ // Smlsl_V: long multiply-subtract. The SSE4.1 path (element sizes 0 and 1
+ // only) sign-extends both sources, multiplies them with pmullw/pmulld and
+ // subtracts the product from Rd; the fallback emits op1 - (op2 * op3) through
+ // the generic widening helper.
+ public static void Smlsl_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse41 || op.Size >= 2)
+     {
+         EmitVectorWidenRnRmTernaryOpSx(context, (acc, lhs, rhs) => context.Subtract(acc, context.Multiply(lhs, rhs)));
+
+         return;
+     }
+
+     Operand accVec = GetVec(op.Rd);
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     bool isByte = op.Size == 0;
+
+     Intrinsic extendInst = isByte ? Intrinsic.X86Pmovsxbw : Intrinsic.X86Pmovsxwd;
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     Intrinsic pmullInst = isByte ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+     Operand product = context.AddIntrinsic(pmullInst, lhsVec, rhsVec);
+
+     context.Copy(accVec, context.AddIntrinsic(X86PsubInstruction[op.Size + 1], accVec, product));
+ }
+
+ // Smlsl_Ve: by-element long multiply-subtract; emits op1 - (op2 * op3)
+ // through EmitVectorWidenTernaryOpByElemSx.
+ public static void Smlsl_Ve(ArmEmitterContext context)
+ {
+     EmitVectorWidenTernaryOpByElemSx(context, (acc, lhs, rhs) => context.Subtract(acc, context.Multiply(lhs, rhs)));
+ }
+
+ // Smull_V: long multiply through EmitVectorWidenRnRmBinaryOpSx.
+ public static void Smull_V(ArmEmitterContext context)
+     => EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+ // Smull_Ve: by-element long multiply through EmitVectorWidenBinaryOpByElemSx.
+ public static void Smull_Ve(ArmEmitterContext context)
+     => EmitVectorWidenBinaryOpByElemSx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+ // Sqabs_S: scalar absolute value routed through the saturating unary helper.
+ public static void Sqabs_S(ArmEmitterContext context)
+     => EmitScalarSaturatingUnaryOpSx(context, (value) => EmitAbs(context, value));
+
+ // Sqabs_V: vector absolute value routed through the saturating unary helper.
+ public static void Sqabs_V(ArmEmitterContext context)
+     => EmitVectorSaturatingUnaryOpSx(context, (value) => EmitAbs(context, value));
+
+ // Sqadd_S: scalar saturating binary op (Sx helper) with SaturatingFlags.Add.
+ public static void Sqadd_S(ArmEmitterContext context)
+     => EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add);
+
+ // Sqadd_V: vector saturating binary op (Sx helper) with SaturatingFlags.Add.
+ public static void Sqadd_V(ArmEmitterContext context)
+     => EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add);
+
+ // Sqdmulh_S: scalar doubling multiply, high half, no rounding; saturation is
+ // applied by EmitSaturatingBinaryOp with SaturatingFlags.ScalarSx.
+ public static void Sqdmulh_S(ArmEmitterContext context)
+ {
+     EmitSaturatingBinaryOp(
+         context,
+         (lhs, rhs) => EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: false),
+         SaturatingFlags.ScalarSx);
+ }
+
+ // Sqdmulh_V: vector doubling multiply, high half, no rounding; saturation is
+ // applied by EmitSaturatingBinaryOp with SaturatingFlags.VectorSx.
+ public static void Sqdmulh_V(ArmEmitterContext context)
+ {
+     EmitSaturatingBinaryOp(
+         context,
+         (lhs, rhs) => EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: false),
+         SaturatingFlags.VectorSx);
+ }
+
+ // Sqneg_S: scalar negate routed through the saturating unary helper.
+ public static void Sqneg_S(ArmEmitterContext context)
+     => EmitScalarSaturatingUnaryOpSx(context, (value) => context.Negate(value));
+
+ // Sqneg_V: vector negate routed through the saturating unary helper.
+ public static void Sqneg_V(ArmEmitterContext context)
+     => EmitVectorSaturatingUnaryOpSx(context, (value) => context.Negate(value));
+
+ // Sqrdmulh_S: scalar doubling multiply, high half, with rounding; saturation
+ // is applied by EmitSaturatingBinaryOp with SaturatingFlags.ScalarSx.
+ public static void Sqrdmulh_S(ArmEmitterContext context)
+ {
+     EmitSaturatingBinaryOp(
+         context,
+         (lhs, rhs) => EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: true),
+         SaturatingFlags.ScalarSx);
+ }
+
+ // Sqrdmulh_V: vector doubling multiply, high half, with rounding; saturation
+ // is applied by EmitSaturatingBinaryOp with SaturatingFlags.VectorSx.
+ public static void Sqrdmulh_V(ArmEmitterContext context)
+ {
+     EmitSaturatingBinaryOp(
+         context,
+         (lhs, rhs) => EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: true),
+         SaturatingFlags.VectorSx);
+ }
+
+ // Sqsub_S: scalar saturating binary op (Sx helper) with SaturatingFlags.Sub.
+ public static void Sqsub_S(ArmEmitterContext context)
+     => EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+
+ // Sqsub_V: vector saturating binary op (Sx helper) with SaturatingFlags.Sub.
+ public static void Sqsub_V(ArmEmitterContext context)
+     => EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+
+ // Sqxtn_S: saturating narrow with SaturatingNarrowFlags.ScalarSxSx.
+ public static void Sqxtn_S(ArmEmitterContext context)
+     => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+
+ // Sqxtn_V: saturating narrow with SaturatingNarrowFlags.VectorSxSx.
+ public static void Sqxtn_V(ArmEmitterContext context)
+     => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+
+ // Sqxtun_S: saturating narrow with SaturatingNarrowFlags.ScalarSxZx.
+ public static void Sqxtun_S(ArmEmitterContext context)
+     => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+
+ // Sqxtun_V: saturating narrow with SaturatingNarrowFlags.VectorSxZx.
+ public static void Sqxtun_V(ArmEmitterContext context)
+     => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+
+ // Srhadd_V: rounding halving add. The SSE2 path (element sizes 0 and 1 only)
+ // subtracts a per-element 0x80/0x8000 mask from both sources, averages them
+ // with pavgb/pavgw and adds the mask back. The fallback emits
+ // (op1 + op2 + 1) >> 1 per element with ShiftRightSI.
+ public static void Srhadd_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size >= 2)
+     {
+         EmitVectorBinaryOpSx(context, (lhs, rhs) =>
+         {
+             Operand sum     = context.Add(lhs, rhs);
+             Operand rounded = context.Add(sum, Const(1L));
+
+             return context.ShiftRightSI(rounded, Const(1));
+         });
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     bool isByte = op.Size == 0;
+
+     uint maskBits = isByte ? 0x80808080u : 0x80008000u;
+
+     Operand bias = X86GetAllElements(context, (int)maskBits);
+
+     Intrinsic psubInst = X86PsubInstruction[op.Size];
+
+     Operand biasedLhs = context.AddIntrinsic(psubInst, lhsVec, bias);
+     Operand biasedRhs = context.AddIntrinsic(psubInst, rhsVec, bias);
+
+     Intrinsic pavgInst = isByte ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+
+     Operand average = context.AddIntrinsic(pavgInst, biasedLhs, biasedRhs);
+
+     Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], bias, average);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Ssubl_V: long subtract. With SSE4.1 both sources are sign-extended (pmovsx)
+ // and subtracted with the psub variant for the next larger element size;
+ // otherwise the generic widening helper is used.
+ public static void Ssubl_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst = X86PmovsxInstruction[op.Size];
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PsubInstruction[op.Size + 1], lhsVec, rhsVec));
+ }
+
+ // Ssubw_V: wide subtract. With SSE4.1 only Rm is sign-extended (pmovsx) and
+ // then subtracted from Rn with the psub variant for the next larger element
+ // size; otherwise the generic Rm-widening helper is used.
+ public static void Ssubw_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRmBinaryOpSx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand wideVec   = GetVec(op.Rn);
+     Operand narrowVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift Rm right by 8 before extending.
+         narrowVec = context.AddIntrinsic(Intrinsic.X86Psrldq, narrowVec, Const(8));
+     }
+
+     narrowVec = context.AddIntrinsic(X86PmovsxInstruction[op.Size], narrowVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PsubInstruction[op.Size + 1], wideVec, narrowVec));
+ }
+
+ // Sub_S: scalar subtract through EmitScalarBinaryOpZx.
+ public static void Sub_S(ArmEmitterContext context)
+     => EmitScalarBinaryOpZx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+ // Sub_V: vector subtract. Uses the per-size psub intrinsic with SSE2,
+ // otherwise an element-wise Subtract through EmitVectorBinaryOpZx.
+ public static void Sub_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Operand difference = context.AddIntrinsic(X86PsubInstruction[op.Size], lhsVec, rhsVec);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         difference = context.VectorZeroUpper64(difference);
+     }
+
+     context.Copy(GetVec(op.Rd), difference);
+ }
+
+ // Subhn_V: high-narrowing subtract without rounding (see EmitHighNarrow).
+ public static void Subhn_V(ArmEmitterContext context)
+     => EmitHighNarrow(context, (lhs, rhs) => context.Subtract(lhs, rhs), round: false);
+
+ // Suqadd_S: scalar saturating binary op (Sx helper) with SaturatingFlags.Accumulate.
+ public static void Suqadd_S(ArmEmitterContext context)
+     => EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
+
+ // Suqadd_V: vector saturating binary op (Sx helper) with SaturatingFlags.Accumulate.
+ public static void Suqadd_V(ArmEmitterContext context)
+     => EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
+
+ // Uaba_V: emits op1 + EmitAbs(op2 - op3) per element through EmitVectorTernaryOpZx.
+ public static void Uaba_V(ArmEmitterContext context)
+ {
+     EmitVectorTernaryOpZx(context, (acc, lhs, rhs) => context.Add(acc, EmitAbs(context, context.Subtract(lhs, rhs))));
+ }
+
+ // Uabal_V: emits op1 + EmitAbs(op2 - op3) per element through the widening
+ // EmitVectorWidenRnRmTernaryOpZx helper.
+ public static void Uabal_V(ArmEmitterContext context)
+ {
+     EmitVectorWidenRnRmTernaryOpZx(context, (acc, lhs, rhs) => context.Add(acc, EmitAbs(context, context.Subtract(lhs, rhs))));
+ }
+
+ // Uabd_V: absolute difference. Defers to EmitSse41Uabd when SSE4.1 is
+ // available, otherwise emits EmitAbs(op1 - op2) through EmitVectorBinaryOpZx.
+ public static void Uabd_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorBinaryOpZx(context, (lhs, rhs) => EmitAbs(context, context.Subtract(lhs, rhs)));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     EmitSse41Uabd(context, op, GetVec(op.Rn), GetVec(op.Rm), isLong: false);
+ }
+
+ // Uabdl_V: long absolute difference. The SSE4.1 path (element sizes 0 and 1
+ // only) zero-extends both sources with pmovzx and then defers to
+ // EmitSse41Uabd; everything else uses the generic widening helper.
+ public static void Uabdl_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse41 || op.Size >= 2)
+     {
+         EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) => EmitAbs(context, context.Subtract(lhs, rhs)));
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst;
+
+     if (op.Size == 0)
+     {
+         extendInst = Intrinsic.X86Pmovzxbw;
+     }
+     else
+     {
+         extendInst = Intrinsic.X86Pmovzxwd;
+     }
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     EmitSse41Uabd(context, op, lhsVec, rhsVec, isLong: true);
+ }
+
+ // Uadalp_V: pairwise long add, unsigned, accumulating into the destination.
+ public static void Uadalp_V(ArmEmitterContext context)
+     => EmitAddLongPairwise(context, signed: false, accumulate: true);
+
+ // Uaddl_V: long add. With SSE4.1 both sources are zero-extended (pmovzx) and
+ // added with the padd variant for the next larger element size; otherwise the
+ // generic widening helper is used.
+ public static void Uaddl_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst = X86PmovzxInstruction[op.Size];
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PaddInstruction[op.Size + 1], lhsVec, rhsVec));
+ }
+
+ // Uaddlp_V: pairwise long add, unsigned, without accumulation.
+ public static void Uaddlp_V(ArmEmitterContext context)
+     => EmitAddLongPairwise(context, signed: false, accumulate: false);
+
+ // Uaddlv_V: long add across the vector through EmitVectorLongAcrossVectorOpZx.
+ public static void Uaddlv_V(ArmEmitterContext context)
+     => EmitVectorLongAcrossVectorOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+ // Uaddw_V: wide add. With SSE4.1 only Rm is zero-extended (pmovzx) and then
+ // added to Rn with the padd variant for the next larger element size;
+ // otherwise the generic Rm-widening helper is used.
+ public static void Uaddw_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRmBinaryOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand wideVec   = GetVec(op.Rn);
+     Operand narrowVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift Rm right by 8 before extending.
+         narrowVec = context.AddIntrinsic(Intrinsic.X86Psrldq, narrowVec, Const(8));
+     }
+
+     narrowVec = context.AddIntrinsic(X86PmovzxInstruction[op.Size], narrowVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PaddInstruction[op.Size + 1], wideVec, narrowVec));
+ }
+
+ // Uhadd_V: halving add. The SSE2 path (element sizes above 0 only) computes
+ // (n & m) + ((n ^ m) >> 1) with a logical shift; the fallback emits
+ // (op1 + op2) >> 1 per element with ShiftRightUI.
+ public static void Uhadd_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size <= 0)
+     {
+         EmitVectorBinaryOpZx(context, (lhs, rhs) => context.ShiftRightUI(context.Add(lhs, rhs), Const(1)));
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Operand common  = context.AddIntrinsic(Intrinsic.X86Pand, lhsVec, rhsVec);
+     Operand differs = context.AddIntrinsic(Intrinsic.X86Pxor, lhsVec, rhsVec);
+
+     Intrinsic shrInst;
+
+     if (op.Size == 1)
+     {
+         shrInst = Intrinsic.X86Psrlw;
+     }
+     else
+     {
+         shrInst = Intrinsic.X86Psrld;
+     }
+
+     Operand halfDiffers = context.AddIntrinsic(shrInst, differs, Const(1));
+
+     Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], common, halfDiffers);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Uhsub_V: halving subtract. The SSE2 path (element sizes 0 and 1 only)
+ // computes n - pavg(n, m); the fallback emits (op1 - op2) >> 1 per element
+ // with ShiftRightUI.
+ public static void Uhsub_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size >= 2)
+     {
+         EmitVectorBinaryOpZx(context, (lhs, rhs) => context.ShiftRightUI(context.Subtract(lhs, rhs), Const(1)));
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Intrinsic pavgInst;
+
+     if (op.Size == 0)
+     {
+         pavgInst = Intrinsic.X86Pavgb;
+     }
+     else
+     {
+         pavgInst = Intrinsic.X86Pavgw;
+     }
+
+     Operand average = context.AddIntrinsic(pavgInst, lhsVec, rhsVec);
+
+     Operand result = context.AddIntrinsic(X86PsubInstruction[op.Size], lhsVec, average);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Umax_V: element-wise maximum. Uses the per-size pmaxu intrinsic with
+ // SSE4.1, otherwise emits a call to Math.Max (ulong overload) per element.
+ public static void Umax_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         Delegate maxFn = new _U64_U64_U64(Math.Max);
+
+         EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Call(maxFn, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Operand result = context.AddIntrinsic(X86PmaxuInstruction[op.Size], lhsVec, rhsVec);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Umaxp_V: pairwise maximum; each pair is reduced by a call to Math.Max.
+ public static void Umaxp_V(ArmEmitterContext context)
+ {
+     Delegate maxFn = new _U64_U64_U64(Math.Max);
+
+     EmitVectorPairwiseOpZx(context, (lhs, rhs) =>
+     {
+         return context.Call(maxFn, lhs, rhs);
+     });
+ }
+
+ // Umaxv_V: maximum across the vector; elements are reduced by calls to Math.Max.
+ public static void Umaxv_V(ArmEmitterContext context)
+ {
+     Delegate maxFn = new _U64_U64_U64(Math.Max);
+
+     EmitVectorAcrossVectorOpZx(context, (lhs, rhs) =>
+     {
+         return context.Call(maxFn, lhs, rhs);
+     });
+ }
+
+ // Umin_V: element-wise minimum. Uses the per-size pminu intrinsic with
+ // SSE4.1, otherwise emits a call to Math.Min (ulong overload) per element.
+ public static void Umin_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         Delegate minFn = new _U64_U64_U64(Math.Min);
+
+         EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Call(minFn, lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Operand result = context.AddIntrinsic(X86PminuInstruction[op.Size], lhsVec, rhsVec);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Uminp_V: pairwise minimum; each pair is reduced by a call to Math.Min.
+ public static void Uminp_V(ArmEmitterContext context)
+ {
+     Delegate minFn = new _U64_U64_U64(Math.Min);
+
+     EmitVectorPairwiseOpZx(context, (lhs, rhs) =>
+     {
+         return context.Call(minFn, lhs, rhs);
+     });
+ }
+
+ // Uminv_V: minimum across the vector; elements are reduced by calls to Math.Min.
+ public static void Uminv_V(ArmEmitterContext context)
+ {
+     Delegate minFn = new _U64_U64_U64(Math.Min);
+
+     EmitVectorAcrossVectorOpZx(context, (lhs, rhs) =>
+     {
+         return context.Call(minFn, lhs, rhs);
+     });
+ }
+
+ // Umlal_V: long multiply-accumulate. The SSE4.1 path (element sizes 0 and 1
+ // only) zero-extends both sources, multiplies them with pmullw/pmulld and
+ // adds the product to Rd; the fallback emits op1 + (op2 * op3) through the
+ // generic widening helper.
+ public static void Umlal_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse41 || op.Size >= 2)
+     {
+         EmitVectorWidenRnRmTernaryOpZx(context, (acc, lhs, rhs) => context.Add(acc, context.Multiply(lhs, rhs)));
+
+         return;
+     }
+
+     Operand accVec = GetVec(op.Rd);
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst = X86PmovzxInstruction[op.Size];
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     Intrinsic pmullInst;
+
+     if (op.Size == 0)
+     {
+         pmullInst = Intrinsic.X86Pmullw;
+     }
+     else
+     {
+         pmullInst = Intrinsic.X86Pmulld;
+     }
+
+     Operand product = context.AddIntrinsic(pmullInst, lhsVec, rhsVec);
+
+     context.Copy(accVec, context.AddIntrinsic(X86PaddInstruction[op.Size + 1], accVec, product));
+ }
+
+ // Umlal_Ve: by-element long multiply-accumulate; emits op1 + (op2 * op3)
+ // through EmitVectorWidenTernaryOpByElemZx.
+ public static void Umlal_Ve(ArmEmitterContext context)
+ {
+     EmitVectorWidenTernaryOpByElemZx(context, (acc, lhs, rhs) => context.Add(acc, context.Multiply(lhs, rhs)));
+ }
+
+ // Umlsl_V: long multiply-subtract. The SSE4.1 path (element sizes 0 and 1
+ // only) zero-extends both sources, multiplies them with pmullw/pmulld and
+ // subtracts the product from Rd; the fallback emits op1 - (op2 * op3) through
+ // the generic widening helper.
+ public static void Umlsl_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse41 || op.Size >= 2)
+     {
+         EmitVectorWidenRnRmTernaryOpZx(context, (acc, lhs, rhs) => context.Subtract(acc, context.Multiply(lhs, rhs)));
+
+         return;
+     }
+
+     Operand accVec = GetVec(op.Rd);
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     bool isByte = op.Size == 0;
+
+     Intrinsic extendInst = isByte ? Intrinsic.X86Pmovzxbw : Intrinsic.X86Pmovzxwd;
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     Intrinsic pmullInst = isByte ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+
+     Operand product = context.AddIntrinsic(pmullInst, lhsVec, rhsVec);
+
+     context.Copy(accVec, context.AddIntrinsic(X86PsubInstruction[op.Size + 1], accVec, product));
+ }
+
+ // Umlsl_Ve: by-element long multiply-subtract; emits op1 - (op2 * op3)
+ // through EmitVectorWidenTernaryOpByElemZx.
+ public static void Umlsl_Ve(ArmEmitterContext context)
+ {
+     EmitVectorWidenTernaryOpByElemZx(context, (acc, lhs, rhs) => context.Subtract(acc, context.Multiply(lhs, rhs)));
+ }
+
+ // Umull_V: long multiply through EmitVectorWidenRnRmBinaryOpZx.
+ public static void Umull_V(ArmEmitterContext context)
+     => EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+ // Umull_Ve: by-element long multiply through EmitVectorWidenBinaryOpByElemZx.
+ public static void Umull_Ve(ArmEmitterContext context)
+     => EmitVectorWidenBinaryOpByElemZx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+ // Uqadd_S: scalar saturating binary op (Zx helper) with SaturatingFlags.Add.
+ public static void Uqadd_S(ArmEmitterContext context)
+     => EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+
+ // Uqadd_V: vector saturating binary op (Zx helper) with SaturatingFlags.Add.
+ public static void Uqadd_V(ArmEmitterContext context)
+     => EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+
+ // Uqsub_S: scalar saturating binary op (Zx helper) with SaturatingFlags.Sub.
+ public static void Uqsub_S(ArmEmitterContext context)
+     => EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+
+ // Uqsub_V: vector saturating binary op (Zx helper) with SaturatingFlags.Sub.
+ public static void Uqsub_V(ArmEmitterContext context)
+     => EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+
+ // Uqxtn_S: saturating narrow with SaturatingNarrowFlags.ScalarZxZx.
+ public static void Uqxtn_S(ArmEmitterContext context)
+     => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+
+ // Uqxtn_V: saturating narrow with SaturatingNarrowFlags.VectorZxZx.
+ public static void Uqxtn_V(ArmEmitterContext context)
+     => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+
+ // Urhadd_V: rounding halving add. The SSE2 path (element sizes 0 and 1 only)
+ // maps directly onto pavgb/pavgw; the fallback emits (op1 + op2 + 1) >> 1 per
+ // element with ShiftRightUI.
+ public static void Urhadd_V(ArmEmitterContext context)
+ {
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size >= 2)
+     {
+         EmitVectorBinaryOpZx(context, (lhs, rhs) =>
+         {
+             Operand sum     = context.Add(lhs, rhs);
+             Operand rounded = context.Add(sum, Const(1L));
+
+             return context.ShiftRightUI(rounded, Const(1));
+         });
+
+         return;
+     }
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     Intrinsic pavgInst;
+
+     if (op.Size == 0)
+     {
+         pavgInst = Intrinsic.X86Pavgb;
+     }
+     else
+     {
+         pavgInst = Intrinsic.X86Pavgw;
+     }
+
+     Operand average = context.AddIntrinsic(pavgInst, lhsVec, rhsVec);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit register form: zero the upper 64 bits of the result.
+         average = context.VectorZeroUpper64(average);
+     }
+
+     context.Copy(GetVec(op.Rd), average);
+ }
+
+ // Usqadd_S: scalar saturating binary op (Zx helper) with SaturatingFlags.Accumulate.
+ public static void Usqadd_S(ArmEmitterContext context)
+     => EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+
+ // Usqadd_V: vector saturating binary op (Zx helper) with SaturatingFlags.Accumulate.
+ public static void Usqadd_V(ArmEmitterContext context)
+     => EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+
+ // Usubl_V: long subtract. With SSE4.1 both sources are zero-extended (pmovzx)
+ // and subtracted with the psub variant for the next larger element size;
+ // otherwise the generic widening helper is used.
+ public static void Usubl_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhsVec = GetVec(op.Rn);
+     Operand rhsVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift each source right by 8 before extending.
+         lhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, lhsVec, Const(8));
+         rhsVec = context.AddIntrinsic(Intrinsic.X86Psrldq, rhsVec, Const(8));
+     }
+
+     Intrinsic extendInst = X86PmovzxInstruction[op.Size];
+
+     lhsVec = context.AddIntrinsic(extendInst, lhsVec);
+     rhsVec = context.AddIntrinsic(extendInst, rhsVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PsubInstruction[op.Size + 1], lhsVec, rhsVec));
+ }
+
+ // Usubw_V: wide subtract. With SSE4.1 only Rm is zero-extended (pmovzx) and
+ // then subtracted from Rn with the psub variant for the next larger element
+ // size; otherwise the generic Rm-widening helper is used.
+ public static void Usubw_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorWidenRmBinaryOpZx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand wideVec   = GetVec(op.Rn);
+     Operand narrowVec = GetVec(op.Rm);
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // 128-bit form: byte-shift Rm right by 8 before extending.
+         narrowVec = context.AddIntrinsic(Intrinsic.X86Psrldq, narrowVec, Const(8));
+     }
+
+     narrowVec = context.AddIntrinsic(X86PmovzxInstruction[op.Size], narrowVec);
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(X86PsubInstruction[op.Size + 1], wideVec, narrowVec));
+ }
+
+ // EmitAbs: selects `value` when it compares >= 0 (constant zero of the same
+ // operand type), otherwise its negation.
+ private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+ {
+     Operand zero = Const(value.Type, 0);
+
+     Operand nonNegative = context.ICompareGreaterOrEqual(value, zero);
+
+     Operand negated = context.Negate(value);
+
+     return context.ConditionalSelect(nonNegative, value, negated);
+ }
+
+ // EmitAddLongPairwise: adds adjacent element pairs of Rn (extracted at
+ // op.Size, signed or unsigned as requested) and stores each sum as one
+ // element of size op.Size + 1. When `accumulate` is set, the matching wide
+ // element of Rd is added to the sum first. The result is built in a zeroed
+ // vector and copied to Rd at the end.
+ private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand result = context.VectorZero();
+
+     int wideSize  = op.Size + 1;
+     int pairCount = op.GetPairsCount() >> op.Size;
+
+     // `dst` walks the wide destination elements, `src` the first element of each source pair.
+     for (int dst = 0, src = 0; dst < pairCount; dst++, src += 2)
+     {
+         Operand first  = EmitVectorExtract(context, op.Rn, src,     op.Size, signed);
+         Operand second = EmitVectorExtract(context, op.Rn, src + 1, op.Size, signed);
+
+         Operand sum = context.Add(first, second);
+
+         if (accumulate)
+         {
+             Operand previous = EmitVectorExtract(context, op.Rd, dst, wideSize, signed);
+
+             sum = context.Add(sum, previous);
+         }
+
+         result = EmitVectorInsert(context, result, sum, dst, wideSize);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ private static Operand EmitDoublingMultiplyHighHalf(
+ ArmEmitterContext context,
+ Operand n,
+ Operand m,
+ bool round)
+ { // Returns the part of 2 * n * m above the low eSize bits for the current element size, optionally rounded.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int eSize = 8 << op.Size;
+
+ Operand res = context.Multiply(n, m); // NOTE(review): presumably n and m are elements already extended to 64 bits - confirm at callers.
+
+ if (!round)
+ {
+ res = context.ShiftRightSI(res, Const(eSize - 1)); // Shifting by eSize - 1 folds the doubling into the shift.
+ }
+ else
+ {
+ long roundConst = 1L << (eSize - 1);
+ // Double the product, add half of the discarded range, then keep the high part.
+ res = context.ShiftLeft(res, Const(1));
+
+ res = context.Add(res, Const(roundConst));
+
+ res = context.ShiftRightSI(res, Const(eSize));
+ // A result equal to int.MinValue is replaced by its negation.
+ Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue));
+
+ res = context.ConditionalSelect(isIntMin, context.Negate(res), res);
+ }
+
+ return res;
+ }
+
+ private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round)
+ { // Applies emit to each wide element pair of Rn/Rm and stores the upper half of every result in Rd at the narrow size.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ int elems = 8 >> op.Size;
+ int eSize = 8 << op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; // 128-bit form writes the upper elements of Rd.
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd)); // Upper-half form starts from the current Rd value.
+
+ long roundConst = 1L << (eSize - 1);
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1);
+
+ Operand de = emit(ne, me);
+
+ if (round)
+ {
+ de = context.Add(de, Const(roundConst)); // Add half of the discarded range before the shift.
+ }
+ // Drop the low eSize bits; what remains is inserted at the narrow element size.
+ de = context.ShiftRightUI(de, Const(eSize));
+
+ res = EmitVectorInsert(context, res, de, part + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ // Rounds the scalar FP element of Rn with ROUNDSS/ROUNDSD and clears the rest of Rd.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+ bool isDouble = (op.Size & 1) != 0;
+
+ Operand source = GetVec(op.Rn);
+ Operand control = Const(X86GetRoundControl(roundMode));
+
+ // Bit 0 of Size picks the double-precision instruction.
+ Operand rounded = isDouble
+ ? context.AddIntrinsic(Intrinsic.X86Roundsd, source, control)
+ : context.AddIntrinsic(Intrinsic.X86Roundss, source, control);
+
+ // Keep only the scalar lane: the low 64 bits for double, the low 32 bits for single.
+ Operand result = isDouble
+ ? context.VectorZeroUpper64(rounded)
+ : context.VectorZeroUpper96(rounded);
+
+ context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+ {
+ // Rounds every FP lane of Rn with ROUNDPS/ROUNDPD using the control value derived from roundMode.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+ bool isDouble = (op.Size & 1) != 0;
+
+ Operand source = GetVec(op.Rn);
+ Operand control = Const(X86GetRoundControl(roundMode));
+
+ Operand rounded = isDouble
+ ? context.AddIntrinsic(Intrinsic.X86Roundpd, source, control)
+ : context.AddIntrinsic(Intrinsic.X86Roundps, source, control);
+
+ // A 64-bit destination keeps only the low half of the vector.
+ Operand result = op.RegisterSize == RegisterSize.Simd64 ? context.VectorZeroUpper64(rounded) : rounded;
+ context.Copy(GetVec(op.Rd), result);
+ }
+
+ private enum AddSub // Selects how EmitSse41Mul_AddSub combines the product with the destination register.
+ {
+ None, // Product only; Rd is not read as an input.
+ Add, // Rd + product.
+ Subtract // Rd - product.
+ }
+
+ private static void EmitSse41Mul_AddSub(ArmEmitterContext context, AddSub addSub)
+ { // Emits an element-wise multiply of Rn and Rm, optionally added to or subtracted from Rd.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Operand res = null;
+
+ if (op.Size == 0)
+ { // Byte elements: built from two 16-bit multiplies, one for the odd bytes and one for the even bytes.
+ Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8)); // Odd (high) byte of each word moved to the low position.
+ Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8));
+
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8);
+
+ res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8)); // Move the odd-byte products back to the high byte.
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); // Low byte of each word is the even-byte product.
+
+ Operand mask = X86GetAllElements(context, 0x00FF00FF); // NOTE(review): presumably replicates the constant across the vector - confirm.
+
+ res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask); // Take res2 where the mask byte is 0xFF, res elsewhere.
+ }
+ else if (op.Size == 1)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+ }
+ else
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m);
+ }
+
+ Operand d = GetVec(op.Rd);
+
+ if (addSub == AddSub.Add)
+ {
+ switch (op.Size)
+ {
+ case 0: res = context.AddIntrinsic(Intrinsic.X86Paddb, d, res); break;
+ case 1: res = context.AddIntrinsic(Intrinsic.X86Paddw, d, res); break;
+ case 2: res = context.AddIntrinsic(Intrinsic.X86Paddd, d, res); break;
+ case 3: res = context.AddIntrinsic(Intrinsic.X86Paddq, d, res); break;
+ }
+ }
+ else if (addSub == AddSub.Subtract)
+ {
+ switch (op.Size)
+ {
+ case 0: res = context.AddIntrinsic(Intrinsic.X86Psubb, d, res); break;
+ case 1: res = context.AddIntrinsic(Intrinsic.X86Psubw, d, res); break;
+ case 2: res = context.AddIntrinsic(Intrinsic.X86Psubd, d, res); break;
+ case 3: res = context.AddIntrinsic(Intrinsic.X86Psubq, d, res); break;
+ }
+ }
+ // AddSub.None falls through both branches and leaves the plain product in res.
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res); // 64-bit form: clear the upper half of the result.
+ }
+
+ context.Copy(d, res);
+ }
+
+ private static void EmitSse41Sabd(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ { // Signed absolute difference of n and m written to Rd; isLong selects the next larger element size.
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Intrinsic cmpgtInst = X86PcmpgtInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m); // All-ones in elements where n > m.
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); // Keep n - m where n > m.
+
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); // PANDN = ~cmpMask & res2: keep m - n elsewhere.
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); // Merge the two disjoint selections.
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitSse41Uabd(
+ ArmEmitterContext context,
+ OpCodeSimdReg op,
+ Operand n,
+ Operand m,
+ bool isLong)
+ { // Unsigned absolute difference of n and m written to Rd; isLong selects the next larger element size.
+ int size = isLong ? op.Size + 1 : op.Size;
+
+ Intrinsic maxInst = X86PmaxuInstruction[size];
+
+ Operand max = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpeqInst = X86PcmpeqInstruction[size];
+
+ Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m); // max(m, n) == m: set where m >= n (unsigned).
+
+ Operand onesMask = X86GetAllElements(context, -1L);
+
+ cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask); // Inverted: now set where n > m.
+
+ Intrinsic subInst = X86PsubInstruction[size];
+
+ Operand res = context.AddIntrinsic(subInst, n, m);
+ Operand res2 = context.AddIntrinsic(subInst, m, n);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); // Keep n - m where n > m.
+ res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); // Keep m - n elsewhere.
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+
+ if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
new file mode 100644
index 000000000..f27121bb3
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -0,0 +1,712 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func2I = Func;
+
+ static partial class InstEmit
+ {
+ // Scalar CMEQ: forwards to EmitCmpOp, which sets element 0 of Rd to an all-ones mask when the
+ // operands compare equal and to zero otherwise; the remaining elements are left zero.
+ public static void Cmeq_S(ArmEmitterContext context) =>
+ EmitCmpOp(context, (lhs, rhs) => context.ICompareEqual(lhs, rhs), scalar: true);
+
+ public static void Cmeq_V(ArmEmitterContext context)
+ { // Vector CMEQ: per-element equality mask of Rn against Rm, or against zero when the opcode has no Rm.
+ if (Optimizations.UseSse41)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero(); // Compare-with-zero form.
+ }
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; // PCMPEQ table entry for this element size.
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res); // 64-bit form: clear the upper half of the result.
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false); // Element-by-element path.
+ }
+ }
+
+ // Scalar CMGE: forwards to EmitCmpOp with a signed greater-or-equal comparison; element 0 of Rd
+ // becomes an all-ones mask when it holds and zero otherwise.
+ public static void Cmge_S(ArmEmitterContext context) =>
+ EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterOrEqual(lhs, rhs), scalar: true);
+
+ public static void Cmge_V(ArmEmitterContext context)
+ { // Vector signed CMGE: mask of Rn >= Rm, or Rn >= 0 when the opcode has no Rm.
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero(); // Compare-with-zero form.
+ }
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, m, n); // Operands reversed on purpose: this computes m > n.
+
+ Operand mask = X86GetAllElements(context, -1L); // NOTE(review): presumably an all-ones vector - confirm helper semantics.
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); // ~res & mask inverts the mask: !(m > n) == (n >= m).
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false); // Element-by-element path.
+ }
+ }
+
+ // Scalar CMGT: forwards to EmitCmpOp with a signed greater-than comparison; element 0 of Rd
+ // becomes an all-ones mask when it holds and zero otherwise.
+ public static void Cmgt_S(ArmEmitterContext context) =>
+ EmitCmpOp(context, (lhs, rhs) => context.ICompareGreater(lhs, rhs), scalar: true);
+
+ public static void Cmgt_V(ArmEmitterContext context)
+ { // Vector signed CMGT: mask of Rn > Rm, or Rn > 0 when the opcode has no Rm.
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ m = GetVec(binOp.Rm);
+ }
+ else
+ {
+ m = context.VectorZero(); // Compare-with-zero form.
+ }
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; // PCMPGT table entry for this element size.
+
+ Operand res = context.AddIntrinsic(cmpInst, n, m);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res); // 64-bit form: clear the upper half of the result.
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false); // Element-by-element path.
+ }
+ }
+
+ // Scalar CMHI: forwards to EmitCmpOp with an unsigned greater-than comparison; element 0 of Rd
+ // becomes an all-ones mask when it holds and zero otherwise.
+ public static void Cmhi_S(ArmEmitterContext context) =>
+ EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterUI(lhs, rhs), scalar: true);
+
+ public static void Cmhi_V(ArmEmitterContext context)
+ { // Vector unsigned CMHI: mask of Rn > Rm.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3) // The intrinsic path is limited to element sizes 0..2.
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, m, n);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, m); // max(m, n) == m holds exactly when m >= n (unsigned).
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); // Invert the mask: !(m >= n) == (n > m).
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false); // Element-by-element path.
+ }
+ }
+
+ // Scalar CMHS: forwards to EmitCmpOp with an unsigned greater-or-equal comparison; element 0 of Rd
+ // becomes an all-ones mask when it holds and zero otherwise.
+ public static void Cmhs_S(ArmEmitterContext context) =>
+ EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterOrEqualUI(lhs, rhs), scalar: true);
+
+ public static void Cmhs_V(ArmEmitterContext context)
+ { // Vector unsigned CMHS: mask of Rn >= Rm.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSse41 && op.Size < 3) // The intrinsic path is limited to element sizes 0..2.
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ Intrinsic maxInst = X86PmaxuInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(maxInst, n, m);
+
+ Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];
+
+ res = context.AddIntrinsic(cmpInst, res, n); // max(n, m) == n holds exactly when n >= m (unsigned).
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false); // Element-by-element path.
+ }
+ }
+
+ // Scalar CMLE: forwards to EmitCmpOp with a signed less-or-equal comparison; element 0 of Rd
+ // becomes an all-ones mask when it holds and zero otherwise.
+ public static void Cmle_S(ArmEmitterContext context) =>
+ EmitCmpOp(context, (lhs, rhs) => context.ICompareLessOrEqual(lhs, rhs), scalar: true);
+
+ public static void Cmle_V(ArmEmitterContext context)
+ { // Vector signed CMLE against zero: mask of Rn <= 0.
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero()); // n > 0.
+
+ Operand mask = X86GetAllElements(context, -1L);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); // Invert the mask: !(n > 0) == (n <= 0).
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false); // Element-by-element path.
+ }
+ }
+
+ // Scalar CMLT: forwards to EmitCmpOp with a signed less-than comparison; element 0 of Rd
+ // becomes an all-ones mask when it holds and zero otherwise.
+ public static void Cmlt_S(ArmEmitterContext context) =>
+ EmitCmpOp(context, (lhs, rhs) => context.ICompareLess(lhs, rhs), scalar: true);
+
+ public static void Cmlt_V(ArmEmitterContext context)
+ { // Vector signed CMLT against zero: mask of Rn < 0.
+ if (Optimizations.UseSse42)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+
+ Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n); // 0 > n, i.e. n < 0.
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false); // Element-by-element path.
+ }
+ }
+
+ // Scalar CMTST: forwards to EmitCmtstOp, which sets element 0 of Rd to all-ones when (Rn AND Rm)
+ // is non-zero and leaves the remaining elements zero.
+ public static void Cmtst_S(ArmEmitterContext context) =>
+ EmitCmtstOp(context, scalar: true);
+
+ // Vector CMTST: forwards to EmitCmtstOp, which sets each element of Rd to all-ones when the
+ // matching elements of Rn and Rm share at least one set bit, and to zero otherwise.
+ public static void Cmtst_V(ArmEmitterContext context) =>
+ EmitCmtstOp(context, scalar: false);
+
+ // FCCMP: conditional FP compare emitted by EmitFccmpOrFccmpe with signalNaNs disabled.
+ // When the opcode condition does not hold, the flags come from the opcode's Nzcv field instead.
+ public static void Fccmp_S(ArmEmitterContext context) =>
+ EmitFccmpOrFccmpe(context, signalNaNs: false);
+
+ // FCCMPE: conditional FP compare emitted by EmitFccmpOrFccmpe with signalNaNs enabled.
+ // When the opcode condition does not hold, the flags come from the opcode's Nzcv field instead.
+ public static void Fccmpe_S(ArmEmitterContext context) =>
+ EmitFccmpOrFccmpe(context, signalNaNs: true);
+
+ public static void Fcmeq_S(ArmEmitterContext context)
+ {
+ // Scalar FCMEQ: soft-float compare unless both FastFP and SSE2 are enabled.
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: true);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: true);
+ }
+
+ public static void Fcmeq_V(ArmEmitterContext context)
+ {
+ // Vector FCMEQ: soft-float compare unless both FastFP and SSE2 are enabled.
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: false);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: false);
+ }
+
+ public static void Fcmge_S(ArmEmitterContext context)
+ {
+ // Scalar FCMGE: soft-float compare unless both FastFP and SSE2 are enabled.
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: true);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
+ }
+
+ public static void Fcmge_V(ArmEmitterContext context)
+ {
+ // Vector FCMGE: soft-float compare unless both FastFP and SSE2 are enabled.
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: false);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
+ }
+
+ public static void Fcmgt_S(ArmEmitterContext context)
+ {
+ // Scalar FCMGT: soft-float compare unless both FastFP and SSE2 are enabled.
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: true);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
+ }
+
+ public static void Fcmgt_V(ArmEmitterContext context)
+ {
+ // Vector FCMGT: soft-float compare unless both FastFP and SSE2 are enabled.
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: false);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
+ }
+
+ public static void Fcmle_S(ArmEmitterContext context)
+ {
+ // Scalar FCMLE: the intrinsic path requests the >= condition with isLeOrLt set (operands exchanged).
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: true);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true);
+ }
+
+ public static void Fcmle_V(ArmEmitterContext context)
+ {
+ // Vector FCMLE: the intrinsic path requests the >= condition with isLeOrLt set (operands exchanged).
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: false);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true);
+ }
+
+ public static void Fcmlt_S(ArmEmitterContext context)
+ {
+ // Scalar FCMLT: the intrinsic path requests the > condition with isLeOrLt set (operands exchanged).
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: true);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true);
+ }
+
+ public static void Fcmlt_V(ArmEmitterContext context)
+ {
+ // Vector FCMLT: the intrinsic path requests the > condition with isLeOrLt set (operands exchanged).
+ if (!Optimizations.FastFP || !Optimizations.UseSse2)
+ {
+ EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: false);
+ return;
+ }
+
+ EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true);
+ }
+
+ // FCMP: scalar FP compare that writes the N, Z, C and V flags through EmitFcmpOrFcmpe,
+ // with signalNaNs disabled.
+ public static void Fcmp_S(ArmEmitterContext context) =>
+ EmitFcmpOrFcmpe(context, signalNaNs: false);
+
+ // FCMPE: scalar FP compare that writes the N, Z, C and V flags through EmitFcmpOrFcmpe,
+ // with signalNaNs enabled.
+ public static void Fcmpe_S(ArmEmitterContext context) =>
+ EmitFcmpOrFcmpe(context, signalNaNs: true);
+
+ public static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
+ { // Conditional compare: performs the FP compare when the opcode condition holds, otherwise loads the flags from op.Nzcv.
+ OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+ Operand lblTrue = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
+ // Condition false: set the flags from the immediate and skip the compare.
+ EmitSetNzcv(context, Const(op.Nzcv));
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblTrue);
+ // Condition true: emit the regular compare; signalNaNs is forwarded unchanged.
+ EmitFcmpOrFcmpe(context, signalNaNs);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+ { // Compares the scalar in Rn with Rm (or with zero) and writes the N, Z, C and V flags.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ const int cmpOrdered = 7; // Immediate passed to CMPSS/CMPSD below; 7 is the x86 "ordered" predicate.
+
+ bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; // Conditional-compare opcodes never take the compare-with-zero form.
+
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ { // Intrinsic path; note that signalNaNs is not consulted here.
+ Operand n = GetVec(op.Rn);
+ Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
+ if (op.Size == 0)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered));
+
+ Operand isOrdered = context.VectorExtract16(ordMask, 0); // Low 16 bits of the mask: zero when either operand is NaN.
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
+ // Ordered result: C = (n >= m), Z = (n == m), N = (n < m), V = 0.
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+ else /* if (op.Size == 1) */
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered));
+
+ Operand isOrdered = context.VectorExtract16(ordMask, 0); // Low 16 bits of the mask: zero when either operand is NaN.
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
+ // Ordered result: C = (n >= m), Z = (n == m), N = (n < m), V = 0.
+ SetFlag(context, PState.VFlag, Const(0));
+ SetFlag(context, PState.CFlag, cf);
+ SetFlag(context, PState.ZFlag, zf);
+ SetFlag(context, PState.NFlag, nf);
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+ // Unordered result: N = 0, Z = 0, C = 1, V = 1.
+ SetFlag(context, PState.VFlag, Const(1));
+ SetFlag(context, PState.CFlag, Const(1));
+ SetFlag(context, PState.ZFlag, Const(0));
+ SetFlag(context, PState.NFlag, Const(0));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
+ { // Soft-float path: call FPCompare and unpack the NZCV value it returns.
+ OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+ Operand me;
+
+ if (cmpWithZero)
+ {
+ me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
+ }
+ else
+ {
+ me = context.VectorExtract(type, GetVec(op.Rm), 0);
+ }
+
+ Delegate dlg = op.Size != 0
+ ? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare)
+ : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare);
+
+ Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs));
+
+ EmitSetNzcv(context, nzcv);
+ }
+ }
+
+ private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv)
+ { // Unpacks a 4-bit NZCV value into the individual flags: bit 3 = N, bit 2 = Z, bit 1 = C, bit 0 = V.
+ Operand Extract(Operand value, int bit)
+ { // Returns bit number "bit" of value as 0 or 1.
+ if (bit != 0)
+ {
+ value = context.ShiftRightUI(value, Const(bit)); // No shift is emitted for bit 0.
+ }
+
+ value = context.BitwiseAnd(value, Const(1));
+
+ return value;
+ }
+
+ SetFlag(context, PState.VFlag, Extract(nzcv, 0));
+ SetFlag(context, PState.CFlag, Extract(nzcv, 1));
+ SetFlag(context, PState.ZFlag, Extract(nzcv, 2));
+ SetFlag(context, PState.NFlag, Extract(nzcv, 3));
+ }
+
+ private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar)
+ { // Generic integer compare: each element of Rd becomes all-ones when emitCmp yields true, zero otherwise.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero(); // Elements that are never written (scalar form) stay zero.
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); // All-ones value as wide as one element.
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size);
+ }
+ else
+ {
+ me = Const(0L); // Opcodes without an Rm operand compare against zero.
+ }
+
+ Operand isTrue = emitCmp(ne, me);
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmtstOp(ArmEmitterContext context, bool scalar)
+ { // Bit test: each element of Rd becomes all-ones when (Rn AND Rm) is non-zero for that element, zero otherwise.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero(); // Elements that are never written (scalar form) stay zero.
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); // All-ones value as wide as one element.
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
+
+ Operand test = context.BitwiseAnd(ne, me);
+
+ Operand isTrue = context.ICompareNotEqual(test, Const(0L));
+
+ Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L));
+
+ res = EmitVectorInsert(context, res, mask, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitCmpOpF(
+ ArmEmitterContext context,
+ _F32_F32_F32 f32,
+ _F64_F64_F64 f64,
+ bool scalar)
+ { // Soft-float compare: calls f32 or f64 per element and inserts whatever the call returns into Rd.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int sizeF = op.Size & 1; // 0 = single precision, 1 = double precision.
+
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
+
+ int elems = !scalar ? op.GetBytesCount() >> sizeF + 2 : 1; // "+" binds tighter than ">>": bytes >> (sizeF + 2).
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+ Operand me;
+
+ if (op is OpCodeSimdReg binOp)
+ {
+ me = context.VectorExtract(type, GetVec(binOp.Rm), index);
+ }
+ else
+ {
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d); // Opcodes without an Rm operand compare against zero.
+ }
+
+ Operand e = EmitSoftFloatCall(context, f32, f64, ne, me);
+
+ res = context.VectorInsert(res, e, index);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ private enum CmpCondition // Relations accepted by EmitCmpSseOrSse2OpF.
+ {
+ Equal = 0, // Same number as x86 compare predicate 0 (equal, ordered).
+ GreaterThanOrEqual = 5, // Same number as x86 compare predicate 5 (not-less-than).
+ GreaterThan = 6 // Same number as x86 compare predicate 6 (not-less-or-equal).
+ }
+
+ private static void EmitCmpSseOrSse2OpF(
+ ArmEmitterContext context,
+ CmpCondition cond,
+ bool scalar,
+ bool isLeOrLt = false)
+ {
+ // Emits an SSE/SSE2 floating-point compare of Rn with Rm (or with zero when the opcode has no Rm)
+ // and copies the resulting element mask to Rd.
+ // cond: relation to test. scalar: use the scalar instruction and clear everything above element 0.
+ // isLeOrLt: test the mirrored relation (<= or <) by exchanging the two operands.
+ //
+ // x86 compare predicates 5 (not-less-than) and 6 (not-less-or-equal) are true for unordered (NaN)
+ // inputs, whereas the Arm compares must yield zero for them. The greater-than relations are therefore
+ // emitted as the ordered predicates 2 (less-or-equal) and 1 (less-than) with the operands exchanged:
+ // n >= m is m <= n, and n > m is m < n.
+ const int cmpLessThan = 1;
+ const int cmpLessThanOrEqual = 2;
+
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Operand n = GetVec(op.Rn);
+ Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero();
+
+ bool isDouble = (op.Size & 1) != 0;
+ bool isEqual = cond == CmpCondition.Equal;
+
+ int predicate = isEqual ? (int)cond : cond == CmpCondition.GreaterThan ? cmpLessThan : cmpLessThanOrEqual;
+
+ // Equality keeps the requested operand order; the rewritten relations need the opposite one.
+ bool swapOperands = isEqual ? isLeOrLt : !isLeOrLt;
+
+ Intrinsic inst = isDouble
+ ? (scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd)
+ : (scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps);
+
+ Operand res = context.AddIntrinsic(inst, swapOperands ? m : n, swapOperands ? n : m, Const(predicate));
+
+ if (scalar)
+ {
+ res = isDouble ? context.VectorZeroUpper64(res) : context.VectorZeroUpper96(res);
+ }
+ else if (!isDouble && op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res); // Two-lane single-precision form keeps only the low 64 bits.
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
new file mode 100644
index 000000000..2b61fadac
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
@@ -0,0 +1,49 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void Aesd_V(ArmEmitterContext context)
+ {
+ // AESD is delegated to SoftFallback.Decrypt, called with (Rd, Rn); the result overwrites Rd.
+ OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+ Operand destination = GetVec(opCode.Rd);
+ Operand source = GetVec(opCode.Rn);
+ Operand decrypted = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), destination, source);
+ context.Copy(destination, decrypted);
+ }
+
+ public static void Aese_V(ArmEmitterContext context)
+ {
+ // AESE is delegated to SoftFallback.Encrypt, called with (Rd, Rn); the result overwrites Rd.
+ OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+ Operand destination = GetVec(opCode.Rd);
+ Operand source = GetVec(opCode.Rn);
+ Operand encrypted = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), destination, source);
+ context.Copy(destination, encrypted);
+ }
+
+ public static void Aesimc_V(ArmEmitterContext context)
+ { // AESIMC is delegated to SoftFallback.InverseMixColumns, called with Rn; the result is copied to Rd.
+ OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+ Operand source = GetVec(opCode.Rn);
+ Operand destination = GetVec(opCode.Rd);
+ Operand mixed = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), source);
+ context.Copy(destination, mixed);
+ }
+
+ public static void Aesmc_V(ArmEmitterContext context)
+ { // AESMC is delegated to SoftFallback.MixColumns, called with Rn; the result is copied to Rd.
+ OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+ Operand source = GetVec(opCode.Rn);
+ Operand destination = GetVec(opCode.Rd);
+ Operand mixed = context.Call(new _V128_V128(SoftFallback.MixColumns), source);
+ context.Copy(destination, mixed);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
new file mode 100644
index 000000000..012bfcce2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -0,0 +1,1166 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func;
+
+ static partial class InstEmit
+ {
+ public static void Fcvt_S(ArmEmitterContext context)
+ { // Scalar FCVT: converts element 0 of Rn between FP precisions; op.Size is the source type and op.Opc the destination type.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ if (op.Size == 0 && op.Opc == 1) // Single -> Double.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n); // Zero first operand: the bits above the converted scalar come out zero.
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP64, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
+ {
+ if (Optimizations.UseSse2)
+ {
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n); // Zero first operand: the bits above the converted scalar come out zero.
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+ Operand res = context.ConvertToFP(OperandType.FP32, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ }
+ else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+ Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert); // Half-precision conversions always go through the soft-float helper.
+
+ Operand res = context.Call(dlg, ne);
+
+ res = context.ZeroExtend16(OperandType.I64, res);
+
+ context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1)); // Stored as a 16-bit element (size 1).
+ }
+ else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+ Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);
+
+ Operand res = context.Call(dlg, ne);
+
+ context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+ }
+ else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
+ {
+ throw new NotImplementedException("Double-precision to half-precision.");
+ }
+ else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
+ {
+ throw new NotImplementedException("Half-precision to double-precision.");
+ }
+ else // Invalid encoding.
+ {
+ Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
+ }
+ }
+
+ // FCVTAS: forwards to the signed EmitFcvt_s_Gp emitter with a callback that applies
+ // EmitRoundMathCall using MidpointRounding.AwayFromZero to the operand it receives.
+ public static void Fcvtas_Gp(ArmEmitterContext context) =>
+ EmitFcvt_s_Gp(context, value => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+
+ // FCVTAU: forwards to the unsigned EmitFcvt_u_Gp emitter with a callback that applies
+ // EmitRoundMathCall using MidpointRounding.AwayFromZero to the operand it receives.
+ public static void Fcvtau_Gp(ArmEmitterContext context) =>
+ EmitFcvt_u_Gp(context, value => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+
+ public static void Fcvtl_V(ArmEmitterContext context)
+ { // Vector FCVTL: widens FP elements of Rn to the next precision; the 128-bit form reads the upper elements of Rn.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1; // 0 = half -> single, 1 = single -> double.
+
+ if (Optimizations.UseSse2 && sizeF == 1)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand res;
+
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ res = context.AddIntrinsic(Intrinsic.X86Movhlps, n, n); // Bring the upper 64 bits of Rn down to the low half.
+ }
+ else
+ {
+ res = n;
+ }
+
+ res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res); // Converts the two low singles to doubles.
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand res = context.VectorZero();
+
+ int elems = 4 >> sizeF;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; // Source element offset for the upper-half form.
+
+ for (int index = 0; index < elems; index++)
+ {
+ if (sizeF == 0)
+ {
+ Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1); // Raw 16-bit half-precision value.
+
+ Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);
+
+ Operand e = context.Call(dlg, ne);
+
+ res = context.VectorInsert(res, e, index);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
+
+ Operand e = context.ConvertToFP(OperandType.FP64, ne);
+
+ res = context.VectorInsert(res, e, index);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ // FCVTMS: forwards to the signed EmitFcvt_s_Gp emitter with a callback that applies
+ // MathF.Floor / Math.Floor (through EmitUnaryMathCall) to the operand it receives.
+ public static void Fcvtms_Gp(ArmEmitterContext context) =>
+ EmitFcvt_s_Gp(context, value => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+ // FCVTMU: forwards to the unsigned EmitFcvt_u_Gp emitter with a callback that applies
+ // MathF.Floor / Math.Floor (through EmitUnaryMathCall) to the operand it receives.
+ public static void Fcvtmu_Gp(ArmEmitterContext context) =>
+ EmitFcvt_u_Gp(context, value => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+ public static void Fcvtn_V(ArmEmitterContext context)
+ { // Vector FCVTN: narrows FP elements of Rn to the next lower precision; the 128-bit form fills the upper half of Rd and keeps its lower half.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ int sizeF = op.Size & 1; // 0 = single -> half, 1 = double -> single.
+
+ if (Optimizations.UseSse2 && sizeF == 1)
+ {
+ Operand d = GetVec(op.Rd);
+ Operand n = GetVec(op.Rn);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero()); // Low half of Rd, upper half zero.
+
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, n); // Two doubles -> two singles in the low 64 bits.
+
+ nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt); // Duplicate the converted pair into both halves.
+
+ Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+ ? Intrinsic.X86Movlhps
+ : Intrinsic.X86Movhlps;
+ // Movlhps writes the converted pair to the upper half of res, Movhlps to the lower half.
+ res = context.AddIntrinsic(movInst, res, nInt);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; // Type of the source elements.
+
+ int elems = 4 >> sizeF;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; // Destination element offset for the upper-half form.
+
+ Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); // Read source element "index", not always element 0.
+
+ if (sizeF == 0)
+ {
+ Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);
+
+ Operand e = context.Call(dlg, ne);
+
+ e = context.ZeroExtend16(OperandType.I64, e);
+
+ res = EmitVectorInsert(context, res, e, part + index, 1); // Stored as a 16-bit element (size 1).
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand e = context.ConvertToFP(OperandType.FP32, ne);
+
+ res = context.VectorInsert(res, e, part + index);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ public static void Fcvtns_S(ArmEmitterContext context)
+ {
+ // Scalar FCVTNS: SSE4.1 emitter with ToNearest rounding, otherwise the generic signed emitter.
+ if (!Optimizations.UseSse41)
+ {
+ EmitFcvtn(context, signed: true, scalar: true);
+ return;
+ }
+
+ EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+
+ public static void Fcvtns_V(ArmEmitterContext context)
+ {
+ // Vector FCVTNS: SSE4.1 emitter with ToNearest rounding, otherwise the generic signed emitter.
+ if (!Optimizations.UseSse41)
+ {
+ EmitFcvtn(context, signed: true, scalar: false);
+ return;
+ }
+
+ EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+
+ public static void Fcvtnu_S(ArmEmitterContext context)
+ {
+ // Scalar FCVTNU: SSE4.1 emitter with ToNearest rounding, otherwise the generic unsigned emitter.
+ if (!Optimizations.UseSse41)
+ {
+ EmitFcvtn(context, signed: false, scalar: true);
+ return;
+ }
+
+ EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: true);
+ }
+
+ public static void Fcvtnu_V(ArmEmitterContext context)
+ {
+ // Vector FCVTNU: SSE4.1 emitter with ToNearest rounding, otherwise the generic unsigned emitter.
+ if (!Optimizations.UseSse41)
+ {
+ EmitFcvtn(context, signed: false, scalar: false);
+ return;
+ }
+
+ EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: false);
+ }
+
+ // Float-to-signed-integer conversion into a general-purpose register.
+ // The source value goes through a Ceiling math call before it is converted.
+ public static void Fcvtps_Gp(ArmEmitterContext context)
+     => EmitFcvt_s_Gp(context, value => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+ // Float-to-unsigned-integer conversion into a general-purpose register.
+ // The source value goes through a Ceiling math call before it is converted.
+ public static void Fcvtpu_Gp(ArmEmitterContext context)
+     => EmitFcvt_u_Gp(context, value => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+ // Float-to-signed-integer conversion into a general-purpose register.
+ // The callback is the identity, so no extra rounding step is emitted before the conversion.
+ public static void Fcvtzs_Gp(ArmEmitterContext context)
+     => EmitFcvt_s_Gp(context, value => value);
+
+ // Fixed-point variant of the signed conversion into a general-purpose register;
+ // simply forwards to the private emitter.
+ public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
+     => EmitFcvtzs_Gp_Fixed(context);
+
+ // Scalar float-to-signed-integer conversion using round-towards-zero.
+ // Takes the SSE4.1 emitter when it is enabled, otherwise the EmitFcvtz fallback.
+ public static void Fcvtzs_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitFcvtz(context, signed: true, scalar: true);
+
+         return;
+     }
+
+     EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+
+ // Vector float-to-signed-integer conversion using round-towards-zero.
+ // Takes the SSE4.1 emitter when it is enabled, otherwise the EmitFcvtz fallback.
+ public static void Fcvtzs_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitFcvtz(context, signed: true, scalar: false);
+
+         return;
+     }
+
+     EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+
+ // Fixed-point vector float-to-signed-integer conversion using round-towards-zero.
+ // Both emitters read the fractional bit count from the current opcode themselves,
+ // so the dispatch is the same as for the non-fixed vector form.
+ public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitFcvtz(context, signed: true, scalar: false);
+
+         return;
+     }
+
+     EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+
+ // Float-to-unsigned-integer conversion into a general-purpose register.
+ // The callback is the identity, so no extra rounding step is emitted before the conversion.
+ public static void Fcvtzu_Gp(ArmEmitterContext context)
+     => EmitFcvt_u_Gp(context, value => value);
+
+ // Fixed-point variant of the unsigned conversion into a general-purpose register;
+ // simply forwards to the private emitter.
+ public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
+     => EmitFcvtzu_Gp_Fixed(context);
+
+ // Scalar float-to-unsigned-integer conversion using round-towards-zero.
+ // Takes the SSE4.1 emitter when it is enabled, otherwise the EmitFcvtz fallback.
+ public static void Fcvtzu_S(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitFcvtz(context, signed: false, scalar: true);
+
+         return;
+     }
+
+     EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: true);
+ }
+
+ // Vector float-to-unsigned-integer conversion using round-towards-zero.
+ // Takes the SSE4.1 emitter when it is enabled, otherwise the EmitFcvtz fallback.
+ public static void Fcvtzu_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitFcvtz(context, signed: false, scalar: false);
+
+         return;
+     }
+
+     EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+
+ // Fixed-point vector float-to-unsigned-integer conversion using round-towards-zero.
+ // Both emitters read the fractional bit count from the current opcode themselves,
+ // so the dispatch is the same as for the non-fixed vector form.
+ public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse41)
+     {
+         EmitFcvtz(context, signed: false, scalar: false);
+
+         return;
+     }
+
+     EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
+ }
+
+ // Converts the signed integer held in general-purpose register Rn to floating point and
+ // writes it to element 0 of Rd; the rest of Rd comes from a zero vector.
+ public static void Scvtf_Gp(ArmEmitterContext context)
+ {
+     OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+     Operand source = GetIntOrZR(context, op.Rn);
+
+     // 32-bit sources are sign-extended to 64 bits before the conversion.
+     Operand widened = op.RegisterSize == RegisterSize.Int32
+         ? context.SignExtend32(OperandType.I64, source)
+         : source;
+
+     Operand converted = EmitFPConvert(context, widened, op.Size, signed: true);
+
+     Operand dest = GetVec(op.Rd);
+
+     context.Copy(dest, context.VectorInsert(context.VectorZero(), converted, 0));
+ }
+
+ // Fixed-point form of Scvtf_Gp: converts the signed integer in Rn to floating point,
+ // scales the result by 2^-FBits and writes it to element 0 of an otherwise zero Rd.
+ public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+     OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+     Operand source = GetIntOrZR(context, op.Rn);
+
+     // 32-bit sources are sign-extended to 64 bits before the conversion.
+     Operand widened = op.RegisterSize == RegisterSize.Int32
+         ? context.SignExtend32(OperandType.I64, source)
+         : source;
+
+     Operand converted = EmitFPConvert(context, widened, op.Size, signed: true);
+
+     Operand scaled = EmitI2fFBitsMul(context, converted, op.FBits);
+
+     Operand dest = GetVec(op.Rd);
+
+     context.Copy(dest, context.VectorInsert(context.VectorZero(), scaled, 0));
+ }
+
+ // Scalar signed-integer-to-float conversion between vector registers.
+ // Single precision uses the SSE2 emitter when it is enabled; otherwise element 0 of Rn is
+ // extracted, converted and written to element 0 of an otherwise zero Rd.
+ public static void Scvtf_S(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // Bit 0 of Size selects the precision: 0 = single, 1 = double.
+     int sizeF = op.Size & 1;
+
+     if (Optimizations.UseSse2 && sizeF == 0)
+     {
+         EmitSse2Scvtf(context, scalar: true);
+     }
+     else
+     {
+         Operand ne = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
+
+         // Pass the masked sizeF (as Ucvtf_S does) rather than the raw op.Size, so the
+         // precision used here always matches the extract above and EmitFPConvert's
+         // "size < 2" assertion cannot be tripped by an encoding with a higher Size bit set.
+         Operand res = EmitFPConvert(context, ne, sizeF, signed: true);
+
+         context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+     }
+ }
+
+ // Vector signed-integer-to-float conversion.
+ // Single precision uses the SSE2 emitter when it is enabled; everything else goes
+ // through the element-by-element EmitVectorCvtf path.
+ public static void Scvtf_V(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     int precision = op.Size & 1;
+
+     if (!Optimizations.UseSse2 || precision != 0)
+     {
+         EmitVectorCvtf(context, signed: true);
+
+         return;
+     }
+
+     EmitSse2Scvtf(context, scalar: false);
+ }
+
+ // Fixed-point vector signed-integer-to-float conversion.
+ // Single precision uses the SSE2 emitter when it is enabled; everything else goes
+ // through the element-by-element EmitVectorCvtf path.
+ public static void Scvtf_V_Fixed(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // Only bit 0 of Size is used here (0 = single, 1 = double). The original note states
+     // this equals the shift-immediate opcode's Size - 2 — TODO confirm against the decoder.
+     int precision = op.Size & 1;
+
+     if (!Optimizations.UseSse2 || precision != 0)
+     {
+         EmitVectorCvtf(context, signed: true);
+
+         return;
+     }
+
+     EmitSse2Scvtf(context, scalar: false);
+ }
+
+ // Converts the unsigned integer held in general-purpose register Rn to floating point and
+ // writes it to element 0 of Rd; the rest of Rd comes from a zero vector.
+ public static void Ucvtf_Gp(ArmEmitterContext context)
+ {
+     OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+     Operand converted = EmitFPConvert(context, GetIntOrZR(context, op.Rn), op.Size, signed: false);
+
+     Operand dest = GetVec(op.Rd);
+
+     context.Copy(dest, context.VectorInsert(context.VectorZero(), converted, 0));
+ }
+
+ // Fixed-point form of Ucvtf_Gp: converts the unsigned integer in Rn to floating point,
+ // scales the result by 2^-FBits and writes it to element 0 of an otherwise zero Rd.
+ public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
+ {
+     OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+     Operand converted = EmitFPConvert(context, GetIntOrZR(context, op.Rn), op.Size, signed: false);
+
+     Operand scaled = EmitI2fFBitsMul(context, converted, op.FBits);
+
+     Operand dest = GetVec(op.Rd);
+
+     context.Copy(dest, context.VectorInsert(context.VectorZero(), scaled, 0));
+ }
+
+ // Scalar unsigned-integer-to-float conversion between vector registers.
+ // Single precision uses the SSE2 emitter when it is enabled; otherwise element 0 of Rn is
+ // extracted, converted and written to element 0 of an otherwise zero Rd.
+ public static void Ucvtf_S(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // Bit 0 of Size selects the precision: 0 = single, 1 = double.
+     int precision = op.Size & 1;
+
+     if (Optimizations.UseSse2 && precision == 0)
+     {
+         EmitSse2Ucvtf(context, scalar: true);
+
+         return;
+     }
+
+     Operand element = EmitVectorLongExtract(context, op.Rn, 0, precision + 2);
+
+     Operand converted = EmitFPConvert(context, element, precision, signed: false);
+
+     Operand dest = GetVec(op.Rd);
+
+     context.Copy(dest, context.VectorInsert(context.VectorZero(), converted, 0));
+ }
+
+ // Vector unsigned-integer-to-float conversion.
+ // Single precision uses the SSE2 emitter when it is enabled; everything else goes
+ // through the element-by-element EmitVectorCvtf path.
+ public static void Ucvtf_V(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     int precision = op.Size & 1;
+
+     if (!Optimizations.UseSse2 || precision != 0)
+     {
+         EmitVectorCvtf(context, signed: false);
+
+         return;
+     }
+
+     EmitSse2Ucvtf(context, scalar: false);
+ }
+
+ // Fixed-point vector unsigned-integer-to-float conversion.
+ // Single precision uses the SSE2 emitter when it is enabled; everything else goes
+ // through the element-by-element EmitVectorCvtf path.
+ public static void Ucvtf_V_Fixed(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // Only bit 0 of Size is used here (0 = single, 1 = double). The original note states
+     // this equals ((OpCodeSimdShImm)op).Size - 2 — TODO confirm against the decoder.
+     int precision = op.Size & 1;
+
+     if (!Optimizations.UseSse2 || precision != 0)
+     {
+         EmitVectorCvtf(context, signed: false);
+
+         return;
+     }
+
+     EmitSse2Ucvtf(context, scalar: false);
+ }
+
+ // Software fallback for the round-to-nearest float-to-integer conversions.
+ // Every processed element of Rn is rounded with MidpointRounding.ToEven, converted through
+ // the matching SoftFallback.SatF*To* helper and inserted into a result that starts as zero.
+ //   signed: selects the signed or unsigned helper.
+ //   scalar: when true only element 0 is processed.
+ private static void EmitFcvtn(ArmEmitterContext context, bool signed, bool scalar)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand result = context.VectorZero();
+
+     Operand source = GetVec(op.Rn);
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     // log2 of the integer element size in bytes: 2 for 32-bit, 3 for 64-bit.
+     int intSize = isSingle ? 2 : 3;
+
+     OperandType floatType = isSingle ? OperandType.FP32 : OperandType.FP64;
+
+     int count = scalar ? 1 : op.GetBytesCount() >> intSize;
+
+     for (int i = 0; i < count; i++)
+     {
+         Operand rounded = EmitRoundMathCall(context, MidpointRounding.ToEven, context.VectorExtract(floatType, source, i));
+
+         Operand converted;
+
+         if (isSingle)
+         {
+             Delegate saturate = signed
+                 ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+                 : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);
+
+             // The 32-bit result is widened to I64 before it is handed to EmitVectorInsert.
+             converted = context.ZeroExtend32(OperandType.I64, context.Call(saturate, rounded));
+         }
+         else
+         {
+             Delegate saturate = signed
+                 ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
+                 : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+
+             converted = context.Call(saturate, rounded);
+         }
+
+         result = EmitVectorInsert(context, result, converted, i, intSize);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Software fallback for the float-to-integer conversions that do not round explicitly.
+ // Every processed element of Rn is scaled by 2^fBits (fBits comes from GetFBits and is 0 for
+ // non-fixed-point opcodes), converted through the matching SoftFallback.SatF*To* helper and
+ // inserted into a result that starts as zero.
+ //   signed: selects the signed or unsigned helper.
+ //   scalar: when true only element 0 is processed.
+ private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand result = context.VectorZero();
+
+     Operand source = GetVec(op.Rn);
+
+     bool isSingle = (op.Size & 1) == 0;
+
+     // log2 of the integer element size in bytes: 2 for 32-bit, 3 for 64-bit.
+     int intSize = isSingle ? 2 : 3;
+
+     OperandType floatType = isSingle ? OperandType.FP32 : OperandType.FP64;
+
+     int fractionBits = GetFBits(context);
+
+     int count = scalar ? 1 : op.GetBytesCount() >> intSize;
+
+     for (int i = 0; i < count; i++)
+     {
+         Operand scaled = EmitF2iFBitsMul(context, context.VectorExtract(floatType, source, i), fractionBits);
+
+         Operand converted;
+
+         if (isSingle)
+         {
+             Delegate saturate = signed
+                 ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+                 : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);
+
+             // The 32-bit result is widened to I64 before it is handed to EmitVectorInsert.
+             converted = context.ZeroExtend32(OperandType.I64, context.Call(saturate, scaled));
+         }
+         else
+         {
+             Delegate saturate = signed
+                 ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
+                 : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+
+             converted = context.Call(saturate, scaled);
+         }
+
+         result = EmitVectorInsert(context, result, converted, i, intSize);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Signed flavour of EmitFcvt___Gp; emit is applied to the source element before the conversion.
+ private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
+     => EmitFcvt___Gp(context, emit, signed: true);
+
+ // Unsigned flavour of EmitFcvt___Gp; emit is applied to the source element before the conversion.
+ private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
+     => EmitFcvt___Gp(context, emit, signed: false);
+
+ // Converts element 0 of vector register Rn to an integer in general-purpose register Rd.
+ //   emit:   transformation applied to the extracted element first (for example a rounding call).
+ //   signed: selects EmitScalarFcvts or EmitScalarFcvtu; no fixed-point scaling is applied (fBits = 0).
+ private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
+ {
+     OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+     OperandType floatType = OperandType.FP64;
+
+     if (op.Size == 0)
+     {
+         floatType = OperandType.FP32;
+     }
+
+     Operand prepared = emit(context.VectorExtract(floatType, GetVec(op.Rn), 0));
+
+     Operand result;
+
+     if (signed)
+     {
+         result = EmitScalarFcvts(context, prepared, 0);
+     }
+     else
+     {
+         result = EmitScalarFcvtu(context, prepared, 0);
+     }
+
+     SetIntOrZR(context, op.Rd, result);
+ }
+
+ // Signed flavour of EmitFcvtz__Gp_Fixed.
+ private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
+     => EmitFcvtz__Gp_Fixed(context, signed: true);
+
+ // Unsigned flavour of EmitFcvtz__Gp_Fixed.
+ private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
+     => EmitFcvtz__Gp_Fixed(context, signed: false);
+
+ // Fixed-point conversion of element 0 of vector register Rn to an integer in
+ // general-purpose register Rd, using the opcode's FBits as the fractional bit count.
+ //   signed: selects EmitScalarFcvts or EmitScalarFcvtu.
+ private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
+ {
+     OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+     OperandType floatType = OperandType.FP64;
+
+     if (op.Size == 0)
+     {
+         floatType = OperandType.FP32;
+     }
+
+     Operand element = context.VectorExtract(floatType, GetVec(op.Rn), 0);
+
+     Operand result;
+
+     if (signed)
+     {
+         result = EmitScalarFcvts(context, element, op.FBits);
+     }
+     else
+     {
+         result = EmitScalarFcvtu(context, element, op.FBits);
+     }
+
+     SetIntOrZR(context, op.Rd, result);
+ }
+
+ // Element-by-element integer-to-float conversion of vector Rn into Rd.
+ // Each integer element is converted (signed or unsigned as requested), scaled by
+ // 2^-fBits (fBits comes from GetFBits and is 0 for non-fixed-point opcodes) and inserted
+ // into a result that starts as zero.
+ private static void EmitVectorCvtf(ArmEmitterContext context, bool signed)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand result = context.VectorZero();
+
+     int precision = op.Size & 1;
+
+     // log2 of the integer element size in bytes: 2 for 32-bit, 3 for 64-bit.
+     int intSize = precision + 2;
+
+     int fractionBits = GetFBits(context);
+
+     int count = op.GetBytesCount() >> intSize;
+
+     for (int i = 0; i < count; i++)
+     {
+         Operand converted = EmitFPConvert(context, EmitVectorLongExtract(context, op.Rn, i, intSize), precision, signed);
+
+         result = context.VectorInsert(result, EmitI2fFBitsMul(context, converted, fractionBits), i);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Returns the fractional bit count of the current opcode: GetImmShr(op) when it is a
+ // shift-immediate (fixed-point) opcode, 0 otherwise.
+ private static int GetFBits(ArmEmitterContext context)
+ {
+     return context.CurrOp is OpCodeSimdShImm op ? GetImmShr(op) : 0;
+ }
+
+ // Emits an integer-to-floating-point conversion.
+ //   value:  I32 or I64 operand to convert.
+ //   size:   0 produces FP32, 1 produces FP64.
+ //   signed: true uses ConvertToFP, false uses ConvertToFPUI.
+ private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
+ {
+     Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+     Debug.Assert((uint)size < 2);
+
+     OperandType floatType = OperandType.FP64;
+
+     if (size == 0)
+     {
+         floatType = OperandType.FP32;
+     }
+
+     return signed
+         ? context.ConvertToFP(floatType, value)
+         : context.ConvertToFPUI(floatType, value);
+ }
+
+ // Scalar float-to-signed-integer conversion.
+ // The value is scaled by 2^fBits, then passed to the SoftFallback.SatF*ToS* helper that
+ // matches the float type of the value and the register size of the current opcode.
+ private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
+ {
+     Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+     Operand scaled = EmitF2iFBitsMul(context, value, fBits);
+
+     bool isSingle = scaled.Type == OperandType.FP32;
+
+     Delegate saturate;
+
+     if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+     {
+         saturate = isSingle
+             ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+             : (Delegate)new _S32_F64(SoftFallback.SatF64ToS32);
+     }
+     else
+     {
+         saturate = isSingle
+             ? (Delegate)new _S64_F32(SoftFallback.SatF32ToS64)
+             : (Delegate)new _S64_F64(SoftFallback.SatF64ToS64);
+     }
+
+     return context.Call(saturate, scaled);
+ }
+
+ // Scalar float-to-unsigned-integer conversion.
+ // The value is scaled by 2^fBits, then passed to the SoftFallback.SatF*ToU* helper that
+ // matches the float type of the value and the register size of the current opcode.
+ private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
+ {
+     Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+     Operand scaled = EmitF2iFBitsMul(context, value, fBits);
+
+     bool isSingle = scaled.Type == OperandType.FP32;
+
+     Delegate saturate;
+
+     if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+     {
+         saturate = isSingle
+             ? (Delegate)new _U32_F32(SoftFallback.SatF32ToU32)
+             : (Delegate)new _U32_F64(SoftFallback.SatF64ToU32);
+     }
+     else
+     {
+         saturate = isSingle
+             ? (Delegate)new _U64_F32(SoftFallback.SatF32ToU64)
+             : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+     }
+
+     return context.Call(saturate, scaled);
+ }
+
+ // Float-to-fixed-point scaling: multiplies value by 2^fBits, using a constant of the
+ // same precision as value. When fBits is 0 the value is returned untouched and nothing is emitted.
+ private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+     Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+     if (fBits == 0)
+     {
+         return value;
+     }
+
+     Operand scale = value.Type == OperandType.FP32
+         ? ConstF(MathF.Pow(2f, fBits))
+         : ConstF(Math.Pow(2d, fBits));
+
+     return context.Multiply(value, scale);
+ }
+
+ // Fixed-point-to-float scaling: multiplies value by 1 / 2^fBits, using a constant of the
+ // same precision as value. When fBits is 0 the value is returned untouched and nothing is emitted.
+ private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+ {
+     Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+     if (fBits == 0)
+     {
+         return value;
+     }
+
+     Operand scale = value.Type == OperandType.FP32
+         ? ConstF(1f / MathF.Pow(2f, fBits))
+         : ConstF(1d / Math.Pow(2d, fBits));
+
+     return context.Multiply(value, scale);
+ }
+
+ // SSE4.1 implementation of the float-to-signed-integer conversions (scalar and vector,
+ // plain and fixed-point).
+ //   roundMode: rounding applied by the ROUNDPS/ROUNDPD step.
+ //   scalar:    when true only the lowest element is kept and the rest of Rd is zeroed.
+ // Steps: clear unordered (NaN) lanes, apply the optional 2^fBits scale, round, convert,
+ // then XOR the integer result with the "rounded >= 2^31 / 2^63" mask. NOTE(review): the XOR
+ // presumably turns the x86 out-of-range result into the positive saturation value — confirm
+ // against the CVTPS2DQ / CVTSD2SI documentation.
+ private static void EmitSse41Fcvts(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = GetVec(op.Rn);
+
+     // Immediate predicate values passed to CMPPS/CMPPD.
+     const int cmpGreaterThanOrEqual = 5;
+     const int cmpOrdered = 7;
+
+     // Bit 0 of Size selects the precision: 0 = single, 1 = double.
+     int precision = op.Size & 1;
+
+     if (precision == 0)
+     {
+         // A lane compares "ordered" with itself only when it is not NaN; the AND zeroes NaN lanes.
+         Operand orderedMask = context.AddIntrinsic(Intrinsic.X86Cmpps, source, source, Const(cmpOrdered));
+
+         Operand value = context.AddIntrinsic(Intrinsic.X86Pand, orderedMask, source);
+
+         if (op is OpCodeSimdShImm fixedOp)
+         {
+             int fBits = GetImmShr(fixedOp);
+
+             // Adding fBits to the exponent field of 1.0f gives the bit pattern of 2^fBits:
+             // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+             int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+             Operand scale = X86GetAllElements(context, fpScaled);
+
+             value = context.AddIntrinsic(Intrinsic.X86Mulps, value, scale);
+         }
+
+         Operand rounded = context.AddIntrinsic(Intrinsic.X86Roundps, value, Const(X86GetRoundControl(roundMode)));
+
+         Operand converted = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, rounded);
+
+         Operand limit = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+         Operand overflowMask = context.AddIntrinsic(Intrinsic.X86Cmpps, rounded, limit, Const(cmpGreaterThanOrEqual));
+
+         Operand result = context.AddIntrinsic(Intrinsic.X86Pxor, converted, overflowMask);
+
+         if (scalar)
+         {
+             result = context.VectorZeroUpper96(result);
+         }
+         else if (op.RegisterSize == RegisterSize.Simd64)
+         {
+             result = context.VectorZeroUpper64(result);
+         }
+
+         context.Copy(GetVec(op.Rd), result);
+     }
+     else
+     {
+         // A lane compares "ordered" with itself only when it is not NaN; the AND zeroes NaN lanes.
+         Operand orderedMask = context.AddIntrinsic(Intrinsic.X86Cmppd, source, source, Const(cmpOrdered));
+
+         Operand value = context.AddIntrinsic(Intrinsic.X86Pand, orderedMask, source);
+
+         if (op is OpCodeSimdShImm fixedOp)
+         {
+             int fBits = GetImmShr(fixedOp);
+
+             // Adding fBits to the exponent field of 1.0 gives the bit pattern of 2^fBits:
+             // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+             long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+             Operand scale = X86GetAllElements(context, fpScaled);
+
+             value = context.AddIntrinsic(Intrinsic.X86Mulpd, value, scale);
+         }
+
+         Operand rounded = context.AddIntrinsic(Intrinsic.X86Roundpd, value, Const(X86GetRoundControl(roundMode)));
+
+         // The two 64-bit lanes are converted one at a time with CVTSD2SI; the upper lane is
+         // only converted for the vector form.
+         Operand upper;
+
+         if (scalar)
+         {
+             upper = Const(0L);
+         }
+         else
+         {
+             upper = context.AddIntrinsic(Intrinsic.X86Unpckhpd, rounded, rounded);
+             upper = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, upper);
+         }
+
+         Operand lower = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, rounded);
+
+         Operand converted = EmitVectorLongCreate(context, lower, upper);
+
+         Operand limit = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+         Operand overflowMask = context.AddIntrinsic(Intrinsic.X86Cmppd, rounded, limit, Const(cmpGreaterThanOrEqual));
+
+         Operand result = context.AddIntrinsic(Intrinsic.X86Pxor, converted, overflowMask);
+
+         if (scalar)
+         {
+             result = context.VectorZeroUpper64(result);
+         }
+
+         context.Copy(GetVec(op.Rd), result);
+     }
+ }
+
+ // SSE4.1 implementation of the float-to-unsigned-integer conversions (scalar and vector,
+ // plain and fixed-point).
+ //   roundMode: rounding applied by the ROUNDPS/ROUNDPD step.
+ //   scalar:    when true only the lowest element is kept and the rest of Rd is zeroed.
+ // Steps: clear unordered (NaN) lanes, apply the optional 2^fBits scale, round, keep only
+ // lanes greater than zero, then build the result as the sum of two signed conversions:
+ // one of the value itself and one of (value - 2^31 / 2^63) clamped to positive lanes, the
+ // latter XOR-ed with its ">= 2^31 / 2^63" mask. NOTE(review): this presumably extends the
+ // signed x86 conversions to the full unsigned range with saturation — confirm against the
+ // CVTPS2DQ / CVTSD2SI documentation.
+ private static void EmitSse41Fcvtu(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = GetVec(op.Rn);
+
+     // Immediate predicate values passed to CMPPS/CMPPD.
+     const int cmpGreaterThanOrEqual = 5;
+     const int cmpGreaterThan = 6;
+     const int cmpOrdered = 7;
+
+     // Bit 0 of Size selects the precision: 0 = single, 1 = double.
+     int precision = op.Size & 1;
+
+     if (precision == 0)
+     {
+         // A lane compares "ordered" with itself only when it is not NaN; the AND zeroes NaN lanes.
+         Operand orderedMask = context.AddIntrinsic(Intrinsic.X86Cmpps, source, source, Const(cmpOrdered));
+
+         Operand value = context.AddIntrinsic(Intrinsic.X86Pand, orderedMask, source);
+
+         if (op is OpCodeSimdShImm fixedOp)
+         {
+             int fBits = GetImmShr(fixedOp);
+
+             // Adding fBits to the exponent field of 1.0f gives the bit pattern of 2^fBits:
+             // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+             int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+             Operand scale = X86GetAllElements(context, fpScaled);
+
+             value = context.AddIntrinsic(Intrinsic.X86Mulps, value, scale);
+         }
+
+         Operand rounded = context.AddIntrinsic(Intrinsic.X86Roundps, value, Const(X86GetRoundControl(roundMode)));
+
+         // Lanes that are not greater than zero are cleared.
+         Operand positiveMask = context.AddIntrinsic(Intrinsic.X86Cmpps, rounded, context.VectorZero(), Const(cmpGreaterThan));
+
+         Operand positive = context.AddIntrinsic(Intrinsic.X86Pand, rounded, positiveMask);
+
+         Operand lowPart = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, positive);
+
+         Operand limit = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+         // Portion of the value above 2^31, again keeping only lanes greater than zero.
+         Operand excess = context.AddIntrinsic(Intrinsic.X86Subps, positive, limit);
+
+         Operand excessMask = context.AddIntrinsic(Intrinsic.X86Cmpps, excess, context.VectorZero(), Const(cmpGreaterThan));
+
+         Operand excessPositive = context.AddIntrinsic(Intrinsic.X86Pand, excess, excessMask);
+
+         Operand result = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, excessPositive);
+
+         Operand overflowMask = context.AddIntrinsic(Intrinsic.X86Cmpps, excessPositive, limit, Const(cmpGreaterThanOrEqual));
+
+         result = context.AddIntrinsic(Intrinsic.X86Pxor, result, overflowMask);
+         result = context.AddIntrinsic(Intrinsic.X86Paddd, result, lowPart);
+
+         if (scalar)
+         {
+             result = context.VectorZeroUpper96(result);
+         }
+         else if (op.RegisterSize == RegisterSize.Simd64)
+         {
+             result = context.VectorZeroUpper64(result);
+         }
+
+         context.Copy(GetVec(op.Rd), result);
+     }
+     else
+     {
+         // A lane compares "ordered" with itself only when it is not NaN; the AND zeroes NaN lanes.
+         Operand orderedMask = context.AddIntrinsic(Intrinsic.X86Cmppd, source, source, Const(cmpOrdered));
+
+         Operand value = context.AddIntrinsic(Intrinsic.X86Pand, orderedMask, source);
+
+         if (op is OpCodeSimdShImm fixedOp)
+         {
+             int fBits = GetImmShr(fixedOp);
+
+             // Adding fBits to the exponent field of 1.0 gives the bit pattern of 2^fBits:
+             // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+             long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+             Operand scale = X86GetAllElements(context, fpScaled);
+
+             value = context.AddIntrinsic(Intrinsic.X86Mulpd, value, scale);
+         }
+
+         Operand rounded = context.AddIntrinsic(Intrinsic.X86Roundpd, value, Const(X86GetRoundControl(roundMode)));
+
+         // Lanes that are not greater than zero are cleared.
+         Operand positiveMask = context.AddIntrinsic(Intrinsic.X86Cmppd, rounded, context.VectorZero(), Const(cmpGreaterThan));
+
+         Operand positive = context.AddIntrinsic(Intrinsic.X86Pand, rounded, positiveMask);
+
+         // The two 64-bit lanes are converted one at a time with CVTSD2SI; the upper lane is
+         // only converted for the vector form. For the scalar form the same zero constant is
+         // reused as the upper half of both vectors built below.
+         Operand upper;
+
+         if (scalar)
+         {
+             upper = Const(0L);
+         }
+         else
+         {
+             upper = context.AddIntrinsic(Intrinsic.X86Unpckhpd, positive, positive);
+             upper = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, upper);
+         }
+
+         Operand lower = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, positive);
+
+         Operand lowPart = EmitVectorLongCreate(context, lower, upper);
+
+         Operand limit = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+         // Portion of the value above 2^63, again keeping only lanes greater than zero.
+         Operand excess = context.AddIntrinsic(Intrinsic.X86Subpd, positive, limit);
+
+         Operand excessMask = context.AddIntrinsic(Intrinsic.X86Cmppd, excess, context.VectorZero(), Const(cmpGreaterThan));
+
+         Operand excessPositive = context.AddIntrinsic(Intrinsic.X86Pand, excess, excessMask);
+
+         if (!scalar)
+         {
+             upper = context.AddIntrinsic(Intrinsic.X86Unpckhpd, excessPositive, excessPositive);
+             upper = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, upper);
+         }
+
+         lower = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, excessPositive);
+
+         Operand result = EmitVectorLongCreate(context, lower, upper);
+
+         Operand overflowMask = context.AddIntrinsic(Intrinsic.X86Cmppd, excessPositive, limit, Const(cmpGreaterThanOrEqual));
+
+         result = context.AddIntrinsic(Intrinsic.X86Pxor, result, overflowMask);
+         result = context.AddIntrinsic(Intrinsic.X86Paddq, result, lowPart);
+
+         if (scalar)
+         {
+             result = context.VectorZeroUpper64(result);
+         }
+
+         context.Copy(GetVec(op.Rd), result);
+     }
+ }
+
+ // SSE2 implementation of the single-precision signed-integer-to-float conversions.
+ // Converts Rn with CVTDQ2PS, applies the 2^-fBits scale for fixed-point opcodes and
+ // zeroes the unused upper part of the result before writing Rd.
+ //   scalar: when true only the lowest 32-bit element is kept.
+ private static void EmitSse2Scvtf(ArmEmitterContext context, bool scalar)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = GetVec(op.Rn);
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, source);
+
+     if (op is OpCodeSimdShImm fixedOp)
+     {
+         int fractionBits = GetImmShr(fixedOp);
+
+         // Subtracting fBits from the exponent field of 1.0f gives the bit pattern of 2^-fBits:
+         // BitConverter.Int32BitsToSingle(scaleBits) == 1f / MathF.Pow(2f, fBits)
+         int scaleBits = 0x3F800000 - fractionBits * 0x800000;
+
+         Operand scale = X86GetAllElements(context, scaleBits);
+
+         result = context.AddIntrinsic(Intrinsic.X86Mulps, result, scale);
+     }
+
+     if (scalar)
+     {
+         result = context.VectorZeroUpper96(result);
+     }
+     else if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // SSE2 implementation of the single-precision unsigned-integer-to-float conversions.
+ // Each 32-bit lane is split into its upper and lower 16-bit halves; both halves are
+ // converted with CVTDQ2PS and recombined as upper * 65536 + lower. The 2^-fBits scale is
+ // then applied for fixed-point opcodes and the unused upper part of the result is zeroed.
+ //   scalar: when true only the lowest 32-bit element is kept.
+ private static void EmitSse2Ucvtf(ArmEmitterContext context, bool scalar)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = GetVec(op.Rn);
+
+     // Upper 16 bits of every lane, converted and moved back into place by the multiply.
+     Operand upperHalf = context.AddIntrinsic(Intrinsic.X86Psrld, source, Const(16));
+
+     upperHalf = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, upperHalf);
+
+     Operand twoPow16 = X86GetAllElements(context, 0x47800000); // 65536.0f (1 << 16)
+
+     upperHalf = context.AddIntrinsic(Intrinsic.X86Mulps, upperHalf, twoPow16);
+
+     // Lower 16 bits of every lane: shift left then right to clear the upper half.
+     Operand lowerHalf = context.AddIntrinsic(Intrinsic.X86Pslld, source, Const(16));
+
+     lowerHalf = context.AddIntrinsic(Intrinsic.X86Psrld, lowerHalf, Const(16));
+     lowerHalf = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, lowerHalf);
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Addps, upperHalf, lowerHalf);
+
+     if (op is OpCodeSimdShImm fixedOp)
+     {
+         int fractionBits = GetImmShr(fixedOp);
+
+         // Subtracting fBits from the exponent field of 1.0f gives the bit pattern of 2^-fBits:
+         // BitConverter.Int32BitsToSingle(scaleBits) == 1f / MathF.Pow(2f, fBits)
+         int scaleBits = 0x3F800000 - fractionBits * 0x800000;
+
+         Operand scale = X86GetAllElements(context, scaleBits);
+
+         result = context.AddIntrinsic(Intrinsic.X86Mulps, result, scale);
+     }
+
+     if (scalar)
+     {
+         result = context.VectorZeroUpper96(result);
+     }
+     else if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Extracts element `index` of vector register `reg` as an integer:
+ // I64 when size is 3, I32 for any other size.
+ private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
+ {
+     OperandType elementType = OperandType.I32;
+
+     if (size == 3)
+     {
+         elementType = OperandType.I64;
+     }
+
+     return context.VectorExtract(elementType, GetVec(reg), index);
+ }
+
+ // Builds a vector whose element 0 is `low` (via VectorCreateScalar) and whose element 1 is `high`.
+ private static Operand EmitVectorLongCreate(ArmEmitterContext context, Operand low, Operand high)
+ {
+     return context.VectorInsert(context.VectorCreateScalar(low), high, 1);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHash.cs b/ARMeilleure/Instructions/InstEmitSimdHash.cs
new file mode 100644
index 000000000..4ed960612
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHash.cs
@@ -0,0 +1,147 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // SHA-1 and SHA-256 instruction emitters. Every emitter forwards its operands to the
+ // matching SoftFallback routine through context.Call and copies the returned value to Rd.
+ static partial class InstEmit
+ {
+#region "Sha1"
+     // Calls SoftFallback.HashChoose with Rd, the first 32-bit element of Rn, and Rm.
+     public static void Sha1c_V(ArmEmitterContext context)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+
+         Operand nElem0 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+         Operand mVec = GetVec(op.Rm);
+
+         Operand hashed = context.Call(new _V128_V128_U32_V128(SoftFallback.HashChoose), dVec, nElem0, mVec);
+
+         context.Copy(GetVec(op.Rd), hashed);
+     }
+
+     // Calls SoftFallback.FixedRotate on the first 32-bit element of Rn and stores the
+     // result as a scalar vector in Rd.
+     public static void Sha1h_V(ArmEmitterContext context)
+     {
+         OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+         Operand nElem0 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+         Operand rotated = context.Call(new _U32_U32(SoftFallback.FixedRotate), nElem0);
+
+         Operand dest = GetVec(op.Rd);
+
+         context.Copy(dest, context.VectorCreateScalar(rotated));
+     }
+
+     // Calls SoftFallback.HashMajority with Rd, the first 32-bit element of Rn, and Rm.
+     public static void Sha1m_V(ArmEmitterContext context)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+
+         Operand nElem0 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+         Operand mVec = GetVec(op.Rm);
+
+         Operand hashed = context.Call(new _V128_V128_U32_V128(SoftFallback.HashMajority), dVec, nElem0, mVec);
+
+         context.Copy(GetVec(op.Rd), hashed);
+     }
+
+     // Calls SoftFallback.HashParity with Rd, the first 32-bit element of Rn, and Rm.
+     public static void Sha1p_V(ArmEmitterContext context)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+
+         Operand nElem0 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+         Operand mVec = GetVec(op.Rm);
+
+         Operand hashed = context.Call(new _V128_V128_U32_V128(SoftFallback.HashParity), dVec, nElem0, mVec);
+
+         context.Copy(GetVec(op.Rd), hashed);
+     }
+
+     // Calls SoftFallback.Sha1SchedulePart1 with the Rd, Rn and Rm vectors.
+     public static void Sha1su0_V(ArmEmitterContext context)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+         Operand nVec = GetVec(op.Rn);
+         Operand mVec = GetVec(op.Rm);
+
+         Operand scheduled = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha1SchedulePart1), dVec, nVec, mVec);
+
+         context.Copy(GetVec(op.Rd), scheduled);
+     }
+
+     // Calls SoftFallback.Sha1SchedulePart2 with the Rd and Rn vectors.
+     public static void Sha1su1_V(ArmEmitterContext context)
+     {
+         OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+         Operand nVec = GetVec(op.Rn);
+
+         Operand scheduled = context.Call(new _V128_V128_V128(SoftFallback.Sha1SchedulePart2), dVec, nVec);
+
+         context.Copy(GetVec(op.Rd), scheduled);
+     }
+#endregion
+
+#region "Sha256"
+     // Calls SoftFallback.HashLower with the Rd, Rn and Rm vectors.
+     public static void Sha256h_V(ArmEmitterContext context)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+         Operand nVec = GetVec(op.Rn);
+         Operand mVec = GetVec(op.Rm);
+
+         Operand hashed = context.Call(new _V128_V128_V128_V128(SoftFallback.HashLower), dVec, nVec, mVec);
+
+         context.Copy(GetVec(op.Rd), hashed);
+     }
+
+     // Calls SoftFallback.HashUpper with the Rd, Rn and Rm vectors.
+     public static void Sha256h2_V(ArmEmitterContext context)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+         Operand nVec = GetVec(op.Rn);
+         Operand mVec = GetVec(op.Rm);
+
+         Operand hashed = context.Call(new _V128_V128_V128_V128(SoftFallback.HashUpper), dVec, nVec, mVec);
+
+         context.Copy(GetVec(op.Rd), hashed);
+     }
+
+     // Calls SoftFallback.Sha256SchedulePart1 with the Rd and Rn vectors.
+     public static void Sha256su0_V(ArmEmitterContext context)
+     {
+         OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+         Operand nVec = GetVec(op.Rn);
+
+         Operand scheduled = context.Call(new _V128_V128_V128(SoftFallback.Sha256SchedulePart1), dVec, nVec);
+
+         context.Copy(GetVec(op.Rd), scheduled);
+     }
+
+     // Calls SoftFallback.Sha256SchedulePart2 with the Rd, Rn and Rm vectors.
+     public static void Sha256su1_V(ArmEmitterContext context)
+     {
+         OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+         Operand dVec = GetVec(op.Rd);
+         Operand nVec = GetVec(op.Rn);
+         Operand mVec = GetVec(op.Rm);
+
+         Operand scheduled = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha256SchedulePart2), dVec, nVec, mVec);
+
+         context.Copy(GetVec(op.Rd), scheduled);
+     }
+#endregion
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
new file mode 100644
index 000000000..a3da80fb0
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -0,0 +1,1477 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ using Func1I = Func<Operand, Operand>;                   // emit(n): one operand in, one operand out
+ using Func2I = Func<Operand, Operand, Operand>;          // emit(n, m): two operands in, one out
+ using Func3I = Func<Operand, Operand, Operand, Operand>; // emit(d, n, m): three operands in, one out
+
+ static class InstEmitSimdHelper
+ {
+#region "X86 SSE Intrinsics"
+ // Packed integer add. Entries are ordered by element width:
+ // [0] byte, [1] word, [2] dword, [3] qword.
+ // NOTE(review): presumably indexed with the opcode's Size field - confirm at call sites.
+ public static readonly Intrinsic[] X86PaddInstruction =
+ {
+     Intrinsic.X86Paddb, Intrinsic.X86Paddw, Intrinsic.X86Paddd, Intrinsic.X86Paddq
+ };
+
+ // Packed integer compare-for-equality, ordered by element width:
+ // [0] byte, [1] word, [2] dword, [3] qword.
+ // NOTE(review): presumably indexed with the opcode's Size field - confirm at call sites.
+ public static readonly Intrinsic[] X86PcmpeqInstruction =
+ {
+     Intrinsic.X86Pcmpeqb, Intrinsic.X86Pcmpeqw, Intrinsic.X86Pcmpeqd, Intrinsic.X86Pcmpeqq
+ };
+
+ // Packed signed compare-greater-than, ordered by element width:
+ // [0] byte, [1] word, [2] dword, [3] qword.
+ // NOTE(review): presumably indexed with the opcode's Size field - confirm at call sites.
+ public static readonly Intrinsic[] X86PcmpgtInstruction =
+ {
+     Intrinsic.X86Pcmpgtb, Intrinsic.X86Pcmpgtw, Intrinsic.X86Pcmpgtd, Intrinsic.X86Pcmpgtq
+ };
+
+ // Packed signed maximum: [0] byte, [1] word, [2] dword.
+ // The table has no qword entry, so index 3 is out of range.
+ public static readonly Intrinsic[] X86PmaxsInstruction =
+ {
+     Intrinsic.X86Pmaxsb, Intrinsic.X86Pmaxsw, Intrinsic.X86Pmaxsd
+ };
+
+ // Packed unsigned maximum: [0] byte, [1] word, [2] dword.
+ // The table has no qword entry, so index 3 is out of range.
+ public static readonly Intrinsic[] X86PmaxuInstruction =
+ {
+     Intrinsic.X86Pmaxub, Intrinsic.X86Pmaxuw, Intrinsic.X86Pmaxud
+ };
+
+ // Packed signed minimum: [0] byte, [1] word, [2] dword.
+ // The table has no qword entry, so index 3 is out of range.
+ public static readonly Intrinsic[] X86PminsInstruction =
+ {
+     Intrinsic.X86Pminsb, Intrinsic.X86Pminsw, Intrinsic.X86Pminsd
+ };
+
+ // Packed unsigned minimum: [0] byte, [1] word, [2] dword.
+ // The table has no qword entry, so index 3 is out of range.
+ public static readonly Intrinsic[] X86PminuInstruction =
+ {
+     Intrinsic.X86Pminub, Intrinsic.X86Pminuw, Intrinsic.X86Pminud
+ };
+
+ // Sign-extending widening moves, ordered by source element width:
+ // [0] byte->word, [1] word->dword, [2] dword->qword.
+ public static readonly Intrinsic[] X86PmovsxInstruction =
+ {
+     Intrinsic.X86Pmovsxbw, Intrinsic.X86Pmovsxwd, Intrinsic.X86Pmovsxdq
+ };
+
+ // Zero-extending widening moves, ordered by source element width:
+ // [0] byte->word, [1] word->dword, [2] dword->qword.
+ public static readonly Intrinsic[] X86PmovzxInstruction =
+ {
+     Intrinsic.X86Pmovzxbw, Intrinsic.X86Pmovzxwd, Intrinsic.X86Pmovzxdq
+ };
+
+ public static readonly Intrinsic[] X86PsllInstruction =
+ { // Packed logical left shift by element width; x86 has no byte-wide form.
+     0, // placeholder keeping word/dword/qword at indices 1..3 - not a usable shift
+     Intrinsic.X86Psllw,
+     Intrinsic.X86Pslld,
+     Intrinsic.X86Psllq
+ };
+
+ public static readonly Intrinsic[] X86PsraInstruction =
+ { // Packed arithmetic right shift; only word and dword entries are listed.
+     0, // placeholder keeping word/dword at indices 1..2 - not a usable shift
+     Intrinsic.X86Psraw,
+     Intrinsic.X86Psrad
+ };
+
+ public static readonly Intrinsic[] X86PsrlInstruction =
+ { // Packed logical right shift by element width; x86 has no byte-wide form.
+     0, // placeholder keeping word/dword/qword at indices 1..3 - not a usable shift
+     Intrinsic.X86Psrlw,
+     Intrinsic.X86Psrld,
+     Intrinsic.X86Psrlq
+ };
+
+ // Packed integer subtract. Entries are ordered by element width:
+ // [0] byte, [1] word, [2] dword, [3] qword.
+ // NOTE(review): presumably indexed with the opcode's Size field - confirm at call sites.
+ public static readonly Intrinsic[] X86PsubInstruction =
+ {
+     Intrinsic.X86Psubb, Intrinsic.X86Psubw, Intrinsic.X86Psubd, Intrinsic.X86Psubq
+ };
+
+ // Interleave of the HIGH halves of two vectors, ordered by element width:
+ // [0] byte, [1] word, [2] dword, [3] qword.
+ // NOTE(review): presumably indexed with the opcode's Size field - confirm at call sites.
+ public static readonly Intrinsic[] X86PunpckhInstruction =
+ {
+     Intrinsic.X86Punpckhbw, Intrinsic.X86Punpckhwd, Intrinsic.X86Punpckhdq, Intrinsic.X86Punpckhqdq
+ };
+
+ // Interleave of the LOW halves of two vectors, ordered by element width:
+ // [0] byte, [1] word, [2] dword, [3] qword.
+ // NOTE(review): presumably indexed with the opcode's Size field - confirm at call sites.
+ public static readonly Intrinsic[] X86PunpcklInstruction =
+ {
+     Intrinsic.X86Punpcklbw, Intrinsic.X86Punpcklwd, Intrinsic.X86Punpckldq, Intrinsic.X86Punpcklqdq
+ };
+#endregion
+
+ /// <summary>Left-shift amount: the opcode's raw immediate minus the element width in bits.</summary>
+ /// <returns><c>op.Imm - (8 &lt;&lt; op.Size)</c>.</returns>
+ public static int GetImmShl(OpCodeSimdShImm op)
+     => op.Imm - (8 << op.Size);
+
+ /// <summary>Right-shift amount: twice the element width in bits minus the opcode's raw immediate.</summary>
+ /// <returns><c>(8 &lt;&lt; (op.Size + 1)) - op.Imm</c>.</returns>
+ public static int GetImmShr(OpCodeSimdShImm op)
+     => (8 << (op.Size + 1)) - op.Imm;
+
+ /// <summary>Scalar vector built from the raw 32-bit pattern of <paramref name="value"/>.</summary>
+ /// <remarks>Reinterprets the float as an int and defers to the int overload.</remarks>
+ public static Operand X86GetScalar(ArmEmitterContext context, float value)
+     => X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
+
+ /// <summary>Scalar vector built from the raw 64-bit pattern of <paramref name="value"/>.</summary>
+ /// <remarks>Reinterprets the double as a long and defers to the long overload.</remarks>
+ public static Operand X86GetScalar(ArmEmitterContext context, double value)
+     => X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
+
+ /// <summary>Scalar vector built from a 32-bit integer constant.</summary>
+ /// <remarks>Thin wrapper over <c>context.VectorCreateScalar(Const(value))</c>.</remarks>
+ public static Operand X86GetScalar(ArmEmitterContext context, int value)
+     => context.VectorCreateScalar(Const(value));
+
+ /// <summary>Scalar vector built from a 64-bit integer constant.</summary>
+ /// <remarks>Thin wrapper over <c>context.VectorCreateScalar(Const(value))</c>.</remarks>
+ public static Operand X86GetScalar(ArmEmitterContext context, long value)
+     => context.VectorCreateScalar(Const(value));
+
+ /// <summary>Vector whose 32-bit lanes all hold the raw bit pattern of <paramref name="value"/>.</summary>
+ /// <remarks>Reinterprets the float as an int and defers to the int overload.</remarks>
+ public static Operand X86GetAllElements(ArmEmitterContext context, float value)
+     => X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
+
+ /// <summary>Vector whose 64-bit lanes both hold the raw bit pattern of <paramref name="value"/>.</summary>
+ /// <remarks>Reinterprets the double as a long and defers to the long overload.</remarks>
+ public static Operand X86GetAllElements(ArmEmitterContext context, double value)
+     => X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, int value)
+ { // Replicates a 32-bit constant into every 32-bit lane of a vector.
+     // Start from a scalar vector (presumably the value sits in element 0 - see VectorCreateScalar).
+     Operand scalar = context.VectorCreateScalar(Const(value));
+     // SHUFPS with selector 0 takes element 0 of its sources for all four result lanes,
+     // so using the same vector for both sources yields a broadcast.
+     return context.AddIntrinsic(Intrinsic.X86Shufps, scalar, scalar, Const(0));
+ }
+
+ public static Operand X86GetAllElements(ArmEmitterContext context, long value)
+ { // Replicates a 64-bit constant into both 64-bit lanes of a vector.
+     // Start from a scalar vector (presumably the value sits in the low 64 bits - see VectorCreateScalar).
+     Operand scalar = context.VectorCreateScalar(Const(value));
+     // MOVLHPS copies the low 64 bits of its second source into the high 64 bits of the first,
+     // so using the same vector for both sources duplicates the low half.
+     return context.AddIntrinsic(Intrinsic.X86Movlhps, scalar, scalar);
+ }
+
+ public static int X86GetRoundControl(FPRoundingMode roundMode)
+ { // Maps a rounding mode to an immediate: bits 1:0 select the mode, bit 3 is always set.
+     const int Bit3 = 8; // NOTE(review): presumably the x86 ROUNDxx "suppress precision exception" bit - confirm
+     switch (roundMode)
+     {
+         case FPRoundingMode.ToNearest:            return Bit3 | 0;
+         case FPRoundingMode.TowardsPlusInfinity:  return Bit3 | 2;
+         case FPRoundingMode.TowardsMinusInfinity: return Bit3 | 1;
+         case FPRoundingMode.TowardsZero:          return Bit3 | 3;
+     }
+     throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".", nameof(roundMode)); // names the bad argument
+ }
+
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ { // Scalar FP unary op done with one intrinsic on Rn; the result goes to Rd with the upper bits zeroed.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = GetVec(op.Rn);
+     // Bit 0 of Size selects the 64-bit flavour; otherwise the 32-bit one is used.
+     bool is64 = (op.Size & 1) != 0;
+
+     Operand result = context.AddIntrinsic(is64 ? inst64 : inst32, source);
+     // Keep only the scalar: clear the upper 64 bits for the 64-bit case, the upper 96 otherwise.
+     if (is64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+     else
+     {
+         result = context.VectorZeroUpper96(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ { // Scalar FP binary op done with one intrinsic on Rn and Rm; Rd gets the result with the upper bits zeroed.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhs = GetVec(op.Rn);
+     Operand rhs = GetVec(op.Rm);
+     // Bit 0 of Size selects the 64-bit flavour; otherwise the 32-bit one is used.
+     bool is64 = (op.Size & 1) != 0;
+
+     Operand result = context.AddIntrinsic(is64 ? inst64 : inst32, lhs, rhs);
+     // Keep only the scalar: clear the upper 64 bits for the 64-bit case, the upper 96 otherwise.
+     if (is64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+     else
+     {
+         result = context.VectorZeroUpper96(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ { // Vector FP unary op done with one intrinsic over the whole of Rn; the result goes to Rd.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = GetVec(op.Rn);
+     // Bit 0 of Size clear means the 32-bit intrinsic, set means the 64-bit one.
+     Intrinsic chosen = (op.Size & 1) == 0 ? inst32 : inst64;
+
+     Operand result = context.AddIntrinsic(chosen, source);
+     // A 64-bit register form only defines the low half, so the upper 64 bits are cleared.
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ { // Vector FP binary op done with one intrinsic over the whole of Rn and Rm; the result goes to Rd.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhs = GetVec(op.Rn);
+     Operand rhs = GetVec(op.Rm);
+     // Bit 0 of Size clear means the 32-bit intrinsic, set means the 64-bit one.
+     Intrinsic chosen = (op.Size & 1) == 0 ? inst32 : inst64;
+
+     Operand result = context.AddIntrinsic(chosen, lhs, rhs);
+     // A 64-bit register form only defines the low half, so the upper 64 bits are cleared.
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static Operand EmitUnaryMathCall(ArmEmitterContext context, _F32_F32 f32, _F64_F64 f64, Operand n)
+ { // Emits a call to f64 when bit 0 of the current opcode's Size is set, to f32 otherwise.
+     bool is64 = (((IOpCodeSimd)context.CurrOp).Size & 1) != 0;
+     // n is the only argument either way; the call's result operand is returned.
+     return is64 ? context.Call(f64, n) : context.Call(f32, n);
+ }
+
+ public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+ { // Emits a call to MathF.Round / Math.Round (picked by operand width) with the given midpoint mode.
+     IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+     Delegate rounder;
+     // Bit 0 of Size set means double precision, clear means single precision.
+     if ((op.Size & 1) != 0)
+     {
+         rounder = new _F64_F64_MidpointRounding(Math.Round);
+     }
+     else
+     {
+         rounder = new _F32_F32_MidpointRounding(MathF.Round);
+     }
+     // The rounding mode travels as an integer constant argument.
+     return context.Call(rounder, n, Const((int)roundMode));
+ }
+
+ public static Operand EmitSoftFloatCall(
+     ArmEmitterContext context,
+     _F32_F32 f32,
+     _F64_F64 f64,
+     params Operand[] callArgs)
+ { // One-argument helper call: f32 or f64 is chosen from the current opcode's Size.
+     IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+     // Bit 0 of Size clear means single precision.
+     bool is32 = (op.Size & 1) == 0;
+     // callArgs is forwarded untouched; its length is not checked here.
+     return context.Call(is32 ? (Delegate)f32 : (Delegate)f64, callArgs);
+ }
+
+ public static Operand EmitSoftFloatCall(
+     ArmEmitterContext context,
+     _F32_F32_F32 f32,
+     _F64_F64_F64 f64,
+     params Operand[] callArgs)
+ { // Two-argument helper call: f32 or f64 is chosen from the current opcode's Size.
+     IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+     // Bit 0 of Size clear means single precision.
+     bool is32 = (op.Size & 1) == 0;
+     // callArgs is forwarded untouched; its length is not checked here.
+     return context.Call(is32 ? (Delegate)f32 : (Delegate)f64, callArgs);
+ }
+
+ public static Operand EmitSoftFloatCall(
+     ArmEmitterContext context,
+     _F32_F32_F32_F32 f32,
+     _F64_F64_F64_F64 f64,
+     params Operand[] callArgs)
+ { // Three-argument helper call: f32 or f64 is chosen from the current opcode's Size.
+     IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+     // Bit 0 of Size clear means single precision.
+     bool is32 = (op.Size & 1) == 0;
+     // callArgs is forwarded untouched; its length is not checked here.
+     return context.Call(is32 ? (Delegate)f32 : (Delegate)f64, callArgs);
+ }
+
+ public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ { // Rd[0] = emit(Rn[0], Rm[Index]) on FP elements; every other lane of Rd becomes zero.
+     OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+     // Bit 0 of Size clear means FP32 elements, set means FP64.
+     OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+     Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);
+     // The result is inserted into a fresh zero vector, which clears the remaining lanes.
+     context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(lhs, rhs), 0));
+ }
+
+ public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ { // Rd[0] = emit(Rd[0], Rn[0], Rm[Index]) on FP elements; every other lane of Rd becomes zero.
+     OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+     // Bit 0 of Size clear means FP32 elements, set means FP64.
+     OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     Operand acc = context.VectorExtract(elemType, GetVec(op.Rd), 0);
+     Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+     Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);
+     // The result is inserted into a fresh zero vector, which clears the remaining lanes.
+     context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(acc, lhs, rhs), 0));
+ }
+
+ public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ { // Rd[0] = emit(sign-extended Rn[0]); every other lane of Rd becomes zero.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+     // Inserting into a fresh zero vector clears the lanes above the scalar.
+     Operand packed = EmitVectorInsert(context, context.VectorZero(), emit(source), 0, op.Size);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ { // Rd[0] = emit(Rn[0], Rm[0]) on sign-extended elements; every other lane of Rd becomes zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhs = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
+     Operand rhs = EmitVectorExtractSx(context, op.Rm, 0, op.Size);
+     // Inserting into a fresh zero vector clears the lanes above the scalar.
+     Operand packed = EmitVectorInsert(context, context.VectorZero(), emit(lhs, rhs), 0, op.Size);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ { // Rd[0] = emit(zero-extended Rn[0]); every other lane of Rd becomes zero.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand source = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+     // Inserting into a fresh zero vector clears the lanes above the scalar.
+     Operand packed = EmitVectorInsert(context, context.VectorZero(), emit(source), 0, op.Size);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ { // Rd[0] = emit(Rn[0], Rm[0]) on zero-extended elements; every other lane of Rd becomes zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand lhs = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+     Operand rhs = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+     // Inserting into a fresh zero vector clears the lanes above the scalar.
+     Operand packed = EmitVectorInsert(context, context.VectorZero(), emit(lhs, rhs), 0, op.Size);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ { // Rd[0] = emit(Rd[0], Rn[0], Rm[0]) on zero-extended elements; every other lane of Rd becomes zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
+     Operand lhs = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
+     Operand rhs = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
+     // Inserting into a fresh zero vector clears the lanes above the scalar.
+     Operand packed = EmitVectorInsert(context, context.VectorZero(), emit(acc, lhs, rhs), 0, op.Size);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
+ { // Rd[0] = emit(Rn[0]) on FP elements; every other lane of Rd becomes zero.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+     // Bit 0 of Size clear means FP32 elements, set means FP64.
+     OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     Operand source = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+     // The result is inserted into a fresh zero vector, which clears the remaining lanes.
+     context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(source), 0));
+ }
+
+ public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
+ { // Rd[0] = emit(Rn[0], Rm[0]) on FP elements; every other lane of Rd becomes zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // Bit 0 of Size clear means FP32 elements, set means FP64.
+     OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+     Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), 0);
+     // The result is inserted into a fresh zero vector, which clears the remaining lanes.
+     context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(lhs, rhs), 0));
+ }
+
+ public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
+ { // Rd[0] = emit(Ra[0], Rn[0], Rm[0]) on FP elements; every other lane of Rd becomes zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // Bit 0 of Size clear means FP32 elements, set means FP64.
+     OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     Operand addend = context.VectorExtract(elemType, GetVec(op.Ra), 0);
+     Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+     Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), 0);
+     // The result is inserted into a fresh zero vector, which clears the remaining lanes.
+     context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(addend, lhs, rhs), 0));
+ }
+
+ public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
+ { // Rd[i] = emit(Rn[i]) for every FP element; lanes not written stay zero.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+     // sizeF is 0 for FP32 elements and 1 for FP64 elements.
+     int sizeF = op.Size & 1;
+     OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+     // Element count = register bytes / element bytes, where element bytes = 4 << sizeF.
+     // The parentheses only spell out the precedence C# already applies to ">> sizeF + 2".
+     int laneCount = op.GetBytesCount() >> (sizeF + 2);
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = context.VectorExtract(elemType, GetVec(op.Rn), lane);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = context.VectorInsert(result, emit(nLane), lane);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
+ { // Rd[i] = emit(Rn[i], Rm[i]) for every FP element; lanes not written stay zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // sizeF is 0 for FP32 elements and 1 for FP64 elements.
+     int sizeF = op.Size & 1;
+     OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+     // Element count = register bytes / element bytes, where element bytes = 4 << sizeF.
+     // The parentheses only spell out the precedence C# already applies to ">> sizeF + 2".
+     int laneCount = op.GetBytesCount() >> (sizeF + 2);
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = context.VectorExtract(elemType, GetVec(op.Rn), lane);
+         Operand mLane = context.VectorExtract(elemType, GetVec(op.Rm), lane);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = context.VectorInsert(result, emit(nLane, mLane), lane);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
+ { // Rd[i] = emit(Rd[i], Rn[i], Rm[i]) for every FP element; lanes not written stay zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // sizeF is 0 for FP32 elements and 1 for FP64 elements.
+     int sizeF = op.Size & 1;
+     OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+     // Element count = register bytes / element bytes, where element bytes = 4 << sizeF.
+     // The parentheses only spell out the precedence C# already applies to ">> sizeF + 2".
+     int laneCount = op.GetBytesCount() >> (sizeF + 2);
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand dLane = context.VectorExtract(elemType, GetVec(op.Rd), lane);
+         Operand nLane = context.VectorExtract(elemType, GetVec(op.Rn), lane);
+         Operand mLane = context.VectorExtract(elemType, GetVec(op.Rm), lane);
+         // Old Rd lanes are read from the register; new ones accumulate in 'result'.
+         result = context.VectorInsert(result, emit(dLane, nLane, mLane), lane);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+ { // Rd[i] = emit(Rn[i], Rm[Index]) for every FP element; lanes not written stay zero.
+     OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+     // sizeF is 0 for FP32 elements and 1 for FP64 elements.
+     int sizeF = op.Size & 1;
+     OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+     // Element count = register bytes / element bytes, where element bytes = 4 << sizeF.
+     // The parentheses only spell out the precedence C# already applies to ">> sizeF + 2".
+     int laneCount = op.GetBytesCount() >> (sizeF + 2);
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = context.VectorExtract(elemType, GetVec(op.Rn), lane);
+         Operand mElem = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);
+         // The Rm element is the same on every iteration; it is re-extracted each time.
+         result = context.VectorInsert(result, emit(nLane, mElem), lane);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+ { // Rd[i] = emit(Rd[i], Rn[i], Rm[Index]) for every FP element; lanes not written stay zero.
+     OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+     // sizeF is 0 for FP32 elements and 1 for FP64 elements.
+     int sizeF = op.Size & 1;
+     OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+     // Element count = register bytes / element bytes, where element bytes = 4 << sizeF.
+     // The parentheses only spell out the precedence C# already applies to ">> sizeF + 2".
+     int laneCount = op.GetBytesCount() >> (sizeF + 2);
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand dLane = context.VectorExtract(elemType, GetVec(op.Rd), lane);
+         Operand nLane = context.VectorExtract(elemType, GetVec(op.Rn), lane);
+         Operand mElem = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);
+         // The Rm element is the same on every iteration; it is re-extracted each time.
+         result = context.VectorInsert(result, emit(dLane, nLane, mElem), lane);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
+ { // Rd[i] = emit(sign-extended Rn[i]) for every element; lanes not written stay zero.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = EmitVectorInsert(context, result, emit(nLane), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
+ { // Rd[i] = emit(Rn[i], Rm[i]) on sign-extended elements; lanes not written stay zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         Operand mLane = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = EmitVectorInsert(context, result, emit(nLane, mLane), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
+ { // Rd[i] = emit(Rd[i], Rn[i], Rm[i]) on sign-extended elements; lanes not written stay zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand dLane = EmitVectorExtractSx(context, op.Rd, lane, op.Size);
+         Operand nLane = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         Operand mLane = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+         // Old Rd lanes are read from the register; new ones accumulate in 'result'.
+         result = EmitVectorInsert(context, result, emit(dLane, nLane, mLane), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
+ { // Rd[i] = emit(zero-extended Rn[i]) for every element; lanes not written stay zero.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = EmitVectorInsert(context, result, emit(nLane), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
+ { // Rd[i] = emit(Rn[i], Rm[i]) on zero-extended elements; lanes not written stay zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         Operand mLane = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = EmitVectorInsert(context, result, emit(nLane, mLane), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
+ { // Rd[i] = emit(Rd[i], Rn[i], Rm[i]) on zero-extended elements; lanes not written stay zero.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand dLane = EmitVectorExtractZx(context, op.Rd, lane, op.Size);
+         Operand nLane = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         Operand mLane = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+         // Old Rd lanes are read from the register; new ones accumulate in 'result'.
+         result = EmitVectorInsert(context, result, emit(dLane, nLane, mLane), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+ { // Rd[i] = emit(Rn[i], Rm[Index]) on sign-extended elements; lanes not written stay zero.
+     OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+     // The selected Rm element is extracted once and reused for every lane.
+     Operand selected = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = EmitVectorInsert(context, result, emit(nLane, selected), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+ { // Rd[i] = emit(Rn[i], Rm[Index]) on zero-extended elements; lanes not written stay zero.
+     OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+     // The selected Rm element is extracted once and reused for every lane.
+     Operand selected = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nLane = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         // Each result is threaded through 'result', building the vector lane by lane.
+         result = EmitVectorInsert(context, result, emit(nLane, selected), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+ { // Rd[i] = emit(Rd[i], Rn[i], Rm[Index]) on zero-extended elements; lanes not written stay zero.
+     OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+     // The selected Rm element is extracted once and reused for every lane.
+     Operand selected = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand dLane = EmitVectorExtractZx(context, op.Rd, lane, op.Size);
+         Operand nLane = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         // Old Rd lanes are read from the register; new ones accumulate in 'result'.
+         result = EmitVectorInsert(context, result, emit(dLane, nLane, selected), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
+ { // Rd[i] = emit(immediate) for every element; lanes not written stay zero.
+     OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand immediate = Const(op.Immediate);
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         // emit is invoked once per lane, always with the same constant operand.
+         result = EmitVectorInsert(context, result, emit(immediate), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
+ { // Rd[i] = emit(Rd[i], immediate) on zero-extended elements; lanes not written stay zero.
+     OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
+     // Register bytes shifted right by Size gives the number of elements.
+     int laneCount = op.GetBytesCount() >> op.Size;
+
+     Operand immediate = Const(op.Immediate);
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         // The current destination lane is the left operand of the operation.
+         Operand dLane = EmitVectorExtractZx(context, op.Rd, lane, op.Size);
+
+         result = EmitVectorInsert(context, result, emit(dLane, immediate), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Signed form of the "wide Rn, narrow Rm" binary helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenRmBinaryOp</c> with <c>signed: true</c>.</remarks>
+ public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorWidenRmBinaryOp(context, emit, signed: true);
+
+ /// <summary>Unsigned form of the "wide Rn, narrow Rm" binary helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenRmBinaryOp</c> with <c>signed: false</c>.</remarks>
+ public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorWidenRmBinaryOp(context, emit, signed: false);
+
+ private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ { // Rd[i] (double width) = emit(Rn[i] at double width, Rm[part + i] at Size).
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // 8 >> Size: how many Size-wide elements fit in 64 bits.
+     int laneCount = 8 >> op.Size;
+     // With a 128-bit register size the narrow operand is read starting at lane 'laneCount' (upper half).
+     int upperHalf = op.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nWide = EmitVectorExtract(context, op.Rn, lane, op.Size + 1, signed);
+         Operand mNarrow = EmitVectorExtract(context, op.Rm, upperHalf + lane, op.Size, signed);
+
+         result = EmitVectorInsert(context, result, emit(nWide, mNarrow), lane, op.Size + 1);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Signed form of the "narrow Rn, narrow Rm, wide result" binary helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenRnRmBinaryOp</c> with <c>signed: true</c>.</remarks>
+ public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
+
+ /// <summary>Unsigned form of the "narrow Rn, narrow Rm, wide result" binary helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenRnRmBinaryOp</c> with <c>signed: false</c>.</remarks>
+ public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
+
+ private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+ { // Rd[i] (double width) = emit(Rn[part + i], Rm[part + i]), both read at Size.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // 8 >> Size: how many Size-wide elements fit in 64 bits.
+     int laneCount = 8 >> op.Size;
+     // With a 128-bit register size the sources are read starting at lane 'laneCount' (upper half).
+     int upperHalf = op.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand nNarrow = EmitVectorExtract(context, op.Rn, upperHalf + lane, op.Size, signed);
+         Operand mNarrow = EmitVectorExtract(context, op.Rm, upperHalf + lane, op.Size, signed);
+
+         result = EmitVectorInsert(context, result, emit(nNarrow, mNarrow), lane, op.Size + 1);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Signed form of the widening ternary (accumulating) helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenRnRmTernaryOp</c> with <c>signed: true</c>.</remarks>
+ public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
+     => EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
+
+ /// <summary>Unsigned form of the widening ternary (accumulating) helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenRnRmTernaryOp</c> with <c>signed: false</c>.</remarks>
+ public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
+     => EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
+
+ private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
+ { // Rd[i] (double width) = emit(Rd[i] at double width, Rn[part + i], Rm[part + i]).
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // 8 >> Size: how many Size-wide elements fit in 64 bits.
+     int laneCount = 8 >> op.Size;
+     // With a 128-bit register size the narrow sources are read starting at lane 'laneCount' (upper half).
+     int upperHalf = op.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;
+
+     Operand result = context.VectorZero();
+
+     for (int lane = 0; lane < laneCount; lane++)
+     {
+         Operand dWide = EmitVectorExtract(context, op.Rd, lane, op.Size + 1, signed);
+         Operand nNarrow = EmitVectorExtract(context, op.Rn, upperHalf + lane, op.Size, signed);
+         Operand mNarrow = EmitVectorExtract(context, op.Rm, upperHalf + lane, op.Size, signed);
+
+         result = EmitVectorInsert(context, result, emit(dWide, nNarrow, mNarrow), lane, op.Size + 1);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Signed form of the widening by-element binary helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenBinaryOpByElem</c> with <c>signed: true</c>.</remarks>
+ public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
+
+ /// <summary>Unsigned form of the widening by-element binary helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenBinaryOpByElem</c> with <c>signed: false</c>.</remarks>
+ public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
+
+ private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
+ { // Rd[i] (double width) = emit(Rn[part + i], Rm[Index]), both read at Size.
+     OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+     Operand res = context.VectorZero();
+     // The selected Rm element is extracted once and reused for every lane.
+     Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+     // 8 >> Size: how many Size-wide elements fit in 64 bits.
+     int elems = 8 >> op.Size;
+     // With a 128-bit register size Rn is read starting at lane 'elems' (upper half).
+     int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+     for (int index = 0; index < elems; index++)
+     {
+         Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+         res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ /// <summary>Signed form of the widening by-element ternary (accumulating) helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenTernaryOpByElem</c> with <c>signed: true</c>.</remarks>
+ public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
+     => EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
+
+ /// <summary>Unsigned form of the widening by-element ternary (accumulating) helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorWidenTernaryOpByElem</c> with <c>signed: false</c>.</remarks>
+ public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+     => EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
+
+ private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
+ { // Rd[i] (double width) = emit(Rd[i] at double width, Rn[part + i], Rm[Index]).
+     OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+     Operand res = context.VectorZero();
+     // The selected Rm element is extracted once and reused for every lane.
+     Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+     // 8 >> Size: how many Size-wide elements fit in 64 bits.
+     int elems = 8 >> op.Size;
+     // With a 128-bit register size Rn is read starting at lane 'elems' (upper half).
+     int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+     for (int index = 0; index < elems; index++)
+     {
+         Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
+         Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
+
+         res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ /// <summary>Signed form of the integer pairwise helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorPairwiseOp</c> with <c>signed: true</c>.</remarks>
+ public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorPairwiseOp(context, emit, signed: true);
+
+ /// <summary>Unsigned form of the integer pairwise helper.</summary>
+ /// <remarks>Forwards to <c>EmitVectorPairwiseOp</c> with <c>signed: false</c>.</remarks>
+ public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorPairwiseOp(context, emit, signed: false);
+
+ private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
+ { // Combines adjacent element pairs: results from Rn fill the low lanes, results from Rm the lanes after them.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // Number of adjacent pairs taken from each source register.
+     int pairCount = op.GetPairsCount() >> op.Size;
+
+     Operand result = context.VectorZero();
+
+     for (int pair = 0; pair < pairCount; pair++)
+     {
+         int first = pair << 1; // index of the first element of this pair
+
+         Operand nLo = EmitVectorExtract(context, op.Rn, first, op.Size, signed);
+         Operand nHi = EmitVectorExtract(context, op.Rn, first + 1, op.Size, signed);
+
+         Operand mLo = EmitVectorExtract(context, op.Rm, first, op.Size, signed);
+         Operand mHi = EmitVectorExtract(context, op.Rm, first + 1, op.Size, signed);
+         // All four inputs are extracted before either result is produced.
+         result = EmitVectorInsert(context, result, emit(nLo, nHi), pair, op.Size);
+         result = EmitVectorInsert(context, result, emit(mLo, mHi), pairCount + pair, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Signed across-vector reduction whose result keeps the source element size.</summary>
+ /// <remarks>Forwards to <c>EmitVectorAcrossVectorOp</c> with <c>signed: true, isLong: false</c>.</remarks>
+ public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
+
+ /// <summary>Unsigned across-vector reduction whose result keeps the source element size.</summary>
+ /// <remarks>Forwards to <c>EmitVectorAcrossVectorOp</c> with <c>signed: false, isLong: false</c>.</remarks>
+ public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
+
+ /// <summary>Signed across-vector reduction whose result is stored one size step wider.</summary>
+ /// <remarks>Forwards to <c>EmitVectorAcrossVectorOp</c> with <c>signed: true, isLong: true</c>.</remarks>
+ public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
+
+ /// <summary>Unsigned across-vector reduction whose result is stored one size step wider.</summary>
+ /// <remarks>Forwards to <c>EmitVectorAcrossVectorOp</c> with <c>signed: false, isLong: true</c>.</remarks>
+ public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+     => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
+
+ private static void EmitVectorAcrossVectorOp(
+     ArmEmitterContext context,
+     Func2I emit,
+     bool signed,
+     bool isLong)
+ { // Folds all elements of Rn left to right with emit and stores the single result in Rd[0].
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     int laneCount = op.GetBytesCount() >> op.Size;
+     // Element 0 seeds the fold; the loop therefore starts at lane 1.
+     Operand folded = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
+
+     for (int lane = 1; lane < laneCount; lane++)
+     {
+         Operand next = EmitVectorExtract(context, op.Rn, lane, op.Size, signed);
+
+         folded = emit(folded, next);
+     }
+     // "Long" forms store the result one size step wider than the source elements.
+     int resultSize = isLong ? op.Size + 1 : op.Size;
+
+     Operand packed = EmitVectorInsert(context, context.VectorZero(), folded, 0, resultSize);
+
+     context.Copy(GetVec(op.Rd), packed);
+ }
+
+ public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+ { // FP pairwise op: results from Rn pairs fill the low lanes, results from Rm pairs the lanes after them.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+     // sizeF is 0 for FP32 elements and 1 for FP64 elements.
+     int sizeF = op.Size & 1;
+
+     OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+     // The parentheses only spell out the precedence C# already applies to ">> sizeF + 2".
+     int pairCount = op.GetPairsCount() >> (sizeF + 2);
+
+     Operand result = context.VectorZero();
+
+     for (int pair = 0; pair < pairCount; pair++)
+     {
+         int first = pair << 1; // index of the first element of this pair
+
+         Operand nLo = context.VectorExtract(elemType, GetVec(op.Rn), first);
+         Operand nHi = context.VectorExtract(elemType, GetVec(op.Rn), first + 1);
+
+         Operand mLo = context.VectorExtract(elemType, GetVec(op.Rm), first);
+         Operand mHi = context.VectorExtract(elemType, GetVec(op.Rm), first + 1);
+         // All four inputs are extracted before either result is produced.
+         result = context.VectorInsert(result, emit(nLo, nHi), pair);
+         result = context.VectorInsert(result, emit(mLo, mHi), pairCount + pair);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ { // FP pairwise op via shuffles: splits the pairs into two vectors, then applies inst32/inst64 once.
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+ // Lane layouts in the comments below assume each intrinsic follows its x86 instruction's operand order.
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+ // sizeF is 0 for FP32 elements and 1 for FP64 elements.
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m); // [n0, m0, n1, m1]
+
+ Operand zero = context.VectorZero();
+ // part0 = [n0, m0, 0, 0] and part1 = [n1, m1, 0, 0]; the zero vector fills the upper half.
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
+ // Lane 0 combines the Rn pair, lane 1 the Rm pair; the upper lanes combine zero with zero.
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; // selects elements 0 and 2 of each source
+ const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; // selects elements 1 and 3 of each source
+ // part0 = [n0, n2, m0, m2] and part1 = [n1, n3, m1, m3].
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1));
+ // Lanes 0-1 combine the Rn pairs, lanes 2-3 the Rm pairs.
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m); // [n0, m0]
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m); // [n1, m1]
+ // Lane 0 combines the Rn pair, lane 1 the Rm pair. RegisterSize is not checked on this path.
+ context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1));
+ }
+ }
+
+
+ [Flags]
+ public enum SaturatingFlags
+ { // Options for the saturating emit helpers; the last four members are ready-made combinations.
+     Scalar = 0x01, // operate on element 0 only instead of every element
+     Signed = 0x02, // treat elements as signed
+     // Operation selectors read by EmitSaturatingBinaryOp.
+     Add = 0x04,
+     Sub = 0x08,
+     // NOTE(review): read by EmitSaturatingBinaryOp; exact effect is not visible in this part of the file.
+     Accumulate = 0x10,
+     // Scalar presets.
+     ScalarSx = Scalar | Signed,
+     ScalarZx = Scalar,
+     // Vector presets; VectorZx is the empty flag set.
+     VectorSx = Signed,
+     VectorZx = 0
+ }
+
+ /// <summary>Scalar signed saturating unary operation.</summary>
+ /// <remarks>Forwards to <c>EmitSaturatingUnaryOpSx</c> with <c>SaturatingFlags.ScalarSx</c>.</remarks>
+ public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+     => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx);
+
+ /// <summary>Vector signed saturating unary operation.</summary>
+ /// <remarks>Forwards to <c>EmitSaturatingUnaryOpSx</c> with <c>SaturatingFlags.VectorSx</c>.</remarks>
+ public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+     => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx);
+
+ private static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
+ { // Applies emit to sign-extended elements of Rn, saturates each result and writes it to Rd.
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+ // Lanes that are never written below stay zero.
+ Operand res = context.VectorZero();
+ // Only the Scalar flag is inspected here; the other flags are ignored by this helper.
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+ // Scalar forms process element 0 only; vector forms process every element.
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
+ Operand de;
+ // Sizes 0..2 go through the generic signed-to-signed saturation helper.
+ if (op.Size <= 2)
+ {
+ de = EmitSatQ(context, emit(ne), op.Size, signedSrc: true, signedDst: true);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne)); // NOTE(review): presumably the 64-bit abs/neg overflow case - confirm
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ // Scalar, signed: forwards with SaturatingFlags.ScalarSx OR-ed into `flags`. No delegate is
+ // passed, so `flags` is expected to select Add, Sub or Accumulate.
+ public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
+     => EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags);
+
+ // Scalar, unsigned: forwards with SaturatingFlags.ScalarZx OR-ed into `flags`. No delegate is
+ // passed, so `flags` is expected to select Add, Sub or Accumulate.
+ public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+     => EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags);
+
+ // Vector, signed: forwards with SaturatingFlags.VectorSx OR-ed into `flags`. No delegate is
+ // passed, so `flags` is expected to select Add, Sub or Accumulate.
+ public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
+     => EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags);
+
+ // Vector, unsigned: forwards with SaturatingFlags.VectorZx (0) OR-ed into `flags`. No delegate
+ // is passed, so `flags` is expected to select Add, Sub or Accumulate.
+ public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+     => EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags);
+
+ public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+ // Element-wise saturating binary operation; the result vector (unused lanes zero) is copied to Rd.
+ Operand res = context.VectorZero();
+
+ bool scalar = (flags & SaturatingFlags.Scalar) != 0;
+ bool signed = (flags & SaturatingFlags.Signed) != 0;
+
+ bool add = (flags & SaturatingFlags.Add) != 0;
+ bool sub = (flags & SaturatingFlags.Sub) != 0;
+ // Accumulate is only consulted when neither Add nor Sub is set (see the branches below).
+ bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+ // Exactly one of three paths runs: Add/Sub, Accumulate, or the caller-supplied `emit` delegate.
+ if (add || sub)
+ {
+ OpCodeSimdReg opReg = (OpCodeSimdReg)op;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);
+ // Sizes 0..2: add/subtract the extended I64 values, then saturate; Size 3 uses the 64-bit helpers.
+ if (op.Size <= 2)
+ {
+ Operand temp = add ? context.Add (ne, me)
+ : context.Subtract(ne, me);
+
+ de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
+ }
+ else if (add) /* if (op.Size == 3) */
+ {
+ de = EmitBinarySatQAdd(context, ne, me, signed);
+ }
+ else /* if (sub) */
+ {
+ de = EmitBinarySatQSub(context, ne, me, signed);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else if (accumulate)
+ {
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de;
+ Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
+ Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
+ // Rn is read with the opposite signedness of the accumulator Rd before the two are added.
+ if (op.Size <= 2)
+ {
+ Operand temp = context.Add(ne, me);
+
+ de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
+ }
+ else /* if (op.Size == 3) */
+ {
+ de = EmitBinarySatQAccumulate(context, ne, me, signed);
+ }
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+ else
+ {
+ OpCodeSimdReg opReg = (OpCodeSimdReg)op;
+ // `emit` is invoked on this path, so it must be non-null when no Add/Sub/Accumulate flag is set.
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);
+
+ Operand de = EmitSatQ(context, emit(ne, me), op.Size, true, signed);
+
+ res = EmitVectorInsert(context, res, de, index, op.Size);
+ }
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+
+ [Flags]
+ public enum SaturatingNarrowFlags
+ {
+     Scalar = 0x1,
+     SignedSrc = 0x2,
+     SignedDst = 0x4,
+     // Shorthands: <Scalar|Vector><source signedness><destination signedness>.
+     ScalarSxSx = Scalar | SignedSrc | SignedDst,
+     ScalarSxZx = Scalar | SignedSrc,
+     ScalarZxZx = Scalar,
+
+     VectorSxSx = SignedSrc | SignedDst,
+     VectorSxZx = SignedSrc,
+     VectorZxZx = 0
+ }
+
+ public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
+ {
+     // Saturates each double-width element of Rn down to op.Size. Scalar and 64-bit forms
+     // start from a zero vector; the 128-bit vector form keeps Rd and fills its upper elements.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     bool isScalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
+     bool srcSigned = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
+     bool dstSigned = (flags & SaturatingNarrowFlags.SignedDst) != 0;
+
+     int count = isScalar ? 1 : 8 >> op.Size;
+     // Index of the first destination element: 0, or `count` when writing the upper half.
+     int first = (isScalar || op.RegisterSize != RegisterSize.Simd128) ? 0 : count;
+     Operand result = first != 0 ? context.Copy(GetVec(op.Rd)) : context.VectorZero();
+
+     for (int i = 0; i < count; i++)
+     {
+         Operand wide = EmitVectorExtract(context, op.Rn, i, op.Size + 1, srcSigned);
+         Operand narrow = EmitSatQ(context, wide, op.Size, srcSigned, dstSigned);
+
+         result = EmitVectorInsert(context, result, narrow, first + i, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Emits a SoftFallback call that saturates `op` to a narrower element.
+ // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
+ // sizeDst selects TDst (0..2); any other value throws ArgumentOutOfRangeException.
+ public static Operand EmitSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst)
+ {
+     bool validSize = sizeDst >= 0 && sizeDst <= 2;
+
+     if (!validSize)
+     {
+         throw new ArgumentOutOfRangeException(nameof(sizeDst));
+     }
+
+     // One SoftFallback routine exists per (source, destination) signedness pairing.
+     Delegate fallback = signedSrc
+         ? (signedDst
+             ? (Delegate)new _S64_S64_S32(SoftFallback.SignedSrcSignedDstSatQ)
+             : (Delegate)new _U64_S64_S32(SoftFallback.SignedSrcUnsignedDstSatQ))
+         : (signedDst
+             ? (Delegate)new _S64_U64_S32(SoftFallback.UnsignedSrcSignedDstSatQ)
+             : (Delegate)new _U64_U64_S32(SoftFallback.UnsignedSrcUnsignedDstSatQ));
+
+     Operand dstSize = Const(sizeDst);
+
+     return context.Call(fallback, op, dstSize);
+ }
+
+ // TSrc (64bit) == TDst (64bit); signed. Debug builds assert the current opcode has Size == 3.
+ public static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
+ {
+     Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+     _S64_S64 fallback = SoftFallback.UnarySignedSatQAbsOrNeg;
+     return context.Call(fallback, op);
+ }
+
+ // 64-bit saturating add through SoftFallback (TSrcs == TDst == 64bit); signed or unsigned.
+ public static Operand EmitBinarySatQAdd(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+     Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+     if (signed)
+     {
+         return context.Call(new _S64_S64_S64(SoftFallback.BinarySignedSatQAdd), op1, op2);
+     }
+
+     return context.Call(new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQAdd), op1, op2);
+ }
+
+ // 64-bit saturating subtract through SoftFallback (TSrcs == TDst == 64bit); signed or unsigned.
+ public static Operand EmitBinarySatQSub(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+     Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+     if (signed)
+     {
+         return context.Call(new _S64_S64_S64(SoftFallback.BinarySignedSatQSub), op1, op2);
+     }
+
+     return context.Call(new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQSub), op1, op2);
+ }
+
+ // 64-bit saturating accumulate through SoftFallback; the two operands have opposite signedness.
+ public static Operand EmitBinarySatQAccumulate(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+ {
+     Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+     if (signed)
+     {
+         return context.Call(new _S64_U64_S64(SoftFallback.BinarySignedSatQAcc), op1, op2);
+     }
+
+     return context.Call(new _U64_S64_U64(SoftFallback.BinaryUnsignedSatQAcc), op1, op2);
+ }
+
+ // Sign-extending shorthand for EmitVectorExtract (signed: true); elements narrower than
+ // 64 bits are sign-extended to I64.
+ public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
+     => EmitVectorExtract(context, reg, index, size, true);
+
+ // Zero-extending shorthand for EmitVectorExtract (signed: false); elements narrower than
+ // 64 bits are zero-extended to I64.
+ public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
+     => EmitVectorExtract(context, reg, index, size, false);
+
+ public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
+ {
+     // Reads element `index` of vector register `reg`; `size` is log2 of the element width in
+     // bytes. Elements narrower than 64 bits are sign- or zero-extended to I64 as requested.
+     // Out-of-range `index` / `size` values are rejected by ThrowIfInvalid.
+     ThrowIfInvalid(index, size);
+
+     Operand element;
+
+     switch (size)
+     {
+         // 8-bit elements.
+         case 0:
+             element = context.VectorExtract8(GetVec(reg), index);
+
+             element = signed
+                 ? context.SignExtend8(OperandType.I64, element)
+                 : context.ZeroExtend8(OperandType.I64, element);
+             break;
+
+         // 16-bit elements.
+         case 1:
+             element = context.VectorExtract16(GetVec(reg), index);
+
+             element = signed
+                 ? context.SignExtend16(OperandType.I64, element)
+                 : context.ZeroExtend16(OperandType.I64, element);
+             break;
+
+         // 32-bit elements.
+         case 2:
+             element = context.VectorExtract(OperandType.I32, GetVec(reg), index);
+
+             element = signed
+                 ? context.SignExtend32(OperandType.I64, element)
+                 : context.ZeroExtend32(OperandType.I64, element);
+             break;
+
+         // size == 3 (guaranteed by ThrowIfInvalid): already I64, no extension.
+         default:
+             element = context.VectorExtract(OperandType.I64, GetVec(reg), index);
+             break;
+     }
+
+     return element;
+ }
+
+ public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
+ {
+     // Returns `vector` with element `index` replaced by `value`; `size` is log2 of the element
+     // width in bytes. For sizes below 3 the value is first converted from I64 to I32.
+     ThrowIfInvalid(index, size);
+
+     if (size == 3)
+     {
+         return context.VectorInsert(vector, value, index);
+     }
+
+     Operand narrow = context.ConvertI64ToI32(value);
+     switch (size)
+     {
+         case 0: return context.VectorInsert8(vector, narrow, index);
+         case 1: return context.VectorInsert16(vector, narrow, index);
+         default: return context.VectorInsert(vector, narrow, index);
+     }
+ }
+
+ private static void ThrowIfInvalid(int index, int size)
+ {
+     // `size` must be 0..3; `index` must then address one of the 16 >> size elements.
+     if (size < 0 || size > 3)
+     {
+         throw new ArgumentOutOfRangeException(nameof(size));
+     }
+     else if (index < 0 || index >= (16 >> size))
+     {
+         throw new ArgumentOutOfRangeException(nameof(index));
+     }
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
new file mode 100644
index 000000000..551752d24
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -0,0 +1,456 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ public static void And_V(ArmEmitterContext context)
+ {
+     // AND (vector): Rd = Rn & Rm.
+     if (!Optimizations.UseSse2)
+     {
+         // Element-wise fallback used when the SSE2 path is disabled.
+         EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Pand, GetVec(op.Rn), GetVec(op.Rm));
+
+     // The 64-bit form zeroes the upper half of the result.
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void Bic_V(ArmEmitterContext context)
+ {
+     // BIC (vector): Rd = Rn & ~Rm.
+     if (!Optimizations.UseSse2)
+     {
+         // Element-wise fallback used when the SSE2 path is disabled.
+         EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2)));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand n = GetVec(op.Rn);
+     Operand m = GetVec(op.Rm);
+
+     // PANDN complements its first operand, hence Rm goes first: (~Rm) & Rn.
+     Operand result = context.AddIntrinsic(Intrinsic.X86Pandn, m, n);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // BIC (vector, immediate): combines the two operands supplied by EmitVectorImmBinaryOp as
+ // op1 & ~op2 (presumably op1 is the Rd element and op2 the immediate; confirm in that helper).
+ public static void Bic_Vi(ArmEmitterContext context)
+ {
+     EmitVectorImmBinaryOp(context,
+         (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2)));
+ }
+
+ // BIF: runs EmitBifBit with notRm set, so Rd takes the bits of Rn wherever the
+ // corresponding bit of Rm is 0 and keeps its own bits elsewhere.
+ public static void Bif_V(ArmEmitterContext context)
+     => EmitBifBit(context, notRm: true);
+
+ // BIT: runs EmitBifBit with notRm cleared, so Rd takes the bits of Rn wherever the
+ // corresponding bit of Rm is 1 and keeps its own bits elsewhere.
+ public static void Bit_V(ArmEmitterContext context)
+     => EmitBifBit(context, notRm: false);
+
+ private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+ {
+     // Shared BIF/BIT body: Rd = Rd ^ ((Rd ^ Rn) & mask), where mask is Rm (BIT) or ~Rm (BIF),
+     // so bits of Rn replace bits of Rd wherever the mask is set.
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2)
+     {
+         // Fallback: process the register as one or two 64-bit chunks.
+         Operand acc = context.VectorZero();
+
+         int chunks = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+         for (int i = 0; i < chunks; i++)
+         {
+             Operand dst = EmitVectorExtractZx(context, op.Rd, i, 3);
+             Operand src = EmitVectorExtractZx(context, op.Rn, i, 3);
+             Operand sel = EmitVectorExtractZx(context, op.Rm, i, 3);
+
+             if (notRm)
+             {
+                 sel = context.BitwiseNot(sel);
+             }
+
+             Operand merged = context.BitwiseExclusiveOr(dst, src);
+
+             merged = context.BitwiseAnd(merged, sel);
+             merged = context.BitwiseExclusiveOr(merged, dst);
+
+             acc = EmitVectorInsert(context, acc, merged, i, 3);
+         }
+
+         context.Copy(GetVec(op.Rd), acc);
+
+         return;
+     }
+
+     Operand d = GetVec(op.Rd);
+     Operand n = GetVec(op.Rn);
+     Operand m = GetVec(op.Rm);
+
+     Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
+
+     // PANDN complements its first operand, giving (~Rm) & (Rn ^ Rd) for BIF.
+     Intrinsic maskInst = notRm ? Intrinsic.X86Pandn : Intrinsic.X86Pand;
+
+     res = context.AddIntrinsic(maskInst, m, res);
+
+     res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
+
+     // The 64-bit form zeroes the upper half of the result.
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         res = context.VectorZeroUpper64(res);
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Bsl_V(ArmEmitterContext context)
+ {
+     // BSL: Rd = Rm ^ ((Rn ^ Rm) & Rd), i.e. each bit of Rd selects Rn (1) or Rm (0).
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+         {
+             Operand diff = context.BitwiseExclusiveOr(op2, op3);
+             return context.BitwiseExclusiveOr(context.BitwiseAnd(op1, diff), op3);
+         });
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand d = GetVec(op.Rd);
+     Operand n = GetVec(op.Rn);
+     Operand m = GetVec(op.Rm);
+
+     Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+     res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+     res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         res = context.VectorZeroUpper64(res);
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Eor_V(ArmEmitterContext context)
+ {
+     // EOR (vector): Rd = Rn ^ Rm.
+     if (!Optimizations.UseSse2)
+     {
+         // Element-wise fallback used when the SSE2 path is disabled.
+         EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Pxor, GetVec(op.Rn), GetVec(op.Rm));
+
+     // The 64-bit form zeroes the upper half of the result.
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void Not_V(ArmEmitterContext context)
+ {
+     // NOT (vector): Rd = ~Rn.
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
+
+         return;
+     }
+
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // ~Rn is obtained as PANDN(Rn, all-ones) = (~Rn) & ~0.
+     Operand allOnes = X86GetAllElements(context, -1L);
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Pandn, GetVec(op.Rn), allOnes);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void Orn_V(ArmEmitterContext context)
+ {
+     // ORN (vector): Rd = Rn | ~Rm.
+     if (!Optimizations.UseSse2)
+     {
+         // Element-wise fallback used when the SSE2 path is disabled.
+         EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, context.BitwiseNot(op2)));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand n = GetVec(op.Rn);
+     Operand m = GetVec(op.Rm);
+
+     // ~Rm is obtained as PANDN(Rm, all-ones); the result is then OR-ed with Rn.
+     Operand allOnes = X86GetAllElements(context, -1L);
+
+     Operand notM = context.AddIntrinsic(Intrinsic.X86Pandn, m, allOnes);
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Por, notM, n);
+     // The 64-bit form zeroes the upper half of the result.
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void Orr_V(ArmEmitterContext context)
+ {
+     // ORR (vector): Rd = Rn | Rm.
+     if (!Optimizations.UseSse2)
+     {
+         // Element-wise fallback used when the SSE2 path is disabled.
+         EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
+
+         return;
+     }
+
+     OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Por, GetVec(op.Rn), GetVec(op.Rm));
+
+     // The 64-bit form zeroes the upper half of the result.
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // ORR (vector, immediate): ORs the two operands supplied by EmitVectorImmBinaryOp
+ // (presumably the Rd element and the immediate; confirm in that helper).
+ public static void Orr_Vi(ArmEmitterContext context)
+     => EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
+
+ public static void Rbit_V(ArmEmitterContext context)
+ {
+     // RBIT (vector): passes every byte of Rn through SoftFallback.ReverseBits8, one call per byte.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand result = context.VectorZero();
+
+     int byteCount = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+     for (int i = 0; i < byteCount; i++)
+     {
+         // The fallback takes and returns 32-bit values, while extract/insert work on I64.
+         Operand src = context.ConvertI64ToI32(EmitVectorExtractZx(context, op.Rn, i, 0));
+
+         Operand reversed = context.Call(new _U32_U32(SoftFallback.ReverseBits8), src);
+
+         reversed = context.ZeroExtend32(OperandType.I64, reversed);
+
+         result = EmitVectorInsert(context, result, reversed, i, 0);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void Rev16_V(ArmEmitterContext context)
+ {
+     // REV16: swaps the two bytes of every 16-bit container of Rn.
+     if (!Optimizations.UseSsse3)
+     {
+         EmitRev_V(context, containerSize: 1);
+
+         return;
+     }
+
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // PSHUFB control: one source byte index per destination byte (E0 = low, E1 = high 64 bits).
+     const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
+     const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;
+
+     Operand mask = X86GetScalar(context, maskE0);
+
+     mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+     Operand result = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), mask);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void Rev32_V(ArmEmitterContext context)
+ {
+     // REV32: reverses the order of the 8-bit (Size 0) or 16-bit (Size 1) elements inside
+     // every 32-bit container of Rn.
+     if (!Optimizations.UseSsse3)
+     {
+         EmitRev_V(context, containerSize: 2);
+
+         return;
+     }
+
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand n = GetVec(op.Rn);
+
+     // PSHUFB control: one source byte index per destination byte (E0 = low, E1 = high 64 bits).
+     long maskE0;
+     long maskE1;
+
+     if (op.Size == 0)
+     {
+         maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
+         maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;
+     }
+     else /* if (op.Size == 1) */
+     {
+         maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
+         maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;
+     }
+
+     Operand mask = X86GetScalar(context, maskE0);
+
+     mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+     Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         res = context.VectorZeroUpper64(res);
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Rev64_V(ArmEmitterContext context)
+ {
+     // REV64: reverses the order of the 8-bit (Size 0), 16-bit (Size 1) or 32-bit (Size 2)
+     // elements inside every 64-bit container of Rn. The SSSE3 path does it with one PSHUFB;
+     // otherwise the element-by-element EmitRev_V fallback is used.
+     if (!Optimizations.UseSsse3)
+     {
+         EmitRev_V(context, containerSize: 3);
+
+         return;
+     }
+
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand n = GetVec(op.Rn);
+
+     // PSHUFB control: one source byte index per destination byte (E0 = low, E1 = high 64 bits).
+     long maskE0;
+     long maskE1;
+
+     if (op.Size == 0)
+     {
+         // Byte elements: full byte reversal of each 64-bit half.
+         maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
+         maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;
+     }
+     else if (op.Size == 1)
+     {
+         // Halfword elements: halfwords reversed, bytes inside each halfword kept in order.
+         maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
+         maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;
+     }
+     else /* if (op.Size == 2) */
+     {
+         // Word elements: the two words of each 64-bit half are swapped.
+         maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
+         maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;
+     }
+
+     Operand mask = X86GetScalar(context, maskE0);
+
+     mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+     Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         res = context.VectorZeroUpper64(res);
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ private static void EmitRev_V(ArmEmitterContext context, int containerSize)
+ {
+     // Fallback for REV16/32/64: result element `dst` is read from the element at the mirrored
+     // position inside the same container, found by XOR-ing the index with a mask.
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand result = context.VectorZero();
+
+     int total = op.GetBytesCount() >> op.Size;
+
+     // Elements per container minus one (containerSize and op.Size are log2 byte widths).
+     int mirror = (1 << (containerSize - op.Size)) - 1;
+
+     for (int dst = 0; dst < total; dst++)
+     {
+         Operand element = EmitVectorExtractZx(context, op.Rn, dst ^ mirror, op.Size);
+         result = EmitVectorInsert(context, result, element, dst, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/ARMeilleure/Instructions/InstEmitSimdMemory.cs
new file mode 100644
index 000000000..22e9ef7a8
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdMemory.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+ // Load handled by EmitSimdMemMs (isLoad: true), the emitter for OpCodeSimdMemMs opcodes
+ // that walks Reps x Elems x SElems transfers.
+ public static void Ld__Vms(ArmEmitterContext context)
+     => EmitSimdMemMs(context, isLoad: true);
+
+ // Load handled by EmitSimdMemSs (isLoad: true), the emitter for OpCodeSimdMemSs opcodes
+ // (single element per register, optionally replicated to all lanes).
+ public static void Ld__Vss(ArmEmitterContext context)
+     => EmitSimdMemSs(context, isLoad: true);
+
+ // Store handled by EmitSimdMemMs (isLoad: false), the emitter for OpCodeSimdMemMs opcodes
+ // that walks Reps x Elems x SElems transfers.
+ public static void St__Vms(ArmEmitterContext context)
+     => EmitSimdMemMs(context, isLoad: false);
+
+ // Store handled by EmitSimdMemSs (isLoad: false), the emitter for OpCodeSimdMemSs opcodes
+ // (single element per register; replicate mode is load-only).
+ public static void St__Vss(ArmEmitterContext context)
+     => EmitSimdMemSs(context, isLoad: false);
+
+ private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad)
+ {
+     // Transfers Reps x Elems x SElems consecutive elements between memory at [Rn] and registers.
+     OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp;
+
+     Operand n = GetIntOrSP(context, op.Rn);
+     long offset = 0;
+
+     for (int rep = 0; rep < op.Reps; rep++)
+     {
+         for (int elem = 0; elem < op.Elems; elem++)
+         {
+             for (int sElem = 0; sElem < op.SElems; sElem++)
+             {
+                 int rtt = (op.Rt + rep + sElem) & 0x1f;
+                 Operand tt = GetVec(rtt);
+                 Operand address = context.Add(n, Const(offset));
+
+                 if (!isLoad)
+                 {
+                     EmitStoreSimd(context, address, rtt, elem, op.Size);
+                 }
+                 else
+                 {
+                     EmitLoadSimd(context, address, tt, rtt, elem, op.Size);
+                     if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1)
+                     {
+                         context.Copy(tt, context.VectorZeroUpper64(tt));
+                     }
+                 }
+                 offset += 1 << op.Size;
+             }
+         }
+     }
+
+     if (op.WBack)
+     {
+         EmitSimdMemWBack(context, offset);
+     }
+ }
+
+ private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad)
+ {
+     OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp;
+     // One element per register is transferred; SElems consecutive registers are visited.
+     Operand n = GetIntOrSP(context, op.Rn);
+
+     long byteOffset = 0;
+
+     if (!op.Replicate)
+     {
+         for (int sElem = 0; sElem < op.SElems; sElem++)
+         {
+             int reg = (op.Rt + sElem) & 0x1f;
+
+             Operand vec = GetVec(reg);
+
+             Operand address = context.Add(n, Const(byteOffset));
+             // Only element op.Index of the register is touched.
+             if (isLoad)
+             {
+                 EmitLoadSimd(context, address, vec, reg, op.Index, op.Size);
+             }
+             else
+             {
+                 EmitStoreSimd(context, address, reg, op.Index, op.Size);
+             }
+
+             byteOffset += 1 << op.Size;
+         }
+     }
+     else
+     {
+         // Only loads use the replicate mode.
+         Debug.Assert(isLoad, "Replicate mode is not valid for stores.");
+
+         int lanes = op.GetBytesCount() >> op.Size;
+
+         for (int sElem = 0; sElem < op.SElems; sElem++)
+         {
+             int reg = (op.Rt + sElem) & 0x1f;
+
+             Operand vec = GetVec(reg);
+
+             Operand address = context.Add(n, Const(byteOffset));
+             // The same memory element is loaded into every lane of the register.
+             for (int lane = 0; lane < lanes; lane++)
+             {
+                 EmitLoadSimd(context, address, vec, reg, lane, op.Size);
+             }
+
+             if (op.RegisterSize == RegisterSize.Simd64)
+             {
+                 context.Copy(vec, context.VectorZeroUpper64(vec));
+             }
+
+             byteOffset += 1 << op.Size;
+         }
+     }
+
+     if (op.WBack)
+     {
+         EmitSimdMemWBack(context, byteOffset);
+     }
+ }
+
+ private static void EmitSimdMemWBack(ArmEmitterContext context, long offset)
+ {
+     // Write-back of the base register: Rn += Rm when a register other than ZR is encoded,
+     // otherwise Rn += `offset` (the byte count accumulated by the calling emitter).
+     // The current opcode is read through its OpCodeMemReg view for Rn / Rm.
+     OpCodeMemReg op = (OpCodeMemReg)context.CurrOp;
+
+     Operand n = GetIntOrSP(context, op.Rn);
+
+     // Rm == ZR selects the immediate form.
+     bool hasRegisterOffset = op.Rm != RegisterAlias.Zr;
+
+     Operand increment = hasRegisterOffset
+         ? GetIntOrZR(context, op.Rm)
+         : Const(offset);
+
+     context.Copy(n, context.Add(n, increment));
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs
new file mode 100644
index 000000000..47359161f
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs
@@ -0,0 +1,794 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+#region "Masks"
+ private static readonly long[] _masksE0_TrnUzpXtn = new long[] // Each entry packs eight byte indices, one per 8-bit field.
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
+ };
+ // Counterpart table holding the complementary byte indices (odd bytes, odd halfwords, odd words).
+ private static readonly long[] _masksE1_TrnUzp = new long[]
+ {
+ 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0,
+ 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0
+ };
+ // NOTE(review): the Uzp tables below have two entries each; presumably indexed by element size, confirm at the use sites.
+ private static readonly long[] _masksE0_Uzp = new long[]
+ {
+ 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0
+ };
+ // NOTE(review): all four tables look like byte-shuffle (PSHUFB) control masks; their users are outside this chunk.
+ private static readonly long[] _masksE1_Uzp = new long[]
+ {
+ 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
+ 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0
+ };
+#endregion
+
+ public static void Dup_Gp(ArmEmitterContext context)
+ {
+     // DUP (general): broadcasts the value of integer register Rn to every element of Rd.
+     OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+     Operand n = GetIntOrZR(context, op.Rn);
+
+     if (!Optimizations.UseSse2)
+     {
+         Operand filled = context.VectorZero();
+
+         int count = op.GetBytesCount() >> op.Size;
+
+         for (int i = 0; i < count; i++)
+         {
+             filled = EmitVectorInsert(context, filled, n, i, op.Size);
+         }
+
+         context.Copy(GetVec(op.Rd), filled);
+
+         return;
+     }
+
+     // Bytes / halfwords are first replicated across 32 bits with a multiply.
+     switch (op.Size)
+     {
+         case 0: n = context.ZeroExtend8(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break;
+         case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break;
+         case 2: n = context.ZeroExtend32(n.Type, n); break;
+     }
+
+     Operand res = context.VectorInsert(context.VectorZero(), n, 0);
+
+     if (op.Size >= 3)
+     {
+         res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+     }
+     else if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 0xf0 picks lanes 0, 0, 3, 3: the value twice, then the still-zero lane 3 twice.
+         res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
+     }
+     else
+     {
+         res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Dup_S(ArmEmitterContext context)
+ {
+     // DUP (scalar): element DstIndex of Rn becomes element 0 of Rd; the rest of Rd is zero.
+     OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+     Operand element = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+     Operand result = EmitVectorInsert(context, context.VectorZero(), element, 0, op.Size);
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ public static void Dup_V(ArmEmitterContext context)
+ {
+     // DUP (element): broadcasts element DstIndex of Rn to every element of Rd.
+     OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+
+     if (!Optimizations.UseSse2)
+     {
+         Operand src = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+
+         Operand filled = context.VectorZero();
+
+         int count = op.GetBytesCount() >> op.Size;
+         for (int i = 0; i < count; i++)
+         {
+             filled = EmitVectorInsert(context, filled, src, i, op.Size);
+         }
+
+         context.Copy(GetVec(op.Rd), filled);
+
+         return;
+     }
+
+     Operand res = GetVec(op.Rn);
+
+     if (op.Size == 0)
+     {
+         // Shift the byte down to position 0, widen byte -> word -> dword, broadcast the dword.
+         if (op.DstIndex != 0)
+         {
+             res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
+         }
+
+         res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
+         res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+         res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+     }
+     else if (op.Size == 1)
+     {
+         if (op.DstIndex != 0)
+         {
+             res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
+         }
+
+         res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+         res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+     }
+     else if (op.Size == 2)
+     {
+         // The 2-bit lane index is repeated in all four selector fields.
+         res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(op.DstIndex * 0b01010101));
+     }
+     else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
+     {
+         res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+     }
+     else if (op.DstIndex == 1)
+     {
+         res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
+     }
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         res = context.VectorZeroUpper64(res);
+     }
+
+     context.Copy(GetVec(op.Rd), res);
+ }
+
+ public static void Ext_V(ArmEmitterContext context)
+ {
+     // EXT: Rd receives the bytes of the Rn:Rm concatenation starting at byte Imm4 of Rn,
+     // i.e. the upper bytes of Rn followed by the lower bytes of Rm.
+     OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
+
+     if (!Optimizations.UseSse2)
+     {
+         Operand acc = context.VectorZero();
+
+         int bytes = op.GetBytesCount();
+
+         int position = op.Imm4 & (bytes - 1);
+
+         for (int i = 0; i < bytes; i++)
+         {
+             // Bytes come from Rn until the end of the register is reached, then wrap into Rm.
+             int reg = op.Imm4 + i < bytes ? op.Rn : op.Rm;
+
+             Operand e = EmitVectorExtractZx(context, reg, position, 0);
+
+             position = (position + 1) & (bytes - 1);
+             acc = EmitVectorInsert(context, acc, e, i, 0);
+         }
+
+         context.Copy(GetVec(op.Rd), acc);
+
+         return;
+     }
+
+     bool is64 = op.RegisterSize == RegisterSize.Simd64;
+
+     Operand n = GetVec(op.Rn);
+
+     if (is64)
+     {
+         n = context.AddIntrinsic(Intrinsic.X86Movlhps, n, context.VectorZero());
+     }
+
+     n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(op.Imm4));
+
+     Operand m = context.AddIntrinsic(Intrinsic.X86Pslldq, GetVec(op.Rm), Const(op.GetBytesCount() - op.Imm4));
+
+     if (is64)
+     {
+         m = context.AddIntrinsic(Intrinsic.X86Movlhps, m, context.VectorZero());
+     }
+
+     context.Copy(GetVec(op.Rd), context.AddIntrinsic(Intrinsic.X86Por, n, m));
+ }
+
+ public static void Fcsel_S(ArmEmitterContext context)
+ {
+     // FCSEL: Rd = cond ? Rn : Rm on scalar element 0 (FP32 or FP64); the rest of Rd is zero.
+     OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
+
+     OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
+
+     // Moves element 0 of `reg` into a zeroed vector and copies that vector to Rd.
+     void SelectScalar(int reg)
+     {
+         Operand element = context.VectorExtract(type, GetVec(reg), 0);
+         context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), element, 0));
+     }
+
+     Operand lblTrue = Label();
+     Operand lblEnd = Label();
+
+     context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
+
+     SelectScalar(op.Rm);
+     context.Branch(lblEnd);
+
+     context.MarkLabel(lblTrue);
+     SelectScalar(op.Rn);
+
+     context.MarkLabel(lblEnd);
+ }
+
+ // FMOV (vector -> general): copies the raw bits of element 0 of Rn into integer register Rd.
+ public static void Fmov_Ftoi(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     // Element size is Size + 2: 32 bits for Size 0, 64 bits for Size 1.
+     SetIntOrZR(context, op.Rd, EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2));
+ }
+
+ // FMOV (vector upper half -> general): copies 64-bit element 1 of Rn into integer register Rd.
+ public static void Fmov_Ftoi1(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+     Operand upper = EmitVectorExtractZx(context, op.Rn, 1, 3);
+     SetIntOrZR(context, op.Rd, upper);
+ }
+
+ // FMOV (general -> vector): Rd = zero vector with element 0 (size Size + 2) set from Rn.
+ public static void Fmov_Itof(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+     Operand n = GetIntOrZR(context, op.Rn);
+     Operand vector = EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2);
+     context.Copy(GetVec(op.Rd), vector);
+ }
+
+ // FMOV (general -> vector upper half): writes Rn into 64-bit element 1 of Rd, keeping element 0.
+ public static void Fmov_Itof1(ArmEmitterContext context)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+     Operand n = GetIntOrZR(context, op.Rn);
+     Operand d = GetVec(op.Rd);
+     context.Copy(d, EmitVectorInsert(context, d, n, 1, 3));
+ }
+
+ /// <summary>
+ /// Scalar floating-point move: copies element 0 of Rn (FP32 when <c>op.Size</c> is 0,
+ /// FP64 otherwise) into a zero vector and writes it to Rd.
+ /// </summary>
+ public static void Fmov_S(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimd)context.CurrOp;
+
+     OperandType elemType = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+     Operand elem = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+
+     Operand dest = GetVec(op.Rd);
+
+     context.Copy(dest, context.VectorInsert(context.VectorZero(), elem, 0));
+ }
+
+ /// <summary>
+ /// Scalar move of an immediate: builds a scalar vector from <c>op.Immediate</c> with
+ /// <c>X86GetScalar</c> and writes it to Rd. For size 0 the immediate is narrowed to
+ /// <c>int</c> first, which selects the 32-bit overload.
+ /// </summary>
+ public static void Fmov_Si(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdFmov)context.CurrOp;
+
+     Operand dest = GetVec(op.Rd);
+
+     Operand scalar = op.Size == 0
+         ? X86GetScalar(context, (int)op.Immediate)
+         : X86GetScalar(context, op.Immediate);
+
+     context.Copy(dest, scalar);
+ }
+
+ /// <summary>
+ /// Vector move of an immediate: replicates <c>op.Immediate</c> into every element
+ /// (element size <c>op.Size + 2</c>) of a zero-initialised vector and writes it to Rd.
+ /// The element count is 4 (Simd128) or 2 (otherwise), shifted right by <c>op.Size</c>.
+ /// </summary>
+ public static void Fmov_Vi(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdImm)context.CurrOp;
+
+     Operand immediate = Const(op.Immediate);
+
+     Operand vector = context.VectorZero();
+
+     int baseCount = 2;
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         baseCount = 4;
+     }
+
+     int count = baseCount >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         vector = EmitVectorInsert(context, vector, immediate, lane, op.Size + 2);
+     }
+
+     context.Copy(GetVec(op.Rd), vector);
+ }
+
+ /// <summary>
+ /// Inserts integer register Rn into element <c>op.DstIndex</c> of vector register Rd,
+ /// keeping the other elements of Rd.
+ /// </summary>
+ public static void Ins_Gp(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdIns)context.CurrOp;
+
+     Operand dest = GetVec(op.Rd);
+     Operand source = GetIntOrZR(context, op.Rn);
+
+     Operand merged = EmitVectorInsert(context, dest, source, op.DstIndex, op.Size);
+
+     context.Copy(dest, merged);
+ }
+
+ /// <summary>
+ /// Copies element <c>op.SrcIndex</c> of vector register Rn into element
+ /// <c>op.DstIndex</c> of vector register Rd, keeping the other elements of Rd.
+ /// </summary>
+ public static void Ins_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdIns)context.CurrOp;
+
+     Operand dest = GetVec(op.Rd);
+     Operand elem = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
+
+     Operand merged = EmitVectorInsert(context, dest, elem, op.DstIndex, op.Size);
+
+     context.Copy(dest, merged);
+ }
+
+ /// <summary>
+ /// Vector move-immediate. Uses <c>EmitMoviMvni</c> (not: false) when SSE2 is enabled,
+ /// otherwise the generic immediate helper with an identity operation.
+ /// </summary>
+ public static void Movi_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorImmUnaryOp(context, (imm) => imm);
+
+         return;
+     }
+
+     EmitMoviMvni(context, not: false);
+ }
+
+ /// <summary>
+ /// Vector move-inverted-immediate. Uses <c>EmitMoviMvni</c> (not: true) when SSE2 is
+ /// enabled, otherwise the generic immediate helper with a bitwise-not operation.
+ /// </summary>
+ public static void Mvni_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSse2)
+     {
+         EmitVectorImmUnaryOp(context, (imm) => context.BitwiseNot(imm));
+
+         return;
+     }
+
+     EmitMoviMvni(context, not: true);
+ }
+
+ /// <summary>
+ /// Extracts element <c>op.DstIndex</c> of Rn with <c>EmitVectorExtractSx</c> and
+ /// stores it to integer register Rd. When the register size is Simd64 the value is
+ /// additionally passed through <c>ZeroExtend32</c> before being stored.
+ /// </summary>
+ public static void Smov_S(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdIns)context.CurrOp;
+
+     Operand value = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
+
+     bool isSimd64 = op.RegisterSize == RegisterSize.Simd64;
+
+     SetIntOrZR(context, op.Rd, isSimd64 ? context.ZeroExtend32(OperandType.I64, value) : value);
+ }
+
+ // Table lookup. Rm supplies the per-byte selectors and the table consists of
+ // op.Size consecutive vector registers starting at Rn (register numbers wrap
+ // with "& 0x1f"). With SSSE3 the lookup is built from PSHUFB; otherwise it is
+ // delegated to the SoftFallback.TblN_V64 / TblN_V128 helpers.
+ public static void Tbl_V(ArmEmitterContext context)
+ {
+ OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ // 0x0F in every byte: the largest selector valid for one 16-byte table register.
+ Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
+
+ // Signed compare: bytes of m greater than 15 become 0xFF. OR-ing that into m
+ // sets bit 7 of those selectors; PSHUFB writes zero for selectors with bit 7 set
+ // (per the x86 PSHUFB definition), and selectors that were already negative
+ // keep their own bit 7.
+ Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+
+ mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+
+ // Remaining table registers: rebase the selectors by 16 * index, apply the same
+ // out-of-range masking, and OR the partial lookup into the result.
+ for (int index = 1; index < op.Size; index++)
+ {
+ Operand ni = GetVec((op.Rn + index) & 0x1f);
+
+ Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index);
+
+ Operand mMinusMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, indexMask);
+
+ Operand mMask2 = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mMinusMask, mask);
+
+ mMask2 = context.AddIntrinsic(Intrinsic.X86Por, mMask2, mMinusMask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask2);
+
+ res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+ }
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ // Argument layout for the fallback: [0] = selector vector (Rm), [1..] = table registers.
+ Operand[] args = new Operand[1 + op.Size];
+
+ args[0] = GetVec(op.Rm);
+
+ for (int index = 0; index < op.Size; index++)
+ {
+ args[1 + index] = GetVec((op.Rn + index) & 0x1f);
+ }
+
+ // NOTE(review): dlg stays null if op.Size is outside 1..4 - presumably the decoder
+ // guarantees that range; confirm against OpCodeSimdTbl.
+ Delegate dlg = null;
+
+ switch (op.Size)
+ {
+ case 1: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V64)
+ : (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V128); break;
+
+ case 2: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V64)
+ : (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V128); break;
+
+ case 3: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V64)
+ : (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V128); break;
+
+ case 4: dlg = op.RegisterSize == RegisterSize.Simd64
+ ? (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V64)
+ : (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V128); break;
+ }
+
+ context.Copy(GetVec(op.Rd), context.Call(dlg, args));
+ }
+ }
+
+ /// <summary>Emits Trn1 (vector) by calling <c>EmitVectorTranspose</c> with part 0.</summary>
+ public static void Trn1_V(ArmEmitterContext context) => EmitVectorTranspose(context, 0);
+
+ /// <summary>Emits Trn2 (vector) by calling <c>EmitVectorTranspose</c> with part 1.</summary>
+ public static void Trn2_V(ArmEmitterContext context) => EmitVectorTranspose(context, 1);
+
+ /// <summary>
+ /// Extracts element <c>op.DstIndex</c> of Rn with <c>EmitVectorExtractZx</c> and
+ /// stores it to integer register Rd.
+ /// </summary>
+ public static void Umov_S(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdIns)context.CurrOp;
+
+     SetIntOrZR(context, op.Rd, EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size));
+ }
+
+ /// <summary>Emits Uzp1 (vector) by calling <c>EmitVectorUnzip</c> with part 0.</summary>
+ public static void Uzp1_V(ArmEmitterContext context) => EmitVectorUnzip(context, 0);
+
+ /// <summary>Emits Uzp2 (vector) by calling <c>EmitVectorUnzip</c> with part 1.</summary>
+ public static void Uzp2_V(ArmEmitterContext context) => EmitVectorUnzip(context, 1);
+
+ /// <summary>
+ /// Narrowing extract: each element of Rn (read at size <c>op.Size + 1</c>) is written
+ /// to Rd at size <c>op.Size</c>. For Simd128 the narrowed elements go to the upper
+ /// half of Rd and the existing lower half is kept; otherwise they go to the lower
+ /// half and the rest of Rd is zero.
+ /// </summary>
+ public static void Xtn_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimd)context.CurrOp;
+
+     bool isSimd128 = op.RegisterSize == RegisterSize.Simd128;
+
+     if (!Optimizations.UseSsse3)
+     {
+         // Element-by-element fallback.
+         int count = 8 >> op.Size;
+
+         int destBase = isSimd128 ? count : 0;
+
+         Operand acc;
+
+         if (destBase == 0)
+         {
+             acc = context.VectorZero();
+         }
+         else
+         {
+             acc = context.Copy(GetVec(op.Rd));
+         }
+
+         for (int lane = 0; lane < count; lane++)
+         {
+             Operand wide = EmitVectorExtractZx(context, op.Rn, lane, op.Size + 1);
+
+             acc = EmitVectorInsert(context, acc, wide, destBase + lane, op.Size);
+         }
+
+         context.Copy(GetVec(op.Rd), acc);
+
+         return;
+     }
+
+     // Low 64 bits of Rd with the upper 64 bits taken from a zero vector.
+     Operand combined = context.AddIntrinsic(Intrinsic.X86Movlhps, GetVec(op.Rd), context.VectorZero());
+
+     // Shuffle Rn with the per-size mask replicated to all elements.
+     Operand narrowed = context.AddIntrinsic(
+         Intrinsic.X86Pshufb,
+         GetVec(op.Rn),
+         X86GetAllElements(context, _masksE0_TrnUzpXtn[op.Size]));
+
+     Intrinsic moveInst;
+
+     if (isSimd128)
+     {
+         moveInst = Intrinsic.X86Movlhps;
+     }
+     else
+     {
+         moveInst = Intrinsic.X86Movhlps;
+     }
+
+     combined = context.AddIntrinsic(moveInst, combined, narrowed);
+
+     context.Copy(GetVec(op.Rd), combined);
+ }
+
+ /// <summary>Emits Zip1 (vector) by calling <c>EmitVectorZip</c> with part 0.</summary>
+ public static void Zip1_V(ArmEmitterContext context) => EmitVectorZip(context, 0);
+
+ /// <summary>Emits Zip2 (vector) by calling <c>EmitVectorZip</c> with part 1.</summary>
+ public static void Zip2_V(ArmEmitterContext context) => EmitVectorZip(context, 1);
+
+ /// <summary>
+ /// Shared SSE2 path for Movi_V / Mvni_V: expands <c>op.Immediate</c> into a vector
+ /// constant and writes it to Rd.
+ /// </summary>
+ /// <param name="context">Emitter context for the current instruction.</param>
+ /// <param name="not">When true the expanded immediate is bitwise-inverted first.</param>
+ private static void EmitMoviMvni(ArmEmitterContext context, bool not)
+ {
+     var op = (OpCodeSimdImm)context.CurrOp;
+
+     long value = op.Immediate;
+
+     // Replicate byte / halfword immediates across a 32-bit word.
+     if (op.Size == 0)
+     {
+         value *= 0x01010101;
+     }
+     else if (op.Size == 1)
+     {
+         value *= 0x00010001;
+     }
+
+     if (not)
+     {
+         value = ~value;
+     }
+
+     // Sizes below 3 are broadcast as 32-bit elements, size 3 as 64-bit elements.
+     Operand vector = op.Size < 3
+         ? X86GetAllElements(context, (int)value)
+         : X86GetAllElements(context, value);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         vector = context.VectorZeroUpper64(vector);
+     }
+
+     context.Copy(GetVec(op.Rd), vector);
+ }
+
+ /// <summary>
+ /// Shared implementation of Trn1_V / Trn2_V. The result interleaves, pair by pair,
+ /// element <c>2 * i + part</c> of Rn and of Rm into elements <c>2 * i</c> and
+ /// <c>2 * i + 1</c> of Rd (as spelled out by the non-SSSE3 path).
+ /// </summary>
+ /// <param name="context">Emitter context for the current instruction.</param>
+ /// <param name="part">0 selects the first element of each pair, 1 the second.</param>
+ private static void EmitVectorTranspose(ArmEmitterContext context, int part)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSsse3)
+     {
+         Operand acc = context.VectorZero();
+
+         int pairs = op.GetPairsCount() >> op.Size;
+
+         for (int destIndex = 0; destIndex < (pairs << 1); destIndex += 2)
+         {
+             Operand fromN = EmitVectorExtractZx(context, op.Rn, destIndex + part, op.Size);
+             Operand fromM = EmitVectorExtractZx(context, op.Rm, destIndex + part, op.Size);
+
+             acc = EmitVectorInsert(context, acc, fromN, destIndex, op.Size);
+             acc = EmitVectorInsert(context, acc, fromM, destIndex + 1, op.Size);
+         }
+
+         context.Copy(GetVec(op.Rd), acc);
+
+         return;
+     }
+
+     // Sizes below 3 pre-shuffle both sources with a 128-bit mask assembled from the
+     // two per-size 64-bit tables; size 3 goes straight to the unpack.
+     bool preShuffle = op.Size < 3;
+
+     Operand shuffleMask = null;
+
+     if (preShuffle)
+     {
+         long lowHalf = _masksE0_TrnUzpXtn[op.Size];
+         long highHalf = _masksE1_TrnUzp[op.Size];
+
+         shuffleMask = X86GetScalar(context, lowHalf);
+
+         shuffleMask = EmitVectorInsert(context, shuffleMask, Const(highHalf), 1, 3);
+     }
+
+     Operand left = GetVec(op.Rn);
+
+     if (preShuffle)
+     {
+         left = context.AddIntrinsic(Intrinsic.X86Pshufb, left, shuffleMask);
+     }
+
+     Operand right = GetVec(op.Rm);
+
+     if (preShuffle)
+     {
+         right = context.AddIntrinsic(Intrinsic.X86Pshufb, right, shuffleMask);
+     }
+
+     Intrinsic unpackInst;
+
+     if (part == 0)
+     {
+         unpackInst = X86PunpcklInstruction[op.Size];
+     }
+     else
+     {
+         unpackInst = X86PunpckhInstruction[op.Size];
+     }
+
+     Operand result = context.AddIntrinsic(unpackInst, left, right);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Shared implementation of Uzp1_V (part 0) and Uzp2_V (part 1).
+ // As the non-SSSE3 path spells out, element (2 * i + part) of Rn goes to element i
+ // of Rd and element (2 * i + part) of Rm goes to element (pairs + i) of Rd.
+ private static void EmitVectorUnzip(ArmEmitterContext context, int part)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ if (Optimizations.UseSsse3)
+ {
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ Operand mask = null;
+
+ // Sizes below 3: build a 128-bit PSHUFB mask from the two per-size 64-bit tables
+ // (low half from _masksE0_TrnUzpXtn, element 1 from _masksE1_TrnUzp).
+ // NOTE(review): the tables are defined outside this view; presumably they group
+ // the even elements in one half and the odd elements in the other - confirm.
+ if (op.Size < 3)
+ {
+ long maskE0 = _masksE0_TrnUzpXtn[op.Size];
+ long maskE1 = _masksE1_TrnUzp [op.Size];
+
+ mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+ }
+
+ Operand n = GetVec(op.Rn);
+
+ if (op.Size < 3)
+ {
+ n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
+ }
+
+ Operand m = GetVec(op.Rm);
+
+ if (op.Size < 3)
+ {
+ m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
+ }
+
+ // Combine the low (part 0) or high (part 1) 64-bit halves of the two sources.
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ Operand res = context.AddIntrinsic(punpckInst, n, m);
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ else
+ {
+ Operand n = GetVec(op.Rn);
+ Operand m = GetVec(op.Rm);
+
+ // Non-Simd128 case: first interleave the low elements of n and m at the element size.
+ Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
+
+ Operand res = context.AddIntrinsic(punpcklInst, n, m);
+
+ // Only sizes 0 and 1 need the extra shuffle, using the dedicated _masksE0_Uzp /
+ // _masksE1_Uzp tables (also defined outside this view).
+ if (op.Size < 2)
+ {
+ long maskE0 = _masksE0_Uzp[op.Size];
+ long maskE1 = _masksE1_Uzp[op.Size];
+
+ Operand mask = X86GetScalar(context, maskE0);
+
+ mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
+
+ res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
+ }
+
+ Intrinsic punpckInst = part == 0
+ ? Intrinsic.X86Punpcklqdq
+ : Intrinsic.X86Punpckhqdq;
+
+ // Pairing the selected 64-bit half with a zero vector clears the upper half of the result.
+ res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+ else
+ {
+ // Element-by-element fallback.
+ Operand res = context.VectorZero();
+
+ int pairs = op.GetPairsCount() >> op.Size;
+
+ for (int index = 0; index < pairs; index++)
+ {
+ int idx = index << 1;
+
+ Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
+ Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
+
+ // Rn elements fill the first "pairs" slots, Rm elements the following ones.
+ res = EmitVectorInsert(context, res, ne, index, op.Size);
+ res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
+ }
+
+ /// <summary>
+ /// Shared implementation of Zip1_V / Zip2_V. As spelled out by the non-SSE2 path,
+ /// element <c>base + i</c> of Rn and of Rm are written to elements <c>2 * i</c> and
+ /// <c>2 * i + 1</c> of Rd, where base is 0 for part 0 and the pair count otherwise.
+ /// </summary>
+ /// <param name="context">Emitter context for the current instruction.</param>
+ /// <param name="part">0 interleaves the first half of the sources, non-zero the second half.</param>
+ private static void EmitVectorZip(ArmEmitterContext context, int part)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     if (!Optimizations.UseSse2)
+     {
+         Operand acc = context.VectorZero();
+
+         int pairs = op.GetPairsCount() >> op.Size;
+
+         int sourceBase = part == 0 ? 0 : pairs;
+
+         for (int pair = 0; pair < pairs; pair++)
+         {
+             int destIndex = pair << 1;
+
+             Operand fromN = EmitVectorExtractZx(context, op.Rn, sourceBase + pair, op.Size);
+             Operand fromM = EmitVectorExtractZx(context, op.Rm, sourceBase + pair, op.Size);
+
+             acc = EmitVectorInsert(context, acc, fromN, destIndex, op.Size);
+             acc = EmitVectorInsert(context, acc, fromM, destIndex + 1, op.Size);
+         }
+
+         context.Copy(GetVec(op.Rd), acc);
+
+         return;
+     }
+
+     Operand left = GetVec(op.Rn);
+     Operand right = GetVec(op.Rm);
+
+     Operand result;
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         // Full width: a single low/high unpack at the element size.
+         Intrinsic unpackInst = part == 0
+             ? X86PunpcklInstruction[op.Size]
+             : X86PunpckhInstruction[op.Size];
+
+         result = context.AddIntrinsic(unpackInst, left, right);
+     }
+     else
+     {
+         // Otherwise: interleave the low elements, then keep the low or high 64 bits
+         // of that, paired with a zero vector for the upper half.
+         result = context.AddIntrinsic(X86PunpcklInstruction[op.Size], left, right);
+
+         Intrinsic halfSelectInst = part == 0
+             ? Intrinsic.X86Punpcklqdq
+             : Intrinsic.X86Punpckhqdq;
+
+         result = context.AddIntrinsic(halfSelectInst, result, context.VectorZero());
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs
new file mode 100644
index 000000000..1aae491df
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -0,0 +1,1057 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ // Emitter callback taking two operands and returning the resulting operand
+ // (the shape of the "(op1, op2) => context.ShiftLeft(op1, op2)" lambdas in this file).
+ // The generic arguments are required: a bare "Func" does not name a type.
+ using Func2I = Func<Operand, Operand, Operand>;
+
+ static partial class InstEmit
+ {
+#region "Masks"
+ // Shuffle control constants used with X86Pshufb by Rshrn_V and Shrn_V, indexed by
+ // op.Size. Each byte of an entry is a source byte index, listed here from the most
+ // to the least significant byte:
+ //   [0] 14, 12, 10, 8, 6, 4, 2, 0
+ //   [1] 13, 12,  9, 8, 5, 4, 1, 0
+ //   [2] 11, 10,  9, 8, 3, 2, 1, 0
+ private static readonly long[] _masks_RshrnShrn = new long[]
+ {
+     0x0E0C0A0806040200L,
+     0x0D0C090805040100L,
+     0x0B0A090803020100L
+ };
+#endregion
+
+ /// <summary>
+ /// Rounding shift right narrow (immediate), vector. With SSSE3: adds the rounding
+ /// constant <c>1 &lt;&lt; (shift - 1)</c> to each source element, shifts right logically,
+ /// shuffles the narrowed bytes together and merges them with the low half of Rd.
+ /// Without SSSE3 it defers to <c>EmitVectorShrImmNarrowOpZx</c> with round: true.
+ /// </summary>
+ public static void Rshrn_V(ArmEmitterContext context)
+ {
+     if (!Optimizations.UseSsse3)
+     {
+         EmitVectorShrImmNarrowOpZx(context, round: true);
+
+         return;
+     }
+
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     int shift = GetImmShr(op);
+
+     long rounding = 1L << (shift - 1);
+
+     Operand dest = GetVec(op.Rd);
+     Operand source = GetVec(op.Rn);
+
+     // Low 64 bits of Rd, with the upper 64 bits taken from a zero vector.
+     Operand destLow = context.AddIntrinsic(Intrinsic.X86Movlhps, dest, context.VectorZero());
+
+     // Source elements are one size step wider than op.Size.
+     int wideSize = op.Size + 1;
+
+     Operand roundVector = null;
+
+     if (wideSize == 1)
+     {
+         roundVector = X86GetAllElements(context, (int)rounding * 0x00010001);
+     }
+     else if (wideSize == 2)
+     {
+         roundVector = X86GetAllElements(context, (int)rounding);
+     }
+     else if (wideSize == 3)
+     {
+         roundVector = X86GetAllElements(context, rounding);
+     }
+
+     Operand result = context.AddIntrinsic(X86PaddInstruction[wideSize], source, roundVector);
+
+     result = context.AddIntrinsic(X86PsrlInstruction[wideSize], result, Const(shift));
+
+     Operand narrowMask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
+
+     result = context.AddIntrinsic(Intrinsic.X86Pshufb, result, narrowMask);
+
+     Intrinsic moveInst;
+
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         moveInst = Intrinsic.X86Movlhps;
+     }
+     else
+     {
+         moveInst = Intrinsic.X86Movhlps;
+     }
+
+     result = context.AddIntrinsic(moveInst, destLow, result);
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>
+ /// Scalar shift left by immediate: applies <c>ShiftLeft</c> by the amount returned by
+ /// <c>GetImmShl</c> through <c>EmitScalarUnaryOpZx</c>.
+ /// </summary>
+ public static void Shl_S(ArmEmitterContext context)
+ {
+     int amount = GetImmShl((OpCodeSimdShImm)context.CurrOp);
+
+     EmitScalarUnaryOpZx(context, (value) => context.ShiftLeft(value, Const(amount)));
+ }
+
+ /// <summary>
+ /// Vector shift left by immediate. Uses the per-size SSE2 shift-left instruction for
+ /// element sizes above 0; otherwise falls back to <c>EmitVectorUnaryOpZx</c>.
+ /// </summary>
+ public static void Shl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     int amount = GetImmShl(op);
+
+     if (!Optimizations.UseSse2 || op.Size <= 0)
+     {
+         EmitVectorUnaryOpZx(context, (value) => context.ShiftLeft(value, Const(amount)));
+
+         return;
+     }
+
+     Operand source = GetVec(op.Rn);
+
+     Operand result = context.AddIntrinsic(X86PsllInstruction[op.Size], source, Const(amount));
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>
+ /// Shift left long by the element width (<c>8 &lt;&lt; op.Size</c> bits). With SSE4.1 the
+ /// source half is widened with the per-size extend instruction and then shifted at
+ /// the wider element size; otherwise <c>EmitVectorShImmWidenBinaryZx</c> is used.
+ /// </summary>
+ public static void Shll_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimd)context.CurrOp;
+
+     int amount = 8 << op.Size;
+
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorShImmWidenBinaryZx(context, (value, count) => context.ShiftLeft(value, count), amount);
+
+         return;
+     }
+
+     Operand source = GetVec(op.Rn);
+
+     // For Simd128 the upper 64 bits are the input: move them down by 8 bytes.
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         source = context.AddIntrinsic(Intrinsic.X86Psrldq, source, Const(8));
+     }
+
+     Operand result = context.AddIntrinsic(X86PmovsxInstruction[op.Size], source);
+
+     result = context.AddIntrinsic(X86PsllInstruction[op.Size + 1], result, Const(amount));
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>
+ /// Shift right narrow (immediate), vector. With SSSE3: shifts each source element
+ /// right logically, shuffles the narrowed bytes together using
+ /// <c>_masks_RshrnShrn</c> and merges them with the low half of Rd. Without SSSE3 it
+ /// defers to <c>EmitVectorShrImmNarrowOpZx</c> with round: false.
+ /// </summary>
+ public static void Shrn_V(ArmEmitterContext context)
+ {
+     if (Optimizations.UseSsse3)
+     {
+         OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+         int shift = GetImmShr(op);
+
+         // No rounding constant here: this is the non-rounding variant of Rshrn_V.
+
+         Operand d = GetVec(op.Rd);
+         Operand n = GetVec(op.Rn);
+
+         // Low 64 bits of Rd, with the upper 64 bits taken from a zero vector.
+         Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
+
+         // Source elements are one size step wider than op.Size.
+         Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+         Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+
+         Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
+
+         Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
+
+         // Simd128 places the narrowed data in the upper half of the result (keeping
+         // Rd's low half); otherwise it goes to the lower half.
+         Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+             ? Intrinsic.X86Movlhps
+             : Intrinsic.X86Movhlps;
+
+         res = context.AddIntrinsic(movInst, dLow, res);
+
+         context.Copy(GetVec(op.Rd), res);
+     }
+     else
+     {
+         EmitVectorShrImmNarrowOpZx(context, round: false);
+     }
+ }
+
+ /// <summary>
+ /// Shift left and insert (vector): each result element is
+ /// <c>(Rn element &lt;&lt; shift) | (Rd element &amp; mask)</c>, where mask keeps the low
+ /// <c>shift</c> bits of the existing Rd element (zero when shift is 0).
+ /// </summary>
+ public static void Sli_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     int shift = GetImmShl(op);
+
+     // Bits of the destination that survive the insert.
+     ulong keepMask = 0;
+
+     if (shift != 0)
+     {
+         keepMask = ulong.MaxValue >> (64 - shift);
+     }
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand shifted = context.ShiftLeft(EmitVectorExtractZx(context, op.Rn, lane, op.Size), Const(shift));
+
+         Operand kept = context.BitwiseAnd(EmitVectorExtractZx(context, op.Rd, lane, op.Size), Const(keepMask));
+
+         acc = EmitVectorInsert(context, acc, context.BitwiseOr(shifted, kept), lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>
+ /// Per-element register shift implemented through <c>SoftFallback.SignedShlRegSatQ</c>
+ /// with the third argument set to 1; elements of Rn and Rm are read with
+ /// <c>EmitVectorExtractSx</c>.
+ /// </summary>
+ public static void Sqrshl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(1), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>Forwards to <c>EmitRoundShrImmSaturatingNarrowOp</c> with the ScalarSxSx flags.</summary>
+ public static void Sqrshrn_S(ArmEmitterContext context)
+     => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+
+ /// <summary>Forwards to <c>EmitRoundShrImmSaturatingNarrowOp</c> with the VectorSxSx flags.</summary>
+ public static void Sqrshrn_V(ArmEmitterContext context)
+     => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+
+ /// <summary>Forwards to <c>EmitRoundShrImmSaturatingNarrowOp</c> with the ScalarSxZx flags.</summary>
+ public static void Sqrshrun_S(ArmEmitterContext context)
+     => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+
+ /// <summary>Forwards to <c>EmitRoundShrImmSaturatingNarrowOp</c> with the VectorSxZx flags.</summary>
+ public static void Sqrshrun_V(ArmEmitterContext context)
+     => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+
+ /// <summary>
+ /// Per-element register shift implemented through <c>SoftFallback.SignedShlRegSatQ</c>
+ /// with the third argument set to 0; elements of Rn and Rm are read with
+ /// <c>EmitVectorExtractSx</c>.
+ /// </summary>
+ public static void Sqshl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(0), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>Forwards to <c>EmitShrImmSaturatingNarrowOp</c> with the ScalarSxSx flags.</summary>
+ public static void Sqshrn_S(ArmEmitterContext context)
+     => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+
+ /// <summary>Forwards to <c>EmitShrImmSaturatingNarrowOp</c> with the VectorSxSx flags.</summary>
+ public static void Sqshrn_V(ArmEmitterContext context)
+     => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+
+ /// <summary>Forwards to <c>EmitShrImmSaturatingNarrowOp</c> with the ScalarSxZx flags.</summary>
+ public static void Sqshrun_S(ArmEmitterContext context)
+     => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+
+ /// <summary>Forwards to <c>EmitShrImmSaturatingNarrowOp</c> with the VectorSxZx flags.</summary>
+ public static void Sqshrun_V(ArmEmitterContext context)
+     => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+
+ /// <summary>
+ /// Per-element register shift implemented through <c>SoftFallback.SignedShlReg</c>
+ /// with the third argument set to 1; elements of Rn and Rm are read with
+ /// <c>EmitVectorExtractSx</c>.
+ /// </summary>
+ public static void Srshl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(1), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>Forwards to <c>EmitScalarShrImmOpSx</c> with the Round flag.</summary>
+ public static void Srshr_S(ArmEmitterContext context)
+     => EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+
+ /// <summary>
+ /// Rounding arithmetic shift right by immediate (vector). The SSE2 path handles
+ /// element sizes 1 and 2; every other case goes through <c>EmitVectorShrImmOpSx</c>
+ /// with the Round flag.
+ /// </summary>
+ public static void Srshr_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     bool useSse2Path = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;
+
+     if (!useSse2Path)
+     {
+         EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+     int elemBits = 8 << op.Size;
+
+     Operand source = GetVec(op.Rn);
+
+     // Isolate bit (shift - 1) of each element: the left shift moves it to the top
+     // bit, the logical right shift by (elemBits - 1) brings it down to bit 0.
+     Operand roundBit = context.AddIntrinsic(X86PsllInstruction[op.Size], source, Const(elemBits - shift));
+
+     roundBit = context.AddIntrinsic(X86PsrlInstruction[op.Size], roundBit, Const(elemBits - 1));
+
+     Operand shifted = context.AddIntrinsic(X86PsraInstruction[op.Size], source, Const(shift));
+
+     Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], roundBit, shifted);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Forwards to <c>EmitScalarShrImmOpSx</c> with the Round and Accumulate flags.</summary>
+ public static void Srsra_S(ArmEmitterContext context)
+     => EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+ /// <summary>
+ /// Rounding arithmetic shift right by immediate and accumulate into Rd (vector).
+ /// The SSE2 path handles element sizes 1 and 2; every other case goes through
+ /// <c>EmitVectorShrImmOpSx</c> with the Round and Accumulate flags.
+ /// </summary>
+ public static void Srsra_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     bool useSse2Path = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;
+
+     if (!useSse2Path)
+     {
+         EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+     int elemBits = 8 << op.Size;
+
+     Operand accumulator = GetVec(op.Rd);
+     Operand source = GetVec(op.Rn);
+
+     // Isolate bit (shift - 1) of each element as a 0/1 rounding increment.
+     Operand roundBit = context.AddIntrinsic(X86PsllInstruction[op.Size], source, Const(elemBits - shift));
+
+     roundBit = context.AddIntrinsic(X86PsrlInstruction[op.Size], roundBit, Const(elemBits - 1));
+
+     Operand shifted = context.AddIntrinsic(X86PsraInstruction[op.Size], source, Const(shift));
+
+     Intrinsic addInst = X86PaddInstruction[op.Size];
+
+     Operand result = context.AddIntrinsic(addInst, roundBit, shifted);
+
+     result = context.AddIntrinsic(addInst, result, accumulator);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>
+ /// Per-element register shift implemented through <c>SoftFallback.SignedShlReg</c>
+ /// with the third argument set to 0; elements of Rn and Rm are read with
+ /// <c>EmitVectorExtractSx</c>.
+ /// </summary>
+ public static void Sshl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(0), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>
+ /// Shift left long by immediate using the <c>X86PmovsxInstruction</c> widening table.
+ /// With SSE4.1 the source half is widened and then shifted at the wider element size
+ /// (the shift is skipped when the amount is 0); otherwise
+ /// <c>EmitVectorShImmWidenBinarySx</c> is used.
+ /// </summary>
+ public static void Sshll_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     int amount = GetImmShl(op);
+
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorShImmWidenBinarySx(context, (value, count) => context.ShiftLeft(value, count), amount);
+
+         return;
+     }
+
+     Operand source = GetVec(op.Rn);
+
+     // For Simd128 the upper 64 bits are the input: move them down by 8 bytes.
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         source = context.AddIntrinsic(Intrinsic.X86Psrldq, source, Const(8));
+     }
+
+     Operand result = context.AddIntrinsic(X86PmovsxInstruction[op.Size], source);
+
+     if (amount != 0)
+     {
+         result = context.AddIntrinsic(X86PsllInstruction[op.Size + 1], result, Const(amount));
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Forwards to <c>EmitShrImmOp</c> with the ScalarSx flags.</summary>
+ public static void Sshr_S(ArmEmitterContext context)
+     => EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+
+ /// <summary>
+ /// Arithmetic shift right by immediate (vector). The SSE2 path handles element
+ /// sizes 1 and 2; every other case goes through <c>EmitShrImmOp</c> with VectorSx.
+ /// </summary>
+ public static void Sshr_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     bool useSse2Path = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;
+
+     if (!useSse2Path)
+     {
+         EmitShrImmOp(context, ShrImmFlags.VectorSx);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+
+     Operand source = GetVec(op.Rn);
+
+     Operand result = context.AddIntrinsic(X86PsraInstruction[op.Size], source, Const(shift));
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Forwards to <c>EmitScalarShrImmOpSx</c> with the Accumulate flag.</summary>
+ public static void Ssra_S(ArmEmitterContext context)
+     => EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+
+ /// <summary>
+ /// Arithmetic shift right by immediate and accumulate into Rd (vector). The SSE2
+ /// path handles element sizes 1 and 2; every other case goes through
+ /// <c>EmitVectorShrImmOpSx</c> with the Accumulate flag.
+ /// </summary>
+ public static void Ssra_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     bool useSse2Path = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;
+
+     if (!useSse2Path)
+     {
+         EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+
+     Operand accumulator = GetVec(op.Rd);
+     Operand source = GetVec(op.Rn);
+
+     Operand result = context.AddIntrinsic(X86PsraInstruction[op.Size], source, Const(shift));
+
+     result = context.AddIntrinsic(X86PaddInstruction[op.Size], result, accumulator);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(accumulator, result);
+ }
+
+ /// <summary>
+ /// Per-element register shift implemented through
+ /// <c>SoftFallback.UnsignedShlRegSatQ</c> with the third argument set to 1; elements
+ /// of Rn and Rm are read with <c>EmitVectorExtractZx</c>.
+ /// </summary>
+ public static void Uqrshl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(1), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>Forwards to <c>EmitRoundShrImmSaturatingNarrowOp</c> with the ScalarZxZx flags.</summary>
+ public static void Uqrshrn_S(ArmEmitterContext context)
+     => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+
+ /// <summary>Forwards to <c>EmitRoundShrImmSaturatingNarrowOp</c> with the VectorZxZx flags.</summary>
+ public static void Uqrshrn_V(ArmEmitterContext context)
+     => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+
+ /// <summary>
+ /// Per-element register shift implemented through
+ /// <c>SoftFallback.UnsignedShlRegSatQ</c> with the third argument set to 0; elements
+ /// of Rn and Rm are read with <c>EmitVectorExtractZx</c>.
+ /// </summary>
+ public static void Uqshl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(0), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>Forwards to <c>EmitShrImmSaturatingNarrowOp</c> with the ScalarZxZx flags.</summary>
+ public static void Uqshrn_S(ArmEmitterContext context)
+     => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+
+ /// <summary>Forwards to <c>EmitShrImmSaturatingNarrowOp</c> with the VectorZxZx flags.</summary>
+ public static void Uqshrn_V(ArmEmitterContext context)
+     => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+
+ /// <summary>
+ /// Per-element register shift implemented through <c>SoftFallback.UnsignedShlReg</c>
+ /// with the third argument set to 1; elements of Rn and Rm are read with
+ /// <c>EmitVectorExtractZx</c>.
+ /// </summary>
+ public static void Urshl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(1), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>Forwards to <c>EmitScalarShrImmOpZx</c> with the Round flag.</summary>
+ public static void Urshr_S(ArmEmitterContext context)
+     => EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+
+ /// <summary>
+ /// Rounding logical shift right by immediate (vector). The SSE2 path handles element
+ /// sizes above 0; otherwise <c>EmitVectorShrImmOpZx</c> with the Round flag is used.
+ /// </summary>
+ public static void Urshr_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size <= 0)
+     {
+         EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+     int elemBits = 8 << op.Size;
+
+     Operand source = GetVec(op.Rn);
+
+     Intrinsic shiftRightInst = X86PsrlInstruction[op.Size];
+
+     // Isolate bit (shift - 1) of each element: the left shift moves it to the top
+     // bit, the logical right shift by (elemBits - 1) brings it down to bit 0.
+     Operand roundBit = context.AddIntrinsic(X86PsllInstruction[op.Size], source, Const(elemBits - shift));
+
+     roundBit = context.AddIntrinsic(shiftRightInst, roundBit, Const(elemBits - 1));
+
+     Operand shifted = context.AddIntrinsic(shiftRightInst, source, Const(shift));
+
+     Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], roundBit, shifted);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Forwards to <c>EmitScalarShrImmOpZx</c> with the Round and Accumulate flags.</summary>
+ public static void Ursra_S(ArmEmitterContext context)
+     => EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+ /// <summary>
+ /// Rounding logical shift right by immediate and accumulate into Rd (vector). The
+ /// SSE2 path handles element sizes above 0; otherwise <c>EmitVectorShrImmOpZx</c>
+ /// with the Round and Accumulate flags is used.
+ /// </summary>
+ public static void Ursra_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size <= 0)
+     {
+         EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+     int elemBits = 8 << op.Size;
+
+     Operand accumulator = GetVec(op.Rd);
+     Operand source = GetVec(op.Rn);
+
+     Intrinsic shiftRightInst = X86PsrlInstruction[op.Size];
+
+     // Isolate bit (shift - 1) of each element as a 0/1 rounding increment.
+     Operand roundBit = context.AddIntrinsic(X86PsllInstruction[op.Size], source, Const(elemBits - shift));
+
+     roundBit = context.AddIntrinsic(shiftRightInst, roundBit, Const(elemBits - 1));
+
+     Operand shifted = context.AddIntrinsic(shiftRightInst, source, Const(shift));
+
+     Intrinsic addInst = X86PaddInstruction[op.Size];
+
+     Operand result = context.AddIntrinsic(addInst, roundBit, shifted);
+
+     result = context.AddIntrinsic(addInst, result, accumulator);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         result = context.VectorZeroUpper64(result);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>
+ /// Per-element register shift implemented through <c>SoftFallback.UnsignedShlReg</c>
+ /// with the third argument set to 0; elements of Rn and Rm are read with
+ /// <c>EmitVectorExtractZx</c>.
+ /// </summary>
+ public static void Ushl_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdReg)context.CurrOp;
+
+     Operand acc = context.VectorZero();
+
+     int count = op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+         Operand amount = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+
+         var fallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg);
+
+         Operand shifted = context.Call(fallback, value, amount, Const(0), Const(op.Size));
+
+         acc = EmitVectorInsert(context, acc, shifted, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), acc);
+ }
+
+ /// <summary>
+ /// Shift left long by immediate using the <c>X86PmovzxInstruction</c> widening table.
+ /// With SSE4.1 the source half is widened and then shifted at the wider element size
+ /// (the shift is skipped when the amount is 0); otherwise
+ /// <c>EmitVectorShImmWidenBinaryZx</c> is used.
+ /// </summary>
+ public static void Ushll_V(ArmEmitterContext context)
+ {
+     var op = (OpCodeSimdShImm)context.CurrOp;
+
+     int amount = GetImmShl(op);
+
+     if (!Optimizations.UseSse41)
+     {
+         EmitVectorShImmWidenBinaryZx(context, (value, count) => context.ShiftLeft(value, count), amount);
+
+         return;
+     }
+
+     Operand source = GetVec(op.Rn);
+
+     // For Simd128 the upper 64 bits are the input: move them down by 8 bytes.
+     if (op.RegisterSize == RegisterSize.Simd128)
+     {
+         source = context.AddIntrinsic(Intrinsic.X86Psrldq, source, Const(8));
+     }
+
+     Operand result = context.AddIntrinsic(X86PmovzxInstruction[op.Size], source);
+
+     if (amount != 0)
+     {
+         result = context.AddIntrinsic(X86PsllInstruction[op.Size + 1], result, Const(amount));
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ /// <summary>Forwards to <c>EmitShrImmOp</c> with the ScalarZx flags.</summary>
+ public static void Ushr_S(ArmEmitterContext context)
+     => EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+
+ // Vector unsigned shift right by immediate. Uses the SSE2 logical-shift intrinsic when SSE2
+ // is enabled and op.Size > 0; otherwise falls back to the generic per-element emitter.
+ public static void Ushr_V(ArmEmitterContext context)
+ {
+     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+     bool useIntrinsic = Optimizations.UseSse2 && op.Size > 0;
+     if (!useIntrinsic)
+     {
+         EmitShrImmOp(context, ShrImmFlags.VectorZx);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+     Operand source = GetVec(op.Rn);
+
+     Operand shifted = context.AddIntrinsic(X86PsrlInstruction[op.Size], source, Const(shift));
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         shifted = context.VectorZeroUpper64(shifted); // 64-bit form: clear the upper half
+     }
+
+     context.Copy(GetVec(op.Rd), shifted);
+ }
+
+ // Scalar unsigned shift right by immediate and accumulate.
+ // Delegates to the scalar zero-extending emitter with the Accumulate modifier.
+ public static void Usra_S(ArmEmitterContext context)
+     => EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+
+ // Vector unsigned shift right by immediate and accumulate into Rd. Uses SSE2 shift/add
+ // intrinsics when SSE2 is enabled and op.Size > 0; otherwise the generic per-element emitter.
+ public static void Usra_V(ArmEmitterContext context)
+ {
+     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+     if (!Optimizations.UseSse2 || op.Size <= 0)
+     {
+         EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
+
+         return;
+     }
+
+     int shift = GetImmShr(op);
+
+     Operand accumulator = GetVec(op.Rd);
+     Operand source = GetVec(op.Rn);
+
+     // Rd = (Rn >> shift) + Rd, element-wise.
+     Operand shifted = context.AddIntrinsic(X86PsrlInstruction[op.Size], source, Const(shift));
+
+     Operand sum = context.AddIntrinsic(X86PaddInstruction[op.Size], shifted, accumulator);
+
+     if (op.RegisterSize == RegisterSize.Simd64)
+     {
+         // 64-bit form: clear the upper half of the result.
+         sum = context.VectorZeroUpper64(sum);
+     }
+
+     context.Copy(accumulator, sum);
+ }
+
+ [Flags]
+ private enum ShrImmFlags
+ {
+     Scalar = 0x1, // process a single element instead of every vector element
+     Signed = 0x2, // extract elements sign-extended and shift arithmetically
+     // Modifiers that can be OR-ed onto any of the named forms below.
+     Round = 0x4,      // add 1 << (shift - 1) before shifting
+     Accumulate = 0x8, // add the existing destination element to the shifted value
+     // Named scalar forms.
+     ScalarSx = Scalar | Signed,
+     ScalarZx = Scalar,
+     // Named vector forms: the Scalar bit is simply left clear.
+     VectorSx = Signed,
+     VectorZx = 0
+ }
+
+ // Scalar, sign-extending shift right by immediate.
+ // Combines ShrImmFlags.ScalarSx with the caller's modifier flags.
+ private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+     => EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);
+
+ // Scalar, zero-extending shift right by immediate.
+ // Combines ShrImmFlags.ScalarZx with the caller's modifier flags.
+ private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+     => EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);
+
+ // Vector, sign-extending shift right by immediate.
+ // Combines ShrImmFlags.VectorSx with the caller's modifier flags.
+ private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+     => EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);
+
+ // Vector, zero-extending shift right by immediate.
+ // Combines ShrImmFlags.VectorZx (no base bits) with the caller's modifier flags.
+ private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+     => EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
+
+ // Shared emitter for the shift-right-by-immediate family (scalar or vector, signed or
+ // unsigned, optionally rounding and/or accumulating); the variant is selected by flags.
+ private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
+ {
+     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+     Operand result = context.VectorZero();
+
+     bool isScalar = flags.HasFlag(ShrImmFlags.Scalar);
+     bool isSigned = flags.HasFlag(ShrImmFlags.Signed);
+     bool doRound = flags.HasFlag(ShrImmFlags.Round);
+     bool doAccumulate = flags.HasFlag(ShrImmFlags.Accumulate);
+
+     int shift = GetImmShr(op);
+     long rounding = 1L << (shift - 1); // added before shifting when rounding is requested
+     int count = isScalar ? 1 : op.GetBytesCount() >> op.Size;
+
+     for (int lane = 0; lane < count; lane++)
+     {
+         Operand value = EmitVectorExtract(context, op.Rn, lane, op.Size, isSigned);
+
+         if (op.Size > 2)
+         {
+             // 64-bit elements: the helper call performs the (optionally rounded) shift.
+             value = EmitShrImm64(context, value, isSigned, doRound ? rounding : 0L, shift);
+         }
+         else
+         {
+             if (doRound)
+             {
+                 value = context.Add(value, Const(rounding));
+             }
+
+             value = isSigned
+                 ? context.ShiftRightSI(value, Const(shift))
+                 : context.ShiftRightUI(value, Const(shift));
+         }
+
+         if (doAccumulate)
+         {
+             // Add the element currently held in the destination register.
+             Operand previous = EmitVectorExtract(context, op.Rd, lane, op.Size, isSigned);
+             value = context.Add(value, previous);
+         }
+
+         result = EmitVectorInsert(context, result, value, lane, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // Narrowing unsigned shift right by immediate: reads elements of size op.Size + 1 from Rn,
+ // shifts them (optionally rounding) and writes them as op.Size elements. The 128-bit form
+ // starts from a copy of Rd and fills slots from index `count`; the 64-bit form starts at zero.
+ private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
+ {
+     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+     int shift = GetImmShr(op);
+     long rounding = 1L << (shift - 1);
+     int count = 8 >> op.Size;
+
+     int firstSlot = op.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+     Operand result = firstSlot != 0 ? context.Copy(GetVec(op.Rd)) : context.VectorZero();
+
+     for (int i = 0; i < count; i++)
+     {
+         Operand value = EmitVectorExtractZx(context, op.Rn, i, op.Size + 1);
+
+         if (round)
+         {
+             value = context.Add(value, Const(rounding));
+         }
+
+         Operand narrowed = context.ShiftRightUI(value, Const(shift));
+         result = EmitVectorInsert(context, result, narrowed, firstSlot + i, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+     Scalar = 0x1,    // process a single element instead of a vector half
+     SignedSrc = 0x2, // source elements are extracted sign-extended
+     SignedDst = 0x4, // the result saturates to the signed destination range
+     // Modifier: add 1 << (shift - 1) before shifting.
+     Round = 0x8,
+     // Named scalar forms (source signedness, then destination signedness).
+     ScalarSxSx = Scalar | SignedSrc | SignedDst,
+     ScalarSxZx = Scalar | SignedSrc,
+     ScalarZxZx = Scalar,
+     // Named vector forms: same combinations with the Scalar bit clear.
+     VectorSxSx = SignedSrc | SignedDst,
+     VectorSxZx = SignedSrc,
+     VectorZxZx = 0
+ }
+
+ // Rounding variant of the saturating narrowing shift right.
+ // Adds the Round modifier to the caller's flags and forwards to the shared emitter.
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+     => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+
+ // Shared emitter for the saturating narrowing shift-right-by-immediate family. Source
+ // elements of size op.Size + 1 are shifted, saturated via EmitSatQ and written as op.Size.
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+     OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+     bool isScalar = flags.HasFlag(ShrImmSaturatingNarrowFlags.Scalar);
+     bool srcSigned = flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedSrc);
+     bool dstSigned = flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedDst);
+     bool doRound = flags.HasFlag(ShrImmSaturatingNarrowFlags.Round);
+
+     int shift = GetImmShr(op);
+     long rounding = 1L << (shift - 1);
+     int count = isScalar ? 1 : 8 >> op.Size;
+     int firstSlot = !isScalar && op.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+     Operand result = firstSlot != 0 ? context.Copy(GetVec(op.Rd)) : context.VectorZero();
+
+     for (int i = 0; i < count; i++)
+     {
+         Operand value = EmitVectorExtract(context, op.Rn, i, op.Size + 1, srcSigned);
+
+         if (doRound && op.Size > 1)
+         {
+             // op.Size == 2 with rounding: done by the 64-bit helper call (shift <= 32).
+             value = EmitShrImm64(context, value, srcSigned, rounding, shift);
+         }
+         else
+         {
+             if (doRound)
+             {
+                 value = context.Add(value, Const(rounding));
+             }
+
+             value = srcSigned
+                 ? context.ShiftRightSI(value, Const(shift))
+                 : context.ShiftRightUI(value, Const(shift));
+         }
+
+         value = EmitSatQ(context, value, op.Size, srcSigned, dstSigned);
+
+         result = EmitVectorInsert(context, result, value, firstSlot + i, op.Size);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ private static Operand EmitShrImm64(
+     ArmEmitterContext context,
+     Operand value,
+     bool signed,
+     long roundConst,
+     int shift)
+ {
+     Delegate helper;
+     if (signed) { helper = new _S64_S64_S64_S32(SoftFallback.SignedShrImm64); }
+     else { helper = new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64); }
+     // Both SoftFallback helpers take (value, roundConst, shift) and return the shifted value.
+     return context.Call(helper, value, Const(roundConst), Const(shift));
+ }
+
+ // Widening shift-by-immediate helper, sign-extending the source elements.
+ // Forwards to the shared implementation with signed: true.
+ private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
+     => EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true);
+
+ // Widening shift-by-immediate helper, zero-extending the source elements.
+ // Forwards to the shared implementation with signed: false.
+ private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
+     => EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false);
+
+ // Widening helper: reads `8 >> op.Size` elements from Rn (starting at slot `count` for the
+ // 128-bit form), applies emit(element, imm) and writes each result at size op.Size + 1.
+ private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
+ {
+     OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+     Operand result = context.VectorZero();
+
+     int count = 8 >> op.Size;
+     int firstSlot = op.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+     for (int i = 0; i < count; i++)
+     {
+         Operand element = EmitVectorExtract(context, op.Rn, firstSlot + i, op.Size, signed);
+         Operand widened = emit(element, Const(imm));
+         result = EmitVectorInsert(context, result, widened, i, op.Size + 1);
+     }
+
+     context.Copy(GetVec(op.Rd), result);
+ }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
new file mode 100644
index 000000000..eeb53c1fe
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -0,0 +1,114 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+ static partial class InstEmit
+ {
+     private const int DczSizeLog2 = 4; // the DC ZVA block is (4 << DczSizeLog2) = 64 bytes
+
+     public static void Hint(ArmEmitterContext context)
+     {
+         // Execute as no-op.
+     }
+
+     public static void Isb(ArmEmitterContext context)
+     {
+         // Execute as no-op.
+     }
+
+     public static void Mrs(ArmEmitterContext context)
+     {
+         OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+         // Map the packed system register id to the NativeInterface getter that reads it.
+         Delegate dlg;
+
+         switch (GetPackedId(op))
+         {
+             case 0b11_011_0000_0000_001: dlg = new _U64(NativeInterface.GetCtrEl0); break;
+             case 0b11_011_0000_0000_111: dlg = new _U64(NativeInterface.GetDczidEl0); break;
+             case 0b11_011_0100_0100_000: dlg = new _U64(NativeInterface.GetFpcr); break;
+             case 0b11_011_0100_0100_001: dlg = new _U64(NativeInterface.GetFpsr); break;
+             case 0b11_011_1101_0000_010: dlg = new _U64(NativeInterface.GetTpidrEl0); break;
+             case 0b11_011_1101_0000_011: dlg = new _U64(NativeInterface.GetTpidr); break;
+             case 0b11_011_1110_0000_000: dlg = new _U64(NativeInterface.GetCntfrqEl0); break;
+             case 0b11_011_1110_0000_001: dlg = new _U64(NativeInterface.GetCntpctEl0); break;
+
+             default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+         }
+         // Emit the call and store its result through SetIntOrZR into Rt.
+         SetIntOrZR(context, op.Rt, context.Call(dlg));
+     }
+
+     public static void Msr(ArmEmitterContext context)
+     {
+         OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+         // Map the packed system register id to the NativeInterface setter that writes it.
+         Delegate dlg;
+
+         switch (GetPackedId(op))
+         {
+             case 0b11_011_0100_0100_000: dlg = new _Void_U64(NativeInterface.SetFpcr); break;
+             case 0b11_011_0100_0100_001: dlg = new _Void_U64(NativeInterface.SetFpsr); break;
+             case 0b11_011_1101_0000_010: dlg = new _Void_U64(NativeInterface.SetTpidrEl0); break;
+
+             default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+         }
+         // Emit the call, passing the value read from Rt through GetIntOrZR.
+         context.Call(dlg, GetIntOrZR(context, op.Rt));
+     }
+
+     public static void Nop(ArmEmitterContext context)
+     {
+         // Do nothing.
+     }
+
+     public static void Sys(ArmEmitterContext context)
+     {
+         // This instruction is used to do some operations on the CPU like cache invalidation,
+         // address translation and the like. No cache is emulated, so every operation is a
+         // no-op except DC ZVA, whose effect (zeroing memory) is visible to the guest.
+         OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+         switch (GetPackedId(op))
+         {
+             case 0b11_011_0111_0100_001:
+             {
+                 // DC ZVA: zero the naturally aligned (4 << DczSizeLog2)-byte block containing Rt.
+                 Operand t = GetIntOrZR(context, op.Rt);
+                 Operand blockStart = context.BitwiseAnd(t, Const(~((4L << DczSizeLog2) - 1))); // align down
+                 for (long offset = 0; offset < (4 << DczSizeLog2); offset += 8)
+                 {
+                     Operand address = context.Add(blockStart, Const(offset));
+
+                     context.Call(new _Void_U64_U64(NativeInterface.WriteUInt64), address, Const(0L));
+                 }
+
+                 break;
+             }
+
+             // No-op
+             case 0b11_011_0111_1110_001: //DC CIVAC
+                 break;
+         }
+     }
+
+     private static int GetPackedId(OpCodeSystem op)
+     {
+         int id;
+         // Packed layout, low to high: Op2 at bit 0, CRm at 3, CRn at 7, Op1 at 11, Op0 at 14.
+         id = op.Op2 << 0;
+         id |= op.CRm << 3;
+         id |= op.CRn << 7;
+         id |= op.Op1 << 11;
+         id |= op.Op0 << 14;
+
+         return id;
+     }
+ }
+}
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
new file mode 100644
index 000000000..e70ca34bc
--- /dev/null
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -0,0 +1,459 @@
+namespace ARMeilleure.Instructions
+{
+ enum InstName // One member per instruction name, grouped by the section comments below.
+ {
+ // Base (AArch64)
+ Adc,
+ Adcs,
+ Add,
+ Adds,
+ Adr,
+ Adrp,
+ And,
+ Ands,
+ Asrv,
+ B,
+ B_Cond,
+ Bfm,
+ Bic,
+ Bics,
+ Bl,
+ Blr,
+ Br,
+ Brk,
+ Cbnz,
+ Cbz,
+ Ccmn,
+ Ccmp,
+ Clrex,
+ Cls,
+ Clz,
+ Crc32b,
+ Crc32h,
+ Crc32w,
+ Crc32x,
+ Crc32cb,
+ Crc32ch,
+ Crc32cw,
+ Crc32cx,
+ Csel,
+ Csinc,
+ Csinv,
+ Csneg,
+ Dmb,
+ Dsb,
+ Eon,
+ Eor,
+ Extr,
+ Hint,
+ Isb,
+ Ldar,
+ Ldaxp,
+ Ldaxr,
+ Ldp,
+ Ldr,
+ Ldr_Literal,
+ Ldrs,
+ Ldxr,
+ Ldxp,
+ Lslv,
+ Lsrv,
+ Madd,
+ Movk,
+ Movn,
+ Movz,
+ Mrs,
+ Msr,
+ Msub,
+ Nop,
+ Orn,
+ Orr,
+ Pfrm, // NOTE(review): looks like a transposition of "Prfm"; renaming needs every user updated - confirm
+ Rbit,
+ Ret,
+ Rev16,
+ Rev32,
+ Rev64,
+ Rorv,
+ Sbc,
+ Sbcs,
+ Sbfm,
+ Sdiv,
+ Smaddl,
+ Smsubl,
+ Smulh,
+ Stlr,
+ Stlxp,
+ Stlxr,
+ Stp,
+ Str,
+ Stxp,
+ Stxr,
+ Sub,
+ Subs,
+ Svc,
+ Sys,
+ Tbnz,
+ Tbz,
+ Ubfm,
+ Udiv,
+ Umaddl,
+ Umsubl,
+ Umulh,
+ Und,
+
+ // FP & SIMD (AArch64)
+ Abs_S, // presumably _S = scalar form, _V = vector form, _Gp = general-purpose register form - TODO confirm
+ Abs_V,
+ Add_S,
+ Add_V,
+ Addhn_V,
+ Addp_S,
+ Addp_V,
+ Addv_V,
+ Aesd_V,
+ Aese_V,
+ Aesimc_V,
+ Aesmc_V,
+ And_V,
+ Bic_V,
+ Bic_Vi,
+ Bif_V,
+ Bit_V,
+ Bsl_V,
+ Cls_V,
+ Clz_V,
+ Cmeq_S,
+ Cmeq_V,
+ Cmge_S,
+ Cmge_V,
+ Cmgt_S,
+ Cmgt_V,
+ Cmhi_S,
+ Cmhi_V,
+ Cmhs_S,
+ Cmhs_V,
+ Cmle_S,
+ Cmle_V,
+ Cmlt_S,
+ Cmlt_V,
+ Cmtst_S,
+ Cmtst_V,
+ Cnt_V,
+ Dup_Gp,
+ Dup_S,
+ Dup_V,
+ Eor_V,
+ Ext_V,
+ Fabd_S,
+ Fabd_V,
+ Fabs_S,
+ Fabs_V,
+ Fadd_S,
+ Fadd_V,
+ Faddp_S,
+ Faddp_V,
+ Fccmp_S,
+ Fccmpe_S,
+ Fcmeq_S,
+ Fcmeq_V,
+ Fcmge_S,
+ Fcmge_V,
+ Fcmgt_S,
+ Fcmgt_V,
+ Fcmle_S,
+ Fcmle_V,
+ Fcmlt_S,
+ Fcmlt_V,
+ Fcmp_S,
+ Fcmpe_S,
+ Fcsel_S,
+ Fcvt_S,
+ Fcvtas_Gp,
+ Fcvtau_Gp,
+ Fcvtl_V,
+ Fcvtms_Gp,
+ Fcvtmu_Gp,
+ Fcvtn_V,
+ Fcvtns_S,
+ Fcvtns_V,
+ Fcvtnu_S,
+ Fcvtnu_V,
+ Fcvtps_Gp,
+ Fcvtpu_Gp,
+ Fcvtzs_Gp,
+ Fcvtzs_Gp_Fixed,
+ Fcvtzs_S,
+ Fcvtzs_V,
+ Fcvtzs_V_Fixed,
+ Fcvtzu_Gp,
+ Fcvtzu_Gp_Fixed,
+ Fcvtzu_S,
+ Fcvtzu_V,
+ Fcvtzu_V_Fixed,
+ Fdiv_S,
+ Fdiv_V,
+ Fmadd_S,
+ Fmax_S,
+ Fmax_V,
+ Fmaxnm_S,
+ Fmaxnm_V,
+ Fmaxp_V,
+ Fmin_S,
+ Fmin_V,
+ Fminnm_S,
+ Fminnm_V,
+ Fminp_V,
+ Fmla_Se,
+ Fmla_V,
+ Fmla_Ve,
+ Fmls_Se,
+ Fmls_V,
+ Fmls_Ve,
+ Fmov_S,
+ Fmov_Si,
+ Fmov_Vi,
+ Fmov_Ftoi,
+ Fmov_Itof,
+ Fmov_Ftoi1,
+ Fmov_Itof1,
+ Fmsub_S,
+ Fmul_S,
+ Fmul_Se,
+ Fmul_V,
+ Fmul_Ve,
+ Fmulx_S,
+ Fmulx_Se,
+ Fmulx_V,
+ Fmulx_Ve,
+ Fneg_S,
+ Fneg_V,
+ Fnmadd_S,
+ Fnmsub_S,
+ Fnmul_S,
+ Frecpe_S,
+ Frecpe_V,
+ Frecps_S,
+ Frecps_V,
+ Frecpx_S,
+ Frinta_S,
+ Frinta_V,
+ Frinti_S,
+ Frinti_V,
+ Frintm_S,
+ Frintm_V,
+ Frintn_S,
+ Frintn_V,
+ Frintp_S,
+ Frintp_V,
+ Frintx_S,
+ Frintx_V,
+ Frintz_S,
+ Frintz_V,
+ Frsqrte_S,
+ Frsqrte_V,
+ Frsqrts_S,
+ Frsqrts_V,
+ Fsqrt_S,
+ Fsqrt_V,
+ Fsub_S,
+ Fsub_V,
+ Ins_Gp,
+ Ins_V,
+ Ld__Vms,
+ Ld__Vss,
+ Mla_V,
+ Mla_Ve,
+ Mls_V,
+ Mls_Ve,
+ Movi_V,
+ Mul_V,
+ Mul_Ve,
+ Mvni_V,
+ Neg_S,
+ Neg_V,
+ Not_V,
+ Orn_V,
+ Orr_V,
+ Orr_Vi,
+ Raddhn_V,
+ Rbit_V,
+ Rev16_V,
+ Rev32_V,
+ Rev64_V,
+ Rshrn_V,
+ Rsubhn_V,
+ Saba_V,
+ Sabal_V,
+ Sabd_V,
+ Sabdl_V,
+ Sadalp_V,
+ Saddl_V,
+ Saddlp_V,
+ Saddlv_V,
+ Saddw_V,
+ Scvtf_Gp,
+ Scvtf_Gp_Fixed,
+ Scvtf_S,
+ Scvtf_V,
+ Scvtf_V_Fixed,
+ Sha1c_V,
+ Sha1h_V,
+ Sha1m_V,
+ Sha1p_V,
+ Sha1su0_V,
+ Sha1su1_V,
+ Sha256h_V,
+ Sha256h2_V,
+ Sha256su0_V,
+ Sha256su1_V,
+ Shadd_V,
+ Shl_S,
+ Shl_V,
+ Shll_V,
+ Shrn_V,
+ Shsub_V,
+ Sli_V,
+ Smax_V,
+ Smaxp_V,
+ Smaxv_V,
+ Smin_V,
+ Sminp_V,
+ Sminv_V,
+ Smlal_V,
+ Smlal_Ve,
+ Smlsl_V,
+ Smlsl_Ve,
+ Smov_S,
+ Smull_V,
+ Smull_Ve,
+ Sqabs_S,
+ Sqabs_V,
+ Sqadd_S,
+ Sqadd_V,
+ Sqdmulh_S,
+ Sqdmulh_V,
+ Sqneg_S,
+ Sqneg_V,
+ Sqrdmulh_S,
+ Sqrdmulh_V,
+ Sqrshl_V,
+ Sqrshrn_S,
+ Sqrshrn_V,
+ Sqrshrun_S,
+ Sqrshrun_V,
+ Sqshl_V,
+ Sqshrn_S,
+ Sqshrn_V,
+ Sqshrun_S,
+ Sqshrun_V,
+ Sqsub_S,
+ Sqsub_V,
+ Sqxtn_S,
+ Sqxtn_V,
+ Sqxtun_S,
+ Sqxtun_V,
+ Srhadd_V,
+ Srshl_V,
+ Srshr_S,
+ Srshr_V,
+ Srsra_S,
+ Srsra_V,
+ Sshl_V,
+ Sshll_V,
+ Sshr_S,
+ Sshr_V,
+ Ssra_S,
+ Ssra_V,
+ Ssubl_V,
+ Ssubw_V,
+ St__Vms,
+ St__Vss,
+ Sub_S,
+ Sub_V,
+ Subhn_V,
+ Suqadd_S,
+ Suqadd_V,
+ Tbl_V,
+ Trn1_V,
+ Trn2_V,
+ Uaba_V,
+ Uabal_V,
+ Uabd_V,
+ Uabdl_V,
+ Uadalp_V,
+ Uaddl_V,
+ Uaddlp_V,
+ Uaddlv_V,
+ Uaddw_V,
+ Ucvtf_Gp,
+ Ucvtf_Gp_Fixed,
+ Ucvtf_S,
+ Ucvtf_V,
+ Ucvtf_V_Fixed,
+ Uhadd_V,
+ Uhsub_V,
+ Umax_V,
+ Umaxp_V,
+ Umaxv_V,
+ Umin_V,
+ Uminp_V,
+ Uminv_V,
+ Umlal_V,
+ Umlal_Ve,
+ Umlsl_V,
+ Umlsl_Ve,
+ Umov_S,
+ Umull_V,
+ Umull_Ve,
+ Uqadd_S,
+ Uqadd_V,
+ Uqrshl_V,
+ Uqrshrn_S,
+ Uqrshrn_V,
+ Uqshl_V,
+ Uqshrn_S,
+ Uqshrn_V,
+ Uqsub_S,
+ Uqsub_V,
+ Uqxtn_S,
+ Uqxtn_V,
+ Urhadd_V,
+ Urshl_V,
+ Urshr_S,
+ Urshr_V,
+ Ursra_S,
+ Ursra_V,
+ Ushl_V,
+ Ushll_V,
+ Ushr_S,
+ Ushr_V,
+ Usqadd_S,
+ Usqadd_V,
+ Usra_S,
+ Usra_V,
+ Usubl_V,
+ Usubw_V,
+ Uzp1_V,
+ Uzp2_V,
+ Xtn_V,
+ Zip1_V,
+ Zip2_V,
+
+ // Base (AArch32)
+ Blx,
+ Bx,
+ Cmp,
+ Ldm,
+ Ldrb,
+ Ldrd,
+ Ldrh,
+ Ldrsb,
+ Ldrsh,
+ Mov,
+ Stm,
+ Strb,
+ Strd,
+ Strh
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs
new file mode 100644
index 000000000..3a1e91c8e
--- /dev/null
+++ b/ARMeilleure/Instructions/NativeInterface.cs
@@ -0,0 +1,367 @@
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class NativeInterface
+ {
+ private const int ErgSizeLog2 = 4; // exclusive addresses are compared at (4 << ErgSizeLog2) = 64-byte granularity
+ // Per-thread state: the execution context, the memory manager and the exclusive-access record.
+ private class ThreadContext
+ {
+ public ExecutionContext Context { get; }
+ public MemoryManager Memory { get; }
+ // Masked address and value captured by the most recent Read*Exclusive call.
+ public ulong ExclusiveAddress { get; set; }
+ public ulong ExclusiveValueLow { get; set; }
+ public ulong ExclusiveValueHigh { get; set; }
+
+ public ThreadContext(ExecutionContext context, MemoryManager memory)
+ {
+ Context = context;
+ Memory = memory;
+
+ ExclusiveAddress = ulong.MaxValue; // same "nothing held" value that ClearExclusive writes
+ }
+ }
+
+ [ThreadStatic]
+ private static ThreadContext _context; // one value per thread; null until RegisterThread runs on that thread
+ // Installs a fresh ThreadContext for the calling thread.
+ public static void RegisterThread(ExecutionContext context, MemoryManager memory)
+ {
+ _context = new ThreadContext(context, memory);
+ }
+ // Drops the calling thread's ThreadContext; later calls that dereference _context will throw.
+ public static void UnregisterThread()
+ {
+ _context = null;
+ }
+ // Forwards to ExecutionContext.OnBreak with the statistics timer paused for the duration.
+ public static void Break(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnBreak(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+ // Forwards to ExecutionContext.OnSupervisorCall with the statistics timer paused.
+ public static void SupervisorCall(ulong address, int imm)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnSupervisorCall(address, imm);
+
+ Statistics.ResumeTimer();
+ }
+ // Forwards to ExecutionContext.OnUndefined with the statistics timer paused.
+ public static void Undefined(ulong address, int opCode)
+ {
+ Statistics.PauseTimer();
+
+ GetContext().OnUndefined(address, opCode);
+
+ Statistics.ResumeTimer();
+ }
+
+#region "System registers"
+ public static ulong GetCtrEl0() // each getter returns the same-named ExecutionContext property as ulong
+ {
+ return (ulong)GetContext().CtrEl0;
+ }
+
+ public static ulong GetDczidEl0()
+ {
+ return (ulong)GetContext().DczidEl0;
+ }
+
+ public static ulong GetFpcr()
+ {
+ return (ulong)GetContext().Fpcr;
+ }
+
+ public static ulong GetFpsr()
+ {
+ return (ulong)GetContext().Fpsr;
+ }
+
+ public static ulong GetTpidrEl0()
+ {
+ return (ulong)GetContext().TpidrEl0;
+ }
+
+ public static ulong GetTpidr()
+ {
+ return (ulong)GetContext().Tpidr;
+ }
+
+ public static ulong GetCntfrqEl0()
+ {
+ return GetContext().CntfrqEl0;
+ }
+
+ public static ulong GetCntpctEl0()
+ {
+ return GetContext().CntpctEl0;
+ }
+
+ public static void SetFpcr(ulong value) // each setter casts the value and assigns the same-named property
+ {
+ GetContext().Fpcr = (FPCR)value;
+ }
+
+ public static void SetFpsr(ulong value)
+ {
+ GetContext().Fpsr = (FPSR)value;
+ }
+
+ public static void SetTpidrEl0(ulong value)
+ {
+ GetContext().TpidrEl0 = (long)value;
+ }
+#endregion
+
+#region "Read"
+ public static byte ReadByte(ulong address) // plain reads: forwarded to the thread's MemoryManager
+ {
+ return GetMemoryManager().ReadByte((long)address);
+ }
+
+ public static ushort ReadUInt16(ulong address)
+ {
+ return GetMemoryManager().ReadUInt16((long)address);
+ }
+
+ public static uint ReadUInt32(ulong address)
+ {
+ return GetMemoryManager().ReadUInt32((long)address);
+ }
+
+ public static ulong ReadUInt64(ulong address)
+ {
+ return GetMemoryManager().ReadUInt64((long)address);
+ }
+
+ public static V128 ReadVector128(ulong address)
+ {
+ return GetMemoryManager().ReadVector128((long)address);
+ }
+#endregion
+
+#region "Read exclusive"
+ public static byte ReadByteExclusive(ulong address) // exclusive reads also record the masked address and value read
+ {
+ byte value = _context.Memory.ReadByte((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static ushort ReadUInt16Exclusive(ulong address)
+ {
+ ushort value = _context.Memory.ReadUInt16((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static uint ReadUInt32Exclusive(ulong address)
+ {
+ uint value = _context.Memory.ReadUInt32((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static ulong ReadUInt64Exclusive(ulong address)
+ {
+ ulong value = _context.Memory.ReadUInt64((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value;
+ _context.ExclusiveValueHigh = 0;
+
+ return value;
+ }
+
+ public static V128 ReadVector128Exclusive(ulong address)
+ {
+ V128 value = _context.Memory.AtomicLoadInt128((long)address);
+
+ _context.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+ _context.ExclusiveValueLow = value.GetUInt64(0);
+ _context.ExclusiveValueHigh = value.GetUInt64(1);
+
+ return value;
+ }
+#endregion
+
+#region "Write"
+ public static void WriteByte(ulong address, byte value) // plain writes: forwarded to the thread's MemoryManager
+ {
+ GetMemoryManager().WriteByte((long)address, value);
+ }
+
+ public static void WriteUInt16(ulong address, ushort value)
+ {
+ GetMemoryManager().WriteUInt16((long)address, value);
+ }
+
+ public static void WriteUInt32(ulong address, uint value)
+ {
+ GetMemoryManager().WriteUInt32((long)address, value);
+ }
+
+ public static void WriteUInt64(ulong address, ulong value)
+ {
+ GetMemoryManager().WriteUInt64((long)address, value);
+ }
+
+ public static void WriteVector128(ulong address, V128 value)
+ {
+ GetMemoryManager().WriteVector128((long)address, value);
+ }
+#endregion
+
+#region "Write exclusive"
+ public static int WriteByteExclusive(ulong address, byte value) // exclusive writes return 0 on success, 1 on failure
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+ // The store is attempted only if the masked address matches the recorded one.
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeByte(
+ (long)address,
+ (byte)_context.ExclusiveValueLow,
+ (byte)value);
+ // The recorded exclusive address is cleared only when the compare-exchange succeeded.
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteUInt16Exclusive(ulong address, ushort value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeInt16(
+ (long)address,
+ (short)_context.ExclusiveValueLow,
+ (short)value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteUInt32Exclusive(ulong address, uint value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeInt32(
+ (long)address,
+ (int)_context.ExclusiveValueLow,
+ (int)value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteUInt64Exclusive(ulong address, ulong value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ success = _context.Memory.AtomicCompareExchangeInt64(
+ (long)address,
+ (long)_context.ExclusiveValueLow,
+ (long)value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+
+ public static int WriteVector128Exclusive(ulong address, V128 value)
+ {
+ bool success = _context.ExclusiveAddress == GetMaskedExclusiveAddress(address);
+
+ if (success)
+ {
+ V128 expected = new V128(_context.ExclusiveValueLow, _context.ExclusiveValueHigh);
+
+ success = _context.Memory.AtomicCompareExchangeInt128((long)address, expected, value);
+
+ if (success)
+ {
+ ClearExclusive();
+ }
+ }
+
+ return success ? 0 : 1;
+ }
+#endregion
+ // Clears the low bits of the address so that comparisons happen per (4 << ErgSizeLog2)-byte block.
+ private static ulong GetMaskedExclusiveAddress(ulong address)
+ {
+ return address & ~((4UL << ErgSizeLog2) - 1);
+ }
+ // Resets the recorded exclusive address to ulong.MaxValue, which no masked address can equal.
+ public static void ClearExclusive()
+ {
+ _context.ExclusiveAddress = ulong.MaxValue;
+ }
+ // Calls ExecutionContext.CheckInterrupt with the statistics timer paused.
+ public static void CheckSynchronization()
+ {
+ Statistics.PauseTimer();
+
+ GetContext().CheckInterrupt();
+
+ Statistics.ResumeTimer();
+ }
+ // Returns the calling thread's ExecutionContext; throws if the thread has not been registered.
+ public static ExecutionContext GetContext()
+ {
+ return _context.Context;
+ }
+ // Returns the calling thread's MemoryManager; throws if the thread has not been registered.
+ public static MemoryManager GetMemoryManager()
+ {
+ return _context.Memory;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs
new file mode 100644
index 000000000..dc0309218
--- /dev/null
+++ b/ARMeilleure/Instructions/SoftFallback.cs
@@ -0,0 +1,1307 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+ static class SoftFallback
+ {
+#region "ShlReg"
+ // Register-controlled signed shift: the low byte of shift is a signed amount. Positive amounts
+ // shift left (0 once the amount reaches the element width); negative amounts shift right.
+ public static long SignedShlReg(long value, long shift, bool round, int size)
+ {
+     int elementBits = 8 << size;
+     int amount = (sbyte)shift; // only the least significant byte is used
+
+     if (amount == 0)
+     {
+         return value;
+     }
+
+     if (amount < 0)
+     {
+         return SignedShrReg(value, -amount, round, elementBits);
+     }
+
+     if (amount >= elementBits)
+     {
+         return 0L; // every bit is shifted out
+     }
+
+     return value << amount;
+ }
+
+ // Register-controlled unsigned shift: the low byte of shift is a signed amount. Positive amounts
+ // shift left (0 once the amount reaches the element width); negative amounts shift right.
+ public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size)
+ {
+     int elementBits = 8 << size;
+     int amount = (sbyte)shift; // only the least significant byte is used
+
+     if (amount == 0)
+     {
+         return value;
+     }
+
+     if (amount < 0)
+     {
+         return UnsignedShrReg(value, -amount, round, elementBits);
+     }
+
+     if (amount >= elementBits)
+     {
+         return 0UL; // every bit is shifted out
+     }
+
+     return value << amount;
+ }
+
+ // Saturating variant of the register-controlled signed shift. Left shifts that overflow the
+ // element saturate through SignedSignSatQ / SignedSrcSignedDstSatQ (both set FPSR.Qc);
+ // right shifts (negative amounts) are passed straight to SignedShrReg.
+ public static long SignedShlRegSatQ(long value, long shift, bool round, int size)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     int elementBits = 8 << size;
+     int amount = (sbyte)shift;
+
+     if (amount == 0)
+     {
+         return value;
+     }
+
+     if (amount < 0)
+     {
+         return SignedShrReg(value, -amount, round, elementBits);
+     }
+
+     if (amount >= elementBits)
+     {
+         // Everything is shifted out: saturate according to the sign of the input.
+         return SignedSignSatQ(value, elementBits, context);
+     }
+
+     if (elementBits != 64)
+     {
+         // Narrow elements: range-check the 64-bit shifted value against the element limits.
+         return SignedSrcSignedDstSatQ(value << amount, size);
+     }
+
+     // 64-bit elements: detect lost bits by shifting back and comparing with the input.
+     long shifted = value << amount;
+     long restored = shifted >> amount;
+
+     if (restored != value)
+     {
+         return SignedSignSatQ(value, elementBits, context);
+     }
+
+     return shifted;
+ }
+
+ // Saturating variant of the register-controlled unsigned shift. Left shifts that overflow the
+ // element saturate through UnsignedSignSatQ / UnsignedSrcUnsignedDstSatQ; right shifts
+ // (negative amounts) are passed straight to UnsignedShrReg.
+ public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     int elementBits = 8 << size;
+     int amount = (sbyte)shift;
+
+     if (amount == 0)
+     {
+         return value;
+     }
+
+     if (amount < 0)
+     {
+         return UnsignedShrReg(value, -amount, round, elementBits);
+     }
+
+     if (amount >= elementBits)
+     {
+         // Everything is shifted out: any non-zero input saturates.
+         return UnsignedSignSatQ(value, elementBits, context);
+     }
+
+     if (elementBits != 64)
+     {
+         // Narrow elements: range-check the 64-bit shifted value against the element limit.
+         return UnsignedSrcUnsignedDstSatQ(value << amount, size);
+     }
+
+     // 64-bit elements: detect lost bits by shifting back and comparing with the input.
+     ulong shifted = value << amount;
+     ulong restored = shifted >> amount;
+
+     if (restored != value)
+     {
+         return UnsignedSignSatQ(value, elementBits, context);
+     }
+
+     return shifted;
+ }
+
+ // Right shift of a signed element value with optional rounding (1 << (shift - 1) is added
+ // before shifting). Shifts of the full element width or more are handled explicitly,
+ // since they cannot be delegated to the C# shift operator.
+ // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ private static long SignedShrReg(long value, int shift, bool round, int eSize)
+ {
+     if (!round)
+     {
+         // Plain arithmetic shift while the amount is below the element width.
+         if (shift < eSize)
+         {
+             return value >> shift;
+         }
+
+         // Shifting out every bit leaves only copies of the sign bit.
+         if (value < 0L)
+         {
+             return -1L;
+         }
+
+         return 0L;
+     }
+
+     // With rounding, a shift of the full element width or more yields 0.
+     if (shift >= eSize)
+     {
+         return 0L;
+     }
+
+     long roundConst = 1L << (shift - 1);
+
+     long rounded = value + roundConst;
+
+     // For 64-bit elements the addition can wrap: a non-negative value whose sum turned
+     // negative has overflowed. The sum is then shifted logically, treating it as the
+     // unsigned quantity it really is; in every other case an arithmetic shift is right.
+     bool is64 = eSize == 64;
+     bool wrapped = is64 && (~value & (value ^ rounded)) < 0L;
+
+     if (wrapped)
+     {
+         return (long)((ulong)rounded >> shift);
+     }
+
+     return rounded >> shift;
+ }
+
+ private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize) // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+ {
+ if (round)
+ {
+ if (shift > 64) // beyond 64 neither the value nor the rounding carry can survive
+ {
+ return 0UL;
+ }
+ // Rounding adds 1 << (shift - 1) before shifting; for shift == 64 this is bit 63.
+ ulong roundConst = 1UL << (shift - 1);
+
+ ulong add = value + roundConst;
+ // Only 64-bit elements get the carry check below.
+ if (eSize == 64)
+ {
+ if ((add < value) && (add < roundConst)) // unsigned wrap-around: the true sum has bit 64 set
+ {
+ if (shift == 64) // C# masks the shift count to 6 bits, so 64 is special-cased
+ {
+ return 1UL;
+ }
+ // Re-insert the lost carry at the position bit 64 lands on after the shift.
+ return (add >> shift) | (0x8000000000000000UL >> (shift - 1));
+ }
+ else
+ {
+ if (shift == 64)
+ {
+ return 0UL;
+ }
+
+ return add >> shift;
+ }
+ }
+ else /* if (eSize != 64) */
+ {
+ if (shift == 64) // same special case: "add >> 64" would otherwise behave as "add >> 0"
+ {
+ return 0UL;
+ }
+
+ return add >> shift;
+ }
+ }
+ else /* if (!round) */
+ {
+ if (shift >= eSize) // every bit of the element is shifted out
+ {
+ return 0UL;
+ }
+
+ return value >> shift;
+ }
+ }
+
+ // Saturates by sign only: a non-zero op becomes the element's maximum (op > 0) or minimum
+ // (op < 0) and FPSR.Qc is set on the given context; zero is returned unchanged.
+ // eSize := {8, 16, 32, 64}.
+ private static long SignedSignSatQ(long op, int eSize, ExecutionContext context)
+ {
+     if (op == 0L)
+     {
+         return 0L;
+     }
+
+     // Any non-zero input saturates, so the sticky flag is set in both remaining cases.
+     context.Fpsr |= FPSR.Qc;
+
+     long signBit = 1L << (eSize - 1);
+
+     if (op > 0L)
+     {
+         return signBit - 1L; // largest positive value of the element
+     }
+
+     return -signBit; // most negative value of the element
+ }
+
+ // Unsigned sign-only saturation: a non-zero op becomes the all-ones value of the element
+ // and FPSR.Qc is set on the given context; zero is returned unchanged.
+ // eSize := {8, 16, 32, 64}.
+ private static ulong UnsignedSignSatQ(ulong op, int eSize, ExecutionContext context)
+ {
+     if (op == 0UL)
+     {
+         return 0UL;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     // Keep the low eSize bits of an all-ones word.
+     return ulong.MaxValue >> (64 - eSize);
+ }
+#endregion
+
+#region "ShrImm64"
+ // 64-bit signed shift right by an immediate (expected to be at most 64) with optional
+ // rounding: roundConst is either 0 (no rounding) or 1L << (shift - 1), added before shifting.
+ public static long SignedShrImm64(long value, long roundConst, int shift)
+ {
+     bool rounding = roundConst != 0L;
+
+     if (shift > 63)
+     {
+         // A shift by 64 cannot be written with a C# shift (the count is masked to 6 bits).
+         // With rounding the result is always 0; without it only the sign remains.
+         if (rounding)
+         {
+             return 0L;
+         }
+
+         if (value < 0L)
+         {
+             return -1L;
+         }
+
+         return 0L;
+     }
+
+     if (!rounding)
+     {
+         return value >> shift;
+     }
+
+     long sum = value + roundConst;
+
+     // A non-negative value whose sum turned negative has wrapped; the sum is then an
+     // unsigned 64-bit quantity and must be shifted logically rather than arithmetically.
+     bool wrapped = (~value & (value ^ sum)) < 0L;
+
+     if (wrapped)
+     {
+         return (long)((ulong)sum >> shift);
+     }
+
+     return sum >> shift;
+ }
+
+ // 64-bit unsigned shift right by an immediate (expected to be at most 64) with optional
+ // rounding: roundConst is either 0 (no rounding) or 1L << (shift - 1), added before
+ // shifting. The carry out of that addition is tracked by hand because the 65-bit sum
+ // does not fit in a ulong.
+ public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
+ {
+     bool fullShift = shift > 63; // a shift by 64 cannot be written with a C# shift
+
+     if (roundConst == 0L)
+     {
+         // No rounding: a plain logical shift.
+         if (fullShift)
+         {
+             return 0UL;
+         }
+
+         return value >> shift;
+     }
+
+     ulong increment = (ulong)roundConst;
+     ulong sum = value + increment;
+
+     // Wrap-around of the unsigned addition means bit 64 of the true sum is set.
+     bool carry = sum < value && sum < increment;
+
+     if (fullShift)
+     {
+         // Only the carry bit survives a shift by 64.
+         return carry ? 1UL : 0UL;
+     }
+
+     ulong shifted = sum >> shift;
+
+     if (carry)
+     {
+         // Re-insert the carry at the position bit 64 lands on after the shift.
+         shifted |= 0x8000000000000000UL >> (shift - 1);
+     }
+
+     return shifted;
+ }
+#endregion
+
+#region "Rounding"
+ // Rounds value to an integral double using the rounding mode read from the FPCR of the
+ // ExecutionContext returned by NativeInterface.GetContext(). Any mode other than the
+ // three listed explicitly falls through to truncation.
+ public static double Round(double value)
+ {
+     FPRoundingMode mode = NativeInterface.GetContext().Fpcr.GetRoundingMode();
+
+     switch (mode)
+     {
+         case FPRoundingMode.ToNearest:
+             return Math.Round(value); // Math.Round defaults to ties-to-even
+
+         case FPRoundingMode.TowardsPlusInfinity:
+             return Math.Ceiling(value);
+
+         case FPRoundingMode.TowardsMinusInfinity:
+             return Math.Floor(value);
+
+         default:
+             // Remaining mode: FPRoundingMode.TowardsZero.
+             return Math.Truncate(value);
+     }
+ }
+
+ // Rounds value to an integral float using the rounding mode read from the FPCR of the
+ // ExecutionContext returned by NativeInterface.GetContext(). Any mode other than the
+ // three listed explicitly falls through to truncation.
+ public static float RoundF(float value)
+ {
+     FPRoundingMode mode = NativeInterface.GetContext().Fpcr.GetRoundingMode();
+
+     switch (mode)
+     {
+         case FPRoundingMode.ToNearest:
+             return MathF.Round(value); // MathF.Round defaults to ties-to-even
+
+         case FPRoundingMode.TowardsPlusInfinity:
+             return MathF.Ceiling(value);
+
+         case FPRoundingMode.TowardsMinusInfinity:
+             return MathF.Floor(value);
+
+         default:
+             // Remaining mode: FPRoundingMode.TowardsZero.
+             return MathF.Truncate(value);
+     }
+ }
+#endregion
+
+#region "Saturation"
+ public static int SatF32ToS32(float value) // NaN -> 0; otherwise clamp to the int range, then truncate
+ {
+     if (float.IsNaN(value)) { return 0; }
+     if (value >= int.MaxValue) { return int.MaxValue; }
+     if (value <= int.MinValue) { return int.MinValue; }
+     return (int)value;
+ }
+
+ public static long SatF32ToS64(float value) // NaN -> 0; otherwise clamp to the long range, then truncate
+ {
+     if (float.IsNaN(value)) { return 0; }
+     if (value >= long.MaxValue) { return long.MaxValue; }
+     if (value <= long.MinValue) { return long.MinValue; }
+     return (long)value;
+ }
+
+ public static uint SatF32ToU32(float value) // NaN -> 0; otherwise clamp to the uint range, then truncate
+ {
+     if (float.IsNaN(value)) { return 0; }
+     if (value >= uint.MaxValue) { return uint.MaxValue; }
+     if (value <= uint.MinValue) { return uint.MinValue; }
+     return (uint)value;
+ }
+
+ public static ulong SatF32ToU64(float value) // NaN -> 0; otherwise clamp to the ulong range, then truncate
+ {
+     if (float.IsNaN(value)) { return 0; }
+     if (value >= ulong.MaxValue) { return ulong.MaxValue; }
+     if (value <= ulong.MinValue) { return ulong.MinValue; }
+     return (ulong)value;
+ }
+
+ public static int SatF64ToS32(double value) // NaN -> 0; otherwise clamp to the int range, then truncate
+ {
+     if (double.IsNaN(value)) { return 0; }
+     if (value >= int.MaxValue) { return int.MaxValue; }
+     if (value <= int.MinValue) { return int.MinValue; }
+     return (int)value;
+ }
+
+ public static long SatF64ToS64(double value) // NaN -> 0; otherwise clamp to the long range, then truncate
+ {
+     if (double.IsNaN(value)) { return 0; }
+     if (value >= long.MaxValue) { return long.MaxValue; }
+     if (value <= long.MinValue) { return long.MinValue; }
+     return (long)value;
+ }
+
+ // Converts a double to uint, clamping to [0, uint.MaxValue]; NaN yields 0.
+ public static uint SatF64ToU32(double value)
+ {
+     if (double.IsNaN(value))
+     {
+         return 0;
+     }
+
+     if (value >= uint.MaxValue)
+     {
+         return uint.MaxValue;
+     }
+
+     if (value <= uint.MinValue)
+     {
+         return uint.MinValue;
+     }
+
+     return (uint)value;
+ }
+
+ // Converts a double to ulong, clamping to [0, ulong.MaxValue]; NaN yields 0.
+ public static ulong SatF64ToU64(double value)
+ {
+     if (double.IsNaN(value))
+     {
+         return 0;
+     }
+
+     if (value >= ulong.MaxValue)
+     {
+         return ulong.MaxValue;
+     }
+
+     if (value <= ulong.MinValue)
+     {
+         return ulong.MinValue;
+     }
+
+     return (ulong)value;
+ }
+#endregion
+
+#region "Saturating"
+ // Clamps a signed value to the signed range of an (8 << size)-bit element.
+ // FPSR.Qc is set whenever the value had to be clamped.
+ public static long SignedSrcSignedDstSatQ(long op, int size)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     int bits = 8 << size;
+
+     long upper = (1L << (bits - 1)) - 1L;
+     long lower = -(1L << (bits - 1));
+
+     if (op >= lower && op <= upper)
+     {
+         return op;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return op > upper ? upper : lower;
+ }
+
+ // Clamps a signed value to the unsigned range of an (8 << size)-bit element.
+ // Negative inputs saturate to 0, inputs above the element maximum saturate to
+ // that maximum; FPSR.Qc is set whenever clamping occurs.
+ public static ulong SignedSrcUnsignedDstSatQ(long op, int size)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     int eSize = 8 << size;
+
+     // Built by shifting all-ones right instead of "(1UL << eSize) - 1": C# masks
+     // a 64-bit shift count to 6 bits, so "1UL << 64" is 1 and the old limit
+     // collapsed to 0 for 64-bit elements.
+     ulong tMaxValue = ulong.MaxValue >> (64 - eSize);
+
+     if (op < 0L)
+     {
+         context.Fpsr |= FPSR.Qc;
+
+         return 0UL;
+     }
+
+     // op is non-negative here, so comparing in the unsigned domain is exact and
+     // avoids casting a full-width limit to a negative long.
+     if ((ulong)op > tMaxValue)
+     {
+         context.Fpsr |= FPSR.Qc;
+
+         return tMaxValue;
+     }
+
+     return (ulong)op;
+ }
+
+ // Clamps an unsigned value to the positive signed range of an (8 << size)-bit
+ // element, setting FPSR.Qc when the value exceeded that range.
+ public static long UnsignedSrcSignedDstSatQ(ulong op, int size)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     int bits = 8 << size;
+
+     long limit = (1L << (bits - 1)) - 1L;
+
+     if (op <= (ulong)limit)
+     {
+         return (long)op;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return limit;
+ }
+
+ // Clamps an unsigned value to the unsigned range of an (8 << size)-bit element,
+ // setting FPSR.Qc when the value exceeded that range.
+ public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     int eSize = 8 << size;
+
+     // Built by shifting all-ones right instead of "(1UL << eSize) - 1": C# masks
+     // a 64-bit shift count to 6 bits, so "1UL << 64" is 1 and the old limit
+     // collapsed to 0 for 64-bit elements.
+     ulong tMaxValue = ulong.MaxValue >> (64 - eSize);
+
+     if (op > tMaxValue)
+     {
+         context.Fpsr |= FPSR.Qc;
+
+         return tMaxValue;
+     }
+
+     return op;
+ }
+
+ // Passes the operand through unchanged unless it is long.MinValue, in which
+ // case long.MaxValue is returned and FPSR.Qc is set.
+ public static long UnarySignedSatQAbsOrNeg(long op)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     if (op != long.MinValue)
+     {
+         return op;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return long.MaxValue;
+ }
+
+ // Signed 64-bit saturating addition; sets FPSR.Qc when the result is clamped.
+ public static long BinarySignedSatQAdd(long op1, long op2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     long sum = op1 + op2;
+
+     // Overflow: both operands share a sign and the sum's sign differs from it.
+     bool overflowed = (~(op1 ^ op2) & (op1 ^ sum)) < 0L;
+
+     if (!overflowed)
+     {
+         return sum;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return op1 < 0L ? long.MinValue : long.MaxValue;
+ }
+
+ // Unsigned 64-bit saturating addition; sets FPSR.Qc when the result is clamped.
+ public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     ulong sum = op1 + op2;
+
+     // A wrapped sum is smaller than both operands.
+     bool wrapped = (sum < op1) && (sum < op2);
+
+     if (!wrapped)
+     {
+         return sum;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return ulong.MaxValue;
+ }
+
+ // Signed 64-bit saturating subtraction; sets FPSR.Qc when the result is clamped.
+ public static long BinarySignedSatQSub(long op1, long op2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     long difference = op1 - op2;
+
+     // Overflow: operands have different signs and the result's sign differs from op1's.
+     bool overflowed = ((op1 ^ op2) & (op1 ^ difference)) < 0L;
+
+     if (!overflowed)
+     {
+         return difference;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return op1 < 0L ? long.MinValue : long.MaxValue;
+ }
+
+ // Unsigned 64-bit saturating subtraction; clamps to 0 and sets FPSR.Qc when
+ // op2 is larger than op1.
+ public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     ulong difference = op1 - op2;
+
+     if (op1 >= op2)
+     {
+         return difference;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return ulong.MinValue;
+ }
+
+ // Adds an unsigned operand to a signed operand and saturates the result to the
+ // positive signed 64-bit limit; sets FPSR.Qc when the result is clamped.
+ public static long BinarySignedSatQAcc(ulong op1, long op2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     if (op1 > (ulong)long.MaxValue)
+     {
+         if (op2 < 0L)
+         {
+             // op1 is above long.MaxValue and op2 is negative: add in the
+             // unsigned domain and accept the sum only if it fits in a long.
+             ulong wideSum = op1 + (ulong)op2;
+
+             if (wideSum <= (ulong)long.MaxValue)
+             {
+                 return (long)wideSum;
+             }
+         }
+
+         // Either op2 is non-negative or the sum is still above long.MaxValue.
+         context.Fpsr |= FPSR.Qc;
+
+         return long.MaxValue;
+     }
+
+     // op1 fits in a long, so a plain signed addition is used.
+     long sum = (long)op1 + op2;
+
+     // A non-negative op2 producing a negative sum means the addition wrapped.
+     if ((~op2 & sum) >= 0L)
+     {
+         return sum;
+     }
+
+     context.Fpsr |= FPSR.Qc;
+
+     return long.MaxValue;
+ }
+
+ // Adds a signed operand to an unsigned operand and saturates the result to the
+ // unsigned 64-bit range; sets FPSR.Qc when the result is clamped.
+ public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     if (op1 < 0L)
+     {
+         if (op2 > (ulong)long.MaxValue)
+         {
+             // op2 is above long.MaxValue, so adding the negative op1 cannot
+             // drop below zero; the wrapped unsigned addition is returned as is.
+             return (ulong)op1 + op2;
+         }
+
+         // op2 fits in a long: add in the signed domain and clamp at zero.
+         long signedSum = op1 + (long)op2;
+
+         if (signedSum >= (long)ulong.MinValue)
+         {
+             return (ulong)signedSum;
+         }
+
+         context.Fpsr |= FPSR.Qc;
+
+         return ulong.MinValue;
+     }
+
+     // op1 is non-negative: ordinary unsigned addition with wrap detection.
+     ulong sum = (ulong)op1 + op2;
+
+     if ((sum < (ulong)op1) && (sum < op2))
+     {
+         context.Fpsr |= FPSR.Qc;
+
+         return ulong.MaxValue;
+     }
+
+     return sum;
+ }
+#endregion
+
+#region "Count"
+ // Counts how many bits directly below the top bit of a size-bit value are
+ // equal to that top bit.
+ public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+     // A set bit marks a position whose value differs from the bit above it.
+     ulong transitions = value ^ (value >> 1);
+
+     int top = size - 2;
+     int bit = top;
+
+     while (bit >= 0 && ((transitions >> bit) & 1UL) == 0UL)
+     {
+         bit--;
+     }
+
+     // When no transition is found, bit ends at -1 and the result is size - 1.
+     return (ulong)(top - bit);
+ }
+
+ // Leading-zero count of every 4-bit value, indexed by that value.
+ private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ // Counts leading zero bits of a size-bit value by scanning nibbles from the top.
+ public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+ {
+     if (value == 0ul)
+     {
+         return (ulong)size;
+     }
+
+     int total = 0;
+
+     for (int shift = size - 4; ; shift -= 4)
+     {
+         int zeros = ClzNibbleTbl[(int)(value >> shift) & 0b1111];
+
+         total += zeros;
+
+         // A nibble that is not all zeros holds the first set bit: stop there.
+         if (zeros != 4)
+         {
+             break;
+         }
+     }
+
+     return (ulong)total;
+ }
+
+ // Counts the set bits in the low 8 bits of value by summing adjacent bit groups.
+ public static ulong CountSetBits8(ulong value) // "size" is 8 (SIMD&FP Inst.).
+ {
+     ulong pairs = (value & 0x55ul) + ((value >> 1) & 0x55ul);
+     ulong nibbles = (pairs & 0x33ul) + ((pairs >> 2) & 0x33ul);
+
+     return (nibbles & 0x0ful) + (nibbles >> 4);
+ }
+#endregion
+
+#region "Table"
+ // Table-lookup entry points: the digit is the number of table registers and
+ // the suffix selects how many result bytes are looked up (V64: 8, V128: 16).
+ public static V128 Tbl1_V64(V128 vector, V128 tb0) => Tbl(vector, 8, tb0);
+
+ public static V128 Tbl1_V128(V128 vector, V128 tb0) => Tbl(vector, 16, tb0);
+
+ public static V128 Tbl2_V64(V128 vector, V128 tb0, V128 tb1) => Tbl(vector, 8, tb0, tb1);
+
+ public static V128 Tbl2_V128(V128 vector, V128 tb0, V128 tb1) => Tbl(vector, 16, tb0, tb1);
+
+ public static V128 Tbl3_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2) => Tbl(vector, 8, tb0, tb1, tb2);
+
+ public static V128 Tbl3_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2) => Tbl(vector, 16, tb0, tb1, tb2);
+
+ public static V128 Tbl4_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) => Tbl(vector, 8, tb0, tb1, tb2, tb3);
+
+ public static V128 Tbl4_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) => Tbl(vector, 16, tb0, tb1, tb2, tb3);
+
+ // Looks up the first `bytes` bytes of `vector` as indices into the
+ // concatenation of the table registers; out-of-range indices yield 0.
+ private static V128 Tbl(V128 vector, int bytes, params V128[] tb)
+ {
+     // Flatten the table registers into one contiguous lookup array.
+     byte[] lookup = new byte[tb.Length * 16];
+
+     int offset = 0;
+
+     foreach (V128 register in tb)
+     {
+         Buffer.BlockCopy(register.ToArray(), 0, lookup, offset, 16);
+
+         offset += 16;
+     }
+
+     byte[] indices = vector.ToArray();
+     byte[] result = new byte[16];
+
+     for (int i = 0; i < bytes; i++)
+     {
+         int entry = indices[i];
+
+         result[i] = entry < lookup.Length ? lookup[entry] : (byte)0;
+     }
+
+     return new V128(result);
+ }
+#endregion
+
+#region "Crc32"
+ // Polynomial constants passed to the CRC32 and CRC32C helpers below.
+ private const uint Crc32RevPoly = 0xedb88320;
+ private const uint Crc32cRevPoly = 0x82f63b78;
+
+ // CRC32 updates over a byte, halfword, word and doubleword of input.
+ public static uint Crc32b(uint crc, byte value)
+ {
+     return Crc32(crc, Crc32RevPoly, value);
+ }
+
+ public static uint Crc32h(uint crc, ushort value)
+ {
+     return Crc32h(crc, Crc32RevPoly, value);
+ }
+
+ public static uint Crc32w(uint crc, uint value)
+ {
+     return Crc32w(crc, Crc32RevPoly, value);
+ }
+
+ public static uint Crc32x(uint crc, ulong value)
+ {
+     return Crc32x(crc, Crc32RevPoly, value);
+ }
+
+ // CRC32C updates over a byte, halfword, word and doubleword of input.
+ public static uint Crc32cb(uint crc, byte value)
+ {
+     return Crc32(crc, Crc32cRevPoly, value);
+ }
+
+ public static uint Crc32ch(uint crc, ushort value)
+ {
+     return Crc32h(crc, Crc32cRevPoly, value);
+ }
+
+ public static uint Crc32cw(uint crc, uint value)
+ {
+     return Crc32w(crc, Crc32cRevPoly, value);
+ }
+
+ public static uint Crc32cx(uint crc, ulong value)
+ {
+     return Crc32x(crc, Crc32cRevPoly, value);
+ }
+
+ // Feeds the two bytes of val into the CRC, least significant byte first.
+ private static uint Crc32h(uint crc, uint poly, ushort val)
+ {
+     for (int shift = 0; shift < 16; shift += 8)
+     {
+         crc = Crc32(crc, poly, (byte)(val >> shift));
+     }
+
+     return crc;
+ }
+
+ // Feeds the four bytes of val into the CRC, least significant byte first.
+ private static uint Crc32w(uint crc, uint poly, uint val)
+ {
+     for (int shift = 0; shift < 32; shift += 8)
+     {
+         crc = Crc32(crc, poly, (byte)(val >> shift));
+     }
+
+     return crc;
+ }
+
+ // Feeds the eight bytes of val into the CRC, least significant byte first.
+ private static uint Crc32x(uint crc, uint poly, ulong val)
+ {
+     for (int shift = 0; shift < 64; shift += 8)
+     {
+         crc = Crc32(crc, poly, (byte)(val >> shift));
+     }
+
+     return crc;
+ }
+
+ // Updates crc with one input byte, processing one bit per iteration with the
+ // given polynomial; no initial or final inversion is applied here.
+ private static uint Crc32(uint crc, uint poly, byte val)
+ {
+     crc ^= val;
+
+     for (int round = 0; round < 8; round++)
+     {
+         bool lowBitSet = (crc & 1) != 0;
+
+         crc >>= 1;
+
+         if (lowBitSet)
+         {
+             crc ^= poly;
+         }
+     }
+
+     return crc;
+ }
+#endregion
+
+#region "Aes"
+ // XORs the round key into the state, then applies inverse ShiftRows and inverse SubBytes.
+ public static V128 Decrypt(V128 value, V128 roundKey)
+ {
+     V128 state = value ^ roundKey;
+
+     state = CryptoHelper.AesInvShiftRows(state);
+
+     return CryptoHelper.AesInvSubBytes(state);
+ }
+
+ // XORs the round key into the state, then applies ShiftRows and SubBytes.
+ public static V128 Encrypt(V128 value, V128 roundKey)
+ {
+     V128 state = value ^ roundKey;
+
+     state = CryptoHelper.AesShiftRows(state);
+
+     return CryptoHelper.AesSubBytes(state);
+ }
+
+ // Forwards to CryptoHelper.AesInvMixColumns.
+ public static V128 InverseMixColumns(V128 value) => CryptoHelper.AesInvMixColumns(value);
+
+ // Forwards to CryptoHelper.AesMixColumns.
+ public static V128 MixColumns(V128 value) => CryptoHelper.AesMixColumns(value);
+#endregion
+
+#region "Sha1"
+ // Runs four hash-update rounds that combine elements 1..3 of hash_abcd with
+ // ShaChoose, consuming one word of wk per round, and returns the updated hash_abcd.
+ public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+     for (int round = 0; round < 4; round++)
+     {
+         uint first = hash_abcd.GetUInt32(0);
+         uint second = hash_abcd.GetUInt32(1);
+
+         uint mixed = ShaChoose(second, hash_abcd.GetUInt32(2), hash_abcd.GetUInt32(3));
+
+         hash_e += first.Rol(5) + mixed + wk.GetUInt32(round);
+
+         hash_abcd.Insert(1, second.Rol(30));
+
+         Rol32_160(ref hash_e, ref hash_abcd);
+     }
+
+     return hash_abcd;
+ }
+
+ // Rotates the value left by 30 bits.
+ public static uint FixedRotate(uint hash_e) => Rol(hash_e, 30);
+
+ // Runs four hash-update rounds that combine elements 1..3 of hash_abcd with
+ // ShaMajority, consuming one word of wk per round, and returns the updated hash_abcd.
+ public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+     for (int round = 0; round < 4; round++)
+     {
+         uint first = hash_abcd.GetUInt32(0);
+         uint second = hash_abcd.GetUInt32(1);
+
+         uint mixed = ShaMajority(second, hash_abcd.GetUInt32(2), hash_abcd.GetUInt32(3));
+
+         hash_e += first.Rol(5) + mixed + wk.GetUInt32(round);
+
+         hash_abcd.Insert(1, second.Rol(30));
+
+         Rol32_160(ref hash_e, ref hash_abcd);
+     }
+
+     return hash_abcd;
+ }
+
+ // Runs four hash-update rounds that combine elements 1..3 of hash_abcd with
+ // ShaParity, consuming one word of wk per round, and returns the updated hash_abcd.
+ public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
+ {
+     for (int round = 0; round < 4; round++)
+     {
+         uint first = hash_abcd.GetUInt32(0);
+         uint second = hash_abcd.GetUInt32(1);
+
+         uint mixed = ShaParity(second, hash_abcd.GetUInt32(2), hash_abcd.GetUInt32(3));
+
+         hash_e += first.Rol(5) + mixed + wk.GetUInt32(round);
+
+         hash_abcd.Insert(1, second.Rol(30));
+
+         Rol32_160(ref hash_e, ref hash_abcd);
+     }
+
+     return hash_abcd;
+ }
+
+ // Builds a vector from the upper 64 bits of w0_3 and the lower 64 bits of
+ // w4_7, then XORs it with w0_3 and w8_11.
+ public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
+ {
+     V128 spliced = new V128(w0_3.GetUInt64(1), w4_7.GetUInt64(0));
+
+     return spliced ^ (w0_3 ^ w8_11);
+ }
+
+ // XORs tw0_3 with w12_15 shifted right by 32 bits, rotates each 32-bit element
+ // left by one, and additionally XORs element 0 rotated left by two into element 3.
+ public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
+ {
+     V128 mixed = tw0_3 ^ (w12_15 >> 32);
+
+     uint lane0 = mixed.GetUInt32(0);
+
+     uint out0 = Rol(lane0, 1);
+     uint out1 = Rol(mixed.GetUInt32(1), 1);
+     uint out2 = Rol(mixed.GetUInt32(2), 1);
+     uint out3 = Rol(mixed.GetUInt32(3), 1) ^ Rol(lane0, 2);
+
+     return new V128(out0, out1, out2, out3);
+ }
+
+ // Shifts x left by 32 bits, inserts y as the new element 0 of x, and stores
+ // the old element 3 of x in y.
+ private static void Rol32_160(ref uint y, ref V128 x)
+ {
+     uint shiftedOut = x.GetUInt32(3);
+
+     x = x << 32;
+     x.Insert(0, y);
+
+     y = shiftedOut;
+ }
+
+ // Per bit: selects the bit of y where x is 1 and the bit of z where x is 0.
+ private static uint ShaChoose(uint x, uint y, uint z) => (x & y) | (~x & z);
+
+ // Per bit: 1 when at least two of the three inputs are 1.
+ private static uint ShaMajority(uint x, uint y, uint z) => (x & y) | (x & z) | (y & z);
+
+ // Per bit: XOR of the three inputs.
+ private static uint ShaParity(uint x, uint y, uint z) => z ^ y ^ x;
+
+ // Rotates a 32-bit value left by count bits.
+ private static uint Rol(this uint value, int count) => (value >> (32 - count)) | (value << count);
+#endregion
+
+#region "Sha256"
+ // Runs Sha256Hash and returns its first vector (part1: true).
+ public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk) => Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
+
+ // Runs Sha256Hash and returns its second vector (part1: false). The parameters
+ // arrive as (efgh, abcd) and are swapped back before the call.
+ public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk) => Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
+
+ // For each element e, takes the word that follows w0_3[e] (w4_7[0] for the
+ // last element), applies Ror(7) ^ Ror(18) ^ Lsr(3) to it and adds w0_3[e].
+ public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
+ {
+     V128 result = new V128();
+
+     for (int e = 0; e < 4; e++)
+     {
+         uint next;
+
+         if (e < 3)
+         {
+             next = w0_3.GetUInt32(e + 1);
+         }
+         else
+         {
+             next = w4_7.GetUInt32(0);
+         }
+
+         uint sigma = Ror(next, 7) ^ Ror(next, 18) ^ Lsr(next, 3);
+
+         result.Insert(e, sigma + w0_3.GetUInt32(e));
+     }
+
+     return result;
+ }
+
+ // Produces four words. Elements 0 and 1 are derived from the upper half of
+ // w12_15; elements 2 and 3 are derived from elements 0 and 1 just produced.
+ public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
+ {
+     V128 result = new V128();
+
+     for (int e = 0; e < 4; e++)
+     {
+         // Inserting elements 2 and 3 never changes the low 64 bits read here.
+         ulong pair = e < 2 ? w12_15.GetUInt64(1) : result.GetUInt64(0);
+
+         uint source = pair.ULongPart(e & 1);
+
+         uint sigma = Ror(source, 17) ^ Ror(source, 19) ^ Lsr(source, 10);
+
+         // Elements 0..2 add w8_11[e + 1]; element 3 adds w12_15[0].
+         uint addend = e < 3 ? w8_11.GetUInt32(e + 1) : w12_15.GetUInt32(0);
+
+         sigma += w0_3.GetUInt32(e) + addend;
+
+         result.Insert(e, sigma);
+     }
+
+     return result;
+ }
+
+ // Runs four hash-update rounds over the two state vectors x and y, consuming
+ // one word of w per round. Returns x when part1 is true, otherwise y.
+ private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
+ {
+     for (int round = 0; round < 4; round++)
+     {
+         uint x0 = x.GetUInt32(0);
+         uint y0 = y.GetUInt32(0);
+
+         uint choose = ShaChoose(y0, y.GetUInt32(1), y.GetUInt32(2));
+         uint majority = ShaMajority(x0, x.GetUInt32(1), x.GetUInt32(2));
+
+         uint t1 = y.GetUInt32(3) + ShaHashSigma1(y0) + choose + w.GetUInt32(round);
+
+         // Element 0 of x is not touched by the insert at index 3, so the cached
+         // x0 is still current when it is used for the y update.
+         x.Insert(3, t1 + x.GetUInt32(3));
+         y.Insert(3, t1 + ShaHashSigma0(x0) + majority);
+
+         Rol32_256(ref y, ref x);
+     }
+
+     return part1 ? x : y;
+ }
+
+ // Shifts both vectors left by 32 bits; the element shifted out of each vector
+ // becomes element 0 of the other.
+ private static void Rol32_256(ref V128 y, ref V128 x)
+ {
+     uint xTop = x.GetUInt32(3);
+     uint yTop = y.GetUInt32(3);
+
+     x = x << 32;
+     x.Insert(0, yTop);
+
+     y = y << 32;
+     y.Insert(0, xTop);
+ }
+
+ // XOR of the input rotated right by 2, 13 and 22 bits.
+ private static uint ShaHashSigma0(uint x) => Ror(x, 2) ^ Ror(x, 13) ^ Ror(x, 22);
+
+ // XOR of the input rotated right by 6, 11 and 25 bits.
+ private static uint ShaHashSigma1(uint x) => Ror(x, 6) ^ Ror(x, 11) ^ Ror(x, 25);
+
+ // Rotates a 32-bit value right by count bits.
+ private static uint Ror(this uint value, int count) => (value << (32 - count)) | (value >> count);
+
+ // Logical shift right by count bits.
+ private static uint Lsr(this uint value, int count) => value >> count;
+
+ // Returns the low 32 bits when part is 0, otherwise the high 32 bits.
+ private static uint ULongPart(this ulong value, int part)
+ {
+     if (part == 0)
+     {
+         return (uint)(value & 0xFFFFFFFFUL);
+     }
+
+     return (uint)(value >> 32);
+ }
+#endregion
+
+#region "Reverse"
+ // Reverses the bit order of the low 8 bits of value; higher input bits are discarded.
+ public static uint ReverseBits8(uint value)
+ {
+     uint bitsSwapped = ((value >> 1) & 0x55) | ((value & 0x55) << 1);
+     uint pairsSwapped = ((bitsSwapped >> 2) & 0x33) | ((bitsSwapped & 0x33) << 2);
+
+     return ((pairsSwapped & 0x0f) << 4) | (pairsSwapped >> 4);
+ }
+
+ // Reverses the bit order of a 32-bit value by swapping progressively larger groups.
+ public static uint ReverseBits32(uint value)
+ {
+     uint v = value;
+
+     v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+     v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+     v = ((v >> 4) & 0x0f0f0f0f) | ((v & 0x0f0f0f0f) << 4);
+     v = ((v >> 8) & 0x00ff00ff) | ((v & 0x00ff00ff) << 8);
+
+     return (v << 16) | (v >> 16);
+ }
+
+ // Reverses the bit order of a 64-bit value by swapping progressively larger groups.
+ public static ulong ReverseBits64(ulong value)
+ {
+     ulong v = value;
+
+     v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1);
+     v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2);
+     v = ((v >> 4) & 0x0f0f0f0f0f0f0f0f) | ((v & 0x0f0f0f0f0f0f0f0f) << 4);
+     v = ((v >> 8) & 0x00ff00ff00ff00ff) | ((v & 0x00ff00ff00ff00ff) << 8);
+     v = ((v >> 16) & 0x0000ffff0000ffff) | ((v & 0x0000ffff0000ffff) << 16);
+
+     return (v << 32) | (v >> 32);
+ }
+
+ // Swaps the two bytes of each 16-bit group of a 32-bit value.
+ public static uint ReverseBytes16_32(uint value) => (uint)ReverseBytes16_64(value);
+
+ // Swaps the two bytes of each 16-bit group of a 64-bit value.
+ public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16);
+
+ // Reverses the byte order inside each 32-bit group of a 64-bit value.
+ public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32);
+
+ // Width of the group inside which the byte order is reversed.
+ private enum RevSize
+ {
+     Rev16,
+     Rev32,
+     Rev64
+ }
+
+ // Reverses the byte order inside every group of the requested width.
+ // Throws ArgumentOutOfRangeException (an ArgumentException) for an undefined size.
+ private static ulong ReverseBytes(ulong value, RevSize size)
+ {
+     // Validate first so an invalid size is reported with the parameter name
+     // as ParamName; the old code passed it as the exception message.
+     if (size < RevSize.Rev16 || size > RevSize.Rev64)
+     {
+         throw new ArgumentOutOfRangeException(nameof(size));
+     }
+
+     value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);
+
+     if (size == RevSize.Rev16)
+     {
+         return value;
+     }
+
+     value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);
+
+     if (size == RevSize.Rev32)
+     {
+         return value;
+     }
+
+     return ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);
+ }
+#endregion
+ }
+}
diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs
new file mode 100644
index 000000000..7358e6b2c
--- /dev/null
+++ b/ARMeilleure/Instructions/SoftFloat.cs
@@ -0,0 +1,2757 @@
+using ARMeilleure.State;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Instructions
+{
+ // Holds the precomputed lookup tables for the reciprocal and reciprocal
+ // square-root estimates.
+ static class SoftFloat
+ {
+     internal static readonly byte[] RecipEstimateTable;
+     internal static readonly byte[] RecipSqrtEstimateTable;
+
+     static SoftFloat()
+     {
+         RecipEstimateTable = BuildRecipEstimateTable();
+         RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable();
+     }
+
+     // Builds the 256-entry table; entry i is the estimate for input 256 + i,
+     // stored with the constant 256 subtracted so that it fits in a byte.
+     private static byte[] BuildRecipEstimateTable()
+     {
+         byte[] table = new byte[256];
+
+         for (uint input = 256u; input < 512u; input++)
+         {
+             Debug.Assert(256u <= input && input < 512u);
+
+             uint scaled = (input << 1) + 1u;
+
+             uint quotient = (1u << 19) / scaled;
+
+             // Halve with rounding.
+             uint estimate = (quotient + 1u) >> 1;
+
+             Debug.Assert(256u <= estimate && estimate < 512u);
+
+             table[input - 256u] = (byte)(estimate - 256u);
+         }
+
+         return table;
+     }
+
+     // Builds the 384-entry table; entry i is the estimate for input 128 + i,
+     // stored with the constant 256 subtracted so that it fits in a byte.
+     private static byte[] BuildRecipSqrtEstimateTable()
+     {
+         byte[] table = new byte[384];
+
+         for (uint input = 128u; input < 512u; input++)
+         {
+             Debug.Assert(128u <= input && input < 512u);
+
+             uint scaled;
+
+             if (input < 256u)
+             {
+                 scaled = (input << 1) + 1u;
+             }
+             else
+             {
+                 // Clear the lowest bit, add one, then double.
+                 scaled = ((input & ~1u) + 1u) << 1;
+             }
+
+             // Largest candidate whose successor squared, times scaled, stays below 2^28.
+             uint root = 512u;
+
+             while (scaled * (root + 1u) * (root + 1u) < (1u << 28))
+             {
+                 root++;
+             }
+
+             // Halve with rounding.
+             uint estimate = (root + 1u) >> 1;
+
+             Debug.Assert(256u <= estimate && estimate < 512u);
+
+             table[input - 128u] = (byte)(estimate - 256u);
+         }
+
+         return table;
+     }
+ }
+
+ static class SoftFloat16_32 // Software conversion of half-precision bit patterns to single precision.
+ {
+ public static float FPConvert(ushort valueBits) // Converts a half-precision bit pattern to float, recording FP exceptions in the current context.
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);
+
+ float result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if ((context.Fpcr & FPCR.Dn) != 0) // FPCR.Dn set: every NaN input is replaced by the default NaN.
+ {
+ result = FPDefaultNaN();
+ }
+ else
+ {
+ result = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN) // Only signaling NaNs raise InvalidOp.
+ {
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPInfinity(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPZero(sign);
+ }
+ else
+ {
+ result = FPRoundCv(real, context); // Only nonzero finite values reach the rounding step.
+ }
+
+ return result;
+ }
+
+ private static float FPDefaultNaN() // NaN returned when FPCR.Dn is set.
+ {
+ return -float.NaN; // NOTE(review): presumably negated to obtain a NaN with the sign bit clear - confirm.
+ }
+
+ private static float FPInfinity(bool sign) // Infinity carrying the requested sign (true = negative).
+ {
+ return sign ? float.NegativeInfinity : float.PositiveInfinity;
+ }
+
+ private static float FPZero(bool sign) // Zero carrying the requested sign (true = negative).
+ {
+ return sign ? -0f : +0f;
+ }
+
+ private static float FPMaxNormal(bool sign) // Largest finite float magnitude carrying the requested sign.
+ {
+ return sign ? float.MinValue : float.MaxValue;
+ }
+
+ private static double FPUnpackCv( // Splits a half-precision bit pattern into its class, sign and real value.
+ this ushort valueBits,
+ out FPType type,
+ out bool sign,
+ ExecutionContext context)
+ {
+ sign = (~(uint)valueBits & 0x8000u) == 0u; // True when bit 15 is set.
+
+ uint exp16 = ((uint)valueBits & 0x7C00u) >> 10; // Exponent field, bits 14..10.
+ uint frac16 = (uint)valueBits & 0x03FFu; // Fraction field, bits 9..0.
+
+ double real;
+
+ if (exp16 == 0u)
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Zero;
+ real = 0d;
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10));
+ }
+ }
+ else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0) // An all-ones exponent means Inf/NaN only while FPCR.Ahp is clear.
+ {
+ if (frac16 == 0u)
+ {
+ type = FPType.Infinity;
+ real = Math.Pow(2d, 1000); // Placeholder magnitude; FPConvert branches on type for infinities.
+ }
+ else
+ {
+ type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN; // Top fraction bit set means quiet NaN.
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10));
+ }
+
+ return sign ? -real : real;
+ }
+
+ private static float FPRoundCv(double real, ExecutionContext context) // Rounds a nonzero real value to float using the FPCR rounding mode, updating FPSR flags.
+ {
+ const int minimumExp = -126; // Exponent below which results are subnormal.
+
+ const int e = 8; // Exponent field width in bits.
+ const int f = 23; // Fraction field width in bits.
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d) // Normalize the mantissa into [1, 2); would not terminate for 0, which FPConvert handles before calling.
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) // Flush-to-zero: below-normal results become a signed zero with FPSR.Ufc set.
+ {
+ context.Fpsr |= FPSR.Ufc;
+
+ return FPZero(sign);
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); // 0 marks a subnormal result.
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent); // Rescale the mantissa to the fixed minimum exponent.
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); // Mantissa truncated to f fraction bits.
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant; // Fraction discarded by the truncation, in [0, 1).
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); // Ties round to the even mantissa.
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1u << f) // A subnormal rounded up into the smallest normal.
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1u << (f + 1)) // Mantissa overflowed: bump the exponent and renormalize.
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ float result;
+
+ if (biasedExp >= (1u << e) - 1u) // Exponent does not fit: overflow.
+ {
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FPException.Overflow, context);
+
+ error = 1d; // Forces the Inexact exception below.
+ }
+ else
+ {
+ result = BitConverter.Int32BitsToSingle(
+ (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu)));
+ }
+
+ if (error != 0d)
+ {
+ FPProcessException(FPException.Inexact, context);
+ }
+
+ return result;
+ }
+
+ private static float FPConvertNaN(ushort valueBits) // Widens a half NaN: keeps the sign, sets exponent and quiet bit, moves the low 9 fraction bits up by 13.
+ {
+ return BitConverter.Int32BitsToSingle(
+ (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13));
+ }
+
+ private static void FPProcessException(FPException exc, ExecutionContext context) // Records an FP exception in FPSR, or throws when its trap is enabled in FPCR.
+ {
+ int enable = (int)exc + 8; // The FPCR bit tested sits 8 positions above the FPSR bit that is set below.
+
+ if ((context.Fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+ }
+
+ static class SoftFloat32_16 // Software conversion of single-precision values to half-precision bit patterns.
+ {
+ public static ushort FPConvert(float value) // Converts a float to a half-precision bit pattern, recording FP exceptions in the current context.
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context);
+
+ bool altHp = (context.Fpcr & FPCR.Ahp) != 0; // True when FPCR.Ahp selects the alternative half-precision handling below.
+
+ ushort resultBits;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ if (altHp) // With FPCR.Ahp set, a NaN input produces a signed zero.
+ {
+ resultBits = FPZero(sign);
+ }
+ else if ((context.Fpcr & FPCR.Dn) != 0) // FPCR.Dn set: every NaN input is replaced by the default NaN.
+ {
+ resultBits = FPDefaultNaN();
+ }
+ else
+ {
+ resultBits = FPConvertNaN(valueBits);
+ }
+
+ if (type == FPType.SNaN || altHp) // Signaling NaNs, and any NaN while FPCR.Ahp is set, raise InvalidOp.
+ {
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ }
+ else if (type == FPType.Infinity)
+ {
+ if (altHp) // With FPCR.Ahp set, infinity becomes the all-ones magnitude and raises InvalidOp.
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else
+ {
+ resultBits = FPInfinity(sign);
+ }
+ }
+ else if (type == FPType.Zero)
+ {
+ resultBits = FPZero(sign);
+ }
+ else
+ {
+ resultBits = FPRoundCv(real, context); // Only nonzero finite values reach the rounding step.
+ }
+
+ return resultBits;
+ }
+
+ private static ushort FPDefaultNaN() // Bit pattern returned for a NaN when FPCR.Dn is set.
+ {
+ return (ushort)0x7E00u;
+ }
+
+ private static ushort FPInfinity(bool sign) // Half-precision infinity bit pattern carrying the requested sign (true = negative).
+ {
+ return sign ? (ushort)0xFC00u : (ushort)0x7C00u;
+ }
+
+ private static ushort FPZero(bool sign) // Half-precision zero bit pattern carrying the requested sign (true = negative).
+ {
+ return sign ? (ushort)0x8000u : (ushort)0x0000u;
+ }
+
+ private static ushort FPMaxNormal(bool sign) // Bit pattern used on overflow when the result must not become infinity.
+ {
+ return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu;
+ }
+
+ private static double FPUnpackCv( // Splits a float into its class, sign, raw bits and real value.
+ this float value,
+ out FPType type,
+ out bool sign,
+ out uint valueBits,
+ ExecutionContext context)
+ {
+ valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+ sign = (~valueBits & 0x80000000u) == 0u; // True when bit 31 is set.
+
+ uint exp32 = (valueBits & 0x7F800000u) >> 23; // Exponent field, bits 30..23.
+ uint frac32 = valueBits & 0x007FFFFFu; // Fraction field, bits 22..0.
+
+ double real;
+
+ if (exp32 == 0u)
+ {
+ if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0) // With FPCR.Fz set, subnormal inputs are treated as zero.
+ {
+ type = FPType.Zero;
+ real = 0d;
+
+ if (frac32 != 0u) // A flushed subnormal input raises InputDenorm.
+ {
+ FPProcessException(FPException.InputDenorm, context);
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Subnormal.
+ real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23));
+ }
+ }
+ else if (exp32 == 0xFFu)
+ {
+ if (frac32 == 0u)
+ {
+ type = FPType.Infinity;
+ real = Math.Pow(2d, 1000); // Placeholder magnitude; FPConvert branches on type for infinities.
+ }
+ else
+ {
+ type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; // Top fraction bit set means quiet NaN.
+ real = 0d;
+ }
+ }
+ else
+ {
+ type = FPType.Nonzero; // Normal.
+ real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23));
+ }
+
+ return sign ? -real : real;
+ }
+
+ private static ushort FPRoundCv(double real, ExecutionContext context) // Rounds a nonzero real value to half precision using the FPCR rounding mode, updating FPSR flags.
+ {
+ const int minimumExp = -14; // Exponent below which results are subnormal.
+
+ const int e = 5; // Exponent field width in bits.
+ const int f = 10; // Fraction field width in bits.
+
+ bool sign;
+ double mantissa;
+
+ if (real < 0d)
+ {
+ sign = true;
+ mantissa = -real;
+ }
+ else
+ {
+ sign = false;
+ mantissa = real;
+ }
+
+ int exponent = 0;
+
+ while (mantissa < 1d) // Normalize the mantissa into [1, 2); would not terminate for 0, which FPConvert handles before calling.
+ {
+ mantissa *= 2d;
+ exponent--;
+ }
+
+ while (mantissa >= 2d)
+ {
+ mantissa /= 2d;
+ exponent++;
+ }
+
+ uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); // 0 marks a subnormal result.
+
+ if (biasedExp == 0u)
+ {
+ mantissa /= Math.Pow(2d, minimumExp - exponent); // Rescale the mantissa to the fixed minimum exponent.
+ }
+
+ uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); // Mantissa truncated to f fraction bits.
+ double error = mantissa * Math.Pow(2d, f) - (double)intMant; // Fraction discarded by the truncation, in [0, 1).
+
+ if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+ {
+ FPProcessException(FPException.Underflow, context);
+ }
+
+ bool overflowToInf;
+ bool roundUp;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest:
+ roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); // Ties round to the even mantissa.
+ overflowToInf = true;
+ break;
+
+ case FPRoundingMode.TowardsPlusInfinity:
+ roundUp = (error != 0d && !sign);
+ overflowToInf = !sign;
+ break;
+
+ case FPRoundingMode.TowardsMinusInfinity:
+ roundUp = (error != 0d && sign);
+ overflowToInf = sign;
+ break;
+
+ case FPRoundingMode.TowardsZero:
+ roundUp = false;
+ overflowToInf = false;
+ break;
+ }
+
+ if (roundUp)
+ {
+ intMant++;
+
+ if (intMant == 1u << f) // A subnormal rounded up into the smallest normal.
+ {
+ biasedExp = 1u;
+ }
+
+ if (intMant == 1u << (f + 1)) // Mantissa overflowed: bump the exponent and renormalize.
+ {
+ biasedExp++;
+ intMant >>= 1;
+ }
+ }
+
+ ushort resultBits;
+
+ if ((context.Fpcr & FPCR.Ahp) == 0) // FPCR.Ahp clear: the all-ones exponent is reserved, so it counts as overflow.
+ {
+ if (biasedExp >= (1u << e) - 1u)
+ {
+ resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FPException.Overflow, context);
+
+ error = 1d; // Forces the Inexact exception below.
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+ else // FPCR.Ahp set: the all-ones exponent is usable, so only larger exponents are out of range.
+ {
+ if (biasedExp >= 1u << e)
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+ FPProcessException(FPException.InvalidOp, context);
+
+ error = 0d; // Suppresses the Inexact exception below.
+ }
+ else
+ {
+ resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+ }
+ }
+
+ if (error != 0d)
+ {
+ FPProcessException(FPException.Inexact, context);
+ }
+
+ return resultBits;
+ }
+
+ private static ushort FPConvertNaN(uint valueBits) // Narrows a float NaN: keeps the sign, sets exponent and quiet bit, keeps fraction bits 21..13 as bits 8..0.
+ {
+ return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13);
+ }
+
+ private static void FPProcessException(FPException exc, ExecutionContext context) // Records an FP exception in FPSR, or throws when its trap is enabled in FPCR.
+ {
+ int enable = (int)exc + 8; // The FPCR bit tested sits 8 positions above the FPSR bit that is set below.
+
+ if ((context.Fpcr & (FPCR)(1 << enable)) != 0)
+ {
+ throw new NotImplementedException("Floating-point trap handling.");
+ }
+ else
+ {
+ context.Fpsr |= (FPSR)(1 << (int)exc);
+ }
+ }
+ }
+
+ static class SoftFloat32
+ {
+ // Adds two floats with explicit handling of NaNs, infinities, signed zeros and
+ // flush-to-zero, recording FP exceptions in the current context.
+ public static float FPAdd(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return result;
+     }
+
+     bool inf1 = type1 == FPType.Infinity;
+     bool inf2 = type2 == FPType.Infinity;
+
+     if (inf1 && inf2 && sign1 != sign2)
+     {
+         // Infinities of opposite sign have no defined sum.
+         result = FPDefaultNaN();
+
+         FPProcessException(FPException.InvalidOp, context);
+
+         return result;
+     }
+
+     if ((inf1 && !sign1) || (inf2 && !sign2))
+     {
+         return FPInfinity(false);
+     }
+
+     if ((inf1 && sign1) || (inf2 && sign2))
+     {
+         return FPInfinity(true);
+     }
+
+     bool bothZero = type1 == FPType.Zero && type2 == FPType.Zero;
+
+     if (bothZero && sign1 == sign2)
+     {
+         return FPZero(sign1);
+     }
+
+     result = value1 + value2;
+
+     // With FPCR.Fz set, a subnormal sum is flushed to a signed zero.
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         result = FPZero(result < 0f);
+     }
+
+     return result;
+ }
+
+ // Compares two floats and returns a 4-bit result code: 0b0011 when either is a
+ // NaN, 0b0110 when equal, 0b1000 when value1 < value2, otherwise 0b0010.
+ // InvalidOp is raised for a signaling NaN, or for any NaN when signalNaNs is true.
+ public static int FPCompare(float value1, float value2, bool signalNaNs)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool signaling = type1 == FPType.SNaN || type2 == FPType.SNaN;
+     bool quiet = type1 == FPType.QNaN || type2 == FPType.QNaN;
+
+     if (signaling || quiet)
+     {
+         if (signaling || signalNaNs)
+         {
+             FPProcessException(FPException.InvalidOp, context);
+         }
+
+         return 0b0011;
+     }
+
+     if (value1 == value2)
+     {
+         return 0b0110;
+     }
+
+     return value1 < value2 ? 0b1000 : 0b0010;
+ }
+
+ // Equality comparison returning ZerosOrOnes(result). Any NaN operand yields
+ // ZerosOrOnes(false); only signaling NaNs raise InvalidOp.
+ public static float FPCompareEQ(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool signaling = type1 == FPType.SNaN || type2 == FPType.SNaN;
+     bool quiet = type1 == FPType.QNaN || type2 == FPType.QNaN;
+
+     if (!signaling && !quiet)
+     {
+         return ZerosOrOnes(value1 == value2);
+     }
+
+     float result = ZerosOrOnes(false);
+
+     if (signaling)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+     }
+
+     return result;
+ }
+
+ // Greater-than-or-equal comparison returning ZerosOrOnes(result). Any NaN
+ // operand, quiet or signaling, yields ZerosOrOnes(false) and raises InvalidOp.
+ public static float FPCompareGE(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
+     bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;
+
+     if (!nan1 && !nan2)
+     {
+         return ZerosOrOnes(value1 >= value2);
+     }
+
+     float result = ZerosOrOnes(false);
+
+     FPProcessException(FPException.InvalidOp, context);
+
+     return result;
+ }
+
+ // Greater-than comparison producing an all-ones bit pattern when
+ // value1 > value2 and all-zeros otherwise. Any NaN operand (quiet or
+ // signaling) yields all-zeros and reports InvalidOp.
+ public static float FPCompareGT(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
+     bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;
+
+     if (nan1 || nan2)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return ZerosOrOnes(false);
+     }
+
+     return ZerosOrOnes(value1 > value2);
+ }
+
+ // value1 <= value2, expressed as FPCompareGE with the operands swapped.
+ public static float FPCompareLE(float value1, float value2) => FPCompareGE(value2, value1);
+
+ // value1 < value2, expressed as FPCompareGT with the operands swapped.
+ public static float FPCompareLT(float value1, float value2) => FPCompareGT(value2, value1);
+
+ // Single-precision division with explicit handling of the special cases:
+ //   NaN operands        -> result chosen by FPProcessNaNs,
+ //   inf/inf or 0/0      -> default NaN, InvalidOp,
+ //   inf/x or x/0        -> signed infinity (DivideByZero unless the dividend is infinite),
+ //   0/x or x/inf        -> signed zero,
+ //   anything else       -> value1 / value2, flushed to zero (setting Ufc) when
+ //                          FPCR.Fz is set and the quotient is subnormal.
+ public static float FPDiv(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool dividendInf  = type1 == FPType.Infinity;
+     bool dividendZero = type1 == FPType.Zero;
+     bool divisorInf   = type2 == FPType.Infinity;
+     bool divisorZero  = type2 == FPType.Zero;
+
+     if ((dividendInf && divisorInf) || (dividendZero && divisorZero))
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if (dividendInf || divisorZero)
+     {
+         if (!dividendInf)
+         {
+             FPProcessException(FPException.DivideByZero, context);
+         }
+
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     if (dividendZero || divisorInf)
+     {
+         return FPZero(sign1 ^ sign2);
+     }
+
+     float quotient = value1 / value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(quotient))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         quotient = FPZero(quotient < 0f);
+     }
+
+     return quotient;
+ }
+
+ // Returns the larger of two single-precision values. NaN operands are
+ // resolved by FPProcessNaNs. When the selected operand is a zero the result
+ // is negative only if both operands are negative.
+ // Note: the flush-to-zero check is applied only when value2 is selected.
+ public static float FPMax(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     if (value1 > value2)
+     {
+         if (type1 == FPType.Infinity)
+         {
+             return FPInfinity(sign1);
+         }
+
+         if (type1 == FPType.Zero)
+         {
+             return FPZero(sign1 && sign2);
+         }
+
+         return value1;
+     }
+
+     if (type2 == FPType.Infinity)
+     {
+         return FPInfinity(sign2);
+     }
+
+     if (type2 == FPType.Zero)
+     {
+         return FPZero(sign1 && sign2);
+     }
+
+     float selected = value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(selected))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         selected = FPZero(selected < 0f);
+     }
+
+     return selected;
+ }
+
+ // Maximum that prefers a number over a quiet NaN: when exactly one operand
+ // is a quiet NaN it is replaced by negative infinity before delegating to
+ // FPMax, so the other operand is selected.
+ public static float FPMaxNum(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     // Only the classification is needed here; the unpacked values are discarded.
+     value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool quiet1 = type1 == FPType.QNaN;
+     bool quiet2 = type2 == FPType.QNaN;
+
+     if (quiet1 != quiet2)
+     {
+         if (quiet1)
+         {
+             value1 = FPInfinity(true);
+         }
+         else
+         {
+             value2 = FPInfinity(true);
+         }
+     }
+
+     return FPMax(value1, value2);
+ }
+
+ // Returns the smaller of two single-precision values. NaN operands are
+ // resolved by FPProcessNaNs. When the selected operand is a zero the result
+ // is negative if either operand is negative.
+ // Note: the flush-to-zero check is applied only when value2 is selected.
+ public static float FPMin(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     if (value1 < value2)
+     {
+         if (type1 == FPType.Infinity)
+         {
+             return FPInfinity(sign1);
+         }
+
+         if (type1 == FPType.Zero)
+         {
+             return FPZero(sign1 || sign2);
+         }
+
+         return value1;
+     }
+
+     if (type2 == FPType.Infinity)
+     {
+         return FPInfinity(sign2);
+     }
+
+     if (type2 == FPType.Zero)
+     {
+         return FPZero(sign1 || sign2);
+     }
+
+     float selected = value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(selected))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         selected = FPZero(selected < 0f);
+     }
+
+     return selected;
+ }
+
+ // Minimum that prefers a number over a quiet NaN: when exactly one operand
+ // is a quiet NaN it is replaced by positive infinity before delegating to
+ // FPMin, so the other operand is selected.
+ public static float FPMinNum(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     // Only the classification is needed here; the unpacked values are discarded.
+     value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool quiet1 = type1 == FPType.QNaN;
+     bool quiet2 = type2 == FPType.QNaN;
+
+     if (quiet1 != quiet2)
+     {
+         if (quiet1)
+         {
+             value1 = FPInfinity(false);
+         }
+         else
+         {
+             value2 = FPInfinity(false);
+         }
+     }
+
+     return FPMin(value1, value2);
+ }
+
+ // Single-precision multiplication with explicit special-case handling:
+ //   NaN operands   -> result chosen by FPProcessNaNs,
+ //   inf * 0        -> default NaN, InvalidOp,
+ //   inf * x        -> signed infinity,
+ //   0 * x          -> signed zero,
+ //   anything else  -> value1 * value2, flushed to zero (setting Ufc) when
+ //                     FPCR.Fz is set and the product is subnormal.
+ public static float FPMul(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if ((inf1 && zero2) || (zero1 && inf2))
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if (inf1 || inf2)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     if (zero1 || zero2)
+     {
+         return FPZero(sign1 ^ sign2);
+     }
+
+     float product = value1 * value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(product))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         product = FPZero(product < 0f);
+     }
+
+     return product;
+ }
+
+ // Computes valueA + (value1 * value2) for single precision.
+ // NaN operands are resolved by FPProcessNaNs3; however a quiet-NaN addend
+ // combined with an (infinity * zero) product yields the default NaN and
+ // reports InvalidOp. For non-NaN operands the infinity and zero cases are
+ // resolved explicitly before the arithmetic is performed.
+ public static float FPMulAdd(float valueA, float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context);
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     bool infTimesZero = (inf1 && zero2) || (zero1 && inf2);
+
+     float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context);
+
+     if (typeA == FPType.QNaN && infTimesZero)
+     {
+         result = FPDefaultNaN();
+
+         FPProcessException(FPException.InvalidOp, context);
+     }
+
+     if (done)
+     {
+         return result;
+     }
+
+     bool infA  = typeA == FPType.Infinity;
+     bool zeroA = typeA == FPType.Zero;
+
+     // Sign and class of the (not yet computed) product.
+     bool signP = sign1 ^ sign2;
+     bool infP  = inf1 || inf2;
+     bool zeroP = zero1 || zero2;
+
+     if (infTimesZero || (infA && infP && signA != signP))
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if ((infA && !signA) || (infP && !signP))
+     {
+         return FPInfinity(false);
+     }
+
+     if ((infA && signA) || (infP && signP))
+     {
+         return FPInfinity(true);
+     }
+
+     if (zeroA && zeroP && signA == signP)
+     {
+         return FPZero(signA);
+     }
+
+     // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+     // https://github.com/dotnet/corefx/issues/31903
+     float sum = valueA + (value1 * value2);
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(sum))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         sum = FPZero(sum < 0f);
+     }
+
+     return sum;
+ }
+
+ // Computes valueA - (value1 * value2) by negating value1 and delegating to
+ // FPMulAdd. The execution context is not fetched here: FPMulAdd obtains it
+ // itself, so a local copy would be an unused, redundant lookup.
+ public static float FPMulSub(float valueA, float value1, float value2)
+ {
+     value1 = value1.FPNeg();
+
+     return FPMulAdd(valueA, value1, value2);
+ }
+
+ // Extended multiplication: identical to an ordinary multiply except that
+ // (infinity * zero) produces a signed 2.0 instead of a NaN, and no
+ // exception is reported for that case.
+ public static float FPMulX(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if ((inf1 && zero2) || (zero1 && inf2))
+     {
+         return FPTwo(sign1 ^ sign2);
+     }
+
+     if (inf1 || inf2)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     if (zero1 || zero2)
+     {
+         return FPZero(sign1 ^ sign2);
+     }
+
+     float product = value1 * value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(product))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         product = FPZero(product < 0f);
+     }
+
+     return product;
+ }
+
+ // Table-driven reciprocal estimate for a single-precision value.
+ //   NaN                   -> FPProcessNaN,
+ //   infinity              -> signed zero,
+ //   zero                  -> signed infinity, DivideByZero,
+ //   |value| < 2^-128      -> overflow: infinity or largest normal depending on
+ //                            the rounding mode; Overflow and Inexact reported,
+ //   Fz set, |value|>=2^126-> signed zero, Ufc set,
+ //   otherwise             -> estimate built from SoftFloat.RecipEstimateTable.
+ public static float FPRecipEstimate(float value)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     // The unpacked value is intentionally discarded; 'value' keeps its original bits.
+     value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+     if (type == FPType.SNaN || type == FPType.QNaN)
+     {
+         return FPProcessNaN(type, op, context);
+     }
+
+     if (type == FPType.Infinity)
+     {
+         return FPZero(sign);
+     }
+
+     if (type == FPType.Zero)
+     {
+         FPProcessException(FPException.DivideByZero, context);
+
+         return FPInfinity(sign);
+     }
+
+     float magnitude = MathF.Abs(value);
+
+     if (magnitude < MathF.Pow(2f, -128))
+     {
+         FPRoundingMode mode = context.Fpcr.GetRoundingMode();
+
+         bool overflowToInf;
+
+         if (mode == FPRoundingMode.TowardsPlusInfinity)
+         {
+             overflowToInf = !sign;
+         }
+         else if (mode == FPRoundingMode.TowardsMinusInfinity)
+         {
+             overflowToInf = sign;
+         }
+         else if (mode == FPRoundingMode.TowardsZero)
+         {
+             overflowToInf = false;
+         }
+         else
+         {
+             // ToNearest and any other mode.
+             overflowToInf = true;
+         }
+
+         float overflowed = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+         FPProcessException(FPException.Overflow, context);
+         FPProcessException(FPException.Inexact, context);
+
+         return overflowed;
+     }
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && (magnitude >= MathF.Pow(2f, 126)))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         return FPZero(sign);
+     }
+
+     // Work on the fraction widened so that its top bit sits at bit 51.
+     ulong mantissa = (ulong)(op & 0x007FFFFFu) << 29;
+     uint exponent = (op & 0x7F800000u) >> 23;
+
+     if (exponent == 0u)
+     {
+         // Subnormal input: shift the fraction up by one or two places.
+         if ((mantissa & 0x0008000000000000ul) == 0ul)
+         {
+             mantissa = (mantissa & 0x0003FFFFFFFFFFFFul) << 2;
+             exponent -= 1u; // wraps around; consumed by the unsigned subtraction below
+         }
+         else
+         {
+             mantissa = (mantissa & 0x0007FFFFFFFFFFFFul) << 1;
+         }
+     }
+
+     uint scaled = (uint)(((mantissa & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+     uint resultExp = 253u - exponent;
+
+     uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+     mantissa = (ulong)(estimate & 0xFFu) << 44;
+
+     if (resultExp == 0u)
+     {
+         mantissa = ((mantissa & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+     }
+     else if (resultExp == uint.MaxValue)
+     {
+         // resultExp is -1 in wrapped unsigned arithmetic.
+         mantissa = ((mantissa & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+         resultExp = 0u;
+     }
+
+     uint signBits     = sign ? 0x80000000u : 0u;
+     uint exponentBits = (resultExp & 0xFFu) << 23;
+     uint fractionBits = (uint)(mantissa >> 29) & 0x007FFFFFu;
+
+     return BitConverter.Int32BitsToSingle((int)(signBits | exponentBits | fractionBits));
+ }
+
+ // Reciprocal step: computes 2 - (value1 * value2) by negating value1 first.
+ // (infinity * zero) yields +2.0; any other infinite operand yields a signed
+ // infinity. The arithmetic result is flushed to zero (setting Ufc) when
+ // FPCR.Fz is set and the result is subnormal.
+ public static float FPRecipStepFused(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPNeg().FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if ((inf1 && zero2) || (zero1 && inf2))
+     {
+         return FPTwo(false);
+     }
+
+     if (inf1 || inf2)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+     // https://github.com/dotnet/corefx/issues/31903
+     float step = 2f + (value1 * value2);
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(step))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         step = FPZero(step < 0f);
+     }
+
+     return step;
+ }
+
+ // Builds a value whose exponent field is the bitwise complement of the
+ // input's exponent field (with an all-ones complement replaced by 0xFE),
+ // whose sign matches the input and whose fraction is zero.
+ // NaN inputs are handled by FPProcessNaN.
+ public static float FPRecpX(float value)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+     if (type == FPType.SNaN || type == FPType.QNaN)
+     {
+         return FPProcessNaN(type, op, context);
+     }
+
+     uint invertedExp = (~op >> 23) & 0xFFu;
+
+     if (invertedExp == 0xFFu)
+     {
+         invertedExp = 0xFEu;
+     }
+
+     uint signBits = sign ? 0x80000000u : 0u;
+
+     return BitConverter.Int32BitsToSingle((int)(signBits | (invertedExp << 23)));
+ }
+
+ // Table-driven reciprocal square-root estimate for a single-precision value.
+ //   NaN              -> FPProcessNaN,
+ //   zero             -> signed infinity, DivideByZero,
+ //   negative         -> default NaN, InvalidOp,
+ //   +infinity        -> +0,
+ //   otherwise        -> estimate built from SoftFloat.RecipSqrtEstimateTable.
+ public static float FPRSqrtEstimate(float value)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+     if (type == FPType.SNaN || type == FPType.QNaN)
+     {
+         return FPProcessNaN(type, op, context);
+     }
+
+     if (type == FPType.Zero)
+     {
+         FPProcessException(FPException.DivideByZero, context);
+
+         return FPInfinity(sign);
+     }
+
+     if (sign)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if (type == FPType.Infinity)
+     {
+         return FPZero(false);
+     }
+
+     // Work on the fraction widened so that its top bit sits at bit 51.
+     ulong mantissa = (ulong)(op & 0x007FFFFFu) << 29;
+     uint exponent = (op & 0x7F800000u) >> 23;
+
+     if (exponent == 0u)
+     {
+         // Subnormal input: shift left until bit 51 is set, decrementing the
+         // (wrapping, unsigned) exponent once per shift, then shift once more.
+         while ((mantissa & 0x0008000000000000ul) == 0ul)
+         {
+             mantissa = (mantissa & 0x0007FFFFFFFFFFFFul) << 1;
+             exponent -= 1u;
+         }
+
+         mantissa = (mantissa & 0x0007FFFFFFFFFFFFul) << 1;
+     }
+
+     // The table index is formed differently for even and odd exponents.
+     bool evenExponent = (exponent & 1u) == 0u;
+
+     ulong keepMask = evenExponent ? 0x000FF00000000000ul : 0x000FE00000000000ul;
+     int   shift    = evenExponent ? 44 : 45;
+
+     uint scaled = (uint)(((mantissa & keepMask) | 0x0010000000000000ul) >> shift);
+
+     uint resultExp = (380u - exponent) >> 1;
+
+     uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+     uint exponentBits = (resultExp & 0xFFu) << 23;
+     uint fractionBits = (estimate & 0xFFu) << 15;
+
+     return BitConverter.Int32BitsToSingle((int)(exponentBits | fractionBits));
+ }
+
+ // Reciprocal square-root step: computes (3 - (value1 * value2)) / 2 by
+ // negating value1 first. (infinity * zero) yields +1.5; any other infinite
+ // operand yields a signed infinity. The arithmetic result is flushed to
+ // zero (setting Ufc) when FPCR.Fz is set and the result is subnormal.
+ public static float FPRSqrtStepFused(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPNeg().FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if ((inf1 && zero2) || (zero1 && inf2))
+     {
+         return FPOnePointFive(false);
+     }
+
+     if (inf1 || inf2)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+     // https://github.com/dotnet/corefx/issues/31903
+     float step = (3f + (value1 * value2)) / 2f;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(step))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         step = FPZero(step < 0f);
+     }
+
+     return step;
+ }
+
+ // Single-precision square root.
+ //   NaN        -> FPProcessNaN,
+ //   zero       -> zero with the same sign,
+ //   +infinity  -> +infinity,
+ //   negative   -> default NaN, InvalidOp,
+ //   otherwise  -> MathF.Sqrt, flushed to zero (setting Ufc) when FPCR.Fz is
+ //                 set and the result is subnormal.
+ public static float FPSqrt(float value)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value = value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+     if (type == FPType.SNaN || type == FPType.QNaN)
+     {
+         return FPProcessNaN(type, op, context);
+     }
+
+     if (type == FPType.Zero)
+     {
+         return FPZero(sign);
+     }
+
+     if (type == FPType.Infinity && !sign)
+     {
+         return FPInfinity(sign);
+     }
+
+     if (sign)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     float root = MathF.Sqrt(value);
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(root))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         root = FPZero(root < 0f);
+     }
+
+     return root;
+ }
+
+ // Single-precision subtraction with explicit special-case handling:
+ //   NaN operands                 -> result chosen by FPProcessNaNs,
+ //   inf - inf (same sign)        -> default NaN, InvalidOp,
+ //   +inf - x  or  x - (-inf)     -> +infinity,
+ //   -inf - x  or  x - (+inf)     -> -infinity,
+ //   two zeros of opposite sign   -> zero with the sign of value1,
+ //   anything else                -> value1 - value2, flushed to zero (setting
+ //                                   Ufc) when FPCR.Fz is set and the
+ //                                   difference is subnormal.
+ public static float FPSub(float value1, float value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+     float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if (inf1 && inf2 && sign1 == sign2)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if ((inf1 && !sign1) || (inf2 && sign2))
+     {
+         return FPInfinity(false);
+     }
+
+     if ((inf1 && sign1) || (inf2 && !sign2))
+     {
+         return FPInfinity(true);
+     }
+
+     if (zero1 && zero2 && sign1 != sign2)
+     {
+         return FPZero(sign1);
+     }
+
+     float difference = value1 - value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(difference))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         difference = FPZero(difference < 0f);
+     }
+
+     return difference;
+ }
+
+ // Returns the default NaN: sign bit clear, exponent all ones, and only the
+ // top fraction bit (bit 22, the bit FPUnpack tests to classify a quiet NaN)
+ // set. The bit pattern is spelled out explicitly instead of negating
+ // float.NaN, which only produced this value if the runtime's float.NaN
+ // constant happens to carry a set sign bit.
+ private static float FPDefaultNaN()
+ {
+     return BitConverter.Int32BitsToSingle(0x7FC00000);
+ }
+
+ // Returns negative infinity when sign is true, positive infinity otherwise.
+ private static float FPInfinity(bool sign)
+ {
+     if (sign)
+     {
+         return float.NegativeInfinity;
+     }
+
+     return float.PositiveInfinity;
+ }
+
+ // Returns negative zero when sign is true, positive zero otherwise.
+ private static float FPZero(bool sign)
+ {
+     if (sign)
+     {
+         return -0f;
+     }
+
+     return +0f;
+ }
+
+ // Returns the finite value of largest magnitude: float.MinValue when sign
+ // is true, float.MaxValue otherwise.
+ private static float FPMaxNormal(bool sign)
+ {
+     if (sign)
+     {
+         return float.MinValue;
+     }
+
+     return float.MaxValue;
+ }
+
+ // Returns -2.0 when sign is true, +2.0 otherwise.
+ private static float FPTwo(bool sign)
+ {
+     if (sign)
+     {
+         return -2f;
+     }
+
+     return +2f;
+ }
+
+ // Returns -1.5 when sign is true, +1.5 otherwise.
+ private static float FPOnePointFive(bool sign)
+ {
+     if (sign)
+     {
+         return -1.5f;
+     }
+
+     return +1.5f;
+ }
+
+ // Extension helper returning the arithmetic negation of the value.
+ private static float FPNeg(this float value) => -value;
+
+ // Returns a float whose 32 bits are all set when ones is true, or all
+ // clear when ones is false. Used as a comparison result mask.
+ private static float ZerosOrOnes(bool ones)
+ {
+     int mask = ones ? -1 : 0;
+
+     return BitConverter.Int32BitsToSingle(mask);
+ }
+
+ // Classifies a single-precision value and exposes its sign and raw bits.
+ //   type      - Zero, Nonzero, Infinity, QNaN or SNaN.
+ //   sign      - true when the sign bit is set.
+ //   valueBits - the raw 32-bit encoding of the input.
+ // Returns the input unchanged, except that:
+ //   - a subnormal input is replaced by a signed zero (and classified as
+ //     Zero, reporting InputDenorm) when FPCR.Fz is set;
+ //   - a NaN input is replaced by a signed zero (the NaN payload remains
+ //     available through valueBits).
+ private static float FPUnpack(
+     this float value,
+     out FPType type,
+     out bool sign,
+     out uint valueBits,
+     ExecutionContext context)
+ {
+     valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+     sign = (valueBits & 0x80000000u) != 0u;
+
+     uint exponentField = valueBits & 0x7F800000u;
+     uint fractionField = valueBits & 0x007FFFFFu;
+
+     if (exponentField == 0u)
+     {
+         // Zero or subnormal. FPCR is only consulted for a non-zero fraction.
+         if (fractionField == 0u || (context.Fpcr & FPCR.Fz) != 0)
+         {
+             type = FPType.Zero;
+             value = FPZero(sign);
+
+             if (fractionField != 0u)
+             {
+                 FPProcessException(FPException.InputDenorm, context);
+             }
+         }
+         else
+         {
+             type = FPType.Nonzero;
+         }
+
+         return value;
+     }
+
+     if (exponentField != 0x7F800000u)
+     {
+         type = FPType.Nonzero;
+
+         return value;
+     }
+
+     // Exponent all ones: infinity or NaN.
+     if (fractionField == 0u)
+     {
+         type = FPType.Infinity;
+
+         return value;
+     }
+
+     // Bit 22 set means quiet NaN, clear means signaling NaN.
+     type = (valueBits & 0x00400000u) != 0u ? FPType.QNaN : FPType.SNaN;
+
+     return FPZero(sign);
+ }
+
+ // Selects and processes the NaN operand of a two-operand operation.
+ // Priority: op1 if signaling, op2 if signaling, op1 if quiet, op2 if quiet.
+ // 'done' is true when a NaN was found (the return value is then the final
+ // result); otherwise 'done' is false and +0 is returned as a placeholder.
+ private static float FPProcessNaNs(
+     FPType type1,
+     FPType type2,
+     uint op1,
+     uint op2,
+     out bool done,
+     ExecutionContext context)
+ {
+     FPType nanType;
+     uint nanBits;
+
+     if (type1 == FPType.SNaN)
+     {
+         nanType = type1;
+         nanBits = op1;
+     }
+     else if (type2 == FPType.SNaN)
+     {
+         nanType = type2;
+         nanBits = op2;
+     }
+     else if (type1 == FPType.QNaN)
+     {
+         nanType = type1;
+         nanBits = op1;
+     }
+     else if (type2 == FPType.QNaN)
+     {
+         nanType = type2;
+         nanBits = op2;
+     }
+     else
+     {
+         done = false;
+
+         return FPZero(false);
+     }
+
+     done = true;
+
+     return FPProcessNaN(nanType, nanBits, context);
+ }
+
+ // Selects and processes the NaN operand of a three-operand operation.
+ // Priority: the first signaling NaN among op1, op2, op3; failing that, the
+ // first quiet NaN among op1, op2, op3.
+ // 'done' is true when a NaN was found (the return value is then the final
+ // result); otherwise 'done' is false and +0 is returned as a placeholder.
+ private static float FPProcessNaNs3(
+     FPType type1,
+     FPType type2,
+     FPType type3,
+     uint op1,
+     uint op2,
+     uint op3,
+     out bool done,
+     ExecutionContext context)
+ {
+     FPType nanType;
+     uint nanBits;
+
+     if (type1 == FPType.SNaN)
+     {
+         nanType = type1;
+         nanBits = op1;
+     }
+     else if (type2 == FPType.SNaN)
+     {
+         nanType = type2;
+         nanBits = op2;
+     }
+     else if (type3 == FPType.SNaN)
+     {
+         nanType = type3;
+         nanBits = op3;
+     }
+     else if (type1 == FPType.QNaN)
+     {
+         nanType = type1;
+         nanBits = op1;
+     }
+     else if (type2 == FPType.QNaN)
+     {
+         nanType = type2;
+         nanBits = op2;
+     }
+     else if (type3 == FPType.QNaN)
+     {
+         nanType = type3;
+         nanBits = op3;
+     }
+     else
+     {
+         done = false;
+
+         return FPZero(false);
+     }
+
+     done = true;
+
+     return FPProcessNaN(nanType, nanBits, context);
+ }
+
+ // Produces the result for a NaN operand given its raw bits.
+ // A signaling NaN has bit 22 set (making it quiet) and reports InvalidOp.
+ // When FPCR.Dn is set the default NaN is returned instead of the operand.
+ private static float FPProcessNaN(FPType type, uint op, ExecutionContext context)
+ {
+     bool signaling = type == FPType.SNaN;
+
+     if (signaling)
+     {
+         op |= 0x00400000u;
+
+         FPProcessException(FPException.InvalidOp, context);
+     }
+
+     bool useDefaultNaN = (context.Fpcr & FPCR.Dn) != 0;
+
+     return useDefaultNaN ? FPDefaultNaN() : BitConverter.Int32BitsToSingle((int)op);
+ }
+
+ // Records a floating-point exception. The FPCR bit at position
+ // ((int)exc + 8) is treated as the trap-enable bit for the exception:
+ // when it is set, trapping is not implemented and NotImplementedException
+ // is thrown; otherwise the FPSR bit at position (int)exc is set.
+ private static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+     int flagBit = (int)exc;
+     int trapEnableBit = flagBit + 8;
+
+     bool trapEnabled = (context.Fpcr & (FPCR)(1 << trapEnableBit)) != 0;
+
+     if (trapEnabled)
+     {
+         throw new NotImplementedException("Floating-point trap handling.");
+     }
+
+     context.Fpsr |= (FPSR)(1 << flagBit);
+ }
+ }
+
+ static class SoftFloat64
+ {
+ // Double-precision addition with explicit special-case handling:
+ //   NaN operands                -> result chosen by FPProcessNaNs,
+ //   inf + inf (opposite signs)  -> default NaN, InvalidOp,
+ //   either operand +inf         -> +infinity,
+ //   either operand -inf         -> -infinity,
+ //   two zeros of the same sign  -> zero with that sign,
+ //   anything else               -> value1 + value2, flushed to zero (setting
+ //                                  Ufc) when FPCR.Fz is set and the sum is
+ //                                  subnormal.
+ public static double FPAdd(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if (inf1 && inf2 && sign1 != sign2)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if ((inf1 && !sign1) || (inf2 && !sign2))
+     {
+         return FPInfinity(false);
+     }
+
+     if ((inf1 && sign1) || (inf2 && sign2))
+     {
+         return FPInfinity(true);
+     }
+
+     if (zero1 && zero2 && sign1 == sign2)
+     {
+         return FPZero(sign1);
+     }
+
+     double sum = value1 + value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(sum))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         sum = FPZero(sum < 0d);
+     }
+
+     return sum;
+ }
+
+ // Compares two double-precision values and returns a 4-bit result code:
+ //   0b0011 when either operand is a NaN,
+ //   0b0110 when the operands compare equal,
+ //   0b1000 when value1 is less than value2,
+ //   0b0010 otherwise.
+ // InvalidOp is reported when an operand is a signaling NaN, or for any NaN
+ // operand when signalNaNs is true.
+ public static int FPCompare(double value1, double value2, bool signalNaNs)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool anySignaling = type1 == FPType.SNaN || type2 == FPType.SNaN;
+     bool anyQuiet     = type1 == FPType.QNaN || type2 == FPType.QNaN;
+
+     if (anySignaling || anyQuiet)
+     {
+         if (anySignaling || signalNaNs)
+         {
+             FPProcessException(FPException.InvalidOp, context);
+         }
+
+         return 0b0011;
+     }
+
+     if (value1 == value2)
+     {
+         return 0b0110;
+     }
+
+     return value1 < value2 ? 0b1000 : 0b0010;
+ }
+
+ // Equality comparison producing the ZerosOrOnes(true) mask when the
+ // operands compare equal and the ZerosOrOnes(false) mask otherwise
+ // (including when either operand is a NaN). Only a signaling NaN operand
+ // reports InvalidOp.
+ public static double FPCompareEQ(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool anySignaling = type1 == FPType.SNaN || type2 == FPType.SNaN;
+     bool anyQuiet     = type1 == FPType.QNaN || type2 == FPType.QNaN;
+
+     if (!anySignaling && !anyQuiet)
+     {
+         return ZerosOrOnes(value1 == value2);
+     }
+
+     if (anySignaling)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+     }
+
+     return ZerosOrOnes(false);
+ }
+
+ // Greater-or-equal comparison producing the ZerosOrOnes(true) mask when
+ // value1 >= value2 and the ZerosOrOnes(false) mask otherwise. Any NaN
+ // operand (quiet or signaling) yields the false mask and reports InvalidOp.
+ public static double FPCompareGE(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
+     bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;
+
+     if (nan1 || nan2)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return ZerosOrOnes(false);
+     }
+
+     return ZerosOrOnes(value1 >= value2);
+ }
+
+ // Greater-than comparison producing the ZerosOrOnes(true) mask when
+ // value1 > value2 and the ZerosOrOnes(false) mask otherwise. Any NaN
+ // operand (quiet or signaling) yields the false mask and reports InvalidOp.
+ public static double FPCompareGT(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
+     bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;
+
+     if (nan1 || nan2)
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return ZerosOrOnes(false);
+     }
+
+     return ZerosOrOnes(value1 > value2);
+ }
+
+ // value1 <= value2, expressed as FPCompareGE with the operands swapped.
+ public static double FPCompareLE(double value1, double value2) => FPCompareGE(value2, value1);
+
+ // value1 < value2, expressed as FPCompareGT with the operands swapped.
+ public static double FPCompareLT(double value1, double value2) => FPCompareGT(value2, value1);
+
+ // Double-precision division with explicit handling of the special cases:
+ //   NaN operands        -> result chosen by FPProcessNaNs,
+ //   inf/inf or 0/0      -> default NaN, InvalidOp,
+ //   inf/x or x/0        -> signed infinity (DivideByZero unless the dividend is infinite),
+ //   0/x or x/inf        -> signed zero,
+ //   anything else       -> value1 / value2, flushed to zero (setting Ufc) when
+ //                          FPCR.Fz is set and the quotient is subnormal.
+ public static double FPDiv(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool dividendInf  = type1 == FPType.Infinity;
+     bool dividendZero = type1 == FPType.Zero;
+     bool divisorInf   = type2 == FPType.Infinity;
+     bool divisorZero  = type2 == FPType.Zero;
+
+     if ((dividendInf && divisorInf) || (dividendZero && divisorZero))
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if (dividendInf || divisorZero)
+     {
+         if (!dividendInf)
+         {
+             FPProcessException(FPException.DivideByZero, context);
+         }
+
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     if (dividendZero || divisorInf)
+     {
+         return FPZero(sign1 ^ sign2);
+     }
+
+     double quotient = value1 / value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(quotient))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         quotient = FPZero(quotient < 0d);
+     }
+
+     return quotient;
+ }
+
+ // Returns the larger of two double-precision values. NaN operands are
+ // resolved by FPProcessNaNs. When the selected operand is a zero the result
+ // is negative only if both operands are negative.
+ // Note: the flush-to-zero check is applied only when value2 is selected.
+ public static double FPMax(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     if (value1 > value2)
+     {
+         if (type1 == FPType.Infinity)
+         {
+             return FPInfinity(sign1);
+         }
+
+         if (type1 == FPType.Zero)
+         {
+             return FPZero(sign1 && sign2);
+         }
+
+         return value1;
+     }
+
+     if (type2 == FPType.Infinity)
+     {
+         return FPInfinity(sign2);
+     }
+
+     if (type2 == FPType.Zero)
+     {
+         return FPZero(sign1 && sign2);
+     }
+
+     double selected = value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(selected))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         selected = FPZero(selected < 0d);
+     }
+
+     return selected;
+ }
+
+ // Maximum that prefers a number over a quiet NaN: when exactly one operand
+ // is a quiet NaN it is replaced by negative infinity before delegating to
+ // FPMax, so the other operand is selected.
+ public static double FPMaxNum(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     // Only the classification is needed here; the unpacked values are discarded.
+     value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool quiet1 = type1 == FPType.QNaN;
+     bool quiet2 = type2 == FPType.QNaN;
+
+     if (quiet1 != quiet2)
+     {
+         if (quiet1)
+         {
+             value1 = FPInfinity(true);
+         }
+         else
+         {
+             value2 = FPInfinity(true);
+         }
+     }
+
+     return FPMax(value1, value2);
+ }
+
+ // Returns the smaller of two double-precision values. NaN operands are
+ // resolved by FPProcessNaNs. When the selected operand is a zero the result
+ // is negative if either operand is negative.
+ // Note: the flush-to-zero check is applied only when value2 is selected.
+ public static double FPMin(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     if (value1 < value2)
+     {
+         if (type1 == FPType.Infinity)
+         {
+             return FPInfinity(sign1);
+         }
+
+         if (type1 == FPType.Zero)
+         {
+             return FPZero(sign1 || sign2);
+         }
+
+         return value1;
+     }
+
+     if (type2 == FPType.Infinity)
+     {
+         return FPInfinity(sign2);
+     }
+
+     if (type2 == FPType.Zero)
+     {
+         return FPZero(sign1 || sign2);
+     }
+
+     double selected = value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(selected))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         selected = FPZero(selected < 0d);
+     }
+
+     return selected;
+ }
+
+ // Minimum that prefers a number over a quiet NaN: when exactly one operand
+ // is a quiet NaN it is replaced by positive infinity before delegating to
+ // FPMin, so the other operand is selected.
+ public static double FPMinNum(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     // Only the classification is needed here; the unpacked values are discarded.
+     value1.FPUnpack(out FPType type1, out _, out _, context);
+     value2.FPUnpack(out FPType type2, out _, out _, context);
+
+     bool quiet1 = type1 == FPType.QNaN;
+     bool quiet2 = type2 == FPType.QNaN;
+
+     if (quiet1 != quiet2)
+     {
+         if (quiet1)
+         {
+             value1 = FPInfinity(false);
+         }
+         else
+         {
+             value2 = FPInfinity(false);
+         }
+     }
+
+     return FPMin(value1, value2);
+ }
+
+ // Double-precision multiplication with explicit special-case handling:
+ //   NaN operands   -> result chosen by FPProcessNaNs,
+ //   inf * 0        -> default NaN, InvalidOp,
+ //   inf * x        -> signed infinity,
+ //   0 * x          -> signed zero,
+ //   anything else  -> value1 * value2, flushed to zero (setting Ufc) when
+ //                     FPCR.Fz is set and the product is subnormal.
+ public static double FPMul(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if ((inf1 && zero2) || (zero1 && inf2))
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if (inf1 || inf2)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     if (zero1 || zero2)
+     {
+         return FPZero(sign1 ^ sign2);
+     }
+
+     double product = value1 * value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(product))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         product = FPZero(product < 0d);
+     }
+
+     return product;
+ }
+
+ // Computes valueA + (value1 * value2) for double precision.
+ // NaN operands are resolved by FPProcessNaNs3; however a quiet-NaN addend
+ // combined with an (infinity * zero) product yields the default NaN and
+ // reports InvalidOp. For non-NaN operands the infinity and zero cases are
+ // resolved explicitly before the arithmetic is performed.
+ public static double FPMulAdd(double valueA, double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context);
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     bool infTimesZero = (inf1 && zero2) || (zero1 && inf2);
+
+     double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context);
+
+     if (typeA == FPType.QNaN && infTimesZero)
+     {
+         result = FPDefaultNaN();
+
+         FPProcessException(FPException.InvalidOp, context);
+     }
+
+     if (done)
+     {
+         return result;
+     }
+
+     bool infA  = typeA == FPType.Infinity;
+     bool zeroA = typeA == FPType.Zero;
+
+     // Sign and class of the (not yet computed) product.
+     bool signP = sign1 ^ sign2;
+     bool infP  = inf1 || inf2;
+     bool zeroP = zero1 || zero2;
+
+     if (infTimesZero || (infA && infP && signA != signP))
+     {
+         FPProcessException(FPException.InvalidOp, context);
+
+         return FPDefaultNaN();
+     }
+
+     if ((infA && !signA) || (infP && !signP))
+     {
+         return FPInfinity(false);
+     }
+
+     if ((infA && signA) || (infP && signP))
+     {
+         return FPInfinity(true);
+     }
+
+     if (zeroA && zeroP && signA == signP)
+     {
+         return FPZero(signA);
+     }
+
+     // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T);
+     // https://github.com/dotnet/corefx/issues/31903
+     double sum = valueA + (value1 * value2);
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(sum))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         sum = FPZero(sum < 0d);
+     }
+
+     return sum;
+ }
+
+ // Computes valueA - (value1 * value2) by negating value1 and delegating to FPMulAdd.
+ public static double FPMulSub(double valueA, double value1, double value2)
+     => FPMulAdd(valueA, value1.FPNeg(), value2);
+
+ // Extended multiplication: identical to an ordinary multiply except that
+ // (infinity * zero) produces a signed 2.0 instead of a NaN, and no
+ // exception is reported for that case.
+ public static double FPMulX(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return nanResult;
+     }
+
+     bool inf1  = type1 == FPType.Infinity;
+     bool inf2  = type2 == FPType.Infinity;
+     bool zero1 = type1 == FPType.Zero;
+     bool zero2 = type2 == FPType.Zero;
+
+     if ((inf1 && zero2) || (zero1 && inf2))
+     {
+         return FPTwo(sign1 ^ sign2);
+     }
+
+     if (inf1 || inf2)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     if (zero1 || zero2)
+     {
+         return FPZero(sign1 ^ sign2);
+     }
+
+     double product = value1 * value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(product))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         product = FPZero(product < 0d);
+     }
+
+     return product;
+ }
+
+ // Computes a reciprocal estimate of value from SoftFloat.RecipEstimateTable.
+ // NaN, infinity, zero, very small magnitudes and (under FPCR.Fz) very large
+ // magnitudes are handled before the table lookup. The classification comes
+ // from FPUnpack; its returned value is intentionally not used here.
+ public static double FPRecipEstimate(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ else if (Math.Abs(value) < Math.Pow(2d, -1024))
+ {
+ // Magnitude below 2^-1024: reported as overflow. The rounding mode
+ // decides between a signed infinity and the signed largest finite value.
+ bool overflowToInf;
+
+ switch (context.Fpcr.GetRoundingMode())
+ {
+ default:
+ case FPRoundingMode.ToNearest: overflowToInf = true; break;
+ case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case FPRoundingMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FPException.Overflow, context);
+ FPProcessException(FPException.Inexact, context);
+ }
+ else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022)))
+ {
+ // Flush-to-zero enabled and magnitude of at least 2^1022: return a
+ // signed zero and set FPSR.Ufc.
+ result = FPZero(sign);
+
+ context.Fpsr |= FPSR.Ufc;
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ // Zero exponent field: shift the fraction left past its leading one.
+ // Only shifts of one or two bits are handled; smaller inputs were
+ // already caught by the 2^-1024 check above.
+ if ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ // Unsigned decrement from zero wraps; the subtraction below relies on it.
+ exp -= 1u;
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ // Bit 52 (set here) followed by the top 8 fraction bits: 256..511.
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+ uint resultExp = 2045u - exp;
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+ // Only the low 8 bits of the estimate land in the result fraction (bits 51..44).
+ fraction = (ulong)(estimate & 0xFFu) << 44;
+
+ if (resultExp == 0u)
+ {
+ // Exponent zero: set bit 52 above the fraction and shift right by one.
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u)
+ {
+ // resultExp wrapped to the maximum uint (i.e. -1): shift by two instead.
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int64BitsToDouble(
+ (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul)));
+ }
+
+ return result;
+ }
+
+ // Reciprocal step: computes 2 + (-value1 * value2). value1 is negated before
+ // it is unpacked. Infinity * zero returns +2.0; any other infinite operand
+ // returns an infinity signed with the XOR of the (post-negation) signs.
+ // NaN operands are resolved by FPProcessNaNs. A subnormal result is flushed
+ // to a signed zero (setting FPSR.Ufc) when FPCR.Fz is set.
+ public static double FPRecipStepFused(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPNeg();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return result;
+     }
+
+     bool anyInfinity = type1 == FPType.Infinity || type2 == FPType.Infinity;
+     bool anyZero = type1 == FPType.Zero || type2 == FPType.Zero;
+
+     // One operand cannot be both infinite and zero, so this is "inf * 0".
+     if (anyInfinity && anyZero)
+     {
+         return FPTwo(false);
+     }
+
+     if (anyInfinity)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T);
+     // https://github.com/dotnet/corefx/issues/31903
+
+     result = 2d + (value1 * value2);
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         result = FPZero(result < 0d);
+     }
+
+     return result;
+ }
+
+ // Builds a value with the sign of the input, a zero fraction and the
+ // bitwise inverse of the input's exponent field. When the inverted exponent
+ // would be all ones (input exponent field of zero) 0x7FE is used instead.
+ // NaN inputs are resolved by FPProcessNaN.
+ public static double FPRecpX(double value)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+     if (type == FPType.SNaN || type == FPType.QNaN)
+     {
+         return FPProcessNaN(type, op, context);
+     }
+
+     ulong signBit = sign ? 1ul << 63 : 0ul;
+
+     ulong invertedExp = (~op >> 52) & 0x7FFul;
+
+     if (invertedExp == 0x7FFul)
+     {
+         invertedExp = 0x7FEul;
+     }
+
+     return BitConverter.Int64BitsToDouble((long)(signBit | invertedExp << 52));
+ }
+
+ // Computes a reciprocal square root estimate of value from
+ // SoftFloat.RecipSqrtEstimateTable. NaN, zero, negative and infinite inputs
+ // are handled before the table lookup. The classification comes from
+ // FPUnpack; its returned value is intentionally not used here.
+ public static double FPRSqrtEstimate(double value)
+ {
+ ExecutionContext context = NativeInterface.GetContext();
+
+ value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+ double result;
+
+ if (type == FPType.SNaN || type == FPType.QNaN)
+ {
+ result = FPProcessNaN(type, op, context);
+ }
+ else if (type == FPType.Zero)
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FPException.DivideByZero, context);
+ }
+ else if (sign)
+ {
+ // Any remaining negative input (including negative infinity) is invalid.
+ result = FPDefaultNaN();
+
+ FPProcessException(FPException.InvalidOp, context);
+ }
+ else if (type == FPType.Infinity)
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+ if (exp == 0u)
+ {
+ // Zero exponent field: shift left until bit 51 is set, decrementing
+ // the exponent for each shift (the unsigned value wraps below zero),
+ // then shift once more. The fraction is non-zero here because the
+ // Zero type was handled above, so the loop terminates.
+ while ((fraction & 0x0008000000000000ul) == 0ul)
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+
+ uint scaled;
+
+ // The exponent parity selects how many fraction bits are used.
+ if ((exp & 1u) == 0u)
+ {
+ // Even: bit 52 (set here) plus the top 8 fraction bits, 256..511.
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else
+ {
+ // Odd: bit 52 (set here) plus the top 7 fraction bits, 128..255.
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (3068u - exp) >> 1;
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+ // Sign bit is left clear; only the low 8 estimate bits reach the fraction (bits 51..44).
+ result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44));
+ }
+
+ return result;
+ }
+
+ // Reciprocal square root step: computes (3 + (-value1 * value2)) / 2.
+ // value1 is negated before it is unpacked. Infinity * zero returns +1.5;
+ // any other infinite operand returns an infinity signed with the XOR of the
+ // (post-negation) signs. NaN operands are resolved by FPProcessNaNs. A
+ // subnormal result is flushed to a signed zero (setting FPSR.Ufc) when
+ // FPCR.Fz is set.
+ public static double FPRSqrtStepFused(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPNeg();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return result;
+     }
+
+     bool anyInfinity = type1 == FPType.Infinity || type2 == FPType.Infinity;
+     bool anyZero = type1 == FPType.Zero || type2 == FPType.Zero;
+
+     // One operand cannot be both infinite and zero, so this is "inf * 0".
+     if (anyInfinity && anyZero)
+     {
+         return FPOnePointFive(false);
+     }
+
+     if (anyInfinity)
+     {
+         return FPInfinity(sign1 ^ sign2);
+     }
+
+     // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T);
+     // https://github.com/dotnet/corefx/issues/31903
+
+     result = (3d + (value1 * value2)) / 2d;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         result = FPZero(result < 0d);
+     }
+
+     return result;
+ }
+
+ // Square root. Zeros keep their sign, +infinity stays +infinity, every
+ // other negative input yields the default NaN and raises InvalidOp, and NaN
+ // inputs are resolved by FPProcessNaN. A subnormal result is flushed to a
+ // signed zero (setting FPSR.Ufc) when FPCR.Fz is set.
+ public static double FPSqrt(double value)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+     if (type == FPType.SNaN || type == FPType.QNaN)
+     {
+         return FPProcessNaN(type, op, context);
+     }
+
+     if (type == FPType.Zero)
+     {
+         return FPZero(sign);
+     }
+
+     if (sign)
+     {
+         // Negative non-zero input, negative infinity included.
+         double nan = FPDefaultNaN();
+
+         FPProcessException(FPException.InvalidOp, context);
+
+         return nan;
+     }
+
+     if (type == FPType.Infinity)
+     {
+         // The sign is known to be clear at this point.
+         return FPInfinity(sign);
+     }
+
+     double root = Math.Sqrt(value);
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(root))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         root = FPZero(root < 0d);
+     }
+
+     return root;
+ }
+
+ // Subtraction (value1 - value2). Two infinities of the same sign yield the
+ // default NaN and raise InvalidOp; a single infinity (or two of opposite
+ // sign) yields a correctly signed infinity; two zeros of opposite sign keep
+ // the sign of value1. NaN operands are resolved by FPProcessNaNs. A
+ // subnormal result is flushed to a signed zero (setting FPSR.Ufc) when
+ // FPCR.Fz is set.
+ public static double FPSub(double value1, double value2)
+ {
+     ExecutionContext context = NativeInterface.GetContext();
+
+     value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+     value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+     double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+     if (done)
+     {
+         return result;
+     }
+
+     bool inf1 = type1 == FPType.Infinity;
+     bool inf2 = type2 == FPType.Infinity;
+
+     if (inf1 && inf2 && sign1 == sign2)
+     {
+         result = FPDefaultNaN();
+
+         FPProcessException(FPException.InvalidOp, context);
+
+         return result;
+     }
+
+     if (inf1)
+     {
+         // Either value2 is finite or it is an infinity of the opposite
+         // sign; both ways the result takes the sign of value1.
+         return FPInfinity(sign1);
+     }
+
+     if (inf2)
+     {
+         // Subtracting an infinity flips its sign.
+         return FPInfinity(!sign2);
+     }
+
+     bool bothZero = type1 == FPType.Zero && type2 == FPType.Zero;
+
+     if (bothZero && sign1 != sign2)
+     {
+         return FPZero(sign1);
+     }
+
+     result = value1 - value2;
+
+     if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+     {
+         context.Fpsr |= FPSR.Ufc;
+
+         result = FPZero(result < 0d);
+     }
+
+     return result;
+ }
+
+ // Returns the default NaN: sign bit clear, exponent all ones and only the
+ // most significant fraction bit set (0x7FF8000000000000).
+ private static double FPDefaultNaN()
+ {
+     // The pattern is spelled out rather than written as "-double.NaN":
+     // negating the constant only gives this value when the runtime's NaN
+     // constant happens to have its sign bit set and the negation is folded
+     // as a plain sign flip.
+     return BitConverter.Int64BitsToDouble(0x7FF8000000000000L);
+ }
+
+ // Returns negative infinity when sign is set, positive infinity otherwise.
+ private static double FPInfinity(bool sign)
+ {
+     if (sign)
+     {
+         return double.NegativeInfinity;
+     }
+
+     return double.PositiveInfinity;
+ }
+
+ // Returns negative zero when sign is set, positive zero otherwise.
+ private static double FPZero(bool sign)
+ {
+     if (sign)
+     {
+         return -0d;
+     }
+
+     return +0d;
+ }
+
+ // Returns the finite double of largest magnitude: double.MinValue when sign
+ // is set, double.MaxValue otherwise.
+ private static double FPMaxNormal(bool sign)
+ {
+     if (sign)
+     {
+         return double.MinValue;
+     }
+
+     return double.MaxValue;
+ }
+
+ // Returns -2.0 when sign is set, +2.0 otherwise.
+ private static double FPTwo(bool sign)
+ {
+     if (sign)
+     {
+         return -2d;
+     }
+
+     return +2d;
+ }
+
+ // Returns -1.5 when sign is set, +1.5 otherwise.
+ private static double FPOnePointFive(bool sign)
+ {
+     if (sign)
+     {
+         return -1.5d;
+     }
+
+     return +1.5d;
+ }
+
+ // Extension helper returning the arithmetic negation of value.
+ private static double FPNeg(this double value)
+ {
+     double negated = -value;
+
+     return negated;
+ }
+
+ // Returns the double whose 64 bits are all set when ones is true, or all
+ // clear (+0.0) when it is false.
+ private static double ZerosOrOnes(bool ones)
+ {
+     long bits = ones ? -1L : 0L;
+
+     return BitConverter.Int64BitsToDouble(bits);
+ }
+
+ // Classifies a double and exposes its raw bits.
+ //   type      - Zero, Nonzero, Infinity, QNaN or SNaN.
+ //   sign      - true when bit 63 is set.
+ //   valueBits - the unmodified 64-bit pattern of the input.
+ // Returns the input unchanged for Nonzero and Infinity, and a signed zero
+ // for Zero and for both NaN kinds. A value with a zero exponent field and a
+ // non-zero fraction is treated as Zero when FPCR.Fz is set, in which case
+ // the InputDenorm exception is processed.
+ private static double FPUnpack(
+     this double value,
+     out FPType type,
+     out bool sign,
+     out ulong valueBits,
+     ExecutionContext context)
+ {
+     const ulong SignMask = 0x8000000000000000ul;
+     const ulong ExponentMask = 0x7FF0000000000000ul;
+     const ulong FractionMask = 0x000FFFFFFFFFFFFFul;
+     const ulong QuietBit = 0x0008000000000000ul;
+
+     valueBits = (ulong)BitConverter.DoubleToInt64Bits(value);
+
+     sign = (valueBits & SignMask) != 0ul;
+
+     ulong exponent = valueBits & ExponentMask;
+     ulong fraction = valueBits & FractionMask;
+
+     if (exponent == ExponentMask)
+     {
+         if (fraction == 0ul)
+         {
+             type = FPType.Infinity;
+
+             return value;
+         }
+
+         // The top fraction bit separates quiet from signalling NaNs.
+         type = (fraction & QuietBit) != 0ul ? FPType.QNaN : FPType.SNaN;
+
+         return FPZero(sign);
+     }
+
+     if (exponent != 0ul)
+     {
+         type = FPType.Nonzero;
+
+         return value;
+     }
+
+     // Exponent field is zero: a true zero, or a value with a non-zero
+     // fraction that is only kept when flush-to-zero is disabled.
+     if (fraction != 0ul && (context.Fpcr & FPCR.Fz) == 0)
+     {
+         type = FPType.Nonzero;
+
+         return value;
+     }
+
+     type = FPType.Zero;
+
+     if (fraction != 0ul)
+     {
+         FPProcessException(FPException.InputDenorm, context);
+     }
+
+     return FPZero(sign);
+ }
+
+ // Selects the NaN operand (if any) that determines the result of a
+ // two-operand operation. Priority: operand 1 if signalling, operand 2 if
+ // signalling, operand 1 if quiet, operand 2 if quiet. The chosen operand is
+ // passed through FPProcessNaN and done is set to true. When neither operand
+ // is a NaN, done is false and +0.0 is returned as a placeholder.
+ private static double FPProcessNaNs(
+     FPType type1,
+     FPType type2,
+     ulong op1,
+     ulong op2,
+     out bool done,
+     ExecutionContext context)
+ {
+     int chosen =
+         type1 == FPType.SNaN ? 1 :
+         type2 == FPType.SNaN ? 2 :
+         type1 == FPType.QNaN ? 1 :
+         type2 == FPType.QNaN ? 2 : 0;
+
+     done = chosen != 0;
+
+     switch (chosen)
+     {
+         case 1: return FPProcessNaN(type1, op1, context);
+         case 2: return FPProcessNaN(type2, op2, context);
+         default: return FPZero(false);
+     }
+ }
+
+ // Three-operand variant of FPProcessNaNs. Signalling NaNs are considered
+ // first (operands 1, 2, 3 in that order), then quiet NaNs in the same
+ // order. The chosen operand is passed through FPProcessNaN and done is set
+ // to true. When no operand is a NaN, done is false and +0.0 is returned as
+ // a placeholder.
+ private static double FPProcessNaNs3(
+     FPType type1,
+     FPType type2,
+     FPType type3,
+     ulong op1,
+     ulong op2,
+     ulong op3,
+     out bool done,
+     ExecutionContext context)
+ {
+     int chosen =
+         type1 == FPType.SNaN ? 1 :
+         type2 == FPType.SNaN ? 2 :
+         type3 == FPType.SNaN ? 3 :
+         type1 == FPType.QNaN ? 1 :
+         type2 == FPType.QNaN ? 2 :
+         type3 == FPType.QNaN ? 3 : 0;
+
+     done = chosen != 0;
+
+     switch (chosen)
+     {
+         case 1: return FPProcessNaN(type1, op1, context);
+         case 2: return FPProcessNaN(type2, op2, context);
+         case 3: return FPProcessNaN(type3, op3, context);
+         default: return FPZero(false);
+     }
+ }
+
+ // Produces the result for a NaN operand whose raw bits are op. A signalling
+ // NaN gets bit 51 set and raises InvalidOp. When FPCR.Dn is set the default
+ // NaN is returned instead of the (possibly modified) operand bits.
+ private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context)
+ {
+     const ulong QuietBit = 1ul << 51;
+
+     bool signalling = type == FPType.SNaN;
+
+     if (signalling)
+     {
+         op |= QuietBit;
+
+         FPProcessException(FPException.InvalidOp, context);
+     }
+
+     bool useDefaultNaN = (context.Fpcr & FPCR.Dn) != 0;
+
+     return useDefaultNaN ? FPDefaultNaN() : BitConverter.Int64BitsToDouble((long)op);
+ }
+
+ // Records a floating-point exception. The FPCR bit at position
+ // ((int)exc + 8) is treated as the trap enable for exc: when it is set,
+ // trapping is not implemented and NotImplementedException is thrown;
+ // otherwise the FPSR bit at position (int)exc is set.
+ private static void FPProcessException(FPException exc, ExecutionContext context)
+ {
+     int flagBit = (int)exc;
+
+     FPCR trapEnable = (FPCR)(1 << (flagBit + 8));
+
+     if ((context.Fpcr & trapEnable) != 0)
+     {
+         throw new NotImplementedException("Floating-point trap handling.");
+     }
+
+     context.Fpsr |= (FPSR)(1 << flagBit);
+ }
+ }
+}
diff --git a/ARMeilleure/IntermediateRepresentation/BasicBlock.cs b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs
new file mode 100644
index 000000000..06839f309
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs
@@ -0,0 +1,83 @@
+using System.Collections.Generic;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // A block of the control flow graph: an ordered list of operations, up to
+ // two successor links (Next and Branch) and the list of blocks that link to
+ // it. Assigning Next or Branch keeps the Predecessors lists of the old and
+ // the new target up to date.
+ class BasicBlock
+ {
+     private BasicBlock _next;
+     private BasicBlock _branch;
+
+     public int Index { get; set; }
+
+     public LinkedListNode Node { get; set; }
+
+     public LinkedList Operations { get; }
+
+     public List Predecessors { get; }
+
+     public HashSet DominanceFrontiers { get; }
+
+     public BasicBlock ImmediateDominator { get; set; }
+
+     public BasicBlock Next
+     {
+         get { return _next; }
+         set { _next = AddSuccessor(_next, value); }
+     }
+
+     public BasicBlock Branch
+     {
+         get { return _branch; }
+         set { _branch = AddSuccessor(_branch, value); }
+     }
+
+     // Creates an empty block with Index set to -1.
+     public BasicBlock()
+     {
+         Index = -1;
+
+         Operations = new LinkedList();
+         Predecessors = new List();
+         DominanceFrontiers = new HashSet();
+     }
+
+     public BasicBlock(int index) : this()
+     {
+         Index = index;
+     }
+
+     // Unregisters this block from the old successor, registers it with the
+     // new one, and returns the new successor so the setter can store it.
+     private BasicBlock AddSuccessor(BasicBlock oldBlock, BasicBlock newBlock)
+     {
+         if (oldBlock != null)
+         {
+             oldBlock.Predecessors.Remove(this);
+         }
+
+         if (newBlock != null)
+         {
+             newBlock.Predecessors.Add(this);
+         }
+
+         return newBlock;
+     }
+
+     // Adds a node to the block. If the branch block is not null, the list of
+     // operations should end with a branch instruction, so the new node is
+     // inserted before that last operation. The same is done for a non-empty
+     // block without any successor.
+     public void Append(Node node)
+     {
+         bool insertBeforeLast = _branch != null || (Operations.Last != null && IsLeafBlock());
+
+         if (!insertBeforeLast)
+         {
+             Operations.AddLast(node);
+
+             return;
+         }
+
+         Operations.AddBefore(Operations.Last, node);
+     }
+
+     // True when the block has neither a Next nor a Branch successor.
+     private bool IsLeafBlock()
+     {
+         return _branch == null && _next == null;
+     }
+
+     // Returns the last operation of the block, or null when it is empty.
+     public Node GetLastOp()
+     {
+         var last = Operations.Last;
+
+         return last != null ? last.Value : null;
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs
new file mode 100644
index 000000000..4c4ecb8f2
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs
@@ -0,0 +1,79 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // Opcodes of the intermediate representation, carried by Operation.
+ // Numeric values are implicit, so they follow declaration order: inserting
+ // or reordering members changes the value of everything declared after.
+ enum Instruction
+ {
+ Add,
+ BitwiseAnd,
+ BitwiseExclusiveOr,
+ BitwiseNot,
+ BitwiseOr,
+ Branch,
+ BranchIfFalse,
+ BranchIfTrue,
+ ByteSwap,
+ Call,
+ CompareAndSwap128,
+ CompareEqual,
+ CompareGreater,
+ CompareGreaterOrEqual,
+ CompareGreaterOrEqualUI,
+ CompareGreaterUI,
+ CompareLess,
+ CompareLessOrEqual,
+ CompareLessOrEqualUI,
+ CompareLessUI,
+ CompareNotEqual,
+ ConditionalSelect,
+ ConvertI64ToI32,
+ ConvertToFP,
+ ConvertToFPUI,
+ Copy,
+ CountLeadingZeros,
+ Divide,
+ DivideUI,
+ Load,
+ Load16,
+ Load8,
+ LoadArgument,
+ Multiply,
+ Multiply64HighSI,
+ Multiply64HighUI,
+ Negate,
+ Return,
+ RotateRight,
+ ShiftLeft,
+ ShiftRightSI,
+ ShiftRightUI,
+ SignExtend16,
+ SignExtend32,
+ SignExtend8,
+ StackAlloc,
+ Store,
+ Store16,
+ Store8,
+ Subtract,
+ VectorCreateScalar,
+ VectorExtract,
+ VectorExtract16,
+ VectorExtract8,
+ VectorInsert,
+ VectorInsert16,
+ VectorInsert8,
+ VectorOne,
+ VectorZero,
+ VectorZeroUpper64,
+ VectorZeroUpper96,
+ ZeroExtend16,
+ ZeroExtend32,
+ ZeroExtend8,
+
+ // NOTE(review): this second group is kept separate from the alphabetical
+ // list above; presumably these are only produced by later compiler stages
+ // rather than by the front end - confirm. Extended is the opcode that
+ // IntrinsicOperation passes to its base constructor.
+ Clobber,
+ CpuId,
+ Extended,
+ Fill,
+ LoadFromContext,
+ Spill,
+ SpillArg,
+ StoreToContext
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
new file mode 100644
index 000000000..1fe29e855
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -0,0 +1,138 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // Identifies the operation carried by an IntrinsicOperation. Every member is
+ // prefixed with X86; the names appear to follow x86 SIMD instruction
+ // mnemonics (TODO confirm against the code generator).
+ // Numeric values are implicit, so they follow declaration order.
+ // NOTE(review): X86Psrlq is declared before X86Psrldq, which breaks the
+ // otherwise alphabetical order. Do not reorder without checking whether any
+ // table is indexed by these values.
+ enum Intrinsic
+ {
+ X86Addpd,
+ X86Addps,
+ X86Addsd,
+ X86Addss,
+ X86Andnpd,
+ X86Andnps,
+ X86Cmppd,
+ X86Cmpps,
+ X86Cmpsd,
+ X86Cmpss,
+ X86Comisdeq,
+ X86Comisdge,
+ X86Comisdlt,
+ X86Comisseq,
+ X86Comissge,
+ X86Comisslt,
+ X86Cvtdq2pd,
+ X86Cvtdq2ps,
+ X86Cvtpd2dq,
+ X86Cvtpd2ps,
+ X86Cvtps2dq,
+ X86Cvtps2pd,
+ X86Cvtsd2si,
+ X86Cvtsd2ss,
+ X86Cvtss2sd,
+ X86Divpd,
+ X86Divps,
+ X86Divsd,
+ X86Divss,
+ X86Haddpd,
+ X86Haddps,
+ X86Maxpd,
+ X86Maxps,
+ X86Maxsd,
+ X86Maxss,
+ X86Minpd,
+ X86Minps,
+ X86Minsd,
+ X86Minss,
+ X86Movhlps,
+ X86Movlhps,
+ X86Mulpd,
+ X86Mulps,
+ X86Mulsd,
+ X86Mulss,
+ X86Paddb,
+ X86Paddd,
+ X86Paddq,
+ X86Paddw,
+ X86Pand,
+ X86Pandn,
+ X86Pavgb,
+ X86Pavgw,
+ X86Pblendvb,
+ X86Pcmpeqb,
+ X86Pcmpeqd,
+ X86Pcmpeqq,
+ X86Pcmpeqw,
+ X86Pcmpgtb,
+ X86Pcmpgtd,
+ X86Pcmpgtq,
+ X86Pcmpgtw,
+ X86Pmaxsb,
+ X86Pmaxsd,
+ X86Pmaxsw,
+ X86Pmaxub,
+ X86Pmaxud,
+ X86Pmaxuw,
+ X86Pminsb,
+ X86Pminsd,
+ X86Pminsw,
+ X86Pminub,
+ X86Pminud,
+ X86Pminuw,
+ X86Pmovsxbw,
+ X86Pmovsxdq,
+ X86Pmovsxwd,
+ X86Pmovzxbw,
+ X86Pmovzxdq,
+ X86Pmovzxwd,
+ X86Pmulld,
+ X86Pmullw,
+ X86Popcnt,
+ X86Por,
+ X86Pshufb,
+ X86Pslld,
+ X86Pslldq,
+ X86Psllq,
+ X86Psllw,
+ X86Psrad,
+ X86Psraw,
+ X86Psrld,
+ X86Psrlq,
+ X86Psrldq,
+ X86Psrlw,
+ X86Psubb,
+ X86Psubd,
+ X86Psubq,
+ X86Psubw,
+ X86Punpckhbw,
+ X86Punpckhdq,
+ X86Punpckhqdq,
+ X86Punpckhwd,
+ X86Punpcklbw,
+ X86Punpckldq,
+ X86Punpcklqdq,
+ X86Punpcklwd,
+ X86Pxor,
+ X86Rcpps,
+ X86Rcpss,
+ X86Roundpd,
+ X86Roundps,
+ X86Roundsd,
+ X86Roundss,
+ X86Rsqrtps,
+ X86Rsqrtss,
+ X86Shufpd,
+ X86Shufps,
+ X86Sqrtpd,
+ X86Sqrtps,
+ X86Sqrtsd,
+ X86Sqrtss,
+ X86Subpd,
+ X86Subps,
+ X86Subsd,
+ X86Subss,
+ X86Unpckhpd,
+ X86Unpckhps,
+ X86Unpcklpd,
+ X86Unpcklps,
+ X86Xorpd,
+ X86Xorps
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs b/ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs
new file mode 100644
index 000000000..34781b700
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // An Operation whose instruction is always Instruction.Extended and which
+ // additionally records which Intrinsic it stands for.
+ class IntrinsicOperation : Operation
+ {
+     public Intrinsic Intrinsic { get; }
+
+     // intrin  - the intrinsic this operation represents.
+     // dest    - destination operand, forwarded to Operation.
+     // sources - source operands, forwarded to Operation.
+     public IntrinsicOperation(Intrinsic intrin, Operand dest, params Operand[] sources)
+         : base(Instruction.Extended, dest, sources)
+     {
+         Intrinsic = intrin;
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs b/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs
new file mode 100644
index 000000000..742842fa7
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // An operand of kind OperandKind.Memory described by a base address
+ // operand, an optional index operand, a scale for the index and a constant
+ // displacement. BaseAddress and Index can be replaced after construction;
+ // Scale and Displacement are fixed.
+ class MemoryOperand : Operand
+ {
+     public Operand BaseAddress { get; set; }
+
+     public Operand Index { get; set; }
+
+     public Multiplier Scale { get; }
+
+     public int Displacement { get; }
+
+     // index defaults to null, scale to Multiplier.x1 and displacement to 0.
+     public MemoryOperand(
+         OperandType type,
+         Operand baseAddress,
+         Operand index = null,
+         Multiplier scale = Multiplier.x1,
+         int displacement = 0)
+         : base(OperandKind.Memory, type)
+     {
+         BaseAddress = baseAddress;
+         Index = index;
+
+         Scale = scale;
+         Displacement = displacement;
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Multiplier.cs b/ARMeilleure/IntermediateRepresentation/Multiplier.cs
new file mode 100644
index 000000000..23582072b
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Multiplier.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // Scale factor used by MemoryOperand.Scale. Each member's numeric value is
+ // the base-2 logarithm of the factor in its name (x1 = 0 ... x8 = 3).
+ enum Multiplier
+ {
+ x1 = 0,
+ x2 = 1,
+ x4 = 2,
+ x8 = 3
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Node.cs b/ARMeilleure/IntermediateRepresentation/Node.cs
new file mode 100644
index 000000000..167acd072
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Node.cs
@@ -0,0 +1,163 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // Base class for IR nodes. Holds the destination and source operands and,
+ // for every operand of kind LocalVariable, keeps that operand's Assignments
+ // (for destinations) and Uses (for sources) lists in sync with this node.
+ class Node
+ {
+ // First destination, or null when the node has none. Assigning replaces
+ // the whole destination list with a single operand (or with an empty list
+ // when the value is null).
+ public Operand Destination
+ {
+ get
+ {
+ return _destinations.Length != 0 ? GetDestination(0) : null;
+ }
+ set
+ {
+ if (value != null)
+ {
+ SetDestinations(new Operand[] { value });
+ }
+ else
+ {
+ SetDestinations(new Operand[0]);
+ }
+ }
+ }
+
+ private Operand[] _destinations;
+ private Operand[] _sources;
+
+ // For each destination/source slot, the list node that was added to the
+ // operand's Assignments/Uses list, kept so it can be removed again later.
+ private LinkedListNode[] _asgUseNodes;
+ private LinkedListNode[] _srcUseNodes;
+
+ public int DestinationsCount => _destinations.Length;
+ public int SourcesCount => _sources.Length;
+
+ // Creates a node with at most one destination (none when destination is
+ // null) and sourcesCount source slots that are initially null.
+ public Node(Operand destination, int sourcesCount)
+ {
+ Destination = destination;
+
+ _sources = new Operand[sourcesCount];
+
+ _srcUseNodes = new LinkedListNode[sourcesCount];
+ }
+
+ // Creates a node with the given destinations and sourcesCount source slots
+ // that are initially null. Throws ArgumentNullException when destinations
+ // is null.
+ public Node(Operand[] destinations, int sourcesCount)
+ {
+ SetDestinations(destinations ?? throw new ArgumentNullException(nameof(destinations)));
+
+ _sources = new Operand[sourcesCount];
+
+ _srcUseNodes = new LinkedListNode[sourcesCount];
+ }
+
+ public Operand GetDestination(int index)
+ {
+ return _destinations[index];
+ }
+
+ public Operand GetSource(int index)
+ {
+ return _sources[index];
+ }
+
+ // Replaces one destination. The old operand (if a local variable) drops
+ // this node from its Assignments; the new one (if a local variable) gains
+ // it. A null destination is accepted.
+ public void SetDestination(int index, Operand destination)
+ {
+ Operand oldOp = _destinations[index];
+
+ if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+ {
+ oldOp.Assignments.Remove(_asgUseNodes[index]);
+ }
+
+ if (destination != null && destination.Kind == OperandKind.LocalVariable)
+ {
+ _asgUseNodes[index] = destination.Assignments.AddLast(this);
+ }
+
+ _destinations[index] = destination;
+ }
+
+ // Replaces one source. The old operand (if a local variable) drops this
+ // node from its Uses; the new one (if a local variable) gains it. A null
+ // source is accepted.
+ public void SetSource(int index, Operand source)
+ {
+ Operand oldOp = _sources[index];
+
+ if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+ {
+ oldOp.Uses.Remove(_srcUseNodes[index]);
+ }
+
+ if (source != null && source.Kind == OperandKind.LocalVariable)
+ {
+ _srcUseNodes[index] = source.Uses.AddLast(this);
+ }
+
+ _sources[index] = source;
+ }
+
+ // Replaces the whole destination list, unregistering this node from the
+ // old local-variable destinations and registering it with the new ones.
+ // NOTE(review): when destinations already exist the passed array is stored
+ // directly (the caller's array is aliased), while the first call allocates
+ // a fresh array. Unlike SetDestination, null elements are not guarded here
+ // and would throw on newOp.Kind.
+ public void SetDestinations(Operand[] destinations)
+ {
+ if (_destinations != null)
+ {
+ for (int index = 0; index < _destinations.Length; index++)
+ {
+ Operand oldOp = _destinations[index];
+
+ if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+ {
+ oldOp.Assignments.Remove(_asgUseNodes[index]);
+ }
+ }
+
+ _destinations = destinations;
+ }
+ else
+ {
+ _destinations = new Operand[destinations.Length];
+ }
+
+ _asgUseNodes = new LinkedListNode[destinations.Length];
+
+ for (int index = 0; index < destinations.Length; index++)
+ {
+ Operand newOp = destinations[index];
+
+ _destinations[index] = newOp;
+
+ if (newOp.Kind == OperandKind.LocalVariable)
+ {
+ _asgUseNodes[index] = newOp.Assignments.AddLast(this);
+ }
+ }
+ }
+
+ // Replaces the whole source list (the new list may have a different
+ // length), unregistering this node from the old local-variable sources and
+ // registering it with the new ones. The passed array is copied.
+ // NOTE(review): unlike SetSource, null elements are not guarded here and
+ // would throw on newOp.Kind.
+ public void SetSources(Operand[] sources)
+ {
+ for (int index = 0; index < _sources.Length; index++)
+ {
+ Operand oldOp = _sources[index];
+
+ if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+ {
+ oldOp.Uses.Remove(_srcUseNodes[index]);
+ }
+ }
+
+ _sources = new Operand[sources.Length];
+
+ _srcUseNodes = new LinkedListNode[sources.Length];
+
+ for (int index = 0; index < sources.Length; index++)
+ {
+ Operand newOp = sources[index];
+
+ _sources[index] = newOp;
+
+ if (newOp.Kind == OperandKind.LocalVariable)
+ {
+ _srcUseNodes[index] = newOp.Uses.AddLast(this);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Operand.cs b/ARMeilleure/IntermediateRepresentation/Operand.cs
new file mode 100644
index 000000000..2df6256fc
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Operand.cs
@@ -0,0 +1,124 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // A value read or written by an IR node. Kind says what the operand is
+ // (constant, register, local variable, ...), Type gives its data type, and
+ // Value carries the payload: the bit pattern of a constant, the packed
+ // index/type of a register, or the number given to a local variable.
+ // Assignments and Uses are the lists that Node maintains for local
+ // variables.
+ class Operand
+ {
+     public OperandKind Kind { get; }
+
+     public OperandType Type { get; }
+
+     public ulong Value { get; private set; }
+
+     public LinkedList Assignments { get; }
+     public LinkedList Uses { get; }
+
+     private Operand()
+     {
+         Assignments = new LinkedList();
+         Uses = new LinkedList();
+     }
+
+     public Operand(OperandKind kind, OperandType type = OperandType.None) : this()
+     {
+         Kind = kind;
+         Type = type;
+     }
+
+     // 32-bit integer constant; the value is zero-extended into Value.
+     public Operand(int value) : this(OperandKind.Constant, OperandType.I32)
+     {
+         Value = (uint)value;
+     }
+
+     // 32-bit unsigned integer constant.
+     public Operand(uint value) : this(OperandKind.Constant, OperandType.I32)
+     {
+         Value = value;
+     }
+
+     // 64-bit integer constant.
+     public Operand(long value) : this(OperandKind.Constant, OperandType.I64)
+     {
+         Value = (ulong)value;
+     }
+
+     // 64-bit unsigned integer constant.
+     public Operand(ulong value) : this(OperandKind.Constant, OperandType.I64)
+     {
+         Value = value;
+     }
+
+     // Single-precision constant, stored as its 32-bit pattern.
+     public Operand(float value) : this(OperandKind.Constant, OperandType.FP32)
+     {
+         // Go through uint so the pattern is zero-extended, exactly like the
+         // int constructor does. Casting the int straight to ulong would
+         // sign-extend and set the upper 32 bits of Value for every float
+         // whose sign bit is set.
+         Value = (uint)BitConverter.SingleToInt32Bits(value);
+     }
+
+     // Double-precision constant, stored as its 64-bit pattern.
+     public Operand(double value) : this(OperandKind.Constant, OperandType.FP64)
+     {
+         Value = (ulong)BitConverter.DoubleToInt64Bits(value);
+     }
+
+     // Register operand: the register type is packed above bit 24 of Value
+     // and the index below it.
+     public Operand(int index, RegisterType regType, OperandType type) : this()
+     {
+         Kind = OperandKind.Register;
+         Type = type;
+
+         Value = (ulong)((int)regType << 24 | index);
+     }
+
+     // Unpacks the register stored by the register constructor: the low 24
+     // bits of Value are the index, the bits above them the register type.
+     public Register GetRegister()
+     {
+         return new Register((int)Value & 0xffffff, (RegisterType)(Value >> 24));
+     }
+
+     // The As* accessors reinterpret the low bits of Value; they do not
+     // check Kind or Type.
+     public byte AsByte()
+     {
+         return (byte)Value;
+     }
+
+     public short AsInt16()
+     {
+         return (short)Value;
+     }
+
+     public int AsInt32()
+     {
+         return (int)Value;
+     }
+
+     public long AsInt64()
+     {
+         return (long)Value;
+     }
+
+     public float AsFloat()
+     {
+         return BitConverter.Int32BitsToSingle((int)Value);
+     }
+
+     public double AsDouble()
+     {
+         return BitConverter.Int64BitsToDouble((long)Value);
+     }
+
+     // Stores number in Value. Throws InvalidOperationException when the
+     // operand is not a local variable.
+     internal void NumberLocal(int number)
+     {
+         if (Kind != OperandKind.LocalVariable)
+         {
+             throw new InvalidOperationException("The operand is not a local variable.");
+         }
+
+         Value = (ulong)number;
+     }
+
+     // Local variables keep the default object hash because their Value can
+     // change through NumberLocal. Every other kind hashes Value, Kind and
+     // Type together.
+     public override int GetHashCode()
+     {
+         if (Kind == OperandKind.LocalVariable)
+         {
+             return base.GetHashCode();
+         }
+         else
+         {
+             return (int)Value ^ ((int)Kind << 16) ^ ((int)Type << 20);
+         }
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs
new file mode 100644
index 000000000..4a930e03f
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs
@@ -0,0 +1,68 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // Factory shortcuts for creating Operand instances.
+ static class OperandHelper
+ {
+     // Creates an integer constant of the requested type: value is truncated
+     // to int for OperandType.I32 and kept as a long for every other type.
+     public static Operand Const(OperandType type, long value)
+     {
+         if (type == OperandType.I32)
+         {
+             return new Operand((int)value);
+         }
+
+         return new Operand(value);
+     }
+
+     // Creates the 32-bit constant 1 for true and 0 for false.
+     public static Operand Const(bool value) => new Operand(value ? 1 : 0);
+
+     public static Operand Const(int value) => new Operand(value);
+
+     public static Operand Const(uint value) => new Operand(value);
+
+     public static Operand Const(long value) => new Operand(value);
+
+     public static Operand Const(ulong value) => new Operand(value);
+
+     public static Operand ConstF(float value) => new Operand(value);
+
+     public static Operand ConstF(double value) => new Operand(value);
+
+     // Creates an operand of kind Label (type None).
+     public static Operand Label() => new Operand(OperandKind.Label);
+
+     // Creates a new local variable operand of the given type.
+     public static Operand Local(OperandType type) => new Operand(OperandKind.LocalVariable, type);
+
+     // Creates a register operand.
+     public static Operand Register(int index, RegisterType regType, OperandType type)
+     {
+         return new Operand(index, regType, type);
+     }
+
+     // Creates an operand of kind Undefined (type None).
+     public static Operand Undef() => new Operand(OperandKind.Undefined);
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/OperandKind.cs b/ARMeilleure/IntermediateRepresentation/OperandKind.cs
new file mode 100644
index 000000000..576183534
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/OperandKind.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // What an Operand represents. Node tracks assignments and uses only for
+ // LocalVariable operands; Memory is the kind used by MemoryOperand.
+ enum OperandKind
+ {
+ Constant,
+ Label,
+ LocalVariable,
+ Memory,
+ Register,
+ Undefined
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/OperandType.cs b/ARMeilleure/IntermediateRepresentation/OperandType.cs
new file mode 100644
index 000000000..bfdf5130c
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/OperandType.cs
@@ -0,0 +1,51 @@
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // Data type of an Operand. None is the default for operands created without
+ // an explicit type; OperandTypeExtensions gives the byte size and register
+ // class of the remaining members.
+ enum OperandType
+ {
+ None,
+ I32,
+ I64,
+ FP32,
+ FP64,
+ V128
+ }
+
+ // Helper queries on OperandType.
+ static class OperandTypeExtensions
+ {
+     // True for I32 and I64, false for every other type.
+     public static bool IsInteger(this OperandType type)
+     {
+         switch (type)
+         {
+             case OperandType.I32:
+             case OperandType.I64:
+                 return true;
+
+             default:
+                 return false;
+         }
+     }
+
+     // Maps a type to the register class that holds it: Integer for I32 and
+     // I64, Vector for FP32, FP64 and V128. Throws InvalidOperationException
+     // for any other type.
+     public static RegisterType ToRegisterType(this OperandType type)
+     {
+         switch (type)
+         {
+             case OperandType.I32:
+             case OperandType.I64:
+                 return RegisterType.Integer;
+
+             case OperandType.FP32:
+             case OperandType.FP64:
+             case OperandType.V128:
+                 return RegisterType.Vector;
+
+             default:
+                 throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+         }
+     }
+
+     // Size of a value of the given type in bytes: 4 for I32 and FP32, 8 for
+     // I64 and FP64, 16 for V128. Throws InvalidOperationException for any
+     // other type.
+     public static int GetSizeInBytes(this OperandType type)
+     {
+         switch (type)
+         {
+             case OperandType.I32:
+             case OperandType.FP32:
+                 return 4;
+
+             case OperandType.I64:
+             case OperandType.FP64:
+                 return 8;
+
+             case OperandType.V128:
+                 return 16;
+
+             default:
+                 throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+         }
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Operation.cs b/ARMeilleure/IntermediateRepresentation/Operation.cs
new file mode 100644
index 000000000..620bf3f6e
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Operation.cs
@@ -0,0 +1,40 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // A Node that carries an Instruction opcode.
+ class Operation : Node
+ {
+     public Instruction Instruction { get; private set; }
+
+     // Creates an operation with at most one destination.
+     public Operation(
+         Instruction instruction,
+         Operand destination,
+         params Operand[] sources) : base(destination, sources.Length)
+     {
+         Instruction = instruction;
+
+         AssignSources(sources);
+     }
+
+     // Creates an operation with several destinations.
+     public Operation(
+         Instruction instruction,
+         Operand[] destinations,
+         Operand[] sources) : base(destinations, sources.Length)
+     {
+         Instruction = instruction;
+
+         AssignSources(sources);
+     }
+
+     // Rewrites this operation in place as a Copy of the given source.
+     public void TurnIntoCopy(Operand source)
+     {
+         Instruction = Instruction.Copy;
+
+         SetSources(new[] { source });
+     }
+
+     // Stores every source through SetSource, slot by slot.
+     private void AssignSources(Operand[] sources)
+     {
+         int slot = 0;
+
+         foreach (Operand source in sources)
+         {
+             SetSource(slot++, source);
+         }
+     }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/PhiNode.cs b/ARMeilleure/IntermediateRepresentation/PhiNode.cs
new file mode 100644
index 000000000..30fc4d384
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/PhiNode.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // A Node with one source slot and one BasicBlock slot per predecessor; the
+ // block stored at an index is the one associated with the source at that
+ // same index.
+ class PhiNode : Node
+ {
+     private BasicBlock[] _blocks;
+
+     // Creates a phi with predecessorsCount empty source and block slots.
+     public PhiNode(Operand destination, int predecessorsCount)
+         : base(destination, predecessorsCount)
+     {
+         _blocks = new BasicBlock[predecessorsCount];
+     }
+
+     public BasicBlock GetBlock(int index) => _blocks[index];
+
+     public void SetBlock(int index, BasicBlock block) => _blocks[index] = block;
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Register.cs b/ARMeilleure/IntermediateRepresentation/Register.cs
new file mode 100644
index 000000000..745b31538
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Register.cs
@@ -0,0 +1,43 @@
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+    // Identifies a register by its index and its kind. Two registers are
+    // equal when both the index and the kind match.
+    struct Register : IEquatable<Register>
+    {
+        // Index of the register within its kind.
+        public int Index { get; }
+
+        // Kind of the register.
+        public RegisterType Type { get; }
+
+        public Register(int index, RegisterType type)
+        {
+            Index = index;
+            Type  = type;
+        }
+
+        public override int GetHashCode()
+        {
+            // Low 16 bits of the index, with the register type placed above them.
+            return (ushort)Index | ((int)Type << 16);
+        }
+
+        public static bool operator ==(Register x, Register y)
+        {
+            return x.Equals(y);
+        }
+
+        public static bool operator !=(Register x, Register y)
+        {
+            return !x.Equals(y);
+        }
+
+        public override bool Equals(object obj)
+        {
+            return obj is Register reg && Equals(reg);
+        }
+
+        // Strongly typed equality; avoids boxing when comparing two registers.
+        public bool Equals(Register other)
+        {
+            return other.Index == Index &&
+                   other.Type  == Type;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/RegisterType.cs b/ARMeilleure/IntermediateRepresentation/RegisterType.cs
new file mode 100644
index 000000000..e71795cb9
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/RegisterType.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+ // Kind of register a Register value refers to (stored in Register.Type).
+ enum RegisterType
+ {
+ Integer,
+ Vector,
+ Flag
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/IMemory.cs b/ARMeilleure/Memory/IMemory.cs
new file mode 100644
index 000000000..0c3849c07
--- /dev/null
+++ b/ARMeilleure/Memory/IMemory.cs
@@ -0,0 +1,37 @@
+namespace ARMeilleure.Memory
+{
+ // Scalar access to memory addressed by a 64-bit position. One read and
+ // one write method is declared for each signed and unsigned integer
+ // width (8, 16, 32 and 64 bits).
+ public interface IMemory
+ {
+ // Signed reads.
+ sbyte ReadSByte(long position);
+
+ short ReadInt16(long position);
+
+ int ReadInt32(long position);
+
+ long ReadInt64(long position);
+
+ // Unsigned reads.
+ byte ReadByte(long position);
+
+ ushort ReadUInt16(long position);
+
+ uint ReadUInt32(long position);
+
+ ulong ReadUInt64(long position);
+
+ // Signed writes.
+ void WriteSByte(long position, sbyte value);
+
+ void WriteInt16(long position, short value);
+
+ void WriteInt32(long position, int value);
+
+ void WriteInt64(long position, long value);
+
+ // Unsigned writes.
+ void WriteByte(long position, byte value);
+
+ void WriteUInt16(long position, ushort value);
+
+ void WriteUInt32(long position, uint value);
+
+ void WriteUInt64(long position, ulong value);
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/IMemoryManager.cs b/ARMeilleure/Memory/IMemoryManager.cs
new file mode 100644
index 000000000..bcee5db23
--- /dev/null
+++ b/ARMeilleure/Memory/IMemoryManager.cs
@@ -0,0 +1,40 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Memory
+{
+ // Memory interface that adds mapping, modification tracking, atomic and
+ // bulk operations on top of the scalar accessors of IMemory. The notes
+ // below describe the MemoryManager implementation added by this change.
+ public interface IMemoryManager : IMemory, IDisposable
+ {
+ // Maps size bytes starting at virtual address va to offset pa of the backing memory.
+ void Map(long va, long pa, long size);
+
+ // Removes the mapping of size bytes starting at position.
+ void Unmap(long position, long size);
+
+ bool IsMapped(long position);
+
+ long GetPhysicalAddress(long virtualAddress);
+
+ // Reports whether the region was written to since the previous query.
+ bool IsRegionModified(long position, long size);
+
+ // Succeeds only when the whole range is backed by contiguous host memory.
+ bool TryGetHostAddress(long position, long size, out IntPtr ptr);
+
+ // True when position lies inside the address space.
+ bool IsValidPosition(long position);
+
+ // Returns true when the value at position was equal to expected and was replaced.
+ bool AtomicCompareExchangeInt32(long position, int expected, int desired);
+
+ int AtomicIncrementInt32(long position);
+
+ int AtomicDecrementInt32(long position);
+
+ byte[] ReadBytes(long position, long size);
+
+ void ReadBytes(long position, byte[] data, int startIndex, int size);
+
+ void WriteVector128(long position, V128 value);
+
+ void WriteBytes(long position, byte[] data);
+
+ void WriteBytes(long position, byte[] data, int startIndex, int size);
+
+ void CopyBytes(long src, long dst, long size);
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryHelper.cs b/ARMeilleure/Memory/MemoryHelper.cs
new file mode 100644
index 000000000..71ddac238
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryHelper.cs
@@ -0,0 +1,71 @@
+using System;
+using System.IO;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace ARMeilleure.Memory
+{
+ public static class MemoryHelper
+ {
+        // Writes size zero bytes to memory starting at position.
+        // The bulk of the range is cleared 8 bytes at a time, and the
+        // remaining (size % 8) bytes are cleared one byte at a time.
+        // A size of zero or less writes nothing.
+        public static void FillWithZeros(IMemoryManager memory, long position, int size)
+        {
+            // Largest multiple of 8 that is not greater than size.
+            int size8 = size & ~(8 - 1);
+
+            for (int offs = 0; offs < size8; offs += 8)
+            {
+                memory.WriteInt64(position + offs, 0);
+            }
+
+            // The tail runs from size8 up to size. The previous bound of
+            // (size - size8) was never above size8, so the tail was skipped.
+            for (int offs = size8; offs < size; offs++)
+            {
+                memory.WriteByte(position + offs, 0);
+            }
+        }
+
+        // Reads Marshal.SizeOf<T>() bytes from memory at position and
+        // marshals them into a value of the structure type T.
+        public unsafe static T Read<T>(IMemoryManager memory, long position) where T : struct
+        {
+            long size = Marshal.SizeOf<T>();
+
+            byte[] data = memory.ReadBytes(position, size);
+
+            // Pin the managed buffer so its address stays valid while marshalling.
+            fixed (byte* ptr = data)
+            {
+                return Marshal.PtrToStructure<T>((IntPtr)ptr);
+            }
+        }
+
+        // Marshals the structure value into a temporary buffer of
+        // Marshal.SizeOf<T>() bytes and writes that buffer to memory at position.
+        public unsafe static void Write<T>(IMemoryManager memory, long position, T value) where T : struct
+        {
+            long size = Marshal.SizeOf<T>();
+
+            byte[] data = new byte[size];
+
+            // Pin the managed buffer so its address stays valid while marshalling.
+            fixed (byte* ptr = data)
+            {
+                Marshal.StructureToPtr<T>(value, (IntPtr)ptr, false);
+            }
+
+            memory.WriteBytes(position, data);
+        }
+
+        // Reads bytes from memory starting at position until a zero byte is
+        // found or maxSize bytes were read, and decodes them as ASCII.
+        // A maxSize of -1 means that only the zero byte ends the string.
+        public static string ReadAsciiString(IMemoryManager memory, long position, long maxSize = -1)
+        {
+            bool unbounded = maxSize == -1;
+
+            using (MemoryStream buffer = new MemoryStream())
+            {
+                long offs = 0;
+
+                while (unbounded || offs < maxSize)
+                {
+                    byte current = memory.ReadByte(position + offs);
+
+                    if (current == 0)
+                    {
+                        break;
+                    }
+
+                    buffer.WriteByte(current);
+
+                    offs++;
+                }
+
+                byte[] raw = buffer.ToArray();
+
+                return Encoding.ASCII.GetString(raw);
+            }
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagement.cs b/ARMeilleure/Memory/MemoryManagement.cs
new file mode 100644
index 000000000..bf0bd02ce
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagement.cs
@@ -0,0 +1,114 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Memory
+{
+    // Platform independent entry points for host memory allocation,
+    // protection and write tracking. Each call is forwarded to the Windows
+    // or the Unix (Linux / macOS) implementation; any other platform gets
+    // a PlatformNotSupportedException.
+    public static class MemoryManagement
+    {
+        // Write tracking is only available through the Windows implementation.
+        public static bool HasWriteWatchSupport => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+
+        private static bool OnWindows => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+
+        private static bool OnUnix => RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+                                      RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
+
+        // Allocates size bytes of readable and writable memory.
+        public static IntPtr Allocate(ulong size)
+        {
+            if (OnWindows)
+            {
+                return MemoryManagementWindows.Allocate(new IntPtr((long)size));
+            }
+
+            if (OnUnix)
+            {
+                return MemoryManagementUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        // Allocates size bytes with write tracking enabled on Windows.
+        // On Unix this is a plain allocation, without any tracking.
+        public static IntPtr AllocateWriteTracked(ulong size)
+        {
+            if (OnWindows)
+            {
+                return MemoryManagementWindows.AllocateWriteTracked(new IntPtr((long)size));
+            }
+
+            if (OnUnix)
+            {
+                return MemoryManagementUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        // Changes the protection of a region, throwing
+        // MemoryProtectionException when the platform call fails.
+        public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
+        {
+            bool succeeded;
+
+            if (OnWindows)
+            {
+                succeeded = MemoryManagementWindows.Reprotect(address, new IntPtr((long)size), permission);
+            }
+            else if (OnUnix)
+            {
+                succeeded = MemoryManagementUnix.Reprotect(address, size, permission);
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            if (succeeded)
+            {
+                return;
+            }
+
+            throw new MemoryProtectionException(permission);
+        }
+
+        // Releases memory returned by Allocate or AllocateWriteTracked.
+        public static bool Free(IntPtr address)
+        {
+            if (OnWindows)
+            {
+                return MemoryManagementWindows.Free(address);
+            }
+
+            if (OnUnix)
+            {
+                return MemoryManagementUnix.Free(address);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr    address,
+            IntPtr    size,
+            IntPtr[]  addresses,
+            out ulong count)
+        {
+            // This is only supported on windows, but returning
+            // false (failed) is also valid for platforms without
+            // write tracking support on the OS.
+            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                count = 0;
+
+                return false;
+            }
+
+            return MemoryManagementWindows.GetModifiedPages(address, size, addresses, out count);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagementUnix.cs b/ARMeilleure/Memory/MemoryManagementUnix.cs
new file mode 100644
index 000000000..3331fb428
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagementUnix.cs
@@ -0,0 +1,71 @@
+using Mono.Unix.Native;
+using System;
+
+namespace ARMeilleure.Memory
+{
+    // Unix (Linux / macOS) implementation of the memory management
+    // primitives, built on mmap, mprotect and munmap.
+    //
+    // Every allocation is preceded by one extra page. The last 8 bytes of
+    // that page hold the requested size, which Free reads back because
+    // munmap needs the length of the mapping.
+    static class MemoryManagementUnix
+    {
+        // mmap reports failure by returning MAP_FAILED, which is (void*)-1,
+        // rather than a null pointer.
+        private static readonly IntPtr MapFailed = new IntPtr(-1);
+
+        // Maps size bytes of private, anonymous, readable and writable
+        // memory and returns a pointer to the first usable byte.
+        // Throws OutOfMemoryException when the mapping fails.
+        public static IntPtr Allocate(ulong size)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+
+            const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;
+
+            IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);
+
+            // The null check is kept for safety; MAP_FAILED is the value
+            // that a failed mmap actually returns.
+            if (ptr == MapFailed || ptr == IntPtr.Zero)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            unsafe
+            {
+                // Skip the header page and store the size right before the
+                // pointer that is handed out.
+                ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);
+
+                *((ulong*)ptr - 1) = size;
+            }
+
+            return ptr;
+        }
+
+        // Changes the protection of a region; returns true on success.
+        public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
+        {
+            MmapProts prot = GetProtection(protection);
+
+            return Syscall.mprotect(address, size, prot) == 0;
+        }
+
+        // Converts the platform independent protection into mmap flags.
+        // Combinations that are not listed throw ArgumentException.
+        private static MmapProts GetProtection(Memory.MemoryProtection protection)
+        {
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None:             return MmapProts.PROT_NONE;
+                case Memory.MemoryProtection.Read:             return MmapProts.PROT_READ;
+                case Memory.MemoryProtection.ReadAndWrite:     return MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+                case Memory.MemoryProtection.ReadAndExecute:   return MmapProts.PROT_READ | MmapProts.PROT_EXEC;
+                case Memory.MemoryProtection.ReadWriteExecute: return MmapProts.PROT_READ | MmapProts.PROT_WRITE | MmapProts.PROT_EXEC;
+                case Memory.MemoryProtection.Execute:          return MmapProts.PROT_EXEC;
+
+                default: throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+        }
+
+        // Unmaps memory returned by Allocate, header page included.
+        // Returns true on success.
+        public static bool Free(IntPtr address)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            ulong size;
+
+            unsafe
+            {
+                // The size was stored by Allocate right before the pointer.
+                size = *((ulong*)address - 1);
+
+                address = new IntPtr(address.ToInt64() - (long)pageSize);
+            }
+
+            return Syscall.munmap(address, size + pageSize) == 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagementWindows.cs b/ARMeilleure/Memory/MemoryManagementWindows.cs
new file mode 100644
index 000000000..c1a84c95b
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagementWindows.cs
@@ -0,0 +1,156 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Memory
+{
+ static class MemoryManagementWindows
+ {
+ // Flags passed to VirtualAlloc (flAllocationType) and VirtualFree (dwFreeType).
+ // NOTE(review): values presumably mirror the Win32 MEM_* constants - confirm.
+ [Flags]
+ private enum AllocationType : uint
+ {
+ Commit = 0x1000,
+ Reserve = 0x2000,
+ Decommit = 0x4000,
+ Release = 0x8000,
+ Reset = 0x80000,
+ Physical = 0x400000,
+ TopDown = 0x100000,
+ WriteWatch = 0x200000,
+ LargePages = 0x20000000
+ }
+
+ // Native protection values passed to VirtualAlloc and VirtualProtect.
+ // This private enum hides ARMeilleure.Memory.MemoryProtection inside this
+ // class, which is why the public enum is written as Memory.MemoryProtection.
+ [Flags]
+ private enum MemoryProtection : uint
+ {
+ NoAccess = 0x01,
+ ReadOnly = 0x02,
+ ReadWrite = 0x04,
+ WriteCopy = 0x08,
+ Execute = 0x10,
+ ExecuteRead = 0x20,
+ ExecuteReadWrite = 0x40,
+ ExecuteWriteCopy = 0x80,
+ GuardModifierflag = 0x100,
+ NoCacheModifierflag = 0x200,
+ WriteCombineModifierflag = 0x400
+ }
+
+ // Flags for GetWriteWatch (dwFlags).
+ private enum WriteWatchFlags : uint
+ {
+ None = 0,
+ Reset = 1
+ }
+
+ [DllImport("kernel32.dll")]
+ private static extern IntPtr VirtualAlloc(
+ IntPtr lpAddress,
+ IntPtr dwSize,
+ AllocationType flAllocationType,
+ MemoryProtection flProtect);
+
+ [DllImport("kernel32.dll")]
+ private static extern bool VirtualProtect(
+ IntPtr lpAddress,
+ IntPtr dwSize,
+ MemoryProtection flNewProtect,
+ out MemoryProtection lpflOldProtect);
+
+ [DllImport("kernel32.dll")]
+ private static extern bool VirtualFree(
+ IntPtr lpAddress,
+ IntPtr dwSize,
+ AllocationType dwFreeType);
+
+ // lpdwCount carries the capacity of lpAddresses on input and the number
+ // of addresses written on output (see GetModifiedPages in this class).
+ [DllImport("kernel32.dll")]
+ private static extern int GetWriteWatch(
+ WriteWatchFlags dwFlags,
+ IntPtr lpBaseAddress,
+ IntPtr dwRegionSize,
+ IntPtr[] lpAddresses,
+ ref ulong lpdwCount,
+ out uint lpdwGranularity);
+
+        // Reserves and commits size bytes of readable and writable memory.
+        // Throws OutOfMemoryException when VirtualAlloc returns null.
+        public static IntPtr Allocate(IntPtr size)
+        {
+            IntPtr ptr = VirtualAlloc(
+                IntPtr.Zero,
+                size,
+                AllocationType.Reserve | AllocationType.Commit,
+                MemoryProtection.ReadWrite);
+
+            if (ptr != IntPtr.Zero)
+            {
+                return ptr;
+            }
+
+            throw new OutOfMemoryException();
+        }
+
+        // Same as Allocate, but the region is created with the WriteWatch
+        // flag so that GetModifiedPages can be used on it.
+        public static IntPtr AllocateWriteTracked(IntPtr size)
+        {
+            IntPtr ptr = VirtualAlloc(
+                IntPtr.Zero,
+                size,
+                AllocationType.Reserve | AllocationType.Commit | AllocationType.WriteWatch,
+                MemoryProtection.ReadWrite);
+
+            if (ptr != IntPtr.Zero)
+            {
+                return ptr;
+            }
+
+            throw new OutOfMemoryException();
+        }
+
+        // Changes the protection of a region; returns the VirtualProtect result.
+        public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
+        {
+            MemoryProtection nativeProtection = GetProtection(protection);
+
+            bool succeeded = VirtualProtect(address, size, nativeProtection, out _);
+
+            return succeeded;
+        }
+
+        // Converts the platform independent protection into the native value.
+        // Combinations that are not listed throw ArgumentException.
+        private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
+        {
+            MemoryProtection nativeProtection;
+
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None:
+                    nativeProtection = MemoryProtection.NoAccess;
+                    break;
+
+                case Memory.MemoryProtection.Read:
+                    nativeProtection = MemoryProtection.ReadOnly;
+                    break;
+
+                case Memory.MemoryProtection.ReadAndWrite:
+                    nativeProtection = MemoryProtection.ReadWrite;
+                    break;
+
+                case Memory.MemoryProtection.ReadAndExecute:
+                    nativeProtection = MemoryProtection.ExecuteRead;
+                    break;
+
+                case Memory.MemoryProtection.ReadWriteExecute:
+                    nativeProtection = MemoryProtection.ExecuteReadWrite;
+                    break;
+
+                case Memory.MemoryProtection.Execute:
+                    nativeProtection = MemoryProtection.Execute;
+                    break;
+
+                default:
+                    throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+
+            return nativeProtection;
+        }
+
+        // Releases a region returned by Allocate or AllocateWriteTracked.
+        public static bool Free(IntPtr address) => VirtualFree(address, IntPtr.Zero, AllocationType.Release);
+
+        // Queries the pages of a write tracked region that were written to,
+        // and resets the tracking state (WriteWatchFlags.Reset).
+        // addresses receives the page addresses, count the number of entries
+        // written. Returns true when GetWriteWatch reports success (zero).
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr    address,
+            IntPtr    size,
+            IntPtr[]  addresses,
+            out ulong count)
+        {
+            // On input this is the capacity of the array, on output the
+            // number of addresses that were written to it.
+            ulong entries = (ulong)addresses.Length;
+
+            int status = GetWriteWatch(
+                WriteWatchFlags.Reset,
+                address,
+                size,
+                addresses,
+                ref entries,
+                out _);
+
+            count = entries;
+
+            bool succeeded = status == 0;
+
+            return succeeded;
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManager.cs b/ARMeilleure/Memory/MemoryManager.cs
new file mode 100644
index 000000000..12c118437
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManager.cs
@@ -0,0 +1,835 @@
+using ARMeilleure.State;
+using System;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+using static ARMeilleure.Memory.MemoryManagement;
+
+namespace ARMeilleure.Memory
+{
+ public unsafe class MemoryManager : IMemoryManager
+ {
+ // Pages are 4 KiB: PageBits is the number of address bits inside a page,
+ // PageMask selects the offset inside a page.
+ public const int PageBits = 12;
+ public const int PageSize = 1 << PageBits;
+ public const int PageMask = PageSize - 1;
+
+ // Flag kept in the low bits of a page table entry. It is set by the
+ // fallback modification check and cleared by TranslateWrite.
+ private const long PteFlagNotModified = 1;
+
+ // Low bits of a page table entry that hold flags instead of address bits.
+ internal const long PteFlagsMask = 7;
+
+ // Base of the backing memory; Map offsets are relative to it.
+ public IntPtr Ram { get; private set; }
+
+ private byte* _ramPtr;
+
+ // Top level of the page table; reset to zero by Dispose.
+ private IntPtr _pageTable;
+
+ internal IntPtr PageTable => _pageTable;
+
+ // Number of address bits, entries and index mask of one page table level.
+ internal int PtLevelBits { get; }
+ internal int PtLevelSize { get; }
+ internal int PtLevelMask { get; }
+
+ public bool HasWriteWatchSupport => MemoryManagement.HasWriteWatchSupport;
+
+ // Width of the address space in bits, and its size in bytes.
+ public int AddressSpaceBits { get; }
+ public long AddressSpaceSize { get; }
+
+        // Creates a memory manager on top of the backing memory at ram.
+        //
+        // addressSpaceBits is the width of the address space.
+        // useFlatPageTable selects a single level page table covering the
+        // whole address space instead of a multi level one.
+        //
+        // Throws ArgumentOutOfRangeException when a flat page table is
+        // requested for an address space whose page count does not fit the
+        // 32-bit level size (more than 30 index bits, or less than one).
+        public MemoryManager(
+            IntPtr ram,
+            int    addressSpaceBits = 48,
+            bool   useFlatPageTable = false)
+        {
+            Ram = ram;
+
+            _ramPtr = (byte*)ram;
+
+            AddressSpaceBits = addressSpaceBits;
+            AddressSpaceSize = 1L << addressSpaceBits;
+
+            // When flat page table is requested, we use a single
+            // array for the mappings of the entire address space.
+            // This has better performance, but also high memory usage.
+            // The multi level page table uses 9 bits per level, so
+            // the memory usage is lower, but the performance is also
+            // lower, since each address translation requires multiple reads.
+            if (useFlatPageTable)
+            {
+                int flatBits = addressSpaceBits - PageBits;
+
+                // PtLevelSize is a 32-bit value. A shift count above 30 would
+                // wrap around and silently produce a table that is far too
+                // small, making unrelated pages share the same entry.
+                if (flatBits < 1 || flatBits > 30)
+                {
+                    throw new ArgumentOutOfRangeException(nameof(addressSpaceBits));
+                }
+
+                PtLevelBits = flatBits;
+            }
+            else
+            {
+                PtLevelBits = 9;
+            }
+
+            PtLevelSize = 1 << PtLevelBits;
+            PtLevelMask = PtLevelSize - 1;
+
+            // Multiply as 64-bit values, the byte size of a large flat table
+            // does not fit in a 32-bit integer.
+            _pageTable = Allocate((ulong)PtLevelSize * (ulong)IntPtr.Size);
+        }
+
+        // Points the page table entries of [va, va + size) at the backing
+        // memory starting at offset pa.
+        public void Map(long va, long pa, long size)
+        {
+            byte* hostStart = _ramPtr + pa;
+
+            SetPtEntries(va, hostStart, size);
+        }
+
+        // Clears the page table entries of [position, position + size).
+        public void Unmap(long position, long size) => SetPtEntries(position, null, size);
+
+        // Returns true when the page containing position has a mapping.
+        // Positions outside of the address space are never mapped.
+        public bool IsMapped(long position)
+        {
+            if (!IsValidPosition(position))
+            {
+                return false;
+            }
+
+            // Look at the page table entry itself. Going through Translate
+            // adds the offset inside the page to the entry, which turns the
+            // null entry of an unmapped page into a non-zero pointer whenever
+            // position is not page aligned. The flag bits are masked out
+            // because an unmapped entry may still carry flags.
+            ulong entry = (ulong)GetPtEntry(position) & ~(ulong)PteFlagsMask;
+
+            return entry != 0;
+        }
+
+        // Returns the offset, relative to the start of the backing memory,
+        // of the host address that virtualAddress translates to.
+        public long GetPhysicalAddress(long virtualAddress)
+        {
+            byte* hostPtr = (byte*)Translate(virtualAddress);
+
+            long offsetFromRam = hostPtr - _ramPtr;
+
+            return offsetFromRam;
+        }
+
+        // Translates a position into a host pointer for reading.
+        // Returns zero for positions outside of the address space. The flag
+        // bits of the page table entry are removed, and the offset inside
+        // the page is added to the result.
+        private IntPtr Translate(long position)
+        {
+            if (!IsValidPosition(position))
+            {
+                return IntPtr.Zero;
+            }
+
+            ulong entry = (ulong)GetPtEntry(position);
+
+            // Masking is a no-op when none of the flag bits are set.
+            byte* pagePtr = (byte*)(entry & ~(ulong)PteFlagsMask);
+
+            return new IntPtr(pagePtr + (position & PageMask));
+        }
+
+        // Translates a position into a host pointer for writing.
+        // Works like Translate, and additionally clears the "not modified"
+        // flag of the page table entry so that the fallback modification
+        // check sees the page as written.
+        private IntPtr TranslateWrite(long position)
+        {
+            if (!IsValidPosition(position))
+            {
+                return IntPtr.Zero;
+            }
+
+            ulong entry = (ulong)GetPtEntry(position);
+
+            bool markedNotModified = (entry & PteFlagNotModified) != 0;
+
+            if (markedNotModified)
+            {
+                ClearPtEntryFlag(position, PteFlagNotModified);
+            }
+
+            // Masking is a no-op when none of the flag bits are set.
+            byte* pagePtr = (byte*)(entry & ~(ulong)PteFlagsMask);
+
+            return new IntPtr(pagePtr + (position & PageMask));
+        }
+
+        // Returns the raw page table entry (pointer plus flag bits) of the
+        // page that contains position.
+        private byte* GetPtEntry(long position)
+        {
+            IntPtr* slot = GetPtPtr(position);
+
+            return (byte*)*slot;
+        }
+
+        // Sets the entries of every page in [va, va + size). When ptr is not
+        // null, consecutive pages get consecutive host pages; when it is
+        // null every entry is cleared.
+        private void SetPtEntries(long va, byte* ptr, long size)
+        {
+            // End of the range, rounded up to a page boundary.
+            long end = (va + size + PageMask) & ~PageMask;
+
+            for (; (ulong)va < (ulong)end; va += PageSize)
+            {
+                SetPtEntry(va, ptr);
+
+                if (ptr != null)
+                {
+                    ptr += PageSize;
+                }
+            }
+        }
+
+        // Overwrites the page table entry of the page that contains position.
+        private void SetPtEntry(long position, byte* ptr)
+        {
+            IntPtr* slot = GetPtPtr(position);
+
+            *slot = (IntPtr)ptr;
+        }
+
+        // Sets the given flag bits on the page table entry of position.
+        private void SetPtEntryFlag(long position, long flag) => ModifyPtEntryFlag(position, flag, setFlag: true);
+
+        // Clears the given flag bits on the page table entry of position.
+        private void ClearPtEntryFlag(long position, long flag) => ModifyPtEntryFlag(position, flag, setFlag: false);
+
+        // Sets or clears flag bits of a page table entry with a
+        // compare-exchange loop, retrying whenever the entry was changed by
+        // someone else between the read and the exchange.
+        private void ModifyPtEntryFlag(long position, long flag, bool setFlag)
+        {
+            IntPtr* slot;
+            IntPtr  observed;
+            IntPtr  updated;
+
+            do
+            {
+                slot = GetPtPtr(position);
+
+                observed = *slot;
+
+                long bits = observed.ToInt64();
+
+                bits = setFlag ? bits | flag : bits & ~flag;
+
+                updated = new IntPtr(bits);
+            }
+            while (Interlocked.CompareExchange(ref *slot, updated, observed) != observed);
+        }
+
+ // Returns a pointer to the page table entry of the page that contains
+ // position, walking the table one level at a time and allocating the
+ // intermediate levels that do not exist yet.
+ // Throws ArgumentOutOfRangeException for positions outside of the
+ // address space.
+ private IntPtr* GetPtPtr(long position)
+ {
+ if (!IsValidPosition(position))
+ {
+ throw new ArgumentOutOfRangeException(nameof(position));
+ }
+
+ IntPtr nextPtr = _pageTable;
+
+ IntPtr* ptePtr = null;
+
+ // Lowest address bit consumed by the level being indexed.
+ int bit = PageBits;
+
+ while (true)
+ {
+ long index = (position >> bit) & PtLevelMask;
+
+ ptePtr = &((IntPtr*)nextPtr)[index];
+
+ bit += PtLevelBits;
+
+ // All address bits were consumed, ptePtr is the final entry.
+ if (bit >= AddressSpaceBits)
+ {
+ break;
+ }
+
+ nextPtr = *ptePtr;
+
+ if (nextPtr == IntPtr.Zero)
+ {
+ // Entry does not yet exist, allocate a new one.
+ IntPtr newPtr = Allocate((ulong)(PtLevelSize * IntPtr.Size));
+
+ // Try to swap the current pointer (should be zero), with the allocated one.
+ nextPtr = Interlocked.CompareExchange(ref *ptePtr, newPtr, IntPtr.Zero);
+
+ // If the old pointer is not null, then another thread already has set it.
+ if (nextPtr != IntPtr.Zero)
+ {
+ // Keep the table installed by the other thread, drop ours.
+ Free(newPtr);
+ }
+ else
+ {
+ nextPtr = newPtr;
+ }
+ }
+ }
+
+ return ptePtr;
+ }
+
+ // Reports whether any page of the region was written to since the
+ // previous query. Without write watch support the page table flags are
+ // used instead (IsRegionModifiedFallback). With it, the region is split
+ // into runs of pages that are contiguous in host memory and each run is
+ // checked with GetModifiedPages.
+ // NOTE(review): the page count is size / PageSize rounded down, so a
+ // size smaller than one page checks nothing and returns false - confirm
+ // that callers always pass page multiples.
+ public bool IsRegionModified(long position, long size)
+ {
+ if (!HasWriteWatchSupport)
+ {
+ return IsRegionModifiedFallback(position, size);
+ }
+
+ IntPtr address = Translate(position);
+
+ // baseAddr is the start of the current run, expectedAddr the host
+ // address the next page must have to still belong to that run.
+ IntPtr baseAddr = address;
+ IntPtr expectedAddr = address;
+
+ long pendingPages = 0;
+
+ long pages = size / PageSize;
+
+ bool modified = false;
+
+ // Checks the current run. A failed query is treated as "modified".
+ bool IsAnyPageModified()
+ {
+ IntPtr pendingSize = new IntPtr(pendingPages * PageSize);
+
+ IntPtr[] addresses = new IntPtr[pendingPages];
+
+ bool result = GetModifiedPages(baseAddr, pendingSize, addresses, out ulong count);
+
+ if (result)
+ {
+ return count != 0;
+ }
+ else
+ {
+ return true;
+ }
+ }
+
+ while (pages-- > 0)
+ {
+ // The host addresses stopped being contiguous: check the run
+ // collected so far and start a new one at this page.
+ if (address != expectedAddr)
+ {
+ modified |= IsAnyPageModified();
+
+ baseAddr = address;
+
+ pendingPages = 0;
+ }
+
+ expectedAddr = address + PageSize;
+
+ pendingPages++;
+
+ if (pages == 0)
+ {
+ break;
+ }
+
+ position += PageSize;
+
+ address = Translate(position);
+ }
+
+ // Check the last run.
+ if (pendingPages != 0)
+ {
+ modified |= IsAnyPageModified();
+ }
+
+ return modified;
+ }
+
+        // Modification check based on the page table flags, used when the
+        // platform has no write watch support.
+        //
+        // A page counts as modified when its entry does not carry the
+        // "not modified" flag. The flag is set again on every such page, so
+        // the next call only reports pages written to in between
+        // (TranslateWrite clears the flag). Pages outside of the address
+        // space are always reported as modified.
+        private unsafe bool IsRegionModifiedFallback(long position, long size)
+        {
+            // End of the range, rounded up to a page boundary.
+            long endAddr = (position + size + PageMask) & ~PageMask;
+
+            bool modified = false;
+
+            while ((ulong)position < (ulong)endAddr)
+            {
+                if (IsValidPosition(position))
+                {
+                    // Read the entry through the page table walk. Indexing
+                    // _pageTable directly with the page number is only right
+                    // for a flat table, with a multi level table it reads
+                    // the wrong (or an out of bounds) entry.
+                    ulong entry = (ulong)GetPtEntry(position);
+
+                    if ((entry & PteFlagNotModified) == 0)
+                    {
+                        modified = true;
+
+                        SetPtEntryFlag(position, PteFlagNotModified);
+                    }
+                }
+                else
+                {
+                    modified = true;
+                }
+
+                position += PageSize;
+            }
+
+            return modified;
+        }
+
+        // Gets the host address of position when the whole range
+        // [position, position + size) is contiguous in host memory.
+        // Returns false, with ptr set to zero, when it is not.
+        public bool TryGetHostAddress(long position, long size, out IntPtr ptr)
+        {
+            bool contiguous = IsContiguous(position, size);
+
+            ptr = contiguous ? Translate(position) : IntPtr.Zero;
+
+            return contiguous;
+        }
+
+        // Returns true when every page touched by [position, position + size)
+        // follows the previous one in the backing memory.
+        private bool IsContiguous(long position, long size)
+        {
+            long limit = position + size;
+
+            // Start at the beginning of the first page.
+            long page = position & ~PageMask;
+
+            for (long expected = GetPhysicalAddress(page);
+                 (ulong)page < (ulong)limit;
+                 page += PageSize, expected += PageSize)
+            {
+                if (GetPhysicalAddress(page) != expected)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        // True when position lies inside the address space. The comparison
+        // is unsigned, so negative positions are rejected as well.
+        public bool IsValidPosition(long position) => (ulong)position < (ulong)AddressSpaceSize;
+
+        // Atomically loads the 128-bit value at position, which must be
+        // 16 byte aligned. The load goes through the write translation
+        // because it is implemented with a compare-and-swap.
+        internal V128 AtomicLoadInt128(long position)
+        {
+            bool misaligned = (position & 0xf) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            IntPtr hostAddress = TranslateWrite(position);
+
+            V128 loaded = MemoryManagerPal.AtomicLoad128(hostAddress);
+
+            return loaded;
+        }
+
+        // Replaces the byte at position with desired when it is equal to
+        // expected. Returns true when the exchange happened.
+        //
+        // The exchange is done on the 32-bit word starting at position,
+        // keeping the upper 24 bits as they were read just before. It
+        // therefore also fails when one of those bits changes concurrently.
+        internal bool AtomicCompareExchangeByte(long position, byte expected, byte desired)
+        {
+            // This is a store, so it has to go through the write translation
+            // to keep the modification tracking of the page up to date, as
+            // the wider compare-exchange variants do.
+            int* ptr = (int*)TranslateWrite(position);
+
+            int currentValue = *ptr;
+
+            int expected32 = (currentValue & ~byte.MaxValue) | expected;
+            int desired32  = (currentValue & ~byte.MaxValue) | desired;
+
+            return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
+        }
+
+        // Replaces the 16-bit value at position with desired when it is
+        // equal to expected. Returns true when the exchange happened.
+        // position must be 2 byte aligned.
+        //
+        // The exchange is done on the 32-bit word starting at position,
+        // keeping the upper 16 bits as they were read just before. It
+        // therefore also fails when one of those bits changes concurrently.
+        internal bool AtomicCompareExchangeInt16(long position, short expected, short desired)
+        {
+            if ((position & 1) != 0)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            // This is a store, so it has to go through the write translation
+            // to keep the modification tracking of the page up to date, as
+            // the wider compare-exchange variants do.
+            int* ptr = (int*)TranslateWrite(position);
+
+            int currentValue = *ptr;
+
+            int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected;
+            int desired32  = (currentValue & ~ushort.MaxValue) | (ushort)desired;
+
+            return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
+        }
+
+        // Replaces the 32-bit value at position with desired when it is
+        // equal to expected. Returns true when the exchange happened.
+        // position must be 4 byte aligned.
+        public bool AtomicCompareExchangeInt32(long position, int expected, int desired)
+        {
+            bool misaligned = (position & 3) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            int* target = (int*)TranslateWrite(position);
+
+            int previous = Interlocked.CompareExchange(ref *target, desired, expected);
+
+            return previous == expected;
+        }
+
+        // 64-bit variant; position must be 8 byte aligned.
+        internal bool AtomicCompareExchangeInt64(long position, long expected, long desired)
+        {
+            bool misaligned = (position & 7) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            long* target = (long*)TranslateWrite(position);
+
+            long previous = Interlocked.CompareExchange(ref *target, desired, expected);
+
+            return previous == expected;
+        }
+
+        // 128-bit variant; position must be 16 byte aligned.
+        internal bool AtomicCompareExchangeInt128(long position, V128 expected, V128 desired)
+        {
+            bool misaligned = (position & 0xf) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            IntPtr target = TranslateWrite(position);
+
+            V128 previous = MemoryManagerPal.CompareAndSwap128(target, expected, desired);
+
+            return previous == expected;
+        }
+
+        // Atomically adds one to the 32-bit value at position and returns
+        // the new value. position must be 4 byte aligned.
+        public int AtomicIncrementInt32(long position)
+        {
+            bool misaligned = (position & 3) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            int* target = (int*)TranslateWrite(position);
+
+            int incremented = Interlocked.Increment(ref *target);
+
+            return incremented;
+        }
+
+        // Atomically subtracts one from the 32-bit value at position and
+        // returns the new value. position must be 4 byte aligned.
+        public int AtomicDecrementInt32(long position)
+        {
+            bool misaligned = (position & 3) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            int* target = (int*)TranslateWrite(position);
+
+            int decremented = Interlocked.Decrement(ref *target);
+
+            return decremented;
+        }
+
+        // Rejects an atomic access to a misaligned position by throwing
+        // InvalidOperationException.
+        private void AbortWithAlignmentFault(long position)
+        {
+            // TODO: Abort mode and exception support on the CPU.
+            string message = $"Tried to compare exchange a misaligned address 0x{position:X16}.";
+
+            throw new InvalidOperationException(message);
+        }
+
+        // The signed reads reinterpret the result of the unsigned read of
+        // the same width.
+        public sbyte ReadSByte(long position) => (sbyte)ReadByte(position);
+
+        public short ReadInt16(long position) => (short)ReadUInt16(position);
+
+        public int ReadInt32(long position) => (int)ReadUInt32(position);
+
+        public long ReadInt64(long position) => (long)ReadUInt64(position);
+
+        // Reads one byte through the read translation.
+        public byte ReadByte(long position)
+        {
+            byte* source = (byte*)Translate(position);
+
+            return *source;
+        }
+
+        // The multi byte reads access memory directly when position is
+        // aligned to the value size. Otherwise the value is assembled from
+        // two reads of half the size, low half first (little endian).
+        public ushort ReadUInt16(long position)
+        {
+            if ((position & 1) != 0)
+            {
+                int low  = ReadByte(position + 0) << 0;
+                int high = ReadByte(position + 1) << 8;
+
+                return (ushort)(low | high);
+            }
+
+            return *((ushort*)Translate(position));
+        }
+
+        public uint ReadUInt32(long position)
+        {
+            if ((position & 3) != 0)
+            {
+                int low  = ReadUInt16(position + 0) << 0;
+                int high = ReadUInt16(position + 2) << 16;
+
+                return (uint)(low | high);
+            }
+
+            return *((uint*)Translate(position));
+        }
+
+        public ulong ReadUInt64(long position)
+        {
+            if ((position & 7) != 0)
+            {
+                ulong low  = (ulong)ReadUInt32(position + 0) << 0;
+                ulong high = (ulong)ReadUInt32(position + 4) << 32;
+
+                return low | high;
+            }
+
+            return *((ulong*)Translate(position));
+        }
+
+        // Reads a 128-bit vector as two 64-bit halves, low half first.
+        public V128 ReadVector128(long position)
+        {
+            ulong low  = ReadUInt64(position);
+            ulong high = ReadUInt64(position + 8);
+
+            return new V128(low, high);
+        }
+
+        // Reads size bytes starting at position into a new array.
+        // The copy is done page by page, since consecutive pages need not be
+        // contiguous in host memory.
+        // Throws ArgumentOutOfRangeException when size is negative or above
+        // int.MaxValue, or when position + size wraps around.
+        public byte[] ReadBytes(long position, long size)
+        {
+            long limit = position + size;
+
+            if ((ulong)size > int.MaxValue)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if ((ulong)limit < (ulong)position)
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            byte[] buffer = new byte[size];
+
+            for (int written = 0; (ulong)position < (ulong)limit; )
+            {
+                // End of this chunk: the next page boundary, or the end of
+                // the range when that comes first.
+                long nextPage = (position + PageSize) & ~(long)PageMask;
+
+                long chunkEnd = (ulong)nextPage > (ulong)limit ? limit : nextPage;
+
+                int chunk = (int)(chunkEnd - position);
+
+                Marshal.Copy(Translate(position), buffer, written, chunk);
+
+                position += chunk;
+                written  += chunk;
+            }
+
+            return buffer;
+        }
+
+        // Reads size bytes starting at position into data, beginning at
+        // index startIndex. The copy is done page by page.
+        // Throws ArgumentOutOfRangeException when size is negative, or when
+        // position + size wraps around.
+        public void ReadBytes(long position, byte[] data, int startIndex, int size)
+        {
+            // Note: This will be moved later.
+            long limit = position + size;
+
+            if ((ulong)size > int.MaxValue)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if ((ulong)limit < (ulong)position)
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            for (int target = startIndex; (ulong)position < (ulong)limit; )
+            {
+                // End of this chunk: the next page boundary, or the end of
+                // the range when that comes first.
+                long nextPage = (position + PageSize) & ~(long)PageMask;
+
+                long chunkEnd = (ulong)nextPage > (ulong)limit ? limit : nextPage;
+
+                int chunk = (int)(chunkEnd - position);
+
+                Marshal.Copy(Translate(position), data, target, chunk);
+
+                position += chunk;
+                target   += chunk;
+            }
+        }
+
+        // The signed writes forward to the unsigned write of the same width.
+        public void WriteSByte(long position, sbyte value) => WriteByte(position, (byte)value);
+
+        public void WriteInt16(long position, short value) => WriteUInt16(position, (ushort)value);
+
+        public void WriteInt32(long position, int value) => WriteUInt32(position, (uint)value);
+
+        public void WriteInt64(long position, long value) => WriteUInt64(position, (ulong)value);
+
+        // Writes one byte through the write translation.
+        public void WriteByte(long position, byte value)
+        {
+            byte* target = (byte*)TranslateWrite(position);
+
+            *target = value;
+        }
+
+        // The multi byte writes access memory directly when position is
+        // aligned to the value size. Otherwise the value is split into two
+        // writes of half the size, low half first (little endian).
+        public void WriteUInt16(long position, ushort value)
+        {
+            if ((position & 1) != 0)
+            {
+                WriteByte(position + 0, (byte)(value >> 0));
+                WriteByte(position + 1, (byte)(value >> 8));
+
+                return;
+            }
+
+            *((ushort*)TranslateWrite(position)) = value;
+        }
+
+        public void WriteUInt32(long position, uint value)
+        {
+            if ((position & 3) != 0)
+            {
+                WriteUInt16(position + 0, (ushort)(value >> 0));
+                WriteUInt16(position + 2, (ushort)(value >> 16));
+
+                return;
+            }
+
+            *((uint*)TranslateWrite(position)) = value;
+        }
+
+        public void WriteUInt64(long position, ulong value)
+        {
+            if ((position & 7) != 0)
+            {
+                WriteUInt32(position + 0, (uint)(value >> 0));
+                WriteUInt32(position + 4, (uint)(value >> 32));
+
+                return;
+            }
+
+            *((ulong*)TranslateWrite(position)) = value;
+        }
+
+        // Writes a 128-bit vector as two 64-bit halves, low half first.
+        public void WriteVector128(long position, V128 value)
+        {
+            ulong low  = value.GetUInt64(0);
+
+            WriteUInt64(position + 0, low);
+
+            ulong high = value.GetUInt64(1);
+
+            WriteUInt64(position + 8, high);
+        }
+
+        // Writes the whole data array to memory starting at position.
+        // The copy is done page by page through the write translation.
+        // Throws ArgumentOutOfRangeException when position + data.Length
+        // wraps around.
+        public void WriteBytes(long position, byte[] data)
+        {
+            long limit = position + data.Length;
+
+            if ((ulong)limit < (ulong)position)
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            for (int consumed = 0; (ulong)position < (ulong)limit; )
+            {
+                // End of this chunk: the next page boundary, or the end of
+                // the range when that comes first.
+                long nextPage = (position + PageSize) & ~(long)PageMask;
+
+                long chunkEnd = (ulong)nextPage > (ulong)limit ? limit : nextPage;
+
+                int chunk = (int)(chunkEnd - position);
+
+                Marshal.Copy(data, consumed, TranslateWrite(position), chunk);
+
+                position += chunk;
+                consumed += chunk;
+            }
+        }
+
+        // Writes size bytes of data, beginning at index startIndex, to
+        // memory starting at position. The copy is done page by page.
+        // Throws ArgumentOutOfRangeException when position + size wraps around.
+        public void WriteBytes(long position, byte[] data, int startIndex, int size)
+        {
+            // Note: This will be moved later.
+            long endAddr = position + size;
+
+            if ((ulong)endAddr < (ulong)position)
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            int offset = startIndex;
+
+            while ((ulong)position < (ulong)endAddr)
+            {
+                // End of this chunk: the next page boundary, or the end of
+                // the range when that comes first.
+                long pageLimit = (position + PageSize) & ~(long)PageMask;
+
+                if ((ulong)pageLimit > (ulong)endAddr)
+                {
+                    pageLimit = endAddr;
+                }
+
+                int copySize = (int)(pageLimit - position);
+
+                // Use the write translation, like the other overload does.
+                // The read translation leaves the "not modified" flag of the
+                // page in place, which hides this write from the
+                // modification tracking.
+                Marshal.Copy(data, offset, TranslateWrite(position), copySize);
+
+                position += copySize;
+                offset   += copySize;
+            }
+        }
+
+        // Copies size bytes from src to dst. When both ranges are contiguous
+        // in host memory the copy is a single memory copy, otherwise the
+        // data goes through a temporary array.
+        public void CopyBytes(long src, long dst, long size)
+        {
+            // Note: This will be moved later.
+            bool bothContiguous = IsContiguous(src, size) &&
+                                  IsContiguous(dst, size);
+
+            if (!bothContiguous)
+            {
+                WriteBytes(dst, ReadBytes(src, size));
+
+                return;
+            }
+
+            byte* source = (byte*)Translate(src);
+            byte* target = (byte*)Translate(dst);
+
+            Buffer.MemoryCopy(source, target, size, size);
+        }
+
+        // Releases the page table.
+        public void Dispose() => Dispose(true);
+
+        // Frees every level of the page table. The root pointer is swapped
+        // with zero first, so the table is freed only once even when this
+        // is called several times.
+        protected virtual void Dispose(bool disposing)
+        {
+            IntPtr root = Interlocked.Exchange(ref _pageTable, IntPtr.Zero);
+
+            if (root == IntPtr.Zero)
+            {
+                return;
+            }
+
+            FreePageTableEntry(root, PageBits);
+        }
+
+        // Frees the page table level at ptr, after freeing the levels below it.
+        // levelBitEnd is the lowest address bit indexed by this level.
+        private void FreePageTableEntry(IntPtr ptr, int levelBitEnd)
+        {
+            levelBitEnd += PtLevelBits;
+
+            // Entries of the last level point at mapped memory, not at other
+            // tables, so only the levels above it are walked.
+            bool hasChildTables = levelBitEnd < AddressSpaceBits;
+
+            if (hasChildTables)
+            {
+                IntPtr* entries = (IntPtr*)ptr;
+
+                for (int slot = 0; slot < PtLevelSize; slot++)
+                {
+                    IntPtr child = entries[slot];
+
+                    if (child != IntPtr.Zero)
+                    {
+                        FreePageTableEntry(child, levelBitEnd);
+                    }
+                }
+            }
+
+            Free(ptr);
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagerPal.cs b/ARMeilleure/Memory/MemoryManagerPal.cs
new file mode 100644
index 000000000..64191a0ac
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagerPal.cs
@@ -0,0 +1,77 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+namespace ARMeilleure.Memory
+{
+ // 128-bit atomic operations for the memory manager. They are backed by a
+ // routine that is built once with the emitter and compiler of this
+ // project, the first time one of the operations is used.
+ static class MemoryManagerPal
+ {
+ private delegate V128 CompareExchange128(IntPtr address, V128 expected, V128 desired);
+
+ // Compiled routine; null until the first use.
+ private static CompareExchange128 _compareExchange128;
+
+ // Guards the one time generation of the routine.
+ private static object _lock;
+
+ static MemoryManagerPal()
+ {
+ _lock = new object();
+ }
+
+ // Implemented as a compare-and-swap with both expected and desired set to zero.
+ // NOTE(review): relies on the routine returning the value found in memory - confirm.
+ public static V128 AtomicLoad128(IntPtr address)
+ {
+ return GetCompareAndSwap128()(address, V128.Zero, V128.Zero);
+ }
+
+ // MemoryManager compares the returned value against expected to tell
+ // whether the exchange happened.
+ public static V128 CompareAndSwap128(IntPtr address, V128 expected, V128 desired)
+ {
+ return GetCompareAndSwap128()(address, expected, desired);
+ }
+
+ // Returns the compiled routine, generating it on the first call.
+ private static CompareExchange128 GetCompareAndSwap128()
+ {
+ if (_compareExchange128 == null)
+ {
+ GenerateCompareAndSwap128();
+ }
+
+ return _compareExchange128;
+ }
+
+ private static void GenerateCompareAndSwap128()
+ {
+ lock (_lock)
+ {
+ // Checked again inside the lock: another thread may have
+ // generated the routine while this one was waiting.
+ if (_compareExchange128 != null)
+ {
+ return;
+ }
+
+ EmitterContext context = new EmitterContext();
+
+ // The three arguments of the CompareExchange128 delegate.
+ Operand address = context.LoadArgument(OperandType.I64, 0);
+ Operand expected = context.LoadArgument(OperandType.V128, 1);
+ Operand desired = context.LoadArgument(OperandType.V128, 2);
+
+ Operand result = context.CompareAndSwap128(address, expected, desired);
+
+ context.Return(result);
+
+ ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+ OperandType[] argTypes = new OperandType[]
+ {
+ OperandType.I64,
+ OperandType.V128,
+ OperandType.V128
+ };
+
+ _compareExchange128 = Compiler.Compile(
+ cfg,
+ argTypes,
+ OperandType.V128,
+ CompilerOptions.HighCq);
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryProtection.cs b/ARMeilleure/Memory/MemoryProtection.cs
new file mode 100644
index 000000000..6bc16f8ea
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryProtection.cs
@@ -0,0 +1,17 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+ [Flags]
+ public enum MemoryProtection // Bit flags naming the kinds of access allowed on memory.
+ {
+ None = 0, // No access bit set.
+ Read = 1 << 0,
+ Write = 1 << 1,
+ Execute = 1 << 2,
+ // Convenience combinations of the single-bit flags above.
+ ReadAndWrite = Read | Write,
+ ReadAndExecute = Read | Execute,
+ ReadWriteExecute = Read | Write | Execute
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryProtectionException.cs b/ARMeilleure/Memory/MemoryProtectionException.cs
new file mode 100644
index 000000000..6313ce6a1
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryProtectionException.cs
@@ -0,0 +1,9 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+ class MemoryProtectionException : Exception // Exception whose message names the memory protection that could not be set.
+ {
+ public MemoryProtectionException(MemoryProtection protection) : base($"Failed to set memory protection to \"{protection}\".") { } // "protection" is formatted into the message.
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs
new file mode 100644
index 000000000..0b9885dc9
--- /dev/null
+++ b/ARMeilleure/Optimizations.cs
@@ -0,0 +1,33 @@
+using ARMeilleure.CodeGen.X86;
+
+namespace ARMeilleure
+{
+ public static class Optimizations // Global switches; each internal UseXxx below combines a user switch with a HardwareCapabilities flag.
+ {
+ public static bool AssumeStrictAbiCompliance { get; set; } = true;
+ // NOTE(review): the effect of AssumeStrictAbiCompliance and FastFP is not visible in this file; see their consumers.
+ public static bool FastFP { get; set; } = true;
+ // User-facing opt-outs; all default to true.
+ public static bool UseSseIfAvailable { get; set; } = true;
+ public static bool UseSse2IfAvailable { get; set; } = true;
+ public static bool UseSse3IfAvailable { get; set; } = true;
+ public static bool UseSsse3IfAvailable { get; set; } = true;
+ public static bool UseSse41IfAvailable { get; set; } = true;
+ public static bool UseSse42IfAvailable { get; set; } = true;
+ public static bool UsePopCntIfAvailable { get; set; } = true;
+ // Forwards directly to HardwareCapabilities.ForceLegacySse (both getter and setter).
+ public static bool ForceLegacySse
+ {
+ get => HardwareCapabilities.ForceLegacySse;
+ set => HardwareCapabilities.ForceLegacySse = value;
+ }
+ // True only when the user switch is on AND HardwareCapabilities reports support.
+ internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
+ internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
+ internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
+ internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3;
+ internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
+ internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
+ internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/Aarch32Mode.cs b/ARMeilleure/State/Aarch32Mode.cs
new file mode 100644
index 000000000..395e288aa
--- /dev/null
+++ b/ARMeilleure/State/Aarch32Mode.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.State
+{
+ enum Aarch32Mode // 5-bit numeric encodings of the AArch32 modes; presumably the CPSR mode field values - TODO confirm.
+ {
+ User = 0b10000,
+ Fiq = 0b10001,
+ Irq = 0b10010,
+ Supervisor = 0b10011,
+ Monitor = 0b10110,
+ Abort = 0b10111,
+ Hypervisor = 0b11010,
+ Undefined = 0b11011,
+ System = 0b11111
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs
new file mode 100644
index 000000000..22cfcb694
--- /dev/null
+++ b/ARMeilleure/State/ExecutionContext.cs
@@ -0,0 +1,130 @@
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.State
+{
+ public class ExecutionContext : IExecutionContext // Guest CPU state: registers live in a NativeContext, the rest in managed properties.
+ {
+ private const int MinCountForCheck = 40000; // Value the native counter is reset to by the constructor and CheckInterrupt.
+ // Backing store for the X/V registers, PSTATE flags and the counter.
+ private NativeContext _nativeContext;
+ // Base pointer of the native register block.
+ internal IntPtr NativeContextPtr => _nativeContext.BasePtr;
+ // Set by RequestInterrupt, consumed by CheckInterrupt.
+ private bool _interrupted;
+ // Process-wide stopwatch started in the static constructor.
+ private static Stopwatch _tickCounter;
+ // Seconds per stopwatch tick (1.0 / Stopwatch.Frequency).
+ private static double _hostTickFreq;
+ // Fixed values returned for these two registers.
+ public uint CtrEl0 => 0x8444c004;
+ public uint DczidEl0 => 0x00000004;
+ // Counter frequency; CntpctEl0 is the elapsed stopwatch time in seconds multiplied by it.
+ public ulong CntfrqEl0 { get; set; }
+ public ulong CntpctEl0
+ {
+ get
+ {
+ double ticks = _tickCounter.ElapsedTicks * _hostTickFreq;
+ // "ticks" now holds elapsed seconds; scale to CntfrqEl0 units and truncate.
+ return (ulong)(ticks * CntfrqEl0);
+ }
+ }
+
+ public long TpidrEl0 { get; set; }
+ public long Tpidr { get; set; }
+
+ public FPCR Fpcr { get; set; }
+ public FPSR Fpsr { get; set; }
+
+ public bool IsAarch32 { get; set; }
+ // Aarch64 unless IsAarch32 is set; then Thumb or Arm depending on the T flag.
+ internal ExecutionMode ExecutionMode
+ {
+ get
+ {
+ if (IsAarch32)
+ {
+ return GetPstateFlag(PState.TFlag)
+ ? ExecutionMode.Aarch32Thumb
+ : ExecutionMode.Aarch32Arm;
+ }
+ else
+ {
+ return ExecutionMode.Aarch64;
+ }
+ }
+ }
+ // Initialized to true by the constructor; not changed anywhere else in this class.
+ public bool Running { get; set; }
+ // Raised by CheckInterrupt, OnBreak, OnSupervisorCall and OnUndefined respectively.
+ public event EventHandler Interrupt;
+ public event EventHandler Break;
+ public event EventHandler SupervisorCall;
+ public event EventHandler Undefined;
+ // Computes the tick-to-seconds factor and starts the shared stopwatch.
+ static ExecutionContext()
+ {
+ _hostTickFreq = 1.0 / Stopwatch.Frequency;
+
+ _tickCounter = new Stopwatch();
+
+ _tickCounter.Start();
+ }
+ // Allocates the native context, marks the context as running and arms the counter.
+ public ExecutionContext()
+ {
+ _nativeContext = new NativeContext();
+
+ Running = true;
+
+ _nativeContext.SetCounter(MinCountForCheck);
+ }
+ // Register and flag accessors forward to the native context, which validates the index/flag.
+ public ulong GetX(int index) => _nativeContext.GetX(index);
+ public void SetX(int index, ulong value) => _nativeContext.SetX(index, value);
+
+ public V128 GetV(int index) => _nativeContext.GetV(index);
+ public void SetV(int index, V128 value) => _nativeContext.SetV(index, value);
+
+ public bool GetPstateFlag(PState flag) => _nativeContext.GetPstateFlag(flag);
+ public void SetPstateFlag(PState flag, bool value) => _nativeContext.SetPstateFlag(flag, value);
+ // Raises Interrupt once if a request is pending, then re-arms the native counter.
+ internal void CheckInterrupt()
+ {
+ if (_interrupted)
+ {
+ _interrupted = false;
+ // The flag is cleared before the handlers run.
+ Interrupt?.Invoke(this, EventArgs.Empty);
+ }
+
+ _nativeContext.SetCounter(MinCountForCheck);
+ }
+ // Flags a pending interrupt; it is delivered by the next CheckInterrupt call.
+ public void RequestInterrupt()
+ {
+ _interrupted = true;
+ }
+ // Raises Break with the given address and immediate.
+ internal void OnBreak(ulong address, int imm)
+ {
+ Break?.Invoke(this, new InstExceptionEventArgs(address, imm));
+ }
+ // Raises SupervisorCall with the given address and immediate.
+ internal void OnSupervisorCall(ulong address, int imm)
+ {
+ SupervisorCall?.Invoke(this, new InstExceptionEventArgs(address, imm));
+ }
+ // Raises Undefined with the given address and opcode.
+ internal void OnUndefined(ulong address, int opCode)
+ {
+ Undefined?.Invoke(this, new InstUndefinedEventArgs(address, opCode));
+ }
+ // Releases the native register block.
+ public void Dispose()
+ {
+ _nativeContext.Dispose();
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/ExecutionMode.cs b/ARMeilleure/State/ExecutionMode.cs
new file mode 100644
index 000000000..eaed9d27f
--- /dev/null
+++ b/ARMeilleure/State/ExecutionMode.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.State
+{
+ enum ExecutionMode // Instruction set state reported by ExecutionContext.ExecutionMode.
+ {
+ Aarch32Arm, // IsAarch32 with the T flag clear.
+ Aarch32Thumb, // IsAarch32 with the T flag set.
+ Aarch64
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/FPCR.cs b/ARMeilleure/State/FPCR.cs
new file mode 100644
index 000000000..511681fa9
--- /dev/null
+++ b/ARMeilleure/State/FPCR.cs
@@ -0,0 +1,23 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ [Flags]
+ public enum FPCR // Single-bit flags of the FPCR value; the rounding mode field is read via FPCRExtensions.
+ {
+ Ufe = 1 << 11, // Bit 11.
+ Fz = 1 << 24, // Bit 24.
+ Dn = 1 << 25, // Bit 25.
+ Ahp = 1 << 26 // Bit 26.
+ }
+
+ public static class FPCRExtensions // Helpers for fields of FPCR that are wider than one bit.
+ {
+ private const int RModeShift = 22; // Position of the lowest bit of the rounding mode field.
+ // Extracts the 2-bit field at bits 22-23 and returns it as an FPRoundingMode.
+ public static FPRoundingMode GetRoundingMode(this FPCR fpcr)
+ {
+ return (FPRoundingMode)(((int)fpcr >> RModeShift) & 3);
+ }
+ }
+}
diff --git a/ARMeilleure/State/FPException.cs b/ARMeilleure/State/FPException.cs
new file mode 100644
index 000000000..e24e07af1
--- /dev/null
+++ b/ARMeilleure/State/FPException.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.State
+{
+ enum FPException // Floating-point exception kinds; presumably the values are FPSR bit positions - TODO confirm.
+ {
+ InvalidOp = 0,
+ DivideByZero = 1,
+ Overflow = 2,
+ Underflow = 3,
+ Inexact = 4,
+ InputDenorm = 7 // Values 5 and 6 are not used.
+ }
+}
diff --git a/ARMeilleure/State/FPRoundingMode.cs b/ARMeilleure/State/FPRoundingMode.cs
new file mode 100644
index 000000000..ee4f87668
--- /dev/null
+++ b/ARMeilleure/State/FPRoundingMode.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.State
+{
+ public enum FPRoundingMode // Values match the 2-bit field decoded by FPCRExtensions.GetRoundingMode.
+ {
+ ToNearest = 0,
+ TowardsPlusInfinity = 1,
+ TowardsMinusInfinity = 2,
+ TowardsZero = 3
+ }
+}
diff --git a/ARMeilleure/State/FPSR.cs b/ARMeilleure/State/FPSR.cs
new file mode 100644
index 000000000..c20dc4393
--- /dev/null
+++ b/ARMeilleure/State/FPSR.cs
@@ -0,0 +1,11 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ [Flags]
+ public enum FPSR // Single-bit flags of the FPSR value.
+ {
+ Ufc = 1 << 3, // Bit 3.
+ Qc = 1 << 27 // Bit 27.
+ }
+}
diff --git a/ARMeilleure/State/FPType.cs b/ARMeilleure/State/FPType.cs
new file mode 100644
index 000000000..84e0db8da
--- /dev/null
+++ b/ARMeilleure/State/FPType.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.State
+{
+ enum FPType // Classification of a floating-point value; members use the default values 0..4.
+ {
+ Nonzero,
+ Zero,
+ Infinity,
+ QNaN, // Quiet NaN.
+ SNaN // Signaling NaN.
+ }
+}
diff --git a/ARMeilleure/State/IExecutionContext.cs b/ARMeilleure/State/IExecutionContext.cs
new file mode 100644
index 000000000..df91b7a1e
--- /dev/null
+++ b/ARMeilleure/State/IExecutionContext.cs
@@ -0,0 +1,37 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ public interface IExecutionContext : IDisposable // Public surface of a guest CPU context; implemented by ExecutionContext.
+ {
+ uint CtrEl0 { get; }
+ uint DczidEl0 { get; }
+ // Counter frequency (settable) and the derived counter value (read-only).
+ ulong CntfrqEl0 { get; set; }
+ ulong CntpctEl0 { get; }
+
+ long TpidrEl0 { get; set; }
+ long Tpidr { get; set; }
+
+ FPCR Fpcr { get; set; }
+ FPSR Fpsr { get; set; }
+
+ bool IsAarch32 { get; set; }
+
+ bool Running { get; set; }
+ // Notifications raised by the implementation.
+ event EventHandler Interrupt;
+ event EventHandler Break;
+ event EventHandler SupervisorCall;
+ event EventHandler Undefined;
+ // Integer registers can be read and written through the interface.
+ ulong GetX(int index);
+ void SetX(int index, ulong value);
+ // Vector registers and PSTATE flags are read-only through the interface.
+ V128 GetV(int index);
+
+ bool GetPstateFlag(PState flag);
+ // Asks the context to raise Interrupt.
+ void RequestInterrupt();
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/InstExceptionEventArgs.cs b/ARMeilleure/State/InstExceptionEventArgs.cs
new file mode 100644
index 000000000..c2460e4b4
--- /dev/null
+++ b/ARMeilleure/State/InstExceptionEventArgs.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ public class InstExceptionEventArgs : EventArgs // Payload for the Break and SupervisorCall events.
+ {
+ public ulong Address { get; } // Address passed by the raiser.
+ public int Id { get; } // Immediate value passed by the raiser.
+ // Stores both values unchanged.
+ public InstExceptionEventArgs(ulong address, int id)
+ {
+ Address = address;
+ Id = id;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/InstUndefinedEventArgs.cs b/ARMeilleure/State/InstUndefinedEventArgs.cs
new file mode 100644
index 000000000..c02b648e1
--- /dev/null
+++ b/ARMeilleure/State/InstUndefinedEventArgs.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ public class InstUndefinedEventArgs : EventArgs // Payload for the Undefined event.
+ {
+ public ulong Address { get; } // Address passed by the raiser.
+ public int OpCode { get; } // Opcode value passed by the raiser.
+ // Stores both values unchanged.
+ public InstUndefinedEventArgs(ulong address, int opCode)
+ {
+ Address = address;
+ OpCode = opCode;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs
new file mode 100644
index 000000000..4e6a5302f
--- /dev/null
+++ b/ARMeilleure/State/NativeContext.cs
@@ -0,0 +1,157 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.State
+{
+    class NativeContext : IDisposable // Unmanaged block holding integer registers, vector registers, flags and a counter.
+    {
+        private const int IntSize = 8;
+        private const int VecSize = 16;
+        private const int FlagSize = 4;
+        private const int ExtraSize = 4;
+        // Layout: integer registers, then vector registers, then flags, then ExtraSize bytes used for the counter.
+        private const int TotalSize = RegisterConsts.IntRegsCount * IntSize +
+                                      RegisterConsts.VecRegsCount * VecSize +
+                                      RegisterConsts.FlagsCount * FlagSize + ExtraSize;
+        // Base address of the block allocated by the constructor.
+        public IntPtr BasePtr { get; }
+
+        public NativeContext()
+        {
+            BasePtr = MemoryManagement.Allocate(TotalSize);
+        }
+        // Reads integer register "index"; throws ArgumentOutOfRangeException outside [0, IntRegsCount).
+        public ulong GetX(int index)
+        {
+            if ((uint)index >= RegisterConsts.IntRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+            // The unsigned comparison above also rejects negative indices.
+            return (ulong)Marshal.ReadInt64(BasePtr, index * IntSize);
+        }
+        // Writes integer register "index"; throws ArgumentOutOfRangeException outside [0, IntRegsCount).
+        public void SetX(int index, ulong value)
+        {
+            if ((uint)index >= RegisterConsts.IntRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            Marshal.WriteInt64(BasePtr, index * IntSize, (long)value);
+        }
+        // Reads vector register "index"; throws ArgumentOutOfRangeException outside [0, VecRegsCount).
+        public V128 GetV(int index)
+        {
+            if ((uint)index >= RegisterConsts.VecRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+            // Vector registers start right after the integer registers.
+            int offset = RegisterConsts.IntRegsCount * IntSize + index * VecSize;
+
+            return new V128(
+                Marshal.ReadInt64(BasePtr, offset + 0),
+                Marshal.ReadInt64(BasePtr, offset + 8));
+        }
+        // Writes vector register "index"; throws ArgumentOutOfRangeException outside [0, VecRegsCount).
+        public void SetV(int index, V128 value)
+        {
+            if ((uint)index >= RegisterConsts.VecRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            int offset = RegisterConsts.IntRegsCount * IntSize + index * VecSize;
+            // Low 64 bits first, then high 64 bits.
+            Marshal.WriteInt64(BasePtr, offset + 0, value.GetInt64(0));
+            Marshal.WriteInt64(BasePtr, offset + 8, value.GetInt64(1));
+        }
+        // Returns true when the 32-bit slot of "flag" is non-zero; throws ArgumentException for values >= FlagsCount.
+        public bool GetPstateFlag(PState flag)
+        {
+            if ((uint)flag >= RegisterConsts.FlagsCount)
+            {
+                throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+            }
+            // The enum value is used as the slot index inside the flags area.
+            int offset =
+                RegisterConsts.IntRegsCount * IntSize +
+                RegisterConsts.VecRegsCount * VecSize + (int)flag * FlagSize;
+
+            int value = Marshal.ReadInt32(BasePtr, offset);
+
+            return value != 0;
+        }
+        // Stores 1 or 0 in the 32-bit slot of "flag"; throws ArgumentException for values >= FlagsCount.
+        public void SetPstateFlag(PState flag, bool value)
+        {
+            if ((uint)flag >= RegisterConsts.FlagsCount)
+            {
+                throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+            }
+
+            int offset =
+                RegisterConsts.IntRegsCount * IntSize +
+                RegisterConsts.VecRegsCount * VecSize + (int)flag * FlagSize;
+
+            Marshal.WriteInt32(BasePtr, offset, value ? 1 : 0);
+        }
+        // Reads the 32-bit counter stored after the flags area.
+        public int GetCounter()
+        {
+            return Marshal.ReadInt32(BasePtr, GetCounterOffset());
+        }
+        // Writes the 32-bit counter stored after the flags area.
+        public void SetCounter(int value)
+        {
+            Marshal.WriteInt32(BasePtr, GetCounterOffset(), value);
+        }
+        // Returns the byte offset of "reg" inside the block; throws ArgumentException when it would fall outside it.
+        public static int GetRegisterOffset(Register reg)
+        {
+            int offset, size;
+
+            if (reg.Type == RegisterType.Integer)
+            {
+                offset = reg.Index * IntSize;
+
+                size = IntSize;
+            }
+            else if (reg.Type == RegisterType.Vector)
+            {
+                offset = RegisterConsts.IntRegsCount * IntSize + reg.Index * VecSize;
+
+                size = VecSize;
+            }
+            else /* if (reg.Type == RegisterType.Flag) */
+            {
+                offset = RegisterConsts.IntRegsCount * IntSize +
+                         RegisterConsts.VecRegsCount * VecSize + reg.Index * FlagSize;
+
+                size = FlagSize;
+            }
+            // Only the end of the slot is checked against the total block size.
+            if ((uint)(offset + size) > (uint)TotalSize)
+            {
+                throw new ArgumentException("Invalid register.");
+            }
+
+            return offset;
+        }
+        // Byte offset of the counter: the first byte after the flags area.
+        public static int GetCounterOffset()
+        {
+            return RegisterConsts.IntRegsCount * IntSize +
+                   RegisterConsts.VecRegsCount * VecSize +
+                   RegisterConsts.FlagsCount * FlagSize;
+        }
+        // Frees the block; BasePtr must not be used afterwards.
+        public void Dispose()
+        {
+            MemoryManagement.Free(BasePtr);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/PState.cs b/ARMeilleure/State/PState.cs
new file mode 100644
index 000000000..ce755e952
--- /dev/null
+++ b/ARMeilleure/State/PState.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.State
+{
+ [Flags]
+ public enum PState // NOTE(review): members are bit positions (NativeContext uses them as slot indices), not masks, so [Flags] looks unintended - confirm.
+ {
+ TFlag = 5,
+ EFlag = 9,
+ // ArmEmitterContext.MarkFlagSet treats values >= VFlag as the NZCV group.
+ VFlag = 28,
+ CFlag = 29,
+ ZFlag = 30,
+ NFlag = 31
+ }
+}
diff --git a/ARMeilleure/State/RegisterAlias.cs b/ARMeilleure/State/RegisterAlias.cs
new file mode 100644
index 000000000..ae0d45628
--- /dev/null
+++ b/ARMeilleure/State/RegisterAlias.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.State
+{
+ static class RegisterAlias // Named register indices; presumably the slots where banked AArch32 registers are kept - TODO confirm.
+ {
+ public const int R8Usr = 8;
+ public const int R9Usr = 9;
+ public const int R10Usr = 10;
+ public const int R11Usr = 11;
+ public const int R12Usr = 12;
+ public const int SpUsr = 13;
+ public const int LrUsr = 14;
+ // Index 15.
+ public const int SpHyp = 15;
+ // Indices 16-23: LR/SP pairs for the Irq, Svc, Abt and Und suffixes.
+ public const int LrIrq = 16;
+ public const int SpIrq = 17;
+
+ public const int LrSvc = 18;
+ public const int SpSvc = 19;
+
+ public const int LrAbt = 20;
+ public const int SpAbt = 21;
+
+ public const int LrUnd = 22;
+ public const int SpUnd = 23;
+ // Indices 24-30: the Fiq-suffixed registers.
+ public const int R8Fiq = 24;
+ public const int R9Fiq = 25;
+ public const int R10Fiq = 26;
+ public const int R11Fiq = 27;
+ public const int R12Fiq = 28;
+ public const int SpFiq = 29;
+ public const int LrFiq = 30;
+ // These share numeric values with LrUsr (14) and SpHyp (15).
+ public const int Aarch32Lr = 14;
+ public const int Aarch32Pc = 15;
+ // Lr shares its value with LrFiq (30); Zr equals RegisterConsts.ZeroIndex (31).
+ public const int Lr = 30;
+ public const int Zr = 31;
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/RegisterConsts.cs b/ARMeilleure/State/RegisterConsts.cs
new file mode 100644
index 000000000..a85117bb2
--- /dev/null
+++ b/ARMeilleure/State/RegisterConsts.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.State
+{
+ static class RegisterConsts // Register file dimensions; NativeContext derives its memory layout from them.
+ {
+ public const int IntRegsCount = 32;
+ public const int VecRegsCount = 32;
+ public const int FlagsCount = 32;
+ public const int IntAndVecRegsCount = IntRegsCount + VecRegsCount; // 64.
+ public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount; // 96.
+ // Same value as RegisterAlias.Zr.
+ public const int ZeroIndex = 31;
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/V128.cs b/ARMeilleure/State/V128.cs
new file mode 100644
index 000000000..eeb9ff1ca
--- /dev/null
+++ b/ARMeilleure/State/V128.cs
@@ -0,0 +1,214 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    public struct V128 : IEquatable<V128> // 128-bit value stored as two 64-bit halves: _e0 is the low half, _e1 the high half.
+    {
+        private ulong _e0;
+        private ulong _e1;
+
+        private static V128 _zero = new V128(0, 0);
+        // All-zero vector.
+        public static V128 Zero => _zero;
+        // Scalar constructors put the value in element 0 and zero the remaining elements.
+        public V128(float value) : this(value, 0, 0, 0) { }
+
+        public V128(double value) : this(value, 0) { }
+        // Packs four floats by bit pattern; e0 occupies the lowest 32 bits, e3 the highest.
+        public V128(float e0, float e1, float e2, float e3)
+        {
+            _e0 = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0;
+            _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32;
+            _e1 = (ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0;
+            _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32;
+        }
+        // Packs two doubles by bit pattern; e0 is the low half.
+        public V128(double e0, double e1)
+        {
+            _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0);
+            _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1);
+        }
+        // Packs four 32-bit integers; the (uint) cast prevents sign extension into the neighbouring element.
+        public V128(int e0, int e1, int e2, int e3)
+        {
+            _e0 = (ulong)(uint)e0 << 0;
+            _e0 |= (ulong)(uint)e1 << 32;
+            _e1 = (ulong)(uint)e2 << 0;
+            _e1 |= (ulong)(uint)e3 << 32;
+        }
+        // Packs four unsigned 32-bit integers; e0 occupies the lowest 32 bits.
+        public V128(uint e0, uint e1, uint e2, uint e3)
+        {
+            _e0 = (ulong)e0 << 0;
+            _e0 |= (ulong)e1 << 32;
+            _e1 = (ulong)e2 << 0;
+            _e1 |= (ulong)e3 << 32;
+        }
+        // Stores two signed 64-bit integers bit-for-bit; e0 is the low half.
+        public V128(long e0, long e1)
+        {
+            _e0 = (ulong)e0;
+            _e1 = (ulong)e1;
+        }
+        // Stores the two halves directly; e0 is the low half.
+        public V128(ulong e0, ulong e1)
+        {
+            _e0 = e0;
+            _e1 = e1;
+        }
+        // Reads the first 16 bytes of "data" using BitConverter; bytes 0-7 become the low half.
+        public V128(byte[] data)
+        {
+            _e0 = (ulong)BitConverter.ToInt64(data, 0);
+            _e1 = (ulong)BitConverter.ToInt64(data, 8);
+        }
+        // Replaces 32-bit element "index" (0..3); throws ArgumentOutOfRangeException otherwise.
+        public void Insert(int index, uint value)
+        {
+            switch (index)
+            {
+                case 0: _e0 = (_e0 & 0xffffffff00000000) | ((ulong)value << 0); break;
+                case 1: _e0 = (_e0 & 0x00000000ffffffff) | ((ulong)value << 32); break;
+                case 2: _e1 = (_e1 & 0xffffffff00000000) | ((ulong)value << 0); break;
+                case 3: _e1 = (_e1 & 0x00000000ffffffff) | ((ulong)value << 32); break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index));
+            }
+        }
+        // Replaces 64-bit element "index" (0..1); throws ArgumentOutOfRangeException otherwise.
+        public void Insert(int index, ulong value)
+        {
+            switch (index)
+            {
+                case 0: _e0 = value; break;
+                case 1: _e1 = value; break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index));
+            }
+        }
+        // Element 0 reinterpreted as a float.
+        public float AsFloat()
+        {
+            return GetFloat(0);
+        }
+        // Element 0 reinterpreted as a double.
+        public double AsDouble()
+        {
+            return GetDouble(0);
+        }
+        // 32-bit element "index" reinterpreted as a float.
+        public float GetFloat(int index)
+        {
+            return BitConverter.Int32BitsToSingle(GetInt32(index));
+        }
+        // 64-bit element "index" reinterpreted as a double.
+        public double GetDouble(int index)
+        {
+            return BitConverter.Int64BitsToDouble(GetInt64(index));
+        }
+        // Signed views of the unsigned element accessors below.
+        public int GetInt32(int index) => (int)GetUInt32(index);
+        public long GetInt64(int index) => (long)GetUInt64(index);
+        // Returns 32-bit element "index" (0..3); throws ArgumentOutOfRangeException otherwise.
+        public uint GetUInt32(int index)
+        {
+            switch (index)
+            {
+                case 0: return (uint)(_e0 >> 0);
+                case 1: return (uint)(_e0 >> 32);
+                case 2: return (uint)(_e1 >> 0);
+                case 3: return (uint)(_e1 >> 32);
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+        // Returns 64-bit element "index" (0..1); throws ArgumentOutOfRangeException otherwise.
+        public ulong GetUInt64(int index)
+        {
+            switch (index)
+            {
+                case 0: return _e0;
+                case 1: return _e1;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+        // Returns the 16 bytes of the value: low half first, each half in BitConverter byte order.
+        public byte[] ToArray()
+        {
+            byte[] e0Data = BitConverter.GetBytes(_e0);
+            byte[] e1Data = BitConverter.GetBytes(_e1);
+
+            byte[] data = new byte[16];
+
+            Buffer.BlockCopy(e0Data, 0, data, 0, 8);
+            Buffer.BlockCopy(e1Data, 0, data, 8, 8);
+
+            return data;
+        }
+        // Hash consistent with Equals: combines both halves.
+        public override int GetHashCode()
+        {
+            return HashCode.Combine(_e0, _e1);
+        }
+        // Bitwise operators apply to both halves independently.
+        public static V128 operator ~(V128 x)
+        {
+            return new V128(~x._e0, ~x._e1);
+        }
+
+        public static V128 operator &(V128 x, V128 y)
+        {
+            return new V128(x._e0 & y._e0, x._e1 & y._e1);
+        }
+
+        public static V128 operator |(V128 x, V128 y)
+        {
+            return new V128(x._e0 | y._e0, x._e1 | y._e1);
+        }
+
+        public static V128 operator ^(V128 x, V128 y)
+        {
+            return new V128(x._e0 ^ y._e0, x._e1 ^ y._e1);
+        }
+        // 128-bit left shift for counts 0..63; the bits leaving the low half are carried into the high half.
+        public static V128 operator <<(V128 x, int shift)
+        {
+            ulong shiftOut = shift == 0 ? 0UL : x._e0 >> (64 - shift);
+            // Zero is special-cased: C# reduces a 64-bit shift count of 64 to 0, which would OR all of _e0 into _e1.
+            return new V128(x._e0 << shift, (x._e1 << shift) | shiftOut);
+        }
+        // 128-bit right shift for counts 0..63; the bits leaving the high half are carried into the low half.
+        public static V128 operator >>(V128 x, int shift)
+        {
+            ulong shiftOut = x._e1 & ((1UL << shift) - 1);
+            // For shift == 0 the mask above is 0, so nothing is carried and no special case is needed.
+            return new V128((x._e0 >> shift) | (shiftOut << (64 - shift)), x._e1 >> shift);
+        }
+        // Equality operators delegate to Equals(V128).
+        public static bool operator ==(V128 x, V128 y)
+        {
+            return x.Equals(y);
+        }
+
+        public static bool operator !=(V128 x, V128 y)
+        {
+            return !x.Equals(y);
+        }
+        // True only when "obj" is a V128 with identical halves.
+        public override bool Equals(object obj)
+        {
+            return obj is V128 vector && Equals(vector);
+        }
+        // Bitwise equality of both halves.
+        public bool Equals(V128 other)
+        {
+            return other._e0 == _e0 && other._e1 == _e1;
+        }
+        // 32 hexadecimal digits prefixed with "0x", high half first.
+        public override string ToString()
+        {
+            return $"0x{_e1:X16}{_e0:X16}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Statistics.cs b/ARMeilleure/Statistics.cs
new file mode 100644
index 000000000..e80ee59d6
--- /dev/null
+++ b/ARMeilleure/Statistics.cs
@@ -0,0 +1,92 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+
+namespace ARMeilleure
+{
+    public static class Statistics // Accumulates stopwatch ticks per function address; timing is compiled in only with M_PROFILE.
+    {
+        private const int ReportMaxFunctions = 100;
+        // One stopwatch per thread; created by InitializeTimer.
+        [ThreadStatic]
+        private static Stopwatch _executionTimer;
+        // Function address -> accumulated stopwatch ticks.
+        private static ConcurrentDictionary<ulong, long> _ticksPerFunction;
+
+        static Statistics()
+        {
+            _ticksPerFunction = new ConcurrentDictionary<ulong, long>();
+        }
+        // Creates the calling thread's stopwatch if it does not exist yet (M_PROFILE builds only).
+        public static void InitializeTimer()
+        {
+#if M_PROFILE
+            if (_executionTimer == null)
+            {
+                _executionTimer = new Stopwatch();
+            }
+#endif
+        }
+        // Resets and starts the calling thread's stopwatch; InitializeTimer must have run on this thread.
+        internal static void StartTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Restart();
+#endif
+        }
+        // Stops the stopwatch and adds its elapsed ticks to the total recorded for "funcAddr".
+        internal static void StopTimer(ulong funcAddr)
+        {
+#if M_PROFILE
+            _executionTimer.Stop();
+
+            long ticks = _executionTimer.ElapsedTicks;
+
+            _ticksPerFunction.AddOrUpdate(funcAddr, ticks, (key, oldTicks) => oldTicks + ticks);
+#endif
+        }
+        // Continues timing without resetting the elapsed value.
+        internal static void ResumeTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Start();
+#endif
+        }
+        // Suspends timing without recording anything.
+        internal static void PauseTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Stop();
+#endif
+        }
+        // Returns a text table with at most ReportMaxFunctions entries, ordered by descending time.
+        public static string GetReport()
+        {
+            int count = 0;
+
+            StringBuilder sb = new StringBuilder();
+
+            sb.AppendLine(" Function address | Time");
+            sb.AppendLine("--------------------------");
+            // Snapshot first, so sorting is not affected by concurrent updates.
+            KeyValuePair<ulong, long>[] funcTable = _ticksPerFunction.ToArray();
+
+            foreach (KeyValuePair<ulong, long> kv in funcTable.OrderByDescending(x => x.Value))
+            {
+                long timeInMs = (kv.Value * 1000) / Stopwatch.Frequency;
+
+                sb.AppendLine($" 0x{kv.Key:X16} | {timeInMs} ms");
+                // Pre-increment: stop right after the ReportMaxFunctions-th entry has been written.
+                if (++count >= ReportMaxFunctions)
+                {
+                    break;
+                }
+            }
+
+            return sb.ToString();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs
new file mode 100644
index 000000000..d35e985e6
--- /dev/null
+++ b/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -0,0 +1,153 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    class ArmEmitterContext : EmitterContext // Emitter context that also tracks labels and the last compare, for compare + branch fusion.
+    {
+        private Dictionary<ulong, Operand> _labels;
+        // Opcode that last recorded a comparison, and opcode that last modified an NZCV flag.
+        private OpCode _optOpLastCompare;
+        private OpCode _optOpLastFlagSet;
+        // Copies of the operands of the last recorded comparison.
+        private Operand _optCmpTempN;
+        private Operand _optCmpTempM;
+
+        private Block _currBlock;
+        // Assigning a new block discards the tracked comparison state.
+        public Block CurrBlock
+        {
+            get
+            {
+                return _currBlock;
+            }
+            set
+            {
+                _currBlock = value;
+
+                ResetBlockState();
+            }
+        }
+        // Opcode currently being emitted; recorded by MarkComparison and MarkFlagSet.
+        public OpCode CurrOp { get; set; }
+
+        public MemoryManager Memory { get; }
+
+        public Aarch32Mode Mode { get; }
+        // Stores the arguments and creates an empty label table.
+        public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode)
+        {
+            Memory = memory;
+            Mode = mode;
+
+            _labels = new Dictionary<ulong, Operand>();
+        }
+        // Returns the label for "address", creating and caching it on first use.
+        public Operand GetLabel(ulong address)
+        {
+            if (!_labels.TryGetValue(address, out Operand label))
+            {
+                label = Label();
+
+                _labels.Add(address, label);
+            }
+
+            return label;
+        }
+        // Records the current opcode as the last comparison and keeps copies of its two operands.
+        public void MarkComparison(Operand n, Operand m)
+        {
+            _optOpLastCompare = CurrOp;
+
+            _optCmpTempN = Copy(n);
+            _optCmpTempM = Copy(m);
+        }
+        // Records the current opcode as the last one that modified an NZCV flag.
+        public void MarkFlagSet(PState stateFlag)
+        {
+            // Set this only if any of the NZCV flag bits were modified.
+            // This is used to ensure that when emitting a direct IL branch
+            // instruction for compare + branch sequences, we're not expecting
+            // to use comparison values from an old instruction, when in fact
+            // the flags were already overwritten by another instruction further along.
+            if (stateFlag >= PState.VFlag)
+            {
+                _optOpLastFlagSet = CurrOp;
+            }
+        }
+        // Forgets the tracked comparison; called whenever CurrBlock is assigned.
+        private void ResetBlockState()
+        {
+            _optOpLastCompare = null;
+            _optOpLastFlagSet = null;
+        }
+        // Returns a direct comparison of the last compared operands for "condition", or null when that is not possible.
+        public Operand TryGetComparisonResult(Condition condition)
+        {
+            if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
+            {
+                return null;
+            }
+            // The flags still come from the recorded comparison, so its operands can be reused.
+            Operand n = _optCmpTempN;
+            Operand m = _optCmpTempM;
+
+            InstName cmpName = _optOpLastCompare.Instruction.Name;
+
+            if (cmpName == InstName.Subs)
+            {
+                switch (condition)
+                {
+                    case Condition.Eq: return ICompareEqual(n, m);
+                    case Condition.Ne: return ICompareNotEqual(n, m);
+                    case Condition.GeUn: return ICompareGreaterOrEqualUI(n, m);
+                    case Condition.LtUn: return ICompareLessUI(n, m);
+                    case Condition.GtUn: return ICompareGreaterUI(n, m);
+                    case Condition.LeUn: return ICompareLessOrEqualUI(n, m);
+                    case Condition.Ge: return ICompareGreaterOrEqual(n, m);
+                    case Condition.Lt: return ICompareLess(n, m);
+                    case Condition.Gt: return ICompareGreater(n, m);
+                    case Condition.Le: return ICompareLessOrEqual(n, m);
+                }
+            }
+            else if (cmpName == InstName.Adds && _optOpLastCompare is IOpCodeAluImm op)
+            {
+                // There are several limitations that need to be taken into account for CMN comparisons:
+                // - The unsigned comparisons are not valid, as they depend on the
+                // carry flag value, and they will have different values for addition and
+                // subtraction. For addition, it's carry, and for subtraction, it's borrow.
+                // So, we need to make sure we're not doing an unsigned compare for the CMN case.
+                // - We can only do the optimization for the immediate variants,
+                // because when the second operand value is exactly INT_MIN, we can't
+                // negate the value as there's no positive counterpart.
+                // Such invalid values can't be encoded on the immediate encodings.
+                if (op.RegisterSize == RegisterSize.Int32)
+                {
+                    m = Const((int)-op.Immediate);
+                }
+                else
+                {
+                    m = Const(-op.Immediate);
+                }
+                // Only the equality and signed conditions are handled here.
+                switch (condition)
+                {
+                    case Condition.Eq: return ICompareEqual(n, m);
+                    case Condition.Ne: return ICompareNotEqual(n, m);
+                    case Condition.Ge: return ICompareGreaterOrEqual(n, m);
+                    case Condition.Lt: return ICompareLess(n, m);
+                    case Condition.Gt: return ICompareGreater(n, m);
+                    case Condition.Le: return ICompareLessOrEqual(n, m);
+                }
+            }
+            // Unsupported instruction or condition: the caller gets null.
+            return null;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs
new file mode 100644
index 000000000..4075a7f06
--- /dev/null
+++ b/ARMeilleure/Translation/Compiler.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.CodeGen.X86;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+    static class Compiler // Runs the compilation passes on a control flow graph and returns the generated code as a delegate of type T.
+    {
+        public static T Compile<T>(
+            ControlFlowGraph cfg,
+            OperandType[] funcArgTypes,
+            OperandType funcReturnType,
+            CompilerOptions options)
+        {
+            Logger.StartPass(PassName.Dominance);
+            // Dominance information is computed before SSA construction / register renaming.
+            Dominance.FindDominators(cfg);
+            Dominance.FindDominanceFrontiers(cfg);
+
+            Logger.EndPass(PassName.Dominance);
+
+            Logger.StartPass(PassName.SsaConstruction);
+            // SsaForm selects SSA construction; otherwise RegisterToLocal.Rename is used instead.
+            if ((options & CompilerOptions.SsaForm) != 0)
+            {
+                Ssa.Construct(cfg);
+            }
+            else
+            {
+                RegisterToLocal.Rename(cfg);
+            }
+
+            Logger.EndPass(PassName.SsaConstruction, cfg);
+
+            CompilerContext cctx = new CompilerContext(cfg, funcArgTypes, funcReturnType, options);
+
+            CompiledFunction func = CodeGenerator.Generate(cctx);
+            // Map the generated code through the JIT cache and wrap the resulting pointer in a delegate.
+            IntPtr codePtr = JitCache.Map(func);
+
+            return Marshal.GetDelegateForFunctionPointer<T>(codePtr);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/CompilerContext.cs b/ARMeilleure/Translation/CompilerContext.cs
new file mode 100644
index 000000000..cfe5ad1e5
--- /dev/null
+++ b/ARMeilleure/Translation/CompilerContext.cs
@@ -0,0 +1,26 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.Translation
+{
+ struct CompilerContext // Immutable bundle of the inputs that Compiler.Compile passes to CodeGenerator.Generate.
+ {
+ public ControlFlowGraph Cfg { get; }
+ // Argument and return types of the function being compiled.
+ public OperandType[] FuncArgTypes { get; }
+ public OperandType FuncReturnType { get; }
+
+ public CompilerOptions Options { get; }
+ // Stores all four values unchanged.
+ public CompilerContext(
+ ControlFlowGraph cfg,
+ OperandType[] funcArgTypes,
+ OperandType funcReturnType,
+ CompilerOptions options)
+ {
+ Cfg = cfg;
+ FuncArgTypes = funcArgTypes;
+ FuncReturnType = funcReturnType;
+ Options = options;
+ }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/CompilerOptions.cs b/ARMeilleure/Translation/CompilerOptions.cs
new file mode 100644
index 000000000..53998ec6f
--- /dev/null
+++ b/ARMeilleure/Translation/CompilerOptions.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.Translation
+{
+ [Flags]
+ enum CompilerOptions // Bit flags selecting optional compilation steps.
+ {
+ None = 0,
+ SsaForm = 1 << 0, // Checked by Compiler.Compile to choose SSA construction.
+ Optimize = 1 << 1,
+ Lsra = 1 << 2,
+ // Presets combining the single-bit flags above.
+ MediumCq = SsaForm | Optimize,
+ HighCq = SsaForm | Optimize | Lsra
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/ControlFlowGraph.cs b/ARMeilleure/Translation/ControlFlowGraph.cs
new file mode 100644
index 000000000..758f1f968
--- /dev/null
+++ b/ARMeilleure/Translation/ControlFlowGraph.cs
@@ -0,0 +1,158 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.Translation
+{
+    class ControlFlowGraph // Basic blocks of a function plus a post-order numbering of the blocks reachable from Entry.
+    {
+        public BasicBlock Entry { get; }
+        // All blocks of the graph, in list order.
+        public LinkedList<BasicBlock> Blocks { get; }
+        // Reachable blocks in post-order (the entry block is stored last).
+        public BasicBlock[] PostOrderBlocks { get; }
+        // Maps BasicBlock.Index to the block's position inside PostOrderBlocks.
+        public int[] PostOrderMap { get; }
+        // Prunes unreachable blocks, then numbers the remaining ones with an iterative depth-first traversal.
+        public ControlFlowGraph(BasicBlock entry, LinkedList<BasicBlock> blocks)
+        {
+            Entry = entry;
+            Blocks = blocks;
+
+            RemoveUnreachableBlocks(blocks);
+
+            HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
+
+            Stack<BasicBlock> blockStack = new Stack<BasicBlock>();
+
+            PostOrderBlocks = new BasicBlock[blocks.Count];
+
+            PostOrderMap = new int[blocks.Count];
+
+            visited.Add(entry);
+
+            blockStack.Push(entry);
+
+            int index = 0;
+            // A block is re-pushed below its unvisited successor and is numbered only once both successors were visited.
+            while (blockStack.TryPop(out BasicBlock block))
+            {
+                if (block.Next != null && visited.Add(block.Next))
+                {
+                    blockStack.Push(block);
+                    blockStack.Push(block.Next);
+                }
+                else if (block.Branch != null && visited.Add(block.Branch))
+                {
+                    blockStack.Push(block);
+                    blockStack.Push(block.Branch);
+                }
+                else
+                {
+                    PostOrderMap[block.Index] = index;
+
+                    PostOrderBlocks[index++] = block;
+                }
+            }
+        }
+        // Removes from "blocks" every block not reachable from Entry and renumbers the remaining ones from 0.
+        private void RemoveUnreachableBlocks(LinkedList<BasicBlock> blocks)
+        {
+            HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
+
+            Queue<BasicBlock> workQueue = new Queue<BasicBlock>();
+
+            visited.Add(Entry);
+
+            workQueue.Enqueue(Entry);
+            // Breadth-first walk over the Next and Branch successors.
+            while (workQueue.TryDequeue(out BasicBlock block))
+            {
+                Debug.Assert(block.Index != -1, "Invalid block index.");
+
+                if (block.Next != null && visited.Add(block.Next))
+                {
+                    workQueue.Enqueue(block.Next);
+                }
+
+                if (block.Branch != null && visited.Add(block.Branch))
+                {
+                    workQueue.Enqueue(block.Branch);
+                }
+            }
+
+            if (visited.Count < blocks.Count)
+            {
+                // Remove unreachable blocks and renumber.
+                int index = 0;
+
+                for (LinkedListNode<BasicBlock> node = blocks.First; node != null;)
+                {
+                    LinkedListNode<BasicBlock> nextNode = node.Next;
+                    // nextNode is captured first because the current node may be removed below.
+                    BasicBlock block = node.Value;
+
+                    if (!visited.Contains(block))
+                    {
+                        block.Next = null;
+                        block.Branch = null;
+
+                        blocks.Remove(node);
+                    }
+                    else
+                    {
+                        block.Index = index++;
+                    }
+
+                    node = nextNode;
+                }
+            }
+        }
+        // Inserts a new block on the predecessor -> successor edge and returns it; throws ArgumentException if no such edge exists.
+        public BasicBlock SplitEdge(BasicBlock predecessor, BasicBlock successor)
+        {
+            BasicBlock splitBlock = new BasicBlock(Blocks.Count);
+
+            if (predecessor.Next == successor)
+            {
+                predecessor.Next = splitBlock;
+            }
+
+            if (predecessor.Branch == successor)
+            {
+                predecessor.Branch = splitBlock;
+            }
+            // NOTE(review): presumably the Next/Branch setters register predecessors on the target block - confirm in BasicBlock.
+            if (splitBlock.Predecessors.Count == 0)
+            {
+                throw new ArgumentException("Predecessor and successor are not connected.");
+            }
+
+            // Insert the new block on the list of blocks.
+            BasicBlock succPrev = successor.Node.Previous?.Value;
+
+            if (succPrev != null && succPrev != predecessor && succPrev.Next == successor)
+            {
+                // Can't insert after the predecessor or before the successor.
+                // Here, we insert it before the successor by also splitting another
+                // edge (the one between the block before "successor" and "successor").
+                BasicBlock splitBlock2 = new BasicBlock(splitBlock.Index + 1);
+
+                succPrev.Next = splitBlock2;
+
+                splitBlock2.Branch = successor;
+
+                splitBlock2.Operations.AddLast(new Operation(Instruction.Branch, null));
+
+                Blocks.AddBefore(successor.Node, splitBlock2);
+            }
+
+            splitBlock.Next = successor;
+
+            Blocks.AddBefore(successor.Node, splitBlock);
+
+            return splitBlock;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/DelegateCache.cs b/ARMeilleure/Translation/DelegateCache.cs
new file mode 100644
index 000000000..7328c61a6
--- /dev/null
+++ b/ARMeilleure/Translation/DelegateCache.cs
@@ -0,0 +1,26 @@
+using System;
+using System.Collections.Concurrent;
+using System.Reflection;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Process wide cache that keeps a reference to every delegate passed to it,
+    /// so that the delegate stays alive for as long as the process runs.
+    /// </summary>
+    static class DelegateCache
+    {
+        // NOTE: The key is built from the declaring type and the method name only,
+        // so delegates for overloads sharing a name map to the same entry.
+        private static readonly ConcurrentDictionary<string, Delegate> _delegates =
+            new ConcurrentDictionary<string, Delegate>();
+
+        /// <summary>
+        /// Gets the delegate cached for the method that <paramref name="dlg"/> targets,
+        /// caching <paramref name="dlg"/> itself if there was none yet.
+        /// </summary>
+        /// <param name="dlg">Delegate to look up or add</param>
+        /// <returns>The cached delegate, which is <paramref name="dlg"/> when it was just added</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="dlg"/> is null</exception>
+        public static Delegate GetOrAdd(Delegate dlg)
+        {
+            if (dlg == null)
+            {
+                throw new ArgumentNullException(nameof(dlg));
+            }
+
+            return _delegates.GetOrAdd(GetKey(dlg.Method), (key) => dlg);
+        }
+
+        private static string GetKey(MethodInfo info)
+        {
+            return $"{info.DeclaringType.FullName}.{info.Name}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Dominance.cs b/ARMeilleure/Translation/Dominance.cs
new file mode 100644
index 000000000..bb55169ed
--- /dev/null
+++ b/ARMeilleure/Translation/Dominance.cs
@@ -0,0 +1,95 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Diagnostics;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Computes immediate dominators and dominance frontiers for the blocks of a control flow graph.
+    /// </summary>
+    static class Dominance
+    {
+        // Both methods implement the algorithms from "A Simple, Fast Dominance Algorithm".
+        // https://www.cs.rice.edu/~keith/EMBED/dom.pdf
+
+        /// <summary>
+        /// Sets <c>ImmediateDominator</c> on the blocks of <paramref name="cfg"/>,
+        /// iterating until no block changes anymore.
+        /// </summary>
+        /// <param name="cfg">Control flow graph to process; its post order data must be up to date</param>
+        public static void FindDominators(ControlFlowGraph cfg)
+        {
+            cfg.Entry.ImmediateDominator = cfg.Entry;
+
+            Debug.Assert(cfg.Entry == cfg.PostOrderBlocks[cfg.PostOrderBlocks.Length - 1]);
+
+            bool changed = true;
+
+            while (changed)
+            {
+                changed = false;
+
+                // Walk the post order array backwards, skipping its last element (the entry block).
+                for (int poIndex = cfg.PostOrderBlocks.Length - 2; poIndex >= 0; poIndex--)
+                {
+                    BasicBlock current = cfg.PostOrderBlocks[poIndex];
+
+                    BasicBlock candidate = null;
+
+                    foreach (BasicBlock pred in current.Predecessors)
+                    {
+                        // Predecessors without a dominator yet were not processed, ignore them for now.
+                        if (pred.ImmediateDominator == null)
+                        {
+                            continue;
+                        }
+
+                        candidate = candidate == null ? pred : Intersect(cfg, pred, candidate);
+                    }
+
+                    if (current.ImmediateDominator != candidate)
+                    {
+                        current.ImmediateDominator = candidate;
+
+                        changed = true;
+                    }
+                }
+            }
+        }
+
+        // Walks both blocks up their dominator chains, always advancing the one
+        // with the lower post order number, until they meet on the same block.
+        private static BasicBlock Intersect(ControlFlowGraph cfg, BasicBlock left, BasicBlock right)
+        {
+            while (left != right)
+            {
+                while (cfg.PostOrderMap[left.Index] < cfg.PostOrderMap[right.Index])
+                {
+                    left = left.ImmediateDominator;
+                }
+
+                while (cfg.PostOrderMap[right.Index] < cfg.PostOrderMap[left.Index])
+                {
+                    right = right.ImmediateDominator;
+                }
+            }
+
+            return left;
+        }
+
+        /// <summary>
+        /// Fills <c>DominanceFrontiers</c> on the blocks of <paramref name="cfg"/>.
+        /// Immediate dominators must have been computed before calling this.
+        /// </summary>
+        /// <param name="cfg">Control flow graph to process</param>
+        public static void FindDominanceFrontiers(ControlFlowGraph cfg)
+        {
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                // Only blocks with two or more predecessors are added to a frontier.
+                if (block.Predecessors.Count >= 2)
+                {
+                    foreach (BasicBlock pred in block.Predecessors)
+                    {
+                        // Every block on the way from the predecessor up to (but excluding)
+                        // the immediate dominator of "block" has "block" on its frontier.
+                        BasicBlock runner = pred;
+
+                        while (runner != block.ImmediateDominator)
+                        {
+                            runner.DominanceFrontiers.Add(block);
+
+                            runner = runner.ImmediateDominator;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs
new file mode 100644
index 000000000..13cf677c7
--- /dev/null
+++ b/ARMeilleure/Translation/EmitterContext.cs
@@ -0,0 +1,562 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Builds the intermediate representation of a function. Every emit method appends
+    /// one operation to the current basic block and returns its destination operand (if any),
+    /// while branches and labels create the blocks and link them together.
+    /// </summary>
+    class EmitterContext
+    {
+        // Maps the type code of a managed return type to the operand type holding a call result.
+        private static readonly Dictionary<TypeCode, OperandType> _typeCodeToOperandTypeMap =
+            new Dictionary<TypeCode, OperandType>()
+        {
+            { TypeCode.Boolean, OperandType.I32  },
+            { TypeCode.Byte,    OperandType.I32  },
+            { TypeCode.Char,    OperandType.I32  },
+            { TypeCode.Double,  OperandType.FP64 },
+            { TypeCode.Int16,   OperandType.I32  },
+            { TypeCode.Int32,   OperandType.I32  },
+            { TypeCode.Int64,   OperandType.I64  },
+            { TypeCode.SByte,   OperandType.I32  },
+            { TypeCode.Single,  OperandType.FP32 },
+            { TypeCode.UInt16,  OperandType.I32  },
+            { TypeCode.UInt32,  OperandType.I32  },
+            { TypeCode.UInt64,  OperandType.I64  }
+        };
+
+        // Block of each label, created when the label is first branched to or marked.
+        private readonly Dictionary<Operand, BasicBlock> _irLabels;
+
+        // Blocks of the function, in the order they were added.
+        private readonly LinkedList<BasicBlock> _irBlocks;
+
+        // Block currently receiving operations, null until the first block is created.
+        private BasicBlock _irBlock;
+
+        // When set, a new block is created before the next operation is appended.
+        private bool _needsNewBlock;
+
+        public EmitterContext()
+        {
+            _irLabels = new Dictionary<Operand, BasicBlock>();
+
+            _irBlocks = new LinkedList<BasicBlock>();
+
+            _needsNewBlock = true;
+        }
+
+        public Operand Add(Operand op1, Operand op2) => Add(Instruction.Add, Local(op1.Type), op1, op2);
+
+        public Operand BitwiseAnd(Operand op1, Operand op2) =>
+            Add(Instruction.BitwiseAnd, Local(op1.Type), op1, op2);
+
+        public Operand BitwiseExclusiveOr(Operand op1, Operand op2) =>
+            Add(Instruction.BitwiseExclusiveOr, Local(op1.Type), op1, op2);
+
+        public Operand BitwiseNot(Operand op1) => Add(Instruction.BitwiseNot, Local(op1.Type), op1);
+
+        public Operand BitwiseOr(Operand op1, Operand op2) =>
+            Add(Instruction.BitwiseOr, Local(op1.Type), op1, op2);
+
+        /// <summary>
+        /// Emits an unconditional branch to <paramref name="label"/> and ends the current block.
+        /// </summary>
+        public void Branch(Operand label)
+        {
+            Add(Instruction.Branch, null);
+
+            BranchToLabel(label);
+        }
+
+        /// <summary>
+        /// Emits a branch to <paramref name="label"/> depending on <paramref name="op1"/>,
+        /// and ends the current block.
+        /// </summary>
+        public void BranchIfFalse(Operand label, Operand op1)
+        {
+            Add(Instruction.BranchIfFalse, null, op1);
+
+            BranchToLabel(label);
+        }
+
+        /// <summary>
+        /// Emits a branch to <paramref name="label"/> depending on <paramref name="op1"/>,
+        /// and ends the current block.
+        /// </summary>
+        public void BranchIfTrue(Operand label, Operand op1)
+        {
+            Add(Instruction.BranchIfTrue, null, op1);
+
+            BranchToLabel(label);
+        }
+
+        public Operand ByteSwap(Operand op1) => Add(Instruction.ByteSwap, Local(op1.Type), op1);
+
+        /// <summary>
+        /// Emits a call to a managed delegate, through its native function pointer.
+        /// </summary>
+        /// <param name="func">Delegate to call; its return type selects the result operand type</param>
+        /// <param name="callArgs">Arguments passed to the function</param>
+        /// <returns>Operand with the call result, or null when the delegate returns void</returns>
+        /// <exception cref="ArgumentException">The delegate return type has no operand type</exception>
+        public Operand Call(Delegate func, params Operand[] callArgs)
+        {
+            // Add the delegate to the cache to ensure it will not be garbage collected.
+            func = DelegateCache.GetOrAdd(func);
+
+            IntPtr ptr = Marshal.GetFunctionPointerForDelegate(func);
+
+            OperandType returnType = GetOperandType(func.Method.ReturnType);
+
+            return Call(Const(ptr.ToInt64()), returnType, callArgs);
+        }
+
+        private static OperandType GetOperandType(Type type)
+        {
+            if (_typeCodeToOperandTypeMap.TryGetValue(Type.GetTypeCode(type), out OperandType ot))
+            {
+                return ot;
+            }
+            else if (type == typeof(V128))
+            {
+                return OperandType.V128;
+            }
+            else if (type == typeof(void))
+            {
+                return OperandType.None;
+            }
+
+            throw new ArgumentException($"Invalid type \"{type.Name}\".");
+        }
+
+        /// <summary>
+        /// Emits a call to the function at <paramref name="address"/>.
+        /// </summary>
+        /// <param name="address">Address of the function, used as first source of the operation</param>
+        /// <param name="returnType">Type of the result, <see cref="OperandType.None"/> for no result</param>
+        /// <param name="callArgs">Arguments passed to the function</param>
+        /// <returns>Operand with the call result, or null when there is no result</returns>
+        public Operand Call(Operand address, OperandType returnType, params Operand[] callArgs)
+        {
+            Operand[] args = new Operand[callArgs.Length + 1];
+
+            args[0] = address;
+
+            Array.Copy(callArgs, 0, args, 1, callArgs.Length);
+
+            if (returnType != OperandType.None)
+            {
+                return Add(Instruction.Call, Local(returnType), args);
+            }
+            else
+            {
+                return Add(Instruction.Call, null, args);
+            }
+        }
+
+        public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired) =>
+            Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired);
+
+        // The result has the type of the second operand.
+        public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3) =>
+            Add(Instruction.ConditionalSelect, Local(op2.Type), op1, op2, op3);
+
+        public Operand ConvertI64ToI32(Operand op1)
+        {
+            if (op1.Type != OperandType.I64)
+            {
+                throw new ArgumentException($"Invalid operand type \"{op1.Type}\".");
+            }
+
+            return Add(Instruction.ConvertI64ToI32, Local(OperandType.I32), op1);
+        }
+
+        public Operand ConvertToFP(OperandType type, Operand op1) => Add(Instruction.ConvertToFP, Local(type), op1);
+
+        public Operand ConvertToFPUI(OperandType type, Operand op1) =>
+            Add(Instruction.ConvertToFPUI, Local(type), op1);
+
+        public Operand Copy(Operand op1) => Add(Instruction.Copy, Local(op1.Type), op1);
+
+        /// <summary>
+        /// Emits a copy of <paramref name="op1"/> into an existing register operand.
+        /// </summary>
+        /// <exception cref="ArgumentException"><paramref name="dest"/> is not a register</exception>
+        public Operand Copy(Operand dest, Operand op1)
+        {
+            if (dest.Kind != OperandKind.Register)
+            {
+                throw new ArgumentException($"Invalid dest operand kind \"{dest.Kind}\".");
+            }
+
+            return Add(Instruction.Copy, dest, op1);
+        }
+
+        public Operand CountLeadingZeros(Operand op1) => Add(Instruction.CountLeadingZeros, Local(op1.Type), op1);
+
+        internal Operand CpuId() => Add(Instruction.CpuId, Local(OperandType.I64));
+
+        public Operand Divide(Operand op1, Operand op2) => Add(Instruction.Divide, Local(op1.Type), op1, op2);
+
+        public Operand DivideUI(Operand op1, Operand op2) => Add(Instruction.DivideUI, Local(op1.Type), op1, op2);
+
+        // All the comparisons below produce a 32-bit integer result.
+
+        public Operand ICompareEqual(Operand op1, Operand op2) =>
+            Add(Instruction.CompareEqual, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareGreater(Operand op1, Operand op2) =>
+            Add(Instruction.CompareGreater, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareGreaterOrEqual(Operand op1, Operand op2) =>
+            Add(Instruction.CompareGreaterOrEqual, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareGreaterOrEqualUI(Operand op1, Operand op2) =>
+            Add(Instruction.CompareGreaterOrEqualUI, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareGreaterUI(Operand op1, Operand op2) =>
+            Add(Instruction.CompareGreaterUI, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareLess(Operand op1, Operand op2) =>
+            Add(Instruction.CompareLess, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareLessOrEqual(Operand op1, Operand op2) =>
+            Add(Instruction.CompareLessOrEqual, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareLessOrEqualUI(Operand op1, Operand op2) =>
+            Add(Instruction.CompareLessOrEqualUI, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareLessUI(Operand op1, Operand op2) =>
+            Add(Instruction.CompareLessUI, Local(OperandType.I32), op1, op2);
+
+        public Operand ICompareNotEqual(Operand op1, Operand op2) =>
+            Add(Instruction.CompareNotEqual, Local(OperandType.I32), op1, op2);
+
+        public Operand Load(OperandType type, Operand address) => Add(Instruction.Load, Local(type), address);
+
+        public Operand Load16(Operand address) => Add(Instruction.Load16, Local(OperandType.I32), address);
+
+        public Operand Load8(Operand address) => Add(Instruction.Load8, Local(OperandType.I32), address);
+
+        public Operand LoadArgument(OperandType type, int index) =>
+            Add(Instruction.LoadArgument, Local(type), Const(index));
+
+        /// <summary>
+        /// Emits a context load as the first operation of a new block.
+        /// </summary>
+        public void LoadFromContext()
+        {
+            // The flag is set before emitting, so that the operation itself opens the new block.
+            _needsNewBlock = true;
+
+            Add(Instruction.LoadFromContext);
+        }
+
+        public Operand Multiply(Operand op1, Operand op2) => Add(Instruction.Multiply, Local(op1.Type), op1, op2);
+
+        public Operand Multiply64HighSI(Operand op1, Operand op2) =>
+            Add(Instruction.Multiply64HighSI, Local(OperandType.I64), op1, op2);
+
+        public Operand Multiply64HighUI(Operand op1, Operand op2) =>
+            Add(Instruction.Multiply64HighUI, Local(OperandType.I64), op1, op2);
+
+        public Operand Negate(Operand op1) => Add(Instruction.Negate, Local(op1.Type), op1);
+
+        /// <summary>
+        /// Emits a return without a value and ends the current block.
+        /// </summary>
+        public void Return()
+        {
+            Add(Instruction.Return);
+
+            _needsNewBlock = true;
+        }
+
+        /// <summary>
+        /// Emits a return of <paramref name="op1"/> and ends the current block.
+        /// </summary>
+        public void Return(Operand op1)
+        {
+            Add(Instruction.Return, null, op1);
+
+            _needsNewBlock = true;
+        }
+
+        public Operand RotateRight(Operand op1, Operand op2) =>
+            Add(Instruction.RotateRight, Local(op1.Type), op1, op2);
+
+        public Operand ShiftLeft(Operand op1, Operand op2) => Add(Instruction.ShiftLeft, Local(op1.Type), op1, op2);
+
+        public Operand ShiftRightSI(Operand op1, Operand op2) =>
+            Add(Instruction.ShiftRightSI, Local(op1.Type), op1, op2);
+
+        public Operand ShiftRightUI(Operand op1, Operand op2) =>
+            Add(Instruction.ShiftRightUI, Local(op1.Type), op1, op2);
+
+        public Operand SignExtend16(OperandType type, Operand op1) =>
+            Add(Instruction.SignExtend16, Local(type), op1);
+
+        public Operand SignExtend32(OperandType type, Operand op1) =>
+            Add(Instruction.SignExtend32, Local(type), op1);
+
+        public Operand SignExtend8(OperandType type, Operand op1) => Add(Instruction.SignExtend8, Local(type), op1);
+
+        public void Store(Operand address, Operand value) => Add(Instruction.Store, null, address, value);
+
+        public void Store16(Operand address, Operand value) => Add(Instruction.Store16, null, address, value);
+
+        public void Store8(Operand address, Operand value) => Add(Instruction.Store8, null, address, value);
+
+        /// <summary>
+        /// Emits a context store as the last operation of the current block.
+        /// </summary>
+        public void StoreToContext()
+        {
+            Add(Instruction.StoreToContext);
+
+            _needsNewBlock = true;
+        }
+
+        public Operand Subtract(Operand op1, Operand op2) => Add(Instruction.Subtract, Local(op1.Type), op1, op2);
+
+        public Operand VectorCreateScalar(Operand value) =>
+            Add(Instruction.VectorCreateScalar, Local(OperandType.V128), value);
+
+        public Operand VectorExtract(OperandType type, Operand vector, int index) =>
+            Add(Instruction.VectorExtract, Local(type), vector, Const(index));
+
+        public Operand VectorExtract16(Operand vector, int index) =>
+            Add(Instruction.VectorExtract16, Local(OperandType.I32), vector, Const(index));
+
+        public Operand VectorExtract8(Operand vector, int index) =>
+            Add(Instruction.VectorExtract8, Local(OperandType.I32), vector, Const(index));
+
+        public Operand VectorInsert(Operand vector, Operand value, int index) =>
+            Add(Instruction.VectorInsert, Local(OperandType.V128), vector, value, Const(index));
+
+        public Operand VectorInsert16(Operand vector, Operand value, int index) =>
+            Add(Instruction.VectorInsert16, Local(OperandType.V128), vector, value, Const(index));
+
+        public Operand VectorInsert8(Operand vector, Operand value, int index) =>
+            Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index));
+
+        public Operand VectorZero() => Add(Instruction.VectorZero, Local(OperandType.V128));
+
+        public Operand VectorZeroUpper64(Operand vector) =>
+            Add(Instruction.VectorZeroUpper64, Local(OperandType.V128), vector);
+
+        public Operand VectorZeroUpper96(Operand vector) =>
+            Add(Instruction.VectorZeroUpper96, Local(OperandType.V128), vector);
+
+        public Operand ZeroExtend16(OperandType type, Operand op1) =>
+            Add(Instruction.ZeroExtend16, Local(type), op1);
+
+        public Operand ZeroExtend32(OperandType type, Operand op1) =>
+            Add(Instruction.ZeroExtend32, Local(type), op1);
+
+        public Operand ZeroExtend8(OperandType type, Operand op1) => Add(Instruction.ZeroExtend8, Local(type), op1);
+
+        // Appends an operation to the current block (opening a new block first if needed),
+        // and returns the destination operand that was passed in.
+        private Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
+        {
+            if (_needsNewBlock)
+            {
+                NewNextBlock();
+            }
+
+            Operation operation = new Operation(inst, dest, sources);
+
+            _irBlock.Operations.AddLast(operation);
+
+            return dest;
+        }
+
+        /// <summary>
+        /// Emits an intrinsic operation with a 128-bit vector result.
+        /// </summary>
+        public Operand AddIntrinsic(Intrinsic intrin, params Operand[] args) =>
+            Add(intrin, Local(OperandType.V128), args);
+
+        /// <summary>
+        /// Emits an intrinsic operation with a 32-bit integer result.
+        /// </summary>
+        public Operand AddIntrinsicInt(Intrinsic intrin, params Operand[] args) =>
+            Add(intrin, Local(OperandType.I32), args);
+
+        /// <summary>
+        /// Emits an intrinsic operation with a 64-bit integer result.
+        /// </summary>
+        public Operand AddIntrinsicLong(Intrinsic intrin, params Operand[] args) =>
+            Add(intrin, Local(OperandType.I64), args);
+
+        // Same as the other Add overload, but for intrinsic operations.
+        private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources)
+        {
+            if (_needsNewBlock)
+            {
+                NewNextBlock();
+            }
+
+            IntrinsicOperation operation = new IntrinsicOperation(intrin, dest, sources);
+
+            _irBlock.Operations.AddLast(operation);
+
+            return dest;
+        }
+
+        // Makes the block of the label the branch target of the current block.
+        // When the label was not seen before, its block is created here, and only
+        // gets an index and a place on the block list once the label is marked.
+        private void BranchToLabel(Operand label)
+        {
+            if (!_irLabels.TryGetValue(label, out BasicBlock branchBlock))
+            {
+                branchBlock = new BasicBlock();
+
+                _irLabels.Add(label, branchBlock);
+            }
+
+            _irBlock.Branch = branchBlock;
+
+            _needsNewBlock = true;
+        }
+
+        /// <summary>
+        /// Binds <paramref name="label"/> to the current position: the operations emitted
+        /// next go to the block of the label.
+        /// </summary>
+        public void MarkLabel(Operand label)
+        {
+            if (_irLabels.TryGetValue(label, out BasicBlock nextBlock))
+            {
+                // The block was already created by an earlier branch to the label,
+                // it just needs to be numbered and added to the list now.
+                nextBlock.Index = _irBlocks.Count;
+                nextBlock.Node  = _irBlocks.AddLast(nextBlock);
+
+                NextBlock(nextBlock);
+            }
+            else
+            {
+                NewNextBlock();
+
+                _irLabels.Add(label, _irBlock);
+            }
+        }
+
+        // Creates a block, adds it at the end of the list and makes it the current one.
+        private void NewNextBlock()
+        {
+            BasicBlock block = new BasicBlock(_irBlocks.Count);
+
+            block.Node = _irBlocks.AddLast(block);
+
+            NextBlock(block);
+        }
+
+        // Makes the block the current one. The previous block falls through to it,
+        // unless that block ends with an unconditional branch or a return.
+        private void NextBlock(BasicBlock nextBlock)
+        {
+            if (_irBlock != null && !EndsWithUnconditional(_irBlock))
+            {
+                _irBlock.Next = nextBlock;
+            }
+
+            _irBlock = nextBlock;
+
+            _needsNewBlock = false;
+        }
+
+        private static bool EndsWithUnconditional(BasicBlock block)
+        {
+            Operation lastOp = block.GetLastOp() as Operation;
+
+            if (lastOp == null)
+            {
+                return false;
+            }
+
+            return lastOp.Instruction == Instruction.Branch ||
+                   lastOp.Instruction == Instruction.Return;
+        }
+
+        /// <summary>
+        /// Creates the control flow graph from the blocks emitted so far,
+        /// with the first block as entry. At least one block must exist.
+        /// </summary>
+        public ControlFlowGraph GetControlFlowGraph()
+        {
+            return new ControlFlowGraph(_irBlocks.First.Value, _irBlocks);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/GuestFunction.cs b/ARMeilleure/Translation/GuestFunction.cs
new file mode 100644
index 000000000..ac131a0d1
--- /dev/null
+++ b/ARMeilleure/Translation/GuestFunction.cs
@@ -0,0 +1,6 @@
+using System;
+
+namespace ARMeilleure.Translation
+{
+ /// <summary>
+ /// Delegate taking a native context pointer and returning a 64-bit unsigned value.
+ /// NOTE(review): Presumably the signature of translated guest code entry points,
+ /// confirm against the translator that creates and invokes it.
+ /// </summary>
+ delegate ulong GuestFunction(IntPtr nativeContextPtr);
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/ITranslator.cs b/ARMeilleure/Translation/ITranslator.cs
new file mode 100644
index 000000000..1063d3a65
--- /dev/null
+++ b/ARMeilleure/Translation/ITranslator.cs
@@ -0,0 +1,9 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Translation
+{
+ /// <summary>
+ /// Public interface of a translator, exposing a single execution entry point.
+ /// </summary>
+ public interface ITranslator
+ {
+ // NOTE(review): Presumably runs guest code starting at "address" using the state
+ // on "context"; the implementation is not part of this file, confirm there.
+ void Execute(IExecutionContext context, ulong address);
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/JitCache.cs b/ARMeilleure/Translation/JitCache.cs
new file mode 100644
index 000000000..73f04a966
--- /dev/null
+++ b/ARMeilleure/Translation/JitCache.cs
@@ -0,0 +1,135 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.Memory;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Fixed size region of memory where compiled functions are copied to, one after the other.
+    /// Functions are never removed, each one is recorded so it can be found from a code offset.
+    /// </summary>
+    static class JitCache
+    {
+        private const int PageSize = 4 * 1024;
+        private const int PageMask = PageSize - 1;
+
+        private const int CodeAlignment = 4; // Bytes
+
+        private const int CacheSize = 512 * 1024 * 1024;
+
+        private static readonly IntPtr _basePointer;
+
+        // Offset (from the base pointer) where the next function will be placed.
+        private static int _offset;
+
+        private static readonly List<JitCacheEntry> _cacheEntries;
+
+        // Guards "_offset" and "_cacheEntries".
+        private static readonly object _lock;
+
+        static JitCache()
+        {
+            _basePointer = MemoryManagement.Allocate(CacheSize);
+
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize);
+
+                // The first page is used for the table based SEH structs.
+                _offset = PageSize;
+            }
+
+            _cacheEntries = new List<JitCacheEntry>();
+
+            _lock = new object();
+        }
+
+        /// <summary>
+        /// Copies the code of a compiled function into the cache.
+        /// </summary>
+        /// <param name="func">Compiled function to copy</param>
+        /// <returns>Pointer to the start of the function code inside the cache</returns>
+        /// <exception cref="OutOfMemoryException">There is not enough space left on the cache</exception>
+        public static IntPtr Map(CompiledFunction func)
+        {
+            byte[] code = func.Code;
+
+            lock (_lock)
+            {
+                int funcOffset = Allocate(code.Length);
+
+                IntPtr funcPtr = _basePointer + funcOffset;
+
+                Marshal.Copy(code, 0, funcPtr, code.Length);
+
+                ReprotectRange(funcOffset, code.Length);
+
+                Add(new JitCacheEntry(funcOffset, code.Length, func.UnwindInfo));
+
+                return funcPtr;
+            }
+        }
+
+        private static void ReprotectRange(int offset, int size)
+        {
+            // Map pages that are already full as RX.
+            // Map pages that are not full yet as RWX.
+            // On unix, the address must be page aligned.
+            int endOffs = offset + size;
+
+            int pageStart = offset  & ~PageMask;
+            int pageEnd   = endOffs & ~PageMask;
+
+            int fullPagesSize = pageEnd - pageStart;
+
+            if (fullPagesSize != 0)
+            {
+                IntPtr funcPtr = _basePointer + pageStart;
+
+                MemoryManagement.Reprotect(funcPtr, (ulong)fullPagesSize, MemoryProtection.ReadAndExecute);
+            }
+
+            int remaining = endOffs - pageEnd;
+
+            if (remaining != 0)
+            {
+                IntPtr funcPtr = _basePointer + pageEnd;
+
+                MemoryManagement.Reprotect(funcPtr, (ulong)remaining, MemoryProtection.ReadWriteExecute);
+            }
+        }
+
+        // Reserves space for a function and returns the offset of the reserved space.
+        // Must be called with the lock held.
+        private static int Allocate(int codeSize)
+        {
+            int alignedSize = checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
+
+            int allocOffset = _offset;
+
+            // The check is done before the new offset is stored, so that a request that
+            // doesn't fit leaves the cache usable for smaller functions. The sum is done
+            // with 64 bits since it can exceed the range of a 32-bit integer.
+            if ((long)allocOffset + alignedSize > CacheSize)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            _offset = allocOffset + alignedSize;
+
+            return allocOffset;
+        }
+
+        private static void Add(JitCacheEntry entry)
+        {
+            _cacheEntries.Add(entry);
+        }
+
+        /// <summary>
+        /// Finds the function that contains the given code offset.
+        /// </summary>
+        /// <param name="offset">Offset from the start of the cache</param>
+        /// <param name="entry">Entry of the function found, or the default value if none was found</param>
+        /// <returns>True if a function contains the offset, false otherwise</returns>
+        public static bool TryFind(int offset, out JitCacheEntry entry)
+        {
+            lock (_lock)
+            {
+                foreach (JitCacheEntry cacheEntry in _cacheEntries)
+                {
+                    int endOffset = cacheEntry.Offset + cacheEntry.Size;
+
+                    if (offset >= cacheEntry.Offset && offset < endOffset)
+                    {
+                        entry = cacheEntry;
+
+                        return true;
+                    }
+                }
+            }
+
+            entry = default(JitCacheEntry);
+
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/JitCacheEntry.cs b/ARMeilleure/Translation/JitCacheEntry.cs
new file mode 100644
index 000000000..87d020e68
--- /dev/null
+++ b/ARMeilleure/Translation/JitCacheEntry.cs
@@ -0,0 +1,19 @@
+using ARMeilleure.CodeGen.Unwinding;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Describes one function stored on the JIT cache: where its code is,
+    /// how large it is, and the unwind information that belongs to it.
+    /// </summary>
+    struct JitCacheEntry
+    {
+        /// <summary>Offset of the function code.</summary>
+        public int Offset { get; }
+
+        /// <summary>Size of the function code.</summary>
+        public int Size { get; }
+
+        /// <summary>Unwind information of the function.</summary>
+        public UnwindInfo UnwindInfo { get; }
+
+        public JitCacheEntry(int offset, int size, UnwindInfo unwindInfo)
+        {
+            Offset     = offset;
+            Size       = size;
+            UnwindInfo = unwindInfo;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/JitUnwindWindows.cs b/ARMeilleure/Translation/JitUnwindWindows.cs
new file mode 100644
index 000000000..108dc2c56
--- /dev/null
+++ b/ARMeilleure/Translation/JitUnwindWindows.cs
@@ -0,0 +1,164 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Provides Windows with unwind information for the functions on the JIT cache,
+    /// through a callback that builds the information on demand.
+    /// </summary>
+    static class JitUnwindWindows
+    {
+        private const int MaxUnwindCodesArraySize = 9 + 10 * 2 + 3;
+
+        private struct RuntimeFunction
+        {
+            public uint BeginAddress;
+            public uint EndAddress;
+            public uint UnwindData;
+        }
+
+        private struct UnwindInfo
+        {
+            public byte VersionAndFlags;
+            public byte SizeOfProlog;
+            public byte CountOfUnwindCodes;
+            public byte FrameRegister;
+            public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize];
+        }
+
+        private enum UnwindOperation
+        {
+            PushNonvol    = 0,
+            AllocLarge    = 1,
+            AllocSmall    = 2,
+            SetFpreg      = 3,
+            SaveNonvol    = 4,
+            SaveNonvolFar = 5,
+            SaveXmm128    = 8,
+            SaveXmm128Far = 9,
+            PushMachframe = 10
+        }
+
+        private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context);
+
+        [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
+        private static unsafe extern bool RtlInstallFunctionTableCallback(
+            ulong                      tableIdentifier,
+            ulong                      baseAddress,
+            uint                       length,
+            GetRuntimeFunctionCallback callback,
+            IntPtr                     context,
+            string                     outOfProcessCallbackDll);
+
+        // Kept on a static field so that the delegate passed to native code is not collected.
+        private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback;
+
+        private static int _sizeOfRuntimeFunction;
+
+        // Both structs live at the start of the code cache, and are
+        // overwritten every time that the callback is invoked.
+        private unsafe static RuntimeFunction* _runtimeFunction;
+
+        private unsafe static UnwindInfo* _unwindInfo;
+
+        /// <summary>
+        /// Registers the callback that supplies unwind information for the code cache region.
+        /// The start of the region is used to store the structs returned by the callback.
+        /// </summary>
+        /// <param name="codeCachePointer">Pointer to the start of the code cache</param>
+        /// <param name="codeCacheLength">Size of the code cache in bytes</param>
+        /// <exception cref="InvalidOperationException">The callback could not be installed</exception>
+        public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength)
+        {
+            ulong codeCachePtr = (ulong)codeCachePointer.ToInt64();
+
+            _sizeOfRuntimeFunction = Marshal.SizeOf<RuntimeFunction>();
+
+            bool result;
+
+            unsafe
+            {
+                _runtimeFunction = (RuntimeFunction*)codeCachePointer;
+
+                _unwindInfo = (UnwindInfo*)(codeCachePointer + _sizeOfRuntimeFunction);
+
+                _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler);
+
+                // The two low-order bits of the table identifier must be set.
+                result = RtlInstallFunctionTableCallback(
+                    codeCachePtr | 3,
+                    codeCachePtr,
+                    codeCacheLength,
+                    _getRuntimeFunctionCallback,
+                    codeCachePointer,
+                    null);
+            }
+
+            if (!result)
+            {
+                throw new InvalidOperationException("Failure installing function table callback.");
+            }
+        }
+
+        // Callback invoked with a code address inside of the code cache region ("context" is
+        // the code cache pointer). Builds the unwind information of the function containing
+        // the address, or returns null if no function contains it.
+        private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, IntPtr context)
+        {
+            int offset = (int)((long)controlPc - context.ToInt64());
+
+            if (!JitCache.TryFind(offset, out JitCacheEntry funcEntry))
+            {
+                // Not found.
+                return null;
+            }
+
+            var unwindInfo = funcEntry.UnwindInfo;
+
+            int codeIndex = 0;
+
+            // Start from the lowest stack offset used by a saved vector register,
+            // assuming that they occupy the top of the fixed allocation, 16 bytes each.
+            int spOffset = unwindInfo.FixedAllocSize;
+
+            foreach (var entry in unwindInfo.PushEntries)
+            {
+                if (entry.Type == RegisterType.Vector)
+                {
+                    spOffset -= 16;
+                }
+            }
+
+            for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
+            {
+                var entry = unwindInfo.PushEntries[index];
+
+                if (entry.Type == RegisterType.Vector)
+                {
+                    ushort uwop = PackUwop(UnwindOperation.SaveXmm128, entry.StreamEndOffset, entry.Index);
+
+                    // The slot following a SaveXmm128 code holds the stack offset scaled by 16.
+                    _unwindInfo->UnwindCodes[codeIndex++] = uwop;
+                    _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(spOffset / 16);
+
+                    spOffset += 16;
+                }
+            }
+
+            // AllocLarge with operation info 1 is followed by the unscaled 32-bit size, low half first.
+            // Those codes are appended after the vector saves, rather than written at a fixed position,
+            // as otherwise they would overwrite the vector save codes already on the array.
+            _unwindInfo->UnwindCodes[codeIndex++] = PackUwop(UnwindOperation.AllocLarge, unwindInfo.PrologueSize, 1);
+            _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(unwindInfo.FixedAllocSize >> 0);
+            _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(unwindInfo.FixedAllocSize >> 16);
+
+            for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
+            {
+                var entry = unwindInfo.PushEntries[index];
+
+                if (entry.Type == RegisterType.Integer)
+                {
+                    ushort uwop = PackUwop(UnwindOperation.PushNonvol, entry.StreamEndOffset, entry.Index);
+
+                    _unwindInfo->UnwindCodes[codeIndex++] = uwop;
+                }
+            }
+
+            _unwindInfo->VersionAndFlags    = 1;
+            _unwindInfo->SizeOfProlog       = (byte)unwindInfo.PrologueSize;
+            _unwindInfo->CountOfUnwindCodes = (byte)codeIndex;
+            _unwindInfo->FrameRegister      = 0;
+
+            // All addresses are offsets from the start of the code cache. The unwind
+            // info is stored right after the runtime function struct.
+            _runtimeFunction->BeginAddress = (uint)funcEntry.Offset;
+            _runtimeFunction->EndAddress   = (uint)(funcEntry.Offset + funcEntry.Size);
+            _runtimeFunction->UnwindData   = (uint)_sizeOfRuntimeFunction;
+
+            return _runtimeFunction;
+        }
+
+        // Packs an unwind code: prolog offset on bits 0-7, operation on bits 8-11, operation info on bits 12-15.
+        private static ushort PackUwop(UnwindOperation uwop, int prologOffset, int opInfo)
+        {
+            return (ushort)(prologOffset | ((int)uwop << 8) | (opInfo << 12));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/PriorityQueue.cs b/ARMeilleure/Translation/PriorityQueue.cs
new file mode 100644
index 000000000..ab593dc07
--- /dev/null
+++ b/ARMeilleure/Translation/PriorityQueue.cs
@@ -0,0 +1,39 @@
+using System.Collections.Concurrent;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Queue with a fixed number of priority levels, where each level is a concurrent FIFO queue.
+    /// Lower priority numbers are dequeued first.
+    /// </summary>
+    /// <typeparam name="T">Type of the values on the queue</typeparam>
+    class PriorityQueue<T>
+    {
+        private readonly ConcurrentQueue<T>[] _queues;
+
+        /// <summary>
+        /// Creates a new queue.
+        /// </summary>
+        /// <param name="priorities">Number of priority levels, valid priorities go from 0 to this value minus 1</param>
+        public PriorityQueue(int priorities)
+        {
+            _queues = new ConcurrentQueue<T>[priorities];
+
+            for (int index = 0; index < priorities; index++)
+            {
+                _queues[index] = new ConcurrentQueue<T>();
+            }
+        }
+
+        /// <summary>
+        /// Adds a value to the end of the queue of the given priority.
+        /// </summary>
+        /// <param name="priority">Priority level of the value</param>
+        /// <param name="value">Value to add</param>
+        public void Enqueue(int priority, T value)
+        {
+            _queues[priority].Enqueue(value);
+        }
+
+        /// <summary>
+        /// Removes the oldest value from the first priority level that is not empty.
+        /// </summary>
+        /// <param name="value">Value removed, or the default value of the type if all levels are empty</param>
+        /// <returns>True if a value was removed, false if all levels are empty</returns>
+        public bool TryDequeue(out T value)
+        {
+            for (int index = 0; index < _queues.Length; index++)
+            {
+                if (_queues[index].TryDequeue(out value))
+                {
+                    return true;
+                }
+            }
+
+            value = default(T);
+
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/RegisterToLocal.cs b/ARMeilleure/Translation/RegisterToLocal.cs
new file mode 100644
index 000000000..aa9180182
--- /dev/null
+++ b/ARMeilleure/Translation/RegisterToLocal.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Pass that replaces register operands with local variables.
+    /// </summary>
+    static class RegisterToLocal
+    {
+        /// <summary>
+        /// Replaces every register operand (destination or source) on the operations of
+        /// <paramref name="cfg"/> with a local. All uses of the same register share the same local.
+        /// </summary>
+        /// <param name="cfg">Control flow graph to modify</param>
+        public static void Rename(ControlFlowGraph cfg)
+        {
+            Dictionary<Register, Operand> registerToLocalMap = new Dictionary<Register, Operand>();
+
+            // Gets the local of a register, creating it on first use
+            // with the type of the operand that was first seen.
+            Operand GetLocal(Operand op)
+            {
+                Register register = op.GetRegister();
+
+                if (!registerToLocalMap.TryGetValue(register, out Operand local))
+                {
+                    local = Local(op.Type);
+
+                    registerToLocalMap.Add(register, local);
+                }
+
+                return local;
+            }
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                foreach (Node node in block.Operations)
+                {
+                    Operand dest = node.Destination;
+
+                    if (dest != null && dest.Kind == OperandKind.Register)
+                    {
+                        node.Destination = GetLocal(dest);
+                    }
+
+                    for (int index = 0; index < node.SourcesCount; index++)
+                    {
+                        Operand source = node.GetSource(index);
+
+                        if (source.Kind == OperandKind.Register)
+                        {
+                            node.SetSource(index, GetLocal(source));
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/RegisterUsage.cs b/ARMeilleure/Translation/RegisterUsage.cs
new file mode 100644
index 000000000..4164786b9
--- /dev/null
+++ b/ARMeilleure/Translation/RegisterUsage.cs
@@ -0,0 +1,413 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+ static class RegisterUsage
+ {
+ private const long CallerSavedIntRegistersMask = 0x7fL << 9; // Integer mask bits 9 to 15.
+ private const long PStateNzcvFlagsMask = 0xfL << 60; // Integer mask bits 60 to 63; GetMask places flag N at bit RegsCount + N, so these are flags 28 to 31.
+ // Both masks above are cleared from the integer outputs by ClearCallerSavedIntRegs.
+ private const long CallerSavedVecRegistersMask = 0xffffL << 16; // Vector mask bits 16 to 31.
+ // Mask bits below RegsCount map to registers of the base type (see GetRegFromBit).
+ private const int RegsCount = 32;
+ private const int RegsMask = RegsCount - 1;
+
+        // Pair of 64-bit masks compared by value: IntMask tracks integer and flag
+        // registers (see GetMask), VecMask tracks vector registers. The bitwise
+        // operators below act on the two masks independently.
+        private struct RegisterMask : IEquatable<RegisterMask>
+        {
+            public long IntMask { get; set; }
+            public long VecMask { get; set; }
+
+            public RegisterMask(long intMask, long vecMask)
+            {
+                IntMask = intMask;
+                VecMask = vecMask;
+            }
+
+            public static RegisterMask operator &(RegisterMask x, RegisterMask y)
+            {
+                return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask);
+            }
+
+            public static RegisterMask operator |(RegisterMask x, RegisterMask y)
+            {
+                return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask);
+            }
+
+            public static RegisterMask operator ~(RegisterMask x)
+            {
+                return new RegisterMask(~x.IntMask, ~x.VecMask);
+            }
+
+            public static bool operator ==(RegisterMask x, RegisterMask y) => x.Equals(y);
+
+            public static bool operator !=(RegisterMask x, RegisterMask y) => !x.Equals(y);
+
+            public override bool Equals(object obj)
+            {
+                return obj is RegisterMask regMask && Equals(regMask);
+            }
+
+            // Typed equality backing IEquatable<RegisterMask> and the == and !=
+            // operators: both masks have to match.
+            public bool Equals(RegisterMask other)
+            {
+                return IntMask == other.IntMask && VecMask == other.VecMask;
+            }
+
+            // Hash built from both masks, consistent with Equals.
+            public override int GetHashCode()
+            {
+                return HashCode.Combine(IntMask, VecMask);
+            }
+        }
+ // Computes which registers each block reads and writes, propagates that across the control flow graph until nothing changes, then replaces the source-less context load/store operations with explicit register loads and stores.
+ public static void RunPass(ControlFlowGraph cfg, bool isCompleteFunction)
+ {
+ // Compute local register inputs and outputs used inside blocks.
+ RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count];
+ RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count];
+
+ foreach (BasicBlock block in cfg.Blocks)
+ {
+ foreach (Node node in block.Operations)
+ {
+ Operation operation = node as Operation;
+ // NOTE(review): 'as' yields null for a node that is not an Operation, which would fail on the accesses below; presumably only operations exist at this point - confirm.
+ for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ Operand source = operation.GetSource(srcIndex);
+
+ if (source.Kind != OperandKind.Register)
+ {
+ continue;
+ }
+
+ Register register = source.GetRegister();
+ // A register read is a block input only when the block has not written it earlier.
+ localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
+ }
+
+ if (operation.Destination != null && operation.Destination.Kind == OperandKind.Register)
+ {
+ localOutputs[block.Index] |= GetMask(operation.Destination.GetRegister());
+ }
+ }
+ }
+
+ // Compute global register inputs and outputs used across blocks.
+ RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Count];
+
+ RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Count];
+ RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count];
+
+ bool modified;
+
+ bool firstPass = true;
+
+ do
+ {
+ modified = false;
+
+ // Compute register outputs.
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+ // Blocks without predecessors, or starting with a context load, only expose their own writes (else branch below).
+ if (block.Predecessors.Count != 0 && !HasContextLoad(block))
+ {
+ BasicBlock predecessor = block.Predecessors[0];
+ // cmnOutputs accumulates registers written on every incoming path, outputs those written on any of them.
+ RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+
+ RegisterMask outputs = globalOutputs[predecessor.Index];
+
+ for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
+ {
+ predecessor = block.Predecessors[pIndex];
+
+ cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+
+ outputs |= globalOutputs[predecessor.Index];
+ }
+ // Registers written on some incoming paths but not on all of them are added to this block's inputs.
+ globalInputs[block.Index] |= outputs & ~cmnOutputs;
+ // From the second pass on, the common outputs are intersected with the previously stored value.
+ if (!firstPass)
+ {
+ cmnOutputs &= globalCmnOutputs[block.Index];
+ }
+
+ if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
+ {
+ modified = true;
+ }
+
+ outputs |= localOutputs[block.Index];
+
+ if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
+ {
+ modified = true;
+ }
+ }
+ else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
+ {
+ modified = true;
+ }
+ }
+
+ // Compute register inputs.
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+ // Inputs: the block's own reads plus the inputs of its successors, minus what is written on every path into it.
+ RegisterMask inputs = localInputs[block.Index];
+
+ if (block.Next != null)
+ {
+ inputs |= globalInputs[block.Next.Index];
+ }
+
+ if (block.Branch != null)
+ {
+ inputs |= globalInputs[block.Branch.Index];
+ }
+
+ inputs &= ~globalCmnOutputs[block.Index];
+
+ if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
+ {
+ modified = true;
+ }
+ }
+
+ firstPass = false;
+ }
+ while (modified);
+
+ // Insert load and store context instructions where needed.
+ foreach (BasicBlock block in cfg.Blocks)
+ {
+ bool hasContextLoad = HasContextLoad(block);
+ // The source-less LoadFromContext operation is dropped; the loads that replace it are inserted below.
+ if (hasContextLoad)
+ {
+ block.Operations.RemoveFirst();
+ }
+
+ // The only block without any predecessor should be the entry block.
+ // It always needs a context load as it is the first block to run.
+ if (block.Predecessors.Count == 0 || hasContextLoad)
+ {
+ LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector);
+ LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer);
+ }
+
+ bool hasContextStore = HasContextStore(block);
+ // Likewise, the source-less StoreToContext operation is replaced by the stores appended below.
+ if (hasContextStore)
+ {
+ block.Operations.RemoveLast();
+ }
+ // Note that EndsWithReturn is evaluated after the context store operation was removed.
+ if (EndsWithReturn(block) || hasContextStore)
+ {
+ StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, isCompleteFunction);
+ StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, isCompleteFunction);
+ }
+ }
+ }
+
+        // True when the block's first operation is a LoadFromContext without sources.
+        private static bool HasContextLoad(BasicBlock block) =>
+            StartsWith(block, Instruction.LoadFromContext) &&
+            block.Operations.First.Value.SourcesCount == 0;
+
+        // True when the block's last operation is a StoreToContext without sources.
+        private static bool HasContextStore(BasicBlock block) =>
+            EndsWith(block, Instruction.StoreToContext) &&
+            block.GetLastOp().SourcesCount == 0;
+
+        // Checks whether the first node of the block is an operation with the
+        // given instruction. An empty block never matches, and neither does a
+        // first node that is not an Operation.
+        private static bool StartsWith(BasicBlock block, Instruction inst)
+        {
+            return block.Operations.Count != 0
+                && block.Operations.First.Value is Operation first
+                && first.Instruction == inst;
+        }
+
+        // Checks whether the last node of the block is an operation with the
+        // given instruction. An empty block never matches, and neither does a
+        // last node that is not an Operation.
+        private static bool EndsWith(BasicBlock block, Instruction inst)
+        {
+            return block.Operations.Count != 0
+                && block.Operations.Last.Value is Operation last
+                && last.Instruction == inst;
+        }
+
+        // Builds the one-bit mask of a register. Integer registers use bit Index
+        // of the integer mask, flags use bit RegsCount + Index of that same mask,
+        // vector registers use bit Index of the vector mask; any other register
+        // type produces an empty mask.
+        private static RegisterMask GetMask(Register register)
+        {
+            long bit = 1L << register.Index;
+            RegisterType type = register.Type;
+
+            long intMask = type == RegisterType.Integer ? bit
+                         : type == RegisterType.Flag ? bit << RegsCount : 0;
+            long vecMask = type == RegisterType.Vector ? bit : 0;
+            return new RegisterMask(intMask, vecMask);
+        }
+
+        // Stores the new mask for the block and reports whether it differs from
+        // the mask that was stored before.
+        private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
+        {
+            bool changed = masks[blkIndex] != value;
+            masks[blkIndex] = value;
+            return changed;
+        }
+ // Prepends to the block, for each set bit of 'inputs', the operations that load the matching register from the address given by argument 0 plus the register offset.
+ private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType)
+ {
+ Operand arg0 = Local(OperandType.I64);
+ // Everything is inserted with AddFirst, so operations are created in the reverse of their final order.
+ for (int bit = 63; bit >= 0; bit--)
+ {
+ long mask = 1L << bit;
+
+ if ((inputs & mask) == 0)
+ {
+ continue;
+ }
+
+ Operand dest = GetRegFromBit(bit, baseType);
+
+ long offset = NativeContext.GetRegisterOffset(dest.GetRegister());
+
+ Operand addr = Local(OperandType.I64);
+
+ Operation loadOp = new Operation(Instruction.Load, dest, addr);
+
+ block.Operations.AddFirst(loadOp);
+ // Added after the load, so the address computation ends up right before it in the block.
+ Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));
+
+ block.Operations.AddFirst(calcOffsOp);
+ }
+ // Added last so that the argument load precedes every address computation that uses it.
+ Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));
+
+ block.Operations.AddFirst(loadArg0);
+ }
+
+        // Appends to the block, for each set bit of 'outputs' in ascending bit
+        // order, a store of the matching register to the address formed by
+        // argument 0 plus the offset NativeContext reports for that register.
+        // When strict ABI compliance is assumed and the function is complete,
+        // the caller saved registers are removed from the set first.
+        private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, bool isCompleteFunction)
+        {
+            if (Optimizations.AssumeStrictAbiCompliance && isCompleteFunction)
+            {
+                bool usesIntMask = baseType == RegisterType.Integer || baseType == RegisterType.Flag;
+
+                outputs = usesIntMask
+                    ? ClearCallerSavedIntRegs(outputs)
+                    : ClearCallerSavedVecRegs(outputs); /* RegisterType.Vector */
+            }
+
+            // The base address is loaded once and shared by every store below.
+            Operand contextBase = Local(OperandType.I64);
+
+            Operation loadBase = new Operation(Instruction.LoadArgument, contextBase, Const(0));
+
+            block.Append(loadBase);
+
+            for (int bit = 0; bit < 64; bit++)
+            {
+                bool isSet = (outputs & (1L << bit)) != 0;
+
+                if (isSet)
+                {
+                    Operand regOperand = GetRegFromBit(bit, baseType);
+
+                    long offset = NativeContext.GetRegisterOffset(regOperand.GetRegister());
+
+                    Operand address = Local(OperandType.I64);
+
+                    // Compute the address first, then store the register to it.
+                    Operation addOffset = new Operation(Instruction.Add, address, contextBase, Const(offset));
+                    block.Append(addOffset);
+
+                    Operation store = new Operation(Instruction.Store, null, address, regOperand);
+                    block.Append(store);
+                }
+            }
+        }
+
+        // Maps a mask bit back to a register operand: bits below RegsCount belong
+        // to the base type, higher bits of the integer mask are flags, and higher
+        // bits for any other base type are rejected.
+        private static Operand GetRegFromBit(int bit, RegisterType baseType)
+        {
+            if (bit >= RegsCount)
+            {
+                if (baseType != RegisterType.Integer)
+                {
+                    throw new ArgumentOutOfRangeException(nameof(bit));
+                }
+                return new Operand(bit & RegsMask, RegisterType.Flag, OperandType.I32);
+            }
+            return new Operand(bit, baseType, GetOperandType(baseType));
+        }
+
+        // Returns the operand type used for registers of the given type.
+        private static OperandType GetOperandType(RegisterType type)
+        {
+            if (type == RegisterType.Flag) { return OperandType.I32; }
+            if (type == RegisterType.Integer) { return OperandType.I64; }
+            if (type == RegisterType.Vector) { return OperandType.V128; }
+
+            // Any other value is not one of the three register types handled above.
+            string message = $"Invalid register type \"{type}\".";
+            throw new ArgumentException(message);
+        }
+
+        // True when the last operation of the block is a Return. The result is
+        // false when GetLastOp yields null or a node that is not an Operation.
+        private static bool EndsWithReturn(BasicBlock block)
+        {
+            Operation last = block.GetLastOp() as Operation;
+
+            bool isReturn = last != null && last.Instruction == Instruction.Return;
+            return isReturn;
+        }
+
+        // Removes the bits of CallerSavedIntRegistersMask and PStateNzcvFlagsMask
+        // from an integer register mask.
+        // TODO: ARM32 support.
+        private static long ClearCallerSavedIntRegs(long mask)
+        {
+            return mask & ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
+        }
+
+        // Removes the bits of CallerSavedVecRegistersMask from a vector register mask.
+        // TODO: ARM32 support.
+        private static long ClearCallerSavedVecRegs(long mask)
+        {
+            long remaining = mask & ~CallerSavedVecRegistersMask;
+            return remaining;
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/SsaConstruction.cs b/ARMeilleure/Translation/SsaConstruction.cs
new file mode 100644
index 000000000..ccf525915
--- /dev/null
+++ b/ARMeilleure/Translation/SsaConstruction.cs
@@ -0,0 +1,293 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+ static partial class Ssa
+ {
+        // Per-block bookkeeping for SSA construction: the operand currently
+        // recorded as the definition of each register, and the set of registers
+        // flagged as needing a Phi node in the block.
+        private class DefMap
+        {
+            private Dictionary<Register, Operand> _map;
+
+            private BitMap _phiMasks;
+
+            public DefMap()
+            {
+                _map = new Dictionary<Register, Operand>();
+
+                _phiMasks = new BitMap(RegisterConsts.TotalCount);
+            }
+
+            // Records the operand for the register; fails (returns false) when an
+            // operand is already recorded for it.
+            public bool TryAddOperand(Register reg, Operand operand)
+            {
+                return _map.TryAdd(reg, operand);
+            }
+
+            public bool TryGetOperand(Register reg, out Operand operand)
+            {
+                return _map.TryGetValue(reg, out operand);
+            }
+
+            // Flags the register as needing a Phi; the result is that of BitMap.Set.
+            public bool AddPhi(Register reg) => _phiMasks.Set(GetIdFromRegister(reg));
+
+            public bool HasPhi(Register reg) => _phiMasks.IsSet(GetIdFromRegister(reg));
+        }
+
+        // Converts the register operands of the graph into SSA form: every
+        // register definition becomes a fresh local, uses are rewritten to the
+        // definition that reaches them, and Phi nodes are created where needed.
+        public static void Construct(ControlFlowGraph cfg)
+        {
+            DefMap[] globalDefs = new DefMap[cfg.Blocks.Count];
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                globalDefs[block.Index] = new DefMap();
+            }
+
+            Queue<BasicBlock> dfPhiBlocks = new Queue<BasicBlock>();
+
+            // First pass, get all defs and locals uses.
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                Operand[] localDefs = new Operand[RegisterConsts.TotalCount];
+
+                LinkedListNode<Node> node = block.Operations.First;
+
+                // Uses the latest definition made earlier in this block, if any;
+                // otherwise the register operand is left for the second pass.
+                Operand RenameLocal(Operand operand)
+                {
+                    if (operand != null && operand.Kind == OperandKind.Register)
+                    {
+                        Operand local = localDefs[GetIdFromRegister(operand.GetRegister())];
+                        operand = local ?? operand;
+                    }
+
+                    return operand;
+                }
+
+                while (node != null)
+                {
+                    if (node.Value is Operation operation)
+                    {
+                        for (int index = 0; index < operation.SourcesCount; index++)
+                        {
+                            operation.SetSource(index, RenameLocal(operation.GetSource(index)));
+                        }
+
+                        Operand dest = operation.Destination;
+
+                        if (dest != null && dest.Kind == OperandKind.Register)
+                        {
+                            Operand local = Local(dest.Type);
+                            localDefs[GetIdFromRegister(dest.GetRegister())] = local;
+                            operation.Destination = local;
+                        }
+                    }
+
+                    node = node.Next;
+                }
+
+                // Publish the last definition of each register written by this block, then flag
+                // a Phi on its dominance frontiers, continuing from each block where AddPhi returns true.
+                for (int index = 0; index < RegisterConsts.TotalCount; index++)
+                {
+                    Operand local = localDefs[index];
+
+                    if (local == null)
+                    {
+                        continue;
+                    }
+
+                    Register reg = GetRegisterFromId(index);
+                    globalDefs[block.Index].TryAddOperand(reg, local);
+                    dfPhiBlocks.Enqueue(block);
+
+                    while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock))
+                    {
+                        foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
+                        {
+                            if (globalDefs[domFrontier.Index].AddPhi(reg))
+                            {
+                                dfPhiBlocks.Enqueue(domFrontier);
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Second pass, rename variables with definitions on different blocks.
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                Operand[] localDefs = new Operand[RegisterConsts.TotalCount];
+
+                LinkedListNode<Node> node = block.Operations.First;
+
+                Operand RenameGlobal(Operand operand)
+                {
+                    if (operand != null && operand.Kind == OperandKind.Register)
+                    {
+                        int key = GetIdFromRegister(operand.GetRegister());
+                        Operand local = localDefs[key];
+
+                        if (local == null)
+                        {
+                            local = FindDef(globalDefs, block, operand);
+                            localDefs[key] = local;
+                        }
+
+                        operand = local;
+                    }
+
+                    return operand;
+                }
+
+                while (node != null)
+                {
+                    if (node.Value is Operation operation)
+                    {
+                        for (int index = 0; index < operation.SourcesCount; index++)
+                        {
+                            operation.SetSource(index, RenameGlobal(operation.GetSource(index)));
+                        }
+                    }
+
+                    node = node.Next;
+                }
+            }
+        }
+
+        // Resolves the definition of a register for a use in 'current' that has
+        // no earlier definition in that block: a Phi flagged for the block is
+        // materialized, otherwise the search continues at the immediate dominator,
+        // and Undef is returned when the block is its own immediate dominator.
+        private static Operand FindDef(DefMap[] globalDefs, BasicBlock current, Operand operand)
+        {
+            bool needsPhi = globalDefs[current.Index].HasPhi(operand.GetRegister());
+            if (needsPhi) { return InsertPhi(globalDefs, current, operand); }
+
+            BasicBlock dominator = current.ImmediateDominator;
+            return current != dominator
+                ? FindDefOnPred(globalDefs, dominator, operand)
+                : Undef();
+        }
+
+        // Follows the immediate dominator links starting at 'current' and returns
+        // the first definition of the register found on the way: either the operand
+        // recorded for a block, or a newly materialized Phi when a block is flagged
+        // for one. Undef is returned once a block that is its own immediate
+        // dominator has been checked without success.
+        private static Operand FindDefOnPred(DefMap[] globalDefs, BasicBlock current, Operand operand)
+        {
+            Register reg = operand.GetRegister();
+
+            while (true)
+            {
+                DefMap defMap = globalDefs[current.Index];
+                if (defMap.TryGetOperand(reg, out Operand lastDef))
+                {
+                    return lastDef;
+                }
+
+                if (defMap.HasPhi(reg))
+                {
+                    return InsertPhi(globalDefs, current, operand);
+                }
+
+                BasicBlock dominator = current.ImmediateDominator;
+                if (current == dominator) { return Undef(); }
+                current = dominator;
+            }
+        }
+ // Creates a Phi node for the operand's register in 'block', fills one source per predecessor, and returns the local defined by the Phi.
+ private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Operand operand)
+ {
+ // This block has a Phi that has not been materialized yet, but that
+ // would define a new version of the variable we're looking for. We need
+ // to materialize the Phi, add all the block/operand pairs into the Phi, and
+ // then use the definition from that Phi.
+ Operand local = Local(operand.Type);
+
+ PhiNode phi = new PhiNode(local, block.Predecessors.Count);
+
+ AddPhi(block, phi);
+ // The Phi result is recorded before its sources are resolved: FindDefOnPred checks TryGetOperand first,
+ globalDefs[block.Index].TryAddOperand(operand.GetRegister(), local);
+ // so a lookup that reaches this block again while resolving the sources below gets 'local' back.
+ for (int index = 0; index < block.Predecessors.Count; index++)
+ {
+ BasicBlock predecessor = block.Predecessors[index];
+
+ phi.SetBlock(index, predecessor);
+ phi.SetSource(index, FindDefOnPred(globalDefs, predecessor, operand));
+ }
+
+ return local;
+ }
+
+        // Inserts the Phi after the Phi nodes already at the start of the block,
+        // or as the first node when the block does not start with one.
+        private static void AddPhi(BasicBlock block, PhiNode phi)
+        {
+            LinkedListNode<Node> node = block.Operations.First;
+            if (node != null)
+            {
+                while (node.Next?.Value is PhiNode)
+                {
+                    node = node.Next;
+                }
+            }
+            if (node?.Value is PhiNode)
+            {
+                block.Operations.AddAfter(node, phi);
+            }
+            else
+            {
+                block.Operations.AddFirst(phi);
+            }
+        }
+
+        // Flattens a register into a single index: integer registers come first,
+        // vector registers follow at IntRegsCount, and every remaining type
+        // (flags) starts at IntAndVecRegsCount.
+        private static int GetIdFromRegister(Register reg)
+        {
+            switch (reg.Type)
+            {
+                case RegisterType.Integer:
+                    return reg.Index;
+                case RegisterType.Vector:
+                    return RegisterConsts.IntRegsCount + reg.Index;
+                default: /* RegisterType.Flag */
+                    return RegisterConsts.IntAndVecRegsCount + reg.Index;
+            }
+        }
+
+        // Inverse of GetIdFromRegister: recovers the register type and index
+        // from a flattened register id.
+        private static Register GetRegisterFromId(int id)
+        {
+            if (id < RegisterConsts.IntRegsCount)
+            {
+                return new Register(id, RegisterType.Integer);
+            }
+
+            // Ids past the integer range are vectors until IntAndVecRegsCount
+            // and flags from there on (expected to stay below TotalCount).
+            return id < RegisterConsts.IntAndVecRegsCount
+                ? new Register(id - RegisterConsts.IntRegsCount, RegisterType.Vector)
+                : new Register(id - RegisterConsts.IntAndVecRegsCount, RegisterType.Flag);
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/SsaDeconstruction.cs b/ARMeilleure/Translation/SsaDeconstruction.cs
new file mode 100644
index 000000000..2ba78bdf4
--- /dev/null
+++ b/ARMeilleure/Translation/SsaDeconstruction.cs
@@ -0,0 +1,46 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+ static partial class Ssa
+ {
+        // Removes the Phi nodes at the start of each block: every Phi is replaced
+        // by a copy into a temporary appended to each predecessor, plus a copy
+        // from that temporary into the Phi destination in the block itself.
+        public static void Deconstruct(ControlFlowGraph cfg)
+        {
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                LinkedListNode<Node> node = block.Operations.First;
+                while (node?.Value is PhiNode phi)
+                {
+                    LinkedListNode<Node> nextNode = node.Next;
+
+                    Operand local = Local(phi.Destination.Type);
+
+                    for (int index = 0; index < phi.SourcesCount; index++)
+                    {
+                        BasicBlock predecessor = phi.GetBlock(index);
+                        Operand source = phi.GetSource(index);
+
+                        predecessor.Append(new Operation(Instruction.Copy, local, source));
+
+                        phi.SetSource(index, null);
+                    }
+
+                    Operation copyOp = new Operation(Instruction.Copy, phi.Destination, local);
+                    block.Operations.AddBefore(node, copyOp);
+
+                    // Clear the destination before unlinking the Phi node.
+                    phi.Destination = null;
+                    block.Operations.Remove(node);
+
+                    node = nextNode;
+                }
+            }
+        }
+ }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs
new file mode 100644
index 000000000..06069cf8f
--- /dev/null
+++ b/ARMeilleure/Translation/TranslatedFunction.cs
@@ -0,0 +1,30 @@
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+    // Wraps a compiled guest function and counts calls to decide when the
+    // function should be queued for a rejit.
+    class TranslatedFunction
+    {
+        private const int MinCallsForRejit = 100;
+        private GuestFunction _func;
+        private bool _rejit;
+        private int _callCount;
+
+        public TranslatedFunction(GuestFunction func, bool rejit)
+        {
+            _func = func;
+            _rejit = rejit;
+        }
+
+        // Runs the compiled code, passing it the native context pointer.
+        public ulong Execute(State.ExecutionContext context) => _func(context.NativeContextPtr);
+
+        // True only on the call that brings the counter to MinCallsForRejit, and
+        // only when rejit was enabled at construction (the counter is untouched otherwise).
+        public bool ShouldRejit()
+        {
+            return _rejit && Interlocked.Increment(ref _callCount) == MinCallsForRejit;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
new file mode 100644
index 000000000..6a7451214
--- /dev/null
+++ b/ARMeilleure/Translation/Translator.cs
@@ -0,0 +1,253 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+using System.Collections.Concurrent;
+using System.Threading;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+ public class Translator : ITranslator
+ {
+        private const ulong CallFlag = InstEmitFlowHelper.CallFlag;
+        private MemoryManager _memory;
+
+        // Translated functions keyed by guest address (with the call flag cleared).
+        private ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
+
+        // Guest addresses waiting to be retranslated by the background thread.
+        private PriorityQueue<ulong> _backgroundQueue;
+
+        // Signaled when work is queued or when the last executing thread exits.
+        private AutoResetEvent _backgroundTranslatorEvent;
+
+        // Number of threads currently inside Execute.
+        private volatile int _threadCount;
+
+        public Translator(MemoryManager memory)
+        {
+            _memory = memory;
+            _funcs = new ConcurrentDictionary<ulong, TranslatedFunction>();
+            _backgroundQueue = new PriorityQueue<ulong>(2);
+            _backgroundTranslatorEvent = new AutoResetEvent(false);
+        }
+
+        // Background thread body: while any thread is executing guest code, takes
+        // queued addresses, retranslates them with high code quality and replaces
+        // the cached function; waits on the event when the queue is empty.
+        private void TranslateQueuedSubs()
+        {
+            while (_threadCount != 0)
+            {
+                if (!_backgroundQueue.TryDequeue(out ulong address))
+                {
+                    _backgroundTranslatorEvent.WaitOne();
+                    continue;
+                }
+                TranslatedFunction rejitted = Translate(address, ExecutionMode.Aarch64, highCq: true);
+                _funcs.AddOrUpdate(address, rejitted, (key, previous) => rejitted);
+            }
+        }
+ // Runs guest code on the calling thread, starting at 'address', until the context stops running or the address returned by the executed code is zero once bit 0 is masked off.
+ public void Execute(IExecutionContext ctx, ulong address)
+ {
+ State.ExecutionContext context = (State.ExecutionContext)ctx;
+ // The thread that raises the count from zero to one starts the background translator thread.
+ if (Interlocked.Increment(ref _threadCount) == 1)
+ {
+ Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs);
+
+ backgroundTranslatorThread.Priority = ThreadPriority.Lowest;
+ backgroundTranslatorThread.Start();
+ }
+
+ Statistics.InitializeTimer();
+
+ NativeInterface.RegisterThread(context, _memory);
+
+ do
+ {
+ address = ExecuteSingle(context, address);
+ }
+ while (context.Running && (address & ~1UL) != 0);
+
+ NativeInterface.UnregisterThread();
+ // The last thread to leave signals the event so that TranslateQueuedSubs can wake up, see a zero count and exit.
+ if (Interlocked.Decrement(ref _threadCount) == 0)
+ {
+ _backgroundTranslatorEvent.Set();
+ }
+ }
+
+        // Runs one translated function for the address (translating it first if
+        // needed) and returns the address it reports as next, timing the call.
+        public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
+        {
+            TranslatedFunction function = GetOrTranslate(address, context.ExecutionMode);
+
+            Statistics.StartTimer();
+            ulong nextAddress = function.Execute(context);
+            Statistics.StopTimer(address);
+
+            return nextAddress;
+        }
+
+        // Returns the cached function for the address, translating it with low
+        // code quality on a miss. On a hit reached with the call flag set, the
+        // function's call counter is advanced and a rejit is queued when due.
+        private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+        {
+            // TODO: Investigate how we should handle code at unaligned addresses.
+            // Currently, those low bits are used to store special flags.
+            ulong target = address & ~CallFlag;
+            bool calledAsFunction = (address & CallFlag) != 0;
+
+            if (_funcs.TryGetValue(target, out TranslatedFunction cached))
+            {
+                if (calledAsFunction && cached.ShouldRejit())
+                {
+                    _backgroundQueue.Enqueue(0, target);
+                    _backgroundTranslatorEvent.Set();
+                }
+                return cached;
+            }
+            TranslatedFunction fresh = Translate(target, mode, highCq: false);
+            _funcs.TryAdd(target, fresh);
+            return fresh;
+        }
+ // Decodes the guest code at 'address', emits IR for it, runs the register usage pass and compiles the result. highCq selects whole-function decoding and the HighCq compiler options; functions translated without it are flagged for rejit.
+ private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
+ {
+ ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User);
+
+ Logger.StartPass(PassName.Decoding);
+
+ Block[] blocks = highCq
+ ? Decoder.DecodeFunction (_memory, address, mode)
+ : Decoder.DecodeBasicBlock(_memory, address, mode);
+
+ Logger.EndPass(PassName.Decoding);
+
+ Logger.StartPass(PassName.Translation);
+ // Every translated function starts with a synchronization check.
+ EmitSynchronization(context);
+ // When the first decoded block is not the requested address, begin with a branch to the label of that address.
+ if (blocks[0].Address != address)
+ {
+ context.Branch(context.GetLabel(address));
+ }
+
+ ControlFlowGraph cfg = EmitAndGetCFG(context, blocks);
+
+ Logger.EndPass(PassName.Translation);
+
+ Logger.StartPass(PassName.RegisterUsage);
+ // NOTE(review): isCompleteFunction is false even when highCq decoded a whole function - confirm this is intended.
+ RegisterUsage.RunPass(cfg, isCompleteFunction: false);
+
+ Logger.EndPass(PassName.RegisterUsage);
+
+ OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+ CompilerOptions options = highCq
+ ? CompilerOptions.HighCq
+ : CompilerOptions.None;
+
+ GuestFunction func = Compiler.Compile(cfg, argTypes, OperandType.I64, options);
+
+ return new TranslatedFunction(func, rejit: !highCq);
+ }
+ // Emits IR for every opcode of every decoded block, in order, and returns the control flow graph built by the emitter context.
+ private static ControlFlowGraph EmitAndGetCFG(ArmEmitterContext context, Block[] blocks)
+ {
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ Block block = blocks[blkIndex];
+
+ context.CurrBlock = block;
+
+ context.MarkLabel(context.GetLabel(block.Address));
+
+ for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
+ {
+ OpCode opCode = block.OpCodes[opcIndex];
+
+ context.CurrOp = opCode;
+
+ bool isLastOp = opcIndex == block.OpCodes.Count - 1;
+ // A branch target at or below this block's address (presumably a backward branch, i.e. a loop) gets a synchronization check before the last opcode.
+ if (isLastOp && block.Branch != null && block.Branch.Address <= block.Address)
+ {
+ EmitSynchronization(context);
+ }
+
+ Operand lblPredicateSkip = null;
+ // A 32-bit opcode with a condition below Al is guarded: a branch on the inverted condition skips over it.
+ if (opCode is OpCode32 op && op.Cond < Condition.Al)
+ {
+ lblPredicateSkip = Label();
+
+ InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
+ }
+
+ if (opCode.Instruction.Emitter != null)
+ {
+ opCode.Instruction.Emitter(context);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
+ }
+
+ if (lblPredicateSkip != null)
+ {
+ context.MarkLabel(lblPredicateSkip);
+
+ // If this is the last op on the block, and there's no "next" block
+ // after this one, then we have to return right now, with the address
+ // of the next instruction to be executed (in the case that the condition
+ // is false, and the branch was not taken, as all basic blocks should end
+ // with some kind of branch).
+ if (isLastOp && block.Next == null)
+ {
+ context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
+ }
+ }
+ }
+ }
+
+ return context.GetControlFlowGraph();
+ }
+ // Emits code that loads the 32-bit counter found at the counter offset from argument 0: when it is zero NativeInterface.CheckSynchronization is called, otherwise the counter is decremented and stored back.
+ private static void EmitSynchronization(EmitterContext context)
+ {
+ long countOffs = NativeContext.GetCounterOffset();
+
+ Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs));
+
+ Operand count = context.Load(OperandType.I32, countAddr);
+
+ Operand lblNonZero = Label();
+ Operand lblExit = Label();
+ // A non-zero counter skips the call and is decremented instead.
+ context.BranchIfTrue(lblNonZero, count);
+
+ context.Call(new _Void(NativeInterface.CheckSynchronization));
+
+ context.Branch(lblExit);
+
+ context.MarkLabel(lblNonZero);
+
+ count = context.Subtract(count, Const(1));
+
+ context.Store(countAddr, count);
+
+ context.MarkLabel(lblExit);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ChocolArm64/ChocolArm64.csproj b/ChocolArm64/ChocolArm64.csproj
index ea98003f9..cccdd94df 100644
--- a/ChocolArm64/ChocolArm64.csproj
+++ b/ChocolArm64/ChocolArm64.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
Debug;Release;Profile Debug;Profile Release
@@ -33,6 +33,7 @@
+
diff --git a/ChocolArm64/CpuThread.cs b/ChocolArm64/CpuThread.cs
deleted file mode 100644
index ad1fd6f3c..000000000
--- a/ChocolArm64/CpuThread.cs
+++ /dev/null
@@ -1,66 +0,0 @@
-using ChocolArm64.Memory;
-using ChocolArm64.State;
-using ChocolArm64.Translation;
-using System;
-using System.Threading;
-
-namespace ChocolArm64
-{
- public class CpuThread
- {
- public CpuThreadState ThreadState { get; private set; }
- public MemoryManager Memory { get; private set; }
-
- private Translator _translator;
-
- public Thread Work;
-
- public event EventHandler WorkFinished;
-
- private int _isExecuting;
-
- public CpuThread(Translator translator, MemoryManager memory, long entrypoint)
- {
- _translator = translator;
- Memory = memory;
-
- ThreadState = new CpuThreadState();
-
- ThreadState.Running = true;
-
- Work = new Thread(delegate()
- {
- translator.ExecuteSubroutine(this, entrypoint);
-
- WorkFinished?.Invoke(this, EventArgs.Empty);
- });
- }
-
- public bool Execute()
- {
- if (Interlocked.Exchange(ref _isExecuting, 1) == 1)
- {
- return false;
- }
-
- Work.Start();
-
- return true;
- }
-
- public void StopExecution()
- {
- ThreadState.Running = false;
- }
-
- public void RequestInterrupt()
- {
- ThreadState.RequestInterrupt();
- }
-
- public bool IsCurrentThread()
- {
- return Thread.CurrentThread == Work;
- }
- }
-}
\ No newline at end of file
diff --git a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
index dbb588867..08c8265b5 100644
--- a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
@@ -462,11 +462,11 @@ namespace ChocolArm64.Instructions
switch (size)
{
- case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8); break;
- case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16); break;
- case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32); break;
- case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64); break;
- case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128); break;
+ case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8); break;
+ case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16); break;
+ case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32); break;
+ case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64); break;
+ case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128Internal); break;
}
context.EmitCall(typeof(MemoryManager), fallbackMethodName);
diff --git a/ChocolArm64/Instructions/InstEmitSystem.cs b/ChocolArm64/Instructions/InstEmitSystem.cs
index d0d60b9d5..ac264de92 100644
--- a/ChocolArm64/Instructions/InstEmitSystem.cs
+++ b/ChocolArm64/Instructions/InstEmitSystem.cs
@@ -31,8 +31,8 @@ namespace ChocolArm64.Instructions
{
case 0b11_011_0000_0000_001: propName = nameof(CpuThreadState.CtrEl0); break;
case 0b11_011_0000_0000_111: propName = nameof(CpuThreadState.DczidEl0); break;
- case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr); break;
- case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr); break;
+ case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.CFpcr); break;
+ case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.CFpsr); break;
case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0); break;
case 0b11_011_1101_0000_011: propName = nameof(CpuThreadState.Tpidr); break;
case 0b11_011_1110_0000_000: propName = nameof(CpuThreadState.CntfrqEl0); break;
@@ -65,8 +65,8 @@ namespace ChocolArm64.Instructions
switch (GetPackedId(op))
{
- case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr); break;
- case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr); break;
+ case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.CFpcr); break;
+ case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.CFpsr); break;
case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0); break;
default: throw new NotImplementedException($"Unknown MSR at {op.Position:x16}");
diff --git a/ChocolArm64/Instructions/SoftFloat.cs b/ChocolArm64/Instructions/SoftFloat.cs
index 3521ad152..e78932cc4 100644
--- a/ChocolArm64/Instructions/SoftFloat.cs
+++ b/ChocolArm64/Instructions/SoftFloat.cs
@@ -82,7 +82,7 @@ namespace ChocolArm64.Instructions
{
public static float FPConvert(ushort valueBits, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat16_32.FPConvert: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat16_32.FPConvert: state.Fpcr = 0x{state.CFpcr:X8}");
double real = valueBits.FPUnpackCv(out FpType type, out bool sign, state);
@@ -322,13 +322,13 @@ namespace ChocolArm64.Instructions
{
int enable = (int)exc + 8;
- if ((state.Fpcr & (1 << enable)) != 0)
+ if ((state.CFpcr & (1 << enable)) != 0)
{
throw new NotImplementedException("Floating-point trap handling.");
}
else
{
- state.Fpsr |= 1 << (int)exc;
+ state.CFpsr |= 1 << (int)exc;
}
}
}
@@ -337,7 +337,7 @@ namespace ChocolArm64.Instructions
{
public static ushort FPConvert(float value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32_16.FPConvert: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32_16.FPConvert: state.Fpcr = 0x{state.CFpcr:X8}");
double real = value.FPUnpackCv(out FpType type, out bool sign, out uint valueBits, state);
@@ -609,13 +609,13 @@ namespace ChocolArm64.Instructions
{
int enable = (int)exc + 8;
- if ((state.Fpcr & (1 << enable)) != 0)
+ if ((state.CFpcr & (1 << enable)) != 0)
{
throw new NotImplementedException("Floating-point trap handling.");
}
else
{
- state.Fpsr |= 1 << (int)exc;
+ state.CFpsr |= 1 << (int)exc;
}
}
}
@@ -624,7 +624,7 @@ namespace ChocolArm64.Instructions
{
public static float FPAdd(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPAdd: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -672,7 +672,7 @@ namespace ChocolArm64.Instructions
public static int FPCompare(float value1, float value2, bool signalNaNs, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state);
@@ -709,7 +709,7 @@ namespace ChocolArm64.Instructions
public static float FPCompareEQ(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -735,7 +735,7 @@ namespace ChocolArm64.Instructions
public static float FPCompareGE(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -758,7 +758,7 @@ namespace ChocolArm64.Instructions
public static float FPCompareGT(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -782,7 +782,7 @@ namespace ChocolArm64.Instructions
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float FPCompareLE(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.CFpcr:X8}");
return FPCompareGE(value2, value1, state);
}
@@ -790,14 +790,14 @@ namespace ChocolArm64.Instructions
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float FPCompareLT(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.CFpcr:X8}");
return FPCompareGT(value2, value1, state);
}
public static float FPDiv(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -846,7 +846,7 @@ namespace ChocolArm64.Instructions
public static float FPMax(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMax: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMax: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -899,7 +899,7 @@ namespace ChocolArm64.Instructions
public static float FPMaxNum(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMaxNum: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMaxNum: state.Fpcr = 0x{state.CFpcr:X8}");
value1.FPUnpack(out FpType type1, out _, out _, state);
value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -918,7 +918,7 @@ namespace ChocolArm64.Instructions
public static float FPMin(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMin: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMin: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -971,7 +971,7 @@ namespace ChocolArm64.Instructions
public static float FPMinNum(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMinNum: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMinNum: state.Fpcr = 0x{state.CFpcr:X8}");
value1.FPUnpack(out FpType type1, out _, out _, state);
value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -990,7 +990,7 @@ namespace ChocolArm64.Instructions
public static float FPMul(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMul: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMul: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -1038,7 +1038,7 @@ namespace ChocolArm64.Instructions
float value2,
CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulAdd: state.Fpcr = 0x{state.CFpcr:X8}");
valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out uint addend, state);
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
@@ -1108,7 +1108,7 @@ namespace ChocolArm64.Instructions
float value2,
CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulSub: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulSub: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPNeg();
@@ -1117,7 +1117,7 @@ namespace ChocolArm64.Instructions
public static float FPMulX(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulX: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulX: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -1159,7 +1159,7 @@ namespace ChocolArm64.Instructions
public static float FPRecipEstimate(float value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
value.FPUnpack(out FpType type, out bool sign, out uint op, state);
@@ -1248,7 +1248,7 @@ namespace ChocolArm64.Instructions
public static float FPRecipStepFused(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPNeg();
@@ -1291,7 +1291,7 @@ namespace ChocolArm64.Instructions
public static float FPRecpX(float value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecpX: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecpX: state.Fpcr = 0x{state.CFpcr:X8}");
value.FPUnpack(out FpType type, out bool sign, out uint op, state);
@@ -1315,7 +1315,7 @@ namespace ChocolArm64.Instructions
public static float FPRSqrtEstimate(float value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
value.FPUnpack(out FpType type, out bool sign, out uint op, state);
@@ -1380,7 +1380,7 @@ namespace ChocolArm64.Instructions
public static float FPRSqrtStepFused(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPNeg();
@@ -1423,7 +1423,7 @@ namespace ChocolArm64.Instructions
public static float FPSqrt(float value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPSqrt: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPSqrt: state.Fpcr = 0x{state.CFpcr:X8}");
value = value.FPUnpack(out FpType type, out bool sign, out uint op, state);
@@ -1464,7 +1464,7 @@ namespace ChocolArm64.Instructions
public static float FPSub(float value1, float value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPSub: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPSub: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -1693,13 +1693,13 @@ namespace ChocolArm64.Instructions
{
int enable = (int)exc + 8;
- if ((state.Fpcr & (1 << enable)) != 0)
+ if ((state.CFpcr & (1 << enable)) != 0)
{
throw new NotImplementedException("Floating-point trap handling.");
}
else
{
- state.Fpsr |= 1 << (int)exc;
+ state.CFpsr |= 1 << (int)exc;
}
}
}
@@ -1708,7 +1708,7 @@ namespace ChocolArm64.Instructions
{
public static double FPAdd(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPAdd: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -1756,7 +1756,7 @@ namespace ChocolArm64.Instructions
public static int FPCompare(double value1, double value2, bool signalNaNs, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state);
@@ -1793,7 +1793,7 @@ namespace ChocolArm64.Instructions
public static double FPCompareEQ(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -1819,7 +1819,7 @@ namespace ChocolArm64.Instructions
public static double FPCompareGE(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -1842,7 +1842,7 @@ namespace ChocolArm64.Instructions
public static double FPCompareGT(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -1866,7 +1866,7 @@ namespace ChocolArm64.Instructions
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double FPCompareLE(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.CFpcr:X8}");
return FPCompareGE(value2, value1, state);
}
@@ -1874,14 +1874,14 @@ namespace ChocolArm64.Instructions
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double FPCompareLT(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.CFpcr:X8}");
return FPCompareGT(value2, value1, state);
}
public static double FPDiv(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -1930,7 +1930,7 @@ namespace ChocolArm64.Instructions
public static double FPMax(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMax: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMax: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -1983,7 +1983,7 @@ namespace ChocolArm64.Instructions
public static double FPMaxNum(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMaxNum: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMaxNum: state.Fpcr = 0x{state.CFpcr:X8}");
value1.FPUnpack(out FpType type1, out _, out _, state);
value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -2002,7 +2002,7 @@ namespace ChocolArm64.Instructions
public static double FPMin(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMin: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMin: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2055,7 +2055,7 @@ namespace ChocolArm64.Instructions
public static double FPMinNum(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMinNum: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMinNum: state.Fpcr = 0x{state.CFpcr:X8}");
value1.FPUnpack(out FpType type1, out _, out _, state);
value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -2074,7 +2074,7 @@ namespace ChocolArm64.Instructions
public static double FPMul(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMul: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMul: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2122,7 +2122,7 @@ namespace ChocolArm64.Instructions
double value2,
CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulAdd: state.Fpcr = 0x{state.CFpcr:X8}");
valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out ulong addend, state);
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
@@ -2192,7 +2192,7 @@ namespace ChocolArm64.Instructions
double value2,
CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulSub: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulSub: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPNeg();
@@ -2201,7 +2201,7 @@ namespace ChocolArm64.Instructions
public static double FPMulX(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulX: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulX: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2243,7 +2243,7 @@ namespace ChocolArm64.Instructions
public static double FPRecipEstimate(double value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
@@ -2332,7 +2332,7 @@ namespace ChocolArm64.Instructions
public static double FPRecipStepFused(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPNeg();
@@ -2375,7 +2375,7 @@ namespace ChocolArm64.Instructions
public static double FPRecpX(double value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecpX: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecpX: state.Fpcr = 0x{state.CFpcr:X8}");
value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
@@ -2399,7 +2399,7 @@ namespace ChocolArm64.Instructions
public static double FPRSqrtEstimate(double value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
@@ -2464,7 +2464,7 @@ namespace ChocolArm64.Instructions
public static double FPRSqrtStepFused(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPNeg();
@@ -2507,7 +2507,7 @@ namespace ChocolArm64.Instructions
public static double FPSqrt(double value, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPSqrt: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPSqrt: state.Fpcr = 0x{state.CFpcr:X8}");
value = value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
@@ -2548,7 +2548,7 @@ namespace ChocolArm64.Instructions
public static double FPSub(double value1, double value2, CpuThreadState state)
{
- Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPSub: state.Fpcr = 0x{state.Fpcr:X8}");
+ Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPSub: state.Fpcr = 0x{state.CFpcr:X8}");
value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2777,13 +2777,13 @@ namespace ChocolArm64.Instructions
{
int enable = (int)exc + 8;
- if ((state.Fpcr & (1 << enable)) != 0)
+ if ((state.CFpcr & (1 << enable)) != 0)
{
throw new NotImplementedException("Floating-point trap handling.");
}
else
{
- state.Fpsr |= 1 << (int)exc;
+ state.CFpsr |= 1 << (int)exc;
}
}
}
diff --git a/ChocolArm64/Memory/MemoryManager.cs b/ChocolArm64/Memory/MemoryManager.cs
index 364f6b58a..2347f1eb4 100644
--- a/ChocolArm64/Memory/MemoryManager.cs
+++ b/ChocolArm64/Memory/MemoryManager.cs
@@ -11,7 +11,7 @@ using static ChocolArm64.Memory.MemoryManagement;
namespace ChocolArm64.Memory
{
- public unsafe class MemoryManager : IMemory, IDisposable
+ public unsafe class MemoryManager : ARMeilleure.Memory.IMemoryManager
{
public const int PageBits = 12;
public const int PageSize = 1 << PageBits;
@@ -880,7 +880,7 @@ namespace ChocolArm64.Memory
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public void WriteVector128(long position, Vector128 value)
+ public void WriteVector128Internal(long position, Vector128 value)
{
if (Sse.IsSupported && (position & 15) == 0)
{
@@ -893,6 +893,12 @@ namespace ChocolArm64.Memory
}
}
+        /// <summary>
+        /// Writes a 128-bit vector to memory as two consecutive 64-bit stores.
+        /// </summary>
+        /// <param name="position">Address of the first store; the second store goes to position + 8.</param>
+        /// <param name="value">Vector whose 64-bit elements 0 and 1 are written, in that order.</param>
+        public void WriteVector128(long position, ARMeilleure.State.V128 value)
+        {
+            for (int element = 0; element < 2; element++)
+            {
+                WriteUInt64(position + element * 8, value.GetUInt64(element));
+            }
+        }
+
public void WriteBytes(long position, byte[] data)
{
long endAddr = position + data.Length;
diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs
index cbb8131f5..24828ebfb 100644
--- a/ChocolArm64/Optimizations.cs
+++ b/ChocolArm64/Optimizations.cs
@@ -1,24 +1,27 @@
using System.Runtime.Intrinsics.X86;
-public static class Optimizations
+namespace ChocolArm64
{
- public static bool AssumeStrictAbiCompliance { get; set; }
+ public static class Optimizations
+ {
+ public static bool AssumeStrictAbiCompliance { get; set; } = true;
- public static bool FastFP { get; set; } = true;
+ public static bool FastFP { get; set; } = true;
- private const bool UseAllSseIfAvailable = true;
+ private const bool UseAllSseIfAvailable = true;
- public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable;
- public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable;
- public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable;
- public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
- public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
- public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSseIfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse2IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
+ public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
- internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported;
- internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
- internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported;
- internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
- internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
- internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
+ internal static bool UseSse => UseSseIfAvailable && Sse.IsSupported;
+ internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
+ internal static bool UseSse3 => UseSse3IfAvailable && Sse3.IsSupported;
+ internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
+ internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
+ internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
+ }
}
\ No newline at end of file
diff --git a/ChocolArm64/State/CpuThreadState.cs b/ChocolArm64/State/CpuThreadState.cs
index 424f17258..e4baaefa4 100644
--- a/ChocolArm64/State/CpuThreadState.cs
+++ b/ChocolArm64/State/CpuThreadState.cs
@@ -1,13 +1,14 @@
-using ChocolArm64.Events;
using ChocolArm64.Translation;
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
+using static ChocolArm64.Instructions.VectorHelper;
+
namespace ChocolArm64.State
{
- public class CpuThreadState
+ public class CpuThreadState : ARMeilleure.State.IExecutionContext
{
private const int MinCountForCheck = 40000;
@@ -24,7 +25,7 @@ namespace ChocolArm64.State
V16, V17, V18, V19, V20, V21, V22, V23,
V24, V25, V26, V27, V28, V29, V30, V31;
- public bool Aarch32;
+ public bool IsAarch32 { get; set; }
public bool Thumb;
public bool BigEndian;
@@ -45,8 +46,20 @@ namespace ChocolArm64.State
public long TpidrEl0 { get; set; }
public long Tpidr { get; set; }
- public int Fpcr { get; set; }
- public int Fpsr { get; set; }
+ public int CFpcr { get; set; }
+ public int CFpsr { get; set; }
+
+        /// <summary>
+        /// Typed view over <see cref="CFpcr"/>: the getter casts the raw int to the
+        /// ARMeilleure FPCR type, the setter casts back and stores into <see cref="CFpcr"/>.
+        /// </summary>
+        public ARMeilleure.State.FPCR Fpcr
+        {
+            get
+            {
+                return (ARMeilleure.State.FPCR)CFpcr;
+            }
+            set
+            {
+                CFpcr = (int)value;
+            }
+        }
+
+        /// <summary>
+        /// Typed view over <see cref="CFpsr"/>: the getter casts the raw int to the
+        /// ARMeilleure FPSR type, the setter casts back and stores into <see cref="CFpsr"/>.
+        /// </summary>
+        public ARMeilleure.State.FPSR Fpsr
+        {
+            get
+            {
+                return (ARMeilleure.State.FPSR)CFpsr;
+            }
+            set
+            {
+                CFpsr = (int)value;
+            }
+        }
public int Psr
{
@@ -73,10 +86,10 @@ namespace ChocolArm64.State
}
}
- public event EventHandler Interrupt;
- public event EventHandler Break;
- public event EventHandler SvcCall;
- public event EventHandler Undefined;
+ public event EventHandler Interrupt;
+ public event EventHandler Break;
+ public event EventHandler SupervisorCall;
+ public event EventHandler Undefined;
private static Stopwatch _tickCounter;
@@ -92,6 +105,8 @@ namespace ChocolArm64.State
public CpuThreadState()
{
ClearExclusiveAddress();
+
+ Running = true;
}
static CpuThreadState()
@@ -151,29 +166,165 @@ namespace ChocolArm64.State
}
}
- internal void RequestInterrupt()
+        /// <summary>
+        /// Returns the value of the X register field selected by <paramref name="index"/>.
+        /// </summary>
+        /// <param name="index">Register number, 0 through 31 (maps to X0..X31).</param>
+        /// <returns>The current value of the selected register field.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown when <paramref name="index"/> is outside 0..31.
+        /// </exception>
+        public ulong GetX(int index)
+        {
+            // Reject invalid register numbers up front so the switch only sees 0..31.
+            if (index < 0 || index > 31)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            switch (index)
+            {
+                case 0: return X0;
+                case 1: return X1;
+                case 2: return X2;
+                case 3: return X3;
+                case 4: return X4;
+                case 5: return X5;
+                case 6: return X6;
+                case 7: return X7;
+                case 8: return X8;
+                case 9: return X9;
+                case 10: return X10;
+                case 11: return X11;
+                case 12: return X12;
+                case 13: return X13;
+                case 14: return X14;
+                case 15: return X15;
+                case 16: return X16;
+                case 17: return X17;
+                case 18: return X18;
+                case 19: return X19;
+                case 20: return X20;
+                case 21: return X21;
+                case 22: return X22;
+                case 23: return X23;
+                case 24: return X24;
+                case 25: return X25;
+                case 26: return X26;
+                case 27: return X27;
+                case 28: return X28;
+                case 29: return X29;
+                case 30: return X30;
+
+                // Only 31 can reach here after the range check above.
+                default: return X31;
+            }
+        }
+
+        /// <summary>
+        /// Stores <paramref name="value"/> into the X register field selected by <paramref name="index"/>.
+        /// </summary>
+        /// <param name="index">Register number, 0 through 31 (maps to X0..X31).</param>
+        /// <param name="value">Value to store in the selected register field.</param>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown when <paramref name="index"/> is outside 0..31; no register is modified in that case.
+        /// </exception>
+        public void SetX(int index, ulong value)
+        {
+            // Reject invalid register numbers up front so the switch only sees 0..31.
+            if (index < 0 || index > 31)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            switch (index)
+            {
+                case 0: X0 = value; return;
+                case 1: X1 = value; return;
+                case 2: X2 = value; return;
+                case 3: X3 = value; return;
+                case 4: X4 = value; return;
+                case 5: X5 = value; return;
+                case 6: X6 = value; return;
+                case 7: X7 = value; return;
+                case 8: X8 = value; return;
+                case 9: X9 = value; return;
+                case 10: X10 = value; return;
+                case 11: X11 = value; return;
+                case 12: X12 = value; return;
+                case 13: X13 = value; return;
+                case 14: X14 = value; return;
+                case 15: X15 = value; return;
+                case 16: X16 = value; return;
+                case 17: X17 = value; return;
+                case 18: X18 = value; return;
+                case 19: X19 = value; return;
+                case 20: X20 = value; return;
+                case 21: X21 = value; return;
+                case 22: X22 = value; return;
+                case 23: X23 = value; return;
+                case 24: X24 = value; return;
+                case 25: X25 = value; return;
+                case 26: X26 = value; return;
+                case 27: X27 = value; return;
+                case 28: X28 = value; return;
+                case 29: X29 = value; return;
+                case 30: X30 = value; return;
+                case 31: X31 = value; return;
+            }
+        }
+
+        /// <summary>
+        /// Returns the V register field selected by <paramref name="index"/> converted to an
+        /// ARMeilleure V128, built from two VectorExtractIntZx calls (element 0 and element 1).
+        /// </summary>
+        /// <param name="index">Register number, 0 through 31 (maps to V0..V31).</param>
+        /// <returns>A new V128 constructed from the two extracted elements.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown when <paramref name="index"/> is outside 0..31.
+        /// </exception>
+        public ARMeilleure.State.V128 GetV(int index)
+        {
+            // Reject invalid register numbers up front so the switch only sees 0..31.
+            if (index < 0 || index > 31)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            // NOTE(review): the size argument 3 presumably selects 64-bit elements - confirm against VectorHelper.
+            switch (index)
+            {
+                case 0: return new ARMeilleure.State.V128(VectorExtractIntZx(V0, 0, 3), VectorExtractIntZx(V0, 1, 3));
+                case 1: return new ARMeilleure.State.V128(VectorExtractIntZx(V1, 0, 3), VectorExtractIntZx(V1, 1, 3));
+                case 2: return new ARMeilleure.State.V128(VectorExtractIntZx(V2, 0, 3), VectorExtractIntZx(V2, 1, 3));
+                case 3: return new ARMeilleure.State.V128(VectorExtractIntZx(V3, 0, 3), VectorExtractIntZx(V3, 1, 3));
+                case 4: return new ARMeilleure.State.V128(VectorExtractIntZx(V4, 0, 3), VectorExtractIntZx(V4, 1, 3));
+                case 5: return new ARMeilleure.State.V128(VectorExtractIntZx(V5, 0, 3), VectorExtractIntZx(V5, 1, 3));
+                case 6: return new ARMeilleure.State.V128(VectorExtractIntZx(V6, 0, 3), VectorExtractIntZx(V6, 1, 3));
+                case 7: return new ARMeilleure.State.V128(VectorExtractIntZx(V7, 0, 3), VectorExtractIntZx(V7, 1, 3));
+                case 8: return new ARMeilleure.State.V128(VectorExtractIntZx(V8, 0, 3), VectorExtractIntZx(V8, 1, 3));
+                case 9: return new ARMeilleure.State.V128(VectorExtractIntZx(V9, 0, 3), VectorExtractIntZx(V9, 1, 3));
+                case 10: return new ARMeilleure.State.V128(VectorExtractIntZx(V10, 0, 3), VectorExtractIntZx(V10, 1, 3));
+                case 11: return new ARMeilleure.State.V128(VectorExtractIntZx(V11, 0, 3), VectorExtractIntZx(V11, 1, 3));
+                case 12: return new ARMeilleure.State.V128(VectorExtractIntZx(V12, 0, 3), VectorExtractIntZx(V12, 1, 3));
+                case 13: return new ARMeilleure.State.V128(VectorExtractIntZx(V13, 0, 3), VectorExtractIntZx(V13, 1, 3));
+                case 14: return new ARMeilleure.State.V128(VectorExtractIntZx(V14, 0, 3), VectorExtractIntZx(V14, 1, 3));
+                case 15: return new ARMeilleure.State.V128(VectorExtractIntZx(V15, 0, 3), VectorExtractIntZx(V15, 1, 3));
+                case 16: return new ARMeilleure.State.V128(VectorExtractIntZx(V16, 0, 3), VectorExtractIntZx(V16, 1, 3));
+                case 17: return new ARMeilleure.State.V128(VectorExtractIntZx(V17, 0, 3), VectorExtractIntZx(V17, 1, 3));
+                case 18: return new ARMeilleure.State.V128(VectorExtractIntZx(V18, 0, 3), VectorExtractIntZx(V18, 1, 3));
+                case 19: return new ARMeilleure.State.V128(VectorExtractIntZx(V19, 0, 3), VectorExtractIntZx(V19, 1, 3));
+                case 20: return new ARMeilleure.State.V128(VectorExtractIntZx(V20, 0, 3), VectorExtractIntZx(V20, 1, 3));
+                case 21: return new ARMeilleure.State.V128(VectorExtractIntZx(V21, 0, 3), VectorExtractIntZx(V21, 1, 3));
+                case 22: return new ARMeilleure.State.V128(VectorExtractIntZx(V22, 0, 3), VectorExtractIntZx(V22, 1, 3));
+                case 23: return new ARMeilleure.State.V128(VectorExtractIntZx(V23, 0, 3), VectorExtractIntZx(V23, 1, 3));
+                case 24: return new ARMeilleure.State.V128(VectorExtractIntZx(V24, 0, 3), VectorExtractIntZx(V24, 1, 3));
+                case 25: return new ARMeilleure.State.V128(VectorExtractIntZx(V25, 0, 3), VectorExtractIntZx(V25, 1, 3));
+                case 26: return new ARMeilleure.State.V128(VectorExtractIntZx(V26, 0, 3), VectorExtractIntZx(V26, 1, 3));
+                case 27: return new ARMeilleure.State.V128(VectorExtractIntZx(V27, 0, 3), VectorExtractIntZx(V27, 1, 3));
+                case 28: return new ARMeilleure.State.V128(VectorExtractIntZx(V28, 0, 3), VectorExtractIntZx(V28, 1, 3));
+                case 29: return new ARMeilleure.State.V128(VectorExtractIntZx(V29, 0, 3), VectorExtractIntZx(V29, 1, 3));
+                case 30: return new ARMeilleure.State.V128(VectorExtractIntZx(V30, 0, 3), VectorExtractIntZx(V30, 1, 3));
+
+                // Only 31 can reach here after the range check above.
+                default: return new ARMeilleure.State.V128(VectorExtractIntZx(V31, 0, 3), VectorExtractIntZx(V31, 1, 3));
+            }
+        }
+
+        /// <summary>
+        /// Returns the condition flag of this state that corresponds to <paramref name="flag"/>.
+        /// </summary>
+        /// <param name="flag">One of NFlag, ZFlag, CFlag or VFlag.</param>
+        /// <returns>Negative, Zero, Carry or Overflow respectively.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// Thrown for any other <paramref name="flag"/> value.
+        /// </exception>
+        public bool GetPstateFlag(ARMeilleure.State.PState flag)
+        {
+            if (flag == ARMeilleure.State.PState.NFlag)
+            {
+                return Negative;
+            }
+
+            if (flag == ARMeilleure.State.PState.ZFlag)
+            {
+                return Zero;
+            }
+
+            if (flag == ARMeilleure.State.PState.CFlag)
+            {
+                return Carry;
+            }
+
+            if (flag == ARMeilleure.State.PState.VFlag)
+            {
+                return Overflow;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(flag));
+        }
+
+ public void RequestInterrupt()
{
_interrupted = true;
}
internal void OnBreak(long position, int imm)
{
- Break?.Invoke(this, new InstExceptionEventArgs(position, imm));
+ Break?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm));
}
internal void OnSvcCall(long position, int imm)
{
- SvcCall?.Invoke(this, new InstExceptionEventArgs(position, imm));
+ SupervisorCall?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm));
}
internal void OnUndefined(long position, int rawOpCode)
{
- Undefined?.Invoke(this, new InstUndefinedEventArgs(position, rawOpCode));
+ Undefined?.Invoke(this, new ARMeilleure.State.InstUndefinedEventArgs((ulong)position, rawOpCode));
}
internal ExecutionMode GetExecutionMode()
{
- if (!Aarch32)
+ if (!IsAarch32)
{
return ExecutionMode.Aarch64;
}
@@ -185,17 +336,19 @@ namespace ChocolArm64.State
internal bool GetFpcrFlag(Fpcr flag)
{
- return (Fpcr & (1 << (int)flag)) != 0;
+ return (CFpcr & (1 << (int)flag)) != 0;
}
internal void SetFpsrFlag(Fpsr flag)
{
- Fpsr |= 1 << (int)flag;
+ CFpsr |= 1 << (int)flag;
}
internal RoundMode FPRoundingMode()
{
- return (RoundMode)((Fpcr >> (int)State.Fpcr.RMode) & 3);
+ return (RoundMode)((CFpcr >> (int)State.Fpcr.RMode) & 3);
}
+
+        /// <summary>
+        /// Currently a no-op: the method body performs no work.
+        /// </summary>
+        public void Dispose()
+        {
+        }
}
}
diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs
index 0803df09b..ab8f474a4 100644
--- a/ChocolArm64/Translation/Translator.cs
+++ b/ChocolArm64/Translation/Translator.cs
@@ -9,7 +9,7 @@ using System.Threading;
namespace ChocolArm64.Translation
{
- public class Translator
+ public class Translator : ARMeilleure.Translation.ITranslator
{
private MemoryManager _memory;
@@ -38,24 +38,18 @@ namespace ChocolArm64.Translation
_queue = new TranslatorQueue();
}
- internal void ExecuteSubroutine(CpuThread thread, long position)
+ public void Execute(ARMeilleure.State.IExecutionContext ctx, ulong address)
{
+ CpuThreadState state = (CpuThreadState)ctx;
+
+ long position = (long)address;
+
if (Interlocked.Increment(ref _threadCount) == 1)
{
_backgroundTranslator = new Thread(TranslateQueuedSubs);
_backgroundTranslator.Start();
}
- ExecuteSubroutine(thread.ThreadState, position);
-
- if (Interlocked.Decrement(ref _threadCount) == 0)
- {
- _queue.ForceSignal();
- }
- }
-
- private void ExecuteSubroutine(CpuThreadState state, long position)
- {
state.CurrentTranslator = this;
do
@@ -75,6 +69,11 @@ namespace ChocolArm64.Translation
while (position != 0 && state.Running);
state.CurrentTranslator = null;
+
+ if (Interlocked.Decrement(ref _threadCount) == 0)
+ {
+ _queue.ForceSignal();
+ }
}
internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
diff --git a/Ryujinx.Audio/Ryujinx.Audio.csproj b/Ryujinx.Audio/Ryujinx.Audio.csproj
index a6a34f40f..e25066eee 100644
--- a/Ryujinx.Audio/Ryujinx.Audio.csproj
+++ b/Ryujinx.Audio/Ryujinx.Audio.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
Debug;Release;Profile Debug;Profile Release
diff --git a/Ryujinx.Common/Ryujinx.Common.csproj b/Ryujinx.Common/Ryujinx.Common.csproj
index cf078db85..86c6c570d 100644
--- a/Ryujinx.Common/Ryujinx.Common.csproj
+++ b/Ryujinx.Common/Ryujinx.Common.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
Debug;Release;Profile Debug;Profile Release
@@ -25,7 +25,7 @@
TRACE;USE_PROFILING
true
-
+
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
index 2e78cf142..62dae00b5 100644
--- a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
+++ b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using OpenTK.Graphics.OpenGL;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
@@ -229,7 +229,7 @@ namespace Ryujinx.Graphics.Texture
public static byte[] ReadTexture(IMemory memory, GalImage image, long position)
{
- MemoryManager cpuMemory;
+ IMemoryManager cpuMemory;
if (memory is NvGpuVmm vmm)
{
@@ -237,7 +237,7 @@ namespace Ryujinx.Graphics.Texture
}
else
{
- cpuMemory = (MemoryManager)memory;
+ cpuMemory = (IMemoryManager)memory;
}
ISwizzle swizzle = TextureHelper.GetSwizzle(image);
@@ -251,7 +251,6 @@ namespace Ryujinx.Graphics.Texture
// Note: Each row of the texture needs to be aligned to 4 bytes.
int pitch = (width * bytesPerPixel + 3) & ~3;
-
int dataLayerSize = height * pitch * depth;
byte[] data = new byte[dataLayerSize * image.LayerCount];
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
index 1de81008e..22b803db3 100644
--- a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
+++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
@@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Texture
}
}
- public static (MemoryManager Memory, long Position) GetMemoryAndPosition(
+ public static (IMemoryManager Memory, long Position) GetMemoryAndPosition(
IMemory memory,
long position)
{
@@ -47,7 +47,7 @@ namespace Ryujinx.Graphics.Texture
return (vmm.Memory, vmm.GetPhysicalAddress(position));
}
- return ((MemoryManager)memory, position);
+ return ((IMemoryManager)memory, position);
}
}
}
diff --git a/Ryujinx.Graphics/Memory/NvGpuVmm.cs b/Ryujinx.Graphics/Memory/NvGpuVmm.cs
index fea99587d..d8ccd6c74 100644
--- a/Ryujinx.Graphics/Memory/NvGpuVmm.cs
+++ b/Ryujinx.Graphics/Memory/NvGpuVmm.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Graphics.Gal;
using System;
@@ -23,7 +23,7 @@ namespace Ryujinx.Graphics.Memory
private const int PtLvl0Bit = PtPageBits + PtLvl1Bits;
private const int PtLvl1Bit = PtPageBits;
- public MemoryManager Memory { get; private set; }
+ public IMemoryManager Memory { get; private set; }
private NvGpuVmmCache _cache;
@@ -32,7 +32,7 @@ namespace Ryujinx.Graphics.Memory
private long[][] _pageTable;
- public NvGpuVmm(MemoryManager memory)
+ public NvGpuVmm(IMemoryManager memory)
{
Memory = memory;
diff --git a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
index ab5ea288c..37ead4e0a 100644
--- a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
+++ b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using System.Collections.Concurrent;
namespace Ryujinx.Graphics.Memory
@@ -12,9 +12,9 @@ namespace Ryujinx.Graphics.Memory
private ConcurrentDictionary[] _cachedPages;
- private MemoryManager _memory;
+ private IMemoryManager _memory;
- public NvGpuVmmCache(MemoryManager memory)
+ public NvGpuVmmCache(IMemoryManager memory)
{
_memory = memory;
diff --git a/Ryujinx.Graphics/Ryujinx.Graphics.csproj b/Ryujinx.Graphics/Ryujinx.Graphics.csproj
index 740008955..e2bf16930 100644
--- a/Ryujinx.Graphics/Ryujinx.Graphics.csproj
+++ b/Ryujinx.Graphics/Ryujinx.Graphics.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
Debug;Release;Profile Debug;Profile Release
@@ -32,8 +32,9 @@
-
+
+
diff --git a/Ryujinx.Graphics/VDec/VideoDecoder.cs b/Ryujinx.Graphics/VDec/VideoDecoder.cs
index 3ebb93f42..9bf60c31b 100644
--- a/Ryujinx.Graphics/VDec/VideoDecoder.cs
+++ b/Ryujinx.Graphics/VDec/VideoDecoder.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Graphics.Gal;
using Ryujinx.Graphics.Memory;
using Ryujinx.Graphics.Texture;
diff --git a/Ryujinx.HLE/DeviceMemory.cs b/Ryujinx.HLE/DeviceMemory.cs
index 3553a6e71..0ead17473 100644
--- a/Ryujinx.HLE/DeviceMemory.cs
+++ b/Ryujinx.HLE/DeviceMemory.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using System;
using System.Runtime.InteropServices;
diff --git a/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs b/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs
index 84bb1fc59..dfbd6c272 100644
--- a/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs
+++ b/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs
@@ -8,6 +8,6 @@ namespace Ryujinx.HLE.Exceptions
public UndefinedInstructionException() : base() { }
- public UndefinedInstructionException(long position, int opCode) : base(string.Format(ExMsg, position, opCode)) { }
+ public UndefinedInstructionException(ulong address, int opCode) : base(string.Format(ExMsg, address, opCode)) { }
}
}
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Homebrew.cs b/Ryujinx.HLE/HOS/Homebrew.cs
index b11a46404..8e54f82c1 100644
--- a/Ryujinx.HLE/HOS/Homebrew.cs
+++ b/Ryujinx.HLE/HOS/Homebrew.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using System.Text;
namespace Ryujinx.HLE.HOS
@@ -8,7 +8,7 @@ namespace Ryujinx.HLE.HOS
public const string TemporaryNroSuffix = ".ryu_tmp.nro";
// http://switchbrew.org/index.php?title=Homebrew_ABI
- public static void WriteHbAbiData(MemoryManager memory, long position, int mainThreadHandle, string switchPath)
+ public static void WriteHbAbiData(IMemoryManager memory, long position, int mainThreadHandle, string switchPath)
{
// MainThreadHandle.
WriteConfigEntry(memory, ref position, 1, 0, mainThreadHandle);
@@ -31,7 +31,7 @@ namespace Ryujinx.HLE.HOS
}
private static void WriteConfigEntry(
- MemoryManager memory,
+ IMemoryManager memory,
ref long position,
int key,
int flags = 0,
@@ -46,7 +46,7 @@ namespace Ryujinx.HLE.HOS
position += 0x18;
}
- public static string ReadHbAbiNextLoadPath(MemoryManager memory, long position)
+ public static string ReadHbAbiNextLoadPath(IMemoryManager memory, long position)
{
string fileName = null;
diff --git a/Ryujinx.HLE/HOS/Horizon.cs b/Ryujinx.HLE/HOS/Horizon.cs
index f8bb345f2..5873223ef 100644
--- a/Ryujinx.HLE/HOS/Horizon.cs
+++ b/Ryujinx.HLE/HOS/Horizon.cs
@@ -110,6 +110,8 @@ namespace Ryujinx.HLE.HOS
public int GlobalAccessLogMode { get; set; }
+ public bool UseLegacyJit { get; set; }
+
internal long HidBaseAddress { get; private set; }
public Horizon(Switch device)
diff --git a/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs b/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs
index e940d774c..50ab3d100 100644
--- a/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs
+++ b/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.HLE.HOS.Kernel.Common;
using Ryujinx.HLE.HOS.Kernel.Ipc;
using Ryujinx.HLE.HOS.Kernel.Process;
@@ -13,7 +13,7 @@ namespace Ryujinx.HLE.HOS.Ipc
public static KernelResult IpcCall(
Switch device,
KProcess process,
- MemoryManager memory,
+ IMemoryManager memory,
KThread thread,
KClientSession session,
IpcMessage request,
diff --git a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs
index 0fcb31483..62330d6ba 100644
--- a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs
@@ -1,5 +1,5 @@
using Ryujinx.HLE.HOS.Kernel.Process;
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
namespace Ryujinx.HLE.HOS.Kernel.Common
{
diff --git a/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs b/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs
index 448ae54c0..fd80b3b9e 100644
--- a/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.HLE.HOS.Kernel.Common;
using Ryujinx.HLE.HOS.Kernel.Process;
@@ -29,7 +29,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory
private LinkedList _blocks;
- private MemoryManager _cpuMemory;
+ private IMemoryManager _cpuMemory;
private Horizon _system;
@@ -72,7 +72,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory
private MersenneTwister _randomNumberGenerator;
- public KMemoryManager(Horizon system, MemoryManager cpuMemory)
+ public KMemoryManager(Horizon system, IMemoryManager cpuMemory)
{
_system = system;
_cpuMemory = cpuMemory;
diff --git a/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs b/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs
index 223bf5dae..e2ca44b59 100644
--- a/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs
@@ -1,5 +1,5 @@
-using ChocolArm64.Memory;
-using ChocolArm64.State;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
using Ryujinx.HLE.HOS.Diagnostics.Demangler;
using Ryujinx.HLE.HOS.Kernel.Memory;
using Ryujinx.HLE.Loaders.Elf;
@@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
_images = new List();
}
- public string GetGuestStackTrace(CpuThreadState threadState)
+ public string GetGuestStackTrace(IExecutionContext context)
{
EnsureLoaded();
@@ -74,7 +74,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
}
// TODO: ARM32.
- long framePointer = (long)threadState.X29;
+ long framePointer = (long)context.GetX(29);
trace.AppendLine($"Process: {_owner.Name}, PID: {_owner.Pid}");
@@ -218,7 +218,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
}
}
- private void LoadMod0Symbols(MemoryManager memory, long textOffset)
+ private void LoadMod0Symbols(IMemoryManager memory, long textOffset)
{
long mod0Offset = textOffset + memory.ReadUInt32(textOffset + 4);
@@ -288,7 +288,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
}
}
- private ElfSymbol GetSymbol(MemoryManager memory, long address, long strTblAddr)
+ private ElfSymbol GetSymbol(IMemoryManager memory, long address, long strTblAddr)
{
int nameIndex = memory.ReadInt32(address + 0);
int info = memory.ReadByte (address + 4);
diff --git a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs
index 1b5a67722..beb376f64 100644
--- a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs
@@ -1,9 +1,7 @@
-using ChocolArm64;
-using ChocolArm64.Events;
-using ChocolArm64.Memory;
-using ChocolArm64.Translation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
using Ryujinx.Common;
-using Ryujinx.Common.Logging;
using Ryujinx.HLE.Exceptions;
using Ryujinx.HLE.HOS.Kernel.Common;
using Ryujinx.HLE.HOS.Kernel.Memory;
@@ -80,9 +78,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
public bool IsPaused { get; private set; }
- public MemoryManager CpuMemory { get; private set; }
+ public IMemoryManager CpuMemory { get; private set; }
- public Translator Translator { get; private set; }
+ public ITranslator Translator { get; private set; }
private SvcHandler _svcHandler;
@@ -793,11 +791,11 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
}
}
- public void SubscribeThreadEventHandlers(CpuThread context)
+ public void SubscribeThreadEventHandlers(IExecutionContext context)
{
- context.ThreadState.Interrupt += InterruptHandler;
- context.ThreadState.SvcCall += _svcHandler.SvcCall;
- context.ThreadState.Undefined += UndefinedInstructionHandler;
+ context.Interrupt += InterruptHandler;
+ context.SupervisorCall += _svcHandler.SvcCall;
+ context.Undefined += UndefinedInstructionHandler;
}
private void InterruptHandler(object sender, EventArgs e)
@@ -1001,9 +999,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
{
foreach (KThread thread in _threads)
{
- thread.Context.StopExecution();
+ thread.Context.Running = false;
- System.Scheduler.CoreManager.Set(thread.Context.Work);
+ System.Scheduler.CoreManager.Set(thread.HostThread);
}
}
}
@@ -1024,13 +1022,20 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
bool useFlatPageTable = memRegion == MemoryRegion.Application;
- CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable);
+ if (_system.UseLegacyJit)
+ {
+ CpuMemory = new ChocolArm64.Memory.MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable);
+
+ Translator = new ChocolArm64.Translation.Translator((ChocolArm64.Memory.MemoryManager)CpuMemory);
+ }
+ else
+ {
+ CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable);
+
+ Translator = new Translator((MemoryManager)CpuMemory);
+ }
MemoryManager = new KMemoryManager(_system, CpuMemory);
-
- Translator = new Translator(CpuMemory);
-
- Translator.CpuTrace += CpuTraceHandler;
}
public void PrintCurrentThreadStackTrace()
@@ -1038,14 +1043,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
System.Scheduler.GetCurrentThread().PrintGuestStackTrace();
}
- private void CpuTraceHandler(object sender, CpuTraceEventArgs e)
- {
- Logger.PrintInfo(LogClass.Cpu, $"Executing at 0x{e.Position:X16}.");
- }
-
private void UndefinedInstructionHandler(object sender, InstUndefinedEventArgs e)
{
- throw new UndefinedInstructionException(e.Position, e.RawOpCode);
+ throw new UndefinedInstructionException(e.Address, e.OpCode);
}
}
}
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs
index cf881a793..7509ae048 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs
@@ -1,5 +1,4 @@
-using ChocolArm64.Events;
-using ChocolArm64.State;
+using ARMeilleure.State;
using Ryujinx.HLE.HOS.Kernel.Process;
using System;
@@ -7,9 +6,9 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
{
partial class SvcHandler
{
- private Switch _device;
- private KProcess _process;
- private Horizon _system;
+ private Switch _device;
+ private KProcess _process;
+ private Horizon _system;
public SvcHandler(Switch device, KProcess process)
{
@@ -20,16 +19,16 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
public void SvcCall(object sender, InstExceptionEventArgs e)
{
- Action svcFunc = SvcTable.GetSvcFunc(e.Id);
+ Action svcFunc = SvcTable.GetSvcFunc(e.Id);
if (svcFunc == null)
{
throw new NotImplementedException($"SVC 0x{e.Id:X4} is not implemented.");
}
- CpuThreadState threadState = (CpuThreadState)sender;
+ IExecutionContext context = (IExecutionContext)sender;
- svcFunc(this, threadState);
+ svcFunc(this, context);
}
}
}
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs
index eb7595c0a..7c1c981bf 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs
@@ -83,7 +83,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
public KernelResult SendSyncRequest64(int handle)
{
- return SendSyncRequest((ulong)_system.Scheduler.GetCurrentThread().Context.ThreadState.Tpidr, 0x100, handle);
+ return SendSyncRequest((ulong)_system.Scheduler.GetCurrentThread().Context.Tpidr, 0x100, handle);
}
public KernelResult SendSyncRequestWithUserBuffer64(ulong messagePtr, ulong size, int handle)
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs
index 5f971131c..094e1935f 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.Exceptions;
@@ -138,7 +138,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
public ulong GetSystemTick64()
{
- return _system.Scheduler.GetCurrentThread().Context.ThreadState.CntpctEl0;
+ return _system.Scheduler.GetCurrentThread().Context.CntpctEl0;
}
public KernelResult GetProcessId64(int handle, out long pid)
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs
index 23934649f..c1a31da9b 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.State;
+using ARMeilleure.State;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Kernel.Common;
using System;
@@ -14,7 +14,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
private static Dictionary _svcFuncs64;
- private static Action[] _svcTable64;
+ private static Action[] _svcTable64;
static SvcTable()
{
@@ -77,10 +77,10 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
{ 0x78, nameof(SvcHandler.UnmapProcessCodeMemory64) }
};
- _svcTable64 = new Action[0x80];
+ _svcTable64 = new Action[0x80];
}
- public static Action GetSvcFunc(int svcId)
+ public static Action GetSvcFunc(int svcId)
{
if (_svcTable64[svcId] != null)
{
@@ -95,9 +95,9 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
return null;
}
- private static Action GenerateMethod(string svcName)
+ private static Action GenerateMethod(string svcName)
{
- Type[] argTypes = new Type[] { typeof(SvcHandler), typeof(CpuThreadState) };
+ Type[] argTypes = new Type[] { typeof(SvcHandler), typeof(IExecutionContext) };
DynamicMethod method = new DynamicMethod(svcName, null, argTypes);
@@ -183,7 +183,11 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
generator.Emit(OpCodes.Conv_I);
generator.Emit(OpCodes.Ldarg_1);
- generator.Emit(OpCodes.Ldfld, GetStateFieldX(byRefArgsCount + index));
+ generator.Emit(OpCodes.Ldc_I4, byRefArgsCount + index);
+
+ MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.GetX));
+
+ generator.Emit(OpCodes.Call, info);
generator.Emit(OpCodes.Box, typeof(ulong));
@@ -227,7 +231,11 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
else
{
generator.Emit(OpCodes.Ldarg_1);
- generator.Emit(OpCodes.Ldfld, GetStateFieldX(byRefArgsCount + index));
+ generator.Emit(OpCodes.Ldc_I4, byRefArgsCount + index);
+
+ MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.GetX));
+
+ generator.Emit(OpCodes.Call, info);
ConvertToArgType(argType);
}
@@ -258,51 +266,44 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
generator.Emit(OpCodes.Stloc, tempLocal);
generator.Emit(OpCodes.Ldarg_1);
+ generator.Emit(OpCodes.Ldc_I4, outRegIndex++);
generator.Emit(OpCodes.Ldloc, tempLocal);
ConvertToFieldType(retType);
- generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++));
+ MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX));
+
+ generator.Emit(OpCodes.Call, info);
}
for (int index = 0; index < locals.Count; index++)
{
generator.Emit(OpCodes.Ldarg_1);
+ generator.Emit(OpCodes.Ldc_I4, outRegIndex++);
generator.Emit(OpCodes.Ldloc, locals[index]);
ConvertToFieldType(locals[index].LocalType);
- generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++));
+ MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX));
+
+ generator.Emit(OpCodes.Call, info);
}
// Zero out the remaining unused registers.
while (outRegIndex < SvcFuncMaxArguments)
{
generator.Emit(OpCodes.Ldarg_1);
+ generator.Emit(OpCodes.Ldc_I4, outRegIndex++);
generator.Emit(OpCodes.Ldc_I8, 0L);
- generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++));
+
+ MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX));
+
+ generator.Emit(OpCodes.Call, info);
}
generator.Emit(OpCodes.Ret);
- return (Action)method.CreateDelegate(typeof(Action));
- }
-
- private static FieldInfo GetStateFieldX(int index)
- {
- switch (index)
- {
- case 0: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X0));
- case 1: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X1));
- case 2: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X2));
- case 3: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X3));
- case 4: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X4));
- case 5: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X5));
- case 6: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X6));
- case 7: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X7));
- }
-
- throw new ArgumentOutOfRangeException(nameof(index));
+ return (Action)method.CreateDelegate(typeof(Action));
}
private static void CheckIfTypeIsSupported(Type type, string svcName)
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs
index e1f018c19..e49da023a 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs
@@ -1,4 +1,5 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
using Ryujinx.HLE.HOS.Kernel.Common;
using Ryujinx.HLE.HOS.Kernel.Process;
using Ryujinx.HLE.HOS.Kernel.Threading;
@@ -347,83 +348,91 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
return KernelResult.InvalidThread;
}
- MemoryManager memory = currentProcess.CpuMemory;
+ IMemoryManager memory = currentProcess.CpuMemory;
- memory.WriteUInt64((long)address + 0x0, thread.Context.ThreadState.X0);
- memory.WriteUInt64((long)address + 0x8, thread.Context.ThreadState.X1);
- memory.WriteUInt64((long)address + 0x10, thread.Context.ThreadState.X2);
- memory.WriteUInt64((long)address + 0x18, thread.Context.ThreadState.X3);
- memory.WriteUInt64((long)address + 0x20, thread.Context.ThreadState.X4);
- memory.WriteUInt64((long)address + 0x28, thread.Context.ThreadState.X5);
- memory.WriteUInt64((long)address + 0x30, thread.Context.ThreadState.X6);
- memory.WriteUInt64((long)address + 0x38, thread.Context.ThreadState.X7);
- memory.WriteUInt64((long)address + 0x40, thread.Context.ThreadState.X8);
- memory.WriteUInt64((long)address + 0x48, thread.Context.ThreadState.X9);
- memory.WriteUInt64((long)address + 0x50, thread.Context.ThreadState.X10);
- memory.WriteUInt64((long)address + 0x58, thread.Context.ThreadState.X11);
- memory.WriteUInt64((long)address + 0x60, thread.Context.ThreadState.X12);
- memory.WriteUInt64((long)address + 0x68, thread.Context.ThreadState.X13);
- memory.WriteUInt64((long)address + 0x70, thread.Context.ThreadState.X14);
- memory.WriteUInt64((long)address + 0x78, thread.Context.ThreadState.X15);
- memory.WriteUInt64((long)address + 0x80, thread.Context.ThreadState.X16);
- memory.WriteUInt64((long)address + 0x88, thread.Context.ThreadState.X17);
- memory.WriteUInt64((long)address + 0x90, thread.Context.ThreadState.X18);
- memory.WriteUInt64((long)address + 0x98, thread.Context.ThreadState.X19);
- memory.WriteUInt64((long)address + 0xa0, thread.Context.ThreadState.X20);
- memory.WriteUInt64((long)address + 0xa8, thread.Context.ThreadState.X21);
- memory.WriteUInt64((long)address + 0xb0, thread.Context.ThreadState.X22);
- memory.WriteUInt64((long)address + 0xb8, thread.Context.ThreadState.X23);
- memory.WriteUInt64((long)address + 0xc0, thread.Context.ThreadState.X24);
- memory.WriteUInt64((long)address + 0xc8, thread.Context.ThreadState.X25);
- memory.WriteUInt64((long)address + 0xd0, thread.Context.ThreadState.X26);
- memory.WriteUInt64((long)address + 0xd8, thread.Context.ThreadState.X27);
- memory.WriteUInt64((long)address + 0xe0, thread.Context.ThreadState.X28);
- memory.WriteUInt64((long)address + 0xe8, thread.Context.ThreadState.X29);
- memory.WriteUInt64((long)address + 0xf0, thread.Context.ThreadState.X30);
- memory.WriteUInt64((long)address + 0xf8, thread.Context.ThreadState.X31);
+ memory.WriteUInt64((long)address + 0x0, thread.Context.GetX(0));
+ memory.WriteUInt64((long)address + 0x8, thread.Context.GetX(1));
+ memory.WriteUInt64((long)address + 0x10, thread.Context.GetX(2));
+ memory.WriteUInt64((long)address + 0x18, thread.Context.GetX(3));
+ memory.WriteUInt64((long)address + 0x20, thread.Context.GetX(4));
+ memory.WriteUInt64((long)address + 0x28, thread.Context.GetX(5));
+ memory.WriteUInt64((long)address + 0x30, thread.Context.GetX(6));
+ memory.WriteUInt64((long)address + 0x38, thread.Context.GetX(7));
+ memory.WriteUInt64((long)address + 0x40, thread.Context.GetX(8));
+ memory.WriteUInt64((long)address + 0x48, thread.Context.GetX(9));
+ memory.WriteUInt64((long)address + 0x50, thread.Context.GetX(10));
+ memory.WriteUInt64((long)address + 0x58, thread.Context.GetX(11));
+ memory.WriteUInt64((long)address + 0x60, thread.Context.GetX(12));
+ memory.WriteUInt64((long)address + 0x68, thread.Context.GetX(13));
+ memory.WriteUInt64((long)address + 0x70, thread.Context.GetX(14));
+ memory.WriteUInt64((long)address + 0x78, thread.Context.GetX(15));
+ memory.WriteUInt64((long)address + 0x80, thread.Context.GetX(16));
+ memory.WriteUInt64((long)address + 0x88, thread.Context.GetX(17));
+ memory.WriteUInt64((long)address + 0x90, thread.Context.GetX(18));
+ memory.WriteUInt64((long)address + 0x98, thread.Context.GetX(19));
+ memory.WriteUInt64((long)address + 0xa0, thread.Context.GetX(20));
+ memory.WriteUInt64((long)address + 0xa8, thread.Context.GetX(21));
+ memory.WriteUInt64((long)address + 0xb0, thread.Context.GetX(22));
+ memory.WriteUInt64((long)address + 0xb8, thread.Context.GetX(23));
+ memory.WriteUInt64((long)address + 0xc0, thread.Context.GetX(24));
+ memory.WriteUInt64((long)address + 0xc8, thread.Context.GetX(25));
+ memory.WriteUInt64((long)address + 0xd0, thread.Context.GetX(26));
+ memory.WriteUInt64((long)address + 0xd8, thread.Context.GetX(27));
+ memory.WriteUInt64((long)address + 0xe0, thread.Context.GetX(28));
+ memory.WriteUInt64((long)address + 0xe8, thread.Context.GetX(29));
+ memory.WriteUInt64((long)address + 0xf0, thread.Context.GetX(30));
+ memory.WriteUInt64((long)address + 0xf8, thread.Context.GetX(31));
memory.WriteInt64((long)address + 0x100, thread.LastPc);
- memory.WriteUInt64((long)address + 0x108, (ulong)thread.Context.ThreadState.Psr);
+ memory.WriteUInt64((long)address + 0x108, (ulong)GetPsr(thread.Context));
- memory.WriteVector128((long)address + 0x110, thread.Context.ThreadState.V0);
- memory.WriteVector128((long)address + 0x120, thread.Context.ThreadState.V1);
- memory.WriteVector128((long)address + 0x130, thread.Context.ThreadState.V2);
- memory.WriteVector128((long)address + 0x140, thread.Context.ThreadState.V3);
- memory.WriteVector128((long)address + 0x150, thread.Context.ThreadState.V4);
- memory.WriteVector128((long)address + 0x160, thread.Context.ThreadState.V5);
- memory.WriteVector128((long)address + 0x170, thread.Context.ThreadState.V6);
- memory.WriteVector128((long)address + 0x180, thread.Context.ThreadState.V7);
- memory.WriteVector128((long)address + 0x190, thread.Context.ThreadState.V8);
- memory.WriteVector128((long)address + 0x1a0, thread.Context.ThreadState.V9);
- memory.WriteVector128((long)address + 0x1b0, thread.Context.ThreadState.V10);
- memory.WriteVector128((long)address + 0x1c0, thread.Context.ThreadState.V11);
- memory.WriteVector128((long)address + 0x1d0, thread.Context.ThreadState.V12);
- memory.WriteVector128((long)address + 0x1e0, thread.Context.ThreadState.V13);
- memory.WriteVector128((long)address + 0x1f0, thread.Context.ThreadState.V14);
- memory.WriteVector128((long)address + 0x200, thread.Context.ThreadState.V15);
- memory.WriteVector128((long)address + 0x210, thread.Context.ThreadState.V16);
- memory.WriteVector128((long)address + 0x220, thread.Context.ThreadState.V17);
- memory.WriteVector128((long)address + 0x230, thread.Context.ThreadState.V18);
- memory.WriteVector128((long)address + 0x240, thread.Context.ThreadState.V19);
- memory.WriteVector128((long)address + 0x250, thread.Context.ThreadState.V20);
- memory.WriteVector128((long)address + 0x260, thread.Context.ThreadState.V21);
- memory.WriteVector128((long)address + 0x270, thread.Context.ThreadState.V22);
- memory.WriteVector128((long)address + 0x280, thread.Context.ThreadState.V23);
- memory.WriteVector128((long)address + 0x290, thread.Context.ThreadState.V24);
- memory.WriteVector128((long)address + 0x2a0, thread.Context.ThreadState.V25);
- memory.WriteVector128((long)address + 0x2b0, thread.Context.ThreadState.V26);
- memory.WriteVector128((long)address + 0x2c0, thread.Context.ThreadState.V27);
- memory.WriteVector128((long)address + 0x2d0, thread.Context.ThreadState.V28);
- memory.WriteVector128((long)address + 0x2e0, thread.Context.ThreadState.V29);
- memory.WriteVector128((long)address + 0x2f0, thread.Context.ThreadState.V30);
- memory.WriteVector128((long)address + 0x300, thread.Context.ThreadState.V31);
+ memory.WriteVector128((long)address + 0x110, thread.Context.GetV(0));
+ memory.WriteVector128((long)address + 0x120, thread.Context.GetV(1));
+ memory.WriteVector128((long)address + 0x130, thread.Context.GetV(2));
+ memory.WriteVector128((long)address + 0x140, thread.Context.GetV(3));
+ memory.WriteVector128((long)address + 0x150, thread.Context.GetV(4));
+ memory.WriteVector128((long)address + 0x160, thread.Context.GetV(5));
+ memory.WriteVector128((long)address + 0x170, thread.Context.GetV(6));
+ memory.WriteVector128((long)address + 0x180, thread.Context.GetV(7));
+ memory.WriteVector128((long)address + 0x190, thread.Context.GetV(8));
+ memory.WriteVector128((long)address + 0x1a0, thread.Context.GetV(9));
+ memory.WriteVector128((long)address + 0x1b0, thread.Context.GetV(10));
+ memory.WriteVector128((long)address + 0x1c0, thread.Context.GetV(11));
+ memory.WriteVector128((long)address + 0x1d0, thread.Context.GetV(12));
+ memory.WriteVector128((long)address + 0x1e0, thread.Context.GetV(13));
+ memory.WriteVector128((long)address + 0x1f0, thread.Context.GetV(14));
+ memory.WriteVector128((long)address + 0x200, thread.Context.GetV(15));
+ memory.WriteVector128((long)address + 0x210, thread.Context.GetV(16));
+ memory.WriteVector128((long)address + 0x220, thread.Context.GetV(17));
+ memory.WriteVector128((long)address + 0x230, thread.Context.GetV(18));
+ memory.WriteVector128((long)address + 0x240, thread.Context.GetV(19));
+ memory.WriteVector128((long)address + 0x250, thread.Context.GetV(20));
+ memory.WriteVector128((long)address + 0x260, thread.Context.GetV(21));
+ memory.WriteVector128((long)address + 0x270, thread.Context.GetV(22));
+ memory.WriteVector128((long)address + 0x280, thread.Context.GetV(23));
+ memory.WriteVector128((long)address + 0x290, thread.Context.GetV(24));
+ memory.WriteVector128((long)address + 0x2a0, thread.Context.GetV(25));
+ memory.WriteVector128((long)address + 0x2b0, thread.Context.GetV(26));
+ memory.WriteVector128((long)address + 0x2c0, thread.Context.GetV(27));
+ memory.WriteVector128((long)address + 0x2d0, thread.Context.GetV(28));
+ memory.WriteVector128((long)address + 0x2e0, thread.Context.GetV(29));
+ memory.WriteVector128((long)address + 0x2f0, thread.Context.GetV(30));
+ memory.WriteVector128((long)address + 0x300, thread.Context.GetV(31));
- memory.WriteInt32((long)address + 0x310, thread.Context.ThreadState.Fpcr);
- memory.WriteInt32((long)address + 0x314, thread.Context.ThreadState.Fpsr);
- memory.WriteInt64((long)address + 0x318, thread.Context.ThreadState.Tpidr);
+ memory.WriteInt32((long)address + 0x310, (int)thread.Context.Fpcr);
+ memory.WriteInt32((long)address + 0x314, (int)thread.Context.Fpsr);
+ memory.WriteInt64((long)address + 0x318, thread.Context.Tpidr);
return KernelResult.Success;
}
+
+        /// <summary>
+        /// Packs the N, Z, C and V condition flags of <paramref name="context"/> into
+        /// bits 31, 30, 29 and 28 (respectively) of a 32-bit word.
+        /// </summary>
+        /// <param name="context">Execution context whose condition flags are read.</param>
+        /// <returns>The packed flag word; all bits other than 31..28 are zero.</returns>
+        /// <remarks>
+        /// The result is unsigned on purpose: the caller widens it with a (ulong) cast, and a
+        /// signed value with bit 31 (N) set would sign-extend, filling the upper 32 bits with ones.
+        /// </remarks>
+        private static uint GetPsr(IExecutionContext context)
+        {
+            return (context.GetPstateFlag(PState.NFlag) ? (1u << 31) : 0u) |
+                   (context.GetPstateFlag(PState.ZFlag) ? (1u << 30) : 0u) |
+                   (context.GetPstateFlag(PState.CFlag) ? (1u << 29) : 0u) |
+                   (context.GetPstateFlag(PState.VFlag) ? (1u << 28) : 0u);
+        }
}
}
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
index 42eed26a0..0b9511348 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
@@ -36,12 +36,12 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
{
KCoreContext coreContext = CoreContexts[core];
- if (coreContext.ContextSwitchNeeded && (coreContext.CurrentThread?.Context.IsCurrentThread() ?? false))
+ if (coreContext.ContextSwitchNeeded && (coreContext.CurrentThread?.IsCurrentHostThread() ?? false))
{
coreContext.ContextSwitch();
}
- if (coreContext.CurrentThread?.Context.IsCurrentThread() ?? false)
+ if (coreContext.CurrentThread?.IsCurrentHostThread() ?? false)
{
selectedCount++;
}
@@ -70,14 +70,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
{
// If this is not the thread that is currently executing, we need
// to request an interrupt to allow safely starting another thread.
- if (!currentThread.Context.IsCurrentThread())
+ if (!currentThread.IsCurrentHostThread())
{
currentThread.Context.RequestInterrupt();
return;
}
- CoreManager.Reset(currentThread.Context.Work);
+ CoreManager.Reset(currentThread.HostThread);
}
// Advance current core and try picking a thread,
@@ -92,9 +92,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
if (coreContext.CurrentThread != null)
{
- CoreManager.Set(coreContext.CurrentThread.Context.Work);
+ CoreManager.Set(coreContext.CurrentThread.HostThread);
- coreContext.CurrentThread.Context.Execute();
+ coreContext.CurrentThread.Execute();
break;
}
@@ -134,14 +134,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
public void ExitThread(KThread thread)
{
- thread.Context.StopExecution();
+ thread.Context.Running = false;
- CoreManager.Exit(thread.Context.Work);
+ CoreManager.Exit(thread.HostThread);
}
public void RemoveThread(KThread thread)
{
- CoreManager.RemoveThread(thread.Context.Work);
+ CoreManager.RemoveThread(thread.HostThread);
}
}
}
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs
index 979071772..0aa12b0dd 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs
@@ -58,7 +58,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
if (CurrentThread != null)
{
- _coreManager.Reset(CurrentThread.Context.Work);
+ _coreManager.Reset(CurrentThread.HostThread);
}
CurrentThread = SelectedThread;
@@ -70,9 +70,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
CurrentThread.TotalTimeRunning += currentTime - CurrentThread.LastScheduledTime;
CurrentThread.LastScheduledTime = currentTime;
- _coreManager.Set(CurrentThread.Context.Work);
+ _coreManager.Set(CurrentThread.HostThread);
- CurrentThread.Context.Execute();
+ CurrentThread.Execute();
}
}
}
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs
index 39c857b5d..b7013bb7b 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs
@@ -1,4 +1,4 @@
-using ChocolArm64;
+using ARMeilleure;
using System.Threading;
namespace Ryujinx.HLE.HOS.Kernel.Threading
@@ -53,14 +53,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
if (coreContext.ContextSwitchNeeded)
{
- CpuThread currentHleThread = coreContext.CurrentThread?.Context;
+ KThread currentThread = coreContext.CurrentThread;
- if (currentHleThread == null)
+ if (currentThread == null)
{
// Nothing is running, we can perform the context switch immediately.
coreContext.ContextSwitch();
}
- else if (currentHleThread.IsCurrentThread())
+ else if (currentThread.IsCurrentHostThread())
{
// Thread running on the current core, context switch will block.
doContextSwitch = true;
@@ -68,7 +68,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
else
{
// Thread running on another core, request a interrupt.
- currentHleThread.RequestInterrupt();
+ currentThread.Context.RequestInterrupt();
}
}
}
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs
index 8d2cdfce6..dd5422b8e 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs
@@ -203,7 +203,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
{
for (int core = 0; core < CpuCoresCount; core++)
{
- if (CoreContexts[core].CurrentThread?.Context.IsCurrentThread() ?? false)
+ if (CoreContexts[core].CurrentThread?.IsCurrentHostThread() ?? false)
{
return CoreContexts[core].CurrentThread;
}
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
index 50c71ea91..54d5d06c8 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
@@ -1,5 +1,5 @@
-using ChocolArm64;
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Kernel.Common;
using Ryujinx.HLE.HOS.Kernel.Process;
@@ -7,12 +7,17 @@ using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
+using System.Threading;
namespace Ryujinx.HLE.HOS.Kernel.Threading
{
class KThread : KSynchronizationObject, IKFutureSchedulerObject
{
- public CpuThread Context { get; private set; }
+ private int _hostThreadRunning;
+
+ public Thread HostThread { get; private set; }
+
+ public IExecutionContext Context { get; private set; }
public long AffinityMask { get; set; }
@@ -152,30 +157,35 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
is64Bits = true;
}
- Context = new CpuThread(owner.Translator, owner.CpuMemory, (long)entrypoint);
+ HostThread = new Thread(() => ThreadStart(entrypoint));
- bool isAarch32 = (Owner.MmuFlags & 1) == 0;
-
- Context.ThreadState.Aarch32 = isAarch32;
-
- Context.ThreadState.X0 = argsPtr;
-
- if (isAarch32)
+ if (System.UseLegacyJit)
{
- Context.ThreadState.X13 = (uint)stackTop;
+ Context = new ChocolArm64.State.CpuThreadState();
}
else
{
- Context.ThreadState.X31 = stackTop;
+ Context = new ARMeilleure.State.ExecutionContext();
}
- Context.ThreadState.CntfrqEl0 = 19200000;
- Context.ThreadState.Tpidr = (long)_tlsAddress;
+ bool isAarch32 = (Owner.MmuFlags & 1) == 0;
+
+ Context.SetX(0, argsPtr);
+
+ if (isAarch32)
+ {
+ Context.SetX(13, (uint)stackTop);
+ }
+ else
+ {
+ Context.SetX(31, stackTop);
+ }
+
+ Context.CntfrqEl0 = 19200000;
+ Context.Tpidr = (long)_tlsAddress;
owner.SubscribeThreadEventHandlers(Context);
- Context.WorkFinished += ThreadFinishedHandler;
-
ThreadUid = System.GetThreadUid();
_hasBeenInitialized = true;
@@ -1002,8 +1012,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
public void SetEntryArguments(long argsPtr, int threadHandle)
{
- Context.ThreadState.X0 = (ulong)argsPtr;
- Context.ThreadState.X1 = (ulong)threadHandle;
+ Context.SetX(0, (ulong)argsPtr);
+ Context.SetX(1, (ulong)threadHandle);
}
public void TimeUp()
@@ -1013,7 +1023,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
public string GetGuestStackTrace()
{
- return Owner.Debugger.GetGuestStackTrace(Context.ThreadState);
+ return Owner.Debugger.GetGuestStackTrace(Context);
}
public void PrintGuestStackTrace()
@@ -1026,12 +1036,32 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
Logger.PrintInfo(LogClass.Cpu, trace.ToString());
}
- private void ThreadFinishedHandler(object sender, EventArgs e)
+        // Starts the host thread the first time this is called. The running
+        // marker is flipped from 0 to 1 atomically, so any later (or
+        // concurrent) call sees a non-zero value and returns without
+        // starting the thread again.
+        public void Execute()
+        {
+            int previous = Interlocked.CompareExchange(ref _hostThreadRunning, 1, 0);
+
+            if (previous != 0)
+            {
+                return;
+            }
+
+            HostThread.Start();
+        }
+
+        // Entry routine of the host thread: asks the owner's translator to
+        // execute this thread's context from the given entry point and, once
+        // that call returns, runs the thread exit sequence.
+        private void ThreadStart(ulong entrypoint)
+        {
+            var translator = Owner.Translator;
+
+            translator.Execute(Context, entrypoint);
+
+            ThreadExit();
+        }
+
+ private void ThreadExit()
{
System.Scheduler.ExitThread(this);
System.Scheduler.RemoveThread(this);
}
+        // Tells whether the caller is running on this thread's host thread.
+        public bool IsCurrentHostThread()
+        {
+            Thread caller = Thread.CurrentThread;
+
+            return caller == HostThread;
+        }
+
public override bool IsSignaled()
{
return _hasExited;
diff --git a/Ryujinx.HLE/HOS/ProgramLoader.cs b/Ryujinx.HLE/HOS/ProgramLoader.cs
index af974e18f..0bc6447e5 100644
--- a/Ryujinx.HLE/HOS/ProgramLoader.cs
+++ b/Ryujinx.HLE/HOS/ProgramLoader.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/ServiceCtx.cs b/Ryujinx.HLE/HOS/ServiceCtx.cs
index 99b2d5afe..df74ba0a8 100644
--- a/Ryujinx.HLE/HOS/ServiceCtx.cs
+++ b/Ryujinx.HLE/HOS/ServiceCtx.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.HLE.HOS.Ipc;
using Ryujinx.HLE.HOS.Kernel.Ipc;
using Ryujinx.HLE.HOS.Kernel.Process;
@@ -11,7 +11,7 @@ namespace Ryujinx.HLE.HOS
{
public Switch Device { get; }
public KProcess Process { get; }
- public MemoryManager Memory { get; }
+ public IMemoryManager Memory { get; }
public KThread Thread { get; }
public KClientSession Session { get; }
public IpcMessage Request { get; }
@@ -22,7 +22,7 @@ namespace Ryujinx.HLE.HOS
public ServiceCtx(
Switch device,
KProcess process,
- MemoryManager memory,
+ IMemoryManager memory,
KThread thread,
KClientSession session,
IpcMessage request,
diff --git a/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs b/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs
index 050e44971..10210afed 100644
--- a/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs
+++ b/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.SystemState;
using Ryujinx.HLE.Utilities;
diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs
index 4191dfd67..751d3f704 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Audio;
using Ryujinx.HLE.HOS.Ipc;
using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs
index 599f3d81f..e8baf8192 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Audio;
using Ryujinx.Audio.Adpcm;
using Ryujinx.Common.Logging;
@@ -24,7 +24,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
private KEvent _updateEvent;
- private MemoryManager _memory;
+ private IMemoryManager _memory;
private IAalOutput _audioOut;
@@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
public IAudioRenderer(
Horizon system,
- MemoryManager memory,
+ IMemoryManager memory,
IAalOutput audioOut,
AudioRendererParameter Params)
{
diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs
index 93a16a617..aaff20a5b 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Audio.Adpcm;
using System;
@@ -65,7 +65,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
_outStatus.VoiceDropsCount = 0;
}
- public int[] GetBufferData(MemoryManager memory, int maxSamples, out int samplesCount)
+ public int[] GetBufferData(IMemoryManager memory, int maxSamples, out int samplesCount)
{
if (!Playing)
{
@@ -122,7 +122,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
return output;
}
- private void UpdateBuffer(MemoryManager memory)
+ private void UpdateBuffer(IMemoryManager memory)
{
// TODO: Implement conversion for formats other
// than interleaved stereo (2 channels).
diff --git a/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs
index ad0dd0445..bea0f3f20 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Audio;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Kernel.Threading;
diff --git a/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs b/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs
index b8780730d..748a600d5 100644
--- a/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs
+++ b/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.HLE.HOS.Ipc;
using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
index 50ab7e01f..261c1c5ae 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Ipc;
using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs
index 3b96ed6bc..47d15a7e5 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Memory;
using Ryujinx.HLE.HOS.Kernel.Process;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs
index 4f276d5d9..04b0c63cd 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common.Logging;
using System;
using System.Diagnostics;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
index c5f296363..e7879f4a2 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Memory;
using Ryujinx.HLE.HOS.Kernel.Process;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs
index 35f1a9491..2a84b677f 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Kernel.Process;
using System;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs
index 722866622..d9c579a2a 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Memory;
using Ryujinx.HLE.HOS.Kernel.Process;
diff --git a/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs b/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs
index fea5bf2f6..5b2d6c84e 100644
--- a/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs
@@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Services.Time.Clock
ClockSourceId = GetClockSourceId()
};
- TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.ThreadState.CntpctEl0, thread.Context.ThreadState.CntfrqEl0);
+ TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.CntpctEl0, thread.Context.CntfrqEl0);
result.TimePoint = _setupValue + ticksTimeSpan.ToSeconds();
diff --git a/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs b/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs
index 7a69b014b..6cd4c80b4 100644
--- a/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs
@@ -30,7 +30,7 @@ namespace Ryujinx.HLE.HOS.Services.Time.Clock
ClockSourceId = GetClockSourceId()
};
- TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.ThreadState.CntpctEl0, thread.Context.ThreadState.CntfrqEl0);
+ TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.CntpctEl0, thread.Context.CntfrqEl0);
result.TimePoint = ticksTimeSpan.ToSeconds();
diff --git a/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs b/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs
index 9ee038d58..d9c5b4f25 100644
--- a/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs
@@ -141,7 +141,7 @@ namespace Ryujinx.HLE.HOS.Services.Time
if (currentTimePoint.ClockSourceId == otherContext.SteadyTimePoint.ClockSourceId)
{
- TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(context.Thread.Context.ThreadState.CntpctEl0, context.Thread.Context.ThreadState.CntfrqEl0);
+ TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(context.Thread.Context.CntpctEl0, context.Thread.Context.CntfrqEl0);
long baseTimePoint = otherContext.Offset + currentTimePoint.TimePoint - ticksTimeSpan.ToSeconds();
context.ResponseData.Write(baseTimePoint);
diff --git a/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs b/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs
index 895bb1f3e..b820de38f 100644
--- a/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Services.Time.TimeZone;
@@ -106,7 +106,7 @@ namespace Ryujinx.HLE.HOS.Services.Time
string locationName = Encoding.ASCII.GetString(context.RequestData.ReadBytes(0x24)).TrimEnd('\0');
ResultCode resultCode = TimeZoneManager.Instance.LoadTimeZoneRules(out TimeZoneRule rules, locationName);
-
+
// Write TimeZoneRule if success
if (resultCode == 0)
{
diff --git a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs
index 2f1e68e8f..15db6ff2b 100644
--- a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs
+++ b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using Ryujinx.HLE.HOS.Ipc;
using Ryujinx.HLE.HOS.Kernel.Common;
using System;
diff --git a/Ryujinx.HLE/Ryujinx.HLE.csproj b/Ryujinx.HLE/Ryujinx.HLE.csproj
index 78e5c2a3a..3a12a179f 100644
--- a/Ryujinx.HLE/Ryujinx.HLE.csproj
+++ b/Ryujinx.HLE/Ryujinx.HLE.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
Debug;Release;Profile Debug;Profile Release
7.1
@@ -38,16 +38,18 @@
-
+
+
+
diff --git a/Ryujinx.HLE/Utilities/StructReader.cs b/Ryujinx.HLE/Utilities/StructReader.cs
index 441dfd195..36e5c7d19 100644
--- a/Ryujinx.HLE/Utilities/StructReader.cs
+++ b/Ryujinx.HLE/Utilities/StructReader.cs
@@ -1,15 +1,15 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.Utilities
{
class StructReader
{
- private MemoryManager _memory;
+ private IMemoryManager _memory;
public long Position { get; private set; }
- public StructReader(MemoryManager memory, long position)
+ public StructReader(IMemoryManager memory, long position)
{
_memory = memory;
Position = position;
diff --git a/Ryujinx.HLE/Utilities/StructWriter.cs b/Ryujinx.HLE/Utilities/StructWriter.cs
index 86cfeedd7..c156956db 100644
--- a/Ryujinx.HLE/Utilities/StructWriter.cs
+++ b/Ryujinx.HLE/Utilities/StructWriter.cs
@@ -1,15 +1,15 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
using System.Runtime.InteropServices;
namespace Ryujinx.HLE.Utilities
{
class StructWriter
{
- private MemoryManager _memory;
+ private IMemoryManager _memory;
public long Position { get; private set; }
- public StructWriter(MemoryManager memory, long position)
+ public StructWriter(IMemoryManager memory, long position)
{
_memory = memory;
Position = position;
diff --git a/Ryujinx.LLE/Luea.csproj b/Ryujinx.LLE/Luea.csproj
index 719a0ef38..895f27eef 100644
--- a/Ryujinx.LLE/Luea.csproj
+++ b/Ryujinx.LLE/Luea.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
Exe
Debug;Release;Profile Debug;Profile Release
diff --git a/Ryujinx.Profiler/Ryujinx.Profiler.csproj b/Ryujinx.Profiler/Ryujinx.Profiler.csproj
index 5a4c8f4f9..bcc2d17d2 100644
--- a/Ryujinx.Profiler/Ryujinx.Profiler.csproj
+++ b/Ryujinx.Profiler/Ryujinx.Profiler.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
true
Debug;Release;Profile Debug;Profile Release
diff --git a/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj b/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj
index 04cab8328..a2ff36d9b 100644
--- a/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj
+++ b/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
Exe
Debug;Release;Profile Debug;Profile Release
diff --git a/Ryujinx.Tests.Unicorn/IndexedProperty.cs b/Ryujinx.Tests.Unicorn/IndexedProperty.cs
index a4365026b..65d445fc0 100644
--- a/Ryujinx.Tests.Unicorn/IndexedProperty.cs
+++ b/Ryujinx.Tests.Unicorn/IndexedProperty.cs
@@ -4,24 +4,24 @@ namespace Ryujinx.Tests.Unicorn
{
+ // Wraps a getter delegate and a setter delegate so that callers can use
+ // indexer syntax (property[index]) to read and write values.
+ // TIndex is the type of the index, TValue the type of the stored value.
public class IndexedProperty<TIndex, TValue>
{
- readonly Action<TIndex, TValue> SetAction;
- readonly Func<TIndex, TValue> GetFunc;
+ private readonly Func<TIndex, TValue> _getFunc;
+ private readonly Action<TIndex, TValue> _setAction;
public IndexedProperty(Func<TIndex, TValue> getFunc, Action<TIndex, TValue> setAction)
{
- GetFunc = getFunc;
- SetAction = setAction;
+ _getFunc = getFunc;
+ _setAction = setAction;
}
- public TValue this[TIndex i]
+ // Reads forward to the getter delegate, writes to the setter delegate.
+ public TValue this[TIndex index]
{
get
{
- return GetFunc(i);
+ return _getFunc(index);
}
set
{
- SetAction(i, value);
+ _setAction(index, value);
}
}
}
diff --git a/Ryujinx.Tests.Unicorn/Native/Interface.cs b/Ryujinx.Tests.Unicorn/Native/Interface.cs
index 006585b5c..59b1da079 100644
--- a/Ryujinx.Tests.Unicorn/Native/Interface.cs
+++ b/Ryujinx.Tests.Unicorn/Native/Interface.cs
@@ -16,11 +16,13 @@ namespace Ryujinx.Tests.Unicorn.Native
+ // Reads `length` consecutive unmanaged structures of type T, starting at
+ // address `input`, into a newly allocated managed array that is returned
+ // through `output`. The stride between elements is Marshal.SizeOf(typeof(T)).
public static void MarshalArrayOf<T>(IntPtr input, int length, out T[] output)
{
int size = Marshal.SizeOf(typeof(T));
+
output = new T[length];
for (int i = 0; i < length; i++)
{
IntPtr item = new IntPtr(input.ToInt64() + i * size);
+
output[i] = Marshal.PtrToStructure<T>(item);
}
}
@@ -29,7 +31,7 @@ namespace Ryujinx.Tests.Unicorn.Native
public static extern uint uc_version(out uint major, out uint minor);
[DllImport("unicorn", CallingConvention = CallingConvention.Cdecl)]
- public static extern UnicornError uc_open(uint arch, uint mode, out IntPtr uc);
+ public static extern UnicornError uc_open(UnicornArch arch, UnicornMode mode, out IntPtr uc);
[DllImport("unicorn", CallingConvention = CallingConvention.Cdecl)]
public static extern UnicornError uc_close(IntPtr uc);
diff --git a/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs b/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs
index 73710faa8..ff633293e 100644
--- a/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs
+++ b/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs
@@ -1,6 +1,6 @@
namespace Ryujinx.Tests.Unicorn.Native
{
- public enum UnicornArch
+ public enum UnicornArch : uint
{
UC_ARCH_ARM = 1, // ARM architecture (including Thumb, Thumb-2)
UC_ARCH_ARM64, // ARM-64, also called AArch64
diff --git a/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs b/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs
index 5cd835169..8045f2dac 100644
--- a/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs
+++ b/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs
@@ -1,7 +1,7 @@
// ReSharper disable InconsistentNaming
namespace Ryujinx.Tests.Unicorn.Native
{
- public enum UnicornMode
+ public enum UnicornMode : uint
{
UC_MODE_LITTLE_ENDIAN = 0, // little-endian mode (default mode)
UC_MODE_BIG_ENDIAN = 1 << 30, // big-endian mode
diff --git a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
index 5a99b39f1..d15a405bc 100644
--- a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
+++ b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
@@ -2,7 +2,7 @@
netcoreapp2.1
- win10-x64;osx-x64;linux-x64
+ win-x64;osx-x64;linux-x64
true
Debug;Release;Profile Debug;Profile Release
@@ -23,7 +23,6 @@
-
diff --git a/Ryujinx.Tests.Unicorn/SimdValue.cs b/Ryujinx.Tests.Unicorn/SimdValue.cs
new file mode 100644
index 000000000..2b5284305
--- /dev/null
+++ b/Ryujinx.Tests.Unicorn/SimdValue.cs
@@ -0,0 +1,112 @@
+using System;
+
+namespace Ryujinx.Tests.Unicorn
+{
+    /// <summary>
+    /// A 128-bit value held as two 64-bit elements, readable as 32-bit or
+    /// 64-bit integers, or as single or double precision floats.
+    /// Instances are immutable once constructed.
+    /// </summary>
+    public struct SimdValue : IEquatable<SimdValue>
+    {
+        // First 64-bit element (64-bit index 0, 32-bit indices 0 and 1).
+        private readonly ulong _e0;
+
+        // Second 64-bit element (64-bit index 1, 32-bit indices 2 and 3).
+        private readonly ulong _e1;
+
+        /// <summary>Creates a value from its two 64-bit elements.</summary>
+        /// <param name="e0">Element at 64-bit index 0.</param>
+        /// <param name="e1">Element at 64-bit index 1.</param>
+        public SimdValue(ulong e0, ulong e1)
+        {
+            _e0 = e0;
+            _e1 = e1;
+        }
+
+        /// <summary>
+        /// Creates a value from a byte array: element 0 is read from offset 0
+        /// and element 1 from offset 8, both through <see cref="BitConverter"/>.
+        /// </summary>
+        /// <param name="data">Array holding at least 16 bytes.</param>
+        public SimdValue(byte[] data)
+        {
+            _e0 = BitConverter.ToUInt64(data, 0);
+            _e1 = BitConverter.ToUInt64(data, 8);
+        }
+
+        /// <summary>Returns 32-bit element 0 reinterpreted as a float.</summary>
+        public float AsFloat()
+        {
+            return GetFloat(0);
+        }
+
+        /// <summary>Returns 64-bit element 0 reinterpreted as a double.</summary>
+        public double AsDouble()
+        {
+            return GetDouble(0);
+        }
+
+        /// <summary>Returns the 32-bit element at <paramref name="index"/> (0 to 3) reinterpreted as a float.</summary>
+        /// <exception cref="ArgumentOutOfRangeException">The index is not in the range 0 to 3.</exception>
+        public float GetFloat(int index)
+        {
+            return BitConverter.Int32BitsToSingle(GetInt32(index));
+        }
+
+        /// <summary>Returns the 64-bit element at <paramref name="index"/> (0 or 1) reinterpreted as a double.</summary>
+        /// <exception cref="ArgumentOutOfRangeException">The index is neither 0 nor 1.</exception>
+        public double GetDouble(int index)
+        {
+            return BitConverter.Int64BitsToDouble(GetInt64(index));
+        }
+
+        // Signed views: same bits as the unsigned accessors below.
+        public int GetInt32(int index) => (int)GetUInt32(index);
+        public long GetInt64(int index) => (long)GetUInt64(index);
+
+        /// <summary>
+        /// Returns the 32-bit element at <paramref name="index"/>: indices 0 and 1
+        /// are the low and high halves of element 0, indices 2 and 3 the low and
+        /// high halves of element 1.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">The index is not in the range 0 to 3.</exception>
+        public uint GetUInt32(int index)
+        {
+            switch (index)
+            {
+                case 0: return (uint)(_e0 >> 0);
+                case 1: return (uint)(_e0 >> 32);
+                case 2: return (uint)(_e1 >> 0);
+                case 3: return (uint)(_e1 >> 32);
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        /// <summary>Returns the 64-bit element at <paramref name="index"/> (0 or 1).</summary>
+        /// <exception cref="ArgumentOutOfRangeException">The index is neither 0 nor 1.</exception>
+        public ulong GetUInt64(int index)
+        {
+            switch (index)
+            {
+                case 0: return _e0;
+                case 1: return _e1;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        /// <summary>
+        /// Returns a new 16-byte array with element 0 at offset 0 and element 1
+        /// at offset 8, each encoded by <see cref="BitConverter.GetBytes(ulong)"/>.
+        /// </summary>
+        public byte[] ToArray()
+        {
+            byte[] data = new byte[16];
+
+            BitConverter.GetBytes(_e0).CopyTo(data, 0);
+            BitConverter.GetBytes(_e1).CopyTo(data, 8);
+
+            return data;
+        }
+
+        public override int GetHashCode()
+        {
+            return HashCode.Combine(_e0, _e1);
+        }
+
+        public static bool operator ==(SimdValue x, SimdValue y)
+        {
+            return x.Equals(y);
+        }
+
+        public static bool operator !=(SimdValue x, SimdValue y)
+        {
+            return !x.Equals(y);
+        }
+
+        public override bool Equals(object obj)
+        {
+            return obj is SimdValue vector && Equals(vector);
+        }
+
+        /// <summary>Two values are equal when both 64-bit elements match.</summary>
+        public bool Equals(SimdValue other)
+        {
+            return other._e0 == _e0 && other._e1 == _e1;
+        }
+
+        /// <summary>
+        /// Formats the value as "0x" followed by 32 hexadecimal digits,
+        /// element 1 first and element 0 last.
+        /// </summary>
+        public override string ToString()
+        {
+            return $"0x{_e1:X16}{_e0:X16}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Tests.Unicorn/UnicornAArch64.cs b/Ryujinx.Tests.Unicorn/UnicornAArch64.cs
index 0425d1d3a..4453d18d0 100644
--- a/Ryujinx.Tests.Unicorn/UnicornAArch64.cs
+++ b/Ryujinx.Tests.Unicorn/UnicornAArch64.cs
@@ -1,8 +1,5 @@
using Ryujinx.Tests.Unicorn.Native;
using System;
-using System.Diagnostics.Contracts;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Tests.Unicorn
{
@@ -15,95 +12,96 @@ namespace Ryujinx.Tests.Unicorn
get
{
return new IndexedProperty(
- (int i) => GetX(i),
+ (int i) => GetX(i),
(int i, ulong value) => SetX(i, value));
}
}
- public IndexedProperty> Q
+ public IndexedProperty