diff --git a/ChocolArm64.csproj b/ChocolArm64.csproj
index 1156e36..0b4051b 100644
--- a/ChocolArm64.csproj
+++ b/ChocolArm64.csproj
@@ -14,6 +14,7 @@
   </PropertyGroup>
 
   <ItemGroup>
+    <PackageReference Include="Mono.Posix.NETStandard" Version="1.0.0" />
     <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
   </ItemGroup>
 
diff --git a/CpuThread.cs b/CpuThread.cs
index 6cd34f8..ad1fd6f 100644
--- a/CpuThread.cs
+++ b/CpuThread.cs
@@ -32,8 +32,6 @@ namespace ChocolArm64
             {
                 translator.ExecuteSubroutine(this, entrypoint);
 
-                memory.RemoveMonitor(ThreadState.Core);
-
                 WorkFinished?.Invoke(this, EventArgs.Empty);
             });
         }
diff --git a/Instructions/InstEmitMemoryEx.cs b/Instructions/InstEmitMemoryEx.cs
index 42daca6..215fcff 100644
--- a/Instructions/InstEmitMemoryEx.cs
+++ b/Instructions/InstEmitMemoryEx.cs
@@ -23,7 +23,9 @@ namespace ChocolArm64.Instructions
 
         public static void Clrex(ILEmitterCtx context)
         {
-            EmitMemoryCall(context, nameof(MemoryManager.ClearExclusive));
+            context.EmitLdarg(TranslatedSub.StateArgIdx);
+
+            context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.ClearExclusiveAddress));
         }
 
         public static void Dmb(ILEmitterCtx context) => EmitBarrier(context);
@@ -37,12 +39,12 @@ namespace ChocolArm64.Instructions
 
         private static void EmitLdr(ILEmitterCtx context, AccessType accType)
         {
-            EmitLoad(context, accType, false);
+            EmitLoad(context, accType, pair: false);
         }
 
         private static void EmitLdp(ILEmitterCtx context, AccessType accType)
         {
-            EmitLoad(context, accType, true);
+            EmitLoad(context, accType, pair: true);
         }
 
         private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair)
@@ -57,32 +59,128 @@ namespace ChocolArm64.Instructions
                 EmitBarrier(context);
             }
 
-            if (exclusive)
-            {
-                EmitMemoryCall(context, nameof(MemoryManager.SetExclusive), op.Rn);
-            }
-
             context.EmitLdint(op.Rn);
             context.EmitSttmp();
 
-            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-            context.EmitLdtmp();
+            if (exclusive)
+            {
+                context.EmitLdarg(TranslatedSub.StateArgIdx);
+                context.EmitLdtmp();
 
-            EmitReadZxCall(context, op.Size);
+                context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.SetExclusiveAddress));
+            }
 
-            context.EmitStintzr(op.Rt);
+            void WriteExclusiveValue(string propName)
+            {
+                if (op.Size < 3)
+                {
+                    context.Emit(OpCodes.Conv_U8);
+                }
+
+                context.EmitSttmp2();
+                context.EmitLdarg(TranslatedSub.StateArgIdx);
+                context.EmitLdtmp2();
+
+                context.EmitCallPrivatePropSet(typeof(CpuThreadState), propName);
+
+                context.EmitLdtmp2();
+
+                if (op.Size < 3)
+                {
+                    context.Emit(OpCodes.Conv_U4);
+                }
+            }
 
             if (pair)
             {
+                //Exclusive loads should be atomic. For pairwise loads, we need to
+                //read all the data at once. For a 32-bits pairwise load, we do a
+                //simple 64-bits load, for a 128-bits load, we need to call a special
+                //method to read 128-bits atomically.
+                if (op.Size == 2)
+                {
+                    context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+                    context.EmitLdtmp();
+
+                    EmitReadZxCall(context, 3);
+
+                    context.Emit(OpCodes.Dup);
+
+                    //Mask low half.
+                    context.Emit(OpCodes.Conv_U4);
+
+                    if (exclusive)
+                    {
+                        WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
+                    }
+
+                    context.EmitStintzr(op.Rt);
+
+                    //Shift high half.
+                    context.EmitLsr(32);
+                    context.Emit(OpCodes.Conv_U4);
+
+                    if (exclusive)
+                    {
+                        WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
+                    }
+
+                    context.EmitStintzr(op.Rt2);
+                }
+                else if (op.Size == 3)
+                {
+                    context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+                    context.EmitLdtmp();
+
+                    context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicReadInt128));
+
+                    context.Emit(OpCodes.Dup);
+
+                    //Load low part of the vector.
+                    context.EmitLdc_I4(0);
+                    context.EmitLdc_I4(3);
+
+                    VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
+
+                    if (exclusive)
+                    {
+                        WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
+                    }
+
+                    context.EmitStintzr(op.Rt);
+
+                    //Load high part of the vector.
+                    context.EmitLdc_I4(1);
+                    context.EmitLdc_I4(3);
+
+                    VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
+
+                    if (exclusive)
+                    {
+                        WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
+                    }
+
+                    context.EmitStintzr(op.Rt2);
+                }
+                else
+                {
+                    throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+                }
+            }
+            else
+            {
+                //8, 16, 32 or 64-bits (non-pairwise) load.
                 context.EmitLdarg(TranslatedSub.MemoryArgIdx);
                 context.EmitLdtmp();
-                context.EmitLdc_I8(1 << op.Size);
-
-                context.Emit(OpCodes.Add);
 
                 EmitReadZxCall(context, op.Size);
 
-                context.EmitStintzr(op.Rt2);
+                if (exclusive)
+                {
+                    WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
+                }
+
+                context.EmitStintzr(op.Rt);
             }
         }
 
@@ -99,12 +197,12 @@ namespace ChocolArm64.Instructions
 
         private static void EmitStr(ILEmitterCtx context, AccessType accType)
         {
-            EmitStore(context, accType, false);
+            EmitStore(context, accType, pair: false);
         }
 
         private static void EmitStp(ILEmitterCtx context, AccessType accType)
         {
-            EmitStore(context, accType, true);
+            EmitStore(context, accType, pair: true);
         }
 
         private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair)
@@ -119,66 +217,133 @@ namespace ChocolArm64.Instructions
                 EmitBarrier(context);
             }
 
-            ILLabel lblEx  = new ILLabel();
-            ILLabel lblEnd = new ILLabel();
-
             if (exclusive)
             {
-                EmitMemoryCall(context, nameof(MemoryManager.TestExclusive), op.Rn);
+                ILLabel lblEx  = new ILLabel();
+                ILLabel lblEnd = new ILLabel();
+
+                context.EmitLdarg(TranslatedSub.StateArgIdx);
+                context.EmitLdint(op.Rn);
+
+                context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.CheckExclusiveAddress));
 
                 context.Emit(OpCodes.Brtrue_S, lblEx);
 
-                context.EmitLdc_I8(1);
+                //Address check failed, set error right away and do not store anything.
+                context.EmitLdc_I4(1);
                 context.EmitStintzr(op.Rs);
 
-                context.Emit(OpCodes.Br_S, lblEnd);
-            }
+                context.Emit(OpCodes.Br, lblEnd);
 
-            context.MarkLabel(lblEx);
+                //Address check passed.
+                context.MarkLabel(lblEx);
 
-            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-            context.EmitLdint(op.Rn);
-            context.EmitLdintzr(op.Rt);
-
-            EmitWriteCall(context, op.Size);
-
-            if (pair)
-            {
                 context.EmitLdarg(TranslatedSub.MemoryArgIdx);
                 context.EmitLdint(op.Rn);
-                context.EmitLdc_I8(1 << op.Size);
 
-                context.Emit(OpCodes.Add);
+                context.EmitLdarg(TranslatedSub.StateArgIdx);
 
-                context.EmitLdintzr(op.Rt2);
+                context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueLow));
 
-                EmitWriteCall(context, op.Size);
-            }
+                void EmitCast()
+                {
+                    //The input should be always int64.
+                    switch (op.Size)
+                    {
+                        case 0: context.Emit(OpCodes.Conv_U1); break;
+                        case 1: context.Emit(OpCodes.Conv_U2); break;
+                        case 2: context.Emit(OpCodes.Conv_U4); break;
+                    }
+                }
+
+                EmitCast();
+
+                if (pair)
+                {
+                    context.EmitLdarg(TranslatedSub.StateArgIdx);
+
+                    context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueHigh));
+
+                    EmitCast();
+
+                    context.EmitLdintzr(op.Rt);
+
+                    EmitCast();
+
+                    context.EmitLdintzr(op.Rt2);
+
+                    EmitCast();
+
+                    switch (op.Size)
+                    {
+                        case 2: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchange2xInt32)); break;
+                        case 3: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt128));  break;
+
+                        default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
+                    }
+                }
+                else
+                {
+                    context.EmitLdintzr(op.Rt);
+
+                    EmitCast();
+
+                    switch (op.Size)
+                    {
+                        case 0: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeByte));  break;
+                        case 1: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt16)); break;
+                        case 2: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt32)); break;
+                        case 3: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt64)); break;
+
+                        default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
+                    }
+                }
+
+                //The value returned is a bool, true if the values compared
+                //were equal and the new value was written, false otherwise.
+                //We need to invert this result, as on ARM 1 indicates failure,
+                //and 0 success on those instructions.
+                context.EmitLdc_I4(1);
+
+                context.Emit(OpCodes.Xor);
+                context.Emit(OpCodes.Dup);
+                context.Emit(OpCodes.Conv_U8);
 
-            if (exclusive)
-            {
-                context.EmitLdc_I8(0);
                 context.EmitStintzr(op.Rs);
 
-                EmitMemoryCall(context, nameof(MemoryManager.ClearExclusiveForStore));
+                //Only clear the exclusive monitor if the store was successful (Rs = false).
+                context.Emit(OpCodes.Brtrue_S, lblEnd);
+
+                Clrex(context);
+
+                context.MarkLabel(lblEnd);
             }
-
-            context.MarkLabel(lblEnd);
-        }
-
-        private static void EmitMemoryCall(ILEmitterCtx context, string name, int rn = -1)
-        {
-            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
-            context.EmitLdarg(TranslatedSub.StateArgIdx);
-
-            context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Core));
-
-            if (rn != -1)
+            else
             {
-                context.EmitLdint(rn);
-            }
+                void EmitWrite(int rt, long offset)
+                {
+                    context.EmitLdarg(TranslatedSub.MemoryArgIdx);
+                    context.EmitLdint(op.Rn);
 
-            context.EmitCall(typeof(MemoryManager), name);
+                    if (offset != 0)
+                    {
+                        context.EmitLdc_I8(offset);
+
+                        context.Emit(OpCodes.Add);
+                    }
+
+                    context.EmitLdintzr(rt);
+
+                    EmitWriteCall(context, op.Size);
+                }
+
+                EmitWrite(op.Rt, 0);
+
+                if (pair)
+                {
+                    EmitWrite(op.Rt2, 1 << op.Size);
+                }
+            }
         }
 
         private static void EmitBarrier(ILEmitterCtx context)
diff --git a/Memory/CompareExchange128.cs b/Memory/CompareExchange128.cs
new file mode 100644
index 0000000..0fbe10f
--- /dev/null
+++ b/Memory/CompareExchange128.cs
@@ -0,0 +1,186 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// 128-bit atomic compare-exchange and read, implemented with small x86-64 machine
+    /// code stubs built around the "lock cmpxchg16b" instruction.
+    /// </summary>
+    static class CompareExchange128
+    {
+        //Pair of 64-bit halves exchanged by value with the native stub.
+        private struct Int128
+        {
+            public ulong Low  { get; }
+            public ulong High { get; }
+
+            public Int128(ulong low, ulong high)
+            {
+                Low  = low;
+                High = high;
+            }
+        }
+
+        //Shape of the compare-exchange stub. The callers below treat the returned value
+        //as the 128-bit contents found at the address.
+        private delegate Int128 InterlockedCompareExchange(IntPtr address, Int128 expected, Int128 desired);
+
+        //Shape of the cpuid probe stub, which returns ecx of cpuid leaf 1.
+        private delegate int GetCpuId();
+
+        //Assigned once by the static constructor and never replaced afterwards.
+        private static readonly InterlockedCompareExchange _interlockedCompareExchange;
+
+        //Checks platform support and maps the stub matching the current OS calling convention.
+        //Throws PlatformNotSupportedException when the process is not x86-64, when the CPU
+        //does not report cmpxchg16b, or when the OS is not Windows, Linux or macOS.
+        static CompareExchange128()
+        {
+            //The stubs are x86-64 machine code, so the architecture of this process is what
+            //matters. OSArchitecture would also accept a 32-bits process hosted on a 64-bits
+            //OS, and such a process can not execute them.
+            if (RuntimeInformation.ProcessArchitecture != Architecture.X64 || !IsCmpxchg16bSupported())
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            byte[] interlockedCompareExchange128Code;
+
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                //On entry: rcx = result buffer, rdx = address, r8 = &expected, r9 = &desired.
+                interlockedCompareExchange128Code = new byte[]
+                {
+                    0x53,                         // push rbx
+                    0x49, 0x8b, 0x00,             // mov  rax, [r8]
+                    0x49, 0x8b, 0x19,             // mov  rbx, [r9]
+                    0x49, 0x89, 0xca,             // mov  r10, rcx
+                    0x49, 0x89, 0xd3,             // mov  r11, rdx
+                    0x49, 0x8b, 0x49, 0x08,       // mov  rcx, [r9+8]
+                    0x49, 0x8b, 0x50, 0x08,       // mov  rdx, [r8+8]
+                    0xf0, 0x49, 0x0f, 0xc7, 0x0b, // lock cmpxchg16b [r11]
+                    0x49, 0x89, 0x02,             // mov  [r10], rax
+                    0x4c, 0x89, 0xd0,             // mov  rax, r10
+                    0x49, 0x89, 0x52, 0x08,       // mov  [r10+8], rdx
+                    0x5b,                         // pop  rbx
+                    0xc3                          // ret
+                };
+            }
+            else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+                     RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+            {
+                //On entry: rdi = address, rsi/rdx = expected low/high, rcx/r8 = desired low/high.
+                interlockedCompareExchange128Code = new byte[]
+                {
+                    0x53,                         // push rbx
+                    0x49, 0x89, 0xd1,             // mov  r9, rdx
+                    0x48, 0x89, 0xcb,             // mov  rbx, rcx
+                    0x48, 0x89, 0xf0,             // mov  rax, rsi
+                    0x4c, 0x89, 0xca,             // mov  rdx, r9
+                    0x4c, 0x89, 0xc1,             // mov  rcx, r8
+                    0xf0, 0x48, 0x0f, 0xc7, 0x0f, // lock cmpxchg16b [rdi]
+                    0x5b,                         // pop  rbx
+                    0xc3                          // ret
+                };
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            IntPtr funcPtr = MapCodeAsExecutable(interlockedCompareExchange128Code);
+
+            _interlockedCompareExchange = Marshal.GetDelegateForFunctionPointer<InterlockedCompareExchange>(funcPtr);
+        }
+
+        //Runs cpuid leaf 1 through a temporary stub and tests the CMPXCHG16B feature bit.
+        private static bool IsCmpxchg16bSupported()
+        {
+            byte[] getCpuIdCode = new byte[]
+            {
+                0x53,                         // push rbx
+                0xb8, 0x01, 0x00, 0x00, 0x00, // mov eax, 0x1
+                0x0f, 0xa2,                   // cpuid
+                0x89, 0xc8,                   // mov eax, ecx
+                0x5b,                         // pop rbx
+                0xc3                          // ret
+            };
+
+            IntPtr funcPtr = MapCodeAsExecutable(getCpuIdCode);
+
+            try
+            {
+                GetCpuId getCpuId = Marshal.GetDelegateForFunctionPointer<GetCpuId>(funcPtr);
+
+                //Bit 13 of ecx is the CMPXCHG16B feature flag.
+                return (getCpuId() & (1 << 13)) != 0;
+            }
+            finally
+            {
+                //The probe is only needed once, release it even if the call above throws.
+                MemoryAlloc.Free(funcPtr);
+            }
+        }
+
+        //Copies the machine code into freshly allocated memory and makes that memory executable.
+        //Throws MemoryProtectionException (from MemoryAlloc.Reprotect) if the protection change fails.
+        private static IntPtr MapCodeAsExecutable(byte[] code)
+        {
+            ulong codeLength = (ulong)code.Length;
+
+            IntPtr funcPtr = MemoryAlloc.Allocate(codeLength);
+
+            unsafe
+            {
+                fixed (byte* codePtr = code)
+                {
+                    byte* dest = (byte*)funcPtr;
+
+                    long size = (long)codeLength;
+
+                    Buffer.MemoryCopy(codePtr, dest, size, size);
+                }
+            }
+
+            MemoryAlloc.Reprotect(funcPtr, codeLength, MemoryProtection.Execute);
+
+            return funcPtr;
+        }
+
+        /// <summary>
+        /// Atomically replaces the 128-bit value at <paramref name="address"/> with the desired
+        /// value, provided it currently equals the expected value.
+        /// </summary>
+        /// <returns>True if the value matched and was replaced, false otherwise.</returns>
+        public static bool InterlockedCompareExchange128(
+            IntPtr address,
+            ulong  expectedLow,
+            ulong  expectedHigh,
+            ulong  desiredLow,
+            ulong  desiredHigh)
+        {
+            Int128 expected = new Int128(expectedLow, expectedHigh);
+            Int128 desired  = new Int128(desiredLow,  desiredHigh);
+
+            Int128 old = _interlockedCompareExchange(address, expected, desired);
+
+            return old.Low == expected.Low && old.High == expected.High;
+        }
+
+        /// <summary>
+        /// Atomically reads the 128-bit value at <paramref name="address"/>.
+        /// </summary>
+        public static void InterlockedRead128(IntPtr address, out ulong low, out ulong high)
+        {
+            //Comparing against zero and "replacing" with zero can never change the stored
+            //value, while the stub still hands back what it found at the address.
+            Int128 zero = new Int128(0, 0);
+
+            Int128 old = _interlockedCompareExchange(address, zero, zero);
+
+            low  = old.Low;
+            high = old.High;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Memory/MemoryAlloc.cs b/Memory/MemoryAlloc.cs
new file mode 100644
index 0000000..a24299c
--- /dev/null
+++ b/Memory/MemoryAlloc.cs
@@ -0,0 +1,135 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// Platform independent entry points for raw memory allocation, protection and release.
+    /// Each call is forwarded to the Windows or the Unix backend depending on the running OS.
+    /// </summary>
+    public static class MemoryAlloc
+    {
+        private static bool IsWindows => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+
+        //Linux is probed first, macOS second.
+        private static bool IsUnix =>
+            RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+            RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
+
+        /// <summary>True on Windows, the only platform where write tracking is forwarded to the OS.</summary>
+        public static bool HasWriteWatchSupport => IsWindows;
+
+        /// <summary>
+        /// Allocates <paramref name="size"/> bytes through the backend of the current OS.
+        /// </summary>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static IntPtr Allocate(ulong size)
+        {
+            if (IsWindows)
+            {
+                return MemoryAllocWindows.Allocate(new IntPtr((long)size));
+            }
+
+            if (IsUnix)
+            {
+                return MemoryAllocUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Allocates memory whose writes can be tracked on Windows. On Linux and macOS this
+        /// falls back to a regular allocation.
+        /// </summary>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static IntPtr AllocateWriteTracked(ulong size)
+        {
+            if (IsWindows)
+            {
+                return MemoryAllocWindows.AllocateWriteTracked(new IntPtr((long)size));
+            }
+
+            if (IsUnix)
+            {
+                return MemoryAllocUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Changes the protection of a memory range.
+        /// </summary>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        /// <exception cref="MemoryProtectionException">The backend reported a failure.</exception>
+        public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
+        {
+            bool succeeded;
+
+            if (IsWindows)
+            {
+                succeeded = MemoryAllocWindows.Reprotect(address, new IntPtr((long)size), permission);
+            }
+            else if (IsUnix)
+            {
+                succeeded = MemoryAllocUnix.Reprotect(address, size, permission);
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            if (succeeded)
+            {
+                return;
+            }
+
+            throw new MemoryProtectionException(permission);
+        }
+
+        /// <summary>
+        /// Releases memory obtained from <see cref="Allocate"/> or <see cref="AllocateWriteTracked"/>.
+        /// </summary>
+        /// <returns>The success flag reported by the backend.</returns>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static bool Free(IntPtr address)
+        {
+            if (IsWindows)
+            {
+                return MemoryAllocWindows.Free(address);
+            }
+
+            if (IsUnix)
+            {
+                return MemoryAllocUnix.Free(address);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Retrieves the modified pages of a write tracked range.
+        /// </summary>
+        /// <returns>False when the query failed or the OS has no write tracking support.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr    address,
+            IntPtr    size,
+            IntPtr[]  addresses,
+            out ulong count)
+        {
+            //Write tracking only exists on Windows. Reporting a failure is also
+            //a valid answer for the platforms where the OS offers no such support.
+            if (!IsWindows)
+            {
+                count = 0;
+
+                return false;
+            }
+
+            return MemoryAllocWindows.GetModifiedPages(address, size, addresses, out count);
+        }
+    }
+}
\ No newline at end of file
diff --git a/Memory/MemoryAllocUnix.cs b/Memory/MemoryAllocUnix.cs
new file mode 100644
index 0000000..857c1c5
--- /dev/null
+++ b/Memory/MemoryAllocUnix.cs
@@ -0,0 +1,103 @@
+using Mono.Unix.Native;
+using System;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// Unix (Linux/macOS) backend for <see cref="MemoryAlloc"/>, built on mmap, mprotect and munmap.
+    /// </summary>
+    /// <remarks>
+    /// Each mapping is created one page larger than requested. The requested size is stored in
+    /// the 8 bytes immediately before the returned pointer so <see cref="Free"/> can recover it.
+    /// </remarks>
+    static class MemoryAllocUnix
+    {
+        /// <summary>
+        /// Maps private, anonymous, read/write memory.
+        /// </summary>
+        /// <param name="size">Number of usable bytes requested.</param>
+        /// <returns>Pointer located one page past the start of the underlying mapping.</returns>
+        /// <exception cref="OutOfMemoryException">mmap failed to create the mapping.</exception>
+        public static IntPtr Allocate(ulong size)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+
+            const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;
+
+            IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);
+
+            //mmap reports failure by returning MAP_FAILED, which is (void*)-1 rather than
+            //a null pointer. The null comparison is kept purely as a defensive guard.
+            if (ptr == new IntPtr(-1L) || ptr == IntPtr.Zero)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            unsafe
+            {
+                //Skip the header page, then stash the size right before the returned pointer.
+                ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);
+
+                *((ulong*)ptr - 1) = size;
+            }
+
+            return ptr;
+        }
+
+        /// <summary>
+        /// Changes the protection of a memory range through mprotect.
+        /// </summary>
+        /// <param name="address">Start of the range.</param>
+        /// <param name="size">Length of the range in bytes.</param>
+        /// <param name="protection">Protection to apply.</param>
+        /// <returns>True when mprotect returned 0, false otherwise.</returns>
+        public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
+        {
+            MmapProts prot = GetProtection(protection);
+
+            return Syscall.mprotect(address, size, prot) == 0;
+        }
+
+        /// <summary>
+        /// Translates the emulator protection value into the matching mmap protection flags.
+        /// </summary>
+        /// <exception cref="ArgumentException">The value has no mapping.</exception>
+        private static MmapProts GetProtection(Memory.MemoryProtection protection)
+        {
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None:           return MmapProts.PROT_NONE;
+                case Memory.MemoryProtection.Read:           return MmapProts.PROT_READ;
+                case Memory.MemoryProtection.ReadAndWrite:   return MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+                case Memory.MemoryProtection.ReadAndExecute: return MmapProts.PROT_READ | MmapProts.PROT_EXEC;
+                case Memory.MemoryProtection.Execute:        return MmapProts.PROT_EXEC;
+
+                default: throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+        }
+
+        /// <summary>
+        /// Unmaps memory obtained from <see cref="Allocate"/>.
+        /// </summary>
+        /// <param name="address">Pointer previously returned by <see cref="Allocate"/>.</param>
+        /// <returns>True when munmap returned 0, false otherwise.</returns>
+        public static bool Free(IntPtr address)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            ulong size;
+
+            unsafe
+            {
+                //Recover the size stored by Allocate, then step back to the real mapping start.
+                size = *((ulong*)address - 1);
+
+                address = new IntPtr(address.ToInt64() - (long)pageSize);
+            }
+
+            return Syscall.munmap(address, size + pageSize) == 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Memory/MemoryAllocWindows.cs b/Memory/MemoryAllocWindows.cs
new file mode 100644
index 0000000..82be8b1
--- /dev/null
+++ b/Memory/MemoryAllocWindows.cs
@@ -0,0 +1,195 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ChocolArm64.Memory
+{
+    /// <summary>
+    /// Windows backend for <see cref="MemoryAlloc"/>, built on the kernel32 virtual memory functions.
+    /// </summary>
+    static class MemoryAllocWindows
+    {
+        //Allocation and free type flags taken by VirtualAlloc and VirtualFree.
+        [Flags]
+        private enum AllocationType : uint
+        {
+            Commit     = 0x1000,
+            Reserve    = 0x2000,
+            Decommit   = 0x4000,
+            Release    = 0x8000,
+            Reset      = 0x80000,
+            Physical   = 0x400000,
+            TopDown    = 0x100000,
+            WriteWatch = 0x200000,
+            LargePages = 0x20000000
+        }
+
+        //Page protection values taken by VirtualAlloc and VirtualProtect.
+        [Flags]
+        private enum MemoryProtection
+        {
+            NoAccess                 = 0x01,
+            ReadOnly                 = 0x02,
+            ReadWrite                = 0x04,
+            WriteCopy                = 0x08,
+            Execute                  = 0x10,
+            ExecuteRead              = 0x20,
+            ExecuteReadWrite         = 0x40,
+            ExecuteWriteCopy         = 0x80,
+            GuardModifierflag        = 0x100,
+            NoCacheModifierflag      = 0x200,
+            WriteCombineModifierflag = 0x400
+        }
+
+        //Flags taken by GetWriteWatch.
+        private enum WriteWatchFlags : uint
+        {
+            None  = 0,
+            Reset = 1
+        }
+
+        [DllImport("kernel32.dll")]
+        private static extern IntPtr VirtualAlloc(
+            IntPtr           lpAddress,
+            IntPtr           dwSize,
+            AllocationType   flAllocationType,
+            MemoryProtection flProtect);
+
+        [DllImport("kernel32.dll")]
+        private static extern bool VirtualProtect(
+            IntPtr               lpAddress,
+            IntPtr               dwSize,
+            MemoryProtection     flNewProtect,
+            out MemoryProtection lpflOldProtect);
+
+        //The native dwSize parameter is a SIZE_T, which is pointer sized. It is declared
+        //as IntPtr here, like the other size parameters above, instead of a 32-bits integer.
+        [DllImport("kernel32.dll")]
+        private static extern bool VirtualFree(
+            IntPtr         lpAddress,
+            IntPtr         dwSize,
+            AllocationType dwFreeType);
+
+        [DllImport("kernel32.dll")]
+        private static extern int GetWriteWatch(
+            WriteWatchFlags dwFlags,
+            IntPtr          lpBaseAddress,
+            IntPtr          dwRegionSize,
+            IntPtr[]        lpAddresses,
+            ref ulong       lpdwCount,
+            out uint        lpdwGranularity);
+
+        /// <summary>
+        /// Reserves and commits read/write memory.
+        /// </summary>
+        /// <param name="size">Number of bytes to allocate.</param>
+        /// <exception cref="OutOfMemoryException">VirtualAlloc returned a null pointer.</exception>
+        public static IntPtr Allocate(IntPtr size)
+        {
+            const AllocationType flags =
+                AllocationType.Reserve |
+                AllocationType.Commit;
+
+            IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
+
+            if (ptr == IntPtr.Zero)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            return ptr;
+        }
+
+        /// <summary>
+        /// Reserves and commits read/write memory with the write watch flag set, so that
+        /// modified pages can later be queried through <see cref="GetModifiedPages"/>.
+        /// </summary>
+        /// <param name="size">Number of bytes to allocate.</param>
+        /// <exception cref="OutOfMemoryException">VirtualAlloc returned a null pointer.</exception>
+        public static IntPtr AllocateWriteTracked(IntPtr size)
+        {
+            const AllocationType flags =
+                AllocationType.Reserve |
+                AllocationType.Commit  |
+                AllocationType.WriteWatch;
+
+            IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
+
+            if (ptr == IntPtr.Zero)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            return ptr;
+        }
+
+        /// <summary>
+        /// Changes the protection of a memory range through VirtualProtect.
+        /// </summary>
+        /// <returns>The value returned by VirtualProtect (true on success).</returns>
+        public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
+        {
+            MemoryProtection prot = GetProtection(protection);
+
+            return VirtualProtect(address, size, prot, out _);
+        }
+
+        //Translates the emulator protection value into the matching Windows page protection.
+        //Throws ArgumentException for values without a mapping.
+        private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
+        {
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None:           return MemoryProtection.NoAccess;
+                case Memory.MemoryProtection.Read:           return MemoryProtection.ReadOnly;
+                case Memory.MemoryProtection.ReadAndWrite:   return MemoryProtection.ReadWrite;
+                case Memory.MemoryProtection.ReadAndExecute: return MemoryProtection.ExecuteRead;
+                case Memory.MemoryProtection.Execute:        return MemoryProtection.Execute;
+
+                default: throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+        }
+
+        /// <summary>
+        /// Releases memory obtained from <see cref="Allocate"/> or <see cref="AllocateWriteTracked"/>.
+        /// </summary>
+        /// <returns>The value returned by VirtualFree (true on success).</returns>
+        public static bool Free(IntPtr address)
+        {
+            //A release must be requested with a size of zero, the whole region is freed.
+            return VirtualFree(address, IntPtr.Zero, AllocationType.Release);
+        }
+
+        /// <summary>
+        /// Queries the written pages of a write tracked range and resets the tracking state.
+        /// </summary>
+        /// <param name="address">Start of the range to query.</param>
+        /// <param name="size">Length of the range in bytes.</param>
+        /// <param name="addresses">Buffer that receives the addresses of the written pages.</param>
+        /// <param name="count">Count reported back by GetWriteWatch.</param>
+        /// <returns>True when GetWriteWatch returned 0, false otherwise.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr    address,
+            IntPtr    size,
+            IntPtr[]  addresses,
+            out ulong count)
+        {
+            //On input the count is the capacity of the buffer, the call overwrites it.
+            ulong pagesCount = (ulong)addresses.Length;
+
+            //The granularity output is not needed here, so it is discarded.
+            int result = GetWriteWatch(
+                WriteWatchFlags.Reset,
+                address,
+                size,
+                addresses,
+                ref pagesCount,
+                out _);
+
+            count = pagesCount;
+
+            return result == 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Memory/MemoryManager.cs b/Memory/MemoryManager.cs
index 1f21256..afb0f65 100644
--- a/Memory/MemoryManager.cs
+++ b/Memory/MemoryManager.cs
@@ -1,16 +1,16 @@
 using ChocolArm64.Events;
 using ChocolArm64.Exceptions;
 using ChocolArm64.Instructions;
-using ChocolArm64.State;
 using System;
 using System.Collections.Concurrent;
-using System.Collections.Generic;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 using System.Threading;
 
+using static ChocolArm64.Memory.CompareExchange128;
+
 namespace ChocolArm64.Memory
 {
     public unsafe class MemoryManager : IMemory, IDisposable
@@ -30,21 +30,6 @@ namespace ChocolArm64.Memory
         private const int PtLvl0Bit = PageBits + PtLvl1Bits;
         private const int PtLvl1Bit = PageBits;
 
-        private const long ErgMask = (4 << CpuThreadState.ErgSizeLog2) - 1;
-
-        private class ArmMonitor
-        {
-            public long Position;
-            public bool ExState;
-
-            public bool HasExclusiveAccess(long position)
-            {
-                return Position == position && ExState;
-            }
-        }
-
-        private Dictionary<int, ArmMonitor> _monitors;
-
         private ConcurrentDictionary<long, IntPtr> _observedPages;
 
         public IntPtr Ram { get; private set; }
@@ -59,8 +44,6 @@ namespace ChocolArm64.Memory
 
         public MemoryManager(IntPtr ram)
         {
-            _monitors = new Dictionary<int, ArmMonitor>();
-
             _observedPages = new ConcurrentDictionary<long, IntPtr>();
 
             Ram = ram;
@@ -75,104 +58,139 @@ namespace ChocolArm64.Memory
             }
         }
 
-        public void RemoveMonitor(int core)
+        internal bool AtomicCompareExchange2xInt32( // Packs each low/high Int32 pair into one Int64 and delegates to AtomicCompareExchangeInt64.
+            long position,
+            int  expectedLow,
+            int  expectedHigh,
+            int  desiredLow,
+            int  desiredHigh)
         {
-            lock (_monitors)
-            {
-                ClearExclusive(core);
+            long expected = (uint)expectedLow; // going through uint keeps sign extension out of bits 32..63
+            long desired  = (uint)desiredLow;
 
-                _monitors.Remove(core);
-            }
+            expected |= (long)expectedHigh << 32; // high word lands in bits 32..63
+            desired  |= (long)desiredHigh  << 32;
+
+            return AtomicCompareExchangeInt64(position, expected, desired); // the 8-byte alignment check happens there
         }
 
-        public void SetExclusive(int core, long position)
+        internal bool AtomicCompareExchangeInt128( // 128-bit compare-exchange delegated to InterlockedCompareExchange128.
+            long  position,
+            ulong expectedLow,
+            ulong expectedHigh,
+            ulong desiredLow,
+            ulong desiredHigh)
         {
-            position &= ~ErgMask;
-
-            lock (_monitors)
+            if ((position & 0xf) != 0) // any of the low four address bits set means not 16-byte aligned
             {
-                foreach (ArmMonitor mon in _monitors.Values)
-                {
-                    if (mon.Position == position && mon.ExState)
-                    {
-                        mon.ExState = false;
-                    }
-                }
-
-                if (!_monitors.TryGetValue(core, out ArmMonitor threadMon))
-                {
-                    threadMon = new ArmMonitor();
-
-                    _monitors.Add(core, threadMon);
-                }
-
-                threadMon.Position = position;
-                threadMon.ExState  = true;
+                AbortWithAlignmentFault(position);
             }
+
+            IntPtr ptr = new IntPtr(TranslateWrite(position)); // host pointer comes from TranslateWrite because the exchange may store
+
+            return InterlockedCompareExchange128(ptr, expectedLow, expectedHigh, desiredLow, desiredHigh); // NOTE(review): presumably true when the exchange happened — confirm in CompareExchange128
         }
 
-        public bool TestExclusive(int core, long position)
+        internal Vector128<float> AtomicReadInt128(long position) // Reads 16 bytes at position via InterlockedRead128 and returns them in a vector.
         {
-            //Note: Any call to this method also should be followed by a
-            //call to ClearExclusiveForStore if this method returns true.
-            position &= ~ErgMask;
-
-            Monitor.Enter(_monitors);
-
-            if (!_monitors.TryGetValue(core, out ArmMonitor threadMon))
+            if ((position & 0xf) != 0) // any of the low four address bits set means not 16-byte aligned
             {
-                Monitor.Exit(_monitors);
-
-                return false;
+                AbortWithAlignmentFault(position);
             }
 
-            bool exState = threadMon.HasExclusiveAccess(position);
+            IntPtr ptr = new IntPtr(Translate(position)); // uses Translate rather than TranslateWrite
 
-            if (!exState)
-            {
-                Monitor.Exit(_monitors);
-            }
+            InterlockedRead128(ptr, out ulong low, out ulong high);
 
-            return exState;
+            Vector128<float> vector = default(Vector128<float>);
+
+            vector = VectorHelper.VectorInsertInt(low,  vector, 0, 3); // NOTE(review): presumably (value, vector, element index, size log2) — confirm in VectorHelper
+            vector = VectorHelper.VectorInsertInt(high, vector, 1, 3);
+
+            return vector;
         }
 
-        public void ClearExclusiveForStore(int core)
+        public bool AtomicCompareExchangeByte(long position, byte expected, byte desired) // Byte compare-exchange carried out through a 32-bit Interlocked.CompareExchange.
         {
-            if (_monitors.TryGetValue(core, out ArmMonitor threadMon))
-            {
-                threadMon.ExState = false;
-            }
+            int* ptr = (int*)TranslateWrite(position); // the exchange below stores, so use the write translation like the other writing atomics
 
-            Monitor.Exit(_monitors);
+            int currentValue = *ptr; // snapshot of the 32-bit window that starts at the target byte
+
+            int expected32 = (currentValue & ~byte.MaxValue) | expected; // keep bits 8..31 of the snapshot, substitute bits 0..7
+            int desired32  = (currentValue & ~byte.MaxValue) | desired;
+
+            return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32; // also false when bits 8..31 changed since the snapshot
         }
 
-        public void ClearExclusive(int core)
+        public bool AtomicCompareExchangeInt16(long position, short expected, short desired) // 16-bit compare-exchange carried out through a 32-bit Interlocked.CompareExchange.
         {
-            lock (_monitors)
+            if ((position & 1) != 0) // odd addresses are reported as an alignment fault
             {
-                if (_monitors.TryGetValue(core, out ArmMonitor threadMon))
-                {
-                    threadMon.ExState = false;
-                }
+                AbortWithAlignmentFault(position);
             }
+
+            int* ptr = (int*)TranslateWrite(position); // the exchange below stores, so use the write translation like the other writing atomics
+
+            int currentValue = *ptr; // snapshot of the 32-bit window that starts at the target halfword
+
+            int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected; // keep bits 16..31 of the snapshot, substitute bits 0..15
+            int desired32  = (currentValue & ~ushort.MaxValue) | (ushort)desired;
+
+            return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32; // also false when bits 16..31 changed since the snapshot
         }
 
-        public void WriteInt32ToSharedAddr(long position, int value)
+        public bool AtomicCompareExchangeInt32(long position, int expected, int desired) // Stores desired at position if the Int32 there equals expected; true when the old value matched.
         {
-            long maskedPosition = position & ~ErgMask;
-
-            lock (_monitors)
+            if ((position & 3) != 0) // addresses that are not a multiple of four are reported as an alignment fault
             {
-                foreach (ArmMonitor mon in _monitors.Values)
-                {
-                    if (mon.Position == maskedPosition && mon.ExState)
-                    {
-                        mon.ExState = false;
-                    }
-                }
-
-                WriteInt32(position, value);
+                AbortWithAlignmentFault(position);
             }
+
+            int* ptr = (int*)TranslateWrite(position);
+
+            return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected; // CompareExchange returns the previous value
+        }
+
+        public bool AtomicCompareExchangeInt64(long position, long expected, long desired)
+        {
+            // A 64-bit compare-exchange is only attempted on 8-byte aligned addresses.
+            if ((position & (sizeof(long) - 1)) != 0)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            long observed = Interlocked.CompareExchange(ref *(long*)TranslateWrite(position), desired, expected);
+            return observed == expected;
+        }
+
+        public int AtomicIncrementInt32(long position)
+        {
+            // Interlocked.Increment is only attempted on 4-byte aligned addresses.
+            if ((position & (sizeof(int) - 1)) != 0)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            int* counter = (int*)TranslateWrite(position);
+            return Interlocked.Increment(ref *counter);
+        }
+
+        public int AtomicDecrementInt32(long position)
+        {
+            // Reject addresses that are not a multiple of four before translating them.
+            bool misaligned = (position & 3) != 0;
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            return Interlocked.Decrement(ref *(int*)TranslateWrite(position));
+        }
+
+        private void AbortWithAlignmentFault(long position) // Always throws InvalidOperationException naming the misaligned address; shared by every atomic helper.
+        {
+            //TODO: Abort mode and exception support on the CPU.
+            throw new InvalidOperationException($"Tried to perform an atomic operation on a misaligned address 0x{position:X16}.");
         }
 
         public sbyte ReadSByte(long position)
diff --git a/Memory/MemoryProtection.cs b/Memory/MemoryProtection.cs
new file mode 100644
index 0000000..d0874bf
--- /dev/null
+++ b/Memory/MemoryProtection.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ChocolArm64.Memory
+{
+    [Flags] // Read, Write and Execute are distinct bits, so values can be combined with |
+    public enum MemoryProtection // Platform-neutral memory access rights.
+    {
+        None    = 0,
+        Read    = 1 << 0,
+        Write   = 1 << 1,
+        Execute = 1 << 2,
+
+        ReadAndWrite   = Read | Write, // named combinations of the single-bit values above
+        ReadAndExecute = Read | Execute
+    }
+}
\ No newline at end of file
diff --git a/Memory/MemoryProtectionException.cs b/Memory/MemoryProtectionException.cs
new file mode 100644
index 0000000..3d2ceba
--- /dev/null
+++ b/Memory/MemoryProtectionException.cs
@@ -0,0 +1,10 @@
+using System;
+
+namespace ChocolArm64.Memory
+{
+    class MemoryProtectionException : Exception // Exception whose message reports the protection value that could not be set.
+    {
+        public MemoryProtectionException(MemoryProtection protection) : // protection is only used to build the message
+            base($"Failed to set memory protection to \"{protection}\".") { }
+    }
+}
\ No newline at end of file
diff --git a/State/CpuThreadState.cs b/State/CpuThreadState.cs
index abec60b..caf73de 100644
--- a/State/CpuThreadState.cs
+++ b/State/CpuThreadState.cs
@@ -37,7 +37,6 @@ namespace ChocolArm64.State
         public int ElrHyp;
 
         public bool Running { get; set; }
-        public int  Core    { get; set; }
 
         private bool _interrupted;
 
@@ -85,6 +84,16 @@ namespace ChocolArm64.State
 
         internal Translator CurrentTranslator;
 
+        private ulong _exclusiveAddress; // masked address recorded by SetExclusiveAddress; ulong.MaxValue after ClearExclusiveAddress
+
+        internal ulong ExclusiveValueLow  { get; set; } // NOTE(review): presumably the value seen by the last exclusive load — confirm in InstEmitMemoryEx
+        internal ulong ExclusiveValueHigh { get; set; }
+
+        public CpuThreadState()
+        {
+            ClearExclusiveAddress(); // start with no exclusive address recorded
+        }
+
         static CpuThreadState()
         {
             _hostTickFreq = 1.0 / Stopwatch.Frequency;
@@ -94,6 +103,26 @@ namespace ChocolArm64.State
             _tickCounter.Start();
         }
 
+        internal void SetExclusiveAddress(ulong address) // Records address, masked by GetMaskedExclusiveAddress, as the exclusive address.
+        {
+            _exclusiveAddress = GetMaskedExclusiveAddress(address);
+        }
+
+        internal bool CheckExclusiveAddress(ulong address) // True when address, once masked, equals the recorded exclusive address.
+        {
+            return GetMaskedExclusiveAddress(address) == _exclusiveAddress;
+        }
+
+        internal void ClearExclusiveAddress() // Drops the recorded exclusive address.
+        {
+            _exclusiveAddress = ulong.MaxValue; // sentinel: masked addresses have their low bits cleared, so none should compare equal to this
+        }
+
+        private ulong GetMaskedExclusiveAddress(ulong address) // Rounds address down to a multiple of (4 << ErgSizeLog2).
+        {
+            return address & ~((4UL << ErgSizeLog2) - 1); // (4 << ErgSizeLog2) - 1 is the low-bit mask that gets cleared
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal bool Synchronize(int bbWeight)
         {
diff --git a/Translation/ILEmitterCtx.cs b/Translation/ILEmitterCtx.cs
index fa65bbf..5490123 100644
--- a/Translation/ILEmitterCtx.cs
+++ b/Translation/ILEmitterCtx.cs
@@ -49,6 +49,7 @@ namespace ChocolArm64.Translation
         private const int CmpOptTmp2Index = -4;
         private const int VecTmp1Index    = -5;
         private const int VecTmp2Index    = -6;
+        private const int IntTmp2Index    = -7;
 
         public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph)
         {
@@ -562,6 +563,9 @@ namespace ChocolArm64.Translation
         public void EmitLdtmp() => EmitLdint(IntTmpIndex);
         public void EmitSttmp() => EmitStint(IntTmpIndex);
 
+        public void EmitLdtmp2() => EmitLdint(IntTmp2Index); // same as EmitLdtmp, but on the IntTmp2Index slot
+        public void EmitSttmp2() => EmitStint(IntTmp2Index); // same as EmitSttmp, but on the IntTmp2Index slot
+
         public void EmitLdvectmp() => EmitLdvec(VecTmp1Index);
         public void EmitStvectmp() => EmitStvec(VecTmp1Index);
 
@@ -635,6 +639,36 @@ namespace ChocolArm64.Translation
             EmitCall(objType.GetMethod($"set_{propName}"));
         }
 
+        // Emits a call to the "get_" accessor of a property through EmitPrivateCall.
+        //
+        // objType:  type that declares the property.
+        // propName: property name without the "get_" prefix.
+        //
+        // Throws ArgumentNullException when either argument is null; objType is
+        // validated first.
+        public void EmitCallPrivatePropGet(Type objType, string propName)
+        {
+            Type   owner    = objType  ?? throw new ArgumentNullException(nameof(objType));
+            string property = propName ?? throw new ArgumentNullException(nameof(propName));
+
+            EmitPrivateCall(owner, $"get_{property}");
+        }
+
+        // Emits a call to the "set_" accessor of a property through EmitPrivateCall.
+        //
+        // objType:  type that declares the property.
+        // propName: property name without the "set_" prefix.
+        //
+        // Throws ArgumentNullException when either argument is null; objType is
+        // validated first.
+        public void EmitCallPrivatePropSet(Type objType, string propName)
+        {
+            Type   owner    = objType  ?? throw new ArgumentNullException(nameof(objType));
+            string property = propName ?? throw new ArgumentNullException(nameof(propName));
+
+            EmitPrivateCall(owner, $"set_{property}");
+        }
+
         public void EmitCall(Type objType, string mthdName)
         {
             if (objType == null)