mirror of
https://github.com/Ryujinx/ChocolArm64.git
synced 2024-12-22 14:05:39 +00:00
Add Rshrn_V & Shrn_V SSE optimizations. Add Mla_V, Mls_V & Mul_V SSE optimizations; add Tests. (#614)
* Update CountLeadingZeros().
* Remove obsolete Tests.
* Follow-up.
* Follow-up.
* Follow-up.
* Add Mla_V, Mls_V & Mul_V Tests.
* Update PackageReferences.
* Remove EmitLd/Stvectmp2().
* Remove Dup. Nits.
* Remove EmitLd/Stvectmp2() & Dup; nits.
* Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn().
* Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits.
* Add (R)shrn_V SSE optimization; add "Part" & "Shift" optimizations. Remove Tmp stuff; remove Dup. Nits.
* Add Mla/Mls/Mul_V SSE optimization. Add "Part" optimization. Remove EmitLd/Stvectmp2(), remove Dup. Nits.
* Nits.
* Nits.
* Nit.
* Add "Part" optimization. Nit.
* Nit.
* Nit.
* Add Cmhi_V & Cmhs_V SSE optimization.
This commit is contained in:
parent
8d5a48ba0a
commit
8c08547a9f
|
@ -1,4 +1,5 @@
|
|||
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
|
||||
// https://www.agner.org/optimize/#vectorclass @ vectori128.h
|
||||
|
||||
using ChocolArm64.Decoders;
|
||||
using ChocolArm64.State;
|
||||
|
@ -184,8 +185,8 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
|
||||
|
@ -193,8 +194,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -202,8 +203,8 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
|
||||
|
@ -211,8 +212,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -240,8 +241,8 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
|
||||
|
@ -249,8 +250,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -261,8 +262,8 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
@ -270,8 +271,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAnt));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -295,15 +296,15 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (op.Size == 0)
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSsv = new Type[] { typeof(float) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -311,15 +312,15 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (op.Size == 1) */
|
||||
{
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSsv = new Type[] { typeof(double) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -345,15 +346,15 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
Type[] typesSav = new Type[] { typeof(float) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdc_R4(-0f);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -364,15 +365,15 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
Type[] typesSav = new Type[] { typeof(double) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdc_R8(-0d);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -429,7 +430,7 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
|
||||
|
||||
|
@ -442,7 +443,7 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
|
||||
|
||||
|
@ -748,11 +749,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
|
||||
|
||||
|
@ -770,11 +773,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
|
||||
|
||||
|
@ -863,11 +868,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
|
||||
|
||||
|
@ -885,11 +892,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
|
||||
|
||||
|
@ -1000,11 +1009,13 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
@ -1020,11 +1031,13 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdc_I4(op.Index | op.Index << 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
@ -1772,11 +1785,18 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
public static void Mla_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Add);
|
||||
});
|
||||
EmitSse41Mul_AddSub(context, nameof(Sse2.Add));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Add);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Mla_Ve(ILEmitterCtx context)
|
||||
|
@ -1790,11 +1810,18 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
public static void Mls_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Sub);
|
||||
});
|
||||
EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorTernaryOpZx(context, () =>
|
||||
{
|
||||
context.Emit(OpCodes.Mul);
|
||||
context.Emit(OpCodes.Sub);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Mls_Ve(ILEmitterCtx context)
|
||||
|
@ -1808,7 +1835,14 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
public static void Mul_V(ILEmitterCtx context)
|
||||
{
|
||||
EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitSse41Mul_AddSub(context);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
|
||||
}
|
||||
}
|
||||
|
||||
public static void Mul_Ve(ILEmitterCtx context)
|
||||
|
@ -1923,19 +1957,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -1969,13 +2007,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -1999,25 +2038,19 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
|
||||
|
||||
context.EmitLdc_I4(1);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -2185,20 +2218,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -2244,20 +2281,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -2441,19 +2482,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2482,13 +2527,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2594,19 +2640,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2659,13 +2709,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -2689,25 +2740,19 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
|
||||
|
||||
context.EmitLdc_I4(1);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -2737,8 +2782,7 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
|
||||
|
@ -2862,20 +2906,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -2921,20 +2969,24 @@ namespace ChocolArm64.Instructions
|
|||
? nameof(Sse41.ConvertToVector128Int16)
|
||||
: nameof(Sse41.ConvertToVector128Int32);
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
|
||||
|
||||
|
@ -3063,19 +3115,23 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -3104,13 +3160,14 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -3253,5 +3310,77 @@ namespace ChocolArm64.Instructions
|
|||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits IL that multiplies the integer elements of Rn and Rm and stores the result in Rd.
// op.Size picks the code path: 0 and 1 have dedicated sequences, any other value uses the
// Sse41 MultiplyLow overload taking Vector128<int>.
// When nameAddSub is non-null, Rd is loaded first and the Sse2 method with that name is called
// on (Rd, product) before the store; with the default (null) only the product is stored.
private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    bool combineWithRd = nameAddSub != null;

    if (combineWithRd)
    {
        // First operand of the trailing add/subtract call.
        context.EmitLdvec(op.Rd);
    }

    switch (op.Size)
    {
        case 0:
        {
            // The byte product is assembled from two 16-bit multiplies: one on the operands
            // shifted right by 8 within each 16-bit lane, one on the unshifted operands.
            Type[] shortPair     = { typeof(Vector128<short>), typeof(Vector128<short>) };
            Type[] shortAndCount = { typeof(Vector128<short>), typeof(byte) };

            var shiftRight = typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shortAndCount);
            var shiftLeft  = typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical),  shortAndCount);
            var mulLow     = typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow),       shortPair);

            // (Rn >> 8) * (Rm >> 8), then shifted back left by 8.
            context.EmitLdvec(op.Rn);
            context.Emit(OpCodes.Ldc_I4_8);
            context.EmitCall(shiftRight);

            context.EmitLdvec(op.Rm);
            context.Emit(OpCodes.Ldc_I4_8);
            context.EmitCall(shiftRight);

            context.EmitCall(mulLow);

            context.Emit(OpCodes.Ldc_I4_8);
            context.EmitCall(shiftLeft);

            // Rn * Rm on the unshifted 16-bit lanes.
            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);
            context.EmitCall(mulLow);

            // Byte-wise blend of the two products under a 0x00FF00FF mask replicated
            // into every 32-bit element.
            context.EmitLdc_I4(0x00FF00FF);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new[] { typeof(int) }));

            Type blendArg = typeof(Vector128<sbyte>);

            context.EmitCall(typeof(Sse41).GetMethod(
                nameof(Sse41.BlendVariable),
                new[] { blendArg, blendArg, blendArg }));

            break;
        }

        case 1:
        {
            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);

            context.EmitCall(typeof(Sse2).GetMethod(
                nameof(Sse2.MultiplyLow),
                new[] { typeof(Vector128<short>), typeof(Vector128<short>) }));

            break;
        }

        default: /* op.Size == 2 */
        {
            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);

            context.EmitCall(typeof(Sse41).GetMethod(
                nameof(Sse41.MultiplyLow),
                new[] { typeof(Vector128<int>), typeof(Vector128<int>) }));

            break;
        }
    }

    if (combineWithRd)
    {
        Type elementVector = VectorIntTypesPerSizeLog2[op.Size];

        context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, new[] { elementVector, elementVector }));
    }

    context.EmitStvec(op.Rd);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,7 +86,42 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
// Emits the vector CMHI comparison. Exactly one code sequence is emitted: the SSE sequence
// when Optimizations.UseSse41 is set and op.Size < 3, otherwise the generic EmitCmpOp path
// with Bgt_Un_S. Emitting both would run the comparison twice and, when Rd is the same
// register as Rn or Rm, the second sequence would read an already overwritten operand.
public static void Cmhi_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    if (Optimizations.UseSse41 && op.Size < 3)
    {
        Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
        Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };
        Type[] typesAnt = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
        Type[] typesSav = new Type[] { typeof(byte) };

        // The unsigned Max overload is taken from Sse2 for size 0 and from Sse41 otherwise.
        Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

        // Max(Rm, Rn) == Rm marks the elements where Rn is not above Rm.
        context.EmitLdvec(op.Rm);
        context.EmitLdvec(op.Rn);

        context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));

        context.EmitLdvec(op.Rm);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmp));

        // AndNot against an all-0xFF vector inverts that mask.
        context.EmitLdc_I4(byte.MaxValue);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));

        context.EmitStvec(op.Rd);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: false);
    }
}
|
||||
|
||||
public static void Cmhs_S(ILEmitterCtx context)
|
||||
|
@ -96,7 +131,35 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
// Emits the vector CMHS comparison. Exactly one code sequence is emitted: the SSE sequence
// when Optimizations.UseSse41 is set and op.Size < 3, otherwise the generic EmitCmpOp path
// with Bge_Un_S. Emitting both would run the comparison twice and, when Rd is the same
// register as Rn or Rm, the second sequence would read an already overwritten operand.
public static void Cmhs_V(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    if (Optimizations.UseSse41 && op.Size < 3)
    {
        Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
        Type[] typesCmp = new Type[] { VectorIntTypesPerSizeLog2 [op.Size], VectorIntTypesPerSizeLog2 [op.Size] };

        // The unsigned Max overload is taken from Sse2 for size 0 and from Sse41 otherwise.
        Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

        // Max(Rn, Rm) == Rn marks the elements where Rn is at least Rm.
        context.EmitLdvec(op.Rn);
        context.EmitLdvec(op.Rm);

        context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));

        context.EmitLdvec(op.Rn);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqual), typesCmp));

        context.EmitStvec(op.Rd);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
    else
    {
        EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: false);
    }
}
|
||||
|
||||
public static void Cmle_S(ILEmitterCtx context)
|
||||
|
@ -318,9 +381,6 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
if (cmpWithZero)
|
||||
{
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
@ -331,7 +391,7 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp));
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
@ -340,18 +400,18 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.Emit(OpCodes.Brtrue_S, lblNaN);
|
||||
|
||||
context.EmitLdc_I4(0);
|
||||
context.Emit(OpCodes.Ldc_I4_0);
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp));
|
||||
|
||||
context.EmitStflg((int)PState.NBit);
|
||||
|
@ -363,10 +423,10 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.MarkLabel(lblNaN);
|
||||
|
||||
context.EmitLdc_I4(1);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitLdc_I4(0);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.Emit(OpCodes.Ldc_I4_0);
|
||||
context.Emit(OpCodes.Ldc_I4_0);
|
||||
|
||||
context.EmitStflg((int)PState.NBit);
|
||||
context.EmitStflg((int)PState.ZBit);
|
||||
|
@ -384,9 +444,6 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
if (cmpWithZero)
|
||||
{
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
|
||||
|
@ -397,7 +454,7 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp));
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
|
||||
|
@ -406,18 +463,18 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.Emit(OpCodes.Brtrue_S, lblNaN);
|
||||
|
||||
context.EmitLdc_I4(0);
|
||||
context.Emit(OpCodes.Ldc_I4_0);
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqualOrderedScalar), typesCmp));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp));
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareLessThanOrderedScalar), typesCmp));
|
||||
|
||||
context.EmitStflg((int)PState.NBit);
|
||||
|
@ -429,10 +486,10 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.MarkLabel(lblNaN);
|
||||
|
||||
context.EmitLdc_I4(1);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitLdc_I4(0);
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.Emit(OpCodes.Ldc_I4_1);
|
||||
context.Emit(OpCodes.Ldc_I4_0);
|
||||
context.Emit(OpCodes.Ldc_I4_0);
|
||||
|
||||
context.EmitStflg((int)PState.NBit);
|
||||
context.EmitStflg((int)PState.ZBit);
|
||||
|
|
|
@ -21,26 +21,24 @@ namespace ChocolArm64.Instructions
|
|||
if (op.Size == 1 && op.Opc == 0)
|
||||
{
|
||||
//Double -> Single.
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
Type[] typesCvt = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) };
|
||||
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), types));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), typesCvt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
else if (op.Size == 0 && op.Opc == 1)
|
||||
{
|
||||
//Single -> Double.
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
|
||||
Type[] typesCvt = new Type[] { typeof(Vector128<double>), typeof(Vector128<float>) };
|
||||
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), types));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), typesCvt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -80,14 +78,14 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
Type[] typesCvt = new Type[] { typeof(Vector128<float>) };
|
||||
|
||||
string nameMov = op.RegisterSize == RegisterSize.Simd128
|
||||
? nameof(Sse.MoveHighToLow)
|
||||
: nameof(Sse.MoveLowToHigh);
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameMov));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow)));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt));
|
||||
|
||||
|
@ -249,12 +247,12 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
// Scalar signed FCVTZS. The conversion is emitted once, through EmitFcvtz; emitting it a
// second time would re-convert the already written result whenever Rd and Rn are the same
// register.
public static void Fcvtzs_S(ILEmitterCtx context)
{
    EmitFcvtz(context, signed: true, scalar: true);
}
|
||||
|
||||
// Vector signed FCVTZS. The conversion is emitted once, through EmitFcvtz; emitting it a
// second time would re-convert the already written result whenever Rd and Rn are the same
// register.
public static void Fcvtzs_V(ILEmitterCtx context)
{
    EmitFcvtz(context, signed: true, scalar: false);
}
|
||||
|
||||
public static void Fcvtzu_Gp(ILEmitterCtx context)
|
||||
|
@ -269,12 +267,12 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
// Scalar unsigned FCVTZU. The conversion is emitted once, through EmitFcvtz; emitting it a
// second time would re-convert the already written result whenever Rd and Rn are the same
// register.
public static void Fcvtzu_S(ILEmitterCtx context)
{
    EmitFcvtz(context, signed: false, scalar: true);
}
|
||||
|
||||
// Vector unsigned FCVTZU. The conversion is emitted once, through EmitFcvtz; emitting it a
// second time would re-convert the already written result whenever Rd and Rn are the same
// register.
public static void Fcvtzu_V(ILEmitterCtx context)
{
    EmitFcvtz(context, signed: false, scalar: false);
}
|
||||
|
||||
public static void Scvtf_Gp(ILEmitterCtx context)
|
||||
|
@ -415,11 +413,6 @@ namespace ChocolArm64.Instructions
|
|||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = !scalar ? bytes >> sizeI : 1;
|
||||
|
||||
if (scalar && (sizeF == 0))
|
||||
{
|
||||
EmitVectorZeroLowerTmp(context);
|
||||
}
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
EmitVectorExtractF(context, op.Rn, index, sizeF);
|
||||
|
@ -441,13 +434,62 @@ namespace ChocolArm64.Instructions
|
|||
: nameof(VectorHelper.SatF64ToU64));
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, sizeI);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, sizeI);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
}
|
||||
|
||||
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
|
||||
private static void EmitFcvtz(ILEmitterCtx context, bool signed, bool scalar)
|
||||
{
|
||||
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
int sizeI = sizeF + 2;
|
||||
|
||||
int fBits = GetFBits(context);
|
||||
|
||||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = !scalar ? bytes >> sizeI : 1;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
EmitVectorExtractF(context, op.Rn, index, sizeF);
|
||||
|
||||
EmitF2iFBitsMul(context, sizeF, fBits);
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
VectorHelper.EmitCall(context, signed
|
||||
? nameof(VectorHelper.SatF32ToS32)
|
||||
: nameof(VectorHelper.SatF32ToU32));
|
||||
|
||||
context.Emit(OpCodes.Conv_U8);
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
VectorHelper.EmitCall(context, signed
|
||||
? nameof(VectorHelper.SatF64ToS64)
|
||||
: nameof(VectorHelper.SatF64ToU64));
|
||||
}
|
||||
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, sizeI);
|
||||
}
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
|
@ -555,105 +597,6 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
// Signed flavour of the scalar conversion: forwards to EmitScalarFcvtz with signed = true.
private static void EmitScalarFcvtzs(ILEmitterCtx context)
    => EmitScalarFcvtz(context, signed: true);
|
||||
|
||||
// Unsigned flavour of the scalar conversion: forwards to EmitScalarFcvtz with signed = false.
private static void EmitScalarFcvtzu(ILEmitterCtx context)
    => EmitScalarFcvtz(context, signed: false);
|
||||
|
||||
// Emits the scalar float-to-integer conversion: extracts element 0 of Rn, applies
// EmitF2iFBitsMul with the value returned by GetFBits, calls the saturating VectorHelper
// conversion matching the float width (op.Size & 1) and signedness, and writes the result
// to Rd through EmitScalarSet.
private static void EmitScalarFcvtz(ILEmitterCtx context, bool signed)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int floatSizeLog2 = op.Size & 1;
    bool isSingle = floatSizeLog2 == 0;

    int fracBits = GetFBits(context);

    EmitVectorExtractF(context, op.Rn, 0, floatSizeLog2);

    EmitF2iFBitsMul(context, floatSizeLog2, fracBits);

    string satHelper;

    if (isSingle)
    {
        satHelper = signed
            ? nameof(VectorHelper.SatF32ToS32)
            : nameof(VectorHelper.SatF32ToU32);
    }
    else
    {
        satHelper = signed
            ? nameof(VectorHelper.SatF64ToS64)
            : nameof(VectorHelper.SatF64ToU64);
    }

    VectorHelper.EmitCall(context, satHelper);

    if (isSingle)
    {
        // The 32-bit helper result is widened before it is stored.
        context.Emit(OpCodes.Conv_U8);
    }

    EmitScalarSet(context, op.Rd, floatSizeLog2 + 2);
}
|
||||
|
||||
// Signed flavour of the vector conversion: forwards to EmitVectorFcvtz with signed = true.
private static void EmitVectorFcvtzs(ILEmitterCtx context)
    => EmitVectorFcvtz(context, signed: true);
|
||||
|
||||
// Unsigned flavour of the vector conversion: forwards to EmitVectorFcvtz with signed = false.
private static void EmitVectorFcvtzu(ILEmitterCtx context)
    => EmitVectorFcvtz(context, signed: false);
|
||||
|
||||
// Emits the vector float-to-integer conversion, one element at a time: each element of Rn is
// extracted, passed through EmitF2iFBitsMul with the value returned by GetFBits, converted by
// the saturating VectorHelper routine matching the float width (op.Size & 1) and signedness,
// and inserted into the same index of Rd. The upper half of Rd is cleared for Simd64.
private static void EmitVectorFcvtz(ILEmitterCtx context, bool signed)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int floatSizeLog2 = op.Size & 1;
    int intSizeLog2   = floatSizeLog2 + 2;
    bool isSingle     = floatSizeLog2 == 0;

    int fracBits = GetFBits(context);

    int elementCount = (op.GetBitsCount() >> 3) >> intSizeLog2;

    string satHelper;

    if (isSingle)
    {
        satHelper = signed
            ? nameof(VectorHelper.SatF32ToS32)
            : nameof(VectorHelper.SatF32ToU32);
    }
    else
    {
        satHelper = signed
            ? nameof(VectorHelper.SatF64ToS64)
            : nameof(VectorHelper.SatF64ToU64);
    }

    for (int element = 0; element < elementCount; element++)
    {
        EmitVectorExtractF(context, op.Rn, element, floatSizeLog2);

        EmitF2iFBitsMul(context, floatSizeLog2, fracBits);

        VectorHelper.EmitCall(context, satHelper);

        if (isSingle)
        {
            // The 32-bit helper result is widened before it is inserted.
            context.Emit(OpCodes.Conv_U8);
        }

        EmitVectorInsert(context, op.Rd, element, intSizeLog2);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
|
||||
|
||||
private static int GetFBits(ILEmitterCtx context)
|
||||
{
|
||||
if (context.CurrOp is OpCodeSimdShImm64 op)
|
||||
|
|
|
@ -592,12 +592,9 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
emit();
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
|
@ -898,20 +895,13 @@ namespace ChocolArm64.Instructions
|
|||
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0);
|
||||
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
|
||||
|
@ -926,19 +916,12 @@ namespace ChocolArm64.Instructions
|
|||
Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp2();
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvectmp2();
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
|
||||
|
||||
|
@ -985,11 +968,6 @@ namespace ChocolArm64.Instructions
|
|||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = !scalar ? bytes >> op.Size : 1;
|
||||
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroLowerTmp(context);
|
||||
}
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
EmitVectorExtractSx(context, op.Rn, index, op.Size);
|
||||
|
@ -1005,13 +983,15 @@ namespace ChocolArm64.Instructions
|
|||
EmitUnarySignedSatQAbsOrNeg(context);
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
|
@ -1052,11 +1032,6 @@ namespace ChocolArm64.Instructions
|
|||
int bytes = op.GetBitsCount() >> 3;
|
||||
int elems = !scalar ? bytes >> op.Size : 1;
|
||||
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroLowerTmp(context);
|
||||
}
|
||||
|
||||
if (add || sub)
|
||||
{
|
||||
for (int index = 0; index < elems; index++)
|
||||
|
@ -1082,7 +1057,12 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
else if (accumulate)
|
||||
|
@ -1103,7 +1083,12 @@ namespace ChocolArm64.Instructions
|
|||
EmitBinarySatQAccumulate(context, signed);
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1117,14 +1102,16 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
EmitSatQ(context, op.Size, true, signed);
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
if (scalar)
|
||||
{
|
||||
EmitVectorZeroAll(context, op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
}
|
||||
|
@ -1190,7 +1177,7 @@ namespace ChocolArm64.Instructions
|
|||
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
|
||||
public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst)
|
||||
{
|
||||
if ((uint)sizeDst > 2)
|
||||
if ((uint)sizeDst > 2u)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(sizeDst));
|
||||
}
|
||||
|
@ -1381,15 +1368,15 @@ namespace ChocolArm64.Instructions
|
|||
if (Optimizations.UseSse)
|
||||
{
|
||||
//TODO: Use Sse2.MoveScalar once it is fixed,
|
||||
//as of the time of writing it just crashes the JIT (SDK 2.1.503).
|
||||
//as of the time of writing it just crashes the JIT (SDK 2.1.504).
|
||||
|
||||
/*Type[] typesMov = new Type[] { typeof(Vector128<ulong>) };
|
||||
|
||||
EmitLdvecWithUnsignedCast(context, reg, 3);
|
||||
context.EmitLdvec(reg);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov));
|
||||
|
||||
EmitStvecWithUnsignedCast(context, reg, 3);*/
|
||||
context.EmitStvec(reg);*/
|
||||
|
||||
context.EmitLdvec(reg);
|
||||
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
|
||||
|
|
|
@ -30,12 +30,12 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
||||
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -79,18 +79,18 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
Type[] typesXorAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
Type[] typesXorAnd = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
|
||||
string nameAndNot = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
|
||||
string nameAnd = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
|
||||
|
||||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rd);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameAndNot, typesXorAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameAnd, typesXorAnd));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -120,7 +120,6 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
|
||||
context.Emit(OpCodes.And);
|
||||
|
||||
context.Emit(OpCodes.Xor);
|
||||
|
||||
EmitVectorInsert(context, op.Rd, index, 3);
|
||||
|
@ -142,8 +141,7 @@ namespace ChocolArm64.Instructions
|
|||
Type[] typesXorAnd = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.Emit(OpCodes.Dup);
|
||||
|
||||
context.EmitLdvec(op.Rm);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
|
||||
|
@ -151,7 +149,6 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rd);
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesXorAnd));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXorAnd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
@ -196,15 +193,15 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
|
||||
|
||||
Type[] typesSav = new Type[] { typeof(byte) };
|
||||
Type[] typesAndNot = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
Type[] typesSav = new Type[] { typeof(byte) };
|
||||
Type[] typesAnt = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(byte.MaxValue);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
@ -225,8 +222,8 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
|
||||
|
||||
Type[] typesSav = new Type[] { typeof(byte) };
|
||||
Type[] typesAndNotOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
Type[] typesSav = new Type[] { typeof(byte) };
|
||||
Type[] typesAntOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
context.EmitLdvec(op.Rm);
|
||||
|
@ -234,8 +231,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdc_I4(byte.MaxValue);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNotOr));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAndNotOr));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAntOr));
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesAntOr));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ using ChocolArm64.State;
|
|||
using ChocolArm64.Translation;
|
||||
using System;
|
||||
using System.Reflection.Emit;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
using static ChocolArm64.Instructions.InstEmitSimdHelper;
|
||||
|
@ -13,9 +14,65 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
static partial class InstEmit
|
||||
{
|
||||
#region "Masks"
|
||||
// Byte-index masks consumed by Rshrn_V and Shrn_V, indexed by op.Size. Each 64-bit value is
// duplicated into both halves of a vector and passed as the mask operand of Ssse3.Shuffle.
// Reading each constant from its least significant byte upwards gives the listed indices.
private static readonly long[] _masks_RshrnShrn =
{
    0x0E0C0A0806040200L, // indices 0, 2, 4, 6, 8, 10, 12, 14
    0x0D0C090805040100L, // indices 0, 1, 4, 5, 8, 9, 12, 13
    0x0B0A090803020100L  // indices 0, 1, 2, 3, 8, 9, 10, 11
};
|
||||
#endregion
|
||||
|
||||
// Emits RSHRN (rounding shift right narrow). Exactly one code sequence is emitted: the SSSE3
// sequence when Optimizations.UseSsse3 is set, otherwise EmitVectorShrImmNarrowOpZx with
// round: true. Emitting the fallback unconditionally first would write Rd before the SSSE3
// sequence reads Rd and Rn, which gives a wrong result when Rd and Rn are the same register.
public static void Rshrn_V(ILEmitterCtx context)
{
    if (Optimizations.UseSsse3)
    {
        OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

        // Arithmetic is done on the source element width, one step above op.Size.
        Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], VectorUIntTypesPerSizeLog2[op.Size + 1] };
        Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
        Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
        Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size + 1] };
        Type[] typesSve = new Type[] { typeof(long), typeof(long) };

        // Simd128 places the narrowed data in the high half and keeps Rd's low half;
        // otherwise the narrowed data goes to the low half above a zeroed high half.
        string nameMov = op.RegisterSize == RegisterSize.Simd128
            ? nameof(Sse.MoveLowToHigh)
            : nameof(Sse.MoveHighToLow);

        int shift = GetImmShr(op);

        // Half of the unit being shifted out, added before the shift to round.
        long roundConst = 1L << (shift - 1);

        // Rd with its high half replaced by zero.
        context.EmitLdvec(op.Rd);
        VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

        context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));

        context.EmitLdvec(op.Rn);

        // NOTE(review): roundConst is pushed as a 64-bit constant while typesSav selects the
        // SetAllVector128 overload for the (op.Size + 1) element type - confirm this is intended.
        context.EmitLdc_I8(roundConst);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

        context.EmitLdc_I4(shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value

        // The same 64-bit mask is used for both halves of the shuffle control vector.
        context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask
        context.Emit(OpCodes.Dup); // mask

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));

        context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));

        context.EmitCall(typeof(Sse).GetMethod(nameMov));

        context.EmitStvec(op.Rd);
    }
    else
    {
        EmitVectorShrImmNarrowOpZx(context, round: true);
    }
}
|
||||
|
||||
public static void Shl_S(ILEmitterCtx context)
|
||||
|
@ -80,12 +137,13 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
|
@ -102,7 +160,45 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
// Emits SHRN (shift right narrow). Exactly one code sequence is emitted: the SSSE3 sequence
// when Optimizations.UseSsse3 is set, otherwise EmitVectorShrImmNarrowOpZx with round: false.
// Emitting the fallback unconditionally first would write Rd before the SSSE3 sequence reads
// Rd and Rn, which gives a wrong result when Rd and Rn are the same register.
public static void Shrn_V(ILEmitterCtx context)
{
    if (Optimizations.UseSsse3)
    {
        OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

        // The shift is done on the source element width, one step above op.Size.
        Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
        Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
        Type[] typesSve = new Type[] { typeof(long), typeof(long) };

        // Simd128 places the narrowed data in the high half and keeps Rd's low half;
        // otherwise the narrowed data goes to the low half above a zeroed high half.
        string nameMov = op.RegisterSize == RegisterSize.Simd128
            ? nameof(Sse.MoveLowToHigh)
            : nameof(Sse.MoveHighToLow);

        int shift = GetImmShr(op);

        // Rd with its high half replaced by zero.
        context.EmitLdvec(op.Rd);
        VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

        context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));

        context.EmitLdvec(op.Rn);

        context.EmitLdc_I4(shift);
        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value

        // The same 64-bit mask is used for both halves of the shuffle control vector.
        context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask
        context.Emit(OpCodes.Dup); // mask

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));

        context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));

        context.EmitCall(typeof(Sse).GetMethod(nameMov));

        context.EmitStvec(op.Rd);
    }
    else
    {
        EmitVectorShrImmNarrowOpZx(context, round: false);
    }
}
|
||||
|
||||
public static void Sli_V(ILEmitterCtx context)
|
||||
|
@ -271,8 +367,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
@ -282,16 +377,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
|
||||
|
@ -320,8 +412,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
@ -332,16 +423,13 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
|
||||
|
@ -403,17 +491,21 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
if (shift != 0)
|
||||
{
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
}
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -432,8 +524,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
|
||||
|
@ -464,8 +555,7 @@ namespace ChocolArm64.Instructions
|
|||
{
|
||||
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseSse2 && op.Size > 0
|
||||
&& op.Size < 3)
|
||||
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
|
||||
{
|
||||
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
|
||||
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
|
||||
|
@ -474,8 +564,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(GetImmShr(op));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
@ -612,16 +702,13 @@ namespace ChocolArm64.Instructions
|
|||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
@ -661,16 +748,13 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rd);
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.Emit(OpCodes.Dup);
|
||||
context.EmitStvectmp();
|
||||
|
||||
context.EmitLdc_I4(eSize - shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
|
||||
|
||||
context.EmitLdc_I4(eSize - 1);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
|
||||
|
@ -732,17 +816,21 @@ namespace ChocolArm64.Instructions
|
|||
nameof(Sse41.ConvertToVector128Int32),
|
||||
nameof(Sse41.ConvertToVector128Int64) };
|
||||
|
||||
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
|
||||
|
||||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(numBytes);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
if (op.RegisterSize == RegisterSize.Simd128)
|
||||
{
|
||||
context.Emit(OpCodes.Ldc_I4_8);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
|
||||
}
|
||||
|
||||
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
|
||||
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
if (shift != 0)
|
||||
{
|
||||
context.EmitLdc_I4(shift);
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
|
||||
}
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
}
|
||||
|
@ -801,8 +889,8 @@ namespace ChocolArm64.Instructions
|
|||
context.EmitLdvec(op.Rn);
|
||||
|
||||
context.EmitLdc_I4(GetImmShr(op));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
|
||||
|
||||
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
|
||||
|
||||
context.EmitStvec(op.Rd);
|
||||
|
@ -899,12 +987,9 @@ namespace ChocolArm64.Instructions
|
|||
context.Emit(OpCodes.Add);
|
||||
}
|
||||
|
||||
EmitVectorInsertTmp(context, index, op.Size);
|
||||
EmitVectorInsert(context, op.Rd, index, op.Size);
|
||||
}
|
||||
|
||||
context.EmitLdvectmp();
|
||||
context.EmitStvec(op.Rd);
|
||||
|
||||
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
|
||||
{
|
||||
EmitVectorZeroUpper(context, op.Rd);
|
||||
|
@ -1044,11 +1129,7 @@ namespace ChocolArm64.Instructions
|
|||
}
|
||||
|
||||
// dst64 = (Int(src64, signed) + roundConst) >> shift;
|
||||
private static void EmitShrImm64(
|
||||
ILEmitterCtx context,
|
||||
bool signed,
|
||||
long roundConst,
|
||||
int shift)
|
||||
private static void EmitShrImm64(ILEmitterCtx context, bool signed, long roundConst, int shift)
|
||||
{
|
||||
context.EmitLdc_I8(roundConst);
|
||||
context.EmitLdc_I4(shift);
|
||||
|
|
|
@ -61,8 +61,7 @@ namespace ChocolArm64.Translation
|
|||
|
||||
//Vectors are part of another "set" of locals.
|
||||
private const int VecGpTmp1Index = ReservedLocalsCount + 0;
|
||||
private const int VecGpTmp2Index = ReservedLocalsCount + 1;
|
||||
private const int UserVecTempStart = ReservedLocalsCount + 2;
|
||||
private const int UserVecTempStart = ReservedLocalsCount + 1;
|
||||
|
||||
private static int _userIntTempCount;
|
||||
private static int _userVecTempCount;
|
||||
|
@ -630,9 +629,6 @@ namespace ChocolArm64.Translation
|
|||
public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index);
|
||||
public void EmitStvectmp() => EmitStvec(VecGpTmp1Index);
|
||||
|
||||
public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
|
||||
public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
|
||||
|
||||
public void EmitLdint(int index) => Ldloc(index, VarType.Int);
|
||||
public void EmitStint(int index) => Stloc(index, VarType.Int);
|
||||
|
||||
|
|
Loading…
Reference in a new issue