Add SSE optimizations for the S/Umax_V, S/Umin_V, S/Uaddw_V, S/Usubw_V, Fabs_S/V and Fneg_S/V instructions, for the Fcvtl_V and Fcvtn_V instructions, and for the Fcmp_S instruction. Add/improve other SSE optimizations. Add tests. (#496)

* Update CpuTest.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Update InstEmitSimdCmp.cs

* Update SoftFloat.cs

* Update InstEmitAluHelper.cs

* Update InstEmitSimdArithmetic.cs

* Update InstEmitSimdHelper.cs

* Update VectorHelper.cs

* Update InstEmitSimdCvt.cs

* Update InstEmitSimdArithmetic.cs

* Update CpuTestSimd.cs

* Update InstEmitSimdArithmetic.cs

* Update OpCodeTable.cs

* Update InstEmitSimdArithmetic.cs

* Update InstEmitSimdCmp.cs

* Update InstEmitSimdCvt.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Create CpuTestSimdFcond.cs

* Update OpCodeTable.cs

* Update InstEmitSimdMove.cs

* Update CpuTestSimdIns.cs

* Create CpuTestSimdExt.cs

* Nit.

* Update PackageReference.
This commit is contained in:
LDj3SNuD 2018-11-18 03:41:16 +01:00 committed by gdkchan
parent 5357291c36
commit 7e98b0f6b2
9 changed files with 1214 additions and 312 deletions

View file

@ -190,23 +190,32 @@ namespace ChocolArm64.Instructions
} }
} }
public static void EmitSetNzcv(ILEmitterCtx context, int nzcv) public static void EmitSetNzcv(ILEmitterCtx context)
{ {
context.EmitLdc_I4((nzcv >> 0) & 1); context.Emit(OpCodes.Dup);
context.Emit(OpCodes.Ldc_I4_1);
context.Emit(OpCodes.And);
context.EmitStflg((int)PState.VBit); context.EmitStflg((int)PState.VBit);
context.EmitLdc_I4((nzcv >> 1) & 1); context.Emit(OpCodes.Ldc_I4_1);
context.Emit(OpCodes.Shr);
context.Emit(OpCodes.Dup);
context.Emit(OpCodes.Ldc_I4_1);
context.Emit(OpCodes.And);
context.EmitStflg((int)PState.CBit); context.EmitStflg((int)PState.CBit);
context.EmitLdc_I4((nzcv >> 2) & 1); context.Emit(OpCodes.Ldc_I4_1);
context.Emit(OpCodes.Shr);
context.Emit(OpCodes.Dup);
context.Emit(OpCodes.Ldc_I4_1);
context.Emit(OpCodes.And);
context.EmitStflg((int)PState.ZBit); context.EmitStflg((int)PState.ZBit);
context.EmitLdc_I4((nzcv >> 3) & 1); context.Emit(OpCodes.Ldc_I4_1);
context.Emit(OpCodes.Shr);
context.Emit(OpCodes.Ldc_I4_1);
context.Emit(OpCodes.And);
context.EmitStflg((int)PState.NBit); context.EmitStflg((int)PState.NBit);
} }
} }
} }

View file

@ -186,18 +186,101 @@ namespace ChocolArm64.Instructions
public static void Fabs_S(ILEmitterCtx context) public static void Fabs_S(ILEmitterCtx context)
{ {
EmitScalarUnaryOpF(context, () => if (Optimizations.UseSse2)
{ {
EmitUnaryMathCall(context, nameof(Math.Abs)); OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
});
if (op.Size == 0)
{
Type[] typesSsv = new Type[] { typeof(float) };
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
context.EmitStvec(op.Rd);
EmitVectorZero32_128(context, op.Rd);
}
else /* if (op.Size == 1) */
{
Type[] typesSsv = new Type[] { typeof(double) };
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
EmitScalarUnaryOpF(context, () =>
{
EmitUnaryMathCall(context, nameof(Math.Abs));
});
}
} }
public static void Fabs_V(ILEmitterCtx context) public static void Fabs_V(ILEmitterCtx context)
{ {
EmitVectorUnaryOpF(context, () => if (Optimizations.UseSse2)
{ {
EmitUnaryMathCall(context, nameof(Math.Abs)); OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
});
int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesSav = new Type[] { typeof(float) };
Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesSav = new Type[] { typeof(double) };
Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorUnaryOpF(context, () =>
{
EmitUnaryMathCall(context, nameof(Math.Abs));
});
}
} }
public static void Fadd_S(ILEmitterCtx context) public static void Fadd_S(ILEmitterCtx context)
@ -283,7 +366,7 @@ namespace ChocolArm64.Instructions
} }
} }
public static void Fmadd_S(ILEmitterCtx context) public static void Fmadd_S(ILEmitterCtx context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
@ -450,22 +533,118 @@ namespace ChocolArm64.Instructions
}); });
} }
public static void Fmla_V(ILEmitterCtx context) public static void Fmla_V(ILEmitterCtx context) // Fused.
{ {
EmitVectorTernaryOpF(context, () => if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
context.Emit(OpCodes.Mul); OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
context.Emit(OpCodes.Add);
}); int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorTernaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd));
});
}
} }
public static void Fmla_Ve(ILEmitterCtx context) public static void Fmla_Ve(ILEmitterCtx context) // Fused.
{ {
EmitVectorTernaryOpByElemF(context, () => if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
context.Emit(OpCodes.Mul); OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp;
context.Emit(OpCodes.Add);
}); int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorTernaryOpByElemF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd));
});
}
} }
public static void Fmls_Se(ILEmitterCtx context) public static void Fmls_Se(ILEmitterCtx context)
@ -477,25 +656,121 @@ namespace ChocolArm64.Instructions
}); });
} }
public static void Fmls_V(ILEmitterCtx context) public static void Fmls_V(ILEmitterCtx context) // Fused.
{ {
EmitVectorTernaryOpF(context, () => if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
context.Emit(OpCodes.Mul); OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
context.Emit(OpCodes.Sub);
}); int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorTernaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub));
});
}
} }
public static void Fmls_Ve(ILEmitterCtx context) public static void Fmls_Ve(ILEmitterCtx context) // Fused.
{ {
EmitVectorTernaryOpByElemF(context, () => if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
context.Emit(OpCodes.Mul); OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp;
context.Emit(OpCodes.Sub);
}); int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rd);
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorTernaryOpByElemF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub));
});
}
} }
public static void Fmsub_S(ILEmitterCtx context) public static void Fmsub_S(ILEmitterCtx context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
@ -580,7 +855,59 @@ namespace ChocolArm64.Instructions
public static void Fmul_Ve(ILEmitterCtx context) public static void Fmul_Ve(ILEmitterCtx context)
{ {
EmitVectorBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul)); if (Optimizations.FastFP && Optimizations.UseSse2)
{
OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp;
int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) };
Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorBinaryOpByElemF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul));
});
}
} }
public static void Fmulx_S(ILEmitterCtx context) public static void Fmulx_S(ILEmitterCtx context)
@ -617,12 +944,95 @@ namespace ChocolArm64.Instructions
public static void Fneg_S(ILEmitterCtx context) public static void Fneg_S(ILEmitterCtx context)
{ {
EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); if (Optimizations.UseSse2)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
if (op.Size == 0)
{
Type[] typesSsv = new Type[] { typeof(float) };
Type[] typesXor = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor));
context.EmitStvec(op.Rd);
EmitVectorZero32_128(context, op.Rd);
}
else /* if (op.Size == 1) */
{
Type[] typesSsv = new Type[] { typeof(double) };
Type[] typesXor = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
EmitStvecWithCastFromDouble(context, op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg));
}
} }
public static void Fneg_V(ILEmitterCtx context) public static void Fneg_V(ILEmitterCtx context)
{ {
EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); if (Optimizations.UseSse2)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesSav = new Type[] { typeof(float) };
Type[] typesXor = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesSav = new Type[] { typeof(double) };
Type[] typesXor = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg));
}
} }
public static void Fnmadd_S(ILEmitterCtx context) public static void Fnmadd_S(ILEmitterCtx context)
@ -689,7 +1099,7 @@ namespace ChocolArm64.Instructions
}); });
} }
public static void Frecps_S(ILEmitterCtx context) public static void Frecps_S(ILEmitterCtx context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
@ -743,7 +1153,7 @@ namespace ChocolArm64.Instructions
} }
} }
public static void Frecps_V(ILEmitterCtx context) public static void Frecps_V(ILEmitterCtx context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
@ -986,7 +1396,7 @@ namespace ChocolArm64.Instructions
}); });
} }
public static void Frsqrts_S(ILEmitterCtx context) public static void Frsqrts_S(ILEmitterCtx context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
@ -1048,7 +1458,7 @@ namespace ChocolArm64.Instructions
} }
} }
public static void Frsqrts_V(ILEmitterCtx context) public static void Frsqrts_V(ILEmitterCtx context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseSse2) if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
@ -1310,7 +1720,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdc_I4(numBytes); context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -1334,7 +1744,38 @@ namespace ChocolArm64.Instructions
public static void Saddw_V(ILEmitterCtx context) public static void Saddw_V(ILEmitterCtx context)
{ {
EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
VectorIntTypesPerSizeLog2[op.Size + 1] };
string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
}
else
{
EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add));
}
} }
public static void Shadd_V(ILEmitterCtx context) public static void Shadd_V(ILEmitterCtx context)
@ -1439,11 +1880,34 @@ namespace ChocolArm64.Instructions
public static void Smax_V(ILEmitterCtx context) public static void Smax_V(ILEmitterCtx context)
{ {
Type[] types = new Type[] { typeof(long), typeof(long) }; if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); Type[] typesMax = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
Type[] types = new Type[] { typeof(long), typeof(long) };
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);
EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
}
} }
public static void Smaxp_V(ILEmitterCtx context) public static void Smaxp_V(ILEmitterCtx context)
@ -1457,11 +1921,34 @@ namespace ChocolArm64.Instructions
public static void Smin_V(ILEmitterCtx context) public static void Smin_V(ILEmitterCtx context)
{ {
Type[] types = new Type[] { typeof(long), typeof(long) }; if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); Type[] typesMin = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
EmitStvecWithSignedCast(context, op.Rd, op.Size);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
Type[] types = new Type[] { typeof(long), typeof(long) };
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);
EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo));
}
} }
public static void Sminp_V(ILEmitterCtx context) public static void Sminp_V(ILEmitterCtx context)
@ -1484,7 +1971,7 @@ namespace ChocolArm64.Instructions
Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
VectorIntTypesPerSizeLog2[op.Size + 1] }; VectorIntTypesPerSizeLog2[op.Size + 1] };
Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
string nameCvt = op.Size == 0 string nameCvt = op.Size == 0
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
@ -1508,7 +1995,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
@ -1535,7 +2022,7 @@ namespace ChocolArm64.Instructions
Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
VectorIntTypesPerSizeLog2[op.Size + 1] }; VectorIntTypesPerSizeLog2[op.Size + 1] };
Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
string nameCvt = op.Size == 0 string nameCvt = op.Size == 0
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
@ -1559,7 +2046,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
@ -1735,7 +2222,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdc_I4(numBytes); context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
@ -1754,7 +2241,38 @@ namespace ChocolArm64.Instructions
public static void Ssubw_V(ILEmitterCtx context) public static void Ssubw_V(ILEmitterCtx context)
{ {
EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1],
VectorIntTypesPerSizeLog2[op.Size + 1] };
string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithSignedCast(context, op.Rm, op.Size);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
}
else
{
EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub));
}
} }
public static void Sub_S(ILEmitterCtx context) public static void Sub_S(ILEmitterCtx context)
@ -1901,7 +2419,38 @@ namespace ChocolArm64.Instructions
public static void Uaddw_V(ILEmitterCtx context) public static void Uaddw_V(ILEmitterCtx context)
{ {
EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1],
VectorUIntTypesPerSizeLog2[op.Size + 1] };
string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
}
else
{
EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add));
}
} }
public static void Uhadd_V(ILEmitterCtx context) public static void Uhadd_V(ILEmitterCtx context)
@ -1992,11 +2541,34 @@ namespace ChocolArm64.Instructions
public static void Umax_V(ILEmitterCtx context) public static void Umax_V(ILEmitterCtx context)
{ {
Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
Type[] types = new Type[] { typeof(ulong), typeof(ulong) };
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types);
EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo));
}
} }
public static void Umaxp_V(ILEmitterCtx context) public static void Umaxp_V(ILEmitterCtx context)
@ -2010,11 +2582,34 @@ namespace ChocolArm64.Instructions
public static void Umin_V(ILEmitterCtx context) public static void Umin_V(ILEmitterCtx context)
{ {
Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); Type[] typesMin = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
Type[] types = new Type[] { typeof(ulong), typeof(ulong) };
MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types);
EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo));
}
} }
public static void Uminp_V(ILEmitterCtx context) public static void Uminp_V(ILEmitterCtx context)
@ -2037,7 +2632,7 @@ namespace ChocolArm64.Instructions
Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1],
VectorIntTypesPerSizeLog2 [op.Size + 1] }; VectorIntTypesPerSizeLog2 [op.Size + 1] };
Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
string nameCvt = op.Size == 0 string nameCvt = op.Size == 0
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
@ -2061,7 +2656,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
@ -2088,7 +2683,7 @@ namespace ChocolArm64.Instructions
Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1],
VectorIntTypesPerSizeLog2 [op.Size + 1] }; VectorIntTypesPerSizeLog2 [op.Size + 1] };
Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
string nameCvt = op.Size == 0 string nameCvt = op.Size == 0
? nameof(Sse41.ConvertToVector128Int16) ? nameof(Sse41.ConvertToVector128Int16)
@ -2112,7 +2707,7 @@ namespace ChocolArm64.Instructions
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
@ -2251,7 +2846,38 @@ namespace ChocolArm64.Instructions
public static void Usubw_V(ILEmitterCtx context) public static void Usubw_V(ILEmitterCtx context)
{ {
EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); if (Optimizations.UseSse41)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
Type[] typesSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1],
VectorUIntTypesPerSizeLog2[op.Size + 1] };
string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1);
EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
}
else
{
EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub));
}
} }
private static void EmitAbs(ILEmitterCtx context) private static void EmitAbs(ILEmitterCtx context)

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation; using ChocolArm64.Translation;
using System; using System;
using System.Reflection.Emit; using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitAluHelper; using static ChocolArm64.Instructions.InstEmitAluHelper;
@ -137,26 +138,43 @@ namespace ChocolArm64.Instructions
context.EmitCondBranch(lblTrue, op.Cond); context.EmitCondBranch(lblTrue, op.Cond);
EmitSetNzcv(context, op.Nzcv); context.EmitLdc_I4(op.Nzcv);
EmitSetNzcv(context);
context.Emit(OpCodes.Br, lblEnd); context.Emit(OpCodes.Br, lblEnd);
context.MarkLabel(lblTrue); context.MarkLabel(lblTrue);
Fcmp_S(context); EmitFcmpE(context, signalNaNs: false);
context.MarkLabel(lblEnd); context.MarkLabel(lblEnd);
} }
public static void Fccmpe_S(ILEmitterCtx context) public static void Fccmpe_S(ILEmitterCtx context)
{ {
Fccmp_S(context); OpCodeSimdFcond64 op = (OpCodeSimdFcond64)context.CurrOp;
ILLabel lblTrue = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.EmitCondBranch(lblTrue, op.Cond);
context.EmitLdc_I4(op.Nzcv);
EmitSetNzcv(context);
context.Emit(OpCodes.Br, lblEnd);
context.MarkLabel(lblTrue);
EmitFcmpE(context, signalNaNs: true);
context.MarkLabel(lblEnd);
} }
public static void Fcmeq_S(ILEmitterCtx context) public static void Fcmeq_S(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar)); EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar));
} }
@ -169,7 +187,7 @@ namespace ChocolArm64.Instructions
public static void Fcmeq_V(ILEmitterCtx context) public static void Fcmeq_V(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual)); EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual));
} }
@ -182,7 +200,7 @@ namespace ChocolArm64.Instructions
public static void Fcmge_S(ILEmitterCtx context) public static void Fcmge_S(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar)); EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar));
} }
@ -195,7 +213,7 @@ namespace ChocolArm64.Instructions
public static void Fcmge_V(ILEmitterCtx context) public static void Fcmge_V(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual)); EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual));
} }
@ -208,7 +226,7 @@ namespace ChocolArm64.Instructions
public static void Fcmgt_S(ILEmitterCtx context) public static void Fcmgt_S(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar)); EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar));
} }
@ -221,7 +239,7 @@ namespace ChocolArm64.Instructions
public static void Fcmgt_V(ILEmitterCtx context) public static void Fcmgt_V(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan)); EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan));
} }
@ -252,31 +270,157 @@ namespace ChocolArm64.Instructions
} }
public static void Fcmp_S(ILEmitterCtx context) public static void Fcmp_S(ILEmitterCtx context)
{
EmitFcmpE(context, signalNaNs: false);
}
//FCMPE (scalar): forwards to the shared compare emitter, requesting the
//variant that signals on NaN operands (signalNaNs: true).
public static void Fcmpe_S(ILEmitterCtx context)
    => EmitFcmpE(context, signalNaNs: true);
private static void EmitFcmpE(ILEmitterCtx context, bool signalNaNs)
{ {
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
bool cmpWithZero = !(op is OpCodeSimdFcond64) ? op.Bit3 : false; bool cmpWithZero = !(op is OpCodeSimdFcond64) ? op.Bit3 : false;
//Handle NaN case. if (Optimizations.FastFP && Optimizations.UseSse2)
//If any number is NaN, then NZCV = 0011.
if (cmpWithZero)
{ {
EmitNaNCheck(context, op.Rn); if (op.Size == 0)
{
Type[] typesCmp = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
ILLabel lblNaN = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
if (cmpWithZero)
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
}
else
{
context.EmitLdvec(op.Rm);
}
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp));
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp));
context.Emit(OpCodes.Brtrue_S, lblNaN);
context.EmitLdc_I4(0);
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp));
context.EmitStflg((int)PState.NBit);
context.EmitStflg((int)PState.ZBit);
context.EmitStflg((int)PState.CBit);
context.EmitStflg((int)PState.VBit);
context.Emit(OpCodes.Br_S, lblEnd);
context.MarkLabel(lblNaN);
context.EmitLdc_I4(1);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(0);
context.Emit(OpCodes.Dup);
context.EmitStflg((int)PState.NBit);
context.EmitStflg((int)PState.ZBit);
context.EmitStflg((int)PState.CBit);
context.EmitStflg((int)PState.VBit);
context.MarkLabel(lblEnd);
}
else /* if (op.Size == 1) */
{
Type[] typesCmp = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
ILLabel lblNaN = new ILLabel();
ILLabel lblEnd = new ILLabel();
EmitLdvecWithCastToDouble(context, op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
if (cmpWithZero)
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
}
else
{
EmitLdvecWithCastToDouble(context, op.Rm);
}
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp));
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp));
context.Emit(OpCodes.Brtrue_S, lblNaN);
context.EmitLdc_I4(0);
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqualOrderedScalar), typesCmp));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareLessThanOrderedScalar), typesCmp));
context.EmitStflg((int)PState.NBit);
context.EmitStflg((int)PState.ZBit);
context.EmitStflg((int)PState.CBit);
context.EmitStflg((int)PState.VBit);
context.Emit(OpCodes.Br_S, lblEnd);
context.MarkLabel(lblNaN);
context.EmitLdc_I4(1);
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(0);
context.Emit(OpCodes.Dup);
context.EmitStflg((int)PState.NBit);
context.EmitStflg((int)PState.ZBit);
context.EmitStflg((int)PState.CBit);
context.EmitStflg((int)PState.VBit);
context.MarkLabel(lblEnd);
}
} }
else else
{
EmitNaNCheck(context, op.Rn);
EmitNaNCheck(context, op.Rm);
context.Emit(OpCodes.Or);
}
ILLabel lblNaN = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.Emit(OpCodes.Brtrue_S, lblNaN);
void EmitLoadOpers()
{ {
EmitVectorExtractF(context, op.Rn, 0, op.Size); EmitVectorExtractF(context, op.Rn, 0, op.Size);
@ -286,7 +430,7 @@ namespace ChocolArm64.Instructions
{ {
context.EmitLdc_R4(0f); context.EmitLdc_R4(0f);
} }
else /* if (Op.Size == 1) */ else // if (op.Size == 1)
{ {
context.EmitLdc_R8(0d); context.EmitLdc_R8(0d);
} }
@ -295,67 +439,12 @@ namespace ChocolArm64.Instructions
{ {
EmitVectorExtractF(context, op.Rm, 0, op.Size); EmitVectorExtractF(context, op.Rm, 0, op.Size);
} }
}
//Z = Rn == Rm context.EmitLdc_I4(!signalNaNs ? 0 : 1);
EmitLoadOpers();
context.Emit(OpCodes.Ceq); EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare));
context.Emit(OpCodes.Dup);
context.EmitStflg((int)PState.ZBit); EmitSetNzcv(context);
//C = Rn >= Rm
EmitLoadOpers();
context.Emit(OpCodes.Cgt);
context.Emit(OpCodes.Or);
context.EmitStflg((int)PState.CBit);
//N = Rn < Rm
EmitLoadOpers();
context.Emit(OpCodes.Clt);
context.EmitStflg((int)PState.NBit);
//V = 0
context.EmitLdc_I4(0);
context.EmitStflg((int)PState.VBit);
context.Emit(OpCodes.Br_S, lblEnd);
context.MarkLabel(lblNaN);
EmitSetNzcv(context, 0b0011);
context.MarkLabel(lblEnd);
}
//FCMPE (scalar): emitted exactly like FCMP by delegating to Fcmp_S.
public static void Fcmpe_S(ILEmitterCtx context)
    => Fcmp_S(context);
private static void EmitNaNCheck(ILEmitterCtx context, int reg)
{
IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;
EmitVectorExtractF(context, reg, 0, op.Size);
if (op.Size == 0)
{
context.EmitCall(typeof(float), nameof(float.IsNaN));
}
else if (op.Size == 1)
{
context.EmitCall(typeof(double), nameof(double.IsNaN));
}
else
{
throw new InvalidOperationException();
} }
} }
@ -486,7 +575,7 @@ namespace ChocolArm64.Instructions
{ {
context.EmitLdc_R4(0f); context.EmitLdc_R4(0f);
} }
else /* if (SizeF == 1) */ else /* if (sizeF == 1) */
{ {
context.EmitLdc_R8(0d); context.EmitLdc_R8(0d);
} }

View file

@ -76,33 +76,54 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1; int sizeF = op.Size & 1;
int elems = 4 >> sizeF; if (Optimizations.UseSse2 && sizeF == 1)
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
for (int index = 0; index < elems; index++)
{ {
if (sizeF == 0) Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
{ Type[] typesCvt = new Type[] { typeof(Vector128<float>) };
EmitVectorExtractZx(context, op.Rn, part + index, 1);
context.Emit(OpCodes.Conv_U2);
context.EmitLdarg(TranslatedSub.StateArgIdx); string nameMov = op.RegisterSize == RegisterSize.Simd128
? nameof(Sse.MoveHighToLow)
: nameof(Sse.MoveLowToHigh);
context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert)); context.EmitLdvec(op.Rn);
} context.Emit(OpCodes.Dup);
else /* if (sizeF == 1) */
{
EmitVectorExtractF(context, op.Rn, part + index, 0);
context.Emit(OpCodes.Conv_R8); context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov));
}
EmitVectorInsertTmpF(context, index, sizeF); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt));
EmitStvecWithCastFromDouble(context, op.Rd);
} }
else
{
int elems = 4 >> sizeF;
context.EmitLdvectmp(); int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
context.EmitStvec(op.Rd);
for (int index = 0; index < elems; index++)
{
if (sizeF == 0)
{
EmitVectorExtractZx(context, op.Rn, part + index, 1);
context.Emit(OpCodes.Conv_U2);
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert));
}
else /* if (sizeF == 1) */
{
EmitVectorExtractF(context, op.Rn, part + index, 0);
context.Emit(OpCodes.Conv_R8);
}
EmitVectorInsertTmpF(context, index, sizeF);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
}
} }
public static void Fcvtms_Gp(ILEmitterCtx context) public static void Fcvtms_Gp(ILEmitterCtx context)
@ -121,43 +142,70 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1; int sizeF = op.Size & 1;
int elems = 4 >> sizeF; if (Optimizations.UseSse2 && sizeF == 1)
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
if (part != 0)
{ {
Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
Type[] typesCvt = new Type[] { typeof(Vector128<double>) };
string nameMov = op.RegisterSize == RegisterSize.Simd128
? nameof(Sse.MoveLowToHigh)
: nameof(Sse.MoveHighToLow);
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);
context.EmitStvectmp(); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov));
EmitLdvecWithCastToDouble(context, op.Rn);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
context.Emit(OpCodes.Dup);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov));
context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov));
context.EmitStvec(op.Rd);
} }
else
for (int index = 0; index < elems; index++)
{ {
EmitVectorExtractF(context, op.Rn, index, sizeF); int elems = 4 >> sizeF;
if (sizeF == 0) int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
if (part != 0)
{ {
context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitLdvec(op.Rd);
context.EmitStvectmp();
context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert));
context.Emit(OpCodes.Conv_U8);
EmitVectorInsertTmp(context, part + index, 1);
} }
else /* if (sizeF == 1) */
for (int index = 0; index < elems; index++)
{ {
context.Emit(OpCodes.Conv_R4); EmitVectorExtractF(context, op.Rn, index, sizeF);
EmitVectorInsertTmpF(context, part + index, 0); if (sizeF == 0)
{
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert));
context.Emit(OpCodes.Conv_U8);
EmitVectorInsertTmp(context, part + index, 1);
}
else /* if (sizeF == 1) */
{
context.Emit(OpCodes.Conv_R4);
EmitVectorInsertTmpF(context, part + index, 0);
}
} }
}
context.EmitLdvectmp(); context.EmitLdvectmp();
context.EmitStvec(op.Rd); context.EmitStvec(op.Rd);
if (part == 0) if (part == 0)
{ {
EmitVectorZeroUpper(context, op.Rd); EmitVectorZeroUpper(context, op.Rd);
}
} }
} }
@ -260,7 +308,29 @@ namespace ChocolArm64.Instructions
public static void Scvtf_V(ILEmitterCtx context) public static void Scvtf_V(ILEmitterCtx context)
{ {
EmitVectorCvtf(context, signed: true); OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
if (Optimizations.UseSse2 && sizeF == 0)
{
Type[] typesCvt = new Type[] { typeof(Vector128<int>) };
EmitLdvecWithSignedCast(context, op.Rn, 2);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
EmitVectorCvtf(context, signed: true);
}
} }
public static void Ucvtf_Gp(ILEmitterCtx context) public static void Ucvtf_Gp(ILEmitterCtx context)
@ -441,16 +511,6 @@ namespace ChocolArm64.Instructions
context.EmitStintzr(op.Rd); context.EmitStintzr(op.Rd);
} }
//Signed flavour of the vector integer-to-float conversion emitter.
private static void EmitVectorScvtf(ILEmitterCtx context)
    => EmitVectorCvtf(context, signed: true);
//Unsigned flavour of the vector integer-to-float conversion emitter.
private static void EmitVectorUcvtf(ILEmitterCtx context)
    => EmitVectorCvtf(context, signed: false);
private static void EmitVectorCvtf(ILEmitterCtx context, bool signed) private static void EmitVectorCvtf(ILEmitterCtx context, bool signed)
{ {
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

View file

@ -219,7 +219,7 @@ namespace ChocolArm64.Instructions
type = typeof(Sse); type = typeof(Sse);
baseType = typeof(Vector128<float>); baseType = typeof(Vector128<float>);
} }
else /* if (SizeF == 1) */ else /* if (sizeF == 1) */
{ {
type = typeof(Sse2); type = typeof(Sse2);
baseType = typeof(Vector128<double>); baseType = typeof(Vector128<double>);
@ -249,7 +249,7 @@ namespace ChocolArm64.Instructions
{ {
EmitVectorZero32_128(context, op.Rd); EmitVectorZero32_128(context, op.Rd);
} }
else /* if (SizeF == 1) */ else /* if (sizeF == 1) */
{ {
EmitVectorZeroUpper(context, op.Rd); EmitVectorZeroUpper(context, op.Rd);
} }
@ -272,7 +272,7 @@ namespace ChocolArm64.Instructions
{ {
mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float) }); mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float) });
} }
else /* if (SizeF == 1) */ else /* if (sizeF == 1) */
{ {
mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double) }); mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double) });
} }
@ -292,7 +292,7 @@ namespace ChocolArm64.Instructions
{ {
mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(float) }); mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(float) });
} }
else /* if (SizeF == 1) */ else /* if (sizeF == 1) */
{ {
mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(double) }); mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(double) });
} }
@ -312,7 +312,7 @@ namespace ChocolArm64.Instructions
{ {
mthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) }); mthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) });
} }
else /* if (SizeF == 1) */ else /* if (sizeF == 1) */
{ {
mthdInfo = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) }); mthdInfo = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) });
} }
@ -334,7 +334,7 @@ namespace ChocolArm64.Instructions
{ {
mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(float) }); mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(float) });
} }
else /* if (SizeF == 1) */ else /* if (sizeF == 1) */
{ {
mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(double) }); mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(double) });
} }
@ -961,7 +961,7 @@ namespace ChocolArm64.Instructions
{ {
EmitSatQ(context, op.Size, true, true); EmitSatQ(context, op.Size, true, true);
} }
else /* if (Op.Size == 3) */ else /* if (op.Size == 3) */
{ {
EmitUnarySignedSatQAbsOrNeg(context); EmitUnarySignedSatQAbsOrNeg(context);
} }
@ -1022,7 +1022,7 @@ namespace ChocolArm64.Instructions
{ {
for (int index = 0; index < elems; index++) for (int index = 0; index < elems; index++)
{ {
EmitVectorExtract(context, op.Rn, index, op.Size, signed); EmitVectorExtract(context, op.Rn, index, op.Size, signed);
EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed);
if (op.Size <= 2) if (op.Size <= 2)
@ -1031,13 +1031,13 @@ namespace ChocolArm64.Instructions
EmitSatQ(context, op.Size, true, signed); EmitSatQ(context, op.Size, true, signed);
} }
else /* if (Op.Size == 3) */ else /* if (op.Size == 3) */
{ {
if (add) if (add)
{ {
EmitBinarySatQAdd(context, signed); EmitBinarySatQAdd(context, signed);
} }
else /* if (Sub) */ else /* if (sub) */
{ {
EmitBinarySatQSub(context, signed); EmitBinarySatQSub(context, signed);
} }
@ -1059,7 +1059,7 @@ namespace ChocolArm64.Instructions
EmitSatQ(context, op.Size, true, signed); EmitSatQ(context, op.Size, true, signed);
} }
else /* if (Op.Size == 3) */ else /* if (op.Size == 3) */
{ {
EmitBinarySatQAccumulate(context, signed); EmitBinarySatQAccumulate(context, signed);
} }
@ -1071,7 +1071,7 @@ namespace ChocolArm64.Instructions
{ {
for (int index = 0; index < elems; index++) for (int index = 0; index < elems; index++)
{ {
EmitVectorExtract(context, op.Rn, index, op.Size, signed); EmitVectorExtract(context, op.Rn, index, op.Size, signed);
EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed);
emit(); emit();
@ -1304,52 +1304,64 @@ namespace ChocolArm64.Instructions
} }
} }
public static void EmitVectorZeroAll(ILEmitterCtx context, int rd) public static void EmitVectorZeroAll(ILEmitterCtx context, int reg)
{ {
if (Optimizations.UseSse2) if (Optimizations.UseSse)
{ {
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitStvec(rd); context.EmitStvec(reg);
} }
else else
{ {
EmitVectorZeroLower(context, rd); EmitVectorZeroLower(context, reg);
EmitVectorZeroUpper(context, rd); EmitVectorZeroUpper(context, reg);
} }
} }
public static void EmitVectorZeroLower(ILEmitterCtx context, int rd) public static void EmitVectorZeroLower(ILEmitterCtx context, int reg)
{ {
EmitVectorInsert(context, rd, 0, 3, 0); EmitVectorInsert(context, reg, 0, 3, 0);
} }
public static void EmitVectorZeroLowerTmp(ILEmitterCtx context) public static void EmitVectorZeroLowerTmp(ILEmitterCtx context)
{ {
EmitVectorInsertTmp(context, 0, 3, 0); if (Optimizations.UseSse)
{
context.EmitLdvectmp();
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow)));
context.EmitStvectmp();
}
else
{
EmitVectorInsertTmp(context, 0, 3, 0);
}
} }
public static void EmitVectorZeroUpper(ILEmitterCtx context, int reg) public static void EmitVectorZeroUpper(ILEmitterCtx context, int reg)
{ {
if (Optimizations.UseSse2) if (Optimizations.UseSse)
{ {
//TODO: Use MoveScalar once it is fixed, as of the //TODO: Use Sse2.MoveScalar once it is fixed,
//time of writing it just crashes the JIT. //as of the time of writing it just crashes the JIT (SDK 2.1.500).
/*Type[] typesMov = new Type[] { typeof(Vector128<ulong>) };
EmitLdvecWithUnsignedCast(context, reg, 3); EmitLdvecWithUnsignedCast(context, reg, 3);
Type[] types = new Type[] { typeof(Vector128<ulong>), typeof(byte) }; context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov));
//Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), Types)); EmitStvecWithUnsignedCast(context, reg, 3);*/
context.EmitLdc_I4(8); context.EmitLdvec(reg);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), types)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
context.EmitLdc_I4(8); context.EmitStvec(reg);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), types));
EmitStvecWithUnsignedCast(context, reg, 3);
} }
else else
{ {
@ -1359,9 +1371,15 @@ namespace ChocolArm64.Instructions
public static void EmitVectorZero32_128(ILEmitterCtx context, int reg) public static void EmitVectorZero32_128(ILEmitterCtx context, int reg)
{ {
if (!Sse.IsSupported)
{
throw new PlatformNotSupportedException();
}
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitLdvec(reg); context.EmitLdvec(reg);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorZero32_128)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveScalar)));
context.EmitStvec(reg); context.EmitStvec(reg);
} }

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation; using ChocolArm64.Translation;
using System; using System;
using System.Reflection.Emit; using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper; using static ChocolArm64.Instructions.InstEmitSimdHelper;
@ -17,6 +18,8 @@ namespace ChocolArm64.Instructions
if (Optimizations.UseSse2) if (Optimizations.UseSse2)
{ {
Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] };
context.EmitLdintzr(op.Rn); context.EmitLdintzr(op.Rn);
switch (op.Size) switch (op.Size)
@ -26,16 +29,9 @@ namespace ChocolArm64.Instructions
case 2: context.Emit(OpCodes.Conv_U4); break; case 2: context.Emit(OpCodes.Conv_U4); break;
} }
Type[] types = new Type[] { UIntTypesPerSizeLog2[op.Size] }; context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), types));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size); EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
} }
else else
{ {
@ -48,11 +44,11 @@ namespace ChocolArm64.Instructions
EmitVectorInsert(context, op.Rd, index, op.Size); EmitVectorInsert(context, op.Rd, index, op.Size);
} }
}
if (op.RegisterSize == RegisterSize.Simd64) if (op.RegisterSize == RegisterSize.Simd64)
{ {
EmitVectorZeroUpper(context, op.Rd); EmitVectorZeroUpper(context, op.Rd);
}
} }
} }
@ -69,14 +65,34 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;
int bytes = op.GetBitsCount() >> 3; if (Optimizations.UseSse2)
int elems = bytes >> op.Size;
for (int index = 0; index < elems; index++)
{ {
Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] };
EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
EmitVectorInsert(context, op.Rd, index, op.Size); switch (op.Size)
{
case 0: context.Emit(OpCodes.Conv_U1); break;
case 1: context.Emit(OpCodes.Conv_U2); break;
case 2: context.Emit(OpCodes.Conv_U4); break;
}
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
}
else
{
int bytes = op.GetBitsCount() >> 3;
int elems = bytes >> op.Size;
for (int index = 0; index < elems; index++)
{
EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
EmitVectorInsert(context, op.Rd, index, op.Size);
}
} }
if (op.RegisterSize == RegisterSize.Simd64) if (op.RegisterSize == RegisterSize.Simd64)
@ -89,32 +105,65 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp; OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp;
context.EmitLdvec(op.Rd); if (Optimizations.UseSse2)
context.EmitStvectmp();
int bytes = op.GetBitsCount() >> 3;
int position = op.Imm4;
for (int index = 0; index < bytes; index++)
{ {
int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; Type[] typesShs = new Type[] { typeof(Vector128<byte>), typeof(byte) };
Type[] typesOr = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };
if (position == bytes) EmitLdvecWithUnsignedCast(context, op.Rn, 0);
if (op.RegisterSize == RegisterSize.Simd64)
{ {
position = 0; VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
} }
EmitVectorExtractZx(context, reg, position++, 0); context.EmitLdc_I4(op.Imm4);
EmitVectorInsertTmp(context, index, 0); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs));
EmitLdvecWithUnsignedCast(context, op.Rm, 0);
context.EmitLdc_I4((op.RegisterSize == RegisterSize.Simd64 ? 8 : 16) - op.Imm4);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs));
if (op.RegisterSize == RegisterSize.Simd64)
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
}
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
EmitStvecWithUnsignedCast(context, op.Rd, 0);
} }
else
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{ {
EmitVectorZeroUpper(context, op.Rd); int bytes = op.GetBitsCount() >> 3;
int position = op.Imm4;
for (int index = 0; index < bytes; index++)
{
int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
if (position == bytes)
{
position = 0;
}
EmitVectorExtractZx(context, reg, position++, 0);
EmitVectorInsertTmp(context, index, 0);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
} }
} }

View file

@ -789,6 +789,43 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
//Compares two single-precision values and returns a 4-bit flags value:
//  0b0011 - at least one operand is a NaN (unordered),
//  0b0110 - the operands compare equal,
//  0b1000 - value1 is less than value2,
//  0b0010 - otherwise (value1 is greater than value2).
//The result is meant to be consumed as packed NZCV flags by the caller.
//An InvalidOp exception is processed when an operand is a signalling NaN,
//or when any operand is a NaN and signalNaNs is true.
public static int FPCompare(float value1, float value2, bool signalNaNs, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}");

    //Only the operand classification is used below; the other outputs of FPUnpack are discarded.
    value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
    value2 = value2.FPUnpack(out FpType type2, out _, out _, state);

    int result;

    if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN)
    {
        result = 0b0011;

        //Quiet NaNs only raise InvalidOp when the caller asked for the signalling compare.
        if (type1 == FpType.SNaN || type2 == FpType.SNaN || signalNaNs)
        {
            FPProcessException(FpExc.InvalidOp, state);
        }
    }
    else if (value1 == value2)
    {
        result = 0b0110;
    }
    else if (value1 < value2)
    {
        result = 0b1000;
    }
    else
    {
        result = 0b0010;
    }

    return result;
}
public static float FPDiv(float value1, float value2, CpuThreadState state) public static float FPDiv(float value1, float value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
@ -1584,6 +1621,43 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
//Compares two double-precision values and returns a 4-bit flags value:
//  0b0011 - at least one operand is a NaN (unordered),
//  0b0110 - the operands compare equal,
//  0b1000 - value1 is less than value2,
//  0b0010 - otherwise (value1 is greater than value2).
//The result is meant to be consumed as packed NZCV flags by the caller.
//An InvalidOp exception is processed when an operand is a signalling NaN,
//or when any operand is a NaN and signalNaNs is true.
public static int FPCompare(double value1, double value2, bool signalNaNs, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}");

    //Only the operand classification is used below; the other outputs of FPUnpack are discarded.
    value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
    value2 = value2.FPUnpack(out FpType type2, out _, out _, state);

    int result;

    if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN)
    {
        result = 0b0011;

        //Quiet NaNs only raise InvalidOp when the caller asked for the signalling compare.
        if (type1 == FpType.SNaN || type2 == FpType.SNaN || signalNaNs)
        {
            FPProcessException(FpExc.InvalidOp, state);
        }
    }
    else if (value1 == value2)
    {
        result = 0b0110;
    }
    else if (value1 < value2)
    {
        result = 0b1000;
    }
    else
    {
        result = 0b0010;
    }

    return result;
}
public static double FPDiv(double value1, double value2, CpuThreadState state) public static double FPDiv(double value1, double value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");

View file

@ -9,18 +9,6 @@ namespace ChocolArm64.Instructions
{ {
static class VectorHelper static class VectorHelper
{ {
private static readonly Vector128<float> Zero32128Mask;
// Type initializer: builds the constant mask consumed by VectorZero32_128,
// or fails on hardware without SSE2.
static VectorHelper()
{
    if (Sse2.IsSupported)
    {
        // NOTE(review): presumably only the lowest 32-bit element is all ones - confirm SetVector128 argument order.
        Zero32128Mask = Sse.StaticCast<uint, float>(Sse2.SetVector128(0, 0, 0, 0xffffffff));
    }
    else
    {
        throw new PlatformNotSupportedException();
    }
}
public static void EmitCall(ILEmitterCtx context, string name64, string name128) public static void EmitCall(ILEmitterCtx context, string name64, string name128)
{ {
bool isSimd64 = context.CurrOp.RegisterSize == RegisterSize.Simd64; bool isSimd64 = context.CurrOp.RegisterSize == RegisterSize.Simd64;
@ -491,7 +479,7 @@ namespace ChocolArm64.Instructions
{ {
int intValue = BitConverter.SingleToInt32Bits(value); int intValue = BitConverter.SingleToInt32Bits(value);
ushort low = (ushort)(intValue >> 0); ushort low = (ushort)(intValue >> 0);
ushort high = (ushort)(intValue >> 16); ushort high = (ushort)(intValue >> 16);
Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector); Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);
@ -578,17 +566,6 @@ namespace ChocolArm64.Instructions
throw new PlatformNotSupportedException(); throw new PlatformNotSupportedException();
} }
/// <summary>
/// Returns the bitwise AND of <paramref name="vector"/> with the static <c>Zero32128Mask</c>.
/// </summary>
/// <param name="vector">Input vector to mask.</param>
/// <returns>The masked vector.</returns>
/// <exception cref="PlatformNotSupportedException">Thrown when SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorZero32_128(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.And(vector, Zero32128Mask);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> vector) public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> vector)
{ {

View file

@ -216,9 +216,9 @@ namespace ChocolArm64
SetA64("01011110111xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_S, typeof(OpCodeSimdReg64)); SetA64("01011110111xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_S, typeof(OpCodeSimdReg64));
SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_V, typeof(OpCodeSimdReg64)); SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_V, typeof(OpCodeSimdReg64));
SetA64("0x00111000100000010110xxxxxxxxxx", InstEmit.Cnt_V, typeof(OpCodeSimd64)); SetA64("0x00111000100000010110xxxxxxxxxx", InstEmit.Cnt_V, typeof(OpCodeSimd64));
SetA64("0x001110000xxxxx000011xxxxxxxxxx", InstEmit.Dup_Gp, typeof(OpCodeSimdIns64)); SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstEmit.Dup_Gp, typeof(OpCodeSimdIns64));
SetA64("01011110000xxxxx000001xxxxxxxxxx", InstEmit.Dup_S, typeof(OpCodeSimdIns64)); SetA64("01011110000xxxxx000001xxxxxxxxxx", InstEmit.Dup_S, typeof(OpCodeSimdIns64));
SetA64("0x001110000xxxxx000001xxxxxxxxxx", InstEmit.Dup_V, typeof(OpCodeSimdIns64)); SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstEmit.Dup_V, typeof(OpCodeSimdIns64));
SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstEmit.Eor_V, typeof(OpCodeSimdReg64)); SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstEmit.Eor_V, typeof(OpCodeSimdReg64));
SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstEmit.Ext_V, typeof(OpCodeSimdExt64)); SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstEmit.Ext_V, typeof(OpCodeSimdExt64));
SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstEmit.Fabd_S, typeof(OpCodeSimdReg64)); SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstEmit.Fabd_S, typeof(OpCodeSimdReg64));
@ -384,9 +384,9 @@ namespace ChocolArm64
SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstEmit.Saddl_V, typeof(OpCodeSimdReg64)); SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstEmit.Saddl_V, typeof(OpCodeSimdReg64));
SetA64("0x001110<<100000001010xxxxxxxxxx", InstEmit.Saddlp_V, typeof(OpCodeSimd64)); SetA64("0x001110<<100000001010xxxxxxxxxx", InstEmit.Saddlp_V, typeof(OpCodeSimd64));
SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstEmit.Saddw_V, typeof(OpCodeSimdReg64)); SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstEmit.Saddw_V, typeof(OpCodeSimdReg64));
SetA64("x0011110xx100010000000xxxxxxxxxx", InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt64)); SetA64("x00111100x100010000000xxxxxxxxxx", InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt64));
SetA64("010111100x100001110110xxxxxxxxxx", InstEmit.Scvtf_S, typeof(OpCodeSimd64)); SetA64("010111100x100001110110xxxxxxxxxx", InstEmit.Scvtf_S, typeof(OpCodeSimd64));
SetA64("0x0011100x100001110110xxxxxxxxxx", InstEmit.Scvtf_V, typeof(OpCodeSimd64)); SetA64("0>0011100<100001110110xxxxxxxxxx", InstEmit.Scvtf_V, typeof(OpCodeSimd64));
SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V, typeof(OpCodeSimdReg64)); SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V, typeof(OpCodeSimdReg64));
SetA64("0101111000101000000010xxxxxxxxxx", InstEmit.Sha1h_V, typeof(OpCodeSimd64)); SetA64("0101111000101000000010xxxxxxxxxx", InstEmit.Sha1h_V, typeof(OpCodeSimd64));
SetA64("01011110000xxxxx001000xxxxxxxxxx", InstEmit.Sha1m_V, typeof(OpCodeSimdReg64)); SetA64("01011110000xxxxx001000xxxxxxxxxx", InstEmit.Sha1m_V, typeof(OpCodeSimdReg64));
@ -486,9 +486,9 @@ namespace ChocolArm64
SetA64("001011100x110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64)); SetA64("001011100x110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64));
SetA64("01101110<<110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64)); SetA64("01101110<<110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64));
SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstEmit.Uaddw_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstEmit.Uaddw_V, typeof(OpCodeSimdReg64));
SetA64("x0011110xx100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64)); SetA64("x00111100x100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64));
SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S, typeof(OpCodeSimd64)); SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S, typeof(OpCodeSimd64));
SetA64("0x1011100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64)); SetA64("0>1011100<100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64));
SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V, typeof(OpCodeSimdReg64));
SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstEmit.Uhsub_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstEmit.Uhsub_V, typeof(OpCodeSimdReg64));
SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstEmit.Umax_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstEmit.Umax_V, typeof(OpCodeSimdReg64));