New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-17 23:57:08 +00:00
|
|
|
using Ryujinx.Graphics.Shader.Decoders;
|
|
|
|
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
|
|
|
using Ryujinx.Graphics.Shader.Translation;
|
|
|
|
using System;
|
|
|
|
|
|
|
|
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
|
|
|
|
|
|
|
namespace Ryujinx.Graphics.Shader.Instructions
|
|
|
|
{
|
|
|
|
static class InstEmitHelper
|
|
|
|
{
|
|
|
|
/// <summary>
/// Creates an operand referring to flag register index 0 (the "ZF" flag, going by the method name).
/// </summary>
/// <param name="context">Emitter context; not read by this method.</param>
/// <returns>A register operand of type <see cref="RegisterType.Flag"/> with index 0.</returns>
public static Operand GetZF(EmitterContext context) => Register(0, RegisterType.Flag);
|
|
|
|
|
|
|
|
/// <summary>
/// Creates an operand referring to flag register index 1 (the "NF" flag, going by the method name).
/// </summary>
/// <param name="context">Emitter context; not read by this method.</param>
/// <returns>A register operand of type <see cref="RegisterType.Flag"/> with index 1.</returns>
public static Operand GetNF(EmitterContext context) => Register(1, RegisterType.Flag);
|
|
|
|
|
|
|
|
/// <summary>
/// Creates an operand referring to flag register index 2 (the "CF" flag, going by the method name).
/// </summary>
/// <param name="context">Emitter context; not read by this method.</param>
/// <returns>A register operand of type <see cref="RegisterType.Flag"/> with index 2.</returns>
public static Operand GetCF(EmitterContext context) => Register(2, RegisterType.Flag);
|
|
|
|
|
|
|
|
/// <summary>
/// Creates an operand referring to flag register index 3 (the "VF" flag, going by the method name).
/// </summary>
/// <param name="context">Emitter context; not read by this method.</param>
/// <returns>A register operand of type <see cref="RegisterType.Flag"/> with index 3.</returns>
public static Operand GetVF(EmitterContext context) => Register(3, RegisterType.Flag);
|
|
|
|
|
|
|
|
/// <summary>
/// Creates a register operand from the <c>Rd</c> field of the current opcode.
/// </summary>
/// <param name="context">Emitter context whose current opcode must implement <see cref="IOpCodeRd"/>.</param>
/// <returns>A register operand built from <c>Rd</c>.</returns>
/// <exception cref="InvalidCastException">The current opcode does not implement <see cref="IOpCodeRd"/>.</exception>
public static Operand GetDest(EmitterContext context)
{
    IOpCodeRd rdOp = (IOpCodeRd)context.CurrOp;

    return Register(rdOp.Rd);
}
|
|
|
|
|
|
|
|
/// <summary>
/// Creates a register operand from the <c>Ra</c> field of the current opcode.
/// </summary>
/// <param name="context">Emitter context whose current opcode must implement <see cref="IOpCodeRa"/>.</param>
/// <returns>A register operand built from <c>Ra</c>.</returns>
/// <exception cref="InvalidCastException">The current opcode does not implement <see cref="IOpCodeRa"/>.</exception>
public static Operand GetSrcA(EmitterContext context)
{
    IOpCodeRa raOp = (IOpCodeRa)context.CurrOp;

    return Register(raOp.Ra);
}
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the second source operand of the current opcode, interpreted according to the given floating point type.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="floatType">Floating point type the operand should be read as.</param>
/// <returns>
/// For <see cref="FPType.FP32"/>, the plain second source operand.
/// For <see cref="FPType.FP16"/>, one of the two unpacked halves of the second source operand.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="floatType"/> is <see cref="FPType.FP64"/> (not implemented yet) or any other unhandled value.
/// </exception>
public static Operand GetSrcB(EmitterContext context, FPType floatType)
{
    switch (floatType)
    {
        case FPType.FP32:
        {
            return GetSrcB(context);
        }

        case FPType.FP16:
        {
            // The value extracted from the raw opcode (position 41, size 1) is used as the index
            // of the unpacked half to return.
            int halfIndex = context.CurrOp.RawOpCode.Extract(41, 1);

            Operand[] halves = GetHalfUnpacked(context, GetSrcB(context), FPHalfSwizzle.FP16);

            return halves[halfIndex];
        }

        case FPType.FP64:
        {
            // TODO.
            // Not implemented: falls through to the exception below.
            break;
        }
    }

    throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
}
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the second source operand of the current opcode, choosing its kind from the
/// interface the opcode implements.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <returns>
/// A constant buffer operand, an integer constant, a float constant or a register operand,
/// depending on the opcode type.
/// </returns>
/// <exception cref="InvalidOperationException">The current opcode implements none of the handled interfaces.</exception>
public static Operand GetSrcB(EmitterContext context)
{
    OpCode currOp = context.CurrOp;

    // The checks below are order sensitive: the first matching interface wins.
    if (currOp is IOpCodeCbuf cbufOp)
    {
        return Cbuf(cbufOp.Slot, cbufOp.Offset);
    }

    if (currOp is IOpCodeImm immOp)
    {
        return Const(immOp.Immediate);
    }

    if (currOp is IOpCodeImmF immFOp)
    {
        return ConstF(immFOp.Immediate);
    }

    if (currOp is IOpCodeReg regOp)
    {
        return Register(regOp.Rb);
    }

    if (currOp is IOpCodeRegCbuf regCbufOp)
    {
        // For this opcode form the second source comes from the Rc field.
        return Register(regCbufOp.Rc);
    }

    throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");
}
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the third source operand of the current opcode.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <returns>
/// A constant buffer operand when the opcode implements <see cref="IOpCodeRegCbuf"/>,
/// otherwise a register operand built from <c>Rc</c> when it implements <see cref="IOpCodeRc"/>.
/// </returns>
/// <exception cref="InvalidOperationException">The current opcode implements neither interface.</exception>
public static Operand GetSrcC(EmitterContext context)
{
    OpCode currOp = context.CurrOp;

    // IOpCodeRegCbuf is tested first; the order of these checks must be kept.
    if (currOp is IOpCodeRegCbuf regCbufOp)
    {
        return Cbuf(regCbufOp.Slot, regCbufOp.Offset);
    }

    if (currOp is IOpCodeRc rcOp)
    {
        return Register(rcOp.Rc);
    }

    throw new InvalidOperationException($"Unexpected opcode type \"{context.CurrOp.GetType().Name}\".");
}
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the first source operand of a half float instruction as two unpacked values,
/// with the absolute/negate modifiers read from the raw opcode applied.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <returns>The array produced by <see cref="GetHalfUnpacked"/>, after <see cref="FPAbsNeg"/> was applied to it.</returns>
public static Operand[] GetHalfSrcA(EmitterContext context)
{
    OpCode currOp = context.CurrOp;

    bool isNegated  = false;
    bool isAbsolute = false;

    bool isCbufOrImm = currOp is IOpCodeCbuf || currOp is IOpCodeImm;

    if (isCbufOrImm)
    {
        isNegated  = currOp.RawOpCode.Extract(43);
        isAbsolute = currOp.RawOpCode.Extract(44);
    }
    else if (currOp is IOpCodeReg)
    {
        // Register form only reads the absolute modifier here.
        isAbsolute = currOp.RawOpCode.Extract(44);
    }
    else if (currOp is OpCodeAluImm32 && currOp.Emitter == InstEmit.Hadd2)
    {
        // NOTE(review): if OpCodeAluImm32 also implements IOpCodeImm, this branch can never
        // be reached because the first check wins -- confirm against the opcode declarations.
        isNegated = currOp.RawOpCode.Extract(56);
    }

    // The swizzle mode is read from the raw opcode (position 47, size 2).
    FPHalfSwizzle srcSwizzle = (FPHalfSwizzle)currOp.RawOpCode.Extract(47, 2);

    Operand[] unpacked = GetHalfUnpacked(context, GetSrcA(context), srcSwizzle);

    return FPAbsNeg(context, unpacked, isAbsolute, isNegated);
}
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the second source operand of a half float instruction as two unpacked values,
/// with the absolute/negate modifiers read from the raw opcode applied.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <returns>The array produced by <see cref="GetHalfUnpacked"/>, after <see cref="FPAbsNeg"/> was applied to it.</returns>
public static Operand[] GetHalfSrcB(EmitterContext context)
{
    OpCode currOp = context.CurrOp;

    FPHalfSwizzle srcSwizzle;

    bool isAbsolute;
    bool isNegated;

    if (currOp is IOpCodeReg)
    {
        // Register form: swizzle and both modifiers are read from the raw opcode.
        srcSwizzle = (FPHalfSwizzle)currOp.RawOpCode.Extract(28, 2);

        isAbsolute = currOp.RawOpCode.Extract(30);
        isNegated  = currOp.RawOpCode.Extract(31);
    }
    else if (currOp is IOpCodeCbuf)
    {
        // Constant buffer form: fixed FP32 swizzle, only the absolute modifier is read.
        srcSwizzle = FPHalfSwizzle.FP32;

        isAbsolute = currOp.RawOpCode.Extract(54);
        isNegated  = false;
    }
    else
    {
        // Any other form: FP16 swizzle without modifiers.
        srcSwizzle = FPHalfSwizzle.FP16;

        isAbsolute = false;
        isNegated  = false;
    }

    Operand[] unpacked = GetHalfUnpacked(context, GetSrcB(context), srcSwizzle);

    return FPAbsNeg(context, unpacked, isAbsolute, isNegated);
}
|
|
|
|
|
|
|
|
/// <summary>
/// Applies <c>context.FPAbsNeg</c> to every element of the given array.
/// </summary>
/// <param name="context">Emitter context used to apply the modifiers.</param>
/// <param name="operands">Operands to modify. The array is updated in place.</param>
/// <param name="abs">Absolute modifier flag forwarded to <c>context.FPAbsNeg</c>.</param>
/// <param name="neg">Negate modifier flag forwarded to <c>context.FPAbsNeg</c>.</param>
/// <returns>The same array instance that was passed in, with its elements replaced.</returns>
public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
{
    int i = 0;

    // Elements are processed front to back and overwritten in place.
    while (i < operands.Length)
    {
        Operand modified = context.FPAbsNeg(operands[i], abs, neg);

        operands[i] = modified;

        i++;
    }

    return operands;
}
|
|
|
|
|
2019-10-13 06:02:07 +00:00
|
|
|
/// <summary>
/// Expands a source operand into a two element array according to the given swizzle mode.
/// </summary>
/// <param name="context">Emitter context used to emit the unpack operations.</param>
/// <param name="src">Source operand.</param>
/// <param name="swizzle">Swizzle mode selecting how the two elements are produced.</param>
/// <returns>
/// FP16: low half, high half. FP32: the source operand twice, unchanged.
/// DupH0: the low half twice. DupH1: the high half twice.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="swizzle"/> is not one of the handled values.</exception>
public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, FPHalfSwizzle swizzle)
{
    if (swizzle == FPHalfSwizzle.FP16)
    {
        Operand low  = context.UnpackHalf2x16Low(src);
        Operand high = context.UnpackHalf2x16High(src);

        return new Operand[] { low, high };
    }

    if (swizzle == FPHalfSwizzle.FP32)
    {
        return new Operand[] { src, src };
    }

    if (swizzle == FPHalfSwizzle.DupH0)
    {
        // The unpack is intentionally emitted once per element, as two separate calls.
        Operand first  = context.UnpackHalf2x16Low(src);
        Operand second = context.UnpackHalf2x16Low(src);

        return new Operand[] { first, second };
    }

    if (swizzle == FPHalfSwizzle.DupH1)
    {
        // The unpack is intentionally emitted once per element, as two separate calls.
        Operand first  = context.UnpackHalf2x16High(src);
        Operand second = context.UnpackHalf2x16High(src);

        return new Operand[] { first, second };
    }

    throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
}
|
|
|
|
|
|
|
|
/// <summary>
/// Combines the two results of a half float instruction into a single operand,
/// according to the output swizzle mode of the current opcode.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="results">The two results; index 0 and/or 1 is read depending on the swizzle mode.</param>
/// <returns>
/// FP16: both results packed together. FP32: the first result unchanged.
/// DupH0: the first result packed with the high half of the current destination.
/// DupH1: the low half of the current destination packed with the second result.
/// </returns>
/// <exception cref="ArgumentException">The swizzle mode is not one of the handled values.</exception>
public static Operand GetHalfPacked(EmitterContext context, Operand[] results)
{
    OpCode currOp = context.CurrOp;

    // OpCodeAluImm32 always uses FP16; every other opcode reads the mode from
    // the raw opcode (position 49, size 2).
    FPHalfSwizzle dstSwizzle = currOp is OpCodeAluImm32
        ? FPHalfSwizzle.FP16
        : (FPHalfSwizzle)currOp.RawOpCode.Extract(49, 2);

    if (dstSwizzle == FPHalfSwizzle.FP16)
    {
        return context.PackHalf2x16(results[0], results[1]);
    }

    if (dstSwizzle == FPHalfSwizzle.FP32)
    {
        return results[0];
    }

    if (dstSwizzle == FPHalfSwizzle.DupH0)
    {
        // Keep the existing high half of the destination.
        Operand oldHigh = GetHalfDest(context, isHigh: true);

        return context.PackHalf2x16(results[0], oldHigh);
    }

    if (dstSwizzle == FPHalfSwizzle.DupH1)
    {
        // Keep the existing low half of the destination.
        Operand oldLow = GetHalfDest(context, isHigh: false);

        return context.PackHalf2x16(oldLow, results[1]);
    }

    throw new ArgumentException($"Invalid swizzle \"{dstSwizzle}\".");
}
|
|
|
|
|
|
|
|
/// <summary>
/// Unpacks one half of the destination register of the current opcode.
/// </summary>
/// <param name="context">Emitter context holding the current opcode.</param>
/// <param name="isHigh">True to unpack the high half, false to unpack the low half.</param>
/// <returns>The result of the selected unpack operation on the destination operand.</returns>
public static Operand GetHalfDest(EmitterContext context, bool isHigh)
{
    Operand dest = GetDest(context);

    return isHigh
        ? context.UnpackHalf2x16High(dest)
        : context.UnpackHalf2x16Low(dest);
}
|
|
|
|
|
|
|
|
/// <summary>
/// Gets the operand for the <c>Predicate39</c> register of the current ALU opcode,
/// inverted with a bitwise not when the opcode's <c>InvertP</c> flag is set.
/// </summary>
/// <param name="context">Emitter context whose current opcode must implement <see cref="IOpCodeAlu"/>.</param>
/// <returns>The predicate register operand, or the result of the bitwise not applied to it.</returns>
/// <exception cref="InvalidCastException">The current opcode does not implement <see cref="IOpCodeAlu"/>.</exception>
public static Operand GetPredicate39(EmitterContext context)
{
    IOpCodeAlu aluOp = (IOpCodeAlu)context.CurrOp;

    Operand predicate = Register(aluOp.Predicate39);

    return aluOp.InvertP ? context.BitwiseNot(predicate) : predicate;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Sign extends the low bits of a value by emitting a signed bitfield extract.
/// </summary>
/// <param name="context">Emitter context used to emit the operation.</param>
/// <param name="src">Value to extend.</param>
/// <param name="srcBits">Width passed as the last bitfield extract argument (presumably the bit count -- confirm against BitfieldExtractS32).</param>
/// <returns>The result of <c>BitfieldExtractS32(src, 0, srcBits)</c>.</returns>
public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
{
    Operand start = Const(0);
    Operand width = Const(srcBits);

    return context.BitfieldExtractS32(src, start, width);
}
|
|
|
|
|
|
|
|
/// <summary>
/// Zero extends the low <paramref name="srcBits"/> bits of a value by masking off the upper bits.
/// </summary>
/// <param name="context">Emitter context used to emit the bitwise and operation.</param>
/// <param name="src">Value to extend.</param>
/// <param name="srcBits">Number of low bits to keep, in the range 0 to 32 inclusive.</param>
/// <returns>The result of a bitwise and between <paramref name="src"/> and a mask with the low <paramref name="srcBits"/> bits set.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="srcBits"/> is negative or greater than 32.</exception>
public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
{
    if (srcBits < 0 || srcBits > 32)
    {
        // Outside this range the shift count below would wrap around and silently
        // produce a mask that has nothing to do with the requested width.
        throw new ArgumentOutOfRangeException(nameof(srcBits), srcBits, "Bit count must be between 0 and 32.");
    }

    // C# only uses the low 5 bits of the shift count for 32-bit operands, so a shift by
    // 32 (srcBits == 0) would behave as a shift by 0 and keep every bit instead of none.
    int mask = srcBits == 0 ? 0 : unchecked((int)(0xffffffffu >> (32 - srcBits)));

    return context.BitwiseAnd(src, Const(mask));
}
|
|
|
|
}
|
|
|
|
}
|