From 142cededd4db2ff4f83a4833580d343a4f0a8cde Mon Sep 17 00:00:00 2001 From: riperiperi Date: Tue, 31 Aug 2021 06:51:57 +0100 Subject: [PATCH] Implement Shader Instructions SUATOM and SURED (#2090) * Initial Implementation * Further improvements (no support for float/64-bit types) * Merge atomic and reduce instructions, add missing format switch * Fix rebase issues. * Not used. * Whoops. Fixed. * Partial implementation of inc/dec, cleanup and TODOs * Remove testing path * Address Feedback --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 2 +- .../CodeGen/Glsl/Instructions/InstGen.cs | 3 +- .../Glsl/Instructions/InstGenHelper.cs | 1 + .../Glsl/Instructions/InstGenMemory.cs | 62 +++- .../CodeGen/Glsl/OperandManager.cs | 3 +- .../Decoders/OpCodeSuatom.cs | 46 +++ .../Decoders/OpCodeSured.cs | 44 +++ .../Decoders/OpCodeTable.cs | 5 + .../Decoders/ReductionType.cs | 4 +- .../Instructions/InstEmitTexture.cs | 292 ++++++++++++++++++ .../IntermediateRepresentation/Instruction.cs | 1 + .../TextureFlags.cs | 15 +- .../StructuredIr/InstructionInfo.cs | 8 +- .../Optimizations/BindlessElimination.cs | 15 +- .../Translation/Optimizations/Optimizer.cs | 1 + .../Translation/ShaderConfig.cs | 26 +- 16 files changed, 510 insertions(+), 18 deletions(-) create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeSuatom.cs create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeSured.cs diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index eb8847cd6..0d4795596 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Version of the codegen (to be changed when codegen or guest format change). /// - private const ulong ShaderCodeGenVersion = 2605; + private const ulong ShaderCodeGenVersion = 2092; // Progress reporting helpers private volatile int _shaderCount; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs index 41a8b9429..24d4cabd4 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -132,9 +132,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return Call(context, operation); case Instruction.ImageLoad: - return ImageLoadOrStore(context, operation); - case Instruction.ImageStore: + case Instruction.ImageAtomic: return ImageLoadOrStore(context, operation); case Instruction.LoadAttribute: diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index a49576580..6f4f0c4e6 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -72,6 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier"); Add(Instruction.ImageLoad, InstType.Special); Add(Instruction.ImageStore, InstType.Special); + Add(Instruction.ImageAtomic, InstType.Special); Add(Instruction.IsNan, InstType.CallUnary, "isnan"); Add(Instruction.LoadAttribute, InstType.Special); Add(Instruction.LoadConstant, InstType.Special); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index a0aec28ed..89215736b 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -18,13 +18,39 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions // TODO: Bindless texture support. For now we just return 0/do nothing. if (isBindless) { - return texOp.Inst == Instruction.ImageLoad ? NumberFormatter.FormatFloat(0) : "// imageStore(bindless)"; + return texOp.Inst switch + { + Instruction.ImageStore => "// imageStore(bindless)", + Instruction.ImageLoad => NumberFormatter.FormatFloat(0), + _ => NumberFormatter.FormatInt(0) + }; } bool isArray = (texOp.Type & SamplerType.Array) != 0; bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; - string texCall = texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore"; + string texCall; + + if (texOp.Inst == Instruction.ImageAtomic) + { + texCall = (texOp.Flags & TextureFlags.AtomicMask) switch { + TextureFlags.Add => "imageAtomicAdd", + TextureFlags.Minimum => "imageAtomicMin", + TextureFlags.Maximum => "imageAtomicMax", + TextureFlags.Increment => "imageAtomicAdd", // TODO: Clamp value. + TextureFlags.Decrement => "imageAtomicAdd", // TODO: Clamp value. + TextureFlags.BitwiseAnd => "imageAtomicAnd", + TextureFlags.BitwiseOr => "imageAtomicOr", + TextureFlags.BitwiseXor => "imageAtomicXor", + TextureFlags.Swap => "imageAtomicExchange", + TextureFlags.CAS => "imageAtomicCompSwap", + _ => "imageAtomicAdd", + }; + } + else + { + texCall = texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore"; + } int srcIndex = isBindless ? 1 : 0; @@ -95,8 +121,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions if (texOp.Inst == Instruction.ImageStore) { - int texIndex = context.FindImageDescriptorIndex(texOp); - VariableType type = texOp.Format.GetComponentType(); string[] cElems = new string[4]; @@ -128,7 +152,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Append(prefix + "vec4(" + string.Join(", ", cElems) + ")"); } - texCall += ")" + (texOp.Inst == Instruction.ImageLoad ? GetMask(texOp.Index) : ""); + if (texOp.Inst == Instruction.ImageAtomic) + { + VariableType type = texOp.Format.GetComponentType(); + + if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS) + { + Append(Src(type)); // Compare value. + } + + string value = (texOp.Flags & TextureFlags.AtomicMask) switch + { + TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value + TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value + _ => Src(type) + }; + + Append(value); + + texCall += ")"; + + if (type != VariableType.S32) + { + texCall = "int(" + texCall + ")"; + } + } + else + { + texCall += ")" + (texOp.Inst == Instruction.ImageLoad ? GetMask(texOp.Index) : ""); + } return texCall; } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs index b9f1b4ef1..deea6c721 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -362,7 +362,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } else if (operation is AstTextureOperation texOp && (texOp.Inst == Instruction.ImageLoad || - texOp.Inst == Instruction.ImageStore)) + texOp.Inst == Instruction.ImageStore || + texOp.Inst == Instruction.ImageAtomic)) { return texOp.Format.GetComponentType(); } diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeSuatom.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeSuatom.cs new file mode 100644 index 000000000..7c807b369 --- /dev/null +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeSuatom.cs @@ -0,0 +1,46 @@ +using Ryujinx.Graphics.Shader.Instructions; + +namespace Ryujinx.Graphics.Shader.Decoders +{ + class OpCodeSuatom : OpCodeTextureBase + { + public Register Rd { get; } + public Register Ra { get; } + public Register Rb { get; } + public Register Rc { get; } + + public ReductionType Type { get; } + public AtomicOp AtomicOp { get; } + public ImageDimensions Dimensions { get; } + public ClampMode ClampMode { get; } + + public bool ByteAddress { get; } + public bool UseType { get; } + public bool IsBindless { get; } + + public bool CompareAndSwap { get; } + + public new static OpCode Create(InstEmitter emitter, ulong address, long opCode) => new OpCodeSuatom(emitter, address, opCode); + + public OpCodeSuatom(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) + { + Rd = new Register(opCode.Extract(0, 8), RegisterType.Gpr); + Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr); + Rb = new Register(opCode.Extract(20, 8), RegisterType.Gpr); + Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr); + + bool supportsBindless = opCode.Extract(54); + + Type = (ReductionType)opCode.Extract(supportsBindless ? 36 : 51, 3); + ByteAddress = opCode.Extract(28); + AtomicOp = (AtomicOp)opCode.Extract(29, 4); // Only useful if CAS is not true. + Dimensions = (ImageDimensions)opCode.Extract(33, 3); + ClampMode = (ClampMode)opCode.Extract(49, 2); + + IsBindless = supportsBindless && !opCode.Extract(51); + UseType = !supportsBindless || opCode.Extract(52); + + CompareAndSwap = opCode.Extract(55); + } + } +} diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeSured.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeSured.cs new file mode 100644 index 000000000..57b8ec789 --- /dev/null +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeSured.cs @@ -0,0 +1,44 @@ +using Ryujinx.Graphics.Shader.Instructions; + +namespace Ryujinx.Graphics.Shader.Decoders +{ + enum ClampMode + { + Ignore = 0, + Trap = 2 + } + + class OpCodeSured : OpCodeTextureBase + { + public Register Ra { get; } + public Register Rb { get; } + public Register Rc { get; } + + public ReductionType Type { get; } + public AtomicOp AtomicOp { get; } + public ImageDimensions Dimensions { get; } + public ClampMode ClampMode { get; } + + public bool UseType { get; } + public bool IsBindless { get; } + public bool ByteAddress { get; } + + public new static OpCode Create(InstEmitter emitter, ulong address, long opCode) => new OpCodeSured(emitter, address, opCode); + + public OpCodeSured(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) + { + Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr); + Rb = new Register(opCode.Extract(0, 8), RegisterType.Gpr); + Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr); + + Type = (ReductionType)opCode.Extract(20, 3); + ByteAddress = opCode.Extract(23); + AtomicOp = (AtomicOp)opCode.Extract(24, 3); + Dimensions = (ImageDimensions)opCode.Extract(33, 3); + ClampMode = (ClampMode)opCode.Extract(49, 2); + + IsBindless = !opCode.Extract(51); + UseType = opCode.Extract(52); + } + } +} diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs index e5bcd7e5b..df09d907b 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs @@ -209,6 +209,11 @@ namespace Ryujinx.Graphics.Shader.Decoders Set("1110111101011x", InstEmit.Sts, OpCodeMemory.Create); Set("11101011000xxx", InstEmit.Suld, OpCodeImage.Create); Set("11101011001xxx", InstEmit.Sust, OpCodeImage.Create); + Set("11101011010xxx", InstEmit.Sured, OpCodeSured.Create); + Set("11101010110xxx", InstEmit.Suatom, OpCodeSuatom.Create); + Set("1110101010xxxx", InstEmit.Suatom, OpCodeSuatom.Create); + Set("11101010011xxx", InstEmit.Suatom, OpCodeSuatom.Create); + Set("1110101000xxxx", InstEmit.Suatom, OpCodeSuatom.Create); Set("1111000011111x", InstEmit.Sync, OpCodeBranchPop.Create); Set("110000xxxx111x", InstEmit.Tex, OpCodeTex.Create); Set("1101111010111x", InstEmit.TexB, OpCodeTexB.Create); diff --git a/Ryujinx.Graphics.Shader/Decoders/ReductionType.cs b/Ryujinx.Graphics.Shader/Decoders/ReductionType.cs index 4c8bd37e0..90814c697 100644 --- a/Ryujinx.Graphics.Shader/Decoders/ReductionType.cs +++ b/Ryujinx.Graphics.Shader/Decoders/ReductionType.cs @@ -7,6 +7,8 @@ namespace Ryujinx.Graphics.Shader.Decoders U64 = 2, FP32FtzRn = 3, FP16x2FtzRn = 4, - S64 = 5 + S64 = 5, + SD32 = 6, + SD64 = 7 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs index f1ae89d54..ce63398f9 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs @@ -277,6 +277,249 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Add(operation); } + public static void Sured(EmitterContext context) + { + OpCodeSured op = (OpCodeSured)context.CurrOp; + + SamplerType type = ConvertSamplerType(op.Dimensions); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid image reduction sampler type."); + + return; + } + + int raIndex = op.Ra.Index; + int rbIndex = op.Rb.Index; + + Operand Ra() + { + if (raIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(raIndex++, RegisterType.Gpr)); + } + + Operand Rb() + { + if (rbIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(rbIndex++, RegisterType.Gpr)); + } + + List sourcesList = new List(); + + if (op.IsBindless) + { + sourcesList.Add(context.Copy(Register(op.Rc))); + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(Const(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + if (type.HasFlag(SamplerType.Array)) + { + sourcesList.Add(Ra()); + + type |= SamplerType.Array; + } + + TextureFormat format = TextureFormat.R32Sint; + + if (op.UseType) + { + if (op.ByteAddress) + { + int xIndex = op.IsBindless ? 1 : 0; + + sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(op.Type))); + } + + // TODO: FP and 64-bit formats. + format = (op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64) ? + context.Config.GetTextureFormatAtomic(op.HandleOffset) : + GetTextureFormat(op.Type); + } + else if (!op.IsBindless) + { + format = context.Config.GetTextureFormatAtomic(op.HandleOffset); + } + + sourcesList.Add(Rb()); + + Operand[] sources = sourcesList.ToArray(); + + int handle = op.HandleOffset; + + TextureFlags flags = GetAtomicOpFlags(op.AtomicOp); + + if (op.IsBindless) + { + handle = 0; + flags |= TextureFlags.Bindless; + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.ImageAtomic, + type, + format, + flags, + handle, + 0, + null, + sources); + + context.Add(operation); + } + + public static void Suatom(EmitterContext context) + { + OpCodeSuatom op = (OpCodeSuatom)context.CurrOp; + + SamplerType type = ConvertSamplerType(op.Dimensions); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid image atomic sampler type."); + + return; + } + + int raIndex = op.Ra.Index; + int rbIndex = op.Rb.Index; + + Operand Ra() + { + if (raIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(raIndex++, RegisterType.Gpr)); + } + + Operand Rb() + { + if (rbIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(rbIndex++, RegisterType.Gpr)); + } + + int rdIndex = op.Rd.Index; + + Operand GetDest() + { + if (rdIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return Register(rdIndex++, RegisterType.Gpr); + } + + List sourcesList = new List(); + + if (op.IsBindless) + { + sourcesList.Add(context.Copy(Register(op.Rc))); + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(Const(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + if (type.HasFlag(SamplerType.Array)) + { + sourcesList.Add(Ra()); + + type |= SamplerType.Array; + } + + TextureFormat format = TextureFormat.R32Sint; + + if (op.UseType) + { + if (op.ByteAddress) + { + int xIndex = op.IsBindless ? 1 : 0; + + sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(op.Type))); + } + + // TODO: FP and 64-bit formats. + format = (op.Type == ReductionType.SD32 || op.Type == ReductionType.SD64) ? + context.Config.GetTextureFormatAtomic(op.HandleOffset) : + GetTextureFormat(op.Type); + } + else if (!op.IsBindless) + { + format = context.Config.GetTextureFormatAtomic(op.HandleOffset); + } + + if (op.CompareAndSwap) + { + sourcesList.Add(Rb()); + } + + sourcesList.Add(Rb()); + + Operand[] sources = sourcesList.ToArray(); + + int handle = op.HandleOffset; + + TextureFlags flags = op.CompareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(op.AtomicOp); + + if (op.IsBindless) + { + handle = 0; + flags |= TextureFlags.Bindless; + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.ImageAtomic, + type, + format, + flags, + handle, + 0, + GetDest(), + sources); + + context.Add(operation); + } + public static void Tex(EmitterContext context) { EmitTextureSample(context, TextureFlags.None); @@ -1332,6 +1575,55 @@ namespace Ryujinx.Graphics.Shader.Instructions }; } + private static int GetComponentSizeInBytesLog2(ReductionType type) + { + return type switch + { + ReductionType.U32 => 2, + ReductionType.S32 => 2, + ReductionType.U64 => 3, + ReductionType.FP32FtzRn => 2, + ReductionType.FP16x2FtzRn => 2, + ReductionType.S64 => 3, + ReductionType.SD32 => 2, + ReductionType.SD64 => 3, + _ => 2 + }; + } + + private static TextureFormat GetTextureFormat(ReductionType type) + { + return type switch + { + ReductionType.U32 => TextureFormat.R32Uint, + ReductionType.S32 => TextureFormat.R32Sint, + ReductionType.U64 => TextureFormat.R32G32Uint, + ReductionType.FP32FtzRn => TextureFormat.R32Float, + ReductionType.FP16x2FtzRn => TextureFormat.R16G16Float, + ReductionType.S64 => TextureFormat.R32G32Uint, + ReductionType.SD32 => TextureFormat.R32Uint, + ReductionType.SD64 => TextureFormat.R32G32Uint, + _ => TextureFormat.R32Uint + }; + } + + private static TextureFlags GetAtomicOpFlags(AtomicOp op) + { + return op switch + { + AtomicOp.Add => TextureFlags.Add, + AtomicOp.Minimum => TextureFlags.Minimum, + AtomicOp.Maximum => TextureFlags.Maximum, + AtomicOp.Increment => TextureFlags.Increment, + AtomicOp.Decrement => TextureFlags.Decrement, + AtomicOp.BitwiseAnd => TextureFlags.BitwiseAnd, + AtomicOp.BitwiseOr => TextureFlags.BitwiseOr, + AtomicOp.BitwiseExclusiveOr => TextureFlags.BitwiseXor, + AtomicOp.Swap => TextureFlags.Swap, + _ => TextureFlags.Add + }; + } + private static SamplerType ConvertSamplerType(ImageDimensions target) { return target switch diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs index c1431ebc3..b0db56f08 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs @@ -69,6 +69,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation GroupMemoryBarrier, ImageLoad, ImageStore, + ImageAtomic, IsNan, LoadAttribute, LoadConstant, diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs index 5334afacc..0cc938a33 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs @@ -13,6 +13,19 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation LodBias = 1 << 4, LodLevel = 1 << 5, Offset = 1 << 6, - Offsets = 1 << 7 + Offsets = 1 << 7, + + AtomicMask = 15 << 16, + + Add = 0 << 16, + Minimum = 1 << 16, + Maximum = 2 << 16, + Increment = 3 << 16, + Decrement = 4 << 16, + BitwiseAnd = 5 << 16, + BitwiseOr = 6 << 16, + BitwiseXor = 7 << 16, + Swap = 8 << 16, + CAS = 9 << 16 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs index 33ee26ba0..79588778c 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs @@ -81,6 +81,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.FusedMultiplyAdd, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.ImageLoad, VariableType.F32); Add(Instruction.ImageStore, VariableType.None); + Add(Instruction.ImageAtomic, VariableType.S32); Add(Instruction.IsNan, VariableType.Bool, VariableType.F32); Add(Instruction.LoadAttribute, VariableType.F32, VariableType.S32, VariableType.S32, VariableType.S32); Add(Instruction.LoadConstant, VariableType.F32, VariableType.S32, VariableType.S32); @@ -146,9 +147,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { // TODO: Return correct type depending on source index, // that can improve the decompiler output. - if (inst == Instruction.ImageLoad || - inst == Instruction.ImageStore || - inst == Instruction.Lod || + if (inst == Instruction.ImageLoad || + inst == Instruction.ImageStore || + inst == Instruction.ImageAtomic || + inst == Instruction.Lod || inst == Instruction.TextureSample) { return VariableType.F32; diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs index e66bde0ab..709668f4a 100644 --- a/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs +++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs @@ -61,7 +61,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations src0.GetCbufOffset() | ((src1.GetCbufOffset() + 1) << 16), src0.GetCbufSlot() | ((src1.GetCbufSlot() + 1) << 16)); } - else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore) + else if (texOp.Inst == Instruction.ImageLoad || + texOp.Inst == Instruction.ImageStore || + texOp.Inst == Instruction.ImageAtomic) { Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block); @@ -69,7 +71,16 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { int cbufOffset = src0.GetCbufOffset(); int cbufSlot = src0.GetCbufSlot(); - texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot); + + if (texOp.Inst == Instruction.ImageAtomic) + { + texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot); + } + else + { + texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot); + } + SetHandle(config, texOp, cbufOffset, cbufSlot); } } diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs index 51fe825f4..078f3bb9e 100644 --- a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs +++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs @@ -278,6 +278,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations case Instruction.AtomicSwap: case Instruction.AtomicXor: case Instruction.Call: + case Instruction.ImageAtomic: return true; } } diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index f454ceeab..7d3246db1 100644 --- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -162,6 +162,28 @@ namespace Ryujinx.Graphics.Shader.Translation return format; } + private bool FormatSupportsAtomic(TextureFormat format) + { + return format == TextureFormat.R32Sint || format == TextureFormat.R32Uint; + } + + public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1) + { + // Atomic image instructions do not support GL_EXT_shader_image_load_formatted, + // and must have a type specified. Default to R32Sint if not available. + + var format = GpuAccessor.QueryTextureFormat(handle, cbufSlot); + + if (!FormatSupportsAtomic(format)) + { + GpuAccessor.Log($"Unsupported format for texture {handle}: {format}."); + + format = TextureFormat.R32Sint; + } + + return format; + } + public void SizeAdd(int size) { Size += size; @@ -270,8 +292,8 @@ namespace Ryujinx.Graphics.Shader.Translation int handle) { inst &= Instruction.Mask; - bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore; - bool isWrite = inst == Instruction.ImageStore; + bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore || inst == Instruction.ImageAtomic; + bool isWrite = inst == Instruction.ImageStore || inst == Instruction.ImageAtomic; bool accurateType = inst != Instruction.TextureSize && inst != Instruction.Lod; if (isImage)