Support separate textures and samplers (#1216)

* Support separate textures and samplers

* Add missing bindless flag, fix SNORM format on buffer textures

* Add missing separation

* Add comments about the new handles
This commit is contained in:
gdkchan 2020-05-27 11:07:10 -03:00 committed by GitHub
parent 0b6d206daa
commit 5795bb1528
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 240 additions and 59 deletions

View file

@ -4,7 +4,6 @@ using Ryujinx.Graphics.Gpu.Shader;
using Ryujinx.Graphics.Gpu.State;
using Ryujinx.Graphics.Shader;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Engine
{
@ -91,9 +90,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
cbDescAddress += (ulong)cbDescOffset;
ReadOnlySpan<byte> cbDescriptorData = _context.PhysicalMemory.GetSpan(cbDescAddress, 0x10);
SbDescriptor cbDescriptor = MemoryMarshal.Cast<byte, SbDescriptor>(cbDescriptorData)[0];
SbDescriptor cbDescriptor = _context.PhysicalMemory.Read<SbDescriptor>(cbDescAddress);
BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
}
@ -110,9 +107,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
sbDescAddress += (ulong)sbDescOffset;
ReadOnlySpan<byte> sbDescriptorData = _context.PhysicalMemory.GetSpan(sbDescAddress, 0x10);
SbDescriptor sbDescriptor = MemoryMarshal.Cast<byte, SbDescriptor>(sbDescriptorData)[0];
SbDescriptor sbDescriptor = _context.PhysicalMemory.Read<SbDescriptor>(sbDescAddress);
BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size);
}

View file

@ -292,9 +292,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
sbDescAddress += (ulong)sbDescOffset;
ReadOnlySpan<byte> sbDescriptorData = _context.PhysicalMemory.GetSpan(sbDescAddress, 0x10);
SbDescriptor sbDescriptor = MemoryMarshal.Cast<byte, SbDescriptor>(sbDescriptorData)[0];
SbDescriptor sbDescriptor = _context.PhysicalMemory.Read<SbDescriptor>(sbDescAddress);
BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size);
}

View file

@ -1,6 +1,3 @@
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Image
{
/// <summary>
@ -41,11 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Image
if (sampler == null)
{
ulong address = Address + (ulong)(uint)id * DescriptorSize;
ReadOnlySpan<byte> data = Context.PhysicalMemory.GetSpan(address, DescriptorSize);
SamplerDescriptor descriptor = MemoryMarshal.Cast<byte, SamplerDescriptor>(data)[0];
SamplerDescriptor descriptor = Context.PhysicalMemory.Read<SamplerDescriptor>(Address + (ulong)id * DescriptorSize);
sampler = new Sampler(Context, descriptor);

View file

@ -1,8 +1,6 @@
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.State;
using Ryujinx.Graphics.Shader;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Image
{
@ -11,6 +9,9 @@ namespace Ryujinx.Graphics.Gpu.Image
/// </summary>
class TextureBindingsManager
{
private const int HandleHigh = 16;
private const int HandleMask = (1 << HandleHigh) - 1;
private GpuContext _context;
private bool _isCompute;
@ -114,7 +115,6 @@ namespace Ryujinx.Graphics.Gpu.Image
}
_samplerPool = new SamplerPool(_context, address, maximumId);
_samplerIndex = samplerIndex;
}
@ -195,7 +195,7 @@ namespace Ryujinx.Graphics.Gpu.Image
address = bufferManager.GetGraphicsUniformBufferAddress(stageIndex, binding.CbufSlot);
}
packedId = MemoryMarshal.Cast<byte, int>(_context.PhysicalMemory.GetSpan(address + (ulong)binding.CbufOffset * 4, 4))[0];
packedId = _context.PhysicalMemory.Read<int>(address + (ulong)binding.CbufOffset * 4);
}
else
{
@ -324,9 +324,20 @@ namespace Ryujinx.Graphics.Gpu.Image
address = bufferManager.GetGraphicsUniformBufferAddress(stageIndex, textureBufferIndex);
}
address += (uint)wordOffset * 4;
int handle = _context.PhysicalMemory.Read<int>(address + (ulong)(wordOffset & HandleMask) * 4);
return BitConverter.ToInt32(_context.PhysicalMemory.GetSpan(address, 4));
// The "wordOffset" (which is really the immediate value used on texture instructions on the shader)
// is a 13-bit value. However, in order to also support separate samplers and textures (which uses
// bindless textures on the shader), we extend it with another value on the higher 16 bits with
// another offset for the sampler.
// The shader translator has code to detect separate texture and sampler uses with a bindless texture,
// turn that into a regular texture access and produce those special handles with values on the higher 16 bits.
if (wordOffset >> HandleHigh != 0)
{
handle |= _context.PhysicalMemory.Read<int>(address + (ulong)(wordOffset >> HandleHigh) * 4);
}
return handle;
}
/// <summary>

View file

@ -773,6 +773,22 @@ namespace Ryujinx.Graphics.Gpu.Image
}
}
if (info.Target == Target.TextureBuffer)
{
// We assume that the host does not support signed normalized format
// (as is the case with OpenGL), so we just use a unsigned format.
// The shader will need the appropriate conversion code to compensate.
switch (formatInfo.Format)
{
case Format.R8Snorm: formatInfo = new FormatInfo(Format.R8Sint, 1, 1, 1); break;
case Format.R16Snorm: formatInfo = new FormatInfo(Format.R16Sint, 1, 1, 2); break;
case Format.R8G8Snorm: formatInfo = new FormatInfo(Format.R8G8Sint, 1, 1, 2); break;
case Format.R16G16Snorm: formatInfo = new FormatInfo(Format.R16G16Sint, 1, 1, 4); break;
case Format.R8G8B8A8Snorm: formatInfo = new FormatInfo(Format.R8G8B8A8Sint, 1, 1, 4); break;
case Format.R16G16B16A16Snorm: formatInfo = new FormatInfo(Format.R16G16B16A16Sint, 1, 1, 8); break;
}
}
int width = info.Width / info.SamplesInX;
int height = info.Height / info.SamplesInY;

View file

@ -1,9 +1,7 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Memory;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Image
{
@ -83,11 +81,7 @@ namespace Ryujinx.Graphics.Gpu.Image
/// <returns>The texture descriptor</returns>
public TextureDescriptor GetDescriptor(int id)
{
ulong address = Address + (ulong)(uint)id * DescriptorSize;
ReadOnlySpan<byte> data = Context.PhysicalMemory.GetSpan(address, DescriptorSize);
return MemoryMarshal.Cast<byte, TextureDescriptor>(data)[0];
return Context.PhysicalMemory.Read<TextureDescriptor>(Address + (ulong)id * DescriptorSize);
}
/// <summary>
@ -107,9 +101,7 @@ namespace Ryujinx.Graphics.Gpu.Image
if (texture != null)
{
ReadOnlySpan<byte> data = Context.PhysicalMemory.GetSpan(address, DescriptorSize);
TextureDescriptor descriptor = MemoryMarshal.Cast<byte, TextureDescriptor>(data)[0];
TextureDescriptor descriptor = Context.PhysicalMemory.Read<TextureDescriptor>(address);
// If the descriptors are the same, the texture is the same,
// we don't need to remove as it was not modified. Just continue.

View file

@ -46,11 +46,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
/// <summary>
/// Reads a structure from GPU mapped memory.
/// Reads data from GPU mapped memory.
/// </summary>
/// <typeparam name="T">Type of the structure</typeparam>
/// <param name="gpuVa">GPU virtual address where the structure is located</param>
/// <returns>The structure at the specified memory location</returns>
/// <typeparam name="T">Type of the data</typeparam>
/// <param name="gpuVa">GPU virtual address where the data is located</param>
/// <returns>The data at the specified memory location</returns>
public T Read<T>(ulong gpuVa) where T : unmanaged
{
ulong processVa = _context.MemoryManager.Translate(gpuVa);
@ -67,7 +67,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
ulong processVa = _context.MemoryManager.Translate(gpuVa);
return BitConverter.ToInt32(_context.PhysicalMemory.GetSpan(processVa, 4));
return _context.PhysicalMemory.Read<int>(processVa);
}
/// <summary>
@ -79,7 +79,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
{
ulong processVa = _context.MemoryManager.Translate(gpuVa);
return BitConverter.ToUInt64(_context.PhysicalMemory.GetSpan(processVa, 8));
return _context.PhysicalMemory.Read<ulong>(processVa);
}
/// <summary>

View file

@ -1,5 +1,6 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Memory
{
@ -33,6 +34,17 @@ namespace Ryujinx.Graphics.Gpu.Memory
return _cpuMemory.GetSpan(address, size);
}
/// <summary>
/// Reads data from the application process.
/// </summary>
/// <typeparam name="T">Type of the structure</typeparam>
/// <param name="gpuVa">Address to read from</param>
/// <returns>The data at the specified memory location</returns>
public T Read<T>(ulong address) where T : unmanaged
{
return MemoryMarshal.Cast<byte, T>(GetSpan(address, Unsafe.SizeOf<T>()))[0];
}
/// <summary>
/// Writes data to the application process.
/// </summary>

View file

@ -324,6 +324,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
if (!images.TryAdd(imageName, texOp))
{
// Ensure that all texture operations share the same format.
// This avoid errors like mismatched formats.
texOp.Format = images[imageName].Format;
continue;
}

View file

@ -229,7 +229,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
}
else
{
suffix = texOp.Handle.ToString();
suffix = texOp.Handle.ToString("X");
if ((texOp.Type & SamplerType.Indexed) != 0)
{
@ -242,7 +242,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static string GetImageName(ShaderStage stage, AstTextureOperation texOp, string indexExpr)
{
string suffix = texOp.Handle.ToString();
string suffix = texOp.Handle.ToString("X");
if ((texOp.Type & SamplerType.Indexed) != 0)
{

View file

@ -879,6 +879,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (op.IsBindless)
{
sourcesList.Add(Ra());
flags |= TextureFlags.Bindless;
}
SamplerType type = ConvertSamplerType(op.Dimensions);
@ -1081,6 +1083,24 @@ namespace Ryujinx.Graphics.Shader.Instructions
SamplerType type = ConvertSamplerType(op.Dimensions);
bool hasLod = op.LodMode > TextureLodMode.LodZero;
if (type == SamplerType.Texture1D && (flags & ~TextureFlags.Bindless) == TextureFlags.IntCoords && !(hasLod ||
op.HasDepthCompare ||
op.HasOffset ||
op.IsArray ||
op.IsMultisample))
{
// For bindless, we don't have any way to know the texture type,
// so we assume it's texture buffer when the sampler type is 1D, since that's more common.
bool isTypeBuffer = isBindless || context.Config.GpuAccessor.QueryIsTextureBuffer(op.Immediate);
if (isTypeBuffer)
{
type = SamplerType.TextureBuffer;
}
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
@ -1095,8 +1115,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
type |= SamplerType.Array;
}
bool hasLod = op.LodMode > TextureLodMode.LodZero;
Operand lodValue = hasLod ? Rb() : ConstF(0);
Operand packedOffs = op.HasOffset ? Rb() : null;
@ -1110,7 +1128,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
if ((op.LodMode == TextureLodMode.LodZero ||
op.LodMode == TextureLodMode.LodLevel ||
op.LodMode == TextureLodMode.LodLevelA) && !op.IsMultisample)
op.LodMode == TextureLodMode.LodLevelA) && !op.IsMultisample && type != SamplerType.TextureBuffer)
{
sourcesList.Add(lodValue);
@ -1142,16 +1160,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
type |= SamplerType.Multisample;
}
if (type == SamplerType.Texture1D && flags == TextureFlags.IntCoords && !isBindless)
{
bool isTypeBuffer = context.Config.GpuAccessor.QueryIsTextureBuffer(op.Immediate);
if (isTypeBuffer)
{
type = SamplerType.TextureBuffer;
}
}
Operand[] sources = sourcesList.ToArray();
int rdIndex = op.Rd.Index;

View file

@ -1,3 +1,5 @@
using System;
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
class Operation : INode
@ -78,6 +80,18 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
_sources[index] = source;
}
protected void RemoveSource(int index)
{
SetSource(index, null);
Operand[] newSources = new Operand[_sources.Length - 1];
Array.Copy(_sources, 0, newSources, 0, index);
Array.Copy(_sources, index + 1, newSources, index, _sources.Length - (index + 1));
_sources = newSources;
}
public void TurnIntoCopy(Operand source)
{
TurnInto(Instruction.Copy, source);

View file

@ -26,8 +26,18 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
public void TurnIntoIndexed(int handle)
{
Type |= SamplerType.Indexed;
Flags &= ~TextureFlags.Bindless;
Handle = handle;
}
public void SetHandle(int handle)
{
if ((Flags & TextureFlags.Bindless) != 0)
{
Flags &= ~TextureFlags.Bindless;
RemoveSource(0);
}
Handle = handle;
}

View file

@ -5,7 +5,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
class AstTextureOperation : AstOperation
{
public SamplerType Type { get; }
public TextureFormat Format { get; }
public TextureFormat Format { get; set; }
public TextureFlags Flags { get; }
public int Handle { get; }

View file

@ -1,6 +1,7 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
@ -27,9 +28,17 @@ namespace Ryujinx.Graphics.Shader.Translation
node = RewriteGlobalAccess(node, config);
}
if (operation.Inst == Instruction.TextureSample)
if (operation is TextureOperation texOp)
{
node = RewriteTextureSample(node, config);
if (texOp.Inst == Instruction.TextureSample)
{
node = RewriteTextureSample(node, config);
}
if (texOp.Type == SamplerType.TextureBuffer)
{
node = InsertSnormNormalization(node, config);
}
}
}
}
@ -419,5 +428,57 @@ namespace Ryujinx.Graphics.Shader.Translation
return node;
}
private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
{
TextureOperation texOp = (TextureOperation)node.Value;
TextureFormat format = config.GpuAccessor.QueryTextureFormat(texOp.Handle);
int maxPositive = format switch
{
TextureFormat.R8Snorm => sbyte.MaxValue,
TextureFormat.R8G8Snorm => sbyte.MaxValue,
TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
TextureFormat.R16Snorm => short.MaxValue,
TextureFormat.R16G16Snorm => short.MaxValue,
TextureFormat.R16G16B16A16Snorm => short.MaxValue,
_ => 0
};
// The value being 0 means that the format is not a SNORM format, so there's nothing to do here.
if (maxPositive == 0)
{
return node;
}
// Do normalization. We assume SINT formats are being used as replacement for SNORM (that is not supported).
INode[] uses = texOp.Dest.UseOps.ToArray();
Operation convOp = new Operation(Instruction.ConvertS32ToFP, Local(), texOp.Dest);
Operation normOp = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), convOp.Dest, ConstF(1f / maxPositive));
node = node.List.AddAfter(node, convOp);
node = node.List.AddAfter(node, normOp);
foreach (INode useOp in uses)
{
if (!(useOp is Operation op))
{
continue;
}
// Replace all uses of the texture pixel value with the normalized value.
for (int index = 0; index < op.SourcesCount; index++)
{
if (op.GetSource(index) == texOp.Dest)
{
op.SetSource(index, normOp.Dest);
}
}
}
return node;
}
}
}

View file

@ -0,0 +1,50 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
class BindlessElimination
{
public static void RunPass(BasicBlock block)
{
// We can turn a bindless into regular access by recognizing the pattern
// produced by the compiler for separate texture and sampler.
// We check for the following conditions:
// - The handle is the result of a bitwise OR logical operation.
// - Both sources of the OR operation comes from CB2 (used by NVN to hold texture handles).
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
{
if (!(node.Value is TextureOperation texOp))
{
continue;
}
if ((texOp.Flags & TextureFlags.Bindless) == 0)
{
continue;
}
if (!(texOp.GetSource(0).AsgOp is Operation handleCombineOp))
{
continue;
}
if (handleCombineOp.Inst != Instruction.BitwiseOr)
{
continue;
}
Operand src0 = handleCombineOp.GetSource(0);
Operand src1 = handleCombineOp.GetSource(1);
if (src0.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != 2 ||
src1.Type != OperandType.ConstantBuffer || src1.GetCbufSlot() != 2)
{
continue;
}
texOp.SetHandle(src0.GetCbufOffset() | (src1.GetCbufOffset() << 16));
}
}
}
}

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
@ -88,6 +89,22 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
BindlessToIndexed.RunPass(blocks[blkIndex]);
BindlessElimination.RunPass(blocks[blkIndex]);
// Try to eliminate any operations that are now unused.
LinkedListNode<INode> node = blocks[blkIndex].Operations.First;
while (node != null)
{
LinkedListNode<INode> nextNode = node.Next;
if (IsUnused(node.Value))
{
RemoveNode(blocks[blkIndex], node);
}
node = nextNode;
}
}
}