GPU: Pass SpanOrArray for Texture SetData to avoid copy (#3745)

* GPU: Pass SpanOrArray for Texture SetData to avoid copy

Texture data is often converted before upload, meaning that an array was allocated to perform the conversion into. However, the backend SetData methods were being passed a Span of that data, and the Multithreaded layer does `ToArray()` on it so that it can be stored for later! This method can't extract the original array, so it creates a copy.

This PR changes the type passed for textures to a new ref struct called SpanOrArray, which is backed by either a ReadOnlySpan or an array. The benefit here is that we can have a ToArray method that doesn't copy if it is originally backed by an array.

This will also avoid a copy when running the ASTC decoder.

On NieR this was taking 38% of texture upload time, which it does a _lot_ of when you move between areas, so there should be a 1.6x performance boost when strictly uploading textures. No doubt this will also improve texture streaming performance in UE4 games, and maybe a small reduction with video playback.

From the numbers, it's probably possible to improve the upload rate by a further 1.6x by performing layout conversion on GPU. I'm not sure if we could improve it further than that - multithreading conversion on CPU would probably result in memory bottleneck.

This doesn't extend to buffers, since we don't convert their data on the GPU emulator side.

* Remove implicit cast to array.
This commit is contained in:
riperiperi 2022-10-08 16:04:47 +01:00 committed by GitHub
parent 1ca0517c99
commit bf77d1cab9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 164 additions and 56 deletions

View file

@ -0,0 +1,89 @@
using System;
namespace Ryujinx.Common.Memory
{
/// <summary>
/// A struct that can represent both a Span and Array.
/// This is useful to keep the Array representation when possible to avoid copies.
/// </summary>
/// <typeparam name="T">Element Type</typeparam>
public ref struct SpanOrArray<T> where T : unmanaged
{
public readonly T[] Array;
public readonly ReadOnlySpan<T> Span;
/// <summary>
/// Create a new SpanOrArray from an array.
/// </summary>
/// <param name="array">Array to store</param>
public SpanOrArray(T[] array)
{
Array = array;
Span = ReadOnlySpan<T>.Empty;
}
/// <summary>
/// Create a new SpanOrArray from a readonly span.
/// </summary>
/// <param name="array">Span to store</param>
public SpanOrArray(ReadOnlySpan<T> span)
{
Array = null;
Span = span;
}
/// <summary>
/// Return the contained array, or convert the span if necessary.
/// </summary>
/// <returns>An array containing the data</returns>
public T[] ToArray()
{
return Array ?? Span.ToArray();
}
/// <summary>
/// Return a ReadOnlySpan from either the array or ReadOnlySpan.
/// </summary>
/// <returns>A ReadOnlySpan containing the data</returns>
public ReadOnlySpan<T> AsSpan()
{
return Array ?? Span;
}
/// <summary>
/// Cast an array to a SpanOrArray.
/// </summary>
/// <param name="array">Source array</param>
public static implicit operator SpanOrArray<T>(T[] array)
{
return new SpanOrArray<T>(array);
}
/// <summary>
/// Cast a ReadOnlySpan to a SpanOrArray.
/// </summary>
/// <param name="span">Source ReadOnlySpan</param>
public static implicit operator SpanOrArray<T>(ReadOnlySpan<T> span)
{
return new SpanOrArray<T>(span);
}
/// <summary>
/// Cast a Span to a SpanOrArray.
/// </summary>
/// <param name="span">Source Span</param>
public static implicit operator SpanOrArray<T>(Span<T> span)
{
return new SpanOrArray<T>(span);
}
/// <summary>
/// Cast a SpanOrArray to a ReadOnlySpan
/// </summary>
/// <param name="spanOrArray">Source SpanOrArray</param>
public static implicit operator ReadOnlySpan<T>(SpanOrArray<T> spanOrArray)
{
return spanOrArray.AsSpan();
}
}
}

View file

@ -1,3 +1,4 @@
using Ryujinx.Common.Memory;
using System;
namespace Ryujinx.Graphics.GAL
@ -17,9 +18,9 @@ namespace Ryujinx.Graphics.GAL
ReadOnlySpan<byte> GetData();
ReadOnlySpan<byte> GetData(int layer, int level);
void SetData(ReadOnlySpan<byte> data);
void SetData(ReadOnlySpan<byte> data, int layer, int level);
void SetData(ReadOnlySpan<byte> data, int layer, int level, Rectangle<int> region);
void SetData(SpanOrArray<byte> data);
void SetData(SpanOrArray<byte> data, int layer, int level);
void SetData(SpanOrArray<byte> data, int layer, int level, Rectangle<int> region);
void SetStorage(BufferRange buffer);
void Release();
}

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.GAL.Multithreading.Commands.Texture;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL.Multithreading.Commands.Texture;
using Ryujinx.Graphics.GAL.Multithreading.Model;
using System;
@ -107,19 +108,19 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Resources
}
}
public void SetData(ReadOnlySpan<byte> data)
public void SetData(SpanOrArray<byte> data)
{
_renderer.New<TextureSetDataCommand>().Set(Ref(this), Ref(data.ToArray()));
_renderer.QueueCommand();
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level)
public void SetData(SpanOrArray<byte> data, int layer, int level)
{
_renderer.New<TextureSetDataSliceCommand>().Set(Ref(this), Ref(data.ToArray()), layer, level);
_renderer.QueueCommand();
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level, Rectangle<int> region)
public void SetData(SpanOrArray<byte> data, int layer, int level, Rectangle<int> region)
{
_renderer.New<TextureSetDataSliceRegionCommand>().Set(Ref(this), Ref(data.ToArray()), layer, level, region);
_renderer.QueueCommand();

View file

@ -229,7 +229,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
if (target != null)
{
ReadOnlySpan<byte> data;
byte[] data;
if (srcLinear)
{
data = LayoutConverter.ConvertLinearStridedToLinear(

View file

@ -1,5 +1,6 @@
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Texture;
@ -720,9 +721,9 @@ namespace Ryujinx.Graphics.Gpu.Image
}
}
data = ConvertToHostCompatibleFormat(data);
SpanOrArray<byte> result = ConvertToHostCompatibleFormat(data);
HostTexture.SetData(data);
HostTexture.SetData(result);
_hasData = true;
}
@ -731,7 +732,7 @@ namespace Ryujinx.Graphics.Gpu.Image
/// Uploads new texture data to the host GPU.
/// </summary>
/// <param name="data">New data</param>
public void SetData(ReadOnlySpan<byte> data)
public void SetData(SpanOrArray<byte> data)
{
BlacklistScale();
@ -750,7 +751,7 @@ namespace Ryujinx.Graphics.Gpu.Image
/// <param name="data">New data</param>
/// <param name="layer">Target layer</param>
/// <param name="level">Target level</param>
public void SetData(ReadOnlySpan<byte> data, int layer, int level)
public void SetData(SpanOrArray<byte> data, int layer, int level)
{
BlacklistScale();
@ -786,7 +787,7 @@ namespace Ryujinx.Graphics.Gpu.Image
/// <param name="level">Mip level to convert</param>
/// <param name="single">True to convert a single slice</param>
/// <returns>Converted data</returns>
public ReadOnlySpan<byte> ConvertToHostCompatibleFormat(ReadOnlySpan<byte> data, int level = 0, bool single = false)
public SpanOrArray<byte> ConvertToHostCompatibleFormat(ReadOnlySpan<byte> data, int level = 0, bool single = false)
{
int width = Info.Width;
int height = Info.Height;
@ -799,9 +800,11 @@ namespace Ryujinx.Graphics.Gpu.Image
height = Math.Max(height >> level, 1);
depth = Math.Max(depth >> level, 1);
SpanOrArray<byte> result;
if (Info.IsLinear)
{
data = LayoutConverter.ConvertLinearStridedToLinear(
result = LayoutConverter.ConvertLinearStridedToLinear(
width,
height,
Info.FormatInfo.BlockWidth,
@ -813,7 +816,7 @@ namespace Ryujinx.Graphics.Gpu.Image
}
else
{
data = LayoutConverter.ConvertBlockLinearToLinear(
result = LayoutConverter.ConvertBlockLinearToLinear(
width,
height,
depth,
@ -836,7 +839,7 @@ namespace Ryujinx.Graphics.Gpu.Image
if (!_context.Capabilities.SupportsAstcCompression && Format.IsAstc())
{
if (!AstcDecoder.TryDecodeToRgba8P(
data.ToArray(),
result.ToArray(),
Info.FormatInfo.BlockWidth,
Info.FormatInfo.BlockHeight,
width,
@ -856,11 +859,11 @@ namespace Ryujinx.Graphics.Gpu.Image
decoded = BCnEncoder.EncodeBC7(decoded, width, height, depth, levels, layers);
}
data = decoded;
result = decoded;
}
else if (!_context.Capabilities.SupportsR4G4Format && Format == Format.R4G4Unorm)
{
data = PixelConverter.ConvertR4G4ToR4G4B4A4(data);
result = PixelConverter.ConvertR4G4ToR4G4B4A4(result);
}
else if (!TextureCompatibility.HostSupportsBcFormat(Format, Target, _context.Capabilities))
{
@ -868,36 +871,36 @@ namespace Ryujinx.Graphics.Gpu.Image
{
case Format.Bc1RgbaSrgb:
case Format.Bc1RgbaUnorm:
data = BCnDecoder.DecodeBC1(data, width, height, depth, levels, layers);
result = BCnDecoder.DecodeBC1(result, width, height, depth, levels, layers);
break;
case Format.Bc2Srgb:
case Format.Bc2Unorm:
data = BCnDecoder.DecodeBC2(data, width, height, depth, levels, layers);
result = BCnDecoder.DecodeBC2(result, width, height, depth, levels, layers);
break;
case Format.Bc3Srgb:
case Format.Bc3Unorm:
data = BCnDecoder.DecodeBC3(data, width, height, depth, levels, layers);
result = BCnDecoder.DecodeBC3(result, width, height, depth, levels, layers);
break;
case Format.Bc4Snorm:
case Format.Bc4Unorm:
data = BCnDecoder.DecodeBC4(data, width, height, depth, levels, layers, Format == Format.Bc4Snorm);
result = BCnDecoder.DecodeBC4(result, width, height, depth, levels, layers, Format == Format.Bc4Snorm);
break;
case Format.Bc5Snorm:
case Format.Bc5Unorm:
data = BCnDecoder.DecodeBC5(data, width, height, depth, levels, layers, Format == Format.Bc5Snorm);
result = BCnDecoder.DecodeBC5(result, width, height, depth, levels, layers, Format == Format.Bc5Snorm);
break;
case Format.Bc6HSfloat:
case Format.Bc6HUfloat:
data = BCnDecoder.DecodeBC6(data, width, height, depth, levels, layers, Format == Format.Bc6HSfloat);
result = BCnDecoder.DecodeBC6(result, width, height, depth, levels, layers, Format == Format.Bc6HSfloat);
break;
case Format.Bc7Srgb:
case Format.Bc7Unorm:
data = BCnDecoder.DecodeBC7(data, width, height, depth, levels, layers);
result = BCnDecoder.DecodeBC7(result, width, height, depth, levels, layers);
break;
}
}
return data;
return result;
}
/// <summary>

View file

@ -1,4 +1,5 @@
using Ryujinx.Cpu.Tracking;
using Ryujinx.Common.Memory;
using Ryujinx.Cpu.Tracking;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Texture;
@ -348,9 +349,9 @@ namespace Ryujinx.Graphics.Gpu.Image
ReadOnlySpan<byte> data = _physicalMemory.GetSpan(Storage.Range.GetSlice((ulong)offset, (ulong)size));
data = Storage.ConvertToHostCompatibleFormat(data, info.BaseLevel, true);
SpanOrArray<byte> result = Storage.ConvertToHostCompatibleFormat(data, info.BaseLevel, true);
Storage.SetData(data, info.BaseLayer, info.BaseLevel);
Storage.SetData(result, info.BaseLayer, info.BaseLevel);
offsetIndex++;
}

View file

@ -1,4 +1,5 @@
using OpenTK.Graphics.OpenGL;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using System;
@ -48,17 +49,19 @@ namespace Ryujinx.Graphics.OpenGL.Image
return GetData();
}
public void SetData(ReadOnlySpan<byte> data)
public void SetData(SpanOrArray<byte> data)
{
Buffer.SetData(_buffer, _bufferOffset, data.Slice(0, Math.Min(data.Length, _bufferSize)));
var dataSpan = data.AsSpan();
Buffer.SetData(_buffer, _bufferOffset, dataSpan.Slice(0, Math.Min(dataSpan.Length, _bufferSize)));
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level)
public void SetData(SpanOrArray<byte> data, int layer, int level)
{
throw new NotSupportedException();
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level, Rectangle<int> region)
public void SetData(SpanOrArray<byte> data, int layer, int level, Rectangle<int> region)
{
throw new NotSupportedException();
}

View file

@ -1,5 +1,6 @@
using OpenTK.Graphics.OpenGL;
using Ryujinx.Common;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using System;
@ -317,32 +318,36 @@ namespace Ryujinx.Graphics.OpenGL.Image
}
}
public void SetData(ReadOnlySpan<byte> data)
public void SetData(SpanOrArray<byte> data)
{
var dataSpan = data.AsSpan();
if (Format == Format.S8UintD24Unorm)
{
data = FormatConverter.ConvertS8D24ToD24S8(data);
dataSpan = FormatConverter.ConvertS8D24ToD24S8(dataSpan);
}
unsafe
{
fixed (byte* ptr = data)
fixed (byte* ptr = dataSpan)
{
ReadFrom((IntPtr)ptr, data.Length);
ReadFrom((IntPtr)ptr, dataSpan.Length);
}
}
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level)
public void SetData(SpanOrArray<byte> data, int layer, int level)
{
var dataSpan = data.AsSpan();
if (Format == Format.S8UintD24Unorm)
{
data = FormatConverter.ConvertS8D24ToD24S8(data);
dataSpan = FormatConverter.ConvertS8D24ToD24S8(dataSpan);
}
unsafe
{
fixed (byte* ptr = data)
fixed (byte* ptr = dataSpan)
{
int width = Math.Max(Info.Width >> level, 1);
int height = Math.Max(Info.Height >> level, 1);
@ -352,11 +357,13 @@ namespace Ryujinx.Graphics.OpenGL.Image
}
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level, Rectangle<int> region)
public void SetData(SpanOrArray<byte> data, int layer, int level, Rectangle<int> region)
{
var dataSpan = data.AsSpan();
if (Format == Format.S8UintD24Unorm)
{
data = FormatConverter.ConvertS8D24ToD24S8(data);
dataSpan = FormatConverter.ConvertS8D24ToD24S8(dataSpan);
}
int wInBlocks = BitUtils.DivRoundUp(region.Width, Info.BlockWidth);
@ -364,7 +371,7 @@ namespace Ryujinx.Graphics.OpenGL.Image
unsafe
{
fixed (byte* ptr = data)
fixed (byte* ptr = dataSpan)
{
ReadFrom2D(
(IntPtr)ptr,

View file

@ -93,7 +93,7 @@ namespace Ryujinx.Graphics.Texture
};
}
public static Span<byte> ConvertBlockLinearToLinear(
public static byte[] ConvertBlockLinearToLinear(
int width,
int height,
int depth,
@ -119,7 +119,7 @@ namespace Ryujinx.Graphics.Texture
blockHeight,
bytesPerPixel);
Span<byte> output = new byte[outSize];
byte[] output = new byte[outSize];
int outOffs = 0;
@ -246,7 +246,7 @@ namespace Ryujinx.Graphics.Texture
return output;
}
public static Span<byte> ConvertLinearStridedToLinear(
public static byte[] ConvertLinearStridedToLinear(
int width,
int height,
int blockWidth,
@ -262,14 +262,15 @@ namespace Ryujinx.Graphics.Texture
int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
lineSize = Math.Min(lineSize, outStride);
Span<byte> output = new byte[h * outStride];
byte[] output = new byte[h * outStride];
Span<byte> outSpan = output;
int outOffs = 0;
int inOffs = 0;
for (int y = 0; y < h; y++)
{
data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));
data.Slice(inOffs, lineSize).CopyTo(outSpan.Slice(outOffs, lineSize));
inOffs += stride;
outOffs += outStride;

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.GAL;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
using System.Collections.Generic;
@ -90,17 +91,17 @@ namespace Ryujinx.Graphics.Vulkan
_bufferView = null;
}
public void SetData(ReadOnlySpan<byte> data)
public void SetData(SpanOrArray<byte> data)
{
_gd.SetBufferData(_bufferHandle, _offset, data);
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level)
public void SetData(SpanOrArray<byte> data, int layer, int level)
{
throw new NotSupportedException();
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level, Rectangle<int> region)
public void SetData(SpanOrArray<byte> data, int layer, int level, Rectangle<int> region)
{
throw new NotSupportedException();
}

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.GAL;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
using System.Collections.Generic;
@ -873,17 +874,17 @@ namespace Ryujinx.Graphics.Vulkan
return GetDataFromBuffer(result, size, result);
}
public void SetData(ReadOnlySpan<byte> data)
public void SetData(SpanOrArray<byte> data)
{
SetData(data, 0, 0, Info.GetLayers(), Info.Levels, singleSlice: false);
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level)
public void SetData(SpanOrArray<byte> data, int layer, int level)
{
SetData(data, layer, level, 1, 1, singleSlice: true);
}
public void SetData(ReadOnlySpan<byte> data, int layer, int level, Rectangle<int> region)
public void SetData(SpanOrArray<byte> data, int layer, int level, Rectangle<int> region)
{
SetData(data, layer, level, 1, 1, singleSlice: true, region);
}