From c9c65af59edea05e7206a076cb818128c004384e Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Tue, 22 Feb 2022 14:21:29 +0000
Subject: [PATCH] Perform unscaled 2d engine copy on CPU if source texture isn't in cache. (#3112)

* Initial implementation of fast 2d copy

TODO: Partial copy for mismatching region/size.

* WIP

* Cleanup

* Update Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs

Co-authored-by: gdkchan

Co-authored-by: gdkchan
---
 Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs | 197 +++++++++++++++++-
 Ryujinx.Graphics.Gpu/GraphicsConfig.cs        |   8 +
 Ryujinx.Graphics.Gpu/Image/TextureCache.cs    |  10 +
 .../Image/TextureSearchFlags.cs               |   3 +-
 Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs  |   9 +-
 5 files changed, 220 insertions(+), 7 deletions(-)

diff --git a/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
index be7065563..d2b6bec37 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs
@@ -1,11 +1,15 @@
-using Ryujinx.Graphics.Device;
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.Engine.Types;
 using Ryujinx.Graphics.Gpu.Image;
 using Ryujinx.Graphics.Texture;
+using Ryujinx.Memory;
 using System;
 using System.Collections.Generic;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Graphics.Gpu.Engine.Twod
 {
@@ -44,6 +48,180 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
         /// <param name="data">Data to be written</param>
         public void Write(int offset, int data) => _state.Write(offset, data);
 
+        /// <summary>
+        /// Determines if data is compatible between the source and destination texture.
+        /// The two textures must have the same size, layout, and bytes per pixel.
+        /// </summary>
+        /// <param name="lhs">Info for the first texture</param>
+        /// <param name="rhs">Info for the second texture</param>
+        /// <param name="lhsFormat">Format of the first texture</param>
+        /// <param name="rhsFormat">Format of the second texture</param>
+        /// <returns>True if the data is compatible, false otherwise</returns>
+        private bool IsDataCompatible(TwodTexture lhs, TwodTexture rhs, FormatInfo lhsFormat, FormatInfo rhsFormat)
+        {
+            if (lhsFormat.BytesPerPixel != rhsFormat.BytesPerPixel ||
+                lhs.Height != rhs.Height ||
+                lhs.Depth != rhs.Depth ||
+                lhs.LinearLayout != rhs.LinearLayout ||
+                lhs.MemoryLayout.Packed != rhs.MemoryLayout.Packed)
+            {
+                return false;
+            }
+
+            if (lhs.LinearLayout)
+            {
+                return lhs.Stride == rhs.Stride;
+            }
+            else
+            {
+                return lhs.Width == rhs.Width;
+            }
+        }
+
+        /// <summary>
+        /// Determine if the given region covers the full texture, also considering width alignment.
+        /// </summary>
+        /// <param name="texture">The texture to check</param>
+        /// <param name="formatInfo">Format of the texture</param>
+        /// <param name="x1">Region start x</param>
+        /// <param name="y1">Region start y</param>
+        /// <param name="x2">Region end x</param>
+        /// <param name="y2">Region end y</param>
+        /// <returns>True if the region covers the full texture, false otherwise</returns>
+        private bool IsCopyRegionComplete(TwodTexture texture, FormatInfo formatInfo, int x1, int y1, int x2, int y2)
+        {
+            if (x1 != 0 || y1 != 0 || y2 != texture.Height)
+            {
+                return false;
+            }
+
+            int width;
+            int widthAlignment;
+
+            if (texture.LinearLayout)
+            {
+                widthAlignment = 1;
+                width = texture.Stride / formatInfo.BytesPerPixel;
+            }
+            else
+            {
+                widthAlignment = Constants.GobAlignment / formatInfo.BytesPerPixel;
+                width = texture.Width;
+            }
+
+            return width == BitUtils.AlignUp(x2, widthAlignment);
+        }
+
+        /// <summary>
+        /// Performs a full data copy between two textures, reading and writing guest memory directly.
+        /// The textures must have a matching layout, size, and bytes per pixel.
+        /// </summary>
+        /// <param name="src">The source texture</param>
+        /// <param name="dst">The destination texture</param>
+        /// <param name="w">Copy width</param>
+        /// <param name="h">Copy height</param>
+        /// <param name="bpp">Bytes per pixel</param>
+        private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
+        {
+            var srcCalculator = new OffsetCalculator(
+                w,
+                h,
+                src.Stride,
+                src.LinearLayout,
+                src.MemoryLayout.UnpackGobBlocksInY(),
+                src.MemoryLayout.UnpackGobBlocksInZ(),
+                bpp);
+
+            (int _, int srcSize) = srcCalculator.GetRectangleRange(0, 0, w, h);
+
+            var memoryManager = _channel.MemoryManager;
+
+            ulong srcGpuVa = src.Address.Pack();
+            ulong dstGpuVa = dst.Address.Pack();
+
+            ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa, srcSize, true);
+
+            int width;
+            int height = src.Height;
+            if (src.LinearLayout)
+            {
+                width = src.Stride / bpp;
+            }
+            else
+            {
+                width = src.Width;
+            }
+
+            // If the copy is not equal to the width and height of the texture, we will need to copy partially.
+            // It's worth noting that it has already been established that the src and dst are the same size.
+
+            if (w == width && h == height)
+            {
+                memoryManager.Write(dstGpuVa, srcSpan);
+            }
+            else
+            {
+                using WritableRegion dstRegion = memoryManager.GetWritableRegion(dstGpuVa, srcSize, true);
+                Span<byte> dstSpan = dstRegion.Memory.Span;
+
+                if (src.LinearLayout)
+                {
+                    int stride = src.Stride;
+                    int offset = 0;
+                    int lineSize = width * bpp;
+
+                    for (int y = 0; y < height; y++)
+                    {
+                        srcSpan.Slice(offset, lineSize).CopyTo(dstSpan.Slice(offset));
+
+                        offset += stride;
+                    }
+                }
+                else
+                {
+                    // Copy with the block linear layout in mind.
+                    // Recreate the offset calculator with bpp 1 for copy.
+
+                    int stride = w * bpp;
+
+                    srcCalculator = new OffsetCalculator(
+                        stride,
+                        h,
+                        0,
+                        false,
+                        src.MemoryLayout.UnpackGobBlocksInY(),
+                        src.MemoryLayout.UnpackGobBlocksInZ(),
+                        1);
+
+                    int strideTrunc = BitUtils.AlignDown(stride, 16);
+
+                    ReadOnlySpan<Vector128<byte>> srcVec = MemoryMarshal.Cast<byte, Vector128<byte>>(srcSpan);
+                    Span<Vector128<byte>> dstVec = MemoryMarshal.Cast<byte, Vector128<byte>>(dstSpan);
+
+                    for (int y = 0; y < h; y++)
+                    {
+                        int x = 0;
+
+                        srcCalculator.SetY(y);
+
+                        for (; x < strideTrunc; x += 16)
+                        {
+                            int offset = srcCalculator.GetOffset(x) >> 4;
+
+                            dstVec[offset] = srcVec[offset];
+                        }
+
+                        for (; x < stride; x++)
+                        {
+                            int offset = srcCalculator.GetOffset(x);
+
+                            dstSpan[offset] = srcSpan[offset];
+                        }
+                    }
+                }
+            }
+        }
+
         /// <summary>
         /// Performs the blit operation, triggered by the register write.
         /// </summary>
@@ -114,16 +292,31 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
                 srcX1 = 0;
             }
 
+            FormatInfo dstCopyTextureFormat = dstCopyTexture.Format.Convert();
+
+            bool canDirectCopy = GraphicsConfig.Fast2DCopy &&
+                srcX2 == dstX2 && srcY2 == dstY2 &&
+                IsDataCompatible(srcCopyTexture, dstCopyTexture, srcCopyTextureFormat, dstCopyTextureFormat) &&
+                IsCopyRegionComplete(srcCopyTexture, srcCopyTextureFormat, srcX1, srcY1, srcX2, srcY2) &&
+                IsCopyRegionComplete(dstCopyTexture, dstCopyTextureFormat, dstX1, dstY1, dstX2, dstY2);
+
             var srcTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
                 memoryManager,
                 srcCopyTexture,
                 offset,
                 srcCopyTextureFormat,
+                !canDirectCopy,
                 false,
                 srcHint);
 
             if (srcTexture == null)
             {
+                if (canDirectCopy)
+                {
+                    // Directly copy the data on CPU.
+                    UnscaledFullCopy(srcCopyTexture, dstCopyTexture, srcX2, srcY2, srcCopyTextureFormat.BytesPerPixel);
+                }
+
                 return;
             }
 
@@ -132,7 +325,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
             // When the source texture that was found has a depth format,
             // we must enforce the target texture also has a depth format,
             // as copies between depth and color formats are not allowed.
-            FormatInfo dstCopyTextureFormat;
 
             if (srcTexture.Format.IsDepthOrStencil())
             {
@@ -148,6 +340,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Twod
                 dstCopyTexture,
                 0,
                 dstCopyTextureFormat,
+                true,
                 srcTexture.ScaleMode == TextureScaleMode.Scaled,
                 dstHint);
 
diff --git a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
index d58b8da79..493dbd7bd 100644
--- a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
+++ b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
@@ -28,6 +28,14 @@ namespace Ryujinx.Graphics.Gpu
         /// </summary>
         public static bool FastGpuTime = true;
 
+        /// <summary>
+        /// Enables or disables fast 2d engine texture copies entirely on CPU when possible.
+        /// Reduces stuttering and # of textures in games that copy textures around for streaming,
+        /// as textures will not need to be created for the copy, and the data does not need to be
+        /// flushed from GPU.
+        /// </summary>
+        public static bool Fast2DCopy = true;
+
         /// <summary>
         /// Enables or disables the Just-in-Time compiler for GPU Macro code.
         /// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
index fed89770a..203a3a125 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
@@ -194,6 +194,7 @@ namespace Ryujinx.Graphics.Gpu.Image
             TwodTexture copyTexture,
             ulong offset,
             FormatInfo formatInfo,
+            bool shouldCreate,
             bool preferScaling = true,
             Size? sizeHint = null)
         {
@@ -234,6 +235,11 @@ namespace Ryujinx.Graphics.Gpu.Image
                 flags |= TextureSearchFlags.WithUpscale;
             }
 
+            if (!shouldCreate)
+            {
+                flags |= TextureSearchFlags.NoCreate;
+            }
+
             Texture texture = FindOrCreateTexture(memoryManager, flags, info, 0, sizeHint);
 
             texture?.SynchronizeMemory();
@@ -480,6 +486,10 @@ namespace Ryujinx.Graphics.Gpu.Image
 
                 return texture;
             }
+            else if (flags.HasFlag(TextureSearchFlags.NoCreate))
+            {
+                return null;
+            }
 
             // Calculate texture sizes, used to find all overlapping textures.
             SizeInfo sizeInfo = info.CalculateSizeInfo(layerSize);
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs b/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
index 45e55c02d..aea7b167e 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
@@ -12,6 +12,7 @@ namespace Ryujinx.Graphics.Gpu.Image
         Strict      = 1 << 0,
         ForSampler  = 1 << 1,
         ForCopy     = 1 << 2,
-        WithUpscale = 1 << 3
+        WithUpscale = 1 << 3,
+        NoCreate    = 1 << 4
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
index 3968cb96e..b6395e73f 100644
--- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
@@ -154,14 +154,15 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// <summary>
         /// Gets a writable region from GPU mapped memory.
         /// </summary>
-        /// <param name="address">Start address of the range</param>
+        /// <param name="va">Start address of the range</param>
         /// <param name="size">Size in bytes to be range</param>
+        /// <param name="tracked">True if write tracking is triggered on the span</param>
         /// <returns>A writable region with the data at the specified memory location</returns>
-        public WritableRegion GetWritableRegion(ulong va, int size)
+        public WritableRegion GetWritableRegion(ulong va, int size, bool tracked = false)
         {
             if (IsContiguous(va, size))
             {
-                return Physical.GetWritableRegion(Translate(va), size);
+                return Physical.GetWritableRegion(Translate(va), size, tracked);
             }
             else
             {
@@ -169,7 +170,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
 
                 GetSpan(va, size).CopyTo(memory.Span);
 
-                return new WritableRegion(this, va, memory);
+                return new WritableRegion(this, va, memory, tracked);
             }
         }
 