using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Shader.Cache;
using Ryujinx.Graphics.Gpu.Shader.Cache.Definition;
using Ryujinx.Graphics.Gpu.State;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Memory cache of shader code.
    /// </summary>
    class ShaderCache : IDisposable
    {
        private const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;

        private readonly GpuContext _context;

        private readonly ShaderDumper _dumper;

        // In-memory caches, keyed by the guest GPU address(es) of the shader code.
        // Several bundles can share a key when the code at that address changes over time.
        private readonly Dictionary<ulong, List<ShaderBundle>> _cpPrograms;
        private readonly Dictionary<ShaderAddresses, List<ShaderBundle>> _gpPrograms;

        // Null when the disk shader cache is disabled (see Initialize).
        private CacheManager _cacheManager;

        // Programs known to the disk cache, keyed by the hash of the guest program.
        private Dictionary<Hash128, ShaderBundle> _gpProgramsDiskCache;
        private Dictionary<Hash128, ShaderBundle> _cpProgramsDiskCache;

        /// <summary>
        /// Version of the codegen (to be changed when codegen or guest format change).
        /// </summary>
        private const ulong ShaderCodeGenVersion = 2088;

        // Progress reporting helpers
        private int _shaderCount;
        private readonly AutoResetEvent _progressReportEvent;
        public event Action<bool> ShaderCacheStateChanged;
        public event Action<int, int> ShaderCacheProgressChanged;

        /// <summary>
        /// Creates a new instance of the shader cache.
        /// </summary>
        /// <param name="context">GPU context that the shader cache belongs to</param>
        public ShaderCache(GpuContext context)
        {
            _context = context;

            _dumper = new ShaderDumper();

            _cpPrograms = new Dictionary<ulong, List<ShaderBundle>>();
            _gpPrograms = new Dictionary<ShaderAddresses, List<ShaderBundle>>();
            _gpProgramsDiskCache = new Dictionary<Hash128, ShaderBundle>();
            _cpProgramsDiskCache = new Dictionary<Hash128, ShaderBundle>();

            _progressReportEvent = new AutoResetEvent(false);
        }

        /// <summary>
        /// Initialize the cache.
        /// </summary>
        /// <remarks>
        /// Does nothing unless the shader cache is enabled and a title id is set.
        /// Otherwise every guest program listed by the cache manager is loaded (or rebuilt
        /// when its host binary is missing or rejected) and registered in the disk cache
        /// dictionaries. <see cref="ShaderCacheStateChanged"/> is raised with true before
        /// loading and with false once loading has completed.
        /// </remarks>
        internal void Initialize()
        {
            if (!GraphicsConfig.EnableShaderCache || GraphicsConfig.TitleId == null)
            {
                return;
            }

            _cacheManager = new CacheManager(CacheGraphicsApi.OpenGL, CacheHashType.XxHash128, "glsl", GraphicsConfig.TitleId, ShaderCodeGenVersion);

            bool isReadOnly = _cacheManager.IsReadOnly;

            // Only tracked when the cache is writable, as broken entries can't be removed otherwise.
            HashSet<Hash128> invalidEntries = null;

            if (isReadOnly)
            {
                Logger.Warning?.Print(LogClass.Gpu, "Loading shader cache in read-only mode (cache in use by another program!)");
            }
            else
            {
                invalidEntries = new HashSet<Hash128>();
            }

            ReadOnlySpan<Hash128> guestProgramList = _cacheManager.GetGuestProgramList();

            _progressReportEvent.Reset();
            _shaderCount = 0;

            ShaderCacheStateChanged?.Invoke(true);
            ThreadPool.QueueUserWorkItem(ProgressLogger, guestProgramList.Length);

            try
            {
                for (int programIndex = 0; programIndex < guestProgramList.Length; programIndex++)
                {
                    Hash128 key = guestProgramList[programIndex];

                    // Null when there is no host binary stored for this program.
                    byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key);

                    // If the program sources aren't in the cache, compile from saved guest program.
                    byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key);

                    if (guestProgram == null)
                    {
                        Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)");

                        // Should not happen, but if someone messed with the cache it's better to catch it.
                        invalidEntries?.Add(key);

                        continue;
                    }

                    ReadOnlySpan<byte> guestProgramReadOnlySpan = guestProgram;

                    ReadOnlySpan<GuestShaderCacheEntry> cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader);

                    if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute)
                    {
                        Debug.Assert(cachedShaderEntries.Length == 1);

                        ShaderBundle bundle = LoadComputeProgram(ref key, cachedShaderEntries[0], hostProgramBinary, isReadOnly);

                        _cpProgramsDiskCache.Add(key, bundle);
                    }
                    else
                    {
                        Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages);

                        TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader);

                        ShaderBundle bundle = LoadGraphicsProgram(ref key, cachedShaderEntries, tfd, hostProgramBinary, isReadOnly);

                        _gpProgramsDiskCache.Add(key, bundle);
                    }

                    // programIndex is zero based, so the amount of loaded programs is one more than it.
                    _shaderCount = programIndex + 1;
                }

                if (!isReadOnly)
                {
                    // Remove entries that are broken in the cache
                    _cacheManager.RemoveManifestEntries(invalidEntries);
                    _cacheManager.FlushToArchive();
                    _cacheManager.Synchronize();
                }
            }
            finally
            {
                // Always stop the progress logger, otherwise its loop would never exit if loading throws.
                _progressReportEvent.Set();
            }

            ShaderCacheStateChanged?.Invoke(false);

            Logger.Info?.Print(LogClass.Gpu, $"Shader cache loaded {_shaderCount} entries.");
        }

        /// <summary>
        /// Loads a compute program from the disk cache, rebuilding the host program from
        /// the guest code when the host binary is missing or can't be loaded.
        /// </summary>
        /// <param name="key">Hash of the guest program</param>
        /// <param name="entry">Guest cache entry of the compute shader</param>
        /// <param name="hostProgramBinary">Cached host program data, or null if not in cache</param>
        /// <param name="isReadOnly">True if the disk cache must not be modified</param>
        /// <returns>Bundle with the host program and the shader code holder</returns>
        private ShaderBundle LoadComputeProgram(ref Hash128 key, GuestShaderCacheEntry entry, byte[] hostProgramBinary, bool isReadOnly)
        {
            bool hasHostCache = hostProgramBinary != null;

            // Try loading host shader binary.
            IProgram hostProgram = TryLoadHostProgram(hostProgramBinary, out HostShaderCacheEntry[] hostShaderEntries);

            ShaderProgram program;
            ShaderProgramInfo shaderProgramInfo;

            // Reconstruct code holder.
            if (hostProgram != null)
            {
                // The host binary is usable, so the translated code is not needed.
                program = new ShaderProgram(entry.Header.Stage, "");
                shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo();
            }
            else
            {
                IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);

                program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo);
            }

            ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);

            // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
            if (hostProgram == null)
            {
                Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");

                // Compile shader and create program as the shader program binary got invalidated.
                shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
                hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);

                // As the host program was invalidated, save the new entry in the cache.
                byte[] rebuiltBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader });

                SaveRebuiltHostProgram(ref key, rebuiltBinary, hasHostCache, isReadOnly);
            }

            return new ShaderBundle(hostProgram, shader);
        }

        /// <summary>
        /// Loads a graphics program from the disk cache, rebuilding the host program from
        /// the guest code when the host binary is missing or can't be loaded.
        /// </summary>
        /// <param name="key">Hash of the guest program</param>
        /// <param name="cachedShaderEntries">Guest cache entries, one per shader stage (null for unused stages)</param>
        /// <param name="tfd">Transform feedback descriptors, or null if transform feedback is not used</param>
        /// <param name="hostProgramBinary">Cached host program data, or null if not in cache</param>
        /// <param name="isReadOnly">True if the disk cache must not be modified</param>
        /// <returns>Bundle with the host program and the shader code holders of all stages</returns>
        private ShaderBundle LoadGraphicsProgram(
            ref Hash128 key,
            ReadOnlySpan<GuestShaderCacheEntry> cachedShaderEntries,
            TransformFeedbackDescriptor[] tfd,
            byte[] hostProgramBinary,
            bool isReadOnly)
        {
            ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length];

            TranslationFlags flags = DefaultFlags;

            if (tfd != null)
            {
                flags |= TranslationFlags.Feedback;
            }

            TranslationCounts counts = new TranslationCounts();

            bool hasHostCache = hostProgramBinary != null;

            // Try loading host shader binary.
            IProgram hostProgram = TryLoadHostProgram(hostProgramBinary, out HostShaderCacheEntry[] hostShaderEntries);

            bool isHostProgramValid = hostProgram != null;

            // Reconstruct code holder.
            for (int i = 0; i < cachedShaderEntries.Length; i++)
            {
                GuestShaderCacheEntry entry = cachedShaderEntries[i];

                if (entry == null)
                {
                    continue;
                }

                // A non-zero SizeA means that the entry holds "Vertex A" code after the main code.
                bool hasVertexA = entry.Header.SizeA != 0;

                ShaderProgram program;
                ShaderProgramInfo shaderProgramInfo;

                if (isHostProgramValid)
                {
                    // The host binary is usable, so the translated code is not needed.
                    program = new ShaderProgram(entry.Header.Stage, "");
                    shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
                }
                else
                {
                    IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);

                    TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, flags, counts);

                    if (hasVertexA)
                    {
                        TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, flags | TranslationFlags.VertexA, counts);

                        program = translatorContext.Translate(out shaderProgramInfo, translatorContext2);
                    }
                    else
                    {
                        program = translatorContext.Translate(out shaderProgramInfo);
                    }
                }

                if (hasVertexA)
                {
                    // NOTE: Vertex B comes first in the shader cache.
                    byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray();
                    byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();

                    shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
                }
                else
                {
                    shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
                }
            }

            // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
            if (!isHostProgramValid)
            {
                Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");

                // Compile shaders and create program as the shader program binary got invalidated.
                hostProgram = _context.Renderer.CreateProgram(CompileHostShaders(shaders), tfd);

                // As the host program was invalidated, save the new entry in the cache.
                byte[] rebuiltBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders);

                SaveRebuiltHostProgram(ref key, rebuiltBinary, hasHostCache, isReadOnly);
            }

            return new ShaderBundle(hostProgram, shaders);
        }

        /// <summary>
        /// Tries to create a host program from the host program data stored on the disk cache.
        /// </summary>
        /// <param name="hostProgramBinary">Cached host program data, or null if not in cache</param>
        /// <param name="hostShaderEntries">Host shader entries parsed from the data, or null if there is no data</param>
        /// <returns>The value returned by the renderer for the binary, or null if there is no data</returns>
        private IProgram TryLoadHostProgram(byte[] hostProgramBinary, out HostShaderCacheEntry[] hostShaderEntries)
        {
            if (hostProgramBinary == null)
            {
                hostShaderEntries = null;

                return null;
            }

            hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan<byte> hostProgramBinarySpan);

            return _context.Renderer.LoadProgramBinary(hostProgramBinarySpan.ToArray());
        }

        /// <summary>
        /// Stores a rebuilt host program on the disk cache, unless the cache is read-only.
        /// </summary>
        /// <param name="key">Hash of the guest program</param>
        /// <param name="hostProgramBinary">Host program data to store</param>
        /// <param name="hasHostCache">True if the cache already has a host program for the hash, which is then replaced</param>
        /// <param name="isReadOnly">True if the disk cache must not be modified</param>
        private void SaveRebuiltHostProgram(ref Hash128 key, byte[] hostProgramBinary, bool hasHostCache, bool isReadOnly)
        {
            if (isReadOnly)
            {
                return;
            }

            if (hasHostCache)
            {
                _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary);
            }
            else
            {
                Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)");

                _cacheManager.AddHostProgram(ref key, hostProgramBinary);
            }
        }

        /// <summary>
        /// Compiles the host shader of every stage that has a translated program,
        /// storing it on the matching code holder.
        /// </summary>
        /// <param name="shaders">Code holders of all stages (null or without program for unused stages)</param>
        /// <returns>Compiled host shaders, in stage order</returns>
        private IShader[] CompileHostShaders(ShaderCodeHolder[] shaders)
        {
            List<IShader> hostShaders = new List<IShader>();

            for (int stage = 0; stage < Constants.ShaderStages; stage++)
            {
                ShaderProgram program = shaders[stage]?.Program;

                if (program == null)
                {
                    continue;
                }

                IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code);

                shaders[stage].HostShader = hostShader;

                hostShaders.Add(hostShader);
            }

            return hostShaders.ToArray();
        }

        /// <summary>
        /// Raises ShaderCacheProgressChanged events periodically.
        /// </summary>
        /// <param name="state">Total amount of programs to load, as a boxed int</param>
        private void ProgressLogger(object state)
        {
            const int refreshRate = 100; // ms

            int totalCount = (int)state;

            // WaitOne returns false on timeout, so this reports until the event is signaled.
            do
            {
                ShaderCacheProgressChanged?.Invoke(_shaderCount, totalCount);
            }
            while (!_progressReportEvent.WaitOne(refreshRate));
        }

        /// <summary>
        /// Gets a compute shader from the cache.
        /// </summary>
        /// <remarks>
        /// This automatically translates, compiles and adds the code to the cache if not present.
        /// </remarks>
        /// <param name="state">Current GPU state</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <param name="localSizeX">Local group size X of the compute shader</param>
        /// <param name="localSizeY">Local group size Y of the compute shader</param>
        /// <param name="localSizeZ">Local group size Z of the compute shader</param>
        /// <param name="localMemorySize">Local memory size of the compute shader</param>
        /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
        /// <returns>Compiled compute shader code</returns>
        public ShaderBundle GetComputeShader(
            GpuState state,
            ulong gpuVa,
            int localSizeX,
            int localSizeY,
            int localSizeZ,
            int localMemorySize,
            int sharedMemorySize)
        {
            bool isCached = _cpPrograms.TryGetValue(gpuVa, out List<ShaderBundle> list);

            if (isCached)
            {
                foreach (ShaderBundle cachedCpShader in list)
                {
                    if (IsShaderEqual(cachedCpShader, gpuVa))
                    {
                        return cachedCpShader;
                    }
                }
            }

            TranslatorContext[] shaderContexts = new TranslatorContext[1];

            shaderContexts[0] = DecodeComputeShader(
                state,
                gpuVa,
                localSizeX,
                localSizeY,
                localSizeZ,
                localMemorySize,
                sharedMemorySize);

            bool isShaderCacheEnabled = _cacheManager != null;
            bool isShaderCacheReadOnly = false;

            Hash128 programCodeHash = default;
            GuestShaderCacheEntry[] shaderCacheEntries = null;

            if (isShaderCacheEnabled)
            {
                isShaderCacheReadOnly = _cacheManager.IsReadOnly;

                // Compute hash and prepare data for shader disk cache comparison.
                shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(_context.MemoryManager, shaderContexts);
                programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries);
            }

            ShaderBundle cpShader;

            // Search for the program hash in loaded shaders.
            if (!isShaderCacheEnabled || !_cpProgramsDiskCache.TryGetValue(programCodeHash, out cpShader))
            {
                if (isShaderCacheEnabled)
                {
                    Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!");
                }

                // The shader isn't currently cached, translate it and compile it.
                ShaderCodeHolder shader = TranslateShader(shaderContexts[0]);

                // Shaders using bindless resources are never stored on the disk cache.
                bool isDiskShaderCacheIncompatible = shaderContexts[0].UsedFeatures.HasFlag(FeatureFlags.Bindless);

                shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);

                IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);

                byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader });

                cpShader = new ShaderBundle(hostProgram, shader);

                if (isShaderCacheEnabled && !isDiskShaderCacheIncompatible)
                {
                    _cpProgramsDiskCache.Add(programCodeHash, cpShader);

                    if (!isShaderCacheReadOnly)
                    {
                        _cacheManager.SaveProgram(ref programCodeHash, CacheHelper.CreateGuestProgramDump(shaderCacheEntries), hostProgramBinary);
                    }
                }
            }

            if (!isCached)
            {
                list = new List<ShaderBundle>();

                _cpPrograms.Add(gpuVa, list);
            }

            list.Add(cpShader);

            return cpShader;
        }

        /// <summary>
        /// Gets a graphics shader program from the shader cache.
        /// This includes all the specified shader stages.
        /// </summary>
        /// <remarks>
        /// This automatically translates, compiles and adds the code to the cache if not present.
        /// </remarks>
        /// <param name="state">Current GPU state</param>
        /// <param name="addresses">Addresses of the shaders for each stage</param>
        /// <returns>Compiled graphics shader code</returns>
        public ShaderBundle GetGraphicsShader(GpuState state, ShaderAddresses addresses)
        {
            bool isCached = _gpPrograms.TryGetValue(addresses, out List<ShaderBundle> list);

            if (isCached)
            {
                foreach (ShaderBundle cachedGpShaders in list)
                {
                    if (IsShaderEqual(cachedGpShaders, addresses))
                    {
                        return cachedGpShaders;
                    }
                }
            }

            // Index 0 is reserved for "Vertex A", the regular stages follow from index 1.
            TranslatorContext[] shaderContexts = new TranslatorContext[Constants.ShaderStages + 1];

            TransformFeedbackDescriptor[] tfd = GetTransformFeedbackDescriptors(state);

            TranslationFlags flags = DefaultFlags;

            if (tfd != null)
            {
                flags |= TranslationFlags.Feedback;
            }

            TranslationCounts counts = new TranslationCounts();

            if (addresses.VertexA != 0)
            {
                shaderContexts[0] = DecodeGraphicsShader(state, counts, flags | TranslationFlags.VertexA, ShaderStage.Vertex, addresses.VertexA);
            }

            shaderContexts[1] = DecodeGraphicsShader(state, counts, flags, ShaderStage.Vertex, addresses.Vertex);
            shaderContexts[2] = DecodeGraphicsShader(state, counts, flags, ShaderStage.TessellationControl, addresses.TessControl);
            shaderContexts[3] = DecodeGraphicsShader(state, counts, flags, ShaderStage.TessellationEvaluation, addresses.TessEvaluation);
            shaderContexts[4] = DecodeGraphicsShader(state, counts, flags, ShaderStage.Geometry, addresses.Geometry);
            shaderContexts[5] = DecodeGraphicsShader(state, counts, flags, ShaderStage.Fragment, addresses.Fragment);

            bool isShaderCacheEnabled = _cacheManager != null;
            bool isShaderCacheReadOnly = false;

            Hash128 programCodeHash = default;
            GuestShaderCacheEntry[] shaderCacheEntries = null;

            if (isShaderCacheEnabled)
            {
                isShaderCacheReadOnly = _cacheManager.IsReadOnly;

                // Compute hash and prepare data for shader disk cache comparison.
                shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(_context.MemoryManager, shaderContexts);
                programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries, tfd);
            }

            ShaderBundle gpShaders;

            // Search for the program hash in loaded shaders.
            if (!isShaderCacheEnabled || !_gpProgramsDiskCache.TryGetValue(programCodeHash, out gpShaders))
            {
                if (isShaderCacheEnabled)
                {
                    Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!");
                }

                // The shader isn't currently cached, translate it and compile it.
                ShaderCodeHolder[] shaders = new ShaderCodeHolder[Constants.ShaderStages];

                shaders[0] = TranslateShader(shaderContexts[1], shaderContexts[0]);
                shaders[1] = TranslateShader(shaderContexts[2]);
                shaders[2] = TranslateShader(shaderContexts[3]);
                shaders[3] = TranslateShader(shaderContexts[4]);
                shaders[4] = TranslateShader(shaderContexts[5]);

                // Programs with any stage using bindless resources are never stored on the disk cache.
                bool isDiskShaderCacheIncompatible = false;

                for (int i = 0; i < shaderContexts.Length; i++)
                {
                    if (shaderContexts[i] != null && shaderContexts[i].UsedFeatures.HasFlag(FeatureFlags.Bindless))
                    {
                        isDiskShaderCacheIncompatible = true;
                        break;
                    }
                }

                IProgram hostProgram = _context.Renderer.CreateProgram(CompileHostShaders(shaders), tfd);

                byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders);

                gpShaders = new ShaderBundle(hostProgram, shaders);

                if (isShaderCacheEnabled && !isDiskShaderCacheIncompatible)
                {
                    _gpProgramsDiskCache.Add(programCodeHash, gpShaders);

                    if (!isShaderCacheReadOnly)
                    {
                        _cacheManager.SaveProgram(ref programCodeHash, CacheHelper.CreateGuestProgramDump(shaderCacheEntries, tfd), hostProgramBinary);
                    }
                }
            }

            if (!isCached)
            {
                list = new List<ShaderBundle>();

                _gpPrograms.Add(addresses, list);
            }

            list.Add(gpShaders);

            return gpShaders;
        }

        /// <summary>
        /// Gets transform feedback state from the current GPU state.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <returns>Four transform feedback descriptors for the enabled TFBs, or null if TFB is disabled</returns>
        private TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(GpuState state)
        {
            // NOTE(review): the generic type arguments of the three state accessors below are
            // assumed to be Boolean32, TfState and byte; confirm against GpuState.
            bool tfEnable = state.Get<Boolean32>(MethodOffset.TfEnable);

            if (!tfEnable)
            {
                return null;
            }

            TransformFeedbackDescriptor[] descs = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers];

            for (int i = 0; i < Constants.TotalTransformFeedbackBuffers; i++)
            {
                var tf = state.Get<TfState>(MethodOffset.TfState, i);

                // Never read more than the 0x80 locations that each buffer has on the state.
                int length = (int)Math.Min((uint)tf.VaryingsCount, 0x80);

                var varyingLocations = state.GetSpan<byte>(MethodOffset.TfVaryingLocations + i * 0x80, length).ToArray();

                descs[i] = new TransformFeedbackDescriptor(tf.BufferIndex, tf.Stride, varyingLocations);
            }

            return descs;
        }

        /// <summary>
        /// Checks if compute shader code in memory is equal to the cached shader.
        /// </summary>
        /// <param name="cpShader">Cached compute shader</param>
        /// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
        /// <returns>True if the code is equal, false otherwise</returns>
        private bool IsShaderEqual(ShaderBundle cpShader, ulong gpuVa)
        {
            return IsShaderEqual(cpShader.Shaders[0], gpuVa);
        }

        /// <summary>
        /// Checks if graphics shader code from all stages in memory are equal to the cached shaders.
        /// </summary>
        /// <param name="gpShaders">Cached graphics shaders</param>
        /// <param name="addresses">GPU virtual addresses of all enabled shader stages</param>
        /// <returns>True if the code of all stages is equal, false otherwise</returns>
        private bool IsShaderEqual(ShaderBundle gpShaders, ShaderAddresses addresses)
        {
            for (int stage = 0; stage < gpShaders.Shaders.Length; stage++)
            {
                ShaderCodeHolder shader = gpShaders.Shaders[stage];

                ulong gpuVa = 0;

                switch (stage)
                {
                    case 0: gpuVa = addresses.Vertex; break;
                    case 1: gpuVa = addresses.TessControl; break;
                    case 2: gpuVa = addresses.TessEvaluation; break;
                    case 3: gpuVa = addresses.Geometry; break;
                    case 4: gpuVa = addresses.Fragment; break;
                }

                if (!IsShaderEqual(shader, gpuVa, addresses.VertexA))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Checks if the code of the specified cached shader is equal to the code in memory.
        /// </summary>
        /// <param name="shader">Cached shader to compare with</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <param name="gpuVaA">Optional GPU virtual address of the "Vertex A" binary shader code</param>
        /// <returns>True if the code is equal (or there is no cached shader to compare), false otherwise</returns>
        private bool IsShaderEqual(ShaderCodeHolder shader, ulong gpuVa, ulong gpuVaA = 0)
        {
            // A stage without shader has nothing that could differ.
            if (shader == null)
            {
                return true;
            }

            ReadOnlySpan<byte> memoryCode = _context.MemoryManager.GetSpan(gpuVa, shader.Code.Length);

            bool equals = memoryCode.SequenceEqual(shader.Code);

            // Code2 holds the "Vertex A" code, which must match too when present.
            if (equals && shader.Code2 != null)
            {
                memoryCode = _context.MemoryManager.GetSpan(gpuVaA, shader.Code2.Length);

                equals = memoryCode.SequenceEqual(shader.Code2);
            }

            return equals;
        }

        /// <summary>
        /// Decode the binary Maxwell shader code to a translator context.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <param name="localSizeX">Local group size X of the compute shader</param>
        /// <param name="localSizeY">Local group size Y of the compute shader</param>
        /// <param name="localSizeZ">Local group size Z of the compute shader</param>
        /// <param name="localMemorySize">Local memory size of the compute shader</param>
        /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
        /// <returns>The generated translator context, or null if the address is zero</returns>
        private TranslatorContext DecodeComputeShader(
            GpuState state,
            ulong gpuVa,
            int localSizeX,
            int localSizeY,
            int localSizeZ,
            int localMemorySize,
            int sharedMemorySize)
        {
            if (gpuVa == 0)
            {
                return null;
            }

            GpuAccessor gpuAccessor = new GpuAccessor(_context, state, localSizeX, localSizeY, localSizeZ, localMemorySize, sharedMemorySize);

            return Translator.CreateContext(gpuVa, gpuAccessor, DefaultFlags | TranslationFlags.Compute);
        }

        /// <summary>
        /// Decode the binary Maxwell shader code to a translator context.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <param name="counts">Cumulative shader resource counts</param>
        /// <param name="flags">Flags that controls shader translation</param>
        /// <param name="stage">Shader stage</param>
        /// <param name="gpuVa">GPU virtual address of the shader code</param>
        /// <returns>The generated translator context, or null if the address is zero</returns>
        private TranslatorContext DecodeGraphicsShader(
            GpuState state,
            TranslationCounts counts,
            TranslationFlags flags,
            ShaderStage stage,
            ulong gpuVa)
        {
            if (gpuVa == 0)
            {
                return null;
            }

            GpuAccessor gpuAccessor = new GpuAccessor(_context, state, (int)stage - 1);

            return Translator.CreateContext(gpuVa, gpuAccessor, flags, counts);
        }

        /// <summary>
        /// Translates a previously generated translator context to something that the host API accepts.
        /// </summary>
        /// <remarks>
        /// When a second context is specified ("Vertex A"), both are translated into one shader.
        /// </remarks>
        /// <param name="translatorContext">Current translator context to translate</param>
        /// <param name="translatorContext2">Optional translator context of the shader that should be combined</param>
        /// <returns>Compiled graphics shader code, or null if there is no context to translate</returns>
        private ShaderCodeHolder TranslateShader(TranslatorContext translatorContext, TranslatorContext translatorContext2 = null)
        {
            if (translatorContext == null)
            {
                return null;
            }

            if (translatorContext2 != null)
            {
                byte[] codeA = _context.MemoryManager.GetSpan(translatorContext2.Address, translatorContext2.Size).ToArray();
                byte[] codeB = _context.MemoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray();

                _dumper.Dump(codeA, compute: false, out string fullPathA, out string codePathA);
                _dumper.Dump(codeB, compute: false, out string fullPathB, out string codePathB);

                ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo, translatorContext2);

                if (fullPathA != null && fullPathB != null && codePathA != null && codePathB != null)
                {
                    // Each call prepends, so the header ends up in the order full A, code A, full B, code B.
                    program.Prepend("// " + codePathB);
                    program.Prepend("// " + fullPathB);
                    program.Prepend("// " + codePathA);
                    program.Prepend("// " + fullPathA);
                }

                return new ShaderCodeHolder(program, shaderProgramInfo, codeB, codeA);
            }
            else
            {
                byte[] code = _context.MemoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray();

                _dumper.Dump(code, translatorContext.Stage == ShaderStage.Compute, out string fullPath, out string codePath);

                ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo);

                if (fullPath != null && codePath != null)
                {
                    program.Prepend("// " + codePath);
                    program.Prepend("// " + fullPath);
                }

                return new ShaderCodeHolder(program, shaderProgramInfo, code);
            }
        }

        /// <summary>
        /// Disposes the shader cache, deleting all the cached shaders.
        /// It's an error to use the shader cache after disposal.
        /// </summary>
        public void Dispose()
        {
            foreach (List<ShaderBundle> list in _cpPrograms.Values)
            {
                foreach (ShaderBundle bundle in list)
                {
                    bundle.Dispose();
                }
            }

            foreach (List<ShaderBundle> list in _gpPrograms.Values)
            {
                foreach (ShaderBundle bundle in list)
                {
                    bundle.Dispose();
                }
            }

            _progressReportEvent?.Dispose();
            _cacheManager?.Dispose();
        }
    }
}