From 1dd754590fb9850bf00ddacbb860076dbbacabc6 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 26 Jun 2018 16:14:14 -0400
Subject: [PATCH] gl_rasterizer_cache: Implement caching for texture and
 framebuffer surfaces.

gl_rasterizer_cache: Improved cache management based on Citra's implementation.

gl_surface_cache: Add some docstrings.
---
 .../renderer_opengl/gl_rasterizer.cpp         |  25 +++-
 .../renderer_opengl/gl_rasterizer_cache.cpp   | 116 ++++++++++++++++--
 .../renderer_opengl/gl_rasterizer_cache.h     |  43 ++++++-
 3 files changed, 168 insertions(+), 16 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f9b0ce434..62ee45a36 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Mark framebuffer surfaces as dirty
     if (color_surface != nullptr && write_color_fb) {
-        res_cache.FlushSurface(color_surface);
+        res_cache.MarkSurfaceAsDirty(color_surface);
     }
     if (depth_surface != nullptr && write_depth_fb) {
-        res_cache.FlushSurface(depth_surface);
+        res_cache.MarkSurfaceAsDirty(depth_surface);
     }
 }
 
 void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
 
-void RasterizerOpenGL::FlushAll() {}
+void RasterizerOpenGL::FlushAll() {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
+}
 
-void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+}
 
-void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.InvalidateRegion(addr, size);
+}
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+    res_cache.InvalidateRegion(addr, size);
+}
 
 bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index bd35bdb02..71ad7be74 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/textures/astc.h"
@@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
     cur_state.Apply();
 }
 
-CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) {
+CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
     texture.Create();
     const auto& rect{params.GetRect()};
     AllocateSurfaceTexture(texture.handle,
@@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
     draw_framebuffer.Create();
 }
 
+RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
+    while (!surface_cache.empty()) {
+        UnregisterSurface(surface_cache.begin()->second);
+    }
+}
+
 Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
     return GetSurface(SurfaceParams::CreateForTexture(config));
 }
@@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
     surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
 }
 
-void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
-    surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
-    surface->FlushGLBuffer();
+void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
+    if (Settings::values.use_accurate_framebuffers) {
+        // If enabled, always flush dirty surfaces
+        surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+        surface->FlushGLBuffer();
+    } else {
+        // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
+        // and flushes are very slow and do not seem to improve accuracy
+        const auto& params{surface->GetSurfaceParams()};
+        Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
+    }
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
@@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
     Surface surface;
     if (search != surface_cache.end()) {
         surface = search->second;
+        if (Settings::values.use_accurate_framebuffers) {
+            // Reload the surface from Switch memory
+            LoadSurface(surface);
+        }
     } else {
         surface = std::make_shared<CachedSurface>(params);
-        surface_cache[surface_key] = surface;
+        RegisterSurface(surface);
+        LoadSurface(surface);
     }
 
-    LoadSurface(surface);
-
     return surface;
 }
 
@@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
 
     return surfaces[0];
 }
+
+void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) {
+    // TODO(bunnei): Not implemented yet, so this is currently a no-op. We should probably
+    // implement this in the future, but for now, the `use_accurate_framebuffers` setting can be
+    // used to always flush dirty surfaces.
+}
+
+void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
+    for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) {
+        // Copy the surface and advance before unregistering: UnregisterSurface() erases this
+        // entry from surface_cache, which invalidates iterators and references to it
+        const Surface surface{(iter++)->second};
+        if (surface->GetSurfaceParams().IsOverlappingRegion(addr, size)) {
+            UnregisterSurface(surface);
+        }
+    }
+}
+
+void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
+    const auto& params{surface->GetSurfaceParams()};
+    const auto& surface_key{SurfaceKey::Create(params)};
+    const auto& search{surface_cache.find(surface_key)};
+
+    if (search != surface_cache.end()) {
+        // Registered already
+        return;
+    }
+
+    surface_cache[surface_key] = surface;
+    UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
+}
+
+void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
+    const auto& params{surface->GetSurfaceParams()};
+    const auto& surface_key{SurfaceKey::Create(params)};
+    const auto& search{surface_cache.find(surface_key)};
+
+    if (search == surface_cache.end()) {
+        // Unregistered already
+        return;
+    }
+
+    UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
+    surface_cache.erase(search);
+}
+
+template <typename Map, typename Interval>
+constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+    return boost::make_iterator_range(map.equal_range(interval));
+}
+
+void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+    const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) -
+                          (addr >> Tegra::MemoryManager::PAGE_BITS) + 1;
+    const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS;
+    const u64 page_end = page_start + num_pages;
+
+    // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
+    // subtract after iterating
+    const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
+    if (delta > 0)
+        cached_pages.add({pages_interval, delta});
+
+    for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
+        const auto interval = pair.first & pages_interval;
+        const int count = pair.second;
+
+        const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
+                                                    << Tegra::MemoryManager::PAGE_BITS;
+        const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
+                                                  << Tegra::MemoryManager::PAGE_BITS;
+        const u64 interval_size = interval_end_addr - interval_start_addr;
+
+        if (delta > 0 && count == delta)
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
+        else if (delta < 0 && count == -delta)
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
+        else
+            ASSERT(count >= 0);
+    }
+
+    if (delta < 0)
+        cached_pages.add({pages_interval, delta});
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 84bdec652..85e7c8888 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,7 +8,7 @@
 #include <map>
 #include <memory>
 #include <vector>
-
+#include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
@@ -19,6 +19,7 @@
 class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
 using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+using PageMap = boost::icl::interval_map<u64, int>;
 
 struct SurfaceParams {
     enum class PixelFormat {
@@ -243,8 +244,10 @@ struct SurfaceParams {
         return SurfaceType::Invalid;
     }
 
+    /// Returns the rectangle corresponding to this surface
     MathUtil::Rectangle<u32> GetRect() const;
 
+    /// Returns the size of this surface in bytes, adjusted for compression
     size_t SizeInBytes() const {
         const u32 compression_factor{GetCompressionFactor(pixel_format)};
         ASSERT(width % compression_factor == 0);
@@ -253,10 +256,18 @@ struct SurfaceParams {
                GetFormatBpp(pixel_format) / CHAR_BIT;
     }
 
+    /// Returns the CPU virtual address for this surface
     VAddr GetCpuAddr() const;
 
+    /// Returns true if the region overlaps this surface in Switch memory (adjacency is not overlap)
+    bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
+        return addr < (region_addr + region_size) && region_addr < (addr + size_in_bytes);
+    }
+
+    /// Creates SurfaceParams from a texture configuration
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
+    /// Creates SurfaceParams from a framebuffer configuration
     static SurfaceParams CreateForFramebuffer(
         const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
 
@@ -272,6 +283,7 @@ struct SurfaceParams {
     size_t size_in_bytes;
 };
 
+/// Hashable variation of SurfaceParams, used for a key in the surface cache
 struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
     static SurfaceKey Create(const SurfaceParams& params) {
         SurfaceKey res;
@@ -325,18 +337,43 @@ private:
 class RasterizerCacheOpenGL final : NonCopyable {
 public:
     RasterizerCacheOpenGL();
+    ~RasterizerCacheOpenGL();
 
+    /// Get a surface based on the texture configuration
     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
+
+    /// Get the color and depth surfaces based on the framebuffer configuration
     SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
                                                     const MathUtil::Rectangle<s32>& viewport);
-    void LoadSurface(const Surface& surface);
-    void FlushSurface(const Surface& surface);
+
+    /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
+    void MarkSurfaceAsDirty(const Surface& surface);
+
+    /// Tries to find a framebuffer GPU address based on the provided CPU address
     Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
 
+    /// Write any cached resources overlapping the region back to memory (if dirty)
+    void FlushRegion(Tegra::GPUVAddr addr, size_t size);
+
+    /// Mark the specified region as being invalidated
+    void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
+
 private:
+    void LoadSurface(const Surface& surface);
     Surface GetSurface(const SurfaceParams& params);
 
+    /// Register surface into the cache
+    void RegisterSurface(const Surface& surface);
+
+    /// Remove surface from the cache
+    void UnregisterSurface(const Surface& surface);
+
+    /// Increase/decrease the number of surfaces in pages touching the specified region
+    void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
+
     std::unordered_map<SurfaceKey, Surface> surface_cache;
+    PageMap cached_pages;
+
     OGLFramebuffer read_framebuffer;
     OGLFramebuffer draw_framebuffer;
 };