diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e30fea268..85926fc8f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -106,6 +106,7 @@ add_library(common STATIC
     precompiled_headers.h
     quaternion.h
     range_map.h
+    range_mutex.h
     reader_writer_queue.h
     ring_buffer.h
     ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
diff --git a/src/common/range_mutex.h b/src/common/range_mutex.h
new file mode 100644
index 000000000..d6c949811
--- /dev/null
+++ b/src/common/range_mutex.h
@@ -0,0 +1,93 @@
+// SPDX-FileCopyrightText: 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+#include "common/intrusive_list.h"
+
+namespace Common {
+
+class ScopedRangeLock;
+
+class RangeMutex {
+public:
+    explicit RangeMutex() = default;
+    ~RangeMutex() = default;
+
+private:
+    friend class ScopedRangeLock;
+
+    void Lock(ScopedRangeLock& l);
+    void Unlock(ScopedRangeLock& l);
+    bool HasIntersectionLocked(ScopedRangeLock& l);
+
+private:
+    std::mutex m_mutex;
+    std::condition_variable m_cv;
+
+    using LockList = Common::IntrusiveListBaseTraits<ScopedRangeLock>::ListType;
+    LockList m_list;
+};
+
+class ScopedRangeLock : public Common::IntrusiveListBaseNode<ScopedRangeLock> {
+public:
+    explicit ScopedRangeLock(RangeMutex& mutex, u64 address, u64 size)
+        : m_mutex(mutex), m_address(address), m_size(size) {
+        if (m_size > 0) {
+            m_mutex.Lock(*this);
+        }
+    }
+    ~ScopedRangeLock() {
+        if (m_size > 0) {
+            m_mutex.Unlock(*this);
+        }
+    }
+
+    u64 GetAddress() const {
+        return m_address;
+    }
+
+    u64 GetSize() const {
+        return m_size;
+    }
+
+private:
+    RangeMutex& m_mutex;
+    const u64 m_address{};
+    const u64 m_size{};
+};
+
+inline void RangeMutex::Lock(ScopedRangeLock& l) {
+    std::unique_lock lk{m_mutex};
+    m_cv.wait(lk, [&] { return !HasIntersectionLocked(l); });
+    m_list.push_back(l);
+}
+
+inline void RangeMutex::Unlock(ScopedRangeLock& l) {
+    {
+        std::scoped_lock lk{m_mutex};
+        m_list.erase(m_list.iterator_to(l));
+    }
+    m_cv.notify_all();
+}
+
+inline bool RangeMutex::HasIntersectionLocked(ScopedRangeLock& l) {
+    const auto cur_begin = l.GetAddress();
+    const auto cur_last = l.GetAddress() + l.GetSize() - 1;
+
+    for (const auto& other : m_list) {
+        const auto other_begin = other.GetAddress();
+        const auto other_last = other.GetAddress() + other.GetSize() - 1;
+
+        if (cur_begin <= other_last && other_begin <= cur_last) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+} // namespace Common
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index ffeed46cc..63823602c 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -10,6 +10,7 @@
 #include <mutex>
 
 #include "common/common_types.h"
+#include "common/range_mutex.h"
 #include "common/scratch_buffer.h"
 #include "common/virtual_buffer.h"
 
@@ -204,7 +205,7 @@ private:
         (1ULL << (device_virtual_bits - page_bits)) / subentries;
     using CachedPages = std::array<CounterEntry, num_counter_entries>;
     std::unique_ptr<CachedPages> cached_pages;
-    std::mutex counter_guard;
+    Common::RangeMutex counter_guard;
     std::mutex mapping_guard;
 };
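A note on the new primitive above: RangeMutex admits any number of concurrent holders whose closed intervals [address, address + size - 1] are pairwise disjoint, and a conflicting Lock() blocks on the condition variable until an Unlock() erases the overlapping entry and notifies. Two closed intervals [a1, a2] and [b1, b2] intersect iff a1 <= b2 && b1 <= a2, which is exactly the test HasIntersectionLocked performs. The hunks below swap the single std::mutex that used to guard UpdatePagesCachedCount for this lock, so counter updates on non-overlapping device-address ranges no longer serialize. A minimal standalone sketch of the same semantics, not part of the patch (DemoRangeMutex and its members are invented names; std::list stands in for Common::IntrusiveList):

    #include <condition_variable>
    #include <cstdint>
    #include <cstdio>
    #include <list>
    #include <mutex>
    #include <thread>
    #include <utility>

    // Each holder registers a closed interval [begin, last]; Lock() blocks
    // until no registered interval intersects the requested one.
    class DemoRangeMutex {
    public:
        using Range = std::pair<std::uint64_t, std::uint64_t>; // [begin, last]
        using Handle = std::list<Range>::iterator;

        Handle Lock(std::uint64_t address, std::uint64_t size) {
            const Range r{address, address + size - 1};
            std::unique_lock lk{m_mutex};
            m_cv.wait(lk, [&] { return !Intersects(r); });
            return m_ranges.insert(m_ranges.end(), r);
        }

        void Unlock(Handle h) {
            {
                std::scoped_lock lk{m_mutex};
                m_ranges.erase(h);
            }
            m_cv.notify_all(); // wake waiters whose ranges may now be free
        }

    private:
        bool Intersects(const Range& r) const {
            for (const Range& other : m_ranges) {
                // Closed-interval overlap test, as in HasIntersectionLocked.
                if (r.first <= other.second && other.first <= r.second) {
                    return true;
                }
            }
            return false;
        }

        std::mutex m_mutex;
        std::condition_variable m_cv;
        std::list<Range> m_ranges;
    };

    int main() {
        DemoRangeMutex mutex;
        // The two ranges are disjoint, so both threads may hold their locks
        // at the same time; overlapping ranges would serialize instead.
        std::thread a([&] {
            auto h = mutex.Lock(0x0000, 0x1000); // [0x0000, 0x0fff]
            std::puts("thread a inside its range");
            mutex.Unlock(h);
        });
        std::thread b([&] {
            auto h = mutex.Lock(0x1000, 0x1000); // [0x1000, 0x1fff]
            std::puts("thread b inside its range");
            mutex.Unlock(h);
        });
        a.join();
        b.join();
    }

The real header avoids allocation on this path by making each ScopedRangeLock an intrusive list node, so Lock() and Unlock() only splice the caller's own stack object in and out of the list.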
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index eab8a2731..0a59000aa 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -508,12 +508,7 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
 
 template <typename Traits>
 void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
-    std::unique_lock lk(counter_guard, std::defer_lock);
-    const auto Lock = [&] {
-        if (!lk) {
-            lk.lock();
-        }
-    };
+    Common::ScopedRangeLock lk(counter_guard, addr, size);
     u64 uncache_begin = 0;
     u64 cache_begin = 0;
     u64 uncache_bytes = 0;
@@ -548,7 +543,6 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
             }
             uncache_bytes += Memory::YUZU_PAGESIZE;
         } else if (uncache_bytes > 0) {
-            Lock();
             MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
                               uncache_bytes, false);
             uncache_bytes = 0;
@@ -559,7 +553,6 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
             }
             cache_bytes += Memory::YUZU_PAGESIZE;
         } else if (cache_bytes > 0) {
-            Lock();
             MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
                               cache_bytes, true);
             cache_bytes = 0;
@@ -567,12 +560,10 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
         vpage++;
     }
     if (uncache_bytes > 0) {
-        Lock();
         MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
                           uncache_bytes, false);
     }
     if (cache_bytes > 0) {
-        Lock();
         MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
                           cache_bytes, true);
     }
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index dc1b4d5be..e89cca6f2 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -83,7 +83,9 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
 
         // Check if this memory block is heap.
        if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
-            if (svc_mem_info.size > region_size) {
+            if (region_start + region_size == svc_mem_info.base_address) {
+                region_size += svc_mem_info.size;
+            } else if (svc_mem_info.size > region_size) {
                 region_size = svc_mem_info.size;
                 region_start = svc_mem_info.base_address;
             }
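A note on the container.cpp hunk above: Container::OpenSession scans the process address space and previously kept only the single largest MemoryState::Normal block, so a heap made of several adjacent blocks was under-measured. The new first branch folds a block that starts exactly at region_start + region_size into the running region, falling back to the largest-block rule only for non-adjacent blocks; two adjacent 2 MiB blocks now yield one 4 MiB region where the old rule reported 2 MiB. A self-contained sketch of the resulting selection logic (PickHeapRegion, MemInfo, and is_heap are invented stand-ins for the Kernel::Svc query types):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct MemInfo {
        std::uint64_t base_address;
        std::uint64_t size;
        bool is_heap; // stands in for state == MemoryState::Normal
    };

    // Returns {region_start, region_size} over the scanned blocks.
    std::pair<std::uint64_t, std::uint64_t> PickHeapRegion(const std::vector<MemInfo>& blocks) {
        std::uint64_t region_start = 0;
        std::uint64_t region_size = 0;
        for (const MemInfo& b : blocks) {
            if (!b.is_heap) {
                continue;
            }
            if (region_start + region_size == b.base_address) {
                region_size += b.size; // adjacent: extend the current region
            } else if (b.size > region_size) {
                region_size = b.size; // larger disjoint block replaces it
                region_start = b.base_address;
            }
        }
        return {region_start, region_size};
    }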
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e10a4601e..1afba1543 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -1091,6 +1091,20 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
                      [&] { rasterizer = true; });
     if (rasterizer) {
         impl->InvalidateGPUMemory(ptr, size);
+
+        const auto type = impl->current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Type();
+        if (type == Common::PageType::RasterizerCachedMemory) {
+            // Check if device mapped. If not, this is bugged and we can unmark.
+            DAddr addr{};
+            Common::ScratchBuffer<u32> buffer;
+            impl->gpu_device_memory->ApplyOpOnPointer(ptr, buffer,
+                                                      [&](DAddr address) { addr = address; });
+
+            if (addr == 0) {
+                LOG_ERROR(HW_Memory, "Fixing unmapped cached region {:#x}", GetInteger(vaddr));
+                impl->RasterizerMarkRegionCached(GetInteger(vaddr), size, false);
+            }
+        }
     }
 
 #ifdef __linux__
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b4bf369d1..8446f34cc 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1546,7 +1546,10 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
         std::span<const u8> upload_span;
         const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
         if (IsRangeGranular(device_addr, copy.size)) {
-            upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size);
+            auto* const ptr = device_memory.GetPointer<u8>(device_addr);
+            if (ptr != nullptr) {
+                upload_span = std::span(ptr, copy.size);
+            }
         } else {
             if (immediate_buffer.empty()) {
                 immediate_buffer = ImmediateBuffer(largest_copy);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b75376fdb..1dc0e884e 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -243,10 +243,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
     const u64 size_in_bytes{Tegra::Texture::CalculateSize(
         true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
     const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)};
-    const std::span<const u8> input_data(host_ptr, size_in_bytes);
-    Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
-                                     framebuffer.width, framebuffer.height, 1, block_height_log2,
-                                     0);
+    if (host_ptr != nullptr) {
+        const std::span<const u8> input_data(host_ptr, size_in_bytes);
+        Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
+                                         framebuffer.width, framebuffer.height, 1,
+                                         block_height_log2, 0);
+    }
 
     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 610f27c84..3a61ddb76 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -230,9 +230,11 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
         const u64 tiled_size{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
                                                            framebuffer.stride, framebuffer.height,
                                                            1, block_height_log2, 0)};
-        Tegra::Texture::UnswizzleTexture(
-            mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
-            bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+        if (host_ptr != nullptr) {
+            Tegra::Texture::UnswizzleTexture(
+                mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
+                bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+        }
 
         const VkBufferImageCopy copy{
             .bufferOffset = image_offset,
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cbc9c73c..a28296bda 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1064,8 +1064,6 @@ public:
             }
         });
     }
-    auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address);
-    ASSERT(ptr != nullptr);
     new_query->dependant_manage = must_manage_dependance;
     pending_flush_queries.push_back(index);
 
@@ -1104,9 +1102,11 @@ public:
             tfb_streamer.Free(query->dependant_index);
         } else {
             u8* pointer = device_memory.GetPointer<u8>(query->dependant_address);
-            u32 result;
-            std::memcpy(&result, pointer, sizeof(u32));
-            num_vertices = static_cast<u64>(result) / query->stride;
+            if (pointer != nullptr) {
+                u32 result;
+                std::memcpy(&result, pointer, sizeof(u32));
+                num_vertices = static_cast<u64>(result) / query->stride;
+            }
         }
         query->value = [&]() -> u64 {
            switch (query->topology) {
@@ -1360,7 +1360,9 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
     const auto check_value = [&](DAddr address) {
         u8* ptr = impl->device_memory.GetPointer<u8>(address);
         u64 value{};
-        std::memcpy(&value, ptr, sizeof(value));
+        if (ptr != nullptr) {
+            std::memcpy(&value, ptr, sizeof(value));
+        }
         return value == 0;
     };
     std::array objects{&object_1, &object_2};
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7398ed2ec..a7400adfa 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1431,7 +1431,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
             }
         }
     };
-    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+    ForEachSparseImageInRegion(channel_state->gpu_memory.GetID(), gpu_addr, size_bytes,
+                               region_check_gpu);
 
     bool can_rescale = info.rescaleable;
     bool any_rescaled = false;
@@ -1842,7 +1843,7 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
     if (!storage_id) {
         return;
     }
-    auto& gpu_page_table = gpu_page_table_storage[*storage_id];
+    auto& gpu_page_table = gpu_page_table_storage[*storage_id * 2];
     ForEachGPUPage(gpu_addr, size,
                    [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
                        const auto it = gpu_page_table.find(page);
@@ -1882,41 +1883,48 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
 
 template <class P>
 template <typename Func>
-void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+void TextureCache<P>::ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size,
+                                                 Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
-    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = sparse_page_table.find(page);
-        if (it == sparse_page_table.end()) {
-            if constexpr (BOOL_BREAK) {
-                return false;
-            } else {
-                return;
-            }
-        }
-        for (const ImageId image_id : it->second) {
-            Image& image = slot_images[image_id];
-            if (True(image.flags & ImageFlagBits::Picked)) {
-                continue;
-            }
-            if (!image.OverlapsGPU(gpu_addr, size)) {
-                continue;
-            }
-            image.flags |= ImageFlagBits::Picked;
-            images.push_back(image_id);
-            if constexpr (BOOL_BREAK) {
-                if (func(image_id, image)) {
-                    return true;
-                }
-            } else {
-                func(image_id, image);
-            }
-        }
-        if constexpr (BOOL_BREAK) {
-            return false;
-        }
-    });
+    auto storage_id = getStorageID(as_id);
+    if (!storage_id) {
+        return;
+    }
+    auto& sparse_page_table = gpu_page_table_storage[*storage_id * 2 + 1];
+    ForEachGPUPage(gpu_addr, size,
+                   [this, &sparse_page_table, &images, gpu_addr, size, func](u64 page) {
+                       const auto it = sparse_page_table.find(page);
+                       if (it == sparse_page_table.end()) {
+                           if constexpr (BOOL_BREAK) {
+                               return false;
+                           } else {
+                               return;
+                           }
+                       }
+                       for (const ImageId image_id : it->second) {
+                           Image& image = slot_images[image_id];
+                           if (True(image.flags & ImageFlagBits::Picked)) {
+                               continue;
+                           }
+                           if (!image.OverlapsGPU(gpu_addr, size)) {
+                               continue;
+                           }
+                           image.flags |= ImageFlagBits::Picked;
+                           images.push_back(image_id);
+                           if constexpr (BOOL_BREAK) {
+                               if (func(image_id, image)) {
+                                   return true;
+                               }
+                           } else {
+                               func(image_id, image);
+                           }
+                       }
+                       if constexpr (BOOL_BREAK) {
+                           return false;
+                       }
+                   });
     for (const ImageId image_id : images) {
         slot_images[image_id].flags &= ~ImageFlagBits::Picked;
     }
@@ -1988,8 +1996,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
             sparse_maps.push_back(map_id);
         });
     sparse_views.emplace(image_id, std::move(sparse_maps));
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+        (*channel_state->sparse_page_table)[page].push_back(image_id);
+    });
 }
 
 template <class P>
@@ -2042,7 +2051,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         return;
     }
     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
-        clear_page_table(page, sparse_page_table);
+        clear_page_table(page, (*channel_state->sparse_page_table));
     });
     auto it = sparse_views.find(image_id);
     ASSERT(it != sparse_views.end());
@@ -2496,13 +2505,15 @@ void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel
     const auto it = channel_map.find(channel.bind_id);
     auto* this_state = &channel_storage[it->second];
     const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
-    this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
+    this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2];
+    this_state->sparse_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2 + 1];
 }
 
 /// Bind a channel for execution.
 template <class P>
 void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
     gpu_page_table_storage.emplace_back();
+    gpu_page_table_storage.emplace_back();
 }
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 8699d40d4..f9aebb293 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -86,6 +86,7 @@ public:
     std::unordered_map<TSCEntry, SamplerId> samplers;
 
     TextureCacheGPUMap* gpu_page_table;
+    TextureCacheGPUMap* sparse_page_table;
 };
 
 template <class P>
@@ -357,7 +358,7 @@ private:
     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     template <typename Func>
-    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+    void ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     /// Iterates over all the images in a region calling func
     template <typename Func>
@@ -431,7 +432,6 @@ private:
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
     std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
 
     DAddr virtual_invalid_space{};
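A note on the texture-cache hunks above: the single global sparse_page_table is gone; each GPU address space now owns a sparse map next to its regular map inside gpu_page_table_storage, with slot 2 * storage_id holding the regular page table and slot 2 * storage_id + 1 the sparse one. That is why OnGPUASRegister emplaces twice and CreateChannel wires both channel-state pointers. A reduced sketch of the indexing scheme (PageMap and the free functions are invented; the real code stores TextureCacheGPUMap and resolves storage_id through getStorageID):

    #include <cstddef>
    #include <vector>

    struct PageMap {}; // stands in for TextureCacheGPUMap

    std::vector<PageMap> gpu_page_table_storage;

    // Called once per GPU address space: reserve the pair of maps together.
    void OnGPUASRegister() {
        gpu_page_table_storage.emplace_back(); // 2 * storage_id: regular pages
        gpu_page_table_storage.emplace_back(); // 2 * storage_id + 1: sparse pages
    }

    PageMap& RegularTable(std::size_t storage_id) {
        return gpu_page_table_storage[storage_id * 2];
    }

    PageMap& SparseTable(std::size_t storage_id) {
        return gpu_page_table_storage[storage_id * 2 + 1];
    }

Keeping the pair in one flat vector lets the single storage_id already tracked per address space reach both tables without a second lookup structure, at the cost of the bare *2 / *2+1 arithmetic at each use site.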