"Merge Tagged PR 12749"

This commit is contained in:
yuzubot 2024-01-31 13:03:05 +00:00
parent 9634c816a7
commit 79e179d31f
12 changed files with 188 additions and 66 deletions

View file

@ -106,6 +106,7 @@ add_library(common STATIC
precompiled_headers.h
quaternion.h
range_map.h
range_mutex.h
reader_writer_queue.h
ring_buffer.h
${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp

93
src/common/range_mutex.h Normal file
View file

@ -0,0 +1,93 @@
// SPDX-FileCopyrightText: 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <mutex>
#include "common/intrusive_list.h"
namespace Common {
class ScopedRangeLock;
class RangeMutex {
public:
explicit RangeMutex() = default;
~RangeMutex() = default;
private:
friend class ScopedRangeLock;
void Lock(ScopedRangeLock& l);
void Unlock(ScopedRangeLock& l);
bool HasIntersectionLocked(ScopedRangeLock& l);
private:
std::mutex m_mutex;
std::condition_variable m_cv;
using LockList = Common::IntrusiveListBaseTraits<ScopedRangeLock>::ListType;
LockList m_list;
};
/// Scoped guard that holds the range [address, address + size) on a RangeMutex.
///
/// The constructor blocks in RangeMutex::Lock until the range intersects no range held by
/// another guard on the same mutex; the destructor releases it again. A guard whose size is 0
/// locks nothing: neither Lock nor Unlock is called for it.
///
/// The guard object itself is the node that RangeMutex links into its list of held ranges, so
/// it has to stay at the same address for as long as it is held. Copying or moving it would leave
/// a second object whose destructor unlinks a node that was never inserted, hence all four
/// copy/move operations are deleted explicitly.
///
/// NOTE(review): u64 is presumably provided by common/common_types.h, which this header does not
/// include directly - confirm it is reachable through common/intrusive_list.h.
class ScopedRangeLock : public Common::IntrusiveListBaseNode<ScopedRangeLock> {
public:
    /// Acquires [address, address + size) on `mutex`, blocking while it intersects a held range.
    /// @param mutex   Range mutex to lock; must outlive this guard (it is stored by reference).
    /// @param address First address of the range.
    /// @param size    Length of the range; 0 makes the guard a no-op.
    explicit ScopedRangeLock(RangeMutex& mutex, u64 address, u64 size)
        : m_mutex(mutex), m_address(address), m_size(size) {
        if (m_size > 0) {
            m_mutex.Lock(*this);
        }
    }

    /// Releases the range, mirroring the size check made by the constructor.
    ~ScopedRangeLock() {
        if (m_size > 0) {
            m_mutex.Unlock(*this);
        }
    }

    ScopedRangeLock(const ScopedRangeLock&) = delete;
    ScopedRangeLock& operator=(const ScopedRangeLock&) = delete;
    ScopedRangeLock(ScopedRangeLock&&) = delete;
    ScopedRangeLock& operator=(ScopedRangeLock&&) = delete;

    /// First address of the guarded range.
    u64 GetAddress() const {
        return m_address;
    }

    /// Length of the guarded range.
    u64 GetSize() const {
        return m_size;
    }

private:
    RangeMutex& m_mutex;
    const u64 m_address{};
    const u64 m_size{};
};
/// Acquires the range described by `l`.
/// Sleeps on the condition variable for as long as that range intersects a range already in
/// m_list, then appends `l` to the list. The whole operation runs under m_mutex, which the wait
/// releases while the thread is asleep.
inline void RangeMutex::Lock(ScopedRangeLock& l) {
    std::unique_lock guard{m_mutex};
    // Re-check after every wake-up: another waiter may have taken an intersecting range first.
    while (HasIntersectionLocked(l)) {
        m_cv.wait(guard);
    }
    m_list.push_back(l);
}
/// Releases the range described by `l`.
/// Unlinks `l` from m_list while holding m_mutex, then drops the mutex before notifying so that
/// woken waiters can take it straight away. Every waiter is woken because any of them may have
/// been blocked on this range.
inline void RangeMutex::Unlock(ScopedRangeLock& l) {
    std::unique_lock guard{m_mutex};
    m_list.erase(m_list.iterator_to(l));
    guard.unlock();

    m_cv.notify_all();
}
/// Reports whether the range of `l` intersects any range currently stored in m_list.
///
/// m_list is read without locking, so the caller must hold m_mutex.
/// Ranges are compared as inclusive [begin, last] intervals. An empty range (size 0) intersects
/// nothing, and a range whose end would wrap past the top of u64 is clamped to the highest
/// address instead of wrapping to a small value, which would hide real overlaps.
///
/// @param l Range to test; it is only read.
/// @return true if at least one held range shares an address with `l`.
inline bool RangeMutex::HasIntersectionLocked(ScopedRangeLock& l) {
    // Inclusive last address of a non-empty range, saturated on u64 wrap-around.
    const auto last_address_of = [](const ScopedRangeLock& lock) {
        const u64 begin = lock.GetAddress();
        const u64 last = begin + (lock.GetSize() - 1);
        return last < begin ? ~u64{0} : last;
    };

    if (l.GetSize() == 0) {
        return false;
    }
    const u64 cur_begin = l.GetAddress();
    const u64 cur_last = last_address_of(l);

    for (const auto& other : m_list) {
        if (other.GetSize() == 0) {
            continue;
        }
        const u64 other_begin = other.GetAddress();
        const u64 other_last = last_address_of(other);
        // Two inclusive intervals overlap when each one starts no later than the other ends.
        if (cur_begin <= other_last && other_begin <= cur_last) {
            return true;
        }
    }
    return false;
}
} // namespace Common

View file

@ -10,6 +10,7 @@
#include <mutex>
#include "common/common_types.h"
#include "common/range_mutex.h"
#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h"
@ -204,7 +205,7 @@ private:
(1ULL << (device_virtual_bits - page_bits)) / subentries;
using CachedPages = std::array<CounterEntry, num_counter_entries>;
std::unique_ptr<CachedPages> cached_pages;
std::mutex counter_guard;
Common::RangeMutex counter_guard;
std::mutex mapping_guard;
};

View file

@ -508,12 +508,7 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
template <typename Traits>
void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock);
const auto Lock = [&] {
if (!lk) {
lk.lock();
}
};
Common::ScopedRangeLock lk(counter_guard, addr, size);
u64 uncache_begin = 0;
u64 cache_begin = 0;
u64 uncache_bytes = 0;
@ -548,7 +543,6 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
}
uncache_bytes += Memory::YUZU_PAGESIZE;
} else if (uncache_bytes > 0) {
Lock();
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
uncache_bytes, false);
uncache_bytes = 0;
@ -559,7 +553,6 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
}
cache_bytes += Memory::YUZU_PAGESIZE;
} else if (cache_bytes > 0) {
Lock();
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
true);
cache_bytes = 0;
@ -567,12 +560,10 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
vpage++;
}
if (uncache_bytes > 0) {
Lock();
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
false);
}
if (cache_bytes > 0) {
Lock();
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
true);
}

View file

@ -83,7 +83,9 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
// Check if this memory block is heap.
if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
if (svc_mem_info.size > region_size) {
if (region_start + region_size == svc_mem_info.base_address) {
region_size += svc_mem_info.size;
} else if (svc_mem_info.size > region_size) {
region_size = svc_mem_info.size;
region_start = svc_mem_info.base_address;
}

View file

@ -1091,6 +1091,20 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
[&] { rasterizer = true; });
if (rasterizer) {
impl->InvalidateGPUMemory(ptr, size);
const auto type = impl->current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Type();
if (type == Common::PageType::RasterizerCachedMemory) {
// Check whether the region is device mapped. If it is not, the cached mark is a bug and we can unmark it.
DAddr addr{};
Common::ScratchBuffer<u32> buffer;
impl->gpu_device_memory->ApplyOpOnPointer(ptr, buffer,
[&](DAddr address) { addr = address; });
if (addr == 0) {
LOG_ERROR(HW_Memory, "Fixing unmapped cached region {:#x}", GetInteger(vaddr));
impl->RasterizerMarkRegionCached(GetInteger(vaddr), size, false);
}
}
}
#ifdef __linux__

View file

@ -1546,7 +1546,10 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
std::span<const u8> upload_span;
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
if (IsRangeGranular(device_addr, copy.size)) {
upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size);
auto* const ptr = device_memory.GetPointer<u8>(device_addr);
if (ptr != nullptr) {
upload_span = std::span(ptr, copy.size);
}
} else {
if (immediate_buffer.empty()) {
immediate_buffer = ImmediateBuffer(largest_copy);

View file

@ -243,10 +243,12 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
const u64 size_in_bytes{Tegra::Texture::CalculateSize(
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)};
const std::span<const u8> input_data(host_ptr, size_in_bytes);
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
framebuffer.width, framebuffer.height, 1, block_height_log2,
0);
if (host_ptr != nullptr) {
const std::span<const u8> input_data(host_ptr, size_in_bytes);
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
framebuffer.width, framebuffer.height, 1,
block_height_log2, 0);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));

View file

@ -230,9 +230,11 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const u64 tiled_size{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
framebuffer.stride, framebuffer.height,
1, block_height_log2, 0)};
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
if (host_ptr != nullptr) {
Tegra::Texture::UnswizzleTexture(
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
}
const VkBufferImageCopy copy{
.bufferOffset = image_offset,

View file

@ -1064,8 +1064,6 @@ public:
}
});
}
auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address);
ASSERT(ptr != nullptr);
new_query->dependant_manage = must_manage_dependance;
pending_flush_queries.push_back(index);
@ -1104,9 +1102,11 @@ public:
tfb_streamer.Free(query->dependant_index);
} else {
u8* pointer = device_memory.GetPointer<u8>(query->dependant_address);
u32 result;
std::memcpy(&result, pointer, sizeof(u32));
num_vertices = static_cast<u64>(result) / query->stride;
if (pointer != nullptr) {
u32 result;
std::memcpy(&result, pointer, sizeof(u32));
num_vertices = static_cast<u64>(result) / query->stride;
}
}
query->value = [&]() -> u64 {
switch (query->topology) {
@ -1360,7 +1360,9 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
const auto check_value = [&](DAddr address) {
u8* ptr = impl->device_memory.GetPointer<u8>(address);
u64 value{};
std::memcpy(&value, ptr, sizeof(value));
if (ptr != nullptr) {
std::memcpy(&value, ptr, sizeof(value));
}
return value == 0;
};
std::array<VideoCommon::LookupData*, 2> objects{&object_1, &object_2};

View file

@ -1431,7 +1431,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
}
}
};
ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
ForEachSparseImageInRegion(channel_state->gpu_memory.GetID(), gpu_addr, size_bytes,
region_check_gpu);
bool can_rescale = info.rescaleable;
bool any_rescaled = false;
@ -1842,7 +1843,7 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
if (!storage_id) {
return;
}
auto& gpu_page_table = gpu_page_table_storage[*storage_id];
auto& gpu_page_table = gpu_page_table_storage[*storage_id * 2];
ForEachGPUPage(gpu_addr, size,
[this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
const auto it = gpu_page_table.find(page);
@ -1882,41 +1883,48 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
template <class P>
template <typename Func>
void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
void TextureCache<P>::ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size,
Func&& func) {
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 8> images;
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
const auto it = sparse_page_table.find(page);
if (it == sparse_page_table.end()) {
if constexpr (BOOL_BREAK) {
return false;
} else {
return;
}
}
for (const ImageId image_id : it->second) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Picked)) {
continue;
}
if (!image.OverlapsGPU(gpu_addr, size)) {
continue;
}
image.flags |= ImageFlagBits::Picked;
images.push_back(image_id);
if constexpr (BOOL_BREAK) {
if (func(image_id, image)) {
return true;
}
} else {
func(image_id, image);
}
}
if constexpr (BOOL_BREAK) {
return false;
}
});
auto storage_id = getStorageID(as_id);
if (!storage_id) {
return;
}
auto& sparse_page_table = gpu_page_table_storage[*storage_id * 2 + 1];
ForEachGPUPage(gpu_addr, size,
[this, &sparse_page_table, &images, gpu_addr, size, func](u64 page) {
const auto it = sparse_page_table.find(page);
if (it == sparse_page_table.end()) {
if constexpr (BOOL_BREAK) {
return false;
} else {
return;
}
}
for (const ImageId image_id : it->second) {
Image& image = slot_images[image_id];
if (True(image.flags & ImageFlagBits::Picked)) {
continue;
}
if (!image.OverlapsGPU(gpu_addr, size)) {
continue;
}
image.flags |= ImageFlagBits::Picked;
images.push_back(image_id);
if constexpr (BOOL_BREAK) {
if (func(image_id, image)) {
return true;
}
} else {
func(image_id, image);
}
}
if constexpr (BOOL_BREAK) {
return false;
}
});
for (const ImageId image_id : images) {
slot_images[image_id].flags &= ~ImageFlagBits::Picked;
}
@ -1988,8 +1996,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
sparse_maps.push_back(map_id);
});
sparse_views.emplace(image_id, std::move(sparse_maps));
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
[this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
(*channel_state->sparse_page_table)[page].push_back(image_id);
});
}
template <class P>
@ -2042,7 +2051,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
return;
}
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
clear_page_table(page, sparse_page_table);
clear_page_table(page, (*channel_state->sparse_page_table));
});
auto it = sparse_views.find(image_id);
ASSERT(it != sparse_views.end());
@ -2496,13 +2505,15 @@ void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel
const auto it = channel_map.find(channel.bind_id);
auto* this_state = &channel_storage[it->second];
const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2];
this_state->sparse_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2 + 1];
}
/// Grows the page-table storage when a GPU address space is registered (per the function name).
/// Lookups elsewhere in this file index the storage with storage_id * 2 for the regular GPU page
/// table and storage_id * 2 + 1 for the sparse page table, so tables are appended in pairs.
/// `map_id` is not used.
template <class P>
void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
    static constexpr size_t tables_per_address_space = 2;
    for (size_t table = 0; table < tables_per_address_space; ++table) {
        gpu_page_table_storage.emplace_back();
    }
}
} // namespace VideoCommon

View file

@ -86,6 +86,7 @@ public:
std::unordered_map<TSCEntry, SamplerId> samplers;
TextureCacheGPUMap* gpu_page_table;
TextureCacheGPUMap* sparse_page_table;
};
template <class P>
@ -357,7 +358,7 @@ private:
void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
void ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template <typename Func>
@ -431,7 +432,6 @@ private:
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
DAddr virtual_invalid_space{};