Compare commits

...

5 commits

Author SHA1 Message Date
yuzubot 63802f05d8 "Merge Tagged PR 12173" 2024-02-17 13:03:18 +00:00
yuzubot dd58ddf937 "Merge Tagged PR 12749" 2024-02-17 13:03:18 +00:00
yuzubot 79f57b4a65 "Merge Tagged PR 13000" 2024-02-17 13:03:17 +00:00
yuzubot b7eba25e3f "Merge Tagged PR 13017" 2024-02-17 13:03:16 +00:00
yuzubot 65b11a1ac5 "Merge Tagged PR 13026" 2024-02-17 13:03:16 +00:00
12 changed files with 111 additions and 96 deletions

View file

@ -242,7 +242,7 @@ struct System::Impl {
void Run() {
std::unique_lock<std::mutex> lk(suspend_guard);
kernel.SuspendApplication(false);
kernel.SuspendEmulation(false);
core_timing.SyncPause(false);
is_paused.store(false, std::memory_order_relaxed);
}
@ -251,7 +251,7 @@ struct System::Impl {
std::unique_lock<std::mutex> lk(suspend_guard);
core_timing.SyncPause(true);
kernel.SuspendApplication(true);
kernel.SuspendEmulation(true);
is_paused.store(true, std::memory_order_relaxed);
}
@ -261,7 +261,7 @@ struct System::Impl {
std::unique_lock<std::mutex> StallApplication() {
std::unique_lock<std::mutex> lk(suspend_guard);
kernel.SuspendApplication(true);
kernel.SuspendEmulation(true);
core_timing.SyncPause(true);
return lk;
}
@ -269,7 +269,7 @@ struct System::Impl {
void UnstallApplication() {
if (!IsPaused()) {
core_timing.SyncPause(false);
kernel.SuspendApplication(false);
kernel.SuspendEmulation(false);
}
}
@ -459,7 +459,7 @@ struct System::Impl {
}
Network::CancelPendingSocketOperations();
kernel.SuspendApplication(true);
kernel.SuspendEmulation(true);
if (services) {
services->KillNVNFlinger();
}

View file

@ -522,13 +522,17 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
auto* memory_device_inter = registered_processes[asid.id];
const auto release_pending = [&] {
if (uncache_bytes > 0) {
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
uncache_bytes, false);
if (memory_device_inter != nullptr) {
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
uncache_bytes, false);
}
uncache_bytes = 0;
}
if (cache_bytes > 0) {
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
cache_bytes, true);
if (memory_device_inter != nullptr) {
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
cache_bytes, true);
}
cache_bytes = 0;
}
};

View file

@ -66,6 +66,7 @@ enum class SuspendType : u32 {
Debug = 2,
Backtrace = 3,
Init = 4,
System = 5,
Count,
};
@ -84,8 +85,9 @@ enum class ThreadState : u16 {
DebugSuspended = (1 << (2 + SuspendShift)),
BacktraceSuspended = (1 << (3 + SuspendShift)),
InitSuspended = (1 << (4 + SuspendShift)),
SystemSuspended = (1 << (5 + SuspendShift)),
SuspendFlagMask = ((1 << 5) - 1) << SuspendShift,
SuspendFlagMask = ((1 << 6) - 1) << SuspendShift,
};
DECLARE_ENUM_FLAG_OPERATORS(ThreadState);

View file

@ -1204,39 +1204,48 @@ const Kernel::KSharedMemory& KernelCore::GetHidBusSharedMem() const {
return *impl->hidbus_shared_mem;
}
void KernelCore::SuspendApplication(bool suspended) {
void KernelCore::SuspendEmulation(bool suspended) {
const bool should_suspend{exception_exited || suspended};
const auto activity =
should_suspend ? Svc::ProcessActivity::Paused : Svc::ProcessActivity::Runnable;
auto processes = GetProcessList();
// Get the application process.
KScopedAutoObject<KProcess> process = ApplicationProcess();
if (process.IsNull()) {
for (auto& process : processes) {
KScopedLightLock ll{process->GetListLock()};
for (auto& thread : process->GetThreadList()) {
if (should_suspend) {
thread.RequestSuspend(SuspendType::System);
} else {
thread.Resume(SuspendType::System);
}
}
}
if (!should_suspend) {
return;
}
// Set the new activity.
process->SetActivity(activity);
// Wait for process execution to stop.
bool must_wait{should_suspend};
// KernelCore::SuspendApplication must be called from locked context,
// or we could race another call to SetActivity, interfering with waiting.
while (must_wait) {
// KernelCore::SuspendEmulation must be called from locked context,
// or we could race another call, interfering with waiting.
const auto TryWait = [&]() {
KScopedSchedulerLock sl{*this};
// Assume that all threads have finished running.
must_wait = false;
for (auto i = 0; i < static_cast<s32>(Core::Hardware::NUM_CPU_CORES); ++i) {
if (Scheduler(i).GetSchedulerCurrentThread()->GetOwnerProcess() ==
process.GetPointerUnsafe()) {
// A thread has not finished running yet.
// Continue waiting.
must_wait = true;
for (auto& process : processes) {
for (auto i = 0; i < static_cast<s32>(Core::Hardware::NUM_CPU_CORES); ++i) {
if (Scheduler(i).GetSchedulerCurrentThread()->GetOwnerProcess() ==
process.GetPointerUnsafe()) {
// A thread has not finished running yet.
// Continue waiting.
return false;
}
}
}
return true;
};
while (!TryWait()) {
// ...
}
}
@ -1260,7 +1269,7 @@ bool KernelCore::IsShuttingDown() const {
void KernelCore::ExceptionalExitApplication() {
exception_exited = true;
SuspendApplication(true);
SuspendEmulation(true);
}
void KernelCore::EnterSVCProfile() {

View file

@ -258,8 +258,8 @@ public:
/// Gets the shared memory object for HIDBus services.
const Kernel::KSharedMemory& GetHidBusSharedMem() const;
/// Suspend/unsuspend application process.
void SuspendApplication(bool suspend);
/// Suspend/unsuspend emulated processes.
void SuspendEmulation(bool suspend);
/// Exceptional exit application process.
void ExceptionalExitApplication();

View file

@ -60,11 +60,10 @@ public:
Add(spv::ImageOperandsMask::ConstOffsets, offsets);
}
explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, Id lod, Id ms) {
explicit ImageOperands(Id lod, Id ms) {
if (Sirit::ValidId(lod)) {
Add(spv::ImageOperandsMask::Lod, lod);
}
AddOffset(ctx, offset, ImageFetchOffsetAllowed);
if (Sirit::ValidId(ms)) {
Add(spv::ImageOperandsMask::Sample, ms);
}
@ -312,6 +311,43 @@ Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info,
return coords;
}
}
void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, Id& coords,
Id offset) {
if (!Sirit::ValidId(offset)) {
return;
}
Id result_type{};
switch (info.type) {
case TextureType::Buffer:
case TextureType::Color1D: {
result_type = ctx.U32[1];
break;
}
case TextureType::ColorArray1D:
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
[[fallthrough]];
case TextureType::Color2D:
case TextureType::Color2DRect: {
result_type = ctx.U32[2];
break;
}
case TextureType::ColorArray2D:
offset = ctx.OpCompositeConstruct(ctx.U32[3], ctx.OpCompositeExtract(ctx.U32[1], coords, 0),
ctx.OpCompositeExtract(ctx.U32[1], coords, 1),
ctx.u32_zero_value);
[[fallthrough]];
case TextureType::Color3D: {
result_type = ctx.U32[3];
break;
}
case TextureType::ColorCube:
case TextureType::ColorArrayCube:
return;
}
coords = ctx.OpIAdd(result_type, coords, offset);
}
} // Anonymous namespace
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@ -494,9 +530,10 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
operands.Span());
}
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, Id lod, Id ms) {
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
AddOffsetToCoordinates(ctx, info, coords, offset);
if (info.type == TextureType::Buffer) {
lod = Id{};
}
@ -504,7 +541,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
// This image is multisampled, lod must be implicit
lod = Id{};
}
const ImageOperands operands(ctx, offset, lod, ms);
const ImageOperands operands(lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
}

View file

@ -537,8 +537,8 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
const IR::Value& offset, const IR::Value& offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, Id lod, Id ms);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms);
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
const IR::Value& skip_mips);
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);

View file

@ -1488,7 +1488,10 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
std::span<const u8> upload_span;
const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
if (IsRangeGranular(device_addr, copy.size)) {
upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size);
auto* const ptr = device_memory.GetPointer<u8>(device_addr);
if (ptr != nullptr) {
upload_span = std::span(ptr, copy.size);
}
} else {
if (immediate_buffer.empty()) {
immediate_buffer = ImmediateBuffer(largest_copy);

View file

@ -1064,8 +1064,6 @@ public:
}
});
}
auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address);
ASSERT(ptr != nullptr);
new_query->dependant_manage = must_manage_dependance;
pending_flush_queries.push_back(index);
@ -1104,9 +1102,11 @@ public:
tfb_streamer.Free(query->dependant_index);
} else {
u8* pointer = device_memory.GetPointer<u8>(query->dependant_address);
u32 result;
std::memcpy(&result, pointer, sizeof(u32));
num_vertices = static_cast<u64>(result) / query->stride;
if (pointer != nullptr) {
u32 result;
std::memcpy(&result, pointer, sizeof(u32));
num_vertices = static_cast<u64>(result) / query->stride;
}
}
query->value = [&]() -> u64 {
switch (query->topology) {
@ -1360,7 +1360,9 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
const auto check_value = [&](DAddr address) {
u8* ptr = impl->device_memory.GetPointer<u8>(address);
u64 value{};
std::memcpy(&value, ptr, sizeof(value));
if (ptr != nullptr) {
std::memcpy(&value, ptr, sizeof(value));
}
return value == 0;
};
std::array<VideoCommon::LookupData*, 2> objects{&object_1, &object_2};

View file

@ -1050,37 +1050,16 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
regs.zeta.format == Tegra::DepthFormat::X8Z24_UNORM ||
regs.zeta.format == Tegra::DepthFormat::S8Z24_UNORM ||
regs.zeta.format == Tegra::DepthFormat::V8Z24_UNORM;
bool force_unorm = ([&] {
if (!is_d24 || device.SupportsD24DepthBuffer()) {
return false;
}
if (device.IsExtDepthBiasControlSupported()) {
return true;
}
if (!Settings::values.renderer_amdvlk_depth_bias_workaround) {
return false;
}
if (is_d24 && !device.SupportsD24DepthBuffer() &&
Settings::values.renderer_amdvlk_depth_bias_workaround) {
// the base formulas can be obtained from here:
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
const double rescale_factor =
static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
units = static_cast<float>(static_cast<double>(units) * rescale_factor);
return false;
})();
}
scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
factor = regs.slope_scale_depth_bias, force_unorm,
precise = device.HasExactDepthBiasControl()](vk::CommandBuffer cmdbuf) {
if (force_unorm) {
VkDepthBiasRepresentationInfoEXT info{
.sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
.pNext = nullptr,
.depthBiasRepresentation =
VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
.depthBiasExact = precise ? VK_TRUE : VK_FALSE,
};
cmdbuf.SetDepthBias(constant, clamp, factor, &info);
return;
}
factor = regs.slope_scale_depth_bias](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBias(constant, clamp, factor);
});
}

View file

@ -1137,13 +1137,6 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.custom_border_color, features.custom_border_color,
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
// VK_EXT_depth_bias_control
extensions.depth_bias_control =
features.depth_bias_control.depthBiasControl &&
features.depth_bias_control.leastRepresentableValueForceUnormRepresentation;
RemoveExtensionFeatureIfUnsuitable(extensions.depth_bias_control, features.depth_bias_control,
VK_EXT_DEPTH_BIAS_CONTROL_EXTENSION_NAME);
// VK_EXT_depth_clip_control
extensions.depth_clip_control = features.depth_clip_control.depthClipControl;
RemoveExtensionFeatureIfUnsuitable(extensions.depth_clip_control, features.depth_clip_control,

View file

@ -41,7 +41,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
// Define all features which may be used by the implementation and require an extension here.
#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \
FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \
FEATURE(EXT, DepthBiasControl, DEPTH_BIAS_CONTROL, depth_bias_control) \
FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \
FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
@ -97,7 +96,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_DEPTH_BIAS_CONTROL_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \
@ -150,9 +148,6 @@ VK_DEFINE_HANDLE(VmaAllocator)
// Define features where the absence of the feature may result in a degraded experience.
#define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \
FEATURE_NAME(custom_border_color, customBorderColors) \
FEATURE_NAME(depth_bias_control, depthBiasControl) \
FEATURE_NAME(depth_bias_control, leastRepresentableValueForceUnormRepresentation) \
FEATURE_NAME(depth_bias_control, depthBiasExact) \
FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \
FEATURE_NAME(format_a4b4g4r4, formatA4B4G4R4) \
FEATURE_NAME(index_type_uint8, indexTypeUint8) \
@ -479,11 +474,6 @@ public:
return extensions.depth_clip_control;
}
/// Returns true if the device supports VK_EXT_depth_bias_control.
bool IsExtDepthBiasControlSupported() const {
return extensions.depth_bias_control;
}
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
bool IsExtShaderViewportIndexLayerSupported() const {
return extensions.shader_viewport_index_layer;
@ -649,10 +639,6 @@ public:
return features.robustness2.nullDescriptor;
}
bool HasExactDepthBiasControl() const {
return features.depth_bias_control.depthBiasExact;
}
u32 GetMaxVertexInputAttributes() const {
return properties.properties.limits.maxVertexInputAttributes;
}