diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers index 9250d5ae8..8188e3fbb 160000 --- a/externals/Vulkan-Headers +++ b/externals/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 9250d5ae8f50202005233dc0512a1d460c8b4833 +Subproject commit 8188e3fbbc105591064093440f88081fb957d4f0 diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 424278816..d1f0ea932 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -39,52 +39,18 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace -void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { - raw = 0; - front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); - front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); - front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); - front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); - if (regs.stencil_two_side_enable) { - back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); - back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); - back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); - back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); - } else { - back.action_stencil_fail.Assign(front.action_stencil_fail); - back.action_depth_fail.Assign(front.action_depth_fail); - back.action_depth_pass.Assign(front.action_depth_pass); - back.test_func.Assign(front.test_func); - } - depth_test_enable.Assign(regs.depth_test_enable); - depth_write_enable.Assign(regs.depth_write_enabled); - depth_bounds_enable.Assign(regs.depth_bounds_enable); - stencil_enable.Assign(regs.stencil_enable); - depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); -} - -void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { 
+void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { const auto& clip = regs.view_volume_clip_control; const std::array enabled_lut = {regs.polygon_offset_point_enable, regs.polygon_offset_line_enable, regs.polygon_offset_fill_enable}; const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); - u32 packed_front_face = PackFrontFace(regs.front_face); - if (regs.screen_y_control.triangle_rast_flip != 0) { - // Flip front face - packed_front_face = 1 - packed_front_face; - } - raw = 0; - topology.Assign(topology_index); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); - cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); - cull_face.Assign(PackCullFace(regs.cull_face)); - front_face.Assign(packed_front_face); polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); patch_control_points_minus_one.Assign(regs.patch_vertices - 1); tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value())); @@ -93,19 +59,37 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); - std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast -} -void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept { + std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast + + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + binding_divisors[index] = + regs.instanced_arrays.IsInstancingEnabled(index) ? 
regs.vertex_array[index].divisor : 0; + } + + for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast<u32>(input.type.Value())); + attribute.size.Assign(static_cast<u32>(input.size.Value())); + } + for (std::size_t index = 0; index < std::size(attachments); ++index) { attachments[index].Fill(regs, index); } -} -void FixedPipelineState::ViewportSwizzles::Fill(const Maxwell& regs) noexcept { const auto& transform = regs.viewport_transform; - std::transform(transform.begin(), transform.end(), swizzles.begin(), + std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(), [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); }); + + if (!has_extended_dynamic_state) { + no_extended_dynamic_state.Assign(1); + dynamic_state.Fill(regs); + } } void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { @@ -147,20 +131,57 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size enable.Assign(1); } -void FixedPipelineState::Fill(const Maxwell& regs) { - rasterizer.Fill(regs); - depth_stencil.Fill(regs); - color_blending.Fill(regs); - viewport_swizzles.Fill(regs); +void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { + const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); + u32 packed_front_face = PackFrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0) { + // Flip front face + packed_front_face = 1 - packed_front_face; + } + + raw1 = 0; + raw2 = 0; + front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); + front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); + 
front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); + front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); + if (regs.stencil_two_side_enable) { + back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); + back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); + back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); + back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); + } else { + back.action_stencil_fail.Assign(front.action_stencil_fail); + back.action_depth_fail.Assign(front.action_depth_fail); + back.action_depth_pass.Assign(front.action_depth_pass); + back.test_func.Assign(front.test_func); + } + stencil_enable.Assign(regs.stencil_enable); + depth_write_enable.Assign(regs.depth_write_enabled); + depth_bounds_enable.Assign(regs.depth_bounds_enable); + depth_test_enable.Assign(regs.depth_test_enable); + front_face.Assign(packed_front_face); + depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); + topology.Assign(topology_index); + cull_face.Assign(PackCullFace(regs.cull_face)); + cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); + + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const auto& input = regs.vertex_array[index]; + VertexBinding& binding = vertex_bindings[index]; + binding.raw = 0; + binding.enabled.Assign(input.IsEnabled() ? 
1 : 0); + binding.stride.Assign(static_cast<u16>(input.stride.Value())); + } } std::size_t FixedPipelineState::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); return static_cast<std::size_t>(hash); } bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { - return std::memcmp(this, &rhs, sizeof *this) == 0; + return std::memcmp(this, &rhs, Size()) == 0; } u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 31a6398f2..cdcbb65f5 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,14 +60,6 @@ struct FixedPipelineState { void Fill(const Maxwell& regs, std::size_t index); - std::size_t Hash() const noexcept; - - bool operator==(const BlendingAttachment& rhs) const noexcept; - - bool operator!=(const BlendingAttachment& rhs) const noexcept { - return !operator==(rhs); - } - constexpr std::array<bool, 4> Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,83 +89,77 @@ struct FixedPipelineState { } }; - struct VertexInput { - union Binding { - u16 raw; - BitField<0, 1, u16> enabled; - BitField<1, 12, u16> stride; - }; + union VertexAttribute { + u32 raw; + BitField<0, 1, u32> enabled; + BitField<1, 5, u32> buffer; + BitField<6, 14, u32> offset; + BitField<20, 3, u32> type; + BitField<23, 6, u32> size; - union Attribute { - u32 raw; - BitField<0, 1, u32> enabled; - BitField<1, 5, u32> buffer; - BitField<6, 14, u32> offset; - BitField<20, 3, u32> type; - BitField<23, 6, u32> size; - - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { - return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); - } - - 
constexpr Maxwell::VertexAttribute::Size Size() const noexcept { - return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); - } - }; - - std::array<Binding, Maxwell::NumVertexArrays> bindings; - std::array<u32, Maxwell::NumVertexArrays> binding_divisors; - std::array<Attribute, Maxwell::NumVertexAttributes> attributes; - - void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept { - auto& binding = bindings[index]; - binding.raw = 0; - binding.enabled.Assign(enabled ? 1 : 0); - binding.stride.Assign(static_cast<u16>(stride)); - binding_divisors[index] = divisor; + constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); } - void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset, - Maxwell::VertexAttribute::Type type, - Maxwell::VertexAttribute::Size size) noexcept { - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(enabled ? 
1 : 0); - attribute.buffer.Assign(buffer); - attribute.offset.Assign(offset); - attribute.type.Assign(static_cast<u32>(type)); - attribute.size.Assign(static_cast<u32>(size)); + constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); } }; - struct Rasterizer { + template <std::size_t Position> + union StencilFace { + BitField<Position + 0, 3, u32> action_stencil_fail; + BitField<Position + 3, 3, u32> action_depth_fail; + BitField<Position + 6, 3, u32> action_depth_pass; + BitField<Position + 9, 3, u32> test_func; + + Maxwell::StencilOp ActionStencilFail() const noexcept { + return UnpackStencilOp(action_stencil_fail); + } + + Maxwell::StencilOp ActionDepthFail() const noexcept { + return UnpackStencilOp(action_depth_fail); + } + + Maxwell::StencilOp ActionDepthPass() const noexcept { + return UnpackStencilOp(action_depth_pass); + } + + Maxwell::ComparisonOp TestFunc() const noexcept { + return UnpackComparisonOp(test_func); + } + }; + + union VertexBinding { + u16 raw; + BitField<0, 12, u16> stride; + BitField<12, 1, u16> enabled; + }; + + struct DynamicState { union { - u32 raw; - BitField<0, 4, u32> topology; - BitField<4, 1, u32> primitive_restart_enable; - BitField<5, 1, u32> cull_enable; - BitField<6, 1, u32> depth_bias_enable; - BitField<7, 1, u32> depth_clamp_disabled; - BitField<8, 1, u32> ndc_minus_one_to_one; - BitField<9, 2, u32> cull_face; - BitField<11, 1, u32> front_face; - BitField<12, 2, u32> polygon_mode; - BitField<14, 5, u32> patch_control_points_minus_one; - BitField<19, 2, u32> tessellation_primitive; - BitField<21, 2, u32> tessellation_spacing; - BitField<23, 1, u32> tessellation_clockwise; - BitField<24, 1, u32> logic_op_enable; - BitField<25, 4, u32> logic_op; - BitField<29, 1, u32> rasterize_enable; + u32 raw1; + StencilFace<0> front; + StencilFace<12> back; + BitField<24, 1, u32> stencil_enable; + BitField<25, 1, u32> depth_write_enable; + BitField<26, 1, u32> 
depth_bounds_enable; + BitField<27, 1, u32> depth_test_enable; + BitField<28, 1, u32> front_face; + BitField<29, 3, u32> depth_test_func; }; + union { + u32 raw2; + BitField<0, 4, u32> topology; + BitField<4, 2, u32> cull_face; + BitField<6, 1, u32> cull_enable; + }; + std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings; - // TODO(Rodrigo): Move this to push constants - u32 point_size; + void Fill(const Maxwell& regs); - void Fill(const Maxwell& regs) noexcept; - - constexpr Maxwell::PrimitiveTopology Topology() const noexcept { - return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); + Maxwell::ComparisonOp DepthTestFunc() const noexcept { + return UnpackComparisonOp(depth_test_func); } Maxwell::CullFace CullFace() const noexcept { @@ -183,70 +169,36 @@ struct FixedPipelineState { Maxwell::FrontFace FrontFace() const noexcept { return UnpackFrontFace(front_face.Value()); } - }; - struct DepthStencil { - template <std::size_t Position> - union StencilFace { - BitField<Position + 0, 3, u32> action_stencil_fail; - BitField<Position + 3, 3, u32> action_depth_fail; - BitField<Position + 6, 3, u32> action_depth_pass; - BitField<Position + 9, 3, u32> test_func; - - Maxwell::StencilOp ActionStencilFail() const noexcept { - return UnpackStencilOp(action_stencil_fail); - } - - Maxwell::StencilOp ActionDepthFail() const noexcept { - return UnpackStencilOp(action_depth_fail); - } - - Maxwell::StencilOp ActionDepthPass() const noexcept { - return UnpackStencilOp(action_depth_pass); - } - - Maxwell::ComparisonOp TestFunc() const noexcept { - return UnpackComparisonOp(test_func); - } - }; - - union { - u32 raw; - StencilFace<0> front; - StencilFace<12> back; - BitField<24, 1, u32> depth_test_enable; - BitField<25, 1, u32> depth_write_enable; - BitField<26, 1, u32> depth_bounds_enable; - BitField<27, 1, u32> stencil_enable; - BitField<28, 3, u32> depth_test_func; - }; - - void Fill(const Maxwell& regs) noexcept; - - Maxwell::ComparisonOp DepthTestFunc() 
const noexcept { - return UnpackComparisonOp(depth_test_func); + constexpr Maxwell::PrimitiveTopology Topology() const noexcept { + return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); } }; - struct ColorBlending { - std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; - - void Fill(const Maxwell& regs) noexcept; + union { + u32 raw; + BitField<0, 1, u32> no_extended_dynamic_state; + BitField<2, 1, u32> primitive_restart_enable; + BitField<3, 1, u32> depth_bias_enable; + BitField<4, 1, u32> depth_clamp_disabled; + BitField<5, 1, u32> ndc_minus_one_to_one; + BitField<6, 2, u32> polygon_mode; + BitField<8, 5, u32> patch_control_points_minus_one; + BitField<13, 2, u32> tessellation_primitive; + BitField<15, 2, u32> tessellation_spacing; + BitField<17, 1, u32> tessellation_clockwise; + BitField<18, 1, u32> logic_op_enable; + BitField<19, 4, u32> logic_op; + BitField<23, 1, u32> rasterize_enable; }; + u32 point_size; + std::array<u32, Maxwell::NumVertexArrays> binding_divisors; + std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; + std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; + std::array<u16, Maxwell::NumViewports> viewport_swizzles; + DynamicState dynamic_state; - struct ViewportSwizzles { - std::array<u16, Maxwell::NumViewports> swizzles; - - void Fill(const Maxwell& regs) noexcept; - }; - - VertexInput vertex_input; - Rasterizer rasterizer; - DepthStencil depth_stencil; - ColorBlending color_blending; - ViewportSwizzles viewport_swizzles; - - void Fill(const Maxwell& regs); + void Fill(const Maxwell& regs, bool has_extended_dynamic_state); std::size_t Hash() const noexcept; @@ -255,6 +207,11 @@ struct FixedPipelineState { bool operator!=(const FixedPipelineState& rhs) const noexcept { return !operator==(rhs); } + + std::size_t Size() const noexcept { + const std::size_t total_size = sizeof *this; + return total_size - (no_extended_dynamic_state != 0 ? 
0 : sizeof(DynamicState)); + } }; static_assert(std::has_unique_object_representations_v<FixedPipelineState>); static_assert(std::is_trivially_copyable_v<FixedPipelineState>); diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9fd8ac3f6..fdaea4210 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -313,6 +313,16 @@ bool VKDevice::Create() { LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors"); } + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; + if (ext_extended_dynamic_state) { + dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + dynamic_state.pNext = nullptr; + dynamic_state.extendedDynamicState = VK_TRUE; + SetNext(next, dynamic_state); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -541,6 +551,7 @@ std::vector<const char*> VKDevice::LoadExtensions() { bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; + bool has_ext_extended_dynamic_state{}; for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) { Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); Test(extension, khr_uniform_buffer_standard_layout, @@ -558,6 +569,8 @@ std::vector<const char*> VKDevice::LoadExtensions() { false); Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); + Test(extension, has_ext_extended_dynamic_state, + VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { Test(extension, nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -643,6 +656,19 @@ std::vector<const char*> VKDevice::LoadExtensions() { } } + if 
(has_ext_extended_dynamic_state) { + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; + dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + dynamic_state.pNext = nullptr; + features.pNext = &dynamic_state; + physical.GetFeatures2KHR(features); + + if (dynamic_state.extendedDynamicState) { + extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + ext_extended_dynamic_state = true; + } + } + return extensions; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 6b9227b09..ae5c21baa 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -182,6 +182,11 @@ public: return ext_custom_border_color; } + /// Returns true if the device supports VK_EXT_extended_dynamic_state. + bool IsExtExtendedDynamicStateSupported() const { + return ext_extended_dynamic_state; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return vendor_name; @@ -239,6 +244,7 @@ private: bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 
// Telemetry parameters diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 69b6bba00..844445105 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -176,20 +176,32 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { - const auto& vi = fixed_state.vertex_input; - const auto& ds = fixed_state.depth_stencil; - const auto& cd = fixed_state.color_blending; - const auto& rs = fixed_state.rasterizer; - const auto& viewport_swizzles = fixed_state.viewport_swizzles.swizzles; + const auto& state = fixed_state; + const auto& viewport_swizzles = state.viewport_swizzles; + + FixedPipelineState::DynamicState dynamic; + if (device.IsExtExtendedDynamicStateSupported()) { + // Insert dummy values, as long as they are valid they don't matter as extended dynamic + // state is ignored + dynamic.raw1 = 0; + dynamic.raw2 = 0; + for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) { + // Enable all vertex bindings + binding.raw = 0; + binding.enabled.Assign(1); + } + } else { + dynamic = state.dynamic_state; + } std::vector<VkVertexInputBindingDescription> vertex_bindings; std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; - for (std::size_t index = 0; index < std::size(vi.bindings); ++index) { - const auto& binding = vi.bindings[index]; + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const auto& binding = dynamic.vertex_bindings[index]; if (!binding.enabled) { continue; } - const bool instanced = vi.binding_divisors[index] != 0; + const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; auto& vertex_binding = vertex_bindings.emplace_back(); @@ -200,14 +212,14 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa if (instanced) { auto& binding_divisor = vertex_binding_divisors.emplace_back(); binding_divisor.binding = static_cast<u32>(index); - binding_divisor.divisor = vi.binding_divisors[index]; + binding_divisor.divisor = state.binding_divisors[index]; } } std::vector<VkVertexInputAttributeDescription> vertex_attributes; const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < std::size(vi.attributes); ++index) { - const auto& attribute = vi.attributes[index]; + for (std::size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; if (!attribute.enabled) { continue; } @@ -244,15 +256,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; input_assembly_ci.pNext = nullptr; input_assembly_ci.flags = 0; - input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology()); + input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); input_assembly_ci.primitiveRestartEnable = - rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); + state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); VkPipelineTessellationStateCreateInfo tessellation_ci; tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; tessellation_ci.pNext = nullptr; tessellation_ci.flags = 0; - tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1; + tessellation_ci.patchControlPoints = state.patch_control_points_minus_one.Value() + 1; VkPipelineViewportStateCreateInfo viewport_ci; viewport_ci.sType = 
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; @@ -280,13 +292,13 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; rasterization_ci.pNext = nullptr; rasterization_ci.flags = 0; - rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; - rasterization_ci.rasterizerDiscardEnable = rs.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; + rasterization_ci.depthClampEnable = state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; + rasterization_ci.rasterizerDiscardEnable = state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; rasterization_ci.cullMode = - rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; - rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace()); - rasterization_ci.depthBiasEnable = rs.depth_bias_enable; + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE; + rasterization_ci.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()); + rasterization_ci.depthBiasEnable = state.depth_bias_enable; rasterization_ci.depthBiasConstantFactor = 0.0f; rasterization_ci.depthBiasClamp = 0.0f; rasterization_ci.depthBiasSlopeFactor = 0.0f; @@ -307,14 +319,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; depth_stencil_ci.pNext = nullptr; depth_stencil_ci.flags = 0; - depth_stencil_ci.depthTestEnable = ds.depth_test_enable; - depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; - depth_stencil_ci.depthCompareOp = - ds.depth_test_enable ? 
MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS; - depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; - depth_stencil_ci.stencilTestEnable = ds.stencil_enable; - depth_stencil_ci.front = GetStencilFaceState(ds.front); - depth_stencil_ci.back = GetStencilFaceState(ds.back); + depth_stencil_ci.depthTestEnable = dynamic.depth_test_enable; + depth_stencil_ci.depthWriteEnable = dynamic.depth_write_enable; + depth_stencil_ci.depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS; + depth_stencil_ci.depthBoundsTestEnable = dynamic.depth_bounds_enable; + depth_stencil_ci.stencilTestEnable = dynamic.stencil_enable; + depth_stencil_ci.front = GetStencilFaceState(dynamic.front); + depth_stencil_ci.back = GetStencilFaceState(dynamic.back); depth_stencil_ci.minDepthBounds = 0.0f; depth_stencil_ci.maxDepthBounds = 0.0f; @@ -324,7 +337,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa static constexpr std::array COMPONENT_TABLE = { VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT}; - const auto& blend = cd.attachments[index]; + const auto& blend = state.attachments[index]; VkColorComponentFlags color_components = 0; for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { @@ -354,11 +367,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa color_blend_ci.pAttachments = cb_attachments.data(); std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); - static constexpr std::array dynamic_states = { + std::vector dynamic_states = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 
VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended = { + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } VkPipelineDynamicStateCreateInfo dynamic_state_ci; dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ea66e621e..3da835324 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -116,12 +116,12 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, } // Anonymous namespace std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); + const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); return static_cast<std::size_t>(hash); } bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, sizeof *this) == 0; + return std::memcmp(&rhs, this, Size()) == 0; } std::size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -312,18 +312,19 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& gpu = system.GPU().Maxwell3D(); Specialization specialization; - if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) { + if (fixed_state.dynamic_state.Topology() == 
Maxwell::PrimitiveTopology::Points || + device.IsExtExtendedDynamicStateSupported()) { float point_size; - std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float)); + std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); specialization.point_size = point_size; ASSERT(point_size != 0.0f); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.vertex_input.attributes[i]; + const auto& attribute = fixed_state.attributes[i]; specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; specialization.attribute_types[i] = attribute.Type(); } - specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; + specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; SPIRVProgram program; std::vector<VkDescriptorSetLayoutBinding> bindings; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0a36e5112..0a3fe65fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -44,10 +44,10 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsPipelineCacheKey { - FixedPipelineState fixed_state; RenderPassParams renderpass_params; + u32 padding; std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - u64 padding; // This is necessary for unique object representations + FixedPipelineState fixed_state; std::size_t Hash() const noexcept; @@ -56,6 +56,10 @@ struct GraphicsPipelineCacheKey { bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { return !operator==(rhs); } + + std::size_t Size() const noexcept { + return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + } }; static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); diff 
--git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a8d94eac3..380ed532b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -186,13 +186,22 @@ bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { scissor.max_y < regs.zeta_height; } +template <std::size_t N> +std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { + std::array<VkDeviceSize, N> expanded; + std::copy(strides.begin(), strides.end(), expanded.begin()); + return expanded; +} + } // Anonymous namespace class BufferBindings final { public: - void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) { + void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { vertex.buffers[vertex.num_buffers] = buffer; vertex.offsets[vertex.num_buffers] = offset; + vertex.sizes[vertex.num_buffers] = size; + vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); ++vertex.num_buffers; } @@ -202,76 +211,76 @@ public: index.type = type; } - void Bind(VKScheduler& scheduler) const { + void Bind(const VKDevice& device, VKScheduler& scheduler) const { // Use this large switch case to avoid dispatching more memory in the record lambda than // what we need. It looks horrible, but it's the best we can do on standard C++. 
switch (vertex.num_buffers) { case 0: - return BindStatic<0>(scheduler); + return BindStatic<0>(device, scheduler); case 1: - return BindStatic<1>(scheduler); + return BindStatic<1>(device, scheduler); case 2: - return BindStatic<2>(scheduler); + return BindStatic<2>(device, scheduler); case 3: - return BindStatic<3>(scheduler); + return BindStatic<3>(device, scheduler); case 4: - return BindStatic<4>(scheduler); + return BindStatic<4>(device, scheduler); case 5: - return BindStatic<5>(scheduler); + return BindStatic<5>(device, scheduler); case 6: - return BindStatic<6>(scheduler); + return BindStatic<6>(device, scheduler); case 7: - return BindStatic<7>(scheduler); + return BindStatic<7>(device, scheduler); case 8: - return BindStatic<8>(scheduler); + return BindStatic<8>(device, scheduler); case 9: - return BindStatic<9>(scheduler); + return BindStatic<9>(device, scheduler); case 10: - return BindStatic<10>(scheduler); + return BindStatic<10>(device, scheduler); case 11: - return BindStatic<11>(scheduler); + return BindStatic<11>(device, scheduler); case 12: - return BindStatic<12>(scheduler); + return BindStatic<12>(device, scheduler); case 13: - return BindStatic<13>(scheduler); + return BindStatic<13>(device, scheduler); case 14: - return BindStatic<14>(scheduler); + return BindStatic<14>(device, scheduler); case 15: - return BindStatic<15>(scheduler); + return BindStatic<15>(device, scheduler); case 16: - return BindStatic<16>(scheduler); + return BindStatic<16>(device, scheduler); case 17: - return BindStatic<17>(scheduler); + return BindStatic<17>(device, scheduler); case 18: - return BindStatic<18>(scheduler); + return BindStatic<18>(device, scheduler); case 19: - return BindStatic<19>(scheduler); + return BindStatic<19>(device, scheduler); case 20: - return BindStatic<20>(scheduler); + return BindStatic<20>(device, scheduler); case 21: - return BindStatic<21>(scheduler); + return BindStatic<21>(device, scheduler); case 22: - return 
BindStatic<22>(scheduler); + return BindStatic<22>(device, scheduler); case 23: - return BindStatic<23>(scheduler); + return BindStatic<23>(device, scheduler); case 24: - return BindStatic<24>(scheduler); + return BindStatic<24>(device, scheduler); case 25: - return BindStatic<25>(scheduler); + return BindStatic<25>(device, scheduler); case 26: - return BindStatic<26>(scheduler); + return BindStatic<26>(device, scheduler); case 27: - return BindStatic<27>(scheduler); + return BindStatic<27>(device, scheduler); case 28: - return BindStatic<28>(scheduler); + return BindStatic<28>(device, scheduler); case 29: - return BindStatic<29>(scheduler); + return BindStatic<29>(device, scheduler); case 30: - return BindStatic<30>(scheduler); + return BindStatic<30>(device, scheduler); case 31: - return BindStatic<31>(scheduler); + return BindStatic<31>(device, scheduler); case 32: - return BindStatic<32>(scheduler); + return BindStatic<32>(device, scheduler); } UNREACHABLE(); } @@ -282,6 +291,8 @@ private: std::size_t num_buffers = 0; std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; + std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; + std::array<u16, Maxwell::NumVertexArrays> strides; } vertex; struct { @@ -291,15 +302,23 @@ private: } index; template <std::size_t N> - void BindStatic(VKScheduler& scheduler) const { - if (index.buffer) { - BindStatic<N, true>(scheduler); + void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { + if (device.IsExtExtendedDynamicStateSupported()) { + if (index.buffer) { + BindStatic<N, true, true>(scheduler); + } else { + BindStatic<N, false, true>(scheduler); + } } else { - BindStatic<N, false>(scheduler); + if (index.buffer) { + BindStatic<N, true, false>(scheduler); + } else { + BindStatic<N, false, false>(scheduler); + } } } - template <std::size_t N, bool is_indexed> + template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> void 
BindStatic(VKScheduler& scheduler) const { static_assert(N <= Maxwell::NumVertexArrays); if constexpr (N == 0) { @@ -311,6 +330,31 @@ private: std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); + if constexpr (has_extended_dynamic_state) { + // With extended dynamic states we can specify the length and stride of a vertex buffer + // std::array<VkDeviceSize, N> sizes; + std::array<u16, N> strides; + // std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); + std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); + + if constexpr (is_indexed) { + scheduler.Record( + [buffers, offsets, strides, index = index](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); + cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), + offsets.data(), nullptr, + ExpandStrides(strides).data()); + }); + } else { + scheduler.Record([buffers, offsets, strides](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), + offsets.data(), nullptr, + ExpandStrides(strides).data()); + }); + } + return; + } + if constexpr (is_indexed) { // Indexed draw scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { @@ -369,7 +413,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key; - key.fixed_state.Fill(gpu.regs); + key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); @@ -402,7 +446,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { UpdateDynamicStates(); - buffer_bindings.Bind(scheduler); + buffer_bindings.Bind(device, scheduler); BeginTransformFeedback(); @@ -822,7 +866,7 @@ RasterizerVulkan::DrawParameters 
RasterizerVulkan::SetupGeometry(FixedPipelineSt const auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; - SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); + SetupVertexArrays(buffer_bindings); const u32 base_instance = regs.vb_base_instance; const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; @@ -893,6 +937,17 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + if (device.IsExtExtendedDynamicStateSupported()) { + UpdateCullMode(regs); + UpdateDepthBoundsTestEnable(regs); + UpdateDepthTestEnable(regs); + UpdateDepthWriteEnable(regs); + UpdateDepthCompareOp(regs); + UpdateFrontFace(regs); + UpdatePrimitiveTopology(regs); + UpdateStencilOp(regs); + UpdateStencilTestEnable(regs); + } } void RasterizerVulkan::BeginTransformFeedback() { @@ -940,41 +995,25 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, - BufferBindings& buffer_bindings) { +void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { const auto& regs = system.GPU().Maxwell3D().regs; - for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& attrib = regs.vertex_attrib_format[index]; - if (attrib.IsConstant()) { - vertex_input.SetAttribute(index, false, 0, 0, {}, {}); - continue; - } - vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), - attrib.size.Value()); - } - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; if (!vertex_array.IsEnabled()) { - vertex_input.SetBinding(index, false, 0, 0); continue; } - vertex_input.SetBinding( - index, true, vertex_array.stride, - regs.instanced_arrays.IsInstancingEnabled(index) ? 
vertex_array.divisor : 0); - const GPUVAddr start{vertex_array.StartAddress()}; const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; ASSERT(end >= start); - const std::size_t size{end - start}; + const std::size_t size = end - start; if (size == 0) { - buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); + buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); continue; } const auto info = buffer_cache.UploadMemory(start, size); - buffer_bindings.AddVertexBinding(info.handle, info.offset); + buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride); } } @@ -1326,6 +1365,117 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchCullMode()) { + return; + } + scheduler.Record( + [enabled = regs.cull_test_enabled, cull_face = regs.cull_face](vk::CommandBuffer cmdbuf) { + cmdbuf.SetCullModeEXT(enabled ? 
MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE); + }); +} + +void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthBoundsTestEnable()) { + return; + } + scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthBoundsTestEnableEXT(enable); + }); +} + +void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthTestEnable()) { + return; + } + scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthTestEnableEXT(enable); + }); +} + +void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthWriteEnable()) { + return; + } + scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthWriteEnableEXT(enable); + }); +} + +void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthCompareOp()) { + return; + } + scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) { + cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func)); + }); +} + +void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchFrontFace()) { + return; + } + + VkFrontFace front_face = MaxwellToVK::FrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0) { + front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? 
VK_FRONT_FACE_COUNTER_CLOCKWISE + : VK_FRONT_FACE_CLOCKWISE; + } + scheduler.Record( + [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); }); +} + +void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchPrimitiveTopology()) { + return; + } + const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); + scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { + cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); + }); +} + +void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchStencilOp()) { + return; + } + const Maxwell::StencilOp fail = regs.stencil_front_op_fail; + const Maxwell::StencilOp zfail = regs.stencil_front_op_zfail; + const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass; + const Maxwell::ComparisonOp compare = regs.stencil_front_func_func; + if (!regs.stencil_two_side_enable) { /* One-sided stencil: the front registers define the ops of both faces */ + scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail), + MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), + MaxwellToVK::ComparisonOp(compare)); + }); + } else { /* Two-sided stencil: each face is programmed from its own registers */ + const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail; + const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail; + const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass; + const Maxwell::ComparisonOp back_compare = regs.stencil_back_func_func; + scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass, + back_compare](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail), + MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), + MaxwellToVK::ComparisonOp(compare)); + cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_BACK_BIT, MaxwellToVK::StencilOp(back_fail), + 
MaxwellToVK::StencilOp(back_zpass), + MaxwellToVK::StencilOp(back_zfail), + MaxwellToVK::ComparisonOp(back_compare)); + }); + } +} + +void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchStencilTestEnable()) { + return; + } + scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) { + cmdbuf.SetStencilTestEnableEXT(enable); + }); +} + std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { std::size_t size = CalculateVertexArraysSize(); if (is_indexed) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 83e00e7e9..923178b0b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -185,8 +185,7 @@ private: bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); - void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, - BufferBindings& buffer_bindings); + void SetupVertexArrays(BufferBindings& buffer_bindings); void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); @@ -246,6 +245,16 @@ private: void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + std::size_t 
CalculateGraphicsStreamBufferSize(bool is_indexed) const; std::size_t CalculateComputeStreamBufferSize() const; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 94a89e388..e5a583dd5 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -36,6 +36,15 @@ Flags MakeInvalidationFlags() { flags[BlendConstants] = true; flags[DepthBounds] = true; flags[StencilProperties] = true; + flags[CullMode] = true; + flags[DepthBoundsEnable] = true; + flags[DepthTestEnable] = true; + flags[DepthWriteEnable] = true; + flags[DepthCompareOp] = true; + flags[FrontFace] = true; + flags[PrimitiveTopology] = true; + flags[StencilOp] = true; + flags[StencilTestEnable] = true; return flags; } @@ -75,6 +84,57 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyCullMode(Tables& tables) { + auto& table = tables[0]; + table[OFF(cull_face)] = CullMode; + table[OFF(cull_test_enabled)] = CullMode; +} + +void SetupDirtyDepthBoundsEnable(Tables& tables) { + tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable; +} + +void SetupDirtyDepthTestEnable(Tables& tables) { + tables[0][OFF(depth_test_enable)] = DepthTestEnable; +} + +void SetupDirtyDepthWriteEnable(Tables& tables) { + tables[0][OFF(depth_write_enabled)] = DepthWriteEnable; +} + +void SetupDirtyDepthCompareOp(Tables& tables) { + tables[0][OFF(depth_test_func)] = DepthCompareOp; +} + +void SetupDirtyFrontFace(Tables& tables) { + auto& table = tables[0]; + table[OFF(front_face)] = FrontFace; + table[OFF(screen_y_control)] = FrontFace; +} + +void SetupDirtyPrimitiveTopology(Tables& tables) { + tables[0][OFF(draw.topology)] = PrimitiveTopology; +} + +void SetupDirtyStencilOp(Tables& tables) { + auto& table = tables[0]; + table[OFF(stencil_front_op_fail)] = StencilOp; + table[OFF(stencil_front_op_zfail)] = StencilOp; + 
table[OFF(stencil_front_op_zpass)] = StencilOp; + table[OFF(stencil_front_func_func)] = StencilOp; + table[OFF(stencil_back_op_fail)] = StencilOp; + table[OFF(stencil_back_op_zfail)] = StencilOp; + table[OFF(stencil_back_op_zpass)] = StencilOp; + table[OFF(stencil_back_func_func)] = StencilOp; + + // Table 0 is used by StencilProperties + tables[1][OFF(stencil_two_side_enable)] = StencilOp; +} + +void SetupDirtyStencilTestEnable(Tables& tables) { + tables[0][OFF(stencil_enable)] = StencilTestEnable; +} + } // Anonymous namespace StateTracker::StateTracker(Core::System& system) @@ -90,6 +150,15 @@ void StateTracker::Initialize() { SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyCullMode(tables); + SetupDirtyDepthBoundsEnable(tables); + SetupDirtyDepthTestEnable(tables); + SetupDirtyDepthWriteEnable(tables); + SetupDirtyDepthCompareOp(tables); + SetupDirtyFrontFace(tables); + SetupDirtyPrimitiveTopology(tables); + SetupDirtyStencilOp(tables); + SetupDirtyStencilTestEnable(tables); /* Without this, stencil_enable writes never raise the StencilTestEnable dirty flag */ } void StateTracker::InvalidateCommandBufferState() { diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 03bc415b2..54ca0d6c6 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -26,6 +26,16 @@ enum : u8 { DepthBounds, StencilProperties, + CullMode, + DepthBoundsEnable, + DepthTestEnable, + DepthWriteEnable, + DepthCompareOp, + FrontFace, + PrimitiveTopology, + StencilOp, + StencilTestEnable, + Last }; static_assert(Last <= std::numeric_limits<u8>::max()); @@ -64,6 +74,46 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchCullMode() { + return Exchange(Dirty::CullMode, false); + } + + bool TouchDepthBoundsTestEnable() { + return Exchange(Dirty::DepthBoundsEnable, false); + } + + bool TouchDepthTestEnable() { + return Exchange(Dirty::DepthTestEnable, false); + } + + bool TouchDepthBoundsEnable() { + 
return Exchange(Dirty::DepthBoundsEnable, false); + } + + bool TouchDepthWriteEnable() { + return Exchange(Dirty::DepthWriteEnable, false); + } + + bool TouchDepthCompareOp() { + return Exchange(Dirty::DepthCompareOp, false); + } + + bool TouchFrontFace() { + return Exchange(Dirty::FrontFace, false); + } + + bool TouchPrimitiveTopology() { + return Exchange(Dirty::PrimitiveTopology, false); + } + + bool TouchStencilOp() { + return Exchange(Dirty::StencilOp, false); + } + + bool TouchStencilTestEnable() { + return Exchange(Dirty::StencilTestEnable, false); + } + private: bool Exchange(std::size_t id, bool new_value) const noexcept { auto& flags = system.GPU().Maxwell3D().dirty.flags; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 0d485a662..051298cc8 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -88,6 +88,16 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetStencilWriteMask); X(vkCmdSetViewport); X(vkCmdWaitEvents); + X(vkCmdBindVertexBuffers2EXT); + X(vkCmdSetCullModeEXT); + X(vkCmdSetDepthBoundsTestEnableEXT); + X(vkCmdSetDepthCompareOpEXT); + X(vkCmdSetDepthTestEnableEXT); + X(vkCmdSetDepthWriteEnableEXT); + X(vkCmdSetFrontFaceEXT); + X(vkCmdSetPrimitiveTopologyEXT); + X(vkCmdSetStencilOpEXT); + X(vkCmdSetStencilTestEnableEXT); X(vkCreateBuffer); X(vkCreateBufferView); X(vkCreateCommandPool); diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index d56fdb3f9..71daac9d7 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -207,6 +207,16 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask; PFN_vkCmdSetViewport vkCmdSetViewport; PFN_vkCmdWaitEvents vkCmdWaitEvents; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; + 
PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; + PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; + PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; + PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; + PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; + PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; + PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; + PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateBufferView vkCreateBufferView; PFN_vkCreateCommandPool vkCreateCommandPool; @@ -969,6 +979,50 @@ public: buffer_barriers.data(), image_barriers.size(), image_barriers.data()); } + void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers, + const VkDeviceSize* offsets, const VkDeviceSize* sizes, + const VkDeviceSize* strides) const noexcept { + dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets, + sizes, strides); + } + + void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept { + dld->vkCmdSetCullModeEXT(handle, cull_mode); + } + + void SetDepthBoundsTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept { + dld->vkCmdSetDepthCompareOpEXT(handle, compare_op); + } + + void SetDepthTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + + void SetDepthWriteEnableEXT(bool enable) const noexcept { + dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); + } + + void SetFrontFaceEXT(VkFrontFace front_face) const noexcept { + dld->vkCmdSetFrontFaceEXT(handle, front_face); + } + + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { + dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); + } + + void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op, + VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept { + dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op); + } + + void SetStencilTestEnableEXT(bool enable) const noexcept { + dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept {