mirror of
				https://github.com/yuzu-emu/yuzu-android.git
				synced 2025-10-25 23:17:56 +00:00 
			
		
		
		
	VideoCore: Implement DispatchIndirect
This commit is contained in:
		
							parent
							
								
									710ca3ca49
								
							
						
					
					
						commit
						115792158d
					
				src/video_core
|  | @ -14,6 +14,7 @@ | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
| constexpr u32 MacroRegistersStart = 0xE00; | constexpr u32 MacroRegistersStart = 0xE00; | ||||||
|  | constexpr u32 ComputeInline = 0x6D; | ||||||
| 
 | 
 | ||||||
| DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, | DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, | ||||||
|                      Control::ChannelState& channel_state_) |                      Control::ChannelState& channel_state_) | ||||||
|  | @ -83,20 +84,35 @@ bool DmaPusher::Step() { | ||||||
|                     dma_state.dma_get, command_list_header.size * sizeof(u32)); |                     dma_state.dma_get, command_list_header.size * sizeof(u32)); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         if (Settings::IsGPULevelHigh() && dma_state.method < MacroRegistersStart) { |         const auto safe_process = [&] { | ||||||
|             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, |             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||||
|                                          Core::Memory::GuestMemoryFlags::SafeRead> |                                          Core::Memory::GuestMemoryFlags::SafeRead> | ||||||
|                 headers(memory_manager, dma_state.dma_get, command_list_header.size, |                 headers(memory_manager, dma_state.dma_get, command_list_header.size, | ||||||
|                         &command_headers); |                         &command_headers); | ||||||
|             ProcessCommands(headers); |             ProcessCommands(headers); | ||||||
|             return true; |         }; | ||||||
|         } |         const auto unsafe_process = [&] { | ||||||
|             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, |             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||||
|                                          Core::Memory::GuestMemoryFlags::UnsafeRead> |                                          Core::Memory::GuestMemoryFlags::UnsafeRead> | ||||||
|             headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); |                 headers(memory_manager, dma_state.dma_get, command_list_header.size, | ||||||
|  |                         &command_headers); | ||||||
|             ProcessCommands(headers); |             ProcessCommands(headers); | ||||||
|  |         }; | ||||||
|  |         if (Settings::IsGPULevelHigh()) { | ||||||
|  |             if (dma_state.method >= MacroRegistersStart) { | ||||||
|  |                 unsafe_process(); | ||||||
|  |                 return true; | ||||||
|  |             } | ||||||
|  |             if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute && | ||||||
|  |                 dma_state.method == ComputeInline) { | ||||||
|  |                 unsafe_process(); | ||||||
|  |                 return true; | ||||||
|  |             } | ||||||
|  |             safe_process(); | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         unsafe_process(); | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -130,8 +130,10 @@ public: | ||||||
| 
 | 
 | ||||||
|     void DispatchCalls(); |     void DispatchCalls(); | ||||||
| 
 | 
 | ||||||
|     void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) { |     void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, | ||||||
|  |                         Engines::EngineTypes engine_type) { | ||||||
|         subchannels[subchannel_id] = engine; |         subchannels[subchannel_id] = engine; | ||||||
|  |         subchannel_type[subchannel_id] = engine_type; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||||
|  | @ -170,6 +172,7 @@ private: | ||||||
|     const bool ib_enable{true}; ///< IB mode enabled
 |     const bool ib_enable{true}; ///< IB mode enabled
 | ||||||
| 
 | 
 | ||||||
|     std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; |     std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; | ||||||
|  |     std::array<Engines::EngineTypes, max_subchannels> subchannel_type; | ||||||
| 
 | 
 | ||||||
|     GPU& gpu; |     GPU& gpu; | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|  |  | ||||||
|  | @ -11,6 +11,14 @@ | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
| 
 | 
 | ||||||
|  | enum class EngineTypes : u32 { | ||||||
|  |     KeplerCompute, | ||||||
|  |     Maxwell3D, | ||||||
|  |     Fermi2D, | ||||||
|  |     MaxwellDMA, | ||||||
|  |     KeplerMemory, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| class EngineInterface { | class EngineInterface { | ||||||
| public: | public: | ||||||
|     virtual ~EngineInterface() = default; |     virtual ~EngineInterface() = default; | ||||||
|  |  | ||||||
|  | @ -69,6 +69,14 @@ public: | ||||||
|     /// Binds a rasterizer to this engine.
 |     /// Binds a rasterizer to this engine.
 | ||||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||||
| 
 | 
 | ||||||
|  |     GPUVAddr ExecTargetAddress() const { | ||||||
|  |         return regs.dest.Address(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u32 GetUploadSize() const { | ||||||
|  |         return copy_size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     void ProcessData(std::span<const u8> read_buffer); |     void ProcessData(std::span<const u8> read_buffer); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | ||||||
| 
 | 
 | ||||||
|     switch (method) { |     switch (method) { | ||||||
|     case KEPLER_COMPUTE_REG_INDEX(exec_upload): { |     case KEPLER_COMPUTE_REG_INDEX(exec_upload): { | ||||||
|  |         UploadInfo info{.upload_address = upload_address, | ||||||
|  |                         .exec_address = upload_state.ExecTargetAddress(), | ||||||
|  |                         .copy_size = upload_state.GetUploadSize()}; | ||||||
|  |         uploads.push_back(info); | ||||||
|         upload_state.ProcessExec(regs.exec_upload.linear != 0); |         upload_state.ProcessExec(regs.exec_upload.linear != 0); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case KEPLER_COMPUTE_REG_INDEX(data_upload): { |     case KEPLER_COMPUTE_REG_INDEX(data_upload): { | ||||||
|  |         upload_address = current_dma_segment; | ||||||
|         upload_state.ProcessData(method_argument, is_last_call); |         upload_state.ProcessData(method_argument, is_last_call); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case KEPLER_COMPUTE_REG_INDEX(launch): |     case KEPLER_COMPUTE_REG_INDEX(launch): { | ||||||
|  |         const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||||||
|  | 
 | ||||||
|  |         for (auto& data : uploads) { | ||||||
|  |             const GPUVAddr offset = data.exec_address - launch_desc_loc; | ||||||
|  |             if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) && | ||||||
|  |                 memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) { | ||||||
|  |                 indirect_compute = {data.upload_address}; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         uploads.clear(); | ||||||
|         ProcessLaunch(); |         ProcessLaunch(); | ||||||
|  |         indirect_compute = std::nullopt; | ||||||
|         break; |         break; | ||||||
|  |     } | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  | @ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun | ||||||
|                                     u32 methods_pending) { |                                     u32 methods_pending) { | ||||||
|     switch (method) { |     switch (method) { | ||||||
|     case KEPLER_COMPUTE_REG_INDEX(data_upload): |     case KEPLER_COMPUTE_REG_INDEX(data_upload): | ||||||
|  |         upload_address = current_dma_segment; | ||||||
|         upload_state.ProcessData(base_start, amount); |         upload_state.ProcessData(base_start, amount); | ||||||
|         return; |         return; | ||||||
|     default: |     default: | ||||||
|  |  | ||||||
|  | @ -5,6 +5,7 @@ | ||||||
| 
 | 
 | ||||||
| #include <array> | #include <array> | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
|  | #include <optional> | ||||||
| #include <vector> | #include <vector> | ||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
|  | @ -36,6 +37,9 @@ namespace Tegra::Engines { | ||||||
| #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | ||||||
|     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
|  | #define LAUNCH_REG_INDEX(field_name)                                                               \ | ||||||
|  |     (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32)) | ||||||
|  | 
 | ||||||
| class KeplerCompute final : public EngineInterface { | class KeplerCompute final : public EngineInterface { | ||||||
| public: | public: | ||||||
|     explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); |     explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); | ||||||
|  | @ -201,6 +205,10 @@ public: | ||||||
|     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|                          u32 methods_pending) override; |                          u32 methods_pending) override; | ||||||
| 
 | 
 | ||||||
|  |     std::optional<GPUVAddr> GetIndirectComputeAddress() const { | ||||||
|  |         return indirect_compute; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     void ProcessLaunch(); |     void ProcessLaunch(); | ||||||
| 
 | 
 | ||||||
|  | @ -216,6 +224,15 @@ private: | ||||||
|     MemoryManager& memory_manager; |     MemoryManager& memory_manager; | ||||||
|     VideoCore::RasterizerInterface* rasterizer = nullptr; |     VideoCore::RasterizerInterface* rasterizer = nullptr; | ||||||
|     Upload::State upload_state; |     Upload::State upload_state; | ||||||
|  |     GPUVAddr upload_address; | ||||||
|  | 
 | ||||||
|  |     struct UploadInfo { | ||||||
|  |         GPUVAddr upload_address; | ||||||
|  |         GPUVAddr exec_address; | ||||||
|  |         u32 copy_size; | ||||||
|  |     }; | ||||||
|  |     std::vector<UploadInfo> uploads; | ||||||
|  |     std::optional<GPUVAddr> indirect_compute{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||||
|  |  | ||||||
|  | @ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { | ||||||
|     bound_engines[method_call.subchannel] = engine_id; |     bound_engines[method_call.subchannel] = engine_id; | ||||||
|     switch (engine_id) { |     switch (engine_id) { | ||||||
|     case EngineID::FERMI_TWOD_A: |     case EngineID::FERMI_TWOD_A: | ||||||
|         dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); |         dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel, | ||||||
|  |                                   EngineTypes::Fermi2D); | ||||||
|         break; |         break; | ||||||
|     case EngineID::MAXWELL_B: |     case EngineID::MAXWELL_B: | ||||||
|         dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); |         dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel, | ||||||
|  |                                   EngineTypes::Maxwell3D); | ||||||
|         break; |         break; | ||||||
|     case EngineID::KEPLER_COMPUTE_B: |     case EngineID::KEPLER_COMPUTE_B: | ||||||
|         dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); |         dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel, | ||||||
|  |                                   EngineTypes::KeplerCompute); | ||||||
|         break; |         break; | ||||||
|     case EngineID::MAXWELL_DMA_COPY_A: |     case EngineID::MAXWELL_DMA_COPY_A: | ||||||
|         dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); |         dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel, | ||||||
|  |                                   EngineTypes::MaxwellDMA); | ||||||
|         break; |         break; | ||||||
|     case EngineID::KEPLER_INLINE_TO_MEMORY_B: |     case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||||||
|         dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); |         dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel, | ||||||
|  |                                   EngineTypes::KeplerMemory); | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); |         UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); | ||||||
|  |  | ||||||
|  | @ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() { | ||||||
|     pipeline->SetEngine(kepler_compute, gpu_memory); |     pipeline->SetEngine(kepler_compute, gpu_memory); | ||||||
|     pipeline->Configure(); |     pipeline->Configure(); | ||||||
|     const auto& qmd{kepler_compute->launch_description}; |     const auto& qmd{kepler_compute->launch_description}; | ||||||
|  |     auto indirect_address = kepler_compute->GetIndirectComputeAddress(); | ||||||
|  |     if (indirect_address) { | ||||||
|  |         // DispatchIndirect
 | ||||||
|  |         static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | ||||||
|  |         const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; | ||||||
|  |         const auto [buffer, offset] = | ||||||
|  |             buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); | ||||||
|  |         glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle()); | ||||||
|  |         glDispatchComputeIndirect(static_cast<GLintptr>(offset)); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); |     glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); | ||||||
|     ++num_queued_commands; |     ++num_queued_commands; | ||||||
|     has_written_global_memory |= pipeline->WritesGlobalMemory(); |     has_written_global_memory |= pipeline->WritesGlobalMemory(); | ||||||
|  |  | ||||||
|  | @ -463,6 +463,20 @@ void RasterizerVulkan::DispatchCompute() { | ||||||
|     pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); |     pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); | ||||||
| 
 | 
 | ||||||
|     const auto& qmd{kepler_compute->launch_description}; |     const auto& qmd{kepler_compute->launch_description}; | ||||||
|  |     auto indirect_address = kepler_compute->GetIndirectComputeAddress(); | ||||||
|  |     if (indirect_address) { | ||||||
|  |         // DispatchIndirect
 | ||||||
|  |         static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | ||||||
|  |         const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; | ||||||
|  |         const auto [buffer, offset] = | ||||||
|  |             buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); | ||||||
|  |         scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|  |         scheduler.Record([indirect_buffer = buffer->Handle(), | ||||||
|  |                           indirect_offset = offset](vk::CommandBuffer cmdbuf) { | ||||||
|  |             cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset); | ||||||
|  |         }); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; |     const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; | ||||||
|     scheduler.RequestOutsideRenderPassOperationContext(); |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|     scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); |     scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | ||||||
|  |  | ||||||
|  | @ -92,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | ||||||
|     X(vkCmdCopyImage); |     X(vkCmdCopyImage); | ||||||
|     X(vkCmdCopyImageToBuffer); |     X(vkCmdCopyImageToBuffer); | ||||||
|     X(vkCmdDispatch); |     X(vkCmdDispatch); | ||||||
|  |     X(vkCmdDispatchIndirect); | ||||||
|     X(vkCmdDraw); |     X(vkCmdDraw); | ||||||
|     X(vkCmdDrawIndexed); |     X(vkCmdDrawIndexed); | ||||||
|     X(vkCmdDrawIndirect); |     X(vkCmdDrawIndirect); | ||||||
|  |  | ||||||
|  | @ -203,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch { | ||||||
|     PFN_vkCmdCopyImage vkCmdCopyImage{}; |     PFN_vkCmdCopyImage vkCmdCopyImage{}; | ||||||
|     PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; |     PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; | ||||||
|     PFN_vkCmdDispatch vkCmdDispatch{}; |     PFN_vkCmdDispatch vkCmdDispatch{}; | ||||||
|  |     PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; | ||||||
|     PFN_vkCmdDraw vkCmdDraw{}; |     PFN_vkCmdDraw vkCmdDraw{}; | ||||||
|     PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; |     PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; | ||||||
|     PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; |     PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; | ||||||
|  | @ -1209,6 +1210,10 @@ public: | ||||||
|         dld->vkCmdDispatch(handle, x, y, z); |         dld->vkCmdDispatch(handle, x, y, z); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     void DispatchIndirect(VkBuffer indirect_buffer, VkDeviceSize offset) const noexcept { | ||||||
|  |         dld->vkCmdDispatchIndirect(handle, indirect_buffer, offset); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, |     void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||||||
|                          VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers, |                          VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers, | ||||||
|                          Span<VkBufferMemoryBarrier> buffer_barriers, |                          Span<VkBufferMemoryBarrier> buffer_barriers, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue