mirror of
				https://github.com/yuzu-emu/yuzu-mainline.git
				synced 2025-11-04 09:35:06 +00:00 
			
		
		
		
	Merge pull request #1290 from FernandoS27/shader-header
Implemented (Partially) Shader Header
This commit is contained in:
		
						commit
						fafc80d72e
					
				| 
						 | 
					@ -14,6 +14,7 @@ add_library(video_core STATIC
 | 
				
			||||||
    engines/maxwell_dma.cpp
 | 
					    engines/maxwell_dma.cpp
 | 
				
			||||||
    engines/maxwell_dma.h
 | 
					    engines/maxwell_dma.h
 | 
				
			||||||
    engines/shader_bytecode.h
 | 
					    engines/shader_bytecode.h
 | 
				
			||||||
 | 
					    engines/shader_header.h
 | 
				
			||||||
    gpu.cpp
 | 
					    gpu.cpp
 | 
				
			||||||
    gpu.h
 | 
					    gpu.h
 | 
				
			||||||
    macro_interpreter.cpp
 | 
					    macro_interpreter.cpp
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										103
									
								
								src/video_core/engines/shader_header.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								src/video_core/engines/shader_header.h
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,103 @@
 | 
				
			||||||
 | 
					// Copyright 2018 yuzu Emulator Project
 | 
				
			||||||
 | 
					// Licensed under GPLv2 or any later version
 | 
				
			||||||
 | 
					// Refer to the license.txt file included.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "common/bit_field.h"
 | 
				
			||||||
 | 
					#include "common/common_funcs.h"
 | 
				
			||||||
 | 
					#include "common/common_types.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Tegra::Shader {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enum class OutputTopology : u32 {
 | 
				
			||||||
 | 
					    PointList = 1,
 | 
				
			||||||
 | 
					    LineStrip = 6,
 | 
				
			||||||
 | 
					    TriangleStrip = 7,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Documentation in:
 | 
				
			||||||
 | 
					// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
 | 
				
			||||||
 | 
					struct Header {
 | 
				
			||||||
 | 
					    union {
 | 
				
			||||||
 | 
					        BitField<0, 5, u32> sph_type;
 | 
				
			||||||
 | 
					        BitField<5, 5, u32> version;
 | 
				
			||||||
 | 
					        BitField<10, 4, u32> shader_type;
 | 
				
			||||||
 | 
					        BitField<14, 1, u32> mrt_enable;
 | 
				
			||||||
 | 
					        BitField<15, 1, u32> kills_pixels;
 | 
				
			||||||
 | 
					        BitField<16, 1, u32> does_global_store;
 | 
				
			||||||
 | 
					        BitField<17, 4, u32> sass_version;
 | 
				
			||||||
 | 
					        BitField<21, 5, u32> reserved;
 | 
				
			||||||
 | 
					        BitField<26, 1, u32> does_load_or_store;
 | 
				
			||||||
 | 
					        BitField<27, 1, u32> does_fp64;
 | 
				
			||||||
 | 
					        BitField<28, 4, u32> stream_out_mask;
 | 
				
			||||||
 | 
					    } common0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    union {
 | 
				
			||||||
 | 
					        BitField<0, 24, u32> shader_local_memory_low_size;
 | 
				
			||||||
 | 
					        BitField<24, 8, u32> per_patch_attribute_count;
 | 
				
			||||||
 | 
					    } common1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    union {
 | 
				
			||||||
 | 
					        BitField<0, 24, u32> shader_local_memory_high_size;
 | 
				
			||||||
 | 
					        BitField<24, 8, u32> threads_per_input_primitive;
 | 
				
			||||||
 | 
					    } common2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    union {
 | 
				
			||||||
 | 
					        BitField<0, 24, u32> shader_local_memory_crs_size;
 | 
				
			||||||
 | 
					        BitField<24, 4, OutputTopology> output_topology;
 | 
				
			||||||
 | 
					        BitField<28, 4, u32> reserved;
 | 
				
			||||||
 | 
					    } common3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    union {
 | 
				
			||||||
 | 
					        BitField<0, 12, u32> max_output_vertices;
 | 
				
			||||||
 | 
					        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
 | 
				
			||||||
 | 
					        BitField<24, 4, u32> reserved;
 | 
				
			||||||
 | 
					        BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
 | 
				
			||||||
 | 
					    } common4;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    union {
 | 
				
			||||||
 | 
					        struct {
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(2);  // ImapColor
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(1);  // ImapReserved
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(3);  // OmapSystemValuesA
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(1);  // OmapSystemValuesB
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(2);  // OmapColor
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(2);  // OmapSystemValuesC
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(1);  // OmapReserved
 | 
				
			||||||
 | 
					        } vtg;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        struct {
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(2);  // ImapColor
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
 | 
				
			||||||
 | 
					            INSERT_PADDING_BYTES(2);  // ImapReserved
 | 
				
			||||||
 | 
					            struct {
 | 
				
			||||||
 | 
					                u32 target;
 | 
				
			||||||
 | 
					                union {
 | 
				
			||||||
 | 
					                    BitField<0, 1, u32> sample_mask;
 | 
				
			||||||
 | 
					                    BitField<1, 1, u32> depth;
 | 
				
			||||||
 | 
					                    BitField<2, 30, u32> reserved;
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					            } omap;
 | 
				
			||||||
 | 
					            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
 | 
				
			||||||
 | 
					                const u32 bit = render_target * 4 + component;
 | 
				
			||||||
 | 
					                return omap.target & (1 << bit);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        } ps;
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace Tegra::Shader
 | 
				
			||||||
| 
						 | 
					@ -12,6 +12,7 @@
 | 
				
			||||||
#include "common/assert.h"
 | 
					#include "common/assert.h"
 | 
				
			||||||
#include "common/common_types.h"
 | 
					#include "common/common_types.h"
 | 
				
			||||||
#include "video_core/engines/shader_bytecode.h"
 | 
					#include "video_core/engines/shader_bytecode.h"
 | 
				
			||||||
 | 
					#include "video_core/engines/shader_header.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
 | 
					#include "video_core/renderer_opengl/gl_rasterizer.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 | 
					#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
 | 
				
			||||||
using Tegra::Shader::SubOp;
 | 
					using Tegra::Shader::SubOp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
 | 
					constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
 | 
				
			||||||
constexpr u32 PROGRAM_HEADER_SIZE = 0x50;
 | 
					constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DecompileFail : public std::runtime_error {
 | 
					class DecompileFail : public std::runtime_error {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
| 
						 | 
					@ -674,7 +675,7 @@ public:
 | 
				
			||||||
                  u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
 | 
					                  u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
 | 
				
			||||||
        : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
 | 
					        : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
 | 
				
			||||||
          stage(stage), suffix(suffix) {
 | 
					          stage(stage), suffix(suffix) {
 | 
				
			||||||
 | 
					        std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
 | 
				
			||||||
        Generate(suffix);
 | 
					        Generate(suffix);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -688,23 +689,6 @@ public:
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    // Shader program header for a Fragment Shader.
 | 
					 | 
				
			||||||
    struct FragmentHeader {
 | 
					 | 
				
			||||||
        INSERT_PADDING_WORDS(5);
 | 
					 | 
				
			||||||
        INSERT_PADDING_WORDS(13);
 | 
					 | 
				
			||||||
        u32 enabled_color_outputs;
 | 
					 | 
				
			||||||
        union {
 | 
					 | 
				
			||||||
            BitField<0, 1, u32> writes_samplemask;
 | 
					 | 
				
			||||||
            BitField<1, 1, u32> writes_depth;
 | 
					 | 
				
			||||||
        };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
 | 
					 | 
				
			||||||
            const u32 bit = render_target * 4 + component;
 | 
					 | 
				
			||||||
            return enabled_color_outputs & (1 << bit);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
    static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /// Gets the Subroutine object corresponding to the specified address.
 | 
					    /// Gets the Subroutine object corresponding to the specified address.
 | 
				
			||||||
    const Subroutine& GetSubroutine(u32 begin, u32 end) const {
 | 
					    const Subroutine& GetSubroutine(u32 begin, u32 end) const {
 | 
				
			||||||
        const auto iter = subroutines.find(Subroutine{begin, end, suffix});
 | 
					        const auto iter = subroutines.find(Subroutine{begin, end, suffix});
 | 
				
			||||||
| 
						 | 
					@ -1010,10 +994,8 @@ private:
 | 
				
			||||||
    /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
 | 
					    /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
 | 
				
			||||||
    void EmitFragmentOutputsWrite() {
 | 
					    void EmitFragmentOutputsWrite() {
 | 
				
			||||||
        ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
 | 
					        ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
 | 
				
			||||||
        FragmentHeader header;
 | 
					 | 
				
			||||||
        std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented");
 | 
					        ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Write the color outputs using the data in the shader registers, disabled
 | 
					        // Write the color outputs using the data in the shader registers, disabled
 | 
				
			||||||
        // rendertargets/components are skipped in the register assignment.
 | 
					        // rendertargets/components are skipped in the register assignment.
 | 
				
			||||||
| 
						 | 
					@ -1022,7 +1004,7 @@ private:
 | 
				
			||||||
             ++render_target) {
 | 
					             ++render_target) {
 | 
				
			||||||
            // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
 | 
					            // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
 | 
				
			||||||
            for (u32 component = 0; component < 4; ++component) {
 | 
					            for (u32 component = 0; component < 4; ++component) {
 | 
				
			||||||
                if (header.IsColorComponentOutputEnabled(render_target, component)) {
 | 
					                if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
 | 
				
			||||||
                    shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
 | 
					                    shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
 | 
				
			||||||
                                               regs.GetRegisterAsFloat(current_reg)));
 | 
					                                               regs.GetRegisterAsFloat(current_reg)));
 | 
				
			||||||
                    ++current_reg;
 | 
					                    ++current_reg;
 | 
				
			||||||
| 
						 | 
					@ -1030,7 +1012,7 @@ private:
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (header.writes_depth) {
 | 
					        if (header.ps.omap.depth) {
 | 
				
			||||||
            // The depth output is always 2 registers after the last color output, and current_reg
 | 
					            // The depth output is always 2 registers after the last color output, and current_reg
 | 
				
			||||||
            // already contains one past the last color register.
 | 
					            // already contains one past the last color register.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2666,6 +2648,7 @@ private:
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    const std::set<Subroutine>& subroutines;
 | 
					    const std::set<Subroutine>& subroutines;
 | 
				
			||||||
    const ProgramCode& program_code;
 | 
					    const ProgramCode& program_code;
 | 
				
			||||||
 | 
					    Tegra::Shader::Header header;
 | 
				
			||||||
    const u32 main_offset;
 | 
					    const u32 main_offset;
 | 
				
			||||||
    Maxwell3D::Regs::ShaderStage stage;
 | 
					    Maxwell3D::Regs::ShaderStage stage;
 | 
				
			||||||
    const std::string& suffix;
 | 
					    const std::string& suffix;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue