mirror of
synced 2025-03-08 10:09:43 +00:00
Merge pull request #3645 from wwylele/shader-manager
renderer_opengl: refactor shader & program objects and add shader manager for rasterizer
This commit is contained in:
@ -32,6 +32,8 @@ add_library(video_core STATIC
@ -177,6 +177,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle);
shader_program_manager =
@ -490,6 +493,11 @@ void RasterizerOpenGL::DrawTriangles() {
state.scissor.height = draw_rect.GetHeight();
// Draw the vertex batch
size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) {
@ -1258,95 +1266,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
void RasterizerOpenGL::SetShader() {
auto config = GLShader::PicaShaderConfig::BuildFromRegs(Pica::g_state.regs);
std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
// Find (or generate) the GLSL shader for the current TEV state
auto cached_shader = shader_cache.find(config);
if (cached_shader != shader_cache.end()) {
current_shader = cached_shader->second.get();
state.draw.shader_program = current_shader->shader.handle;
} else {
LOG_DEBUG(Render_OpenGL, "Creating new shader");
state.draw.shader_program = shader->shader.handle;
// Set the texture samplers to correspond to different texture units
GLint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::PicaTexture(0).id);
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::PicaTexture(1).id);
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::PicaTexture(2).id);
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex_cube");
if (uniform_tex != -1) {
glUniform1i(uniform_tex, TextureUnits::TextureCube.id);
// Set the texture samplers to correspond to different lookup table texture units
GLint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut");
if (uniform_lut != -1) {
glUniform1i(uniform_lut, TextureUnits::LightingLUT.id);
GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
if (uniform_fog_lut != -1) {
glUniform1i(uniform_fog_lut, TextureUnits::FogLUT.id);
GLint uniform_proctex_noise_lut =
glGetUniformLocation(shader->shader.handle, "proctex_noise_lut");
if (uniform_proctex_noise_lut != -1) {
glUniform1i(uniform_proctex_noise_lut, TextureUnits::ProcTexNoiseLUT.id);
GLint uniform_proctex_color_map =
glGetUniformLocation(shader->shader.handle, "proctex_color_map");
if (uniform_proctex_color_map != -1) {
glUniform1i(uniform_proctex_color_map, TextureUnits::ProcTexColorMap.id);
GLint uniform_proctex_alpha_map =
glGetUniformLocation(shader->shader.handle, "proctex_alpha_map");
if (uniform_proctex_alpha_map != -1) {
glUniform1i(uniform_proctex_alpha_map, TextureUnits::ProcTexAlphaMap.id);
GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut");
if (uniform_proctex_lut != -1) {
glUniform1i(uniform_proctex_lut, TextureUnits::ProcTexLUT.id);
GLint uniform_proctex_diff_lut =
glGetUniformLocation(shader->shader.handle, "proctex_diff_lut");
if (uniform_proctex_diff_lut != -1) {
glUniform1i(uniform_proctex_diff_lut, TextureUnits::ProcTexDiffLUT.id);
current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
if (block_index != GL_INVALID_INDEX) {
GLint block_size;
glGetActiveUniformBlockiv(current_shader->shader.handle, block_index,
ASSERT_MSG(block_size == sizeof(UniformData),
"Uniform block size did not match! Got {}, expected {}",
static_cast<int>(block_size), sizeof(UniformData));
glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
void RasterizerOpenGL::SyncClipEnabled() {
@ -8,12 +8,10 @@
#include <cstddef>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <vector>
#include <glad/glad.h>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
@ -25,13 +23,14 @@
#include "video_core/regs_texturing.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
#include "video_core/shader/shader.h"
struct ScreenInfo;
class ShaderProgramManager;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
@ -52,12 +51,6 @@ public:
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info) override;
/// OpenGL shader generated for a given Pica register state
struct PicaShader {
/// OpenGL shader resource
OGLShader shader;
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
@ -121,47 +114,6 @@ private:
GLfloat view[3];
struct LightSrc {
alignas(16) GLvec3 specular_0;
alignas(16) GLvec3 specular_1;
alignas(16) GLvec3 diffuse;
alignas(16) GLvec3 ambient;
alignas(16) GLvec3 position;
alignas(16) GLvec3 spot_direction; // negated
GLfloat dist_atten_bias;
GLfloat dist_atten_scale;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
struct UniformData {
GLint framebuffer_scale;
GLint alphatest_ref;
GLfloat depth_scale;
GLfloat depth_offset;
GLint scissor_x1;
GLint scissor_y1;
GLint scissor_x2;
GLint scissor_y2;
alignas(16) GLvec3 fog_color;
alignas(8) GLvec2 proctex_noise_f;
alignas(8) GLvec2 proctex_noise_a;
alignas(8) GLvec2 proctex_noise_p;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) GLvec4 tev_combiner_buffer_color;
alignas(16) GLvec4 clip_coef;
sizeof(UniformData) == 0x460,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Syncs entire status to match PICA registers
void SyncEntireState();
@ -269,8 +221,6 @@ private:
std::vector<HardwareVertex> vertex_batch;
std::unordered_map<GLShader::PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
const PicaShader* current_shader = nullptr;
bool shader_dirty;
struct {
@ -285,6 +235,8 @@ private:
bool dirty;
} uniform_block_data = {};
std::unique_ptr<ShaderProgramManager> shader_program_manager;
std::array<SamplerInfo, 3> texture_samplers;
OGLVertexArray vertex_array;
static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
@ -377,7 +377,7 @@ private:
OGLVertexArray attributeless_vao;
OGLBuffer d24s8_abgr_buffer;
GLsizeiptr d24s8_abgr_buffer_size;
OGLShader d24s8_abgr_shader;
OGLProgram d24s8_abgr_shader;
GLint d24s8_abgr_tbo_size_u_id;
GLint d24s8_abgr_viewport_u_id;
@ -5,6 +5,7 @@
#pragma once
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@ -96,11 +97,53 @@ public:
return *this;
/// Creates a new internal OpenGL resource and stores the handle
void Create(const char* vert_shader, const char* frag_shader) {
void Create(const char* source, GLenum type) {
if (handle != 0)
handle = GLShader::LoadProgram(vert_shader, frag_shader);
if (source == nullptr)
handle = GLShader::LoadShader(source, type);
void Release() {
if (handle == 0)
handle = 0;
GLuint handle = 0;
class OGLProgram : private NonCopyable {
OGLProgram() = default;
OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {}
~OGLProgram() {
OGLProgram& operator=(OGLProgram&& o) {
handle = std::exchange(o.handle, 0);
return *this;
/// Creates a new program from given shader objects
void Create(bool separable_program, const std::vector<GLuint>& shaders) {
if (handle != 0)
handle = GLShader::LoadProgram(separable_program, shaders);
/// Creates a new program from given shader source code
void Create(const char* vert_shader, const char* frag_shader) {
OGLShader vert, frag;
vert.Create(vert_shader, GL_VERTEX_SHADER);
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
Create(false, {vert.handle, frag.handle});
/// Deletes the internal OpenGL resource
@ -115,6 +158,38 @@ public:
GLuint handle = 0;
// Move-only wrapper around an OpenGL program pipeline object name kept in `handle`.
// A handle of 0 is treated as holding no pipeline (see the guards in Create and Release).
// NOTE(review): this extract appears to have lost brace-only lines, access specifiers and the
// statements directly under the guards (presumably early returns) - confirm against the full file.
class OGLPipeline : private NonCopyable {
OGLPipeline() = default;
// Takes over the pipeline name of `o` and leaves `o.handle` at 0.
OGLPipeline(OGLPipeline&& o) {
handle = std::exchange<GLuint>(o.handle, 0);
// Destructor body is not visible in this extract; presumably it releases the pipeline - confirm.
~OGLPipeline() {
// Move assignment: takes over the pipeline name of `o` and leaves `o.handle` at 0.
// NOTE(review): no release of a previously held pipeline is visible here - confirm.
OGLPipeline& operator=(OGLPipeline&& o) {
handle = std::exchange<GLuint>(o.handle, 0);
return *this;
// Generates one pipeline name into `handle`; the `handle != 0` guard suggests an already
// created pipeline is kept as is.
void Create() {
if (handle != 0)
glGenProgramPipelines(1, &handle);
// Deletes the pipeline name and resets `handle` to 0; guarded by a `handle == 0` check.
void Release() {
if (handle == 0)
glDeleteProgramPipelines(1, &handle);
handle = 0;
// OpenGL name of the pipeline object, 0 when none is held.
GLuint handle = 0;
class OGLBuffer : private NonCopyable {
OGLBuffer() = default;
@ -61,6 +61,37 @@ layout (std140) uniform shader_data {
// Builds the GLSL declarations for the variables handed from the vertex stage to the fragment
// stage: primary_color, texcoord0, texcoord1, texcoord2, texcoord0_w, normquat and view.
// @param is_output true declares the variables with the `out` qualifier, false with `in`
// @param separable_shader true prefixes every variable with an explicit `layout (location=N)`
//        qualifier and, for outputs, additionally appends a gl_PerVertex redeclaration
// @returns the accumulated declaration text
// NOTE(review): this extract seems to have lost brace-only lines and the end of the raw string
// literal near the bottom of the function - confirm against the full file.
static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) {
std::string out;
// Appends one declaration line; `location` is only emitted when separable_shader is set.
auto append_variable = [&](const char* var, int location) {
if (separable_shader) {
out += "layout (location=" + std::to_string(location) + ") ";
out += std::string(is_output ? "out " : "in ") + var + ";\n";
// Each variable reuses the ATTRIBUTE_* constant of the matching vertex attribute as location.
append_variable("vec4 primary_color", ATTRIBUTE_COLOR);
append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0);
append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1);
append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2);
append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W);
append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT);
append_variable("vec3 view", ATTRIBUTE_VIEW);
if (is_output && separable_shader) {
// gl_PerVertex redeclaration is required for separate shader object
out += R"(
out gl_PerVertex {
vec4 gl_Position;
float gl_ClipDistance[2];
return out;
PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) {
PicaShaderConfig res;
@ -206,11 +237,11 @@ static std::string SampleTexture(const PicaShaderConfig& config, unsigned textur
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
case TexturingRegs::TextureConfig::Texture2D:
return "texture(tex[0], texcoord[0])";
return "texture(tex0, texcoord0)";
case TexturingRegs::TextureConfig::Projection2D:
return "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))";
return "textureProj(tex0, vec3(texcoord0, texcoord0_w))";
case TexturingRegs::TextureConfig::TextureCube:
return "texture(tex_cube, vec3(texcoord[0], texcoord0_w))";
return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
case TexturingRegs::TextureConfig::Shadow2D:
case TexturingRegs::TextureConfig::ShadowCube:
NGLOG_CRITICAL(HW_GPU, "Unhandled shadow texture");
@ -220,15 +251,15 @@ static std::string SampleTexture(const PicaShaderConfig& config, unsigned textur
LOG_CRITICAL(HW_GPU, "Unhandled texture type %x",
return "texture(tex[0], texcoord[0])";
return "texture(tex0, texcoord0)";
case 1:
return "texture(tex[1], texcoord[1])";
return "texture(tex1, texcoord1)";
case 2:
if (state.texture2_use_coord1)
return "texture(tex[2], texcoord[1])";
return "texture(tex2, texcoord1)";
return "texture(tex[2], texcoord[2])";
return "texture(tex2, texcoord2)";
case 3:
if (state.proctex.enable) {
return "ProcTex()";
@ -1020,7 +1051,12 @@ float ProcTexNoiseCoef(vec2 x) {
out += "vec4 ProcTex() {\n";
out += "vec2 uv = abs(texcoord[" + std::to_string(config.state.proctex.coord) + "]);\n";
if (config.state.proctex.coord < 3) {
out += "vec2 uv = abs(texcoord" + std::to_string(config.state.proctex.coord) + ");\n";
} else {
NGLOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3");
out += "vec2 uv = abs(texcoord0);\n";
// Get shift offset before noise generation
out += "float u_shift = ";
@ -1085,23 +1121,24 @@ float ProcTexNoiseCoef(vec2 x) {
std::string GenerateFragmentShader(const PicaShaderConfig& config) {
std::string GenerateFragmentShader(const PicaShaderConfig& config, bool separable_shader) {
const auto& state = config.state;
std::string out = R"(
#version 330 core
std::string out = "#version 330 core\n";
if (separable_shader) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
in vec4 primary_color;
in vec2 texcoord[3];
in float texcoord0_w;
in vec4 normquat;
in vec3 view;
out += GetVertexInterfaceDeclaration(false, separable_shader);
out += R"(
in vec4 gl_FragCoord;
out vec4 color;
uniform sampler2D tex[3];
uniform sampler2D tex0;
uniform sampler2D tex1;
uniform sampler2D tex2;
uniform samplerCube tex_cube;
uniform samplerBuffer lighting_lut;
uniform samplerBuffer fog_lut;
@ -1246,8 +1283,11 @@ vec4 secondary_fragment_color = vec4(0.0);
return out;
std::string GenerateVertexShader() {
std::string GenerateTrivialVertexShader(bool separable_shader) {
std::string out = "#version 330 core\n";
if (separable_shader) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) +
") in vec4 vert_position;\n";
@ -1264,14 +1304,7 @@ std::string GenerateVertexShader() {
") in vec4 vert_normquat;\n";
out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
out += R"(
out vec4 primary_color;
out vec2 texcoord[3];
out float texcoord0_w;
out vec4 normquat;
out vec3 view;
out += GetVertexInterfaceDeclaration(true, separable_shader);
out += UniformBlockDef;
@ -1279,9 +1312,9 @@ out vec3 view;
void main() {
primary_color = vert_color;
texcoord[0] = vert_texcoord0;
texcoord[1] = vert_texcoord1;
texcoord[2] = vert_texcoord2;
texcoord0 = vert_texcoord0;
texcoord1 = vert_texcoord1;
texcoord2 = vert_texcoord2;
texcoord0_w = vert_texcoord0_w;
normquat = vert_normquat;
view = vert_view;
@ -9,7 +9,9 @@
#include <functional>
#include <string>
#include <type_traits>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace GLShader {
@ -132,18 +134,21 @@ struct PicaShaderConfig : Common::HashableStruct<PicaShaderConfigState> {
* Generates the GLSL vertex shader program source code for the current Pica state
* Generates the GLSL vertex shader program source code that accepts vertices from software shader
* and directly passes them to the fragment shader.
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
std::string GenerateVertexShader();
std::string GenerateTrivialVertexShader(bool separable_shader);
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
std::string GenerateFragmentShader(const PicaShaderConfig& config);
std::string GenerateFragmentShader(const PicaShaderConfig& config, bool separable_shader);
} // namespace GLShader
Normal file
Normal file
@ -0,0 +1,216 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <unordered_map>
#include <boost/functional/hash.hpp>
#include <boost/variant.hpp>
#include "video_core/renderer_opengl/gl_shader_manager.h"
// Looks up the uniform block `name` in program `shader`, asserts that the size reported for
// GL_UNIFORM_BLOCK_DATA_SIZE equals `expected_size`, and assigns the block to the binding point
// given by `binding`.
// @param shader        program object to query and configure
// @param name          name of the uniform block in the GLSL source
// @param binding       binding point, converted to GLuint for glUniformBlockBinding
// @param expected_size size in bytes the CPU-side structure expects the block to have
// NOTE(review): the statement guarded by the GL_INVALID_INDEX check and the closing braces are
// not visible in this extract; presumably a missing block is simply skipped - confirm.
static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
size_t expected_size) {
GLuint ub_index = glGetUniformBlockIndex(shader, name);
if (ub_index == GL_INVALID_INDEX) {
GLint ub_size = 0;
glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
// ub_size is cast to int only so that it matches the %d conversion in the message.
ASSERT_MSG(ub_size == expected_size, "Uniform block size did not match! Got %d, expected %zu",
static_cast<int>(ub_size), expected_size);
glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
// Binds the uniform blocks of program `shader`; the visible code handles the `shader_data`
// block, which goes to UniformBindings::Common.
// NOTE(review): the call below is cut off in this extract (the expected-size argument and the
// closing lines are missing) - confirm against the full file.
static void SetShaderUniformBlockBindings(GLuint shader) {
SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
// Points the sampler uniform `name` of program `shader` at the texture unit `binding.id`.
// Uniforms for which glGetUniformLocation returns -1 are left untouched.
// glUniform1i writes to the program that is currently in use, so `shader` is expected to be
// the active program when this runs.
static void SetShaderSamplerBinding(GLuint shader, const char* name,
TextureUnits::TextureUnit binding) {
GLint uniform_tex = glGetUniformLocation(shader, name);
if (uniform_tex != -1) {
glUniform1i(uniform_tex, binding.id);
// Assigns every sampler uniform used by the generated shaders (tex0..tex2, tex_cube and the
// lookup table buffers) in program `shader` to its fixed texture unit.
// The tracked shader program is swapped to `shader` for the duration of the calls and the
// previous value is written back at the end.
// NOTE(review): `cur_state` is a local copy and no Apply() call is visible in this extract;
// presumably the state is applied after each change of shader_program - confirm.
static void SetShaderSamplerBindings(GLuint shader) {
OpenGLState cur_state = OpenGLState::GetCurState();
// Remember the previously tracked program so it can be restored below.
GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
// Set the texture samplers to correspond to different texture units
SetShaderSamplerBinding(shader, "tex0", TextureUnits::PicaTexture(0));
SetShaderSamplerBinding(shader, "tex1", TextureUnits::PicaTexture(1));
SetShaderSamplerBinding(shader, "tex2", TextureUnits::PicaTexture(2));
SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube);
// Set the texture samplers to correspond to different lookup table texture units
SetShaderSamplerBinding(shader, "lighting_lut", TextureUnits::LightingLUT);
SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT);
SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT);
SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap);
SetShaderSamplerBinding(shader, "proctex_alpha_map", TextureUnits::ProcTexAlphaMap);
SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT);
SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT);
cur_state.draw.shader_program = old_program;
* An object representing a single shader program stage. It can be either a shader object or a
* program object, depending on whether separable programs are used.
// Holds one shader stage as either an OGLShader or an OGLProgram inside a boost::variant.
// OGLShader is the first alternative, so `which() == 0` selects the shader object case and any
// other value selects the program case.
// NOTE(review): brace-only lines and access specifiers are missing from this extract.
class OGLShaderStage {
// Chooses the stored alternative: OGLProgram when `separable` is true, OGLShader otherwise.
explicit OGLShaderStage(bool separable) {
if (separable) {
shader_or_program = OGLProgram();
} else {
shader_or_program = OGLShader();
// Compiles `source` as a shader of the given `type`.
// Shader case: the compiled shader object is stored directly.
// Program case: a temporary shader object is compiled and linked on its own into the stored
// program, with the separable flag set to true.
void Create(const char* source, GLenum type) {
if (shader_or_program.which() == 0) {
boost::get<OGLShader>(shader_or_program).Create(source, type);
} else {
OGLShader shader;
shader.Create(source, type);
OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
program.Create(true, {shader.handle});
// Returns the `handle` member of whichever alternative is stored.
GLuint GetHandle() const {
if (shader_or_program.which() == 0) {
return boost::get<OGLShader>(shader_or_program).handle;
} else {
return boost::get<OGLProgram>(shader_or_program).handle;
boost::variant<OGLShader, OGLProgram> shader_or_program;
// Owns the shader stage built from GLShader::GenerateTrivialVertexShader.
// The stage is created once in the constructor and exposed through Get().
class TrivialVertexShader {
// Generates the trivial vertex shader source for the requested mode and compiles it as
// GL_VERTEX_SHADER. The temporary source string lives until the end of the full expression,
// so the pointer from c_str() stays valid for the duration of the Create call.
explicit TrivialVertexShader(bool separable) : program(separable) {
program.Create(GLShader::GenerateTrivialVertexShader(separable).c_str(), GL_VERTEX_SHADER);
// Returns the handle of the underlying stage (shader or program, see OGLShaderStage).
GLuint Get() const {
return program.GetHandle();
OGLShaderStage program;
// Cache of shader stages keyed by a configuration value.
// KeyConfigType  key type of the cache; used as the std::unordered_map key
// CodeGenerator  function that produces the GLSL source for a key and the separable flag
// ShaderType     GL shader type passed to OGLShaderStage::Create
template <typename KeyConfigType, std::string (*CodeGenerator)(const KeyConfigType&, bool),
GLenum ShaderType>
class ShaderCache {
explicit ShaderCache(bool separable) : separable(separable) {}
// Returns the handle of the stage for `config`, generating and compiling it on first use.
// emplace only inserts when the key is absent; `new_shader` is true in that case.
// NOTE(review): the OGLShaderStage temporary is constructed on every call, including cache
// hits; try_emplace(config, separable) would avoid that - worth checking.
GLuint Get(const KeyConfigType& config) {
auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable});
OGLShaderStage& cached_shader = iter->second;
if (new_shader) {
cached_shader.Create(CodeGenerator(config, separable).c_str(), ShaderType);
return cached_shader.GetHandle();
bool separable;
std::unordered_map<KeyConfigType, OGLShaderStage> shaders;
// Fragment shader cache keyed by PicaShaderConfig and filled by GenerateFragmentShader.
using FragmentShaders =
ShaderCache<GLShader::PicaShaderConfig, &GLShader::GenerateFragmentShader, GL_FRAGMENT_SHADER>;
// Private implementation of ShaderProgramManager. Holds the currently selected stage handles,
// the stage caches, the cache of linked programs and the pipeline object.
// NOTE(review): brace-only lines and access specifiers are missing from this extract.
class ShaderProgramManager::Impl {
// Members are initialized in declaration order, not in the order written here; every
// initializer reads the constructor parameter `separable`, so the mismatch has no effect.
// NOTE(review): the statement under `if (separable)` is not visible; presumably the pipeline
// object is created there - confirm.
explicit Impl(bool separable)
: separable(separable), trivial_vertex_shader(separable), fragment_shaders(separable) {
if (separable)
// Handles of the vertex, geometry and fragment stage that make up one program; 0 by default.
// Used as key of program_cache.
struct ShaderTuple {
GLuint vs = 0;
GLuint gs = 0;
GLuint fs = 0;
// Two tuples compare equal when all three handles match.
bool operator==(const ShaderTuple& rhs) const {
return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
bool operator!=(const ShaderTuple& rhs) const {
return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs);
// Hash functor for ShaderTuple: folds the three handles together with boost::hash_combine.
struct Hash {
std::size_t operator()(const ShaderTuple& tuple) const {
std::size_t hash = 0;
boost::hash_combine(hash, tuple.vs);
boost::hash_combine(hash, tuple.gs);
boost::hash_combine(hash, tuple.fs);
return hash;
// Stage handles selected through the Use* methods of ShaderProgramManager.
ShaderTuple current;
TrivialVertexShader trivial_vertex_shader;
FragmentShaders fragment_shaders;
// Selects between the pipeline path and the linked program path in ApplyTo.
bool separable;
// Linked programs for the non-separable path, keyed by the stage handles they were built from.
std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
// Pipeline object used by the separable path.
OGLPipeline pipeline;
// Creates the private implementation, forwarding the `separable` flag to it.
ShaderProgramManager::ShaderProgramManager(bool separable)
: impl(std::make_unique<Impl>(separable)) {}
// Defaulted in this translation unit, where Impl is a complete type, so that the
// std::unique_ptr<Impl> member can be destroyed.
ShaderProgramManager::~ShaderProgramManager() = default;
// Selects the trivial vertex shader as the vertex stage of the current tuple.
// Only records the handle; nothing is bound until ApplyTo is called.
void ShaderProgramManager::UseTrivialVertexShader() {
impl->current.vs = impl->trivial_vertex_shader.Get();
// Sets the geometry stage handle of the current tuple to 0, i.e. no geometry shader object.
void ShaderProgramManager::UseTrivialGeometryShader() {
impl->current.gs = 0;
// Selects the fragment stage for `config`, taking it from the fragment shader cache (which
// generates and compiles it on first use).
void ShaderProgramManager::UseFragmentShader(const GLShader::PicaShaderConfig& config) {
impl->current.fs = impl->fragment_shaders.Get(config);
// Writes the currently selected stages into `state`.
// Separable path: attaches the three stage programs to the pipeline object, clears
// state.draw.shader_program and sets state.draw.program_pipeline to the pipeline.
// Non-separable path: looks up (or links on first use) a program for the current stage tuple
// and sets state.draw.shader_program to it.
// Only the fields of `state` are changed in the visible code; no call that applies the state
// is made here.
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
if (impl->separable) {
// Without this resetting, AMD sometimes freezes when one stage is changed but not for the
// others
// NOTE(review): the reset call this comment refers to is not visible in this extract -
// confirm against the full file.
glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs);
glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs);
glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs);
state.draw.shader_program = 0;
state.draw.program_pipeline = impl->pipeline.handle;
} else {
// operator[] default-constructs an OGLProgram (handle 0) for a tuple seen for the first time.
OGLProgram& cached_program = impl->program_cache[impl->current];
if (cached_program.handle == 0) {
cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs});
state.draw.shader_program = cached_program.handle;
Normal file
Normal file
@ -0,0 +1,73 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
enum class UniformBindings : GLuint { Common };
// Parameters of one light source; UniformData holds an array of eight of these.
// Every vec3 member is forced onto a 16-byte boundary with alignas(16), presumably to mirror
// the std140 layout of the shader-side uniform block - confirm.
struct LightSrc {
alignas(16) GLvec3 specular_0;
alignas(16) GLvec3 specular_1;
alignas(16) GLvec3 diffuse;
alignas(16) GLvec3 ambient;
alignas(16) GLvec3 position;
alignas(16) GLvec3 spot_direction; // negated
GLfloat dist_atten_bias;
GLfloat dist_atten_scale;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
// CPU-side layout of the `shader_data` uniform block. The size of the block reported by GL is
// asserted against the size of this structure when a shader is set up, and the compile-time
// checks at the end pin sizeof(UniformData) to 0x460 bytes and below 16384 bytes.
// NOTE(review): the closing line of the struct and the opening line of the first static_assert
// are missing from this extract - confirm against the full file.
struct UniformData {
GLint framebuffer_scale;
GLint alphatest_ref;
GLfloat depth_scale;
GLfloat depth_offset;
GLint scissor_x1;
GLint scissor_y1;
GLint scissor_x2;
GLint scissor_y2;
// From here on members carry explicit alignment: 16 bytes for vec3 and vec4, 8 bytes for vec2.
alignas(16) GLvec3 fog_color;
alignas(8) GLvec2 proctex_noise_f;
alignas(8) GLvec2 proctex_noise_a;
alignas(8) GLvec2 proctex_noise_p;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) GLvec4 tev_combiner_buffer_color;
alignas(16) GLvec4 clip_coef;
// Size check: a change in size means the block definition in the shader must be updated too.
sizeof(UniformData) == 0x460,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// A class that manages different shader stages and configures them with given config data.
// Public interface of the shader manager; all state lives in the private Impl class.
// NOTE(review): access specifiers, the closing line and possibly other short declarations are
// missing from this extract.
class ShaderProgramManager {
// @param separable true selects separable programs combined through a program pipeline,
//        false selects conventionally linked programs
explicit ShaderProgramManager(bool separable);
// Selects the trivial vertex shader as the vertex stage.
void UseTrivialVertexShader();
// Selects no geometry shader object (handle 0) for the geometry stage.
void UseTrivialGeometryShader();
// Selects the fragment shader generated for `config`.
void UseFragmentShader(const GLShader::PicaShaderConfig& config);
// Writes the selected stages into the shader program / program pipeline fields of `state`.
void ApplyTo(OpenGLState& state);
// Implementation class, defined in the source file.
class Impl;
std::unique_ptr<Impl> impl;
@ -10,66 +10,67 @@
namespace GLShader {
GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) {
GLuint LoadShader(const char* source, GLenum type) {
const char* debug_type;
switch (type) {
debug_type = "vertex";
debug_type = "geometry";
debug_type = "fragment";
// Create the shaders
GLuint vertex_shader_id = glCreateShader(GL_VERTEX_SHADER);
GLuint fragment_shader_id = glCreateShader(GL_FRAGMENT_SHADER);
GLuint shader_id = glCreateShader(type);
glShaderSource(shader_id, 1, &source, nullptr);
NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
GLint result = GL_FALSE;
int info_log_length;
// Compile Vertex Shader
LOG_DEBUG(Render_OpenGL, "Compiling vertex shader...");
glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr);
// Check Vertex Shader
glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result);
glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
GLint info_log_length;
glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
if (info_log_length > 1) {
std::vector<char> vertex_shader_error(info_log_length);
glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]);
std::vector<char> shader_error(info_log_length);
glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]);
NGLOG_DEBUG(Render_OpenGL, "{}", &shader_error[0]);
} else {
LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", &vertex_shader_error[0]);
// Compile Fragment Shader
LOG_DEBUG(Render_OpenGL, "Compiling fragment shader...");
glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr);
// Check Fragment Shader
glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result);
glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
if (info_log_length > 1) {
std::vector<char> fragment_shader_error(info_log_length);
glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, &fragment_shader_error[0]);
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]);
} else {
LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s",
NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type,
NGLOG_ERROR(Render_OpenGL, "Shader source code:\n{}", source);
return shader_id;
GLuint LoadProgram(bool separable_program, const std::vector<GLuint>& shaders) {
// Link the program
LOG_DEBUG(Render_OpenGL, "Linking program...");
NGLOG_DEBUG(Render_OpenGL, "Linking program...");
GLuint program_id = glCreateProgram();
glAttachShader(program_id, vertex_shader_id);
glAttachShader(program_id, fragment_shader_id);
for (GLuint shader : shaders) {
if (shader != 0) {
glAttachShader(program_id, shader);
if (separable_program) {
glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
// Check the program
GLint result = GL_FALSE;
GLint info_log_length;
glGetProgramiv(program_id, GL_LINK_STATUS, &result);
glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
@ -77,21 +78,19 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) {
std::vector<char> program_error(info_log_length);
glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
if (result == GL_TRUE) {
LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]);
NGLOG_DEBUG(Render_OpenGL, "{}", &program_error[0]);
} else {
LOG_ERROR(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]);
NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", &program_error[0]);
// If the program linking failed at least one of the shaders was probably bad
if (result == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader);
LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader);
ASSERT_MSG(result == GL_TRUE, "Shader not linked");
for (GLuint shader : shaders) {
if (shader != 0) {
glDetachShader(program_id, shader);
return program_id;
@ -4,16 +4,24 @@
#pragma once
#include <vector>
#include <glad/glad.h>
namespace GLShader {
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
* @param vertex_shader String of the GLSL vertex shader program
* @param fragment_shader String of the GLSL fragment shader program
* @returns Handle of the newly created OpenGL shader object
* Utility function to create and compile an OpenGL GLSL shader
* @param source String of the GLSL shader program
GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader);
GLuint LoadShader(const char* source, GLenum type);
* Utility function to create and link an OpenGL GLSL shader program
* @param separable_program whether to create a separable program
* @param shaders ID of shaders to attach to the program
* @returns Handle of the newly created OpenGL program object
GLuint LoadProgram(bool separable_program, const std::vector<GLuint>& shaders);
} // namespace GLShader
@ -71,6 +71,7 @@ OpenGLState::OpenGLState() {
draw.vertex_buffer = 0;
draw.uniform_buffer = 0;
draw.shader_program = 0;
draw.program_pipeline = 0;
scissor.enabled = false;
scissor.x = 0;
@ -282,6 +283,11 @@ void OpenGLState::Apply() const {
// Program pipeline
if (draw.program_pipeline != cur_state.draw.program_pipeline) {
// Scissor test
if (scissor.enabled != cur_state.scissor.enabled) {
if (scissor.enabled) {
@ -360,6 +366,13 @@ OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
return *this;
// Clears the tracked program pipeline binding when it currently equals `handle`.
// Only this state object is modified in the visible code; no GL call is made here.
// @returns *this, so that calls can be chained
OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
if (draw.program_pipeline == handle) {
draw.program_pipeline = 0;
return *this;
OpenGLState& OpenGLState::ResetBuffer(GLuint handle) {
if (draw.vertex_buffer == handle) {
draw.vertex_buffer = 0;
@ -128,6 +128,7 @@ public:
GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
} draw;
struct {
@ -161,6 +162,7 @@ public:
OpenGLState& ResetTexture(GLuint handle);
OpenGLState& ResetSampler(GLuint handle);
OpenGLState& ResetProgram(GLuint handle);
OpenGLState& ResetPipeline(GLuint handle);
OpenGLState& ResetBuffer(GLuint handle);
OpenGLState& ResetVertexArray(GLuint handle);
OpenGLState& ResetFramebuffer(GLuint handle);
@ -73,7 +73,7 @@ private:
// OpenGL object IDs
OGLVertexArray vertex_array;
OGLBuffer vertex_buffer;
OGLShader shader;
OGLProgram shader;
/// Display information for top and bottom screens respectively
std::array<ScreenInfo, 2> screen_infos;
Reference in a new issue