mirror of
https://github.com/yuzu-emu/yuzu-android.git
synced 2025-01-04 07:05:46 +00:00
shader: Implement ATOM/S and RED
This commit is contained in:
parent
479ca00071
commit
3db2b3effa
|
@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC
|
|||
backend/spirv/emit_context.h
|
||||
backend/spirv/emit_spirv.cpp
|
||||
backend/spirv/emit_spirv.h
|
||||
backend/spirv/emit_spirv_atomic.cpp
|
||||
backend/spirv/emit_spirv_barriers.cpp
|
||||
backend/spirv/emit_spirv_bitwise_conversion.cpp
|
||||
backend/spirv/emit_spirv_composite.cpp
|
||||
|
@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC
|
|||
frontend/maxwell/program.h
|
||||
frontend/maxwell/structured_control_flow.cpp
|
||||
frontend/maxwell/structured_control_flow.h
|
||||
frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
|
||||
frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
|
||||
frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
|
||||
frontend/maxwell/translate/impl/barrier_operations.cpp
|
||||
frontend/maxwell/translate/impl/bitfield_extract.cpp
|
||||
|
|
|
@ -15,6 +15,53 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
// Selects which read-modify-write body CasFunction emits.
enum class CasFunctionType {
|
||||
// Unsigned increment that yields zero once the current value is >= the operand.
Increment,
|
||||
// Unsigned decrement that yields the operand when the current value is zero or above the operand.
Decrement,
|
||||
// Emitted as OpFAdd.
FPAdd,
|
||||
// Emitted as OpFMin.
FPMin,
|
||||
// Emitted as OpFMax.
FPMax,
|
||||
};
|
||||
|
||||
/// Emits a SPIR-V function `value_type f(value_type current, value_type operand)` that computes
/// the value a compare-and-swap loop should try to store.
///
/// @param ctx           Emit context receiving the function.
/// @param function_type Operation to implement in the function body.
/// @param value_type    Type of both parameters and of the return value.
/// @return Id of the emitted function.
Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) {
    const Id signature{ctx.TypeFunction(value_type, value_type, value_type)};
    const Id function{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, signature)};
    const Id current{ctx.OpFunctionParameter(value_type)};
    const Id operand{ctx.OpFunctionParameter(value_type)};
    ctx.AddLabel();

    const auto emit_body{[&]() -> Id {
        switch (function_type) {
        case CasFunctionType::Increment: {
            // current >= operand ? 0 : current + 1
            const Id wraps{ctx.OpUGreaterThanEqual(ctx.U1, current, operand)};
            const Id next{ctx.OpIAdd(value_type, current, ctx.Constant(value_type, 1))};
            return ctx.OpSelect(value_type, wraps, ctx.u32_zero_value, next);
        }
        case CasFunctionType::Decrement: {
            // (current == 0 || current > operand) ? operand : current - 1
            const Id is_zero{ctx.OpIEqual(ctx.U1, current, ctx.Constant(value_type, 0u))};
            const Id is_above{ctx.OpUGreaterThan(ctx.U1, current, operand)};
            const Id wraps{ctx.OpLogicalOr(ctx.U1, is_zero, is_above)};
            const Id previous{ctx.OpISub(value_type, current, ctx.Constant(value_type, 1))};
            return ctx.OpSelect(value_type, wraps, operand, previous);
        }
        case CasFunctionType::FPAdd:
            return ctx.OpFAdd(value_type, current, operand);
        case CasFunctionType::FPMin:
            return ctx.OpFMin(value_type, current, operand);
        case CasFunctionType::FPMax:
            return ctx.OpFMax(value_type, current, operand);
        default:
            // Unknown operation: a default-constructed Id is returned, as before.
            return Id{};
        }
    }};

    ctx.OpReturnValue(emit_body());
    ctx.OpFunctionEnd();
    return function;
}
|
||||
|
||||
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
|
||||
const spv::ImageFormat format{spv::ImageFormat::Unknown};
|
||||
const Id type{ctx.F32[1]};
|
||||
|
@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Emits a SPIR-V helper `value_type f(u32 index, value_type operand, base_pointer)` that applies
/// `function` to one 32-bit word through a compare-and-swap retry loop.
///
/// Each iteration loads the word, converts it to `value_type`, calls `function(current, operand)`,
/// converts the result back to a word and tries to publish it with OpAtomicCompareExchange. The
/// loop repeats until the exchange returns the word that was loaded. The helper returns the value
/// produced by the exchange, converted to `value_type`.
///
/// @param function     Id of a function with signature `value_type(value_type, value_type)`.
/// @param pointer_type Whether the base pointer parameter refers to shared memory or to an SSBO.
/// @param value_type   Type the operation works on. F32[2] is stored as packed halves in the word;
///                     other types are bit-cast to and from u32.
/// @return Id of the emitted helper function.
///
/// NOTE(review): unlike GetSharedPointer in emit_spirv_atomic.cpp, the shared path adds no leading
/// zero index for explicit workgroup layouts — confirm this helper is only generated for the
/// plain array layout.
Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) {
    const Id loop_header{OpLabel()};
    const Id continue_block{OpLabel()};
    const Id merge_block{OpLabel()};

    const bool is_shared{pointer_type == CasPointerType::Shared};
    const Id storage_type{is_shared ? shared_memory_u32_type : storage_memory_u32};

    const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)};
    const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
    const Id index{OpFunctionParameter(U32[1])};
    const Id op_b{OpFunctionParameter(value_type)};
    const Id base{OpFunctionParameter(storage_type)};
    AddLabel();

    // Same numeric value (1) the loop used before, now spelled with its SPIR-V name.
    const Id scope{Constant(U32[1], static_cast<u32>(spv::Scope::Device))};

    const bool is_packed_half{value_type.value == F32[2].value};
    // OpBitcast requires the operand type to differ from the result type, so a u32 value must be
    // passed through untouched instead of being "cast" to itself.
    const bool is_word{value_type.value == U32[1].value};
    const auto to_value{[&](Id word) -> Id {
        if (is_packed_half) {
            return OpUnpackHalf2x16(F32[2], word);
        }
        return is_word ? word : OpBitcast(value_type, word);
    }};
    const auto to_word{[&](Id value) -> Id {
        if (is_packed_half) {
            return OpPackHalf2x16(U32[1], value);
        }
        return is_word ? value : OpBitcast(U32[1], value);
    }};

    OpBranch(loop_header);
    AddLabel(loop_header);
    OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
    OpBranch(continue_block);

    AddLabel(continue_block);
    const Id word_pointer{is_shared ? OpAccessChain(shared_u32, base, index)
                                    : OpAccessChain(storage_u32, base, u32_zero_value, index)};
    const Id expected{OpLoad(U32[1], word_pointer)};
    const Id new_value{to_word(OpFunctionCall(value_type, function, to_value(expected), op_b))};
    const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, scope, u32_zero_value,
                                                u32_zero_value, new_value, expected)};
    // The exchange succeeded only if memory still held the word loaded above.
    const Id success{OpIEqual(U1, atomic_res, expected)};
    OpBranchConditional(success, merge_block, loop_header);

    AddLabel(merge_block);
    OpReturnValue(to_value(atomic_res));
    OpFunctionEnd();
    return func;
}
|
||||
|
||||
void EmitContext::DefineCommonTypes(const Info& info) {
|
||||
void_id = TypeVoid();
|
||||
|
||||
|
@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
|
|||
}
|
||||
const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
|
||||
const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)};
|
||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||
shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup);
|
||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
||||
interfaces.push_back(shared_memory_u32);
|
||||
|
||||
const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
|
||||
|
@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
|
|||
if (program.info.uses_int16) {
|
||||
shared_store_u16_func = make_function(16, 16);
|
||||
}
|
||||
if (program.info.uses_shared_increment) {
|
||||
const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])};
|
||||
increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]);
|
||||
}
|
||||
if (program.info.uses_shared_decrement) {
|
||||
const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])};
|
||||
decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineAttributeMemAccess(const Info& info) {
|
||||
|
@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
|
|||
MemberName(struct_type, 0, "data");
|
||||
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
|
||||
|
||||
const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
|
||||
storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type);
|
||||
storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
|
||||
|
||||
u32 index{};
|
||||
for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
|
||||
const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
|
||||
const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, fmt::format("ssbo{}", index));
|
||||
|
@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
|
|||
index += desc.count;
|
||||
binding += desc.count;
|
||||
}
|
||||
if (info.uses_global_increment) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])};
|
||||
increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]);
|
||||
}
|
||||
if (info.uses_global_decrement) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])};
|
||||
decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]);
|
||||
}
|
||||
if (info.uses_atomic_f32_add) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])};
|
||||
f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]);
|
||||
}
|
||||
if (info.uses_atomic_f16x2_add) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])};
|
||||
f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]);
|
||||
}
|
||||
if (info.uses_atomic_f16x2_min) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])};
|
||||
f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]);
|
||||
}
|
||||
if (info.uses_atomic_f16x2_max) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])};
|
||||
f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]);
|
||||
}
|
||||
if (info.uses_atomic_f32x2_add) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])};
|
||||
f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]);
|
||||
}
|
||||
if (info.uses_atomic_f32x2_min) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])};
|
||||
f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]);
|
||||
}
|
||||
if (info.uses_atomic_f32x2_max) {
|
||||
AddCapability(spv::Capability::VariablePointersStorageBuffer);
|
||||
const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])};
|
||||
f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
|
||||
|
|
|
@ -94,6 +94,7 @@ public:
|
|||
Id output_f32{};
|
||||
|
||||
Id storage_u32{};
|
||||
Id storage_memory_u32{};
|
||||
|
||||
Id image_buffer_type{};
|
||||
Id sampled_texture_buffer_type{};
|
||||
|
@ -136,9 +137,21 @@ public:
|
|||
Id shared_memory_u32{};
|
||||
Id shared_memory_u32x2{};
|
||||
Id shared_memory_u32x4{};
|
||||
Id shared_memory_u32_type{};
|
||||
|
||||
Id shared_store_u8_func{};
|
||||
Id shared_store_u16_func{};
|
||||
Id increment_cas_shared{};
|
||||
Id increment_cas_ssbo{};
|
||||
Id decrement_cas_shared{};
|
||||
Id decrement_cas_ssbo{};
|
||||
Id f32_add_cas{};
|
||||
Id f16x2_add_cas{};
|
||||
Id f16x2_min_cas{};
|
||||
Id f16x2_max_cas{};
|
||||
Id f32x2_add_cas{};
|
||||
Id f32x2_min_cas{};
|
||||
Id f32x2_max_cas{};
|
||||
|
||||
Id input_position{};
|
||||
std::array<Id, 32> input_generics{};
|
||||
|
@ -153,6 +166,11 @@ public:
|
|||
std::vector<Id> interfaces;
|
||||
|
||||
private:
|
||||
enum class CasPointerType {
|
||||
Shared,
|
||||
Ssbo,
|
||||
};
|
||||
|
||||
void DefineCommonTypes(const Info& info);
|
||||
void DefineCommonConstants();
|
||||
void DefineInterfaces(const Info& info);
|
||||
|
@ -171,6 +189,8 @@ private:
|
|||
|
||||
void DefineInputs(const Info& info);
|
||||
void DefineOutputs(const Info& info);
|
||||
|
||||
[[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type);
|
||||
};
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
|
|||
ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
|
||||
}
|
||||
}
|
||||
if (info.uses_64_bit_atomics && profile.support_int64_atomics) {
|
||||
ctx.AddCapability(spv::Capability::Int64Atomics);
|
||||
}
|
||||
if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
|
||||
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
|
||||
}
|
||||
|
|
|
@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
|
|||
Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
|
||||
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
|
||||
Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicSMin32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicUMin32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicSMax32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicUMax32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicInc32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicDec32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicAnd32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicOr32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicXor32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicExchange32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicSMin64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicUMin64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicSMax64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicUMax64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicInc64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicDec64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicAnd64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicOr64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicXor64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicExchange64(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicAddF32(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
|
||||
Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
|
||||
Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
|
||||
Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
|
||||
|
|
528
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
Normal file
528
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
Normal file
|
@ -0,0 +1,528 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
|
||||
/// Builds a pointer to a 32-bit word of shared memory.
///
/// The word index is `(offset >> 2) + index_offset`. When the profile supports explicit workgroup
/// layouts the access chain goes through an extra leading zero index.
/// NOTE(review): the shift is arithmetic while StorageIndex uses a logical shift — confirm the
/// difference is intentional.
Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
    const Id two{ctx.Constant(ctx.U32[1], 2U)};
    const Id word_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, two)};
    const Id element{
        ctx.OpIAdd(ctx.U32[1], word_index, ctx.Constant(ctx.U32[1], index_offset))};
    if (ctx.profile.support_explicit_workgroup_layout) {
        return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value,
                                 element);
    }
    return ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, element);
}
|
||||
|
||||
/// Converts a storage buffer offset into an element index for elements of `element_size` bytes.
///
/// Immediate offsets are divided on the host and emitted as a constant. Dynamic offsets are
/// shifted right by log2(element_size) in the shader, or used unchanged when that shift is zero.
Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
    if (!offset.IsImmediate()) {
        const u32 shift_amount{static_cast<u32>(std::countr_zero(element_size))};
        const Id dynamic_offset{ctx.Def(offset)};
        if (shift_amount == 0) {
            return dynamic_offset;
        }
        return ctx.OpShiftRightLogical(ctx.U32[1], dynamic_offset,
                                       ctx.Constant(ctx.U32[1], shift_amount));
    }
    const u32 element_index{static_cast<u32>(offset.U32() / element_size)};
    return ctx.Constant(ctx.U32[1], element_index);
}
|
||||
|
||||
/// Builds a pointer to a 32-bit word of the storage buffer selected by `binding`.
///
/// The word index is `StorageIndex(offset, sizeof(u32)) + index_offset`.
/// @throws NotImplementedException when `binding` is not an immediate.
Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                     u32 index_offset = 0) {
    // TODO: Support reinterpreting bindings, guaranteed to be aligned
    if (!binding.IsImmediate()) {
        throw NotImplementedException("Dynamic storage buffer indexing");
    }
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id first_word{StorageIndex(ctx, offset, sizeof(u32))};
    const Id extra_words{ctx.Constant(ctx.U32[1], index_offset)};
    const Id word_index{ctx.OpIAdd(ctx.U32[1], first_word, extra_words)};
    return ctx.OpAccessChain(ctx.storage_u32, buffer, ctx.u32_zero_value, word_index);
}
|
||||
|
||||
/// Returns the {scope, semantics} operands shared by the atomic instructions in this file:
/// a Device scope constant and the zero constant as memory semantics.
std::pair<Id, Id> GetAtomicArgs(EmitContext& ctx) {
    return {ctx.Constant(ctx.U32[1], static_cast<u32>(spv::Scope::Device)), ctx.u32_zero_value};
}
|
||||
|
||||
/// Loads two 32-bit words and combines them into one U64 value.
/// The word behind `pointer_1` becomes component 0 and the one behind `pointer_2` component 1 of
/// the intermediate U32x2 vector that is bit-cast to U64.
Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) {
    const Id first_word{ctx.OpLoad(ctx.U32[1], pointer_1)};
    const Id second_word{ctx.OpLoad(ctx.U32[1], pointer_2)};
    const Id words{ctx.OpCompositeConstruct(ctx.U32[2], first_word, second_word)};
    return ctx.OpBitcast(ctx.U64, words);
}
|
||||
|
||||
/// Splits `result` into two 32-bit words and stores them with two separate OpStore instructions:
/// component 0 through `pointer_1`, component 1 through `pointer_2`.
void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) {
    const Id words{ctx.OpBitcast(ctx.U32[2], result)};
    const Id first_word{ctx.OpCompositeExtract(ctx.U32[1], words, 0)};
    ctx.OpStore(pointer_1, first_word);
    const Id second_word{ctx.OpCompositeExtract(ctx.U32[1], words, 1)};
    ctx.OpStore(pointer_2, second_word);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Emits OpAtomicIAdd on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicIAdd(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicSMin on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMin(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicUMin on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMin(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicSMax on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMax(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicUMax on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMax(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits a call to the shared memory increment helper (`ctx.increment_cas_shared`).
/// The helper receives the word index `pointer_offset >> 2`, `value` and the shared memory
/// variable, and the call's result id is returned.
Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id two{ctx.Constant(ctx.U32[1], 2U)};
    const Id word_index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, two)};
    const Id helper{ctx.increment_cas_shared};
    return ctx.OpFunctionCall(ctx.U32[1], helper, word_index, value, ctx.shared_memory_u32);
}
|
||||
|
||||
/// Emits a call to the shared memory decrement helper (`ctx.decrement_cas_shared`).
/// The helper receives the word index `pointer_offset >> 2`, `value` and the shared memory
/// variable, and the call's result id is returned.
Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id two{ctx.Constant(ctx.U32[1], 2U)};
    const Id word_index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, two)};
    const Id helper{ctx.decrement_cas_shared};
    return ctx.OpFunctionCall(ctx.U32[1], helper, word_index, value, ctx.shared_memory_u32);
}
|
||||
|
||||
/// Emits OpAtomicAnd on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicAnd(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicOr on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicOr(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicXor on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicXor(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicExchange on the shared memory word selected by `pointer_offset`.
/// @return Id of the atomic instruction's result.
Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id word_pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicExchange(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Exchanges a 64-bit value in shared memory.
///
/// With `support_int64_atomics` a single OpAtomicExchange with a U64 result is emitted. Otherwise
/// the two 32-bit words are loaded, `value` is stored over them with plain stores, and the loaded
/// value is returned; that fallback is not atomic.
/// NOTE(review): the pointer comes from GetSharedPointer, which types it with ctx.shared_u32,
/// while the atomic result type is ctx.U64 — confirm the pointee width.
Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id low_pointer{GetSharedPointer(ctx, pointer_offset)};
    if (!ctx.profile.support_int64_atomics) {
        // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
        const Id high_pointer{GetSharedPointer(ctx, pointer_offset, 1)};
        const Id previous{LoadU64(ctx, low_pointer, high_pointer)};
        StoreResult(ctx, low_pointer, high_pointer, value);
        return previous;
    }
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicExchange(ctx.U64, low_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicIAdd on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicIAdd(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicSMin on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMin(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicUMin on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMin(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicSMax on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMax(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicUMax on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMax(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits a call to the storage buffer increment helper (`ctx.increment_cas_ssbo`).
///
/// The helper receives the 32-bit word index derived from `offset`, `value` and the storage
/// buffer variable selected by `binding`; the call's result id is returned.
/// @throws NotImplementedException when `binding` is not an immediate.
Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    // Same restriction GetStoragePointer enforces: ctx.ssbos is indexed with a constant binding.
    if (!binding.IsImmediate()) {
        throw NotImplementedException("Dynamic storage buffer indexing");
    }
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
}
|
||||
|
||||
/// Emits a call to the storage buffer decrement helper (`ctx.decrement_cas_ssbo`).
///
/// The helper receives the 32-bit word index derived from `offset`, `value` and the storage
/// buffer variable selected by `binding`; the call's result id is returned.
/// @throws NotImplementedException when `binding` is not an immediate.
Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    // Same restriction GetStoragePointer enforces: ctx.ssbos is indexed with a constant binding.
    if (!binding.IsImmediate()) {
        throw NotImplementedException("Dynamic storage buffer indexing");
    }
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
}
|
||||
|
||||
/// Emits OpAtomicAnd on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicAnd(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicOr on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicOr(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicXor on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicXor(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits OpAtomicExchange on the storage buffer word selected by `binding` and `offset`.
/// @return Id of the atomic instruction's result.
Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                               Id value) {
    const Id word_pointer{GetStoragePointer(ctx, binding, offset)};
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicExchange(ctx.U32[1], word_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Adds `value` to a 64-bit storage buffer location.
///
/// With `support_int64_atomics` a single OpAtomicIAdd with a U64 result is emitted. Otherwise the
/// two 32-bit words are loaded, summed with OpIAdd and written back with plain stores, and the
/// loaded value is returned; that fallback is not atomic.
/// NOTE(review): the pointer comes from GetStoragePointer, which types it with ctx.storage_u32,
/// while the atomic result type is ctx.U64 — confirm the pointee width.
Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id low_pointer{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
        const Id high_pointer{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, low_pointer, high_pointer)};
        const Id updated{ctx.OpIAdd(ctx.U64, value, previous)};
        StoreResult(ctx, low_pointer, high_pointer, updated);
        return previous;
    }
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicIAdd(ctx.U64, low_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Stores the signed minimum of `value` and a 64-bit storage buffer location.
///
/// With `support_int64_atomics` a single OpAtomicSMin with a U64 result is emitted. Otherwise the
/// two 32-bit words are loaded, combined with OpSMin and written back with plain stores, and the
/// loaded value is returned; that fallback is not atomic.
/// NOTE(review): the pointer comes from GetStoragePointer, which types it with ctx.storage_u32,
/// while the atomic result type is ctx.U64 — confirm the pointee width.
Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id low_pointer{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
        const Id high_pointer{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, low_pointer, high_pointer)};
        const Id updated{ctx.OpSMin(ctx.U64, value, previous)};
        StoreResult(ctx, low_pointer, high_pointer, updated);
        return previous;
    }
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMin(ctx.U64, low_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Stores the unsigned minimum of `value` and a 64-bit storage buffer location.
///
/// With `support_int64_atomics` a single OpAtomicUMin with a U64 result is emitted. Otherwise the
/// two 32-bit words are loaded, combined with OpUMin and written back with plain stores, and the
/// loaded value is returned; that fallback is not atomic.
/// NOTE(review): the pointer comes from GetStoragePointer, which types it with ctx.storage_u32,
/// while the atomic result type is ctx.U64 — confirm the pointee width.
Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id low_pointer{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
        const Id high_pointer{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, low_pointer, high_pointer)};
        const Id updated{ctx.OpUMin(ctx.U64, value, previous)};
        StoreResult(ctx, low_pointer, high_pointer, updated);
        return previous;
    }
    const auto args{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMin(ctx.U64, low_pointer, args.first, args.second, value);
}
|
||||
|
||||
/// Emits a 64-bit signed maximum on a storage buffer location.
/// With int64 atomic support the result of OpAtomicSMax is returned. Otherwise the value is read
/// with LoadU64, combined with OpSMax and written back with StoreResult (a non-atomic sequence),
/// and the value that was read is returned.
Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id base_ptr{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // Int64 atomics are not supported: fall back to a non-atomic read-modify-write
        const Id next_ptr{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, base_ptr, next_ptr)};
        const Id updated{ctx.OpSMax(ctx.U64, value, previous)};
        StoreResult(ctx, base_ptr, next_ptr, updated);
        return previous;
    }
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMax(ctx.U64, base_ptr, scope, semantics, value);
}
|
||||
|
||||
/// Emits a 64-bit unsigned maximum on a storage buffer location.
/// With int64 atomic support the result of OpAtomicUMax is returned. Otherwise the value is read
/// with LoadU64, combined with OpUMax and written back with StoreResult (a non-atomic sequence),
/// and the value that was read is returned.
Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id base_ptr{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // Int64 atomics are not supported: fall back to a non-atomic read-modify-write
        const Id next_ptr{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, base_ptr, next_ptr)};
        const Id updated{ctx.OpUMax(ctx.U64, value, previous)};
        StoreResult(ctx, base_ptr, next_ptr, updated);
        return previous;
    }
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMax(ctx.U64, base_ptr, scope, semantics, value);
}
|
||||
|
||||
/// Emits a 64-bit bitwise AND on a storage buffer location.
/// With int64 atomic support the result of OpAtomicAnd is returned. Otherwise the value is read
/// with LoadU64, combined with OpBitwiseAnd and written back with StoreResult (a non-atomic
/// sequence), and the value that was read is returned.
Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id base_ptr{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // Int64 atomics are not supported: fall back to a non-atomic read-modify-write
        const Id next_ptr{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, base_ptr, next_ptr)};
        const Id updated{ctx.OpBitwiseAnd(ctx.U64, value, previous)};
        StoreResult(ctx, base_ptr, next_ptr, updated);
        return previous;
    }
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicAnd(ctx.U64, base_ptr, scope, semantics, value);
}
|
||||
|
||||
/// Emits a 64-bit bitwise OR on a storage buffer location.
/// With int64 atomic support the result of OpAtomicOr is returned. Otherwise the value is read
/// with LoadU64, combined with OpBitwiseOr and written back with StoreResult (a non-atomic
/// sequence), and the value that was read is returned.
Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value) {
    const Id base_ptr{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // Int64 atomics are not supported: fall back to a non-atomic read-modify-write
        const Id next_ptr{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, base_ptr, next_ptr)};
        const Id updated{ctx.OpBitwiseOr(ctx.U64, value, previous)};
        StoreResult(ctx, base_ptr, next_ptr, updated);
        return previous;
    }
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicOr(ctx.U64, base_ptr, scope, semantics, value);
}
|
||||
|
||||
/// Emits a 64-bit bitwise XOR on a storage buffer location.
/// With int64 atomic support the result of OpAtomicXor is returned. Otherwise the value is read
/// with LoadU64, combined with OpBitwiseXor and written back with StoreResult (a non-atomic
/// sequence), and the value that was read is returned.
Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id base_ptr{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // Int64 atomics are not supported: fall back to a non-atomic read-modify-write
        const Id next_ptr{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, base_ptr, next_ptr)};
        const Id updated{ctx.OpBitwiseXor(ctx.U64, value, previous)};
        StoreResult(ctx, base_ptr, next_ptr, updated);
        return previous;
    }
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicXor(ctx.U64, base_ptr, scope, semantics, value);
}
|
||||
|
||||
/// Emits a 64-bit exchange on a storage buffer location.
/// With int64 atomic support the result of OpAtomicExchange is returned. Otherwise the current
/// value is read with LoadU64, `value` is written with StoreResult (a non-atomic sequence), and
/// the value that was read is returned.
Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                               Id value) {
    const Id base_ptr{GetStoragePointer(ctx, binding, offset)};
    if (!ctx.profile.support_int64_atomics) {
        // Int64 atomics are not supported: fall back to a non-atomic load followed by a store
        const Id next_ptr{GetStoragePointer(ctx, binding, offset, 1)};
        const Id previous{LoadU64(ctx, base_ptr, next_ptr)};
        StoreResult(ctx, base_ptr, next_ptr, value);
        return previous;
    }
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicExchange(ctx.U64, base_ptr, scope, semantics, value);
}
|
||||
|
||||
/// Emits a call to the f32_add_cas helper function for the storage buffer selected by `binding`,
/// passing the storage index, the operand and the buffer. Returns the F32 result of the call.
Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id index{StorageIndex(ctx, offset, sizeof(u32))};
    return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, index, value, buffer);
}
|
||||
|
||||
/// Emits a call to the f16x2_add_cas helper function for the storage buffer selected by
/// `binding` and returns its F16x2 result bitcast to U32.
Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id halves{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, index, value, buffer)};
    return ctx.OpBitcast(ctx.U32[1], halves);
}
|
||||
|
||||
/// Emits a call to the f32x2_add_cas helper function for the storage buffer selected by
/// `binding` and returns its F32x2 result packed into a U32 with OpPackHalf2x16.
Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id floats{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, index, value, buffer)};
    return ctx.OpPackHalf2x16(ctx.U32[1], floats);
}
|
||||
|
||||
/// Emits a call to the f16x2_min_cas helper function for the storage buffer selected by
/// `binding` and returns its F16x2 result bitcast to U32.
Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id halves{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, index, value, buffer)};
    return ctx.OpBitcast(ctx.U32[1], halves);
}
|
||||
|
||||
/// Emits a call to the f32x2_min_cas helper function for the storage buffer selected by
/// `binding` and returns its F32x2 result packed into a U32 with OpPackHalf2x16.
Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id floats{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, index, value, buffer)};
    return ctx.OpPackHalf2x16(ctx.U32[1], floats);
}
|
||||
|
||||
/// Emits a call to the f16x2_max_cas helper function for the storage buffer selected by
/// `binding` and returns its F16x2 result bitcast to U32.
Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id halves{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, index, value, buffer)};
    return ctx.OpBitcast(ctx.U32[1], halves);
}
|
||||
|
||||
/// Emits a call to the f32x2_max_cas helper function for the storage buffer selected by
/// `binding` and returns its F32x2 result packed into a U32 with OpPackHalf2x16.
Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id buffer{ctx.ssbos[binding.U32()]};
    const Id index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id floats{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, index, value, buffer)};
    return ctx.OpPackHalf2x16(ctx.U32[1], floats);
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicIAdd32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicSMin32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicUMin32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicSMax32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicUMax32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicInc32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicDec32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicAnd32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicOr32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicXor32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicExchange32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicIAdd64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicSMin64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicUMin64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicSMax64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicUMax64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicInc64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicDec64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicAnd64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicOr64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicXor64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicExchange64(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicAddF32(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicAddF16x2(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicAddF32x2(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicMinF16x2(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicMinF32x2(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicMaxF16x2(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
/// Placeholder: no SPIR-V code is emitted; always throws NotImplementedException.
Id EmitGlobalAtomicMaxF32x2(EmitContext& /*ctx*/) {
    throw NotImplementedException("SPIR-V Instruction");
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
|
@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed)
|
|||
return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
|
||||
}
|
||||
|
||||
/// Builds a SharedAtomicIAdd32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicIAdd32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicSMin32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicSMin32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicUMin32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicUMin32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Dispatches to SharedAtomicSMin when `is_signed` is set and to SharedAtomicUMin otherwise.
U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
    if (is_signed) {
        return SharedAtomicSMin(pointer_offset, value);
    }
    return SharedAtomicUMin(pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicSMax32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicSMax32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicUMax32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicUMax32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Dispatches to SharedAtomicSMax when `is_signed` is set and to SharedAtomicUMax otherwise.
U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
    if (is_signed) {
        return SharedAtomicSMax(pointer_offset, value);
    }
    return SharedAtomicUMax(pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicInc32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicInc32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicDec32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicDec32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicAnd32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicAnd32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicOr32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicOr32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicXor32 instruction from the pointer offset and value operands.
U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
    const Opcode op{Opcode::SharedAtomicXor32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a SharedAtomicExchange32 or SharedAtomicExchange64 instruction, chosen by whether
/// `value` is U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
    switch (value.Type()) {
    case Type::U32:
        return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
    case Type::U64:
        return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
    default:
        // The switch is on the value's type, so that is the type to report as invalid
        ThrowInvalidType(value.Type());
    }
}
|
||||
|
||||
/// Builds a GlobalAtomicIAdd32 or GlobalAtomicIAdd64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Builds a GlobalAtomicSMin32 or GlobalAtomicSMin64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Builds a GlobalAtomicUMin32 or GlobalAtomicUMin64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Dispatches to GlobalAtomicSMin when `is_signed` is set and to GlobalAtomicUMin otherwise.
U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
    if (is_signed) {
        return GlobalAtomicSMin(pointer_offset, value);
    }
    return GlobalAtomicUMin(pointer_offset, value);
}
|
||||
|
||||
/// Builds a GlobalAtomicSMax32 or GlobalAtomicSMax64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Builds a GlobalAtomicUMax32 or GlobalAtomicUMax64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Dispatches to GlobalAtomicSMax when `is_signed` is set and to GlobalAtomicUMax otherwise.
U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
    if (is_signed) {
        return GlobalAtomicSMax(pointer_offset, value);
    }
    return GlobalAtomicUMax(pointer_offset, value);
}
|
||||
|
||||
/// Builds a GlobalAtomicInc32 instruction from the pointer offset and value operands.
U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
    const Opcode op{Opcode::GlobalAtomicInc32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a GlobalAtomicDec32 instruction from the pointer offset and value operands.
U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
    const Opcode op{Opcode::GlobalAtomicDec32};
    return Inst<U32>(op, pointer_offset, value);
}
|
||||
|
||||
/// Builds a GlobalAtomicAnd32 or GlobalAtomicAnd64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Builds a GlobalAtomicOr32 or GlobalAtomicOr64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Builds a GlobalAtomicXor32 or GlobalAtomicXor64 instruction, chosen by whether `value` is
/// U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
    const Type value_type{value.Type()};
    if (value_type == Type::U32) {
        return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
    }
    if (value_type == Type::U64) {
        return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
    }
    ThrowInvalidType(value_type);
}
|
||||
|
||||
/// Builds a GlobalAtomicExchange32 or GlobalAtomicExchange64 instruction, chosen by whether
/// `value` is U32 or U64. Any other value type is reported through ThrowInvalidType.
U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
    switch (value.Type()) {
    case Type::U32:
        return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
    case Type::U64:
        return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
    default:
        // The switch is on the value's type, so that is the type to report as invalid
        ThrowInvalidType(value.Type());
    }
}
|
||||
|
||||
/// Builds a GlobalAtomicAddF32 instruction carrying `control` as its flags.
F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
                                  const FpControl control) {
    const Opcode op{Opcode::GlobalAtomicAddF32};
    return Inst<F32>(op, Flags{control}, pointer_offset, value);
}
|
||||
|
||||
/// Builds a GlobalAtomicAddF16x2 instruction carrying `control` as its flags.
Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
                                      const FpControl control) {
    const Opcode op{Opcode::GlobalAtomicAddF16x2};
    return Inst(op, Flags{control}, pointer_offset, value);
}
|
||||
|
||||
/// Builds a GlobalAtomicMinF16x2 instruction carrying `control` as its flags.
Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
                                      const FpControl control) {
    const Opcode op{Opcode::GlobalAtomicMinF16x2};
    return Inst(op, Flags{control}, pointer_offset, value);
}
|
||||
|
||||
/// Builds a GlobalAtomicMaxF16x2 instruction carrying `control` as its flags.
Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
                                      const FpControl control) {
    const Opcode op{Opcode::GlobalAtomicMaxF16x2};
    return Inst(op, Flags{control}, pointer_offset, value);
}
|
||||
|
||||
/// Builds a LogicalOr instruction from the two U1 operands.
U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
    const Opcode op{Opcode::LogicalOr};
    return Inst<U1>(op, a, b);
}
|
||||
|
@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst
|
|||
}
|
||||
|
||||
void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
||||
TextureInstInfo info) {
|
||||
TextureInstInfo info) {
|
||||
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
|
||||
Inst(op, Flags{info}, handle, coords, color);
|
||||
}
|
||||
|
|
|
@ -228,6 +228,45 @@ public:
|
|||
[[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
|
||||
[[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
|
||||
|
||||
[[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
|
||||
|
||||
[[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
|
||||
bool is_signed);
|
||||
[[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
|
||||
bool is_signed);
|
||||
[[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
|
||||
[[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
|
||||
|
||||
[[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
|
||||
const FpControl control = {});
|
||||
[[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
|
||||
const FpControl control = {});
|
||||
[[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
|
||||
const FpControl control = {});
|
||||
[[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
|
||||
const FpControl control = {});
|
||||
|
||||
[[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
|
||||
[[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
|
||||
[[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
|
||||
|
|
|
@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||
case Opcode::WriteSharedU32:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::WriteSharedU128:
|
||||
case Opcode::SharedAtomicIAdd32:
|
||||
case Opcode::SharedAtomicSMin32:
|
||||
case Opcode::SharedAtomicUMin32:
|
||||
case Opcode::SharedAtomicSMax32:
|
||||
case Opcode::SharedAtomicUMax32:
|
||||
case Opcode::SharedAtomicInc32:
|
||||
case Opcode::SharedAtomicDec32:
|
||||
case Opcode::SharedAtomicAnd32:
|
||||
case Opcode::SharedAtomicOr32:
|
||||
case Opcode::SharedAtomicXor32:
|
||||
case Opcode::SharedAtomicExchange32:
|
||||
case Opcode::SharedAtomicExchange64:
|
||||
case Opcode::GlobalAtomicIAdd32:
|
||||
case Opcode::GlobalAtomicSMin32:
|
||||
case Opcode::GlobalAtomicUMin32:
|
||||
case Opcode::GlobalAtomicSMax32:
|
||||
case Opcode::GlobalAtomicUMax32:
|
||||
case Opcode::GlobalAtomicInc32:
|
||||
case Opcode::GlobalAtomicDec32:
|
||||
case Opcode::GlobalAtomicAnd32:
|
||||
case Opcode::GlobalAtomicOr32:
|
||||
case Opcode::GlobalAtomicXor32:
|
||||
case Opcode::GlobalAtomicExchange32:
|
||||
case Opcode::GlobalAtomicIAdd64:
|
||||
case Opcode::GlobalAtomicSMin64:
|
||||
case Opcode::GlobalAtomicUMin64:
|
||||
case Opcode::GlobalAtomicSMax64:
|
||||
case Opcode::GlobalAtomicUMax64:
|
||||
case Opcode::GlobalAtomicAnd64:
|
||||
case Opcode::GlobalAtomicOr64:
|
||||
case Opcode::GlobalAtomicXor64:
|
||||
case Opcode::GlobalAtomicExchange64:
|
||||
case Opcode::GlobalAtomicAddF32:
|
||||
case Opcode::GlobalAtomicAddF16x2:
|
||||
case Opcode::GlobalAtomicAddF32x2:
|
||||
case Opcode::GlobalAtomicMinF16x2:
|
||||
case Opcode::GlobalAtomicMinF32x2:
|
||||
case Opcode::GlobalAtomicMaxF16x2:
|
||||
case Opcode::GlobalAtomicMaxF32x2:
|
||||
case Opcode::StorageAtomicIAdd32:
|
||||
case Opcode::StorageAtomicSMin32:
|
||||
case Opcode::StorageAtomicUMin32:
|
||||
case Opcode::StorageAtomicSMax32:
|
||||
case Opcode::StorageAtomicUMax32:
|
||||
case Opcode::StorageAtomicInc32:
|
||||
case Opcode::StorageAtomicDec32:
|
||||
case Opcode::StorageAtomicAnd32:
|
||||
case Opcode::StorageAtomicOr32:
|
||||
case Opcode::StorageAtomicXor32:
|
||||
case Opcode::StorageAtomicExchange32:
|
||||
case Opcode::StorageAtomicIAdd64:
|
||||
case Opcode::StorageAtomicSMin64:
|
||||
case Opcode::StorageAtomicUMin64:
|
||||
case Opcode::StorageAtomicSMax64:
|
||||
case Opcode::StorageAtomicUMax64:
|
||||
case Opcode::StorageAtomicAnd64:
|
||||
case Opcode::StorageAtomicOr64:
|
||||
case Opcode::StorageAtomicXor64:
|
||||
case Opcode::StorageAtomicExchange64:
|
||||
case Opcode::StorageAtomicAddF32:
|
||||
case Opcode::StorageAtomicAddF16x2:
|
||||
case Opcode::StorageAtomicAddF32x2:
|
||||
case Opcode::StorageAtomicMinF16x2:
|
||||
case Opcode::StorageAtomicMinF32x2:
|
||||
case Opcode::StorageAtomicMaxF16x2:
|
||||
case Opcode::StorageAtomicMaxF32x2:
|
||||
case Opcode::BindlessImageWrite:
|
||||
case Opcode::BoundImageWrite:
|
||||
case Opcode::ImageWrite:
|
||||
|
|
|
@ -321,6 +321,76 @@ OPCODE(INotEqual, U1, U32,
|
|||
OPCODE(SGreaterThanEqual, U1, U32, U32, )
|
||||
OPCODE(UGreaterThanEqual, U1, U32, U32, )
|
||||
|
||||
// Atomic operations
|
||||
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicInc32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicDec32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicOr32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicXor32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicExchange32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicExchange64, U64, U32, U64, )
|
||||
|
||||
OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicInc32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicDec32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicOr32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicXor32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
|
||||
OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicOr64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicXor64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
|
||||
OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
|
||||
OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
|
||||
OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
|
||||
OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
|
||||
OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
|
||||
OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
|
||||
OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
|
||||
|
||||
OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
|
||||
OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
|
||||
OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
|
||||
OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
|
||||
OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
|
||||
OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
|
||||
OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
|
||||
OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
|
||||
OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
|
||||
|
||||
// Logical operations
|
||||
OPCODE(LogicalOr, U1, U1, U1, )
|
||||
OPCODE(LogicalAnd, U1, U1, U1, )
|
||||
|
|
|
@ -0,0 +1,222 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Operation selector for atom instructions. The enumerator values are spelled out and match
/// the implicit numbering of the declaration order.
// NOTE(review): other Maxwell decoders appear to encode SAFEADD as 10 rather than 9 - confirm
// against the instruction encoding before relying on this enumerator.
enum class AtomOp : u64 {
    ADD = 0,
    MIN = 1,
    MAX = 2,
    INC = 3,
    DEC = 4,
    AND = 5,
    OR = 6,
    XOR = 7,
    EXCH = 8,
    SAFEADD = 9,
};
|
||||
|
||||
/// Operand type selector for atom instructions. The enumerator values are spelled out and match
/// the implicit numbering of the declaration order.
enum class AtomSize : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
    F32 = 3,
    F16x2 = 4,
    S64 = 5,
};
|
||||
|
||||
/// Maps an integer AtomOp onto the matching IREmitter global atomic call and returns its result.
/// `is_signed` only matters for MIN and MAX. Operations without a mapping here throw
/// NotImplementedException.
IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
                              AtomOp op, bool is_signed) {
    switch (op) {
    case AtomOp::ADD: {
        return ir.GlobalAtomicIAdd(offset, op_b);
    }
    case AtomOp::MIN: {
        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
    }
    case AtomOp::MAX: {
        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
    }
    case AtomOp::INC: {
        return ir.GlobalAtomicInc(offset, op_b);
    }
    case AtomOp::DEC: {
        return ir.GlobalAtomicDec(offset, op_b);
    }
    case AtomOp::AND: {
        return ir.GlobalAtomicAnd(offset, op_b);
    }
    case AtomOp::OR: {
        return ir.GlobalAtomicOr(offset, op_b);
    }
    case AtomOp::XOR: {
        return ir.GlobalAtomicXor(offset, op_b);
    }
    case AtomOp::EXCH: {
        return ir.GlobalAtomicExchange(offset, op_b);
    }
    default:
        break;
    }
    throw NotImplementedException("Integer Atom Operation {}", op);
}
|
||||
|
||||
/// Maps a floating-point AtomOp onto the matching IREmitter global atomic call.
/// ADD uses the F32 variant when `size` is AtomSize::F32 and the F16x2 variant otherwise; MIN and
/// MAX always use the F16x2 variants. Any other operation throws NotImplementedException.
IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
                        AtomSize size) {
    // Floating-point control attached to F16x2 operations
    static constexpr IR::FpControl f16_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::DontCare},
    };
    // Floating-point control attached to F32 operations
    static constexpr IR::FpControl f32_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::FTZ},
    };
    if (op == AtomOp::ADD) {
        if (size == AtomSize::F32) {
            return ir.GlobalAtomicF32Add(offset, op_b, f32_control);
        }
        return ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
    }
    if (op == AtomOp::MIN) {
        return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
    }
    if (op == AtomOp::MAX) {
        return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
    }
    throw NotImplementedException("FP Atom Operation {}", op);
}
|
||||
|
||||
/// Builds the 64-bit address operand of an ATOM/RED instruction: base register plus the
/// immediate offset encoded in bits 28-47.
IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<28, 20, s64> addr_offset;
        BitField<28, 20, u64> rz_addr_offset;
        BitField<48, 1, u64> e;
    } const mem{insn};

    // With the 'e' bit clear the base is a single 32-bit register widened to 64 bits,
    // otherwise it is read as a 64-bit register pair.
    const bool is_32bit_base{mem.e == 0};
    const IR::U64 base{is_32bit_base ? IR::U64{v.ir.UConvert(64, v.X(mem.addr_reg))}
                                     : IR::U64{v.L(mem.addr_reg)}};

    // When RZ is used, the address is an absolute address, so the immediate is taken
    // unsigned; otherwise it is a signed displacement.
    const bool is_absolute{mem.addr_reg == IR::Reg::RZ};
    const u64 immediate{is_absolute ? static_cast<u64>(mem.rz_addr_offset.Value())
                                    : static_cast<u64>(mem.addr_offset.Value())};

    return v.ir.IAdd(base, v.ir.Imm64(immediate));
}
|
||||
|
||||
/// Reports whether `op` is not applicable to operands of type `size`.
/// Callers skip the atomic for such combinations instead of emitting it.
bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
    // TODO: SAFEADD
    const bool is_add{op == AtomOp::ADD};
    const bool is_min_or_max{op == AtomOp::MIN || op == AtomOp::MAX};
    const bool is_inc_or_dec{op == AtomOp::INC || op == AtomOp::DEC};

    if (size == AtomSize::S32 || size == AtomSize::U64) {
        // Everything except wrapping increment/decrement
        return is_inc_or_dec;
    }
    if (size == AtomSize::S64) {
        // Only min/max
        return !is_min_or_max;
    }
    if (size == AtomSize::F32) {
        // Only add
        return !is_add;
    }
    if (size == AtomSize::F16x2) {
        // Only add, min and max
        return !is_add && !is_min_or_max;
    }
    // U32 and unrecognized sizes: nothing is filtered here
    return false;
}
|
||||
|
||||
/// Emits a plain (non-atomic) global load whose width matches `size`.
/// 64-bit sizes load a pair of 32-bit words and pack them into one 64-bit value.
/// @throws NotImplementedException when `size` is not one of the known encodings
IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
    const bool is_32_bit{size == AtomSize::U32 || size == AtomSize::S32 ||
                         size == AtomSize::F32 || size == AtomSize::F16x2};
    if (is_32_bit) {
        return ir.LoadGlobal32(offset);
    }
    const bool is_64_bit{size == AtomSize::U64 || size == AtomSize::S64};
    if (is_64_bit) {
        return ir.PackUint2x32(ir.LoadGlobal64(offset));
    }
    throw NotImplementedException("Atom Size {}", size);
}
|
||||
|
||||
/// Writes the result of an atomic into `dest_reg` using the register view that matches `size`:
/// a 32-bit register, a 64-bit register pair, or a 32-bit float register.
/// Unrecognized sizes write nothing.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
    if (size == AtomSize::U32 || size == AtomSize::S32 || size == AtomSize::F16x2) {
        v.X(dest_reg, IR::U32{result});
        return;
    }
    if (size == AtomSize::U64 || size == AtomSize::S64) {
        v.L(dest_reg, IR::U64{result});
        return;
    }
    if (size == AtomSize::F32) {
        v.F(dest_reg, IR::F32{result});
        return;
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Translates ATOM: an atomic read-modify-write on global memory whose produced value is
/// written to the destination register.
/// When the operation is not applicable to the operand type, no atomic is emitted and the
/// destination receives a plain load from the same address instead.
void TranslatorVisitor::ATOM(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<49, 3, AtomSize> size;
        BitField<52, 4, AtomOp> op;
    } const atom{insn};

    const AtomSize size{atom.size.Value()};
    const AtomOp op{atom.op.Value()};
    const IR::U64 offset{AtomOffset(*this, insn)};

    IR::Value result;
    if (AtomOpNotApplicable(size, op)) {
        result = LoadGlobal(ir, offset, size);
    } else {
        switch (size) {
        case AtomSize::F32:
            result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), op, size);
            break;
        case AtomSize::F16x2: {
            // The packed halves live in one 32-bit register and are unpacked first
            const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))};
            result = ApplyFpAtomOp(ir, offset, src_b, op, size);
            break;
        }
        case AtomSize::U64:
        case AtomSize::S64:
            result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), op, size == AtomSize::S64);
            break;
        default:
            // U32, S32 and any unrecognized encoding take the 32-bit integer path
            result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), op, size == AtomSize::S32);
            break;
        }
    }
    StoreResult(*this, atom.dest_reg, result, size);
}
|
||||
|
||||
/// Translates RED: an atomic reduction on global memory whose result is discarded.
/// Nothing is emitted when the operation is not applicable to the operand type.
void TranslatorVisitor::RED(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_reg_b;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 3, AtomSize> size;
        BitField<23, 3, AtomOp> op;
    } const red{insn};

    const AtomSize size{red.size.Value()};
    const AtomOp op{red.op.Value()};
    if (AtomOpNotApplicable(size, op)) {
        return;
    }
    const IR::U64 offset{AtomOffset(*this, insn)};

    switch (size) {
    case AtomSize::F32:
        ApplyFpAtomOp(ir, offset, F(red.src_reg_b), op, size);
        break;
    case AtomSize::F16x2: {
        // The packed halves live in one 32-bit register and are unpacked first
        const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))};
        ApplyFpAtomOp(ir, offset, src_b, op, size);
        break;
    }
    case AtomSize::U64:
    case AtomSize::S64:
        ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), op, size == AtomSize::S64);
        break;
    default:
        // U32, S32 and any unrecognized encoding take the 32-bit integer path
        ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), op, size == AtomSize::S32);
        break;
    }
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
/// Operation selector for shared-memory atomics, decoded from a 4-bit field of ATOMS.
/// The enumerator values are the raw encodings.
enum class AtomOp : u64 {
    ADD = 0,
    MIN = 1,
    MAX = 2,
    INC = 3,
    DEC = 4,
    AND = 5,
    OR = 6,
    XOR = 7,
    EXCH = 8,
};
|
||||
|
||||
/// Operand type selector for shared-memory atomics (2-bit field of ATOMS).
enum class AtomsSize : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
};
|
||||
|
||||
/// Emits the shared-memory atomic that corresponds to `op`.
/// @param ir        Emitter receiving the atomic instruction
/// @param offset    Shared-memory offset operand
/// @param op_b      Value operand (32-bit or 64-bit)
/// @param op        Operation to perform
/// @param is_signed Signedness, only forwarded to the min/max emitters
/// @return The value produced by the emitted atomic instruction
/// @throws NotImplementedException for any operation without an emitter
IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
                        bool is_signed) {
    switch (op) {
    case AtomOp::ADD:
        return ir.SharedAtomicIAdd(offset, op_b);
    case AtomOp::MIN:
        return ir.SharedAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.SharedAtomicIMax(offset, op_b, is_signed);
    case AtomOp::INC:
        return ir.SharedAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.SharedAtomicDec(offset, op_b);
    case AtomOp::AND:
        return ir.SharedAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.SharedAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.SharedAtomicXor(offset, op_b);
    case AtomOp::EXCH:
        return ir.SharedAtomicExchange(offset, op_b);
    default:
        break;
    }
    // Reached only by encodings that have no emitter above
    throw NotImplementedException("Integer Atoms Operation {}", op);
}
|
||||
|
||||
/// Builds the 32-bit shared-memory offset operand of an ATOMS instruction.
/// The immediate in bits 30-51 is shifted left by two before use.
IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<30, 22, u64> absolute_offset;
        BitField<30, 22, s64> relative_offset;
    } const encoding{insn};

    const bool is_absolute{encoding.offset_reg == IR::Reg::RZ};
    if (!is_absolute) {
        // Signed displacement added to the offset register
        const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
    }
    // With RZ as the register the immediate is used on its own, read as unsigned
    return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
}
|
||||
|
||||
/// Writes the result of a shared-memory atomic into `dest_reg`: a single 32-bit register for
/// U32/S32, a 64-bit register pair for U64. Unrecognized sizes write nothing.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
    if (size == AtomsSize::U32 || size == AtomsSize::S32) {
        v.X(dest_reg, IR::U32{result});
        return;
    }
    if (size == AtomsSize::U64) {
        v.L(dest_reg, IR::U64{result});
        return;
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Translates ATOMS: an atomic read-modify-write on shared memory whose produced value is
/// written to the destination register.
/// @throws NotImplementedException for 64-bit operands with any operation other than EXCH
void TranslatorVisitor::ATOMS(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<28, 2, AtomsSize> size;
        BitField<52, 4, AtomOp> op;
    } const atoms{insn};

    const AtomsSize size{atoms.size.Value()};
    const AtomOp op{atoms.op.Value()};
    const bool is_64_bit{size == AtomsSize::U64};
    // Exchange is the only 64-bit shared atomic handled here
    if (is_64_bit && op != AtomOp::EXCH) {
        throw NotImplementedException("64-bit Atoms Operation {}", op);
    }
    const IR::U32 offset{AtomsOffset(*this, insn)};

    IR::Value result;
    if (is_64_bit) {
        // U64 is never signed
        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), op, false);
    } else {
        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), op, size == AtomsSize::S32);
    }
    StoreResult(*this, atoms.dest_reg, result, size);
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) {
|
|||
ThrowNotImplemented(Opcode::ATOM_cas);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::ATOM(u64) {
|
||||
ThrowNotImplemented(Opcode::ATOM);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::ATOMS_cas(u64) {
|
||||
ThrowNotImplemented(Opcode::ATOMS_cas);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::ATOMS(u64) {
|
||||
ThrowNotImplemented(Opcode::ATOMS);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::B2R(u64) {
|
||||
ThrowNotImplemented(Opcode::B2R);
|
||||
}
|
||||
|
@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) {
|
|||
ThrowNotImplemented(Opcode::RAM);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::RED(u64) {
|
||||
ThrowNotImplemented(Opcode::RED);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::RET(u64) {
|
||||
ThrowNotImplemented(Opcode::RET);
|
||||
}
|
||||
|
|
|
@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::FPOrdGreaterThanEqual16:
|
||||
case IR::Opcode::FPUnordGreaterThanEqual16:
|
||||
case IR::Opcode::FPIsNan16:
|
||||
case IR::Opcode::GlobalAtomicAddF16x2:
|
||||
case IR::Opcode::StorageAtomicAddF16x2:
|
||||
case IR::Opcode::StorageAtomicMinF16x2:
|
||||
case IR::Opcode::StorageAtomicMaxF16x2:
|
||||
info.uses_fp16 = true;
|
||||
break;
|
||||
case IR::Opcode::CompositeConstructF64x2:
|
||||
|
@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::ConvertF16U64:
|
||||
case IR::Opcode::ConvertF32U64:
|
||||
case IR::Opcode::ConvertF64U64:
|
||||
case IR::Opcode::SharedAtomicExchange64:
|
||||
info.uses_int64 = true;
|
||||
break;
|
||||
default:
|
||||
|
@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
case IR::Opcode::FSwizzleAdd:
|
||||
info.uses_fswzadd = true;
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicInc32:
|
||||
info.uses_shared_increment = true;
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicDec32:
|
||||
info.uses_shared_decrement = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicInc32:
|
||||
case IR::Opcode::StorageAtomicInc32:
|
||||
info.uses_global_increment = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicDec32:
|
||||
case IR::Opcode::StorageAtomicDec32:
|
||||
info.uses_global_decrement = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicAddF32:
|
||||
case IR::Opcode::StorageAtomicAddF32:
|
||||
info.uses_atomic_f32_add = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicAddF16x2:
|
||||
case IR::Opcode::StorageAtomicAddF16x2:
|
||||
info.uses_atomic_f16x2_add = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicAddF32x2:
|
||||
case IR::Opcode::StorageAtomicAddF32x2:
|
||||
info.uses_atomic_f32x2_add = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicMinF16x2:
|
||||
case IR::Opcode::StorageAtomicMinF16x2:
|
||||
info.uses_atomic_f16x2_min = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicMinF32x2:
|
||||
case IR::Opcode::StorageAtomicMinF32x2:
|
||||
info.uses_atomic_f32x2_min = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicMaxF16x2:
|
||||
case IR::Opcode::StorageAtomicMaxF16x2:
|
||||
info.uses_atomic_f16x2_max = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicMaxF32x2:
|
||||
case IR::Opcode::StorageAtomicMaxF32x2:
|
||||
info.uses_atomic_f32x2_max = true;
|
||||
break;
|
||||
case IR::Opcode::GlobalAtomicIAdd64:
|
||||
case IR::Opcode::GlobalAtomicSMin64:
|
||||
case IR::Opcode::GlobalAtomicUMin64:
|
||||
case IR::Opcode::GlobalAtomicSMax64:
|
||||
case IR::Opcode::GlobalAtomicUMax64:
|
||||
case IR::Opcode::GlobalAtomicAnd64:
|
||||
case IR::Opcode::GlobalAtomicOr64:
|
||||
case IR::Opcode::GlobalAtomicXor64:
|
||||
case IR::Opcode::GlobalAtomicExchange64:
|
||||
case IR::Opcode::StorageAtomicIAdd64:
|
||||
case IR::Opcode::StorageAtomicSMin64:
|
||||
case IR::Opcode::StorageAtomicUMin64:
|
||||
case IR::Opcode::StorageAtomicSMax64:
|
||||
case IR::Opcode::StorageAtomicUMax64:
|
||||
case IR::Opcode::StorageAtomicAnd64:
|
||||
case IR::Opcode::StorageAtomicOr64:
|
||||
case IR::Opcode::StorageAtomicXor64:
|
||||
info.uses_64_bit_atomics = true;
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicExchange64:
|
||||
info.uses_64_bit_atomics = true;
|
||||
info.uses_shared_memory_u32x2 = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) {
|
|||
case IR::Opcode::WriteGlobal32:
|
||||
case IR::Opcode::WriteGlobal64:
|
||||
case IR::Opcode::WriteGlobal128:
|
||||
case IR::Opcode::GlobalAtomicIAdd32:
|
||||
case IR::Opcode::GlobalAtomicSMin32:
|
||||
case IR::Opcode::GlobalAtomicUMin32:
|
||||
case IR::Opcode::GlobalAtomicSMax32:
|
||||
case IR::Opcode::GlobalAtomicUMax32:
|
||||
case IR::Opcode::GlobalAtomicInc32:
|
||||
case IR::Opcode::GlobalAtomicDec32:
|
||||
case IR::Opcode::GlobalAtomicAnd32:
|
||||
case IR::Opcode::GlobalAtomicOr32:
|
||||
case IR::Opcode::GlobalAtomicXor32:
|
||||
case IR::Opcode::GlobalAtomicExchange32:
|
||||
case IR::Opcode::GlobalAtomicIAdd64:
|
||||
case IR::Opcode::GlobalAtomicSMin64:
|
||||
case IR::Opcode::GlobalAtomicUMin64:
|
||||
case IR::Opcode::GlobalAtomicSMax64:
|
||||
case IR::Opcode::GlobalAtomicUMax64:
|
||||
case IR::Opcode::GlobalAtomicAnd64:
|
||||
case IR::Opcode::GlobalAtomicOr64:
|
||||
case IR::Opcode::GlobalAtomicXor64:
|
||||
case IR::Opcode::GlobalAtomicExchange64:
|
||||
case IR::Opcode::GlobalAtomicAddF32:
|
||||
case IR::Opcode::GlobalAtomicAddF16x2:
|
||||
case IR::Opcode::GlobalAtomicAddF32x2:
|
||||
case IR::Opcode::GlobalAtomicMinF16x2:
|
||||
case IR::Opcode::GlobalAtomicMinF32x2:
|
||||
case IR::Opcode::GlobalAtomicMaxF16x2:
|
||||
case IR::Opcode::GlobalAtomicMaxF32x2:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
|
|||
return IR::Opcode::WriteStorage64;
|
||||
case IR::Opcode::WriteGlobal128:
|
||||
return IR::Opcode::WriteStorage128;
|
||||
case IR::Opcode::GlobalAtomicIAdd32:
|
||||
return IR::Opcode::StorageAtomicIAdd32;
|
||||
case IR::Opcode::GlobalAtomicSMin32:
|
||||
return IR::Opcode::StorageAtomicSMin32;
|
||||
case IR::Opcode::GlobalAtomicUMin32:
|
||||
return IR::Opcode::StorageAtomicUMin32;
|
||||
case IR::Opcode::GlobalAtomicSMax32:
|
||||
return IR::Opcode::StorageAtomicSMax32;
|
||||
case IR::Opcode::GlobalAtomicUMax32:
|
||||
return IR::Opcode::StorageAtomicUMax32;
|
||||
case IR::Opcode::GlobalAtomicInc32:
|
||||
return IR::Opcode::StorageAtomicInc32;
|
||||
case IR::Opcode::GlobalAtomicDec32:
|
||||
return IR::Opcode::StorageAtomicDec32;
|
||||
case IR::Opcode::GlobalAtomicAnd32:
|
||||
return IR::Opcode::StorageAtomicAnd32;
|
||||
case IR::Opcode::GlobalAtomicOr32:
|
||||
return IR::Opcode::StorageAtomicOr32;
|
||||
case IR::Opcode::GlobalAtomicXor32:
|
||||
return IR::Opcode::StorageAtomicXor32;
|
||||
case IR::Opcode::GlobalAtomicIAdd64:
|
||||
return IR::Opcode::StorageAtomicIAdd64;
|
||||
case IR::Opcode::GlobalAtomicSMin64:
|
||||
return IR::Opcode::StorageAtomicSMin64;
|
||||
case IR::Opcode::GlobalAtomicUMin64:
|
||||
return IR::Opcode::StorageAtomicUMin64;
|
||||
case IR::Opcode::GlobalAtomicSMax64:
|
||||
return IR::Opcode::StorageAtomicSMax64;
|
||||
case IR::Opcode::GlobalAtomicUMax64:
|
||||
return IR::Opcode::StorageAtomicUMax64;
|
||||
case IR::Opcode::GlobalAtomicAnd64:
|
||||
return IR::Opcode::StorageAtomicAnd64;
|
||||
case IR::Opcode::GlobalAtomicOr64:
|
||||
return IR::Opcode::StorageAtomicOr64;
|
||||
case IR::Opcode::GlobalAtomicXor64:
|
||||
return IR::Opcode::StorageAtomicXor64;
|
||||
case IR::Opcode::GlobalAtomicExchange32:
|
||||
return IR::Opcode::StorageAtomicExchange32;
|
||||
case IR::Opcode::GlobalAtomicExchange64:
|
||||
return IR::Opcode::StorageAtomicExchange64;
|
||||
case IR::Opcode::GlobalAtomicAddF32:
|
||||
return IR::Opcode::StorageAtomicAddF32;
|
||||
case IR::Opcode::GlobalAtomicAddF16x2:
|
||||
return IR::Opcode::StorageAtomicAddF16x2;
|
||||
case IR::Opcode::GlobalAtomicMinF16x2:
|
||||
return IR::Opcode::StorageAtomicMinF16x2;
|
||||
case IR::Opcode::GlobalAtomicMaxF16x2:
|
||||
return IR::Opcode::StorageAtomicMaxF16x2;
|
||||
case IR::Opcode::GlobalAtomicAddF32x2:
|
||||
return IR::Opcode::StorageAtomicAddF32x2;
|
||||
case IR::Opcode::GlobalAtomicMinF32x2:
|
||||
return IR::Opcode::StorageAtomicMinF32x2;
|
||||
case IR::Opcode::GlobalAtomicMaxF32x2:
|
||||
return IR::Opcode::StorageAtomicMaxF32x2;
|
||||
default:
|
||||
throw InvalidArgument("Invalid global memory opcode {}", opcode);
|
||||
}
|
||||
|
@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
|
|||
inst.Invalidate();
|
||||
}
|
||||
|
||||
/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
|
||||
void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||
const IR::U32& offset) {
|
||||
const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
|
||||
const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const IR::Value value{
|
||||
&*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
|
||||
inst.ReplaceUsesWith(value);
|
||||
}
|
||||
|
||||
/// Replace a global memory instruction with its storage buffer equivalent
|
||||
void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
||||
const IR::U32& offset) {
|
||||
|
@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
|
|||
case IR::Opcode::WriteGlobal64:
|
||||
case IR::Opcode::WriteGlobal128:
|
||||
return ReplaceWrite(block, inst, storage_index, offset);
|
||||
case IR::Opcode::GlobalAtomicIAdd32:
|
||||
case IR::Opcode::GlobalAtomicSMin32:
|
||||
case IR::Opcode::GlobalAtomicUMin32:
|
||||
case IR::Opcode::GlobalAtomicSMax32:
|
||||
case IR::Opcode::GlobalAtomicUMax32:
|
||||
case IR::Opcode::GlobalAtomicInc32:
|
||||
case IR::Opcode::GlobalAtomicDec32:
|
||||
case IR::Opcode::GlobalAtomicAnd32:
|
||||
case IR::Opcode::GlobalAtomicOr32:
|
||||
case IR::Opcode::GlobalAtomicXor32:
|
||||
case IR::Opcode::GlobalAtomicExchange32:
|
||||
case IR::Opcode::GlobalAtomicIAdd64:
|
||||
case IR::Opcode::GlobalAtomicSMin64:
|
||||
case IR::Opcode::GlobalAtomicUMin64:
|
||||
case IR::Opcode::GlobalAtomicSMax64:
|
||||
case IR::Opcode::GlobalAtomicUMax64:
|
||||
case IR::Opcode::GlobalAtomicAnd64:
|
||||
case IR::Opcode::GlobalAtomicOr64:
|
||||
case IR::Opcode::GlobalAtomicXor64:
|
||||
case IR::Opcode::GlobalAtomicExchange64:
|
||||
case IR::Opcode::GlobalAtomicAddF32:
|
||||
case IR::Opcode::GlobalAtomicAddF16x2:
|
||||
case IR::Opcode::GlobalAtomicAddF32x2:
|
||||
case IR::Opcode::GlobalAtomicMinF16x2:
|
||||
case IR::Opcode::GlobalAtomicMinF32x2:
|
||||
case IR::Opcode::GlobalAtomicMaxF16x2:
|
||||
case IR::Opcode::GlobalAtomicMaxF32x2:
|
||||
return ReplaceAtomic(block, inst, storage_index, offset);
|
||||
default:
|
||||
throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
|
||||
}
|
||||
|
@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
|||
CollectStorageBuffers(*block, inst, info);
|
||||
}
|
||||
}
|
||||
u32 storage_index{};
|
||||
for (const StorageBufferAddr& storage_buffer : info.set) {
|
||||
program.info.storage_buffers_descriptors.push_back({
|
||||
.cbuf_index = storage_buffer.index,
|
||||
|
@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
|
|||
.count = 1,
|
||||
.is_written{info.writes.contains(storage_buffer)},
|
||||
});
|
||||
++storage_index;
|
||||
}
|
||||
for (const StorageInst& storage_inst : info.to_replace) {
|
||||
const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
|
||||
|
|
|
@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) {
|
|||
return IR::Opcode::ConvertF32U32;
|
||||
case IR::Opcode::ConvertF16U64:
|
||||
return IR::Opcode::ConvertF32U64;
|
||||
case IR::Opcode::GlobalAtomicAddF16x2:
|
||||
return IR::Opcode::GlobalAtomicAddF32x2;
|
||||
case IR::Opcode::StorageAtomicAddF16x2:
|
||||
return IR::Opcode::StorageAtomicAddF32x2;
|
||||
case IR::Opcode::GlobalAtomicMinF16x2:
|
||||
return IR::Opcode::GlobalAtomicMinF32x2;
|
||||
case IR::Opcode::StorageAtomicMinF16x2:
|
||||
return IR::Opcode::StorageAtomicMinF32x2;
|
||||
case IR::Opcode::GlobalAtomicMaxF16x2:
|
||||
return IR::Opcode::GlobalAtomicMaxF32x2;
|
||||
case IR::Opcode::StorageAtomicMaxF16x2:
|
||||
return IR::Opcode::StorageAtomicMaxF32x2;
|
||||
default:
|
||||
return op;
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ struct Profile {
|
|||
bool support_viewport_index_layer_non_geometry{};
|
||||
bool support_typeless_image_loads{};
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
bool support_int64_atomics{};
|
||||
|
||||
// FClamp is broken and OpFMax + OpFMin should be used instead
|
||||
bool has_broken_spirv_clamp{};
|
||||
|
|
|
@ -128,6 +128,19 @@ struct Info {
|
|||
bool uses_subgroup_mask{};
|
||||
bool uses_fswzadd{};
|
||||
bool uses_typeless_image_reads{};
|
||||
bool uses_shared_increment{};
|
||||
bool uses_shared_decrement{};
|
||||
bool uses_global_increment{};
|
||||
bool uses_global_decrement{};
|
||||
bool uses_atomic_f32_add{};
|
||||
bool uses_atomic_f16x2_add{};
|
||||
bool uses_atomic_f16x2_min{};
|
||||
bool uses_atomic_f16x2_max{};
|
||||
bool uses_atomic_f32x2_add{};
|
||||
bool uses_atomic_f32x2_min{};
|
||||
bool uses_atomic_f32x2_max{};
|
||||
bool uses_64_bit_atomics{};
|
||||
bool uses_shared_memory_u32x2{};
|
||||
|
||||
IR::Type used_constant_buffer_types{};
|
||||
|
||||
|
|
|
@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
|
|||
device.IsExtShaderViewportIndexLayerSupported(),
|
||||
.support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
||||
.support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
|
||||
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
|
||||
.generic_input_types{},
|
||||
.fixed_state_point_size{},
|
||||
|
|
|
@ -681,6 +681,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
|||
bool has_ext_transform_feedback{};
|
||||
bool has_ext_custom_border_color{};
|
||||
bool has_ext_extended_dynamic_state{};
|
||||
bool has_ext_shader_atomic_int64{};
|
||||
for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
|
||||
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
|
||||
bool push) {
|
||||
|
@ -710,6 +711,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
|||
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
|
||||
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
|
||||
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
|
||||
test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
|
||||
test(has_khr_workgroup_memory_explicit_layout,
|
||||
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
|
||||
if (Settings::values.renderer_debug) {
|
||||
|
@ -760,6 +762,18 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
|||
} else {
|
||||
is_warp_potentially_bigger = true;
|
||||
}
|
||||
if (has_ext_shader_atomic_int64) {
    // Query the VK_KHR_shader_atomic_int64 features. sType has to identify this exact
    // structure: the driver matches pNext entries by sType, so a mismatched tag means the
    // two feature bits below are never filled in for us.
    // Value-initialize so the bits read as false if the driver leaves the struct untouched.
    VkPhysicalDeviceShaderAtomicInt64Features atomic_int64{};
    atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES;
    atomic_int64.pNext = nullptr;
    features.pNext = &atomic_int64;
    physical.GetFeatures2KHR(features);

    // Only enable the extension when both buffer and shared memory 64-bit atomics exist
    if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
        extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
        ext_shader_atomic_int64 = true;
    }
}
|
||||
if (has_ext_transform_feedback) {
|
||||
VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
|
||||
tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
|
||||
|
|
|
@ -229,6 +229,11 @@ public:
|
|||
return ext_shader_stencil_export;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_shader_atomic_int64.
/// This reports the cached ext_shader_atomic_int64 flag; it does not query the device.
|
||||
bool IsExtShaderAtomicInt64Supported() const {
|
||||
return ext_shader_atomic_int64;
|
||||
}
|
||||
|
||||
/// Returns true when a known debugging tool is attached.
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_renderdoc || has_nsight_graphics;
|
||||
|
@ -320,6 +325,7 @@ private:
|
|||
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
|
||||
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
|
||||
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
|
||||
bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
|
||||
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
|
||||
bool has_renderdoc{}; ///< Has RenderDoc attached
|
||||
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
|
||||
|
|
Loading…
Reference in a new issue