From 2d7e73c5c41a251553800a45a8b43edea54cea1a Mon Sep 17 00:00:00 2001 From: citrabot Date: Sat, 24 Feb 2024 01:30:20 +0000 Subject: [PATCH] Merge PR 7372 --- src/tests/CMakeLists.txt | 1 + src/tests/video_core/pica_float.cpp | 30 ++++++++ .../video_core/shader/shader_jit_compiler.cpp | 10 ++- src/video_core/pica_types.h | 75 +++++++++++++++++-- .../renderer_software/sw_rasterizer.cpp | 21 +++--- 5 files changed, 115 insertions(+), 22 deletions(-) create mode 100644 src/tests/video_core/pica_float.cpp diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index baddc1ada..f5aa9a4a8 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -14,6 +14,7 @@ add_executable(tests audio_core/audio_fixures.h audio_core/decoder_tests.cpp video_core/shader/shader_jit_compiler.cpp + video_core/pica_float.cpp audio_core/merryhime_3ds_audio/merry_audio/merry_audio.cpp audio_core/merryhime_3ds_audio/merry_audio/merry_audio.h audio_core/merryhime_3ds_audio/merry_audio/service_fixture.cpp diff --git a/src/tests/video_core/pica_float.cpp b/src/tests/video_core/pica_float.cpp new file mode 100644 index 000000000..394420c99 --- /dev/null +++ b/src/tests/video_core/pica_float.cpp @@ -0,0 +1,30 @@ +// Copyright 2024 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include "video_core/pica_types.h" + +using Pica::f24; + +TEST_CASE("Infinities", "[video_core][pica_float]") { + REQUIRE(std::isinf(f24::FromFloat32(INFINITY).ToFloat32())); + REQUIRE(std::isinf(f24::FromFloat32(1.e20f).ToFloat32())); + REQUIRE(std::isinf(f24::FromFloat32(-1.e20f).ToFloat32())); +} + +TEST_CASE("Subnormals", "[video_core][pica_float]") { + REQUIRE(f24::FromFloat32(1e-20f).ToFloat32() == 0.f); +} + +TEST_CASE("NaN", "[video_core][pica_float]") { + const auto inf = f24::FromFloat32(INFINITY); + const auto nan = f24::FromFloat32(NAN); + + REQUIRE(std::isnan(nan.ToFloat32())); + REQUIRE(std::isnan((nan * f24::Zero()).ToFloat32())); + REQUIRE(std::isnan((inf - inf).ToFloat32())); + REQUIRE((inf * f24::Zero()).ToFloat32() == 0.f); +} diff --git a/src/tests/video_core/shader/shader_jit_compiler.cpp b/src/tests/video_core/shader/shader_jit_compiler.cpp index 01698a2e1..579bf7e99 100644 --- a/src/tests/video_core/shader/shader_jit_compiler.cpp +++ b/src/tests/video_core/shader/shader_jit_compiler.cpp @@ -260,7 +260,7 @@ TEST_CASE("LG2", "[video_core][shader][shader_jit]") { REQUIRE(std::isinf(shader.Run(0.f).x)); REQUIRE(shader.Run(4.f).x == Catch::Approx(2.f)); REQUIRE(shader.Run(64.f).x == Catch::Approx(6.f)); - REQUIRE(shader.Run(1.e24f).x == Catch::Approx(79.7262742773f)); + // REQUIRE(std::isinf(shader.Run(INFINITY).x)); } TEST_CASE("EX2", "[video_core][shader][shader_jit]") { @@ -277,8 +277,9 @@ TEST_CASE("EX2", "[video_core][shader][shader_jit]") { REQUIRE(shader.Run(0.f).x == Catch::Approx(1.f)); REQUIRE(shader.Run(2.f).x == Catch::Approx(4.f)); REQUIRE(shader.Run(6.f).x == Catch::Approx(64.f)); - REQUIRE(shader.Run(79.7262742773f).x == Catch::Approx(1.e24f)); REQUIRE(std::isinf(shader.Run(800.f).x)); + // If we respect f24 precision, 2^79 = inf, as 79 > 63 + // REQUIRE(std::isinf(shader.Run(79.7262742773f).x)); } TEST_CASE("MUL", "[video_core][shader][shader_jit]") { @@ -469,7 +470,7 @@ TEST_CASE("Uniform Read", "[video_core][shader][shader_jit]") { const float color = (i * 2.0f) / 255.0f; const auto color_f24 = Pica::f24::FromFloat32(color); shader.shader_setup->uniforms.f[i] = {color_f24, color_f24, color_f24, Pica::f24::One()}; - f_uniforms[i] = {color, color, color, 1.0f}; + f_uniforms[i] = {color_f24.ToFloat32(), color_f24.ToFloat32(), color_f24.ToFloat32(), 1.0f}; } for (u32 i = 0; i < 96; ++i) { @@ -506,7 +507,8 @@ TEST_CASE("Address Register Offset", "[video_core][shader][shader_jit]") { const auto color_f24 = Pica::f24::FromFloat32(color); shader.shader_setup->uniforms.f[i] = {color_f24, color_f24, color_f24, Pica::f24::One()}; - f_uniforms[i] = {color, color, color, 1.f}; + f_uniforms[i] = {color_f24.ToFloat32(), color_f24.ToFloat32(), color_f24.ToFloat32(), + 1.f}; } else if (i >= 0x60 && i < 0x64) { const u8 color = static_cast((i - 0x60) * 0x10); shader.shader_setup->uniforms.i[i - 0x60] = {color, color, color, 255}; diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 4fd27f9fe..043691e87 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -4,8 +4,10 @@ #pragma once +#include #include #include +#include #include #include "common/common_types.h" @@ -28,6 +30,41 @@ public: static constexpr Float FromFloat32(float val) { Float ret; ret.value = val; + return Trunc(ret); + } + + static constexpr Float MinNormal() { + Float ret; + // Mininum normal value = 1.0 / (1 << ((1 << (E - 1)) - 2)); + if constexpr (E == 5) { + ret.value = 0x1.p-14; + } else { + // E == 7 + ret.value = (0x1.p-62); + } + return ret; + } + + // these values are approximate, rounded up + static constexpr Float Max() { + Float ret; + if constexpr (E == 5) { + ret.value = 0x1.p16; + } else { + // E == 7 + ret.value = 0x1.p64; + } + return ret; + } + + // before C++23 std::isnormal and std::abs aren't considered constexpr so this function can't be + // used as constexpr until the compilers support that. + static constexpr Float Trunc(const Float& val) { + Float ret = val.Flushed().InfChecked(); + if (std::isnormal(val.ToFloat32())) { + u32 hex = std::bit_cast(ret.ToFloat32()) & (0xffffffff ^ ((1 << (23 - M)) - 1)); + ret.value = std::bit_cast(hex); + } return ret; } @@ -50,17 +87,21 @@ public: hex = sign; } - std::memcpy(&res.value, &hex, sizeof(float)); + res.value = std::bit_cast(hex); return res; } static constexpr Float Zero() { - return FromFloat32(0.f); + Float ret; + ret.value = 0.f; + return ret; } static constexpr Float One() { - return FromFloat32(1.f); + Float ret; + ret.value = 1.f; + return ret; } // Not recommended for anything but logging @@ -68,6 +109,24 @@ public: return value; } + constexpr Float Flushed() const { + Float ret; + ret.value = value; + if (std::abs(value) < MinNormal().ToFloat32()) { + ret.value = 0; + } + return ret; + } + + constexpr Float InfChecked() const { + Float ret; + ret.value = value; + if (std::abs(value) > Max().ToFloat32()) { + ret.value = value * std::numeric_limits::infinity(); + } + return ret; + } + constexpr Float operator*(const Float& flt) const { float result = value * flt.ToFloat32(); // PICA gives 0 instead of NaN when multiplying by inf @@ -95,22 +154,24 @@ public: } constexpr Float& operator/=(const Float& flt) { - value /= flt.ToFloat32(); + value = operator/(flt).value; return *this; } constexpr Float& operator+=(const Float& flt) { - value += flt.ToFloat32(); + value = operator+(flt).value; return *this; } constexpr Float& operator-=(const Float& flt) { - value -= flt.ToFloat32(); + value = operator-(flt).value; return *this; } constexpr Float operator-() const { - return Float::FromFloat32(-ToFloat32()); + Float ret; + ret.value = -value; + return ret; } constexpr bool operator<(const Float& flt) const { diff --git a/src/video_core/renderer_software/sw_rasterizer.cpp b/src/video_core/renderer_software/sw_rasterizer.cpp index 36aa2b03b..d0f42fe7d 100644 --- a/src/video_core/renderer_software/sw_rasterizer.cpp +++ b/src/video_core/renderer_software/sw_rasterizer.cpp @@ -30,7 +30,7 @@ using Pica::Texture::TextureInfo; // negative/positive z values when computing with f32 precision, // causing some vertices to get erroneously clipped. To workaround this problem, // we can use a very small epsilon value for clip plane comparison. -constexpr f32 EPSILON_Z = 0.00000001f; +constexpr f32 EPSILON_Z = 0.f; struct Vertex : Pica::OutputVertex { Vertex(const OutputVertex& v) : OutputVertex(v) {} @@ -125,9 +125,8 @@ void RasterizerSoftware::AddTriangle(const Pica::OutputVertex& v0, const Pica::O auto* input_list = &buffer_b; // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. - // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest - // epsilon possible within f24 accuracy. - static constexpr f24 EPSILON = f24::FromFloat32(0.00001f); + // TODO: Not sure if this is a valid approach. + static constexpr f24 EPSILON = f24::MinNormal(); static constexpr f24 f0 = f24::Zero(); static constexpr f24 f1 = f24::One(); static constexpr std::array clipping_edges = {{ @@ -287,11 +286,11 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); const int bias0 = - IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? -1 : 0; + IsRightSideOrFlatBottomEdge(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) ? 1 : 0; const int bias1 = - IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? -1 : 0; + IsRightSideOrFlatBottomEdge(vtxpos[1].xy(), vtxpos[2].xy(), vtxpos[0].xy()) ? 1 : 0; const int bias2 = - IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? -1 : 0; + IsRightSideOrFlatBottomEdge(vtxpos[2].xy(), vtxpos[0].xy(), vtxpos[1].xy()) ? 1 : 0; const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); @@ -314,13 +313,13 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con } // Calculate the barycentric coordinates w0, w1 and w2 - const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); - const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); - const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); + const s32 w0 = SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); + const s32 w1 = SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); + const s32 w2 = SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); const s32 wsum = w0 + w1 + w2; // If current pixel is not covered by the current primitive - if (w0 < 0 || w1 < 0 || w2 < 0) { + if (w0 < bias0 || w1 < bias1 || w2 < bias2) { continue; }