mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-07 01:25:36 +00:00
Fix SIMD recognition, at least for GCC targets. Still needs to be tested with MSVC.
This commit is contained in:
parent
efdca4b5bb
commit
d68a119bc9
|
@ -1,24 +1,44 @@
|
|||
INCLUDE_DIRECTORIES(${TexC_SOURCE_DIR}/BPTCEncoder/include)
|
||||
INCLUDE_DIRECTORIES(${TexC_BINARY_DIR}/BPTCEncoder/include)
|
||||
|
||||
INCLUDE(CheckCXXSourceCompiles)
|
||||
INCLUDE(CheckCXXSourceRuns)
|
||||
|
||||
SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
IF(CMAKE_COMPILER_IS_GNUCC)
|
||||
SET(CMAKE_REQUIRED_FLAGS -msse4.1)
|
||||
CHECK_CXX_SOURCE_COMPILES("config/testsse4.1.cpp" HAS_SSE_41)
|
||||
SET(CMAKE_REQUIRED_FLAGS -msse4.1 -E)
|
||||
CHECK_CXX_SOURCE_RUNS("
|
||||
#include <smmintrin.h>
|
||||
int main() {
|
||||
const __m128 fv = _mm_set1_ps(1.0f);
|
||||
const __m128 fv2 = _mm_set1_ps(2.0f);
|
||||
|
||||
const __m128 ans = _mm_blend_ps(fv, fv2, 2);
|
||||
|
||||
return ((int *)(&ans))[0];
|
||||
}"
|
||||
HAS_SSE_41
|
||||
)
|
||||
|
||||
IF(HAS_SSE_41)
|
||||
SET(CMAKE_REQUIRED_FLAGS -msse4.2)
|
||||
CHECK_CXX_SOURCE_COMPILES("config/testsse4.2.cpp" HAS_SSE_POPCNT)
|
||||
CHECK_CXX_SOURCE_RUNS("
|
||||
#include <smmintrin.h>
|
||||
int main() {
|
||||
const unsigned int testMe = 5;
|
||||
return _mm_popcnt_u32(testMe);
|
||||
}"
|
||||
HAS_SSE_POPCNT
|
||||
)
|
||||
ENDIF(HAS_SSE_41)
|
||||
|
||||
ELSEIF(MSVC)
|
||||
#!FIXME!
|
||||
ENDIF()
|
||||
SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
CONFIGURE_FILE(
|
||||
"config/BC7Config.h.in"
|
||||
"src/BC7Config.h"
|
||||
"include/BC7Config.h"
|
||||
)
|
||||
|
||||
IF(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
@ -38,6 +58,21 @@ SET( SOURCES
|
|||
)
|
||||
|
||||
IF( HAS_SSE_41 )
|
||||
|
||||
IF ( HAS_SSE_POPCNT )
|
||||
IF( MSVC )
|
||||
ADD_DEFINITIONS( /arch:SSE4.2 )
|
||||
ELSE() #Assume GCC
|
||||
ADD_DEFINITIONS( -msse4.2 )
|
||||
ENDIF()
|
||||
ELSE()
|
||||
IF( MSVC )
|
||||
ADD_DEFINITIONS( /arch:SSE4.1 )
|
||||
ELSE() #Assume GCC
|
||||
ADD_DEFINITIONS( -msse4.1 )
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
SET( HEADERS
|
||||
${HEADERS}
|
||||
src/RGBAEndpointsSIMD.h
|
||||
|
@ -52,6 +87,6 @@ IF( HAS_SSE_41 )
|
|||
ENDIF( HAS_SSE_41 )
|
||||
|
||||
ADD_LIBRARY( BPTCEncoder
|
||||
${HEADERS}
|
||||
${SOURCES}
|
||||
${SIMD_SOURCES}
|
||||
)
|
||||
|
|
|
@ -5,4 +5,4 @@
|
|||
// explicitly by the CMake build process.
|
||||
|
||||
// Do we have the proper popcnt instruction defined?
|
||||
#define HAS_SSE_POPCNT @HAS_SSE_POPCNT@
|
||||
#cmakedefine HAS_SSE_POPCNT
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
#include <smmintrin.h>
|
||||
|
||||
int main() {
|
||||
const __m128 fv = _mm_set1_ps(1.0f);
|
||||
const __m128 fv2 = _mm_set1_ps(2.0f);
|
||||
|
||||
const __m128 ans = _mm_blend_ps(fv, fv2, 2);
|
||||
|
||||
return ((int *)(&ans))[0];
|
||||
}
|
|
@ -119,7 +119,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
|
|||
|
||||
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
||||
#ifdef HAS_SSE_POPCNT
|
||||
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]);
|
||||
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
||||
#else
|
||||
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
||||
#endif
|
||||
|
@ -160,7 +160,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
|
|||
|
||||
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
||||
#ifdef HAS_SSE_POPCNT
|
||||
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]);
|
||||
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
||||
#else
|
||||
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
||||
#endif
|
||||
|
@ -283,7 +283,11 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
|
|||
// nBuckets should be a power of two.
|
||||
assert(!(nBuckets & (nBuckets - 1)));
|
||||
|
||||
#ifdef HAS_SSE_POPCNT
|
||||
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
|
||||
#else
|
||||
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
|
||||
#endif
|
||||
assert(indexPrec >= 2 && indexPrec <= 4);
|
||||
|
||||
typedef __m128i tInterpPair[2];
|
||||
|
|
Loading…
Reference in a new issue