mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-23 19:21:09 +00:00
Fix SIMD recognition, at least for GCC targets. MSVC still needs to be tested.
This commit is contained in:
parent
efdca4b5bb
commit
d68a119bc9
|
@ -1,24 +1,44 @@
|
||||||
INCLUDE_DIRECTORIES(${TexC_SOURCE_DIR}/BPTCEncoder/include)
|
INCLUDE_DIRECTORIES(${TexC_SOURCE_DIR}/BPTCEncoder/include)
|
||||||
|
INCLUDE_DIRECTORIES(${TexC_BINARY_DIR}/BPTCEncoder/include)
|
||||||
|
|
||||||
INCLUDE(CheckCXXSourceCompiles)
|
INCLUDE(CheckCXXSourceRuns)
|
||||||
|
|
||||||
SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||||
IF(CMAKE_COMPILER_IS_GNUCC)
|
IF(CMAKE_COMPILER_IS_GNUCC)
|
||||||
SET(CMAKE_REQUIRED_FLAGS -msse4.1)
|
# Flags used while compiling the SSE4.1 probe below. The probe has to be fully
# compiled and linked (so no preprocess-only -E flag); otherwise
# CHECK_CXX_SOURCE_RUNS has no executable to run and always reports failure.
SET(CMAKE_REQUIRED_FLAGS -msse4.1)
|
||||||
CHECK_CXX_SOURCE_COMPILES("config/testsse4.1.cpp" HAS_SSE_41)
|
# Run-time probe for SSE4.1 support. HAS_SSE_41 is set only if the program
# compiles, links, runs, and exits with status 0.
CHECK_CXX_SOURCE_RUNS("
  #include <smmintrin.h>
  int main() {
    const __m128 fv = _mm_set1_ps(1.0f);
    const __m128 fv2 = _mm_set1_ps(2.0f);

    // Mask 2 (binary 0010): lane 1 is taken from fv2, every other lane from fv.
    const __m128 ans = _mm_blend_ps(fv, fv2, 2);

    // Read the lanes back through a store instead of a pointer cast, and turn
    // the comparison into an explicit exit status (0 == success).
    float lanes[4];
    _mm_storeu_ps(lanes, ans);
    return (lanes[0] == 1.0f && lanes[1] == 2.0f) ? 0 : 1;
  }"
  HAS_SSE_41
)
|
||||||
|
|
||||||
IF(HAS_SSE_41)
|
IF(HAS_SSE_41)
|
||||||
SET(CMAKE_REQUIRED_FLAGS -msse4.2)
|
SET(CMAKE_REQUIRED_FLAGS -msse4.2)
|
||||||
CHECK_CXX_SOURCE_COMPILES("config/testsse4.2.cpp" HAS_SSE_POPCNT)
|
# Run-time probe for the popcnt instruction. HAS_SSE_POPCNT is set only if the
# program compiles, links, runs, and exits with status 0.
CHECK_CXX_SOURCE_RUNS("
  #include <smmintrin.h>
  int main() {
    const unsigned int testMe = 5;
    // 5 is binary 101, so the expected population count is 2. Returning the
    // count itself would be a non-zero exit status, which the check treats
    // as failure; map the expected result to 0 instead.
    return (_mm_popcnt_u32(testMe) == 2) ? 0 : 1;
  }"
  HAS_SSE_POPCNT
)
|
||||||
ENDIF(HAS_SSE_41)
|
ENDIF(HAS_SSE_41)
|
||||||
|
|
||||||
ELSEIF(MSVC)
|
ELSEIF(MSVC)
|
||||||
|
#!FIXME!
|
||||||
ENDIF()
|
ENDIF()
|
||||||
SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
||||||
|
|
||||||
CONFIGURE_FILE(
|
CONFIGURE_FILE(
|
||||||
"config/BC7Config.h.in"
|
"config/BC7Config.h.in"
|
||||||
"src/BC7Config.h"
|
"include/BC7Config.h"
|
||||||
)
|
)
|
||||||
|
|
||||||
IF(CMAKE_COMPILER_IS_GNUCC)
|
IF(CMAKE_COMPILER_IS_GNUCC)
|
||||||
|
@ -38,6 +58,21 @@ SET( SOURCES
|
||||||
)
|
)
|
||||||
|
|
||||||
IF( HAS_SSE_41 )
|
IF( HAS_SSE_41 )
|
||||||
|
|
||||||
|
IF ( HAS_SSE_POPCNT )
|
||||||
|
IF( MSVC )
|
||||||
|
ADD_DEFINITIONS( /arch:SSE4.2 )
|
||||||
|
ELSE() #Assume GCC
|
||||||
|
ADD_DEFINITIONS( -msse4.2 )
|
||||||
|
ENDIF()
|
||||||
|
ELSE()
|
||||||
|
IF( MSVC )
|
||||||
|
ADD_DEFINITIONS( /arch:SSE4.1 )
|
||||||
|
ELSE() #Assume GCC
|
||||||
|
ADD_DEFINITIONS( -msse4.1 )
|
||||||
|
ENDIF()
|
||||||
|
ENDIF()
|
||||||
|
|
||||||
SET( HEADERS
|
SET( HEADERS
|
||||||
${HEADERS}
|
${HEADERS}
|
||||||
src/RGBAEndpointsSIMD.h
|
src/RGBAEndpointsSIMD.h
|
||||||
|
@ -52,6 +87,6 @@ IF( HAS_SSE_41 )
|
||||||
ENDIF( HAS_SSE_41 )
|
ENDIF( HAS_SSE_41 )
|
||||||
|
|
||||||
ADD_LIBRARY( BPTCEncoder
|
ADD_LIBRARY( BPTCEncoder
|
||||||
|
${HEADERS}
|
||||||
${SOURCES}
|
${SOURCES}
|
||||||
${SIMD_SOURCES}
|
|
||||||
)
|
)
|
||||||
|
|
|
@ -5,4 +5,4 @@
|
||||||
// explicitly by the CMake build process.
|
// explicitly by the CMake build process.
|
||||||
|
|
||||||
// Do we have the proper popcnt instruction defined?
|
// Do we have the proper popcnt instruction defined?
|
||||||
#define HAS_SSE_POPCNT @HAS_SSE_POPCNT@
|
#cmakedefine HAS_SSE_POPCNT
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
#include <smmintrin.h>
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
const __m128 fv = _mm_set1_ps(1.0f);
|
|
||||||
const __m128 fv2 = _mm_set1_ps(2.0f);
|
|
||||||
|
|
||||||
const __m128 ans = _mm_blend_ps(fv, fv2, 2);
|
|
||||||
|
|
||||||
return ((int *)(&ans))[0];
|
|
||||||
}
|
|
|
@ -119,7 +119,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
|
||||||
|
|
||||||
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
||||||
#ifdef HAS_SSE_POPCNT
|
#ifdef HAS_SSE_POPCNT
|
||||||
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
||||||
#else
|
#else
|
||||||
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
||||||
#endif
|
#endif
|
||||||
|
@ -160,7 +160,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
|
||||||
|
|
||||||
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
// !SPEED! We should figure out a way to get rid of these scalar operations.
|
||||||
#ifdef HAS_SSE_POPCNT
|
#ifdef HAS_SSE_POPCNT
|
||||||
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
|
||||||
#else
|
#else
|
||||||
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
|
||||||
#endif
|
#endif
|
||||||
|
@ -283,7 +283,11 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
|
||||||
// nBuckets should be a power of two.
|
// nBuckets should be a power of two.
|
||||||
assert(!(nBuckets & (nBuckets - 1)));
|
assert(!(nBuckets & (nBuckets - 1)));
|
||||||
|
|
||||||
|
#ifdef HAS_SSE_POPCNT
|
||||||
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
|
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
|
||||||
|
#else
|
||||||
|
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
|
||||||
|
#endif
|
||||||
assert(indexPrec >= 2 && indexPrec <= 4);
|
assert(indexPrec >= 2 && indexPrec <= 4);
|
||||||
|
|
||||||
typedef __m128i tInterpPair[2];
|
typedef __m128i tInterpPair[2];
|
||||||
|
|
Loading…
Reference in a new issue