Fix SIMD recognition, at least for GCC targets. Still needs to be tested with MSVC.

This commit is contained in:
Pavel Krajcevski 2012-08-25 12:58:20 -04:00
parent efdca4b5bb
commit d68a119bc9
4 changed files with 48 additions and 19 deletions

View file

@ -1,24 +1,44 @@
INCLUDE_DIRECTORIES(${TexC_SOURCE_DIR}/BPTCEncoder/include) INCLUDE_DIRECTORIES(${TexC_SOURCE_DIR}/BPTCEncoder/include)
INCLUDE_DIRECTORIES(${TexC_BINARY_DIR}/BPTCEncoder/include)
INCLUDE(CheckCXXSourceCompiles) INCLUDE(CheckCXXSourceRuns)
SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
IF(CMAKE_COMPILER_IS_GNUCC) IF(CMAKE_COMPILER_IS_GNUCC)
SET(CMAKE_REQUIRED_FLAGS -msse4.1) SET(CMAKE_REQUIRED_FLAGS -msse4.1 -E)
CHECK_CXX_SOURCE_COMPILES("config/testsse4.1.cpp" HAS_SSE_41) CHECK_CXX_SOURCE_RUNS("
#include <smmintrin.h>
int main() {
  // SSE4.1 probe. CHECK_CXX_SOURCE_RUNS treats only exit code 0 as success,
  // so report the outcome explicitly instead of returning raw float bits.
  const __m128 ones = _mm_set1_ps(1.0f);
  const __m128 twos = _mm_set1_ps(2.0f);
  // Mask 2 selects only lane 1 from the second operand; lane 0 stays 1.0f.
  const __m128 blended = _mm_blend_ps(ones, twos, 2);
  return (_mm_cvtss_f32(blended) == 1.0f) ? 0 : 1;
}"
HAS_SSE_41
)
IF(HAS_SSE_41) IF(HAS_SSE_41)
SET(CMAKE_REQUIRED_FLAGS -msse4.2) SET(CMAKE_REQUIRED_FLAGS -msse4.2)
CHECK_CXX_SOURCE_COMPILES("config/testsse4.2.cpp" HAS_SSE_POPCNT) CHECK_CXX_SOURCE_RUNS("
#include <smmintrin.h>
int main() {
  // popcnt probe. CHECK_CXX_SOURCE_RUNS treats only exit code 0 as success,
  // so compare against the known answer instead of returning the bit count
  // itself (which is non-zero and would always be reported as a failure).
  const unsigned int testMe = 5;  // binary 101: exactly two bits set
  return (_mm_popcnt_u32(testMe) == 2) ? 0 : 1;
}"
HAS_SSE_POPCNT
)
ENDIF(HAS_SSE_41) ENDIF(HAS_SSE_41)
ELSEIF(MSVC) ELSEIF(MSVC)
# FIXME: SSE4.1 / popcnt detection is not implemented for MSVC yet.
ENDIF() ENDIF()
SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
CONFIGURE_FILE( CONFIGURE_FILE(
"config/BC7Config.h.in" "config/BC7Config.h.in"
"src/BC7Config.h" "include/BC7Config.h"
) )
IF(CMAKE_COMPILER_IS_GNUCC) IF(CMAKE_COMPILER_IS_GNUCC)
@ -38,6 +58,21 @@ SET( SOURCES
) )
IF( HAS_SSE_41 ) IF( HAS_SSE_41 )
# Enable the strongest SSE4 level the configure-time probes detected:
# SSE4.2 when HAS_SSE_POPCNT is set, SSE4.1 otherwise. The flag is pushed
# through ADD_DEFINITIONS, so it applies to this whole directory.
# NOTE(review): confirm the /arch values below are accepted by the MSVC
# versions this project targets.
IF( MSVC )
  IF( HAS_SSE_POPCNT )
    ADD_DEFINITIONS( /arch:SSE4.2 )
  ELSE()
    ADD_DEFINITIONS( /arch:SSE4.1 )
  ENDIF()
ELSE() # Anything that is not MSVC is assumed to take GCC-style flags
  IF( HAS_SSE_POPCNT )
    ADD_DEFINITIONS( -msse4.2 )
  ELSE()
    ADD_DEFINITIONS( -msse4.1 )
  ENDIF()
ENDIF()
SET( HEADERS SET( HEADERS
${HEADERS} ${HEADERS}
src/RGBAEndpointsSIMD.h src/RGBAEndpointsSIMD.h
@ -52,6 +87,6 @@ IF( HAS_SSE_41 )
ENDIF( HAS_SSE_41 ) ENDIF( HAS_SSE_41 )
ADD_LIBRARY( BPTCEncoder ADD_LIBRARY( BPTCEncoder
${HEADERS}
${SOURCES} ${SOURCES}
${SIMD_SOURCES}
) )

View file

@ -5,4 +5,4 @@
// explicitly by the CMake build process. // explicitly by the CMake build process.
// Do we have the proper popcnt instruction defined? // Do we have the proper popcnt instruction defined?
#define HAS_SSE_POPCNT @HAS_SSE_POPCNT@ #cmakedefine HAS_SSE_POPCNT

View file

@ -1,10 +0,0 @@
#include <smmintrin.h>
// Stand-alone SSE4.1 probe program (removed by this commit; presumably the
// config/testsse4.1.cpp file the old CHECK_CXX_SOURCE_COMPILES call named).
// It exercises _mm_blend_ps from <smmintrin.h>.
int main() {
// Two vectors with every lane set to 1.0f and 2.0f respectively.
const __m128 fv = _mm_set1_ps(1.0f);
const __m128 fv2 = _mm_set1_ps(2.0f);
// Blend mask 2 (bit 1 set): lane 1 comes from fv2, the other lanes from fv.
const __m128 ans = _mm_blend_ps(fv, fv2, 2);
// Reinterprets the first 32 bits of the result as an int and returns them as
// the exit status, so the blend result is actually used by the program.
return ((int *)(&ans))[0];
}

View file

@ -119,7 +119,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const {
// !SPEED! We should figure out a way to get rid of these scalar operations. // !SPEED! We should figure out a way to get rid of these scalar operations.
#ifdef HAS_SSE_POPCNT #ifdef HAS_SSE_POPCNT
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]); const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else #else
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif #endif
@ -160,7 +160,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const {
// !SPEED! We should figure out a way to get rid of these scalar operations. // !SPEED! We should figure out a way to get rid of these scalar operations.
#ifdef HAS_SSE_POPCNT #ifdef HAS_SSE_POPCNT
const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]); const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]);
#else #else
const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]);
#endif #endif
@ -283,7 +283,11 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector
// nBuckets should be a power of two. // nBuckets should be a power of two.
assert(!(nBuckets & (nBuckets - 1))); assert(!(nBuckets & (nBuckets - 1)));
#ifdef HAS_SSE_POPCNT
const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF); const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF);
#else
const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF);
#endif
assert(indexPrec >= 2 && indexPrec <= 4); assert(indexPrec >= 2 && indexPrec <= 4);
typedef __m128i tInterpPair[2]; typedef __m128i tInterpPair[2];