diff --git a/BPTCEncoder/CMakeLists.txt b/BPTCEncoder/CMakeLists.txt index c053b96..de660e7 100644 --- a/BPTCEncoder/CMakeLists.txt +++ b/BPTCEncoder/CMakeLists.txt @@ -1,24 +1,44 @@ INCLUDE_DIRECTORIES(${TexC_SOURCE_DIR}/BPTCEncoder/include) +INCLUDE_DIRECTORIES(${TexC_BINARY_DIR}/BPTCEncoder/include) -INCLUDE(CheckCXXSourceCompiles) +INCLUDE(CheckCXXSourceRuns) SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) IF(CMAKE_COMPILER_IS_GNUCC) - SET(CMAKE_REQUIRED_FLAGS -msse4.1) - CHECK_CXX_SOURCE_COMPILES("config/testsse4.1.cpp" HAS_SSE_41) + SET(CMAKE_REQUIRED_FLAGS -msse4.1 -E) + CHECK_CXX_SOURCE_RUNS(" + #include <smmintrin.h> + int main() { + const __m128 fv = _mm_set1_ps(1.0f); + const __m128 fv2 = _mm_set1_ps(2.0f); + + const __m128 ans = _mm_blend_ps(fv, fv2, 2); + + return ((int *)(&ans))[0]; + }" + HAS_SSE_41 + ) IF(HAS_SSE_41) SET(CMAKE_REQUIRED_FLAGS -msse4.2) - CHECK_CXX_SOURCE_COMPILES("config/testsse4.2.cpp" HAS_SSE_POPCNT) + CHECK_CXX_SOURCE_RUNS(" + #include <nmmintrin.h> + int main() { + const unsigned int testMe = 5; + return _mm_popcnt_u32(testMe); + }" + HAS_SSE_POPCNT + ) ENDIF(HAS_SSE_41) ELSEIF(MSVC) +#!FIXME! 
ENDIF() SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) CONFIGURE_FILE( "config/BC7Config.h.in" - "src/BC7Config.h" + "include/BC7Config.h" ) IF(CMAKE_COMPILER_IS_GNUCC) @@ -38,6 +58,21 @@ SET( SOURCES ) IF( HAS_SSE_41 ) + + IF ( HAS_SSE_POPCNT ) + IF( MSVC ) + ADD_DEFINITIONS( /arch:SSE4.2 ) + ELSE() #Assume GCC + ADD_DEFINITIONS( -msse4.2 ) + ENDIF() + ELSE() + IF( MSVC ) + ADD_DEFINITIONS( /arch:SSE4.1 ) + ELSE() #Assume GCC + ADD_DEFINITIONS( -msse4.1 ) + ENDIF() + ENDIF() + SET( HEADERS ${HEADERS} src/RGBAEndpointsSIMD.h @@ -52,6 +87,6 @@ IF( HAS_SSE_41 ) ENDIF( HAS_SSE_41 ) ADD_LIBRARY( BPTCEncoder + ${HEADERS} ${SOURCES} - ${SIMD_SOURCES} ) diff --git a/BPTCEncoder/config/BC7Config.h.in b/BPTCEncoder/config/BC7Config.h.in index 158812d..599835a 100644 --- a/BPTCEncoder/config/BC7Config.h.in +++ b/BPTCEncoder/config/BC7Config.h.in @@ -5,4 +5,4 @@ // explicitly by the CMake build process. // Do we have the proper popcnt instruction defined? -#define HAS_SSE_POPCNT @HAS_SSE_POPCNT@ +#cmakedefine HAS_SSE_POPCNT diff --git a/BPTCEncoder/config/testsse4.1.cpp b/BPTCEncoder/config/testsse4.1.cpp deleted file mode 100644 index 2792820..0000000 --- a/BPTCEncoder/config/testsse4.1.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include <smmintrin.h> - -int main() { - const __m128 fv = _mm_set1_ps(1.0f); - const __m128 fv2 = _mm_set1_ps(2.0f); - - const __m128 ans = _mm_blend_ps(fv, fv2, 2); - - return ((int *)(&ans))[0]; -} diff --git a/BPTCEncoder/src/RGBAEndpointsSIMD.cpp b/BPTCEncoder/src/RGBAEndpointsSIMD.cpp index 7625bee..efe16ff 100755 --- a/BPTCEncoder/src/RGBAEndpointsSIMD.cpp +++ b/BPTCEncoder/src/RGBAEndpointsSIMD.cpp @@ -119,7 +119,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask) const { // !SPEED! We should figure out a way to get rid of these scalar operations. 
#ifdef HAS_SSE_POPCNT - const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]); + const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); #else const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); #endif @@ -160,7 +160,7 @@ __m128i RGBAVectorSIMD::ToPixel(const __m128i &qmask, const int pBit) const { // !SPEED! We should figure out a way to get rid of these scalar operations. #ifdef HAS_SSE_POPCNT - const uint32 prec = _mm_popcnt32(((uint32 *)(&qmask))[0]); + const uint32 prec = _mm_popcnt_u32(((uint32 *)(&qmask))[0]); #else const uint32 prec = popcnt32(((uint32 *)(&qmask))[0]); #endif @@ -283,7 +283,11 @@ float RGBAClusterSIMD::QuantizedError(const RGBAVectorSIMD &p1, const RGBAVector // nBuckets should be a power of two. assert(!(nBuckets & (nBuckets - 1))); +#ifdef HAS_SSE_POPCNT const uint8 indexPrec = 8-_mm_popcnt_u32(~(nBuckets - 1) & 0xFF); +#else + const uint8 indexPrec = 8-popcnt32(~(nBuckets - 1) & 0xFF); +#endif assert(indexPrec >= 2 && indexPrec <= 4); typedef __m128i tInterpPair[2];