diff --git a/BPTCEncoder/include/BC7Compressor.h b/BPTCEncoder/include/BC7Compressor.h index a80ca66..656979b 100755 --- a/BPTCEncoder/include/BC7Compressor.h +++ b/BPTCEncoder/include/BC7Compressor.h @@ -64,6 +64,7 @@ //-------------------------------------------------------------------------------------- #include "BC7Config.h" +#include "CompressionJob.h" class BlockStatManager; @@ -124,15 +125,10 @@ namespace BC7C #endif #ifdef HAS_ATOMICS - // This is a threadsafe version of the compression function. Once it is called on a certain block of data, it will - // compress the entire amount of data. However, if the function is called multiple times from multiple threads then they - // will all dispatch to compress the data that they can and the one that finishes the compression resets the function. - // - // The function should be used as follows: - // for(int i = 0; i < NTHREADS; i++) { - // startThread(function, args); - // join_threads(); - void CompressImageBC7Atomic(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height); + // This is a threadsafe version of the compression function that is designed to compress a list of + // textures. If this function is called with the same argument from multiple threads, they will work + // together to compress all of the images in the list. + void CompressAtomic(CompressionJobList &); #endif // Decompress the image given as BC7 data to R8G8B8A8 format. Width and Height are the dimensions of the image in pixels. 
diff --git a/BPTCEncoder/src/BC7Compressor.cpp b/BPTCEncoder/src/BC7Compressor.cpp index 8a76a9b..b7ff093 100755 --- a/BPTCEncoder/src/BC7Compressor.cpp +++ b/BPTCEncoder/src/BC7Compressor.cpp @@ -91,13 +91,6 @@ #include #include -#ifdef _MSC_VER -# define ALIGN(x) __declspec( align(x) ) -#else -# define ALIGN(x) __attribute__((aligned(x))) -#endif -#define ALIGN_SSE ALIGN(16) - // #define USE_PCA_FOR_SHAPE_ESTIMATION enum EBlockStats { @@ -1424,6 +1417,8 @@ namespace BC7C static int gQualityLevel = 50; void SetQualityLevel(int q) { gQualityLevel = std::max(0, q); + const int kMaxIters = BC7CompressionMode::kMaxAnnealingIterations; + BC7CompressionMode::MaxAnnealingIterations = std::min(kMaxIters, GetQualityLevel()); } int GetQualityLevel() { return gQualityLevel; } @@ -1556,72 +1551,33 @@ namespace BC7C #endif // Variables used for synchronization in threadsafe implementation. - static ALIGN(32) uint32 _currentBlock = 0; - static ALIGN(32) uint32 _initialized = 0; - static const unsigned char *_inBuf; - static unsigned char *_outBuf; - static bool _initializedFlag = false; + void CompressAtomic(CompressionJobList &cjl) { + + uint32 jobIdx; + while((jobIdx = cjl.m_CurrentJobIndex) < cjl.GetNumJobs()) { - void CompressImageBC7Atomic( - const unsigned char *inBuf, - unsigned char *outBuf, - unsigned int width, - unsigned int height - ) { + // !HACK! ... Microsoft has this defined + #undef GetJob - bool myData = false; - while(!myData) { - - // Have we initialized any data? - if(!TestAndSet(&_initialized)) { - - // I'm the first one here... initialize MY data... - - const int kMaxIters = BC7CompressionMode::kMaxAnnealingIterations; - BC7CompressionMode::MaxAnnealingIterations = std::min(kMaxIters, GetQualityLevel()); - - _currentBlock = 0; - - _inBuf = inBuf; - _outBuf = outBuf; - myData = true; - - _initializedFlag = true; - } - - // We've initialized data... is it mine? 
- else if(_inBuf == inBuf && _outBuf == outBuf) { - myData = true; - } - - const uint32 nBlocks = (height * width) / 16; - - // Make sure that whoever is initializing data is working on it... - while(!_initializedFlag && _currentBlock < nBlocks) { - YieldThread(); - } + const CompressionJob *cj = cjl.GetJob(jobIdx); + const uint32 nBlocks = (cj->height * cj->width) / 16; // Help finish whatever texture we're compressing before we start again on my work... uint32 blockIdx; - while((blockIdx = FetchAndAdd(&_currentBlock)) < nBlocks) { - unsigned char *out = _outBuf + (16 * blockIdx); - const unsigned char *in = _inBuf + (64 * blockIdx); + while((blockIdx = FetchAndAdd(&(cjl.m_CurrentBlockIndex))) < nBlocks) { + unsigned char *out = cj->outBuf + (16 * blockIdx); + const unsigned char *in = cj->inBuf + (64 * blockIdx); CompressBC7Block((const uint32 *)in, out); - YieldThread(); // Just to give other threads a chance to make some progress } - // If we've allocated someone to compress the last block, then reset the initialization... - if(blockIdx == nBlocks) { - _initializedFlag = false; - ResetTestAndSet(&_initialized); - } - else if(blockIdx > nBlocks) { - // Wait for last block to finish.. - while(_initialized) { - YieldThread(); - } + if(TestAndSet(cjl.GetFinishedFlag(jobIdx))) { + cjl.m_CurrentJobIndex++; + cjl.m_CurrentBlockIndex = 0; } + + // Wait until this texture finishes. 
+ while(cjl.m_CurrentJobIndex == jobIdx); } } #endif // HAS_ATOMICS diff --git a/Core/include/TexComp.h b/Core/include/TexComp.h index 61c05a6..5784f4a 100644 --- a/Core/include/TexComp.h +++ b/Core/include/TexComp.h @@ -45,10 +45,10 @@ #define _TEX_COMP_H_ #include "CompressedImage.h" +#include "CompressionJob.h" // Forward declarations class ImageFile; -class CompressedImage; class BlockStatManager; struct SCompressionSettings { diff --git a/Core/src/TexComp.cpp b/Core/src/TexComp.cpp index 9434a40..5da1a9c 100644 --- a/Core/src/TexComp.cpp +++ b/Core/src/TexComp.cpp @@ -105,11 +105,6 @@ static CompressionFunc ChooseFuncFromSettings(const SCompressionSettings &s) { return BC7C::CompressImageBC7SIMD; } #endif - -#ifdef HAS_ATOMICS - if(s.bUseAtomics) - return BC7C::CompressImageBC7Atomic; -#endif return BC7C::CompressImageBC7; } break; @@ -162,38 +157,30 @@ static double CompressImageInSerial( } class AtomicThreadUnit : public TCCallable { - const unsigned char *const m_InBuf; - unsigned char *m_OutBuf; - const unsigned int m_Height; - const unsigned int m_Width; + CompressionJobList &m_CompressionJobList; TCBarrier *m_Barrier; - const unsigned int m_NumCompressions; CompressionFunc m_CmpFnc; public: AtomicThreadUnit( - const unsigned char *const inBuf, - unsigned char *outBuf, - const unsigned int height, - const unsigned int width, + CompressionJobList &_cjl, TCBarrier *barrier, - const unsigned int nCompressions, CompressionFunc f ) : TCCallable(), - m_InBuf(inBuf), - m_OutBuf(outBuf), - m_Height(height), - m_Width(width), + m_CompressionJobList(_cjl), m_Barrier(barrier), - m_NumCompressions(nCompressions), m_CmpFnc(f) { } virtual ~AtomicThreadUnit() { } virtual void operator()() { m_Barrier->Wait(); - for(uint32 i = 0; i < m_NumCompressions; i++) - (*m_CmpFnc)(m_InBuf, m_OutBuf, m_Width, m_Height); + if(m_CmpFnc == BC7C::Compress) { + BC7C::CompressAtomic(m_CompressionJobList); + } + else { + assert(!"I don't know what we're compressing..."); + } } }; @@ 
-205,23 +192,35 @@ static double CompressImageWithAtomics( ) { CompressionFunc f = ChooseFuncFromSettings(settings); + // Setup compression list... const int nTimes = settings.iNumCompressions; + CompressionJobList cjl (nTimes); + for(int i = 0; i < nTimes; i++) { + if(!cjl.AddJob(CompressionJob(imgData, outBuf, height, width))) { + assert(!"Error adding compression job to job list!"); + } + } + const int nThreads = settings.iNumThreads; // Allocate resources... - TCBarrier barrier (nThreads); + TCBarrier barrier (nThreads+1); TCThread **threads = (TCThread **)malloc(nThreads * sizeof(TCThread *)); AtomicThreadUnit **units = (AtomicThreadUnit **)malloc(nThreads * sizeof(AtomicThreadUnit *)); // Launch threads... - StopWatch sw; - sw.Start(); for(int i = 0; i < nThreads; i++) { - AtomicThreadUnit *u = new AtomicThreadUnit(imgData, outBuf, height, width, &barrier, nTimes, f); + AtomicThreadUnit *u = new AtomicThreadUnit(cjl, &barrier, f); threads[i] = new TCThread(*u); units[i] = u; } + // Wait here to make sure that our timer is correct... + barrier.Wait(); + + StopWatch sw; + sw.Start(); + // Wait for threads to finish for(int i = 0; i < nThreads; i++) { threads[i]->Join();