diff --git a/Core/src/ThreadGroup.cpp b/Core/src/ThreadGroup.cpp
index 12a2aec..83c4bd1 100644
--- a/Core/src/ThreadGroup.cpp
+++ b/Core/src/ThreadGroup.cpp
@@ -1,4 +1,5 @@
 #include "ThreadGroup.h"
+#include "BC7Compressor.h"
 #include
 #include
 
@@ -22,18 +23,45 @@ void CmpThread::operator()() {
   }
 
   m_Barrier->wait();
+
+  (*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
 }
 
-ThreadGroup::ThreadGroup( int numThreads, const ImageFile &, CompressionFunc func, unsigned char *outBuf )
+// Sets up a group of numThreads workers that compress image with func into
+// outBuf. image is held by reference and outBuf by pointer (no copies); the
+// new initializers follow their declaration order in ThreadGroup.h.
+ThreadGroup::ThreadGroup( int numThreads, const ImageFile &image, CompressionFunc func, unsigned char *outBuf )
   : m_Barrier(new boost::barrier(numThreads))
   , m_NumThreads(numThreads)
   , m_ActiveThreads(0)
-{ }
+  , m_Image(image)
+  , m_Func(func)
+  , m_OutBuf(outBuf)
+{
+  // Every worker slot waits on the same barrier before it starts compressing.
+  for(int i = 0; i < kMaxNumThreads; i++)
+    m_Threads[i].m_Barrier = m_Barrier;
+}
 
 ThreadGroup::~ThreadGroup() {
   delete m_Barrier;
 }
 
+// Bytes per compressed block written by m_Func (used to offset m_OutBuf).
+unsigned int ThreadGroup::GetCompressedBlockSize() {
+  if(m_Func == BC7C::CompressImageBC7) return 16;
+  if(m_Func == BC7C::CompressImageBC7SIMD) return 16;
+  assert(!"Unknown compression function"); // would otherwise fall off the end (UB)
+  return 0;
+}
+
+// Per-block stride into m_Image.GetPixels(): 64 per 4x4 block (presumably bytes -- confirm element type).
+unsigned int ThreadGroup::GetUncompressedBlockSize() {
+  if(m_Func == BC7C::CompressImageBC7) return 64;
+  if(m_Func == BC7C::CompressImageBC7SIMD) return 64;
+  assert(!"Unknown compression function"); // would otherwise fall off the end (UB)
+  return 0;
+}
 
 void ThreadGroup::Start() {
 
@@ -42,12 +70,36 @@ void ThreadGroup::Start() {
     return;
   }
 
+  // Make sure that the image dimensions are multiples of 4
+  assert((m_Image.GetWidth() & 3) == 0);
+  assert((m_Image.GetHeight() & 3) == 0);
+
+  // We can assume that the image data is in block stream order
+  // so, the size of the data given to each thread will be (nb*4)x4
+  int numBlocks = (m_Image.GetWidth() * m_Image.GetHeight()) / 16;
+
+  int blocksProcessed = 0;
+  int blocksPerThread = (numBlocks/m_NumThreads) + ((numBlocks % m_NumThreads)? 1 : 0);
+
   for(int i = 0; i < m_NumThreads; i++) {
 
     if(m_ActiveThreads >= kMaxNumThreads)
       break;
 
+    int numBlocksThisThread = blocksPerThread;
+    if(blocksProcessed + numBlocksThisThread > numBlocks) {
+      numBlocksThisThread = numBlocks - blocksProcessed;
+    }
+
     CmpThread &t = m_Threads[m_ActiveThreads];
+    t.m_Height = 4;
+    t.m_Width = numBlocksThisThread * 4;
+    t.m_CmpFunc = m_Func;
+    t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize());
+    t.m_InBuf = m_Image.GetPixels() + (blocksProcessed * GetUncompressedBlockSize());
+
+    blocksProcessed += numBlocksThisThread;
+
     m_ThreadHandles[m_ActiveThreads] = new boost::thread(t);
 
     m_ActiveThreads++;
diff --git a/Core/src/ThreadGroup.h b/Core/src/ThreadGroup.h
index a0605bc..bb2a5ce 100644
--- a/Core/src/ThreadGroup.h
+++ b/Core/src/ThreadGroup.h
@@ -42,7 +42,7 @@ class ThreadGroup {
   const StopWatch &GetStopWatch() const { return m_StopWatch; }
 
  private:
-  boost::barrier *m_Barrier;
+  boost::barrier *const m_Barrier;
 
   static const int kMaxNumThreads = 256;
   const int m_NumThreads;
@@ -52,6 +52,14 @@ class ThreadGroup {
   CmpThread m_Threads[kMaxNumThreads];
   boost::thread *m_ThreadHandles[kMaxNumThreads];
 
+  // State variables.
+  const ImageFile &m_Image;
+  const CompressionFunc m_Func;
+  unsigned char *m_OutBuf;
+
+  unsigned int GetCompressedBlockSize();
+  unsigned int GetUncompressedBlockSize();
+
   StopWatch m_StopWatch;
 };
 