From 790df0736f6f95dcd4af9b4de278c53dfe70720d Mon Sep 17 00:00:00 2001 From: Pavel Krajcevski Date: Thu, 1 Nov 2012 18:56:13 -0400 Subject: [PATCH] Make stats collection threadsafe. --- BPTCEncoder/src/BC7CompressionMode.h | 2 - BPTCEncoder/src/BC7Compressor.cpp | 141 ++++++++++++++++----------- CLTool/src/clunix.cpp | 6 -- Core/include/BlockStats.h | 2 + Core/src/BlockStats.cpp | 85 ++++++++-------- Core/src/TexComp.cpp | 78 ++++++++++----- Core/src/ThreadGroup.cpp | 99 +++++++++++++++---- Core/src/ThreadGroup.h | 22 ++++- Core/src/WorkerQueue.cpp | 67 +++++++++---- Core/src/WorkerQueue.h | 16 +++ 10 files changed, 350 insertions(+), 168 deletions(-) diff --git a/BPTCEncoder/src/BC7CompressionMode.h b/BPTCEncoder/src/BC7CompressionMode.h index edf4aa4..d261433 100755 --- a/BPTCEncoder/src/BC7CompressionMode.h +++ b/BPTCEncoder/src/BC7CompressionMode.h @@ -42,8 +42,6 @@ public: explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque), m_Attributes(&(kModeAttributes[mode])), m_RotateMode(0), m_IndexMode(0) { } ~BC7CompressionMode() { } - static int NumUses[8]; - static void ResetNumUses() { memset(NumUses, 0, sizeof(NumUses)); } double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters); // This switch controls the quality of the simulated annealing optimizer. We will not make diff --git a/BPTCEncoder/src/BC7Compressor.cpp b/BPTCEncoder/src/BC7Compressor.cpp index 6105ac7..47798ae 100755 --- a/BPTCEncoder/src/BC7Compressor.cpp +++ b/BPTCEncoder/src/BC7Compressor.cpp @@ -286,7 +286,6 @@ const uint32 kBC7InterpolationValues[4][16][2] = { }; int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting. -int BC7CompressionMode::NumUses[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = { { 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared }, @@ -1471,11 +1470,6 @@ namespace BC7C stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); } - static int gModeChosen = -1; - static int gBestMode = -1; - static double gModeEstimate[ BC7CompressionMode::kNumModes ]; - static double gModeError[ BC7CompressionMode::kNumModes ]; - static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]); // Compress an image using BC7 compression. Use the inBuf parameter to point to an image in @@ -1485,7 +1479,6 @@ namespace BC7C void CompressImageBC7(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height) { uint32 block[16]; - BC7CompressionMode::ResetNumUses(); BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel()); for(int j = 0; j < height; j += 4) @@ -1494,7 +1487,6 @@ namespace BC7C { // ExtractBlock(inBuf + i * 4, width, block); CompressBC7Block((const uint32 *)inBuf, outBuf); - BC7CompressionMode::NumUses[gBestMode]++; #ifndef NDEBUG uint8 *block = (uint8 *)outBuf; @@ -1530,7 +1522,6 @@ namespace BC7C BlockStatManager &statManager ) { uint32 block[16]; - BC7CompressionMode::ResetNumUses(); BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel()); for(int j = 0; j < height; j += 4) @@ -1539,7 +1530,6 @@ namespace BC7C { // ExtractBlock(inBuf + i * 4, width, block); CompressBC7Block((const uint32 *)inBuf, outBuf, statManager); - BC7CompressionMode::NumUses[gBestMode]++; #ifndef NDEBUG uint8 *block = (uint8 *)outBuf; @@ -1574,16 +1564,25 @@ namespace BC7C } } - static double CompressTwoClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) { + static double CompressTwoClusters( + int shapeIdx, + const RGBACluster *clusters, + uint8 *outBuf, + bool opaque, + double *errors = NULL, + int *modeChosen = NULL + ) { uint8 tempBuf1[16]; BitStream tmpStream1(tempBuf1, 128, 0); BC7CompressionMode compressor1(1, opaque); double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters); - gModeError[1] = bestError; + + if(errors) errors[1] = bestError; + if(modeChosen) *modeChosen = 1; + memcpy(outBuf, tempBuf1, 16); - gModeChosen = 1; if(bestError == 0.0) { return 0.0; } @@ -1593,9 +1592,9 @@ namespace BC7C BC7CompressionMode compressor3(3, opaque); double error = compressor3.Compress(tmpStream3, shapeIdx, clusters); - gModeError[3] = error; + if(errors) errors[3] = error; if(error < bestError) { - gModeChosen = 3; + if(modeChosen) *modeChosen = 3; bestError = error; memcpy(outBuf, tempBuf3, 16); if(bestError == 0.0) { @@ -1610,10 +1609,10 @@ namespace BC7C BitStream tmpStream7(tempBuf7, 128, 0); BC7CompressionMode compressor7(7, opaque); error = compressor7.Compress(tmpStream7, shapeIdx, clusters); - gModeError[7] = error; + if(errors) errors[7] = error; if(error < bestError) { - gModeChosen = 7; - memcpy(outBuf, tempBuf7, 16); + if(modeChosen) *modeChosen = 7; + memcpy(outBuf, tempBuf7, 16); return error; } } @@ -1621,8 +1620,14 @@ namespace BC7C return bestError; } - static double CompressThreeClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) { - + static double CompressThreeClusters( + int shapeIdx, + const RGBACluster *clusters, + uint8 *outBuf, + bool opaque, + double *errors = NULL, + int *modeChosen = NULL + ) { uint8 tempBuf0[16]; BitStream tmpStream0(tempBuf0, 128, 0); @@ -1635,22 +1640,23 @@ namespace BC7C double error, bestError; if(shapeIdx < 16) { bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters); - gModeError[0] = bestError; + if(errors) errors[0] = bestError; } else { bestError = DBL_MAX; - gModeError[0] = -1.0; + if(errors) errors[0] = -1.0; } - gModeChosen = 0; + + if(modeChosen) *modeChosen = 0; memcpy(outBuf, tempBuf0, 16); if(bestError == 0.0) { return 0.0; } error = compressor2.Compress(tmpStream2, shapeIdx, clusters); - gModeError[2] = error; + if(errors) errors[2] = error; if(error < bestError) { - gModeChosen = 2; + if(modeChosen) *modeChosen = 2; memcpy(outBuf, tempBuf2, 16); return error; } @@ -1973,11 +1979,12 @@ namespace BC7C return error; } - static void UpdateErrorEstimate(uint32 mode, double est) { + static void UpdateErrorEstimate(double *estimates, uint32 mode, double est) { + assert(estimates); assert(mode >= 0); assert(mode < BC7CompressionMode::kNumModes); - if(gModeEstimate[mode] == -1.0 || est < gModeEstimate[mode]) { - gModeEstimate[mode] = est; + if(estimates[mode] == -1.0 || est < estimates[mode]) { + estimates[mode] = est; } } @@ -1988,43 +1995,62 @@ namespace BC7C private: uint32 m_BlockIdx; BlockStatManager &m_BSM; + + int *m_ModePtr; + double *m_Estimates; + double *m_Errors; + public: - RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m) { } + RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m) + , m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { } + void SetMode(int *modePtr) { m_ModePtr = modePtr; } + void SetEstimates(double *estimates) { m_Estimates = estimates; } + void SetErrors(double *errors) { m_Errors = errors; } + ~RAIIStatSaver() { - BlockStat s (kBlockStatString[eBlockStat_Mode], gBestMode); + assert(m_ModePtr); + assert(m_Estimates); + assert(m_Errors); + + BlockStat s (kBlockStatString[eBlockStat_Mode], *m_ModePtr); m_BSM.AddStat(m_BlockIdx, s); for(int i = 0; i < BC7CompressionMode::kNumModes; i++) { - s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], gModeEstimate[i]); + s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], m_Estimates[i]); m_BSM.AddStat(m_BlockIdx, s); - s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], gModeError[i]); + s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], m_Errors[i]); m_BSM.AddStat(m_BlockIdx, s); } } }; - uint32 blockIdx = 0; + int bestMode = 0; + double modeEstimate[BC7CompressionMode::kNumModes]; + double modeError[BC7CompressionMode::kNumModes]; + // reset global variables... - gBestMode = 0; + bestMode = 0; for(int i = 0; i < BC7CompressionMode::kNumModes; i++){ - gModeError[i] = gModeEstimate[i] = -1.0; + modeError[i] = modeEstimate[i] = -1.0; } - blockIdx = statManager.BeginBlock(); - + uint32 blockIdx = statManager.BeginBlock(); for(int i = 0; i < kNumBlockStats; i++) { statManager.AddStat(blockIdx, BlockStat(kBlockStatString[i], 0)); } RAIIStatSaver __statsaver__(blockIdx, statManager); + __statsaver__.SetMode(&bestMode); + __statsaver__.SetEstimates(modeEstimate); + __statsaver__.SetErrors(modeError); // All a single color? if(AllOneColor(block)) { BitStream bStrm(outBuf, 128, 0); CompressOptimalColorBC7(*block, bStrm); - gBestMode = 5; + bestMode = 5; BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0); statManager.AddStat(blockIdx, s); @@ -2050,7 +2076,7 @@ namespace BC7C if(transparent) { BitStream bStrm(outBuf, 128, 0); WriteTransparentBlock(bStrm); - gBestMode = 6; + bestMode = 6; BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 1); statManager.AddStat(blockIdx, s); @@ -2065,12 +2091,12 @@ namespace BC7C blockCluster.GetBoundingBox(Min, Max); v = Max - Min; if(v * v == 0) { - gModeEstimate[6] = 0.0; + modeEstimate[6] = 0.0; } else { const float *w = GetErrorMetric(); const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])); - UpdateErrorEstimate(6, err); + UpdateErrorEstimate(modeEstimate, 6, err); #ifdef USE_PCA_FOR_SHAPE_ESTIMATION double eigOne = blockCluster.GetPrincipalEigenvalue(); @@ -2124,11 +2150,11 @@ namespace BC7C #endif if(errEstimate[0] != -1.0) { - UpdateErrorEstimate(1, errEstimate[0]); + UpdateErrorEstimate(modeEstimate, 1, errEstimate[0]); } if(errEstimate[1] != -1.0) { - UpdateErrorEstimate(3, errEstimate[1]); + UpdateErrorEstimate(modeEstimate, 3, errEstimate[1]); } if(err < bestError[0]) { @@ -2138,8 +2164,9 @@ namespace BC7C // If it's small, we'll take it! if(err < 1e-9) { - CompressTwoClusters(i, clusters, outBuf, opaque); - gBestMode = gModeChosen; + int modeChosen; + CompressTwoClusters(i, clusters, outBuf, opaque, modeError, &modeChosen); + bestMode = modeChosen; BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2); statManager.AddStat(blockIdx, s); @@ -2185,11 +2212,11 @@ namespace BC7C #endif if(errEstimate[0] != -1.0) { - UpdateErrorEstimate(0, errEstimate[0]); + UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]); } if(errEstimate[1] != -1.0) { - UpdateErrorEstimate(2, errEstimate[1]); + UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]); } if(err < bestError[1]) { @@ -2199,8 +2226,9 @@ namespace BC7C // If it's small, we'll take it! if(err < 1e-9) { - CompressThreeClusters(i, clusters, outBuf, opaque); - gBestMode = gModeChosen; + int modeChosen; + CompressThreeClusters(i, clusters, outBuf, opaque, modeError, &modeChosen); + bestMode = modeChosen; BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2); statManager.AddStat(blockIdx, s); @@ -2226,8 +2254,8 @@ namespace BC7C BitStream tempStream1 (tempBuf1, 128, 0); BC7CompressionMode compressor(6, opaque); double best = compressor.Compress(tempStream1, 0, &blockCluster); - gModeError[6] = best; - gBestMode = 6; + modeError[6] = best; + bestMode = 6; if(best == 0.0f) { memcpy(outBuf, tempBuf1, 16); return; @@ -2243,7 +2271,7 @@ namespace BC7C double error = compressorTry.Compress(tempStream2, 0, &blockCluster); if(error < best) { - gBestMode = mode; + bestMode = mode; best = error; if(best == 0.0f) { @@ -2257,10 +2285,11 @@ namespace BC7C } } - double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque); + int modeChosen; + double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen); if(error < best) { - gBestMode = gModeChosen; + bestMode = modeChosen; best = error; if(error == 0.0f) { @@ -2273,9 +2302,9 @@ namespace BC7C } if(opaque) { - if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque) < best) { + if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque, modeError, &modeChosen) < best) { - gBestMode = gModeChosen; + bestMode = modeChosen; memcpy(outBuf, tempBuf2, 16); return; } diff --git a/CLTool/src/clunix.cpp b/CLTool/src/clunix.cpp index 26cc8f1..28a825c 100644 --- a/CLTool/src/clunix.cpp +++ b/CLTool/src/clunix.cpp @@ -113,12 +113,6 @@ int main(int argc, char **argv) { } while(knowArg && fileArg < argc); - if(numThreads > 1 && bSaveLog) { - bSaveLog = false; - fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n" - "If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n"); - } - if(fileArg == argc) { PrintUsage(); exit(1); diff --git a/Core/include/BlockStats.h b/Core/include/BlockStats.h index bc04219..7cea424 100644 --- a/Core/include/BlockStats.h +++ b/Core/include/BlockStats.h @@ -13,6 +13,8 @@ public: BlockStat(const BlockStat &); BlockStat &operator=(const BlockStat &); + + void ToString(char *buf, int bufSz) const; private: const enum Type { diff --git a/Core/src/BlockStats.cpp b/Core/src/BlockStats.cpp index a33077d..11a6f09 100644 --- a/Core/src/BlockStats.cpp +++ b/Core/src/BlockStats.cpp @@ -40,6 +40,22 @@ BlockStat &BlockStat::operator=(const BlockStat &other) { memcpy(this, &other, sizeof(*this)); } +void BlockStat::ToString(char *buf, int bufSz) const { + switch(m_Type) { + case BlockStat::eType_Float: + snprintf(buf, bufSz, "%s,%f", m_StatName, m_FloatStat); + break; + + case BlockStat::eType_Int: + snprintf(buf, bufSz, "%s,%llu", m_StatName, m_IntStat); + break; + + default: + assert(!"Unknown stat type!"); + break; + } +} + //////////////////////////////////////////////////////////////////////////////// // // BlockStat Manager Implementation @@ -71,7 +87,7 @@ uint32 BlockStatManager::BeginBlock() { return m_NextBlock-1; } - TCLock lock(m_Mutex); + TCLock lock (m_Mutex); return m_NextBlock++; } @@ -82,9 +98,38 @@ void BlockStatManager::AddStat(uint32 blockIdx, const BlockStat &stat) { return; } + TCLock lock (m_Mutex); m_BlockStatList[blockIdx].AddStat(stat); } +void BlockStatManager::ToFile(const CHAR *filename) { + + FileStream fstr (filename, eFileMode_Write); + + for(int i = 0; i < m_BlockStatListSz; i++) { + const BlockStatList *head = &(m_BlockStatList[i]); + while(head) { + BlockStat s = head->GetStat(); + + CHAR statStr[256]; + s.ToString(statStr, 256); + + CHAR str[256]; + snprintf(str, 256, "%d,%s\n", i, statStr); + + int strLen = strlen(str); + if(strLen > 255) { + str[255] = '\n'; + strLen = 256; + } + + fstr.Write((uint8 *)str, strLen); + + head = head->GetTail(); + } + } +} + //////////////////////////////////////////////////////////////////////////////// // // BlockStat List Implementation @@ -111,6 +156,7 @@ BlockStatManager::BlockStatList::~BlockStatList() { } void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) { + if(strncmp(stat.m_StatName, m_Stat.m_StatName, BlockStat::kStatNameSz) == 0) { m_Stat = stat; } @@ -126,40 +172,3 @@ void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) { } } } - -void BlockStatManager::ToFile(const CHAR *filename) { - - FileStream fstr (filename, eFileMode_Write); - - for(int i = 0; i < m_BlockStatListSz; i++) { - const BlockStatList *head = &(m_BlockStatList[i]); - while(head) { - BlockStat s = head->GetStat(); - - CHAR statStr[256]; - switch(s.m_Type) { - case BlockStat::eType_Float: - snprintf(statStr, 256, "%d,%s,%f\n", i, s.m_StatName, s.m_FloatStat); - break; - - case BlockStat::eType_Int: - snprintf(statStr, 256, "%d,%s,%llu\n", i, s.m_StatName, s.m_IntStat); - break; - - default: - assert(false); - break; - } - - int statStrLen = strlen(statStr); - if(statStrLen > 255) { - statStr[255] = '\n'; - statStrLen = 256; - } - fstr.Write((uint8 *)statStr, statStrLen); - - head = head->GetTail(); - } - } - -} diff --git a/Core/src/TexComp.cpp b/Core/src/TexComp.cpp index 45a45a3..a06951e 100644 --- a/Core/src/TexComp.cpp +++ b/Core/src/TexComp.cpp @@ -113,16 +113,7 @@ static double CompressImageInSerial( return cmpTime; } -static double CompressImageWithThreads( - const unsigned char *imgData, - const unsigned int imgDataSz, - const SCompressionSettings &settings, - unsigned char *outBuf -) { - - CompressionFunc f = ChooseFuncFromSettings(settings); - - ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf); +static double CompressThreadGroup(ThreadGroup &tgrp, const SCompressionSettings &settings) { if(!(tgrp.PrepareThreads())) { assert(!"Thread group failed to prepare threads?!"); return -1.0f; @@ -140,7 +131,29 @@ static double CompressImageWithThreads( cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds(); } - tgrp.CleanUpThreads(); + tgrp.CleanUpThreads(); + return cmpTimeTotal; +} + +static double CompressImageWithThreads( + const unsigned char *imgData, + const unsigned int imgDataSz, + const SCompressionSettings &settings, + unsigned char *outBuf +) { + + CompressionFunc f = ChooseFuncFromSettings(settings); + CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings); + + double cmpTimeTotal = 0.0; + if(fStats && settings.pStatManager) { + ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, fStats, *(settings.pStatManager), outBuf); + cmpTimeTotal = CompressThreadGroup(tgrp, settings); + } + else { + ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf); + cmpTimeTotal = CompressThreadGroup(tgrp, settings); + } double cmpTime = cmpTimeTotal / double(settings.iNumCompressions); return cmpTime; @@ -153,21 +166,40 @@ static double CompressImageWithWorkerQueue( unsigned char *outBuf ) { CompressionFunc f = ChooseFuncFromSettings(settings); + CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings); - WorkerQueue wq ( - settings.iNumCompressions, - settings.iNumThreads, - settings.iJobSize, - imgData, - imgDataSz, - f, - outBuf - ); + double cmpTimeTotal = 0.0; + if(fStats && settings.pStatManager) { + WorkerQueue wq ( + settings.iNumCompressions, + settings.iNumThreads, + settings.iJobSize, + imgData, + imgDataSz, + fStats, + *(settings.pStatManager), + outBuf + ); - wq.Run(); + wq.Run(); + cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds(); + } + else { + WorkerQueue wq ( + settings.iNumCompressions, + settings.iNumThreads, + settings.iJobSize, + imgData, + imgDataSz, + f, + outBuf + ); - return wq.GetStopWatch().TimeInMilliseconds() / - double(settings.iNumCompressions); + wq.Run(); + cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds(); + } + + return cmpTimeTotal / double(settings.iNumCompressions); } bool CompressImageData( diff --git a/Core/src/ThreadGroup.cpp b/Core/src/ThreadGroup.cpp index 819ba76..d76a116 100644 --- a/Core/src/ThreadGroup.cpp +++ b/Core/src/ThreadGroup.cpp @@ -13,13 +13,15 @@ CmpThread::CmpThread() , m_Width(0) , m_Height(0) , m_CmpFunc(NULL) + , m_CmpFuncWithStats(NULL) + , m_StatManager(NULL) , m_OutBuf(NULL) , m_InBuf(NULL) , m_ParentExitFlag(NULL) { } void CmpThread::operator()() { - if(!m_CmpFunc || !m_OutBuf || !m_InBuf + if(!m_OutBuf || !m_InBuf || !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV || !m_StartBarrier || !m_ParentExitFlag @@ -28,6 +30,11 @@ void CmpThread::operator()() { return; } + if(!(m_CmpFunc || (m_CmpFuncWithStats && m_StatManager))) { + fprintf(stderr, "Incorrect thread function pointer.\n"); + return; + } + while(1) { // Wait for signal to start work... m_StartBarrier->Wait(); @@ -36,7 +43,10 @@ void CmpThread::operator()() { return; } - (*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height); + if(m_CmpFunc) + (*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height); + else + (*m_CmpFuncWithStats)(m_InBuf, m_OutBuf, m_Width, m_Height, *m_StatManager); { TCLock lock(*m_ParentCounterLock); @@ -47,19 +57,37 @@ void CmpThread::operator()() { } } - ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf ) : m_StartBarrier(new TCBarrier(numThreads + 1)) , m_FinishMutex(new TCMutex()) , m_FinishCV(new TCConditionVariable()) , m_NumThreads(numThreads) , m_ActiveThreads(0) - , m_Func(func) , m_ImageDataSz(inBufSz) , m_ImageData(inBuf) , m_OutBuf(outBuf) , m_ThreadState(eThreadState_Done) , m_ExitFlag(false) + , m_CompressedBlockSize( + (func == BC7C::CompressImageBC7 +#ifdef HAS_SSE_41 + || func == BC7C::CompressImageBC7SIMD +#endif + )? + 16 + : + 0 + ) + , m_UncompressedBlockSize( + (func == BC7C::CompressImageBC7 +#ifdef HAS_SSE_41 + || func == BC7C::CompressImageBC7SIMD +#endif + )? + 64 + : + 0 + ) { for(int i = 0; i < kMaxNumThreads; i++) { // Thread synchronization primitives @@ -68,6 +96,50 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i m_Threads[i].m_ParentCounter = &m_ThreadsFinished; m_Threads[i].m_StartBarrier = m_StartBarrier; m_Threads[i].m_ParentExitFlag = &m_ExitFlag; + m_Threads[i].m_CmpFunc = func; + } +} + +ThreadGroup::ThreadGroup( + int numThreads, + const unsigned char *inBuf, + unsigned int inBufSz, + CompressionFuncWithStats func, + BlockStatManager &statManager, + unsigned char *outBuf +) + : m_StartBarrier(new TCBarrier(numThreads + 1)) + , m_FinishMutex(new TCMutex()) + , m_FinishCV(new TCConditionVariable()) + , m_NumThreads(numThreads) + , m_ActiveThreads(0) + , m_ImageDataSz(inBufSz) + , m_ImageData(inBuf) + , m_OutBuf(outBuf) + , m_ThreadState(eThreadState_Done) + , m_ExitFlag(false) + , m_CompressedBlockSize( + (func == BC7C::CompressImageBC7Stats)? + 16 + : + 0 + ) + , m_UncompressedBlockSize( + (func == BC7C::CompressImageBC7Stats)? + 64 + : + 0 + ) +{ + for(int i = 0; i < kMaxNumThreads; i++) { + // Thread synchronization primitives + m_Threads[i].m_ParentCounterLock = m_FinishMutex; + m_Threads[i].m_FinishCV = m_FinishCV; + m_Threads[i].m_ParentCounter = &m_ThreadsFinished; + m_Threads[i].m_StartBarrier = m_StartBarrier; + m_Threads[i].m_ParentExitFlag = &m_ExitFlag; + m_Threads[i].m_CmpFuncWithStats = func; + m_Threads[i].m_StatManager = &statManager; } } @@ -77,20 +149,6 @@ ThreadGroup::~ThreadGroup() { delete m_FinishCV; } -unsigned int ThreadGroup::GetCompressedBlockSize() { - if(m_Func == BC7C::CompressImageBC7) return 16; -#ifdef HAS_SSE_41 - if(m_Func == BC7C::CompressImageBC7SIMD) return 16; -#endif -} - -unsigned int ThreadGroup::GetUncompressedBlockSize() { - if(m_Func == BC7C::CompressImageBC7) return 64; -#ifdef HAS_SSE_41 - if(m_Func == BC7C::CompressImageBC7SIMD) return 64; -#endif -} - bool ThreadGroup::PrepareThreads() { // Make sure that threads aren't running. @@ -126,9 +184,8 @@ bool ThreadGroup::PrepareThreads() { CmpThread &t = m_Threads[m_ActiveThreads]; t.m_Height = 4; t.m_Width = numBlocksThisThread * 4; - t.m_CmpFunc = m_Func; - t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize()); - t.m_InBuf = m_ImageData + (blocksProcessed * GetUncompressedBlockSize()); + t.m_OutBuf = m_OutBuf + (blocksProcessed * m_CompressedBlockSize); + t.m_InBuf = m_ImageData + (blocksProcessed * m_UncompressedBlockSize); blocksProcessed += numBlocksThisThread; diff --git a/Core/src/ThreadGroup.h b/Core/src/ThreadGroup.h index 32aa2a4..03ee39a 100644 --- a/Core/src/ThreadGroup.h +++ b/Core/src/ThreadGroup.h @@ -21,6 +21,9 @@ private: CompressionFunc m_CmpFunc; + CompressionFuncWithStats m_CmpFuncWithStats; + BlockStatManager *m_StatManager; + unsigned char *m_OutBuf; const unsigned char *m_InBuf; @@ -29,10 +32,10 @@ private: CmpThread(); public: - void operator ()(); + virtual ~CmpThread() { } + virtual void operator ()(); }; - class ThreadGroup { public: ThreadGroup( @@ -42,6 +45,16 @@ class ThreadGroup { CompressionFunc func, unsigned char *outBuf ); + + ThreadGroup( + int numThreads, + const unsigned char *inBuf, + unsigned int inBufSz, + CompressionFuncWithStats func, + BlockStatManager &statManager, + unsigned char *outBuf + ); + ~ThreadGroup(); bool PrepareThreads(); @@ -75,11 +88,10 @@ class ThreadGroup { // State variables. const unsigned int m_ImageDataSz; const unsigned char *const m_ImageData; - const CompressionFunc m_Func; unsigned char *m_OutBuf; - unsigned int GetCompressedBlockSize(); - unsigned int GetUncompressedBlockSize(); + const unsigned int m_CompressedBlockSize; + const unsigned int m_UncompressedBlockSize; StopWatch m_StopWatch; diff --git a/Core/src/WorkerQueue.cpp b/Core/src/WorkerQueue.cpp index d3c8ee2..f8a3c23 100644 --- a/Core/src/WorkerQueue.cpp +++ b/Core/src/WorkerQueue.cpp @@ -1,20 +1,11 @@ #include "WorkerQueue.h" -#include "BC7Compressor.h" - #include #include #include +#include -template -static inline T max(const T &a, const T &b) { - return (a > b)? a : b; -} - -template -static inline T min(const T &a, const T &b) { - return (a < b)? a : b; -} +#include "BC7Compressor.h" template static inline void clamp(T &x, const T &min, const T &max) { @@ -36,7 +27,10 @@ void WorkerThread::operator()() { } CompressionFunc f = m_Parent->GetCompressionFunc(); - if(!f) { + CompressionFuncWithStats fStat = m_Parent->GetCompressionFuncWithStats(); + BlockStatManager *statManager = m_Parent->GetBlockStatManager(); + + if(!(f || (fStat && statManager))) { fprintf(stderr, "%s\n", "Illegal worker queue initialization -- compression func is NULL."); return; } @@ -63,7 +57,11 @@ void WorkerThread::operator()() { { const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx); uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx); - (*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4); + if(f) + (*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4); + else + (*fStat)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4, *statManager); + break; } @@ -91,22 +89,57 @@ WorkerQueue::WorkerQueue( uint8 *outBuf ) : m_NumCompressions(0) - , m_TotalNumCompressions(max(uint32(1), numCompressions)) + , m_TotalNumCompressions(std::max(uint32(1), numCompressions)) , m_NumThreads(numThreads) , m_WaitingThreads(0) , m_ActiveThreads(0) - , m_JobSize(max(uint32(1), jobSize)) + , m_JobSize(std::max(uint32(1), jobSize)) , m_InBufSz(inBufSz) , m_InBuf(inBuf) , m_OutBuf(outBuf) , m_NextBlock(0) , m_CompressionFunc(func) + , m_CompressionFuncWithStats(NULL) + , m_BlockStatManager(NULL) { clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads)); #ifndef NDEBUG if(m_InBufSz % 64) { - fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?"); + fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n"); + } +#endif +} + +WorkerQueue::WorkerQueue( + uint32 numCompressions, + uint32 numThreads, + uint32 jobSize, + const uint8 *inBuf, + uint32 inBufSz, + CompressionFuncWithStats func, + BlockStatManager &blockStatManager, + uint8 *outBuf +) + : m_NumCompressions(0) + , m_TotalNumCompressions(std::max(uint32(1), numCompressions)) + , m_NumThreads(numThreads) + , m_WaitingThreads(0) + , m_ActiveThreads(0) + , m_JobSize(std::max(uint32(1), jobSize)) + , m_InBufSz(inBufSz) + , m_InBuf(inBuf) + , m_OutBuf(outBuf) + , m_NextBlock(0) + , m_CompressionFunc(NULL) + , m_CompressionFuncWithStats(func) + , m_BlockStatManager(&blockStatManager) +{ + clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads)); + +#ifndef NDEBUG + if(m_InBufSz % 64) { + fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n"); } #endif } @@ -182,7 +215,7 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) { // The number of blocks to process is either the job size // or the number of blocks remaining. - int blocksProcessed = min(m_JobSize, totalBlocks - m_NextBlock); + int blocksProcessed = std::min(m_JobSize, totalBlocks - m_NextBlock); m_NumBlocks[threadIdx] = blocksProcessed; // Make sure the next block is updated. diff --git a/Core/src/WorkerQueue.h b/Core/src/WorkerQueue.h index c8aad39..3a13546 100644 --- a/Core/src/WorkerQueue.h +++ b/Core/src/WorkerQueue.h @@ -44,6 +44,17 @@ class WorkerQueue { uint8 *outBuf ); + WorkerQueue( + uint32 numCompressions, + uint32 numThreads, + uint32 jobSize, + const uint8 *inBuf, + uint32 inBufSz, + CompressionFuncWithStats func, + BlockStatManager &blockStatManager, + uint8 *outBuf + ); + ~WorkerQueue() { } // Runs the workers @@ -80,6 +91,11 @@ class WorkerQueue { const CompressionFunc m_CompressionFunc; CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; } + BlockStatManager *m_BlockStatManager; + const CompressionFuncWithStats m_CompressionFuncWithStats; + CompressionFuncWithStats GetCompressionFuncWithStats() const { return m_CompressionFuncWithStats; } + BlockStatManager *GetBlockStatManager() const { return m_BlockStatManager; } + StopWatch m_StopWatch; WorkerThread::EAction AcceptThreadData(uint32 threadIdx);