Make stats collection threadsafe.

This commit is contained in:
Pavel Krajcevski 2012-11-01 18:56:13 -04:00
parent 1abc54cf21
commit 790df0736f
10 changed files with 350 additions and 168 deletions

View file

@ -42,8 +42,6 @@ public:
// Creates a compressor for the given BC7 mode.
//   mode   - index into kModeAttributes; it is used directly with no range
//            check here, so callers must pass a valid mode index.
//   opaque - stored in m_IsOpaque (defaults to true).
// The rotate mode and index mode both start at 0.
explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque), m_Attributes(&(kModeAttributes[mode])), m_RotateMode(0), m_IndexMode(0) { }
~BC7CompressionMode() { }
static int NumUses[8];
// Zeroes every entry of the static NumUses counter array.
// NOTE(review): NumUses is static state and this function takes no lock --
// confirm callers do not invoke it while other threads are compressing.
static void ResetNumUses() { memset(NumUses, 0, sizeof(NumUses)); }
double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters);
// This switch controls the quality of the simulated annealing optimizer. We will not make

View file

@ -286,7 +286,6 @@ const uint32 kBC7InterpolationValues[4][16][2] = {
};
int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting.
int BC7CompressionMode::NumUses[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = {
{ 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared },
@ -1471,11 +1470,6 @@ namespace BC7C
stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31);
}
static int gModeChosen = -1;
static int gBestMode = -1;
static double gModeEstimate[ BC7CompressionMode::kNumModes ];
static double gModeError[ BC7CompressionMode::kNumModes ];
static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
// Compress an image using BC7 compression. Use the inBuf parameter to point to an image in
@ -1485,7 +1479,6 @@ namespace BC7C
void CompressImageBC7(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height)
{
uint32 block[16];
BC7CompressionMode::ResetNumUses();
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
for(int j = 0; j < height; j += 4)
@ -1494,7 +1487,6 @@ namespace BC7C
{
// ExtractBlock(inBuf + i * 4, width, block);
CompressBC7Block((const uint32 *)inBuf, outBuf);
BC7CompressionMode::NumUses[gBestMode]++;
#ifndef NDEBUG
uint8 *block = (uint8 *)outBuf;
@ -1530,7 +1522,6 @@ namespace BC7C
BlockStatManager &statManager
) {
uint32 block[16];
BC7CompressionMode::ResetNumUses();
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
for(int j = 0; j < height; j += 4)
@ -1539,7 +1530,6 @@ namespace BC7C
{
// ExtractBlock(inBuf + i * 4, width, block);
CompressBC7Block((const uint32 *)inBuf, outBuf, statManager);
BC7CompressionMode::NumUses[gBestMode]++;
#ifndef NDEBUG
uint8 *block = (uint8 *)outBuf;
@ -1574,16 +1564,25 @@ namespace BC7C
}
}
static double CompressTwoClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) {
static double CompressTwoClusters(
int shapeIdx,
const RGBACluster *clusters,
uint8 *outBuf,
bool opaque,
double *errors = NULL,
int *modeChosen = NULL
) {
uint8 tempBuf1[16];
BitStream tmpStream1(tempBuf1, 128, 0);
BC7CompressionMode compressor1(1, opaque);
double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters);
gModeError[1] = bestError;
if(errors) errors[1] = bestError;
if(modeChosen) *modeChosen = 1;
memcpy(outBuf, tempBuf1, 16);
gModeChosen = 1;
if(bestError == 0.0) {
return 0.0;
}
@ -1593,9 +1592,9 @@ namespace BC7C
BC7CompressionMode compressor3(3, opaque);
double error = compressor3.Compress(tmpStream3, shapeIdx, clusters);
gModeError[3] = error;
if(errors) errors[3] = error;
if(error < bestError) {
gModeChosen = 3;
if(modeChosen) *modeChosen = 3;
bestError = error;
memcpy(outBuf, tempBuf3, 16);
if(bestError == 0.0) {
@ -1610,10 +1609,10 @@ namespace BC7C
BitStream tmpStream7(tempBuf7, 128, 0);
BC7CompressionMode compressor7(7, opaque);
error = compressor7.Compress(tmpStream7, shapeIdx, clusters);
gModeError[7] = error;
if(errors) errors[7] = error;
if(error < bestError) {
gModeChosen = 7;
memcpy(outBuf, tempBuf7, 16);
if(modeChosen) *modeChosen = 7;
memcpy(outBuf, tempBuf7, 16);
return error;
}
}
@ -1621,8 +1620,14 @@ namespace BC7C
return bestError;
}
static double CompressThreeClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) {
static double CompressThreeClusters(
int shapeIdx,
const RGBACluster *clusters,
uint8 *outBuf,
bool opaque,
double *errors = NULL,
int *modeChosen = NULL
) {
uint8 tempBuf0[16];
BitStream tmpStream0(tempBuf0, 128, 0);
@ -1635,22 +1640,23 @@ namespace BC7C
double error, bestError;
if(shapeIdx < 16) {
bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters);
gModeError[0] = bestError;
if(errors) errors[0] = bestError;
}
else {
bestError = DBL_MAX;
gModeError[0] = -1.0;
if(errors) errors[0] = -1.0;
}
gModeChosen = 0;
if(modeChosen) *modeChosen = 0;
memcpy(outBuf, tempBuf0, 16);
if(bestError == 0.0) {
return 0.0;
}
error = compressor2.Compress(tmpStream2, shapeIdx, clusters);
gModeError[2] = error;
if(errors) errors[2] = error;
if(error < bestError) {
gModeChosen = 2;
if(modeChosen) *modeChosen = 2;
memcpy(outBuf, tempBuf2, 16);
return error;
}
@ -1973,11 +1979,12 @@ namespace BC7C
return error;
}
static void UpdateErrorEstimate(uint32 mode, double est) {
static void UpdateErrorEstimate(double *estimates, uint32 mode, double est) {
assert(estimates);
assert(mode >= 0);
assert(mode < BC7CompressionMode::kNumModes);
if(gModeEstimate[mode] == -1.0 || est < gModeEstimate[mode]) {
gModeEstimate[mode] = est;
if(estimates[mode] == -1.0 || est < estimates[mode]) {
estimates[mode] = est;
}
}
@ -1988,43 +1995,62 @@ namespace BC7C
private:
uint32 m_BlockIdx;
BlockStatManager &m_BSM;
int *m_ModePtr;
double *m_Estimates;
double *m_Errors;
public:
RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m) { }
RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m)
, m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { }
void SetMode(int *modePtr) { m_ModePtr = modePtr; }
void SetEstimates(double *estimates) { m_Estimates = estimates; }
void SetErrors(double *errors) { m_Errors = errors; }
~RAIIStatSaver() {
BlockStat s (kBlockStatString[eBlockStat_Mode], gBestMode);
assert(m_ModePtr);
assert(m_Estimates);
assert(m_Errors);
BlockStat s (kBlockStatString[eBlockStat_Mode], *m_ModePtr);
m_BSM.AddStat(m_BlockIdx, s);
for(int i = 0; i < BC7CompressionMode::kNumModes; i++) {
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], gModeEstimate[i]);
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], m_Estimates[i]);
m_BSM.AddStat(m_BlockIdx, s);
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], gModeError[i]);
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], m_Errors[i]);
m_BSM.AddStat(m_BlockIdx, s);
}
}
};
uint32 blockIdx = 0;
int bestMode = 0;
double modeEstimate[BC7CompressionMode::kNumModes];
double modeError[BC7CompressionMode::kNumModes];
// reset global variables...
gBestMode = 0;
bestMode = 0;
for(int i = 0; i < BC7CompressionMode::kNumModes; i++){
gModeError[i] = gModeEstimate[i] = -1.0;
modeError[i] = modeEstimate[i] = -1.0;
}
blockIdx = statManager.BeginBlock();
uint32 blockIdx = statManager.BeginBlock();
for(int i = 0; i < kNumBlockStats; i++) {
statManager.AddStat(blockIdx, BlockStat(kBlockStatString[i], 0));
}
RAIIStatSaver __statsaver__(blockIdx, statManager);
__statsaver__.SetMode(&bestMode);
__statsaver__.SetEstimates(modeEstimate);
__statsaver__.SetErrors(modeError);
// All a single color?
if(AllOneColor(block)) {
BitStream bStrm(outBuf, 128, 0);
CompressOptimalColorBC7(*block, bStrm);
gBestMode = 5;
bestMode = 5;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0);
statManager.AddStat(blockIdx, s);
@ -2050,7 +2076,7 @@ namespace BC7C
if(transparent) {
BitStream bStrm(outBuf, 128, 0);
WriteTransparentBlock(bStrm);
gBestMode = 6;
bestMode = 6;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 1);
statManager.AddStat(blockIdx, s);
@ -2065,12 +2091,12 @@ namespace BC7C
blockCluster.GetBoundingBox(Min, Max);
v = Max - Min;
if(v * v == 0) {
gModeEstimate[6] = 0.0;
modeEstimate[6] = 0.0;
}
else {
const float *w = GetErrorMetric();
const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3]));
UpdateErrorEstimate(6, err);
UpdateErrorEstimate(modeEstimate, 6, err);
#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
double eigOne = blockCluster.GetPrincipalEigenvalue();
@ -2124,11 +2150,11 @@ namespace BC7C
#endif
if(errEstimate[0] != -1.0) {
UpdateErrorEstimate(1, errEstimate[0]);
UpdateErrorEstimate(modeEstimate, 1, errEstimate[0]);
}
if(errEstimate[1] != -1.0) {
UpdateErrorEstimate(3, errEstimate[1]);
UpdateErrorEstimate(modeEstimate, 3, errEstimate[1]);
}
if(err < bestError[0]) {
@ -2138,8 +2164,9 @@ namespace BC7C
// If it's small, we'll take it!
if(err < 1e-9) {
CompressTwoClusters(i, clusters, outBuf, opaque);
gBestMode = gModeChosen;
int modeChosen;
CompressTwoClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
bestMode = modeChosen;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
statManager.AddStat(blockIdx, s);
@ -2185,11 +2212,11 @@ namespace BC7C
#endif
if(errEstimate[0] != -1.0) {
UpdateErrorEstimate(0, errEstimate[0]);
UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]);
}
if(errEstimate[1] != -1.0) {
UpdateErrorEstimate(2, errEstimate[1]);
UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]);
}
if(err < bestError[1]) {
@ -2199,8 +2226,9 @@ namespace BC7C
// If it's small, we'll take it!
if(err < 1e-9) {
CompressThreeClusters(i, clusters, outBuf, opaque);
gBestMode = gModeChosen;
int modeChosen;
CompressThreeClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
bestMode = modeChosen;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
statManager.AddStat(blockIdx, s);
@ -2226,8 +2254,8 @@ namespace BC7C
BitStream tempStream1 (tempBuf1, 128, 0);
BC7CompressionMode compressor(6, opaque);
double best = compressor.Compress(tempStream1, 0, &blockCluster);
gModeError[6] = best;
gBestMode = 6;
modeError[6] = best;
bestMode = 6;
if(best == 0.0f) {
memcpy(outBuf, tempBuf1, 16);
return;
@ -2243,7 +2271,7 @@ namespace BC7C
double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
if(error < best) {
gBestMode = mode;
bestMode = mode;
best = error;
if(best == 0.0f) {
@ -2257,10 +2285,11 @@ namespace BC7C
}
}
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque);
int modeChosen;
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen);
if(error < best) {
gBestMode = gModeChosen;
bestMode = modeChosen;
best = error;
if(error == 0.0f) {
@ -2273,9 +2302,9 @@ namespace BC7C
}
if(opaque) {
if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque) < best) {
if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque, modeError, &modeChosen) < best) {
gBestMode = gModeChosen;
bestMode = modeChosen;
memcpy(outBuf, tempBuf2, 16);
return;
}

View file

@ -113,12 +113,6 @@ int main(int argc, char **argv) {
} while(knowArg && fileArg < argc);
if(numThreads > 1 && bSaveLog) {
bSaveLog = false;
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
}
if(fileArg == argc) {
PrintUsage();
exit(1);

View file

@ -13,6 +13,8 @@ public:
BlockStat(const BlockStat &);
BlockStat &operator=(const BlockStat &);
void ToString(char *buf, int bufSz) const;
private:
const enum Type {

View file

@ -40,6 +40,22 @@ BlockStat &BlockStat::operator=(const BlockStat &other) {
memcpy(this, &other, sizeof(*this));
}
// Formats this stat as "<name>,<value>" into buf, writing at most bufSz bytes
// (including the terminating NUL, as snprintf does).
//
//   buf   - destination buffer; nothing is written when it is null.
//   bufSz - capacity of buf in bytes; nothing is written when it is <= 0.
//
// For an unrecognized m_Type the buffer is set to the empty string before
// asserting, so builds where assert compiles away never hand the caller an
// uninitialized buffer.
void BlockStat::ToString(char *buf, int bufSz) const {
  if(!buf || bufSz <= 0) {
    return;
  }

  switch(m_Type) {
    case BlockStat::eType_Float:
      snprintf(buf, bufSz, "%s,%f", m_StatName, m_FloatStat);
    break;

    case BlockStat::eType_Int:
      snprintf(buf, bufSz, "%s,%llu", m_StatName, m_IntStat);
    break;

    default:
      // Leave a well-defined (empty) string behind before flagging the bug.
      buf[0] = '\0';
      assert(!"Unknown stat type!");
    break;
  }
}
////////////////////////////////////////////////////////////////////////////////
//
// BlockStat Manager Implementation
@ -71,7 +87,7 @@ uint32 BlockStatManager::BeginBlock() {
return m_NextBlock-1;
}
TCLock lock(m_Mutex);
TCLock lock (m_Mutex);
return m_NextBlock++;
}
@ -82,9 +98,38 @@ void BlockStatManager::AddStat(uint32 blockIdx, const BlockStat &stat) {
return;
}
TCLock lock (m_Mutex);
m_BlockStatList[blockIdx].AddStat(stat);
}
void BlockStatManager::ToFile(const CHAR *filename) {
FileStream fstr (filename, eFileMode_Write);
for(int i = 0; i < m_BlockStatListSz; i++) {
const BlockStatList *head = &(m_BlockStatList[i]);
while(head) {
BlockStat s = head->GetStat();
CHAR statStr[256];
s.ToString(statStr, 256);
CHAR str[256];
snprintf(str, 256, "%d,%s\n", i, statStr);
int strLen = strlen(str);
if(strLen > 255) {
str[255] = '\n';
strLen = 256;
}
fstr.Write((uint8 *)str, strLen);
head = head->GetTail();
}
}
}
////////////////////////////////////////////////////////////////////////////////
//
// BlockStat List Implementation
@ -111,6 +156,7 @@ BlockStatManager::BlockStatList::~BlockStatList() {
}
void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
if(strncmp(stat.m_StatName, m_Stat.m_StatName, BlockStat::kStatNameSz) == 0) {
m_Stat = stat;
}
@ -126,40 +172,3 @@ void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
}
}
}
void BlockStatManager::ToFile(const CHAR *filename) {
FileStream fstr (filename, eFileMode_Write);
for(int i = 0; i < m_BlockStatListSz; i++) {
const BlockStatList *head = &(m_BlockStatList[i]);
while(head) {
BlockStat s = head->GetStat();
CHAR statStr[256];
switch(s.m_Type) {
case BlockStat::eType_Float:
snprintf(statStr, 256, "%d,%s,%f\n", i, s.m_StatName, s.m_FloatStat);
break;
case BlockStat::eType_Int:
snprintf(statStr, 256, "%d,%s,%llu\n", i, s.m_StatName, s.m_IntStat);
break;
default:
assert(false);
break;
}
int statStrLen = strlen(statStr);
if(statStrLen > 255) {
statStr[255] = '\n';
statStrLen = 256;
}
fstr.Write((uint8 *)statStr, statStrLen);
head = head->GetTail();
}
}
}

View file

@ -113,16 +113,7 @@ static double CompressImageInSerial(
return cmpTime;
}
static double CompressImageWithThreads(
const unsigned char *imgData,
const unsigned int imgDataSz,
const SCompressionSettings &settings,
unsigned char *outBuf
) {
CompressionFunc f = ChooseFuncFromSettings(settings);
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
static double CompressThreadGroup(ThreadGroup &tgrp, const SCompressionSettings &settings) {
if(!(tgrp.PrepareThreads())) {
assert(!"Thread group failed to prepare threads?!");
return -1.0f;
@ -140,7 +131,29 @@ static double CompressImageWithThreads(
cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds();
}
tgrp.CleanUpThreads();
tgrp.CleanUpThreads();
return cmpTimeTotal;
}
static double CompressImageWithThreads(
const unsigned char *imgData,
const unsigned int imgDataSz,
const SCompressionSettings &settings,
unsigned char *outBuf
) {
CompressionFunc f = ChooseFuncFromSettings(settings);
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
double cmpTimeTotal = 0.0;
if(fStats && settings.pStatManager) {
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, fStats, *(settings.pStatManager), outBuf);
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
}
else {
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
}
double cmpTime = cmpTimeTotal / double(settings.iNumCompressions);
return cmpTime;
@ -153,21 +166,40 @@ static double CompressImageWithWorkerQueue(
unsigned char *outBuf
) {
CompressionFunc f = ChooseFuncFromSettings(settings);
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
WorkerQueue wq (
settings.iNumCompressions,
settings.iNumThreads,
settings.iJobSize,
imgData,
imgDataSz,
f,
outBuf
);
double cmpTimeTotal = 0.0;
if(fStats && settings.pStatManager) {
WorkerQueue wq (
settings.iNumCompressions,
settings.iNumThreads,
settings.iJobSize,
imgData,
imgDataSz,
fStats,
*(settings.pStatManager),
outBuf
);
wq.Run();
wq.Run();
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
}
else {
WorkerQueue wq (
settings.iNumCompressions,
settings.iNumThreads,
settings.iJobSize,
imgData,
imgDataSz,
f,
outBuf
);
return wq.GetStopWatch().TimeInMilliseconds() /
double(settings.iNumCompressions);
wq.Run();
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
}
return cmpTimeTotal / double(settings.iNumCompressions);
}
bool CompressImageData(

View file

@ -13,13 +13,15 @@ CmpThread::CmpThread()
, m_Width(0)
, m_Height(0)
, m_CmpFunc(NULL)
, m_CmpFuncWithStats(NULL)
, m_StatManager(NULL)
, m_OutBuf(NULL)
, m_InBuf(NULL)
, m_ParentExitFlag(NULL)
{ }
void CmpThread::operator()() {
if(!m_CmpFunc || !m_OutBuf || !m_InBuf
if(!m_OutBuf || !m_InBuf
|| !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV
|| !m_StartBarrier
|| !m_ParentExitFlag
@ -28,6 +30,11 @@ void CmpThread::operator()() {
return;
}
if(!(m_CmpFunc || (m_CmpFuncWithStats && m_StatManager))) {
fprintf(stderr, "Incorrect thread function pointer.\n");
return;
}
while(1) {
// Wait for signal to start work...
m_StartBarrier->Wait();
@ -36,7 +43,10 @@ void CmpThread::operator()() {
return;
}
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
if(m_CmpFunc)
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
else
(*m_CmpFuncWithStats)(m_InBuf, m_OutBuf, m_Width, m_Height, *m_StatManager);
{
TCLock lock(*m_ParentCounterLock);
@ -47,19 +57,37 @@ void CmpThread::operator()() {
}
}
ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
: m_StartBarrier(new TCBarrier(numThreads + 1))
, m_FinishMutex(new TCMutex())
, m_FinishCV(new TCConditionVariable())
, m_NumThreads(numThreads)
, m_ActiveThreads(0)
, m_Func(func)
, m_ImageDataSz(inBufSz)
, m_ImageData(inBuf)
, m_OutBuf(outBuf)
, m_ThreadState(eThreadState_Done)
, m_ExitFlag(false)
, m_CompressedBlockSize(
(func == BC7C::CompressImageBC7
#ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD
#endif
)?
16
:
0
)
, m_UncompressedBlockSize(
(func == BC7C::CompressImageBC7
#ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD
#endif
)?
64
:
0
)
{
for(int i = 0; i < kMaxNumThreads; i++) {
// Thread synchronization primitives
@ -68,6 +96,50 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
m_Threads[i].m_StartBarrier = m_StartBarrier;
m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
m_Threads[i].m_CmpFunc = func;
}
}
// Builds a thread group whose workers run a stats-collecting compression
// function.
//
//   numThreads  - number of worker threads requested; the start barrier is
//                 constructed with numThreads + 1 (presumably the workers plus
//                 the coordinating thread -- confirm against TCBarrier).
//   inBuf       - source image data, inBufSz bytes long.
//   func        - compression routine that also records per-block stats.
//   statManager - stat sink handed to every worker by address.
//   outBuf      - destination for the compressed output.
//
// The per-block sizes are 16 (compressed) and 64 (uncompressed) when func is
// BC7C::CompressImageBC7Stats, and 0 for any other function.
ThreadGroup::ThreadGroup(
  int numThreads,
  const unsigned char *inBuf,
  unsigned int inBufSz,
  CompressionFuncWithStats func,
  BlockStatManager &statManager,
  unsigned char *outBuf
)
  : m_StartBarrier(new TCBarrier(numThreads + 1))
  , m_FinishMutex(new TCMutex())
  , m_FinishCV(new TCConditionVariable())
  , m_NumThreads(numThreads)
  , m_ActiveThreads(0)
  , m_ImageDataSz(inBufSz)
  , m_ImageData(inBuf)
  , m_OutBuf(outBuf)
  , m_ThreadState(eThreadState_Done)
  , m_ExitFlag(false)
  , m_CompressedBlockSize((func == BC7C::CompressImageBC7Stats) ? 16 : 0)
  , m_UncompressedBlockSize((func == BC7C::CompressImageBC7Stats) ? 64 : 0)
{
  // Every slot is wired up front, whether or not it ends up being used.
  for(int slot = 0; slot < kMaxNumThreads; slot++) {
    CmpThread &worker = m_Threads[slot];

    // Thread synchronization primitives
    worker.m_ParentCounterLock = m_FinishMutex;
    worker.m_FinishCV = m_FinishCV;
    worker.m_ParentCounter = &m_ThreadsFinished;
    worker.m_StartBarrier = m_StartBarrier;
    worker.m_ParentExitFlag = &m_ExitFlag;

    // Work description: the stats-aware function and where it reports to.
    worker.m_CmpFuncWithStats = func;
    worker.m_StatManager = &statManager;
  }
}
@ -77,20 +149,6 @@ ThreadGroup::~ThreadGroup() {
delete m_FinishCV;
}
// Returns the number of output bytes one compressed block occupies for the
// configured compression function, or 0 when the function is not recognized.
unsigned int ThreadGroup::GetCompressedBlockSize() {
  if(m_Func == BC7C::CompressImageBC7) return 16;
#ifdef HAS_SSE_41
  if(m_Func == BC7C::CompressImageBC7SIMD) return 16;
#endif

  // Falling off the end of a non-void function is undefined behaviour, so
  // report an explicit size of 0 for any other function.
  return 0;
}
// Returns the number of input bytes one uncompressed block occupies for the
// configured compression function, or 0 when the function is not recognized.
unsigned int ThreadGroup::GetUncompressedBlockSize() {
  if(m_Func == BC7C::CompressImageBC7) return 64;
#ifdef HAS_SSE_41
  if(m_Func == BC7C::CompressImageBC7SIMD) return 64;
#endif

  // Falling off the end of a non-void function is undefined behaviour, so
  // report an explicit size of 0 for any other function.
  return 0;
}
bool ThreadGroup::PrepareThreads() {
// Make sure that threads aren't running.
@ -126,9 +184,8 @@ bool ThreadGroup::PrepareThreads() {
CmpThread &t = m_Threads[m_ActiveThreads];
t.m_Height = 4;
t.m_Width = numBlocksThisThread * 4;
t.m_CmpFunc = m_Func;
t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize());
t.m_InBuf = m_ImageData + (blocksProcessed * GetUncompressedBlockSize());
t.m_OutBuf = m_OutBuf + (blocksProcessed * m_CompressedBlockSize);
t.m_InBuf = m_ImageData + (blocksProcessed * m_UncompressedBlockSize);
blocksProcessed += numBlocksThisThread;

View file

@ -21,6 +21,9 @@ private:
CompressionFunc m_CmpFunc;
CompressionFuncWithStats m_CmpFuncWithStats;
BlockStatManager *m_StatManager;
unsigned char *m_OutBuf;
const unsigned char *m_InBuf;
@ -29,10 +32,10 @@ private:
CmpThread();
public:
void operator ()();
virtual ~CmpThread() { }
virtual void operator ()();
};
class ThreadGroup {
public:
ThreadGroup(
@ -42,6 +45,16 @@ class ThreadGroup {
CompressionFunc func,
unsigned char *outBuf
);
ThreadGroup(
int numThreads,
const unsigned char *inBuf,
unsigned int inBufSz,
CompressionFuncWithStats func,
BlockStatManager &statManager,
unsigned char *outBuf
);
~ThreadGroup();
bool PrepareThreads();
@ -75,11 +88,10 @@ class ThreadGroup {
// State variables.
const unsigned int m_ImageDataSz;
const unsigned char *const m_ImageData;
const CompressionFunc m_Func;
unsigned char *m_OutBuf;
unsigned int GetCompressedBlockSize();
unsigned int GetUncompressedBlockSize();
const unsigned int m_CompressedBlockSize;
const unsigned int m_UncompressedBlockSize;
StopWatch m_StopWatch;

View file

@ -1,20 +1,11 @@
#include "WorkerQueue.h"
#include "BC7Compressor.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <algorithm>
// Returns a copy of the larger of a and b, compared with operator>.
// When neither is greater than the other, b is returned.
template <typename T>
static inline T max(const T &a, const T &b) {
  if(a > b) {
    return a;
  }
  return b;
}
// Returns a copy of the smaller of a and b, compared with operator<.
// When neither is less than the other, b is returned.
template <typename T>
static inline T min(const T &a, const T &b) {
  if(a < b) {
    return a;
  }
  return b;
}
#include "BC7Compressor.h"
template <typename T>
static inline void clamp(T &x, const T &min, const T &max) {
@ -36,7 +27,10 @@ void WorkerThread::operator()() {
}
CompressionFunc f = m_Parent->GetCompressionFunc();
if(!f) {
CompressionFuncWithStats fStat = m_Parent->GetCompressionFuncWithStats();
BlockStatManager *statManager = m_Parent->GetBlockStatManager();
if(!(f || (fStat && statManager))) {
fprintf(stderr, "%s\n", "Illegal worker queue initialization -- compression func is NULL.");
return;
}
@ -63,7 +57,11 @@ void WorkerThread::operator()() {
{
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
if(f)
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
else
(*fStat)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4, *statManager);
break;
}
@ -91,22 +89,57 @@ WorkerQueue::WorkerQueue(
uint8 *outBuf
)
: m_NumCompressions(0)
, m_TotalNumCompressions(max(uint32(1), numCompressions))
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
, m_NumThreads(numThreads)
, m_WaitingThreads(0)
, m_ActiveThreads(0)
, m_JobSize(max(uint32(1), jobSize))
, m_JobSize(std::max(uint32(1), jobSize))
, m_InBufSz(inBufSz)
, m_InBuf(inBuf)
, m_OutBuf(outBuf)
, m_NextBlock(0)
, m_CompressionFunc(func)
, m_CompressionFuncWithStats(NULL)
, m_BlockStatManager(NULL)
{
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
#ifndef NDEBUG
if(m_InBufSz % 64) {
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?");
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
}
#endif
}
// Builds a worker queue whose threads run a stats-collecting compression
// function.
//
//   numCompressions  - requested number of compression passes; values below 1
//                      are raised to 1.
//   numThreads       - requested worker count; clamped to
//                      [1, kMaxNumWorkerThreads].
//   jobSize          - requested job size; values below 1 are raised to 1.
//   inBuf / inBufSz  - source image data and its size in bytes.
//   func             - compression routine that also records stats.
//   blockStatManager - stat sink, stored by address.
//   outBuf           - destination for the compressed output.
//
// The plain compression function is left NULL so this queue only has the
// stats-aware function set.
WorkerQueue::WorkerQueue(
  uint32 numCompressions,
  uint32 numThreads,
  uint32 jobSize,
  const uint8 *inBuf,
  uint32 inBufSz,
  CompressionFuncWithStats func,
  BlockStatManager &blockStatManager,
  uint8 *outBuf
)
  : m_NumCompressions(0)
  , m_TotalNumCompressions(std::max(uint32(1), numCompressions))
  , m_NumThreads(numThreads)
  , m_WaitingThreads(0)
  , m_ActiveThreads(0)
  , m_JobSize(std::max(uint32(1), jobSize))
  , m_InBufSz(inBufSz)
  , m_InBuf(inBuf)
  , m_OutBuf(outBuf)
  , m_NextBlock(0)
  , m_CompressionFunc(NULL)
  , m_CompressionFuncWithStats(func)
  , m_BlockStatManager(&blockStatManager)
{
  // Keep the worker count within the supported range.
  clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));

#ifndef NDEBUG
  // Debug-only sanity check on the input size.
  const bool sizeIsMultipleOf64 = (m_InBufSz % 64) == 0;
  if(!sizeIsMultipleOf64) {
    fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
  }
#endif
}
@ -182,7 +215,7 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
// The number of blocks to process is either the job size
// or the number of blocks remaining.
int blocksProcessed = min(m_JobSize, totalBlocks - m_NextBlock);
int blocksProcessed = std::min(m_JobSize, totalBlocks - m_NextBlock);
m_NumBlocks[threadIdx] = blocksProcessed;
// Make sure the next block is updated.

View file

@ -44,6 +44,17 @@ class WorkerQueue {
uint8 *outBuf
);
WorkerQueue(
uint32 numCompressions,
uint32 numThreads,
uint32 jobSize,
const uint8 *inBuf,
uint32 inBufSz,
CompressionFuncWithStats func,
BlockStatManager &blockStatManager,
uint8 *outBuf
);
~WorkerQueue() { }
// Runs the workers
@ -80,6 +91,11 @@ class WorkerQueue {
const CompressionFunc m_CompressionFunc;
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
BlockStatManager *m_BlockStatManager;
const CompressionFuncWithStats m_CompressionFuncWithStats;
CompressionFuncWithStats GetCompressionFuncWithStats() const { return m_CompressionFuncWithStats; }
BlockStatManager *GetBlockStatManager() const { return m_BlockStatManager; }
StopWatch m_StopWatch;
WorkerThread::EAction AcceptThreadData(uint32 threadIdx);