Make stats collection threadsafe.

This commit is contained in:
Pavel Krajcevski 2012-11-01 18:56:13 -04:00
parent 1abc54cf21
commit 790df0736f
10 changed files with 350 additions and 168 deletions

View file

@ -42,8 +42,6 @@ public:
explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque), m_Attributes(&(kModeAttributes[mode])), m_RotateMode(0), m_IndexMode(0) { } explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque), m_Attributes(&(kModeAttributes[mode])), m_RotateMode(0), m_IndexMode(0) { }
~BC7CompressionMode() { } ~BC7CompressionMode() { }
static int NumUses[8];
static void ResetNumUses() { memset(NumUses, 0, sizeof(NumUses)); }
double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters); double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters);
// This switch controls the quality of the simulated annealing optimizer. We will not make // This switch controls the quality of the simulated annealing optimizer. We will not make

View file

@ -286,7 +286,6 @@ const uint32 kBC7InterpolationValues[4][16][2] = {
}; };
int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting. int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting.
int BC7CompressionMode::NumUses[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = { BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = {
{ 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared }, { 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared },
@ -1471,11 +1470,6 @@ namespace BC7C
stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31); stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31);
} }
static int gModeChosen = -1;
static int gBestMode = -1;
static double gModeEstimate[ BC7CompressionMode::kNumModes ];
static double gModeError[ BC7CompressionMode::kNumModes ];
static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]); static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
// Compress an image using BC7 compression. Use the inBuf parameter to point to an image in // Compress an image using BC7 compression. Use the inBuf parameter to point to an image in
@ -1485,7 +1479,6 @@ namespace BC7C
void CompressImageBC7(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height) void CompressImageBC7(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height)
{ {
uint32 block[16]; uint32 block[16];
BC7CompressionMode::ResetNumUses();
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel()); BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
for(int j = 0; j < height; j += 4) for(int j = 0; j < height; j += 4)
@ -1494,7 +1487,6 @@ namespace BC7C
{ {
// ExtractBlock(inBuf + i * 4, width, block); // ExtractBlock(inBuf + i * 4, width, block);
CompressBC7Block((const uint32 *)inBuf, outBuf); CompressBC7Block((const uint32 *)inBuf, outBuf);
BC7CompressionMode::NumUses[gBestMode]++;
#ifndef NDEBUG #ifndef NDEBUG
uint8 *block = (uint8 *)outBuf; uint8 *block = (uint8 *)outBuf;
@ -1530,7 +1522,6 @@ namespace BC7C
BlockStatManager &statManager BlockStatManager &statManager
) { ) {
uint32 block[16]; uint32 block[16];
BC7CompressionMode::ResetNumUses();
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel()); BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
for(int j = 0; j < height; j += 4) for(int j = 0; j < height; j += 4)
@ -1539,7 +1530,6 @@ namespace BC7C
{ {
// ExtractBlock(inBuf + i * 4, width, block); // ExtractBlock(inBuf + i * 4, width, block);
CompressBC7Block((const uint32 *)inBuf, outBuf, statManager); CompressBC7Block((const uint32 *)inBuf, outBuf, statManager);
BC7CompressionMode::NumUses[gBestMode]++;
#ifndef NDEBUG #ifndef NDEBUG
uint8 *block = (uint8 *)outBuf; uint8 *block = (uint8 *)outBuf;
@ -1574,16 +1564,25 @@ namespace BC7C
} }
} }
static double CompressTwoClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) { static double CompressTwoClusters(
int shapeIdx,
const RGBACluster *clusters,
uint8 *outBuf,
bool opaque,
double *errors = NULL,
int *modeChosen = NULL
) {
uint8 tempBuf1[16]; uint8 tempBuf1[16];
BitStream tmpStream1(tempBuf1, 128, 0); BitStream tmpStream1(tempBuf1, 128, 0);
BC7CompressionMode compressor1(1, opaque); BC7CompressionMode compressor1(1, opaque);
double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters); double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters);
gModeError[1] = bestError;
if(errors) errors[1] = bestError;
if(modeChosen) *modeChosen = 1;
memcpy(outBuf, tempBuf1, 16); memcpy(outBuf, tempBuf1, 16);
gModeChosen = 1;
if(bestError == 0.0) { if(bestError == 0.0) {
return 0.0; return 0.0;
} }
@ -1593,9 +1592,9 @@ namespace BC7C
BC7CompressionMode compressor3(3, opaque); BC7CompressionMode compressor3(3, opaque);
double error = compressor3.Compress(tmpStream3, shapeIdx, clusters); double error = compressor3.Compress(tmpStream3, shapeIdx, clusters);
gModeError[3] = error; if(errors) errors[3] = error;
if(error < bestError) { if(error < bestError) {
gModeChosen = 3; if(modeChosen) *modeChosen = 3;
bestError = error; bestError = error;
memcpy(outBuf, tempBuf3, 16); memcpy(outBuf, tempBuf3, 16);
if(bestError == 0.0) { if(bestError == 0.0) {
@ -1610,10 +1609,10 @@ namespace BC7C
BitStream tmpStream7(tempBuf7, 128, 0); BitStream tmpStream7(tempBuf7, 128, 0);
BC7CompressionMode compressor7(7, opaque); BC7CompressionMode compressor7(7, opaque);
error = compressor7.Compress(tmpStream7, shapeIdx, clusters); error = compressor7.Compress(tmpStream7, shapeIdx, clusters);
gModeError[7] = error; if(errors) errors[7] = error;
if(error < bestError) { if(error < bestError) {
gModeChosen = 7; if(modeChosen) *modeChosen = 7;
memcpy(outBuf, tempBuf7, 16); memcpy(outBuf, tempBuf7, 16);
return error; return error;
} }
} }
@ -1621,8 +1620,14 @@ namespace BC7C
return bestError; return bestError;
} }
static double CompressThreeClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) { static double CompressThreeClusters(
int shapeIdx,
const RGBACluster *clusters,
uint8 *outBuf,
bool opaque,
double *errors = NULL,
int *modeChosen = NULL
) {
uint8 tempBuf0[16]; uint8 tempBuf0[16];
BitStream tmpStream0(tempBuf0, 128, 0); BitStream tmpStream0(tempBuf0, 128, 0);
@ -1635,22 +1640,23 @@ namespace BC7C
double error, bestError; double error, bestError;
if(shapeIdx < 16) { if(shapeIdx < 16) {
bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters); bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters);
gModeError[0] = bestError; if(errors) errors[0] = bestError;
} }
else { else {
bestError = DBL_MAX; bestError = DBL_MAX;
gModeError[0] = -1.0; if(errors) errors[0] = -1.0;
} }
gModeChosen = 0;
if(modeChosen) *modeChosen = 0;
memcpy(outBuf, tempBuf0, 16); memcpy(outBuf, tempBuf0, 16);
if(bestError == 0.0) { if(bestError == 0.0) {
return 0.0; return 0.0;
} }
error = compressor2.Compress(tmpStream2, shapeIdx, clusters); error = compressor2.Compress(tmpStream2, shapeIdx, clusters);
gModeError[2] = error; if(errors) errors[2] = error;
if(error < bestError) { if(error < bestError) {
gModeChosen = 2; if(modeChosen) *modeChosen = 2;
memcpy(outBuf, tempBuf2, 16); memcpy(outBuf, tempBuf2, 16);
return error; return error;
} }
@ -1973,11 +1979,12 @@ namespace BC7C
return error; return error;
} }
static void UpdateErrorEstimate(uint32 mode, double est) { static void UpdateErrorEstimate(double *estimates, uint32 mode, double est) {
assert(estimates);
assert(mode >= 0); assert(mode >= 0);
assert(mode < BC7CompressionMode::kNumModes); assert(mode < BC7CompressionMode::kNumModes);
if(gModeEstimate[mode] == -1.0 || est < gModeEstimate[mode]) { if(estimates[mode] == -1.0 || est < estimates[mode]) {
gModeEstimate[mode] = est; estimates[mode] = est;
} }
} }
@ -1988,43 +1995,62 @@ namespace BC7C
private: private:
uint32 m_BlockIdx; uint32 m_BlockIdx;
BlockStatManager &m_BSM; BlockStatManager &m_BSM;
int *m_ModePtr;
double *m_Estimates;
double *m_Errors;
public: public:
RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m) { } RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m)
, m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { }
void SetMode(int *modePtr) { m_ModePtr = modePtr; }
void SetEstimates(double *estimates) { m_Estimates = estimates; }
void SetErrors(double *errors) { m_Errors = errors; }
~RAIIStatSaver() { ~RAIIStatSaver() {
BlockStat s (kBlockStatString[eBlockStat_Mode], gBestMode); assert(m_ModePtr);
assert(m_Estimates);
assert(m_Errors);
BlockStat s (kBlockStatString[eBlockStat_Mode], *m_ModePtr);
m_BSM.AddStat(m_BlockIdx, s); m_BSM.AddStat(m_BlockIdx, s);
for(int i = 0; i < BC7CompressionMode::kNumModes; i++) { for(int i = 0; i < BC7CompressionMode::kNumModes; i++) {
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], gModeEstimate[i]); s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], m_Estimates[i]);
m_BSM.AddStat(m_BlockIdx, s); m_BSM.AddStat(m_BlockIdx, s);
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], gModeError[i]); s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], m_Errors[i]);
m_BSM.AddStat(m_BlockIdx, s); m_BSM.AddStat(m_BlockIdx, s);
} }
} }
}; };
uint32 blockIdx = 0; int bestMode = 0;
double modeEstimate[BC7CompressionMode::kNumModes];
double modeError[BC7CompressionMode::kNumModes];
// reset global variables... // reset global variables...
gBestMode = 0; bestMode = 0;
for(int i = 0; i < BC7CompressionMode::kNumModes; i++){ for(int i = 0; i < BC7CompressionMode::kNumModes; i++){
gModeError[i] = gModeEstimate[i] = -1.0; modeError[i] = modeEstimate[i] = -1.0;
} }
blockIdx = statManager.BeginBlock(); uint32 blockIdx = statManager.BeginBlock();
for(int i = 0; i < kNumBlockStats; i++) { for(int i = 0; i < kNumBlockStats; i++) {
statManager.AddStat(blockIdx, BlockStat(kBlockStatString[i], 0)); statManager.AddStat(blockIdx, BlockStat(kBlockStatString[i], 0));
} }
RAIIStatSaver __statsaver__(blockIdx, statManager); RAIIStatSaver __statsaver__(blockIdx, statManager);
__statsaver__.SetMode(&bestMode);
__statsaver__.SetEstimates(modeEstimate);
__statsaver__.SetErrors(modeError);
// All a single color? // All a single color?
if(AllOneColor(block)) { if(AllOneColor(block)) {
BitStream bStrm(outBuf, 128, 0); BitStream bStrm(outBuf, 128, 0);
CompressOptimalColorBC7(*block, bStrm); CompressOptimalColorBC7(*block, bStrm);
gBestMode = 5; bestMode = 5;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0); BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0);
statManager.AddStat(blockIdx, s); statManager.AddStat(blockIdx, s);
@ -2050,7 +2076,7 @@ namespace BC7C
if(transparent) { if(transparent) {
BitStream bStrm(outBuf, 128, 0); BitStream bStrm(outBuf, 128, 0);
WriteTransparentBlock(bStrm); WriteTransparentBlock(bStrm);
gBestMode = 6; bestMode = 6;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 1); BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 1);
statManager.AddStat(blockIdx, s); statManager.AddStat(blockIdx, s);
@ -2065,12 +2091,12 @@ namespace BC7C
blockCluster.GetBoundingBox(Min, Max); blockCluster.GetBoundingBox(Min, Max);
v = Max - Min; v = Max - Min;
if(v * v == 0) { if(v * v == 0) {
gModeEstimate[6] = 0.0; modeEstimate[6] = 0.0;
} }
else { else {
const float *w = GetErrorMetric(); const float *w = GetErrorMetric();
const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3])); const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3]));
UpdateErrorEstimate(6, err); UpdateErrorEstimate(modeEstimate, 6, err);
#ifdef USE_PCA_FOR_SHAPE_ESTIMATION #ifdef USE_PCA_FOR_SHAPE_ESTIMATION
double eigOne = blockCluster.GetPrincipalEigenvalue(); double eigOne = blockCluster.GetPrincipalEigenvalue();
@ -2124,11 +2150,11 @@ namespace BC7C
#endif #endif
if(errEstimate[0] != -1.0) { if(errEstimate[0] != -1.0) {
UpdateErrorEstimate(1, errEstimate[0]); UpdateErrorEstimate(modeEstimate, 1, errEstimate[0]);
} }
if(errEstimate[1] != -1.0) { if(errEstimate[1] != -1.0) {
UpdateErrorEstimate(3, errEstimate[1]); UpdateErrorEstimate(modeEstimate, 3, errEstimate[1]);
} }
if(err < bestError[0]) { if(err < bestError[0]) {
@ -2138,8 +2164,9 @@ namespace BC7C
// If it's small, we'll take it! // If it's small, we'll take it!
if(err < 1e-9) { if(err < 1e-9) {
CompressTwoClusters(i, clusters, outBuf, opaque); int modeChosen;
gBestMode = gModeChosen; CompressTwoClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
bestMode = modeChosen;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2); BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
statManager.AddStat(blockIdx, s); statManager.AddStat(blockIdx, s);
@ -2185,11 +2212,11 @@ namespace BC7C
#endif #endif
if(errEstimate[0] != -1.0) { if(errEstimate[0] != -1.0) {
UpdateErrorEstimate(0, errEstimate[0]); UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]);
} }
if(errEstimate[1] != -1.0) { if(errEstimate[1] != -1.0) {
UpdateErrorEstimate(2, errEstimate[1]); UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]);
} }
if(err < bestError[1]) { if(err < bestError[1]) {
@ -2199,8 +2226,9 @@ namespace BC7C
// If it's small, we'll take it! // If it's small, we'll take it!
if(err < 1e-9) { if(err < 1e-9) {
CompressThreeClusters(i, clusters, outBuf, opaque); int modeChosen;
gBestMode = gModeChosen; CompressThreeClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
bestMode = modeChosen;
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2); BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
statManager.AddStat(blockIdx, s); statManager.AddStat(blockIdx, s);
@ -2226,8 +2254,8 @@ namespace BC7C
BitStream tempStream1 (tempBuf1, 128, 0); BitStream tempStream1 (tempBuf1, 128, 0);
BC7CompressionMode compressor(6, opaque); BC7CompressionMode compressor(6, opaque);
double best = compressor.Compress(tempStream1, 0, &blockCluster); double best = compressor.Compress(tempStream1, 0, &blockCluster);
gModeError[6] = best; modeError[6] = best;
gBestMode = 6; bestMode = 6;
if(best == 0.0f) { if(best == 0.0f) {
memcpy(outBuf, tempBuf1, 16); memcpy(outBuf, tempBuf1, 16);
return; return;
@ -2243,7 +2271,7 @@ namespace BC7C
double error = compressorTry.Compress(tempStream2, 0, &blockCluster); double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
if(error < best) { if(error < best) {
gBestMode = mode; bestMode = mode;
best = error; best = error;
if(best == 0.0f) { if(best == 0.0f) {
@ -2257,10 +2285,11 @@ namespace BC7C
} }
} }
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque); int modeChosen;
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen);
if(error < best) { if(error < best) {
gBestMode = gModeChosen; bestMode = modeChosen;
best = error; best = error;
if(error == 0.0f) { if(error == 0.0f) {
@ -2273,9 +2302,9 @@ namespace BC7C
} }
if(opaque) { if(opaque) {
if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque) < best) { if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque, modeError, &modeChosen) < best) {
gBestMode = gModeChosen; bestMode = modeChosen;
memcpy(outBuf, tempBuf2, 16); memcpy(outBuf, tempBuf2, 16);
return; return;
} }

View file

@ -113,12 +113,6 @@ int main(int argc, char **argv) {
} while(knowArg && fileArg < argc); } while(knowArg && fileArg < argc);
if(numThreads > 1 && bSaveLog) {
bSaveLog = false;
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
}
if(fileArg == argc) { if(fileArg == argc) {
PrintUsage(); PrintUsage();
exit(1); exit(1);

View file

@ -13,6 +13,8 @@ public:
BlockStat(const BlockStat &); BlockStat(const BlockStat &);
BlockStat &operator=(const BlockStat &); BlockStat &operator=(const BlockStat &);
void ToString(char *buf, int bufSz) const;
private: private:
const enum Type { const enum Type {

View file

@ -40,6 +40,22 @@ BlockStat &BlockStat::operator=(const BlockStat &other) {
memcpy(this, &other, sizeof(*this)); memcpy(this, &other, sizeof(*this));
} }
// Formats this stat into buf as "<name>,<value>", writing at most bufSz bytes
// including the terminating NUL.
//
// buf   - destination buffer; nothing is written if it is NULL.
// bufSz - capacity of buf in bytes; nothing is written if it is not positive.
//
// If the stat type is not recognized, buf is left holding an empty string so
// that callers which print it with "%s" never read uninitialized memory.
void BlockStat::ToString(char *buf, int bufSz) const {
  if(!buf || bufSz <= 0) {
    // A negative size would be converted to a huge size_t by snprintf.
    return;
  }

  // Guarantee termination even on the unknown-type path below, where the
  // assert compiles away under NDEBUG.
  buf[0] = '\0';

  switch(m_Type) {
    case BlockStat::eType_Float:
      snprintf(buf, bufSz, "%s,%f", m_StatName, m_FloatStat);
    break;

    case BlockStat::eType_Int:
      // Cast so the argument matches %llu regardless of how the integer
      // typedef behind m_IntStat is defined on this platform.
      snprintf(buf, bufSz, "%s,%llu", m_StatName, static_cast<unsigned long long>(m_IntStat));
    break;

    default:
      assert(!"Unknown stat type!");
    break;
  }
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// //
// BlockStat Manager Implementation // BlockStat Manager Implementation
@ -71,7 +87,7 @@ uint32 BlockStatManager::BeginBlock() {
return m_NextBlock-1; return m_NextBlock-1;
} }
TCLock lock(m_Mutex); TCLock lock (m_Mutex);
return m_NextBlock++; return m_NextBlock++;
} }
@ -82,9 +98,38 @@ void BlockStatManager::AddStat(uint32 blockIdx, const BlockStat &stat) {
return; return;
} }
TCLock lock (m_Mutex);
m_BlockStatList[blockIdx].AddStat(stat); m_BlockStatList[blockIdx].AddStat(stat);
} }
void BlockStatManager::ToFile(const CHAR *filename) {
FileStream fstr (filename, eFileMode_Write);
for(int i = 0; i < m_BlockStatListSz; i++) {
const BlockStatList *head = &(m_BlockStatList[i]);
while(head) {
BlockStat s = head->GetStat();
CHAR statStr[256];
s.ToString(statStr, 256);
CHAR str[256];
snprintf(str, 256, "%d,%s\n", i, statStr);
int strLen = strlen(str);
if(strLen > 255) {
str[255] = '\n';
strLen = 256;
}
fstr.Write((uint8 *)str, strLen);
head = head->GetTail();
}
}
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// //
// BlockStat List Implementation // BlockStat List Implementation
@ -111,6 +156,7 @@ BlockStatManager::BlockStatList::~BlockStatList() {
} }
void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) { void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
if(strncmp(stat.m_StatName, m_Stat.m_StatName, BlockStat::kStatNameSz) == 0) { if(strncmp(stat.m_StatName, m_Stat.m_StatName, BlockStat::kStatNameSz) == 0) {
m_Stat = stat; m_Stat = stat;
} }
@ -126,40 +172,3 @@ void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
} }
} }
} }
// Writes every recorded stat to the file named by filename, one line per stat
// in the form "<block index>,<stat name>,<value>\n".
//
// Lines that do not fit in the 256-byte line buffer are truncated, but are
// still terminated with a newline. Stats of an unrecognized type are skipped.
void BlockStatManager::ToFile(const CHAR *filename) {

  FileStream fstr (filename, eFileMode_Write);

  for(int i = 0; i < m_BlockStatListSz; i++) {

    // Walk the chain of stats recorded for block i.
    const BlockStatList *head = &(m_BlockStatList[i]);
    while(head) {
      BlockStat s = head->GetStat();

      CHAR statStr[256];
      // Length the full line would have had, or negative if nothing was
      // formatted (unknown type or snprintf failure).
      int statStrLen = -1;
      switch(s.m_Type) {
        case BlockStat::eType_Float:
          statStrLen = snprintf(statStr, 256, "%d,%s,%f\n", i, s.m_StatName, s.m_FloatStat);
        break;

        case BlockStat::eType_Int:
          statStrLen = snprintf(statStr, 256, "%d,%s,%llu\n", i, s.m_StatName, s.m_IntStat);
        break;

        default:
          assert(false);
        break;
      }

      if(statStrLen >= 0) {
        // snprintf always NUL-terminates inside the buffer, so truncation
        // must be detected from its return value rather than from strlen().
        if(statStrLen > 255) {
          statStr[254] = '\n';
          statStrLen = 255;
        }
        fstr.Write((uint8 *)statStr, statStrLen);
      }

      head = head->GetTail();
    }
  }
}

View file

@ -113,16 +113,7 @@ static double CompressImageInSerial(
return cmpTime; return cmpTime;
} }
static double CompressImageWithThreads( static double CompressThreadGroup(ThreadGroup &tgrp, const SCompressionSettings &settings) {
const unsigned char *imgData,
const unsigned int imgDataSz,
const SCompressionSettings &settings,
unsigned char *outBuf
) {
CompressionFunc f = ChooseFuncFromSettings(settings);
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
if(!(tgrp.PrepareThreads())) { if(!(tgrp.PrepareThreads())) {
assert(!"Thread group failed to prepare threads?!"); assert(!"Thread group failed to prepare threads?!");
return -1.0f; return -1.0f;
@ -140,7 +131,29 @@ static double CompressImageWithThreads(
cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds(); cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds();
} }
tgrp.CleanUpThreads(); tgrp.CleanUpThreads();
return cmpTimeTotal;
}
static double CompressImageWithThreads(
const unsigned char *imgData,
const unsigned int imgDataSz,
const SCompressionSettings &settings,
unsigned char *outBuf
) {
CompressionFunc f = ChooseFuncFromSettings(settings);
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
double cmpTimeTotal = 0.0;
if(fStats && settings.pStatManager) {
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, fStats, *(settings.pStatManager), outBuf);
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
}
else {
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
}
double cmpTime = cmpTimeTotal / double(settings.iNumCompressions); double cmpTime = cmpTimeTotal / double(settings.iNumCompressions);
return cmpTime; return cmpTime;
@ -153,21 +166,40 @@ static double CompressImageWithWorkerQueue(
unsigned char *outBuf unsigned char *outBuf
) { ) {
CompressionFunc f = ChooseFuncFromSettings(settings); CompressionFunc f = ChooseFuncFromSettings(settings);
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
WorkerQueue wq ( double cmpTimeTotal = 0.0;
settings.iNumCompressions, if(fStats && settings.pStatManager) {
settings.iNumThreads, WorkerQueue wq (
settings.iJobSize, settings.iNumCompressions,
imgData, settings.iNumThreads,
imgDataSz, settings.iJobSize,
f, imgData,
outBuf imgDataSz,
); fStats,
*(settings.pStatManager),
outBuf
);
wq.Run(); wq.Run();
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
}
else {
WorkerQueue wq (
settings.iNumCompressions,
settings.iNumThreads,
settings.iJobSize,
imgData,
imgDataSz,
f,
outBuf
);
return wq.GetStopWatch().TimeInMilliseconds() / wq.Run();
double(settings.iNumCompressions); cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
}
return cmpTimeTotal / double(settings.iNumCompressions);
} }
bool CompressImageData( bool CompressImageData(

View file

@ -13,13 +13,15 @@ CmpThread::CmpThread()
, m_Width(0) , m_Width(0)
, m_Height(0) , m_Height(0)
, m_CmpFunc(NULL) , m_CmpFunc(NULL)
, m_CmpFuncWithStats(NULL)
, m_StatManager(NULL)
, m_OutBuf(NULL) , m_OutBuf(NULL)
, m_InBuf(NULL) , m_InBuf(NULL)
, m_ParentExitFlag(NULL) , m_ParentExitFlag(NULL)
{ } { }
void CmpThread::operator()() { void CmpThread::operator()() {
if(!m_CmpFunc || !m_OutBuf || !m_InBuf if(!m_OutBuf || !m_InBuf
|| !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV || !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV
|| !m_StartBarrier || !m_StartBarrier
|| !m_ParentExitFlag || !m_ParentExitFlag
@ -28,6 +30,11 @@ void CmpThread::operator()() {
return; return;
} }
if(!(m_CmpFunc || (m_CmpFuncWithStats && m_StatManager))) {
fprintf(stderr, "Incorrect thread function pointer.\n");
return;
}
while(1) { while(1) {
// Wait for signal to start work... // Wait for signal to start work...
m_StartBarrier->Wait(); m_StartBarrier->Wait();
@ -36,7 +43,10 @@ void CmpThread::operator()() {
return; return;
} }
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height); if(m_CmpFunc)
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
else
(*m_CmpFuncWithStats)(m_InBuf, m_OutBuf, m_Width, m_Height, *m_StatManager);
{ {
TCLock lock(*m_ParentCounterLock); TCLock lock(*m_ParentCounterLock);
@ -47,19 +57,37 @@ void CmpThread::operator()() {
} }
} }
ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf ) ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
: m_StartBarrier(new TCBarrier(numThreads + 1)) : m_StartBarrier(new TCBarrier(numThreads + 1))
, m_FinishMutex(new TCMutex()) , m_FinishMutex(new TCMutex())
, m_FinishCV(new TCConditionVariable()) , m_FinishCV(new TCConditionVariable())
, m_NumThreads(numThreads) , m_NumThreads(numThreads)
, m_ActiveThreads(0) , m_ActiveThreads(0)
, m_Func(func)
, m_ImageDataSz(inBufSz) , m_ImageDataSz(inBufSz)
, m_ImageData(inBuf) , m_ImageData(inBuf)
, m_OutBuf(outBuf) , m_OutBuf(outBuf)
, m_ThreadState(eThreadState_Done) , m_ThreadState(eThreadState_Done)
, m_ExitFlag(false) , m_ExitFlag(false)
, m_CompressedBlockSize(
(func == BC7C::CompressImageBC7
#ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD
#endif
)?
16
:
0
)
, m_UncompressedBlockSize(
(func == BC7C::CompressImageBC7
#ifdef HAS_SSE_41
|| func == BC7C::CompressImageBC7SIMD
#endif
)?
64
:
0
)
{ {
for(int i = 0; i < kMaxNumThreads; i++) { for(int i = 0; i < kMaxNumThreads; i++) {
// Thread synchronization primitives // Thread synchronization primitives
@ -68,6 +96,50 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
m_Threads[i].m_ParentCounter = &m_ThreadsFinished; m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
m_Threads[i].m_StartBarrier = m_StartBarrier; m_Threads[i].m_StartBarrier = m_StartBarrier;
m_Threads[i].m_ParentExitFlag = &m_ExitFlag; m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
m_Threads[i].m_CmpFunc = func;
}
}
ThreadGroup::ThreadGroup(
int numThreads,
const unsigned char *inBuf,
unsigned int inBufSz,
CompressionFuncWithStats func,
BlockStatManager &statManager,
unsigned char *outBuf
)
: m_StartBarrier(new TCBarrier(numThreads + 1))
, m_FinishMutex(new TCMutex())
, m_FinishCV(new TCConditionVariable())
, m_NumThreads(numThreads)
, m_ActiveThreads(0)
, m_ImageDataSz(inBufSz)
, m_ImageData(inBuf)
, m_OutBuf(outBuf)
, m_ThreadState(eThreadState_Done)
, m_ExitFlag(false)
, m_CompressedBlockSize(
(func == BC7C::CompressImageBC7Stats)?
16
:
0
)
, m_UncompressedBlockSize(
(func == BC7C::CompressImageBC7Stats)?
64
:
0
)
{
for(int i = 0; i < kMaxNumThreads; i++) {
// Thread synchronization primitives
m_Threads[i].m_ParentCounterLock = m_FinishMutex;
m_Threads[i].m_FinishCV = m_FinishCV;
m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
m_Threads[i].m_StartBarrier = m_StartBarrier;
m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
m_Threads[i].m_CmpFuncWithStats = func;
m_Threads[i].m_StatManager = &statManager;
} }
} }
@ -77,20 +149,6 @@ ThreadGroup::~ThreadGroup() {
delete m_FinishCV; delete m_FinishCV;
} }
// Returns the size in bytes of one compressed block produced by m_Func, or 0
// if m_Func is not one of the compression functions recognized here.
unsigned int ThreadGroup::GetCompressedBlockSize() {
  if(m_Func == BC7C::CompressImageBC7) return 16;
#ifdef HAS_SSE_41
  if(m_Func == BC7C::CompressImageBC7SIMD) return 16;
#endif
  // Flowing off the end of a value-returning function is undefined behavior,
  // so give unrecognized functions a defined result.
  return 0;
}
// Returns the size in bytes of one uncompressed input block consumed by
// m_Func, or 0 if m_Func is not one of the compression functions recognized
// here.
unsigned int ThreadGroup::GetUncompressedBlockSize() {
  if(m_Func == BC7C::CompressImageBC7) return 64;
#ifdef HAS_SSE_41
  if(m_Func == BC7C::CompressImageBC7SIMD) return 64;
#endif
  // Flowing off the end of a value-returning function is undefined behavior,
  // so give unrecognized functions a defined result.
  return 0;
}
bool ThreadGroup::PrepareThreads() { bool ThreadGroup::PrepareThreads() {
// Make sure that threads aren't running. // Make sure that threads aren't running.
@ -126,9 +184,8 @@ bool ThreadGroup::PrepareThreads() {
CmpThread &t = m_Threads[m_ActiveThreads]; CmpThread &t = m_Threads[m_ActiveThreads];
t.m_Height = 4; t.m_Height = 4;
t.m_Width = numBlocksThisThread * 4; t.m_Width = numBlocksThisThread * 4;
t.m_CmpFunc = m_Func; t.m_OutBuf = m_OutBuf + (blocksProcessed * m_CompressedBlockSize);
t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize()); t.m_InBuf = m_ImageData + (blocksProcessed * m_UncompressedBlockSize);
t.m_InBuf = m_ImageData + (blocksProcessed * GetUncompressedBlockSize());
blocksProcessed += numBlocksThisThread; blocksProcessed += numBlocksThisThread;

View file

@ -21,6 +21,9 @@ private:
CompressionFunc m_CmpFunc; CompressionFunc m_CmpFunc;
CompressionFuncWithStats m_CmpFuncWithStats;
BlockStatManager *m_StatManager;
unsigned char *m_OutBuf; unsigned char *m_OutBuf;
const unsigned char *m_InBuf; const unsigned char *m_InBuf;
@ -29,10 +32,10 @@ private:
CmpThread(); CmpThread();
public: public:
void operator ()(); virtual ~CmpThread() { }
virtual void operator ()();
}; };
class ThreadGroup { class ThreadGroup {
public: public:
ThreadGroup( ThreadGroup(
@ -42,6 +45,16 @@ class ThreadGroup {
CompressionFunc func, CompressionFunc func,
unsigned char *outBuf unsigned char *outBuf
); );
ThreadGroup(
int numThreads,
const unsigned char *inBuf,
unsigned int inBufSz,
CompressionFuncWithStats func,
BlockStatManager &statManager,
unsigned char *outBuf
);
~ThreadGroup(); ~ThreadGroup();
bool PrepareThreads(); bool PrepareThreads();
@ -75,11 +88,10 @@ class ThreadGroup {
// State variables. // State variables.
const unsigned int m_ImageDataSz; const unsigned int m_ImageDataSz;
const unsigned char *const m_ImageData; const unsigned char *const m_ImageData;
const CompressionFunc m_Func;
unsigned char *m_OutBuf; unsigned char *m_OutBuf;
unsigned int GetCompressedBlockSize(); const unsigned int m_CompressedBlockSize;
unsigned int GetUncompressedBlockSize(); const unsigned int m_UncompressedBlockSize;
StopWatch m_StopWatch; StopWatch m_StopWatch;

View file

@ -1,20 +1,11 @@
#include "WorkerQueue.h" #include "WorkerQueue.h"
#include "BC7Compressor.h"
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <algorithm>
template <typename T> #include "BC7Compressor.h"
static inline T max(const T &a, const T &b) {
return (a > b)? a : b;
}
// Returns a copy of the smaller of the two arguments. When neither compares
// less than the other, the second argument is the one returned.
template <typename T>
static inline T min(const T &a, const T &b) {
  if(a < b) {
    return a;
  }
  return b;
}
template <typename T> template <typename T>
static inline void clamp(T &x, const T &min, const T &max) { static inline void clamp(T &x, const T &min, const T &max) {
@ -36,7 +27,10 @@ void WorkerThread::operator()() {
} }
CompressionFunc f = m_Parent->GetCompressionFunc(); CompressionFunc f = m_Parent->GetCompressionFunc();
if(!f) { CompressionFuncWithStats fStat = m_Parent->GetCompressionFuncWithStats();
BlockStatManager *statManager = m_Parent->GetBlockStatManager();
if(!(f || (fStat && statManager))) {
fprintf(stderr, "%s\n", "Illegal worker queue initialization -- compression func is NULL."); fprintf(stderr, "%s\n", "Illegal worker queue initialization -- compression func is NULL.");
return; return;
} }
@ -63,7 +57,11 @@ void WorkerThread::operator()() {
{ {
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx); const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx); uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4); if(f)
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
else
(*fStat)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4, *statManager);
break; break;
} }
@ -91,22 +89,57 @@ WorkerQueue::WorkerQueue(
uint8 *outBuf uint8 *outBuf
) )
: m_NumCompressions(0) : m_NumCompressions(0)
, m_TotalNumCompressions(max(uint32(1), numCompressions)) , m_TotalNumCompressions(std::max(uint32(1), numCompressions))
, m_NumThreads(numThreads) , m_NumThreads(numThreads)
, m_WaitingThreads(0) , m_WaitingThreads(0)
, m_ActiveThreads(0) , m_ActiveThreads(0)
, m_JobSize(max(uint32(1), jobSize)) , m_JobSize(std::max(uint32(1), jobSize))
, m_InBufSz(inBufSz) , m_InBufSz(inBufSz)
, m_InBuf(inBuf) , m_InBuf(inBuf)
, m_OutBuf(outBuf) , m_OutBuf(outBuf)
, m_NextBlock(0) , m_NextBlock(0)
, m_CompressionFunc(func) , m_CompressionFunc(func)
, m_CompressionFuncWithStats(NULL)
, m_BlockStatManager(NULL)
{ {
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads)); clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
#ifndef NDEBUG #ifndef NDEBUG
if(m_InBufSz % 64) { if(m_InBufSz % 64) {
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?"); fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
}
#endif
}
WorkerQueue::WorkerQueue(
uint32 numCompressions,
uint32 numThreads,
uint32 jobSize,
const uint8 *inBuf,
uint32 inBufSz,
CompressionFuncWithStats func,
BlockStatManager &blockStatManager,
uint8 *outBuf
)
: m_NumCompressions(0)
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
, m_NumThreads(numThreads)
, m_WaitingThreads(0)
, m_ActiveThreads(0)
, m_JobSize(std::max(uint32(1), jobSize))
, m_InBufSz(inBufSz)
, m_InBuf(inBuf)
, m_OutBuf(outBuf)
, m_NextBlock(0)
, m_CompressionFunc(NULL)
, m_CompressionFuncWithStats(func)
, m_BlockStatManager(&blockStatManager)
{
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
#ifndef NDEBUG
if(m_InBufSz % 64) {
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
} }
#endif #endif
} }
@ -182,7 +215,7 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
// The number of blocks to process is either the job size // The number of blocks to process is either the job size
// or the number of blocks remaining. // or the number of blocks remaining.
int blocksProcessed = min(m_JobSize, totalBlocks - m_NextBlock); int blocksProcessed = std::min(m_JobSize, totalBlocks - m_NextBlock);
m_NumBlocks[threadIdx] = blocksProcessed; m_NumBlocks[threadIdx] = blocksProcessed;
// Make sure the next block is updated. // Make sure the next block is updated.

View file

@ -44,6 +44,17 @@ class WorkerQueue {
uint8 *outBuf uint8 *outBuf
); );
WorkerQueue(
uint32 numCompressions,
uint32 numThreads,
uint32 jobSize,
const uint8 *inBuf,
uint32 inBufSz,
CompressionFuncWithStats func,
BlockStatManager &blockStatManager,
uint8 *outBuf
);
~WorkerQueue() { } ~WorkerQueue() { }
// Runs the workers // Runs the workers
@ -80,6 +91,11 @@ class WorkerQueue {
const CompressionFunc m_CompressionFunc; const CompressionFunc m_CompressionFunc;
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; } CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
BlockStatManager *m_BlockStatManager;
const CompressionFuncWithStats m_CompressionFuncWithStats;
CompressionFuncWithStats GetCompressionFuncWithStats() const { return m_CompressionFuncWithStats; }
BlockStatManager *GetBlockStatManager() const { return m_BlockStatManager; }
StopWatch m_StopWatch; StopWatch m_StopWatch;
WorkerThread::EAction AcceptThreadData(uint32 threadIdx); WorkerThread::EAction AcceptThreadData(uint32 threadIdx);