mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-08 05:45:27 +00:00
Make stats collection threadsafe.
This commit is contained in:
parent
1abc54cf21
commit
790df0736f
|
@ -42,8 +42,6 @@ public:
|
|||
explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque), m_Attributes(&(kModeAttributes[mode])), m_RotateMode(0), m_IndexMode(0) { }
|
||||
~BC7CompressionMode() { }
|
||||
|
||||
static int NumUses[8];
|
||||
static void ResetNumUses() { memset(NumUses, 0, sizeof(NumUses)); }
|
||||
double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters);
|
||||
|
||||
// This switch controls the quality of the simulated annealing optimizer. We will not make
|
||||
|
|
|
@ -286,7 +286,6 @@ const uint32 kBC7InterpolationValues[4][16][2] = {
|
|||
};
|
||||
|
||||
int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting.
|
||||
int BC7CompressionMode::NumUses[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = {
|
||||
{ 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared },
|
||||
|
@ -1471,11 +1470,6 @@ namespace BC7C
|
|||
stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31);
|
||||
}
|
||||
|
||||
static int gModeChosen = -1;
|
||||
static int gBestMode = -1;
|
||||
static double gModeEstimate[ BC7CompressionMode::kNumModes ];
|
||||
static double gModeError[ BC7CompressionMode::kNumModes ];
|
||||
|
||||
static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
|
||||
|
||||
// Compress an image using BC7 compression. Use the inBuf parameter to point to an image in
|
||||
|
@ -1485,7 +1479,6 @@ namespace BC7C
|
|||
void CompressImageBC7(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height)
|
||||
{
|
||||
uint32 block[16];
|
||||
BC7CompressionMode::ResetNumUses();
|
||||
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
|
||||
|
||||
for(int j = 0; j < height; j += 4)
|
||||
|
@ -1494,7 +1487,6 @@ namespace BC7C
|
|||
{
|
||||
// ExtractBlock(inBuf + i * 4, width, block);
|
||||
CompressBC7Block((const uint32 *)inBuf, outBuf);
|
||||
BC7CompressionMode::NumUses[gBestMode]++;
|
||||
|
||||
#ifndef NDEBUG
|
||||
uint8 *block = (uint8 *)outBuf;
|
||||
|
@ -1530,7 +1522,6 @@ namespace BC7C
|
|||
BlockStatManager &statManager
|
||||
) {
|
||||
uint32 block[16];
|
||||
BC7CompressionMode::ResetNumUses();
|
||||
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
|
||||
|
||||
for(int j = 0; j < height; j += 4)
|
||||
|
@ -1539,7 +1530,6 @@ namespace BC7C
|
|||
{
|
||||
// ExtractBlock(inBuf + i * 4, width, block);
|
||||
CompressBC7Block((const uint32 *)inBuf, outBuf, statManager);
|
||||
BC7CompressionMode::NumUses[gBestMode]++;
|
||||
|
||||
#ifndef NDEBUG
|
||||
uint8 *block = (uint8 *)outBuf;
|
||||
|
@ -1574,16 +1564,25 @@ namespace BC7C
|
|||
}
|
||||
}
|
||||
|
||||
static double CompressTwoClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) {
|
||||
static double CompressTwoClusters(
|
||||
int shapeIdx,
|
||||
const RGBACluster *clusters,
|
||||
uint8 *outBuf,
|
||||
bool opaque,
|
||||
double *errors = NULL,
|
||||
int *modeChosen = NULL
|
||||
) {
|
||||
|
||||
uint8 tempBuf1[16];
|
||||
BitStream tmpStream1(tempBuf1, 128, 0);
|
||||
BC7CompressionMode compressor1(1, opaque);
|
||||
|
||||
double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters);
|
||||
gModeError[1] = bestError;
|
||||
|
||||
if(errors) errors[1] = bestError;
|
||||
if(modeChosen) *modeChosen = 1;
|
||||
|
||||
memcpy(outBuf, tempBuf1, 16);
|
||||
gModeChosen = 1;
|
||||
if(bestError == 0.0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
@ -1593,9 +1592,9 @@ namespace BC7C
|
|||
BC7CompressionMode compressor3(3, opaque);
|
||||
|
||||
double error = compressor3.Compress(tmpStream3, shapeIdx, clusters);
|
||||
gModeError[3] = error;
|
||||
if(errors) errors[3] = error;
|
||||
if(error < bestError) {
|
||||
gModeChosen = 3;
|
||||
if(modeChosen) *modeChosen = 3;
|
||||
bestError = error;
|
||||
memcpy(outBuf, tempBuf3, 16);
|
||||
if(bestError == 0.0) {
|
||||
|
@ -1610,10 +1609,10 @@ namespace BC7C
|
|||
BitStream tmpStream7(tempBuf7, 128, 0);
|
||||
BC7CompressionMode compressor7(7, opaque);
|
||||
error = compressor7.Compress(tmpStream7, shapeIdx, clusters);
|
||||
gModeError[7] = error;
|
||||
if(errors) errors[7] = error;
|
||||
if(error < bestError) {
|
||||
gModeChosen = 7;
|
||||
memcpy(outBuf, tempBuf7, 16);
|
||||
if(modeChosen) *modeChosen = 7;
|
||||
memcpy(outBuf, tempBuf7, 16);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
@ -1621,8 +1620,14 @@ namespace BC7C
|
|||
return bestError;
|
||||
}
|
||||
|
||||
static double CompressThreeClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) {
|
||||
|
||||
static double CompressThreeClusters(
|
||||
int shapeIdx,
|
||||
const RGBACluster *clusters,
|
||||
uint8 *outBuf,
|
||||
bool opaque,
|
||||
double *errors = NULL,
|
||||
int *modeChosen = NULL
|
||||
) {
|
||||
uint8 tempBuf0[16];
|
||||
BitStream tmpStream0(tempBuf0, 128, 0);
|
||||
|
||||
|
@ -1635,22 +1640,23 @@ namespace BC7C
|
|||
double error, bestError;
|
||||
if(shapeIdx < 16) {
|
||||
bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters);
|
||||
gModeError[0] = bestError;
|
||||
if(errors) errors[0] = bestError;
|
||||
}
|
||||
else {
|
||||
bestError = DBL_MAX;
|
||||
gModeError[0] = -1.0;
|
||||
if(errors) errors[0] = -1.0;
|
||||
}
|
||||
gModeChosen = 0;
|
||||
|
||||
if(modeChosen) *modeChosen = 0;
|
||||
memcpy(outBuf, tempBuf0, 16);
|
||||
if(bestError == 0.0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
error = compressor2.Compress(tmpStream2, shapeIdx, clusters);
|
||||
gModeError[2] = error;
|
||||
if(errors) errors[2] = error;
|
||||
if(error < bestError) {
|
||||
gModeChosen = 2;
|
||||
if(modeChosen) *modeChosen = 2;
|
||||
memcpy(outBuf, tempBuf2, 16);
|
||||
return error;
|
||||
}
|
||||
|
@ -1973,11 +1979,12 @@ namespace BC7C
|
|||
return error;
|
||||
}
|
||||
|
||||
static void UpdateErrorEstimate(uint32 mode, double est) {
|
||||
static void UpdateErrorEstimate(double *estimates, uint32 mode, double est) {
|
||||
assert(estimates);
|
||||
assert(mode >= 0);
|
||||
assert(mode < BC7CompressionMode::kNumModes);
|
||||
if(gModeEstimate[mode] == -1.0 || est < gModeEstimate[mode]) {
|
||||
gModeEstimate[mode] = est;
|
||||
if(estimates[mode] == -1.0 || est < estimates[mode]) {
|
||||
estimates[mode] = est;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1988,43 +1995,62 @@ namespace BC7C
|
|||
private:
|
||||
uint32 m_BlockIdx;
|
||||
BlockStatManager &m_BSM;
|
||||
|
||||
int *m_ModePtr;
|
||||
double *m_Estimates;
|
||||
double *m_Errors;
|
||||
|
||||
public:
|
||||
RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m) { }
|
||||
RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m)
|
||||
, m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { }
|
||||
void SetMode(int *modePtr) { m_ModePtr = modePtr; }
|
||||
void SetEstimates(double *estimates) { m_Estimates = estimates; }
|
||||
void SetErrors(double *errors) { m_Errors = errors; }
|
||||
|
||||
~RAIIStatSaver() {
|
||||
|
||||
BlockStat s (kBlockStatString[eBlockStat_Mode], gBestMode);
|
||||
assert(m_ModePtr);
|
||||
assert(m_Estimates);
|
||||
assert(m_Errors);
|
||||
|
||||
BlockStat s (kBlockStatString[eBlockStat_Mode], *m_ModePtr);
|
||||
m_BSM.AddStat(m_BlockIdx, s);
|
||||
|
||||
for(int i = 0; i < BC7CompressionMode::kNumModes; i++) {
|
||||
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], gModeEstimate[i]);
|
||||
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], m_Estimates[i]);
|
||||
m_BSM.AddStat(m_BlockIdx, s);
|
||||
|
||||
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], gModeError[i]);
|
||||
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], m_Errors[i]);
|
||||
m_BSM.AddStat(m_BlockIdx, s);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
uint32 blockIdx = 0;
|
||||
int bestMode = 0;
|
||||
double modeEstimate[BC7CompressionMode::kNumModes];
|
||||
double modeError[BC7CompressionMode::kNumModes];
|
||||
|
||||
// reset global variables...
|
||||
gBestMode = 0;
|
||||
bestMode = 0;
|
||||
for(int i = 0; i < BC7CompressionMode::kNumModes; i++){
|
||||
gModeError[i] = gModeEstimate[i] = -1.0;
|
||||
modeError[i] = modeEstimate[i] = -1.0;
|
||||
}
|
||||
|
||||
blockIdx = statManager.BeginBlock();
|
||||
|
||||
uint32 blockIdx = statManager.BeginBlock();
|
||||
for(int i = 0; i < kNumBlockStats; i++) {
|
||||
statManager.AddStat(blockIdx, BlockStat(kBlockStatString[i], 0));
|
||||
}
|
||||
|
||||
RAIIStatSaver __statsaver__(blockIdx, statManager);
|
||||
__statsaver__.SetMode(&bestMode);
|
||||
__statsaver__.SetEstimates(modeEstimate);
|
||||
__statsaver__.SetErrors(modeError);
|
||||
|
||||
// All a single color?
|
||||
if(AllOneColor(block)) {
|
||||
BitStream bStrm(outBuf, 128, 0);
|
||||
CompressOptimalColorBC7(*block, bStrm);
|
||||
gBestMode = 5;
|
||||
bestMode = 5;
|
||||
|
||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0);
|
||||
statManager.AddStat(blockIdx, s);
|
||||
|
@ -2050,7 +2076,7 @@ namespace BC7C
|
|||
if(transparent) {
|
||||
BitStream bStrm(outBuf, 128, 0);
|
||||
WriteTransparentBlock(bStrm);
|
||||
gBestMode = 6;
|
||||
bestMode = 6;
|
||||
|
||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 1);
|
||||
statManager.AddStat(blockIdx, s);
|
||||
|
@ -2065,12 +2091,12 @@ namespace BC7C
|
|||
blockCluster.GetBoundingBox(Min, Max);
|
||||
v = Max - Min;
|
||||
if(v * v == 0) {
|
||||
gModeEstimate[6] = 0.0;
|
||||
modeEstimate[6] = 0.0;
|
||||
}
|
||||
else {
|
||||
const float *w = GetErrorMetric();
|
||||
const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3]));
|
||||
UpdateErrorEstimate(6, err);
|
||||
UpdateErrorEstimate(modeEstimate, 6, err);
|
||||
|
||||
#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
|
||||
double eigOne = blockCluster.GetPrincipalEigenvalue();
|
||||
|
@ -2124,11 +2150,11 @@ namespace BC7C
|
|||
#endif
|
||||
|
||||
if(errEstimate[0] != -1.0) {
|
||||
UpdateErrorEstimate(1, errEstimate[0]);
|
||||
UpdateErrorEstimate(modeEstimate, 1, errEstimate[0]);
|
||||
}
|
||||
|
||||
if(errEstimate[1] != -1.0) {
|
||||
UpdateErrorEstimate(3, errEstimate[1]);
|
||||
UpdateErrorEstimate(modeEstimate, 3, errEstimate[1]);
|
||||
}
|
||||
|
||||
if(err < bestError[0]) {
|
||||
|
@ -2138,8 +2164,9 @@ namespace BC7C
|
|||
|
||||
// If it's small, we'll take it!
|
||||
if(err < 1e-9) {
|
||||
CompressTwoClusters(i, clusters, outBuf, opaque);
|
||||
gBestMode = gModeChosen;
|
||||
int modeChosen;
|
||||
CompressTwoClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
|
||||
bestMode = modeChosen;
|
||||
|
||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
|
||||
statManager.AddStat(blockIdx, s);
|
||||
|
@ -2185,11 +2212,11 @@ namespace BC7C
|
|||
#endif
|
||||
|
||||
if(errEstimate[0] != -1.0) {
|
||||
UpdateErrorEstimate(0, errEstimate[0]);
|
||||
UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]);
|
||||
}
|
||||
|
||||
if(errEstimate[1] != -1.0) {
|
||||
UpdateErrorEstimate(2, errEstimate[1]);
|
||||
UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]);
|
||||
}
|
||||
|
||||
if(err < bestError[1]) {
|
||||
|
@ -2199,8 +2226,9 @@ namespace BC7C
|
|||
|
||||
// If it's small, we'll take it!
|
||||
if(err < 1e-9) {
|
||||
CompressThreeClusters(i, clusters, outBuf, opaque);
|
||||
gBestMode = gModeChosen;
|
||||
int modeChosen;
|
||||
CompressThreeClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
|
||||
bestMode = modeChosen;
|
||||
|
||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
|
||||
statManager.AddStat(blockIdx, s);
|
||||
|
@ -2226,8 +2254,8 @@ namespace BC7C
|
|||
BitStream tempStream1 (tempBuf1, 128, 0);
|
||||
BC7CompressionMode compressor(6, opaque);
|
||||
double best = compressor.Compress(tempStream1, 0, &blockCluster);
|
||||
gModeError[6] = best;
|
||||
gBestMode = 6;
|
||||
modeError[6] = best;
|
||||
bestMode = 6;
|
||||
if(best == 0.0f) {
|
||||
memcpy(outBuf, tempBuf1, 16);
|
||||
return;
|
||||
|
@ -2243,7 +2271,7 @@ namespace BC7C
|
|||
double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
|
||||
if(error < best) {
|
||||
|
||||
gBestMode = mode;
|
||||
bestMode = mode;
|
||||
best = error;
|
||||
|
||||
if(best == 0.0f) {
|
||||
|
@ -2257,10 +2285,11 @@ namespace BC7C
|
|||
}
|
||||
}
|
||||
|
||||
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque);
|
||||
int modeChosen;
|
||||
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen);
|
||||
if(error < best) {
|
||||
|
||||
gBestMode = gModeChosen;
|
||||
bestMode = modeChosen;
|
||||
best = error;
|
||||
|
||||
if(error == 0.0f) {
|
||||
|
@ -2273,9 +2302,9 @@ namespace BC7C
|
|||
}
|
||||
|
||||
if(opaque) {
|
||||
if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque) < best) {
|
||||
if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque, modeError, &modeChosen) < best) {
|
||||
|
||||
gBestMode = gModeChosen;
|
||||
bestMode = modeChosen;
|
||||
memcpy(outBuf, tempBuf2, 16);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -113,12 +113,6 @@ int main(int argc, char **argv) {
|
|||
|
||||
} while(knowArg && fileArg < argc);
|
||||
|
||||
if(numThreads > 1 && bSaveLog) {
|
||||
bSaveLog = false;
|
||||
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
|
||||
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
|
||||
}
|
||||
|
||||
if(fileArg == argc) {
|
||||
PrintUsage();
|
||||
exit(1);
|
||||
|
|
|
@ -13,6 +13,8 @@ public:
|
|||
|
||||
BlockStat(const BlockStat &);
|
||||
BlockStat &operator=(const BlockStat &);
|
||||
|
||||
void ToString(char *buf, int bufSz) const;
|
||||
|
||||
private:
|
||||
const enum Type {
|
||||
|
|
|
@ -40,6 +40,22 @@ BlockStat &BlockStat::operator=(const BlockStat &other) {
|
|||
memcpy(this, &other, sizeof(*this));
|
||||
}
|
||||
|
||||
void BlockStat::ToString(char *buf, int bufSz) const {
|
||||
switch(m_Type) {
|
||||
case BlockStat::eType_Float:
|
||||
snprintf(buf, bufSz, "%s,%f", m_StatName, m_FloatStat);
|
||||
break;
|
||||
|
||||
case BlockStat::eType_Int:
|
||||
snprintf(buf, bufSz, "%s,%llu", m_StatName, m_IntStat);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(!"Unknown stat type!");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// BlockStat Manager Implementation
|
||||
|
@ -71,7 +87,7 @@ uint32 BlockStatManager::BeginBlock() {
|
|||
return m_NextBlock-1;
|
||||
}
|
||||
|
||||
TCLock lock(m_Mutex);
|
||||
TCLock lock (m_Mutex);
|
||||
return m_NextBlock++;
|
||||
}
|
||||
|
||||
|
@ -82,9 +98,38 @@ void BlockStatManager::AddStat(uint32 blockIdx, const BlockStat &stat) {
|
|||
return;
|
||||
}
|
||||
|
||||
TCLock lock (m_Mutex);
|
||||
m_BlockStatList[blockIdx].AddStat(stat);
|
||||
}
|
||||
|
||||
void BlockStatManager::ToFile(const CHAR *filename) {
|
||||
|
||||
FileStream fstr (filename, eFileMode_Write);
|
||||
|
||||
for(int i = 0; i < m_BlockStatListSz; i++) {
|
||||
const BlockStatList *head = &(m_BlockStatList[i]);
|
||||
while(head) {
|
||||
BlockStat s = head->GetStat();
|
||||
|
||||
CHAR statStr[256];
|
||||
s.ToString(statStr, 256);
|
||||
|
||||
CHAR str[256];
|
||||
snprintf(str, 256, "%d,%s\n", i, statStr);
|
||||
|
||||
int strLen = strlen(str);
|
||||
if(strLen > 255) {
|
||||
str[255] = '\n';
|
||||
strLen = 256;
|
||||
}
|
||||
|
||||
fstr.Write((uint8 *)str, strLen);
|
||||
|
||||
head = head->GetTail();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// BlockStat List Implementation
|
||||
|
@ -111,6 +156,7 @@ BlockStatManager::BlockStatList::~BlockStatList() {
|
|||
}
|
||||
|
||||
void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
|
||||
|
||||
if(strncmp(stat.m_StatName, m_Stat.m_StatName, BlockStat::kStatNameSz) == 0) {
|
||||
m_Stat = stat;
|
||||
}
|
||||
|
@ -126,40 +172,3 @@ void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BlockStatManager::ToFile(const CHAR *filename) {
|
||||
|
||||
FileStream fstr (filename, eFileMode_Write);
|
||||
|
||||
for(int i = 0; i < m_BlockStatListSz; i++) {
|
||||
const BlockStatList *head = &(m_BlockStatList[i]);
|
||||
while(head) {
|
||||
BlockStat s = head->GetStat();
|
||||
|
||||
CHAR statStr[256];
|
||||
switch(s.m_Type) {
|
||||
case BlockStat::eType_Float:
|
||||
snprintf(statStr, 256, "%d,%s,%f\n", i, s.m_StatName, s.m_FloatStat);
|
||||
break;
|
||||
|
||||
case BlockStat::eType_Int:
|
||||
snprintf(statStr, 256, "%d,%s,%llu\n", i, s.m_StatName, s.m_IntStat);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
int statStrLen = strlen(statStr);
|
||||
if(statStrLen > 255) {
|
||||
statStr[255] = '\n';
|
||||
statStrLen = 256;
|
||||
}
|
||||
fstr.Write((uint8 *)statStr, statStrLen);
|
||||
|
||||
head = head->GetTail();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -113,16 +113,7 @@ static double CompressImageInSerial(
|
|||
return cmpTime;
|
||||
}
|
||||
|
||||
static double CompressImageWithThreads(
|
||||
const unsigned char *imgData,
|
||||
const unsigned int imgDataSz,
|
||||
const SCompressionSettings &settings,
|
||||
unsigned char *outBuf
|
||||
) {
|
||||
|
||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||
|
||||
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
|
||||
static double CompressThreadGroup(ThreadGroup &tgrp, const SCompressionSettings &settings) {
|
||||
if(!(tgrp.PrepareThreads())) {
|
||||
assert(!"Thread group failed to prepare threads?!");
|
||||
return -1.0f;
|
||||
|
@ -140,7 +131,29 @@ static double CompressImageWithThreads(
|
|||
cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds();
|
||||
}
|
||||
|
||||
tgrp.CleanUpThreads();
|
||||
tgrp.CleanUpThreads();
|
||||
return cmpTimeTotal;
|
||||
}
|
||||
|
||||
static double CompressImageWithThreads(
|
||||
const unsigned char *imgData,
|
||||
const unsigned int imgDataSz,
|
||||
const SCompressionSettings &settings,
|
||||
unsigned char *outBuf
|
||||
) {
|
||||
|
||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
||||
|
||||
double cmpTimeTotal = 0.0;
|
||||
if(fStats && settings.pStatManager) {
|
||||
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, fStats, *(settings.pStatManager), outBuf);
|
||||
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
||||
}
|
||||
else {
|
||||
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
|
||||
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
||||
}
|
||||
|
||||
double cmpTime = cmpTimeTotal / double(settings.iNumCompressions);
|
||||
return cmpTime;
|
||||
|
@ -153,21 +166,40 @@ static double CompressImageWithWorkerQueue(
|
|||
unsigned char *outBuf
|
||||
) {
|
||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
||||
|
||||
WorkerQueue wq (
|
||||
settings.iNumCompressions,
|
||||
settings.iNumThreads,
|
||||
settings.iJobSize,
|
||||
imgData,
|
||||
imgDataSz,
|
||||
f,
|
||||
outBuf
|
||||
);
|
||||
double cmpTimeTotal = 0.0;
|
||||
if(fStats && settings.pStatManager) {
|
||||
WorkerQueue wq (
|
||||
settings.iNumCompressions,
|
||||
settings.iNumThreads,
|
||||
settings.iJobSize,
|
||||
imgData,
|
||||
imgDataSz,
|
||||
fStats,
|
||||
*(settings.pStatManager),
|
||||
outBuf
|
||||
);
|
||||
|
||||
wq.Run();
|
||||
wq.Run();
|
||||
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
|
||||
}
|
||||
else {
|
||||
WorkerQueue wq (
|
||||
settings.iNumCompressions,
|
||||
settings.iNumThreads,
|
||||
settings.iJobSize,
|
||||
imgData,
|
||||
imgDataSz,
|
||||
f,
|
||||
outBuf
|
||||
);
|
||||
|
||||
return wq.GetStopWatch().TimeInMilliseconds() /
|
||||
double(settings.iNumCompressions);
|
||||
wq.Run();
|
||||
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
|
||||
}
|
||||
|
||||
return cmpTimeTotal / double(settings.iNumCompressions);
|
||||
}
|
||||
|
||||
bool CompressImageData(
|
||||
|
|
|
@ -13,13 +13,15 @@ CmpThread::CmpThread()
|
|||
, m_Width(0)
|
||||
, m_Height(0)
|
||||
, m_CmpFunc(NULL)
|
||||
, m_CmpFuncWithStats(NULL)
|
||||
, m_StatManager(NULL)
|
||||
, m_OutBuf(NULL)
|
||||
, m_InBuf(NULL)
|
||||
, m_ParentExitFlag(NULL)
|
||||
{ }
|
||||
|
||||
void CmpThread::operator()() {
|
||||
if(!m_CmpFunc || !m_OutBuf || !m_InBuf
|
||||
if(!m_OutBuf || !m_InBuf
|
||||
|| !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV
|
||||
|| !m_StartBarrier
|
||||
|| !m_ParentExitFlag
|
||||
|
@ -28,6 +30,11 @@ void CmpThread::operator()() {
|
|||
return;
|
||||
}
|
||||
|
||||
if(!(m_CmpFunc || (m_CmpFuncWithStats && m_StatManager))) {
|
||||
fprintf(stderr, "Incorrect thread function pointer.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
while(1) {
|
||||
// Wait for signal to start work...
|
||||
m_StartBarrier->Wait();
|
||||
|
@ -36,7 +43,10 @@ void CmpThread::operator()() {
|
|||
return;
|
||||
}
|
||||
|
||||
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
|
||||
if(m_CmpFunc)
|
||||
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
|
||||
else
|
||||
(*m_CmpFuncWithStats)(m_InBuf, m_OutBuf, m_Width, m_Height, *m_StatManager);
|
||||
|
||||
{
|
||||
TCLock lock(*m_ParentCounterLock);
|
||||
|
@ -47,19 +57,37 @@ void CmpThread::operator()() {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
|
||||
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
||||
, m_FinishMutex(new TCMutex())
|
||||
, m_FinishCV(new TCConditionVariable())
|
||||
, m_NumThreads(numThreads)
|
||||
, m_ActiveThreads(0)
|
||||
, m_Func(func)
|
||||
, m_ImageDataSz(inBufSz)
|
||||
, m_ImageData(inBuf)
|
||||
, m_OutBuf(outBuf)
|
||||
, m_ThreadState(eThreadState_Done)
|
||||
, m_ExitFlag(false)
|
||||
, m_CompressedBlockSize(
|
||||
(func == BC7C::CompressImageBC7
|
||||
#ifdef HAS_SSE_41
|
||||
|| func == BC7C::CompressImageBC7SIMD
|
||||
#endif
|
||||
)?
|
||||
16
|
||||
:
|
||||
0
|
||||
)
|
||||
, m_UncompressedBlockSize(
|
||||
(func == BC7C::CompressImageBC7
|
||||
#ifdef HAS_SSE_41
|
||||
|| func == BC7C::CompressImageBC7SIMD
|
||||
#endif
|
||||
)?
|
||||
64
|
||||
:
|
||||
0
|
||||
)
|
||||
{
|
||||
for(int i = 0; i < kMaxNumThreads; i++) {
|
||||
// Thread synchronization primitives
|
||||
|
@ -68,6 +96,50 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
|
|||
m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
|
||||
m_Threads[i].m_StartBarrier = m_StartBarrier;
|
||||
m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
|
||||
m_Threads[i].m_CmpFunc = func;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a thread group whose workers run a stats-collecting compression
// function.
//
// numThreads  - stored as the thread count; the start barrier is created for
//               numThreads + 1 participants (presumably the workers plus the
//               controlling thread -- confirm against TCBarrier usage).
// inBuf       - source image data, inBufSz bytes long.
// func        - compression function that also receives a BlockStatManager.
// statManager - stat sink handed to every worker by address.
// outBuf      - destination for the compressed output.
//
// The compressed/uncompressed block sizes are 16 and 64 when func is
// BC7C::CompressImageBC7Stats, and 0 for any other function.
ThreadGroup::ThreadGroup(
  int numThreads,
  const unsigned char *inBuf,
  unsigned int inBufSz,
  CompressionFuncWithStats func,
  BlockStatManager &statManager,
  unsigned char *outBuf
)
  : m_StartBarrier(new TCBarrier(numThreads + 1))
  , m_FinishMutex(new TCMutex())
  , m_FinishCV(new TCConditionVariable())
  , m_NumThreads(numThreads)
  , m_ActiveThreads(0)
  , m_ImageDataSz(inBufSz)
  , m_ImageData(inBuf)
  , m_OutBuf(outBuf)
  , m_ThreadState(eThreadState_Done)
  , m_ExitFlag(false)
  , m_CompressedBlockSize((func == BC7C::CompressImageBC7Stats) ? 16 : 0)
  , m_UncompressedBlockSize((func == BC7C::CompressImageBC7Stats) ? 64 : 0)
{
  // Every slot in the fixed-size thread array is wired up, not only the
  // first numThreads entries.
  for(int slot = 0; slot < kMaxNumThreads; ++slot) {
    CmpThread &worker = m_Threads[slot];

    // Shared synchronization state owned by this group.
    worker.m_ParentCounterLock = m_FinishMutex;
    worker.m_FinishCV = m_FinishCV;
    worker.m_ParentCounter = &m_ThreadsFinished;
    worker.m_StartBarrier = m_StartBarrier;
    worker.m_ParentExitFlag = &m_ExitFlag;

    // Work to perform and where to record its statistics.
    worker.m_CmpFuncWithStats = func;
    worker.m_StatManager = &statManager;
  }
}
|
||||
|
||||
|
@ -77,20 +149,6 @@ ThreadGroup::~ThreadGroup() {
|
|||
delete m_FinishCV;
|
||||
}
|
||||
|
||||
unsigned int ThreadGroup::GetCompressedBlockSize() {
|
||||
if(m_Func == BC7C::CompressImageBC7) return 16;
|
||||
#ifdef HAS_SSE_41
|
||||
if(m_Func == BC7C::CompressImageBC7SIMD) return 16;
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned int ThreadGroup::GetUncompressedBlockSize() {
|
||||
if(m_Func == BC7C::CompressImageBC7) return 64;
|
||||
#ifdef HAS_SSE_41
|
||||
if(m_Func == BC7C::CompressImageBC7SIMD) return 64;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool ThreadGroup::PrepareThreads() {
|
||||
|
||||
// Make sure that threads aren't running.
|
||||
|
@ -126,9 +184,8 @@ bool ThreadGroup::PrepareThreads() {
|
|||
CmpThread &t = m_Threads[m_ActiveThreads];
|
||||
t.m_Height = 4;
|
||||
t.m_Width = numBlocksThisThread * 4;
|
||||
t.m_CmpFunc = m_Func;
|
||||
t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize());
|
||||
t.m_InBuf = m_ImageData + (blocksProcessed * GetUncompressedBlockSize());
|
||||
t.m_OutBuf = m_OutBuf + (blocksProcessed * m_CompressedBlockSize);
|
||||
t.m_InBuf = m_ImageData + (blocksProcessed * m_UncompressedBlockSize);
|
||||
|
||||
blocksProcessed += numBlocksThisThread;
|
||||
|
||||
|
|
|
@ -21,6 +21,9 @@ private:
|
|||
|
||||
CompressionFunc m_CmpFunc;
|
||||
|
||||
CompressionFuncWithStats m_CmpFuncWithStats;
|
||||
BlockStatManager *m_StatManager;
|
||||
|
||||
unsigned char *m_OutBuf;
|
||||
const unsigned char *m_InBuf;
|
||||
|
||||
|
@ -29,10 +32,10 @@ private:
|
|||
CmpThread();
|
||||
|
||||
public:
|
||||
void operator ()();
|
||||
virtual ~CmpThread() { }
|
||||
virtual void operator ()();
|
||||
};
|
||||
|
||||
|
||||
class ThreadGroup {
|
||||
public:
|
||||
ThreadGroup(
|
||||
|
@ -42,6 +45,16 @@ class ThreadGroup {
|
|||
CompressionFunc func,
|
||||
unsigned char *outBuf
|
||||
);
|
||||
|
||||
ThreadGroup(
|
||||
int numThreads,
|
||||
const unsigned char *inBuf,
|
||||
unsigned int inBufSz,
|
||||
CompressionFuncWithStats func,
|
||||
BlockStatManager &statManager,
|
||||
unsigned char *outBuf
|
||||
);
|
||||
|
||||
~ThreadGroup();
|
||||
|
||||
bool PrepareThreads();
|
||||
|
@ -75,11 +88,10 @@ class ThreadGroup {
|
|||
// State variables.
|
||||
const unsigned int m_ImageDataSz;
|
||||
const unsigned char *const m_ImageData;
|
||||
const CompressionFunc m_Func;
|
||||
unsigned char *m_OutBuf;
|
||||
|
||||
unsigned int GetCompressedBlockSize();
|
||||
unsigned int GetUncompressedBlockSize();
|
||||
const unsigned int m_CompressedBlockSize;
|
||||
const unsigned int m_UncompressedBlockSize;
|
||||
|
||||
StopWatch m_StopWatch;
|
||||
|
||||
|
|
|
@ -1,20 +1,11 @@
|
|||
#include "WorkerQueue.h"
|
||||
|
||||
#include "BC7Compressor.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <algorithm>
|
||||
|
||||
template <typename T>
|
||||
static inline T max(const T &a, const T &b) {
|
||||
return (a > b)? a : b;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline T min(const T &a, const T &b) {
|
||||
return (a < b)? a : b;
|
||||
}
|
||||
#include "BC7Compressor.h"
|
||||
|
||||
template <typename T>
|
||||
static inline void clamp(T &x, const T &min, const T &max) {
|
||||
|
@ -36,7 +27,10 @@ void WorkerThread::operator()() {
|
|||
}
|
||||
|
||||
CompressionFunc f = m_Parent->GetCompressionFunc();
|
||||
if(!f) {
|
||||
CompressionFuncWithStats fStat = m_Parent->GetCompressionFuncWithStats();
|
||||
BlockStatManager *statManager = m_Parent->GetBlockStatManager();
|
||||
|
||||
if(!(f || (fStat && statManager))) {
|
||||
fprintf(stderr, "%s\n", "Illegal worker queue initialization -- compression func is NULL.");
|
||||
return;
|
||||
}
|
||||
|
@ -63,7 +57,11 @@ void WorkerThread::operator()() {
|
|||
{
|
||||
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
|
||||
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
|
||||
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
||||
if(f)
|
||||
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
||||
else
|
||||
(*fStat)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4, *statManager);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -91,22 +89,57 @@ WorkerQueue::WorkerQueue(
|
|||
uint8 *outBuf
|
||||
)
|
||||
: m_NumCompressions(0)
|
||||
, m_TotalNumCompressions(max(uint32(1), numCompressions))
|
||||
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
|
||||
, m_NumThreads(numThreads)
|
||||
, m_WaitingThreads(0)
|
||||
, m_ActiveThreads(0)
|
||||
, m_JobSize(max(uint32(1), jobSize))
|
||||
, m_JobSize(std::max(uint32(1), jobSize))
|
||||
, m_InBufSz(inBufSz)
|
||||
, m_InBuf(inBuf)
|
||||
, m_OutBuf(outBuf)
|
||||
, m_NextBlock(0)
|
||||
, m_CompressionFunc(func)
|
||||
, m_CompressionFuncWithStats(NULL)
|
||||
, m_BlockStatManager(NULL)
|
||||
{
|
||||
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
||||
|
||||
#ifndef NDEBUG
|
||||
if(m_InBufSz % 64) {
|
||||
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?");
|
||||
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Builds a worker queue that runs a stats-collecting compression function.
//
// numCompressions  - stored as the total number of compressions, raised to
//                    at least 1.
// numThreads       - requested thread count; passed through clamp() with the
//                    bounds 1 and kMaxNumWorkerThreads.
// jobSize          - stored as the job size, raised to at least 1.
// inBuf / inBufSz  - source data and its size in bytes.
// func             - compression function that also receives the stat manager.
// blockStatManager - stat sink, stored by address.
// outBuf           - destination for the compressed output.
//
// The plain compression function pointer is left NULL, so workers use the
// stats variant.
WorkerQueue::WorkerQueue(
  uint32 numCompressions,
  uint32 numThreads,
  uint32 jobSize,
  const uint8 *inBuf,
  uint32 inBufSz,
  CompressionFuncWithStats func,
  BlockStatManager &blockStatManager,
  uint8 *outBuf
)
  : m_NumCompressions(0)
  , m_TotalNumCompressions(std::max(uint32(1), numCompressions))
  , m_NumThreads(numThreads)
  , m_WaitingThreads(0)
  , m_ActiveThreads(0)
  , m_JobSize(std::max(uint32(1), jobSize))
  , m_InBufSz(inBufSz)
  , m_InBuf(inBuf)
  , m_OutBuf(outBuf)
  , m_NextBlock(0)
  , m_CompressionFunc(NULL)
  // The class declares m_BlockStatManager before m_CompressionFuncWithStats;
  // listing them in that order keeps -Wreorder quiet.
  , m_BlockStatManager(&blockStatManager)
  , m_CompressionFuncWithStats(func)
{
  clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));

#ifndef NDEBUG
  // Debug-only warning about an input size that is not a multiple of 64.
  if(m_InBufSz % 64) {
    fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
  }
#endif
}
|
||||
|
@ -182,7 +215,7 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
|
|||
|
||||
// The number of blocks to process is either the job size
|
||||
// or the number of blocks remaining.
|
||||
int blocksProcessed = min(m_JobSize, totalBlocks - m_NextBlock);
|
||||
int blocksProcessed = std::min(m_JobSize, totalBlocks - m_NextBlock);
|
||||
m_NumBlocks[threadIdx] = blocksProcessed;
|
||||
|
||||
// Make sure the next block is updated.
|
||||
|
|
|
@ -44,6 +44,17 @@ class WorkerQueue {
|
|||
uint8 *outBuf
|
||||
);
|
||||
|
||||
WorkerQueue(
|
||||
uint32 numCompressions,
|
||||
uint32 numThreads,
|
||||
uint32 jobSize,
|
||||
const uint8 *inBuf,
|
||||
uint32 inBufSz,
|
||||
CompressionFuncWithStats func,
|
||||
BlockStatManager &blockStatManager,
|
||||
uint8 *outBuf
|
||||
);
|
||||
|
||||
~WorkerQueue() { }
|
||||
|
||||
// Runs the workers
|
||||
|
@ -80,6 +91,11 @@ class WorkerQueue {
|
|||
const CompressionFunc m_CompressionFunc;
|
||||
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
|
||||
|
||||
BlockStatManager *m_BlockStatManager;
|
||||
const CompressionFuncWithStats m_CompressionFuncWithStats;
|
||||
CompressionFuncWithStats GetCompressionFuncWithStats() const { return m_CompressionFuncWithStats; }
|
||||
BlockStatManager *GetBlockStatManager() const { return m_BlockStatManager; }
|
||||
|
||||
StopWatch m_StopWatch;
|
||||
|
||||
WorkerThread::EAction AcceptThreadData(uint32 threadIdx);
|
||||
|
|
Loading…
Reference in a new issue