mirror of
https://github.com/yuzu-emu/FasTC.git
synced 2025-01-23 19:11:05 +00:00
Make stats collection threadsafe.
This commit is contained in:
parent
1abc54cf21
commit
790df0736f
|
@ -42,8 +42,6 @@ public:
|
||||||
explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque), m_Attributes(&(kModeAttributes[mode])), m_RotateMode(0), m_IndexMode(0) { }
|
explicit BC7CompressionMode(int mode, bool opaque = true) : m_IsOpaque(opaque), m_Attributes(&(kModeAttributes[mode])), m_RotateMode(0), m_IndexMode(0) { }
|
||||||
~BC7CompressionMode() { }
|
~BC7CompressionMode() { }
|
||||||
|
|
||||||
static int NumUses[8];
|
|
||||||
static void ResetNumUses() { memset(NumUses, 0, sizeof(NumUses)); }
|
|
||||||
double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters);
|
double Compress(BitStream &stream, const int shapeIdx, const RGBACluster *clusters);
|
||||||
|
|
||||||
// This switch controls the quality of the simulated annealing optimizer. We will not make
|
// This switch controls the quality of the simulated annealing optimizer. We will not make
|
||||||
|
|
|
@ -286,7 +286,6 @@ const uint32 kBC7InterpolationValues[4][16][2] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting.
|
int BC7CompressionMode::MaxAnnealingIterations = 50; // This is a setting.
|
||||||
int BC7CompressionMode::NumUses[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
||||||
|
|
||||||
BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = {
|
BC7CompressionMode::Attributes BC7CompressionMode::kModeAttributes[kNumModes] = {
|
||||||
{ 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared },
|
{ 0, 4, 3, 3, 0, 4, 0, false, false, BC7CompressionMode::ePBitType_NotShared },
|
||||||
|
@ -1471,11 +1470,6 @@ namespace BC7C
|
||||||
stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31);
|
stream.WriteBits(kWMValues[gWMVal = (gWMVal+1) % kNumWMVals], 31);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gModeChosen = -1;
|
|
||||||
static int gBestMode = -1;
|
|
||||||
static double gModeEstimate[ BC7CompressionMode::kNumModes ];
|
|
||||||
static double gModeError[ BC7CompressionMode::kNumModes ];
|
|
||||||
|
|
||||||
static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
|
static void DecompressBC7Block(const uint8 block[16], uint32 outBuf[16]);
|
||||||
|
|
||||||
// Compress an image using BC7 compression. Use the inBuf parameter to point to an image in
|
// Compress an image using BC7 compression. Use the inBuf parameter to point to an image in
|
||||||
|
@ -1485,7 +1479,6 @@ namespace BC7C
|
||||||
void CompressImageBC7(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height)
|
void CompressImageBC7(const unsigned char *inBuf, unsigned char *outBuf, unsigned int width, unsigned int height)
|
||||||
{
|
{
|
||||||
uint32 block[16];
|
uint32 block[16];
|
||||||
BC7CompressionMode::ResetNumUses();
|
|
||||||
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
|
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
|
||||||
|
|
||||||
for(int j = 0; j < height; j += 4)
|
for(int j = 0; j < height; j += 4)
|
||||||
|
@ -1494,7 +1487,6 @@ namespace BC7C
|
||||||
{
|
{
|
||||||
// ExtractBlock(inBuf + i * 4, width, block);
|
// ExtractBlock(inBuf + i * 4, width, block);
|
||||||
CompressBC7Block((const uint32 *)inBuf, outBuf);
|
CompressBC7Block((const uint32 *)inBuf, outBuf);
|
||||||
BC7CompressionMode::NumUses[gBestMode]++;
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
uint8 *block = (uint8 *)outBuf;
|
uint8 *block = (uint8 *)outBuf;
|
||||||
|
@ -1530,7 +1522,6 @@ namespace BC7C
|
||||||
BlockStatManager &statManager
|
BlockStatManager &statManager
|
||||||
) {
|
) {
|
||||||
uint32 block[16];
|
uint32 block[16];
|
||||||
BC7CompressionMode::ResetNumUses();
|
|
||||||
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
|
BC7CompressionMode::MaxAnnealingIterations = min(BC7CompressionMode::kMaxAnnealingIterations, GetQualityLevel());
|
||||||
|
|
||||||
for(int j = 0; j < height; j += 4)
|
for(int j = 0; j < height; j += 4)
|
||||||
|
@ -1539,7 +1530,6 @@ namespace BC7C
|
||||||
{
|
{
|
||||||
// ExtractBlock(inBuf + i * 4, width, block);
|
// ExtractBlock(inBuf + i * 4, width, block);
|
||||||
CompressBC7Block((const uint32 *)inBuf, outBuf, statManager);
|
CompressBC7Block((const uint32 *)inBuf, outBuf, statManager);
|
||||||
BC7CompressionMode::NumUses[gBestMode]++;
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
uint8 *block = (uint8 *)outBuf;
|
uint8 *block = (uint8 *)outBuf;
|
||||||
|
@ -1574,16 +1564,25 @@ namespace BC7C
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static double CompressTwoClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) {
|
static double CompressTwoClusters(
|
||||||
|
int shapeIdx,
|
||||||
|
const RGBACluster *clusters,
|
||||||
|
uint8 *outBuf,
|
||||||
|
bool opaque,
|
||||||
|
double *errors = NULL,
|
||||||
|
int *modeChosen = NULL
|
||||||
|
) {
|
||||||
|
|
||||||
uint8 tempBuf1[16];
|
uint8 tempBuf1[16];
|
||||||
BitStream tmpStream1(tempBuf1, 128, 0);
|
BitStream tmpStream1(tempBuf1, 128, 0);
|
||||||
BC7CompressionMode compressor1(1, opaque);
|
BC7CompressionMode compressor1(1, opaque);
|
||||||
|
|
||||||
double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters);
|
double bestError = compressor1.Compress(tmpStream1, shapeIdx, clusters);
|
||||||
gModeError[1] = bestError;
|
|
||||||
|
if(errors) errors[1] = bestError;
|
||||||
|
if(modeChosen) *modeChosen = 1;
|
||||||
|
|
||||||
memcpy(outBuf, tempBuf1, 16);
|
memcpy(outBuf, tempBuf1, 16);
|
||||||
gModeChosen = 1;
|
|
||||||
if(bestError == 0.0) {
|
if(bestError == 0.0) {
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
@ -1593,9 +1592,9 @@ namespace BC7C
|
||||||
BC7CompressionMode compressor3(3, opaque);
|
BC7CompressionMode compressor3(3, opaque);
|
||||||
|
|
||||||
double error = compressor3.Compress(tmpStream3, shapeIdx, clusters);
|
double error = compressor3.Compress(tmpStream3, shapeIdx, clusters);
|
||||||
gModeError[3] = error;
|
if(errors) errors[3] = error;
|
||||||
if(error < bestError) {
|
if(error < bestError) {
|
||||||
gModeChosen = 3;
|
if(modeChosen) *modeChosen = 3;
|
||||||
bestError = error;
|
bestError = error;
|
||||||
memcpy(outBuf, tempBuf3, 16);
|
memcpy(outBuf, tempBuf3, 16);
|
||||||
if(bestError == 0.0) {
|
if(bestError == 0.0) {
|
||||||
|
@ -1610,10 +1609,10 @@ namespace BC7C
|
||||||
BitStream tmpStream7(tempBuf7, 128, 0);
|
BitStream tmpStream7(tempBuf7, 128, 0);
|
||||||
BC7CompressionMode compressor7(7, opaque);
|
BC7CompressionMode compressor7(7, opaque);
|
||||||
error = compressor7.Compress(tmpStream7, shapeIdx, clusters);
|
error = compressor7.Compress(tmpStream7, shapeIdx, clusters);
|
||||||
gModeError[7] = error;
|
if(errors) errors[7] = error;
|
||||||
if(error < bestError) {
|
if(error < bestError) {
|
||||||
gModeChosen = 7;
|
if(modeChosen) *modeChosen = 7;
|
||||||
memcpy(outBuf, tempBuf7, 16);
|
memcpy(outBuf, tempBuf7, 16);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1621,8 +1620,14 @@ namespace BC7C
|
||||||
return bestError;
|
return bestError;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double CompressThreeClusters(int shapeIdx, const RGBACluster *clusters, uint8 *outBuf, bool opaque) {
|
static double CompressThreeClusters(
|
||||||
|
int shapeIdx,
|
||||||
|
const RGBACluster *clusters,
|
||||||
|
uint8 *outBuf,
|
||||||
|
bool opaque,
|
||||||
|
double *errors = NULL,
|
||||||
|
int *modeChosen = NULL
|
||||||
|
) {
|
||||||
uint8 tempBuf0[16];
|
uint8 tempBuf0[16];
|
||||||
BitStream tmpStream0(tempBuf0, 128, 0);
|
BitStream tmpStream0(tempBuf0, 128, 0);
|
||||||
|
|
||||||
|
@ -1635,22 +1640,23 @@ namespace BC7C
|
||||||
double error, bestError;
|
double error, bestError;
|
||||||
if(shapeIdx < 16) {
|
if(shapeIdx < 16) {
|
||||||
bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters);
|
bestError = compressor0.Compress(tmpStream0, shapeIdx, clusters);
|
||||||
gModeError[0] = bestError;
|
if(errors) errors[0] = bestError;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
bestError = DBL_MAX;
|
bestError = DBL_MAX;
|
||||||
gModeError[0] = -1.0;
|
if(errors) errors[0] = -1.0;
|
||||||
}
|
}
|
||||||
gModeChosen = 0;
|
|
||||||
|
if(modeChosen) *modeChosen = 0;
|
||||||
memcpy(outBuf, tempBuf0, 16);
|
memcpy(outBuf, tempBuf0, 16);
|
||||||
if(bestError == 0.0) {
|
if(bestError == 0.0) {
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
error = compressor2.Compress(tmpStream2, shapeIdx, clusters);
|
error = compressor2.Compress(tmpStream2, shapeIdx, clusters);
|
||||||
gModeError[2] = error;
|
if(errors) errors[2] = error;
|
||||||
if(error < bestError) {
|
if(error < bestError) {
|
||||||
gModeChosen = 2;
|
if(modeChosen) *modeChosen = 2;
|
||||||
memcpy(outBuf, tempBuf2, 16);
|
memcpy(outBuf, tempBuf2, 16);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
@ -1973,11 +1979,12 @@ namespace BC7C
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void UpdateErrorEstimate(uint32 mode, double est) {
|
static void UpdateErrorEstimate(double *estimates, uint32 mode, double est) {
|
||||||
|
assert(estimates);
|
||||||
assert(mode >= 0);
|
assert(mode >= 0);
|
||||||
assert(mode < BC7CompressionMode::kNumModes);
|
assert(mode < BC7CompressionMode::kNumModes);
|
||||||
if(gModeEstimate[mode] == -1.0 || est < gModeEstimate[mode]) {
|
if(estimates[mode] == -1.0 || est < estimates[mode]) {
|
||||||
gModeEstimate[mode] = est;
|
estimates[mode] = est;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1988,43 +1995,62 @@ namespace BC7C
|
||||||
private:
|
private:
|
||||||
uint32 m_BlockIdx;
|
uint32 m_BlockIdx;
|
||||||
BlockStatManager &m_BSM;
|
BlockStatManager &m_BSM;
|
||||||
|
|
||||||
|
int *m_ModePtr;
|
||||||
|
double *m_Estimates;
|
||||||
|
double *m_Errors;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m) { }
|
RAIIStatSaver(uint32 blockIdx, BlockStatManager &m) : m_BlockIdx(blockIdx), m_BSM(m)
|
||||||
|
, m_ModePtr(NULL), m_Estimates(NULL), m_Errors(NULL) { }
|
||||||
|
void SetMode(int *modePtr) { m_ModePtr = modePtr; }
|
||||||
|
void SetEstimates(double *estimates) { m_Estimates = estimates; }
|
||||||
|
void SetErrors(double *errors) { m_Errors = errors; }
|
||||||
|
|
||||||
~RAIIStatSaver() {
|
~RAIIStatSaver() {
|
||||||
|
|
||||||
BlockStat s (kBlockStatString[eBlockStat_Mode], gBestMode);
|
assert(m_ModePtr);
|
||||||
|
assert(m_Estimates);
|
||||||
|
assert(m_Errors);
|
||||||
|
|
||||||
|
BlockStat s (kBlockStatString[eBlockStat_Mode], *m_ModePtr);
|
||||||
m_BSM.AddStat(m_BlockIdx, s);
|
m_BSM.AddStat(m_BlockIdx, s);
|
||||||
|
|
||||||
for(int i = 0; i < BC7CompressionMode::kNumModes; i++) {
|
for(int i = 0; i < BC7CompressionMode::kNumModes; i++) {
|
||||||
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], gModeEstimate[i]);
|
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroEstimate + i], m_Estimates[i]);
|
||||||
m_BSM.AddStat(m_BlockIdx, s);
|
m_BSM.AddStat(m_BlockIdx, s);
|
||||||
|
|
||||||
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], gModeError[i]);
|
s = BlockStat(kBlockStatString[eBlockStat_ModeZeroError + i], m_Errors[i]);
|
||||||
m_BSM.AddStat(m_BlockIdx, s);
|
m_BSM.AddStat(m_BlockIdx, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 blockIdx = 0;
|
int bestMode = 0;
|
||||||
|
double modeEstimate[BC7CompressionMode::kNumModes];
|
||||||
|
double modeError[BC7CompressionMode::kNumModes];
|
||||||
|
|
||||||
// reset global variables...
|
// reset global variables...
|
||||||
gBestMode = 0;
|
bestMode = 0;
|
||||||
for(int i = 0; i < BC7CompressionMode::kNumModes; i++){
|
for(int i = 0; i < BC7CompressionMode::kNumModes; i++){
|
||||||
gModeError[i] = gModeEstimate[i] = -1.0;
|
modeError[i] = modeEstimate[i] = -1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
blockIdx = statManager.BeginBlock();
|
uint32 blockIdx = statManager.BeginBlock();
|
||||||
|
|
||||||
for(int i = 0; i < kNumBlockStats; i++) {
|
for(int i = 0; i < kNumBlockStats; i++) {
|
||||||
statManager.AddStat(blockIdx, BlockStat(kBlockStatString[i], 0));
|
statManager.AddStat(blockIdx, BlockStat(kBlockStatString[i], 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
RAIIStatSaver __statsaver__(blockIdx, statManager);
|
RAIIStatSaver __statsaver__(blockIdx, statManager);
|
||||||
|
__statsaver__.SetMode(&bestMode);
|
||||||
|
__statsaver__.SetEstimates(modeEstimate);
|
||||||
|
__statsaver__.SetErrors(modeError);
|
||||||
|
|
||||||
// All a single color?
|
// All a single color?
|
||||||
if(AllOneColor(block)) {
|
if(AllOneColor(block)) {
|
||||||
BitStream bStrm(outBuf, 128, 0);
|
BitStream bStrm(outBuf, 128, 0);
|
||||||
CompressOptimalColorBC7(*block, bStrm);
|
CompressOptimalColorBC7(*block, bStrm);
|
||||||
gBestMode = 5;
|
bestMode = 5;
|
||||||
|
|
||||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0);
|
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 0);
|
||||||
statManager.AddStat(blockIdx, s);
|
statManager.AddStat(blockIdx, s);
|
||||||
|
@ -2050,7 +2076,7 @@ namespace BC7C
|
||||||
if(transparent) {
|
if(transparent) {
|
||||||
BitStream bStrm(outBuf, 128, 0);
|
BitStream bStrm(outBuf, 128, 0);
|
||||||
WriteTransparentBlock(bStrm);
|
WriteTransparentBlock(bStrm);
|
||||||
gBestMode = 6;
|
bestMode = 6;
|
||||||
|
|
||||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 1);
|
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 1);
|
||||||
statManager.AddStat(blockIdx, s);
|
statManager.AddStat(blockIdx, s);
|
||||||
|
@ -2065,12 +2091,12 @@ namespace BC7C
|
||||||
blockCluster.GetBoundingBox(Min, Max);
|
blockCluster.GetBoundingBox(Min, Max);
|
||||||
v = Max - Min;
|
v = Max - Min;
|
||||||
if(v * v == 0) {
|
if(v * v == 0) {
|
||||||
gModeEstimate[6] = 0.0;
|
modeEstimate[6] = 0.0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
const float *w = GetErrorMetric();
|
const float *w = GetErrorMetric();
|
||||||
const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3]));
|
const double err = 0.0001 + blockCluster.QuantizedError(Min, Max, 4, 0xFEFEFEFE, RGBAVector(w[0], w[1], w[2], w[3]));
|
||||||
UpdateErrorEstimate(6, err);
|
UpdateErrorEstimate(modeEstimate, 6, err);
|
||||||
|
|
||||||
#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
|
#ifdef USE_PCA_FOR_SHAPE_ESTIMATION
|
||||||
double eigOne = blockCluster.GetPrincipalEigenvalue();
|
double eigOne = blockCluster.GetPrincipalEigenvalue();
|
||||||
|
@ -2124,11 +2150,11 @@ namespace BC7C
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(errEstimate[0] != -1.0) {
|
if(errEstimate[0] != -1.0) {
|
||||||
UpdateErrorEstimate(1, errEstimate[0]);
|
UpdateErrorEstimate(modeEstimate, 1, errEstimate[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(errEstimate[1] != -1.0) {
|
if(errEstimate[1] != -1.0) {
|
||||||
UpdateErrorEstimate(3, errEstimate[1]);
|
UpdateErrorEstimate(modeEstimate, 3, errEstimate[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(err < bestError[0]) {
|
if(err < bestError[0]) {
|
||||||
|
@ -2138,8 +2164,9 @@ namespace BC7C
|
||||||
|
|
||||||
// If it's small, we'll take it!
|
// If it's small, we'll take it!
|
||||||
if(err < 1e-9) {
|
if(err < 1e-9) {
|
||||||
CompressTwoClusters(i, clusters, outBuf, opaque);
|
int modeChosen;
|
||||||
gBestMode = gModeChosen;
|
CompressTwoClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
|
||||||
|
bestMode = modeChosen;
|
||||||
|
|
||||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
|
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
|
||||||
statManager.AddStat(blockIdx, s);
|
statManager.AddStat(blockIdx, s);
|
||||||
|
@ -2185,11 +2212,11 @@ namespace BC7C
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(errEstimate[0] != -1.0) {
|
if(errEstimate[0] != -1.0) {
|
||||||
UpdateErrorEstimate(0, errEstimate[0]);
|
UpdateErrorEstimate(modeEstimate, 0, errEstimate[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(errEstimate[1] != -1.0) {
|
if(errEstimate[1] != -1.0) {
|
||||||
UpdateErrorEstimate(2, errEstimate[1]);
|
UpdateErrorEstimate(modeEstimate, 2, errEstimate[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(err < bestError[1]) {
|
if(err < bestError[1]) {
|
||||||
|
@ -2199,8 +2226,9 @@ namespace BC7C
|
||||||
|
|
||||||
// If it's small, we'll take it!
|
// If it's small, we'll take it!
|
||||||
if(err < 1e-9) {
|
if(err < 1e-9) {
|
||||||
CompressThreeClusters(i, clusters, outBuf, opaque);
|
int modeChosen;
|
||||||
gBestMode = gModeChosen;
|
CompressThreeClusters(i, clusters, outBuf, opaque, modeError, &modeChosen);
|
||||||
|
bestMode = modeChosen;
|
||||||
|
|
||||||
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
|
BlockStat s = BlockStat(kBlockStatString[eBlockStat_Path], 2);
|
||||||
statManager.AddStat(blockIdx, s);
|
statManager.AddStat(blockIdx, s);
|
||||||
|
@ -2226,8 +2254,8 @@ namespace BC7C
|
||||||
BitStream tempStream1 (tempBuf1, 128, 0);
|
BitStream tempStream1 (tempBuf1, 128, 0);
|
||||||
BC7CompressionMode compressor(6, opaque);
|
BC7CompressionMode compressor(6, opaque);
|
||||||
double best = compressor.Compress(tempStream1, 0, &blockCluster);
|
double best = compressor.Compress(tempStream1, 0, &blockCluster);
|
||||||
gModeError[6] = best;
|
modeError[6] = best;
|
||||||
gBestMode = 6;
|
bestMode = 6;
|
||||||
if(best == 0.0f) {
|
if(best == 0.0f) {
|
||||||
memcpy(outBuf, tempBuf1, 16);
|
memcpy(outBuf, tempBuf1, 16);
|
||||||
return;
|
return;
|
||||||
|
@ -2243,7 +2271,7 @@ namespace BC7C
|
||||||
double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
|
double error = compressorTry.Compress(tempStream2, 0, &blockCluster);
|
||||||
if(error < best) {
|
if(error < best) {
|
||||||
|
|
||||||
gBestMode = mode;
|
bestMode = mode;
|
||||||
best = error;
|
best = error;
|
||||||
|
|
||||||
if(best == 0.0f) {
|
if(best == 0.0f) {
|
||||||
|
@ -2257,10 +2285,11 @@ namespace BC7C
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque);
|
int modeChosen;
|
||||||
|
double error = CompressTwoClusters(bestShapeIdx[0], bestClusters[0], tempBuf2, opaque, modeError, &modeChosen);
|
||||||
if(error < best) {
|
if(error < best) {
|
||||||
|
|
||||||
gBestMode = gModeChosen;
|
bestMode = modeChosen;
|
||||||
best = error;
|
best = error;
|
||||||
|
|
||||||
if(error == 0.0f) {
|
if(error == 0.0f) {
|
||||||
|
@ -2273,9 +2302,9 @@ namespace BC7C
|
||||||
}
|
}
|
||||||
|
|
||||||
if(opaque) {
|
if(opaque) {
|
||||||
if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque) < best) {
|
if(CompressThreeClusters(bestShapeIdx[1], bestClusters[1], tempBuf2, opaque, modeError, &modeChosen) < best) {
|
||||||
|
|
||||||
gBestMode = gModeChosen;
|
bestMode = modeChosen;
|
||||||
memcpy(outBuf, tempBuf2, 16);
|
memcpy(outBuf, tempBuf2, 16);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,12 +113,6 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
} while(knowArg && fileArg < argc);
|
} while(knowArg && fileArg < argc);
|
||||||
|
|
||||||
if(numThreads > 1 && bSaveLog) {
|
|
||||||
bSaveLog = false;
|
|
||||||
fprintf(stderr, "WARNING: Will not save log because implementation is not thread safe.\n"
|
|
||||||
"If you'd like, send a complaint to pavel@cs.unc.edu to get this done faster.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
if(fileArg == argc) {
|
if(fileArg == argc) {
|
||||||
PrintUsage();
|
PrintUsage();
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|
|
@ -13,6 +13,8 @@ public:
|
||||||
|
|
||||||
BlockStat(const BlockStat &);
|
BlockStat(const BlockStat &);
|
||||||
BlockStat &operator=(const BlockStat &);
|
BlockStat &operator=(const BlockStat &);
|
||||||
|
|
||||||
|
void ToString(char *buf, int bufSz) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const enum Type {
|
const enum Type {
|
||||||
|
|
|
@ -40,6 +40,22 @@ BlockStat &BlockStat::operator=(const BlockStat &other) {
|
||||||
memcpy(this, &other, sizeof(*this));
|
memcpy(this, &other, sizeof(*this));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BlockStat::ToString(char *buf, int bufSz) const {
|
||||||
|
switch(m_Type) {
|
||||||
|
case BlockStat::eType_Float:
|
||||||
|
snprintf(buf, bufSz, "%s,%f", m_StatName, m_FloatStat);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BlockStat::eType_Int:
|
||||||
|
snprintf(buf, bufSz, "%s,%llu", m_StatName, m_IntStat);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert(!"Unknown stat type!");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
// BlockStat Manager Implementation
|
// BlockStat Manager Implementation
|
||||||
|
@ -71,7 +87,7 @@ uint32 BlockStatManager::BeginBlock() {
|
||||||
return m_NextBlock-1;
|
return m_NextBlock-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
TCLock lock(m_Mutex);
|
TCLock lock (m_Mutex);
|
||||||
return m_NextBlock++;
|
return m_NextBlock++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,9 +98,38 @@ void BlockStatManager::AddStat(uint32 blockIdx, const BlockStat &stat) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TCLock lock (m_Mutex);
|
||||||
m_BlockStatList[blockIdx].AddStat(stat);
|
m_BlockStatList[blockIdx].AddStat(stat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BlockStatManager::ToFile(const CHAR *filename) {
|
||||||
|
|
||||||
|
FileStream fstr (filename, eFileMode_Write);
|
||||||
|
|
||||||
|
for(int i = 0; i < m_BlockStatListSz; i++) {
|
||||||
|
const BlockStatList *head = &(m_BlockStatList[i]);
|
||||||
|
while(head) {
|
||||||
|
BlockStat s = head->GetStat();
|
||||||
|
|
||||||
|
CHAR statStr[256];
|
||||||
|
s.ToString(statStr, 256);
|
||||||
|
|
||||||
|
CHAR str[256];
|
||||||
|
snprintf(str, 256, "%d,%s\n", i, statStr);
|
||||||
|
|
||||||
|
int strLen = strlen(str);
|
||||||
|
if(strLen > 255) {
|
||||||
|
str[255] = '\n';
|
||||||
|
strLen = 256;
|
||||||
|
}
|
||||||
|
|
||||||
|
fstr.Write((uint8 *)str, strLen);
|
||||||
|
|
||||||
|
head = head->GetTail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
// BlockStat List Implementation
|
// BlockStat List Implementation
|
||||||
|
@ -111,6 +156,7 @@ BlockStatManager::BlockStatList::~BlockStatList() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
|
void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
|
||||||
|
|
||||||
if(strncmp(stat.m_StatName, m_Stat.m_StatName, BlockStat::kStatNameSz) == 0) {
|
if(strncmp(stat.m_StatName, m_Stat.m_StatName, BlockStat::kStatNameSz) == 0) {
|
||||||
m_Stat = stat;
|
m_Stat = stat;
|
||||||
}
|
}
|
||||||
|
@ -126,40 +172,3 @@ void BlockStatManager::BlockStatList::AddStat(const BlockStat &stat) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void BlockStatManager::ToFile(const CHAR *filename) {
|
|
||||||
|
|
||||||
FileStream fstr (filename, eFileMode_Write);
|
|
||||||
|
|
||||||
for(int i = 0; i < m_BlockStatListSz; i++) {
|
|
||||||
const BlockStatList *head = &(m_BlockStatList[i]);
|
|
||||||
while(head) {
|
|
||||||
BlockStat s = head->GetStat();
|
|
||||||
|
|
||||||
CHAR statStr[256];
|
|
||||||
switch(s.m_Type) {
|
|
||||||
case BlockStat::eType_Float:
|
|
||||||
snprintf(statStr, 256, "%d,%s,%f\n", i, s.m_StatName, s.m_FloatStat);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case BlockStat::eType_Int:
|
|
||||||
snprintf(statStr, 256, "%d,%s,%llu\n", i, s.m_StatName, s.m_IntStat);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
assert(false);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
int statStrLen = strlen(statStr);
|
|
||||||
if(statStrLen > 255) {
|
|
||||||
statStr[255] = '\n';
|
|
||||||
statStrLen = 256;
|
|
||||||
}
|
|
||||||
fstr.Write((uint8 *)statStr, statStrLen);
|
|
||||||
|
|
||||||
head = head->GetTail();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
|
@ -113,16 +113,7 @@ static double CompressImageInSerial(
|
||||||
return cmpTime;
|
return cmpTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double CompressImageWithThreads(
|
static double CompressThreadGroup(ThreadGroup &tgrp, const SCompressionSettings &settings) {
|
||||||
const unsigned char *imgData,
|
|
||||||
const unsigned int imgDataSz,
|
|
||||||
const SCompressionSettings &settings,
|
|
||||||
unsigned char *outBuf
|
|
||||||
) {
|
|
||||||
|
|
||||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
|
||||||
|
|
||||||
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
|
|
||||||
if(!(tgrp.PrepareThreads())) {
|
if(!(tgrp.PrepareThreads())) {
|
||||||
assert(!"Thread group failed to prepare threads?!");
|
assert(!"Thread group failed to prepare threads?!");
|
||||||
return -1.0f;
|
return -1.0f;
|
||||||
|
@ -140,7 +131,29 @@ static double CompressImageWithThreads(
|
||||||
cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds();
|
cmpTimeTotal += tgrp.GetStopWatch().TimeInMilliseconds();
|
||||||
}
|
}
|
||||||
|
|
||||||
tgrp.CleanUpThreads();
|
tgrp.CleanUpThreads();
|
||||||
|
return cmpTimeTotal;
|
||||||
|
}
|
||||||
|
|
||||||
|
static double CompressImageWithThreads(
|
||||||
|
const unsigned char *imgData,
|
||||||
|
const unsigned int imgDataSz,
|
||||||
|
const SCompressionSettings &settings,
|
||||||
|
unsigned char *outBuf
|
||||||
|
) {
|
||||||
|
|
||||||
|
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||||
|
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
||||||
|
|
||||||
|
double cmpTimeTotal = 0.0;
|
||||||
|
if(fStats && settings.pStatManager) {
|
||||||
|
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, fStats, *(settings.pStatManager), outBuf);
|
||||||
|
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ThreadGroup tgrp (settings.iNumThreads, imgData, imgDataSz, f, outBuf);
|
||||||
|
cmpTimeTotal = CompressThreadGroup(tgrp, settings);
|
||||||
|
}
|
||||||
|
|
||||||
double cmpTime = cmpTimeTotal / double(settings.iNumCompressions);
|
double cmpTime = cmpTimeTotal / double(settings.iNumCompressions);
|
||||||
return cmpTime;
|
return cmpTime;
|
||||||
|
@ -153,21 +166,40 @@ static double CompressImageWithWorkerQueue(
|
||||||
unsigned char *outBuf
|
unsigned char *outBuf
|
||||||
) {
|
) {
|
||||||
CompressionFunc f = ChooseFuncFromSettings(settings);
|
CompressionFunc f = ChooseFuncFromSettings(settings);
|
||||||
|
CompressionFuncWithStats fStats = ChooseFuncFromSettingsWithStats(settings);
|
||||||
|
|
||||||
WorkerQueue wq (
|
double cmpTimeTotal = 0.0;
|
||||||
settings.iNumCompressions,
|
if(fStats && settings.pStatManager) {
|
||||||
settings.iNumThreads,
|
WorkerQueue wq (
|
||||||
settings.iJobSize,
|
settings.iNumCompressions,
|
||||||
imgData,
|
settings.iNumThreads,
|
||||||
imgDataSz,
|
settings.iJobSize,
|
||||||
f,
|
imgData,
|
||||||
outBuf
|
imgDataSz,
|
||||||
);
|
fStats,
|
||||||
|
*(settings.pStatManager),
|
||||||
|
outBuf
|
||||||
|
);
|
||||||
|
|
||||||
wq.Run();
|
wq.Run();
|
||||||
|
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
WorkerQueue wq (
|
||||||
|
settings.iNumCompressions,
|
||||||
|
settings.iNumThreads,
|
||||||
|
settings.iJobSize,
|
||||||
|
imgData,
|
||||||
|
imgDataSz,
|
||||||
|
f,
|
||||||
|
outBuf
|
||||||
|
);
|
||||||
|
|
||||||
return wq.GetStopWatch().TimeInMilliseconds() /
|
wq.Run();
|
||||||
double(settings.iNumCompressions);
|
cmpTimeTotal = wq.GetStopWatch().TimeInMilliseconds();
|
||||||
|
}
|
||||||
|
|
||||||
|
return cmpTimeTotal / double(settings.iNumCompressions);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CompressImageData(
|
bool CompressImageData(
|
||||||
|
|
|
@ -13,13 +13,15 @@ CmpThread::CmpThread()
|
||||||
, m_Width(0)
|
, m_Width(0)
|
||||||
, m_Height(0)
|
, m_Height(0)
|
||||||
, m_CmpFunc(NULL)
|
, m_CmpFunc(NULL)
|
||||||
|
, m_CmpFuncWithStats(NULL)
|
||||||
|
, m_StatManager(NULL)
|
||||||
, m_OutBuf(NULL)
|
, m_OutBuf(NULL)
|
||||||
, m_InBuf(NULL)
|
, m_InBuf(NULL)
|
||||||
, m_ParentExitFlag(NULL)
|
, m_ParentExitFlag(NULL)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
void CmpThread::operator()() {
|
void CmpThread::operator()() {
|
||||||
if(!m_CmpFunc || !m_OutBuf || !m_InBuf
|
if(!m_OutBuf || !m_InBuf
|
||||||
|| !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV
|
|| !m_ParentCounter || !m_ParentCounterLock || !m_FinishCV
|
||||||
|| !m_StartBarrier
|
|| !m_StartBarrier
|
||||||
|| !m_ParentExitFlag
|
|| !m_ParentExitFlag
|
||||||
|
@ -28,6 +30,11 @@ void CmpThread::operator()() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!(m_CmpFunc || (m_CmpFuncWithStats && m_StatManager))) {
|
||||||
|
fprintf(stderr, "Incorrect thread function pointer.\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
while(1) {
|
while(1) {
|
||||||
// Wait for signal to start work...
|
// Wait for signal to start work...
|
||||||
m_StartBarrier->Wait();
|
m_StartBarrier->Wait();
|
||||||
|
@ -36,7 +43,10 @@ void CmpThread::operator()() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
|
if(m_CmpFunc)
|
||||||
|
(*m_CmpFunc)(m_InBuf, m_OutBuf, m_Width, m_Height);
|
||||||
|
else
|
||||||
|
(*m_CmpFuncWithStats)(m_InBuf, m_OutBuf, m_Width, m_Height, *m_StatManager);
|
||||||
|
|
||||||
{
|
{
|
||||||
TCLock lock(*m_ParentCounterLock);
|
TCLock lock(*m_ParentCounterLock);
|
||||||
|
@ -47,19 +57,37 @@ void CmpThread::operator()() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
|
ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned int inBufSz, CompressionFunc func, unsigned char *outBuf )
|
||||||
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
||||||
, m_FinishMutex(new TCMutex())
|
, m_FinishMutex(new TCMutex())
|
||||||
, m_FinishCV(new TCConditionVariable())
|
, m_FinishCV(new TCConditionVariable())
|
||||||
, m_NumThreads(numThreads)
|
, m_NumThreads(numThreads)
|
||||||
, m_ActiveThreads(0)
|
, m_ActiveThreads(0)
|
||||||
, m_Func(func)
|
|
||||||
, m_ImageDataSz(inBufSz)
|
, m_ImageDataSz(inBufSz)
|
||||||
, m_ImageData(inBuf)
|
, m_ImageData(inBuf)
|
||||||
, m_OutBuf(outBuf)
|
, m_OutBuf(outBuf)
|
||||||
, m_ThreadState(eThreadState_Done)
|
, m_ThreadState(eThreadState_Done)
|
||||||
, m_ExitFlag(false)
|
, m_ExitFlag(false)
|
||||||
|
, m_CompressedBlockSize(
|
||||||
|
(func == BC7C::CompressImageBC7
|
||||||
|
#ifdef HAS_SSE_41
|
||||||
|
|| func == BC7C::CompressImageBC7SIMD
|
||||||
|
#endif
|
||||||
|
)?
|
||||||
|
16
|
||||||
|
:
|
||||||
|
0
|
||||||
|
)
|
||||||
|
, m_UncompressedBlockSize(
|
||||||
|
(func == BC7C::CompressImageBC7
|
||||||
|
#ifdef HAS_SSE_41
|
||||||
|
|| func == BC7C::CompressImageBC7SIMD
|
||||||
|
#endif
|
||||||
|
)?
|
||||||
|
64
|
||||||
|
:
|
||||||
|
0
|
||||||
|
)
|
||||||
{
|
{
|
||||||
for(int i = 0; i < kMaxNumThreads; i++) {
|
for(int i = 0; i < kMaxNumThreads; i++) {
|
||||||
// Thread synchronization primitives
|
// Thread synchronization primitives
|
||||||
|
@ -68,6 +96,50 @@ ThreadGroup::ThreadGroup( int numThreads, const unsigned char *inBuf, unsigned i
|
||||||
m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
|
m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
|
||||||
m_Threads[i].m_StartBarrier = m_StartBarrier;
|
m_Threads[i].m_StartBarrier = m_StartBarrier;
|
||||||
m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
|
m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
|
||||||
|
m_Threads[i].m_CmpFunc = func;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadGroup::ThreadGroup(
|
||||||
|
int numThreads,
|
||||||
|
const unsigned char *inBuf,
|
||||||
|
unsigned int inBufSz,
|
||||||
|
CompressionFuncWithStats func,
|
||||||
|
BlockStatManager &statManager,
|
||||||
|
unsigned char *outBuf
|
||||||
|
)
|
||||||
|
: m_StartBarrier(new TCBarrier(numThreads + 1))
|
||||||
|
, m_FinishMutex(new TCMutex())
|
||||||
|
, m_FinishCV(new TCConditionVariable())
|
||||||
|
, m_NumThreads(numThreads)
|
||||||
|
, m_ActiveThreads(0)
|
||||||
|
, m_ImageDataSz(inBufSz)
|
||||||
|
, m_ImageData(inBuf)
|
||||||
|
, m_OutBuf(outBuf)
|
||||||
|
, m_ThreadState(eThreadState_Done)
|
||||||
|
, m_ExitFlag(false)
|
||||||
|
, m_CompressedBlockSize(
|
||||||
|
(func == BC7C::CompressImageBC7Stats)?
|
||||||
|
16
|
||||||
|
:
|
||||||
|
0
|
||||||
|
)
|
||||||
|
, m_UncompressedBlockSize(
|
||||||
|
(func == BC7C::CompressImageBC7Stats)?
|
||||||
|
64
|
||||||
|
:
|
||||||
|
0
|
||||||
|
)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < kMaxNumThreads; i++) {
|
||||||
|
// Thread synchronization primitives
|
||||||
|
m_Threads[i].m_ParentCounterLock = m_FinishMutex;
|
||||||
|
m_Threads[i].m_FinishCV = m_FinishCV;
|
||||||
|
m_Threads[i].m_ParentCounter = &m_ThreadsFinished;
|
||||||
|
m_Threads[i].m_StartBarrier = m_StartBarrier;
|
||||||
|
m_Threads[i].m_ParentExitFlag = &m_ExitFlag;
|
||||||
|
m_Threads[i].m_CmpFuncWithStats = func;
|
||||||
|
m_Threads[i].m_StatManager = &statManager;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,20 +149,6 @@ ThreadGroup::~ThreadGroup() {
|
||||||
delete m_FinishCV;
|
delete m_FinishCV;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int ThreadGroup::GetCompressedBlockSize() {
|
|
||||||
if(m_Func == BC7C::CompressImageBC7) return 16;
|
|
||||||
#ifdef HAS_SSE_41
|
|
||||||
if(m_Func == BC7C::CompressImageBC7SIMD) return 16;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int ThreadGroup::GetUncompressedBlockSize() {
|
|
||||||
if(m_Func == BC7C::CompressImageBC7) return 64;
|
|
||||||
#ifdef HAS_SSE_41
|
|
||||||
if(m_Func == BC7C::CompressImageBC7SIMD) return 64;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ThreadGroup::PrepareThreads() {
|
bool ThreadGroup::PrepareThreads() {
|
||||||
|
|
||||||
// Make sure that threads aren't running.
|
// Make sure that threads aren't running.
|
||||||
|
@ -126,9 +184,8 @@ bool ThreadGroup::PrepareThreads() {
|
||||||
CmpThread &t = m_Threads[m_ActiveThreads];
|
CmpThread &t = m_Threads[m_ActiveThreads];
|
||||||
t.m_Height = 4;
|
t.m_Height = 4;
|
||||||
t.m_Width = numBlocksThisThread * 4;
|
t.m_Width = numBlocksThisThread * 4;
|
||||||
t.m_CmpFunc = m_Func;
|
t.m_OutBuf = m_OutBuf + (blocksProcessed * m_CompressedBlockSize);
|
||||||
t.m_OutBuf = m_OutBuf + (blocksProcessed * GetCompressedBlockSize());
|
t.m_InBuf = m_ImageData + (blocksProcessed * m_UncompressedBlockSize);
|
||||||
t.m_InBuf = m_ImageData + (blocksProcessed * GetUncompressedBlockSize());
|
|
||||||
|
|
||||||
blocksProcessed += numBlocksThisThread;
|
blocksProcessed += numBlocksThisThread;
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,9 @@ private:
|
||||||
|
|
||||||
CompressionFunc m_CmpFunc;
|
CompressionFunc m_CmpFunc;
|
||||||
|
|
||||||
|
CompressionFuncWithStats m_CmpFuncWithStats;
|
||||||
|
BlockStatManager *m_StatManager;
|
||||||
|
|
||||||
unsigned char *m_OutBuf;
|
unsigned char *m_OutBuf;
|
||||||
const unsigned char *m_InBuf;
|
const unsigned char *m_InBuf;
|
||||||
|
|
||||||
|
@ -29,10 +32,10 @@ private:
|
||||||
CmpThread();
|
CmpThread();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void operator ()();
|
virtual ~CmpThread() { }
|
||||||
|
virtual void operator ()();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class ThreadGroup {
|
class ThreadGroup {
|
||||||
public:
|
public:
|
||||||
ThreadGroup(
|
ThreadGroup(
|
||||||
|
@ -42,6 +45,16 @@ class ThreadGroup {
|
||||||
CompressionFunc func,
|
CompressionFunc func,
|
||||||
unsigned char *outBuf
|
unsigned char *outBuf
|
||||||
);
|
);
|
||||||
|
|
||||||
|
ThreadGroup(
|
||||||
|
int numThreads,
|
||||||
|
const unsigned char *inBuf,
|
||||||
|
unsigned int inBufSz,
|
||||||
|
CompressionFuncWithStats func,
|
||||||
|
BlockStatManager &statManager,
|
||||||
|
unsigned char *outBuf
|
||||||
|
);
|
||||||
|
|
||||||
~ThreadGroup();
|
~ThreadGroup();
|
||||||
|
|
||||||
bool PrepareThreads();
|
bool PrepareThreads();
|
||||||
|
@ -75,11 +88,10 @@ class ThreadGroup {
|
||||||
// State variables.
|
// State variables.
|
||||||
const unsigned int m_ImageDataSz;
|
const unsigned int m_ImageDataSz;
|
||||||
const unsigned char *const m_ImageData;
|
const unsigned char *const m_ImageData;
|
||||||
const CompressionFunc m_Func;
|
|
||||||
unsigned char *m_OutBuf;
|
unsigned char *m_OutBuf;
|
||||||
|
|
||||||
unsigned int GetCompressedBlockSize();
|
const unsigned int m_CompressedBlockSize;
|
||||||
unsigned int GetUncompressedBlockSize();
|
const unsigned int m_UncompressedBlockSize;
|
||||||
|
|
||||||
StopWatch m_StopWatch;
|
StopWatch m_StopWatch;
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,11 @@
|
||||||
#include "WorkerQueue.h"
|
#include "WorkerQueue.h"
|
||||||
|
|
||||||
#include "BC7Compressor.h"
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
template <typename T>
|
#include "BC7Compressor.h"
|
||||||
static inline T max(const T &a, const T &b) {
|
|
||||||
return (a > b)? a : b;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static inline T min(const T &a, const T &b) {
|
|
||||||
return (a < b)? a : b;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static inline void clamp(T &x, const T &min, const T &max) {
|
static inline void clamp(T &x, const T &min, const T &max) {
|
||||||
|
@ -36,7 +27,10 @@ void WorkerThread::operator()() {
|
||||||
}
|
}
|
||||||
|
|
||||||
CompressionFunc f = m_Parent->GetCompressionFunc();
|
CompressionFunc f = m_Parent->GetCompressionFunc();
|
||||||
if(!f) {
|
CompressionFuncWithStats fStat = m_Parent->GetCompressionFuncWithStats();
|
||||||
|
BlockStatManager *statManager = m_Parent->GetBlockStatManager();
|
||||||
|
|
||||||
|
if(!(f || (fStat && statManager))) {
|
||||||
fprintf(stderr, "%s\n", "Illegal worker queue initialization -- compression func is NULL.");
|
fprintf(stderr, "%s\n", "Illegal worker queue initialization -- compression func is NULL.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -63,7 +57,11 @@ void WorkerThread::operator()() {
|
||||||
{
|
{
|
||||||
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
|
const uint8 *src = m_Parent->GetSrcForThread(m_ThreadIdx);
|
||||||
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
|
uint8 *dst = m_Parent->GetDstForThread(m_ThreadIdx);
|
||||||
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
if(f)
|
||||||
|
(*f)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4);
|
||||||
|
else
|
||||||
|
(*fStat)(src, dst, 4 * m_Parent->GetNumBlocksForThread(m_ThreadIdx), 4, *statManager);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,22 +89,57 @@ WorkerQueue::WorkerQueue(
|
||||||
uint8 *outBuf
|
uint8 *outBuf
|
||||||
)
|
)
|
||||||
: m_NumCompressions(0)
|
: m_NumCompressions(0)
|
||||||
, m_TotalNumCompressions(max(uint32(1), numCompressions))
|
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
|
||||||
, m_NumThreads(numThreads)
|
, m_NumThreads(numThreads)
|
||||||
, m_WaitingThreads(0)
|
, m_WaitingThreads(0)
|
||||||
, m_ActiveThreads(0)
|
, m_ActiveThreads(0)
|
||||||
, m_JobSize(max(uint32(1), jobSize))
|
, m_JobSize(std::max(uint32(1), jobSize))
|
||||||
, m_InBufSz(inBufSz)
|
, m_InBufSz(inBufSz)
|
||||||
, m_InBuf(inBuf)
|
, m_InBuf(inBuf)
|
||||||
, m_OutBuf(outBuf)
|
, m_OutBuf(outBuf)
|
||||||
, m_NextBlock(0)
|
, m_NextBlock(0)
|
||||||
, m_CompressionFunc(func)
|
, m_CompressionFunc(func)
|
||||||
|
, m_CompressionFuncWithStats(NULL)
|
||||||
|
, m_BlockStatManager(NULL)
|
||||||
{
|
{
|
||||||
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
if(m_InBufSz % 64) {
|
if(m_InBufSz % 64) {
|
||||||
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?");
|
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
WorkerQueue::WorkerQueue(
|
||||||
|
uint32 numCompressions,
|
||||||
|
uint32 numThreads,
|
||||||
|
uint32 jobSize,
|
||||||
|
const uint8 *inBuf,
|
||||||
|
uint32 inBufSz,
|
||||||
|
CompressionFuncWithStats func,
|
||||||
|
BlockStatManager &blockStatManager,
|
||||||
|
uint8 *outBuf
|
||||||
|
)
|
||||||
|
: m_NumCompressions(0)
|
||||||
|
, m_TotalNumCompressions(std::max(uint32(1), numCompressions))
|
||||||
|
, m_NumThreads(numThreads)
|
||||||
|
, m_WaitingThreads(0)
|
||||||
|
, m_ActiveThreads(0)
|
||||||
|
, m_JobSize(std::max(uint32(1), jobSize))
|
||||||
|
, m_InBufSz(inBufSz)
|
||||||
|
, m_InBuf(inBuf)
|
||||||
|
, m_OutBuf(outBuf)
|
||||||
|
, m_NextBlock(0)
|
||||||
|
, m_CompressionFunc(NULL)
|
||||||
|
, m_CompressionFuncWithStats(func)
|
||||||
|
, m_BlockStatManager(&blockStatManager)
|
||||||
|
{
|
||||||
|
clamp(m_NumThreads, uint32(1), uint32(kMaxNumWorkerThreads));
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if(m_InBufSz % 64) {
|
||||||
|
fprintf(stderr, "WorkerQueue.cpp -- WARNING: InBufSz not a multiple of 64. Are you sure that your image dimensions are correct?\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -182,7 +215,7 @@ WorkerThread::EAction WorkerQueue::AcceptThreadData(uint32 threadIdx) {
|
||||||
|
|
||||||
// The number of blocks to process is either the job size
|
// The number of blocks to process is either the job size
|
||||||
// or the number of blocks remaining.
|
// or the number of blocks remaining.
|
||||||
int blocksProcessed = min(m_JobSize, totalBlocks - m_NextBlock);
|
int blocksProcessed = std::min(m_JobSize, totalBlocks - m_NextBlock);
|
||||||
m_NumBlocks[threadIdx] = blocksProcessed;
|
m_NumBlocks[threadIdx] = blocksProcessed;
|
||||||
|
|
||||||
// Make sure the next block is updated.
|
// Make sure the next block is updated.
|
||||||
|
|
|
@ -44,6 +44,17 @@ class WorkerQueue {
|
||||||
uint8 *outBuf
|
uint8 *outBuf
|
||||||
);
|
);
|
||||||
|
|
||||||
|
WorkerQueue(
|
||||||
|
uint32 numCompressions,
|
||||||
|
uint32 numThreads,
|
||||||
|
uint32 jobSize,
|
||||||
|
const uint8 *inBuf,
|
||||||
|
uint32 inBufSz,
|
||||||
|
CompressionFuncWithStats func,
|
||||||
|
BlockStatManager &blockStatManager,
|
||||||
|
uint8 *outBuf
|
||||||
|
);
|
||||||
|
|
||||||
~WorkerQueue() { }
|
~WorkerQueue() { }
|
||||||
|
|
||||||
// Runs the workers
|
// Runs the workers
|
||||||
|
@ -80,6 +91,11 @@ class WorkerQueue {
|
||||||
const CompressionFunc m_CompressionFunc;
|
const CompressionFunc m_CompressionFunc;
|
||||||
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
|
CompressionFunc GetCompressionFunc() const { return m_CompressionFunc; }
|
||||||
|
|
||||||
|
BlockStatManager *m_BlockStatManager;
|
||||||
|
const CompressionFuncWithStats m_CompressionFuncWithStats;
|
||||||
|
CompressionFuncWithStats GetCompressionFuncWithStats() const { return m_CompressionFuncWithStats; }
|
||||||
|
BlockStatManager *GetBlockStatManager() const { return m_BlockStatManager; }
|
||||||
|
|
||||||
StopWatch m_StopWatch;
|
StopWatch m_StopWatch;
|
||||||
|
|
||||||
WorkerThread::EAction AcceptThreadData(uint32 threadIdx);
|
WorkerThread::EAction AcceptThreadData(uint32 threadIdx);
|
||||||
|
|
Loading…
Reference in a new issue