Fix multithreaded npot bug.

There was a bug where certain blocks were not written because of
improper iteration over non-power-of-two textures, depending on which
thread's job was split across the start of a new row of blocks. This
should now be fixed.

The offending command-line arguments looked something like:

CLTool/tc -q 0 -t 32 -j 32 /path/to/npot-texture.png
This commit is contained in:
Pavel Krajcevski 2014-04-10 13:39:04 -04:00
parent 127c825337
commit 8660b24ffe
4 changed files with 12 additions and 20 deletions

View file

@ -1573,10 +1573,9 @@ void Compress(const FasTC::CompressionJob &cj, CompressionSettings settings) {
uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz; uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
uint32 startX = cj.XStart(); uint32 startX = cj.XStart();
bool done = false; for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 j = cj.YStart(); !done; j += 4) { for(uint32 i = startX; i < endX; i += 4) {
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
uint32 block[16]; uint32 block[16];
GetBlock(i, j, cj.Width(), inPixels, block); GetBlock(i, j, cj.Width(), inPixels, block);
@ -1608,7 +1607,6 @@ void Compress(const FasTC::CompressionJob &cj, CompressionSettings settings) {
#endif #endif
outBuf += kBlockSz; outBuf += kBlockSz;
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
} }
startX = 0; startX = 0;
} }
@ -1676,9 +1674,9 @@ void CompressAtomic(FasTC::CompressionJobList &cjl) {
uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz; uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
uint32 startX = cj.XStart(); uint32 startX = cj.XStart();
bool done = false; for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
for(uint32 j = cj.YStart(); !done; j += 4) { const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 i = startX; !done && i < cj.Width(); i += 4) { for(uint32 i = startX; i < endX; i += 4) {
uint32 block[16]; uint32 block[16];
GetBlock(i, j, cj.Width(), inPixels, block); GetBlock(i, j, cj.Width(), inPixels, block);
@ -1709,7 +1707,6 @@ void CompressAtomic(FasTC::CompressionJobList &cjl) {
#endif #endif
outBuf += 16; outBuf += 16;
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
} }
startX = 0; startX = 0;

View file

@ -46,9 +46,9 @@ namespace DXTC
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf()); const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
uint32 startX = cj.XStart(); uint32 startX = cj.XStart();
bool done = false; for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
for(uint32 j = cj.YStart(); !done; j += 4) { const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 i = startX; !done && i < cj.Width(); i += 4) { for(uint32 i = startX; i < endX; i += 4) {
const uint32 kOffset = j*cj.Width() + i; const uint32 kOffset = j*cj.Width() + i;
ExtractBlock(inPixels + kOffset, cj.Width(), block); ExtractBlock(inPixels + kOffset, cj.Width(), block);
@ -56,7 +56,6 @@ namespace DXTC
EmitWord(outBuf, ColorTo565(maxColor)); EmitWord(outBuf, ColorTo565(maxColor));
EmitWord(outBuf, ColorTo565(minColor)); EmitWord(outBuf, ColorTo565(minColor));
EmitColorIndices(block, outBuf, minColor, maxColor); EmitColorIndices(block, outBuf, minColor, maxColor);
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
} }
startX = 0; startX = 0;
} }

View file

@ -66,9 +66,9 @@ namespace ETCC {
const uint32 startBlock = cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()); const uint32 startBlock = cj.CoordsToBlockIdx(cj.XStart(), cj.YStart());
uint8 *outBuf = cj.OutBuf() + startBlock * kBlockSz; uint8 *outBuf = cj.OutBuf() + startBlock * kBlockSz;
uint32 startX = cj.XStart(); uint32 startX = cj.XStart();
bool done = false; for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
for(uint32 j = cj.YStart(); !done; j += 4) { const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 i = startX; !done && i < cj.Width(); i += 4) { for(uint32 i = startX; i < endX; i += 4) {
uint32 pixels[16]; uint32 pixels[16];
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf()); const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
@ -78,9 +78,7 @@ namespace ETCC {
memcpy(pixels + 12, inPixels + (j+3)*cj.Width() + i, 4 * sizeof(uint32)); memcpy(pixels + 12, inPixels + (j+3)*cj.Width() + i, 4 * sizeof(uint32));
pack_etc1_block(outBuf, pixels, params); pack_etc1_block(outBuf, pixels, params);
outBuf += kBlockSz; outBuf += kBlockSz;
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
} }
startX = 0; startX = 0;
} }

View file

@ -78,12 +78,10 @@ unsigned int ImageLoader::GetChannelForPixel(uint32 x, uint32 y, uint32 ch) {
// First make sure that we're in bounds... // First make sure that we're in bounds...
if(x >= GetWidth()) { if(x >= GetWidth()) {
assert(!"Fix requirement that images have multiple-of-four dimensions");
return 0; return 0;
} }
if(y >= GetHeight()) { if(y >= GetHeight()) {
assert(!"Fix requirement that images have multiple-of-four dimensions");
return 0; return 0;
} }