Fix multithreaded NPOT (non-power-of-two) texture bug.

There was a bug where certain blocks were not being written: when
compressing non-power-of-two textures, the block iteration was incorrect
for any thread whose range of blocks was split across the start of a new
row of blocks. This should now be fixed.

The offending command line arguments looked something like:

CLTool/tc -q 0 -t 32 -j 32 /path/to/npot-texture.png
This commit is contained in:
Pavel Krajcevski 2014-04-10 13:39:04 -04:00
parent 127c825337
commit 8660b24ffe
4 changed files with 12 additions and 20 deletions

View file

@ -1573,10 +1573,9 @@ void Compress(const FasTC::CompressionJob &cj, CompressionSettings settings) {
uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
uint32 startX = cj.XStart();
bool done = false;
for(uint32 j = cj.YStart(); !done; j += 4) {
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 i = startX; i < endX; i += 4) {
uint32 block[16];
GetBlock(i, j, cj.Width(), inPixels, block);
@ -1608,7 +1607,6 @@ void Compress(const FasTC::CompressionJob &cj, CompressionSettings settings) {
#endif
outBuf += kBlockSz;
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
}
startX = 0;
}
@ -1676,9 +1674,9 @@ void CompressAtomic(FasTC::CompressionJobList &cjl) {
uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz;
uint32 startX = cj.XStart();
bool done = false;
for(uint32 j = cj.YStart(); !done; j += 4) {
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 i = startX; i < endX; i += 4) {
uint32 block[16];
GetBlock(i, j, cj.Width(), inPixels, block);
@ -1709,7 +1707,6 @@ void CompressAtomic(FasTC::CompressionJobList &cjl) {
#endif
outBuf += 16;
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
}
startX = 0;

View file

@ -46,9 +46,9 @@ namespace DXTC
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
uint32 startX = cj.XStart();
bool done = false;
for(uint32 j = cj.YStart(); !done; j += 4) {
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 i = startX; i < endX; i += 4) {
const uint32 kOffset = j*cj.Width() + i;
ExtractBlock(inPixels + kOffset, cj.Width(), block);
@ -56,7 +56,6 @@ namespace DXTC
EmitWord(outBuf, ColorTo565(maxColor));
EmitWord(outBuf, ColorTo565(minColor));
EmitColorIndices(block, outBuf, minColor, maxColor);
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
}
startX = 0;
}

View file

@ -66,9 +66,9 @@ namespace ETCC {
const uint32 startBlock = cj.CoordsToBlockIdx(cj.XStart(), cj.YStart());
uint8 *outBuf = cj.OutBuf() + startBlock * kBlockSz;
uint32 startX = cj.XStart();
bool done = false;
for(uint32 j = cj.YStart(); !done; j += 4) {
for(uint32 i = startX; !done && i < cj.Width(); i += 4) {
for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) {
const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width();
for(uint32 i = startX; i < endX; i += 4) {
uint32 pixels[16];
const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.InBuf());
@ -78,9 +78,7 @@ namespace ETCC {
memcpy(pixels + 12, inPixels + (j+3)*cj.Width() + i, 4 * sizeof(uint32));
pack_etc1_block(outBuf, pixels, params);
outBuf += kBlockSz;
done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd();
}
startX = 0;
}

View file

@ -78,12 +78,10 @@ unsigned int ImageLoader::GetChannelForPixel(uint32 x, uint32 y, uint32 ch) {
// First make sure that we're in bounds...
if(x >= GetWidth()) {
assert(!"Fix requirement that images have multiple-of-four dimensions");
return 0;
}
if(y >= GetHeight()) {
assert(!"Fix requirement that images have multiple-of-four dimensions");
return 0;
}