From 8660b24ffe11894fe5299a9744b87dd14581fc1e Mon Sep 17 00:00:00 2001 From: Pavel Krajcevski Date: Thu, 10 Apr 2014 13:39:04 -0400 Subject: [PATCH] Fix multithreaded npot bug. There was a bug where certain blocks were not being written because of improper iteration over non-power-of-two textures, depending on which thread's work was split across a new row of blocks. This should now be fixed. The offending command line arguments looked something like: CLTool/tc -q 0 -t 32 -j 32 /path/to/npot-texture.png --- BPTCEncoder/src/Compressor.cpp | 15 ++++++--------- DXTEncoder/src/DXTCompressor.cpp | 7 +++---- ETCEncoder/src/Compressor.cpp | 8 +++----- IO/src/ImageLoader.cpp | 2 -- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/BPTCEncoder/src/Compressor.cpp b/BPTCEncoder/src/Compressor.cpp index cb6e133..0d2ff6b 100755 --- a/BPTCEncoder/src/Compressor.cpp +++ b/BPTCEncoder/src/Compressor.cpp @@ -1573,10 +1573,9 @@ void Compress(const FasTC::CompressionJob &cj, CompressionSettings settings) { uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz; uint32 startX = cj.XStart(); - bool done = false; - - for(uint32 j = cj.YStart(); !done; j += 4) { - for(uint32 i = startX; !done && i < cj.Width(); i += 4) { + for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) { + const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width(); + for(uint32 i = startX; i < endX; i += 4) { uint32 block[16]; GetBlock(i, j, cj.Width(), inPixels, block); @@ -1608,7 +1607,6 @@ void Compress(const FasTC::CompressionJob &cj, CompressionSettings settings) { #endif outBuf += kBlockSz; - done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 
4 : 0) >= cj.YEnd(); } startX = 0; } @@ -1676,9 +1674,9 @@ void CompressAtomic(FasTC::CompressionJobList &cjl) { uint8 *outBuf = cj.OutBuf() + cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()) * kBlockSz; uint32 startX = cj.XStart(); - bool done = false; - for(uint32 j = cj.YStart(); !done; j += 4) { - for(uint32 i = startX; !done && i < cj.Width(); i += 4) { + for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) { + const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width(); + for(uint32 i = startX; i < endX; i += 4) { uint32 block[16]; GetBlock(i, j, cj.Width(), inPixels, block); @@ -1709,7 +1707,6 @@ void CompressAtomic(FasTC::CompressionJobList &cjl) { #endif outBuf += 16; - done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd(); } startX = 0; diff --git a/DXTEncoder/src/DXTCompressor.cpp b/DXTEncoder/src/DXTCompressor.cpp index d5f129a..c98bc80 100755 --- a/DXTEncoder/src/DXTCompressor.cpp +++ b/DXTEncoder/src/DXTCompressor.cpp @@ -46,9 +46,9 @@ namespace DXTC const uint32 *inPixels = reinterpret_cast(cj.InBuf()); uint32 startX = cj.XStart(); - bool done = false; - for(uint32 j = cj.YStart(); !done; j += 4) { - for(uint32 i = startX; !done && i < cj.Width(); i += 4) { + for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) { + const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width(); + for(uint32 i = startX; i < endX; i += 4) { const uint32 kOffset = j*cj.Width() + i; ExtractBlock(inPixels + kOffset, cj.Width(), block); @@ -56,7 +56,6 @@ namespace DXTC EmitWord(outBuf, ColorTo565(maxColor)); EmitWord(outBuf, ColorTo565(minColor)); EmitColorIndices(block, outBuf, minColor, maxColor); - done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 
4 : 0) >= cj.YEnd(); } startX = 0; } diff --git a/ETCEncoder/src/Compressor.cpp b/ETCEncoder/src/Compressor.cpp index 576e451..90b4596 100644 --- a/ETCEncoder/src/Compressor.cpp +++ b/ETCEncoder/src/Compressor.cpp @@ -66,9 +66,9 @@ namespace ETCC { const uint32 startBlock = cj.CoordsToBlockIdx(cj.XStart(), cj.YStart()); uint8 *outBuf = cj.OutBuf() + startBlock * kBlockSz; uint32 startX = cj.XStart(); - bool done = false; - for(uint32 j = cj.YStart(); !done; j += 4) { - for(uint32 i = startX; !done && i < cj.Width(); i += 4) { + for(uint32 j = cj.YStart(); j <= cj.YEnd(); j += 4) { + const uint32 endX = j == cj.YEnd()? cj.XEnd() : cj.Width(); + for(uint32 i = startX; i < endX; i += 4) { uint32 pixels[16]; const uint32 *inPixels = reinterpret_cast(cj.InBuf()); @@ -78,9 +78,7 @@ namespace ETCC { memcpy(pixels + 12, inPixels + (j+3)*cj.Width() + i, 4 * sizeof(uint32)); pack_etc1_block(outBuf, pixels, params); - outBuf += kBlockSz; - done = i+4 >= cj.XEnd() && j+(i+4 == cj.Width()? 4 : 0) >= cj.YEnd(); } startX = 0; } diff --git a/IO/src/ImageLoader.cpp b/IO/src/ImageLoader.cpp index 0d7c0e0..9250116 100644 --- a/IO/src/ImageLoader.cpp +++ b/IO/src/ImageLoader.cpp @@ -78,12 +78,10 @@ unsigned int ImageLoader::GetChannelForPixel(uint32 x, uint32 y, uint32 ch) { // First make sure that we're in bounds... if(x >= GetWidth()) { - assert(!"Fix requirement that images have multiple-of-four dimensions"); return 0; } if(y >= GetHeight()) { - assert(!"Fix requirement that images have multiple-of-four dimensions"); return 0; }