diff --git a/BPTCEncoder/src/BC7Compressor.cpp b/BPTCEncoder/src/BC7Compressor.cpp index 648360c..dab20e5 100755 --- a/BPTCEncoder/src/BC7Compressor.cpp +++ b/BPTCEncoder/src/BC7Compressor.cpp @@ -503,19 +503,6 @@ static inline uint32 fastrand() { return (g_seed>>16) & RAND_MAX; } -static const int kNumStepDirections = 8; -static const RGBADir kStepDirections[kNumStepDirections] = { - // For pBit changes, we have 8 possible directions. - RGBADir(RGBAVector(1.0f, 1.0f, 1.0f, 0.0f)), - RGBADir(RGBAVector(-1.0f, 1.0f, 1.0f, 0.0f)), - RGBADir(RGBAVector(1.0f, -1.0f, 1.0f, 0.0f)), - RGBADir(RGBAVector(-1.0f, -1.0f, 1.0f, 0.0f)), - RGBADir(RGBAVector(1.0f, 1.0f, -1.0f, 0.0f)), - RGBADir(RGBAVector(-1.0f, 1.0f, -1.0f, 0.0f)), - RGBADir(RGBAVector(1.0f, -1.0f, -1.0f, 0.0f)), - RGBADir(RGBAVector(-1.0f, -1.0f, -1.0f, 0.0f)) -}; - static void ChangePointForDirWithoutPbitChange( RGBAVector &v, uint32 dir, const float step[kNumColorChannels] ) { @@ -1641,26 +1628,33 @@ namespace BC7C { // large enough to store the compressed image. This implementation has an 4:1 // compression ratio. 
void Compress(const CompressionJob &cj) { - const unsigned char *inBuf = cj.inBuf; + const uint32 *inPixels = reinterpret_cast(cj.inBuf); unsigned char *outBuf = cj.outBuf; for(uint32 j = 0; j < cj.height; j += 4) { for(uint32 i = 0; i < cj.width; i += 4) { - CompressBC7Block((const uint32 *)inBuf, outBuf); + uint32 block[16]; + memcpy(block, inPixels + j*cj.width + i, 4 * sizeof(uint32)); + memcpy(block + 4, inPixels + (j+1)*cj.width + i, 4 * sizeof(uint32)); + memcpy(block + 8, inPixels + (j+2)*cj.width + i, 4 * sizeof(uint32)); + memcpy(block + 12, inPixels + (j+3)*cj.width + i, 4 * sizeof(uint32)); + + CompressBC7Block(block, outBuf); #ifndef NDEBUG - uint8 *block = reinterpret_cast(outBuf); + const uint8 *inBlock = reinterpret_cast(block); + const uint8 *cmpblock = reinterpret_cast(outBuf); uint32 unComp[16]; - DecompressBC7Block(block, unComp); - uint8* unCompData = reinterpret_cast(unComp); + DecompressBC7Block(cmpblock, unComp); + const uint8* unCompData = reinterpret_cast(unComp); double diffSum = 0.0; for(int k = 0; k < 64; k+=4) { - double rdiff = sad(unCompData[k], inBuf[k]); - double gdiff = sad(unCompData[k+1], inBuf[k+1]); - double bdiff = sad(unCompData[k+2], inBuf[k+2]); - double adiff = sad(unCompData[k+3], inBuf[k+3]); - const double asrc = static_cast(inBuf[k+3]); + double rdiff = sad(unCompData[k], inBlock[k]); + double gdiff = sad(unCompData[k+1], inBlock[k+1]); + double bdiff = sad(unCompData[k+2], inBlock[k+2]); + double adiff = sad(unCompData[k+3], inBlock[k+3]); + const double asrc = static_cast(inBlock[k+3]); const double adst = static_cast(unCompData[k+3]); double avga = ((asrc + adst)*0.5)/255.0; diffSum += (rdiff + gdiff + bdiff + adiff) * avga; @@ -1673,7 +1667,6 @@ namespace BC7C { #endif outBuf += 16; - inBuf += 64; } } } @@ -1730,29 +1723,35 @@ namespace BC7C { #endif // HAS_ATOMICS void CompressWithStats(const CompressionJob &cj, std::ostream *logStream) { - const unsigned char *inBuf = cj.inBuf; + const uint32 *inPixels = 
reinterpret_cast(cj.inBuf); unsigned char *outBuf = cj.outBuf; for(uint32 j = 0; j < cj.height; j += 4) { for(uint32 i = 0; i < cj.width; i += 4) { - const uint32 *pixelBuf = reinterpret_cast(inBuf); + uint32 block[16]; + memcpy(block, inPixels + j*cj.width + i, 4 * sizeof(uint32)); + memcpy(block + 4, inPixels + (j+1)*cj.width + i, 4 * sizeof(uint32)); + memcpy(block + 8, inPixels + (j+2)*cj.width + i, 4 * sizeof(uint32)); + memcpy(block + 12, inPixels + (j+3)*cj.width + i, 4 * sizeof(uint32)); + if(logStream) { - uint64 blockIdx = reinterpret_cast(pixelBuf); - CompressBC7Block(pixelBuf, outBuf, BlockLogger(blockIdx, *logStream)); + uint64 blockIdx = reinterpret_cast(inPixels + j*cj.width + i); + CompressBC7Block(block, outBuf, BlockLogger(blockIdx, *logStream)); } else { - CompressBC7Block(pixelBuf, outBuf); + CompressBC7Block(block, outBuf); } #ifndef NDEBUG - uint8 *block = outBuf; + const uint8 *inBlock = reinterpret_cast(block); + const uint8 *cmpData = outBuf; uint32 unComp[16]; - DecompressBC7Block(block, unComp); - uint8* unCompData = reinterpret_cast(unComp); + DecompressBC7Block(cmpData, unComp); + const uint8* unCompData = reinterpret_cast(unComp); int diffSum = 0; for(int i = 0; i < 64; i++) { - diffSum += sad(unCompData[i], inBuf[i]); + diffSum += sad(unCompData[i], inBlock[i]); } double blockError = static_cast(diffSum) / 64.0; if(blockError > 50.0) { @@ -1762,7 +1761,6 @@ namespace BC7C { #endif outBuf += 16; - inBuf += 64; } } } @@ -2759,17 +2757,21 @@ namespace BC7C { // Convert the image from a BC7 buffer to a RGBA8 buffer void Decompress(const DecompressionJob &dj) { - unsigned char *outBuf = dj.outBuf; - unsigned int blockIdx = 0; + const uint8 *inBuf = dj.inBuf; + uint32 *outBuf = reinterpret_cast(dj.outBuf); for(unsigned int j = 0; j < dj.height; j += 4) { for(unsigned int i = 0; i < dj.width; i += 4) { uint32 pixels[16]; - DecompressBC7Block(dj.inBuf + (16*(blockIdx++)), pixels); + DecompressBC7Block(inBuf, pixels); - memcpy(outBuf, pixels, 
sizeof(pixels)); - outBuf += 64; + memcpy(outBuf + j*dj.width + i, pixels, 4 * sizeof(pixels[0])); + memcpy(outBuf + (j+1)*dj.width + i, pixels+4, 4 * sizeof(pixels[0])); + memcpy(outBuf + (j+2)*dj.width + i, pixels+8, 4 * sizeof(pixels[0])); + memcpy(outBuf + (j+3)*dj.width + i, pixels+12, 4 * sizeof(pixels[0])); + + inBuf += 16; } } } diff --git a/Base/include/Image.h b/Base/include/Image.h index ab9c974..f7b97fc 100644 --- a/Base/include/Image.h +++ b/Base/include/Image.h @@ -62,11 +62,9 @@ namespace FasTC { Image() : m_Width(0), m_Height(0), m_Pixels(0) { } Image(uint32 width, uint32 height); Image(uint32 width, uint32 height, - const PixelType *pixels, - bool bBlockStreamOrder = false); + const PixelType *pixels); Image(uint32 width, uint32 height, - const uint32 *pixels, - bool bBlockStreamOrder = false); + const uint32 *pixels); Image(const Image &); Image &operator=(const Image &); virtual ~Image(); @@ -87,15 +85,6 @@ namespace FasTC { uint32 GetHeight() const { return m_Height; } uint32 GetNumPixels() const { return GetWidth() * GetHeight(); } - void SetBlockStreamOrder(bool flag) { - if(flag) { - ConvertToBlockStreamOrder(); - } else { - ConvertFromBlockStreamOrder(); - } - } - bool GetBlockStreamOrder() const { return m_bBlockStreamOrder; } - template void ConvertTo(Image &other) const { for(uint32 j = 0; j < other.GetWidth(); j++) { @@ -127,16 +116,11 @@ namespace FasTC { uint32 m_Width; uint32 m_Height; - bool m_bBlockStreamOrder; - PixelType *m_Pixels; protected: void SetImageData(uint32 width, uint32 height, PixelType *data); - - void ConvertToBlockStreamOrder(); - void ConvertFromBlockStreamOrder(); }; extern void GenerateGaussianKernel(Image &out, uint32 size, float sigma); diff --git a/Base/src/Image.cpp b/Base/src/Image.cpp index 8eb19f7..0826060 100644 --- a/Base/src/Image.cpp +++ b/Base/src/Image.cpp @@ -70,17 +70,14 @@ template Image::Image(uint32 width, uint32 height) : m_Width(width) , m_Height(height) - , m_bBlockStreamOrder(false) , 
m_Pixels(new PixelType[GetNumPixels()]) { } template Image::Image(uint32 width, uint32 height, - const PixelType *pixels, - bool bBlockStreamOrder) + const PixelType *pixels) : m_Width(width) , m_Height(height) - , m_bBlockStreamOrder(false) { if(pixels) { m_Pixels = new PixelType[GetNumPixels()]; @@ -94,7 +91,6 @@ template Image::Image(const Image &other) : m_Width(other.m_Width) , m_Height(other.m_Height) - , m_bBlockStreamOrder(other.GetBlockStreamOrder()) , m_Pixels(new PixelType[GetNumPixels()]) { memcpy(m_Pixels, other.m_Pixels, GetNumPixels() * sizeof(PixelType)); @@ -112,10 +108,9 @@ bool Image::ReadPixels(const uint32 *rgba) { } template -Image::Image(uint32 width, uint32 height, const uint32 *pixels, bool bBlockStreamOrder) +Image::Image(uint32 width, uint32 height, const uint32 *pixels) : m_Width(width) , m_Height(height) - , m_bBlockStreamOrder(bBlockStreamOrder) { if(pixels) { m_Pixels = new PixelType[GetNumPixels()]; @@ -138,7 +133,6 @@ Image &Image::operator=(const Image &other) { m_Width = other.m_Width; m_Height = other.m_Height; - m_bBlockStreamOrder = other.GetBlockStreamOrder(); if(m_Pixels) { delete [] m_Pixels; @@ -467,59 +461,6 @@ double Image::ComputeEntropy() { return -ret; } -// !FIXME! These won't work for non-RGBA8 data. 
-template -void Image::ConvertToBlockStreamOrder() { - if(m_bBlockStreamOrder || !m_Pixels) - return; - - PixelType *newPixelData = new PixelType[GetWidth() * GetHeight()]; - for(uint32 j = 0; j < GetHeight(); j+=4) { - for(uint32 i = 0; i < GetWidth(); i+=4) { - uint32 blockX = i / 4; - uint32 blockY = j / 4; - uint32 blockIdx = blockY * (GetWidth() / 4) + blockX; - - uint32 offset = blockIdx * 4 * 4; - for(uint32 t = 0; t < 16; t++) { - uint32 x = i + t % 4; - uint32 y = j + t / 4; - newPixelData[offset + t] = m_Pixels[y*GetWidth() + x]; - } - } - } - - delete m_Pixels; - m_Pixels = newPixelData; - m_bBlockStreamOrder = true; -} - -template -void Image::ConvertFromBlockStreamOrder() { - if(!m_bBlockStreamOrder || !m_Pixels) - return; - - PixelType *newPixelData = new PixelType[GetWidth() * GetHeight()]; - for(uint32 j = 0; j < GetHeight(); j+=4) { - for(uint32 i = 0; i < GetWidth(); i+=4) { - uint32 blockX = i / 4; - uint32 blockY = j / 4; - uint32 blockIdx = blockY * (GetWidth() / 4) + blockX; - - uint32 offset = blockIdx * 4 * 4; - for(uint32 t = 0; t < 16; t++) { - uint32 x = i + t % 4; - uint32 y = j + t / 4; - newPixelData[y*GetWidth() + x] = m_Pixels[offset + t]; - } - } - } - - delete m_Pixels; - m_Pixels = newPixelData; - m_bBlockStreamOrder = false; -} - template void Image::SetImageData(uint32 width, uint32 height, PixelType *data) { if(m_Pixels) { diff --git a/CLTool/src/clunix.cpp b/CLTool/src/clunix.cpp index 80f6a4c..0dfcca9 100644 --- a/CLTool/src/clunix.cpp +++ b/CLTool/src/clunix.cpp @@ -56,6 +56,7 @@ void PrintUsage() { fprintf(stderr, "Usage: tc [OPTIONS] imagefile\n"); fprintf(stderr, "\n"); + fprintf(stderr, "\t-v\t\tVerbose mode: prints out Entropy, Mean Local Entropy, and SSIM\n"); fprintf(stderr, "\t-f\t\tFormat to use. Either \"BPTC\", \"ETC1\", \"DXT1\", \"DXT5\", or \"PVRTC\". Default: BPTC\n"); fprintf(stderr, "\t-l\t\tSave an output log.\n"); fprintf(stderr, "\t-q \tSet compression quality level. 
Default: 50\n"); @@ -102,6 +103,7 @@ int main(int argc, char **argv) { bool bSaveLog = false; bool bUseAtomics = false; bool bUsePVRTexLib = false; + bool bVerbose = false; ECompressionFormat format = eCompressionFormat_BPTC; bool knowArg = false; @@ -154,6 +156,13 @@ int main(int argc, char **argv) { continue; } + if(strcmp(argv[fileArg], "-v") == 0) { + fileArg++; + bVerbose = true; + knowArg = true; + continue; + } + if(strcmp(argv[fileArg], "-simd") == 0) { fileArg++; bUseSIMD = true; @@ -224,12 +233,11 @@ int main(int argc, char **argv) { } FasTC::Image<> img(*file.GetImage()); - if(format != eCompressionFormat_BPTC) { - img.SetBlockStreamOrder(false); - } - fprintf(stdout, "Entropy: %.5f\n", img.ComputeEntropy()); - fprintf(stdout, "Mean Local Entropy: %.5f\n", img.ComputeMeanLocalEntropy()); + if(bVerbose) { + fprintf(stdout, "Entropy: %.5f\n", img.ComputeEntropy()); + fprintf(stdout, "Mean Local Entropy: %.5f\n", img.ComputeMeanLocalEntropy()); + } std::ofstream logFile; ThreadSafeStreambuf streamBuf(logFile); @@ -269,11 +277,13 @@ int main(int argc, char **argv) { fprintf(stderr, "Error computing PSNR\n"); } - double SSIM = img.ComputeSSIM(ci); - if(SSIM > 0.0) { - fprintf(stdout, "SSIM: %.9f\n", SSIM); - } else { - fprintf(stderr, "Error computing MSSIM\n"); + if(bVerbose) { + double SSIM = img.ComputeSSIM(ci); + if(SSIM > 0.0) { + fprintf(stdout, "SSIM: %.9f\n", SSIM); + } else { + fprintf(stderr, "Error computing SSIM\n"); + } } if(format == eCompressionFormat_BPTC) { diff --git a/CLTool/src/compare.cpp b/CLTool/src/compare.cpp index 308c543..c9982c4 100644 --- a/CLTool/src/compare.cpp +++ b/CLTool/src/compare.cpp @@ -85,9 +85,6 @@ int main(int argc, char **argv) { FasTC::Image<> img1(*img1f.GetImage()); FasTC::Image<> img2(*img2f.GetImage()); - img1.SetBlockStreamOrder(false); - img2.SetBlockStreamOrder(false); - double PSNR = img1.ComputePSNR(&img2); if(PSNR > 0.0) { fprintf(stdout, "PSNR: %.3f\n", PSNR); diff --git a/Core/src/CompressedImage.cpp 
b/Core/src/CompressedImage.cpp index 616bd01..01281f0 100644 --- a/Core/src/CompressedImage.cpp +++ b/Core/src/CompressedImage.cpp @@ -74,9 +74,7 @@ CompressedImage::CompressedImage( const ECompressionFormat format, const unsigned char *data ) - : FasTC::Image<>(width, height, - reinterpret_cast(NULL), - format == eCompressionFormat_BPTC) + : FasTC::Image<>(width, height, reinterpret_cast(NULL)) , m_Format(format) , m_CompressedData(0) { diff --git a/IO/src/ImageFile.cpp b/IO/src/ImageFile.cpp index 8b29a34..c38cc6c 100644 --- a/IO/src/ImageFile.cpp +++ b/IO/src/ImageFile.cpp @@ -207,7 +207,7 @@ FasTC::Image<> *ImageFile::LoadImage(const unsigned char *rawImageData) const { } uint32 *pixels = reinterpret_cast(pixelData); - FasTC::Image<> *i = new FasTC::Image<>(loader->GetWidth(), loader->GetHeight(), pixels, true); + FasTC::Image<> *i = new FasTC::Image<>(loader->GetWidth(), loader->GetHeight(), pixels); // Cleanup delete loader; diff --git a/IO/src/ImageLoader.cpp b/IO/src/ImageLoader.cpp index 57886b1..d2a2155 100644 --- a/IO/src/ImageLoader.cpp +++ b/IO/src/ImageLoader.cpp @@ -167,60 +167,46 @@ bool ImageLoader::LoadImage() { #endif int byteIdx = 0; - for(uint32 i = 0; i < ah; i+=4) { - for(uint32 j = 0; j < aw; j+= 4) { + for(uint32 j = 0; j < ah; j++) { + for(uint32 i = 0; i < aw; i++) { - // For each block, visit the pixels in sequential order - for(uint32 y = i; y < i+4; y++) { - for(uint32 x = j; x < j+4; x++) { + unsigned int redVal = GetChannelForPixel(i, j, 0); + if(redVal == INT_MAX) + return false; - if(y >= m_Height || x >= m_Width) { - m_PixelData[byteIdx++] = 0; // r - m_PixelData[byteIdx++] = 0; // g - m_PixelData[byteIdx++] = 0; // b - m_PixelData[byteIdx++] = 0; // a - continue; - } + unsigned int greenVal = redVal; + unsigned int blueVal = redVal; - unsigned int redVal = GetChannelForPixel(x, y, 0); - if(redVal == INT_MAX) - return false; - - unsigned int greenVal = redVal; - unsigned int blueVal = redVal; - - if(GetGreenChannelPrecision() > 0) 
{ - greenVal = GetChannelForPixel(x, y, 1); - if(greenVal == INT_MAX) - return false; - } - - if(GetBlueChannelPrecision() > 0) { - blueVal = GetChannelForPixel(x, y, 2); - if(blueVal == INT_MAX) - return false; - } - - unsigned int alphaVal = 0xFF; - if(GetAlphaChannelPrecision() > 0) { - alphaVal = GetChannelForPixel(x, y, 3); - if(alphaVal == INT_MAX) - return false; - } - - // Red channel - m_PixelData[byteIdx++] = redVal & 0xFF; - - // Green channel - m_PixelData[byteIdx++] = greenVal & 0xFF; - - // Blue channel - m_PixelData[byteIdx++] = blueVal & 0xFF; - - // Alpha channel - m_PixelData[byteIdx++] = alphaVal & 0xFF; - } + if(GetGreenChannelPrecision() > 0) { + greenVal = GetChannelForPixel(i, j, 1); + if(greenVal == INT_MAX) + return false; } + + if(GetBlueChannelPrecision() > 0) { + blueVal = GetChannelForPixel(i, j, 2); + if(blueVal == INT_MAX) + return false; + } + + unsigned int alphaVal = 0xFF; + if(GetAlphaChannelPrecision() > 0) { + alphaVal = GetChannelForPixel(i, j, 3); + if(alphaVal == INT_MAX) + return false; + } + + // Red channel + m_PixelData[byteIdx++] = redVal & 0xFF; + + // Green channel + m_PixelData[byteIdx++] = greenVal & 0xFF; + + // Blue channel + m_PixelData[byteIdx++] = blueVal & 0xFF; + + // Alpha channel + m_PixelData[byteIdx++] = alphaVal & 0xFF; } } diff --git a/IO/src/ImageWriter.cpp b/IO/src/ImageWriter.cpp index c9a18b8..9df3901 100644 --- a/IO/src/ImageWriter.cpp +++ b/IO/src/ImageWriter.cpp @@ -45,22 +45,5 @@ #include "Pixel.h" uint32 ImageWriter::GetChannelForPixel(uint32 x, uint32 y, uint32 ch) { - - // Assume pixels are in block stream order, hence we would need to first find - // the block that contains pixel (x, y) and then find the byte location for it. 
- - const uint32 blocksPerRow = GetWidth() / 4; - const uint32 blockIdxX = x / 4; - const uint32 blockIdxY = y / 4; - const uint32 blockIdx = blockIdxY * blocksPerRow + blockIdxX; - - // Now we find the offset in the block - const uint32 blockOffsetX = x % 4; - const uint32 blockOffsetY = y % 4; - const uint32 pixelOffset = blockOffsetY * 4 + blockOffsetX; - - // There are 16 pixels per block... - uint32 dataOffset = blockIdx * 16 + pixelOffset; - - return m_Pixels[dataOffset].Component((ch+1) % 4); + return m_Pixels[y * GetWidth() + x].Component((ch+1) % 4); } diff --git a/IO/src/ImageWriterPNG.cpp b/IO/src/ImageWriterPNG.cpp index 1013253..5fe847a 100644 --- a/IO/src/ImageWriterPNG.cpp +++ b/IO/src/ImageWriterPNG.cpp @@ -87,7 +87,6 @@ public: ImageWriterPNG::ImageWriterPNG(FasTC::Image<> &im) : ImageWriter(im.GetWidth(), im.GetHeight(), im.GetPixels()) - , m_bBlockStreamOrder(im.GetBlockStreamOrder()) , m_StreamPosition(0) { im.ComputePixels(); @@ -132,13 +131,7 @@ bool ImageWriterPNG::WriteImage() { row_pointers[y] = row; for (uint32 x = 0; x < m_Width; ++x) { - if(m_bBlockStreamOrder) { - for(uint32 ch = 0; ch < 4; ch++) { - *row++ = GetChannelForPixel(x, y, ch); - } - } else { - reinterpret_cast(row)[x] = m_Pixels[y * m_Width + x].Pack(); - } + reinterpret_cast(row)[x] = m_Pixels[y * m_Width + x].Pack(); } } diff --git a/IO/src/ImageWriterPNG.h b/IO/src/ImageWriterPNG.h index 5bd03ca..b280869 100644 --- a/IO/src/ImageWriterPNG.h +++ b/IO/src/ImageWriterPNG.h @@ -55,7 +55,6 @@ class ImageWriterPNG : public ImageWriter { virtual bool WriteImage(); private: - bool m_bBlockStreamOrder; uint32 m_StreamPosition; friend class PNGStreamWriter; };