From 8e76d149baac51f515fd12fc42067ef1023ce71a Mon Sep 17 00:00:00 2001
From: Pavel Krajcevski <pavel@cs.unc.edu>
Date: Wed, 6 Nov 2013 18:23:19 -0500
Subject: [PATCH] Remove a bunch of code that assumes that we get our pixel
 data in block stream order...

---
 BPTCEncoder/src/BC7Compressor.cpp | 80 ++++++++++++++--------------
 Base/include/Image.h              | 20 +------
 Base/src/Image.cpp                | 63 +---------------------
 CLTool/src/clunix.cpp             | 30 +++++++----
 CLTool/src/compare.cpp            |  3 --
 Core/src/CompressedImage.cpp      |  4 +-
 IO/src/ImageFile.cpp              |  2 +-
 IO/src/ImageLoader.cpp            | 86 +++++++++++++------------------
 IO/src/ImageWriter.cpp            | 19 +------
 IO/src/ImageWriterPNG.cpp         |  9 +---
 IO/src/ImageWriterPNG.h           |  1 -
 11 files changed, 105 insertions(+), 212 deletions(-)
diff --git a/BPTCEncoder/src/BC7Compressor.cpp b/BPTCEncoder/src/BC7Compressor.cpp
index 648360c..dab20e5 100755
--- a/BPTCEncoder/src/BC7Compressor.cpp
+++ b/BPTCEncoder/src/BC7Compressor.cpp
@@ -503,19 +503,6 @@ static inline uint32 fastrand() {
   return (g_seed>>16) & RAND_MAX;
 }
 
-static const int kNumStepDirections = 8;
-static const RGBADir kStepDirections[kNumStepDirections] = {
-  // For pBit changes, we have 8 possible directions.
-  RGBADir(RGBAVector(1.0f, 1.0f, 1.0f, 0.0f)),
-  RGBADir(RGBAVector(-1.0f, 1.0f, 1.0f, 0.0f)),
-  RGBADir(RGBAVector(1.0f, -1.0f, 1.0f, 0.0f)),
-  RGBADir(RGBAVector(-1.0f, -1.0f, 1.0f, 0.0f)),
-  RGBADir(RGBAVector(1.0f, 1.0f, -1.0f, 0.0f)),
-  RGBADir(RGBAVector(-1.0f, 1.0f, -1.0f, 0.0f)),
-  RGBADir(RGBAVector(1.0f, -1.0f, -1.0f, 0.0f)),
-  RGBADir(RGBAVector(-1.0f, -1.0f, -1.0f, 0.0f))
-};
-
 static void ChangePointForDirWithoutPbitChange(
   RGBAVector &v, uint32 dir, const float step[kNumColorChannels]
 ) {
@@ -1641,26 +1628,33 @@ namespace BC7C {
   // large enough to store the compressed image. This implementation has an 4:1
   // compression ratio.
   void Compress(const CompressionJob &cj) {
-    const unsigned char *inBuf = cj.inBuf;
+    const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.inBuf);
     unsigned char *outBuf = cj.outBuf;
     for(uint32 j = 0; j < cj.height; j += 4) {
       for(uint32 i = 0; i < cj.width; i += 4) {
 
-        CompressBC7Block((const uint32 *)inBuf, outBuf);
+        uint32 block[16];
+        memcpy(block, inPixels + j*cj.width + i, 4 * sizeof(uint32));
+        memcpy(block + 4, inPixels + (j+1)*cj.width + i, 4 * sizeof(uint32));
+        memcpy(block + 8, inPixels + (j+2)*cj.width + i, 4 * sizeof(uint32));
+        memcpy(block + 12, inPixels + (j+3)*cj.width + i, 4 * sizeof(uint32));
+
+        CompressBC7Block(block, outBuf);
 
 #ifndef NDEBUG
-        uint8 *block = reinterpret_cast<uint8 *>(outBuf);
+        const uint8 *inBlock = reinterpret_cast<const uint8 *>(block);
+        const uint8 *cmpblock = reinterpret_cast<const uint8 *>(outBuf);
         uint32 unComp[16];
-        DecompressBC7Block(block, unComp);
-        uint8* unCompData = reinterpret_cast<uint8 *>(unComp);
+        DecompressBC7Block(cmpblock, unComp);
+        const uint8* unCompData = reinterpret_cast<const uint8 *>(unComp);
 
         double diffSum = 0.0;
         for(int k = 0; k < 64; k+=4) {
-          double rdiff = sad(unCompData[k], inBuf[k]);
-          double gdiff = sad(unCompData[k+1], inBuf[k+1]);
-          double bdiff = sad(unCompData[k+2], inBuf[k+2]);
-          double adiff = sad(unCompData[k+3], inBuf[k+3]);
-          const double asrc = static_cast<double>(inBuf[k+3]);
+          double rdiff = sad(unCompData[k], inBlock[k]);
+          double gdiff = sad(unCompData[k+1], inBlock[k+1]);
+          double bdiff = sad(unCompData[k+2], inBlock[k+2]);
+          double adiff = sad(unCompData[k+3], inBlock[k+3]);
+          const double asrc = static_cast<double>(inBlock[k+3]);
           const double adst = static_cast<double>(unCompData[k+3]);
           double avga = ((asrc + adst)*0.5)/255.0;
           diffSum += (rdiff + gdiff + bdiff + adiff) * avga;
@@ -1673,7 +1667,6 @@ namespace BC7C {
 #endif
 
         outBuf += 16;
-        inBuf += 64;
       }
     }
   }
@@ -1730,29 +1723,35 @@ namespace BC7C {
 #endif  // HAS_ATOMICS
 
   void CompressWithStats(const CompressionJob &cj, std::ostream *logStream) {
-    const unsigned char *inBuf = cj.inBuf;
+    const uint32 *inPixels = reinterpret_cast<const uint32 *>(cj.inBuf);
     unsigned char *outBuf = cj.outBuf;
 
     for(uint32 j = 0; j < cj.height; j += 4) {
       for(uint32 i = 0; i < cj.width; i += 4) {
 
-        const uint32 *pixelBuf = reinterpret_cast<const uint32 *>(inBuf);
+        uint32 block[16];
+        memcpy(block, inPixels + j*cj.width + i, 4 * sizeof(uint32));
+        memcpy(block + 4, inPixels + (j+1)*cj.width + i, 4 * sizeof(uint32));
+        memcpy(block + 8, inPixels + (j+2)*cj.width + i, 4 * sizeof(uint32));
+        memcpy(block + 12, inPixels + (j+3)*cj.width + i, 4 * sizeof(uint32));
+
         if(logStream) {
-          uint64 blockIdx = reinterpret_cast<uint64>(pixelBuf);
-          CompressBC7Block(pixelBuf, outBuf, BlockLogger(blockIdx, *logStream));
+          uint64 blockIdx = reinterpret_cast<uint64>(inPixels + j*cj.width + i);
+          CompressBC7Block(block, outBuf, BlockLogger(blockIdx, *logStream));
         } else {
-          CompressBC7Block(pixelBuf, outBuf);
+          CompressBC7Block(block, outBuf);
         }
 
 #ifndef NDEBUG
-        uint8 *block = outBuf;
+        const uint8 *inBlock = reinterpret_cast<const uint8 *>(block);
+        const uint8 *cmpData = outBuf;
         uint32 unComp[16];
-        DecompressBC7Block(block, unComp);
-        uint8* unCompData = reinterpret_cast<uint8 *>(unComp);
+        DecompressBC7Block(cmpData, unComp);
+        const uint8* unCompData = reinterpret_cast<uint8 *>(unComp);
 
         int diffSum = 0;
         for(int i = 0; i < 64; i++) {
-          diffSum += sad(unCompData[i], inBuf[i]);
+          diffSum += sad(unCompData[i], inBlock[i]);
         }
         double blockError = static_cast<double>(diffSum) / 64.0;
         if(blockError > 50.0) {
@@ -1762,7 +1761,6 @@ namespace BC7C {
 #endif
 
         outBuf += 16;
-        inBuf += 64;
       }
     }
   }
@@ -2759,17 +2757,21 @@ namespace BC7C {
   // Convert the image from a BC7 buffer to a RGBA8 buffer
   void Decompress(const DecompressionJob &dj) {
 
-    unsigned char *outBuf = dj.outBuf;
-    unsigned int blockIdx = 0;
+    const uint8 *inBuf = dj.inBuf;
+    uint32 *outBuf = reinterpret_cast<uint32 *>(dj.outBuf);
 
     for(unsigned int j = 0; j < dj.height; j += 4) {
       for(unsigned int i = 0; i < dj.width; i += 4) {
 
         uint32 pixels[16];
-        DecompressBC7Block(dj.inBuf + (16*(blockIdx++)), pixels);
+        DecompressBC7Block(inBuf, pixels);
 
-        memcpy(outBuf, pixels, sizeof(pixels));
-        outBuf += 64;
+        memcpy(outBuf + j*dj.width + i, pixels, 4 * sizeof(pixels[0]));
+        memcpy(outBuf + (j+1)*dj.width + i, pixels+4, 4 * sizeof(pixels[0]));
+        memcpy(outBuf + (j+2)*dj.width + i, pixels+8, 4 * sizeof(pixels[0]));
+        memcpy(outBuf + (j+3)*dj.width + i, pixels+12, 4 * sizeof(pixels[0]));
+
+        inBuf += 16;
       }
     }
   }
diff --git a/Base/include/Image.h b/Base/include/Image.h
index ab9c974..f7b97fc 100644
--- a/Base/include/Image.h
+++ b/Base/include/Image.h
@@ -62,11 +62,9 @@ namespace FasTC {
     Image() : m_Width(0), m_Height(0), m_Pixels(0) { }
     Image(uint32 width, uint32 height);
     Image(uint32 width, uint32 height,
-          const PixelType *pixels,
-          bool bBlockStreamOrder = false);
+          const PixelType *pixels);
     Image(uint32 width, uint32 height,
-          const uint32 *pixels,
-          bool bBlockStreamOrder = false);
+          const uint32 *pixels);
     Image(const Image<PixelType> &);
     Image &operator=(const Image<PixelType> &);
     virtual ~Image();
@@ -87,15 +85,6 @@ namespace FasTC {
     uint32 GetHeight() const { return m_Height; }
     uint32 GetNumPixels() const { return GetWidth() * GetHeight(); }
 
-    void SetBlockStreamOrder(bool flag) {
-      if(flag) {
-        ConvertToBlockStreamOrder();
-      } else {
-        ConvertFromBlockStreamOrder();
-      }
-    }
-    bool GetBlockStreamOrder() const { return m_bBlockStreamOrder; }
-
     template<typename OtherPixelType>
     void ConvertTo(Image<OtherPixelType> &other) const {
       for(uint32 j = 0; j < other.GetWidth(); j++) {
@@ -127,16 +116,11 @@ namespace FasTC {
     uint32 m_Width;
     uint32 m_Height;
 
-    bool m_bBlockStreamOrder;
-
     PixelType *m_Pixels;
 
    protected:
 
     void SetImageData(uint32 width, uint32 height, PixelType *data);
-
-    void ConvertToBlockStreamOrder();
-    void ConvertFromBlockStreamOrder();
   };
 
   extern void GenerateGaussianKernel(Image<IPixel> &out, uint32 size, float sigma);
diff --git a/Base/src/Image.cpp b/Base/src/Image.cpp
index 8eb19f7..0826060 100644
--- a/Base/src/Image.cpp
+++ b/Base/src/Image.cpp
@@ -70,17 +70,14 @@ template<typename PixelType>
 Image<PixelType>::Image(uint32 width, uint32 height)
   : m_Width(width)
   , m_Height(height)
-  , m_bBlockStreamOrder(false)
   , m_Pixels(new PixelType[GetNumPixels()])
 { }
 
 template<typename PixelType>
 Image<PixelType>::Image(uint32 width, uint32 height,
-                        const PixelType *pixels,
-                        bool bBlockStreamOrder)
+                        const PixelType *pixels)
   : m_Width(width)
   , m_Height(height)
-  , m_bBlockStreamOrder(false)
 {
   if(pixels) {
     m_Pixels = new PixelType[GetNumPixels()];
@@ -94,7 +91,6 @@ template<typename PixelType>
 Image<PixelType>::Image(const Image<PixelType> &other)
   : m_Width(other.m_Width)
   , m_Height(other.m_Height)
-  , m_bBlockStreamOrder(other.GetBlockStreamOrder())
   , m_Pixels(new PixelType[GetNumPixels()])
 {
   memcpy(m_Pixels, other.m_Pixels, GetNumPixels() * sizeof(PixelType));
@@ -112,10 +108,9 @@ bool Image<PixelType>::ReadPixels(const uint32 *rgba) {
 }
 
 template<typename PixelType>
-Image<PixelType>::Image(uint32 width, uint32 height, const uint32 *pixels, bool bBlockStreamOrder)
+Image<PixelType>::Image(uint32 width, uint32 height, const uint32 *pixels)
   : m_Width(width)
   , m_Height(height)
-  , m_bBlockStreamOrder(bBlockStreamOrder)
 {
   if(pixels) {
     m_Pixels = new PixelType[GetNumPixels()];
@@ -138,7 +133,6 @@ Image<PixelType> &Image<PixelType>::operator=(const Image &other) {
   
   m_Width = other.m_Width;
   m_Height = other.m_Height;
-  m_bBlockStreamOrder = other.GetBlockStreamOrder();
   
   if(m_Pixels) {
     delete [] m_Pixels;
@@ -467,59 +461,6 @@ double Image<PixelType>::ComputeEntropy() {
   return -ret;
 }
 
-// !FIXME! These won't work for non-RGBA8 data.
-template<typename PixelType>
-void Image<PixelType>::ConvertToBlockStreamOrder() {
-  if(m_bBlockStreamOrder || !m_Pixels)
-    return;
-
-  PixelType *newPixelData = new PixelType[GetWidth() * GetHeight()];
-  for(uint32 j = 0; j < GetHeight(); j+=4) {
-    for(uint32 i = 0; i < GetWidth(); i+=4) {
-      uint32 blockX = i / 4;
-      uint32 blockY = j / 4;
-      uint32 blockIdx = blockY * (GetWidth() / 4) + blockX;
-
-      uint32 offset = blockIdx * 4 * 4;
-      for(uint32 t = 0; t < 16; t++) {
-        uint32 x = i + t % 4;
-        uint32 y = j + t / 4;
-        newPixelData[offset + t] = m_Pixels[y*GetWidth() + x];
-      }
-    }
-  }
-
-  delete m_Pixels;
-  m_Pixels = newPixelData;
-  m_bBlockStreamOrder = true;
-}
-
-template<typename PixelType>
-void Image<PixelType>::ConvertFromBlockStreamOrder() {
-  if(!m_bBlockStreamOrder || !m_Pixels)
-    return;
-
-  PixelType *newPixelData = new PixelType[GetWidth() * GetHeight()];
-  for(uint32 j = 0; j < GetHeight(); j+=4) {
-    for(uint32 i = 0; i < GetWidth(); i+=4) {
-      uint32 blockX = i / 4;
-      uint32 blockY = j / 4;
-      uint32 blockIdx = blockY * (GetWidth() / 4) + blockX;
-
-      uint32 offset = blockIdx * 4 * 4;
-      for(uint32 t = 0; t < 16; t++) {
-        uint32 x = i + t % 4;
-        uint32 y = j + t / 4;
-        newPixelData[y*GetWidth() + x] = m_Pixels[offset + t];
-      }
-    }
-  }
-
-  delete m_Pixels;
-  m_Pixels = newPixelData;
-  m_bBlockStreamOrder = false;
-}
-
 template<typename PixelType>
 void Image<PixelType>::SetImageData(uint32 width, uint32 height, PixelType *data) {
   if(m_Pixels) {
diff --git a/CLTool/src/clunix.cpp b/CLTool/src/clunix.cpp
index 80f6a4c..0dfcca9 100644
--- a/CLTool/src/clunix.cpp
+++ b/CLTool/src/clunix.cpp
@@ -56,6 +56,7 @@
 void PrintUsage() {
   fprintf(stderr, "Usage: tc [OPTIONS] imagefile\n");
   fprintf(stderr, "\n");
+  fprintf(stderr, "\t-v\t\tVerbose mode: prints out Entropy, Mean Local Entropy, and MSSIM\n");
   fprintf(stderr, "\t-f\t\tFormat to use. Either \"BPTC\", \"ETC1\", \"DXT1\", \"DXT5\", or \"PVRTC\". Default: BPTC\n");
   fprintf(stderr, "\t-l\t\tSave an output log.\n");
   fprintf(stderr, "\t-q <quality>\tSet compression quality level. Default: 50\n");
@@ -102,6 +103,7 @@ int main(int argc, char **argv) {
   bool bSaveLog = false;
   bool bUseAtomics = false;
   bool bUsePVRTexLib = false;
+  bool bVerbose = false;
   ECompressionFormat format = eCompressionFormat_BPTC;
 
   bool knowArg = false;
@@ -154,6 +156,13 @@ int main(int argc, char **argv) {
       continue;
     }
     
+    if(strcmp(argv[fileArg], "-v") == 0) {
+      fileArg++;
+      bVerbose = true;
+      knowArg = true;
+      continue;
+    }
+    
     if(strcmp(argv[fileArg], "-simd") == 0) {
       fileArg++;
       bUseSIMD = true;
@@ -224,12 +233,11 @@ int main(int argc, char **argv) {
   }
 
   FasTC::Image<> img(*file.GetImage());
-  if(format != eCompressionFormat_BPTC) {
-    img.SetBlockStreamOrder(false);
-  }
 
-  fprintf(stdout, "Entropy: %.5f\n", img.ComputeEntropy());
-  fprintf(stdout, "Mean Local Entropy: %.5f\n", img.ComputeMeanLocalEntropy());
+  if(bVerbose) {
+    fprintf(stdout, "Entropy: %.5f\n", img.ComputeEntropy());
+    fprintf(stdout, "Mean Local Entropy: %.5f\n", img.ComputeMeanLocalEntropy());
+  }
 
   std::ofstream logFile;
   ThreadSafeStreambuf streamBuf(logFile);
@@ -269,11 +277,13 @@ int main(int argc, char **argv) {
     fprintf(stderr, "Error computing PSNR\n");
   }
 
-  double SSIM = img.ComputeSSIM(ci);
-  if(SSIM > 0.0) {
-    fprintf(stdout, "SSIM: %.9f\n", SSIM);
-  } else {
-    fprintf(stderr, "Error computing MSSIM\n");
+  if(bVerbose) {
+    double SSIM = img.ComputeSSIM(ci);
+    if(SSIM > 0.0) {
+      fprintf(stdout, "SSIM: %.9f\n", SSIM);
+    } else {
+      fprintf(stderr, "Error computing SSIM\n");
+    }
   }
 
   if(format == eCompressionFormat_BPTC) {
diff --git a/CLTool/src/compare.cpp b/CLTool/src/compare.cpp
index 308c543..c9982c4 100644
--- a/CLTool/src/compare.cpp
+++ b/CLTool/src/compare.cpp
@@ -85,9 +85,6 @@ int main(int argc, char **argv) {
   FasTC::Image<> img1(*img1f.GetImage());
   FasTC::Image<> img2(*img2f.GetImage());
 
-  img1.SetBlockStreamOrder(false);
-  img2.SetBlockStreamOrder(false);
-
   double PSNR = img1.ComputePSNR(&img2);
   if(PSNR > 0.0) {
     fprintf(stdout, "PSNR: %.3f\n", PSNR);
diff --git a/Core/src/CompressedImage.cpp b/Core/src/CompressedImage.cpp
index 616bd01..01281f0 100644
--- a/Core/src/CompressedImage.cpp
+++ b/Core/src/CompressedImage.cpp
@@ -74,9 +74,7 @@ CompressedImage::CompressedImage(
   const ECompressionFormat format,
   const unsigned char *data
 )
-  : FasTC::Image<>(width, height,
-                   reinterpret_cast<uint32 *>(NULL),
-                   format == eCompressionFormat_BPTC)
+  : FasTC::Image<>(width, height, reinterpret_cast<uint32 *>(NULL))
   , m_Format(format)
   , m_CompressedData(0)
 {
diff --git a/IO/src/ImageFile.cpp b/IO/src/ImageFile.cpp
index 8b29a34..c38cc6c 100644
--- a/IO/src/ImageFile.cpp
+++ b/IO/src/ImageFile.cpp
@@ -207,7 +207,7 @@ FasTC::Image<> *ImageFile::LoadImage(const unsigned char *rawImageData) const {
   }
 
   uint32 *pixels = reinterpret_cast<uint32 *>(pixelData);
-  FasTC::Image<> *i = new FasTC::Image<>(loader->GetWidth(), loader->GetHeight(), pixels, true);
+  FasTC::Image<> *i = new FasTC::Image<>(loader->GetWidth(), loader->GetHeight(), pixels);
 
   // Cleanup
   delete loader;
diff --git a/IO/src/ImageLoader.cpp b/IO/src/ImageLoader.cpp
index 57886b1..d2a2155 100644
--- a/IO/src/ImageLoader.cpp
+++ b/IO/src/ImageLoader.cpp
@@ -167,60 +167,46 @@ bool ImageLoader::LoadImage() {
 #endif
 
   int byteIdx = 0;
-  for(uint32 i = 0; i < ah; i+=4) {
-    for(uint32 j = 0; j < aw; j+= 4) {
+  for(uint32 j = 0; j < ah; j++) {
+    for(uint32 i = 0; i < aw; i++) {
 
-      // For each block, visit the pixels in sequential order
-      for(uint32 y = i; y < i+4; y++) {
-        for(uint32 x = j; x < j+4; x++) {
+      unsigned int redVal = GetChannelForPixel(i, j, 0);
+      if(redVal == INT_MAX)
+        return false;
 
-          if(y >= m_Height || x >= m_Width) {
-            m_PixelData[byteIdx++] = 0; // r
-            m_PixelData[byteIdx++] = 0; // g
-            m_PixelData[byteIdx++] = 0; // b
-            m_PixelData[byteIdx++] = 0; // a
-            continue;
-          }
+      unsigned int greenVal = redVal;
+      unsigned int blueVal = redVal;
 
-          unsigned int redVal = GetChannelForPixel(x, y, 0);
-          if(redVal == INT_MAX)
-            return false;
-
-          unsigned int greenVal = redVal;
-          unsigned int blueVal = redVal;
-
-          if(GetGreenChannelPrecision() > 0) {
-            greenVal = GetChannelForPixel(x, y, 1);
-            if(greenVal == INT_MAX)
-              return false;
-          }
-
-          if(GetBlueChannelPrecision() > 0) {
-            blueVal = GetChannelForPixel(x, y, 2);
-            if(blueVal == INT_MAX)
-              return false;
-          }
-
-          unsigned int alphaVal = 0xFF;
-          if(GetAlphaChannelPrecision() > 0) {
-            alphaVal = GetChannelForPixel(x, y, 3);
-            if(alphaVal == INT_MAX)
-              return false;
-          }
-
-          // Red channel
-          m_PixelData[byteIdx++] = redVal & 0xFF;
-
-          // Green channel
-          m_PixelData[byteIdx++] = greenVal & 0xFF;
-
-          // Blue channel
-          m_PixelData[byteIdx++] = blueVal & 0xFF;
-
-          // Alpha channel
-          m_PixelData[byteIdx++] = alphaVal & 0xFF;
-        }
+      if(GetGreenChannelPrecision() > 0) {
+        greenVal = GetChannelForPixel(i, j, 1);
+        if(greenVal == INT_MAX)
+          return false;
       }
+
+      if(GetBlueChannelPrecision() > 0) {
+        blueVal = GetChannelForPixel(i, j, 2);
+        if(blueVal == INT_MAX)
+          return false;
+      }
+
+      unsigned int alphaVal = 0xFF;
+      if(GetAlphaChannelPrecision() > 0) {
+        alphaVal = GetChannelForPixel(i, j, 3);
+        if(alphaVal == INT_MAX)
+          return false;
+      }
+
+      // Red channel
+      m_PixelData[byteIdx++] = redVal & 0xFF;
+
+      // Green channel
+      m_PixelData[byteIdx++] = greenVal & 0xFF;
+
+      // Blue channel
+      m_PixelData[byteIdx++] = blueVal & 0xFF;
+
+      // Alpha channel
+      m_PixelData[byteIdx++] = alphaVal & 0xFF;
     }
   }
 
diff --git a/IO/src/ImageWriter.cpp b/IO/src/ImageWriter.cpp
index c9a18b8..9df3901 100644
--- a/IO/src/ImageWriter.cpp
+++ b/IO/src/ImageWriter.cpp
@@ -45,22 +45,5 @@
 #include "Pixel.h"
 
 uint32 ImageWriter::GetChannelForPixel(uint32 x, uint32 y, uint32 ch) {
-
-  // Assume pixels are in block stream order, hence we would need to first find
-  // the block that contains pixel (x, y) and then find the byte location for it.
-
-  const uint32 blocksPerRow = GetWidth() / 4;
-  const uint32 blockIdxX = x / 4;
-  const uint32 blockIdxY = y / 4;
-  const uint32 blockIdx = blockIdxY * blocksPerRow + blockIdxX;
-
-  // Now we find the offset in the block
-  const uint32 blockOffsetX = x % 4;
-  const uint32 blockOffsetY = y % 4;
-  const uint32 pixelOffset = blockOffsetY * 4 + blockOffsetX;
-
-  // There are 16 pixels per block...
-  uint32 dataOffset = blockIdx * 16 + pixelOffset;
-
-  return m_Pixels[dataOffset].Component((ch+1) % 4);
+  return m_Pixels[y * GetWidth() + x].Component((ch+1) % 4);
 }
diff --git a/IO/src/ImageWriterPNG.cpp b/IO/src/ImageWriterPNG.cpp
index 1013253..5fe847a 100644
--- a/IO/src/ImageWriterPNG.cpp
+++ b/IO/src/ImageWriterPNG.cpp
@@ -87,7 +87,6 @@ public:
 
 ImageWriterPNG::ImageWriterPNG(FasTC::Image<> &im)
   : ImageWriter(im.GetWidth(), im.GetHeight(), im.GetPixels())
-  , m_bBlockStreamOrder(im.GetBlockStreamOrder())
   , m_StreamPosition(0)
 {
   im.ComputePixels();
@@ -132,13 +131,7 @@ bool ImageWriterPNG::WriteImage() {
     row_pointers[y] = row;
 
     for (uint32 x = 0; x < m_Width; ++x) {
-      if(m_bBlockStreamOrder) {
-        for(uint32 ch = 0; ch < 4; ch++) {
-          *row++ = GetChannelForPixel(x, y, ch);
-        }
-      } else {
-        reinterpret_cast<uint32 *>(row)[x] = m_Pixels[y * m_Width + x].Pack();
-      }
+      reinterpret_cast<uint32 *>(row)[x] = m_Pixels[y * m_Width + x].Pack();
     }
   }
     
diff --git a/IO/src/ImageWriterPNG.h b/IO/src/ImageWriterPNG.h
index 5bd03ca..b280869 100644
--- a/IO/src/ImageWriterPNG.h
+++ b/IO/src/ImageWriterPNG.h
@@ -55,7 +55,6 @@ class ImageWriterPNG : public ImageWriter {
 
   virtual bool WriteImage();
  private:
-  bool m_bBlockStreamOrder;
   uint32 m_StreamPosition;
   friend class PNGStreamWriter;
 };